diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 01ad1577b7..6cbdeb6270 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1,2 @@ -* @microsoft/hlsl-release +# Uncomment the next line in release branches after ask-mode begins +# * @microsoft/hlsl-release diff --git a/.github/workflows/clang-format-checker.yml b/.github/workflows/clang-format-checker.yml index 7e39a5b0be..d1887e4519 100644 --- a/.github/workflows/clang-format-checker.yml +++ b/.github/workflows/clang-format-checker.yml @@ -13,16 +13,23 @@ jobs: pull-requests: write steps: - name: Fetch LLVM sources - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: - fetch-depth: 2 + ref: ${{ github.event.pull_request.head.sha }} + + - name: Checkout through merge base + uses: rmacklin/fetch-through-merge-base@bfe4d03a86f9afa52bc1a70e9814fc92a07f7b75 # v0.3.0 + with: + base_ref: ${{ github.event.pull_request.base.ref }} + head_ref: ${{ github.event.pull_request.head.sha }} + deepen_length: 500 - name: Get changed files id: changed-files - uses: tj-actions/changed-files@v41 + uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1 with: separator: "," - fetch_depth: 100 # Fetches only the last 10 commits + skip_initial_fetch: true - name: "Listed files" env: diff --git a/.github/workflows/coverage-gh-pages.yml b/.github/workflows/coverage-gh-pages.yml index 4c7b2c2018..07e63584e3 100644 --- a/.github/workflows/coverage-gh-pages.yml +++ b/.github/workflows/coverage-gh-pages.yml @@ -26,11 +26,11 @@ jobs: timeout-minutes: 240 steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: submodules: true - name: Setup Pages - uses: actions/configure-pages@v2 + uses: actions/configure-pages@v5 - name: Install dependencies run: sudo apt install -y ninja-build - name: Configure @@ -44,7 +44,7 @@ jobs: - name: Force artifact permissions run: chmod -c -R +rX ${{github.workspace}}/build/report - 
name: Upload artifact - uses: actions/upload-pages-artifact@v1 + uses: actions/upload-pages-artifact@v3 with: path: ${{github.workspace}}/build/report @@ -60,4 +60,4 @@ jobs: steps: - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@v1 + uses: actions/deploy-pages@v4 diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f7db99784..5210718005 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,15 +17,6 @@ if(POLICY CMP0022) cmake_policy(SET CMP0022 NEW) # automatic when 2.8.12 is required endif() -if (POLICY CMP0051) - # CMake 3.1 and higher include generator expressions of the form - # $ in the SOURCES property. These need to be - # stripped everywhere that access the SOURCES property, so we just - # defer to the OLD behavior of not including generator expressions - # in the output for now. - cmake_policy(SET CMP0051 OLD) -endif() - if(CMAKE_VERSION VERSION_LESS 3.1.20141117) set(cmake_3_2_USES_TERMINAL) else() @@ -686,6 +677,8 @@ add_subdirectory(include/dxc) # really depend on anything else in the build it is safe. list(APPEND LLVM_COMMON_DEPENDS HCTGen) +add_subdirectory(utils/hct) + if(EXISTS "${LLVM_MAIN_SRC_DIR}/external") add_subdirectory(external) # SPIRV change endif() @@ -769,9 +762,7 @@ if (LLVM_INCLUDE_DOCS) add_subdirectory(docs) endif() -if (LLVM_BUILD_DOCS) - add_hlsl_hctgen(DxilDocs OUTPUT docs/DXIL.rst CODE_TAG) # HLSL Change -endif() +add_hlsl_hctgen(DxilDocs OUTPUT docs/DXIL.rst CODE_TAG) # HLSL Change add_subdirectory(cmake/modules) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 233211f150..840b4f0f17 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -40,10 +40,32 @@ Before submitting a feature or substantial code contribution please discuss it w ### Coding guidelines -The coding, style, and general engineering guidelines follow those described in the docs/CodingStandards.rst. For additional guidelines in code specific to HLSL, see the docs/HLSLChanges.rst file. 
+The coding, style, and general engineering guidelines follow those described in the [LLVM Coding Standards](docs/CodingStandards.rst). For additional guidelines in code specific to HLSL, see the [HLSL Changes](docs/HLSLChanges.rst) docs. DXC has adopted a clang-format requirement for all incoming changes to C and C++ files. PRs to DXC should have the *changed code* clang formatted to the LLVM style, and leave the remaining portions of the file unchanged. This can be done using the `git-clang-format` tool or IDE driven workflows. A GitHub action will run on all PRs to validate that the change is properly formatted. +#### Applying LLVM Standards + +All new code contributed to DXC should follow the LLVM coding standards. + +Note that the LLVM Coding Standards have a golden rule: + +> **If you are extending, enhancing, or bug fixing already implemented code, use the style that is already being used so that the source is uniform and easy to follow.** + +The golden rule should continue to be applied to places where DXC is self-consistent. A good example is DXC's common use of `PascalCase` instead of `camelCase` for APIs in some parts of the HLSL implementation. In any place where DXC is not self-consistent new code should follow the LLVM Coding Standard. + +A good secondary rule to follow is: + +> **When in doubt, follow LLVM.** + +Adopting LLVM's coding standards provides a consistent set of rules and guidelines to hold all contributions to. This allows patch authors to clearly understand the expectations placed on contributions, and allows reviewers to have a bar to measure contributions against. Aligning with LLVM by default ensures the path of least resistance for everyone. + +Since many of the LLVM Coding Standards are not enforced automatically we rely on code reviews to provide feedback and ensure contributions align with the expected coding standards. 
Since we rely on reviewers for enforcement and humans make mistakes, please keep in mind: + +> **Code review is a conversation.** + +It is completely reasonable for a patch author to question feedback and provide additional context about why something was done the way it was. Reviewers often see narrow slices in diffs rather than the full context of a file or part of the compiler, so they may not always provide perfect feedback. This is especially true with the application of the "golden rule" since it depends on understanding a wider context. + ### Documenting Pull Requests Pull request descriptions should have the following format: diff --git a/README.md b/README.md index 35c0132068..ddafde2115 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,16 @@ Development kits containing only the dxc.exe driver app, the dxcompiler.dll, and As an example of community contribution, this project can also target the [SPIR-V](https://www.khronos.org/registry/spir-v/) intermediate representation. Please see the [doc](docs/SPIR-V.rst) for how HLSL features are mapped to SPIR-V, and the [wiki](https://github.com/microsoft/DirectXShaderCompiler/wiki/SPIR%E2%80%90V-CodeGen) page for how to build, use, and contribute to the SPIR-V CodeGen. +### Metal CodeGen + +When built from source DXC can utilize the [Metal Shader +Converter](https://developer.apple.com/metal/shader-converter/) if it is +available during build and configuration time. This allows DXC to generate Metal +shader libraries directly using the `-metal` flag. + +Note: DXC cannot currently disassemble Metal shaders so the `-Fc` flag cannot be +used in conjunction with the `-Fo` flag. + ## Building Sources See the full documentation for [Building and testing DXC](docs/BuildingAndTestingDXC.rst) for detailed instructions. 
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index 01b30568a9..4541d08162 100644 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -568,3 +568,12 @@ else() endif() string(REPLACE " " ";" LLVM_BINDINGS_LIST "${LLVM_BINDINGS}") + +# HLSL Change Begin - Metal IR Converter +find_package(MetalIRConverter) +if (METAL_IRCONVERTER_FOUND) + set(ENABLE_METAL_CODEGEN On) + message(STATUS "Enabling Metal Support") + add_definitions(-DENABLE_METAL_CODEGEN) +endif() +# HLSL Change End - Metal IR Converter diff --git a/cmake/modules/FindMetalIRConverter.cmake b/cmake/modules/FindMetalIRConverter.cmake new file mode 100644 index 0000000000..fc7df1d6cc --- /dev/null +++ b/cmake/modules/FindMetalIRConverter.cmake @@ -0,0 +1,16 @@ +find_path(METAL_IRCONVERTER_INCLUDE_DIR metal_irconverter.h + HINTS /usr/local/include/metal_irconverter + DOC "Path to metal IR converter headers" + ) + +find_library(METAL_IRCONVERTER_LIB NAMES metalirconverter + PATH_SUFFIXES lib + ) + +include(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(METAL_IRCONVERTER + REQUIRED_VARS METAL_IRCONVERTER_LIB METAL_IRCONVERTER_INCLUDE_DIR) + +message(STATUS "Metal IR Converter Include Dir: ${METAL_IRCONVERTER_INCLUDE_DIR}") +message(STATUS "Metal IR Converter Library: ${METAL_IRCONVERTER_LIB}") +mark_as_advanced(METAL_IRCONVERTER_LIB METAL_IRCONVERTER_INCLUDE_DIR) diff --git a/docs/DXIL.rst b/docs/DXIL.rst index c3baf4e454..a1c5055085 100644 --- a/docs/DXIL.rst +++ b/docs/DXIL.rst @@ -225,10 +225,10 @@ DXIL uses 32-bit pointers in its representation. Out-of-bounds behavior ---------------------- -Indexable thread-local accesses are done via LLVM pointer and have C-like OOB semantics. -Groupshared accesses are done via LLVM pointer too. The origin of a groupshared pointer must be a single TGSM allocation. -If a groupshared pointer uses in-bound GEP instruction, it should not OOB. The behavior for an OOB access for in-bound pointer is undefined. 
-For groupshared pointer from regular GEP, OOB will has same behavior as DXBC. Loads return 0 for OOB accesses; OOB stores are silently dropped. +Indexable thread-local accesses are done via LLVM pointers and have C-like OOB semantics. +Groupshared accesses are done via LLVM pointers too. The origin of a groupshared pointer must be a single TGSM allocation. +If a groupshared pointer uses an in-bound GEP instruction, it should not OOB. The behavior for an OOB access for in-bound pointer is undefined. +For a groupshared pointer from regular GEP, OOB will have the same behavior as DXBC. Loads return 0 for OOB accesses; OOB stores are silently dropped. Resource accesses keeps the same out-of-bounds behavior as DXBC. Loads return 0 for OOB accesses; OOB stores are silently dropped. @@ -1984,54 +1984,57 @@ The following LLVM instructions are valid in a DXIL program, with the specified .. hctdb_instrhelp.get_instrs_rst() .. INSTR-RST:BEGIN -============= ======================================================================= ================= -Instruction Action Operand overloads -============= ======================================================================= ================= -Ret returns a value (possibly void), from a function. 
vhfd1wil -Br branches (conditional or unconditional) -Switch performs a multiway switch -Add returns the sum of its two operands wil -FAdd returns the sum of its two operands hfd -Sub returns the difference of its two operands wil -FSub returns the difference of its two operands hfd -Mul returns the product of its two operands wil -FMul returns the product of its two operands hfd -UDiv returns the quotient of its two unsigned operands wil -SDiv returns the quotient of its two signed operands wil -FDiv returns the quotient of its two operands hfd -URem returns the remainder from the unsigned division of its two operands wil -SRem returns the remainder from the signed division of its two operands wil -FRem returns the remainder from the division of its two operands hfd -Shl shifts left (logical) wil -LShr shifts right (logical), with zero bit fill wil -AShr shifts right (arithmetic), with 'a' operand sign bit fill wil -And returns a bitwise logical and of its two operands 1wil -Or returns a bitwise logical or of its two operands 1wil -Xor returns a bitwise logical xor of its two operands 1wil -Alloca allocates memory on the stack frame of the currently executing function -Load reads from memory -Store writes to memory -GetElementPtr gets the address of a subelement of an aggregate value -AtomicCmpXchg atomically modifies memory -AtomicRMW atomically modifies memory -Trunc truncates an integer 1wil -ZExt zero extends an integer 1wil -SExt sign extends an integer 1wil -FPToUI converts a floating point to UInt hfd1wil -FPToSI converts a floating point to SInt hfd1wil -UIToFP converts a UInt to floating point hfd1wil -SIToFP converts a SInt to floating point hfd1wil -FPTrunc truncates a floating point hfd -FPExt extends a floating point hfd -BitCast performs a bit-preserving type cast hfd1wil -AddrSpaceCast casts a value addrspace -ICmp compares integers 1wil -FCmp compares floating points hfd -PHI is a PHI node instruction -Call calls a function -Select selects an 
instruction -ExtractValue extracts from aggregate -============= ======================================================================= ================= +============== ======================================================================= ================= +Instruction Action Operand overloads +============== ======================================================================= ================= +Ret returns a value (possibly void), from a function. vhfd1wil +Br branches (conditional or unconditional) +Switch performs a multiway switch +Add returns the sum of its two operands wil +FAdd returns the sum of its two operands hfd +Sub returns the difference of its two operands wil +FSub returns the difference of its two operands hfd +Mul returns the product of its two operands wil +FMul returns the product of its two operands hfd +UDiv returns the quotient of its two unsigned operands wil +SDiv returns the quotient of its two signed operands wil +FDiv returns the quotient of its two operands hfd +URem returns the remainder from the unsigned division of its two operands wil +SRem returns the remainder from the signed division of its two operands wil +FRem returns the remainder from the division of its two operands hfd +Shl shifts left (logical) wil +LShr shifts right (logical), with zero bit fill wil +AShr shifts right (arithmetic), with 'a' operand sign bit fill wil +And returns a bitwise logical and of its two operands 1wil +Or returns a bitwise logical or of its two operands 1wil +Xor returns a bitwise logical xor of its two operands 1wil +Alloca allocates memory on the stack frame of the currently executing function +Load reads from memory +Store writes to memory +GetElementPtr gets the address of a subelement of an aggregate value +AtomicCmpXchg atomically modifies memory +AtomicRMW atomically modifies memory +Trunc truncates an integer 1wil +ZExt zero extends an integer 1wil +SExt sign extends an integer 1wil +FPToUI converts a floating point to UInt hfd1wil 
+FPToSI converts a floating point to SInt hfd1wil +UIToFP converts a UInt to floating point hfd1wil +SIToFP converts a SInt to floating point hfd1wil +FPTrunc truncates a floating point hfd +FPExt extends a floating point hfd +BitCast performs a bit-preserving type cast hfd1wil +AddrSpaceCast casts a value addrspace +ICmp compares integers 1wil +FCmp compares floating points hfd +PHI is a PHI node instruction +Call calls a function +Select selects an instruction +ExtractElement extracts from vector +InsertElement inserts into vector +ShuffleVector Shuffle two vectors +ExtractValue extracts from aggregate +============== ======================================================================= ================= FAdd @@ -2369,6 +2372,53 @@ ID Name Description 255 SampleCmpBias samples a texture after applying the input bias to the mipmap level and compares a single component against the specified comparison value 256 StartVertexLocation returns the BaseVertexLocation from DrawIndexedInstanced or StartVertexLocation from DrawInstanced 257 StartInstanceLocation returns the StartInstanceLocation from Draw*Instanced +258 AllocateRayQuery2 allocates space for RayQuery and return handle +259 ReservedA0 reserved +260 ReservedA1 reserved +261 ReservedA2 reserved +262 HitObject_TraceRay Analogous to TraceRay but without invoking CH/MS and returns the intermediate state as a HitObject +263 HitObject_FromRayQuery Creates a new HitObject representing a committed hit from a RayQuery +264 HitObject_FromRayQueryWithAttrs Creates a new HitObject representing a committed hit from a RayQuery and committed attributes +265 HitObject_MakeMiss Creates a new HitObject representing a miss +266 HitObject_MakeNop Creates an empty nop HitObject +267 HitObject_Invoke Represents the invocation of the CH/MS shader represented by the HitObject +268 MaybeReorderThread Reorders the current thread +269 HitObject_IsMiss Returns `true` if the HitObject represents a miss +270 HitObject_IsHit Returns 
`true` if the HitObject is a NOP-HitObject +271 HitObject_IsNop Returns `true` if the HitObject represents a nop +272 HitObject_RayFlags Returns the ray flags set in the HitObject +273 HitObject_RayTMin Returns the TMin value set in the HitObject +274 HitObject_RayTCurrent Returns the current T value set in the HitObject +275 HitObject_WorldRayOrigin Returns the ray origin in world space +276 HitObject_WorldRayDirection Returns the ray direction in world space +277 HitObject_ObjectRayOrigin Returns the ray origin in object space +278 HitObject_ObjectRayDirection Returns the ray direction in object space +279 HitObject_ObjectToWorld3x4 Returns the object to world space transformation matrix in 3x4 form +280 HitObject_WorldToObject3x4 Returns the world to object space transformation matrix in 3x4 form +281 HitObject_GeometryIndex Returns the geometry index committed on hit +282 HitObject_InstanceIndex Returns the instance index committed on hit +283 HitObject_InstanceID Returns the instance id committed on hit +284 HitObject_PrimitiveIndex Returns the primitive index committed on hit +285 HitObject_HitKind Returns the HitKind of the hit +286 HitObject_ShaderTableIndex Returns the shader table index set for this HitObject +287 HitObject_SetShaderTableIndex Returns a HitObject with updated shader table index +288 HitObject_LoadLocalRootTableConstant Returns the root table constant for this HitObject and offset +289 HitObject_Attributes Returns the attributes set for this HitObject +290 ReservedB28 reserved +291 ReservedB29 reserved +292 ReservedB30 reserved +293 ReservedC0 reserved +294 ReservedC1 reserved +295 ReservedC2 reserved +296 ReservedC3 reserved +297 ReservedC4 reserved +298 ReservedC5 reserved +299 ReservedC6 reserved +300 ReservedC7 reserved +301 ReservedC8 reserved +302 ReservedC9 reserved +303 RawBufferVectorLoad reads from a raw buffer and structured buffer +304 RawBufferVectorStore writes to a RWByteAddressBuffer or RWStructuredBuffer === 
===================================================== ======================================================================================================================================================================================================================= @@ -3015,277 +3065,287 @@ The set of validation rules that are known to hold for a DXIL program is identif .. hctdb_instrhelp.get_valrules_rst() .. VALRULES-RST:BEGIN -========================================= ======================================================================================================================================================================================================================================================================================================== -Rule Code Description -========================================= ======================================================================================================================================================================================================================================================================================================== -BITCODE.VALID Module must be bitcode-valid -CONTAINER.PARTINVALID DXIL Container must not contain unknown parts -CONTAINER.PARTMATCHES DXIL Container Parts must match Module -CONTAINER.PARTMISSING DXIL Container requires certain parts, corresponding to module -CONTAINER.PARTREPEATED DXIL Container must have only one of each part type -CONTAINER.ROOTSIGNATUREINCOMPATIBLE Root Signature in DXIL Container must be compatible with shader -DECL.ATTRSTRUCT Attributes parameter must be struct type -DECL.DXILFNEXTERN External function must be a DXIL function -DECL.DXILNSRESERVED The DXIL reserved prefixes must only be used by built-in functions and types -DECL.EXTRAARGS Extra arguments not allowed for shader functions -DECL.FNATTRIBUTE Functions should only contain known function attributes -DECL.FNFLATTENPARAM Function parameters must not use struct 
types -DECL.FNISCALLED Functions can only be used by call instructions -DECL.MULTIPLENODEINPUTS A node shader may not have more than one input record -DECL.NODELAUNCHINPUTTYPE Invalid input record type for node launch type -DECL.NOTUSEDEXTERNAL External declaration should not be used -DECL.PARAMSTRUCT Callable function parameter must be struct type -DECL.PAYLOADSTRUCT Payload parameter must be struct type -DECL.RAYQUERYINFNSIG Rayquery objects not allowed in function signatures -DECL.RESOURCEINFNSIG Resources not allowed in function signatures -DECL.SHADERMISSINGARG payload/params/attributes parameter is required for certain shader types -DECL.SHADERRETURNVOID Shader functions must return void -DECL.USEDEXTERNALFUNCTION External function must be used -DECL.USEDINTERNAL Internal declaration must be used -FLOW.DEADLOOP Loop must have break. -FLOW.FUNCTIONCALL Function with parameter is not permitted -FLOW.NORECURSION Recursion is not permitted. -FLOW.REDUCIBLE Execution flow must be reducible. -INSTR.ALLOWED Instructions must be of an allowed type. -INSTR.ATOMICCONST Constant destination to atomic. -INSTR.ATOMICINTRINNONUAV Non-UAV destination to atomic intrinsic. -INSTR.ATOMICOPNONGROUPSHAREDORRECORD Non-groupshared or node record destination to atomic operation. -INSTR.ATTRIBUTEATVERTEXNOINTERPOLATION Attribute %0 must have nointerpolation mode in order to use GetAttributeAtVertex function. -INSTR.BARRIERFLAGINVALID Invalid %0 flags on DXIL operation '%1' -INSTR.BARRIERMODEFORNONCS sync in a non-Compute/Amplification/Mesh/Node Shader must only sync UAV (sync_uglobal). -INSTR.BARRIERMODENOMEMORY sync must include some form of memory barrier - _u (UAV) and/or _g (Thread Group Shared Memory). Only _t (thread group sync) is optional. -INSTR.BARRIERMODEUSELESSUGROUP sync can't specify both _ugroup and _uglobal. If both are needed, just specify _uglobal. 
-INSTR.BARRIERNONCONSTANTFLAGARGUMENT Memory type, access, or sync flag is not constant -INSTR.BARRIERREQUIRESNODE sync in a non-Node Shader must not sync node record memory. -INSTR.BUFFERUPDATECOUNTERONRESHASCOUNTER BufferUpdateCounter valid only when HasCounter is true. -INSTR.BUFFERUPDATECOUNTERONUAV BufferUpdateCounter valid only on UAV. -INSTR.CALLOLOAD Call to DXIL intrinsic must match overload signature -INSTR.CANNOTPULLPOSITION pull-model evaluation of position disallowed -INSTR.CBUFFERCLASSFORCBUFFERHANDLE Expect Cbuffer for CBufferLoad handle. -INSTR.CBUFFEROUTOFBOUND Cbuffer access out of bound. -INSTR.CHECKACCESSFULLYMAPPED CheckAccessFullyMapped should only be used on resource status. -INSTR.COORDINATECOUNTFORRAWTYPEDBUF raw/typed buffer don't need 2 coordinates. -INSTR.COORDINATECOUNTFORSTRUCTBUF structured buffer require 2 coordinates. -INSTR.CREATEHANDLEIMMRANGEID Local resource must map to global resource. -INSTR.DXILSTRUCTUSER Dxil struct types should only be used by ExtractValue. -INSTR.DXILSTRUCTUSEROUTOFBOUND Index out of bound when extract value from dxil struct types. -INSTR.EVALINTERPOLATIONMODE Interpolation mode on %0 used with eval_* instruction must be linear, linear_centroid, linear_noperspective, linear_noperspective_centroid, linear_sample or linear_noperspective_sample. -INSTR.EXTRACTVALUE ExtractValue should only be used on dxil struct types and cmpxchg. -INSTR.FAILTORESLOVETGSMPOINTER TGSM pointers must originate from an unambiguous TGSM global variable. -INSTR.HANDLENOTFROMCREATEHANDLE Resource handle should returned by createHandle. -INSTR.ILLEGALDXILOPCODE DXILOpCode must be [0..%0]. %1 specified. -INSTR.ILLEGALDXILOPFUNCTION '%0' is not a DXILOpFuncition for DXILOpcode '%1'. -INSTR.IMMBIASFORSAMPLEB bias amount for sample_b must be in the range [%0,%1], but %2 was specified as an immediate. -INSTR.INBOUNDSACCESS Access to out-of-bounds memory is disallowed. 
-INSTR.MINPRECISIONNOTPRECISE Instructions marked precise may not refer to minprecision values. -INSTR.MINPRECISONBITCAST Bitcast on minprecison types is not allowed. -INSTR.MIPLEVELFORGETDIMENSION Use mip level on buffer when GetDimensions. -INSTR.MIPONUAVLOAD uav load don't support mipLevel/sampleIndex. -INSTR.MISSINGSETMESHOUTPUTCOUNTS Missing SetMeshOutputCounts call. -INSTR.MULTIPLEGETMESHPAYLOAD GetMeshPayload cannot be called multiple times. -INSTR.MULTIPLESETMESHOUTPUTCOUNTS SetMeshOUtputCounts cannot be called multiple times. -INSTR.NODERECORDHANDLEUSEAFTERCOMPLETE Invalid use of completed record handle. -INSTR.NOGENERICPTRADDRSPACECAST Address space cast between pointer types must have one part to be generic address space. -INSTR.NOIDIVBYZERO No signed integer division by zero. -INSTR.NOINDEFINITEACOS No indefinite arccosine. -INSTR.NOINDEFINITEASIN No indefinite arcsine. -INSTR.NOINDEFINITEDSXY No indefinite derivative calculation. -INSTR.NOINDEFINITELOG No indefinite logarithm. -INSTR.NONDOMINATINGDISPATCHMESH Non-Dominating DispatchMesh call. -INSTR.NONDOMINATINGSETMESHOUTPUTCOUNTS Non-Dominating SetMeshOutputCounts call. -INSTR.NOREADINGUNINITIALIZED Instructions should not read uninitialized value. -INSTR.NOTONCEDISPATCHMESH DispatchMesh must be called exactly once in an Amplification shader. -INSTR.NOUDIVBYZERO No unsigned integer division by zero. -INSTR.OFFSETONUAVLOAD uav load don't support offset. -INSTR.OLOAD DXIL intrinsic overload must be valid. -INSTR.ONLYONEALLOCCONSUME RWStructuredBuffers may increment or decrement their counters, but not both. -INSTR.OPCODERESERVED Instructions must not reference reserved opcodes. -INSTR.OPCONST DXIL intrinsic requires an immediate constant operand -INSTR.OPCONSTRANGE Constant values must be in-range for operation. -INSTR.OPERANDRANGE DXIL intrinsic operand must be within defined range -INSTR.PTRBITCAST Pointer type bitcast must be have same size. 
-INSTR.RESOURCECLASSFORLOAD load can only run on UAV/SRV resource. -INSTR.RESOURCECLASSFORSAMPLERGATHER sample, lod and gather should be on srv resource. -INSTR.RESOURCECLASSFORUAVSTORE store should be on uav resource. -INSTR.RESOURCECOORDINATEMISS coord uninitialized. -INSTR.RESOURCECOORDINATETOOMANY out of bound coord must be undef. -INSTR.RESOURCEKINDFORBUFFERLOADSTORE buffer load/store only works on Raw/Typed/StructuredBuffer. -INSTR.RESOURCEKINDFORCALCLOD lod requires resource declared as texture1D/2D/3D/Cube/CubeArray/1DArray/2DArray. -INSTR.RESOURCEKINDFORGATHER gather requires resource declared as texture/2D/Cube/2DArray/CubeArray. -INSTR.RESOURCEKINDFORGETDIM Invalid resource kind on GetDimensions. -INSTR.RESOURCEKINDFORSAMPLE sample/_l/_d requires resource declared as texture1D/2D/3D/Cube/1DArray/2DArray/CubeArray. -INSTR.RESOURCEKINDFORSAMPLEC samplec requires resource declared as texture1D/2D/Cube/1DArray/2DArray/CubeArray. -INSTR.RESOURCEKINDFORTEXTURELOAD texture load only works on Texture1D/1DArray/2D/2DArray/3D/MS2D/MS2DArray. -INSTR.RESOURCEKINDFORTEXTURESTORE texture store only works on Texture1D/1DArray/2D/2DArray/3D. -INSTR.RESOURCEKINDFORTRACERAY TraceRay should only use RTAccelerationStructure. -INSTR.RESOURCEMAPTOSINGLEENTRY Fail to map resource to resource table. -INSTR.RESOURCEOFFSETMISS offset uninitialized. -INSTR.RESOURCEOFFSETTOOMANY out of bound offset must be undef. -INSTR.RESOURCEUSER Resource should only be used by Load/GEP/Call. -INSTR.SAMPLECOMPTYPE sample_* instructions require resource to be declared to return UNORM, SNORM or FLOAT. -INSTR.SAMPLEINDEXFORLOAD2DMS load on Texture2DMS/2DMSArray require sampleIndex. -INSTR.SAMPLERMODEFORLOD lod instruction requires sampler declared in default mode. -INSTR.SAMPLERMODEFORSAMPLE sample/_l/_d/_cl_s/gather instruction requires sampler declared in default mode. -INSTR.SAMPLERMODEFORSAMPLEC sample_c_*/gather_c instructions require sampler declared in comparison mode. 
-INSTR.SIGNATUREOPERATIONNOTINENTRY Dxil operation for input output signature must be in entryPoints. -INSTR.STATUS Resource status should only be used by CheckAccessFullyMapped. -INSTR.STRUCTBITCAST Bitcast on struct types is not allowed. -INSTR.SVCONFLICTINGLAUNCHMODE Input system values are compatible with node shader launch mode. -INSTR.TEXTUREOFFSET offset texture instructions must take offset which can resolve to integer literal in the range -8 to 7. -INSTR.TGSMRACECOND Race condition writing to shared memory detected, consider making this write conditional. -INSTR.UNDEFINEDVALUEFORUAVSTORE Assignment of undefined values to UAV. -INSTR.UNDEFRESULTFORGETDIMENSION GetDimensions used undef dimension %0 on %1. -INSTR.WRITEMASKFORTYPEDUAVSTORE store on typed uav must write to all four components of the UAV. -INSTR.WRITEMASKGAPFORUAV UAV write mask must be contiguous, starting at x: .x, .xy, .xyz, or .xyzw. -INSTR.WRITEMASKMATCHVALUEFORUAVSTORE uav store write mask must match store value mask, write mask is %0 and store value mask is %1. -META.BARYCENTRICSFLOAT3 only 'float3' type is allowed for SV_Barycentrics. -META.BARYCENTRICSINTERPOLATION SV_Barycentrics cannot be used with 'nointerpolation' type. -META.BARYCENTRICSTWOPERSPECTIVES There can only be up to two input attributes of SV_Barycentrics with different perspective interpolation mode. -META.BRANCHFLATTEN Can't use branch and flatten attributes together. -META.CLIPCULLMAXCOMPONENTS Combined elements of SV_ClipDistance and SV_CullDistance must fit in 8 components -META.CLIPCULLMAXROWS Combined elements of SV_ClipDistance and SV_CullDistance must fit in two rows. -META.COMPUTEWITHNODE Compute entry must not have node metadata -META.CONTROLFLOWHINTNOTONCONTROLFLOW Control flow hint only works on control flow inst. -META.DENSERESIDS Resource identifiers must be zero-based and dense. -META.DUPLICATESYSVALUE System value may only appear once in signature -META.ENTRYFUNCTION entrypoint not found. 
-META.FLAGSUSAGE Flags must match usage. -META.FORCECASEONSWITCH Attribute forcecase only works for switch. -META.GLCNOTONAPPENDCONSUME globallycoherent cannot be used with append/consume buffers: '%0'. -META.INTEGERINTERPMODE Interpolation mode on integer must be Constant -META.INTERPMODEINONEROW Interpolation mode must be identical for all elements packed into the same row. -META.INTERPMODEVALID Interpolation mode must be valid -META.INVALIDCONTROLFLOWHINT Invalid control flow hint. -META.KNOWN Named metadata should be known -META.MAXTESSFACTOR Hull Shader MaxTessFactor must be [%0..%1]. %2 specified. -META.NOENTRYPROPSFORENTRY Entry point %0 must have entry properties. -META.NOSEMANTICOVERLAP Semantics must not overlap -META.REQUIRED Required metadata missing. -META.SEMAKINDMATCHESNAME Semantic name must match system value, when defined. -META.SEMAKINDVALID Semantic kind must be valid -META.SEMANTICCOMPTYPE %0 must be %1. -META.SEMANTICINDEXMAX System value semantics have a maximum valid semantic index -META.SEMANTICLEN Semantic length must be at least 1 and at most 64. -META.SEMANTICSHOULDBEALLOCATED Semantic should have a valid packing location -META.SEMANTICSHOULDNOTBEALLOCATED Semantic should have a packing location of -1 -META.SIGNATURECOMPTYPE signature %0 specifies unrecognized or invalid component type. -META.SIGNATUREDATAWIDTH Data width must be identical for all elements packed into the same row. -META.SIGNATUREILLEGALCOMPONENTORDER Component ordering for packed elements must be: arbitrary < system value < system generated value -META.SIGNATUREINDEXCONFLICT Only elements with compatible indexing rules may be packed together -META.SIGNATUREOUTOFRANGE Signature elements must fit within maximum signature size -META.SIGNATUREOVERLAP Signature elements may not overlap in packing location. 
-META.STRUCTBUFALIGNMENT StructuredBuffer stride not aligned -META.STRUCTBUFALIGNMENTOUTOFBOUND StructuredBuffer stride out of bounds -META.SYSTEMVALUEROWS System value may only have 1 row -META.TARGET Target triple must be 'dxil-ms-dx' -META.TESSELLATOROUTPUTPRIMITIVE Invalid Tessellator Output Primitive specified. Must be point, line, triangleCW or triangleCCW. -META.TESSELLATORPARTITION Invalid Tessellator Partitioning specified. Must be integer, pow2, fractional_odd or fractional_even. -META.TEXTURETYPE elements of typed buffers and textures must fit in four 32-bit quantities. -META.USED All metadata must be used by dxil. -META.VALIDSAMPLERMODE Invalid sampler mode on sampler . -META.VALUERANGE Metadata value must be within range. -META.VERSIONSUPPORTED Version in metadata must be supported. -META.WELLFORMED Metadata must be well-formed in operand count and types. -SM.64BITRAWBUFFERLOADSTORE i64/f64 rawBufferLoad/Store overloads are allowed after SM 6.3. -SM.AMPLIFICATIONSHADERPAYLOADSIZE For amplification shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes. -SM.AMPLIFICATIONSHADERPAYLOADSIZEDECLARED For amplification shader with entry '%0', payload size %1 is greater than declared size of %2 bytes. -SM.APPENDANDCONSUMEONSAMEUAV BufferUpdateCounter inc and dec on a given UAV (%d) cannot both be in the same shader for shader model less than 5.1. -SM.CBUFFERARRAYOFFSETALIGNMENT CBuffer array offset must be aligned to 16-bytes -SM.CBUFFERELEMENTOVERFLOW CBuffer elements must not overflow -SM.CBUFFEROFFSETOVERLAP CBuffer offsets must not overlap -SM.CBUFFERSIZE CBuffer size must not exceed 65536 bytes -SM.CBUFFERTEMPLATETYPEMUSTBESTRUCT D3D12 constant/texture buffer template element can only be a struct. -SM.COMPLETEPOSITION Not all elements of SV_Position were written. -SM.CONSTANTINTERPMODE Interpolation mode must be constant for MS primitive output. -SM.COUNTERONLYONSTRUCTBUF BufferUpdateCounter valid only on structured buffers. 
-SM.CSNOSIGNATURES Compute shaders must not have shader signatures. -SM.DOMAINLOCATIONIDXOOB DomainLocation component index out of bounds for the domain. -SM.DSINPUTCONTROLPOINTCOUNTRANGE DS input control point count must be [0..%0]. %1 specified. -SM.DXILVERSION Target shader model requires specific Dxil Version -SM.GSINSTANCECOUNTRANGE GS instance count must be [1..%0]. %1 specified. -SM.GSOUTPUTVERTEXCOUNTRANGE GS output vertex count must be [0..%0]. %1 specified. -SM.GSTOTALOUTPUTVERTEXDATARANGE Declared output vertex count (%0) multiplied by the total number of declared scalar components of output data (%1) equals %2. This value cannot be greater than %3. -SM.GSVALIDINPUTPRIMITIVE GS input primitive unrecognized. -SM.GSVALIDOUTPUTPRIMITIVETOPOLOGY GS output primitive topology unrecognized. -SM.HSINPUTCONTROLPOINTCOUNTRANGE HS input control point count must be [0..%0]. %1 specified. -SM.HULLPASSTHRUCONTROLPOINTCOUNTMATCH For pass thru hull shader, input control point count must match output control point count -SM.INCOMPATIBLECALLINENTRY Features used in internal function calls must be compatible with entry -SM.INCOMPATIBLEDERIVINCOMPUTESHADERMODEL Derivatives in compute-model shaders require shader model 6.6 and above -SM.INCOMPATIBLEDERIVLAUNCH Node shaders only support derivatives in broadcasting launch mode -SM.INCOMPATIBLEOPERATION Operations used in entry function must be compatible with shader stage and other properties -SM.INCOMPATIBLEREQUIRESGROUP Functions requiring groupshared memory must be called from shaders with a visible group -SM.INCOMPATIBLESHADERMODEL Functions may only use features available in the current shader model -SM.INCOMPATIBLESTAGE Functions may only use features available in the entry function's stage -SM.INCOMPATIBLETHREADGROUPDIM When derivatives are used in compute-model shaders, the thread group dimensions must be compatible -SM.INSIDETESSFACTORSIZEMATCHDOMAIN InsideTessFactor rows, columns (%0, %1) invalid for domain %2. 
Expected %3 rows and 1 column. -SM.INVALIDRESOURCECOMPTYPE Invalid resource return type. -SM.INVALIDRESOURCEKIND Invalid resources kind. -SM.INVALIDSAMPLERFEEDBACKTYPE Invalid sampler feedback type. -SM.INVALIDTEXTUREKINDONUAV TextureCube[Array] resources are not supported with UAVs. -SM.ISOLINEOUTPUTPRIMITIVEMISMATCH Hull Shader declared with IsoLine Domain must specify output primitive point or line. Triangle_cw or triangle_ccw output are not compatible with the IsoLine Domain. -SM.MAXMSSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1. -SM.MAXTGSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1. -SM.MAXTHEADGROUP Declared Thread Group Count %0 (X*Y*Z) is beyond the valid maximum of %1. -SM.MESHPSIGROWCOUNT For shader '%0', primitive output signatures are taking up more than %1 rows. -SM.MESHSHADERINOUTSIZE For shader '%0', payload plus output size is greater than %1. -SM.MESHSHADERMAXPRIMITIVECOUNT MS max primitive output count must be [0..%0]. %1 specified. -SM.MESHSHADERMAXVERTEXCOUNT MS max vertex output count must be [0..%0]. %1 specified. -SM.MESHSHADEROUTPUTSIZE For shader '%0', vertex plus primitive output size is greater than %1. -SM.MESHSHADERPAYLOADSIZE For mesh shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes. -SM.MESHSHADERPAYLOADSIZEDECLARED For mesh shader with entry '%0', payload size %1 is greater than declared size of %2 bytes. -SM.MESHTOTALSIGROWCOUNT For shader '%0', vertex and primitive output signatures are taking up more than %1 rows. -SM.MESHVSIGROWCOUNT For shader '%0', vertex output signatures are taking up more than %1 rows. -SM.MULTISTREAMMUSTBEPOINT When multiple GS output streams are used they must be pointlists -SM.NAME Target shader model name must be known -SM.NOINTERPMODE Interpolation mode must be undefined for VS input/PS output/patch constant. -SM.NOPSOUTPUTIDX Pixel shader output registers are not indexable. 
-SM.OPCODE Opcode must be defined in target shader model -SM.OPCODEININVALIDFUNCTION Invalid DXIL opcode usage like StorePatchConstant in patch constant function -SM.OPERAND Operand must be defined in target shader model. -SM.OUTPUTCONTROLPOINTCOUNTRANGE output control point count must be [%0..%1]. %2 specified. -SM.OUTPUTCONTROLPOINTSTOTALSCALARS Total number of scalars across all HS output control points must not exceed . -SM.PATCHCONSTANTONLYFORHSDS patch constant signature only valid in HS and DS. -SM.PSCONSISTENTINTERP Interpolation mode for PS input position must be linear_noperspective_centroid or linear_noperspective_sample when outputting oDepthGE or oDepthLE and not running at sample frequency (which is forced by inputting SV_SampleIndex or declaring an input linear_sample or linear_noperspective_sample). -SM.PSCOVERAGEANDINNERCOVERAGE InnerCoverage and Coverage are mutually exclusive. -SM.PSMULTIPLEDEPTHSEMANTIC Pixel Shader only allows one type of depth semantic to be declared. -SM.PSOUTPUTSEMANTIC Pixel Shader allows output semantics to be SV_Target, SV_Depth, SV_DepthGreaterEqual, SV_DepthLessEqual, SV_Coverage or SV_StencilRef, %0 found. -SM.PSTARGETCOL0 SV_Target packed location must start at column 0. -SM.PSTARGETINDEXMATCHESROW SV_Target semantic index must match packed row location. -SM.RAYSHADERPAYLOADSIZE For shader '%0', %1 size is smaller than argument's allocation size. -SM.RAYSHADERSIGNATURES Ray tracing shader '%0' should not have any shader signatures. -SM.RESOURCERANGEOVERLAP Resource ranges must not overlap -SM.ROVONLYINPS RasterizerOrdered objects are only allowed in 5.0+ pixel shaders. -SM.SAMPLECOUNTONLYON2DMS Only Texture2DMS/2DMSArray could has sample count. -SM.SEMANTIC Semantic must be defined in target shader model -SM.STREAMINDEXRANGE Stream index (%0) must between 0 and %1. -SM.TESSFACTORFORDOMAIN Required TessFactor for domain not found declared anywhere in Patch Constant data. 
-SM.TESSFACTORSIZEMATCHDOMAIN TessFactor rows, columns (%0, %1) invalid for domain %2. Expected %3 rows and 1 column. -SM.TGSMUNSUPPORTED Thread Group Shared Memory not supported %0. -SM.THREADGROUPCHANNELRANGE Declared Thread Group %0 size %1 outside valid range [%2..%3]. -SM.TRIOUTPUTPRIMITIVEMISMATCH Hull Shader declared with Tri Domain must specify output primitive point, triangle_cw or triangle_ccw. Line output is not compatible with the Tri domain. -SM.UNDEFINEDOUTPUT Not all elements of output %0 were written. -SM.VALIDDOMAIN Invalid Tessellator Domain specified. Must be isoline, tri or quad. -SM.VIEWIDNEEDSSLOT ViewID requires compatible space in pixel shader input signature -SM.WAVESIZEALLZEROWHENUNDEFINED WaveSize Max and Preferred must be 0 when Min is 0 -SM.WAVESIZEEXPECTSONEPARAM WaveSize tag expects exactly 1 parameter. -SM.WAVESIZEMAXANDPREFERREDZEROWHENNORANGE WaveSize Max and Preferred must be 0 to encode min==max -SM.WAVESIZEMAXGREATERTHANMIN WaveSize Max must greater than Min -SM.WAVESIZENEEDSCONSTANTOPERANDS WaveSize metadata operands must be constant values. -SM.WAVESIZENEEDSSM66OR67 WaveSize is valid only for Shader Model 6.6 and 6.7. -SM.WAVESIZEONCOMPUTEORNODE WaveSize only allowed on compute or node shaders -SM.WAVESIZEPREFERREDINRANGE WaveSize Preferred must be within Min..Max range -SM.WAVESIZERANGEEXPECTSTHREEPARAMS WaveSize Range tag expects exactly 3 parameters. -SM.WAVESIZERANGENEEDSSM68PLUS WaveSize Range is valid only for Shader Model 6.8 and higher. -SM.WAVESIZETAGDUPLICATE WaveSize or WaveSizeRange tag may only appear once per entry point. -SM.WAVESIZEVALUE WaveSize value must be a power of 2 in range [4..128] -SM.ZEROHSINPUTCONTROLPOINTWITHINPUT When HS input control point count is 0, no input signature should exist. -TYPES.DEFINED Type must be defined based on DXIL primitives -TYPES.I8 I8 can only be used as immediate value for intrinsic or as i8* via bitcast by lifetime intrinsics. 
-TYPES.INTWIDTH Int type must be of valid width -TYPES.NOMULTIDIM Only one dimension allowed for array type. -TYPES.NOPTRTOPTR Pointers to pointers, or pointers in structures are not allowed. -TYPES.NOVECTOR Vector types must not be present -========================================= ======================================================================================================================================================================================================================================================================================================== +===================================================== ======================================================================================================================================================================================================================================================================================================== +Rule Code Description +===================================================== ======================================================================================================================================================================================================================================================================================================== +BITCODE.VALID Module must be bitcode-valid +CONTAINER.CONTENTINVALID DXIL Container Content is well-formed +CONTAINER.CONTENTMATCHES DXIL Container Content must match Module +CONTAINER.PARTINVALID DXIL Container must not contain unknown parts +CONTAINER.PARTMATCHES DXIL Container Parts must match Module +CONTAINER.PARTMISSING DXIL Container requires certain parts, corresponding to module +CONTAINER.PARTREPEATED DXIL Container must have only one of each part type +CONTAINER.ROOTSIGNATUREINCOMPATIBLE Root Signature in DXIL Container must be compatible with shader +CONTAINER.UNUSEDITEMINTABLE Items in Table must be used +DECL.ALLOCATERAYQUERY2FLAGSARECONST constRayFlags and 
RayQueryFlags for AllocateRayQuery2 must be constant +DECL.ALLOCATERAYQUERYFLAGSARECONST RayFlags for AllocateRayQuery must be constant +DECL.ALLOWOPACITYMICROMAPSEXPECTEDGIVENFORCEOMM2STATE When the ForceOMM2State ConstRayFlag is given as an argument to a RayQuery object, AllowOpacityMicromaps is expected as a RayQueryFlag argument +DECL.ATTRSTRUCT Attributes parameter must be struct type +DECL.DXILFNEXTERN External function must be a DXIL function +DECL.DXILNSRESERVED The DXIL reserved prefixes must only be used by built-in functions and types +DECL.EXTRAARGS Extra arguments not allowed for shader functions +DECL.FNATTRIBUTE Functions should only contain known function attributes +DECL.FNFLATTENPARAM Function parameters must not use struct types +DECL.FNISCALLED Functions can only be used by call instructions +DECL.MULTIPLENODEINPUTS A node shader may not have more than one input record +DECL.NODELAUNCHINPUTTYPE Invalid input record type for node launch type +DECL.NOTUSEDEXTERNAL External declaration should not be used +DECL.PARAMSTRUCT Callable function parameter must be struct type +DECL.PAYLOADSTRUCT Payload parameter must be struct type +DECL.RAYQUERYINFNSIG Rayquery objects not allowed in function signatures +DECL.RESOURCEINFNSIG Resources not allowed in function signatures +DECL.SHADERMISSINGARG payload/params/attributes parameter is required for certain shader types +DECL.SHADERRETURNVOID Shader functions must return void +DECL.USEDEXTERNALFUNCTION External function must be used +DECL.USEDINTERNAL Internal declaration must be used +FLOW.DEADLOOP Loop must have break. +FLOW.FUNCTIONCALL Function with parameter is not permitted +FLOW.NORECURSION Recursion is not permitted. +FLOW.REDUCIBLE Execution flow must be reducible. +INSTR.ALLOWED Instructions must be of an allowed type. +INSTR.ATOMICCONST Constant destination to atomic. +INSTR.ATOMICINTRINNONUAV Non-UAV destination to atomic intrinsic. 
+INSTR.ATOMICOPNONGROUPSHAREDORRECORD Non-groupshared or node record destination to atomic operation. +INSTR.ATTRIBUTEATVERTEXNOINTERPOLATION Attribute %0 must have nointerpolation mode in order to use GetAttributeAtVertex function. +INSTR.BARRIERFLAGINVALID Invalid %0 flags on DXIL operation '%1' +INSTR.BARRIERMODEFORNONCS sync in a non-Compute/Amplification/Mesh/Node Shader must only sync UAV (sync_uglobal). +INSTR.BARRIERMODENOMEMORY sync must include some form of memory barrier - _u (UAV) and/or _g (Thread Group Shared Memory). Only _t (thread group sync) is optional. +INSTR.BARRIERMODEUSELESSUGROUP sync can't specify both _ugroup and _uglobal. If both are needed, just specify _uglobal. +INSTR.BARRIERNONCONSTANTFLAGARGUMENT Memory type, access, or sync flag is not constant +INSTR.BARRIERREQUIRESNODE sync in a non-Node Shader must not sync node record memory. +INSTR.BUFFERUPDATECOUNTERONRESHASCOUNTER BufferUpdateCounter valid only when HasCounter is true. +INSTR.BUFFERUPDATECOUNTERONUAV BufferUpdateCounter valid only on UAV. +INSTR.CALLOLOAD Call to DXIL intrinsic must match overload signature +INSTR.CANNOTPULLPOSITION pull-model evaluation of position disallowed +INSTR.CBUFFERCLASSFORCBUFFERHANDLE Expect Cbuffer for CBufferLoad handle. +INSTR.CBUFFEROUTOFBOUND Cbuffer access out of bound. +INSTR.CHECKACCESSFULLYMAPPED CheckAccessFullyMapped should only be used on resource status. +INSTR.CONSTALIGNFORRAWBUF Raw Buffer alignment value must be a constant. +INSTR.COORDINATECOUNTFORRAWTYPEDBUF raw/typed buffer offset must be undef. +INSTR.COORDINATECOUNTFORSTRUCTBUF structured buffer requires defined index and offset coordinates. +INSTR.CREATEHANDLEIMMRANGEID Local resource must map to global resource. +INSTR.DXILSTRUCTUSER Dxil struct types should only be used by ExtractValue. +INSTR.DXILSTRUCTUSEROUTOFBOUND Index out of bound when extract value from dxil struct types. 
+INSTR.EVALINTERPOLATIONMODE Interpolation mode on %0 used with eval_* instruction must be linear, linear_centroid, linear_noperspective, linear_noperspective_centroid, linear_sample or linear_noperspective_sample. +INSTR.EXTRACTVALUE ExtractValue should only be used on dxil struct types and cmpxchg. +INSTR.FAILTORESLOVETGSMPOINTER TGSM pointers must originate from an unambiguous TGSM global variable. +INSTR.HANDLENOTFROMCREATEHANDLE Resource handle should returned by createHandle. +INSTR.ILLEGALDXILOPCODE DXILOpCode must be [0..%0]. %1 specified. +INSTR.ILLEGALDXILOPFUNCTION '%0' is not a DXILOpFuncition for DXILOpcode '%1'. +INSTR.IMMBIASFORSAMPLEB bias amount for sample_b must be in the range [%0,%1], but %2 was specified as an immediate. +INSTR.INBOUNDSACCESS Access to out-of-bounds memory is disallowed. +INSTR.MAYREORDERTHREADUNDEFCOHERENCEHINTPARAM Use of undef coherence hint or num coherence hint bits in MaybeReorderThread. +INSTR.MINPRECISIONNOTPRECISE Instructions marked precise may not refer to minprecision values. +INSTR.MINPRECISONBITCAST Bitcast on minprecison types is not allowed. +INSTR.MIPLEVELFORGETDIMENSION Use mip level on buffer when GetDimensions. +INSTR.MIPONUAVLOAD uav load don't support mipLevel/sampleIndex. +INSTR.MISSINGSETMESHOUTPUTCOUNTS Missing SetMeshOutputCounts call. +INSTR.MULTIPLEGETMESHPAYLOAD GetMeshPayload cannot be called multiple times. +INSTR.MULTIPLESETMESHOUTPUTCOUNTS SetMeshOUtputCounts cannot be called multiple times. +INSTR.NODERECORDHANDLEUSEAFTERCOMPLETE Invalid use of completed record handle. +INSTR.NOGENERICPTRADDRSPACECAST Address space cast between pointer types must have one part to be generic address space. +INSTR.NOIDIVBYZERO No signed integer division by zero. +INSTR.NOINDEFINITEACOS No indefinite arccosine. +INSTR.NOINDEFINITEASIN No indefinite arcsine. +INSTR.NOINDEFINITEDSXY No indefinite derivative calculation. +INSTR.NOINDEFINITELOG No indefinite logarithm. 
+INSTR.NONDOMINATINGDISPATCHMESH Non-Dominating DispatchMesh call. +INSTR.NONDOMINATINGSETMESHOUTPUTCOUNTS Non-Dominating SetMeshOutputCounts call. +INSTR.NOREADINGUNINITIALIZED Instructions should not read uninitialized value. +INSTR.NOTONCEDISPATCHMESH DispatchMesh must be called exactly once in an Amplification shader. +INSTR.NOUDIVBYZERO No unsigned integer division by zero. +INSTR.OFFSETONUAVLOAD uav load don't support offset. +INSTR.OLOAD DXIL intrinsic overload must be valid. +INSTR.ONLYONEALLOCCONSUME RWStructuredBuffers may increment or decrement their counters, but not both. +INSTR.OPCODERESERVED Instructions must not reference reserved opcodes. +INSTR.OPCONST DXIL intrinsic requires an immediate constant operand +INSTR.OPCONSTRANGE Constant values must be in-range for operation. +INSTR.OPERANDRANGE DXIL intrinsic operand must be within defined range +INSTR.PTRBITCAST Pointer type bitcast must be have same size. +INSTR.RESOURCECLASSFORLOAD load can only run on UAV/SRV resource. +INSTR.RESOURCECLASSFORSAMPLERGATHER sample, lod and gather should be on srv resource. +INSTR.RESOURCECLASSFORUAVSTORE store should be on uav resource. +INSTR.RESOURCECOORDINATEMISS coord uninitialized. +INSTR.RESOURCECOORDINATETOOMANY out of bound coord must be undef. +INSTR.RESOURCEKINDFORBUFFERLOADSTORE buffer load/store only works on Raw/Typed/StructuredBuffer. +INSTR.RESOURCEKINDFORCALCLOD lod requires resource declared as texture1D/2D/3D/Cube/CubeArray/1DArray/2DArray. +INSTR.RESOURCEKINDFORGATHER gather requires resource declared as texture/2D/Cube/2DArray/CubeArray. +INSTR.RESOURCEKINDFORGETDIM Invalid resource kind on GetDimensions. +INSTR.RESOURCEKINDFORSAMPLE sample/_l/_d requires resource declared as texture1D/2D/3D/Cube/1DArray/2DArray/CubeArray. +INSTR.RESOURCEKINDFORSAMPLEC samplec requires resource declared as texture1D/2D/Cube/1DArray/2DArray/CubeArray. +INSTR.RESOURCEKINDFORTEXTURELOAD texture load only works on Texture1D/1DArray/2D/2DArray/3D/MS2D/MS2DArray. 
+INSTR.RESOURCEKINDFORTEXTURESTORE texture store only works on Texture1D/1DArray/2D/2DArray/3D. +INSTR.RESOURCEKINDFORTRACERAY TraceRay should only use RTAccelerationStructure. +INSTR.RESOURCEMAPTOSINGLEENTRY Fail to map resource to resource table. +INSTR.RESOURCEOFFSETMISS offset uninitialized. +INSTR.RESOURCEOFFSETTOOMANY out of bound offset must be undef. +INSTR.RESOURCEUSER Resource should only be used by Load/GEP/Call. +INSTR.SAMPLECOMPTYPE sample_* instructions require resource to be declared to return UNORM, SNORM or FLOAT. +INSTR.SAMPLEINDEXFORLOAD2DMS load on Texture2DMS/2DMSArray require sampleIndex. +INSTR.SAMPLERMODEFORLOD lod instruction requires sampler declared in default mode. +INSTR.SAMPLERMODEFORSAMPLE sample/_l/_d/_cl_s/gather instruction requires sampler declared in default mode. +INSTR.SAMPLERMODEFORSAMPLEC sample_c_*/gather_c instructions require sampler declared in comparison mode. +INSTR.SIGNATUREOPERATIONNOTINENTRY Dxil operation for input output signature must be in entryPoints. +INSTR.STATUS Resource status should only be used by CheckAccessFullyMapped. +INSTR.STRUCTBITCAST Bitcast on struct types is not allowed. +INSTR.SVCONFLICTINGLAUNCHMODE Input system values are compatible with node shader launch mode. +INSTR.TEXTUREOFFSET offset texture instructions must take offset which can resolve to integer literal in the range -8 to 7. +INSTR.TGSMRACECOND Race condition writing to shared memory detected, consider making this write conditional. +INSTR.UNDEFHITOBJECT HitObject is undef. +INSTR.UNDEFINEDVALUEFORUAVSTORE Assignment of undefined values to UAV. +INSTR.UNDEFRESULTFORGETDIMENSION GetDimensions used undef dimension %0 on %1. +INSTR.WRITEMASKFORTYPEDUAVSTORE store on typed uav must write to all four components of the UAV. +INSTR.WRITEMASKGAPFORUAV UAV write mask must be contiguous, starting at x: .x, .xy, .xyz, or .xyzw. 
+INSTR.WRITEMASKMATCHVALUEFORUAVSTORE uav store write mask must match store value mask, write mask is %0 and store value mask is %1. +META.BARYCENTRICSFLOAT3 only 'float3' type is allowed for SV_Barycentrics. +META.BARYCENTRICSINTERPOLATION SV_Barycentrics cannot be used with 'nointerpolation' type. +META.BARYCENTRICSTWOPERSPECTIVES There can only be up to two input attributes of SV_Barycentrics with different perspective interpolation mode. +META.BRANCHFLATTEN Can't use branch and flatten attributes together. +META.CLIPCULLMAXCOMPONENTS Combined elements of SV_ClipDistance and SV_CullDistance must fit in 8 components +META.CLIPCULLMAXROWS Combined elements of SV_ClipDistance and SV_CullDistance must fit in two rows. +META.COMPUTEWITHNODE Compute entry must not have node metadata +META.CONTROLFLOWHINTNOTONCONTROLFLOW Control flow hint only works on control flow inst. +META.DENSERESIDS Resource identifiers must be zero-based and dense. +META.DUPLICATESYSVALUE System value may only appear once in signature +META.ENTRYFUNCTION entrypoint not found. +META.FLAGSUSAGE Flags must match usage. +META.FORCECASEONSWITCH Attribute forcecase only works for switch. +META.GLCNOTONAPPENDCONSUME globallycoherent cannot be used with append/consume buffers: '%0'. +META.INTEGERINTERPMODE Interpolation mode on integer must be Constant +META.INTERPMODEINONEROW Interpolation mode must be identical for all elements packed into the same row. +META.INTERPMODEVALID Interpolation mode must be valid +META.INVALIDCONTROLFLOWHINT Invalid control flow hint. +META.KNOWN Named metadata should be known +META.MAXTESSFACTOR Hull Shader MaxTessFactor must be [%0..%1]. %2 specified. +META.NOENTRYPROPSFORENTRY Entry point %0 must have entry properties. +META.NOSEMANTICOVERLAP Semantics must not overlap +META.REQUIRED Required metadata missing. +META.SEMAKINDMATCHESNAME Semantic name must match system value, when defined. 
+META.SEMAKINDVALID Semantic kind must be valid +META.SEMANTICCOMPTYPE %0 must be %1. +META.SEMANTICINDEXMAX System value semantics have a maximum valid semantic index +META.SEMANTICLEN Semantic length must be at least 1 and at most 64. +META.SEMANTICSHOULDBEALLOCATED Semantic should have a valid packing location +META.SEMANTICSHOULDNOTBEALLOCATED Semantic should have a packing location of -1 +META.SIGNATURECOMPTYPE signature %0 specifies unrecognized or invalid component type. +META.SIGNATUREDATAWIDTH Data width must be identical for all elements packed into the same row. +META.SIGNATUREILLEGALCOMPONENTORDER Component ordering for packed elements must be: arbitrary < system value < system generated value +META.SIGNATUREINDEXCONFLICT Only elements with compatible indexing rules may be packed together +META.SIGNATUREOUTOFRANGE Signature elements must fit within maximum signature size +META.SIGNATUREOVERLAP Signature elements may not overlap in packing location. +META.STRUCTBUFALIGNMENT StructuredBuffer stride not aligned +META.STRUCTBUFALIGNMENTOUTOFBOUND StructuredBuffer stride out of bounds +META.SYSTEMVALUEROWS System value may only have 1 row +META.TARGET Target triple must be 'dxil-ms-dx' +META.TESSELLATOROUTPUTPRIMITIVE Invalid Tessellator Output Primitive specified. Must be point, line, triangleCW or triangleCCW. +META.TESSELLATORPARTITION Invalid Tessellator Partitioning specified. Must be integer, pow2, fractional_odd or fractional_even. +META.TEXTURETYPE elements of typed buffers and textures must fit in four 32-bit quantities. +META.USED All metadata must be used by dxil. +META.VALIDSAMPLERMODE Invalid sampler mode on sampler . +META.VALUERANGE Metadata value must be within range. +META.VERSIONSUPPORTED Version in metadata must be supported. +META.WELLFORMED Metadata must be well-formed in operand count and types. +SM.64BITRAWBUFFERLOADSTORE i64/f64 rawBufferLoad/Store overloads are allowed after SM 6.3. 
+SM.AMPLIFICATIONSHADERPAYLOADSIZE For amplification shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes. +SM.AMPLIFICATIONSHADERPAYLOADSIZEDECLARED For amplification shader with entry '%0', payload size %1 is greater than declared size of %2 bytes. +SM.APPENDANDCONSUMEONSAMEUAV BufferUpdateCounter inc and dec on a given UAV (%d) cannot both be in the same shader for shader model less than 5.1. +SM.CBUFFERARRAYOFFSETALIGNMENT CBuffer array offset must be aligned to 16-bytes +SM.CBUFFERELEMENTOVERFLOW CBuffer elements must not overflow +SM.CBUFFEROFFSETOVERLAP CBuffer offsets must not overlap +SM.CBUFFERSIZE CBuffer size must not exceed 65536 bytes +SM.CBUFFERTEMPLATETYPEMUSTBESTRUCT D3D12 constant/texture buffer template element can only be a struct. +SM.COMPLETEPOSITION Not all elements of SV_Position were written. +SM.CONSTANTINTERPMODE Interpolation mode must be constant for MS primitive output. +SM.COUNTERONLYONSTRUCTBUF BufferUpdateCounter valid only on structured buffers. +SM.CSNOSIGNATURES Compute shaders must not have shader signatures. +SM.DOMAINLOCATIONIDXOOB DomainLocation component index out of bounds for the domain. +SM.DSINPUTCONTROLPOINTCOUNTRANGE DS input control point count must be [0..%0]. %1 specified. +SM.DXILVERSION Target shader model requires specific Dxil Version +SM.GSINSTANCECOUNTRANGE GS instance count must be [1..%0]. %1 specified. +SM.GSOUTPUTVERTEXCOUNTRANGE GS output vertex count must be [0..%0]. %1 specified. +SM.GSTOTALOUTPUTVERTEXDATARANGE Declared output vertex count (%0) multiplied by the total number of declared scalar components of output data (%1) equals %2. This value cannot be greater than %3. +SM.GSVALIDINPUTPRIMITIVE GS input primitive unrecognized. +SM.GSVALIDOUTPUTPRIMITIVETOPOLOGY GS output primitive topology unrecognized. +SM.HSINPUTCONTROLPOINTCOUNTRANGE HS input control point count must be [0..%0]. %1 specified. 
+SM.HULLPASSTHRUCONTROLPOINTCOUNTMATCH For pass thru hull shader, input control point count must match output control point count +SM.INCOMPATIBLECALLINENTRY Features used in internal function calls must be compatible with entry +SM.INCOMPATIBLEDERIVINCOMPUTESHADERMODEL Derivatives in compute-model shaders require shader model 6.6 and above +SM.INCOMPATIBLEDERIVLAUNCH Node shaders only support derivatives in broadcasting launch mode +SM.INCOMPATIBLEOPERATION Operations used in entry function must be compatible with shader stage and other properties +SM.INCOMPATIBLEREQUIRESGROUP Functions requiring groupshared memory must be called from shaders with a visible group +SM.INCOMPATIBLESHADERMODEL Functions may only use features available in the current shader model +SM.INCOMPATIBLESTAGE Functions may only use features available in the entry function's stage +SM.INCOMPATIBLETHREADGROUPDIM When derivatives are used in compute-model shaders, the thread group dimensions must be compatible +SM.INSIDETESSFACTORSIZEMATCHDOMAIN InsideTessFactor rows, columns (%0, %1) invalid for domain %2. Expected %3 rows and 1 column. +SM.INVALIDRESOURCECOMPTYPE Invalid resource return type. +SM.INVALIDRESOURCEKIND Invalid resources kind. +SM.INVALIDSAMPLERFEEDBACKTYPE Invalid sampler feedback type. +SM.INVALIDTEXTUREKINDONUAV TextureCube[Array] resources are not supported with UAVs. +SM.ISOLINEOUTPUTPRIMITIVEMISMATCH Hull Shader declared with IsoLine Domain must specify output primitive point or line. Triangle_cw or triangle_ccw output are not compatible with the IsoLine Domain. +SM.MAXMSSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1. +SM.MAXTGSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1. +SM.MAXTHEADGROUP Declared Thread Group Count %0 (X*Y*Z) is beyond the valid maximum of %1. +SM.MESHPSIGROWCOUNT For shader '%0', primitive output signatures are taking up more than %1 rows. 
+SM.MESHSHADERINOUTSIZE For shader '%0', payload plus output size is greater than %1. +SM.MESHSHADERMAXPRIMITIVECOUNT MS max primitive output count must be [0..%0]. %1 specified. +SM.MESHSHADERMAXVERTEXCOUNT MS max vertex output count must be [0..%0]. %1 specified. +SM.MESHSHADEROUTPUTSIZE For shader '%0', vertex plus primitive output size is greater than %1. +SM.MESHSHADERPAYLOADSIZE For mesh shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes. +SM.MESHSHADERPAYLOADSIZEDECLARED For mesh shader with entry '%0', payload size %1 is greater than declared size of %2 bytes. +SM.MESHTOTALSIGROWCOUNT For shader '%0', vertex and primitive output signatures are taking up more than %1 rows. +SM.MESHVSIGROWCOUNT For shader '%0', vertex output signatures are taking up more than %1 rows. +SM.MULTISTREAMMUSTBEPOINT When multiple GS output streams are used they must be pointlists +SM.NAME Target shader model name must be known +SM.NOINTERPMODE Interpolation mode must be undefined for VS input/PS output/patch constant. +SM.NOPSOUTPUTIDX Pixel shader output registers are not indexable. +SM.OPCODE Opcode must be defined in target shader model +SM.OPCODEININVALIDFUNCTION Invalid DXIL opcode usage like StorePatchConstant in patch constant function +SM.OPERAND Operand must be defined in target shader model. +SM.OUTPUTCONTROLPOINTCOUNTRANGE output control point count must be [%0..%1]. %2 specified. +SM.OUTPUTCONTROLPOINTSTOTALSCALARS Total number of scalars across all HS output control points must not exceed . +SM.PATCHCONSTANTONLYFORHSDS patch constant signature only valid in HS and DS. 
+SM.PROGRAMVERSION Program Version in Dxil Container does not match Dxil Module shader model version +SM.PSCONSISTENTINTERP Interpolation mode for PS input position must be linear_noperspective_centroid or linear_noperspective_sample when outputting oDepthGE or oDepthLE and not running at sample frequency (which is forced by inputting SV_SampleIndex or declaring an input linear_sample or linear_noperspective_sample). +SM.PSCOVERAGEANDINNERCOVERAGE InnerCoverage and Coverage are mutually exclusive. +SM.PSMULTIPLEDEPTHSEMANTIC Pixel Shader only allows one type of depth semantic to be declared. +SM.PSOUTPUTSEMANTIC Pixel Shader allows output semantics to be SV_Target, SV_Depth, SV_DepthGreaterEqual, SV_DepthLessEqual, SV_Coverage or SV_StencilRef, %0 found. +SM.PSTARGETCOL0 SV_Target packed location must start at column 0. +SM.PSTARGETINDEXMATCHESROW SV_Target semantic index must match packed row location. +SM.RAYSHADERPAYLOADSIZE For shader '%0', %1 size is smaller than argument's allocation size. +SM.RAYSHADERSIGNATURES Ray tracing shader '%0' should not have any shader signatures. +SM.RESOURCERANGEOVERLAP Resource ranges must not overlap +SM.ROVONLYINPS RasterizerOrdered objects are only allowed in 5.0+ pixel shaders. +SM.SAMPLECOUNTONLYON2DMS Only Texture2DMS/2DMSArray could has sample count. +SM.SEMANTIC Semantic must be defined in target shader model +SM.STREAMINDEXRANGE Stream index (%0) must between 0 and %1. +SM.TESSFACTORFORDOMAIN Required TessFactor for domain not found declared anywhere in Patch Constant data. +SM.TESSFACTORSIZEMATCHDOMAIN TessFactor rows, columns (%0, %1) invalid for domain %2. Expected %3 rows and 1 column. +SM.TGSMUNSUPPORTED Thread Group Shared Memory not supported %0. +SM.THREADGROUPCHANNELRANGE Declared Thread Group %0 size %1 outside valid range [%2..%3]. +SM.TRIOUTPUTPRIMITIVEMISMATCH Hull Shader declared with Tri Domain must specify output primitive point, triangle_cw or triangle_ccw. 
Line output is not compatible with the Tri domain. +SM.UNDEFINEDOUTPUT Not all elements of output %0 were written. +SM.VALIDDOMAIN Invalid Tessellator Domain specified. Must be isoline, tri or quad. +SM.VIEWIDNEEDSSLOT ViewID requires compatible space in pixel shader input signature +SM.WAVESIZEALLZEROWHENUNDEFINED WaveSize Max and Preferred must be 0 when Min is 0 +SM.WAVESIZEEXPECTSONEPARAM WaveSize tag expects exactly 1 parameter. +SM.WAVESIZEMAXANDPREFERREDZEROWHENNORANGE WaveSize Max and Preferred must be 0 to encode min==max +SM.WAVESIZEMAXGREATERTHANMIN WaveSize Max must greater than Min +SM.WAVESIZENEEDSCONSTANTOPERANDS WaveSize metadata operands must be constant values. +SM.WAVESIZENEEDSSM66OR67 WaveSize is valid only for Shader Model 6.6 and 6.7. +SM.WAVESIZEONCOMPUTEORNODE WaveSize only allowed on compute or node shaders +SM.WAVESIZEPREFERREDINRANGE WaveSize Preferred must be within Min..Max range +SM.WAVESIZERANGEEXPECTSTHREEPARAMS WaveSize Range tag expects exactly 3 parameters. +SM.WAVESIZERANGENEEDSSM68PLUS WaveSize Range is valid only for Shader Model 6.8 and higher. +SM.WAVESIZETAGDUPLICATE WaveSize or WaveSizeRange tag may only appear once per entry point. +SM.WAVESIZEVALUE WaveSize value must be a power of 2 in range [4..128] +SM.ZEROHSINPUTCONTROLPOINTWITHINPUT When HS input control point count is 0, no input signature should exist. +TYPES.DEFINED Type must be defined based on DXIL primitives +TYPES.I8 I8 can only be used as immediate value for intrinsic or as i8* via bitcast by lifetime intrinsics. +TYPES.INTWIDTH Int type must be of valid width +TYPES.NOMULTIDIM Only one dimension allowed for array type. +TYPES.NOPTRTOPTR Pointers to pointers, or pointers in structures are not allowed. 
+TYPES.NOVECTOR Vector types must not be present +===================================================== ======================================================================================================================================================================================================================================================================================================== .. VALRULES-RST:END @@ -3294,9 +3354,9 @@ Modules and Linking =================== HLSL has linking capabilities to enable third-party libraries. The linking step happens before shader DXIL is given to the driver compilers. -Experimental library generation is added in DXIL1.1. A library could be created by compile with lib_6_1 profile. -A library is a dxil container like the compile result of other shader profiles. The difference is library will keep information for linking like resource link info and entry function signatures. -Library support is not part of DXIL spec. Only requirement is linked shader must be valid DXIL. +Experimental library generation is added in DXIL1.1. A library could be created by compiling with the lib_6_1 profile. +A library is a dxil container like the compile result of other shader profiles. The difference is a library will keep information for linking like resource link info and entry function signatures. +Library support is not part of the DXIL spec. The only requirement is that the linked shader must be valid DXIL. Additional Notes diff --git a/docs/SPIR-V.rst b/docs/SPIR-V.rst index c30286e4e6..b5e9c05079 100644 --- a/docs/SPIR-V.rst +++ b/docs/SPIR-V.rst @@ -282,7 +282,7 @@ Right now the following ```` are supported: Need ``SPV_KHR_device_group`` extension. * ``ViewportMaskNV``: The GLSL equivalent is ``gl_ViewportMask``. -Please see Vulkan spec. `14.6. Built-In Variables `_ +Please see Vulkan spec. `15.9. Built-In Variables `_ for detailed explanation of these builtins. 
Supported extensions @@ -312,13 +312,15 @@ Supported extensions * SPV_NV_mesh_shader * SPV_KHR_ray_query * SPV_EXT_shader_image_int64 -* SPV_KHR_fragment_shading_barycentric +* SPV_KHR_fragment_shader_barycentric * SPV_KHR_physical_storage_buffer * SPV_KHR_vulkan_memory_model +* SPV_KHR_compute_shader_derivatives * SPV_NV_compute_shader_derivatives * SPV_KHR_maximal_reconvergence * SPV_KHR_float_controls * SPV_NV_shader_subgroup_partitioned +* SPV_KHR_quad_control Vulkan specific attributes -------------------------- @@ -446,7 +448,7 @@ environment (hence SPIR-V version) and SPIR-V extension control: ``-fspv-target-env=`` accepts a Vulkan target environment (see ``-help`` for supported values). If such an option is not given, the CodeGen defaults to ``vulkan1.0``. When targeting ``vulkan1.0``, trying to use features that are only -available in Vulkan 1.1 (SPIR-V 1.3), like `Shader Model 6.0 wave intrinsics`_, +available in Vulkan 1.1 (SPIR-V 1.3), like `Shader Model 6.0 wave intrinsic `_, will trigger a compiler error. If ``-fspv-extension=`` is not specified, the CodeGen will select suitable @@ -494,7 +496,7 @@ Specifically, we need to legalize the following HLSL source code patterns: Legalization transformations will not run unless the above patterns are encountered in the source code. -For more details, please see the `SPIR-V cookbook `_, +For more details, please see the `SPIR-V cookbook `_, which contains examples of what HLSL code patterns will be accepted and generate valid SPIR-V for Vulkan. @@ -561,7 +563,7 @@ So if you want to run loop unrolling additionally after the default optimization recipe, you can specify ``-Oconfig=-O,--loop-unroll``. For the whole list of accepted passes and details about each one, please see -``spirv-opt``'s help manual (``spirv-opt --help``), or the SPIRV-Tools `optimizer header file `_. +``spirv-opt``'s help manual (``spirv-opt --help``), or the SPIRV-Tools `optimizer header file `_. 
Validation ~~~~~~~~~~ @@ -640,7 +642,7 @@ HLSL Semantic HLSL semantic strings are by default not emitted into the SPIR-V binary module. If you need them, by specifying ``-fspv-reflect``, the compiler will use -the ``Op*DecorateStringGOOGLE`` instruction in `SPV_GOOGLE_hlsl_funtionality1 `_ +the ``Op*DecorateStringGOOGLE`` instruction in `SPV_GOOGLE_hlsl_funtionality1 `_ extension to emit them. HLSL User Types @@ -661,7 +663,7 @@ Counter buffers for RW/Append/Consume StructuredBuffer The association between a counter buffer and its main RW/Append/Consume StructuredBuffer is conveyed by ``OpDecorateId HLSLCounterBufferGOOGLE `` instruction from the -`SPV_GOOGLE_hlsl_funtionality1 `_ +`SPV_GOOGLE_hlsl_funtionality1 `_ extension. This information is by default missing; you need to specify ``-fspv-reflect`` to direct the compiler to emit them. @@ -911,7 +913,7 @@ For example, RWTexture2D Tex2; // Works like before -``rgba8`` means ``Rgba8`` `SPIR-V Image Format `_. +``rgba8`` means ``Rgba8`` `SPIR-V Image Format `_. The following table lists the mapping between ``FORMAT`` of ``[[vk::image_format("FORMAT")]]`` and its corresponding SPIR-V Image Format. @@ -994,7 +996,7 @@ Please see the following sections for the details of each type. As a summary: =========================== ================== ================================ ==================== ================= To know more about the Vulkan buffer types, please refer to the Vulkan spec -`13.1 Descriptor Types `_. +`14.1 Descriptor Types `_. Memory layout rules ~~~~~~~~~~~~~~~~~~~ @@ -1004,7 +1006,7 @@ right now: 1. Vector-relaxed OpenGL ``std140`` for uniform buffers and vector-relaxed OpenGL ``std430`` for storage buffers: these rules satisfy Vulkan `"Standard - Uniform Buffer Layout" and "Standard Storage Buffer Layout" `_, + Uniform Buffer Layout" and "Standard Storage Buffer Layout" `_, respectively. They are the default. 2. 
DirectX memory layout rules for uniform buffers and storage buffers: @@ -1027,7 +1029,7 @@ In the above, "vector-relaxed OpenGL ``std140``/``std430``" rules mean OpenGL alignment: 1. The alignment of a vector type is set to be the alignment of its element type -2. If the above causes an `improper straddle `_, +2. If the above causes an `improper straddle `_, the alignment will be set to 16 bytes. As an exmaple, for the following HLSL definition: @@ -1471,7 +1473,7 @@ Without hints from the developer, the compiler will try its best to map semantics to ``Location`` numbers. However, there is no single rule for this mapping; semantic strings should be handled case by case. -Firstly, under certain `SigPoints `_, +Firstly, under certain `SigPoints `_, some system-value (SV) semantic strings will be translated into SPIR-V ``BuiltIn`` decorations: @@ -1655,7 +1657,7 @@ some system-value (SV) semantic strings will be translated into SPIR-V | +-------------+----------------------------------------+-----------------------+-----------------------------+ | | MSOut | ``PrimitiveShadingRateKHR`` | N/A | ``FragmentShadingRate`` | +---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+ -| SV_CullPrimitive | MSOut | ``CullPrimitiveEXT`` | N/A | ``MeshShadingEXT `` | +| SV_CullPrimitive | MSOut | ``CullPrimitiveEXT`` | N/A | ``MeshShadingEXT`` | +---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+ @@ -3596,8 +3598,8 @@ Mesh and Amplification Shaders | Amplification shaders corresponds to Task Shaders in Vulkan. 
| | Refer to following HLSL and SPIR-V specs for details: -| https://docs.microsoft.com/ -| https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/NV/SPV_NV_mesh_shader.asciidoc +| https://microsoft.github.io/DirectX-Specs/d3d/MeshShader.html +| https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/NV/SPV_NV_mesh_shader.asciidoc | | This section describes how Mesh and Amplification shaders are translated to SPIR-V for Vulkan. @@ -3704,8 +3706,8 @@ Raytracing in Vulkan and SPIRV | SPIR-V codegen is currently supported for NVIDIA platforms via SPV_NV_ray_tracing extension or | on other platforms via provisional cross vendor SPV_KHR_ray_tracing extension. | SPIR-V specification for reference: -| https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/NV/SPV_NV_ray_tracing.asciidoc -| https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/KHR/SPV_KHR_ray_tracing.asciidoc +| https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/NV/SPV_NV_ray_tracing.asciidoc +| https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/KHR/SPV_KHR_ray_tracing.asciidoc | Vulkan ray tracing samples: | https://developer.nvidia.com/rtx/raytracing/vkray @@ -3868,7 +3870,7 @@ Ray Query in SPIRV ~~~~~~~~~~~~~~~~~~ RayQuery SPIR-V codegen is currently supported via SPV_KHR_ray_query extension SPIR-V specification for reference: -https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/KHR/SPV_KHR_ray_query.asciidoc +https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/KHR/SPV_KHR_ray_query.asciidoc Object Type ~~~~~~~~~~~ @@ -4007,6 +4009,8 @@ Quad ``QuadReadAcrossX()`` ``OpGroupNonUniformQuadSwap`` Quad ``QuadReadAcrossY()`` ``OpGroupNonUniformQuadSwap`` Quad ``QuadReadAcrossDiagonal()`` ``OpGroupNonUniformQuadSwap`` Quad ``QuadReadLaneAt()`` ``OpGroupNonUniformQuadBroadcast`` +Quad ``QuadAny()`` ``OpGroupNonUniformQuadAnyKHR`` +Quad ``QuadAll()`` ``OpGroupNonUniformQuadAllKHR`` N/A 
``WaveMatch()`` ``OpGroupNonUniformPartitionNV`` Multiprefix ``WaveMultiPrefixSum()`` ``OpGroupNonUniform*Add`` ``PartitionedExclusiveScanNV`` Multiprefix ``WaveMultiPrefixProduct()`` ``OpGroupNonUniform*Mul`` ``PartitionedExclusiveScanNV`` @@ -4015,6 +4019,11 @@ Multiprefix ``WaveMultiPrefixBitOr()`` ``OpGroupNonUniformLogicalOr`` ` Multiprefix ``WaveMultiPrefixBitXor()`` ``OpGroupNonUniformLogicalXor`` ``PartitionedExclusiveScanNV`` ============= ============================ =================================== ============================== +``QuadAny`` and ``QuadAll`` will use the ``OpGroupNonUniformQuadAnyKHR`` and +``OpGroupNonUniformQuadAllKHR`` instructions if the ``SPV_KHR_quad_control`` +extension is enabled. If it is not, they will fall back to constructing the +value using multiple calls to ``OpGroupNonUniformQuadBroadcast``. + The Implicit ``vk`` Namespace ============================= @@ -4081,7 +4090,7 @@ This intrinsic funcion has the following signature: uint64_t ReadClock(in uint scope); -It translates to performing ``OpReadClockKHR`` defined in `VK_KHR_shader_clock `_. +It translates to performing ``OpReadClockKHR`` defined in `VK_KHR_shader_clock `_. One can use the predefined scopes in the ``vk`` namepsace to specify the scope argument. For example: @@ -4091,11 +4100,11 @@ For example: RawBufferLoad and RawBufferStore ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The Vulkan extension `VK_KHR_buffer_device_address `_ +The Vulkan extension `VK_KHR_buffer_device_address `_ supports getting the 64-bit address of a buffer and passing it to SPIR-V as a Uniform buffer. SPIR-V can use the address to load and store data without a descriptor. 
We add the following intrinsic functions to expose a subset of the -`VK_KHR_buffer_device_address `_ +`VK_KHR_buffer_device_address `_ and `SPV_KHR_physical_storage_buffer `_ functionality to HLSL: diff --git a/external/SPIRV-Headers b/external/SPIRV-Headers index 54a521dd13..0e71067798 160000 --- a/external/SPIRV-Headers +++ b/external/SPIRV-Headers @@ -1 +1 @@ -Subproject commit 54a521dd130ae1b2f38fef79b09515702d135bdd +Subproject commit 0e710677989b4326ac974fd80c5308191ed80965 diff --git a/external/SPIRV-Tools b/external/SPIRV-Tools index f289d047f4..4bd1536ed7 160000 --- a/external/SPIRV-Tools +++ b/external/SPIRV-Tools @@ -1 +1 @@ -Subproject commit f289d047f49fb60488301ec62bafab85573668cc +Subproject commit 4bd1536ed79003a5194a4bd8c9aa2fa17a84c15b diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index f8d5b740f7..8c73328fbd 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -147,12 +147,19 @@ const unsigned kMaxMSTotalSigRows = 32; const unsigned kMaxMSSMSize = 1024 * 28; const unsigned kMinWaveSize = 4; const unsigned kMaxWaveSize = 128; +const unsigned kDefaultMaxVectorLength = 4; +const unsigned kSM69MaxVectorLength = 1024; const float kMaxMipLodBias = 15.99f; const float kMinMipLodBias = -16.0f; const unsigned kResRetStatusIndex = 4; +/* hctdb_instrhelp.get_max_oload_dims()*/ +// OLOAD_DIMS-TEXT:BEGIN +const unsigned kDxilMaxOloadDims = 2; +// OLOAD_DIMS-TEXT:END + enum class ComponentType : uint32_t { Invalid = 0, I1, @@ -463,6 +470,11 @@ inline bool IsTBuffer(DXIL::ResourceKind ResourceKind) { return ResourceKind == DXIL::ResourceKind::TBuffer; } +inline bool IsCTBuffer(DXIL::ResourceKind ResourceKind) { + return ResourceKind == DXIL::ResourceKind::CBuffer || + ResourceKind == DXIL::ResourceKind::TBuffer; +} + /// Whether the resource kind is a FeedbackTexture. 
inline bool IsFeedbackTexture(DXIL::ResourceKind ResourceKind) { return ResourceKind == DXIL::ResourceKind::FeedbackTexture2D || @@ -490,37 +502,9 @@ enum class OpCode : unsigned { ReservedA0 = 259, // reserved ReservedA1 = 260, // reserved ReservedA2 = 261, // reserved - ReservedB0 = 262, // reserved - ReservedB1 = 263, // reserved - ReservedB10 = 272, // reserved - ReservedB11 = 273, // reserved - ReservedB12 = 274, // reserved - ReservedB13 = 275, // reserved - ReservedB14 = 276, // reserved - ReservedB15 = 277, // reserved - ReservedB16 = 278, // reserved - ReservedB17 = 279, // reserved - ReservedB18 = 280, // reserved - ReservedB19 = 281, // reserved - ReservedB2 = 264, // reserved - ReservedB20 = 282, // reserved - ReservedB21 = 283, // reserved - ReservedB22 = 284, // reserved - ReservedB23 = 285, // reserved - ReservedB24 = 286, // reserved - ReservedB25 = 287, // reserved - ReservedB26 = 288, // reserved - ReservedB27 = 289, // reserved ReservedB28 = 290, // reserved ReservedB29 = 291, // reserved - ReservedB3 = 265, // reserved ReservedB30 = 292, // reserved - ReservedB4 = 266, // reserved - ReservedB5 = 267, // reserved - ReservedB6 = 268, // reserved - ReservedB7 = 269, // reserved - ReservedB8 = 270, // reserved - ReservedB9 = 271, // reserved ReservedC0 = 293, // reserved ReservedC1 = 294, // reserved ReservedC2 = 295, // reserved @@ -888,8 +872,11 @@ enum class OpCode : unsigned { GetDimensions = 72, // gets texture size information RawBufferLoad = 139, // reads from a raw buffer and structured buffer RawBufferStore = 140, // writes to a RWByteAddressBuffer or RWStructuredBuffer - TextureLoad = 66, // reads texel data without any filtering or sampling - TextureStore = 67, // reads texel data without any filtering or sampling + RawBufferVectorLoad = 303, // reads from a raw buffer and structured buffer + RawBufferVectorStore = + 304, // writes to a RWByteAddressBuffer or RWStructuredBuffer + TextureLoad = 66, // reads texel data without any filtering 
or sampling + TextureStore = 67, // reads texel data without any filtering or sampling TextureStoreSample = 225, // stores texel data at specified sample index // Sampler Feedback @@ -902,6 +889,49 @@ enum class OpCode : unsigned { WriteSamplerFeedbackLevel = 176, // updates a feedback texture for a sampling // operation with a mipmap-level offset + // Shader Execution Reordering + HitObject_Attributes = 289, // Returns the attributes set for this HitObject + HitObject_FromRayQuery = 263, // Creates a new HitObject representing a + // committed hit from a RayQuery + HitObject_FromRayQueryWithAttrs = + 264, // Creates a new HitObject representing a committed hit from a + // RayQuery and committed attributes + HitObject_GeometryIndex = 281, // Returns the geometry index committed on hit + HitObject_HitKind = 285, // Returns the HitKind of the hit + HitObject_InstanceID = 283, // Returns the instance id committed on hit + HitObject_InstanceIndex = 282, // Returns the instance index committed on hit + HitObject_Invoke = 267, // Represents the invocation of the CH/MS shader + // represented by the HitObject + HitObject_IsHit = 270, // Returns `true` if the HitObject is a NOP-HitObject + HitObject_IsMiss = 269, // Returns `true` if the HitObject represents a miss + HitObject_IsNop = 271, // Returns `true` if the HitObject represents a nop + HitObject_LoadLocalRootTableConstant = + 288, // Returns the root table constant for this HitObject and offset + HitObject_MakeMiss = 265, // Creates a new HitObject representing a miss + HitObject_MakeNop = 266, // Creates an empty nop HitObject + HitObject_ObjectRayDirection = + 278, // Returns the ray direction in object space + HitObject_ObjectRayOrigin = 277, // Returns the ray origin in object space + HitObject_ObjectToWorld3x4 = 279, // Returns the object to world space + // transformation matrix in 3x4 form + HitObject_PrimitiveIndex = + 284, // Returns the primitive index committed on hit + HitObject_RayFlags = 272, // 
Returns the ray flags set in the HitObject + HitObject_RayTCurrent = + 274, // Returns the current T value set in the HitObject + HitObject_RayTMin = 273, // Returns the TMin value set in the HitObject + HitObject_SetShaderTableIndex = + 287, // Returns a HitObject with updated shader table index + HitObject_ShaderTableIndex = + 286, // Returns the shader table index set for this HitObject + HitObject_TraceRay = 262, // Analogous to TraceRay but without invoking CH/MS + // and returns the intermediate state as a HitObject + HitObject_WorldRayDirection = 276, // Returns the ray direction in world space + HitObject_WorldRayOrigin = 275, // Returns the ray origin in world space + HitObject_WorldToObject3x4 = 280, // Returns the world to object space + // transformation matrix in 3x4 form + MaybeReorderThread = 268, // Reorders the current thread + // Synchronization AtomicBinOp = 78, // performs an atomic operation on two operands AtomicCompareExchange = 79, // atomic compare and exchange to memory @@ -1030,7 +1060,7 @@ enum class OpCode : unsigned { NumOpCodes_Dxil_1_7 = 226, NumOpCodes_Dxil_1_8 = 258, - NumOpCodes = 303 // exclusive last value of enumeration + NumOpCodes = 305 // exclusive last value of enumeration }; // OPCODE-ENUM:END @@ -1264,6 +1294,8 @@ enum class OpCodeClass : unsigned { GetDimensions, RawBufferLoad, RawBufferStore, + RawBufferVectorLoad, + RawBufferVectorStore, TextureLoad, TextureStore, TextureStoreSample, @@ -1274,6 +1306,21 @@ enum class OpCodeClass : unsigned { WriteSamplerFeedbackGrad, WriteSamplerFeedbackLevel, + // Shader Execution Reordering + HitObject_Attributes, + HitObject_FromRayQuery, + HitObject_FromRayQueryWithAttrs, + HitObject_Invoke, + HitObject_LoadLocalRootTableConstant, + HitObject_MakeMiss, + HitObject_MakeNop, + HitObject_SetShaderTableIndex, + HitObject_StateMatrix, + HitObject_StateScalar, + HitObject_StateVector, + HitObject_TraceRay, + MaybeReorderThread, + // Synchronization AtomicBinOp, AtomicCompareExchange, @@ 
-1338,7 +1385,7 @@ enum class OpCodeClass : unsigned { NumOpClasses_Dxil_1_7 = 153, NumOpClasses_Dxil_1_8 = 174, - NumOpClasses = 175 // exclusive last value of enumeration + NumOpClasses = 190 // exclusive last value of enumeration }; // OPCODECLASS-ENUM:END @@ -1397,6 +1444,12 @@ const unsigned kRawBufferLoadElementOffsetOpIdx = 3; const unsigned kRawBufferLoadMaskOpIdx = 4; const unsigned kRawBufferLoadAlignmentOpIdx = 5; +// RawBufferVectorLoad. +const unsigned kRawBufferVectorLoadHandleOpIdx = 1; +const unsigned kRawBufferVectorLoadIndexOpIdx = 2; +const unsigned kRawBufferVectorLoadElementOffsetOpIdx = 3; +const unsigned kRawBufferVectorLoadAlignmentOpIdx = 4; + // RawBufferStore const unsigned kRawBufferStoreHandleOpIdx = 1; const unsigned kRawBufferStoreIndexOpIdx = 2; @@ -1406,7 +1459,14 @@ const unsigned kRawBufferStoreVal1OpIdx = 5; const unsigned kRawBufferStoreVal2OpIdx = 6; const unsigned kRawBufferStoreVal3OpIdx = 7; const unsigned kRawBufferStoreMaskOpIdx = 8; -const unsigned kRawBufferStoreAlignmentOpIdx = 8; +const unsigned kRawBufferStoreAlignmentOpIdx = 9; + +// RawBufferVectorStore +const unsigned kRawBufferVectorStoreHandleOpIdx = 1; +const unsigned kRawBufferVectorStoreIndexOpIdx = 2; +const unsigned kRawBufferVectorStoreElementOffsetOpIdx = 3; +const unsigned kRawBufferVectorStoreValOpIdx = 4; +const unsigned kRawBufferVectorStoreAlignmentOpIdx = 5; // TextureStore. 
const unsigned kTextureStoreHandleOpIdx = 1; @@ -1820,7 +1880,7 @@ enum class RayFlag : uint32_t { CullNonOpaque = 0x80, SkipTriangles = 0x100, SkipProceduralPrimitives = 0x200, - ForceOMM2State = 0x400, // Force 2-state in Opacity Micromaps + ForceOMM2State = 0x400 }; // Corresponds to RAYQUERY_FLAG_* in HLSL @@ -1869,7 +1929,9 @@ enum class BarrierSemanticFlag : uint32_t { GroupSync = 0x00000001, // GROUP_SYNC GroupScope = 0x00000002, // GROUP_SCOPE DeviceScope = 0x00000004, // DEVICE_SCOPE - ValidMask = 0x00000007, + LegacyFlags = 0x00000007, + ReorderScope = 0x00000008, // REORDER_SCOPE + ValidMask = 0x0000000F, GroupFlags = GroupSync | GroupScope, }; diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 11ab8e3b8d..a99c5360d4 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -645,6 +645,42 @@ struct LlvmInst_VAArg { bool isAllowed() const { return false; } }; +/// This instruction extracts from vector +struct LlvmInst_ExtractElement { + llvm::Instruction *Instr; + // Construction and identification + LlvmInst_ExtractElement(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return Instr->getOpcode() == llvm::Instruction::ExtractElement; + } + // Validation support + bool isAllowed() const { return true; } +}; + +/// This instruction inserts into vector +struct LlvmInst_InsertElement { + llvm::Instruction *Instr; + // Construction and identification + LlvmInst_InsertElement(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return Instr->getOpcode() == llvm::Instruction::InsertElement; + } + // Validation support + bool isAllowed() const { return true; } +}; + +/// This instruction Shuffle two vectors +struct LlvmInst_ShuffleVector { + llvm::Instruction *Instr; + // Construction and identification + LlvmInst_ShuffleVector(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return Instr->getOpcode() == 
llvm::Instruction::ShuffleVector; + } + // Validation support + bool isAllowed() const { return true; } +}; + /// This instruction extracts from aggregate struct LlvmInst_ExtractValue { llvm::Instruction *Instr; @@ -8813,5 +8849,1074 @@ struct DxilInst_AllocateRayQuery2 { llvm::APInt(32, (uint64_t)val))); } }; + +/// This instruction Analogous to TraceRay but without invoking CH/MS and +/// returns the intermediate state as a HitObject +struct DxilInst_HitObject_TraceRay { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_TraceRay(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_TraceRay); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (16 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_accelerationStructure = 1, + arg_rayFlags = 2, + arg_instanceInclusionMask = 3, + arg_rayContributionToHitGroupIndex = 4, + arg_multiplierForGeometryContributionToHitGroupIndex = 5, + arg_missShaderIndex = 6, + arg_Origin_X = 7, + arg_Origin_Y = 8, + arg_Origin_Z = 9, + arg_TMin = 10, + arg_Direction_X = 11, + arg_Direction_Y = 12, + arg_Direction_Z = 13, + arg_TMax = 14, + arg_payload = 15, + }; + // Accessors + llvm::Value *get_accelerationStructure() const { + return Instr->getOperand(1); + } + void set_accelerationStructure(llvm::Value *val) { + Instr->setOperand(1, val); + } + llvm::Value *get_rayFlags() const { return Instr->getOperand(2); } + void set_rayFlags(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_instanceInclusionMask() const { + return Instr->getOperand(3); + } + void set_instanceInclusionMask(llvm::Value *val) { + Instr->setOperand(3, val); + } + llvm::Value *get_rayContributionToHitGroupIndex() 
const { + return Instr->getOperand(4); + } + void set_rayContributionToHitGroupIndex(llvm::Value *val) { + Instr->setOperand(4, val); + } + llvm::Value *get_multiplierForGeometryContributionToHitGroupIndex() const { + return Instr->getOperand(5); + } + void set_multiplierForGeometryContributionToHitGroupIndex(llvm::Value *val) { + Instr->setOperand(5, val); + } + llvm::Value *get_missShaderIndex() const { return Instr->getOperand(6); } + void set_missShaderIndex(llvm::Value *val) { Instr->setOperand(6, val); } + llvm::Value *get_Origin_X() const { return Instr->getOperand(7); } + void set_Origin_X(llvm::Value *val) { Instr->setOperand(7, val); } + llvm::Value *get_Origin_Y() const { return Instr->getOperand(8); } + void set_Origin_Y(llvm::Value *val) { Instr->setOperand(8, val); } + llvm::Value *get_Origin_Z() const { return Instr->getOperand(9); } + void set_Origin_Z(llvm::Value *val) { Instr->setOperand(9, val); } + llvm::Value *get_TMin() const { return Instr->getOperand(10); } + void set_TMin(llvm::Value *val) { Instr->setOperand(10, val); } + llvm::Value *get_Direction_X() const { return Instr->getOperand(11); } + void set_Direction_X(llvm::Value *val) { Instr->setOperand(11, val); } + llvm::Value *get_Direction_Y() const { return Instr->getOperand(12); } + void set_Direction_Y(llvm::Value *val) { Instr->setOperand(12, val); } + llvm::Value *get_Direction_Z() const { return Instr->getOperand(13); } + void set_Direction_Z(llvm::Value *val) { Instr->setOperand(13, val); } + llvm::Value *get_TMax() const { return Instr->getOperand(14); } + void set_TMax(llvm::Value *val) { Instr->setOperand(14, val); } + llvm::Value *get_payload() const { return Instr->getOperand(15); } + void set_payload(llvm::Value *val) { Instr->setOperand(15, val); } +}; + +/// This instruction Creates a new HitObject representing a committed hit from a +/// RayQuery +struct DxilInst_HitObject_FromRayQuery { + llvm::Instruction *Instr; + // Construction and identification + 
DxilInst_HitObject_FromRayQuery(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_FromRayQuery); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_rayQueryHandle = 1, + }; + // Accessors + llvm::Value *get_rayQueryHandle() const { return Instr->getOperand(1); } + void set_rayQueryHandle(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Creates a new HitObject representing a committed hit from a +/// RayQuery and committed attributes +struct DxilInst_HitObject_FromRayQueryWithAttrs { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_FromRayQueryWithAttrs(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_FromRayQueryWithAttrs); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_rayQueryHandle = 1, + arg_HitKind = 2, + arg_CommittedAttribs = 3, + }; + // Accessors + llvm::Value *get_rayQueryHandle() const { return Instr->getOperand(1); } + void set_rayQueryHandle(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_HitKind() const { return Instr->getOperand(2); } + void set_HitKind(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_CommittedAttribs() const { return Instr->getOperand(3); } + void set_CommittedAttribs(llvm::Value *val) { Instr->setOperand(3, val); 
} +}; + +/// This instruction Creates a new HitObject representing a miss +struct DxilInst_HitObject_MakeMiss { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_MakeMiss(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_MakeMiss); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (11 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_RayFlags = 1, + arg_MissShaderIndex = 2, + arg_Origin_X = 3, + arg_Origin_Y = 4, + arg_Origin_Z = 5, + arg_TMin = 6, + arg_Direction_X = 7, + arg_Direction_Y = 8, + arg_Direction_Z = 9, + arg_TMax = 10, + }; + // Accessors + llvm::Value *get_RayFlags() const { return Instr->getOperand(1); } + void set_RayFlags(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_MissShaderIndex() const { return Instr->getOperand(2); } + void set_MissShaderIndex(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_Origin_X() const { return Instr->getOperand(3); } + void set_Origin_X(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_Origin_Y() const { return Instr->getOperand(4); } + void set_Origin_Y(llvm::Value *val) { Instr->setOperand(4, val); } + llvm::Value *get_Origin_Z() const { return Instr->getOperand(5); } + void set_Origin_Z(llvm::Value *val) { Instr->setOperand(5, val); } + llvm::Value *get_TMin() const { return Instr->getOperand(6); } + void set_TMin(llvm::Value *val) { Instr->setOperand(6, val); } + llvm::Value *get_Direction_X() const { return Instr->getOperand(7); } + void set_Direction_X(llvm::Value *val) { Instr->setOperand(7, val); } + llvm::Value *get_Direction_Y() const { return Instr->getOperand(8); } + void set_Direction_Y(llvm::Value *val) { 
Instr->setOperand(8, val); } + llvm::Value *get_Direction_Z() const { return Instr->getOperand(9); } + void set_Direction_Z(llvm::Value *val) { Instr->setOperand(9, val); } + llvm::Value *get_TMax() const { return Instr->getOperand(10); } + void set_TMax(llvm::Value *val) { Instr->setOperand(10, val); } +}; + +/// This instruction Creates an empty nop HitObject +struct DxilInst_HitObject_MakeNop { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_MakeNop(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_MakeNop); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (1 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } +}; + +/// This instruction Represents the invocation of the CH/MS shader represented +/// by the HitObject +struct DxilInst_HitObject_Invoke { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_Invoke(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_Invoke); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_payload = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_payload() const { return Instr->getOperand(2); } + void set_payload(llvm::Value *val) { Instr->setOperand(2, val); } +}; + +/// This instruction Reorders the 
current thread +struct DxilInst_MaybeReorderThread { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_MaybeReorderThread(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::MaybeReorderThread); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_coherenceHint = 2, + arg_numCoherenceHintBitsFromLSB = 3, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_coherenceHint() const { return Instr->getOperand(2); } + void set_coherenceHint(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_numCoherenceHintBitsFromLSB() const { + return Instr->getOperand(3); + } + void set_numCoherenceHintBitsFromLSB(llvm::Value *val) { + Instr->setOperand(3, val); + } +}; + +/// This instruction Returns `true` if the HitObject represents a miss +struct DxilInst_HitObject_IsMiss { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_IsMiss(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_IsMiss); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void 
set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns `true` if the HitObject is a NOP-HitObject +struct DxilInst_HitObject_IsHit { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_IsHit(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_IsHit); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns `true` if the HitObject represents a nop +struct DxilInst_HitObject_IsNop { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_IsNop(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_IsNop); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the ray flags set in the HitObject +struct DxilInst_HitObject_RayFlags { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_RayFlags(llvm::Instruction *pInstr) : 
Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_RayFlags); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the TMin value set in the HitObject +struct DxilInst_HitObject_RayTMin { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_RayTMin(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_RayTMin); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the current T value set in the HitObject +struct DxilInst_HitObject_RayTCurrent { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_RayTCurrent(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_RayTCurrent); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != 
llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the ray origin in world space +struct DxilInst_HitObject_WorldRayOrigin { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_WorldRayOrigin(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_WorldRayOrigin); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_component = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_component() const { return Instr->getOperand(2); } + void set_component(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_component_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(2)) + ->getZExtValue()); + } + void set_component_val(int32_t val) { + Instr->setOperand(2, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction Returns the ray direction in world space +struct DxilInst_HitObject_WorldRayDirection { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_WorldRayDirection(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { 
+ return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_WorldRayDirection); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_component = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_component() const { return Instr->getOperand(2); } + void set_component(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_component_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(2)) + ->getZExtValue()); + } + void set_component_val(int32_t val) { + Instr->setOperand(2, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction Returns the ray origin in object space +struct DxilInst_HitObject_ObjectRayOrigin { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_ObjectRayOrigin(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_ObjectRayOrigin); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_component = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_component() const { return 
Instr->getOperand(2); } + void set_component(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_component_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(2)) + ->getZExtValue()); + } + void set_component_val(int32_t val) { + Instr->setOperand(2, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction Returns the ray direction in object space +struct DxilInst_HitObject_ObjectRayDirection { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_ObjectRayDirection(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_ObjectRayDirection); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_component = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_component() const { return Instr->getOperand(2); } + void set_component(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_component_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(2)) + ->getZExtValue()); + } + void set_component_val(int32_t val) { + Instr->setOperand(2, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction Returns the object to world space transformation matrix in +/// 3x4 form +struct DxilInst_HitObject_ObjectToWorld3x4 { + llvm::Instruction *Instr; + // Construction and identification + 
DxilInst_HitObject_ObjectToWorld3x4(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_ObjectToWorld3x4); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_row = 2, + arg_col = 3, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_row() const { return Instr->getOperand(2); } + void set_row(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_row_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(2)) + ->getZExtValue()); + } + void set_row_val(int32_t val) { + Instr->setOperand(2, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } + llvm::Value *get_col() const { return Instr->getOperand(3); } + void set_col(llvm::Value *val) { Instr->setOperand(3, val); } + int32_t get_col_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(3)) + ->getZExtValue()); + } + void set_col_val(int32_t val) { + Instr->setOperand(3, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction Returns the world to object space transformation matrix in +/// 3x4 form +struct DxilInst_HitObject_WorldToObject3x4 { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_WorldToObject3x4(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_WorldToObject3x4); + } + // 
Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_row = 2, + arg_col = 3, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_row() const { return Instr->getOperand(2); } + void set_row(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_row_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(2)) + ->getZExtValue()); + } + void set_row_val(int32_t val) { + Instr->setOperand(2, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } + llvm::Value *get_col() const { return Instr->getOperand(3); } + void set_col(llvm::Value *val) { Instr->setOperand(3, val); } + int32_t get_col_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(3)) + ->getZExtValue()); + } + void set_col_val(int32_t val) { + Instr->setOperand(3, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction Returns the geometry index committed on hit +struct DxilInst_HitObject_GeometryIndex { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_GeometryIndex(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_GeometryIndex); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } 
+ // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the instance index committed on hit +struct DxilInst_HitObject_InstanceIndex { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_InstanceIndex(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_InstanceIndex); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the instance id committed on hit +struct DxilInst_HitObject_InstanceID { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_InstanceID(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_InstanceID); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the primitive 
index committed on hit +struct DxilInst_HitObject_PrimitiveIndex { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_PrimitiveIndex(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_PrimitiveIndex); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the HitKind of the hit +struct DxilInst_HitObject_HitKind { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_HitKind(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_HitKind); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the shader table index set for this HitObject +struct DxilInst_HitObject_ShaderTableIndex { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_ShaderTableIndex(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return 
hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_ShaderTableIndex); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns a HitObject with updated shader table index +struct DxilInst_HitObject_SetShaderTableIndex { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_SetShaderTableIndex(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_SetShaderTableIndex); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_shaderTableIndex = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_shaderTableIndex() const { return Instr->getOperand(2); } + void set_shaderTableIndex(llvm::Value *val) { Instr->setOperand(2, val); } +}; + +/// This instruction Returns the root table constant for this HitObject and +/// offset +struct DxilInst_HitObject_LoadLocalRootTableConstant { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_LoadLocalRootTableConstant(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + 
return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_LoadLocalRootTableConstant); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_offset = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_offset() const { return Instr->getOperand(2); } + void set_offset(llvm::Value *val) { Instr->setOperand(2, val); } +}; + +/// This instruction Returns the attributes set for this HitObject +struct DxilInst_HitObject_Attributes { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_Attributes(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_Attributes); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_attributes = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_attributes() const { return Instr->getOperand(2); } + void set_attributes(llvm::Value *val) { Instr->setOperand(2, val); } +}; + +/// This instruction reads from a raw buffer and structured buffer +struct DxilInst_RawBufferVectorLoad { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_RawBufferVectorLoad(llvm::Instruction 
*pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::RawBufferVectorLoad); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (5 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_buf = 1, + arg_index = 2, + arg_elementOffset = 3, + arg_alignment = 4, + }; + // Accessors + llvm::Value *get_buf() const { return Instr->getOperand(1); } + void set_buf(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_index() const { return Instr->getOperand(2); } + void set_index(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_elementOffset() const { return Instr->getOperand(3); } + void set_elementOffset(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_alignment() const { return Instr->getOperand(4); } + void set_alignment(llvm::Value *val) { Instr->setOperand(4, val); } + int32_t get_alignment_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(4)) + ->getZExtValue()); + } + void set_alignment_val(int32_t val) { + Instr->setOperand(4, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction writes to a RWByteAddressBuffer or RWStructuredBuffer +struct DxilInst_RawBufferVectorStore { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_RawBufferVectorStore(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::RawBufferVectorStore); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (6 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + 
bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_uav = 1, + arg_index = 2, + arg_elementOffset = 3, + arg_value0 = 4, + arg_alignment = 5, + }; + // Accessors + llvm::Value *get_uav() const { return Instr->getOperand(1); } + void set_uav(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_index() const { return Instr->getOperand(2); } + void set_index(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_elementOffset() const { return Instr->getOperand(3); } + void set_elementOffset(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_value0() const { return Instr->getOperand(4); } + void set_value0(llvm::Value *val) { Instr->setOperand(4, val); } + llvm::Value *get_alignment() const { return Instr->getOperand(5); } + void set_alignment(llvm::Value *val) { Instr->setOperand(5, val); } + int32_t get_alignment_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(5)) + ->getZExtValue()); + } + void set_alignment_val(int32_t val) { + Instr->setOperand(5, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; // INSTR-HELPER:END } // namespace hlsl diff --git a/include/dxc/DXIL/DxilMetadataHelper.h b/include/dxc/DXIL/DxilMetadataHelper.h index fa13f6d766..e17db016d8 100644 --- a/include/dxc/DXIL/DxilMetadataHelper.h +++ b/include/dxc/DXIL/DxilMetadataHelper.h @@ -233,6 +233,7 @@ class DxilMDHelper { static const unsigned kDxilStructuredBufferElementStrideTag = 1; static const unsigned kDxilSamplerFeedbackKindTag = 2; static const unsigned kDxilAtomic64UseTag = 3; + static const unsigned kDxilReorderCoherentTag = 4; // Type system. static const char kDxilTypeSystemMDName[]; @@ -427,6 +428,8 @@ class DxilMDHelper { // Dxil version. 
void EmitDxilVersion(unsigned Major, unsigned Minor); void LoadDxilVersion(unsigned &Major, unsigned &Minor); + static bool LoadDxilVersion(const llvm::Module *pModule, unsigned &Major, + unsigned &Minor); // Validator version. void EmitValidatorVersion(unsigned Major, unsigned Minor); diff --git a/include/dxc/DXIL/DxilOperations.h b/include/dxc/DXIL/DxilOperations.h index 3514701327..c8b6762b3f 100644 --- a/include/dxc/DXIL/DxilOperations.h +++ b/include/dxc/DXIL/DxilOperations.h @@ -57,13 +57,33 @@ class OP { // caches. void RefreshCache(); + // The single llvm::Type * "OverloadType" has one of these forms: + // No overloads (NumOverloadDims == 0): + // - TS_Void: VoidTy + // For single overload dimension (NumOverloadDims == 1): + // - TS_F*, TS_I*: a scalar numeric type (half, float, i1, i64, etc.), + // - TS_UDT: a pointer to a StructType representing a User Defined Type, + // - TS_Object: a named StructType representing a built-in object, or + // - TS_Vector: a vector type (<4 x float>, <16 x i16>, etc.) + // For multiple overload dimensions (TS_Extended, NumOverloadDims > 1): + // - an unnamed StructType containing each type for the corresponding + // dimension, such as: type { i32, <2 x float> } + // - contained type options are the same as for single dimension. 
+ llvm::Function *GetOpFunc(OpCode OpCode, llvm::Type *pOverloadType); + + // N-dimension convenience version of GetOpFunc: + llvm::Function *GetOpFunc(OpCode OpCode, + llvm::ArrayRef OverloadTypes); + const llvm::SmallMapVector & GetOpFuncList(OpCode OpCode) const; bool IsDxilOpUsed(OpCode opcode) const; void RemoveFunction(llvm::Function *F); llvm::LLVMContext &GetCtx() { return m_Ctx; } + llvm::Module *GetModule() { return m_pModule; } llvm::Type *GetHandleType() const; + llvm::Type *GetHitObjectType() const; llvm::Type *GetNodeHandleType() const; llvm::Type *GetNodeRecordHandleType() const; llvm::Type *GetResourcePropertiesType() const; @@ -80,9 +100,14 @@ class OP { llvm::Type *GetResRetType(llvm::Type *pOverloadType); llvm::Type *GetCBufferRetType(llvm::Type *pOverloadType); - llvm::Type *GetVectorType(unsigned numElements, llvm::Type *pOverloadType); + llvm::Type *GetStructVectorType(unsigned numElements, + llvm::Type *pOverloadType); bool IsResRetType(llvm::Type *Ty); + // Construct an unnamed struct type containing the set of member types. + llvm::StructType * + GetExtendedOverloadType(llvm::ArrayRef OverloadTypes); + // Try to get the opcode class for a function. // Return true and set `opClass` if the given function is a dxil function. // Return false if the given function is not a dxil function. 
@@ -126,12 +151,8 @@ class OP { static bool IsDxilOpBarrier(OpCode C); static bool BarrierRequiresGroup(const llvm::CallInst *CI); static bool BarrierRequiresNode(const llvm::CallInst *CI); + static bool BarrierRequiresReorder(const llvm::CallInst *CI); static DXIL::BarrierMode TranslateToBarrierMode(const llvm::CallInst *CI); - static bool IsDxilOpTypeName(llvm::StringRef name); - static bool IsDxilOpType(llvm::StructType *ST); - static bool IsDupDxilOpType(llvm::StructType *ST); - static llvm::StructType *GetOriginalDxilOpType(llvm::StructType *ST, - llvm::Module &M); static void GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, unsigned &major, unsigned &minor, unsigned &mask); @@ -140,12 +161,20 @@ class OP { unsigned valMinor, unsigned &major, unsigned &minor, unsigned &mask); + static bool IsDxilOpExtendedOverload(OpCode C); + + // Return true if the overload name suffix for this operation may be + // constructed based on a user-defined or user-influenced type name + // that may not represent the same type in different linked modules. + static bool MayHaveNonCanonicalOverload(OpCode OC); + private: // Per-module properties. 
llvm::LLVMContext &m_Ctx; llvm::Module *m_pModule; llvm::Type *m_pHandleType; + llvm::Type *m_pHitObjectType; llvm::Type *m_pNodeHandleType; llvm::Type *m_pNodeRecordHandleType; llvm::Type *m_pResourcePropertiesType; @@ -162,13 +191,33 @@ class OP { DXIL::LowPrecisionMode m_LowPrecisionMode; - static const unsigned kUserDefineTypeSlot = 9; - static const unsigned kObjectTypeSlot = 10; - static const unsigned kNumTypeOverloads = - 11; // void, h,f,d, i1, i8,i16,i32,i64, udt, obj + // Overload types are split into "basic" overload types and special types + // Basic: void, half, float, double, i1, i8, i16, i32, i64 + // - These have one canonical overload per TypeSlot + // Special: udt, obj, vec, extended + // - These may have many overloads per type slot + enum TypeSlot : unsigned { + TS_F16 = 0, + TS_F32 = 1, + TS_F64 = 2, + TS_I1 = 3, + TS_I8 = 4, + TS_I16 = 5, + TS_I32 = 6, + TS_I64 = 7, + TS_BasicCount, + TS_UDT = 8, // Ex: %"struct.MyStruct" * + TS_Object = 9, // Ex: %"class.StructuredBuffer" + TS_Vector = 10, // Ex: <8 x i16> + TS_MaskBitCount, // Types used in Mask end here + // TS_Extended is only used to identify the unnamed struct type used to wrap + // multiple overloads when using GetTypeSlot. + TS_Extended, // Ex: type { float, <16 x i32> } + TS_Invalid = UINT_MAX, + }; - llvm::Type *m_pResRetType[kNumTypeOverloads]; - llvm::Type *m_pCBufferRetType[kNumTypeOverloads]; + llvm::Type *m_pResRetType[TS_BasicCount]; + llvm::Type *m_pCBufferRetType[TS_BasicCount]; struct OpCodeCacheItem { llvm::SmallMapVector pOverloads; @@ -179,27 +228,46 @@ class OP { private: // Static properties. + struct OverloadMask { + // mask of type slot bits as (1 << TypeSlot) + uint16_t SlotMask; + static_assert(TS_MaskBitCount <= (sizeof(SlotMask) * 8)); + bool operator[](unsigned TypeSlot) const { + return (TypeSlot < TS_MaskBitCount) ? 
(bool)(SlotMask & (1 << TypeSlot)) + : 0; + } + operator bool() const { return SlotMask != 0; } + }; struct OpCodeProperty { OpCode opCode; const char *pOpCodeName; OpCodeClass opCodeClass; const char *pOpCodeClassName; - bool bAllowOverload[kNumTypeOverloads]; // void, h,f,d, i1, i8,i16,i32,i64, - // udt llvm::Attribute::AttrKind FuncAttr; + + // Number of overload dimensions used by the operation. + unsigned int NumOverloadDims; + + // Mask of supported overload types for each overload dimension. + OverloadMask AllowedOverloads[DXIL::kDxilMaxOloadDims]; + + // Mask of scalar components allowed for each demension where + // AllowedOverloads[n][TS_Vector] is true. + OverloadMask AllowedVectorElements[DXIL::kDxilMaxOloadDims]; }; static const OpCodeProperty m_OpCodeProps[(unsigned)OpCode::NumOpCodes]; - static const char *m_OverloadTypeName[kNumTypeOverloads]; + static const char *m_OverloadTypeName[TS_BasicCount]; static const char *m_NamePrefix; static const char *m_TypePrefix; static const char *m_MatrixTypePrefix; static unsigned GetTypeSlot(llvm::Type *pType); static const char *GetOverloadTypeName(unsigned TypeSlot); - static llvm::StringRef GetTypeName(llvm::Type *Ty, std::string &str); - static llvm::StringRef ConstructOverloadName(llvm::Type *Ty, - DXIL::OpCode opCode, - std::string &funcNameStorage); + static llvm::StringRef GetTypeName(llvm::Type *Ty, + llvm::SmallVectorImpl &Storage); + static llvm::StringRef + ConstructOverloadName(llvm::Type *Ty, DXIL::OpCode opCode, + llvm::SmallVectorImpl &Storage); }; } // namespace hlsl diff --git a/include/dxc/DXIL/DxilResource.h b/include/dxc/DXIL/DxilResource.h index 49db65caed..dcf70333da 100644 --- a/include/dxc/DXIL/DxilResource.h +++ b/include/dxc/DXIL/DxilResource.h @@ -63,6 +63,8 @@ class DxilResource : public DxilResourceBase { bool IsGloballyCoherent() const; void SetGloballyCoherent(bool b); + bool IsReorderCoherent() const; + void SetReorderCoherent(bool b); bool HasCounter() const; void 
SetHasCounter(bool b); @@ -97,6 +99,7 @@ class DxilResource : public DxilResourceBase { CompType m_CompType; DXIL::SamplerFeedbackType m_SamplerFeedbackType; bool m_bGloballyCoherent; + bool m_bReorderCoherent; bool m_bHasCounter; bool m_bROV; bool m_bHasAtomic64Use; diff --git a/include/dxc/DXIL/DxilResourceProperties.h b/include/dxc/DXIL/DxilResourceProperties.h index 21a705f077..2f4ff58969 100644 --- a/include/dxc/DXIL/DxilResourceProperties.h +++ b/include/dxc/DXIL/DxilResourceProperties.h @@ -47,7 +47,8 @@ struct DxilResourceProperties { uint8_t SamplerCmpOrHasCounter : 1; // BYTE 2 - uint8_t Reserved2; + uint8_t IsReorderCoherent : 1; + uint8_t Reserved2 : 7; // BYTE 3 uint8_t Reserved3; diff --git a/include/dxc/DXIL/DxilUtil.h b/include/dxc/DXIL/DxilUtil.h index 490f335db5..ca8f2ac755 100644 --- a/include/dxc/DXIL/DxilUtil.h +++ b/include/dxc/DXIL/DxilUtil.h @@ -162,6 +162,8 @@ GetHLSLResourceProperties(llvm::Type *Ty); bool IsHLSLResourceType(llvm::Type *Ty); bool IsHLSLObjectType(llvm::Type *Ty); bool IsHLSLRayQueryType(llvm::Type *Ty); +llvm::Type *GetHLSLHitObjectType(llvm::Module *M); +bool IsHLSLHitObjectType(llvm::Type *Ty); bool IsHLSLResourceDescType(llvm::Type *Ty); bool IsResourceSingleComponent(llvm::Type *Ty); uint8_t GetResourceComponentCount(llvm::Type *Ty); @@ -221,6 +223,10 @@ bool DeleteDeadAllocas(llvm::Function &F); llvm::Value *GEPIdxToOffset(llvm::GetElementPtrInst *GEP, llvm::IRBuilder<> &Builder, hlsl::OP *OP, const llvm::DataLayout &DL); + +// Passes back Dxil version of the given module on true return. 
+bool LoadDxilVersion(const llvm::Module *M, unsigned &Major, unsigned &Minor); + } // namespace dxilutil } // namespace hlsl diff --git a/include/dxc/DxilContainer/RDAT_LibraryTypes.inl b/include/dxc/DxilContainer/RDAT_LibraryTypes.inl index 132d272a8e..4b58b406c2 100644 --- a/include/dxc/DxilContainer/RDAT_LibraryTypes.inl +++ b/include/dxc/DxilContainer/RDAT_LibraryTypes.inl @@ -22,6 +22,7 @@ RDAT_ENUM_START(DxilResourceFlag, uint32_t) RDAT_ENUM_VALUE(UAVRasterizerOrderedView, 1 << 2) RDAT_ENUM_VALUE(DynamicIndexing, 1 << 3) RDAT_ENUM_VALUE(Atomics64Use, 1 << 4) + RDAT_ENUM_VALUE(UAVReorderCoherent, 1 << 5) RDAT_ENUM_END() RDAT_ENUM_START(DxilShaderStageFlags, uint32_t) diff --git a/include/dxc/DxilPIXPasses/DxilPIXPasses.h b/include/dxc/DxilPIXPasses/DxilPIXPasses.h index ad0ddfdfd2..5cc7c4aa50 100644 --- a/include/dxc/DxilPIXPasses/DxilPIXPasses.h +++ b/include/dxc/DxilPIXPasses/DxilPIXPasses.h @@ -27,6 +27,7 @@ ModulePass *createDxilDebugInstrumentationPass(); ModulePass *createDxilShaderAccessTrackingPass(); ModulePass *createDxilPIXAddTidToAmplificationShaderPayloadPass(); ModulePass *createDxilPIXDXRInvocationsLogPass(); +ModulePass *createDxilNonUniformResourceIndexInstrumentationPass(); void initializeDxilAddPixelHitInstrumentationPass(llvm::PassRegistry &); void initializeDxilDbgValueToDbgDeclarePass(llvm::PassRegistry &); @@ -41,5 +42,7 @@ void initializeDxilShaderAccessTrackingPass(llvm::PassRegistry &); void initializeDxilPIXAddTidToAmplificationShaderPayloadPass( llvm::PassRegistry &); void initializeDxilPIXDXRInvocationsLogPass(llvm::PassRegistry &); +void initializeDxilNonUniformResourceIndexInstrumentationPass( + llvm::PassRegistry &); } // namespace llvm diff --git a/include/dxc/HLSL/DxilGenerationPass.h b/include/dxc/HLSL/DxilGenerationPass.h index c77ddab3d0..9df93e9232 100644 --- a/include/dxc/HLSL/DxilGenerationPass.h +++ b/include/dxc/HLSL/DxilGenerationPass.h @@ -81,6 +81,7 @@ ModulePass *createResumePassesPass(); FunctionPass 
*createMatrixBitcastLowerPass(); ModulePass *createDxilCleanupAddrSpaceCastPass(); ModulePass *createDxilRenameResourcesPass(); +ModulePass *createDxilScalarizeVectorLoadStoresPass(); void initializeDxilLowerCreateHandleForLibPass(llvm::PassRegistry &); void initializeDxilAllocateResourcesForLibPass(llvm::PassRegistry &); @@ -115,6 +116,7 @@ void initializeResumePassesPass(llvm::PassRegistry &); void initializeMatrixBitcastLowerPassPass(llvm::PassRegistry &); void initializeDxilCleanupAddrSpaceCastPass(llvm::PassRegistry &); void initializeDxilRenameResourcesPass(llvm::PassRegistry &); +void initializeDxilScalarizeVectorLoadStoresPass(llvm::PassRegistry &); ModulePass *createDxilValidateWaveSensitivityPass(); void initializeDxilValidateWaveSensitivityPass(llvm::PassRegistry &); diff --git a/include/dxc/HLSL/HLOperations.h b/include/dxc/HLSL/HLOperations.h index 1ccb7f04a2..a7db8612a6 100644 --- a/include/dxc/HLSL/HLOperations.h +++ b/include/dxc/HLSL/HLOperations.h @@ -398,6 +398,10 @@ const unsigned kAnnotateHandleResourceTypeOpIdx = 3; const unsigned kTraceRayRayDescOpIdx = 7; const unsigned kTraceRayPayLoadOpIdx = 8; +// AllocateRayQuery +const unsigned kAllocateRayQueryRayFlagsIdx = 1; +const unsigned kAllocateRayQueryRayQueryFlagsIdx = 2; + // CallShader. 
const unsigned kCallShaderPayloadOpIdx = 2; @@ -429,6 +433,10 @@ const unsigned kNodeHandleToResCastOpIdx = 1; const unsigned kAnnotateNodeHandleNodePropIdx = 2; const unsigned kAnnotateNodeRecordHandleNodeRecordPropIdx = 2; +// HitObject::MakeMiss +const unsigned kHitObjectMakeMiss_NumOp = 8; +const unsigned kHitObjectMakeMissRayDescOpIdx = 4; + } // namespace HLOperandIndex llvm::Function *GetOrCreateHLFunction(llvm::Module &M, diff --git a/include/dxc/HlslIntrinsicOp.h b/include/dxc/HlslIntrinsicOp.h index fcc9bb11b1..d37c27a38e 100644 --- a/include/dxc/HlslIntrinsicOp.h +++ b/include/dxc/HlslIntrinsicOp.h @@ -5,378 +5,398 @@ #pragma once namespace hlsl { enum class IntrinsicOp { - IOP_AcceptHitAndEndSearch, - IOP_AddUint64, - IOP_AllMemoryBarrier, - IOP_AllMemoryBarrierWithGroupSync, - IOP_AllocateRayQuery, - IOP_Barrier, - IOP_CallShader, - IOP_CheckAccessFullyMapped, - IOP_CreateResourceFromHeap, - IOP_D3DCOLORtoUBYTE4, - IOP_DeviceMemoryBarrier, - IOP_DeviceMemoryBarrierWithGroupSync, - IOP_DispatchMesh, - IOP_DispatchRaysDimensions, - IOP_DispatchRaysIndex, - IOP_EvaluateAttributeAtSample, - IOP_EvaluateAttributeCentroid, - IOP_EvaluateAttributeSnapped, - IOP_GeometryIndex, - IOP_GetAttributeAtVertex, - IOP_GetRemainingRecursionLevels, - IOP_GetRenderTargetSampleCount, - IOP_GetRenderTargetSamplePosition, - IOP_GroupMemoryBarrier, - IOP_GroupMemoryBarrierWithGroupSync, - IOP_HitKind, - IOP_IgnoreHit, - IOP_InstanceID, - IOP_InstanceIndex, - IOP_InterlockedAdd, - IOP_InterlockedAnd, - IOP_InterlockedCompareExchange, - IOP_InterlockedCompareExchangeFloatBitwise, - IOP_InterlockedCompareStore, - IOP_InterlockedCompareStoreFloatBitwise, - IOP_InterlockedExchange, - IOP_InterlockedMax, - IOP_InterlockedMin, - IOP_InterlockedOr, - IOP_InterlockedXor, - IOP_IsHelperLane, - IOP_NonUniformResourceIndex, - IOP_ObjectRayDirection, - IOP_ObjectRayOrigin, - IOP_ObjectToWorld, - IOP_ObjectToWorld3x4, - IOP_ObjectToWorld4x3, - IOP_PrimitiveIndex, - 
IOP_Process2DQuadTessFactorsAvg, - IOP_Process2DQuadTessFactorsMax, - IOP_Process2DQuadTessFactorsMin, - IOP_ProcessIsolineTessFactors, - IOP_ProcessQuadTessFactorsAvg, - IOP_ProcessQuadTessFactorsMax, - IOP_ProcessQuadTessFactorsMin, - IOP_ProcessTriTessFactorsAvg, - IOP_ProcessTriTessFactorsMax, - IOP_ProcessTriTessFactorsMin, - IOP_QuadAll, - IOP_QuadAny, - IOP_QuadReadAcrossDiagonal, - IOP_QuadReadAcrossX, - IOP_QuadReadAcrossY, - IOP_QuadReadLaneAt, - IOP_RayFlags, - IOP_RayTCurrent, - IOP_RayTMin, - IOP_ReportHit, - IOP_SetMeshOutputCounts, - IOP_TraceRay, - IOP_WaveActiveAllEqual, - IOP_WaveActiveAllTrue, - IOP_WaveActiveAnyTrue, - IOP_WaveActiveBallot, - IOP_WaveActiveBitAnd, - IOP_WaveActiveBitOr, - IOP_WaveActiveBitXor, - IOP_WaveActiveCountBits, - IOP_WaveActiveMax, - IOP_WaveActiveMin, - IOP_WaveActiveProduct, - IOP_WaveActiveSum, - IOP_WaveGetLaneCount, - IOP_WaveGetLaneIndex, - IOP_WaveIsFirstLane, - IOP_WaveMatch, - IOP_WaveMultiPrefixBitAnd, - IOP_WaveMultiPrefixBitOr, - IOP_WaveMultiPrefixBitXor, - IOP_WaveMultiPrefixCountBits, - IOP_WaveMultiPrefixProduct, - IOP_WaveMultiPrefixSum, - IOP_WavePrefixCountBits, - IOP_WavePrefixProduct, - IOP_WavePrefixSum, - IOP_WaveReadLaneAt, - IOP_WaveReadLaneFirst, - IOP_WorldRayDirection, - IOP_WorldRayOrigin, - IOP_WorldToObject, - IOP_WorldToObject3x4, - IOP_WorldToObject4x3, - IOP_abort, - IOP_abs, - IOP_acos, - IOP_all, - IOP_and, - IOP_any, - IOP_asdouble, - IOP_asfloat, - IOP_asfloat16, - IOP_asin, - IOP_asint, - IOP_asint16, - IOP_asuint, - IOP_asuint16, - IOP_atan, - IOP_atan2, - IOP_ceil, - IOP_clamp, - IOP_clip, - IOP_cos, - IOP_cosh, - IOP_countbits, - IOP_cross, - IOP_ddx, - IOP_ddx_coarse, - IOP_ddx_fine, - IOP_ddy, - IOP_ddy_coarse, - IOP_ddy_fine, - IOP_degrees, - IOP_determinant, - IOP_distance, - IOP_dot, - IOP_dot2add, - IOP_dot4add_i8packed, - IOP_dot4add_u8packed, - IOP_dst, - IOP_exp, - IOP_exp2, - IOP_f16tof32, - IOP_f32tof16, - IOP_faceforward, - IOP_firstbithigh, - IOP_firstbitlow, - 
IOP_floor, - IOP_fma, - IOP_fmod, - IOP_frac, - IOP_frexp, - IOP_fwidth, - IOP_isfinite, - IOP_isinf, - IOP_isnan, - IOP_ldexp, - IOP_length, - IOP_lerp, - IOP_lit, - IOP_log, - IOP_log10, - IOP_log2, - IOP_mad, - IOP_max, - IOP_min, - IOP_modf, - IOP_msad4, - IOP_mul, - IOP_normalize, - IOP_or, - IOP_pack_clamp_s8, - IOP_pack_clamp_u8, - IOP_pack_s8, - IOP_pack_u8, - IOP_pow, - IOP_printf, - IOP_radians, - IOP_rcp, - IOP_reflect, - IOP_refract, - IOP_reversebits, - IOP_round, - IOP_rsqrt, - IOP_saturate, - IOP_select, - IOP_sign, - IOP_sin, - IOP_sincos, - IOP_sinh, - IOP_smoothstep, - IOP_source_mark, - IOP_sqrt, - IOP_step, - IOP_tan, - IOP_tanh, - IOP_tex1D, - IOP_tex1Dbias, - IOP_tex1Dgrad, - IOP_tex1Dlod, - IOP_tex1Dproj, - IOP_tex2D, - IOP_tex2Dbias, - IOP_tex2Dgrad, - IOP_tex2Dlod, - IOP_tex2Dproj, - IOP_tex3D, - IOP_tex3Dbias, - IOP_tex3Dgrad, - IOP_tex3Dlod, - IOP_tex3Dproj, - IOP_texCUBE, - IOP_texCUBEbias, - IOP_texCUBEgrad, - IOP_texCUBElod, - IOP_texCUBEproj, - IOP_transpose, - IOP_trunc, - IOP_unpack_s8s16, - IOP_unpack_s8s32, - IOP_unpack_u8u16, - IOP_unpack_u8u32, -#ifdef ENABLE_SPIRV_CODEGEN - IOP_VkRawBufferLoad, -#endif // ENABLE_SPIRV_CODEGEN -#ifdef ENABLE_SPIRV_CODEGEN - IOP_VkRawBufferStore, -#endif // ENABLE_SPIRV_CODEGEN -#ifdef ENABLE_SPIRV_CODEGEN - IOP_VkReadClock, -#endif // ENABLE_SPIRV_CODEGEN -#ifdef ENABLE_SPIRV_CODEGEN - IOP_Vkext_execution_mode, -#endif // ENABLE_SPIRV_CODEGEN -#ifdef ENABLE_SPIRV_CODEGEN - IOP_Vkext_execution_mode_id, -#endif // ENABLE_SPIRV_CODEGEN - MOP_Append, - MOP_RestartStrip, - MOP_CalculateLevelOfDetail, - MOP_CalculateLevelOfDetailUnclamped, - MOP_GetDimensions, - MOP_Load, - MOP_Sample, - MOP_SampleBias, - MOP_SampleCmp, - MOP_SampleCmpBias, - MOP_SampleCmpGrad, - MOP_SampleCmpLevel, - MOP_SampleCmpLevelZero, - MOP_SampleGrad, - MOP_SampleLevel, - MOP_Gather, - MOP_GatherAlpha, - MOP_GatherBlue, - MOP_GatherCmp, - MOP_GatherCmpAlpha, - MOP_GatherCmpBlue, - MOP_GatherCmpGreen, - MOP_GatherCmpRed, - 
MOP_GatherGreen, - MOP_GatherRaw, - MOP_GatherRed, - MOP_GetSamplePosition, - MOP_Load2, - MOP_Load3, - MOP_Load4, - MOP_InterlockedAdd, - MOP_InterlockedAdd64, - MOP_InterlockedAnd, - MOP_InterlockedAnd64, - MOP_InterlockedCompareExchange, - MOP_InterlockedCompareExchange64, - MOP_InterlockedCompareExchangeFloatBitwise, - MOP_InterlockedCompareStore, - MOP_InterlockedCompareStore64, - MOP_InterlockedCompareStoreFloatBitwise, - MOP_InterlockedExchange, - MOP_InterlockedExchange64, - MOP_InterlockedExchangeFloat, - MOP_InterlockedMax, - MOP_InterlockedMax64, - MOP_InterlockedMin, - MOP_InterlockedMin64, - MOP_InterlockedOr, - MOP_InterlockedOr64, - MOP_InterlockedXor, - MOP_InterlockedXor64, - MOP_Store, - MOP_Store2, - MOP_Store3, - MOP_Store4, - MOP_DecrementCounter, - MOP_IncrementCounter, - MOP_Consume, - MOP_WriteSamplerFeedback, - MOP_WriteSamplerFeedbackBias, - MOP_WriteSamplerFeedbackGrad, - MOP_WriteSamplerFeedbackLevel, - MOP_Abort, - MOP_CandidateGeometryIndex, - MOP_CandidateInstanceContributionToHitGroupIndex, - MOP_CandidateInstanceID, - MOP_CandidateInstanceIndex, - MOP_CandidateObjectRayDirection, - MOP_CandidateObjectRayOrigin, - MOP_CandidateObjectToWorld3x4, - MOP_CandidateObjectToWorld4x3, - MOP_CandidatePrimitiveIndex, - MOP_CandidateProceduralPrimitiveNonOpaque, - MOP_CandidateTriangleBarycentrics, - MOP_CandidateTriangleFrontFace, - MOP_CandidateTriangleRayT, - MOP_CandidateType, - MOP_CandidateWorldToObject3x4, - MOP_CandidateWorldToObject4x3, - MOP_CommitNonOpaqueTriangleHit, - MOP_CommitProceduralPrimitiveHit, - MOP_CommittedGeometryIndex, - MOP_CommittedInstanceContributionToHitGroupIndex, - MOP_CommittedInstanceID, - MOP_CommittedInstanceIndex, - MOP_CommittedObjectRayDirection, - MOP_CommittedObjectRayOrigin, - MOP_CommittedObjectToWorld3x4, - MOP_CommittedObjectToWorld4x3, - MOP_CommittedPrimitiveIndex, - MOP_CommittedRayT, - MOP_CommittedStatus, - MOP_CommittedTriangleBarycentrics, - MOP_CommittedTriangleFrontFace, - 
MOP_CommittedWorldToObject3x4, - MOP_CommittedWorldToObject4x3, - MOP_Proceed, - MOP_RayFlags, - MOP_RayTMin, - MOP_TraceRayInline, - MOP_WorldRayDirection, - MOP_WorldRayOrigin, - MOP_Count, - MOP_FinishedCrossGroupSharing, - MOP_GetGroupNodeOutputRecords, - MOP_GetThreadNodeOutputRecords, - MOP_IsValid, - MOP_GroupIncrementOutputCount, - MOP_ThreadIncrementOutputCount, - MOP_OutputComplete, -#ifdef ENABLE_SPIRV_CODEGEN - MOP_SubpassLoad, -#endif // ENABLE_SPIRV_CODEGEN + IOP_AcceptHitAndEndSearch = 0, + IOP_AddUint64 = 1, + IOP_AllMemoryBarrier = 2, + IOP_AllMemoryBarrierWithGroupSync = 3, + IOP_AllocateRayQuery = 4, + IOP_Barrier = 5, + IOP_CallShader = 6, + IOP_CheckAccessFullyMapped = 7, + IOP_CreateResourceFromHeap = 8, + IOP_D3DCOLORtoUBYTE4 = 9, + IOP_DeviceMemoryBarrier = 10, + IOP_DeviceMemoryBarrierWithGroupSync = 11, + IOP_DispatchMesh = 12, + IOP_DispatchRaysDimensions = 13, + IOP_DispatchRaysIndex = 14, + IOP_EvaluateAttributeAtSample = 15, + IOP_EvaluateAttributeCentroid = 16, + IOP_EvaluateAttributeSnapped = 17, + IOP_GeometryIndex = 18, + IOP_GetAttributeAtVertex = 19, + IOP_GetRemainingRecursionLevels = 20, + IOP_GetRenderTargetSampleCount = 21, + IOP_GetRenderTargetSamplePosition = 22, + IOP_GroupMemoryBarrier = 23, + IOP_GroupMemoryBarrierWithGroupSync = 24, + IOP_HitKind = 25, + IOP_IgnoreHit = 26, + IOP_InstanceID = 27, + IOP_InstanceIndex = 28, + IOP_InterlockedAdd = 29, + IOP_InterlockedAnd = 30, + IOP_InterlockedCompareExchange = 31, + IOP_InterlockedCompareExchangeFloatBitwise = 32, + IOP_InterlockedCompareStore = 33, + IOP_InterlockedCompareStoreFloatBitwise = 34, + IOP_InterlockedExchange = 35, + IOP_InterlockedMax = 36, + IOP_InterlockedMin = 37, + IOP_InterlockedOr = 38, + IOP_InterlockedXor = 39, + IOP_IsHelperLane = 40, + IOP_NonUniformResourceIndex = 41, + IOP_ObjectRayDirection = 42, + IOP_ObjectRayOrigin = 43, + IOP_ObjectToWorld = 44, + IOP_ObjectToWorld3x4 = 45, + IOP_ObjectToWorld4x3 = 46, + IOP_PrimitiveIndex = 47, + 
IOP_Process2DQuadTessFactorsAvg = 48, + IOP_Process2DQuadTessFactorsMax = 49, + IOP_Process2DQuadTessFactorsMin = 50, + IOP_ProcessIsolineTessFactors = 51, + IOP_ProcessQuadTessFactorsAvg = 52, + IOP_ProcessQuadTessFactorsMax = 53, + IOP_ProcessQuadTessFactorsMin = 54, + IOP_ProcessTriTessFactorsAvg = 55, + IOP_ProcessTriTessFactorsMax = 56, + IOP_ProcessTriTessFactorsMin = 57, + IOP_QuadAll = 58, + IOP_QuadAny = 59, + IOP_QuadReadAcrossDiagonal = 60, + IOP_QuadReadAcrossX = 61, + IOP_QuadReadAcrossY = 62, + IOP_QuadReadLaneAt = 63, + IOP_RayFlags = 64, + IOP_RayTCurrent = 65, + IOP_RayTMin = 66, + IOP_ReportHit = 67, + IOP_SetMeshOutputCounts = 68, + IOP_TraceRay = 69, + IOP_WaveActiveAllEqual = 70, + IOP_WaveActiveAllTrue = 71, + IOP_WaveActiveAnyTrue = 72, + IOP_WaveActiveBallot = 73, + IOP_WaveActiveBitAnd = 74, + IOP_WaveActiveBitOr = 75, + IOP_WaveActiveBitXor = 76, + IOP_WaveActiveCountBits = 77, + IOP_WaveActiveMax = 78, + IOP_WaveActiveMin = 79, + IOP_WaveActiveProduct = 80, + IOP_WaveActiveSum = 81, + IOP_WaveGetLaneCount = 82, + IOP_WaveGetLaneIndex = 83, + IOP_WaveIsFirstLane = 84, + IOP_WaveMatch = 85, + IOP_WaveMultiPrefixBitAnd = 86, + IOP_WaveMultiPrefixBitOr = 87, + IOP_WaveMultiPrefixBitXor = 88, + IOP_WaveMultiPrefixCountBits = 89, + IOP_WaveMultiPrefixProduct = 90, + IOP_WaveMultiPrefixSum = 91, + IOP_WavePrefixCountBits = 92, + IOP_WavePrefixProduct = 93, + IOP_WavePrefixSum = 94, + IOP_WaveReadLaneAt = 95, + IOP_WaveReadLaneFirst = 96, + IOP_WorldRayDirection = 97, + IOP_WorldRayOrigin = 98, + IOP_WorldToObject = 99, + IOP_WorldToObject3x4 = 100, + IOP_WorldToObject4x3 = 101, + IOP_abort = 102, + IOP_abs = 103, + IOP_acos = 104, + IOP_all = 105, + IOP_and = 106, + IOP_any = 107, + IOP_asdouble = 108, + IOP_asfloat = 109, + IOP_asfloat16 = 110, + IOP_asin = 111, + IOP_asint = 112, + IOP_asint16 = 113, + IOP_asuint = 114, + IOP_asuint16 = 115, + IOP_atan = 116, + IOP_atan2 = 117, + IOP_ceil = 118, + IOP_clamp = 119, + IOP_clip = 120, + IOP_cos = 
121, + IOP_cosh = 122, + IOP_countbits = 123, + IOP_cross = 124, + IOP_ddx = 125, + IOP_ddx_coarse = 126, + IOP_ddx_fine = 127, + IOP_ddy = 128, + IOP_ddy_coarse = 129, + IOP_ddy_fine = 130, + IOP_degrees = 131, + IOP_determinant = 132, + IOP_distance = 133, + IOP_dot = 134, + IOP_dot2add = 135, + IOP_dot4add_i8packed = 136, + IOP_dot4add_u8packed = 137, + IOP_dst = 138, + IOP_exp = 139, + IOP_exp2 = 140, + IOP_f16tof32 = 141, + IOP_f32tof16 = 142, + IOP_faceforward = 143, + IOP_firstbithigh = 144, + IOP_firstbitlow = 145, + IOP_floor = 146, + IOP_fma = 147, + IOP_fmod = 148, + IOP_frac = 149, + IOP_frexp = 150, + IOP_fwidth = 151, + IOP_isfinite = 152, + IOP_isinf = 153, + IOP_isnan = 154, + IOP_ldexp = 155, + IOP_length = 156, + IOP_lerp = 157, + IOP_lit = 158, + IOP_log = 159, + IOP_log10 = 160, + IOP_log2 = 161, + IOP_mad = 162, + IOP_max = 163, + IOP_min = 164, + IOP_modf = 165, + IOP_msad4 = 166, + IOP_mul = 167, + IOP_normalize = 168, + IOP_or = 169, + IOP_pack_clamp_s8 = 170, + IOP_pack_clamp_u8 = 171, + IOP_pack_s8 = 172, + IOP_pack_u8 = 173, + IOP_pow = 174, + IOP_printf = 175, + IOP_radians = 176, + IOP_rcp = 177, + IOP_reflect = 178, + IOP_refract = 179, + IOP_reversebits = 180, + IOP_round = 181, + IOP_rsqrt = 182, + IOP_saturate = 183, + IOP_select = 184, + IOP_sign = 185, + IOP_sin = 186, + IOP_sincos = 187, + IOP_sinh = 188, + IOP_smoothstep = 189, + IOP_source_mark = 190, + IOP_sqrt = 191, + IOP_step = 192, + IOP_tan = 193, + IOP_tanh = 194, + IOP_tex1D = 195, + IOP_tex1Dbias = 196, + IOP_tex1Dgrad = 197, + IOP_tex1Dlod = 198, + IOP_tex1Dproj = 199, + IOP_tex2D = 200, + IOP_tex2Dbias = 201, + IOP_tex2Dgrad = 202, + IOP_tex2Dlod = 203, + IOP_tex2Dproj = 204, + IOP_tex3D = 205, + IOP_tex3Dbias = 206, + IOP_tex3Dgrad = 207, + IOP_tex3Dlod = 208, + IOP_tex3Dproj = 209, + IOP_texCUBE = 210, + IOP_texCUBEbias = 211, + IOP_texCUBEgrad = 212, + IOP_texCUBElod = 213, + IOP_texCUBEproj = 214, + IOP_transpose = 215, + IOP_trunc = 216, + IOP_unpack_s8s16 = 
217, + IOP_unpack_s8s32 = 218, + IOP_unpack_u8u16 = 219, + IOP_unpack_u8u32 = 220, + IOP_VkRawBufferLoad = 221, + IOP_VkRawBufferStore = 222, + IOP_VkReadClock = 223, + IOP_Vkext_execution_mode = 224, + IOP_Vkext_execution_mode_id = 225, + IOP_Vkreinterpret_pointer_cast = 360, + IOP_Vkstatic_pointer_cast = 361, + MOP_GetBufferContents = 362, + MOP_Append = 226, + MOP_RestartStrip = 227, + MOP_CalculateLevelOfDetail = 228, + MOP_CalculateLevelOfDetailUnclamped = 229, + MOP_GetDimensions = 230, + MOP_Load = 231, + MOP_Sample = 232, + MOP_SampleBias = 233, + MOP_SampleCmp = 234, + MOP_SampleCmpBias = 235, + MOP_SampleCmpGrad = 236, + MOP_SampleCmpLevel = 237, + MOP_SampleCmpLevelZero = 238, + MOP_SampleGrad = 239, + MOP_SampleLevel = 240, + MOP_Gather = 241, + MOP_GatherAlpha = 242, + MOP_GatherBlue = 243, + MOP_GatherCmp = 244, + MOP_GatherCmpAlpha = 245, + MOP_GatherCmpBlue = 246, + MOP_GatherCmpGreen = 247, + MOP_GatherCmpRed = 248, + MOP_GatherGreen = 249, + MOP_GatherRaw = 250, + MOP_GatherRed = 251, + MOP_GetSamplePosition = 252, + MOP_Load2 = 253, + MOP_Load3 = 254, + MOP_Load4 = 255, + MOP_InterlockedAdd = 256, + MOP_InterlockedAdd64 = 257, + MOP_InterlockedAnd = 258, + MOP_InterlockedAnd64 = 259, + MOP_InterlockedCompareExchange = 260, + MOP_InterlockedCompareExchange64 = 261, + MOP_InterlockedCompareExchangeFloatBitwise = 262, + MOP_InterlockedCompareStore = 263, + MOP_InterlockedCompareStore64 = 264, + MOP_InterlockedCompareStoreFloatBitwise = 265, + MOP_InterlockedExchange = 266, + MOP_InterlockedExchange64 = 267, + MOP_InterlockedExchangeFloat = 268, + MOP_InterlockedMax = 269, + MOP_InterlockedMax64 = 270, + MOP_InterlockedMin = 271, + MOP_InterlockedMin64 = 272, + MOP_InterlockedOr = 273, + MOP_InterlockedOr64 = 274, + MOP_InterlockedXor = 275, + MOP_InterlockedXor64 = 276, + MOP_Store = 277, + MOP_Store2 = 278, + MOP_Store3 = 279, + MOP_Store4 = 280, + MOP_DecrementCounter = 281, + MOP_IncrementCounter = 282, + MOP_Consume = 283, + 
MOP_WriteSamplerFeedback = 284, + MOP_WriteSamplerFeedbackBias = 285, + MOP_WriteSamplerFeedbackGrad = 286, + MOP_WriteSamplerFeedbackLevel = 287, + MOP_Abort = 288, + MOP_CandidateGeometryIndex = 289, + MOP_CandidateInstanceContributionToHitGroupIndex = 290, + MOP_CandidateInstanceID = 291, + MOP_CandidateInstanceIndex = 292, + MOP_CandidateObjectRayDirection = 293, + MOP_CandidateObjectRayOrigin = 294, + MOP_CandidateObjectToWorld3x4 = 295, + MOP_CandidateObjectToWorld4x3 = 296, + MOP_CandidatePrimitiveIndex = 297, + MOP_CandidateProceduralPrimitiveNonOpaque = 298, + MOP_CandidateTriangleBarycentrics = 299, + MOP_CandidateTriangleFrontFace = 300, + MOP_CandidateTriangleRayT = 301, + MOP_CandidateType = 302, + MOP_CandidateWorldToObject3x4 = 303, + MOP_CandidateWorldToObject4x3 = 304, + MOP_CommitNonOpaqueTriangleHit = 305, + MOP_CommitProceduralPrimitiveHit = 306, + MOP_CommittedGeometryIndex = 307, + MOP_CommittedInstanceContributionToHitGroupIndex = 308, + MOP_CommittedInstanceID = 309, + MOP_CommittedInstanceIndex = 310, + MOP_CommittedObjectRayDirection = 311, + MOP_CommittedObjectRayOrigin = 312, + MOP_CommittedObjectToWorld3x4 = 313, + MOP_CommittedObjectToWorld4x3 = 314, + MOP_CommittedPrimitiveIndex = 315, + MOP_CommittedRayT = 316, + MOP_CommittedStatus = 317, + MOP_CommittedTriangleBarycentrics = 318, + MOP_CommittedTriangleFrontFace = 319, + MOP_CommittedWorldToObject3x4 = 320, + MOP_CommittedWorldToObject4x3 = 321, + MOP_Proceed = 322, + MOP_RayFlags = 323, + MOP_RayTMin = 324, + MOP_TraceRayInline = 325, + MOP_WorldRayDirection = 326, + MOP_WorldRayOrigin = 327, + MOP_DxHitObject_FromRayQuery = 363, + MOP_DxHitObject_GetAttributes = 364, + MOP_DxHitObject_GetGeometryIndex = 365, + MOP_DxHitObject_GetHitKind = 366, + MOP_DxHitObject_GetInstanceID = 367, + MOP_DxHitObject_GetInstanceIndex = 368, + MOP_DxHitObject_GetObjectRayDirection = 369, + MOP_DxHitObject_GetObjectRayOrigin = 370, + MOP_DxHitObject_GetObjectToWorld3x4 = 371, + 
MOP_DxHitObject_GetObjectToWorld4x3 = 372, + MOP_DxHitObject_GetPrimitiveIndex = 373, + MOP_DxHitObject_GetRayFlags = 374, + MOP_DxHitObject_GetRayTCurrent = 375, + MOP_DxHitObject_GetRayTMin = 376, + MOP_DxHitObject_GetShaderTableIndex = 377, + MOP_DxHitObject_GetWorldRayDirection = 378, + MOP_DxHitObject_GetWorldRayOrigin = 379, + MOP_DxHitObject_GetWorldToObject3x4 = 380, + MOP_DxHitObject_GetWorldToObject4x3 = 381, + MOP_DxHitObject_Invoke = 382, + MOP_DxHitObject_IsHit = 383, + MOP_DxHitObject_IsMiss = 384, + MOP_DxHitObject_IsNop = 385, + MOP_DxHitObject_LoadLocalRootTableConstant = 386, + MOP_DxHitObject_MakeMiss = 387, + MOP_DxHitObject_MakeNop = 358, + MOP_DxHitObject_SetShaderTableIndex = 388, + MOP_DxHitObject_TraceRay = 389, + IOP_DxMaybeReorderThread = 359, + MOP_Count = 328, + MOP_FinishedCrossGroupSharing = 329, + MOP_GetGroupNodeOutputRecords = 330, + MOP_GetThreadNodeOutputRecords = 331, + MOP_IsValid = 332, + MOP_GroupIncrementOutputCount = 333, + MOP_ThreadIncrementOutputCount = 334, + MOP_OutputComplete = 335, + MOP_SubpassLoad = 336, // unsigned - IOP_InterlockedUMax, - IOP_InterlockedUMin, - IOP_WaveActiveUMax, - IOP_WaveActiveUMin, - IOP_WaveActiveUProduct, - IOP_WaveActiveUSum, - IOP_WaveMultiPrefixUProduct, - IOP_WaveMultiPrefixUSum, - IOP_WavePrefixUProduct, - IOP_WavePrefixUSum, - IOP_uabs, - IOP_uclamp, - IOP_udot, - IOP_ufirstbithigh, - IOP_umad, - IOP_umax, - IOP_umin, - IOP_umul, - IOP_usign, - MOP_InterlockedUMax, - MOP_InterlockedUMin, - Num_Intrinsics, + IOP_InterlockedUMax = 337, + IOP_InterlockedUMin = 338, + IOP_WaveActiveUMax = 339, + IOP_WaveActiveUMin = 340, + IOP_WaveActiveUProduct = 341, + IOP_WaveActiveUSum = 342, + IOP_WaveMultiPrefixUProduct = 343, + IOP_WaveMultiPrefixUSum = 344, + IOP_WavePrefixUProduct = 345, + IOP_WavePrefixUSum = 346, + IOP_uabs = 347, + IOP_uclamp = 348, + IOP_udot = 349, + IOP_ufirstbithigh = 350, + IOP_umad = 351, + IOP_umax = 352, + IOP_umin = 353, + IOP_umul = 354, + IOP_usign = 355, + 
MOP_InterlockedUMax = 356, + MOP_InterlockedUMin = 357, + Num_Intrinsics = 390, }; inline bool HasUnsignedIntrinsicOpcode(IntrinsicOp opcode) { switch (opcode) { diff --git a/include/dxc/Support/HLSLOptions.h b/include/dxc/Support/HLSLOptions.h index 887591ae82..56e95a1659 100644 --- a/include/dxc/Support/HLSLOptions.h +++ b/include/dxc/Support/HLSLOptions.h @@ -274,6 +274,8 @@ class DxcOpts { SpirvOptions; // All SPIR-V CodeGen-related options #endif // SPIRV Change Ends + + bool GenMetal = false; // OPT_metal }; /// Use this class to capture, convert and handle the lifetime for the diff --git a/include/dxc/Support/HLSLOptions.td b/include/dxc/Support/HLSLOptions.td index 130e19a525..ea000f4877 100644 --- a/include/dxc/Support/HLSLOptions.td +++ b/include/dxc/Support/HLSLOptions.td @@ -346,6 +346,11 @@ def disable_exception_handling : Flag<["-", "/"], "disable-exception-handling">, def skip_serialization : Flag<["-", "/"], "skip-serialization">, Group, Flags<[CoreOption, HelpHidden]>, HelpText<"Return a module interface instead of serialized output">; +def metal : Flag<["-"], "metal">, + Group, + Flags<[CoreOption, DriverOption]>, + HelpText<"Generate Metal code">; + // SPIRV Change Starts def spirv : Flag<["-"], "spirv">, Group, Flags<[CoreOption, DriverOption]>, HelpText<"Generate SPIR-V code">; diff --git a/include/dxc/WinAdapter.h b/include/dxc/WinAdapter.h index b8c6646871..d02ad1ac38 100644 --- a/include/dxc/WinAdapter.h +++ b/include/dxc/WinAdapter.h @@ -51,7 +51,8 @@ #define _countof(a) (sizeof(a) / sizeof(*(a))) // If it is GCC, there is no UUID support and we must emulate it. -#ifndef __clang__ +// Clang support depends on the -fms-extensions compiler flag. 
+#if !defined(__clang__) || !defined(_MSC_EXTENSIONS) #define __EMULATE_UUID 1 #endif // __clang__ diff --git a/include/dxc/dxcapi.internal.h b/include/dxc/dxcapi.internal.h index b0f9a467a4..28bd3e7066 100644 --- a/include/dxc/dxcapi.internal.h +++ b/include/dxc/dxcapi.internal.h @@ -7,6 +7,9 @@ // // // Provides non-public declarations for the DirectX Compiler component. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // /////////////////////////////////////////////////////////////////////////////// #ifndef __DXC_API_INTERNAL__ @@ -35,6 +38,7 @@ typedef struct ID3D10Blob ID3D10Blob; static const BYTE INTRIN_TEMPLATE_FROM_TYPE = 0xff; static const BYTE INTRIN_TEMPLATE_VARARGS = 0xfe; static const BYTE INTRIN_TEMPLATE_FROM_FUNCTION = 0xfd; +static const BYTE INTRIN_TEMPLATE_FROM_FUNCTION_2 = 0xfc; // Use this enumeration to describe allowed templates (layouts) in intrinsics. enum LEGAL_INTRINSIC_TEMPLATES { @@ -126,7 +130,15 @@ enum LEGAL_INTRINSIC_COMPTYPES { LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS = 49, LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS = 50, - LICOMPTYPE_COUNT = 51 + LICOMPTYPE_HIT_OBJECT = 51, + LICOMPTYPE_RAY_QUERY = 52, + +#ifdef ENABLE_SPIRV_CODEGEN + LICOMPTYPE_VK_BUFFER_POINTER = 53, + LICOMPTYPE_COUNT = 54 +#else + LICOMPTYPE_COUNT = 53 +#endif }; static const BYTE IA_SPECIAL_BASE = 0xf0; @@ -160,11 +172,17 @@ struct HLSL_INTRINSIC_ARGUMENT { // matching input constraints. 
}; +// HLSL_INTRINSIC flags +static const UINT INTRIN_FLAG_READ_ONLY = 1U << 0; +static const UINT INTRIN_FLAG_READ_NONE = 1U << 1; +static const UINT INTRIN_FLAG_IS_WAVE = 1U << 2; +static const UINT INTRIN_FLAG_STATIC_MEMBER = 1U << 3; + struct HLSL_INTRINSIC { UINT Op; // Intrinsic Op ID - BOOL bReadOnly; // Only read memory - BOOL bReadNone; // Not read memory - BOOL bIsWave; // Is a wave-sensitive op + UINT Flags; // INTRIN_FLAG_* flags + UINT MinShaderModel; // Encoded minimum shader model, 0 = no minimum + // (Major << 4) + (Minor & 0xf) INT iOverloadParamIndex; // Parameter decide the overload type, -1 means ret // type UINT uNumArgs; // Count of arguments in pArgs. diff --git a/lib/DXIL/DxilMetadataHelper.cpp b/lib/DXIL/DxilMetadataHelper.cpp index fdd6d6b946..c1282a980a 100644 --- a/lib/DXIL/DxilMetadataHelper.cpp +++ b/lib/DXIL/DxilMetadataHelper.cpp @@ -177,17 +177,28 @@ void DxilMDHelper::EmitDxilVersion(unsigned Major, unsigned Minor) { pDxilVersionMD->addOperand(MDNode::get(m_Ctx, MDVals)); } -void DxilMDHelper::LoadDxilVersion(unsigned &Major, unsigned &Minor) { - NamedMDNode *pDxilVersionMD = m_pModule->getNamedMetadata(kDxilVersionMDName); - IFTBOOL(pDxilVersionMD != nullptr, DXC_E_INCORRECT_DXIL_METADATA); - IFTBOOL(pDxilVersionMD->getNumOperands() == 1, DXC_E_INCORRECT_DXIL_METADATA); +// Load dxil version from metadata contained in pModule. +// Returns true and passes result through +// the dxil major/minor version params if valid. +// Returns false if metadata is missing or invalid. 
+bool DxilMDHelper::LoadDxilVersion(const Module *pModule, unsigned &Major, + unsigned &Minor) { + NamedMDNode *pDxilVersionMD = pModule->getNamedMetadata(kDxilVersionMDName); + IFRBOOL(pDxilVersionMD != nullptr, false); + IFRBOOL(pDxilVersionMD->getNumOperands() == 1, false); MDNode *pVersionMD = pDxilVersionMD->getOperand(0); - IFTBOOL(pVersionMD->getNumOperands() == kDxilVersionNumFields, - DXC_E_INCORRECT_DXIL_METADATA); + IFRBOOL(pVersionMD->getNumOperands() == kDxilVersionNumFields, false); Major = ConstMDToUint32(pVersionMD->getOperand(kDxilVersionMajorIdx)); Minor = ConstMDToUint32(pVersionMD->getOperand(kDxilVersionMinorIdx)); + + return true; +} + +void DxilMDHelper::LoadDxilVersion(unsigned &Major, unsigned &Minor) { + IFTBOOL(LoadDxilVersion(m_pModule, Major, Minor), + DXC_E_INCORRECT_DXIL_METADATA); } // @@ -3099,6 +3110,13 @@ void DxilExtraPropertyHelper::EmitUAVProperties( DxilMDHelper::kDxilAtomic64UseTag, m_Ctx)); MDVals.emplace_back(DxilMDHelper::Uint32ToConstMD((unsigned)true, m_Ctx)); } + // Whether resource is reordercoherent. 
+ if (DXIL::CompareVersions(m_ValMajor, m_ValMinor, 1, 9) >= 0 && + UAV.IsReorderCoherent()) { + MDVals.emplace_back(DxilMDHelper::Uint32ToConstMD( + DxilMDHelper::kDxilReorderCoherentTag, m_Ctx)); + MDVals.emplace_back(DxilMDHelper::BoolToConstMD(true, m_Ctx)); + } } void DxilExtraPropertyHelper::LoadUAVProperties(const MDOperand &MDO, @@ -3136,6 +3154,9 @@ void DxilExtraPropertyHelper::LoadUAVProperties(const MDOperand &MDO, case DxilMDHelper::kDxilAtomic64UseTag: UAV.SetHasAtomic64Use(DxilMDHelper::ConstMDToBool(MDO)); break; + case DxilMDHelper::kDxilReorderCoherentTag: + UAV.SetReorderCoherent(DxilMDHelper::ConstMDToBool(MDO)); + break; default: DXASSERT(false, "Unknown resource record tag"); m_bExtraMetadata = true; diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index b3e552da18..f614ba9d14 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -10,6 +10,7 @@ /////////////////////////////////////////////////////////////////////////////// #include "dxc/DXIL/DxilOperations.h" +#include "dxc/DXIL/DxilConstants.h" #include "dxc/DXIL/DxilInstructions.h" #include "dxc/DXIL/DxilModule.h" #include "dxc/Support/Global.h" @@ -23,8 +24,6 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -using std::string; -using std::vector; namespace hlsl { @@ -41,2984 +40,2623 @@ import hctdb_instrhelp /* hctdb_instrhelp.get_oloads_props()*/ // OPCODE-OLOADS:BEGIN const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { - // OpCode OpCode name, OpCodeClass - // OpCodeClass name, void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj, function attribute - // Temporary, indexable, input, output registers void, h, f, d, - // i1, i8, i16, i32, i64, udt, obj , function attribute - { - OC::TempRegLoad, - "TempRegLoad", - OCC::TempRegLoad, - "tempRegLoad", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::TempRegStore, - "TempRegStore", - 
OCC::TempRegStore, - "tempRegStore", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::MinPrecXRegLoad, - "MinPrecXRegLoad", - OCC::MinPrecXRegLoad, - "minPrecXRegLoad", - {false, true, false, false, false, false, true, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::MinPrecXRegStore, - "MinPrecXRegStore", - OCC::MinPrecXRegStore, - "minPrecXRegStore", - {false, true, false, false, false, false, true, false, false, false, - false}, - Attribute::None, - }, - { - OC::LoadInput, - "LoadInput", - OCC::LoadInput, - "loadInput", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::StoreOutput, - "StoreOutput", - OCC::StoreOutput, - "storeOutput", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - - // Unary float void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::FAbs, - "FAbs", - OCC::Unary, - "unary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Saturate, - "Saturate", - OCC::Unary, - "unary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IsNaN, - "IsNaN", - OCC::IsSpecialFloat, - "isSpecialFloat", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IsInf, - "IsInf", - OCC::IsSpecialFloat, - "isSpecialFloat", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IsFinite, - "IsFinite", - OCC::IsSpecialFloat, - "isSpecialFloat", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IsNormal, - "IsNormal", - OCC::IsSpecialFloat, - "isSpecialFloat", - {false, true, true, false, false, false, false, false, 
false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Cos, - "Cos", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Sin, - "Sin", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Tan, - "Tan", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Acos, - "Acos", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Asin, - "Asin", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Atan, - "Atan", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Hcos, - "Hcos", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Hsin, - "Hsin", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Htan, - "Htan", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Exp, - "Exp", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Frc, - "Frc", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Log, - "Log", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Sqrt, - "Sqrt", - OCC::Unary, 
- "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Rsqrt, - "Rsqrt", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Unary float - rounding void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::Round_ne, - "Round_ne", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Round_ni, - "Round_ni", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Round_pi, - "Round_pi", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Round_z, - "Round_z", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Unary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Bfrev, - "Bfrev", - OCC::Unary, - "unary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::Countbits, - "Countbits", - OCC::UnaryBits, - "unaryBits", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::FirstbitLo, - "FirstbitLo", - OCC::UnaryBits, - "unaryBits", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Unary uint void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::FirstbitHi, - "FirstbitHi", - OCC::UnaryBits, - "unaryBits", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Unary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, 
obj , function attribute - { - OC::FirstbitSHi, - "FirstbitSHi", - OCC::UnaryBits, - "unaryBits", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Binary float void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::FMax, - "FMax", - OCC::Binary, - "binary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::FMin, - "FMin", - OCC::Binary, - "binary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Binary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::IMax, - "IMax", - OCC::Binary, - "binary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::IMin, - "IMin", - OCC::Binary, - "binary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Binary uint void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::UMax, - "UMax", - OCC::Binary, - "binary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::UMin, - "UMin", - OCC::Binary, - "binary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Binary int with two outputs void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::IMul, - "IMul", - OCC::BinaryWithTwoOuts, - "binaryWithTwoOuts", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Binary uint with two outputs void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::UMul, - "UMul", - OCC::BinaryWithTwoOuts, - "binaryWithTwoOuts", - {false, false, false, false, false, false, false, true, false, false, - 
false}, - Attribute::ReadNone, - }, - { - OC::UDiv, - "UDiv", - OCC::BinaryWithTwoOuts, - "binaryWithTwoOuts", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Binary uint with carry or borrow void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::UAddc, - "UAddc", - OCC::BinaryWithCarryOrBorrow, - "binaryWithCarryOrBorrow", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::USubb, - "USubb", - OCC::BinaryWithCarryOrBorrow, - "binaryWithCarryOrBorrow", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Tertiary float void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::FMad, - "FMad", - OCC::Tertiary, - "tertiary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Fma, - "Fma", - OCC::Tertiary, - "tertiary", - {false, false, false, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Tertiary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::IMad, - "IMad", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Tertiary uint void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::UMad, - "UMad", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Tertiary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Msad, - "Msad", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::Ibfe, - "Ibfe", - OCC::Tertiary, - "tertiary", - {false, 
false, false, false, false, false, false, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Tertiary uint void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Ubfe, - "Ubfe", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Quaternary void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Bfi, - "Bfi", - OCC::Quaternary, - "quaternary", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Dot void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::Dot2, - "Dot2", - OCC::Dot2, - "dot2", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Dot3, - "Dot3", - OCC::Dot3, - "dot3", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Dot4, - "Dot4", - OCC::Dot4, - "dot4", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Resources void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::CreateHandle, - "CreateHandle", - OCC::CreateHandle, - "createHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::CBufferLoad, - "CBufferLoad", - OCC::CBufferLoad, - "cbufferLoad", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::ReadOnly, - }, - { - OC::CBufferLoadLegacy, - "CBufferLoadLegacy", - OCC::CBufferLoadLegacy, - "cbufferLoadLegacy", - {false, true, true, true, false, false, true, true, true, false, false}, - Attribute::ReadOnly, - }, - - // Resources - sample void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::Sample, - "Sample", - OCC::Sample, - "sample", - 
{false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleBias, - "SampleBias", - OCC::SampleBias, - "sampleBias", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleLevel, - "SampleLevel", - OCC::SampleLevel, - "sampleLevel", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleGrad, - "SampleGrad", - OCC::SampleGrad, - "sampleGrad", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleCmp, - "SampleCmp", - OCC::SampleCmp, - "sampleCmp", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleCmpLevelZero, - "SampleCmpLevelZero", - OCC::SampleCmpLevelZero, - "sampleCmpLevelZero", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Resources void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::TextureLoad, - "TextureLoad", - OCC::TextureLoad, - "textureLoad", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::TextureStore, - "TextureStore", - OCC::TextureStore, - "textureStore", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::BufferLoad, - "BufferLoad", - OCC::BufferLoad, - "bufferLoad", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::BufferStore, - "BufferStore", - OCC::BufferStore, - "bufferStore", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::BufferUpdateCounter, - "BufferUpdateCounter", - OCC::BufferUpdateCounter, - "bufferUpdateCounter", - {true, false, false, false, 
false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::CheckAccessFullyMapped, - "CheckAccessFullyMapped", - OCC::CheckAccessFullyMapped, - "checkAccessFullyMapped", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::GetDimensions, - "GetDimensions", - OCC::GetDimensions, - "getDimensions", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Resources - gather void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::TextureGather, - "TextureGather", - OCC::TextureGather, - "textureGather", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::TextureGatherCmp, - "TextureGatherCmp", - OCC::TextureGatherCmp, - "textureGatherCmp", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - - // Resources - sample void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::Texture2DMSGetSamplePosition, - "Texture2DMSGetSamplePosition", - OCC::Texture2DMSGetSamplePosition, - "texture2DMSGetSamplePosition", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RenderTargetGetSamplePosition, - "RenderTargetGetSamplePosition", - OCC::RenderTargetGetSamplePosition, - "renderTargetGetSamplePosition", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RenderTargetGetSampleCount, - "RenderTargetGetSampleCount", - OCC::RenderTargetGetSampleCount, - "renderTargetGetSampleCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Synchronization void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::AtomicBinOp, - "AtomicBinOp", - 
OCC::AtomicBinOp, - "atomicBinOp", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::None, - }, - { - OC::AtomicCompareExchange, - "AtomicCompareExchange", - OCC::AtomicCompareExchange, - "atomicCompareExchange", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::None, - }, - { - OC::Barrier, - "Barrier", - OCC::Barrier, - "barrier", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoDuplicate, - }, - - // Derivatives void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::CalculateLOD, - "CalculateLOD", - OCC::CalculateLOD, - "calculateLOD", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Pixel shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Discard, - "Discard", - OCC::Discard, - "discard", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Derivatives void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::DerivCoarseX, - "DerivCoarseX", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::DerivCoarseY, - "DerivCoarseY", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::DerivFineX, - "DerivFineX", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::DerivFineY, - "DerivFineY", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Pixel shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::EvalSnapped, - "EvalSnapped", 
- OCC::EvalSnapped, - "evalSnapped", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::EvalSampleIndex, - "EvalSampleIndex", - OCC::EvalSampleIndex, - "evalSampleIndex", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::EvalCentroid, - "EvalCentroid", - OCC::EvalCentroid, - "evalCentroid", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::SampleIndex, - "SampleIndex", - OCC::SampleIndex, - "sampleIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Coverage, - "Coverage", - OCC::Coverage, - "coverage", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::InnerCoverage, - "InnerCoverage", - OCC::InnerCoverage, - "innerCoverage", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Compute/Mesh/Amplification/Node shader void, h, f, d, i1, - // i8, i16, i32, i64, udt, obj , function attribute - { - OC::ThreadId, - "ThreadId", - OCC::ThreadId, - "threadId", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::GroupId, - "GroupId", - OCC::GroupId, - "groupId", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::ThreadIdInGroup, - "ThreadIdInGroup", - OCC::ThreadIdInGroup, - "threadIdInGroup", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::FlattenedThreadIdInGroup, - "FlattenedThreadIdInGroup", - OCC::FlattenedThreadIdInGroup, - "flattenedThreadIdInGroup", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - 
}, - - // Geometry shader void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::EmitStream, - "EmitStream", - OCC::EmitStream, - "emitStream", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::CutStream, - "CutStream", - OCC::CutStream, - "cutStream", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::EmitThenCutStream, - "EmitThenCutStream", - OCC::EmitThenCutStream, - "emitThenCutStream", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::GSInstanceID, - "GSInstanceID", - OCC::GSInstanceID, - "gsInstanceID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Double precision void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::MakeDouble, - "MakeDouble", - OCC::MakeDouble, - "makeDouble", - {false, false, false, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::SplitDouble, - "SplitDouble", - OCC::SplitDouble, - "splitDouble", - {false, false, false, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Domain and hull shader void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::LoadOutputControlPoint, - "LoadOutputControlPoint", - OCC::LoadOutputControlPoint, - "loadOutputControlPoint", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::LoadPatchConstant, - "LoadPatchConstant", - OCC::LoadPatchConstant, - "loadPatchConstant", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Domain shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::DomainLocation, - "DomainLocation", - 
OCC::DomainLocation, - "domainLocation", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Hull shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::StorePatchConstant, - "StorePatchConstant", - OCC::StorePatchConstant, - "storePatchConstant", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::OutputControlPointID, - "OutputControlPointID", - OCC::OutputControlPointID, - "outputControlPointID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Hull, Domain and Geometry shaders void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::PrimitiveID, - "PrimitiveID", - OCC::PrimitiveID, - "primitiveID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Other void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::CycleCounterLegacy, - "CycleCounterLegacy", - OCC::CycleCounterLegacy, - "cycleCounterLegacy", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Wave void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::WaveIsFirstLane, - "WaveIsFirstLane", - OCC::WaveIsFirstLane, - "waveIsFirstLane", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WaveGetLaneIndex, - "WaveGetLaneIndex", - OCC::WaveGetLaneIndex, - "waveGetLaneIndex", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::WaveGetLaneCount, - "WaveGetLaneCount", - OCC::WaveGetLaneCount, - "waveGetLaneCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::WaveAnyTrue, - 
"WaveAnyTrue", - OCC::WaveAnyTrue, - "waveAnyTrue", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WaveAllTrue, - "WaveAllTrue", - OCC::WaveAllTrue, - "waveAllTrue", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WaveActiveAllEqual, - "WaveActiveAllEqual", - OCC::WaveActiveAllEqual, - "waveActiveAllEqual", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveActiveBallot, - "WaveActiveBallot", - OCC::WaveActiveBallot, - "waveActiveBallot", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WaveReadLaneAt, - "WaveReadLaneAt", - OCC::WaveReadLaneAt, - "waveReadLaneAt", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveReadLaneFirst, - "WaveReadLaneFirst", - OCC::WaveReadLaneFirst, - "waveReadLaneFirst", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveActiveOp, - "WaveActiveOp", - OCC::WaveActiveOp, - "waveActiveOp", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveActiveBit, - "WaveActiveBit", - OCC::WaveActiveBit, - "waveActiveBit", - {false, false, false, false, false, true, true, true, true, false, - false}, - Attribute::None, - }, - { - OC::WavePrefixOp, - "WavePrefixOp", - OCC::WavePrefixOp, - "wavePrefixOp", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::None, - }, - - // Quad Wave Ops void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::QuadReadLaneAt, - "QuadReadLaneAt", - OCC::QuadReadLaneAt, - "quadReadLaneAt", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::QuadOp, - "QuadOp", - OCC::QuadOp, 
- "quadOp", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::None, - }, - - // Bitcasts with different sizes void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::BitcastI16toF16, - "BitcastI16toF16", - OCC::BitcastI16toF16, - "bitcastI16toF16", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastF16toI16, - "BitcastF16toI16", - OCC::BitcastF16toI16, - "bitcastF16toI16", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastI32toF32, - "BitcastI32toF32", - OCC::BitcastI32toF32, - "bitcastI32toF32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastF32toI32, - "BitcastF32toI32", - OCC::BitcastF32toI32, - "bitcastF32toI32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastI64toF64, - "BitcastI64toF64", - OCC::BitcastI64toF64, - "bitcastI64toF64", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastF64toI64, - "BitcastF64toI64", - OCC::BitcastF64toI64, - "bitcastF64toI64", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Legacy floating-point void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::LegacyF32ToF16, - "LegacyF32ToF16", - OCC::LegacyF32ToF16, - "legacyF32ToF16", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::LegacyF16ToF32, - "LegacyF16ToF32", - OCC::LegacyF16ToF32, - "legacyF16ToF32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Double precision void, h, f, d, i1, i8, i16, i32, - // 
i64, udt, obj , function attribute - { - OC::LegacyDoubleToFloat, - "LegacyDoubleToFloat", - OCC::LegacyDoubleToFloat, - "legacyDoubleToFloat", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::LegacyDoubleToSInt32, - "LegacyDoubleToSInt32", - OCC::LegacyDoubleToSInt32, - "legacyDoubleToSInt32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::LegacyDoubleToUInt32, - "LegacyDoubleToUInt32", - OCC::LegacyDoubleToUInt32, - "legacyDoubleToUInt32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Wave void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::WaveAllBitCount, - "WaveAllBitCount", - OCC::WaveAllOp, - "waveAllOp", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WavePrefixBitCount, - "WavePrefixBitCount", - OCC::WavePrefixOp, - "wavePrefixOp", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Pixel shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::AttributeAtVertex, - "AttributeAtVertex", - OCC::AttributeAtVertex, - "attributeAtVertex", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Graphics shader void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::ViewID, - "ViewID", - OCC::ViewID, - "viewID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Resources void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::RawBufferLoad, - "RawBufferLoad", - OCC::RawBufferLoad, - "rawBufferLoad", - {false, true, true, true, false, false, true, true, true, false, false}, - 
Attribute::ReadOnly, - }, - { - OC::RawBufferStore, - "RawBufferStore", - OCC::RawBufferStore, - "rawBufferStore", - {false, true, true, true, false, false, true, true, true, false, false}, - Attribute::None, - }, - - // Raytracing object space uint System Values void, h, f, d, i1, - // i8, i16, i32, i64, udt, obj , function attribute - { - OC::InstanceID, - "InstanceID", - OCC::InstanceID, - "instanceID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::InstanceIndex, - "InstanceIndex", - OCC::InstanceIndex, - "instanceIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Raytracing hit uint System Values void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::HitKind, - "HitKind", - OCC::HitKind, - "hitKind", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Raytracing uint System Values void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::RayFlags, - "RayFlags", - OCC::RayFlags, - "rayFlags", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Ray Dispatch Arguments void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::DispatchRaysIndex, - "DispatchRaysIndex", - OCC::DispatchRaysIndex, - "dispatchRaysIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::DispatchRaysDimensions, - "DispatchRaysDimensions", - OCC::DispatchRaysDimensions, - "dispatchRaysDimensions", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Ray Vectors void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::WorldRayOrigin, - "WorldRayOrigin", - OCC::WorldRayOrigin, - 
"worldRayOrigin", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::WorldRayDirection, - "WorldRayDirection", - OCC::WorldRayDirection, - "worldRayDirection", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Ray object space Vectors void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::ObjectRayOrigin, - "ObjectRayOrigin", - OCC::ObjectRayOrigin, - "objectRayOrigin", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::ObjectRayDirection, - "ObjectRayDirection", - OCC::ObjectRayDirection, - "objectRayDirection", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Ray Transforms void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::ObjectToWorld, - "ObjectToWorld", - OCC::ObjectToWorld, - "objectToWorld", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::WorldToObject, - "WorldToObject", - OCC::WorldToObject, - "worldToObject", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // RayT void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::RayTMin, - "RayTMin", - OCC::RayTMin, - "rayTMin", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::RayTCurrent, - "RayTCurrent", - OCC::RayTCurrent, - "rayTCurrent", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // AnyHit Terminals void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::IgnoreHit, - "IgnoreHit", - OCC::IgnoreHit, - "ignoreHit", - {true, false, false, false, false, false, 
false, false, false, false, - false}, - Attribute::NoReturn, - }, - { - OC::AcceptHitAndEndSearch, - "AcceptHitAndEndSearch", - OCC::AcceptHitAndEndSearch, - "acceptHitAndEndSearch", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoReturn, - }, - - // Indirect Shader Invocation void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::TraceRay, - "TraceRay", - OCC::TraceRay, - "traceRay", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::None, - }, - { - OC::ReportHit, - "ReportHit", - OCC::ReportHit, - "reportHit", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::None, - }, - { - OC::CallShader, - "CallShader", - OCC::CallShader, - "callShader", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::None, - }, - - // Library create handle from resource struct (like HL intrinsic) void, h, - // f, d, i1, i8, i16, i32, i64, udt, obj , function - // attribute - { - OC::CreateHandleForLib, - "CreateHandleForLib", - OCC::CreateHandleForLib, - "createHandleForLib", - {false, false, false, false, false, false, false, false, false, false, - true}, - Attribute::ReadOnly, - }, - - // Raytracing object space uint System Values void, h, f, d, i1, - // i8, i16, i32, i64, udt, obj , function attribute - { - OC::PrimitiveIndex, - "PrimitiveIndex", - OCC::PrimitiveIndex, - "primitiveIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Dot product with accumulate void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::Dot2AddHalf, - "Dot2AddHalf", - OCC::Dot2AddHalf, - "dot2AddHalf", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Dot4AddI8Packed, - "Dot4AddI8Packed", - OCC::Dot4AddPacked, - "dot4AddPacked", - 
{false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Dot4AddU8Packed, - "Dot4AddU8Packed", - OCC::Dot4AddPacked, - "dot4AddPacked", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Wave void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::WaveMatch, - "WaveMatch", - OCC::WaveMatch, - "waveMatch", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveMultiPrefixOp, - "WaveMultiPrefixOp", - OCC::WaveMultiPrefixOp, - "waveMultiPrefixOp", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveMultiPrefixBitCount, - "WaveMultiPrefixBitCount", - OCC::WaveMultiPrefixBitCount, - "waveMultiPrefixBitCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Mesh shader instructions void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::SetMeshOutputCounts, - "SetMeshOutputCounts", - OCC::SetMeshOutputCounts, - "setMeshOutputCounts", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::EmitIndices, - "EmitIndices", - OCC::EmitIndices, - "emitIndices", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::GetMeshPayload, - "GetMeshPayload", - OCC::GetMeshPayload, - "getMeshPayload", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::ReadOnly, - }, - { - OC::StoreVertexOutput, - "StoreVertexOutput", - OCC::StoreVertexOutput, - "storeVertexOutput", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::StorePrimitiveOutput, - "StorePrimitiveOutput", - OCC::StorePrimitiveOutput, - 
"storePrimitiveOutput", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - - // Amplification shader instructions void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::DispatchMesh, - "DispatchMesh", - OCC::DispatchMesh, - "dispatchMesh", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::None, - }, - - // Sampler Feedback void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::WriteSamplerFeedback, - "WriteSamplerFeedback", - OCC::WriteSamplerFeedback, - "writeSamplerFeedback", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WriteSamplerFeedbackBias, - "WriteSamplerFeedbackBias", - OCC::WriteSamplerFeedbackBias, - "writeSamplerFeedbackBias", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WriteSamplerFeedbackLevel, - "WriteSamplerFeedbackLevel", - OCC::WriteSamplerFeedbackLevel, - "writeSamplerFeedbackLevel", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WriteSamplerFeedbackGrad, - "WriteSamplerFeedbackGrad", - OCC::WriteSamplerFeedbackGrad, - "writeSamplerFeedbackGrad", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Inline Ray Query void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::AllocateRayQuery, - "AllocateRayQuery", - OCC::AllocateRayQuery, - "allocateRayQuery", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_TraceRayInline, - "RayQuery_TraceRayInline", - OCC::RayQuery_TraceRayInline, - "rayQuery_TraceRayInline", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - 
OC::RayQuery_Proceed, - "RayQuery_Proceed", - OCC::RayQuery_Proceed, - "rayQuery_Proceed", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_Abort, - "RayQuery_Abort", - OCC::RayQuery_Abort, - "rayQuery_Abort", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_CommitNonOpaqueTriangleHit, - "RayQuery_CommitNonOpaqueTriangleHit", - OCC::RayQuery_CommitNonOpaqueTriangleHit, - "rayQuery_CommitNonOpaqueTriangleHit", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_CommitProceduralPrimitiveHit, - "RayQuery_CommitProceduralPrimitiveHit", - OCC::RayQuery_CommitProceduralPrimitiveHit, - "rayQuery_CommitProceduralPrimitiveHit", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_CommittedStatus, - "RayQuery_CommittedStatus", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateType, - "RayQuery_CandidateType", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateObjectToWorld3x4, - "RayQuery_CandidateObjectToWorld3x4", - OCC::RayQuery_StateMatrix, - "rayQuery_StateMatrix", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateWorldToObject3x4, - "RayQuery_CandidateWorldToObject3x4", - OCC::RayQuery_StateMatrix, - "rayQuery_StateMatrix", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedObjectToWorld3x4, - 
"RayQuery_CommittedObjectToWorld3x4", - OCC::RayQuery_StateMatrix, - "rayQuery_StateMatrix", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedWorldToObject3x4, - "RayQuery_CommittedWorldToObject3x4", - OCC::RayQuery_StateMatrix, - "rayQuery_StateMatrix", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateProceduralPrimitiveNonOpaque, - "RayQuery_CandidateProceduralPrimitiveNonOpaque", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateTriangleFrontFace, - "RayQuery_CandidateTriangleFrontFace", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedTriangleFrontFace, - "RayQuery_CommittedTriangleFrontFace", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateTriangleBarycentrics, - "RayQuery_CandidateTriangleBarycentrics", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedTriangleBarycentrics, - "RayQuery_CommittedTriangleBarycentrics", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_RayFlags, - "RayQuery_RayFlags", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - 
OC::RayQuery_WorldRayOrigin, - "RayQuery_WorldRayOrigin", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_WorldRayDirection, - "RayQuery_WorldRayDirection", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_RayTMin, - "RayQuery_RayTMin", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateTriangleRayT, - "RayQuery_CandidateTriangleRayT", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedRayT, - "RayQuery_CommittedRayT", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateInstanceIndex, - "RayQuery_CandidateInstanceIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateInstanceID, - "RayQuery_CandidateInstanceID", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateGeometryIndex, - "RayQuery_CandidateGeometryIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidatePrimitiveIndex, - "RayQuery_CandidatePrimitiveIndex", - OCC::RayQuery_StateScalar, - 
"rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateObjectRayOrigin, - "RayQuery_CandidateObjectRayOrigin", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateObjectRayDirection, - "RayQuery_CandidateObjectRayDirection", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedInstanceIndex, - "RayQuery_CommittedInstanceIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedInstanceID, - "RayQuery_CommittedInstanceID", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedGeometryIndex, - "RayQuery_CommittedGeometryIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedPrimitiveIndex, - "RayQuery_CommittedPrimitiveIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedObjectRayOrigin, - "RayQuery_CommittedObjectRayOrigin", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedObjectRayDirection, - "RayQuery_CommittedObjectRayDirection", - OCC::RayQuery_StateVector, - 
"rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Raytracing object space uint System Values, raytracing tier 1.1 void, h, - // f, d, i1, i8, i16, i32, i64, udt, obj , function - // attribute - { - OC::GeometryIndex, - "GeometryIndex", - OCC::GeometryIndex, - "geometryIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Inline Ray Query void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::RayQuery_CandidateInstanceContributionToHitGroupIndex, - "RayQuery_CandidateInstanceContributionToHitGroupIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedInstanceContributionToHitGroupIndex, - "RayQuery_CommittedInstanceContributionToHitGroupIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - - // Get handle from heap void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::AnnotateHandle, - "AnnotateHandle", - OCC::AnnotateHandle, - "annotateHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::CreateHandleFromBinding, - "CreateHandleFromBinding", - OCC::CreateHandleFromBinding, - "createHandleFromBinding", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::CreateHandleFromHeap, - "CreateHandleFromHeap", - OCC::CreateHandleFromHeap, - "createHandleFromHeap", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Unpacking intrinsics void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , 
function attribute - { - OC::Unpack4x8, - "Unpack4x8", - OCC::Unpack4x8, - "unpack4x8", - {false, false, false, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Packing intrinsics void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::Pack4x8, - "Pack4x8", - OCC::Pack4x8, - "pack4x8", - {false, false, false, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Helper Lanes void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::IsHelperLane, - "IsHelperLane", - OCC::IsHelperLane, - "isHelperLane", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Quad Wave Ops void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::QuadVote, - "QuadVote", - OCC::QuadVote, - "quadVote", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Resources - gather void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::TextureGatherRaw, - "TextureGatherRaw", - OCC::TextureGatherRaw, - "textureGatherRaw", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadOnly, - }, - - // Resources - sample void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::SampleCmpLevel, - "SampleCmpLevel", - OCC::SampleCmpLevel, - "sampleCmpLevel", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Resources void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::TextureStoreSample, - "TextureStoreSample", - OCC::TextureStoreSample, - "textureStoreSample", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - - // void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute - { - 
OC::Reserved0, - "Reserved0", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved1, - "Reserved1", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved2, - "Reserved2", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved3, - "Reserved3", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved4, - "Reserved4", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved5, - "Reserved5", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved6, - "Reserved6", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved7, - "Reserved7", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved8, - "Reserved8", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved9, - "Reserved9", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved10, - "Reserved10", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved11, - "Reserved11", - OCC::Reserved, - "reserved", - {true, false, false, false, 
false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Create/Annotate Node Handles void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::AllocateNodeOutputRecords, - "AllocateNodeOutputRecords", - OCC::AllocateNodeOutputRecords, - "allocateNodeOutputRecords", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Get Pointer to Node Record in Address Space 6 void, h, f, d, - // i1, i8, i16, i32, i64, udt, obj , function attribute - { - OC::GetNodeRecordPtr, - "GetNodeRecordPtr", - OCC::GetNodeRecordPtr, - "getNodeRecordPtr", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::ReadNone, - }, - - // Work Graph intrinsics void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::IncrementOutputCount, - "IncrementOutputCount", - OCC::IncrementOutputCount, - "incrementOutputCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::OutputComplete, - "OutputComplete", - OCC::OutputComplete, - "outputComplete", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::GetInputRecordCount, - "GetInputRecordCount", - OCC::GetInputRecordCount, - "getInputRecordCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::FinishedCrossGroupSharing, - "FinishedCrossGroupSharing", - OCC::FinishedCrossGroupSharing, - "finishedCrossGroupSharing", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Synchronization void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::BarrierByMemoryType, - "BarrierByMemoryType", - OCC::BarrierByMemoryType, - "barrierByMemoryType", - {true, false, false, false, false, false, false, false, 
false, false, - false}, - Attribute::NoDuplicate, - }, - { - OC::BarrierByMemoryHandle, - "BarrierByMemoryHandle", - OCC::BarrierByMemoryHandle, - "barrierByMemoryHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoDuplicate, - }, - { - OC::BarrierByNodeRecordHandle, - "BarrierByNodeRecordHandle", - OCC::BarrierByNodeRecordHandle, - "barrierByNodeRecordHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoDuplicate, - }, - - // Create/Annotate Node Handles void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::CreateNodeOutputHandle, - "CreateNodeOutputHandle", - OCC::createNodeOutputHandle, - "createNodeOutputHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IndexNodeHandle, - "IndexNodeHandle", - OCC::IndexNodeHandle, - "indexNodeHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::AnnotateNodeHandle, - "AnnotateNodeHandle", - OCC::AnnotateNodeHandle, - "annotateNodeHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::CreateNodeInputRecordHandle, - "CreateNodeInputRecordHandle", - OCC::CreateNodeInputRecordHandle, - "createNodeInputRecordHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::AnnotateNodeRecordHandle, - "AnnotateNodeRecordHandle", - OCC::AnnotateNodeRecordHandle, - "annotateNodeRecordHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Work Graph intrinsics void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::NodeOutputIsValid, - "NodeOutputIsValid", - OCC::NodeOutputIsValid, - "nodeOutputIsValid", - {true, false, 
false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::GetRemainingRecursionLevels, - "GetRemainingRecursionLevels", - OCC::GetRemainingRecursionLevels, - "getRemainingRecursionLevels", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Comparison Samples void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::SampleCmpGrad, - "SampleCmpGrad", - OCC::SampleCmpGrad, - "sampleCmpGrad", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleCmpBias, - "SampleCmpBias", - OCC::SampleCmpBias, - "sampleCmpBias", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Extended Command Information void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::StartVertexLocation, - "StartVertexLocation", - OCC::StartVertexLocation, - "startVertexLocation", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::StartInstanceLocation, - "StartInstanceLocation", - OCC::StartInstanceLocation, - "startInstanceLocation", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Inline Ray Query void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::AllocateRayQuery2, - "AllocateRayQuery2", - OCC::AllocateRayQuery2, - "allocateRayQuery2", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute - { - OC::ReservedA0, - "ReservedA0", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedA1, - "ReservedA1", - OCC::Reserved, - 
"reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedA2, - "ReservedA2", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB0, - "ReservedB0", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB1, - "ReservedB1", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB2, - "ReservedB2", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB3, - "ReservedB3", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB4, - "ReservedB4", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB5, - "ReservedB5", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB6, - "ReservedB6", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB7, - "ReservedB7", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB8, - "ReservedB8", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB9, - "ReservedB9", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, 
false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB10, - "ReservedB10", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB11, - "ReservedB11", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB12, - "ReservedB12", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB13, - "ReservedB13", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB14, - "ReservedB14", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB15, - "ReservedB15", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB16, - "ReservedB16", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB17, - "ReservedB17", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB18, - "ReservedB18", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB19, - "ReservedB19", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB20, - "ReservedB20", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { 
- OC::ReservedB21, - "ReservedB21", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB22, - "ReservedB22", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB23, - "ReservedB23", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB24, - "ReservedB24", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB25, - "ReservedB25", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB26, - "ReservedB26", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB27, - "ReservedB27", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB28, - "ReservedB28", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB29, - "ReservedB29", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB30, - "ReservedB30", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC0, - "ReservedC0", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC1, - "ReservedC1", - OCC::Reserved, - 
"reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC2, - "ReservedC2", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC3, - "ReservedC3", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC4, - "ReservedC4", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC5, - "ReservedC5", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC6, - "ReservedC6", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC7, - "ReservedC7", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC8, - "ReservedC8", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC9, - "ReservedC9", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, + // Temporary, indexable, input, output registers + {OC::TempRegLoad, + "TempRegLoad", + OCC::TempRegLoad, + "tempRegLoad", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::TempRegStore, + "TempRegStore", + OCC::TempRegStore, + "tempRegStore", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::MinPrecXRegLoad, + "MinPrecXRegLoad", + OCC::MinPrecXRegLoad, + "minPrecXRegLoad", + Attribute::ReadOnly, + 1, + {{0x21}}, + 
{{0x0}}}, // Overloads: hw + {OC::MinPrecXRegStore, + "MinPrecXRegStore", + OCC::MinPrecXRegStore, + "minPrecXRegStore", + Attribute::None, + 1, + {{0x21}}, + {{0x0}}}, // Overloads: hw + {OC::LoadInput, + "LoadInput", + OCC::LoadInput, + "loadInput", + Attribute::ReadNone, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::StoreOutput, + "StoreOutput", + OCC::StoreOutput, + "storeOutput", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + + // Unary float + {OC::FAbs, + "FAbs", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x407}}, + {{0x7}}}, // Overloads: hfdgetTypeID(); switch (T) { case Type::VoidTyID: - return 0; + return TS_Invalid; case Type::HalfTyID: - return 1; + return TS_F16; case Type::FloatTyID: - return 2; + return TS_F32; case Type::DoubleTyID: - return 3; + return TS_F64; case Type::IntegerTyID: { IntegerType *pIT = dyn_cast(pType); unsigned Bits = pIT->getBitWidth(); switch (Bits) { case 1: - return 4; + return TS_I1; case 8: - return 5; + return TS_I8; case 16: - return 6; + return TS_I16; case 32: - return 7; + return TS_I32; case 64: - return 8; + return TS_I64; } llvm_unreachable("Invalid Bits size"); + return TS_Invalid; } case Type::PointerTyID: { pType = cast(pType)->getElementType(); if (pType->isStructTy()) - return kUserDefineTypeSlot; + return TS_UDT; DXASSERT(!pType->isPointerTy(), "pointer-to-pointer type unsupported"); return GetTypeSlot(pType); } case Type::StructTyID: - return kObjectTypeSlot; + // Named struct value (not pointer) indicates a built-in object type. + // Anonymous struct value is used to wrap multi-overload dimensions. 
+ if (cast(pType)->hasName()) + return TS_Object; + else + return TS_Extended; + case Type::VectorTyID: + return TS_Vector; default: break; } - return UINT_MAX; + return TS_Invalid; } const char *OP::GetOverloadTypeName(unsigned TypeSlot) { - DXASSERT(TypeSlot < kUserDefineTypeSlot, "otherwise caller passed OOB index"); + DXASSERT(TypeSlot < TS_BasicCount, "otherwise caller passed OOB index"); return m_OverloadTypeName[TypeSlot]; } -llvm::StringRef OP::GetTypeName(Type *Ty, std::string &str) { +StringRef OP::GetTypeName(Type *Ty, SmallVectorImpl &Storage) { + DXASSERT(!Ty->isVoidTy(), "must not pass void type here"); unsigned TypeSlot = OP::GetTypeSlot(Ty); - if (TypeSlot < kUserDefineTypeSlot) { + if (TypeSlot < TS_BasicCount) { return GetOverloadTypeName(TypeSlot); - } else if (TypeSlot == kUserDefineTypeSlot) { + } else if (TypeSlot == TS_UDT) { if (Ty->isPointerTy()) Ty = Ty->getPointerElementType(); StructType *ST = cast(Ty); return ST->getStructName(); - } else if (TypeSlot == kObjectTypeSlot) { + } else if (TypeSlot == TS_Object) { StructType *ST = cast(Ty); return ST->getStructName(); + } else if (TypeSlot == TS_Vector) { + VectorType *VecTy = cast(Ty); + return (Twine("v") + Twine(VecTy->getNumElements()) + + Twine( + GetOverloadTypeName(OP::GetTypeSlot(VecTy->getElementType())))) + .toStringRef(Storage); + } else if (TypeSlot == TS_Extended) { + DXASSERT(isa(Ty), + "otherwise, extended overload type not wrapped in struct type."); + StructType *ST = cast(Ty); + DXASSERT(ST->getNumElements() <= DXIL::kDxilMaxOloadDims, + "otherwise, extended overload has too many dimensions."); + // Iterate extended slots, recurse, separate with '.' 
+ raw_svector_ostream OS(Storage); + for (unsigned I = 0; I < ST->getNumElements(); ++I) { + if (I > 0) + OS << "."; + SmallVector TempStr; + OS << GetTypeName(ST->getElementType(I), TempStr); + } + return OS.str(); } else { - raw_string_ostream os(str); - Ty->print(os); - os.flush(); - return str; + raw_svector_ostream OS(Storage); + Ty->print(OS); + return OS.str(); } } -llvm::StringRef OP::ConstructOverloadName(Type *Ty, DXIL::OpCode opCode, - std::string &funcNameStorage) { +StringRef OP::ConstructOverloadName(Type *Ty, DXIL::OpCode opCode, + SmallVectorImpl &Storage) { if (Ty == Type::getVoidTy(Ty->getContext())) { - funcNameStorage = - (Twine(OP::m_NamePrefix) + Twine(GetOpCodeClassName(opCode))).str(); + return (Twine(OP::m_NamePrefix) + Twine(GetOpCodeClassName(opCode))) + .toStringRef(Storage); } else { - funcNameStorage = - (Twine(OP::m_NamePrefix) + Twine(GetOpCodeClassName(opCode)) + "." + - GetTypeName(Ty, funcNameStorage)) - .str(); + llvm::SmallVector TempStr; + return (Twine(OP::m_NamePrefix) + Twine(GetOpCodeClassName(opCode)) + "." + + GetTypeName(Ty, TempStr)) + .toStringRef(Storage); } - return funcNameStorage; } const char *OP::GetOpCodeName(OpCode opCode) { @@ -3138,13 +2804,41 @@ llvm::Attribute::AttrKind OP::GetMemAccessAttr(OpCode opCode) { } bool OP::IsOverloadLegal(OpCode opCode, Type *pType) { - if (!pType) + if (static_cast(opCode) >= + static_cast(OpCode::NumOpCodes)) return false; - if (opCode == OpCode::NumOpCodes) + if (!pType) return false; - unsigned TypeSlot = GetTypeSlot(pType); - return TypeSlot != UINT_MAX && - m_OpCodeProps[(unsigned)opCode].bAllowOverload[TypeSlot]; + auto &OpProps = m_OpCodeProps[static_cast(opCode)]; + + if (OpProps.NumOverloadDims == 0) + return pType->isVoidTy(); + + // Normalize 1+ overload dimensions into array. + Type *Types[DXIL::kDxilMaxOloadDims] = {pType}; + if (OpProps.NumOverloadDims > 1) { + StructType *ST = dyn_cast(pType); + // Make sure multi-overload is well-formed. 
+ if (!ST || ST->hasName() || ST->getNumElements() != OpProps.NumOverloadDims) + return false; + for (unsigned I = 0; I < ST->getNumElements(); ++I) + Types[I] = ST->getElementType(I); + } + + for (unsigned I = 0; I < OpProps.NumOverloadDims; ++I) { + Type *Ty = Types[I]; + unsigned TypeSlot = GetTypeSlot(Ty); + if (!OpProps.AllowedOverloads[I][TypeSlot]) + return false; + if (TypeSlot == TS_Vector) { + unsigned EltTypeSlot = + GetTypeSlot(cast(Ty)->getElementType()); + if (!OpProps.AllowedVectorElements[I][EltTypeSlot]) + return false; + } + } + + return true; } bool OP::CheckOpCodeTable() { @@ -3168,41 +2862,6 @@ bool OP::IsDxilOpFunc(const llvm::Function *F) { return IsDxilOpFuncName(F->getName()); } -bool OP::IsDxilOpTypeName(StringRef name) { - return name.startswith(m_TypePrefix) || name.startswith(m_MatrixTypePrefix); -} - -bool OP::IsDxilOpType(llvm::StructType *ST) { - if (!ST->hasName()) - return false; - StringRef Name = ST->getName(); - return IsDxilOpTypeName(Name); -} - -bool OP::IsDupDxilOpType(llvm::StructType *ST) { - if (!ST->hasName()) - return false; - StringRef Name = ST->getName(); - if (!IsDxilOpTypeName(Name)) - return false; - size_t DotPos = Name.rfind('.'); - if (DotPos == 0 || DotPos == StringRef::npos || Name.back() == '.' 
|| - !isdigit(static_cast(Name[DotPos + 1]))) - return false; - return true; -} - -StructType *OP::GetOriginalDxilOpType(llvm::StructType *ST, llvm::Module &M) { - DXASSERT(IsDupDxilOpType(ST), "else should not call GetOriginalDxilOpType"); - StringRef Name = ST->getName(); - size_t DotPos = Name.rfind('.'); - StructType *OriginalST = M.getTypeByName(Name.substr(0, DotPos)); - DXASSERT(OriginalST, "else name collison without original type"); - DXASSERT(ST->isLayoutIdentical(OriginalST), - "else invalid layout for dxil types"); - return OriginalST; -} - bool OP::IsDxilOpFuncCallInst(const llvm::Instruction *I) { const CallInst *CI = dyn_cast(I); if (CI == nullptr) @@ -3292,6 +2951,12 @@ bool OP::IsDxilOpBarrier(OpCode C) { // OPCODE-BARRIER:END } +bool OP::IsDxilOpExtendedOverload(OpCode C) { + if (C >= OpCode::NumOpCodes) + return false; + return m_OpCodeProps[static_cast(C)].NumOverloadDims > 1; +} + static unsigned MaskMemoryTypeFlagsIfAllowed(unsigned memoryTypeFlags, unsigned allowedMask) { // If the memory type is AllMemory, masking inapplicable flags is allowed. 
@@ -3360,6 +3025,30 @@ bool OP::BarrierRequiresNode(const llvm::CallInst *CI) { } } +bool OP::BarrierRequiresReorder(const llvm::CallInst *CI) { + OpCode Opcode = OP::GetDxilOpFuncCallInst(CI); + switch (Opcode) { + case OpCode::BarrierByMemoryType: { + DxilInst_BarrierByMemoryType Barrier(const_cast(CI)); + if (!isa(Barrier.get_SemanticFlags())) + return false; + unsigned SemanticFlags = Barrier.get_SemanticFlags_val(); + return (SemanticFlags & static_cast( + DXIL::BarrierSemanticFlag::ReorderScope)) != 0U; + } + case OpCode::BarrierByMemoryHandle: { + DxilInst_BarrierByMemoryHandle Barrier(const_cast(CI)); + if (!isa(Barrier.get_SemanticFlags())) + return false; + unsigned SemanticFlags = Barrier.get_SemanticFlags_val(); + return (SemanticFlags & static_cast( + DXIL::BarrierSemanticFlag::ReorderScope)) != 0U; + } + default: + return false; + } +} + DXIL::BarrierMode OP::TranslateToBarrierMode(const llvm::CallInst *CI) { OpCode opcode = OP::GetDxilOpFuncCallInst(CI); switch (opcode) { @@ -3382,6 +3071,12 @@ DXIL::BarrierMode OP::TranslateToBarrierMode(const llvm::CallInst *CI) { semanticFlags = barrier.get_SemanticFlags_val(); } + // Disallow SM6.9+ semantic flags. + if (semanticFlags & + ~static_cast(DXIL::BarrierSemanticFlag::LegacyFlags)) { + return DXIL::BarrierMode::Invalid; + } + // Mask to legacy flags, if allowed. 
memoryTypeFlags = MaskMemoryTypeFlagsIfAllowed( memoryTypeFlags, (unsigned)DXIL::MemoryTypeFlag::LegacyFlags); @@ -3744,10 +3439,38 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, } return; } - // Instructions: AllocateRayQuery2=258 - if (op == 258) { + // Instructions: AllocateRayQuery2=258, RawBufferVectorLoad=303, + // RawBufferVectorStore=304 + if (op == 258 || (303 <= op && op <= 304)) { + major = 6; + minor = 9; + return; + } + // Instructions: MaybeReorderThread=268 + if (op == 268) { major = 6; minor = 9; + mask = SFLAG(Library) | SFLAG(RayGeneration); + return; + } + // Instructions: HitObject_TraceRay=262, HitObject_FromRayQuery=263, + // HitObject_FromRayQueryWithAttrs=264, HitObject_MakeMiss=265, + // HitObject_MakeNop=266, HitObject_Invoke=267, HitObject_IsMiss=269, + // HitObject_IsHit=270, HitObject_IsNop=271, HitObject_RayFlags=272, + // HitObject_RayTMin=273, HitObject_RayTCurrent=274, + // HitObject_WorldRayOrigin=275, HitObject_WorldRayDirection=276, + // HitObject_ObjectRayOrigin=277, HitObject_ObjectRayDirection=278, + // HitObject_ObjectToWorld3x4=279, HitObject_WorldToObject3x4=280, + // HitObject_GeometryIndex=281, HitObject_InstanceIndex=282, + // HitObject_InstanceID=283, HitObject_PrimitiveIndex=284, + // HitObject_HitKind=285, HitObject_ShaderTableIndex=286, + // HitObject_SetShaderTableIndex=287, + // HitObject_LoadLocalRootTableConstant=288, HitObject_Attributes=289 + if ((262 <= op && op <= 267) || (269 <= op && op <= 289)) { + major = 6; + minor = 9; + mask = + SFLAG(Library) | SFLAG(RayGeneration) | SFLAG(ClosestHit) | SFLAG(Miss); return; } // OPCODE-SMMASK:END @@ -3794,10 +3517,17 @@ void OP::GetMinShaderModelAndMask(const llvm::CallInst *CI, minor = 8; } } + if (BarrierRequiresReorder(CI)) { + major = 6; + minor = 9; + mask &= SFLAG(Library) | SFLAG(RayGeneration); + return; + } if (BarrierRequiresNode(CI)) { mask &= SFLAG(Library) | SFLAG(Node); return; - } else if (BarrierRequiresGroup(CI)) { + } + if 
(BarrierRequiresGroup(CI)) { mask &= SFLAG(Library) | SFLAG(Compute) | SFLAG(Amplification) | SFLAG(Mesh) | SFLAG(Node); return; @@ -3851,6 +3581,8 @@ OP::OP(LLVMContext &Ctx, Module *pModule) m_pHandleType = GetOrCreateStructType(m_Ctx, Type::getInt8PtrTy(m_Ctx), "dx.types.Handle", pModule); + m_pHitObjectType = GetOrCreateStructType(m_Ctx, Type::getInt8PtrTy(m_Ctx), + "dx.types.HitObject", pModule); m_pNodeHandleType = GetOrCreateStructType(m_Ctx, Type::getInt8PtrTy(m_Ctx), "dx.types.NodeHandle", pModule); m_pNodeRecordHandleType = GetOrCreateStructType( @@ -3930,13 +3662,12 @@ void OP::FixOverloadNames() { if (F.isDeclaration() && OP::IsDxilOpFunc(&F) && !F.user_empty()) { CallInst *CI = cast(*F.user_begin()); DXIL::OpCode opCode = OP::GetDxilOpFuncCallInst(CI); + if (!MayHaveNonCanonicalOverload(opCode)) + continue; llvm::Type *Ty = OP::GetOverloadType(opCode, &F); if (!OP::IsOverloadLegal(opCode, Ty)) continue; - if (!isa(Ty) && !isa(Ty)) - continue; - - std::string funcName; + SmallVector funcName; if (OP::ConstructOverloadName(Ty, opCode, funcName) .compare(F.getName()) != 0) F.setName(funcName); @@ -3949,11 +3680,54 @@ void OP::UpdateCache(OpCodeClass opClass, Type *Ty, llvm::Function *F) { m_FunctionToOpClass[F] = opClass; } +bool OP::MayHaveNonCanonicalOverload(OpCode OC) { + if (OC >= OpCode::NumOpCodes) + return false; + const unsigned CheckMask = (1 << TS_UDT) | (1 << TS_Object); + auto &OpProps = m_OpCodeProps[static_cast(OC)]; + for (unsigned I = 0; I < OpProps.NumOverloadDims; ++I) + if ((CheckMask & OpProps.AllowedOverloads[I].SlotMask) != 0) + return true; + return false; +} + +Function *OP::GetOpFunc(OpCode OC, ArrayRef OverloadTypes) { + if (OC >= OpCode::NumOpCodes) + return nullptr; + if (OverloadTypes.size() != + m_OpCodeProps[static_cast(OC)].NumOverloadDims) { + llvm_unreachable("incorrect overload dimensions"); + return nullptr; + } + if (OverloadTypes.size() == 0) { + return GetOpFunc(OC, Type::getVoidTy(m_Ctx)); + } else if 
(OverloadTypes.size() == 1) { + return GetOpFunc(OC, OverloadTypes[0]); + } + return GetOpFunc(OC, GetExtendedOverloadType(OverloadTypes)); +} + Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { - if (opCode == OpCode::NumOpCodes) + if (opCode >= OpCode::NumOpCodes) return nullptr; if (!pOverloadType) return nullptr; + + auto &OpProps = m_OpCodeProps[static_cast(opCode)]; + if (IsDxilOpExtendedOverload(opCode)) { + // Make sure pOverloadType is well formed for an extended overload. + StructType *ST = dyn_cast(pOverloadType); + DXASSERT(ST != nullptr, + "otherwise, extended overload type is not a struct"); + if (ST == nullptr) + return nullptr; + bool EltCountValid = ST->getNumElements() == OpProps.NumOverloadDims; + DXASSERT(EltCountValid, + "otherwise, incorrect type count for extended overload."); + if (!EltCountValid) + return nullptr; + } + // Illegal overloads are generated and eliminated by DXIL op constant // evaluation for a number of cases where a double overload of an HL intrinsic // that otherwise does not support double is used for literal values, when @@ -3961,7 +3735,7 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { // Illegal overloads of DXIL intrinsics may survive through to final DXIL, // but these will be caught by the validator, and this is not a regression. 
- OpCodeClass opClass = m_OpCodeProps[(unsigned)opCode].opCodeClass; + OpCodeClass opClass = OpProps.opCodeClass; Function *&F = m_OpCodeClassCache[(unsigned)opClass].pOverloads[pOverloadType]; if (F != nullptr) { @@ -3969,7 +3743,7 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { return F; } - vector ArgTypes; // RetType is ArgTypes[0] + SmallVector ArgTypes; // RetType is ArgTypes[0] Type *pETy = pOverloadType; Type *pRes = GetHandleType(); Type *pNodeHandle = GetNodeHandleType(); @@ -3993,6 +3767,7 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { Type *pF64 = Type::getDoubleTy(m_Ctx); Type *pSDT = GetSplitDoubleType(); // Split double type. Type *p4I32 = GetFourI32Type(); // 4 i32s in a struct. + Type *pHit = GetHitObjectType(); Type *udt = pOverloadType; Type *obj = pOverloadType; @@ -4004,7 +3779,10 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { #define A(_x) ArgTypes.emplace_back(_x) #define RRT(_y) A(GetResRetType(_y)) #define CBRT(_y) A(GetCBufferRetType(_y)) -#define VEC4(_y) A(GetVectorType(4, _y)) +#define VEC4(_y) A(GetStructVectorType(4, _y)) + +// Extended Overload types are wrapped in an anonymous struct +#define EXT(_y) A(cast(pOverloadType)->getElementType(_y)) /* hctdb_instrhelp.get_oloads_funcs()*/ switch (opCode) { // return opCode @@ -5859,118 +5637,188 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pV); A(pI32); break; - case OpCode::ReservedB0: - A(pV); + + // Shader Execution Reordering + case OpCode::HitObject_TraceRay: + A(pHit); + A(pI32); + A(pRes); A(pI32); + A(pI32); + A(pI32); + A(pI32); + A(pI32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(udt); break; - case OpCode::ReservedB1: - A(pV); + case OpCode::HitObject_FromRayQuery: + A(pHit); + A(pI32); A(pI32); break; - case OpCode::ReservedB2: - A(pV); + case OpCode::HitObject_FromRayQueryWithAttrs: + A(pHit); + A(pI32); A(pI32); + A(pI32); + A(udt); break; - case 
OpCode::ReservedB3: - A(pV); + case OpCode::HitObject_MakeMiss: + A(pHit); A(pI32); + A(pI32); + A(pI32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); break; - case OpCode::ReservedB4: - A(pV); + case OpCode::HitObject_MakeNop: + A(pHit); A(pI32); break; - case OpCode::ReservedB5: + case OpCode::HitObject_Invoke: A(pV); A(pI32); + A(pHit); + A(udt); break; - case OpCode::ReservedB6: + case OpCode::MaybeReorderThread: A(pV); A(pI32); + A(pHit); + A(pI32); + A(pI32); break; - case OpCode::ReservedB7: - A(pV); + case OpCode::HitObject_IsMiss: + A(pI1); A(pI32); + A(pHit); break; - case OpCode::ReservedB8: - A(pV); + case OpCode::HitObject_IsHit: + A(pI1); A(pI32); + A(pHit); break; - case OpCode::ReservedB9: - A(pV); + case OpCode::HitObject_IsNop: + A(pI1); A(pI32); + A(pHit); break; - case OpCode::ReservedB10: - A(pV); + case OpCode::HitObject_RayFlags: + A(pI32); A(pI32); + A(pHit); break; - case OpCode::ReservedB11: - A(pV); + case OpCode::HitObject_RayTMin: + A(pF32); A(pI32); + A(pHit); break; - case OpCode::ReservedB12: - A(pV); + case OpCode::HitObject_RayTCurrent: + A(pF32); A(pI32); + A(pHit); break; - case OpCode::ReservedB13: - A(pV); + case OpCode::HitObject_WorldRayOrigin: + A(pF32); + A(pI32); + A(pHit); A(pI32); break; - case OpCode::ReservedB14: - A(pV); + case OpCode::HitObject_WorldRayDirection: + A(pF32); + A(pI32); + A(pHit); A(pI32); break; - case OpCode::ReservedB15: - A(pV); + case OpCode::HitObject_ObjectRayOrigin: + A(pF32); + A(pI32); + A(pHit); A(pI32); break; - case OpCode::ReservedB16: - A(pV); + case OpCode::HitObject_ObjectRayDirection: + A(pF32); + A(pI32); + A(pHit); A(pI32); break; - case OpCode::ReservedB17: - A(pV); + case OpCode::HitObject_ObjectToWorld3x4: + A(pF32); + A(pI32); + A(pHit); + A(pI32); A(pI32); break; - case OpCode::ReservedB18: - A(pV); + case OpCode::HitObject_WorldToObject3x4: + A(pF32); + A(pI32); + A(pHit); + A(pI32); A(pI32); break; - case OpCode::ReservedB19: - 
A(pV); + case OpCode::HitObject_GeometryIndex: + A(pI32); A(pI32); + A(pHit); break; - case OpCode::ReservedB20: - A(pV); + case OpCode::HitObject_InstanceIndex: A(pI32); + A(pI32); + A(pHit); break; - case OpCode::ReservedB21: - A(pV); + case OpCode::HitObject_InstanceID: + A(pI32); A(pI32); + A(pHit); break; - case OpCode::ReservedB22: - A(pV); + case OpCode::HitObject_PrimitiveIndex: + A(pI32); A(pI32); + A(pHit); break; - case OpCode::ReservedB23: - A(pV); + case OpCode::HitObject_HitKind: + A(pI32); A(pI32); + A(pHit); break; - case OpCode::ReservedB24: - A(pV); + case OpCode::HitObject_ShaderTableIndex: A(pI32); + A(pI32); + A(pHit); break; - case OpCode::ReservedB25: - A(pV); + case OpCode::HitObject_SetShaderTableIndex: + A(pHit); + A(pI32); + A(pHit); A(pI32); break; - case OpCode::ReservedB26: - A(pV); + case OpCode::HitObject_LoadLocalRootTableConstant: + A(pI32); + A(pI32); + A(pHit); A(pI32); break; - case OpCode::ReservedB27: + case OpCode::HitObject_Attributes: A(pV); A(pI32); + A(pHit); + A(udt); break; + + // case OpCode::ReservedB28: A(pV); A(pI32); @@ -6023,6 +5871,25 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pV); A(pI32); break; + + // Resources + case OpCode::RawBufferVectorLoad: + RRT(pETy); + A(pI32); + A(pRes); + A(pI32); + A(pI32); + A(pI32); + break; + case OpCode::RawBufferVectorStore: + A(pV); + A(pI32); + A(pRes); + A(pI32); + A(pI32); + A(pETy); + A(pI32); + break; // OPCODE-OLOAD-FUNCS:END default: DXASSERT(false, "otherwise unhandled case"); @@ -6036,14 +5903,15 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { pFT = FunctionType::get( ArgTypes[0], ArrayRef(&ArgTypes[1], ArgTypes.size() - 1), false); - std::string funcName; - ConstructOverloadName(pOverloadType, opCode, funcName); + SmallVector FuncStorage; + StringRef FuncName = + ConstructOverloadName(pOverloadType, opCode, FuncStorage); // Try to find existing function with the same name in the module. 
// This needs to happen after the switch statement that constructs arguments // and return values to ensure that ResRetType is constructed in the // RefreshCache case. - if (Function *existF = m_pModule->getFunction(funcName)) { + if (Function *existF = m_pModule->getFunction(FuncName)) { if (existF->getFunctionType() != pFT) return nullptr; F = existF; @@ -6051,13 +5919,13 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { return F; } - F = cast(m_pModule->getOrInsertFunction(funcName, pFT)); + F = cast(m_pModule->getOrInsertFunction(FuncName, pFT)); UpdateCache(opClass, pOverloadType, F); F->setCallingConv(CallingConv::C); F->addFnAttr(Attribute::NoUnwind); - if (m_OpCodeProps[(unsigned)opCode].FuncAttr != Attribute::None) - F->addFnAttr(m_OpCodeProps[(unsigned)opCode].FuncAttr); + if (OpProps.FuncAttr != Attribute::None) + F->addFnAttr(OpProps.FuncAttr); return F; } @@ -6160,6 +6028,8 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::TempRegStore: case OpCode::CallShader: case OpCode::Pack4x8: + case OpCode::HitObject_Invoke: + case OpCode::HitObject_Attributes: if (FT->getNumParams() <= 2) return nullptr; return FT->getParamType(2); @@ -6171,6 +6041,7 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::StoreVertexOutput: case OpCode::StorePrimitiveOutput: case OpCode::DispatchMesh: + case OpCode::RawBufferVectorStore: if (FT->getNumParams() <= 4) return nullptr; return FT->getParamType(4); @@ -6199,10 +6070,12 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { return nullptr; return FT->getParamType(5); case OpCode::TraceRay: + case OpCode::HitObject_TraceRay: if (FT->getNumParams() <= 15) return nullptr; return FT->getParamType(15); case OpCode::ReportHit: + case OpCode::HitObject_FromRayQueryWithAttrs: if (FT->getNumParams() <= 3) return nullptr; return FT->getParamType(3); @@ -6285,34 +6158,12 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function 
*F) { case OpCode::ReservedA0: case OpCode::ReservedA1: case OpCode::ReservedA2: - case OpCode::ReservedB0: - case OpCode::ReservedB1: - case OpCode::ReservedB2: - case OpCode::ReservedB3: - case OpCode::ReservedB4: - case OpCode::ReservedB5: - case OpCode::ReservedB6: - case OpCode::ReservedB7: - case OpCode::ReservedB8: - case OpCode::ReservedB9: - case OpCode::ReservedB10: - case OpCode::ReservedB11: - case OpCode::ReservedB12: - case OpCode::ReservedB13: - case OpCode::ReservedB14: - case OpCode::ReservedB15: - case OpCode::ReservedB16: - case OpCode::ReservedB17: - case OpCode::ReservedB18: - case OpCode::ReservedB19: - case OpCode::ReservedB20: - case OpCode::ReservedB21: - case OpCode::ReservedB22: - case OpCode::ReservedB23: - case OpCode::ReservedB24: - case OpCode::ReservedB25: - case OpCode::ReservedB26: - case OpCode::ReservedB27: + case OpCode::HitObject_FromRayQuery: + case OpCode::HitObject_MakeMiss: + case OpCode::HitObject_MakeNop: + case OpCode::MaybeReorderThread: + case OpCode::HitObject_SetShaderTableIndex: + case OpCode::HitObject_LoadLocalRootTableConstant: case OpCode::ReservedB28: case OpCode::ReservedB29: case OpCode::ReservedB30: @@ -6364,6 +6215,13 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::RayQuery_CommittedInstanceContributionToHitGroupIndex: case OpCode::StartVertexLocation: case OpCode::StartInstanceLocation: + case OpCode::HitObject_RayFlags: + case OpCode::HitObject_GeometryIndex: + case OpCode::HitObject_InstanceIndex: + case OpCode::HitObject_InstanceID: + case OpCode::HitObject_PrimitiveIndex: + case OpCode::HitObject_HitKind: + case OpCode::HitObject_ShaderTableIndex: return IntegerType::get(Ctx, 32); case OpCode::CalculateLOD: case OpCode::DomainLocation: @@ -6390,6 +6248,14 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::RayQuery_CandidateObjectRayDirection: case OpCode::RayQuery_CommittedObjectRayOrigin: case 
OpCode::RayQuery_CommittedObjectRayDirection: + case OpCode::HitObject_RayTMin: + case OpCode::HitObject_RayTCurrent: + case OpCode::HitObject_WorldRayOrigin: + case OpCode::HitObject_WorldRayDirection: + case OpCode::HitObject_ObjectRayOrigin: + case OpCode::HitObject_ObjectRayDirection: + case OpCode::HitObject_ObjectToWorld3x4: + case OpCode::HitObject_WorldToObject3x4: return Type::getFloatTy(Ctx); case OpCode::MakeDouble: case OpCode::SplitDouble: @@ -6400,6 +6266,9 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::RayQuery_CommittedTriangleFrontFace: case OpCode::IsHelperLane: case OpCode::QuadVote: + case OpCode::HitObject_IsMiss: + case OpCode::HitObject_IsHit: + case OpCode::HitObject_IsNop: return IntegerType::get(Ctx, 1); case OpCode::CBufferLoadLegacy: case OpCode::Sample: @@ -6417,7 +6286,8 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::TextureGatherRaw: case OpCode::SampleCmpLevel: case OpCode::SampleCmpGrad: - case OpCode::SampleCmpBias: { + case OpCode::SampleCmpBias: + case OpCode::RawBufferVectorLoad: { StructType *ST = cast(Ty); return ST->getElementType(0); } @@ -6431,6 +6301,8 @@ Type *OP::GetHandleType() const { return m_pHandleType; } Type *OP::GetNodeHandleType() const { return m_pNodeHandleType; } +Type *OP::GetHitObjectType() const { return m_pHitObjectType; } + Type *OP::GetNodeRecordHandleType() const { return m_pNodeRecordHandleType; } Type *OP::GetResourcePropertiesType() const { @@ -6462,62 +6334,91 @@ Type *OP::GetFourI32Type() const { return m_pFourI32Type; } Type *OP::GetFourI16Type() const { return m_pFourI16Type; } bool OP::IsResRetType(llvm::Type *Ty) { + if (!Ty->isStructTy()) + return false; for (Type *ResTy : m_pResRetType) { if (Ty == ResTy) return true; } - return false; + // Check for vector overload which isn't cached in m_pResRetType. 
+ StructType *ST = cast(Ty); + if (!ST->hasName() || ST->getNumElements() < 2 || + !ST->getElementType(0)->isVectorTy()) + return false; + return Ty == GetResRetType(ST->getElementType(0)); } Type *OP::GetResRetType(Type *pOverloadType) { unsigned TypeSlot = GetTypeSlot(pOverloadType); - if (m_pResRetType[TypeSlot] == nullptr) { - string TypeName("dx.types.ResRet."); - TypeName += GetOverloadTypeName(TypeSlot); - Type *FieldTypes[5] = {pOverloadType, pOverloadType, pOverloadType, - pOverloadType, Type::getInt32Ty(m_Ctx)}; - m_pResRetType[TypeSlot] = - GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + if (TypeSlot < TS_BasicCount) { + if (m_pResRetType[TypeSlot] == nullptr) { + SmallVector Storage; + StringRef TypeName = + (Twine("dx.types.ResRet.") + Twine(GetOverloadTypeName(TypeSlot))) + .toStringRef(Storage); + Type *FieldTypes[5] = {pOverloadType, pOverloadType, pOverloadType, + pOverloadType, Type::getInt32Ty(m_Ctx)}; + m_pResRetType[TypeSlot] = + GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + } + return m_pResRetType[TypeSlot]; + } else if (TypeSlot == TS_Vector) { + SmallVector Storage; + VectorType *VecTy = cast(pOverloadType); + StringRef TypeName = + (Twine("dx.types.ResRet.v") + Twine(VecTy->getNumElements()) + + Twine(GetOverloadTypeName(OP::GetTypeSlot(VecTy->getElementType())))) + .toStringRef(Storage); + Type *FieldTypes[2] = {pOverloadType, Type::getInt32Ty(m_Ctx)}; + return GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); } - return m_pResRetType[TypeSlot]; + llvm_unreachable("Invalid overload for GetResRetType"); + return nullptr; } Type *OP::GetCBufferRetType(Type *pOverloadType) { unsigned TypeSlot = GetTypeSlot(pOverloadType); + if (TypeSlot >= TS_BasicCount) { + llvm_unreachable("Invalid overload for GetResRetType"); + return nullptr; + } + if (m_pCBufferRetType[TypeSlot] == nullptr) { DXASSERT(m_LowPrecisionMode != DXIL::LowPrecisionMode::Undefined, "m_LowPrecisionMode must be set before 
constructing type."); - string TypeName("dx.types.CBufRet."); - TypeName += GetOverloadTypeName(TypeSlot); + SmallVector Storage; + raw_svector_ostream OS(Storage); + OS << "dx.types.CBufRet."; + OS << GetOverloadTypeName(TypeSlot); Type *i64Ty = Type::getInt64Ty(pOverloadType->getContext()); Type *i16Ty = Type::getInt16Ty(pOverloadType->getContext()); if (pOverloadType->isDoubleTy() || pOverloadType == i64Ty) { Type *FieldTypes[2] = {pOverloadType, pOverloadType}; m_pCBufferRetType[TypeSlot] = - GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + GetOrCreateStructType(m_Ctx, FieldTypes, OS.str(), m_pModule); } else if (!UseMinPrecision() && (pOverloadType->isHalfTy() || pOverloadType == i16Ty)) { - TypeName += ".8"; // dx.types.CBufRet.fp16.8 for buffer of 8 halves + OS << ".8"; // dx.types.CBufRet.f16.8 for buffer of 8 halves Type *FieldTypes[8] = { pOverloadType, pOverloadType, pOverloadType, pOverloadType, pOverloadType, pOverloadType, pOverloadType, pOverloadType, }; m_pCBufferRetType[TypeSlot] = - GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + GetOrCreateStructType(m_Ctx, FieldTypes, OS.str(), m_pModule); } else { Type *FieldTypes[4] = {pOverloadType, pOverloadType, pOverloadType, pOverloadType}; m_pCBufferRetType[TypeSlot] = - GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + GetOrCreateStructType(m_Ctx, FieldTypes, OS.str(), m_pModule); } } return m_pCBufferRetType[TypeSlot]; } -Type *OP::GetVectorType(unsigned numElements, Type *pOverloadType) { +Type *OP::GetStructVectorType(unsigned numElements, Type *pOverloadType) { if (numElements == 4) { if (pOverloadType == Type::getInt32Ty(pOverloadType->getContext())) { return m_pFourI32Type; @@ -6529,6 +6430,10 @@ Type *OP::GetVectorType(unsigned numElements, Type *pOverloadType) { return nullptr; } +StructType *OP::GetExtendedOverloadType(ArrayRef OverloadTypes) { + return StructType::get(m_Ctx, OverloadTypes); +} + 
//------------------------------------------------------------------------------ // // LLVM utility methods. diff --git a/lib/DXIL/DxilResource.cpp b/lib/DXIL/DxilResource.cpp index 3ab71030bb..0e6f1df877 100644 --- a/lib/DXIL/DxilResource.cpp +++ b/lib/DXIL/DxilResource.cpp @@ -25,8 +25,8 @@ namespace hlsl { DxilResource::DxilResource() : DxilResourceBase(DxilResourceBase::Class::Invalid), m_SampleCount(0), m_ElementStride(0), m_SamplerFeedbackType((DXIL::SamplerFeedbackType)0), - m_bGloballyCoherent(false), m_bHasCounter(false), m_bROV(false), - m_bHasAtomic64Use(false) {} + m_bGloballyCoherent(false), m_bReorderCoherent(false), + m_bHasCounter(false), m_bROV(false), m_bHasAtomic64Use(false) {} CompType DxilResource::GetCompType() const { return m_CompType; } @@ -74,6 +74,10 @@ bool DxilResource::IsGloballyCoherent() const { return m_bGloballyCoherent; } void DxilResource::SetGloballyCoherent(bool b) { m_bGloballyCoherent = b; } +bool DxilResource::IsReorderCoherent() const { return m_bReorderCoherent; } + +void DxilResource::SetReorderCoherent(bool b) { m_bReorderCoherent = b; } + bool DxilResource::HasCounter() const { return m_bHasCounter; } void DxilResource::SetHasCounter(bool b) { m_bHasCounter = b; } diff --git a/lib/DXIL/DxilResourceProperties.cpp b/lib/DXIL/DxilResourceProperties.cpp index 2d1bf95014..54ab24f36e 100644 --- a/lib/DXIL/DxilResourceProperties.cpp +++ b/lib/DXIL/DxilResourceProperties.cpp @@ -190,6 +190,7 @@ DxilResourceProperties loadPropsFromResourceBase(const DxilResourceBase *Res) { RP.Basic.IsUAV = true; RP.Basic.ResourceKind = (uint8_t)Res->GetKind(); RP.Basic.IsGloballyCoherent = UAV->IsGloballyCoherent(); + RP.Basic.IsReorderCoherent = UAV->IsReorderCoherent(); RP.Basic.SamplerCmpOrHasCounter = UAV->HasCounter(); RP.Basic.IsROV = UAV->IsROV(); SetResProperties(*UAV); @@ -234,6 +235,8 @@ DxilResourceProperties tryMergeProps(DxilResourceProperties curProps, prevProps.Basic.IsGloballyCoherent) { curProps.Basic.IsGloballyCoherent = 
prevProps.Basic.IsGloballyCoherent; } + if (curProps.Basic.IsReorderCoherent != prevProps.Basic.IsReorderCoherent) + curProps.Basic.IsReorderCoherent = prevProps.Basic.IsReorderCoherent; } if (curProps.Basic.ResourceKind == (uint8_t)DXIL::ResourceKind::CBuffer) { diff --git a/lib/DXIL/DxilShaderFlags.cpp b/lib/DXIL/DxilShaderFlags.cpp index 7d0799dc64..993038aaf1 100644 --- a/lib/DXIL/DxilShaderFlags.cpp +++ b/lib/DXIL/DxilShaderFlags.cpp @@ -637,6 +637,7 @@ ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F, hasViewID = true; break; case DXIL::OpCode::AllocateRayQuery: + case DXIL::OpCode::AllocateRayQuery2: case DXIL::OpCode::GeometryIndex: hasRaytracingTier1_1 = true; break; diff --git a/lib/DXIL/DxilUtil.cpp b/lib/DXIL/DxilUtil.cpp index 757a0bc3ee..966c2e189c 100644 --- a/lib/DXIL/DxilUtil.cpp +++ b/lib/DXIL/DxilUtil.cpp @@ -426,35 +426,37 @@ GetHLSLResourceProperties(llvm::Type *Ty) { false, false, false)); if (name == "SamplerComparisonState") - return RetType( - true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Sampler, false, - false, /*cmp or counter*/ true)); + return RetType(true, MakeResourceProperties( + hlsl::DXIL::ResourceKind::Sampler, /*UAV*/ false, + /*ROV*/ false, /*cmp or counter*/ true)); if (name.startswith("AppendStructuredBuffer<")) - return RetType(true, MakeResourceProperties( - hlsl::DXIL::ResourceKind::StructuredBuffer, - false, false, /*cmp or counter*/ true)); + return RetType(true, + MakeResourceProperties( + hlsl::DXIL::ResourceKind::StructuredBuffer, + /*UAV*/ true, /*ROV*/ false, /*cmp or counter*/ true)); if (name.startswith("ConsumeStructuredBuffer<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::StructuredBuffer, - false, false, /*cmp or counter*/ true)); + /*UAV*/ true, /*ROV*/ false, + /*cmp or counter*/ true)); if (name == "RaytracingAccelerationStructure") return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::RTAccelerationStructure, - false, false, false)); + 
/*UAV*/ false, /*ROV*/ false, false)); if (name.startswith("ConstantBuffer<")) - return RetType(true, - MakeResourceProperties(hlsl::DXIL::ResourceKind::CBuffer, - false, false, false)); + return RetType( + true, MakeResourceProperties(hlsl::DXIL::ResourceKind::CBuffer, + /*UAV*/ false, /*ROV*/ false, false)); if (name.startswith("TextureBuffer<")) - return RetType(true, - MakeResourceProperties(hlsl::DXIL::ResourceKind::TBuffer, - false, false, false)); + return RetType( + true, MakeResourceProperties(hlsl::DXIL::ResourceKind::TBuffer, + /*UAV*/ false, /*ROV*/ false, false)); if (ConsumePrefix(name, "FeedbackTexture2D")) { hlsl::DXIL::ResourceKind kind = hlsl::DXIL::ResourceKind::Invalid; @@ -464,7 +466,9 @@ GetHLSLResourceProperties(llvm::Type *Ty) { kind = hlsl::DXIL::ResourceKind::FeedbackTexture2D; if (name.startswith("<")) - return RetType(true, MakeResourceProperties(kind, false, false, false)); + return RetType(true, + MakeResourceProperties(kind, /*UAV*/ false, + /*ROV*/ false, /*Cmp*/ false)); return FalseRet; } @@ -475,63 +479,63 @@ GetHLSLResourceProperties(llvm::Type *Ty) { if (name == "ByteAddressBuffer") return RetType(true, MakeResourceProperties(hlsl::DXIL::ResourceKind::RawBuffer, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("Buffer<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::TypedBuffer, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("StructuredBuffer<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::StructuredBuffer, UAV, - ROV, false)); + ROV, /*Cmp*/ false)); if (ConsumePrefix(name, "Texture")) { if (name.startswith("1D<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Texture1D, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("1DArray<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::Texture1DArray, UAV, - ROV, false)); + ROV, /*Cmp*/ false)); if 
(name.startswith("2D<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Texture2D, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("2DArray<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::Texture2DArray, UAV, - ROV, false)); + ROV, /*Cmp*/ false)); if (name.startswith("3D<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Texture3D, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("Cube<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::TextureCube, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("CubeArray<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::TextureCubeArray, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("2DMS<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Texture2DMS, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("2DMSArray<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::Texture2DMSArray, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); return FalseRet; } } @@ -570,6 +574,9 @@ bool IsHLSLObjectType(llvm::Type *Ty) { if (IsHLSLNodeIOType(Ty)) return true; + + if (IsHLSLHitObjectType(Ty)) + return true; } return false; } @@ -587,6 +594,24 @@ bool IsHLSLRayQueryType(llvm::Type *Ty) { return false; } +llvm::Type *GetHLSLHitObjectType(llvm::Module *M) { + using namespace llvm; + StructType *HitObjectTy = M->getTypeByName("dx.types.HitObject"); + if (!HitObjectTy) + HitObjectTy = StructType::create({Type::getInt8PtrTy(M->getContext(), 0)}, + "dx.types.HitObject", false); + return HitObjectTy; +} + +bool IsHLSLHitObjectType(llvm::Type *Ty) { + llvm::StructType *ST = dyn_cast(Ty); + if (!ST) + return false; + if (!ST->hasName()) + return false; + return ST->getName() == "dx.types.HitObject"; +} + bool IsHLSLResourceDescType(llvm::Type *Ty) { if (llvm::StructType *ST = 
dyn_cast(Ty)) { if (!ST->hasName()) @@ -1390,5 +1415,18 @@ bool DeleteDeadAllocas(llvm::Function &F) { return Changed; } +// Retrieve dxil version in the given module. +// Where the module doesn't already have a Dxil module, +// it identifies and returns the version info from the metatdata. +// Returns false where none of that works, but that shouldn't happen much. +bool LoadDxilVersion(const Module *M, unsigned &Major, unsigned &Minor) { + if (M->HasDxilModule()) { + M->GetDxilModule().GetShaderModel()->GetDxilVersion(Major, Minor); + return true; + } + // No module, try metadata. + return DxilMDHelper::LoadDxilVersion(M, Major, Minor); +} + } // namespace dxilutil } // namespace hlsl diff --git a/lib/DxcSupport/HLSLOptions.cpp b/lib/DxcSupport/HLSLOptions.cpp index 3daf880f6d..1ce7d0dfc0 100644 --- a/lib/DxcSupport/HLSLOptions.cpp +++ b/lib/DxcSupport/HLSLOptions.cpp @@ -1089,6 +1089,8 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude, addDiagnosticArgs(Args, OPT_W_Group, OPT_W_value_Group, opts.Warnings); + opts.GenMetal = Args.hasFlag(OPT_metal, OPT_INVALID, false); + // SPIRV Change Starts #ifdef ENABLE_SPIRV_CODEGEN opts.GenSPIRV = Args.hasFlag(OPT_spirv, OPT_INVALID, false); @@ -1313,6 +1315,21 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude, #endif // ENABLE_SPIRV_CODEGEN // SPIRV Change Ends +#ifndef ENABLE_METAL_CODEGEN + if (opts.GenMetal) { + errors << "Metal CodeGen not available. " + "Please rebuild with Metal IR Converter installed."; + return 1; + } +#endif + + if (opts.GenMetal) { + if (!opts.AssemblyCode.empty() || opts.OutputObject.empty()) { + errors << "Disassembly of Metal IR not supported (yet)."; + return 1; + } + } + // Validation for DebugInfo here because spirv uses same DebugInfo opt, // and legacy wrappers will add EmbedDebug in this case, leading to this // failing if placed before spirv path sets DebugInfo to true. 
diff --git a/lib/DxilContainer/DxilContainerAssembler.cpp b/lib/DxilContainer/DxilContainerAssembler.cpp index 0b7f5dd467..48d8872733 100644 --- a/lib/DxilContainer/DxilContainerAssembler.cpp +++ b/lib/DxilContainer/DxilContainerAssembler.cpp @@ -37,6 +37,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/Support/MD5.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Transforms/Utils/Cloning.h" #include #include // Needed for DxilPipelineStateValidation.h @@ -1056,6 +1057,9 @@ class DxilRDATWriter : public DxilPartWriter { if (pRes->IsGloballyCoherent()) info.Flags |= static_cast(RDAT::DxilResourceFlag::UAVGloballyCoherent); + if (pRes->IsReorderCoherent()) + info.Flags |= + static_cast(RDAT::DxilResourceFlag::UAVReorderCoherent); if (pRes->IsROV()) info.Flags |= static_cast( RDAT::DxilResourceFlag::UAVRasterizerOrderedView); @@ -1895,6 +1899,7 @@ void hlsl::SerializeDxilContainerForModule( DxilShaderHash *pShaderHashOut, AbstractMemoryStream *pReflectionStreamOut, AbstractMemoryStream *pRootSigStreamOut, void *pPrivateData, size_t PrivateDataSize) { + llvm::TimeTraceScope TimeScope("SerializeDxilContainer", StringRef("")); // TODO: add a flag to update the module and remove information that is not // part of DXIL proper and is used only to assemble the container. 
diff --git a/lib/DxilPIXPasses/CMakeLists.txt b/lib/DxilPIXPasses/CMakeLists.txt index c36d11d559..67e77f17cd 100644 --- a/lib/DxilPIXPasses/CMakeLists.txt +++ b/lib/DxilPIXPasses/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_library(LLVMDxilPIXPasses PixPassHelpers.cpp DxilPIXAddTidToAmplificationShaderPayload.cpp DxilPIXDXRInvocationsLog.cpp + DxilNonUniformResourceIndexInstrumentation.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/IR diff --git a/lib/DxilPIXPasses/DxilNonUniformResourceIndexInstrumentation.cpp b/lib/DxilPIXPasses/DxilNonUniformResourceIndexInstrumentation.cpp new file mode 100644 index 0000000000..a442bfabed --- /dev/null +++ b/lib/DxilPIXPasses/DxilNonUniformResourceIndexInstrumentation.cpp @@ -0,0 +1,173 @@ +/////////////////////////////////////////////////////////////////////////////// +// // +// DxilNonUniformResourceIndexInstrumentation.cpp // +// Copyright (C) Microsoft Corporation. All rights reserved. // +// This file is distributed under the University of Illinois Open Source // +// License. See LICENSE.TXT for details. // +// // +// Provides a pass to add instrumentation to determine missing usage of the // +// NonUniformResourceIndex qualifier when dynamically indexing resources. // +// Used by PIX. 
// +// // +/////////////////////////////////////////////////////////////////////////////// + +#include "PixPassHelpers.h" +#include "dxc/DXIL/DxilInstructions.h" +#include "dxc/DxilPIXPasses/DxilPIXPasses.h" +#include "dxc/DxilPIXPasses/DxilPIXVirtualRegisters.h" +#include "dxc/Support/Global.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/FormattedStream.h" + +using namespace llvm; +using namespace hlsl; + +class DxilNonUniformResourceIndexInstrumentation : public ModulePass { + +public: + static char ID; // Pass identification, replacement for typeid + explicit DxilNonUniformResourceIndexInstrumentation() : ModulePass(ID) {} + StringRef getPassName() const override { + return "DXIL NonUniformResourceIndex Instrumentation"; + } + bool runOnModule(Module &M) override; +}; + +bool DxilNonUniformResourceIndexInstrumentation::runOnModule(Module &M) { + // This pass adds instrumentation for incorrect NonUniformResourceIndex usage + + DxilModule &DM = M.GetOrCreateDxilModule(); + LLVMContext &Ctx = M.getContext(); + OP *HlslOP = DM.GetOP(); + + hlsl::DxilResource *PixUAVResource = nullptr; + + UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(Ctx)); + + // Use WaveActiveAllEqual to check if a dynamic index is uniform + Function *WaveActiveAllEqualFunc = HlslOP->GetOpFunc( + DXIL::OpCode::WaveActiveAllEqual, Type::getInt32Ty(Ctx)); + Constant *WaveActiveAllEqualOpCode = + HlslOP->GetI32Const((int32_t)DXIL::OpCode::WaveActiveAllEqual); + + // Atomic operation to use for writing to the result uav resource + Function *AtomicOpFunc = + HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(Ctx)); + Constant *AtomicBinOpcode = + HlslOP->GetU32Const((uint32_t)OP::OpCode::AtomicBinOp); + Constant *AtomicOr = HlslOP->GetU32Const((uint32_t)DXIL::AtomicBinOpCode::Or); + + std::map FunctionToUAVHandle; + + // This is the main pass that will iterate through all of the resources that + // are dynamically indexed. 
If not already marked NonUniformResourceIndex, + // then insert WaveActiveAllEqual to determine if the index is uniform + // and finally write to a UAV resource with the result. + + PIXPassHelpers::ForEachDynamicallyIndexedResource( + DM, [&](bool IsNonUniformIndex, Instruction *CreateHandle, + Value *IndexOperand) { + if (IsNonUniformIndex) { + // The NonUniformResourceIndex qualifier was used, continue. + return true; + } + + if (!PixUAVResource) { + PixUAVResource = + PIXPassHelpers::CreateGlobalUAVResource(DM, 0, "PixUAVResource"); + } + + CallInst *PixUAVHandle = nullptr; + Function *F = CreateHandle->getParent()->getParent(); + + const auto FunctionToUAVHandleIter = FunctionToUAVHandle.lower_bound(F); + + if ((FunctionToUAVHandleIter != FunctionToUAVHandle.end()) && + (FunctionToUAVHandleIter->first == F)) { + PixUAVHandle = FunctionToUAVHandleIter->second; + } else { + IRBuilder<> Builder(F->getEntryBlock().getFirstInsertionPt()); + + PixUAVHandle = PIXPassHelpers::CreateHandleForResource( + DM, Builder, PixUAVResource, "PixUAVHandle"); + + FunctionToUAVHandle.insert(FunctionToUAVHandleIter, + {F, PixUAVHandle}); + } + + IRBuilder<> Builder(CreateHandle); + + uint32_t InstructionNumber = 0; + if (!pix_dxil::PixDxilInstNum::FromInst(CreateHandle, + &InstructionNumber)) { + DXASSERT_NOMSG(false); + } + + // The output UAV is treated as a bit array where each bit corresponds + // to an instruction number. This determines what byte offset to write + // our result to based on the instruction number. 
+ const uint32_t InstructionNumByteOffset = + (InstructionNumber / 32u) * sizeof(uint32_t); + const uint32_t InstructionNumBitPosition = (InstructionNumber % 32u); + const uint32_t InstructionNumBitMask = 1u << InstructionNumBitPosition; + + Constant *UAVByteOffsetArg = + HlslOP->GetU32Const(InstructionNumByteOffset); + + CallInst *WaveActiveAllEqualCall = Builder.CreateCall( + WaveActiveAllEqualFunc, {WaveActiveAllEqualOpCode, IndexOperand}); + + // This takes the result of the WaveActiveAllEqual result and shifts + // it into the same bit position as the instruction number, followed + // by an xor to determine what to write to the UAV + Value *IsWaveEqual = + Builder.CreateZExt(WaveActiveAllEqualCall, Builder.getInt32Ty()); + Value *WaveEqualBitMask = + Builder.CreateShl(IsWaveEqual, InstructionNumBitPosition); + Value *FinalResult = + Builder.CreateXor(WaveEqualBitMask, InstructionNumBitMask); + + // Generate instructions to bitwise OR a UAV value corresponding + // to the instruction number and result of WaveActiveAllEqual. + // If WaveActiveAllEqual was false, we write a 1, otherwise a 0. 
+ Builder.CreateCall( + AtomicOpFunc, + { + AtomicBinOpcode, // i32, ; opcode + PixUAVHandle, // %dx.types.Handle, ; resource handle + AtomicOr, // i32, ; binary operation code : + // EXCHANGE, IADD, AND, OR, XOR + // IMIN, IMAX, UMIN, UMAX + UAVByteOffsetArg, // i32, ; coordinate c0: byte offset + UndefArg, // i32, ; coordinate c1 (unused) + UndefArg, // i32, ; coordinate c2 (unused) + FinalResult // i32); value + }, + "UAVInstructionNumberBitSet"); + return true; + }); + + const bool modified = (PixUAVResource != nullptr); + + if (modified) { + DM.ReEmitDxilResources(); + + if (OSOverride != nullptr) { + formatted_raw_ostream FOS(*OSOverride); + FOS << "\nFoundDynamicIndexingNoNuri\n"; + } + } + + return modified; +} + +char DxilNonUniformResourceIndexInstrumentation::ID = 0; + +ModulePass *llvm::createDxilNonUniformResourceIndexInstrumentationPass() { + return new DxilNonUniformResourceIndexInstrumentation(); +} + +INITIALIZE_PASS(DxilNonUniformResourceIndexInstrumentation, + "hlsl-dxil-non-uniform-resource-index-instrumentation", + "HLSL DXIL NonUniformResourceIndex instrumentation for PIX", + false, false) diff --git a/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp b/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp index 4f4cc7c620..1dddb6c0e6 100644 --- a/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp +++ b/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp @@ -795,87 +795,6 @@ DxilShaderAccessTracking::GetResourceFromHandle(Value *resHandle, return ret; } -static bool CheckForDynamicIndexing(OP *HlslOP, LLVMContext &Ctx, - DxilModule &DM) { - bool FoundDynamicIndexing = false; - - for (llvm::Function &F : DM.GetModule()->functions()) { - if (F.isDeclaration() && !F.use_empty() && OP::IsDxilOpFunc(&F)) { - if (F.hasName()) { - if (F.getName().find("createHandleForLib") != StringRef::npos) { - auto FunctionUses = F.uses(); - for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) { - auto &FunctionUse = *FI++; - auto FunctionUser = FunctionUse.getUser(); - 
auto instruction = cast(FunctionUser); - Value *resourceLoad = - instruction->getOperand(kCreateHandleForLibResOpIdx); - if (auto *load = cast(resourceLoad)) { - auto *resOrGep = load->getOperand(0); - if (isa(resOrGep)) { - FoundDynamicIndexing = true; - break; - } - } - } - } - } - } - if (FoundDynamicIndexing) { - break; - } - } - - if (!FoundDynamicIndexing) { - auto CreateHandleFn = - HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx)); - for (auto FI = CreateHandleFn->user_begin(); - FI != CreateHandleFn->user_end();) { - auto *FunctionUser = *FI++; - auto instruction = cast(FunctionUser); - Value *index = instruction->getOperand(kCreateHandleResIndexOpIdx); - if (!isa(index)) { - FoundDynamicIndexing = true; - break; - } - } - } - - if (!FoundDynamicIndexing) { - auto CreateHandleFromBindingFn = HlslOP->GetOpFunc( - DXIL::OpCode::CreateHandleFromBinding, Type::getVoidTy(Ctx)); - for (auto FI = CreateHandleFromBindingFn->user_begin(); - FI != CreateHandleFromBindingFn->user_end();) { - auto *FunctionUser = *FI++; - auto instruction = cast(FunctionUser); - Value *index = - instruction->getOperand(kCreateHandleFromBindingResIndexOpIdx); - if (!isa(index)) { - FoundDynamicIndexing = true; - break; - } - } - } - - if (!FoundDynamicIndexing) { - auto CreateHandleFromHeapFn = HlslOP->GetOpFunc( - DXIL::OpCode::CreateHandleFromHeap, Type::getVoidTy(Ctx)); - for (auto FI = CreateHandleFromHeapFn->user_begin(); - FI != CreateHandleFromHeapFn->user_end();) { - auto *FunctionUser = *FI++; - auto instruction = cast(FunctionUser); - Value *index = - instruction->getOperand(kCreateHandleFromHeapHeapIndexOpIdx); - if (!isa(index)) { - FoundDynamicIndexing = true; - break; - } - } - } - - return FoundDynamicIndexing; -} - bool DxilShaderAccessTracking::runOnModule(Module &M) { // This pass adds instrumentation for shader access to resources @@ -887,7 +806,13 @@ bool DxilShaderAccessTracking::runOnModule(Module &M) { if (m_CheckForDynamicIndexing) { - bool 
FoundDynamicIndexing = CheckForDynamicIndexing(HlslOP, Ctx, DM); + bool FoundDynamicIndexing = false; + + PIXPassHelpers::ForEachDynamicallyIndexedResource( + DM, [&FoundDynamicIndexing](bool, Instruction *, Value *) { + FoundDynamicIndexing = true; + return false; + }); if (FoundDynamicIndexing) { if (OSOverride != nullptr) { @@ -980,13 +905,14 @@ bool DxilShaderAccessTracking::runOnModule(Module &M) { case DXIL::OpCode::BufferUpdateCounter: readWrite = ShaderAccessFlags::Counter; break; + case DXIL::OpCode::HitObject_TraceRay: case DXIL::OpCode::TraceRay: { // Read of AccelerationStructure; doesn't match function attribute - auto res = GetResourceFromHandle(Call->getArgOperand(1), DM); - if (res.accessStyle == AccessStyle::None) { + auto Res = GetResourceFromHandle(Call->getArgOperand(1), DM); + if (Res.accessStyle == AccessStyle::None) { continue; } - if (EmitResourceAccess(DM, res, Call, HlslOP, Ctx, + if (EmitResourceAccess(DM, Res, Call, HlslOP, Ctx, ShaderAccessFlags::Read)) { Modified = true; } diff --git a/lib/DxilPIXPasses/PixPassHelpers.cpp b/lib/DxilPIXPasses/PixPassHelpers.cpp index dfb4b3aa83..c7c99cf763 100644 --- a/lib/DxilPIXPasses/PixPassHelpers.cpp +++ b/lib/DxilPIXPasses/PixPassHelpers.cpp @@ -199,6 +199,18 @@ constexpr uint32_t toolsUAVRegister = 0; template void ExtendRootSig(RootSigDesc &rootSigDesc) { auto *existingParams = rootSigDesc.pParameters; + for (uint32_t i = 0; i < rootSigDesc.NumParameters; ++i) { + if (rootSigDesc.pParameters[i].ParameterType == + DxilRootParameterType::UAV) { + if (rootSigDesc.pParameters[i].Descriptor.RegisterSpace == + toolsRegisterSpace && + rootSigDesc.pParameters[i].Descriptor.ShaderRegister == + toolsUAVRegister) { + // Already added + return; + } + } + } auto *newParams = new RootParameterDesc[rootSigDesc.NumParameters + 1]; if (existingParams != nullptr) { memcpy(newParams, existingParams, @@ -312,6 +324,7 @@ hlsl::DxilResource *CreateGlobalUAVResource(hlsl::DxilModule &DM, (unsigned int)-2); // This is 
the reserved-for-tools register space pUAV->SetSampleCount(0); // This is what compiler generates for a raw UAV pUAV->SetGloballyCoherent(false); + pUAV->SetReorderCoherent(false); pUAV->SetHasCounter(false); pUAV->SetCompType( CompType::getInvalid()); // This is what compiler generates for a raw UAV @@ -500,6 +513,90 @@ unsigned int FindOrAddSV_Position(hlsl::DxilModule &DM, } } +void ForEachDynamicallyIndexedResource( + hlsl::DxilModule &DM, + const std::function &Visitor) { + OP *HlslOP = DM.GetOP(); + LLVMContext &Ctx = DM.GetModule()->getContext(); + + for (llvm::Function &F : DM.GetModule()->functions()) { + if (F.isDeclaration() && !F.use_empty() && OP::IsDxilOpFunc(&F)) { + if (F.hasName()) { + if (F.getName().find("createHandleForLib") != StringRef::npos) { + auto FunctionUses = F.uses(); + for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) { + auto &FunctionUse = *FI++; + auto FunctionUser = FunctionUse.getUser(); + auto instruction = cast(FunctionUser); + Value *resourceLoad = instruction->getOperand( + DXIL::OperandIndex::kCreateHandleForLibResOpIdx); + if (auto *load = cast(resourceLoad)) { + auto *resOrGep = load->getOperand(0); + if (auto *gep = dyn_cast(resOrGep)) { + if (!Visitor(DxilMDHelper::IsMarkedNonUniform(gep), load, + gep->getOperand(2))) { + return; + } + } + } + } + } + } + } + } + + auto CreateHandleFn = + HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx)); + for (auto FI = CreateHandleFn->user_begin(); + FI != CreateHandleFn->user_end();) { + auto *FunctionUser = *FI++; + auto instruction = cast(FunctionUser); + Value *index = + instruction->getOperand(DXIL::OperandIndex::kCreateHandleResIndexOpIdx); + if (!isa(index)) { + const DxilInst_CreateHandle createHandle(instruction); + if (!Visitor(createHandle.get_nonUniformIndex_val(), instruction, + index)) { + return; + } + } + } + + auto CreateHandleFromBindingFn = HlslOP->GetOpFunc( + DXIL::OpCode::CreateHandleFromBinding, Type::getVoidTy(Ctx)); + for (auto 
FI = CreateHandleFromBindingFn->user_begin(); + FI != CreateHandleFromBindingFn->user_end();) { + auto *FunctionUser = *FI++; + auto instruction = cast(FunctionUser); + Value *index = instruction->getOperand( + DXIL::OperandIndex::kCreateHandleFromBindingResIndexOpIdx); + if (!isa(index)) { + const DxilInst_CreateHandleFromBinding createHandle(instruction); + if (!Visitor(createHandle.get_nonUniformIndex_val(), instruction, + index)) { + return; + } + } + } + + auto CreateHandleFromHeapFn = HlslOP->GetOpFunc( + DXIL::OpCode::CreateHandleFromHeap, Type::getVoidTy(Ctx)); + for (auto FI = CreateHandleFromHeapFn->user_begin(); + FI != CreateHandleFromHeapFn->user_end();) { + auto *FunctionUser = *FI++; + auto instruction = cast(FunctionUser); + Value *index = instruction->getOperand( + DXIL::OperandIndex::kCreateHandleFromHeapHeapIndexOpIdx); + if (!isa(index)) { + const DxilInst_CreateHandleFromHeap createHandle(instruction); + if (!Visitor(createHandle.get_nonUniformIndex_val(), instruction, + index)) { + return; + } + } + } +} + #ifdef PIX_DEBUG_DUMP_HELPER static int g_logIndent = 0; diff --git a/lib/DxilPIXPasses/PixPassHelpers.h b/lib/DxilPIXPasses/PixPassHelpers.h index 4cd0e1a549..d7b0b40af8 100644 --- a/lib/DxilPIXPasses/PixPassHelpers.h +++ b/lib/DxilPIXPasses/PixPassHelpers.h @@ -9,6 +9,7 @@ #pragma once +#include #include #include "dxc/DXIL/DxilModule.h" @@ -16,7 +17,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" -//#define PIX_DEBUG_DUMP_HELPER +// #define PIX_DEBUG_DUMP_HELPER #ifdef PIX_DEBUG_DUMP_HELPER #include "dxc/Support/Global.h" #endif @@ -82,4 +83,8 @@ void ReplaceAllUsesOfInstructionWithNewValueAndDeleteInstruction( llvm::Instruction *Instr, llvm::Value *newValue, llvm::Type *newType); unsigned int FindOrAddSV_Position(hlsl::DxilModule &DM, unsigned UpStreamSVPosRow); +void ForEachDynamicallyIndexedResource( + hlsl::DxilModule &DM, + const std::function + &Visitor); } // namespace PIXPassHelpers diff --git 
a/lib/DxilValidation/DxilContainerValidation.cpp b/lib/DxilValidation/DxilContainerValidation.cpp index 890e90e354..89e23767fe 100644 --- a/lib/DxilValidation/DxilContainerValidation.cpp +++ b/lib/DxilValidation/DxilContainerValidation.cpp @@ -337,7 +337,7 @@ void PSVContentVerifier::VerifySignatureElement( PSVSignatureElement PSVSE(StrTab, IndexTab, PSVSE0); if (SE.IsArbitrary()) - Mismatch |= strcmp(PSVSE.GetSemanticName(), SE.GetName()); + Mismatch |= strcmp(PSVSE.GetSemanticName(), SE.GetName()) != 0; else Mismatch |= PSVSE0->SemanticKind != static_cast(SE.GetKind()); @@ -494,7 +494,8 @@ void PSVContentVerifier::Verify(unsigned ValMajor, unsigned ValMinor, std::to_string(ShaderStage)); return; } - if (PSV1->UsesViewID != DM.m_ShaderFlags.GetViewID()) + bool ViewIDUsed = PSV1->UsesViewID != 0; + if (ViewIDUsed != DM.m_ShaderFlags.GetViewID()) EmitMismatchError("UsesViewID", std::to_string(PSV1->UsesViewID), std::to_string(DM.m_ShaderFlags.GetViewID())); diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index 0a2001a745..00a6b9ae14 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -65,8 +65,8 @@ using std::vector; namespace hlsl { // PrintDiagnosticContext methods. 
-PrintDiagnosticContext::PrintDiagnosticContext(DiagnosticPrinter &printer) - : m_Printer(printer), m_errorsFound(false), m_warningsFound(false) {} +PrintDiagnosticContext::PrintDiagnosticContext(DiagnosticPrinter &Printer) + : m_Printer(Printer), m_errorsFound(false), m_warningsFound(false) {} bool PrintDiagnosticContext::HasErrors() const { return m_errorsFound; } bool PrintDiagnosticContext::HasWarnings() const { return m_warningsFound; } @@ -97,68 +97,68 @@ struct PSExecutionInfo { }; static unsigned ValidateSignatureRowCol(Instruction *I, - DxilSignatureElement &SE, Value *rowVal, - Value *colVal, EntryStatus &Status, + DxilSignatureElement &SE, Value *RowVal, + Value *ColVal, EntryStatus &Status, ValidationContext &ValCtx) { - if (ConstantInt *constRow = dyn_cast(rowVal)) { - unsigned row = constRow->getLimitedValue(); - if (row >= SE.GetRows()) { - std::string range = std::string("0~") + std::to_string(SE.GetRows()); + if (ConstantInt *ConstRow = dyn_cast(RowVal)) { + unsigned Row = ConstRow->getLimitedValue(); + if (Row >= SE.GetRows()) { + std::string Range = std::string("0~") + std::to_string(SE.GetRows()); ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOperandRange, - {"Row", range, std::to_string(row)}); + {"Row", Range, std::to_string(Row)}); } } - if (!isa(colVal)) { - // col must be const + if (!isa(ColVal)) { + // Col must be const ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOpConst, {"Col", "LoadInput/StoreOutput"}); return 0; } - unsigned col = cast(colVal)->getLimitedValue(); + unsigned Col = cast(ColVal)->getLimitedValue(); - if (col > SE.GetCols()) { - std::string range = std::string("0~") + std::to_string(SE.GetCols()); + if (Col > SE.GetCols()) { + std::string Range = std::string("0~") + std::to_string(SE.GetCols()); ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOperandRange, - {"Col", range, std::to_string(col)}); + {"Col", Range, std::to_string(Col)}); } else { if (SE.IsOutput()) - Status.outputCols[SE.GetID()] |= 1 << 
col; + Status.outputCols[SE.GetID()] |= 1 << Col; if (SE.IsPatchConstOrPrim()) - Status.patchConstOrPrimCols[SE.GetID()] |= 1 << col; + Status.patchConstOrPrimCols[SE.GetID()] |= 1 << Col; } - return col; + return Col; } static DxilSignatureElement * -ValidateSignatureAccess(Instruction *I, DxilSignature &sig, Value *sigID, - Value *rowVal, Value *colVal, EntryStatus &Status, +ValidateSignatureAccess(Instruction *I, DxilSignature &Sig, Value *SigId, + Value *RowVal, Value *ColVal, EntryStatus &Status, ValidationContext &ValCtx) { - if (!isa(sigID)) { + if (!isa(SigId)) { // inputID must be const ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOpConst, {"SignatureID", "LoadInput/StoreOutput"}); return nullptr; } - unsigned SEIdx = cast(sigID)->getLimitedValue(); - if (sig.GetElements().size() <= SEIdx) { + unsigned SEIdx = cast(SigId)->getLimitedValue(); + if (Sig.GetElements().size() <= SEIdx) { ValCtx.EmitInstrError(I, ValidationRule::InstrOpConstRange); return nullptr; } - DxilSignatureElement &SE = sig.GetElement(SEIdx); - bool isOutput = sig.IsOutput(); + DxilSignatureElement &SE = Sig.GetElement(SEIdx); + bool IsOutput = Sig.IsOutput(); - unsigned col = ValidateSignatureRowCol(I, SE, rowVal, colVal, Status, ValCtx); + unsigned Col = ValidateSignatureRowCol(I, SE, RowVal, ColVal, Status, ValCtx); - if (isOutput && SE.GetSemantic()->GetKind() == DXIL::SemanticKind::Position) { - unsigned mask = Status.OutputPositionMask[SE.GetOutputStream()]; - mask |= 1 << col; + if (IsOutput && SE.GetSemantic()->GetKind() == DXIL::SemanticKind::Position) { + unsigned Mask = Status.OutputPositionMask[SE.GetOutputStream()]; + Mask |= 1 << Col; if (SE.GetOutputStream() < DXIL::kNumOutputStreams) - Status.OutputPositionMask[SE.GetOutputStream()] = mask; + Status.OutputPositionMask[SE.GetOutputStream()] = Mask; } return &SE; } @@ -183,9 +183,9 @@ static DxilResourceProperties GetResourceFromHandle(Value *Handle, return RP; } -static DXIL::SamplerKind GetSamplerKind(Value 
*samplerHandle, +static DXIL::SamplerKind GetSamplerKind(Value *SamplerHandle, ValidationContext &ValCtx) { - DxilResourceProperties RP = GetResourceFromHandle(samplerHandle, ValCtx); + DxilResourceProperties RP = GetResourceFromHandle(SamplerHandle, ValCtx); if (RP.getResourceClass() != DXIL::ResourceClass::Sampler) { // must be sampler. @@ -200,14 +200,14 @@ static DXIL::SamplerKind GetSamplerKind(Value *samplerHandle, } static DXIL::ResourceKind -GetResourceKindAndCompTy(Value *handle, DXIL::ComponentType &CompTy, +GetResourceKindAndCompTy(Value *Handle, DXIL::ComponentType &CompTy, DXIL::ResourceClass &ResClass, ValidationContext &ValCtx) { CompTy = DXIL::ComponentType::Invalid; ResClass = DXIL::ResourceClass::Invalid; // TODO: validate ROV is used only in PS. - DxilResourceProperties RP = GetResourceFromHandle(handle, ValCtx); + DxilResourceProperties RP = GetResourceFromHandle(Handle, ValCtx); ResClass = RP.getResourceClass(); switch (ResClass) { @@ -230,19 +230,19 @@ GetResourceKindAndCompTy(Value *handle, DXIL::ComponentType &CompTy, return RP.getResourceKind(); } -DxilFieldAnnotation *GetFieldAnnotation(Type *Ty, DxilTypeSystem &typeSys, - std::deque &offsets) { +DxilFieldAnnotation *GetFieldAnnotation(Type *Ty, DxilTypeSystem &TypeSys, + std::deque &Offsets) { unsigned CurIdx = 1; - unsigned LastIdx = offsets.size() - 1; + unsigned LastIdx = Offsets.size() - 1; DxilStructAnnotation *StructAnnot = nullptr; - for (; CurIdx < offsets.size(); ++CurIdx) { + for (; CurIdx < Offsets.size(); ++CurIdx) { if (const StructType *EltST = dyn_cast(Ty)) { - if (DxilStructAnnotation *EltAnnot = typeSys.GetStructAnnotation(EltST)) { + if (DxilStructAnnotation *EltAnnot = TypeSys.GetStructAnnotation(EltST)) { StructAnnot = EltAnnot; - Ty = EltST->getElementType(offsets[CurIdx]); + Ty = EltST->getElementType(Offsets[CurIdx]); if (CurIdx == LastIdx) { - return &StructAnnot->GetFieldAnnotation(offsets[CurIdx]); + return &StructAnnot->GetFieldAnnotation(Offsets[CurIdx]); } } 
else { return nullptr; @@ -252,16 +252,16 @@ DxilFieldAnnotation *GetFieldAnnotation(Type *Ty, DxilTypeSystem &typeSys, StructAnnot = nullptr; } else { if (StructAnnot) - return &StructAnnot->GetFieldAnnotation(offsets[CurIdx]); + return &StructAnnot->GetFieldAnnotation(Offsets[CurIdx]); } } return nullptr; } -DxilResourceProperties ValidationContext::GetResourceFromVal(Value *resVal) { - auto it = ResPropMap.find(resVal); - if (it != ResPropMap.end()) { - return it->second; +DxilResourceProperties ValidationContext::GetResourceFromVal(Value *ResVal) { + auto It = ResPropMap.find(ResVal); + if (It != ResPropMap.end()) { + return It->second; } else { DxilResourceProperties RP; return RP; @@ -269,34 +269,34 @@ DxilResourceProperties ValidationContext::GetResourceFromVal(Value *resVal) { } struct ResRetUsage { - bool x; - bool y; - bool z; - bool w; - bool status; - ResRetUsage() : x(false), y(false), z(false), w(false), status(false) {} + bool X; + bool Y; + bool Z; + bool W; + bool Status; + ResRetUsage() : X(false), Y(false), Z(false), W(false), Status(false) {} }; -static void CollectGetDimResRetUsage(ResRetUsage &usage, Instruction *ResRet, +static void CollectGetDimResRetUsage(ResRetUsage &Usage, Instruction *ResRet, ValidationContext &ValCtx) { for (User *U : ResRet->users()) { if (ExtractValueInst *EVI = dyn_cast(U)) { - for (unsigned idx : EVI->getIndices()) { - switch (idx) { + for (unsigned Idx : EVI->getIndices()) { + switch (Idx) { case 0: - usage.x = true; + Usage.X = true; break; case 1: - usage.y = true; + Usage.Y = true; break; case 2: - usage.z = true; + Usage.Z = true; break; case 3: - usage.w = true; + Usage.W = true; break; case DXIL::kResRetStatusIndex: - usage.status = true; + Usage.Status = true; break; default: // Emit index out of bound. 
@@ -306,7 +306,7 @@ static void CollectGetDimResRetUsage(ResRetUsage &usage, Instruction *ResRet, } } } else if (PHINode *PHI = dyn_cast(U)) { - CollectGetDimResRetUsage(usage, PHI, ValCtx); + CollectGetDimResRetUsage(Usage, PHI, ValCtx); } else { Instruction *User = cast(U); ValCtx.EmitInstrError(User, ValidationRule::InstrDxilStructUser); @@ -314,18 +314,18 @@ static void CollectGetDimResRetUsage(ResRetUsage &usage, Instruction *ResRet, } } -static void ValidateResourceCoord(CallInst *CI, DXIL::ResourceKind resKind, - ArrayRef coords, +static void ValidateResourceCoord(CallInst *CI, DXIL::ResourceKind ResKind, + ArrayRef Coords, ValidationContext &ValCtx) { - const unsigned kMaxNumCoords = 4; - unsigned numCoords = DxilResource::GetNumCoords(resKind); - for (unsigned i = 0; i < kMaxNumCoords; i++) { - if (i < numCoords) { - if (isa(coords[i])) { + const unsigned KMaxNumCoords = 4; + unsigned NumCoords = DxilResource::GetNumCoords(ResKind); + for (unsigned I = 0; I < KMaxNumCoords; I++) { + if (I < NumCoords) { + if (isa(Coords[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceCoordinateMiss); } } else { - if (!isa(coords[i])) { + if (!isa(Coords[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceCoordinateTooMany); } @@ -334,18 +334,18 @@ static void ValidateResourceCoord(CallInst *CI, DXIL::ResourceKind resKind, } static void ValidateCalcLODResourceDimensionCoord(CallInst *CI, - DXIL::ResourceKind resKind, - ArrayRef coords, + DXIL::ResourceKind ResKind, + ArrayRef Coords, ValidationContext &ValCtx) { const unsigned kMaxNumDimCoords = 3; - unsigned numCoords = DxilResource::GetNumDimensionsForCalcLOD(resKind); - for (unsigned i = 0; i < kMaxNumDimCoords; i++) { - if (i < numCoords) { - if (isa(coords[i])) { + unsigned NumCoords = DxilResource::GetNumDimensionsForCalcLOD(ResKind); + for (unsigned I = 0; I < kMaxNumDimCoords; I++) { + if (I < NumCoords) { + if (isa(Coords[I])) { ValCtx.EmitInstrError(CI, 
ValidationRule::InstrResourceCoordinateMiss); } } else { - if (!isa(coords[i])) { + if (!isa(Coords[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceCoordinateTooMany); } @@ -353,21 +353,21 @@ static void ValidateCalcLODResourceDimensionCoord(CallInst *CI, } } -static void ValidateResourceOffset(CallInst *CI, DXIL::ResourceKind resKind, - ArrayRef offsets, +static void ValidateResourceOffset(CallInst *CI, DXIL::ResourceKind ResKind, + ArrayRef Offsets, ValidationContext &ValCtx) { const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel(); - unsigned numOffsets = DxilResource::GetNumOffsets(resKind); - bool hasOffset = !isa(offsets[0]); + unsigned NumOffsets = DxilResource::GetNumOffsets(ResKind); + bool HasOffset = !isa(Offsets[0]); - auto validateOffset = [&](Value *offset) { + auto ValidateOffset = [&](Value *Offset) { // 6.7 Advanced Textures allow programmable offsets if (pSM->IsSM67Plus()) return; - if (ConstantInt *cOffset = dyn_cast(offset)) { - int offset = cOffset->getValue().getSExtValue(); - if (offset > 7 || offset < -8) { + if (ConstantInt *cOffset = dyn_cast(Offset)) { + int Offset = cOffset->getValue().getSExtValue(); + if (Offset > 7 || Offset < -8) { ValCtx.EmitInstrError(CI, ValidationRule::InstrTextureOffset); } } else { @@ -375,20 +375,20 @@ static void ValidateResourceOffset(CallInst *CI, DXIL::ResourceKind resKind, } }; - if (hasOffset) { - validateOffset(offsets[0]); + if (HasOffset) { + ValidateOffset(Offsets[0]); } - for (unsigned i = 1; i < offsets.size(); i++) { - if (i < numOffsets) { - if (hasOffset) { - if (isa(offsets[i])) + for (unsigned I = 1; I < Offsets.size(); I++) { + if (I < NumOffsets) { + if (HasOffset) { + if (isa(Offsets[I])) ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetMiss); else - validateOffset(offsets[i]); + ValidateOffset(Offsets[I]); } } else { - if (!isa(offsets[i])) { + if (!isa(Offsets[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetTooMany); } } @@ -405,53 +405,53 
@@ static void ValidateDerivativeOp(CallInst *CI, ValidationContext &ValCtx) { {"Derivatives in CS/MS/AS", "Shader Model 6.6+"}); } -static void ValidateSampleInst(CallInst *CI, Value *srvHandle, - Value *samplerHandle, ArrayRef coords, - ArrayRef offsets, bool IsSampleC, +static void ValidateSampleInst(CallInst *CI, Value *SrvHandle, + Value *SamplerHandle, ArrayRef Coords, + ArrayRef Offsets, bool IsSampleC, ValidationContext &ValCtx) { if (!IsSampleC) { - if (GetSamplerKind(samplerHandle, ValCtx) != DXIL::SamplerKind::Default) { + if (GetSamplerKind(SamplerHandle, ValCtx) != DXIL::SamplerKind::Default) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForSample); } } else { - if (GetSamplerKind(samplerHandle, ValCtx) != + if (GetSamplerKind(SamplerHandle, ValCtx) != DXIL::SamplerKind::Comparison) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForSampleC); } } - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(srvHandle, compTy, resClass, ValCtx); - bool isSampleCompTy = compTy == DXIL::ComponentType::F32; - isSampleCompTy |= compTy == DXIL::ComponentType::SNormF32; - isSampleCompTy |= compTy == DXIL::ComponentType::UNormF32; - isSampleCompTy |= compTy == DXIL::ComponentType::F16; - isSampleCompTy |= compTy == DXIL::ComponentType::SNormF16; - isSampleCompTy |= compTy == DXIL::ComponentType::UNormF16; + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(SrvHandle, CompTy, ResClass, ValCtx); + bool IsSampleCompTy = CompTy == DXIL::ComponentType::F32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::SNormF32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::UNormF32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::F16; + IsSampleCompTy |= CompTy == DXIL::ComponentType::SNormF16; + IsSampleCompTy |= CompTy == DXIL::ComponentType::UNormF16; const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel(); if 
(pSM->IsSM67Plus() && !IsSampleC) { - isSampleCompTy |= compTy == DXIL::ComponentType::I16; - isSampleCompTy |= compTy == DXIL::ComponentType::U16; - isSampleCompTy |= compTy == DXIL::ComponentType::I32; - isSampleCompTy |= compTy == DXIL::ComponentType::U32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::I16; + IsSampleCompTy |= CompTy == DXIL::ComponentType::U16; + IsSampleCompTy |= CompTy == DXIL::ComponentType::I32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::U32; } - if (!isSampleCompTy) { + if (!IsSampleCompTy) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSampleCompType); } - if (resClass != DXIL::ResourceClass::SRV) { + if (ResClass != DXIL::ResourceClass::SRV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForSamplerGather); } - ValidationRule rule = ValidationRule::InstrResourceKindForSample; + ValidationRule Rule = ValidationRule::InstrResourceKindForSample; if (IsSampleC) { - rule = ValidationRule::InstrResourceKindForSampleC; + Rule = ValidationRule::InstrResourceKindForSampleC; } - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: case DXIL::ResourceKind::Texture1DArray: case DXIL::ResourceKind::Texture2D: @@ -461,64 +461,64 @@ static void ValidateSampleInst(CallInst *CI, Value *srvHandle, break; case DXIL::ResourceKind::Texture3D: if (IsSampleC) { - ValCtx.EmitInstrError(CI, rule); + ValCtx.EmitInstrError(CI, Rule); } break; default: - ValCtx.EmitInstrError(CI, rule); + ValCtx.EmitInstrError(CI, Rule); return; } // Coord match resource kind. - ValidateResourceCoord(CI, resKind, coords, ValCtx); + ValidateResourceCoord(CI, ResKind, Coords, ValCtx); // Offset match resource kind. 
- ValidateResourceOffset(CI, resKind, offsets, ValCtx); + ValidateResourceOffset(CI, ResKind, Offsets, ValCtx); } -static void ValidateGather(CallInst *CI, Value *srvHandle, Value *samplerHandle, - ArrayRef coords, ArrayRef offsets, +static void ValidateGather(CallInst *CI, Value *SrvHandle, Value *SamplerHandle, + ArrayRef Coords, ArrayRef Offsets, bool IsSampleC, ValidationContext &ValCtx) { if (!IsSampleC) { - if (GetSamplerKind(samplerHandle, ValCtx) != DXIL::SamplerKind::Default) { + if (GetSamplerKind(SamplerHandle, ValCtx) != DXIL::SamplerKind::Default) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForSample); } } else { - if (GetSamplerKind(samplerHandle, ValCtx) != + if (GetSamplerKind(SamplerHandle, ValCtx) != DXIL::SamplerKind::Comparison) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForSampleC); } } - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(srvHandle, compTy, resClass, ValCtx); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(SrvHandle, CompTy, ResClass, ValCtx); - if (resClass != DXIL::ResourceClass::SRV) { + if (ResClass != DXIL::ResourceClass::SRV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForSamplerGather); return; } // Coord match resource kind. - ValidateResourceCoord(CI, resKind, coords, ValCtx); + ValidateResourceCoord(CI, ResKind, Coords, ValCtx); // Offset match resource kind. 
- switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture2D: case DXIL::ResourceKind::Texture2DArray: { - bool hasOffset = !isa(offsets[0]); - if (hasOffset) { - if (isa(offsets[1])) { + bool HasOffset = !isa(Offsets[0]); + if (HasOffset) { + if (isa(Offsets[1])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetMiss); } } } break; case DXIL::ResourceKind::TextureCube: case DXIL::ResourceKind::TextureCubeArray: { - if (!isa(offsets[0])) { + if (!isa(Offsets[0])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetTooMany); } - if (!isa(offsets[1])) { + if (!isa(Offsets[1])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetTooMany); } } break; @@ -529,21 +529,21 @@ static void ValidateGather(CallInst *CI, Value *srvHandle, Value *samplerHandle, } } -static unsigned StoreValueToMask(ArrayRef vals) { - unsigned mask = 0; - for (unsigned i = 0; i < 4; i++) { - if (!isa(vals[i])) { - mask |= 1 << i; +static unsigned StoreValueToMask(ArrayRef Vals) { + unsigned Mask = 0; + for (unsigned I = 0; I < 4; I++) { + if (!isa(Vals[I])) { + Mask |= 1 << I; } } - return mask; + return Mask; } -static int GetCBufSize(Value *cbHandle, ValidationContext &ValCtx) { - DxilResourceProperties RP = GetResourceFromHandle(cbHandle, ValCtx); +static int GetCBufSize(Value *CbHandle, ValidationContext &ValCtx) { + DxilResourceProperties RP = GetResourceFromHandle(CbHandle, ValCtx); if (RP.getResourceClass() != DXIL::ResourceClass::CBuffer) { - ValCtx.EmitInstrError(cast(cbHandle), + ValCtx.EmitInstrError(cast(CbHandle), ValidationRule::InstrCBufferClassForCBufferHandle); return -1; } @@ -554,7 +554,7 @@ static int GetCBufSize(Value *cbHandle, ValidationContext &ValCtx) { // Make sure none of the handle arguments are undef / zero-initializer, // Also, do not accept any resource handles with invalid dxil resource // properties -void ValidateHandleArgsForInstruction(CallInst *CI, DXIL::OpCode opcode, +void 
ValidateHandleArgsForInstruction(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { for (Value *op : CI->operands()) { @@ -563,13 +563,13 @@ void ValidateHandleArgsForInstruction(CallInst *CI, DXIL::OpCode opcode, const Type *pNodeRecordHandleTy = ValCtx.DxilMod.GetOP()->GetNodeRecordHandleType(); - const Type *argTy = op->getType(); - if (argTy == pNodeHandleTy || argTy == pNodeRecordHandleTy || - argTy == pHandleTy) { + const Type *ArgTy = op->getType(); + if (ArgTy == pNodeHandleTy || ArgTy == pNodeRecordHandleTy || + ArgTy == pHandleTy) { if (isa(op) || isa(op)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrNoReadingUninitialized); - } else if (argTy == pHandleTy) { + } else if (ArgTy == pHandleTy) { // GetResourceFromHandle will emit an error on an invalid handle GetResourceFromHandle(op, ValCtx); } @@ -577,10 +577,10 @@ void ValidateHandleArgsForInstruction(CallInst *CI, DXIL::OpCode opcode, } } -void ValidateHandleArgs(CallInst *CI, DXIL::OpCode opcode, +void ValidateHandleArgs(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { - switch (opcode) { + switch (Opcode) { // TODO: add case DXIL::OpCode::IndexNodeRecordHandle: case DXIL::OpCode::AnnotateHandle: @@ -591,12 +591,12 @@ void ValidateHandleArgs(CallInst *CI, DXIL::OpCode opcode, break; default: - ValidateHandleArgsForInstruction(CI, opcode, ValCtx); + ValidateHandleArgsForInstruction(CI, Opcode, ValCtx); break; } } -static unsigned GetNumVertices(DXIL::InputPrimitive inputPrimitive) { +static unsigned GetNumVertices(DXIL::InputPrimitive InputPrimitive) { const unsigned InputPrimitiveVertexTab[] = { 0, // Undefined = 0, 1, // Point = 1, @@ -641,26 +641,26 @@ static unsigned GetNumVertices(DXIL::InputPrimitive inputPrimitive) { 0, // LastEntry, }; - unsigned primitiveIdx = static_cast(inputPrimitive); - return InputPrimitiveVertexTab[primitiveIdx]; + unsigned PrimitiveIdx = static_cast(InputPrimitive); + return InputPrimitiveVertexTab[PrimitiveIdx]; } -static void 
ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, +static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { Function *F = CI->getParent()->getParent(); DxilModule &DM = ValCtx.DxilMod; - bool bIsPatchConstantFunc = false; + bool IsPatchConstantFunc = false; if (!DM.HasDxilEntryProps(F)) { - auto it = ValCtx.PatchConstantFuncMap.find(F); - if (it == ValCtx.PatchConstantFuncMap.end()) { + auto It = ValCtx.PatchConstantFuncMap.find(F); + if (It == ValCtx.PatchConstantFuncMap.end()) { // Missing entry props. ValCtx.EmitInstrError(CI, ValidationRule::InstrSignatureOperationNotInEntry); return; } // Use hull entry instead of patch constant function. - F = it->second.front(); - bIsPatchConstantFunc = true; + F = It->second.front(); + IsPatchConstantFunc = true; } if (!ValCtx.HasEntryStatus(F)) { return; @@ -668,67 +668,67 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, EntryStatus &Status = ValCtx.GetEntryStatus(F); DxilEntryProps &EntryProps = DM.GetDxilEntryProps(F); - DxilFunctionProps &props = EntryProps.props; + DxilFunctionProps &Props = EntryProps.props; DxilEntrySignature &S = EntryProps.sig; - switch (opcode) { + switch (Opcode) { case DXIL::OpCode::LoadInput: { - Value *inputID = CI->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); - DxilSignature &inputSig = S.InputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); - ValidateSignatureAccess(CI, inputSig, inputID, row, col, Status, ValCtx); - - // Check vertexID in ps/vs. and none array input. 
- Value *vertexID = + Value *InputId = CI->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); + DxilSignature &InputSig = S.InputSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); + ValidateSignatureAccess(CI, InputSig, InputId, Row, Col, Status, ValCtx); + + // Check VertexId in ps/vs. and none array input. + Value *VertexId = CI->getArgOperand(DXIL::OperandIndex::kLoadInputVertexIDOpIdx); - bool usedVertexID = vertexID && !isa(vertexID); - if (props.IsVS() || props.IsPS()) { - if (usedVertexID) { - // use vertexID in VS/PS input. + bool UsedVertexId = VertexId && !isa(VertexId); + if (Props.IsVS() || Props.IsPS()) { + if (UsedVertexId) { + // Use VertexId in VS/PS input. ValCtx.EmitInstrError(CI, ValidationRule::SmOperand); return; } } else { - if (ConstantInt *cVertexID = dyn_cast(vertexID)) { - int immVertexID = cVertexID->getValue().getLimitedValue(); - if (cVertexID->getValue().isNegative()) { - immVertexID = cVertexID->getValue().getSExtValue(); + if (ConstantInt *cVertexId = dyn_cast(VertexId)) { + int ImmVertexId = cVertexId->getValue().getLimitedValue(); + if (cVertexId->getValue().isNegative()) { + ImmVertexId = cVertexId->getValue().getSExtValue(); } - const int low = 0; - int high = 0; - if (props.IsGS()) { - DXIL::InputPrimitive inputPrimitive = - props.ShaderProps.GS.inputPrimitive; - high = GetNumVertices(inputPrimitive); - } else if (props.IsDS()) { - high = props.ShaderProps.DS.inputControlPoints; - } else if (props.IsHS()) { - high = props.ShaderProps.HS.inputControlPoints; + const int Low = 0; + int High = 0; + if (Props.IsGS()) { + DXIL::InputPrimitive InputPrimitive = + Props.ShaderProps.GS.inputPrimitive; + High = GetNumVertices(InputPrimitive); + } else if (Props.IsDS()) { + High = Props.ShaderProps.DS.inputControlPoints; + } else if (Props.IsHS()) { + High = Props.ShaderProps.HS.inputControlPoints; } else { 
ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"LoadInput", "VS/HS/DS/GS/PS"}); } - if (immVertexID < low || immVertexID >= high) { - std::string range = std::to_string(low) + "~" + std::to_string(high); + if (ImmVertexId < Low || ImmVertexId >= High) { + std::string Range = std::to_string(Low) + "~" + std::to_string(High); ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrOperandRange, - {"VertexID", range, std::to_string(immVertexID)}); + {"VertexID", Range, std::to_string(ImmVertexId)}); } } } } break; case DXIL::OpCode::DomainLocation: { - Value *colValue = + Value *ColValue = CI->getArgOperand(DXIL::OperandIndex::kDomainLocationColOpIdx); - if (!isa(colValue)) { - // col must be const + if (!isa(ColValue)) { + // Col must be const ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrOpConst, {"Col", "DomainLocation"}); } else { - unsigned col = cast(colValue)->getLimitedValue(); - if (col >= Status.domainLocSize) { + unsigned Col = cast(ColValue)->getLimitedValue(); + if (Col >= Status.domainLocSize) { ValCtx.EmitInstrError(CI, ValidationRule::SmDomainLocationIdxOOB); } } @@ -736,60 +736,60 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::OpCode::StoreOutput: case DXIL::OpCode::StoreVertexOutput: case DXIL::OpCode::StorePrimitiveOutput: { - Value *outputID = + Value *OutputId = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputIDOpIdx); - DxilSignature &outputSig = opcode == DXIL::OpCode::StorePrimitiveOutput + DxilSignature &OutputSig = Opcode == DXIL::OpCode::StorePrimitiveOutput ? 
S.PatchConstOrPrimSignature : S.OutputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); - ValidateSignatureAccess(CI, outputSig, outputID, row, col, Status, ValCtx); + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); + ValidateSignatureAccess(CI, OutputSig, OutputId, Row, Col, Status, ValCtx); } break; case DXIL::OpCode::OutputControlPointID: { // Only used in hull shader. - Function *func = CI->getParent()->getParent(); + Function *Func = CI->getParent()->getParent(); // Make sure this is inside hs shader entry function. - if (!(props.IsHS() && F == func)) { + if (!(Props.IsHS() && F == Func)) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"OutputControlPointID", "hull function"}); } } break; case DXIL::OpCode::LoadOutputControlPoint: { // Only used in patch constant function. 
- Function *func = CI->getParent()->getParent(); - if (ValCtx.entryFuncCallSet.count(func) > 0) { + Function *Func = CI->getParent()->getParent(); + if (ValCtx.entryFuncCallSet.count(Func) > 0) { ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcodeInInvalidFunction, {"LoadOutputControlPoint", "PatchConstant function"}); } - Value *outputID = + Value *OutputId = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputIDOpIdx); - DxilSignature &outputSig = S.OutputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); - ValidateSignatureAccess(CI, outputSig, outputID, row, col, Status, ValCtx); + DxilSignature &OutputSig = S.OutputSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); + ValidateSignatureAccess(CI, OutputSig, OutputId, Row, Col, Status, ValCtx); } break; case DXIL::OpCode::StorePatchConstant: { // Only used in patch constant function. 
- Function *func = CI->getParent()->getParent(); - if (!bIsPatchConstantFunc) { + Function *Func = CI->getParent()->getParent(); + if (!IsPatchConstantFunc) { ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcodeInInvalidFunction, {"StorePatchConstant", "PatchConstant function"}); } else { - auto &hullShaders = ValCtx.PatchConstantFuncMap[func]; - for (Function *F : hullShaders) { + auto &HullShaders = ValCtx.PatchConstantFuncMap[Func]; + for (Function *F : HullShaders) { EntryStatus &Status = ValCtx.GetEntryStatus(F); DxilEntryProps &EntryProps = DM.GetDxilEntryProps(F); DxilEntrySignature &S = EntryProps.sig; - Value *outputID = + Value *OutputId = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputIDOpIdx); - DxilSignature &outputSig = S.PatchConstOrPrimSignature; - Value *row = + DxilSignature &OutputSig = S.PatchConstOrPrimSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); - Value *col = + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); - ValidateSignatureAccess(CI, outputSig, outputID, row, col, Status, + ValidateSignatureAccess(CI, OutputSig, OutputId, Row, Col, Status, ValCtx); } } @@ -807,12 +807,12 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::OpCode::EvalSampleIndex: case DXIL::OpCode::EvalSnapped: { // Eval* share same operand index with load input. 
- Value *inputID = CI->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); - DxilSignature &inputSig = S.InputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); + Value *InputId = CI->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); + DxilSignature &InputSig = S.InputSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); DxilSignatureElement *pSE = ValidateSignatureAccess( - CI, inputSig, inputID, row, col, Status, ValCtx); + CI, InputSig, InputId, Row, Col, Status, ValCtx); if (pSE) { switch (pSE->GetInterpolationMode()->GetKind()) { case DXIL::InterpolationMode::Linear: @@ -836,11 +836,11 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, } break; case DXIL::OpCode::AttributeAtVertex: { Value *Attribute = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc0OpIdx); - DxilSignature &inputSig = S.InputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); + DxilSignature &InputSig = S.InputSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); DxilSignatureElement *pSE = ValidateSignatureAccess( - CI, inputSig, Attribute, row, col, Status, ValCtx); + CI, InputSig, Attribute, Row, Col, Status, ValCtx); if (pSE && pSE->GetInterpolationMode()->GetKind() != hlsl::InterpolationMode::Kind::Constant) { ValCtx.EmitInstrFormatError( @@ -851,35 +851,35 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::OpCode::CutStream: case DXIL::OpCode::EmitThenCutStream: case DXIL::OpCode::EmitStream: { - if (props.IsGS()) { - auto &GS = props.ShaderProps.GS; - unsigned streamMask = 0; - for (size_t i = 0; i < 
_countof(GS.streamPrimitiveTopologies); ++i) { - if (GS.streamPrimitiveTopologies[i] != + if (Props.IsGS()) { + auto &GS = Props.ShaderProps.GS; + unsigned StreamMask = 0; + for (size_t I = 0; I < _countof(GS.streamPrimitiveTopologies); ++I) { + if (GS.streamPrimitiveTopologies[I] != DXIL::PrimitiveTopology::Undefined) { - streamMask |= 1 << i; + StreamMask |= 1 << I; } } - Value *streamID = + Value *StreamId = CI->getArgOperand(DXIL::OperandIndex::kStreamEmitCutIDOpIdx); - if (ConstantInt *cStreamID = dyn_cast(streamID)) { - int immStreamID = cStreamID->getValue().getLimitedValue(); - if (cStreamID->getValue().isNegative() || immStreamID >= 4) { + if (ConstantInt *cStreamId = dyn_cast(StreamId)) { + int ImmStreamId = cStreamId->getValue().getLimitedValue(); + if (cStreamId->getValue().isNegative() || ImmStreamId >= 4) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrOperandRange, - {"StreamID", "0~4", std::to_string(immStreamID)}); + {"StreamID", "0~4", std::to_string(ImmStreamId)}); } else { - unsigned immMask = 1 << immStreamID; - if ((streamMask & immMask) == 0) { - std::string range; - for (unsigned i = 0; i < 4; i++) { - if (streamMask & (1 << i)) { - range += std::to_string(i) + " "; + unsigned ImmMask = 1 << ImmStreamId; + if ((StreamMask & ImmMask) == 0) { + std::string Range; + for (unsigned I = 0; I < 4; I++) { + if (StreamMask & (1 << I)) { + Range += std::to_string(I) + " "; } } ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrOperandRange, - {"StreamID", range, std::to_string(immStreamID)}); + {"StreamID", Range, std::to_string(ImmStreamId)}); } } @@ -893,25 +893,25 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, } } break; case DXIL::OpCode::EmitIndices: { - if (!props.IsMS()) { + if (!Props.IsMS()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"EmitIndices", "Mesh shader"}); } } break; case DXIL::OpCode::SetMeshOutputCounts: { - if (!props.IsMS()) { + if (!Props.IsMS()) { 
ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"SetMeshOutputCounts", "Mesh shader"}); } } break; case DXIL::OpCode::GetMeshPayload: { - if (!props.IsMS()) { + if (!Props.IsMS()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"GetMeshPayload", "Mesh shader"}); } } break; case DXIL::OpCode::DispatchMesh: { - if (!props.IsAS()) { + if (!Props.IsAS()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"DispatchMesh", "Amplification shader"}); } @@ -925,9 +925,9 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, } } -static void ValidateImmOperandForMathDxilOp(CallInst *CI, DXIL::OpCode opcode, +static void ValidateImmOperandForMathDxilOp(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { - switch (opcode) { + switch (Opcode) { // Imm input value validation. case DXIL::OpCode::Asin: { DxilInst_Asin I(CI); @@ -973,77 +973,86 @@ static void ValidateImmOperandForMathDxilOp(CallInst *CI, DXIL::OpCode opcode, // Validate the type-defined mask compared to the store value mask which // indicates which parts were defined returns true if caller should continue // validation -static bool ValidateStorageMasks(Instruction *I, DXIL::OpCode opcode, - ConstantInt *mask, unsigned stValMask, - bool isTyped, ValidationContext &ValCtx) { - if (!mask) { +static bool ValidateStorageMasks(Instruction *I, DXIL::OpCode Opcode, + ConstantInt *Mask, unsigned StValMask, + bool IsTyped, ValidationContext &ValCtx) { + if (!Mask) { // Mask for buffer store should be immediate. 
ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOpConst, - {"Mask", hlsl::OP::GetOpCodeName(opcode)}); + {"Mask", hlsl::OP::GetOpCodeName(Opcode)}); return false; } - unsigned uMask = mask->getLimitedValue(); - if (isTyped && uMask != 0xf) { + unsigned UMask = Mask->getLimitedValue(); + if (IsTyped && UMask != 0xf) { ValCtx.EmitInstrError(I, ValidationRule::InstrWriteMaskForTypedUAVStore); } // write mask must be contiguous (.x .xy .xyz or .xyzw) - if (!((uMask == 0xf) || (uMask == 0x7) || (uMask == 0x3) || (uMask == 0x1))) { + if (!((UMask == 0xf) || (UMask == 0x7) || (UMask == 0x3) || (UMask == 0x1))) { ValCtx.EmitInstrError(I, ValidationRule::InstrWriteMaskGapForUAV); } - // If a bit is set in the uMask (expected values) that isn't set in stValMask + // If a bit is set in the UMask (expected values) that isn't set in StValMask // (user provided values) then the user failed to define some of the output // values. - if (uMask & ~stValMask) + if (UMask & ~StValMask) ValCtx.EmitInstrError(I, ValidationRule::InstrUndefinedValueForUAVStore); - else if (uMask != stValMask) + else if (UMask != StValMask) ValCtx.EmitInstrFormatError( I, ValidationRule::InstrWriteMaskMatchValueForUAVStore, - {std::to_string(uMask), std::to_string(stValMask)}); + {std::to_string(UMask), std::to_string(StValMask)}); return true; } -static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, +static void ValidateASHandle(CallInst *CI, Value *Hdl, + ValidationContext &ValCtx) { + DxilResourceProperties RP = ValCtx.GetResourceFromVal(Hdl); + if (RP.getResourceClass() == DXIL::ResourceClass::Invalid || + RP.getResourceKind() != DXIL::ResourceKind::RTAccelerationStructure) { + ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceKindForTraceRay); + } +} + +static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { - switch (opcode) { + switch (Opcode) { case DXIL::OpCode::GetDimensions: { - DxilInst_GetDimensions getDim(CI); - Value *handle 
= getDim.get_handle(); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(handle, compTy, resClass, ValCtx); + DxilInst_GetDimensions GetDim(CI); + Value *Handle = GetDim.get_handle(); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(Handle, CompTy, ResClass, ValCtx); // Check the result component use. - ResRetUsage usage; - CollectGetDimResRetUsage(usage, CI, ValCtx); + ResRetUsage Usage; + CollectGetDimResRetUsage(Usage, CI, ValCtx); // Mip level only for texture. - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: - if (usage.y) { + if (Usage.Y) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"y", "Texture1D"}); } - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "Texture1D"}); } break; case DXIL::ResourceKind::Texture1DArray: - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "Texture1DArray"}); } break; case DXIL::ResourceKind::Texture2D: - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "Texture2D"}); @@ -1052,7 +1061,7 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::ResourceKind::Texture2DArray: break; case DXIL::ResourceKind::Texture2DMS: - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "Texture2DMS"}); @@ -1063,7 +1072,7 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::ResourceKind::Texture3D: break; case DXIL::ResourceKind::TextureCube: - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "TextureCube"}); @@ -1075,12 +1084,12 @@ static 
void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::ResourceKind::RawBuffer: case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::TBuffer: { - Value *mip = getDim.get_mipLevel(); - if (!isa(mip)) { + Value *Mip = GetDim.get_mipLevel(); + if (!isa(Mip)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrMipLevelForGetDimension); } - if (resKind != DXIL::ResourceKind::Invalid) { - if (usage.y || usage.z || usage.w) { + if (ResKind != DXIL::ResourceKind::Invalid) { + if (Usage.Y || Usage.Z || Usage.W) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"invalid", "resource"}); @@ -1092,38 +1101,38 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } break; } - if (usage.status) { + if (Usage.Status) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"invalid", "resource"}); } } break; case DXIL::OpCode::CalculateLOD: { - DxilInst_CalculateLOD lod(CI); - Value *samplerHandle = lod.get_sampler(); - DXIL::SamplerKind samplerKind = GetSamplerKind(samplerHandle, ValCtx); - if (samplerKind != DXIL::SamplerKind::Default) { + DxilInst_CalculateLOD LOD(CI); + Value *SamplerHandle = LOD.get_sampler(); + DXIL::SamplerKind SamplerKind = GetSamplerKind(SamplerHandle, ValCtx); + if (SamplerKind != DXIL::SamplerKind::Default) { // After SM68, Comparison is supported. 
if (!ValCtx.DxilMod.GetShaderModel()->IsSM68Plus() || - samplerKind != DXIL::SamplerKind::Comparison) + SamplerKind != DXIL::SamplerKind::Comparison) ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForLOD); } - Value *handle = lod.get_handle(); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(handle, compTy, resClass, ValCtx); - if (resClass != DXIL::ResourceClass::SRV) { + Value *Handle = LOD.get_handle(); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(Handle, CompTy, ResClass, ValCtx); + if (ResClass != DXIL::ResourceClass::SRV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForSamplerGather); return; } // Coord match resource. ValidateCalcLODResourceDimensionCoord( - CI, resKind, {lod.get_coord0(), lod.get_coord1(), lod.get_coord2()}, + CI, ResKind, {LOD.get_coord0(), LOD.get_coord1(), LOD.get_coord2()}, ValCtx); - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: case DXIL::ResourceKind::Texture1DArray: case DXIL::ResourceKind::Texture2D: @@ -1140,67 +1149,67 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::TextureGather: { - DxilInst_TextureGather gather(CI); - ValidateGather(CI, gather.get_srv(), gather.get_sampler(), - {gather.get_coord0(), gather.get_coord1(), - gather.get_coord2(), gather.get_coord3()}, - {gather.get_offset0(), gather.get_offset1()}, + DxilInst_TextureGather Gather(CI); + ValidateGather(CI, Gather.get_srv(), Gather.get_sampler(), + {Gather.get_coord0(), Gather.get_coord1(), + Gather.get_coord2(), Gather.get_coord3()}, + {Gather.get_offset0(), Gather.get_offset1()}, /*IsSampleC*/ false, ValCtx); } break; case DXIL::OpCode::TextureGatherCmp: { - DxilInst_TextureGatherCmp gather(CI); - ValidateGather(CI, gather.get_srv(), gather.get_sampler(), - {gather.get_coord0(), 
gather.get_coord1(), - gather.get_coord2(), gather.get_coord3()}, - {gather.get_offset0(), gather.get_offset1()}, + DxilInst_TextureGatherCmp Gather(CI); + ValidateGather(CI, Gather.get_srv(), Gather.get_sampler(), + {Gather.get_coord0(), Gather.get_coord1(), + Gather.get_coord2(), Gather.get_coord3()}, + {Gather.get_offset0(), Gather.get_offset1()}, /*IsSampleC*/ true, ValCtx); } break; case DXIL::OpCode::Sample: { - DxilInst_Sample sample(CI); + DxilInst_Sample Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ false, ValCtx); ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::SampleCmp: { - DxilInst_SampleCmp sample(CI); + DxilInst_SampleCmp Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::SampleCmpLevel: { // sampler must be comparison mode. 
- DxilInst_SampleCmpLevel sample(CI); + DxilInst_SampleCmpLevel Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); } break; case DXIL::OpCode::SampleCmpLevelZero: { // sampler must be comparison mode. - DxilInst_SampleCmpLevelZero sample(CI); + DxilInst_SampleCmpLevelZero Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); } break; case DXIL::OpCode::SampleBias: { - DxilInst_SampleBias sample(CI); - Value *bias = sample.get_bias(); - if (ConstantFP *cBias = dyn_cast(bias)) { - float fBias = cBias->getValueAPF().convertToFloat(); - if (fBias < DXIL::kMinMipLodBias || fBias > DXIL::kMaxMipLodBias) { + DxilInst_SampleBias Sample(CI); + Value *Bias = Sample.get_bias(); + if (ConstantFP *cBias = dyn_cast(Bias)) { + float FBias = cBias->getValueAPF().convertToFloat(); + if (FBias < DXIL::kMinMipLodBias || FBias > DXIL::kMaxMipLodBias) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrImmBiasForSampleB, {std::to_string(DXIL::kMinMipLodBias), @@ -1210,19 +1219,19 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), 
sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ false, ValCtx); ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::SampleCmpBias: { - DxilInst_SampleCmpBias sample(CI); - Value *bias = sample.get_bias(); - if (ConstantFP *cBias = dyn_cast(bias)) { - float fBias = cBias->getValueAPF().convertToFloat(); - if (fBias < DXIL::kMinMipLodBias || fBias > DXIL::kMaxMipLodBias) { + DxilInst_SampleCmpBias Sample(CI); + Value *Bias = Sample.get_bias(); + if (ConstantFP *cBias = dyn_cast(Bias)) { + float FBias = cBias->getValueAPF().convertToFloat(); + if (FBias < DXIL::kMinMipLodBias || FBias > DXIL::kMaxMipLodBias) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrImmBiasForSampleB, {std::to_string(DXIL::kMinMipLodBias), @@ -1232,38 +1241,38 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::SampleGrad: { - DxilInst_SampleGrad sample(CI); + DxilInst_SampleGrad Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), 
Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ false, ValCtx); } break; case DXIL::OpCode::SampleCmpGrad: { - DxilInst_SampleCmpGrad sample(CI); + DxilInst_SampleCmpGrad Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); } break; case DXIL::OpCode::SampleLevel: { - DxilInst_SampleLevel sample(CI); + DxilInst_SampleLevel Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ false, ValCtx); } break; case DXIL::OpCode::CheckAccessFullyMapped: { @@ -1273,53 +1282,53 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, ValCtx.EmitInstrError(CI, ValidationRule::InstrCheckAccessFullyMapped); } else { Value *V = EVI->getOperand(0); - bool isLegal = EVI->getNumIndices() == 1 && + bool IsLegal = EVI->getNumIndices() == 1 && EVI->getIndices()[0] == DXIL::kResRetStatusIndex && ValCtx.DxilMod.GetOP()->IsResRetType(V->getType()); - if (!isLegal) { + if (!IsLegal) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCheckAccessFullyMapped); } } } break; case DXIL::OpCode::BufferStore: { - 
DxilInst_BufferStore bufSt(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(bufSt.get_uav(), compTy, resClass, ValCtx); + DxilInst_BufferStore BufSt(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(BufSt.get_uav(), CompTy, ResClass, ValCtx); - if (resClass != DXIL::ResourceClass::UAV) { + if (ResClass != DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); } - ConstantInt *mask = dyn_cast(bufSt.get_mask()); - unsigned stValMask = - StoreValueToMask({bufSt.get_value0(), bufSt.get_value1(), - bufSt.get_value2(), bufSt.get_value3()}); + ConstantInt *Mask = dyn_cast(BufSt.get_mask()); + unsigned StValMask = + StoreValueToMask({BufSt.get_value0(), BufSt.get_value1(), + BufSt.get_value2(), BufSt.get_value3()}); - if (!ValidateStorageMasks(CI, opcode, mask, stValMask, - resKind == DXIL::ResourceKind::TypedBuffer || - resKind == DXIL::ResourceKind::TBuffer, + if (!ValidateStorageMasks(CI, Opcode, Mask, StValMask, + ResKind == DXIL::ResourceKind::TypedBuffer || + ResKind == DXIL::ResourceKind::TBuffer, ValCtx)) return; - Value *offset = bufSt.get_coord1(); + Value *Offset = BufSt.get_coord1(); - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::RawBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::TBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::StructuredBuffer: - if (isa(offset)) { + if (isa(Offset)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForStructBuf); } @@ -1332,26 +1341,26 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } break; case 
DXIL::OpCode::TextureStore: { - DxilInst_TextureStore texSt(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(texSt.get_srv(), compTy, resClass, ValCtx); + DxilInst_TextureStore TexSt(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(TexSt.get_srv(), CompTy, ResClass, ValCtx); - if (resClass != DXIL::ResourceClass::UAV) { + if (ResClass != DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); } - ConstantInt *mask = dyn_cast(texSt.get_mask()); - unsigned stValMask = - StoreValueToMask({texSt.get_value0(), texSt.get_value1(), - texSt.get_value2(), texSt.get_value3()}); + ConstantInt *Mask = dyn_cast(TexSt.get_mask()); + unsigned StValMask = + StoreValueToMask({TexSt.get_value0(), TexSt.get_value1(), + TexSt.get_value2(), TexSt.get_value3()}); - if (!ValidateStorageMasks(CI, opcode, mask, stValMask, true /*isTyped*/, + if (!ValidateStorageMasks(CI, Opcode, Mask, StValMask, true /*IsTyped*/, ValCtx)) return; - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: case DXIL::ResourceKind::Texture1DArray: case DXIL::ResourceKind::Texture2D: @@ -1367,30 +1376,30 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } } break; case DXIL::OpCode::BufferLoad: { - DxilInst_BufferLoad bufLd(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(bufLd.get_srv(), compTy, resClass, ValCtx); - - if (resClass != DXIL::ResourceClass::SRV && - resClass != DXIL::ResourceClass::UAV) { + DxilInst_BufferLoad BufLd(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(BufLd.get_srv(), CompTy, ResClass, ValCtx); + + if (ResClass != DXIL::ResourceClass::SRV && + ResClass != DXIL::ResourceClass::UAV) { 
ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForLoad); } - Value *offset = bufLd.get_wot(); + Value *Offset = BufLd.get_wot(); - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::RawBuffer: case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::TBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::StructuredBuffer: - if (isa(offset)) { + if (isa(Offset)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForStructBuf); } @@ -1403,33 +1412,33 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } break; case DXIL::OpCode::TextureLoad: { - DxilInst_TextureLoad texLd(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(texLd.get_srv(), compTy, resClass, ValCtx); - - Value *mipLevel = texLd.get_mipLevelOrSampleCount(); - - if (resClass == DXIL::ResourceClass::UAV) { - bool noOffset = isa(texLd.get_offset0()); - noOffset &= isa(texLd.get_offset1()); - noOffset &= isa(texLd.get_offset2()); - if (!noOffset) { + DxilInst_TextureLoad TexLd(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(TexLd.get_srv(), CompTy, ResClass, ValCtx); + + Value *MipLevel = TexLd.get_mipLevelOrSampleCount(); + + if (ResClass == DXIL::ResourceClass::UAV) { + bool NoOffset = isa(TexLd.get_offset0()); + NoOffset &= isa(TexLd.get_offset1()); + NoOffset &= isa(TexLd.get_offset2()); + if (!NoOffset) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOffsetOnUAVLoad); } - if (!isa(mipLevel)) { - if (resKind != DXIL::ResourceKind::Texture2DMS && - resKind != DXIL::ResourceKind::Texture2DMSArray) + if (!isa(MipLevel)) { + if (ResKind != DXIL::ResourceKind::Texture2DMS && + ResKind != DXIL::ResourceKind::Texture2DMSArray) ValCtx.EmitInstrError(CI, 
ValidationRule::InstrMipOnUAVLoad); } } else { - if (resClass != DXIL::ResourceClass::SRV) { + if (ResClass != DXIL::ResourceClass::SRV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForLoad); } } - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: case DXIL::ResourceKind::Texture1DArray: case DXIL::ResourceKind::Texture2D: @@ -1438,7 +1447,7 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, break; case DXIL::ResourceKind::Texture2DMS: case DXIL::ResourceKind::Texture2DMSArray: { - if (isa(mipLevel)) { + if (isa(MipLevel)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSampleIndexForLoad2DMS); } } break; @@ -1449,69 +1458,70 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } ValidateResourceOffset( - CI, resKind, - {texLd.get_offset0(), texLd.get_offset1(), texLd.get_offset2()}, + CI, ResKind, + {TexLd.get_offset0(), TexLd.get_offset1(), TexLd.get_offset2()}, ValCtx); } break; case DXIL::OpCode::CBufferLoad: { DxilInst_CBufferLoad CBLoad(CI); - Value *regIndex = CBLoad.get_byteOffset(); - if (ConstantInt *cIndex = dyn_cast(regIndex)) { - int offset = cIndex->getLimitedValue(); - int size = GetCBufSize(CBLoad.get_handle(), ValCtx); - if (size > 0 && offset >= size) { + Value *RegIndex = CBLoad.get_byteOffset(); + if (ConstantInt *cIndex = dyn_cast(RegIndex)) { + int Offset = cIndex->getLimitedValue(); + int Size = GetCBufSize(CBLoad.get_handle(), ValCtx); + if (Size > 0 && Offset >= Size) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCBufferOutOfBound); } } } break; case DXIL::OpCode::CBufferLoadLegacy: { DxilInst_CBufferLoadLegacy CBLoad(CI); - Value *regIndex = CBLoad.get_regIndex(); - if (ConstantInt *cIndex = dyn_cast(regIndex)) { - int offset = cIndex->getLimitedValue() * 16; // 16 bytes align - int size = GetCBufSize(CBLoad.get_handle(), ValCtx); - if (size > 0 && offset >= size) { + Value *RegIndex = CBLoad.get_regIndex(); + if (ConstantInt *cIndex = 
dyn_cast(RegIndex)) { + int Offset = cIndex->getLimitedValue() * 16; // 16 bytes align + int Size = GetCBufSize(CBLoad.get_handle(), ValCtx); + if (Size > 0 && Offset >= Size) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCBufferOutOfBound); } } } break; - case DXIL::OpCode::RawBufferLoad: { + case DXIL::OpCode::RawBufferLoad: if (!ValCtx.DxilMod.GetShaderModel()->IsSM63Plus()) { Type *Ty = OP::GetOverloadType(DXIL::OpCode::RawBufferLoad, CI->getCalledFunction()); - if (ValCtx.DL.getTypeAllocSizeInBits(Ty) > 32) { + if (ValCtx.DL.getTypeAllocSizeInBits(Ty) > 32) ValCtx.EmitInstrError(CI, ValidationRule::Sm64bitRawBufferLoadStore); - } } - DxilInst_RawBufferLoad bufLd(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(bufLd.get_srv(), compTy, resClass, ValCtx); + LLVM_FALLTHROUGH; + case DXIL::OpCode::RawBufferVectorLoad: { + Value *Handle = + CI->getOperand(DXIL::OperandIndex::kRawBufferLoadHandleOpIdx); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(Handle, CompTy, ResClass, ValCtx); + + if (ResClass != DXIL::ResourceClass::SRV && + ResClass != DXIL::ResourceClass::UAV) - if (resClass != DXIL::ResourceClass::SRV && - resClass != DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForLoad); - } - Value *offset = bufLd.get_elementOffset(); - Value *align = bufLd.get_alignment(); - unsigned alignSize = 0; - if (!isa(align)) { - ValCtx.EmitInstrError(CI, - ValidationRule::InstrCoordinateCountForRawTypedBuf); - } else { - alignSize = bufLd.get_alignment_val(); - } - switch (resKind) { + unsigned AlignIdx = DXIL::OperandIndex::kRawBufferLoadAlignmentOpIdx; + if (DXIL::OpCode::RawBufferVectorLoad == Opcode) + AlignIdx = DXIL::OperandIndex::kRawBufferVectorLoadAlignmentOpIdx; + if (!isa(CI->getOperand(AlignIdx))) + ValCtx.EmitInstrError(CI, ValidationRule::InstrConstAlignForRawBuf); + + 
Value *Offset = + CI->getOperand(DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx); + switch (ResKind) { case DXIL::ResourceKind::RawBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::StructuredBuffer: - if (isa(offset)) { + if (isa(Offset)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForStructBuf); } @@ -1526,47 +1536,53 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, if (!ValCtx.DxilMod.GetShaderModel()->IsSM63Plus()) { Type *Ty = OP::GetOverloadType(DXIL::OpCode::RawBufferStore, CI->getCalledFunction()); - if (ValCtx.DL.getTypeAllocSizeInBits(Ty) > 32) { + if (ValCtx.DL.getTypeAllocSizeInBits(Ty) > 32) ValCtx.EmitInstrError(CI, ValidationRule::Sm64bitRawBufferLoadStore); - } } DxilInst_RawBufferStore bufSt(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(bufSt.get_uav(), compTy, resClass, ValCtx); - - if (resClass != DXIL::ResourceClass::UAV) { - ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); - } - - ConstantInt *mask = dyn_cast(bufSt.get_mask()); - unsigned stValMask = + ConstantInt *Mask = dyn_cast(bufSt.get_mask()); + unsigned StValMask = StoreValueToMask({bufSt.get_value0(), bufSt.get_value1(), bufSt.get_value2(), bufSt.get_value3()}); - if (!ValidateStorageMasks(CI, opcode, mask, stValMask, false /*isTyped*/, + if (!ValidateStorageMasks(CI, Opcode, Mask, StValMask, false /*IsTyped*/, ValCtx)) return; + } + LLVM_FALLTHROUGH; + case DXIL::OpCode::RawBufferVectorStore: { + Value *Handle = + CI->getOperand(DXIL::OperandIndex::kRawBufferStoreHandleOpIdx); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(Handle, CompTy, ResClass, ValCtx); + + if (ResClass != DXIL::ResourceClass::UAV) + ValCtx.EmitInstrError(CI, 
ValidationRule::InstrResourceClassForUAVStore); - Value *offset = bufSt.get_elementOffset(); - Value *align = bufSt.get_alignment(); - unsigned alignSize = 0; - if (!isa(align)) { - ValCtx.EmitInstrError(CI, - ValidationRule::InstrCoordinateCountForRawTypedBuf); - } else { - alignSize = bufSt.get_alignment_val(); + unsigned AlignIdx = DXIL::OperandIndex::kRawBufferStoreAlignmentOpIdx; + if (DXIL::OpCode::RawBufferVectorStore == Opcode) { + AlignIdx = DXIL::OperandIndex::kRawBufferVectorStoreAlignmentOpIdx; + unsigned ValueIx = DXIL::OperandIndex::kRawBufferVectorStoreValOpIdx; + if (isa(CI->getOperand(ValueIx))) + ValCtx.EmitInstrError(CI, + ValidationRule::InstrUndefinedValueForUAVStore); } - switch (resKind) { + if (!isa(CI->getOperand(AlignIdx))) + ValCtx.EmitInstrError(CI, ValidationRule::InstrConstAlignForRawBuf); + + Value *Offset = + CI->getOperand(DXIL::OperandIndex::kRawBufferStoreElementOffsetOpIdx); + switch (ResKind) { case DXIL::ResourceKind::RawBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::StructuredBuffer: - if (isa(offset)) { + if (isa(Offset)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForStructBuf); } @@ -1578,16 +1594,14 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } } break; case DXIL::OpCode::TraceRay: { - DxilInst_TraceRay traceRay(CI); - Value *hdl = traceRay.get_AccelerationStructure(); - DxilResourceProperties RP = ValCtx.GetResourceFromVal(hdl); - if (RP.getResourceClass() == DXIL::ResourceClass::Invalid) { - ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceKindForTraceRay); - return; - } - if (RP.getResourceKind() != DXIL::ResourceKind::RTAccelerationStructure) { - ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceKindForTraceRay); - } + DxilInst_TraceRay TraceRay(CI); + Value *Hdl = TraceRay.get_AccelerationStructure(); + ValidateASHandle(CI, Hdl, ValCtx); + } 
break; + case DXIL::OpCode::HitObject_TraceRay: { + DxilInst_HitObject_TraceRay HOTraceRay(CI); + Value *Hdl = HOTraceRay.get_accelerationStructure(); + ValidateASHandle(CI, Hdl, ValCtx); } break; default: break; @@ -1595,12 +1609,12 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } static void ValidateBarrierFlagArg(ValidationContext &ValCtx, CallInst *CI, - Value *Arg, unsigned validMask, - StringRef flagName, StringRef opName) { + Value *Arg, unsigned ValidMask, + StringRef FlagName, StringRef OpName) { if (ConstantInt *CArg = dyn_cast(Arg)) { - if ((CArg->getLimitedValue() & (uint32_t)(~validMask)) != 0) { + if ((CArg->getLimitedValue() & (uint32_t)(~ValidMask)) != 0) { ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrBarrierFlagInvalid, - {flagName, opName}); + {FlagName, OpName}); } } else { ValCtx.EmitInstrError(CI, @@ -1621,36 +1635,45 @@ std::string GetLaunchTypeStr(DXIL::NodeLaunchType LT) { } } +static unsigned getSemanticFlagValidMask(const ShaderModel *pSM) { + unsigned DxilMajor, DxilMinor; + pSM->GetDxilVersion(DxilMajor, DxilMinor); + // DXIL version >= 1.9 + if (hlsl::DXIL::CompareVersions(DxilMajor, DxilMinor, 1, 9) < 0) + return static_cast(hlsl::DXIL::BarrierSemanticFlag::LegacyFlags); + return static_cast(hlsl::DXIL::BarrierSemanticFlag::ValidMask); +} + static void ValidateDxilOperationCallInProfile(CallInst *CI, - DXIL::OpCode opcode, + DXIL::OpCode Opcode, const ShaderModel *pSM, ValidationContext &ValCtx) { - DXIL::ShaderKind shaderKind = + DXIL::ShaderKind ShaderKind = pSM ? 
pSM->GetKind() : DXIL::ShaderKind::Invalid; llvm::Function *F = CI->getParent()->getParent(); - DXIL::NodeLaunchType nodeLaunchType = DXIL::NodeLaunchType::Invalid; - if (DXIL::ShaderKind::Library == shaderKind) { + DXIL::NodeLaunchType NodeLaunchType = DXIL::NodeLaunchType::Invalid; + if (DXIL::ShaderKind::Library == ShaderKind) { if (ValCtx.DxilMod.HasDxilFunctionProps(F)) { - DxilEntryProps &entryProps = ValCtx.DxilMod.GetDxilEntryProps(F); - shaderKind = ValCtx.DxilMod.GetDxilFunctionProps(F).shaderKind; - if (shaderKind == DXIL::ShaderKind::Node) - nodeLaunchType = entryProps.props.Node.LaunchType; + DxilEntryProps &EntryProps = ValCtx.DxilMod.GetDxilEntryProps(F); + ShaderKind = ValCtx.DxilMod.GetDxilFunctionProps(F).shaderKind; + if (ShaderKind == DXIL::ShaderKind::Node) + NodeLaunchType = EntryProps.props.Node.LaunchType; } else if (ValCtx.DxilMod.IsPatchConstantShader(F)) - shaderKind = DXIL::ShaderKind::Hull; + ShaderKind = DXIL::ShaderKind::Hull; } // These shader models are treted like compute - bool isCSLike = shaderKind == DXIL::ShaderKind::Compute || - shaderKind == DXIL::ShaderKind::Mesh || - shaderKind == DXIL::ShaderKind::Amplification || - shaderKind == DXIL::ShaderKind::Node; + bool IsCSLike = ShaderKind == DXIL::ShaderKind::Compute || + ShaderKind == DXIL::ShaderKind::Mesh || + ShaderKind == DXIL::ShaderKind::Amplification || + ShaderKind == DXIL::ShaderKind::Node; // Is called from a library function - bool isLibFunc = shaderKind == DXIL::ShaderKind::Library; + bool IsLibFunc = ShaderKind == DXIL::ShaderKind::Library; - ValidateHandleArgs(CI, opcode, ValCtx); + ValidateHandleArgs(CI, Opcode, ValCtx); - switch (opcode) { + switch (Opcode) { // Imm input value validation. 
case DXIL::OpCode::Asin: case DXIL::OpCode::Acos: @@ -1659,7 +1682,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, case DXIL::OpCode::DerivFineY: case DXIL::OpCode::DerivCoarseX: case DXIL::OpCode::DerivCoarseY: - ValidateImmOperandForMathDxilOp(CI, opcode, ValCtx); + ValidateImmOperandForMathDxilOp(CI, Opcode, ValCtx); break; // Resource validation. case DXIL::OpCode::GetDimensions: @@ -1684,7 +1707,9 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, case DXIL::OpCode::CBufferLoadLegacy: case DXIL::OpCode::RawBufferLoad: case DXIL::OpCode::RawBufferStore: - ValidateResourceDxilOp(CI, opcode, ValCtx); + case DXIL::OpCode::RawBufferVectorLoad: + case DXIL::OpCode::RawBufferVectorStore: + ValidateResourceDxilOp(CI, Opcode, ValCtx); break; // Input output. case DXIL::OpCode::LoadInput: @@ -1705,13 +1730,13 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, case DXIL::OpCode::EmitStream: case DXIL::OpCode::EmitThenCutStream: case DXIL::OpCode::CutStream: - ValidateSignatureDxilOp(CI, opcode, ValCtx); + ValidateSignatureDxilOp(CI, Opcode, ValCtx); break; // Special. 
case DXIL::OpCode::AllocateRayQuery: { // validate flags are immediate and compatible - llvm::Value *constRayFlag = CI->getOperand(1); - if (!llvm::isa(constRayFlag)) { + llvm::Value *ConstRayFlag = CI->getOperand(1); + if (!llvm::isa(ConstRayFlag)) { ValCtx.EmitInstrError(CI, ValidationRule::DeclAllocateRayQueryFlagsAreConst); } @@ -1719,9 +1744,9 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, } case DXIL::OpCode::AllocateRayQuery2: { // validate flags are immediate and compatible - llvm::Value *constRayFlag = CI->getOperand(1); + llvm::Value *ConstRayFlag = CI->getOperand(1); llvm::Value *RayQueryFlag = CI->getOperand(2); - if (!llvm::isa(constRayFlag) || + if (!llvm::isa(ConstRayFlag) || !llvm::isa(RayQueryFlag)) { ValCtx.EmitInstrError(CI, ValidationRule::DeclAllocateRayQuery2FlagsAreConst); @@ -1730,7 +1755,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, // When the ForceOMM2State ConstRayFlag is given as an argument to // a RayQuery object, AllowOpacityMicromaps is expected // as a RayQueryFlag argument - llvm::ConstantInt *Arg1 = llvm::cast(constRayFlag); + llvm::ConstantInt *Arg1 = llvm::cast(ConstRayFlag); llvm::ConstantInt *Arg2 = llvm::cast(RayQueryFlag); if ((Arg1->getValue().getSExtValue() & (unsigned)DXIL::RayFlag::ForceOMM2State) && @@ -1744,9 +1769,9 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, } case DXIL::OpCode::BufferUpdateCounter: { - DxilInst_BufferUpdateCounter updateCounter(CI); - Value *handle = updateCounter.get_uav(); - DxilResourceProperties RP = ValCtx.GetResourceFromVal(handle); + DxilInst_BufferUpdateCounter UpdateCounter(CI); + Value *Handle = UpdateCounter.get_uav(); + DxilResourceProperties RP = ValCtx.GetResourceFromVal(Handle); if (!RP.isUAV()) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBufferUpdateCounterOnUAV); @@ -1761,20 +1786,20 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, CI, ValidationRule::InstrBufferUpdateCounterOnResHasCounter); } - Value 
*inc = updateCounter.get_inc(); - if (ConstantInt *cInc = dyn_cast(inc)) { - bool isInc = cInc->getLimitedValue() == 1; + Value *Inc = UpdateCounter.get_inc(); + if (ConstantInt *cInc = dyn_cast(Inc)) { + bool IsInc = cInc->getLimitedValue() == 1; if (!ValCtx.isLibProfile) { - auto it = ValCtx.HandleResIndexMap.find(handle); - if (it != ValCtx.HandleResIndexMap.end()) { - unsigned resIndex = it->second; - if (ValCtx.UavCounterIncMap.count(resIndex)) { - if (isInc != ValCtx.UavCounterIncMap[resIndex]) { + auto It = ValCtx.HandleResIndexMap.find(Handle); + if (It != ValCtx.HandleResIndexMap.end()) { + unsigned ResIndex = It->second; + if (ValCtx.UavCounterIncMap.count(ResIndex)) { + if (IsInc != ValCtx.UavCounterIncMap[ResIndex]) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOnlyOneAllocConsume); } } else { - ValCtx.UavCounterIncMap[resIndex] = isInc; + ValCtx.UavCounterIncMap[ResIndex] = IsInc; } } @@ -1789,35 +1814,35 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, } break; case DXIL::OpCode::Barrier: { - DxilInst_Barrier barrier(CI); - Value *mode = barrier.get_barrierMode(); - ConstantInt *cMode = dyn_cast(mode); - if (!cMode) { + DxilInst_Barrier Barrier(CI); + Value *Mode = Barrier.get_barrierMode(); + ConstantInt *CMode = dyn_cast(Mode); + if (!CMode) { ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrOpConst, {"Mode", "Barrier"}); return; } - const unsigned uglobal = + const unsigned Uglobal = static_cast(DXIL::BarrierMode::UAVFenceGlobal); - const unsigned g = static_cast(DXIL::BarrierMode::TGSMFence); - const unsigned ut = + const unsigned G = static_cast(DXIL::BarrierMode::TGSMFence); + const unsigned Ut = static_cast(DXIL::BarrierMode::UAVFenceThreadGroup); - unsigned barrierMode = cMode->getLimitedValue(); + unsigned BarrierMode = CMode->getLimitedValue(); - if (isCSLike || isLibFunc) { - bool bHasUGlobal = barrierMode & uglobal; - bool bHasGroup = barrierMode & g; - bool bHasUGroup = barrierMode & ut; - if (bHasUGlobal && 
bHasUGroup) { + if (IsCSLike || IsLibFunc) { + bool HasUGlobal = BarrierMode & Uglobal; + bool HasGroup = BarrierMode & G; + bool HasUGroup = BarrierMode & Ut; + if (HasUGlobal && HasUGroup) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeUselessUGroup); } - if (!bHasUGlobal && !bHasGroup && !bHasUGroup) { + if (!HasUGlobal && !HasGroup && !HasUGroup) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeNoMemory); } } else { - if (uglobal != barrierMode) { + if (Uglobal != BarrierMode) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeForNonCS); } } @@ -1829,30 +1854,29 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, (unsigned)hlsl::DXIL::MemoryTypeFlag::ValidMask, "memory type", "BarrierByMemoryType"); ValidateBarrierFlagArg(ValCtx, CI, DI.get_SemanticFlags(), - (unsigned)hlsl::DXIL::BarrierSemanticFlag::ValidMask, - "semantic", "BarrierByMemoryType"); - if (!isLibFunc && shaderKind != DXIL::ShaderKind::Node && + getSemanticFlagValidMask(pSM), "semantic", + "BarrierByMemoryType"); + if (!IsLibFunc && ShaderKind != DXIL::ShaderKind::Node && OP::BarrierRequiresNode(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierRequiresNode); } - if (!isCSLike && !isLibFunc && OP::BarrierRequiresGroup(CI)) { + if (!IsCSLike && !IsLibFunc && OP::BarrierRequiresGroup(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeForNonCS); } } break; case DXIL::OpCode::BarrierByNodeRecordHandle: case DXIL::OpCode::BarrierByMemoryHandle: { - std::string opName = opcode == DXIL::OpCode::BarrierByNodeRecordHandle + std::string OpName = Opcode == DXIL::OpCode::BarrierByNodeRecordHandle ? 
"barrierByNodeRecordHandle" : "barrierByMemoryHandle"; DxilInst_BarrierByMemoryHandle DIMH(CI); ValidateBarrierFlagArg(ValCtx, CI, DIMH.get_SemanticFlags(), - (unsigned)hlsl::DXIL::BarrierSemanticFlag::ValidMask, - "semantic", opName); - if (!isLibFunc && shaderKind != DXIL::ShaderKind::Node && + getSemanticFlagValidMask(pSM), "semantic", OpName); + if (!IsLibFunc && ShaderKind != DXIL::ShaderKind::Node && OP::BarrierRequiresNode(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierRequiresNode); } - if (!isCSLike && !isLibFunc && OP::BarrierRequiresGroup(CI)) { + if (!IsCSLike && !IsLibFunc && OP::BarrierRequiresGroup(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeForNonCS); } } break; @@ -1862,9 +1886,33 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, {"CreateHandleForLib", "Library"}); } break; + + // Shader Execution Reordering + case DXIL::OpCode::MaybeReorderThread: { + Value *HitObject = CI->getArgOperand(1); + Value *CoherenceHintBits = CI->getArgOperand(2); + Value *NumCoherenceHintBits = CI->getArgOperand(3); + + if (isa(HitObject)) + ValCtx.EmitInstrError(CI, ValidationRule::InstrUndefHitObject); + + if (isa(NumCoherenceHintBits)) + ValCtx.EmitInstrError( + CI, ValidationRule::InstrMayReorderThreadUndefCoherenceHintParam); + + ConstantInt *NumCoherenceHintBitsConst = + dyn_cast(NumCoherenceHintBits); + const bool HasCoherenceHint = + NumCoherenceHintBitsConst && + NumCoherenceHintBitsConst->getLimitedValue() != 0; + if (HasCoherenceHint && isa(CoherenceHintBits)) + ValCtx.EmitInstrError( + CI, ValidationRule::InstrMayReorderThreadUndefCoherenceHintParam); + } break; + case DXIL::OpCode::AtomicBinOp: case DXIL::OpCode::AtomicCompareExchange: { - Type *pOverloadType = OP::GetOverloadType(opcode, CI->getCalledFunction()); + Type *pOverloadType = OP::GetOverloadType(Opcode, CI->getCalledFunction()); if ((pOverloadType->isIntegerTy(64)) && !pSM->IsSM66Plus()) ValCtx.EmitInstrFormatError( CI, 
ValidationRule::SmOpcodeInInvalidFunction, @@ -1890,73 +1938,73 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, break; case DXIL::OpCode::ThreadId: // SV_DispatchThreadID - if (shaderKind != DXIL::ShaderKind::Node) { + if (ShaderKind != DXIL::ShaderKind::Node) { break; } - if (nodeLaunchType == DXIL::NodeLaunchType::Broadcasting) + if (NodeLaunchType == DXIL::NodeLaunchType::Broadcasting) break; ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrSVConflictingLaunchMode, - {"ThreadId", "SV_DispatchThreadID", GetLaunchTypeStr(nodeLaunchType)}); + {"ThreadId", "SV_DispatchThreadID", GetLaunchTypeStr(NodeLaunchType)}); break; case DXIL::OpCode::GroupId: // SV_GroupId - if (shaderKind != DXIL::ShaderKind::Node) { + if (ShaderKind != DXIL::ShaderKind::Node) { break; } - if (nodeLaunchType == DXIL::NodeLaunchType::Broadcasting) + if (NodeLaunchType == DXIL::NodeLaunchType::Broadcasting) break; ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrSVConflictingLaunchMode, - {"GroupId", "SV_GroupId", GetLaunchTypeStr(nodeLaunchType)}); + {"GroupId", "SV_GroupId", GetLaunchTypeStr(NodeLaunchType)}); break; case DXIL::OpCode::ThreadIdInGroup: // SV_GroupThreadID - if (shaderKind != DXIL::ShaderKind::Node) { + if (ShaderKind != DXIL::ShaderKind::Node) { break; } - if (nodeLaunchType == DXIL::NodeLaunchType::Broadcasting || - nodeLaunchType == DXIL::NodeLaunchType::Coalescing) + if (NodeLaunchType == DXIL::NodeLaunchType::Broadcasting || + NodeLaunchType == DXIL::NodeLaunchType::Coalescing) break; ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrSVConflictingLaunchMode, {"ThreadIdInGroup", "SV_GroupThreadID", - GetLaunchTypeStr(nodeLaunchType)}); + GetLaunchTypeStr(NodeLaunchType)}); break; case DXIL::OpCode::FlattenedThreadIdInGroup: // SV_GroupIndex - if (shaderKind != DXIL::ShaderKind::Node) { + if (ShaderKind != DXIL::ShaderKind::Node) { break; } - if (nodeLaunchType == DXIL::NodeLaunchType::Broadcasting || - nodeLaunchType == 
DXIL::NodeLaunchType::Coalescing) + if (NodeLaunchType == DXIL::NodeLaunchType::Broadcasting || + NodeLaunchType == DXIL::NodeLaunchType::Coalescing) break; ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrSVConflictingLaunchMode, {"FlattenedThreadIdInGroup", "SV_GroupIndex", - GetLaunchTypeStr(nodeLaunchType)}); + GetLaunchTypeStr(NodeLaunchType)}); break; default: - // TODO: make sure every opcode is checked. + // TODO: make sure every Opcode is checked. // Skip opcodes don't need special check. break; } } static bool IsDxilFunction(llvm::Function *F) { - unsigned argSize = F->arg_size(); - if (argSize < 1) { + unsigned ArgSize = F->arg_size(); + if (ArgSize < 1) { // Cannot be a DXIL operation. return false; } @@ -1991,9 +2039,9 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) { } const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel(); - OP *hlslOP = ValCtx.DxilMod.GetOP(); - bool isDxilOp = OP::IsDxilOpFunc(F); - Type *voidTy = Type::getVoidTy(F->getContext()); + OP *HlslOP = ValCtx.DxilMod.GetOP(); + bool IsDxilOp = OP::IsDxilOpFunc(F); + Type *VoidTy = Type::getVoidTy(F->getContext()); for (User *user : F->users()) { CallInst *CI = dyn_cast(user); @@ -2004,32 +2052,32 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) { } // Skip call to external user defined function - if (!isDxilOp) + if (!IsDxilOp) continue; - Value *argOpcode = CI->getArgOperand(0); - ConstantInt *constOpcode = dyn_cast(argOpcode); - if (!constOpcode) { - // opcode not immediate; function body will validate this error. + Value *ArgOpcode = CI->getArgOperand(0); + ConstantInt *ConstOpcode = dyn_cast(ArgOpcode); + if (!ConstOpcode) { + // Opcode not immediate; function body will validate this error. continue; } - unsigned opcode = constOpcode->getLimitedValue(); - if (opcode >= (unsigned)DXIL::OpCode::NumOpCodes) { - // invalid opcode; function body will validate this error. 
+ unsigned Opcode = ConstOpcode->getLimitedValue(); + if (Opcode >= (unsigned)DXIL::OpCode::NumOpCodes) { + // invalid Opcode; function body will validate this error. continue; } - DXIL::OpCode dxilOpcode = (DXIL::OpCode)opcode; + DXIL::OpCode DxilOpcode = (DXIL::OpCode)Opcode; // In some cases, no overloads are provided (void is exclusive to others) - Function *dxilFunc; - if (hlslOP->IsOverloadLegal(dxilOpcode, voidTy)) { - dxilFunc = hlslOP->GetOpFunc(dxilOpcode, voidTy); + Function *DxilFunc; + if (HlslOP->IsOverloadLegal(DxilOpcode, VoidTy)) { + DxilFunc = HlslOP->GetOpFunc(DxilOpcode, VoidTy); } else { - Type *Ty = OP::GetOverloadType(dxilOpcode, CI->getCalledFunction()); + Type *Ty = OP::GetOverloadType(DxilOpcode, CI->getCalledFunction()); try { - if (!hlslOP->IsOverloadLegal(dxilOpcode, Ty)) { + if (!HlslOP->IsOverloadLegal(DxilOpcode, Ty)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOload); continue; } @@ -2037,89 +2085,92 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOload); continue; } - dxilFunc = hlslOP->GetOpFunc(dxilOpcode, Ty->getScalarType()); + DxilFunc = HlslOP->GetOpFunc(DxilOpcode, Ty); } - if (!dxilFunc) { - // Cannot find dxilFunction based on opcode and type. + if (!DxilFunc) { + // Cannot find DxilFunction based on Opcode and type. 
ValCtx.EmitInstrError(CI, ValidationRule::InstrOload); continue; } - if (dxilFunc->getFunctionType() != F->getFunctionType()) { + if (DxilFunc->getFunctionType() != F->getFunctionType()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrCallOload, - {dxilFunc->getName()}); + {DxilFunc->getName()}); continue; } unsigned major = pSM->GetMajor(); unsigned minor = pSM->GetMinor(); if (ValCtx.isLibProfile) { - Function *callingFunction = CI->getParent()->getParent(); + Function *CallingFunction = CI->getParent()->getParent(); DXIL::ShaderKind SK = DXIL::ShaderKind::Library; - if (ValCtx.DxilMod.HasDxilFunctionProps(callingFunction)) - SK = ValCtx.DxilMod.GetDxilFunctionProps(callingFunction).shaderKind; - else if (ValCtx.DxilMod.IsPatchConstantShader(callingFunction)) + if (ValCtx.DxilMod.HasDxilFunctionProps(CallingFunction)) + SK = ValCtx.DxilMod.GetDxilFunctionProps(CallingFunction).shaderKind; + else if (ValCtx.DxilMod.IsPatchConstantShader(CallingFunction)) SK = DXIL::ShaderKind::Hull; - if (!ValidateOpcodeInProfile(dxilOpcode, SK, major, minor)) { + if (!ValidateOpcodeInProfile(DxilOpcode, SK, major, minor)) { // Opcode not available in profile. // produces: "lib_6_3(ps)", or "lib_6_3(anyhit)" for shader types // Or: "lib_6_3(lib)" for library function - std::string shaderModel = pSM->GetName(); - shaderModel += std::string("(") + ShaderModel::GetKindName(SK) + ")"; + std::string ShaderModel = pSM->GetName(); + ShaderModel += std::string("(") + ShaderModel::GetKindName(SK) + ")"; ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcode, - {hlslOP->GetOpCodeName(dxilOpcode), shaderModel}); + {HlslOP->GetOpCodeName(DxilOpcode), ShaderModel}); continue; } } else { - if (!ValidateOpcodeInProfile(dxilOpcode, pSM->GetKind(), major, minor)) { + if (!ValidateOpcodeInProfile(DxilOpcode, pSM->GetKind(), major, minor)) { // Opcode not available in profile. 
ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcode, - {hlslOP->GetOpCodeName(dxilOpcode), pSM->GetName()}); + {HlslOP->GetOpCodeName(DxilOpcode), pSM->GetName()}); continue; } } // Check more detail. - ValidateDxilOperationCallInProfile(CI, dxilOpcode, pSM, ValCtx); + ValidateDxilOperationCallInProfile(CI, DxilOpcode, pSM, ValCtx); } } /////////////////////////////////////////////////////////////////////////////// // Instruction validation functions. // -static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *hlslOP) { - if (ST == hlslOP->GetBinaryWithCarryType()) +static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *HlslOP) { + if (ST == HlslOP->GetBinaryWithCarryType()) return true; - if (ST == hlslOP->GetBinaryWithTwoOutputsType()) + if (ST == HlslOP->GetBinaryWithTwoOutputsType()) return true; - if (ST == hlslOP->GetFourI32Type()) + if (ST == HlslOP->GetFourI32Type()) return true; - if (ST == hlslOP->GetFourI16Type()) + if (ST == HlslOP->GetFourI16Type()) return true; - if (ST == hlslOP->GetDimensionsType()) + if (ST == HlslOP->GetDimensionsType()) return true; - if (ST == hlslOP->GetHandleType()) + if (ST == HlslOP->GetHandleType()) return true; - if (ST == hlslOP->GetSamplePosType()) + if (ST == HlslOP->GetSamplePosType()) return true; - if (ST == hlslOP->GetSplitDoubleType()) + if (ST == HlslOP->GetSplitDoubleType()) return true; unsigned EltNum = ST->getNumElements(); + Type *EltTy = ST->getElementType(0); switch (EltNum) { case 2: + // Check if it's a native vector resret. + if (EltTy->isVectorTy()) + return ST == HlslOP->GetResRetType(EltTy); + LLVM_FALLTHROUGH; case 4: - case 8: { // 2 for doubles, 8 for halfs. - Type *EltTy = ST->getElementType(0); - return ST == hlslOP->GetCBufferRetType(EltTy); - } break; - case 5: { - Type *EltTy = ST->getElementType(0); - return ST == hlslOP->GetResRetType(EltTy); - } break; + case 8: // 2 for doubles, 8 for halfs. 
+ return ST == HlslOP->GetCBufferRetType(EltTy); + break; + case 5: + return ST == HlslOP->GetResRetType(EltTy); + break; default: return false; } @@ -2129,11 +2180,11 @@ static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *hlslOP) { // inner type (UDT struct member) may be: [N dim array of]( UDT struct | scalar // ) scalar type may be: ( float(16|32|64) | int(16|32|64) ) static bool ValidateType(Type *Ty, ValidationContext &ValCtx, - bool bInner = false) { + bool IsInner = false) { DXASSERT_NOMSG(Ty != nullptr); if (Ty->isPointerTy()) { Type *EltTy = Ty->getPointerElementType(); - if (bInner || EltTy->isPointerTy()) { + if (IsInner || EltTy->isPointerTy()) { ValCtx.EmitTypeError(Ty, ValidationRule::TypesNoPtrToPtr); return false; } @@ -2141,7 +2192,7 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, } if (Ty->isArrayTy()) { Type *EltTy = Ty->getArrayElementType(); - if (!bInner && isa(EltTy)) { + if (!IsInner && isa(EltTy)) { // Outermost array should be converted to single-dim, // but arrays inside struct are allowed to be multi-dim ValCtx.EmitTypeError(Ty, ValidationRule::TypesNoMultiDim); @@ -2152,7 +2203,7 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, Ty = EltTy; } if (Ty->isStructTy()) { - bool result = true; + bool Result = true; StructType *ST = cast(Ty); StringRef Name = ST->getName(); @@ -2160,28 +2211,28 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, // Allow handle type. 
if (ValCtx.HandleTy == Ty) return true; - hlsl::OP *hlslOP = ValCtx.DxilMod.GetOP(); - if (IsDxilBuiltinStructType(ST, hlslOP)) { + hlsl::OP *HlslOP = ValCtx.DxilMod.GetOP(); + if (IsDxilBuiltinStructType(ST, HlslOP)) { ValCtx.EmitTypeError(Ty, ValidationRule::InstrDxilStructUser); - result = false; + Result = false; } ValCtx.EmitTypeError(Ty, ValidationRule::DeclDxilNsReserved); - result = false; + Result = false; } for (auto e : ST->elements()) { - if (!ValidateType(e, ValCtx, /*bInner*/ true)) { - result = false; + if (!ValidateType(e, ValCtx, /*IsInner*/ true)) { + Result = false; } } - return result; + return Result; } if (Ty->isFloatTy() || Ty->isHalfTy() || Ty->isDoubleTy()) { return true; } if (Ty->isIntegerTy()) { - unsigned width = Ty->getIntegerBitWidth(); - if (width != 1 && width != 8 && width != 16 && width != 32 && width != 64) { + unsigned Width = Ty->getIntegerBitWidth(); + if (Width != 1 && Width != 8 && Width != 16 && Width != 32 && Width != 64) { ValCtx.EmitTypeError(Ty, ValidationRule::TypesIntWidth); return false; } @@ -2193,6 +2244,9 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, return true; if (Ty->isVectorTy()) { + if (Ty->getVectorNumElements() > 1 && + ValCtx.DxilMod.GetShaderModel()->IsSM69Plus()) + return true; ValCtx.EmitTypeError(Ty, ValidationRule::TypesNoVector); return false; } @@ -2201,13 +2255,13 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, } static bool GetNodeOperandAsInt(ValidationContext &ValCtx, MDNode *pMD, - unsigned index, uint64_t *pValue) { - *pValue = 0; - if (pMD->getNumOperands() < index) { + unsigned Index, uint64_t *PValue) { + *PValue = 0; + if (pMD->getNumOperands() < Index) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); return false; } - ConstantAsMetadata *C = dyn_cast(pMD->getOperand(index)); + ConstantAsMetadata *C = dyn_cast(pMD->getOperand(Index)); if (C == nullptr) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); return false; @@ -2217,7 
+2271,7 @@ static bool GetNodeOperandAsInt(ValidationContext &ValCtx, MDNode *pMD, ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); return false; } - *pValue = CI->getValue().getZExtValue(); + *PValue = CI->getValue().getZExtValue(); return true; } @@ -2231,14 +2285,14 @@ static bool IsPrecise(Instruction &I, ValidationContext &ValCtx) { return false; } - uint64_t val; - if (!GetNodeOperandAsInt(ValCtx, pMD, 0, &val)) { + uint64_t Val; + if (!GetNodeOperandAsInt(ValCtx, pMD, 0, &Val)) { return false; } - if (val == 1) { + if (Val == 1) { return true; } - if (val != 0) { + if (Val != 0) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaValueRange); } return false; @@ -2257,12 +2311,12 @@ static bool IsValueMinPrec(DxilModule &DxilMod, Value *V) { } static void ValidateMsIntrinsics(Function *F, ValidationContext &ValCtx, - CallInst *setMeshOutputCounts, - CallInst *getMeshPayload) { + CallInst *SetMeshOutputCounts, + CallInst *GetMeshPayload) { if (ValCtx.DxilMod.HasDxilFunctionProps(F)) { - DXIL::ShaderKind shaderKind = + DXIL::ShaderKind ShaderKind = ValCtx.DxilMod.GetDxilFunctionProps(F).shaderKind; - if (shaderKind != DXIL::ShaderKind::Mesh) + if (ShaderKind != DXIL::ShaderKind::Mesh) return; } else { return; @@ -2271,10 +2325,10 @@ static void ValidateMsIntrinsics(Function *F, ValidationContext &ValCtx, DominatorTreeAnalysis DTA; DominatorTree DT = DTA.run(*F); - for (auto b = F->begin(), bend = F->end(); b != bend; ++b) { - bool foundSetMeshOutputCountsInCurrentBB = false; - for (auto i = b->begin(), iend = b->end(); i != iend; ++i) { - llvm::Instruction &I = *i; + for (auto B = F->begin(), BEnd = F->end(); B != BEnd; ++B) { + bool FoundSetMeshOutputCountsInCurrentBb = false; + for (auto It = B->begin(), ItEnd = B->end(); It != ItEnd; ++It) { + llvm::Instruction &I = *It; // Calls to external functions. 
CallInst *CI = dyn_cast(&I); @@ -2290,22 +2344,22 @@ static void ValidateMsIntrinsics(Function *F, ValidationContext &ValCtx, continue; } - if (CI == setMeshOutputCounts) { - foundSetMeshOutputCountsInCurrentBB = true; + if (CI == SetMeshOutputCounts) { + FoundSetMeshOutputCountsInCurrentBb = true; } - Value *opcodeVal = CI->getOperand(0); - ConstantInt *OpcodeConst = dyn_cast(opcodeVal); - unsigned opcode = OpcodeConst->getLimitedValue(); - DXIL::OpCode dxilOpcode = (DXIL::OpCode)opcode; - - if (dxilOpcode == DXIL::OpCode::StoreVertexOutput || - dxilOpcode == DXIL::OpCode::StorePrimitiveOutput || - dxilOpcode == DXIL::OpCode::EmitIndices) { - if (setMeshOutputCounts == nullptr) { + Value *OpcodeVal = CI->getOperand(0); + ConstantInt *OpcodeConst = dyn_cast(OpcodeVal); + unsigned Opcode = OpcodeConst->getLimitedValue(); + DXIL::OpCode DxilOpcode = (DXIL::OpCode)Opcode; + + if (DxilOpcode == DXIL::OpCode::StoreVertexOutput || + DxilOpcode == DXIL::OpCode::StorePrimitiveOutput || + DxilOpcode == DXIL::OpCode::EmitIndices) { + if (SetMeshOutputCounts == nullptr) { ValCtx.EmitInstrError( &I, ValidationRule::InstrMissingSetMeshOutputCounts); - } else if (!foundSetMeshOutputCountsInCurrentBB && - !DT.dominates(setMeshOutputCounts->getParent(), + } else if (!FoundSetMeshOutputCountsInCurrentBb && + !DT.dominates(SetMeshOutputCounts->getParent(), I.getParent())) { ValCtx.EmitInstrError( &I, ValidationRule::InstrNonDominatingSetMeshOutputCounts); @@ -2316,61 +2370,61 @@ static void ValidateMsIntrinsics(Function *F, ValidationContext &ValCtx, } } - if (getMeshPayload) { - PointerType *payloadPTy = cast(getMeshPayload->getType()); - StructType *payloadTy = - cast(payloadPTy->getPointerElementType()); + if (GetMeshPayload) { + PointerType *PayloadPTy = cast(GetMeshPayload->getType()); + StructType *PayloadTy = + cast(PayloadPTy->getPointerElementType()); const DataLayout &DL = F->getParent()->getDataLayout(); - unsigned payloadSize = DL.getTypeAllocSize(payloadTy); + unsigned 
PayloadSize = DL.getTypeAllocSize(PayloadTy); - DxilFunctionProps &prop = ValCtx.DxilMod.GetDxilFunctionProps(F); + DxilFunctionProps &Prop = ValCtx.DxilMod.GetDxilFunctionProps(F); - if (prop.ShaderProps.MS.payloadSizeInBytes < payloadSize) { + if (Prop.ShaderProps.MS.payloadSizeInBytes < PayloadSize) { ValCtx.EmitFnFormatError( F, ValidationRule::SmMeshShaderPayloadSizeDeclared, - {F->getName(), std::to_string(payloadSize), - std::to_string(prop.ShaderProps.MS.payloadSizeInBytes)}); + {F->getName(), std::to_string(PayloadSize), + std::to_string(Prop.ShaderProps.MS.payloadSizeInBytes)}); } - if (prop.ShaderProps.MS.payloadSizeInBytes > DXIL::kMaxMSASPayloadBytes) { + if (Prop.ShaderProps.MS.payloadSizeInBytes > DXIL::kMaxMSASPayloadBytes) { ValCtx.EmitFnFormatError( F, ValidationRule::SmMeshShaderPayloadSize, - {F->getName(), std::to_string(prop.ShaderProps.MS.payloadSizeInBytes), + {F->getName(), std::to_string(Prop.ShaderProps.MS.payloadSizeInBytes), std::to_string(DXIL::kMaxMSASPayloadBytes)}); } } } static void ValidateAsIntrinsics(Function *F, ValidationContext &ValCtx, - CallInst *dispatchMesh) { + CallInst *DispatchMesh) { if (ValCtx.DxilMod.HasDxilFunctionProps(F)) { - DXIL::ShaderKind shaderKind = + DXIL::ShaderKind ShaderKind = ValCtx.DxilMod.GetDxilFunctionProps(F).shaderKind; - if (shaderKind != DXIL::ShaderKind::Amplification) + if (ShaderKind != DXIL::ShaderKind::Amplification) return; - if (dispatchMesh) { - DxilInst_DispatchMesh dispatchMeshCall(dispatchMesh); - Value *operandVal = dispatchMeshCall.get_payload(); - Type *payloadTy = operandVal->getType(); + if (DispatchMesh) { + DxilInst_DispatchMesh DispatchMeshCall(DispatchMesh); + Value *OperandVal = DispatchMeshCall.get_payload(); + Type *PayloadTy = OperandVal->getType(); const DataLayout &DL = F->getParent()->getDataLayout(); - unsigned payloadSize = DL.getTypeAllocSize(payloadTy); + unsigned PayloadSize = DL.getTypeAllocSize(PayloadTy); - DxilFunctionProps &prop = 
ValCtx.DxilMod.GetDxilFunctionProps(F); + DxilFunctionProps &Prop = ValCtx.DxilMod.GetDxilFunctionProps(F); - if (prop.ShaderProps.AS.payloadSizeInBytes < payloadSize) { + if (Prop.ShaderProps.AS.payloadSizeInBytes < PayloadSize) { ValCtx.EmitInstrFormatError( - dispatchMesh, + DispatchMesh, ValidationRule::SmAmplificationShaderPayloadSizeDeclared, - {F->getName(), std::to_string(payloadSize), - std::to_string(prop.ShaderProps.AS.payloadSizeInBytes)}); + {F->getName(), std::to_string(PayloadSize), + std::to_string(Prop.ShaderProps.AS.payloadSizeInBytes)}); } - if (prop.ShaderProps.AS.payloadSizeInBytes > DXIL::kMaxMSASPayloadBytes) { + if (Prop.ShaderProps.AS.payloadSizeInBytes > DXIL::kMaxMSASPayloadBytes) { ValCtx.EmitInstrFormatError( - dispatchMesh, ValidationRule::SmAmplificationShaderPayloadSize, + DispatchMesh, ValidationRule::SmAmplificationShaderPayloadSize, {F->getName(), - std::to_string(prop.ShaderProps.AS.payloadSizeInBytes), + std::to_string(Prop.ShaderProps.AS.payloadSizeInBytes), std::to_string(DXIL::kMaxMSASPayloadBytes)}); } } @@ -2379,7 +2433,7 @@ static void ValidateAsIntrinsics(Function *F, ValidationContext &ValCtx, return; } - if (dispatchMesh == nullptr) { + if (DispatchMesh == nullptr) { ValCtx.EmitFnError(F, ValidationRule::InstrNotOnceDispatchMesh); return; } @@ -2387,30 +2441,30 @@ static void ValidateAsIntrinsics(Function *F, ValidationContext &ValCtx, PostDominatorTree PDT; PDT.runOnFunction(*F); - if (!PDT.dominates(dispatchMesh->getParent(), &F->getEntryBlock())) { - ValCtx.EmitInstrError(dispatchMesh, + if (!PDT.dominates(DispatchMesh->getParent(), &F->getEntryBlock())) { + ValCtx.EmitInstrError(DispatchMesh, ValidationRule::InstrNonDominatingDispatchMesh); } - Function *dispatchMeshFunc = dispatchMesh->getCalledFunction(); - FunctionType *dispatchMeshFuncTy = dispatchMeshFunc->getFunctionType(); - PointerType *payloadPTy = - cast(dispatchMeshFuncTy->getParamType(4)); - StructType *payloadTy = 
cast(payloadPTy->getPointerElementType()); + Function *DispatchMeshFunc = DispatchMesh->getCalledFunction(); + FunctionType *DispatchMeshFuncTy = DispatchMeshFunc->getFunctionType(); + PointerType *PayloadPTy = + cast(DispatchMeshFuncTy->getParamType(4)); + StructType *PayloadTy = cast(PayloadPTy->getPointerElementType()); const DataLayout &DL = F->getParent()->getDataLayout(); - unsigned payloadSize = DL.getTypeAllocSize(payloadTy); + unsigned PayloadSize = DL.getTypeAllocSize(PayloadTy); - if (payloadSize > DXIL::kMaxMSASPayloadBytes) { + if (PayloadSize > DXIL::kMaxMSASPayloadBytes) { ValCtx.EmitInstrFormatError( - dispatchMesh, ValidationRule::SmAmplificationShaderPayloadSize, - {F->getName(), std::to_string(payloadSize), + DispatchMesh, ValidationRule::SmAmplificationShaderPayloadSize, + {F->getName(), std::to_string(PayloadSize), std::to_string(DXIL::kMaxMSASPayloadBytes)}); } } -static void ValidateControlFlowHint(BasicBlock &bb, ValidationContext &ValCtx) { +static void ValidateControlFlowHint(BasicBlock &BB, ValidationContext &ValCtx) { // Validate controlflow hint. 
- TerminatorInst *TI = bb.getTerminator(); + TerminatorInst *TI = BB.getTerminator(); if (!TI) return; @@ -2421,33 +2475,33 @@ static void ValidateControlFlowHint(BasicBlock &bb, ValidationContext &ValCtx) { if (pNode->getNumOperands() < 3) return; - bool bHasBranch = false; - bool bHasFlatten = false; - bool bForceCase = false; + bool HasBranch = false; + bool HasFlatten = false; + bool ForceCase = false; - for (unsigned i = 2; i < pNode->getNumOperands(); i++) { - uint64_t value = 0; - if (GetNodeOperandAsInt(ValCtx, pNode, i, &value)) { - DXIL::ControlFlowHint hint = static_cast(value); - switch (hint) { + for (unsigned I = 2; I < pNode->getNumOperands(); I++) { + uint64_t Value = 0; + if (GetNodeOperandAsInt(ValCtx, pNode, I, &Value)) { + DXIL::ControlFlowHint Hint = static_cast(Value); + switch (Hint) { case DXIL::ControlFlowHint::Flatten: - bHasFlatten = true; + HasFlatten = true; break; case DXIL::ControlFlowHint::Branch: - bHasBranch = true; + HasBranch = true; break; case DXIL::ControlFlowHint::ForceCase: - bForceCase = true; + ForceCase = true; break; default: ValCtx.EmitMetaError(pNode, ValidationRule::MetaInvalidControlFlowHint); } } } - if (bHasBranch && bHasFlatten) { + if (HasBranch && HasFlatten) { ValCtx.EmitMetaError(pNode, ValidationRule::MetaBranchFlatten); } - if (bForceCase && !isa(TI)) { + if (ForceCase && !isa(TI)) { ValCtx.EmitMetaError(pNode, ValidationRule::MetaForceCaseOnSwitch); } } @@ -2460,30 +2514,30 @@ static void ValidateTBAAMetadata(MDNode *Node, ValidationContext &ValCtx) { } } break; case 2: { - MDNode *rootNode = dyn_cast(Node->getOperand(1)); - if (!rootNode) { + MDNode *RootNode = dyn_cast(Node->getOperand(1)); + if (!RootNode) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); } else { - ValidateTBAAMetadata(rootNode, ValCtx); + ValidateTBAAMetadata(RootNode, ValCtx); } } break; case 3: { - MDNode *rootNode = dyn_cast(Node->getOperand(1)); - if (!rootNode) { + MDNode *RootNode = dyn_cast(Node->getOperand(1)); + if 
(!RootNode) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); } else { - ValidateTBAAMetadata(rootNode, ValCtx); + ValidateTBAAMetadata(RootNode, ValCtx); } - ConstantAsMetadata *pointsToConstMem = + ConstantAsMetadata *PointsToConstMem = dyn_cast(Node->getOperand(2)); - if (!pointsToConstMem) { + if (!PointsToConstMem) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); } else { - ConstantInt *isConst = - dyn_cast(pointsToConstMem->getValue()); - if (!isConst) { + ConstantInt *IsConst = + dyn_cast(PointsToConstMem->getValue()); + if (!IsConst) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); - } else if (isConst->getValue().getLimitedValue() > 1) { + } else if (IsConst->getValue().getLimitedValue() > 1) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); } } @@ -2564,11 +2618,11 @@ static void ValidateNonUniformMetadata(Instruction &I, MDNode *pMD, if (pMD->getNumOperands() != 1) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); } - uint64_t val; - if (!GetNodeOperandAsInt(ValCtx, pMD, 0, &val)) { + uint64_t Val; + if (!GetNodeOperandAsInt(ValCtx, pMD, 0, &Val)) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); } - if (val != 1) { + if (Val != 1) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaValueRange); } } @@ -2603,31 +2657,31 @@ static void ValidateInstructionMetadata(Instruction *I, } static void ValidateFunctionAttribute(Function *F, ValidationContext &ValCtx) { - AttributeSet attrSet = F->getAttributes().getFnAttributes(); + AttributeSet AttrSet = F->getAttributes().getFnAttributes(); // fp32-denorm-mode - if (attrSet.hasAttribute(AttributeSet::FunctionIndex, + if (AttrSet.hasAttribute(AttributeSet::FunctionIndex, DXIL::kFP32DenormKindString)) { - Attribute attr = attrSet.getAttribute(AttributeSet::FunctionIndex, + Attribute Attr = AttrSet.getAttribute(AttributeSet::FunctionIndex, DXIL::kFP32DenormKindString); - StringRef value = attr.getValueAsString(); - if 
(!value.equals(DXIL::kFP32DenormValueAnyString) && - !value.equals(DXIL::kFP32DenormValueFtzString) && - !value.equals(DXIL::kFP32DenormValuePreserveString)) { - ValCtx.EmitFnAttributeError(F, attr.getKindAsString(), - attr.getValueAsString()); + StringRef StrValue = Attr.getValueAsString(); + if (!StrValue.equals(DXIL::kFP32DenormValueAnyString) && + !StrValue.equals(DXIL::kFP32DenormValueFtzString) && + !StrValue.equals(DXIL::kFP32DenormValuePreserveString)) { + ValCtx.EmitFnAttributeError(F, Attr.getKindAsString(), + Attr.getValueAsString()); } } // TODO: If validating libraries, we should remove all unknown function // attributes. For each attribute, check if it is a known attribute - for (unsigned I = 0, E = attrSet.getNumSlots(); I != E; ++I) { - for (auto AttrIter = attrSet.begin(I), AttrEnd = attrSet.end(I); + for (unsigned I = 0, E = AttrSet.getNumSlots(); I != E; ++I) { + for (auto AttrIter = AttrSet.begin(I), AttrEnd = AttrSet.end(I); AttrIter != AttrEnd; ++AttrIter) { if (!AttrIter->isStringAttribute()) { continue; } - StringRef kind = AttrIter->getKindAsString(); - if (!kind.equals(DXIL::kFP32DenormKindString) && - !kind.equals(DXIL::kWaveOpsIncludeHelperLanesString)) { + StringRef Kind = AttrIter->getKindAsString(); + if (!Kind.equals(DXIL::kFP32DenormKindString) && + !Kind.equals(DXIL::kWaveOpsIncludeHelperLanesString)) { ValCtx.EmitFnAttributeError(F, AttrIter->getKindAsString(), AttrIter->getValueAsString()); } @@ -2669,21 +2723,38 @@ static bool IsLLVMInstructionAllowedForLib(Instruction &I, } } +// Shader model specific checks for valid LLVM instructions. +// Currently only checks for pre 6.9 usage of vector operations. +// Returns false if shader model is pre 6.9 and I represents a vector +// operation. Returns true otherwise. 
+static bool IsLLVMInstructionAllowedForShaderModel(Instruction &I, + ValidationContext &ValCtx) { + if (ValCtx.DxilMod.GetShaderModel()->IsSM69Plus()) + return true; + unsigned Opcode = I.getOpcode(); + if (Opcode == Instruction::InsertElement || + Opcode == Instruction::ExtractElement || + Opcode == Instruction::ShuffleVector) + return false; + + return true; +} + static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { bool SupportsMinPrecision = ValCtx.DxilMod.GetGlobalFlags() & DXIL::kEnableMinPrecision; bool SupportsLifetimeIntrinsics = ValCtx.DxilMod.GetShaderModel()->IsSM66Plus(); - SmallVector gradientOps; - SmallVector barriers; - CallInst *setMeshOutputCounts = nullptr; - CallInst *getMeshPayload = nullptr; - CallInst *dispatchMesh = nullptr; - hlsl::OP *hlslOP = ValCtx.DxilMod.GetOP(); + SmallVector GradientOps; + SmallVector Barriers; + CallInst *SetMeshOutputCounts = nullptr; + CallInst *GetMeshPayload = nullptr; + CallInst *DispatchMesh = nullptr; + hlsl::OP *HlslOP = ValCtx.DxilMod.GetOP(); - for (auto b = F->begin(), bend = F->end(); b != bend; ++b) { - for (auto i = b->begin(), iend = b->end(); i != iend; ++i) { - llvm::Instruction &I = *i; + for (auto B = F->begin(), BEnd = F->end(); B != BEnd; ++B) { + for (auto It = B->begin(), ItEnd = B->end(); It != ItEnd; ++It) { + llvm::Instruction &I = *It; if (I.hasMetadata()) { @@ -2691,7 +2762,8 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { } // Instructions must be allowed. 
- if (!IsLLVMInstructionAllowed(I)) { + if (!IsLLVMInstructionAllowed(I) || + !IsLLVMInstructionAllowedForShaderModel(I, ValCtx)) { if (!IsLLVMInstructionAllowedForLib(I, ValCtx)) { ValCtx.EmitInstrError(&I, ValidationRule::InstrAllowed); continue; @@ -2721,27 +2793,27 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { continue; } - Value *opcodeVal = CI->getOperand(0); - ConstantInt *OpcodeConst = dyn_cast(opcodeVal); + Value *OpcodeVal = CI->getOperand(0); + ConstantInt *OpcodeConst = dyn_cast(OpcodeVal); if (OpcodeConst == nullptr) { ValCtx.EmitInstrFormatError(&I, ValidationRule::InstrOpConst, {"Opcode", "DXIL operation"}); continue; } - unsigned opcode = OpcodeConst->getLimitedValue(); - if (opcode >= static_cast(DXIL::OpCode::NumOpCodes)) { + unsigned Opcode = OpcodeConst->getLimitedValue(); + if (Opcode >= static_cast(DXIL::OpCode::NumOpCodes)) { ValCtx.EmitInstrFormatError( &I, ValidationRule::InstrIllegalDXILOpCode, {std::to_string((unsigned)DXIL::OpCode::NumOpCodes), - std::to_string(opcode)}); + std::to_string(Opcode)}); continue; } - DXIL::OpCode dxilOpcode = (DXIL::OpCode)opcode; + DXIL::OpCode DxilOpcode = (DXIL::OpCode)Opcode; bool IllegalOpFunc = true; - for (auto &it : hlslOP->GetOpFuncList(dxilOpcode)) { - if (it.second == FCalled) { + for (auto &It : HlslOP->GetOpFuncList(DxilOpcode)) { + if (It.second == FCalled) { IllegalOpFunc = false; break; } @@ -2750,46 +2822,46 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { if (IllegalOpFunc) { ValCtx.EmitInstrFormatError( &I, ValidationRule::InstrIllegalDXILOpFunction, - {FCalled->getName(), OP::GetOpCodeName(dxilOpcode)}); + {FCalled->getName(), OP::GetOpCodeName(DxilOpcode)}); continue; } - if (OP::IsDxilOpGradient(dxilOpcode)) { - gradientOps.push_back(CI); + if (OP::IsDxilOpGradient(DxilOpcode)) { + GradientOps.push_back(CI); } - if (dxilOpcode == DXIL::OpCode::Barrier) { - barriers.push_back(CI); + if (DxilOpcode == DXIL::OpCode::Barrier) { + 
Barriers.push_back(CI); } // External function validation will check the parameter // list. This function will check that the call does not // violate any rules. - if (dxilOpcode == DXIL::OpCode::SetMeshOutputCounts) { + if (DxilOpcode == DXIL::OpCode::SetMeshOutputCounts) { // validate the call count of SetMeshOutputCounts - if (setMeshOutputCounts != nullptr) { + if (SetMeshOutputCounts != nullptr) { ValCtx.EmitInstrError( &I, ValidationRule::InstrMultipleSetMeshOutputCounts); } - setMeshOutputCounts = CI; + SetMeshOutputCounts = CI; } - if (dxilOpcode == DXIL::OpCode::GetMeshPayload) { + if (DxilOpcode == DXIL::OpCode::GetMeshPayload) { // validate the call count of GetMeshPayload - if (getMeshPayload != nullptr) { + if (GetMeshPayload != nullptr) { ValCtx.EmitInstrError( &I, ValidationRule::InstrMultipleGetMeshPayload); } - getMeshPayload = CI; + GetMeshPayload = CI; } - if (dxilOpcode == DXIL::OpCode::DispatchMesh) { + if (DxilOpcode == DXIL::OpCode::DispatchMesh) { // validate the call count of DispatchMesh - if (dispatchMesh != nullptr) { + if (DispatchMesh != nullptr) { ValCtx.EmitInstrError(&I, ValidationRule::InstrNotOnceDispatchMesh); } - dispatchMesh = CI; + DispatchMesh = CI; } } continue; @@ -2797,23 +2869,23 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { for (Value *op : I.operands()) { if (isa(op)) { - bool legalUndef = isa(&I); + bool LegalUndef = isa(&I); if (isa(&I)) { - legalUndef = op == I.getOperand(0); + LegalUndef = op == I.getOperand(0); } if (isa(&I)) { - legalUndef = op == I.getOperand(1); + LegalUndef = op == I.getOperand(1); } if (isa(&I)) { - legalUndef = op == I.getOperand(0); + LegalUndef = op == I.getOperand(0); } - if (!legalUndef) + if (!LegalUndef) ValCtx.EmitInstrError(&I, ValidationRule::InstrNoReadingUninitialized); } else if (ConstantExpr *CE = dyn_cast(op)) { - for (Value *opCE : CE->operands()) { - if (isa(opCE)) { + for (Value *OpCE : CE->operands()) { + if (isa(OpCE)) { 
ValCtx.EmitInstrError( &I, ValidationRule::InstrNoReadingUninitialized); } @@ -2843,8 +2915,8 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { } } - unsigned opcode = I.getOpcode(); - switch (opcode) { + unsigned Opcode = I.getOpcode(); + switch (Opcode) { case Instruction::Alloca: { AllocaInst *AI = cast(&I); // TODO: validate address space and alignment @@ -2885,26 +2957,26 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { continue; } GetElementPtrInst *GEP = cast(&I); - bool allImmIndex = true; + bool AllImmIndex = true; for (auto Idx = GEP->idx_begin(), E = GEP->idx_end(); Idx != E; Idx++) { if (!isa(Idx)) { - allImmIndex = false; + AllImmIndex = false; break; } } - if (allImmIndex) { + if (AllImmIndex) { const DataLayout &DL = ValCtx.DL; Value *Ptr = GEP->getPointerOperand(); - unsigned size = + unsigned Size = DL.getTypeAllocSize(Ptr->getType()->getPointerElementType()); - unsigned valSize = + unsigned ValSize = DL.getTypeAllocSize(GEP->getType()->getPointerElementType()); SmallVector Indices(GEP->idx_begin(), GEP->idx_end()); - unsigned offset = + unsigned Offset = DL.getIndexedOffset(GEP->getPointerOperandType(), Indices); - if ((offset + valSize) > size) { + if ((Offset + ValSize) > Size) { ValCtx.EmitInstrError(GEP, ValidationRule::InstrInBoundsAccess); } } @@ -2978,16 +3050,16 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { case Instruction::AtomicCmpXchg: case Instruction::AtomicRMW: { Value *Ptr = I.getOperand(AtomicRMWInst::getPointerOperandIndex()); - PointerType *ptrType = cast(Ptr->getType()); - Type *elType = ptrType->getElementType(); + PointerType *PtrType = cast(Ptr->getType()); + Type *ElType = PtrType->getElementType(); const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel(); - if ((elType->isIntegerTy(64)) && !pSM->IsSM66Plus()) + if ((ElType->isIntegerTy(64)) && !pSM->IsSM66Plus()) ValCtx.EmitInstrFormatError( &I, 
ValidationRule::SmOpcodeInInvalidFunction, {"64-bit atomic operations", "Shader Model 6.6+"}); - if (ptrType->getAddressSpace() != DXIL::kTGSMAddrSpace && - ptrType->getAddressSpace() != DXIL::kNodeRecordAddrSpace) + if (PtrType->getAddressSpace() != DXIL::kTGSMAddrSpace && + PtrType->getAddressSpace() != DXIL::kNodeRecordAddrSpace) ValCtx.EmitInstrError( &I, ValidationRule::InstrAtomicOpNonGroupsharedOrRecord); @@ -3038,12 +3110,12 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { } } } - ValidateControlFlowHint(*b, ValCtx); + ValidateControlFlowHint(*B, ValCtx); } - ValidateMsIntrinsics(F, ValCtx, setMeshOutputCounts, getMeshPayload); + ValidateMsIntrinsics(F, ValCtx, SetMeshOutputCounts, GetMeshPayload); - ValidateAsIntrinsics(F, ValCtx, dispatchMesh); + ValidateAsIntrinsics(F, ValCtx, DispatchMesh); } static void ValidateNodeInputRecord(Function *F, ValidationContext &ValCtx) { @@ -3051,39 +3123,39 @@ static void ValidateNodeInputRecord(Function *F, ValidationContext &ValCtx) { // to do here if (!ValCtx.DxilMod.HasDxilFunctionProps(F)) return; - auto &props = ValCtx.DxilMod.GetDxilFunctionProps(F); - if (!props.IsNode()) + auto &Props = ValCtx.DxilMod.GetDxilFunctionProps(F); + if (!Props.IsNode()) return; - if (props.InputNodes.size() > 1) { + if (Props.InputNodes.size() > 1) { ValCtx.EmitFnFormatError( F, ValidationRule::DeclMultipleNodeInputs, - {F->getName(), std::to_string(props.InputNodes.size())}); + {F->getName(), std::to_string(Props.InputNodes.size())}); } - for (auto &input : props.InputNodes) { - if (!input.Flags.RecordTypeMatchesLaunchType(props.Node.LaunchType)) { + for (auto &input : Props.InputNodes) { + if (!input.Flags.RecordTypeMatchesLaunchType(Props.Node.LaunchType)) { // We allow EmptyNodeInput here, as that may have been added implicitly // if there was no input specified if (input.Flags.IsEmptyInput()) continue; - llvm::StringRef validInputs = ""; - switch (props.Node.LaunchType) { + llvm::StringRef 
ValidInputs = ""; + switch (Props.Node.LaunchType) { case DXIL::NodeLaunchType::Broadcasting: - validInputs = "{RW}DispatchNodeInputRecord"; + ValidInputs = "{RW}DispatchNodeInputRecord"; break; case DXIL::NodeLaunchType::Coalescing: - validInputs = "{RW}GroupNodeInputRecords or EmptyNodeInput"; + ValidInputs = "{RW}GroupNodeInputRecords or EmptyNodeInput"; break; case DXIL::NodeLaunchType::Thread: - validInputs = "{RW}ThreadNodeInputRecord"; + ValidInputs = "{RW}ThreadNodeInputRecord"; break; default: llvm_unreachable("invalid launch type"); } ValCtx.EmitFnFormatError( F, ValidationRule::DeclNodeLaunchInputType, - {ShaderModel::GetNodeLaunchTypeName(props.Node.LaunchType), - F->getName(), validInputs}); + {ShaderModel::GetNodeLaunchTypeName(Props.Node.LaunchType), + F->getName(), ValidInputs}); } } } @@ -3094,26 +3166,26 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { if (F.isIntrinsic() || IsDxilFunction(&F)) return; } else { - DXIL::ShaderKind shaderKind = DXIL::ShaderKind::Library; - bool isShader = ValCtx.DxilMod.HasDxilFunctionProps(&F); - unsigned numUDTShaderArgs = 0; - if (isShader) { - shaderKind = ValCtx.DxilMod.GetDxilFunctionProps(&F).shaderKind; - switch (shaderKind) { + DXIL::ShaderKind ShaderKind = DXIL::ShaderKind::Library; + bool IsShader = ValCtx.DxilMod.HasDxilFunctionProps(&F); + unsigned NumUDTShaderArgs = 0; + if (IsShader) { + ShaderKind = ValCtx.DxilMod.GetDxilFunctionProps(&F).shaderKind; + switch (ShaderKind) { case DXIL::ShaderKind::AnyHit: case DXIL::ShaderKind::ClosestHit: - numUDTShaderArgs = 2; + NumUDTShaderArgs = 2; break; case DXIL::ShaderKind::Miss: case DXIL::ShaderKind::Callable: - numUDTShaderArgs = 1; + NumUDTShaderArgs = 1; break; case DXIL::ShaderKind::Compute: { DxilModule &DM = ValCtx.DxilMod; if (DM.HasDxilEntryProps(&F)) { - DxilEntryProps &entryProps = DM.GetDxilEntryProps(&F); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(&F); // Check that compute has no node metadata - if 
(entryProps.props.IsNode()) { + if (EntryProps.props.IsNode()) { ValCtx.EmitFnFormatError(&F, ValidationRule::MetaComputeWithNode, {F.getName()}); } @@ -3124,45 +3196,45 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { break; } } else { - isShader = ValCtx.DxilMod.IsPatchConstantShader(&F); + IsShader = ValCtx.DxilMod.IsPatchConstantShader(&F); } // Entry function should not have parameter. - if (isShader && 0 == numUDTShaderArgs && !F.arg_empty()) + if (IsShader && 0 == NumUDTShaderArgs && !F.arg_empty()) ValCtx.EmitFnFormatError(&F, ValidationRule::FlowFunctionCall, {F.getName()}); // Shader functions should return void. - if (isShader && !F.getReturnType()->isVoidTy()) + if (IsShader && !F.getReturnType()->isVoidTy()) ValCtx.EmitFnFormatError(&F, ValidationRule::DeclShaderReturnVoid, {F.getName()}); - auto ArgFormatError = [&](Function &F, Argument &arg, ValidationRule rule) { - if (arg.hasName()) - ValCtx.EmitFnFormatError(&F, rule, {arg.getName().str(), F.getName()}); + auto ArgFormatError = [&](Function &F, Argument &Arg, ValidationRule Rule) { + if (Arg.hasName()) + ValCtx.EmitFnFormatError(&F, Rule, {Arg.getName().str(), F.getName()}); else - ValCtx.EmitFnFormatError(&F, rule, - {std::to_string(arg.getArgNo()), F.getName()}); + ValCtx.EmitFnFormatError(&F, Rule, + {std::to_string(Arg.getArgNo()), F.getName()}); }; - unsigned numArgs = 0; - for (auto &arg : F.args()) { - Type *argTy = arg.getType(); - if (argTy->isPointerTy()) - argTy = argTy->getPointerElementType(); - - numArgs++; - if (numUDTShaderArgs) { - if (arg.getArgNo() >= numUDTShaderArgs) { - ArgFormatError(F, arg, ValidationRule::DeclExtraArgs); - } else if (!argTy->isStructTy()) { - switch (shaderKind) { + unsigned NumArgs = 0; + for (auto &Arg : F.args()) { + Type *ArgTy = Arg.getType(); + if (ArgTy->isPointerTy()) + ArgTy = ArgTy->getPointerElementType(); + + NumArgs++; + if (NumUDTShaderArgs) { + if (Arg.getArgNo() >= NumUDTShaderArgs) { + ArgFormatError(F, Arg, 
ValidationRule::DeclExtraArgs); + } else if (!ArgTy->isStructTy()) { + switch (ShaderKind) { case DXIL::ShaderKind::Callable: - ArgFormatError(F, arg, ValidationRule::DeclParamStruct); + ArgFormatError(F, Arg, ValidationRule::DeclParamStruct); break; default: - ArgFormatError(F, arg, - arg.getArgNo() == 0 + ArgFormatError(F, Arg, + Arg.getArgNo() == 0 ? ValidationRule::DeclPayloadStruct : ValidationRule::DeclAttrStruct); } @@ -3170,24 +3242,24 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { continue; } - while (argTy->isArrayTy()) { - argTy = argTy->getArrayElementType(); + while (ArgTy->isArrayTy()) { + ArgTy = ArgTy->getArrayElementType(); } - if (argTy->isStructTy() && !ValCtx.isLibProfile) { - ArgFormatError(F, arg, ValidationRule::DeclFnFlattenParam); + if (ArgTy->isStructTy() && !ValCtx.isLibProfile) { + ArgFormatError(F, Arg, ValidationRule::DeclFnFlattenParam); break; } } - if (numArgs < numUDTShaderArgs && shaderKind != DXIL::ShaderKind::Node) { - StringRef argType[2] = { - shaderKind == DXIL::ShaderKind::Callable ? "params" : "payload", + if (NumArgs < NumUDTShaderArgs && ShaderKind != DXIL::ShaderKind::Node) { + StringRef ArgType[2] = { + ShaderKind == DXIL::ShaderKind::Callable ? 
"params" : "payload", "attributes"}; - for (unsigned i = numArgs; i < numUDTShaderArgs; i++) { + for (unsigned I = NumArgs; I < NumUDTShaderArgs; I++) { ValCtx.EmitFnFormatError( &F, ValidationRule::DeclShaderMissingArg, - {ShaderModel::GetKindName(shaderKind), F.getName(), argType[i]}); + {ShaderModel::GetKindName(ShaderKind), F.getName(), ArgType[I]}); } } @@ -3224,25 +3296,25 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { static void ValidateGlobalVariable(GlobalVariable &GV, ValidationContext &ValCtx) { - bool isInternalGV = + bool IsInternalGv = dxilutil::IsStaticGlobal(&GV) || dxilutil::IsSharedMemoryGlobal(&GV); if (ValCtx.isLibProfile) { - auto isCBufferGlobal = + auto IsCBufferGlobal = [&](const std::vector> &ResTab) -> bool { for (auto &Res : ResTab) if (Res->GetGlobalSymbol() == &GV) return true; return false; }; - auto isResourceGlobal = + auto IsResourceGlobal = [&](const std::vector> &ResTab) -> bool { for (auto &Res : ResTab) if (Res->GetGlobalSymbol() == &GV) return true; return false; }; - auto isSamplerGlobal = + auto IsSamplerGlobal = [&](const std::vector> &ResTab) -> bool { for (auto &Res : ResTab) if (Res->GetGlobalSymbol() == &GV) @@ -3250,32 +3322,32 @@ static void ValidateGlobalVariable(GlobalVariable &GV, return false; }; - bool isRes = isCBufferGlobal(ValCtx.DxilMod.GetCBuffers()); - isRes |= isResourceGlobal(ValCtx.DxilMod.GetUAVs()); - isRes |= isResourceGlobal(ValCtx.DxilMod.GetSRVs()); - isRes |= isSamplerGlobal(ValCtx.DxilMod.GetSamplers()); - isInternalGV |= isRes; + bool IsRes = IsCBufferGlobal(ValCtx.DxilMod.GetCBuffers()); + IsRes |= IsResourceGlobal(ValCtx.DxilMod.GetUAVs()); + IsRes |= IsResourceGlobal(ValCtx.DxilMod.GetSRVs()); + IsRes |= IsSamplerGlobal(ValCtx.DxilMod.GetSamplers()); + IsInternalGv |= IsRes; // Allow special dx.ishelper for library target if (GV.getName().compare(DXIL::kDxIsHelperGlobalName) == 0) { Type *Ty = GV.getType()->getPointerElementType(); if (Ty->isIntegerTy() && 
Ty->getScalarSizeInBits() == 32) { - isInternalGV = true; + IsInternalGv = true; } } } - if (!isInternalGV) { + if (!IsInternalGv) { if (!GV.user_empty()) { - bool hasInstructionUser = false; + bool HasInstructionUser = false; for (User *U : GV.users()) { if (isa(U)) { - hasInstructionUser = true; + HasInstructionUser = true; break; } } // External GV should not have instruction user. - if (hasInstructionUser) { + if (HasInstructionUser) { ValCtx.EmitGlobalVariableFormatError( &GV, ValidationRule::DeclNotUsedExternal, {GV.getName()}); } @@ -3298,14 +3370,14 @@ static void ValidateGlobalVariable(GlobalVariable &GV, } static void CollectFixAddressAccess(Value *V, - std::vector &fixAddrTGSMList) { + std::vector &FixAddrTGSMList) { for (User *U : V->users()) { if (GEPOperator *GEP = dyn_cast(U)) { if (isa(GEP) || GEP->hasAllConstantIndices()) { - CollectFixAddressAccess(GEP, fixAddrTGSMList); + CollectFixAddressAccess(GEP, FixAddrTGSMList); } } else if (StoreInst *SI = dyn_cast(U)) { - fixAddrTGSMList.emplace_back(SI); + FixAddrTGSMList.emplace_back(SI); } } } @@ -3315,16 +3387,16 @@ static bool IsDivergent(Value *V) { return false; } -static void ValidateTGSMRaceCondition(std::vector &fixAddrTGSMList, +static void ValidateTGSMRaceCondition(std::vector &FixAddrTGSMList, ValidationContext &ValCtx) { - std::unordered_set fixAddrTGSMFuncSet; - for (StoreInst *I : fixAddrTGSMList) { + std::unordered_set FixAddrTGSMFuncSet; + for (StoreInst *I : FixAddrTGSMList) { BasicBlock *BB = I->getParent(); - fixAddrTGSMFuncSet.insert(BB->getParent()); + FixAddrTGSMFuncSet.insert(BB->getParent()); } for (auto &F : ValCtx.DxilMod.GetModule()->functions()) { - if (F.isDeclaration() || !fixAddrTGSMFuncSet.count(&F)) + if (F.isDeclaration() || !FixAddrTGSMFuncSet.count(&F)) continue; PostDominatorTree PDT; @@ -3332,7 +3404,7 @@ static void ValidateTGSMRaceCondition(std::vector &fixAddrTGSMList, BasicBlock *Entry = &F.getEntryBlock(); - for (StoreInst *SI : fixAddrTGSMList) { + for 
(StoreInst *SI : FixAddrTGSMList) { BasicBlock *BB = SI->getParent(); if (BB->getParent() == &F) { if (PDT.dominates(BB, Entry)) { @@ -3351,7 +3423,7 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { bool TGSMAllowed = pSM->IsCS() || pSM->IsAS() || pSM->IsMS() || pSM->IsLib(); unsigned TGSMSize = 0; - std::vector fixAddrTGSMList; + std::vector FixAddrTGSMList; const DataLayout &DL = M.GetModule()->getDataLayout(); for (GlobalVariable &GV : M.GetModule()->globals()) { ValidateGlobalVariable(GV, ValCtx); @@ -3366,9 +3438,9 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { if (Instruction *I = dyn_cast(U)) { llvm::Function *F = I->getParent()->getParent(); if (M.HasDxilEntryProps(F)) { - DxilFunctionProps &props = M.GetDxilEntryProps(F).props; - if (!props.IsCS() && !props.IsAS() && !props.IsMS() && - !props.IsNode()) { + DxilFunctionProps &Props = M.GetDxilEntryProps(F).props; + if (!Props.IsCS() && !Props.IsAS() && !Props.IsMS() && + !Props.IsNode()) { ValCtx.EmitInstrFormatError(I, ValidationRule::SmTGSMUnsupported, {"from non-compute entry points"}); @@ -3378,7 +3450,7 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { } } TGSMSize += DL.getTypeAllocSize(GV.getType()->getElementType()); - CollectFixAddressAccess(&GV, fixAddrTGSMList); + CollectFixAddressAccess(&GV, FixAddrTGSMList); } } @@ -3402,8 +3474,8 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { GV, Rule, {std::to_string(TGSMSize), std::to_string(MaxSize)}); } - if (!fixAddrTGSMList.empty()) { - ValidateTGSMRaceCondition(fixAddrTGSMList, ValCtx); + if (!FixAddrTGSMList.empty()) { + ValidateTGSMRaceCondition(FixAddrTGSMList, ValCtx); } } @@ -3416,20 +3488,20 @@ static void ValidateValidatorVersion(ValidationContext &ValCtx) { if (pNode->getNumOperands() == 1) { MDTuple *pVerValues = dyn_cast(pNode->getOperand(0)); if (pVerValues != nullptr && pVerValues->getNumOperands() == 2) { - uint64_t majorVer, minorVer; - if 
(GetNodeOperandAsInt(ValCtx, pVerValues, 0, &majorVer) && - GetNodeOperandAsInt(ValCtx, pVerValues, 1, &minorVer)) { - unsigned curMajor, curMinor; - GetValidationVersion(&curMajor, &curMinor); + uint64_t MajorVer, MinorVer; + if (GetNodeOperandAsInt(ValCtx, pVerValues, 0, &MajorVer) && + GetNodeOperandAsInt(ValCtx, pVerValues, 1, &MinorVer)) { + unsigned CurMajor, CurMinor; + GetValidationVersion(&CurMajor, &CurMinor); // This will need to be updated as major/minor versions evolve, // depending on the degree of compat across versions. - if (majorVer == curMajor && minorVer <= curMinor) { + if (MajorVer == CurMajor && MinorVer <= CurMinor) { return; } else { ValCtx.EmitFormatError( ValidationRule::MetaVersionSupported, - {"Validator", std::to_string(majorVer), std::to_string(minorVer), - std::to_string(curMajor), std::to_string(curMinor)}); + {"Validator", std::to_string(MajorVer), std::to_string(MinorVer), + std::to_string(CurMajor), std::to_string(CurMinor)}); return; } } @@ -3447,19 +3519,19 @@ static void ValidateDxilVersion(ValidationContext &ValCtx) { if (pNode->getNumOperands() == 1) { MDTuple *pVerValues = dyn_cast(pNode->getOperand(0)); if (pVerValues != nullptr && pVerValues->getNumOperands() == 2) { - uint64_t majorVer, minorVer; - if (GetNodeOperandAsInt(ValCtx, pVerValues, 0, &majorVer) && - GetNodeOperandAsInt(ValCtx, pVerValues, 1, &minorVer)) { + uint64_t MajorVer, MinorVer; + if (GetNodeOperandAsInt(ValCtx, pVerValues, 0, &MajorVer) && + GetNodeOperandAsInt(ValCtx, pVerValues, 1, &MinorVer)) { // This will need to be updated as dxil major/minor versions evolve, // depending on the degree of compat across versions. 
- if ((majorVer == DXIL::kDxilMajor && minorVer <= DXIL::kDxilMinor) && - (majorVer == ValCtx.m_DxilMajor && - minorVer == ValCtx.m_DxilMinor)) { + if ((MajorVer == DXIL::kDxilMajor && MinorVer <= DXIL::kDxilMinor) && + (MajorVer == ValCtx.m_DxilMajor && + MinorVer == ValCtx.m_DxilMinor)) { return; } else { ValCtx.EmitFormatError(ValidationRule::MetaVersionSupported, - {"Dxil", std::to_string(majorVer), - std::to_string(minorVer), + {"Dxil", std::to_string(MajorVer), + std::to_string(MinorVer), std::to_string(DXIL::kDxilMajor), std::to_string(DXIL::kDxilMinor)}); return; @@ -3477,16 +3549,16 @@ static void ValidateTypeAnnotation(ValidationContext &ValCtx) { NamedMDNode *TA = pModule->getNamedMetadata("dx.typeAnnotations"); if (TA == nullptr) return; - for (unsigned i = 0, end = TA->getNumOperands(); i < end; ++i) { - MDTuple *TANode = dyn_cast(TA->getOperand(i)); + for (unsigned I = 0, End = TA->getNumOperands(); I < End; ++I) { + MDTuple *TANode = dyn_cast(TA->getOperand(I)); if (TANode->getNumOperands() < 3) { ValCtx.EmitMetaError(TANode, ValidationRule::MetaWellFormed); return; } - ConstantInt *tag = mdconst::extract(TANode->getOperand(0)); - uint64_t tagValue = tag->getZExtValue(); - if (tagValue != DxilMDHelper::kDxilTypeSystemStructTag && - tagValue != DxilMDHelper::kDxilTypeSystemFunctionTag) { + ConstantInt *Tag = mdconst::extract(TANode->getOperand(0)); + uint64_t TagValue = Tag->getZExtValue(); + if (TagValue != DxilMDHelper::kDxilTypeSystemStructTag && + TagValue != DxilMDHelper::kDxilTypeSystemFunctionTag) { ValCtx.EmitMetaError(TANode, ValidationRule::MetaWellFormed); return; } @@ -3495,11 +3567,11 @@ static void ValidateTypeAnnotation(ValidationContext &ValCtx) { } static void ValidateBitcode(ValidationContext &ValCtx) { - std::string diagStr; - raw_string_ostream diagStream(diagStr); - if (llvm::verifyModule(ValCtx.M, &diagStream)) { + std::string DiagStr; + raw_string_ostream DiagStream(DiagStr); + if (llvm::verifyModule(ValCtx.M, &DiagStream)) { 
ValCtx.EmitError(ValidationRule::BitcodeValid); - dxilutil::EmitErrorOnContext(ValCtx.M.getContext(), diagStream.str()); + dxilutil::EmitErrorOnContext(ValCtx.M.getContext(), DiagStream.str()); } } @@ -3513,18 +3585,18 @@ static void ValidateWaveSize(ValidationContext &ValCtx, if (!EPs) return; - for (unsigned i = 0, end = EPs->getNumOperands(); i < end; ++i) { - MDTuple *EPNodeRef = dyn_cast(EPs->getOperand(i)); + for (unsigned I = 0, End = EPs->getNumOperands(); I < End; ++I) { + MDTuple *EPNodeRef = dyn_cast(EPs->getOperand(I)); if (EPNodeRef->getNumOperands() < 5) { ValCtx.EmitMetaError(EPNodeRef, ValidationRule::MetaWellFormed); return; } // get access to the digit that represents the metadata number that // would store entry properties - const llvm::MDOperand &mOp = + const llvm::MDOperand &MOp = EPNodeRef->getOperand(EPNodeRef->getNumOperands() - 1); // the final operand to the entry points tuple should be a tuple. - if (mOp == nullptr || (mOp.get())->getMetadataID() != Metadata::MDTupleKind) + if (MOp == nullptr || (MOp.get())->getMetadataID() != Metadata::MDTupleKind) continue; // get access to the node that stores entry properties @@ -3532,29 +3604,29 @@ static void ValidateWaveSize(ValidationContext &ValCtx, EPNodeRef->getOperand(EPNodeRef->getNumOperands() - 1)); // find any incompatible tags inside the entry properties // increment j by 2 to only analyze tags, not values - bool foundTag = false; - for (unsigned j = 0, end2 = EPropNode->getNumOperands(); j < end2; j += 2) { - const MDOperand &propertyTagOp = EPropNode->getOperand(j); + bool FoundTag = false; + for (unsigned J = 0, End2 = EPropNode->getNumOperands(); J < End2; J += 2) { + const MDOperand &PropertyTagOp = EPropNode->getOperand(J); // note, we are only looking for tags, which will be a constant // integer - DXASSERT(!(propertyTagOp == nullptr || - (propertyTagOp.get())->getMetadataID() != + DXASSERT(!(PropertyTagOp == nullptr || + (PropertyTagOp.get())->getMetadataID() != 
Metadata::ConstantAsMetadataKind), "tag operand should be a constant integer."); - ConstantInt *tag = mdconst::extract(propertyTagOp); - uint64_t tagValue = tag->getZExtValue(); + ConstantInt *Tag = mdconst::extract(PropertyTagOp); + uint64_t TagValue = Tag->getZExtValue(); // legacy wavesize is only supported between 6.6 and 6.7, so we // should fail if we find the ranged wave size metadata tag - if (tagValue == DxilMDHelper::kDxilRangedWaveSizeTag) { + if (TagValue == DxilMDHelper::kDxilRangedWaveSizeTag) { // if this tag is already present in the // current entry point, emit an error - if (foundTag) { + if (FoundTag) { ValCtx.EmitFormatError(ValidationRule::SmWaveSizeTagDuplicate, {}); return; } - foundTag = true; + FoundTag = true; if (SM->IsSM66Plus() && !SM->IsSM68Plus()) { ValCtx.EmitFormatError(ValidationRule::SmWaveSizeRangeNeedsSM68Plus, @@ -3563,36 +3635,36 @@ static void ValidateWaveSize(ValidationContext &ValCtx, } // get the metadata that contains the // parameters to the wavesize attribute - MDTuple *WaveTuple = dyn_cast(EPropNode->getOperand(j + 1)); + MDTuple *WaveTuple = dyn_cast(EPropNode->getOperand(J + 1)); if (WaveTuple->getNumOperands() != 3) { ValCtx.EmitFormatError( ValidationRule::SmWaveSizeRangeExpectsThreeParams, {}); return; } - for (int k = 0; k < 3; k++) { - const MDOperand ¶m = WaveTuple->getOperand(k); - if (param->getMetadataID() != Metadata::ConstantAsMetadataKind) { + for (int K = 0; K < 3; K++) { + const MDOperand &Param = WaveTuple->getOperand(K); + if (Param->getMetadataID() != Metadata::ConstantAsMetadataKind) { ValCtx.EmitFormatError( ValidationRule::SmWaveSizeNeedsConstantOperands, {}); return; } } - } else if (tagValue == DxilMDHelper::kDxilWaveSizeTag) { + } else if (TagValue == DxilMDHelper::kDxilWaveSizeTag) { // if this tag is already present in the // current entry point, emit an error - if (foundTag) { + if (FoundTag) { ValCtx.EmitFormatError(ValidationRule::SmWaveSizeTagDuplicate, {}); return; } - foundTag = true; - 
MDTuple *WaveTuple = dyn_cast(EPropNode->getOperand(j + 1)); + FoundTag = true; + MDTuple *WaveTuple = dyn_cast(EPropNode->getOperand(J + 1)); if (WaveTuple->getNumOperands() != 1) { ValCtx.EmitFormatError(ValidationRule::SmWaveSizeExpectsOneParam, {}); return; } - const MDOperand ¶m = WaveTuple->getOperand(0); - if (param->getMetadataID() != Metadata::ConstantAsMetadataKind) { + const MDOperand &Param = WaveTuple->getOperand(0); + if (Param->getMetadataID() != Metadata::ConstantAsMetadataKind) { ValCtx.EmitFormatError( ValidationRule::SmWaveSizeNeedsConstantOperands, {}); return; @@ -3613,9 +3685,9 @@ static void ValidateMetadata(ValidationContext &ValCtx) { ValidateDxilVersion(ValCtx); Module *pModule = &ValCtx.M; - const std::string &target = pModule->getTargetTriple(); - if (target != "dxil-ms-dx") { - ValCtx.EmitFormatError(ValidationRule::MetaTarget, {target}); + const std::string &Target = pModule->getTargetTriple(); + if (Target != "dxil-ms-dx") { + ValCtx.EmitFormatError(ValidationRule::MetaTarget, {Target}); } // The llvm.dbg.(cu/contents/defines/mainFileName/arg) named metadata nodes @@ -3623,9 +3695,9 @@ static void ValidateMetadata(ValidationContext &ValCtx) { // llvm.bitsets is also disallowed. // // These are verified in lib/IR/Verifier.cpp. 
- StringMap llvmNamedMeta; - llvmNamedMeta["llvm.ident"]; - llvmNamedMeta["llvm.module.flags"]; + StringMap LlvmNamedMeta; + LlvmNamedMeta["llvm.ident"]; + LlvmNamedMeta["llvm.module.flags"]; for (auto &NamedMetaNode : pModule->named_metadata()) { if (!DxilModule::IsKnownNamedMetaData(NamedMetaNode)) { @@ -3633,7 +3705,7 @@ static void ValidateMetadata(ValidationContext &ValCtx) { if (!name.startswith_lower("llvm.")) { ValCtx.EmitFormatError(ValidationRule::MetaKnown, {name.str()}); } else { - if (llvmNamedMeta.count(name) == 0) { + if (LlvmNamedMeta.count(name) == 0) { ValCtx.EmitFormatError(ValidationRule::MetaKnown, {name.str()}); } } @@ -3666,35 +3738,35 @@ static void ValidateMetadata(ValidationContext &ValCtx) { } static void ValidateResourceOverlap( - hlsl::DxilResourceBase &res, - SpacesAllocator &spaceAllocator, + hlsl::DxilResourceBase &Res, + SpacesAllocator &SpaceAllocator, ValidationContext &ValCtx) { - unsigned base = res.GetLowerBound(); - if (ValCtx.isLibProfile && !res.IsAllocated()) { + unsigned Base = Res.GetLowerBound(); + if (ValCtx.isLibProfile && !Res.IsAllocated()) { // Skip unallocated resource for library. 
return; } - unsigned size = res.GetRangeSize(); - unsigned space = res.GetSpaceID(); + unsigned Size = Res.GetRangeSize(); + unsigned Space = Res.GetSpaceID(); - auto &allocator = spaceAllocator.Get(space); - unsigned end = base + size - 1; + auto &Allocator = SpaceAllocator.Get(Space); + unsigned End = Base + Size - 1; // unbounded - if (end < base) - end = size; - const DxilResourceBase *conflictRes = allocator.Insert(&res, base, end); - if (conflictRes) { + if (End < Base) + End = Size; + const DxilResourceBase *ConflictRes = Allocator.Insert(&Res, Base, End); + if (ConflictRes) { ValCtx.EmitFormatError( ValidationRule::SmResourceRangeOverlap, - {ValCtx.GetResourceName(&res), std::to_string(base), - std::to_string(size), std::to_string(conflictRes->GetLowerBound()), - std::to_string(conflictRes->GetRangeSize()), std::to_string(space)}); + {ValCtx.GetResourceName(&Res), std::to_string(Base), + std::to_string(Size), std::to_string(ConflictRes->GetLowerBound()), + std::to_string(ConflictRes->GetRangeSize()), std::to_string(Space)}); } } -static void ValidateResource(hlsl::DxilResource &res, +static void ValidateResource(hlsl::DxilResource &Res, ValidationContext &ValCtx) { - switch (res.GetKind()) { + switch (Res.GetKind()) { case DXIL::ResourceKind::RawBuffer: case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::TBuffer: @@ -3706,8 +3778,8 @@ static void ValidateResource(hlsl::DxilResource &res, case DXIL::ResourceKind::Texture3D: case DXIL::ResourceKind::TextureCube: case DXIL::ResourceKind::TextureCubeArray: - if (res.GetSampleCount() > 0) { - ValCtx.EmitResourceError(&res, ValidationRule::SmSampleCountOnlyOn2DMS); + if (Res.GetSampleCount() > 0) { + ValCtx.EmitResourceError(&Res, ValidationRule::SmSampleCountOnlyOn2DMS); } break; case DXIL::ResourceKind::Texture2DMS: @@ -3718,16 +3790,16 @@ static void ValidateResource(hlsl::DxilResource &res, break; case DXIL::ResourceKind::FeedbackTexture2D: case DXIL::ResourceKind::FeedbackTexture2DArray: - if 
(res.GetSamplerFeedbackType() >= DXIL::SamplerFeedbackType::LastEntry) - ValCtx.EmitResourceError(&res, + if (Res.GetSamplerFeedbackType() >= DXIL::SamplerFeedbackType::LastEntry) + ValCtx.EmitResourceError(&Res, ValidationRule::SmInvalidSamplerFeedbackType); break; default: - ValCtx.EmitResourceError(&res, ValidationRule::SmInvalidResourceKind); + ValCtx.EmitResourceError(&Res, ValidationRule::SmInvalidResourceKind); break; } - switch (res.GetCompType().GetKind()) { + switch (Res.GetCompType().GetKind()) { case DXIL::ComponentType::F32: case DXIL::ComponentType::SNormF32: case DXIL::ComponentType::UNormF32: @@ -3741,266 +3813,266 @@ static void ValidateResource(hlsl::DxilResource &res, case DXIL::ComponentType::U16: break; default: - if (!res.IsStructuredBuffer() && !res.IsRawBuffer() && - !res.IsFeedbackTexture()) - ValCtx.EmitResourceError(&res, ValidationRule::SmInvalidResourceCompType); + if (!Res.IsStructuredBuffer() && !Res.IsRawBuffer() && + !Res.IsFeedbackTexture()) + ValCtx.EmitResourceError(&Res, ValidationRule::SmInvalidResourceCompType); break; } - if (res.IsStructuredBuffer()) { - unsigned stride = res.GetElementStride(); - bool alignedTo4Bytes = (stride & 3) == 0; - if (!alignedTo4Bytes && ValCtx.M.GetDxilModule().GetUseMinPrecision()) { + if (Res.IsStructuredBuffer()) { + unsigned Stride = Res.GetElementStride(); + bool AlignedTo4Bytes = (Stride & 3) == 0; + if (!AlignedTo4Bytes && ValCtx.M.GetDxilModule().GetUseMinPrecision()) { ValCtx.EmitResourceFormatError( - &res, ValidationRule::MetaStructBufAlignment, - {std::to_string(4), std::to_string(stride)}); + &Res, ValidationRule::MetaStructBufAlignment, + {std::to_string(4), std::to_string(Stride)}); } - if (stride > DXIL::kMaxStructBufferStride) { + if (Stride > DXIL::kMaxStructBufferStride) { ValCtx.EmitResourceFormatError( - &res, ValidationRule::MetaStructBufAlignmentOutOfBound, + &Res, ValidationRule::MetaStructBufAlignmentOutOfBound, {std::to_string(DXIL::kMaxStructBufferStride), - 
std::to_string(stride)}); + std::to_string(Stride)}); } } - if (res.IsAnyTexture() || res.IsTypedBuffer()) { - Type *RetTy = res.GetRetType(); - unsigned size = + if (Res.IsAnyTexture() || Res.IsTypedBuffer()) { + Type *RetTy = Res.GetRetType(); + unsigned Size = ValCtx.DxilMod.GetModule()->getDataLayout().getTypeAllocSize(RetTy); - if (size > 4 * 4) { - ValCtx.EmitResourceError(&res, ValidationRule::MetaTextureType); + if (Size > 4 * 4) { + ValCtx.EmitResourceError(&Res, ValidationRule::MetaTextureType); } } } static void CollectCBufferRanges( - DxilStructAnnotation *annotation, - SpanAllocator &constAllocator, unsigned base, - DxilTypeSystem &typeSys, StringRef cbName, ValidationContext &ValCtx) { - DXASSERT(((base + 15) & ~(0xf)) == base, + DxilStructAnnotation *Annotation, + SpanAllocator &ConstAllocator, unsigned Base, + DxilTypeSystem &TypeSys, StringRef CbName, ValidationContext &ValCtx) { + DXASSERT(((Base + 15) & ~(0xf)) == Base, "otherwise, base for struct is not aligned"); - unsigned cbSize = annotation->GetCBufferSize(); + unsigned CbSize = Annotation->GetCBufferSize(); - const StructType *ST = annotation->GetStructType(); + const StructType *ST = Annotation->GetStructType(); - for (int i = annotation->GetNumFields() - 1; i >= 0; i--) { - DxilFieldAnnotation &fieldAnnotation = annotation->GetFieldAnnotation(i); - Type *EltTy = ST->getElementType(i); + for (int I = Annotation->GetNumFields() - 1; I >= 0; I--) { + DxilFieldAnnotation &FieldAnnotation = Annotation->GetFieldAnnotation(I); + Type *EltTy = ST->getElementType(I); - unsigned offset = fieldAnnotation.GetCBufferOffset(); + unsigned Offset = FieldAnnotation.GetCBufferOffset(); unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize( - fieldAnnotation, EltTy, typeSys); + FieldAnnotation, EltTy, TypeSys); - bool bOutOfBound = false; + bool IsOutOfBound = false; if (!EltTy->isAggregateType()) { - bOutOfBound = (offset + EltSize) > cbSize; - if (!bOutOfBound) { - if 
(constAllocator.Insert(&fieldAnnotation, base + offset, - base + offset + EltSize - 1)) { + IsOutOfBound = (Offset + EltSize) > CbSize; + if (!IsOutOfBound) { + if (ConstAllocator.Insert(&FieldAnnotation, Base + Offset, + Base + Offset + EltSize - 1)) { ValCtx.EmitFormatError(ValidationRule::SmCBufferOffsetOverlap, - {cbName, std::to_string(base + offset)}); + {CbName, std::to_string(Base + Offset)}); } } } else if (isa(EltTy)) { - if (((offset + 15) & ~(0xf)) != offset) { + if (((Offset + 15) & ~(0xf)) != Offset) { ValCtx.EmitFormatError(ValidationRule::SmCBufferArrayOffsetAlignment, - {cbName, std::to_string(offset)}); + {CbName, std::to_string(Offset)}); continue; } - unsigned arrayCount = 1; + unsigned ArrayCount = 1; while (isa(EltTy)) { - arrayCount *= EltTy->getArrayNumElements(); + ArrayCount *= EltTy->getArrayNumElements(); EltTy = EltTy->getArrayElementType(); } DxilStructAnnotation *EltAnnotation = nullptr; if (StructType *EltST = dyn_cast(EltTy)) - EltAnnotation = typeSys.GetStructAnnotation(EltST); + EltAnnotation = TypeSys.GetStructAnnotation(EltST); - unsigned alignedEltSize = ((EltSize + 15) & ~(0xf)); - unsigned arraySize = ((arrayCount - 1) * alignedEltSize) + EltSize; - bOutOfBound = (offset + arraySize) > cbSize; + unsigned AlignedEltSize = ((EltSize + 15) & ~(0xf)); + unsigned ArraySize = ((ArrayCount - 1) * AlignedEltSize) + EltSize; + IsOutOfBound = (Offset + ArraySize) > CbSize; - if (!bOutOfBound) { + if (!IsOutOfBound) { // If we didn't care about gaps where elements could be placed with user // offsets, we could: recurse once if EltAnnotation, then allocate the - // rest if arrayCount > 1 + // rest if ArrayCount > 1 - unsigned arrayBase = base + offset; + unsigned ArrayBase = Base + Offset; if (!EltAnnotation) { if (EltSize > 0 && - nullptr != constAllocator.Insert(&fieldAnnotation, arrayBase, - arrayBase + arraySize - 1)) { + nullptr != ConstAllocator.Insert(&FieldAnnotation, ArrayBase, + ArrayBase + ArraySize - 1)) { 
ValCtx.EmitFormatError(ValidationRule::SmCBufferOffsetOverlap, - {cbName, std::to_string(arrayBase)}); + {CbName, std::to_string(ArrayBase)}); } } else { - for (unsigned idx = 0; idx < arrayCount; idx++) { - CollectCBufferRanges(EltAnnotation, constAllocator, arrayBase, - typeSys, cbName, ValCtx); - arrayBase += alignedEltSize; + for (unsigned Idx = 0; Idx < ArrayCount; Idx++) { + CollectCBufferRanges(EltAnnotation, ConstAllocator, ArrayBase, + TypeSys, CbName, ValCtx); + ArrayBase += AlignedEltSize; } } } } else { StructType *EltST = cast(EltTy); - unsigned structBase = base + offset; - bOutOfBound = (offset + EltSize) > cbSize; - if (!bOutOfBound) { + unsigned StructBase = Base + Offset; + IsOutOfBound = (Offset + EltSize) > CbSize; + if (!IsOutOfBound) { if (DxilStructAnnotation *EltAnnotation = - typeSys.GetStructAnnotation(EltST)) { - CollectCBufferRanges(EltAnnotation, constAllocator, structBase, - typeSys, cbName, ValCtx); + TypeSys.GetStructAnnotation(EltST)) { + CollectCBufferRanges(EltAnnotation, ConstAllocator, StructBase, + TypeSys, CbName, ValCtx); } else { if (EltSize > 0 && - nullptr != constAllocator.Insert(&fieldAnnotation, structBase, - structBase + EltSize - 1)) { + nullptr != ConstAllocator.Insert(&FieldAnnotation, StructBase, + StructBase + EltSize - 1)) { ValCtx.EmitFormatError(ValidationRule::SmCBufferOffsetOverlap, - {cbName, std::to_string(structBase)}); + {CbName, std::to_string(StructBase)}); } } } } - if (bOutOfBound) { + if (IsOutOfBound) { ValCtx.EmitFormatError(ValidationRule::SmCBufferElementOverflow, - {cbName, std::to_string(base + offset)}); + {CbName, std::to_string(Base + Offset)}); } } } -static void ValidateCBuffer(DxilCBuffer &cb, ValidationContext &ValCtx) { - Type *Ty = cb.GetHLSLType()->getPointerElementType(); - if (cb.GetRangeSize() != 1 || Ty->isArrayTy()) { +static void ValidateCBuffer(DxilCBuffer &Cb, ValidationContext &ValCtx) { + Type *Ty = Cb.GetHLSLType()->getPointerElementType(); + if (Cb.GetRangeSize() != 1 || 
Ty->isArrayTy()) { Ty = Ty->getArrayElementType(); } if (!isa(Ty)) { - ValCtx.EmitResourceError(&cb, + ValCtx.EmitResourceError(&Cb, ValidationRule::SmCBufferTemplateTypeMustBeStruct); return; } - if (cb.GetSize() > (DXIL::kMaxCBufferSize << 4)) { - ValCtx.EmitResourceFormatError(&cb, ValidationRule::SmCBufferSize, - {std::to_string(cb.GetSize())}); + if (Cb.GetSize() > (DXIL::kMaxCBufferSize << 4)) { + ValCtx.EmitResourceFormatError(&Cb, ValidationRule::SmCBufferSize, + {std::to_string(Cb.GetSize())}); return; } StructType *ST = cast(Ty); - DxilTypeSystem &typeSys = ValCtx.DxilMod.GetTypeSystem(); - DxilStructAnnotation *annotation = typeSys.GetStructAnnotation(ST); - if (!annotation) + DxilTypeSystem &TypeSys = ValCtx.DxilMod.GetTypeSystem(); + DxilStructAnnotation *Annotation = TypeSys.GetStructAnnotation(ST); + if (!Annotation) return; // Collect constant ranges. - std::vector> constRanges; - SpanAllocator constAllocator( + std::vector> ConstRanges; + SpanAllocator ConstAllocator( 0, // 4096 * 16 bytes. 
DXIL::kMaxCBufferSize << 4); - CollectCBufferRanges(annotation, constAllocator, 0, typeSys, - ValCtx.GetResourceName(&cb), ValCtx); + CollectCBufferRanges(Annotation, ConstAllocator, 0, TypeSys, + ValCtx.GetResourceName(&Cb), ValCtx); } static void ValidateResources(ValidationContext &ValCtx) { - const vector> &uavs = ValCtx.DxilMod.GetUAVs(); - SpacesAllocator uavAllocator; + const vector> &Uavs = ValCtx.DxilMod.GetUAVs(); + SpacesAllocator UavAllocator; - for (auto &uav : uavs) { - if (uav->IsROV()) { + for (auto &Uav : Uavs) { + if (Uav->IsROV()) { if (!ValCtx.DxilMod.GetShaderModel()->IsPS() && !ValCtx.isLibProfile) { - ValCtx.EmitResourceError(uav.get(), ValidationRule::SmROVOnlyInPS); + ValCtx.EmitResourceError(Uav.get(), ValidationRule::SmROVOnlyInPS); } } - switch (uav->GetKind()) { + switch (Uav->GetKind()) { case DXIL::ResourceKind::TextureCube: case DXIL::ResourceKind::TextureCubeArray: - ValCtx.EmitResourceError(uav.get(), + ValCtx.EmitResourceError(Uav.get(), ValidationRule::SmInvalidTextureKindOnUAV); break; default: break; } - if (uav->HasCounter() && !uav->IsStructuredBuffer()) { - ValCtx.EmitResourceError(uav.get(), + if (Uav->HasCounter() && !Uav->IsStructuredBuffer()) { + ValCtx.EmitResourceError(Uav.get(), ValidationRule::SmCounterOnlyOnStructBuf); } - if (uav->HasCounter() && uav->IsGloballyCoherent()) - ValCtx.EmitResourceFormatError(uav.get(), + if (Uav->HasCounter() && Uav->IsGloballyCoherent()) + ValCtx.EmitResourceFormatError(Uav.get(), ValidationRule::MetaGlcNotOnAppendConsume, - {ValCtx.GetResourceName(uav.get())}); + {ValCtx.GetResourceName(Uav.get())}); - ValidateResource(*uav, ValCtx); - ValidateResourceOverlap(*uav, uavAllocator, ValCtx); + ValidateResource(*Uav, ValCtx); + ValidateResourceOverlap(*Uav, UavAllocator, ValCtx); } - SpacesAllocator srvAllocator; - const vector> &srvs = ValCtx.DxilMod.GetSRVs(); - for (auto &srv : srvs) { + SpacesAllocator SrvAllocator; + const vector> &Srvs = ValCtx.DxilMod.GetSRVs(); + for (auto &srv : 
Srvs) { ValidateResource(*srv, ValCtx); - ValidateResourceOverlap(*srv, srvAllocator, ValCtx); + ValidateResourceOverlap(*srv, SrvAllocator, ValCtx); } - hlsl::DxilResourceBase *pNonDense; - if (!AreDxilResourcesDense(&ValCtx.M, &pNonDense)) { - ValCtx.EmitResourceError(pNonDense, ValidationRule::MetaDenseResIDs); + hlsl::DxilResourceBase *NonDenseRes; + if (!AreDxilResourcesDense(&ValCtx.M, &NonDenseRes)) { + ValCtx.EmitResourceError(NonDenseRes, ValidationRule::MetaDenseResIDs); } - SpacesAllocator samplerAllocator; + SpacesAllocator SamplerAllocator; for (auto &sampler : ValCtx.DxilMod.GetSamplers()) { if (sampler->GetSamplerKind() == DXIL::SamplerKind::Invalid) { ValCtx.EmitResourceError(sampler.get(), ValidationRule::MetaValidSamplerMode); } - ValidateResourceOverlap(*sampler, samplerAllocator, ValCtx); + ValidateResourceOverlap(*sampler, SamplerAllocator, ValCtx); } - SpacesAllocator cbufferAllocator; + SpacesAllocator CbufferAllocator; for (auto &cbuffer : ValCtx.DxilMod.GetCBuffers()) { ValidateCBuffer(*cbuffer, ValCtx); - ValidateResourceOverlap(*cbuffer, cbufferAllocator, ValCtx); + ValidateResourceOverlap(*cbuffer, CbufferAllocator, ValCtx); } } static void ValidateShaderFlags(ValidationContext &ValCtx) { - ShaderFlags calcFlags; - ValCtx.DxilMod.CollectShaderFlagsForModule(calcFlags); + ShaderFlags CalcFlags; + ValCtx.DxilMod.CollectShaderFlagsForModule(CalcFlags); // Special case for validator version prior to 1.8. // If DXR 1.1 flag is set, but our computed flags do not have this set, then // this is due to prior versions setting the flag based on DXR 1.1 subobjects, // which are gone by this point. Set the flag and the rest should match. 
- unsigned valMajor, valMinor; - ValCtx.DxilMod.GetValidatorVersion(valMajor, valMinor); - if (DXIL::CompareVersions(valMajor, valMinor, 1, 5) >= 0 && - DXIL::CompareVersions(valMajor, valMinor, 1, 8) < 0 && + unsigned ValMajor, ValMinor; + ValCtx.DxilMod.GetValidatorVersion(ValMajor, ValMinor); + if (DXIL::CompareVersions(ValMajor, ValMinor, 1, 5) >= 0 && + DXIL::CompareVersions(ValMajor, ValMinor, 1, 8) < 0 && ValCtx.DxilMod.m_ShaderFlags.GetRaytracingTier1_1() && - !calcFlags.GetRaytracingTier1_1()) { - calcFlags.SetRaytracingTier1_1(true); + !CalcFlags.GetRaytracingTier1_1()) { + CalcFlags.SetRaytracingTier1_1(true); } - const uint64_t mask = ShaderFlags::GetShaderFlagsRawForCollection(); - uint64_t declaredFlagsRaw = ValCtx.DxilMod.m_ShaderFlags.GetShaderFlagsRaw(); - uint64_t calcFlagsRaw = calcFlags.GetShaderFlagsRaw(); + const uint64_t Mask = ShaderFlags::GetShaderFlagsRawForCollection(); + uint64_t DeclaredFlagsRaw = ValCtx.DxilMod.m_ShaderFlags.GetShaderFlagsRaw(); + uint64_t CalcFlagsRaw = CalcFlags.GetShaderFlagsRaw(); - declaredFlagsRaw &= mask; - calcFlagsRaw &= mask; + DeclaredFlagsRaw &= Mask; + CalcFlagsRaw &= Mask; - if (declaredFlagsRaw == calcFlagsRaw) { + if (DeclaredFlagsRaw == CalcFlagsRaw) { return; } ValCtx.EmitError(ValidationRule::MetaFlagsUsage); dxilutil::EmitNoteOnContext(ValCtx.M.getContext(), Twine("Flags declared=") + - Twine(declaredFlagsRaw) + Twine(", actual=") + - Twine(calcFlagsRaw)); + Twine(DeclaredFlagsRaw) + Twine(", actual=") + + Twine(CalcFlagsRaw)); } static void ValidateSignatureElement(DxilSignatureElement &SE, ValidationContext &ValCtx) { - DXIL::SemanticKind semanticKind = SE.GetSemantic()->GetKind(); - CompType::Kind compKind = SE.GetCompType().GetKind(); + DXIL::SemanticKind SemanticKind = SE.GetSemantic()->GetKind(); + CompType::Kind CompKind = SE.GetCompType().GetKind(); DXIL::InterpolationMode Mode = SE.GetInterpolationMode()->GetKind(); StringRef Name = SE.GetName(); @@ -4008,86 +4080,86 @@ static void 
ValidateSignatureElement(DxilSignatureElement &SE, ValCtx.EmitSignatureError(&SE, ValidationRule::MetaSemanticLen); } - if (semanticKind > DXIL::SemanticKind::Arbitrary && - semanticKind < DXIL::SemanticKind::Invalid) { - if (semanticKind != Semantic::GetByName(SE.GetName())->GetKind()) { + if (SemanticKind > DXIL::SemanticKind::Arbitrary && + SemanticKind < DXIL::SemanticKind::Invalid) { + if (SemanticKind != Semantic::GetByName(SE.GetName())->GetKind()) { ValCtx.EmitFormatError(ValidationRule::MetaSemaKindMatchesName, {SE.GetName(), SE.GetSemantic()->GetName()}); } } - unsigned compWidth = 0; - bool compFloat = false; - bool compInt = false; - bool compBool = false; + unsigned CompWidth = 0; + bool CompFloat = false; + bool CompInt = false; + bool CompBool = false; - switch (compKind) { + switch (CompKind) { case CompType::Kind::U64: - compWidth = 64; - compInt = true; + CompWidth = 64; + CompInt = true; break; case CompType::Kind::I64: - compWidth = 64; - compInt = true; + CompWidth = 64; + CompInt = true; break; // These should be translated for signatures: // case CompType::Kind::PackedS8x32: // case CompType::Kind::PackedU8x32: case CompType::Kind::U32: - compWidth = 32; - compInt = true; + CompWidth = 32; + CompInt = true; break; case CompType::Kind::I32: - compWidth = 32; - compInt = true; + CompWidth = 32; + CompInt = true; break; case CompType::Kind::U16: - compWidth = 16; - compInt = true; + CompWidth = 16; + CompInt = true; break; case CompType::Kind::I16: - compWidth = 16; - compInt = true; + CompWidth = 16; + CompInt = true; break; case CompType::Kind::I1: - compWidth = 1; - compBool = true; + CompWidth = 1; + CompBool = true; break; case CompType::Kind::F64: - compWidth = 64; - compFloat = true; + CompWidth = 64; + CompFloat = true; break; case CompType::Kind::F32: - compWidth = 32; - compFloat = true; + CompWidth = 32; + CompFloat = true; break; case CompType::Kind::F16: - compWidth = 16; - compFloat = true; + CompWidth = 16; + CompFloat = true; 
break; case CompType::Kind::SNormF64: - compWidth = 64; - compFloat = true; + CompWidth = 64; + CompFloat = true; break; case CompType::Kind::SNormF32: - compWidth = 32; - compFloat = true; + CompWidth = 32; + CompFloat = true; break; case CompType::Kind::SNormF16: - compWidth = 16; - compFloat = true; + CompWidth = 16; + CompFloat = true; break; case CompType::Kind::UNormF64: - compWidth = 64; - compFloat = true; + CompWidth = 64; + CompFloat = true; break; case CompType::Kind::UNormF32: - compWidth = 32; - compFloat = true; + CompWidth = 32; + CompFloat = true; break; case CompType::Kind::UNormF16: - compWidth = 16; - compFloat = true; + CompWidth = 16; + CompFloat = true; break; case CompType::Kind::Invalid: default: @@ -4096,7 +4168,7 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, break; } - if (compInt || compBool) { + if (CompInt || CompBool) { switch (Mode) { case DXIL::InterpolationMode::Linear: case DXIL::InterpolationMode::LinearCentroid: @@ -4113,91 +4185,91 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, } // Elements that should not appear in the Dxil signature: - bool bAllowedInSig = true; - bool bShouldBeAllocated = true; + bool AllowedInSig = true; + bool ShouldBeAllocated = true; switch (SE.GetInterpretation()) { case DXIL::SemanticInterpretationKind::NA: case DXIL::SemanticInterpretationKind::NotInSig: case DXIL::SemanticInterpretationKind::Invalid: - bAllowedInSig = false; + AllowedInSig = false; LLVM_FALLTHROUGH; case DXIL::SemanticInterpretationKind::NotPacked: case DXIL::SemanticInterpretationKind::Shadow: - bShouldBeAllocated = false; + ShouldBeAllocated = false; break; default: break; } - const char *inputOutput = nullptr; + const char *InputOutput = nullptr; if (SE.IsInput()) - inputOutput = "Input"; + InputOutput = "Input"; else if (SE.IsOutput()) - inputOutput = "Output"; + InputOutput = "Output"; else - inputOutput = "PatchConstant"; + InputOutput = "PatchConstant"; - if (!bAllowedInSig) { + if 
(!AllowedInSig) { ValCtx.EmitFormatError(ValidationRule::SmSemantic, {SE.GetName(), ValCtx.DxilMod.GetShaderModel()->GetKindName(), - inputOutput}); - } else if (bShouldBeAllocated && !SE.IsAllocated()) { + InputOutput}); + } else if (ShouldBeAllocated && !SE.IsAllocated()) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticShouldBeAllocated, - {inputOutput, SE.GetName()}); - } else if (!bShouldBeAllocated && SE.IsAllocated()) { + {InputOutput, SE.GetName()}); + } else if (!ShouldBeAllocated && SE.IsAllocated()) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticShouldNotBeAllocated, - {inputOutput, SE.GetName()}); + {InputOutput, SE.GetName()}); } - bool bIsClipCull = false; - bool bIsTessfactor = false; - bool bIsBarycentric = false; + bool IsClipCull = false; + bool IsTessfactor = false; + bool IsBarycentric = false; - switch (semanticKind) { + switch (SemanticKind) { case DXIL::SemanticKind::Depth: case DXIL::SemanticKind::DepthGreaterEqual: case DXIL::SemanticKind::DepthLessEqual: - if (!compFloat || compWidth > 32 || SE.GetCols() != 1) { + if (!CompFloat || CompWidth > 32 || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float"}); } break; case DXIL::SemanticKind::Coverage: - DXASSERT(!SE.IsInput() || !bAllowedInSig, + DXASSERT(!SE.IsInput() || !AllowedInSig, "else internal inconsistency between semantic interpretation " "table and validation code"); LLVM_FALLTHROUGH; case DXIL::SemanticKind::InnerCoverage: case DXIL::SemanticKind::OutputControlPointID: - if (compKind != CompType::Kind::U32 || SE.GetCols() != 1) { + if (CompKind != CompType::Kind::U32 || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "uint"}); } break; case DXIL::SemanticKind::Position: - if (!compFloat || compWidth > 32 || SE.GetCols() != 4) { + if (!CompFloat || CompWidth > 32 || SE.GetCols() != 4) { 
ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float4"}); } break; case DXIL::SemanticKind::Target: - if (compWidth > 32) { + if (CompWidth > 32) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float/int/uint"}); } break; case DXIL::SemanticKind::ClipDistance: case DXIL::SemanticKind::CullDistance: - bIsClipCull = true; - if (!compFloat || compWidth > 32) { + IsClipCull = true; + if (!CompFloat || CompWidth > 32) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float"}); } // NOTE: clip cull distance size is checked at ValidateSignature. break; case DXIL::SemanticKind::IsFrontFace: { - if (!(compInt && compWidth == 32) || SE.GetCols() != 1) { + if (!(CompInt && CompWidth == 32) || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "uint"}); } @@ -4211,14 +4283,14 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, case DXIL::SemanticKind::SampleIndex: case DXIL::SemanticKind::StencilRef: case DXIL::SemanticKind::ShadingRate: - if ((compKind != CompType::Kind::U32 && compKind != CompType::Kind::U16) || + if ((CompKind != CompType::Kind::U32 && CompKind != CompType::Kind::U16) || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "uint"}); } break; case DXIL::SemanticKind::CullPrimitive: { - if (!(compBool && compWidth == 1) || SE.GetCols() != 1) { + if (!(CompBool && CompWidth == 1) || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "bool"}); } @@ -4226,8 +4298,8 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, case DXIL::SemanticKind::TessFactor: case DXIL::SemanticKind::InsideTessFactor: // NOTE: the size check is at CheckPatchConstantSemantic. 
- bIsTessfactor = true; - if (!compFloat || compWidth > 32) { + IsTessfactor = true; + if (!CompFloat || CompWidth > 32) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float"}); } @@ -4236,12 +4308,12 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, break; case DXIL::SemanticKind::DomainLocation: case DXIL::SemanticKind::Invalid: - DXASSERT(!bAllowedInSig, "else internal inconsistency between semantic " - "interpretation table and validation code"); + DXASSERT(!AllowedInSig, "else internal inconsistency between semantic " + "interpretation table and validation code"); break; case DXIL::SemanticKind::Barycentrics: - bIsBarycentric = true; - if (!compFloat || compWidth > 32) { + IsBarycentric = true; + if (!CompFloat || CompWidth > 32) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float"}); } @@ -4286,32 +4358,32 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, } } - if (semanticKind == DXIL::SemanticKind::Target) { - // Verify packed row == semantic index - unsigned row = SE.GetStartRow(); + if (SemanticKind == DXIL::SemanticKind::Target) { + // Verify packed Row == semantic index + unsigned Row = SE.GetStartRow(); for (unsigned i : SE.GetSemanticIndexVec()) { - if (row != i) { + if (Row != i) { ValCtx.EmitSignatureError(&SE, ValidationRule::SmPSTargetIndexMatchesRow); } - ++row; + ++Row; } - // Verify packed col is 0 + // Verify packed Col is 0 if (SE.GetStartCol() != 0) { ValCtx.EmitSignatureError(&SE, ValidationRule::SmPSTargetCol0); } - // Verify max row used < 8 + // Verify max Row used < 8 if (SE.GetStartRow() + SE.GetRows() > 8) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticIndexMax, {"SV_Target", "7"}); } - } else if (bAllowedInSig && semanticKind != DXIL::SemanticKind::Arbitrary) { - if (bIsBarycentric) { + } else if (AllowedInSig && SemanticKind != DXIL::SemanticKind::Arbitrary) { + if (IsBarycentric) { if 
(SE.GetSemanticStartIndex() > 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticIndexMax, {SE.GetSemantic()->GetName(), "1"}); } - } else if (!bIsClipCull && SE.GetSemanticStartIndex() > 0) { + } else if (!IsClipCull && SE.GetSemanticStartIndex() > 0) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticIndexMax, {SE.GetSemantic()->GetName(), "0"}); } @@ -4319,17 +4391,17 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, // with the exception of tessfactors, which are validated in // CheckPatchConstantSemantic and ClipDistance/CullDistance, which have // other custom constraints. - if (!bIsTessfactor && !bIsClipCull && SE.GetRows() > 1) { + if (!IsTessfactor && !IsClipCull && SE.GetRows() > 1) { ValCtx.EmitSignatureError(&SE, ValidationRule::MetaSystemValueRows); } } if (SE.GetCols() + (SE.IsAllocated() ? SE.GetStartCol() : 0) > 4) { - unsigned size = (SE.GetRows() - 1) * 4 + SE.GetCols(); + unsigned Size = (SE.GetRows() - 1) * 4 + SE.GetCols(); ValCtx.EmitFormatError(ValidationRule::MetaSignatureOutOfRange, {SE.GetName(), std::to_string(SE.GetStartRow()), std::to_string(SE.GetStartCol()), - std::to_string(size)}); + std::to_string(Size)}); } if (!SE.GetInterpolationMode()->IsValid()) { @@ -4338,8 +4410,8 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, } static void ValidateSignatureOverlap(DxilSignatureElement &E, - unsigned maxScalars, - DxilSignatureAllocator &allocator, + unsigned MaxScalars, + DxilSignatureAllocator &Allocator, ValidationContext &ValCtx) { // Skip entries that are not or should not be allocated. 
Validation occurs in @@ -4357,16 +4429,16 @@ static void ValidateSignatureOverlap(DxilSignatureElement &E, break; } - DxilPackElement PE(&E, allocator.UseMinPrecision()); - DxilSignatureAllocator::ConflictType conflict = - allocator.DetectRowConflict(&PE, E.GetStartRow()); - if (conflict == DxilSignatureAllocator::kNoConflict || - conflict == DxilSignatureAllocator::kInsufficientFreeComponents) - conflict = - allocator.DetectColConflict(&PE, E.GetStartRow(), E.GetStartCol()); - switch (conflict) { + DxilPackElement PE(&E, Allocator.UseMinPrecision()); + DxilSignatureAllocator::ConflictType Conflict = + Allocator.DetectRowConflict(&PE, E.GetStartRow()); + if (Conflict == DxilSignatureAllocator::kNoConflict || + Conflict == DxilSignatureAllocator::kInsufficientFreeComponents) + Conflict = + Allocator.DetectColConflict(&PE, E.GetStartRow(), E.GetStartCol()); + switch (Conflict) { case DxilSignatureAllocator::kNoConflict: - allocator.PlaceElement(&PE, E.GetStartRow(), E.GetStartCol()); + Allocator.PlaceElement(&PE, E.GetStartRow(), E.GetStartCol()); break; case DxilSignatureAllocator::kConflictsWithIndexed: ValCtx.EmitFormatError(ValidationRule::MetaSignatureIndexConflict, @@ -4428,59 +4500,59 @@ static void ValidateSignatureOverlap(DxilSignatureElement &E, } static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S, - EntryStatus &Status, unsigned maxScalars) { - DxilSignatureAllocator allocator[DXIL::kNumOutputStreams] = { + EntryStatus &Status, unsigned MaxScalars) { + DxilSignatureAllocator Allocator[DXIL::kNumOutputStreams] = { {32, ValCtx.DxilMod.GetUseMinPrecision()}, {32, ValCtx.DxilMod.GetUseMinPrecision()}, {32, ValCtx.DxilMod.GetUseMinPrecision()}, {32, ValCtx.DxilMod.GetUseMinPrecision()}}; - unordered_set semanticUsageSet[DXIL::kNumOutputStreams]; - StringMap> semanticIndexMap[DXIL::kNumOutputStreams]; - unordered_set clipcullRowSet[DXIL::kNumOutputStreams]; - unsigned clipcullComponents[DXIL::kNumOutputStreams] = {0, 0, 0, 0}; + 
unordered_set SemanticUsageSet[DXIL::kNumOutputStreams]; + StringMap> SemanticIndexMap[DXIL::kNumOutputStreams]; + unordered_set ClipcullRowSet[DXIL::kNumOutputStreams]; + unsigned ClipcullComponents[DXIL::kNumOutputStreams] = {0, 0, 0, 0}; - bool isOutput = S.IsOutput(); + bool IsOutput = S.IsOutput(); unsigned TargetMask = 0; DXIL::SemanticKind DepthKind = DXIL::SemanticKind::Invalid; - const InterpolationMode *prevBaryInterpMode = nullptr; - unsigned numBarycentrics = 0; + const InterpolationMode *PrevBaryInterpMode = nullptr; + unsigned NumBarycentrics = 0; for (auto &E : S.GetElements()) { - DXIL::SemanticKind semanticKind = E->GetSemantic()->GetKind(); + DXIL::SemanticKind SemanticKind = E->GetSemantic()->GetKind(); ValidateSignatureElement(*E, ValCtx); - // Avoid OOB indexing on streamId. - unsigned streamId = E->GetOutputStream(); - if (streamId >= DXIL::kNumOutputStreams || !isOutput || + // Avoid OOB indexing on StreamId. + unsigned StreamId = E->GetOutputStream(); + if (StreamId >= DXIL::kNumOutputStreams || !IsOutput || !ValCtx.DxilMod.GetShaderModel()->IsGS()) { - streamId = 0; + StreamId = 0; } // Semantic index overlap check, keyed by name. 
- std::string nameUpper(E->GetName()); - std::transform(nameUpper.begin(), nameUpper.end(), nameUpper.begin(), + std::string NameUpper(E->GetName()); + std::transform(NameUpper.begin(), NameUpper.end(), NameUpper.begin(), ::toupper); - unordered_set &semIdxSet = semanticIndexMap[streamId][nameUpper]; - for (unsigned semIdx : E->GetSemanticIndexVec()) { - if (semIdxSet.count(semIdx) > 0) { + unordered_set &SemIdxSet = SemanticIndexMap[StreamId][NameUpper]; + for (unsigned SemIdx : E->GetSemanticIndexVec()) { + if (SemIdxSet.count(SemIdx) > 0) { ValCtx.EmitFormatError(ValidationRule::MetaNoSemanticOverlap, - {E->GetName(), std::to_string(semIdx)}); + {E->GetName(), std::to_string(SemIdx)}); return; } else - semIdxSet.insert(semIdx); + SemIdxSet.insert(SemIdx); } // SV_Target has special rules - if (semanticKind == DXIL::SemanticKind::Target) { + if (SemanticKind == DXIL::SemanticKind::Target) { // Validate target overlap if (E->GetStartRow() + E->GetRows() <= 8) { - unsigned mask = ((1 << E->GetRows()) - 1) << E->GetStartRow(); - if (TargetMask & mask) { + unsigned Mask = ((1 << E->GetRows()) - 1) << E->GetStartRow(); + if (TargetMask & Mask) { ValCtx.EmitFormatError( ValidationRule::MetaNoSemanticOverlap, {"SV_Target", std::to_string(E->GetStartRow())}); } - TargetMask = TargetMask | mask; + TargetMask = TargetMask | Mask; } if (E->GetRows() > 1) { ValCtx.EmitSignatureError(E.get(), ValidationRule::SmNoPSOutputIdx); @@ -4492,19 +4564,19 @@ static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S, continue; // validate system value semantic rules - switch (semanticKind) { + switch (SemanticKind) { case DXIL::SemanticKind::Arbitrary: break; case DXIL::SemanticKind::ClipDistance: case DXIL::SemanticKind::CullDistance: // Validate max 8 components across 2 rows (registers) - for (unsigned rowIdx = 0; rowIdx < E->GetRows(); rowIdx++) - clipcullRowSet[streamId].insert(E->GetStartRow() + rowIdx); - if (clipcullRowSet[streamId].size() > 2) { + for 
(unsigned RowIdx = 0; RowIdx < E->GetRows(); RowIdx++) + ClipcullRowSet[StreamId].insert(E->GetStartRow() + RowIdx); + if (ClipcullRowSet[StreamId].size() > 2) { ValCtx.EmitSignatureError(E.get(), ValidationRule::MetaClipCullMaxRows); } - clipcullComponents[streamId] += E->GetCols(); - if (clipcullComponents[streamId] > 8) { + ClipcullComponents[StreamId] += E->GetCols(); + if (ClipcullComponents[StreamId] > 8) { ValCtx.EmitSignatureError(E.get(), ValidationRule::MetaClipCullMaxComponents); } @@ -4516,58 +4588,58 @@ static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S, ValCtx.EmitSignatureError(E.get(), ValidationRule::SmPSMultipleDepthSemantic); } - DepthKind = semanticKind; + DepthKind = SemanticKind; break; case DXIL::SemanticKind::Barycentrics: { // There can only be up to two SV_Barycentrics // with differeent perspective interpolation modes. - if (numBarycentrics++ > 1) { + if (NumBarycentrics++ > 1) { ValCtx.EmitSignatureError( E.get(), ValidationRule::MetaBarycentricsTwoPerspectives); break; } - const InterpolationMode *mode = E->GetInterpolationMode(); - if (prevBaryInterpMode) { - if ((mode->IsAnyNoPerspective() && - prevBaryInterpMode->IsAnyNoPerspective()) || - (!mode->IsAnyNoPerspective() && - !prevBaryInterpMode->IsAnyNoPerspective())) { + const InterpolationMode *Mode = E->GetInterpolationMode(); + if (PrevBaryInterpMode) { + if ((Mode->IsAnyNoPerspective() && + PrevBaryInterpMode->IsAnyNoPerspective()) || + (!Mode->IsAnyNoPerspective() && + !PrevBaryInterpMode->IsAnyNoPerspective())) { ValCtx.EmitSignatureError( E.get(), ValidationRule::MetaBarycentricsTwoPerspectives); } } - prevBaryInterpMode = mode; + PrevBaryInterpMode = Mode; break; } default: - if (semanticUsageSet[streamId].count( - static_cast(semanticKind)) > 0) { + if (SemanticUsageSet[StreamId].count( + static_cast(SemanticKind)) > 0) { ValCtx.EmitFormatError(ValidationRule::MetaDuplicateSysValue, {E->GetSemantic()->GetName()}); } - 
semanticUsageSet[streamId].insert(static_cast(semanticKind)); + SemanticUsageSet[StreamId].insert(static_cast(SemanticKind)); break; } // Packed element overlap check. - ValidateSignatureOverlap(*E.get(), maxScalars, allocator[streamId], ValCtx); + ValidateSignatureOverlap(*E.get(), MaxScalars, Allocator[StreamId], ValCtx); - if (isOutput && semanticKind == DXIL::SemanticKind::Position) { + if (IsOutput && SemanticKind == DXIL::SemanticKind::Position) { Status.hasOutputPosition[E->GetOutputStream()] = true; } } if (Status.hasViewID && S.IsInput() && ValCtx.DxilMod.GetShaderModel()->GetKind() == DXIL::ShaderKind::Pixel) { - // Ensure sufficient space for ViewID: - DxilSignatureAllocator::DummyElement viewID; - viewID.rows = 1; - viewID.cols = 1; - viewID.kind = DXIL::SemanticKind::Arbitrary; - viewID.interpolation = DXIL::InterpolationMode::Constant; - viewID.interpretation = DXIL::SemanticInterpretationKind::SGV; - allocator[0].PackNext(&viewID, 0, 32); - if (!viewID.IsAllocated()) { + // Ensure sufficient space for ViewId: + DxilSignatureAllocator::DummyElement ViewId; + ViewId.rows = 1; + ViewId.cols = 1; + ViewId.kind = DXIL::SemanticKind::Arbitrary; + ViewId.interpolation = DXIL::InterpolationMode::Constant; + ViewId.interpretation = DXIL::SemanticInterpretationKind::SGV; + Allocator[0].PackNext(&ViewId, 0, 32); + if (!ViewId.IsAllocated()) { ValCtx.EmitError(ValidationRule::SmViewIDNeedsSlot); } } @@ -4592,12 +4664,12 @@ static void ValidateConstantInterpModeSignature(ValidationContext &ValCtx, } static void ValidateEntrySignatures(ValidationContext &ValCtx, - const DxilEntryProps &entryProps, + const DxilEntryProps &EntryProps, EntryStatus &Status, Function &F) { - const DxilFunctionProps &props = entryProps.props; - const DxilEntrySignature &S = entryProps.sig; + const DxilFunctionProps &Props = EntryProps.props; + const DxilEntrySignature &S = EntryProps.sig; - if (props.IsRay()) { + if (Props.IsRay()) { // No signatures allowed if 
(!S.InputSignature.GetElements().empty() || !S.OutputSignature.GetElements().empty() || @@ -4607,62 +4679,62 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, } // Validate payload/attribute/params sizes - unsigned payloadSize = 0; - unsigned attrSize = 0; - auto itPayload = F.arg_begin(); - auto itAttr = itPayload; - if (itAttr != F.arg_end()) - itAttr++; + unsigned PayloadSize = 0; + unsigned AttrSize = 0; + auto ItPayload = F.arg_begin(); + auto ItAttr = ItPayload; + if (ItAttr != F.arg_end()) + ItAttr++; DataLayout DL(F.getParent()); - switch (props.shaderKind) { + switch (Props.shaderKind) { case DXIL::ShaderKind::AnyHit: case DXIL::ShaderKind::ClosestHit: - if (itAttr != F.arg_end()) { - Type *Ty = itAttr->getType(); + if (ItAttr != F.arg_end()) { + Type *Ty = ItAttr->getType(); if (Ty->isPointerTy()) Ty = Ty->getPointerElementType(); - attrSize = + AttrSize = (unsigned)std::min(DL.getTypeAllocSize(Ty), (uint64_t)UINT_MAX); } LLVM_FALLTHROUGH; case DXIL::ShaderKind::Miss: case DXIL::ShaderKind::Callable: - if (itPayload != F.arg_end()) { - Type *Ty = itPayload->getType(); + if (ItPayload != F.arg_end()) { + Type *Ty = ItPayload->getType(); if (Ty->isPointerTy()) Ty = Ty->getPointerElementType(); - payloadSize = + PayloadSize = (unsigned)std::min(DL.getTypeAllocSize(Ty), (uint64_t)UINT_MAX); } break; } - if (props.ShaderProps.Ray.payloadSizeInBytes < payloadSize) { + if (Props.ShaderProps.Ray.payloadSizeInBytes < PayloadSize) { ValCtx.EmitFnFormatError( &F, ValidationRule::SmRayShaderPayloadSize, - {F.getName(), props.IsCallable() ? "params" : "payload"}); + {F.getName(), Props.IsCallable() ? 
"params" : "payload"}); } - if (props.ShaderProps.Ray.attributeSizeInBytes < attrSize) { + if (Props.ShaderProps.Ray.attributeSizeInBytes < AttrSize) { ValCtx.EmitFnFormatError(&F, ValidationRule::SmRayShaderPayloadSize, {F.getName(), "attribute"}); } return; } - bool isPS = props.IsPS(); - bool isVS = props.IsVS(); - bool isGS = props.IsGS(); - bool isCS = props.IsCS(); - bool isMS = props.IsMS(); + bool IsPs = Props.IsPS(); + bool IsVs = Props.IsVS(); + bool IsGs = Props.IsGS(); + bool IsCs = Props.IsCS(); + bool IsMs = Props.IsMS(); - if (isPS) { + if (IsPs) { // PS output no interp mode. ValidateNoInterpModeSignature(ValCtx, S.OutputSignature); - } else if (isVS) { + } else if (IsVs) { // VS input no interp mode. ValidateNoInterpModeSignature(ValCtx, S.InputSignature); } - if (isMS) { + if (IsMs) { // primitive output constant interp mode. ValidateConstantInterpModeSignature(ValCtx, S.PatchConstOrPrimSignature); } else { @@ -4670,38 +4742,38 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, ValidateNoInterpModeSignature(ValCtx, S.PatchConstOrPrimSignature); } - unsigned maxInputScalars = DXIL::kMaxInputTotalScalars; - unsigned maxOutputScalars = 0; - unsigned maxPatchConstantScalars = 0; + unsigned MaxInputScalars = DXIL::kMaxInputTotalScalars; + unsigned MaxOutputScalars = 0; + unsigned MaxPatchConstantScalars = 0; - switch (props.shaderKind) { + switch (Props.shaderKind) { case DXIL::ShaderKind::Compute: break; case DXIL::ShaderKind::Vertex: case DXIL::ShaderKind::Geometry: case DXIL::ShaderKind::Pixel: - maxOutputScalars = DXIL::kMaxOutputTotalScalars; + MaxOutputScalars = DXIL::kMaxOutputTotalScalars; break; case DXIL::ShaderKind::Hull: case DXIL::ShaderKind::Domain: - maxOutputScalars = DXIL::kMaxOutputTotalScalars; - maxPatchConstantScalars = DXIL::kMaxHSOutputPatchConstantTotalScalars; + MaxOutputScalars = DXIL::kMaxOutputTotalScalars; + MaxPatchConstantScalars = DXIL::kMaxHSOutputPatchConstantTotalScalars; break; case 
DXIL::ShaderKind::Mesh: - maxOutputScalars = DXIL::kMaxOutputTotalScalars; - maxPatchConstantScalars = DXIL::kMaxOutputTotalScalars; + MaxOutputScalars = DXIL::kMaxOutputTotalScalars; + MaxPatchConstantScalars = DXIL::kMaxOutputTotalScalars; break; case DXIL::ShaderKind::Amplification: default: break; } - ValidateSignature(ValCtx, S.InputSignature, Status, maxInputScalars); - ValidateSignature(ValCtx, S.OutputSignature, Status, maxOutputScalars); + ValidateSignature(ValCtx, S.InputSignature, Status, MaxInputScalars); + ValidateSignature(ValCtx, S.OutputSignature, Status, MaxOutputScalars); ValidateSignature(ValCtx, S.PatchConstOrPrimSignature, Status, - maxPatchConstantScalars); + MaxPatchConstantScalars); - if (isPS) { + if (IsPs) { // Gather execution information. hlsl::PSExecutionInfo PSExec; DxilSignatureElement *PosInterpSE = nullptr; @@ -4743,10 +4815,10 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, } // Validate PS output semantic. - const DxilSignature &outputSig = S.OutputSignature; - for (auto &SE : outputSig.GetElements()) { - Semantic::Kind semanticKind = SE->GetSemantic()->GetKind(); - switch (semanticKind) { + const DxilSignature &OutputSig = S.OutputSignature; + for (auto &SE : OutputSig.GetElements()) { + Semantic::Kind SemanticKind = SE->GetSemantic()->GetKind(); + switch (SemanticKind) { case Semantic::Kind::Target: case Semantic::Kind::Coverage: case Semantic::Kind::Depth: @@ -4762,24 +4834,24 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, } } - if (isGS) { - unsigned maxVertexCount = props.ShaderProps.GS.maxVertexCount; - unsigned outputScalarCount = 0; - const DxilSignature &outSig = S.OutputSignature; - for (auto &SE : outSig.GetElements()) { - outputScalarCount += SE->GetRows() * SE->GetCols(); + if (IsGs) { + unsigned MaxVertexCount = Props.ShaderProps.GS.maxVertexCount; + unsigned OutputScalarCount = 0; + const DxilSignature &OutSig = S.OutputSignature; + for (auto &SE : OutSig.GetElements()) { + 
OutputScalarCount += SE->GetRows() * SE->GetCols(); } - unsigned totalOutputScalars = maxVertexCount * outputScalarCount; - if (totalOutputScalars > DXIL::kMaxGSOutputTotalScalars) { + unsigned TotalOutputScalars = MaxVertexCount * OutputScalarCount; + if (TotalOutputScalars > DXIL::kMaxGSOutputTotalScalars) { ValCtx.EmitFnFormatError( &F, ValidationRule::SmGSTotalOutputVertexDataRange, - {std::to_string(maxVertexCount), std::to_string(outputScalarCount), - std::to_string(totalOutputScalars), + {std::to_string(MaxVertexCount), std::to_string(OutputScalarCount), + std::to_string(TotalOutputScalars), std::to_string(DXIL::kMaxGSOutputTotalScalars)}); } } - if (isCS) { + if (IsCs) { if (!S.InputSignature.GetElements().empty() || !S.OutputSignature.GetElements().empty() || !S.PatchConstOrPrimSignature.GetElements().empty()) { @@ -4787,7 +4859,7 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, } } - if (isMS) { + if (IsMs) { unsigned VertexSignatureRows = S.OutputSignature.GetRowCount(); if (VertexSignatureRows > DXIL::kMaxMSVSigRows) { ValCtx.EmitFnFormatError( @@ -4809,31 +4881,31 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, const unsigned kScalarSizeForMSAttributes = 4; #define ALIGN32(n) (((n) + 31) & ~31) - unsigned maxAlign32VertexCount = - ALIGN32(props.ShaderProps.MS.maxVertexCount); - unsigned maxAlign32PrimitiveCount = - ALIGN32(props.ShaderProps.MS.maxPrimitiveCount); - unsigned totalOutputScalars = 0; + unsigned MaxAlign32VertexCount = + ALIGN32(Props.ShaderProps.MS.maxVertexCount); + unsigned MaxAlign32PrimitiveCount = + ALIGN32(Props.ShaderProps.MS.maxPrimitiveCount); + unsigned TotalOutputScalars = 0; for (auto &SE : S.OutputSignature.GetElements()) { - totalOutputScalars += - SE->GetRows() * SE->GetCols() * maxAlign32VertexCount; + TotalOutputScalars += + SE->GetRows() * SE->GetCols() * MaxAlign32VertexCount; } for (auto &SE : S.PatchConstOrPrimSignature.GetElements()) { - totalOutputScalars += - SE->GetRows() * 
SE->GetCols() * maxAlign32PrimitiveCount; + TotalOutputScalars += + SE->GetRows() * SE->GetCols() * MaxAlign32PrimitiveCount; } - if (totalOutputScalars * kScalarSizeForMSAttributes > + if (TotalOutputScalars * kScalarSizeForMSAttributes > DXIL::kMaxMSOutputTotalBytes) { ValCtx.EmitFnFormatError( &F, ValidationRule::SmMeshShaderOutputSize, {F.getName(), std::to_string(DXIL::kMaxMSOutputTotalBytes)}); } - unsigned totalInputOutputBytes = - totalOutputScalars * kScalarSizeForMSAttributes + - props.ShaderProps.MS.payloadSizeInBytes; - if (totalInputOutputBytes > DXIL::kMaxMSInputOutputTotalBytes) { + unsigned TotalInputOutputBytes = + TotalOutputScalars * kScalarSizeForMSAttributes + + Props.ShaderProps.MS.payloadSizeInBytes; + if (TotalInputOutputBytes > DXIL::kMaxMSInputOutputTotalBytes) { ValCtx.EmitFnFormatError( &F, ValidationRule::SmMeshShaderInOutSize, {F.getName(), std::to_string(DXIL::kMaxMSInputOutputTotalBytes)}); @@ -4846,9 +4918,9 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx) { if (ValCtx.isLibProfile) { for (Function &F : DM.GetModule()->functions()) { if (DM.HasDxilEntryProps(&F)) { - DxilEntryProps &entryProps = DM.GetDxilEntryProps(&F); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(&F); EntryStatus &Status = ValCtx.GetEntryStatus(&F); - ValidateEntrySignatures(ValCtx, entryProps, Status, F); + ValidateEntrySignatures(ValCtx, EntryProps, Status, F); } } } else { @@ -4859,8 +4931,8 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx) { return; } EntryStatus &Status = ValCtx.GetEntryStatus(Entry); - DxilEntryProps &entryProps = DM.GetDxilEntryProps(Entry); - ValidateEntrySignatures(ValCtx, entryProps, Status, *Entry); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(Entry); + ValidateEntrySignatures(ValCtx, EntryProps, Status, *Entry); } } @@ -4869,14 +4941,14 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx) { struct CompatibilityChecker { ValidationContext &ValCtx; Function *EntryFn; - const 
DxilFunctionProps &props; - DXIL::ShaderKind shaderKind; + const DxilFunctionProps &Props; + DXIL::ShaderKind ShaderKind; // These masks identify the potential conflict flags based on the entry // function's shader kind and properties when either UsesDerivatives or // RequiresGroup flags are set in ShaderCompatInfo. - uint32_t maskForDeriv = 0; - uint32_t maskForGroup = 0; + uint32_t MaskForDeriv = 0; + uint32_t MaskForGroup = 0; enum class ConflictKind : uint32_t { Stage, @@ -4898,77 +4970,77 @@ struct CompatibilityChecker { CompatibilityChecker(ValidationContext &ValCtx, Function *EntryFn) : ValCtx(ValCtx), EntryFn(EntryFn), - props(ValCtx.DxilMod.GetDxilEntryProps(EntryFn).props), - shaderKind(props.shaderKind) { + Props(ValCtx.DxilMod.GetDxilEntryProps(EntryFn).props), + ShaderKind(Props.shaderKind) { // Precompute potential incompatibilities based on shader stage, shader kind // and entry attributes. These will turn into full conflicts if the entry // point's shader flags indicate that they use relevant features. if (!ValCtx.DxilMod.GetShaderModel()->IsSM66Plus() && - (shaderKind == DXIL::ShaderKind::Mesh || - shaderKind == DXIL::ShaderKind::Amplification || - shaderKind == DXIL::ShaderKind::Compute)) { - maskForDeriv |= + (ShaderKind == DXIL::ShaderKind::Mesh || + ShaderKind == DXIL::ShaderKind::Amplification || + ShaderKind == DXIL::ShaderKind::Compute)) { + MaskForDeriv |= static_cast(ConflictFlags::DerivInComputeShaderModel); - } else if (shaderKind == DXIL::ShaderKind::Node) { + } else if (ShaderKind == DXIL::ShaderKind::Node) { // Only broadcasting launch supports derivatives. - if (props.Node.LaunchType != DXIL::NodeLaunchType::Broadcasting) - maskForDeriv |= static_cast(ConflictFlags::DerivLaunch); + if (Props.Node.LaunchType != DXIL::NodeLaunchType::Broadcasting) + MaskForDeriv |= static_cast(ConflictFlags::DerivLaunch); // Thread launch node has no group. 
- if (props.Node.LaunchType == DXIL::NodeLaunchType::Thread) - maskForGroup |= static_cast(ConflictFlags::RequiresGroup); + if (Props.Node.LaunchType == DXIL::NodeLaunchType::Thread) + MaskForGroup |= static_cast(ConflictFlags::RequiresGroup); } - if (shaderKind == DXIL::ShaderKind::Mesh || - shaderKind == DXIL::ShaderKind::Amplification || - shaderKind == DXIL::ShaderKind::Compute || - shaderKind == DXIL::ShaderKind::Node) { + if (ShaderKind == DXIL::ShaderKind::Mesh || + ShaderKind == DXIL::ShaderKind::Amplification || + ShaderKind == DXIL::ShaderKind::Compute || + ShaderKind == DXIL::ShaderKind::Node) { // All compute-like stages // Thread dimensions must be either 1D and X is multiple of 4, or 2D // and X and Y must be multiples of 2. - if (props.numThreads[1] == 1 && props.numThreads[2] == 1) { - if ((props.numThreads[0] & 0x3) != 0) - maskForDeriv |= + if (Props.numThreads[1] == 1 && Props.numThreads[2] == 1) { + if ((Props.numThreads[0] & 0x3) != 0) + MaskForDeriv |= static_cast(ConflictFlags::DerivThreadGroupDim); - } else if ((props.numThreads[0] & 0x1) || (props.numThreads[1] & 0x1)) - maskForDeriv |= + } else if ((Props.numThreads[0] & 0x1) || (Props.numThreads[1] & 0x1)) + MaskForDeriv |= static_cast(ConflictFlags::DerivThreadGroupDim); } else { // other stages have no group - maskForGroup |= static_cast(ConflictFlags::RequiresGroup); + MaskForGroup |= static_cast(ConflictFlags::RequiresGroup); } } uint32_t - IdentifyConflict(const DxilModule::ShaderCompatInfo &compatInfo) const { - uint32_t conflictMask = 0; + IdentifyConflict(const DxilModule::ShaderCompatInfo &CompatInfo) const { + uint32_t ConflictMask = 0; // Compatibility check said this shader kind is not compatible. - if (0 == ((1 << (uint32_t)shaderKind) & compatInfo.mask)) - conflictMask |= (uint32_t)ConflictFlags::Stage; + if (0 == ((1 << (uint32_t)ShaderKind) & CompatInfo.mask)) + ConflictMask |= (uint32_t)ConflictFlags::Stage; // Compatibility check said this shader model is not compatible. 
if (DXIL::CompareVersions(ValCtx.DxilMod.GetShaderModel()->GetMajor(), ValCtx.DxilMod.GetShaderModel()->GetMinor(), - compatInfo.minMajor, compatInfo.minMinor) < 0) - conflictMask |= (uint32_t)ConflictFlags::ShaderModel; + CompatInfo.minMajor, CompatInfo.minMinor) < 0) + ConflictMask |= (uint32_t)ConflictFlags::ShaderModel; - if (compatInfo.shaderFlags.GetUsesDerivatives()) - conflictMask |= maskForDeriv; + if (CompatInfo.shaderFlags.GetUsesDerivatives()) + ConflictMask |= MaskForDeriv; - if (compatInfo.shaderFlags.GetRequiresGroup()) - conflictMask |= maskForGroup; + if (CompatInfo.shaderFlags.GetRequiresGroup()) + ConflictMask |= MaskForGroup; - return conflictMask; + return ConflictMask; } - void Diagnose(Function *F, uint32_t conflictMask, ConflictKind conflict, - ValidationRule rule, ArrayRef args = {}) { - if (conflictMask & (1 << (unsigned)conflict)) - ValCtx.EmitFnFormatError(F, rule, args); + void Diagnose(Function *F, uint32_t ConflictMask, ConflictKind Conflict, + ValidationRule Rule, ArrayRef Args = {}) { + if (ConflictMask & (1 << (unsigned)Conflict)) + ValCtx.EmitFnFormatError(F, Rule, Args); } - void DiagnoseConflicts(Function *F, uint32_t conflictMask) { + void DiagnoseConflicts(Function *F, uint32_t ConflictMask) { // Emit a diagnostic indicating that either the entry function or a function // called by the entry function contains a disallowed operation. if (F == EntryFn) @@ -4977,22 +5049,22 @@ struct CompatibilityChecker { ValCtx.EmitFnError(EntryFn, ValidationRule::SmIncompatibleCallInEntry); // Emit diagnostics for each conflict found in this function. 
- Diagnose(F, conflictMask, ConflictKind::Stage, + Diagnose(F, ConflictMask, ConflictKind::Stage, ValidationRule::SmIncompatibleStage, - {ShaderModel::GetKindName(props.shaderKind)}); - Diagnose(F, conflictMask, ConflictKind::ShaderModel, + {ShaderModel::GetKindName(Props.shaderKind)}); + Diagnose(F, ConflictMask, ConflictKind::ShaderModel, ValidationRule::SmIncompatibleShaderModel); - Diagnose(F, conflictMask, ConflictKind::DerivLaunch, + Diagnose(F, ConflictMask, ConflictKind::DerivLaunch, ValidationRule::SmIncompatibleDerivLaunch, - {GetLaunchTypeStr(props.Node.LaunchType)}); - Diagnose(F, conflictMask, ConflictKind::DerivThreadGroupDim, + {GetLaunchTypeStr(Props.Node.LaunchType)}); + Diagnose(F, ConflictMask, ConflictKind::DerivThreadGroupDim, ValidationRule::SmIncompatibleThreadGroupDim, - {std::to_string(props.numThreads[0]), - std::to_string(props.numThreads[1]), - std::to_string(props.numThreads[2])}); - Diagnose(F, conflictMask, ConflictKind::DerivInComputeShaderModel, + {std::to_string(Props.numThreads[0]), + std::to_string(Props.numThreads[1]), + std::to_string(Props.numThreads[2])}); + Diagnose(F, ConflictMask, ConflictKind::DerivInComputeShaderModel, ValidationRule::SmIncompatibleDerivInComputeShaderModel); - Diagnose(F, conflictMask, ConflictKind::RequiresGroup, + Diagnose(F, ConflictMask, ConflictKind::RequiresGroup, ValidationRule::SmIncompatibleRequiresGroup); } @@ -5001,59 +5073,59 @@ struct CompatibilityChecker { // functions called by that function introduced the conflict. // In those cases, the called functions themselves will emit the diagnostic. // Return conflict mask for this function. - uint32_t Visit(Function *F, uint32_t &remainingMask, - llvm::SmallPtrSet &visited, CallGraph &CG) { + uint32_t Visit(Function *F, uint32_t &RemainingMask, + llvm::SmallPtrSet &Visited, CallGraph &CG) { // Recursive check looks for where a conflict is found and not present // in functions called by the current function. 
// - When a source is found, emit diagnostics and clear the conflict // flags introduced by this function from the working mask so we don't // report this conflict again. - // - When the remainingMask is 0, we are done. + // - When the RemainingMask is 0, we are done. - if (remainingMask == 0) + if (RemainingMask == 0) return 0; // Nothing left to search for. - if (!visited.insert(F).second) + if (!Visited.insert(F).second) return 0; // Already visited. - const DxilModule::ShaderCompatInfo *compatInfo = + const DxilModule::ShaderCompatInfo *CompatInfo = ValCtx.DxilMod.GetCompatInfoForFunction(F); - DXASSERT(compatInfo, "otherwise, compat info not computed in module"); - if (!compatInfo) + DXASSERT(CompatInfo, "otherwise, compat info not computed in module"); + if (!CompatInfo) return 0; - uint32_t maskForThisFunction = IdentifyConflict(*compatInfo); + uint32_t MaskForThisFunction = IdentifyConflict(*CompatInfo); - uint32_t maskForCalls = 0; + uint32_t MaskForCalls = 0; if (CallGraphNode *CGNode = CG[F]) { for (auto &Call : *CGNode) { Function *called = Call.second->getFunction(); if (called->isDeclaration()) continue; - maskForCalls |= Visit(called, remainingMask, visited, CG); - if (remainingMask == 0) + MaskForCalls |= Visit(called, RemainingMask, Visited, CG); + if (RemainingMask == 0) return 0; // Nothing left to search for. } } // Mask of incompatibilities introduced by this function. - uint32_t conflictsIntroduced = - remainingMask & maskForThisFunction & ~maskForCalls; - if (conflictsIntroduced) { + uint32_t ConflictsIntroduced = + RemainingMask & MaskForThisFunction & ~MaskForCalls; + if (ConflictsIntroduced) { // This function introduces at least one conflict. - DiagnoseConflicts(F, conflictsIntroduced); + DiagnoseConflicts(F, ConflictsIntroduced); // Mask off diagnosed incompatibilities. 
- remainingMask &= ~conflictsIntroduced; + RemainingMask &= ~ConflictsIntroduced; } - return maskForThisFunction; + return MaskForThisFunction; } - void FindIncompatibleCall(const DxilModule::ShaderCompatInfo &compatInfo) { - uint32_t conflictMask = IdentifyConflict(compatInfo); - if (conflictMask == 0) + void FindIncompatibleCall(const DxilModule::ShaderCompatInfo &CompatInfo) { + uint32_t ConflictMask = IdentifyConflict(CompatInfo); + if (ConflictMask == 0) return; CallGraph &CG = ValCtx.GetCallGraph(); - llvm::SmallPtrSet visited; - Visit(EntryFn, conflictMask, visited, CG); + llvm::SmallPtrSet Visited; + Visit(EntryFn, ConflictMask, Visited, CG); } }; @@ -5062,14 +5134,14 @@ static void ValidateEntryCompatibility(ValidationContext &ValCtx) { DxilModule &DM = ValCtx.DxilMod; for (Function &F : DM.GetModule()->functions()) { if (DM.HasDxilEntryProps(&F)) { - const DxilModule::ShaderCompatInfo *compatInfo = + const DxilModule::ShaderCompatInfo *CompatInfo = DM.GetCompatInfoForFunction(&F); - DXASSERT(compatInfo, "otherwise, compat info not computed in module"); - if (!compatInfo) + DXASSERT(CompatInfo, "otherwise, compat info not computed in module"); + if (!CompatInfo) continue; CompatibilityChecker checker(ValCtx, &F); - checker.FindIncompatibleCall(*compatInfo); + checker.FindIncompatibleCall(*CompatInfo); } } } @@ -5077,101 +5149,101 @@ static void ValidateEntryCompatibility(ValidationContext &ValCtx) { static void CheckPatchConstantSemantic(ValidationContext &ValCtx, const DxilEntryProps &EntryProps, EntryStatus &Status, Function *F) { - const DxilFunctionProps &props = EntryProps.props; - bool isHS = props.IsHS(); + const DxilFunctionProps &Props = EntryProps.props; + bool IsHs = Props.IsHS(); - DXIL::TessellatorDomain domain = - isHS ? props.ShaderProps.HS.domain : props.ShaderProps.DS.domain; + DXIL::TessellatorDomain Domain = + IsHs ? 
Props.ShaderProps.HS.domain : Props.ShaderProps.DS.domain; - const DxilSignature &patchConstantSig = + const DxilSignature &PatchConstantSig = EntryProps.sig.PatchConstOrPrimSignature; - const unsigned kQuadEdgeSize = 4; - const unsigned kQuadInsideSize = 2; - const unsigned kQuadDomainLocSize = 2; + const unsigned KQuadEdgeSize = 4; + const unsigned KQuadInsideSize = 2; + const unsigned KQuadDomainLocSize = 2; - const unsigned kTriEdgeSize = 3; - const unsigned kTriInsideSize = 1; - const unsigned kTriDomainLocSize = 3; + const unsigned KTriEdgeSize = 3; + const unsigned KTriInsideSize = 1; + const unsigned KTriDomainLocSize = 3; - const unsigned kIsolineEdgeSize = 2; - const unsigned kIsolineInsideSize = 0; - const unsigned kIsolineDomainLocSize = 3; + const unsigned KIsolineEdgeSize = 2; + const unsigned KIsolineInsideSize = 0; + const unsigned KIsolineDomainLocSize = 3; - const char *domainName = ""; + const char *DomainName = ""; DXIL::SemanticKind kEdgeSemantic = DXIL::SemanticKind::TessFactor; - unsigned edgeSize = 0; + unsigned EdgeSize = 0; DXIL::SemanticKind kInsideSemantic = DXIL::SemanticKind::InsideTessFactor; - unsigned insideSize = 0; + unsigned InsideSize = 0; Status.domainLocSize = 0; - switch (domain) { + switch (Domain) { case DXIL::TessellatorDomain::IsoLine: - domainName = "IsoLine"; - edgeSize = kIsolineEdgeSize; - insideSize = kIsolineInsideSize; - Status.domainLocSize = kIsolineDomainLocSize; + DomainName = "IsoLine"; + EdgeSize = KIsolineEdgeSize; + InsideSize = KIsolineInsideSize; + Status.domainLocSize = KIsolineDomainLocSize; break; case DXIL::TessellatorDomain::Tri: - domainName = "Tri"; - edgeSize = kTriEdgeSize; - insideSize = kTriInsideSize; - Status.domainLocSize = kTriDomainLocSize; + DomainName = "Tri"; + EdgeSize = KTriEdgeSize; + InsideSize = KTriInsideSize; + Status.domainLocSize = KTriDomainLocSize; break; case DXIL::TessellatorDomain::Quad: - domainName = "Quad"; - edgeSize = kQuadEdgeSize; - insideSize = kQuadInsideSize; - 
Status.domainLocSize = kQuadDomainLocSize; + DomainName = "Quad"; + EdgeSize = KQuadEdgeSize; + InsideSize = KQuadInsideSize; + Status.domainLocSize = KQuadDomainLocSize; break; default: // Don't bother with other tests if domain is invalid return; } - bool bFoundEdgeSemantic = false; - bool bFoundInsideSemantic = false; - for (auto &SE : patchConstantSig.GetElements()) { - Semantic::Kind kind = SE->GetSemantic()->GetKind(); - if (kind == kEdgeSemantic) { - bFoundEdgeSemantic = true; - if (SE->GetRows() != edgeSize || SE->GetCols() > 1) { + bool FoundEdgeSemantic = false; + bool FoundInsideSemantic = false; + for (auto &SE : PatchConstantSig.GetElements()) { + Semantic::Kind Kind = SE->GetSemantic()->GetKind(); + if (Kind == kEdgeSemantic) { + FoundEdgeSemantic = true; + if (SE->GetRows() != EdgeSize || SE->GetCols() > 1) { ValCtx.EmitFnFormatError(F, ValidationRule::SmTessFactorSizeMatchDomain, {std::to_string(SE->GetRows()), - std::to_string(SE->GetCols()), domainName, - std::to_string(edgeSize)}); + std::to_string(SE->GetCols()), DomainName, + std::to_string(EdgeSize)}); } - } else if (kind == kInsideSemantic) { - bFoundInsideSemantic = true; - if (SE->GetRows() != insideSize || SE->GetCols() > 1) { + } else if (Kind == kInsideSemantic) { + FoundInsideSemantic = true; + if (SE->GetRows() != InsideSize || SE->GetCols() > 1) { ValCtx.EmitFnFormatError( F, ValidationRule::SmInsideTessFactorSizeMatchDomain, {std::to_string(SE->GetRows()), std::to_string(SE->GetCols()), - domainName, std::to_string(insideSize)}); + DomainName, std::to_string(InsideSize)}); } } } - if (isHS) { - if (!bFoundEdgeSemantic) { + if (IsHs) { + if (!FoundEdgeSemantic) { ValCtx.EmitFnError(F, ValidationRule::SmTessFactorForDomain); } - if (!bFoundInsideSemantic && domain != DXIL::TessellatorDomain::IsoLine) { + if (!FoundInsideSemantic && Domain != DXIL::TessellatorDomain::IsoLine) { ValCtx.EmitFnError(F, ValidationRule::SmTessFactorForDomain); } } } static void 
ValidatePassThruHS(ValidationContext &ValCtx, - const DxilEntryProps &entryProps, Function *F) { + const DxilEntryProps &EntryProps, Function *F) { // Check pass thru HS. if (F->isDeclaration()) { - const auto &props = entryProps.props; - if (props.IsHS()) { - const auto &HS = props.ShaderProps.HS; + const auto &Props = EntryProps.props; + if (Props.IsHS()) { + const auto &HS = Props.ShaderProps.HS; if (HS.inputControlPoints < HS.outputControlPoints) { ValCtx.EmitFnError( F, ValidationRule::SmHullPassThruControlPointCountMatch); @@ -5179,12 +5251,12 @@ static void ValidatePassThruHS(ValidationContext &ValCtx, // Check declared control point outputs storage amounts are ok to pass // through (less output storage than input for control points). - const DxilSignature &outSig = entryProps.sig.OutputSignature; - unsigned totalOutputCPScalars = 0; - for (auto &SE : outSig.GetElements()) { - totalOutputCPScalars += SE->GetRows() * SE->GetCols(); + const DxilSignature &OutSig = EntryProps.sig.OutputSignature; + unsigned TotalOutputCpScalars = 0; + for (auto &SE : OutSig.GetElements()) { + TotalOutputCpScalars += SE->GetRows() * SE->GetCols(); } - if (totalOutputCPScalars * HS.outputControlPoints > + if (TotalOutputCpScalars * HS.outputControlPoints > DXIL::kMaxHSOutputControlPointsTotalScalars) { ValCtx.EmitFnError(F, ValidationRule::SmOutputControlPointsTotalScalars); @@ -5199,35 +5271,35 @@ static void ValidatePassThruHS(ValidationContext &ValCtx, // validate wave size (currently allowed only on CS and node shaders but might // be supported on other shader types in the future) static void ValidateWaveSize(ValidationContext &ValCtx, - const DxilEntryProps &entryProps, Function *F) { - const DxilFunctionProps &props = entryProps.props; - const hlsl::DxilWaveSize &waveSize = props.WaveSize; + const DxilEntryProps &EntryProps, Function *F) { + const DxilFunctionProps &Props = EntryProps.props; + const hlsl::DxilWaveSize &WaveSize = Props.WaveSize; - switch 
(waveSize.Validate()) { + switch (WaveSize.Validate()) { case hlsl::DxilWaveSize::ValidationResult::Success: break; case hlsl::DxilWaveSize::ValidationResult::InvalidMin: ValCtx.EmitFnFormatError(F, ValidationRule::SmWaveSizeValue, - {"Min", std::to_string(waveSize.Min), + {"Min", std::to_string(WaveSize.Min), std::to_string(DXIL::kMinWaveSize), std::to_string(DXIL::kMaxWaveSize)}); break; case hlsl::DxilWaveSize::ValidationResult::InvalidMax: ValCtx.EmitFnFormatError(F, ValidationRule::SmWaveSizeValue, - {"Max", std::to_string(waveSize.Max), + {"Max", std::to_string(WaveSize.Max), std::to_string(DXIL::kMinWaveSize), std::to_string(DXIL::kMaxWaveSize)}); break; case hlsl::DxilWaveSize::ValidationResult::InvalidPreferred: ValCtx.EmitFnFormatError(F, ValidationRule::SmWaveSizeValue, - {"Preferred", std::to_string(waveSize.Preferred), + {"Preferred", std::to_string(WaveSize.Preferred), std::to_string(DXIL::kMinWaveSize), std::to_string(DXIL::kMaxWaveSize)}); break; case hlsl::DxilWaveSize::ValidationResult::MaxOrPreferredWhenUndefined: ValCtx.EmitFnFormatError( F, ValidationRule::SmWaveSizeAllZeroWhenUndefined, - {std::to_string(waveSize.Max), std::to_string(waveSize.Preferred)}); + {std::to_string(WaveSize.Max), std::to_string(WaveSize.Preferred)}); break; case hlsl::DxilWaveSize::ValidationResult::MaxEqualsMin: // This case is allowed because users may disable the ErrorDefault warning. 
@@ -5235,227 +5307,227 @@ static void ValidateWaveSize(ValidationContext &ValCtx, case hlsl::DxilWaveSize::ValidationResult::PreferredWhenNoRange: ValCtx.EmitFnFormatError( F, ValidationRule::SmWaveSizeMaxAndPreferredZeroWhenNoRange, - {std::to_string(waveSize.Max), std::to_string(waveSize.Preferred)}); + {std::to_string(WaveSize.Max), std::to_string(WaveSize.Preferred)}); break; case hlsl::DxilWaveSize::ValidationResult::MaxLessThanMin: ValCtx.EmitFnFormatError( F, ValidationRule::SmWaveSizeMaxGreaterThanMin, - {std::to_string(waveSize.Max), std::to_string(waveSize.Min)}); + {std::to_string(WaveSize.Max), std::to_string(WaveSize.Min)}); break; case hlsl::DxilWaveSize::ValidationResult::PreferredOutOfRange: ValCtx.EmitFnFormatError(F, ValidationRule::SmWaveSizePreferredInRange, - {std::to_string(waveSize.Preferred), - std::to_string(waveSize.Min), - std::to_string(waveSize.Max)}); + {std::to_string(WaveSize.Preferred), + std::to_string(WaveSize.Min), + std::to_string(WaveSize.Max)}); break; } // Check shader model and kind. 
- if (waveSize.IsDefined()) { - if (!props.IsCS() && !props.IsNode()) { + if (WaveSize.IsDefined()) { + if (!Props.IsCS() && !Props.IsNode()) { ValCtx.EmitFnError(F, ValidationRule::SmWaveSizeOnComputeOrNode); } } } static void ValidateEntryProps(ValidationContext &ValCtx, - const DxilEntryProps &entryProps, + const DxilEntryProps &EntryProps, EntryStatus &Status, Function *F) { - const DxilFunctionProps &props = entryProps.props; - DXIL::ShaderKind ShaderType = props.shaderKind; + const DxilFunctionProps &Props = EntryProps.props; + DXIL::ShaderKind ShaderType = Props.shaderKind; - ValidateWaveSize(ValCtx, entryProps, F); + ValidateWaveSize(ValCtx, EntryProps, F); - if (ShaderType == DXIL::ShaderKind::Compute || props.IsNode()) { - unsigned x = props.numThreads[0]; - unsigned y = props.numThreads[1]; - unsigned z = props.numThreads[2]; + if (ShaderType == DXIL::ShaderKind::Compute || Props.IsNode()) { + unsigned X = Props.numThreads[0]; + unsigned Y = Props.numThreads[1]; + unsigned Z = Props.numThreads[2]; - unsigned threadsInGroup = x * y * z; + unsigned ThreadsInGroup = X * Y * Z; - if ((x < DXIL::kMinCSThreadGroupX) || (x > DXIL::kMaxCSThreadGroupX)) { + if ((X < DXIL::kMinCSThreadGroupX) || (X > DXIL::kMaxCSThreadGroupX)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"X", std::to_string(x), + {"X", std::to_string(X), std::to_string(DXIL::kMinCSThreadGroupX), std::to_string(DXIL::kMaxCSThreadGroupX)}); } - if ((y < DXIL::kMinCSThreadGroupY) || (y > DXIL::kMaxCSThreadGroupY)) { + if ((Y < DXIL::kMinCSThreadGroupY) || (Y > DXIL::kMaxCSThreadGroupY)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Y", std::to_string(y), + {"Y", std::to_string(Y), std::to_string(DXIL::kMinCSThreadGroupY), std::to_string(DXIL::kMaxCSThreadGroupY)}); } - if ((z < DXIL::kMinCSThreadGroupZ) || (z > DXIL::kMaxCSThreadGroupZ)) { + if ((Z < DXIL::kMinCSThreadGroupZ) || (Z > DXIL::kMaxCSThreadGroupZ)) { ValCtx.EmitFnFormatError(F, 
ValidationRule::SmThreadGroupChannelRange, - {"Z", std::to_string(z), + {"Z", std::to_string(Z), std::to_string(DXIL::kMinCSThreadGroupZ), std::to_string(DXIL::kMaxCSThreadGroupZ)}); } - if (threadsInGroup > DXIL::kMaxCSThreadsPerGroup) { + if (ThreadsInGroup > DXIL::kMaxCSThreadsPerGroup) { ValCtx.EmitFnFormatError(F, ValidationRule::SmMaxTheadGroup, - {std::to_string(threadsInGroup), + {std::to_string(ThreadsInGroup), std::to_string(DXIL::kMaxCSThreadsPerGroup)}); } - // type of threadID, thread group ID take care by DXIL operation overload + // type of ThreadID, thread group ID take care by DXIL operation overload // check. } else if (ShaderType == DXIL::ShaderKind::Mesh) { - const auto &MS = props.ShaderProps.MS; - unsigned x = props.numThreads[0]; - unsigned y = props.numThreads[1]; - unsigned z = props.numThreads[2]; + const auto &MS = Props.ShaderProps.MS; + unsigned X = Props.numThreads[0]; + unsigned Y = Props.numThreads[1]; + unsigned Z = Props.numThreads[2]; - unsigned threadsInGroup = x * y * z; + unsigned ThreadsInGroup = X * Y * Z; - if ((x < DXIL::kMinMSASThreadGroupX) || (x > DXIL::kMaxMSASThreadGroupX)) { + if ((X < DXIL::kMinMSASThreadGroupX) || (X > DXIL::kMaxMSASThreadGroupX)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"X", std::to_string(x), + {"X", std::to_string(X), std::to_string(DXIL::kMinMSASThreadGroupX), std::to_string(DXIL::kMaxMSASThreadGroupX)}); } - if ((y < DXIL::kMinMSASThreadGroupY) || (y > DXIL::kMaxMSASThreadGroupY)) { + if ((Y < DXIL::kMinMSASThreadGroupY) || (Y > DXIL::kMaxMSASThreadGroupY)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Y", std::to_string(y), + {"Y", std::to_string(Y), std::to_string(DXIL::kMinMSASThreadGroupY), std::to_string(DXIL::kMaxMSASThreadGroupY)}); } - if ((z < DXIL::kMinMSASThreadGroupZ) || (z > DXIL::kMaxMSASThreadGroupZ)) { + if ((Z < DXIL::kMinMSASThreadGroupZ) || (Z > DXIL::kMaxMSASThreadGroupZ)) { ValCtx.EmitFnFormatError(F, 
ValidationRule::SmThreadGroupChannelRange, - {"Z", std::to_string(z), + {"Z", std::to_string(Z), std::to_string(DXIL::kMinMSASThreadGroupZ), std::to_string(DXIL::kMaxMSASThreadGroupZ)}); } - if (threadsInGroup > DXIL::kMaxMSASThreadsPerGroup) { + if (ThreadsInGroup > DXIL::kMaxMSASThreadsPerGroup) { ValCtx.EmitFnFormatError(F, ValidationRule::SmMaxTheadGroup, - {std::to_string(threadsInGroup), + {std::to_string(ThreadsInGroup), std::to_string(DXIL::kMaxMSASThreadsPerGroup)}); } - // type of threadID, thread group ID take care by DXIL operation overload + // type of ThreadID, thread group ID take care by DXIL operation overload // check. - unsigned maxVertexCount = MS.maxVertexCount; - if (maxVertexCount > DXIL::kMaxMSOutputVertexCount) { + unsigned MaxVertexCount = MS.maxVertexCount; + if (MaxVertexCount > DXIL::kMaxMSOutputVertexCount) { ValCtx.EmitFnFormatError(F, ValidationRule::SmMeshShaderMaxVertexCount, {std::to_string(DXIL::kMaxMSOutputVertexCount), - std::to_string(maxVertexCount)}); + std::to_string(MaxVertexCount)}); } - unsigned maxPrimitiveCount = MS.maxPrimitiveCount; - if (maxPrimitiveCount > DXIL::kMaxMSOutputPrimitiveCount) { + unsigned MaxPrimitiveCount = MS.maxPrimitiveCount; + if (MaxPrimitiveCount > DXIL::kMaxMSOutputPrimitiveCount) { ValCtx.EmitFnFormatError( F, ValidationRule::SmMeshShaderMaxPrimitiveCount, {std::to_string(DXIL::kMaxMSOutputPrimitiveCount), - std::to_string(maxPrimitiveCount)}); + std::to_string(MaxPrimitiveCount)}); } } else if (ShaderType == DXIL::ShaderKind::Amplification) { - unsigned x = props.numThreads[0]; - unsigned y = props.numThreads[1]; - unsigned z = props.numThreads[2]; + unsigned X = Props.numThreads[0]; + unsigned Y = Props.numThreads[1]; + unsigned Z = Props.numThreads[2]; - unsigned threadsInGroup = x * y * z; + unsigned ThreadsInGroup = X * Y * Z; - if ((x < DXIL::kMinMSASThreadGroupX) || (x > DXIL::kMaxMSASThreadGroupX)) { + if ((X < DXIL::kMinMSASThreadGroupX) || (X > DXIL::kMaxMSASThreadGroupX)) { 
ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"X", std::to_string(x), + {"X", std::to_string(X), std::to_string(DXIL::kMinMSASThreadGroupX), std::to_string(DXIL::kMaxMSASThreadGroupX)}); } - if ((y < DXIL::kMinMSASThreadGroupY) || (y > DXIL::kMaxMSASThreadGroupY)) { + if ((Y < DXIL::kMinMSASThreadGroupY) || (Y > DXIL::kMaxMSASThreadGroupY)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Y", std::to_string(y), + {"Y", std::to_string(Y), std::to_string(DXIL::kMinMSASThreadGroupY), std::to_string(DXIL::kMaxMSASThreadGroupY)}); } - if ((z < DXIL::kMinMSASThreadGroupZ) || (z > DXIL::kMaxMSASThreadGroupZ)) { + if ((Z < DXIL::kMinMSASThreadGroupZ) || (Z > DXIL::kMaxMSASThreadGroupZ)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Z", std::to_string(z), + {"Z", std::to_string(Z), std::to_string(DXIL::kMinMSASThreadGroupZ), std::to_string(DXIL::kMaxMSASThreadGroupZ)}); } - if (threadsInGroup > DXIL::kMaxMSASThreadsPerGroup) { + if (ThreadsInGroup > DXIL::kMaxMSASThreadsPerGroup) { ValCtx.EmitFnFormatError(F, ValidationRule::SmMaxTheadGroup, - {std::to_string(threadsInGroup), + {std::to_string(ThreadsInGroup), std::to_string(DXIL::kMaxMSASThreadsPerGroup)}); } - // type of threadID, thread group ID take care by DXIL operation overload + // type of ThreadID, thread group ID take care by DXIL operation overload // check. 
} else if (ShaderType == DXIL::ShaderKind::Domain) { - const auto &DS = props.ShaderProps.DS; - DXIL::TessellatorDomain domain = DS.domain; - if (domain >= DXIL::TessellatorDomain::LastEntry) - domain = DXIL::TessellatorDomain::Undefined; - unsigned inputControlPointCount = DS.inputControlPoints; + const auto &DS = Props.ShaderProps.DS; + DXIL::TessellatorDomain Domain = DS.domain; + if (Domain >= DXIL::TessellatorDomain::LastEntry) + Domain = DXIL::TessellatorDomain::Undefined; + unsigned InputControlPointCount = DS.inputControlPoints; - if (inputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { + if (InputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { ValCtx.EmitFnFormatError( F, ValidationRule::SmDSInputControlPointCountRange, {std::to_string(DXIL::kMaxIAPatchControlPointCount), - std::to_string(inputControlPointCount)}); + std::to_string(InputControlPointCount)}); } - if (domain == DXIL::TessellatorDomain::Undefined) { + if (Domain == DXIL::TessellatorDomain::Undefined) { ValCtx.EmitFnError(F, ValidationRule::SmValidDomain); } - CheckPatchConstantSemantic(ValCtx, entryProps, Status, F); + CheckPatchConstantSemantic(ValCtx, EntryProps, Status, F); } else if (ShaderType == DXIL::ShaderKind::Hull) { - const auto &HS = props.ShaderProps.HS; - DXIL::TessellatorDomain domain = HS.domain; - if (domain >= DXIL::TessellatorDomain::LastEntry) - domain = DXIL::TessellatorDomain::Undefined; - unsigned inputControlPointCount = HS.inputControlPoints; - if (inputControlPointCount == 0) { - const DxilSignature &inputSig = entryProps.sig.InputSignature; - if (!inputSig.GetElements().empty()) { + const auto &HS = Props.ShaderProps.HS; + DXIL::TessellatorDomain Domain = HS.domain; + if (Domain >= DXIL::TessellatorDomain::LastEntry) + Domain = DXIL::TessellatorDomain::Undefined; + unsigned InputControlPointCount = HS.inputControlPoints; + if (InputControlPointCount == 0) { + const DxilSignature &InputSig = EntryProps.sig.InputSignature; + if 
(!InputSig.GetElements().empty()) { ValCtx.EmitFnError(F, ValidationRule::SmZeroHSInputControlPointWithInput); } - } else if (inputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { + } else if (InputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { ValCtx.EmitFnFormatError( F, ValidationRule::SmHSInputControlPointCountRange, {std::to_string(DXIL::kMaxIAPatchControlPointCount), - std::to_string(inputControlPointCount)}); + std::to_string(InputControlPointCount)}); } - unsigned outputControlPointCount = HS.outputControlPoints; - if (outputControlPointCount < DXIL::kMinIAPatchControlPointCount || - outputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { + unsigned OutputControlPointCount = HS.outputControlPoints; + if (OutputControlPointCount < DXIL::kMinIAPatchControlPointCount || + OutputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { ValCtx.EmitFnFormatError( F, ValidationRule::SmOutputControlPointCountRange, {std::to_string(DXIL::kMinIAPatchControlPointCount), std::to_string(DXIL::kMaxIAPatchControlPointCount), - std::to_string(outputControlPointCount)}); + std::to_string(OutputControlPointCount)}); } - if (domain == DXIL::TessellatorDomain::Undefined) { + if (Domain == DXIL::TessellatorDomain::Undefined) { ValCtx.EmitFnError(F, ValidationRule::SmValidDomain); } - DXIL::TessellatorPartitioning partition = HS.partition; - if (partition == DXIL::TessellatorPartitioning::Undefined) { + DXIL::TessellatorPartitioning Partition = HS.partition; + if (Partition == DXIL::TessellatorPartitioning::Undefined) { ValCtx.EmitFnError(F, ValidationRule::MetaTessellatorPartition); } - DXIL::TessellatorOutputPrimitive tessOutputPrimitive = HS.outputPrimitive; - if (tessOutputPrimitive == DXIL::TessellatorOutputPrimitive::Undefined || - tessOutputPrimitive == DXIL::TessellatorOutputPrimitive::LastEntry) { + DXIL::TessellatorOutputPrimitive TessOutputPrimitive = HS.outputPrimitive; + if (TessOutputPrimitive == 
DXIL::TessellatorOutputPrimitive::Undefined || + TessOutputPrimitive == DXIL::TessellatorOutputPrimitive::LastEntry) { ValCtx.EmitFnError(F, ValidationRule::MetaTessellatorOutputPrimitive); } - float maxTessFactor = HS.maxTessFactor; - if (maxTessFactor < DXIL::kHSMaxTessFactorLowerBound || - maxTessFactor > DXIL::kHSMaxTessFactorUpperBound) { + float MaxTessFactor = HS.maxTessFactor; + if (MaxTessFactor < DXIL::kHSMaxTessFactorLowerBound || + MaxTessFactor > DXIL::kHSMaxTessFactorUpperBound) { ValCtx.EmitFnFormatError( F, ValidationRule::MetaMaxTessFactor, {std::to_string(DXIL::kHSMaxTessFactorLowerBound), std::to_string(DXIL::kHSMaxTessFactorUpperBound), - std::to_string(maxTessFactor)}); + std::to_string(MaxTessFactor)}); } // Domain and OutPrimivtive match. - switch (domain) { + switch (Domain) { case DXIL::TessellatorDomain::IsoLine: - switch (tessOutputPrimitive) { + switch (TessOutputPrimitive) { case DXIL::TessellatorOutputPrimitive::TriangleCW: case DXIL::TessellatorOutputPrimitive::TriangleCCW: ValCtx.EmitFnError(F, ValidationRule::SmIsoLineOutputPrimitiveMismatch); @@ -5465,7 +5537,7 @@ static void ValidateEntryProps(ValidationContext &ValCtx, } break; case DXIL::TessellatorDomain::Tri: - switch (tessOutputPrimitive) { + switch (TessOutputPrimitive) { case DXIL::TessellatorOutputPrimitive::Line: ValCtx.EmitFnError(F, ValidationRule::SmTriOutputPrimitiveMismatch); break; @@ -5474,7 +5546,7 @@ static void ValidateEntryProps(ValidationContext &ValCtx, } break; case DXIL::TessellatorDomain::Quad: - switch (tessOutputPrimitive) { + switch (TessOutputPrimitive) { case DXIL::TessellatorOutputPrimitive::Line: ValCtx.EmitFnError(F, ValidationRule::SmTriOutputPrimitiveMismatch); break; @@ -5487,39 +5559,39 @@ static void ValidateEntryProps(ValidationContext &ValCtx, break; } - CheckPatchConstantSemantic(ValCtx, entryProps, Status, F); + CheckPatchConstantSemantic(ValCtx, EntryProps, Status, F); } else if (ShaderType == DXIL::ShaderKind::Geometry) { - const auto 
&GS = props.ShaderProps.GS; - unsigned maxVertexCount = GS.maxVertexCount; - if (maxVertexCount > DXIL::kMaxGSOutputVertexCount) { + const auto &GS = Props.ShaderProps.GS; + unsigned MaxVertexCount = GS.maxVertexCount; + if (MaxVertexCount > DXIL::kMaxGSOutputVertexCount) { ValCtx.EmitFnFormatError(F, ValidationRule::SmGSOutputVertexCountRange, {std::to_string(DXIL::kMaxGSOutputVertexCount), - std::to_string(maxVertexCount)}); + std::to_string(MaxVertexCount)}); } - unsigned instanceCount = GS.instanceCount; - if (instanceCount > DXIL::kMaxGSInstanceCount || instanceCount < 1) { + unsigned InstanceCount = GS.instanceCount; + if (InstanceCount > DXIL::kMaxGSInstanceCount || InstanceCount < 1) { ValCtx.EmitFnFormatError(F, ValidationRule::SmGSInstanceCountRange, {std::to_string(DXIL::kMaxGSInstanceCount), - std::to_string(instanceCount)}); + std::to_string(InstanceCount)}); } - DXIL::PrimitiveTopology topo = DXIL::PrimitiveTopology::Undefined; - bool bTopoMismatch = false; - for (size_t i = 0; i < _countof(GS.streamPrimitiveTopologies); ++i) { - if (GS.streamPrimitiveTopologies[i] != + DXIL::PrimitiveTopology Topo = DXIL::PrimitiveTopology::Undefined; + bool TopoMismatch = false; + for (size_t I = 0; I < _countof(GS.streamPrimitiveTopologies); ++I) { + if (GS.streamPrimitiveTopologies[I] != DXIL::PrimitiveTopology::Undefined) { - if (topo == DXIL::PrimitiveTopology::Undefined) - topo = GS.streamPrimitiveTopologies[i]; - else if (topo != GS.streamPrimitiveTopologies[i]) { - bTopoMismatch = true; + if (Topo == DXIL::PrimitiveTopology::Undefined) + Topo = GS.streamPrimitiveTopologies[I]; + else if (Topo != GS.streamPrimitiveTopologies[I]) { + TopoMismatch = true; break; } } } - if (bTopoMismatch) - topo = DXIL::PrimitiveTopology::Undefined; - switch (topo) { + if (TopoMismatch) + Topo = DXIL::PrimitiveTopology::Undefined; + switch (Topo) { case DXIL::PrimitiveTopology::PointList: case DXIL::PrimitiveTopology::LineStrip: case DXIL::PrimitiveTopology::TriangleStrip: @@ 
-5529,9 +5601,9 @@ static void ValidateEntryProps(ValidationContext &ValCtx, } break; } - DXIL::InputPrimitive inputPrimitive = GS.inputPrimitive; - unsigned VertexCount = GetNumVertices(inputPrimitive); - if (VertexCount == 0 && inputPrimitive != DXIL::InputPrimitive::Undefined) { + DXIL::InputPrimitive InputPrimitive = GS.inputPrimitive; + unsigned VertexCount = GetNumVertices(InputPrimitive); + if (VertexCount == 0 && InputPrimitive != DXIL::InputPrimitive::Undefined) { ValCtx.EmitFnError(F, ValidationRule::SmGSValidInputPrimitive); } } @@ -5542,10 +5614,10 @@ static void ValidateShaderState(ValidationContext &ValCtx) { if (ValCtx.isLibProfile) { for (Function &F : DM.GetModule()->functions()) { if (DM.HasDxilEntryProps(&F)) { - DxilEntryProps &entryProps = DM.GetDxilEntryProps(&F); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(&F); EntryStatus &Status = ValCtx.GetEntryStatus(&F); - ValidateEntryProps(ValCtx, entryProps, Status, &F); - ValidatePassThruHS(ValCtx, entryProps, &F); + ValidateEntryProps(ValCtx, EntryProps, Status, &F); + ValidatePassThruHS(ValCtx, EntryProps, &F); } } } else { @@ -5556,33 +5628,33 @@ static void ValidateShaderState(ValidationContext &ValCtx) { return; } EntryStatus &Status = ValCtx.GetEntryStatus(Entry); - DxilEntryProps &entryProps = DM.GetDxilEntryProps(Entry); - ValidateEntryProps(ValCtx, entryProps, Status, Entry); - ValidatePassThruHS(ValCtx, entryProps, Entry); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(Entry); + ValidateEntryProps(ValCtx, EntryProps, Status, Entry); + ValidatePassThruHS(ValCtx, EntryProps, Entry); } } static CallGraphNode * -CalculateCallDepth(CallGraphNode *node, - std::unordered_map &depthMap, - std::unordered_set &callStack, - std::unordered_set &funcSet) { - unsigned depth = callStack.size(); - funcSet.insert(node->getFunction()); - for (auto it = node->begin(), ei = node->end(); it != ei; it++) { - CallGraphNode *toNode = it->second; - if (callStack.insert(toNode).second == false) { 
+CalculateCallDepth(CallGraphNode *Node, + std::unordered_map &DepthMap, + std::unordered_set &CallStack, + std::unordered_set &FuncSet) { + unsigned Depth = CallStack.size(); + FuncSet.insert(Node->getFunction()); + for (auto It = Node->begin(), EIt = Node->end(); It != EIt; It++) { + CallGraphNode *ToNode = It->second; + if (CallStack.insert(ToNode).second == false) { // Recursive. - return toNode; + return ToNode; } - if (depthMap[toNode] < depth) - depthMap[toNode] = depth; + if (DepthMap[ToNode] < Depth) + DepthMap[ToNode] = Depth; if (CallGraphNode *N = - CalculateCallDepth(toNode, depthMap, callStack, funcSet)) { + CalculateCallDepth(ToNode, DepthMap, CallStack, FuncSet)) { // Recursive return N; } - callStack.erase(toNode); + CallStack.erase(ToNode); } return nullptr; @@ -5592,29 +5664,29 @@ static void ValidateCallGraph(ValidationContext &ValCtx) { // Build CallGraph. CallGraph &CG = ValCtx.GetCallGraph(); - std::unordered_map depthMap; - std::unordered_set callStack; - CallGraphNode *entryNode = CG[ValCtx.DxilMod.GetEntryFunction()]; - depthMap[entryNode] = 0; - if (CallGraphNode *N = CalculateCallDepth(entryNode, depthMap, callStack, + std::unordered_map DepthMap; + std::unordered_set CallStack; + CallGraphNode *EntryNode = CG[ValCtx.DxilMod.GetEntryFunction()]; + DepthMap[EntryNode] = 0; + if (CallGraphNode *N = CalculateCallDepth(EntryNode, DepthMap, CallStack, ValCtx.entryFuncCallSet)) ValCtx.EmitFnError(N->getFunction(), ValidationRule::FlowNoRecursion); if (ValCtx.DxilMod.GetShaderModel()->IsHS()) { - CallGraphNode *patchConstantNode = + CallGraphNode *PatchConstantNode = CG[ValCtx.DxilMod.GetPatchConstantFunction()]; - depthMap[patchConstantNode] = 0; - callStack.clear(); + DepthMap[PatchConstantNode] = 0; + CallStack.clear(); if (CallGraphNode *N = - CalculateCallDepth(patchConstantNode, depthMap, callStack, + CalculateCallDepth(PatchConstantNode, DepthMap, CallStack, ValCtx.patchConstFuncCallSet)) ValCtx.EmitFnError(N->getFunction(), 
ValidationRule::FlowNoRecursion); } } static void ValidateFlowControl(ValidationContext &ValCtx) { - bool reducible = + bool Reducible = IsReducible(*ValCtx.DxilMod.GetModule(), IrreducibilityAction::Ignore); - if (!reducible) { + if (!Reducible) { ValCtx.EmitError(ValidationRule::FlowReducible); return; } @@ -5629,28 +5701,28 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { DominatorTree DT = DTA.run(F); LoopInfo LI; LI.Analyze(DT); - for (auto loopIt = LI.begin(); loopIt != LI.end(); loopIt++) { - Loop *loop = *loopIt; - SmallVector exitBlocks; - loop->getExitBlocks(exitBlocks); - if (exitBlocks.empty()) + for (auto LoopIt = LI.begin(); LoopIt != LI.end(); LoopIt++) { + Loop *Loop = *LoopIt; + SmallVector ExitBlocks; + Loop->getExitBlocks(ExitBlocks); + if (ExitBlocks.empty()) ValCtx.EmitFnError(&F, ValidationRule::FlowDeadLoop); } // validate that there is no use of a value that has been output-completed // for this function. - hlsl::OP *hlslOP = ValCtx.DxilMod.GetOP(); + hlsl::OP *HlslOP = ValCtx.DxilMod.GetOP(); - for (auto &it : hlslOP->GetOpFuncList(DXIL::OpCode::OutputComplete)) { - Function *pF = it.second; + for (auto &It : HlslOP->GetOpFuncList(DXIL::OpCode::OutputComplete)) { + Function *pF = It.second; if (!pF) continue; // first, collect all the output complete calls that are not dominated // by another OutputComplete call for the same handle value llvm::SmallMapVector, 4> - handleToCI; + HandleToCI; for (User *U : pF->users()) { // all OutputComplete calls are instructions, and call instructions, // so there shouldn't need to be a null check. 
@@ -5662,33 +5734,33 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { continue; DxilInst_OutputComplete OutputComplete(CI); - Value *completedRecord = OutputComplete.get_output(); + Value *CompletedRecord = OutputComplete.get_output(); - auto vIt = handleToCI.find(completedRecord); - if (vIt == handleToCI.end()) { + auto vIt = HandleToCI.find(CompletedRecord); + if (vIt == HandleToCI.end()) { llvm::SmallPtrSet s; s.insert(CI); - handleToCI.insert(std::make_pair(completedRecord, s)); + HandleToCI.insert(std::make_pair(CompletedRecord, s)); } else { // if the handle is already in the map, make sure the map's set of // output complete calls that dominate the handle and do not dominate // each other gets updated if necessary bool CI_is_dominated = false; - for (auto ocIt = vIt->second.begin(); ocIt != vIt->second.end();) { + for (auto OcIt = vIt->second.begin(); OcIt != vIt->second.end();) { // if our new OC CI dominates an OC instruction in the set, // then replace the instruction in the set with the new OC CI. - if (DT.dominates(CI, *ocIt)) { - auto cur_it = ocIt++; + if (DT.dominates(CI, *OcIt)) { + auto cur_it = OcIt++; vIt->second.erase(*cur_it); continue; } // Remember if our new CI gets dominated by any CI in the set. 
- if (DT.dominates(*ocIt, CI)) { + if (DT.dominates(*OcIt, CI)) { CI_is_dominated = true; break; } - ocIt++; + OcIt++; } // if no CI in the set dominates our new CI, // the new CI should be added to the set @@ -5697,14 +5769,14 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { } } - for (auto handle_iter = handleToCI.begin(), e = handleToCI.end(); + for (auto handle_iter = HandleToCI.begin(), e = HandleToCI.end(); handle_iter != e; handle_iter++) { for (auto user_itr = handle_iter->first->user_begin(); user_itr != handle_iter->first->user_end(); user_itr++) { User *pU = *user_itr; - Instruction *useInstr = cast(pU); - if (useInstr) { - if (CallInst *CI = dyn_cast(useInstr)) { + Instruction *UseInstr = cast(pU); + if (UseInstr) { + if (CallInst *CI = dyn_cast(UseInstr)) { // if the user is an output complete call that is in the set of // OutputComplete calls not dominated by another OutputComplete // call for the same handle value, no diagnostics need to be @@ -5715,15 +5787,15 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { // make sure any output complete call in the set // that dominates this use gets its diagnostic emitted. 
- for (auto ocIt = handle_iter->second.begin(); - ocIt != handle_iter->second.end(); ocIt++) { - Instruction *ocInstr = cast(*ocIt); - if (DT.dominates(ocInstr, useInstr)) { + for (auto OcIt = handle_iter->second.begin(); + OcIt != handle_iter->second.end(); OcIt++) { + Instruction *OcInstr = cast(*OcIt); + if (DT.dominates(OcInstr, UseInstr)) { ValCtx.EmitInstrError( - useInstr, + UseInstr, ValidationRule::InstrNodeRecordHandleUseAfterComplete); ValCtx.EmitInstrNote( - *ocIt, "record handle invalidated by OutputComplete"); + *OcIt, "record handle invalidated by OutputComplete"); break; } } @@ -5739,57 +5811,57 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { static void ValidateUninitializedOutput(ValidationContext &ValCtx, Function *F) { DxilModule &DM = ValCtx.DxilMod; - DxilEntryProps &entryProps = DM.GetDxilEntryProps(F); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(F); EntryStatus &Status = ValCtx.GetEntryStatus(F); - const DxilFunctionProps &props = entryProps.props; + const DxilFunctionProps &Props = EntryProps.props; // For HS only need to check Tessfactor which is in patch constant sig. - if (props.IsHS()) { - std::vector &patchConstOrPrimCols = Status.patchConstOrPrimCols; - const DxilSignature &patchConstSig = - entryProps.sig.PatchConstOrPrimSignature; - for (auto &E : patchConstSig.GetElements()) { - unsigned mask = patchConstOrPrimCols[E->GetID()]; - unsigned requireMask = (1 << E->GetCols()) - 1; + if (Props.IsHS()) { + std::vector &PatchConstOrPrimCols = Status.patchConstOrPrimCols; + const DxilSignature &PatchConstSig = + EntryProps.sig.PatchConstOrPrimSignature; + for (auto &E : PatchConstSig.GetElements()) { + unsigned Mask = PatchConstOrPrimCols[E->GetID()]; + unsigned RequireMask = (1 << E->GetCols()) - 1; // TODO: check other case uninitialized output is allowed. 
- if (mask != requireMask && !E->GetSemantic()->IsArbitrary()) { + if (Mask != RequireMask && !E->GetSemantic()->IsArbitrary()) { ValCtx.EmitFnFormatError(F, ValidationRule::SmUndefinedOutput, {E->GetName()}); } } return; } - const DxilSignature &outSig = entryProps.sig.OutputSignature; - std::vector &outputCols = Status.outputCols; - for (auto &E : outSig.GetElements()) { - unsigned mask = outputCols[E->GetID()]; - unsigned requireMask = (1 << E->GetCols()) - 1; + const DxilSignature &OutSig = EntryProps.sig.OutputSignature; + std::vector &OutputCols = Status.outputCols; + for (auto &E : OutSig.GetElements()) { + unsigned Mask = OutputCols[E->GetID()]; + unsigned RequireMask = (1 << E->GetCols()) - 1; // TODO: check other case uninitialized output is allowed. - if (mask != requireMask && !E->GetSemantic()->IsArbitrary() && + if (Mask != RequireMask && !E->GetSemantic()->IsArbitrary() && E->GetSemantic()->GetKind() != Semantic::Kind::Target) { ValCtx.EmitFnFormatError(F, ValidationRule::SmUndefinedOutput, {E->GetName()}); } } - if (!props.IsGS()) { - unsigned posMask = Status.OutputPositionMask[0]; - if (posMask != 0xf && Status.hasOutputPosition[0]) { + if (!Props.IsGS()) { + unsigned PosMask = Status.OutputPositionMask[0]; + if (PosMask != 0xf && Status.hasOutputPosition[0]) { ValCtx.EmitFnError(F, ValidationRule::SmCompletePosition); } } else { - const auto &GS = props.ShaderProps.GS; - unsigned streamMask = 0; - for (size_t i = 0; i < _countof(GS.streamPrimitiveTopologies); ++i) { - if (GS.streamPrimitiveTopologies[i] != + const auto &GS = Props.ShaderProps.GS; + unsigned StreamMask = 0; + for (size_t I = 0; I < _countof(GS.streamPrimitiveTopologies); ++I) { + if (GS.streamPrimitiveTopologies[I] != DXIL::PrimitiveTopology::Undefined) { - streamMask |= 1 << i; + StreamMask |= 1 << I; } } - for (unsigned i = 0; i < DXIL::kNumOutputStreams; i++) { - if (streamMask & (1 << i)) { - unsigned posMask = Status.OutputPositionMask[i]; - if (posMask != 0xf && 
Status.hasOutputPosition[i]) { + for (unsigned I = 0; I < DXIL::kNumOutputStreams; I++) { + if (StreamMask & (1 << I)) { + unsigned PosMask = Status.OutputPositionMask[I]; + if (PosMask != 0xf && Status.hasOutputPosition[I]) { ValCtx.EmitFnError(F, ValidationRule::SmCompletePosition); } } diff --git a/lib/HLSL/CMakeLists.txt b/lib/HLSL/CMakeLists.txt index 947fc4c14f..21bb9523a7 100644 --- a/lib/HLSL/CMakeLists.txt +++ b/lib/HLSL/CMakeLists.txt @@ -25,6 +25,7 @@ add_llvm_library(LLVMHLSL DxilNoops.cpp DxilPreserveAllOutputs.cpp DxilRenameResourcesPass.cpp + DxilScalarizeVectorLoadStores.cpp DxilSimpleGVNHoist.cpp DxilSignatureValidation.cpp DxilTargetLowering.cpp diff --git a/lib/HLSL/DxilCondenseResources.cpp b/lib/HLSL/DxilCondenseResources.cpp index 82d5e14d00..529c203bdc 100644 --- a/lib/HLSL/DxilCondenseResources.cpp +++ b/lib/HLSL/DxilCondenseResources.cpp @@ -2061,7 +2061,8 @@ void DxilLowerCreateHandleForLib::ReplaceResourceUserWithHandle( }; // Search all users for update counter - bool updateAnnotateHandle = res.IsGloballyCoherent(); + bool updateAnnotateHandle = + res.IsGloballyCoherent() || res.IsReorderCoherent(); if (!res.HasCounter()) { for (User *U : handle->users()) { if (IsDxilOp(U, hlsl::OP::OpCode::BufferUpdateCounter)) { @@ -2321,6 +2322,7 @@ void InitTBuffer(const DxilCBuffer *pSource, DxilResource *pDest) { pDest->SetSampleCount(0); pDest->SetElementStride(0); pDest->SetGloballyCoherent(false); + pDest->SetReorderCoherent(false); pDest->SetHasCounter(false); pDest->SetRW(false); pDest->SetROV(false); diff --git a/lib/HLSL/DxilGenerationPass.cpp b/lib/HLSL/DxilGenerationPass.cpp index 7d902a4ed7..c3a6ad7dfc 100644 --- a/lib/HLSL/DxilGenerationPass.cpp +++ b/lib/HLSL/DxilGenerationPass.cpp @@ -88,6 +88,7 @@ void InitResource(const DxilResource *pSource, DxilResource *pDest) { pDest->SetSampleCount(pSource->GetSampleCount()); pDest->SetElementStride(pSource->GetElementStride()); pDest->SetGloballyCoherent(pSource->IsGloballyCoherent()); + 
pDest->SetReorderCoherent(pSource->IsReorderCoherent()); pDest->SetHasCounter(pSource->HasCounter()); pDest->SetRW(pSource->IsRW()); pDest->SetROV(pSource->IsROV()); diff --git a/lib/HLSL/DxilLinker.cpp b/lib/HLSL/DxilLinker.cpp index 68c83fc037..75d1bf78e9 100644 --- a/lib/HLSL/DxilLinker.cpp +++ b/lib/HLSL/DxilLinker.cpp @@ -1247,6 +1247,10 @@ void DxilLinkJob::RunPreparePass(Module &M) { PM.add(createDxilReinsertNopsPass()); PM.add(createAlwaysInlinerPass(/*InsertLifeTime*/ false)); + // If we need SROA and dynamicindexvector to array, + // do it early to allow following scalarization to go forward. + PM.add(createDxilScalarizeVectorLoadStoresPass()); + // Remove unused functions. PM.add(createDxilDeadFunctionEliminationPass()); @@ -1255,6 +1259,12 @@ void DxilLinkJob::RunPreparePass(Module &M) { // For static global handle. PM.add(createLowerStaticGlobalIntoAlloca()); + // Change dynamic indexing vector to array where vectors aren't + // supported, but might be there from the initial compile. + if (!pSM->IsSM69Plus()) + PM.add( + createDynamicIndexingVectorToArrayPass(false /* ReplaceAllVector */)); + // Remove MultiDimArray from function call arg. 
PM.add(createMultiDimArrayToOneDimArrayPass()); diff --git a/lib/HLSL/DxilPatchShaderRecordBindings.cpp b/lib/HLSL/DxilPatchShaderRecordBindings.cpp index 1873dcbcc4..e07a41a5c0 100644 --- a/lib/HLSL/DxilPatchShaderRecordBindings.cpp +++ b/lib/HLSL/DxilPatchShaderRecordBindings.cpp @@ -341,6 +341,7 @@ unsigned int DxilPatchShaderRecordBindings::AddHandle( if (pHandle) { pHandle->SetGloballyCoherent(false); + pHandle->SetReorderCoherent(false); pHandle->SetHasCounter(false); pHandle->SetCompType(CompType::getF32()); // TODO: Need to handle all types } diff --git a/lib/HLSL/DxilScalarizeVectorLoadStores.cpp b/lib/HLSL/DxilScalarizeVectorLoadStores.cpp new file mode 100644 index 0000000000..febcf32358 --- /dev/null +++ b/lib/HLSL/DxilScalarizeVectorLoadStores.cpp @@ -0,0 +1,231 @@ +/////////////////////////////////////////////////////////////////////////////// +// // +// DxilScalarizeVectorLoadStores.cpp // +// Copyright (C) Microsoft Corporation. All rights reserved. // +// This file is distributed under the University of Illinois Open Source // +// License. See LICENSE.TXT for details. // +// // +// Lowers native vector load stores to potentially multiple scalar calls. 
// +// // +/////////////////////////////////////////////////////////////////////////////// + +#include "dxc/DXIL/DxilInstructions.h" +#include "dxc/DXIL/DxilModule.h" +#include "dxc/HLSL/DxilGenerationPass.h" + +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" + +using namespace llvm; +using namespace hlsl; + +static void scalarizeVectorLoad(hlsl::OP *HlslOP, const DataLayout &DL, + CallInst *CI); +static void scalarizeVectorStore(hlsl::OP *HlslOP, const DataLayout &DL, + CallInst *CI); + +class DxilScalarizeVectorLoadStores : public ModulePass { +public: + static char ID; // Pass identification, replacement for typeid + explicit DxilScalarizeVectorLoadStores() : ModulePass(ID) {} + + StringRef getPassName() const override { + return "DXIL scalarize vector load/stores"; + } + + bool runOnModule(Module &M) override { + DxilModule &DM = M.GetOrCreateDxilModule(); + // Shader Model 6.9 allows native vectors and doesn't need this pass. 
+ if (DM.GetShaderModel()->IsSM69Plus()) + return false; + + bool Changed = false; + + hlsl::OP *HlslOP = DM.GetOP(); + for (auto FIt : HlslOP->GetOpFuncList(DXIL::OpCode::RawBufferVectorLoad)) { + Function *Func = FIt.second; + if (!Func) + continue; + for (auto U = Func->user_begin(), UE = Func->user_end(); U != UE;) { + CallInst *CI = cast(*(U++)); + scalarizeVectorLoad(HlslOP, M.getDataLayout(), CI); + Changed = true; + } + } + for (auto FIt : HlslOP->GetOpFuncList(DXIL::OpCode::RawBufferVectorStore)) { + Function *Func = FIt.second; + if (!Func) + continue; + for (auto U = Func->user_begin(), UE = Func->user_end(); U != UE;) { + CallInst *CI = cast(*(U++)); + scalarizeVectorStore(HlslOP, M.getDataLayout(), CI); + Changed = true; + } + } + return Changed; + } +}; + +static unsigned GetRawBufferMask(unsigned NumComponents) { + switch (NumComponents) { + case 0: + return 0; + case 1: + return DXIL::kCompMask_X; + case 2: + return DXIL::kCompMask_X | DXIL::kCompMask_Y; + case 3: + return DXIL::kCompMask_X | DXIL::kCompMask_Y | DXIL::kCompMask_Z; + case 4: + default: + return DXIL::kCompMask_All; + } + return DXIL::kCompMask_All; +} + +static void scalarizeVectorLoad(hlsl::OP *HlslOP, const DataLayout &DL, + CallInst *CI) { + IRBuilder<> Builder(CI); + // Collect the information required to break this into scalar ops from args. + DxilInst_RawBufferVectorLoad VecLd(CI); + OP::OpCode OpCode = OP::OpCode::RawBufferLoad; + llvm::Constant *OpArg = Builder.getInt32((unsigned)OpCode); + SmallVector Args; + Args.emplace_back(OpArg); // opcode @0. + Args.emplace_back(VecLd.get_buf()); // Resource handle @1. + Args.emplace_back(VecLd.get_index()); // Index @2. + Args.emplace_back(VecLd.get_elementOffset()); // Offset @3. + Args.emplace_back(nullptr); // Mask to be set later @4. + Args.emplace_back(VecLd.get_alignment()); // Alignment @5. + + // Set offset to increment depending on whether the real offset is defined. 
+ unsigned OffsetIdx; + if (isa(VecLd.get_elementOffset())) + // Byte Address Buffers can't use offset, so use index. + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadIndexOpIdx; + else + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx; + + StructType *ResRetTy = cast(CI->getType()); + Type *Ty = ResRetTy->getElementType(0); + unsigned NumComponents = Ty->getVectorNumElements(); + Type *EltTy = Ty->getScalarType(); + unsigned EltSize = DL.getTypeAllocSize(EltTy); + + const unsigned MaxElemCount = 4; + SmallVector Elts(NumComponents); + Value *Ld = nullptr; + for (unsigned EIx = 0; EIx < NumComponents;) { + // Load 4 elements or however many less than 4 are left to load. + unsigned ChunkSize = std::min(NumComponents - EIx, MaxElemCount); + Args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] = + HlslOP->GetI8Const(GetRawBufferMask(ChunkSize)); + // If we've loaded a chunk already, update offset to next chunk. + if (EIx > 0) + Args[OffsetIdx] = + Builder.CreateAdd(Args[OffsetIdx], HlslOP->GetU32Const(4 * EltSize)); + Function *F = HlslOP->GetOpFunc(OpCode, EltTy); + Ld = Builder.CreateCall(F, Args, OP::GetOpCodeName(OpCode)); + for (unsigned ChIx = 0; ChIx < ChunkSize; ChIx++, EIx++) + Elts[EIx] = Builder.CreateExtractValue(Ld, ChIx); + } + + Value *RetValNew = UndefValue::get(VectorType::get(EltTy, NumComponents)); + for (unsigned ElIx = 0; ElIx < NumComponents; ElIx++) + RetValNew = Builder.CreateInsertElement(RetValNew, Elts[ElIx], ElIx); + + // Replace users of the vector extracted from the vector load resret. + Value *Status = nullptr; + for (auto CU = CI->user_begin(), CE = CI->user_end(); CU != CE;) { + auto EV = cast(*(CU++)); + unsigned Ix = EV->getIndices()[0]; + if (Ix == 0) { + // Handle value uses. + EV->replaceAllUsesWith(RetValNew); + } else if (Ix == 1) { + // Handle status uses. 
+ if (!Status) + Status = Builder.CreateExtractValue(Ld, DXIL::kResRetStatusIndex); + EV->replaceAllUsesWith(Status); + } + EV->eraseFromParent(); + } + CI->eraseFromParent(); +} + +static void scalarizeVectorStore(hlsl::OP *HlslOP, const DataLayout &DL, + CallInst *CI) { + IRBuilder<> Builder(CI); + // Collect the information required to break this into scalar ops from args. + DxilInst_RawBufferVectorStore VecSt(CI); + OP::OpCode OpCode = OP::OpCode::RawBufferStore; + llvm::Constant *OpArg = Builder.getInt32((unsigned)OpCode); + SmallVector Args; + Args.emplace_back(OpArg); // opcode @0. + Args.emplace_back(VecSt.get_uav()); // Resource handle @1. + Args.emplace_back(VecSt.get_index()); // Index @2. + Args.emplace_back(VecSt.get_elementOffset()); // Offset @3. + Args.emplace_back(nullptr); // Val0 to be set later @4. + Args.emplace_back(nullptr); // Val1 to be set later @5. + Args.emplace_back(nullptr); // Val2 to be set later @6. + Args.emplace_back(nullptr); // Val3 to be set later @7. + Args.emplace_back(nullptr); // Mask to be set later @8. + Args.emplace_back(VecSt.get_alignment()); // Alignment @9. + + // Set offset to increment depending on whether the real offset is defined. + unsigned OffsetIdx; + if (isa(VecSt.get_elementOffset())) + // Byte Address Buffers can't use offset, so use index. + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadIndexOpIdx; + else + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx; + + Value *VecVal = VecSt.get_value0(); + + const unsigned MaxElemCount = 4; + Type *Ty = VecVal->getType(); + const unsigned NumComponents = Ty->getVectorNumElements(); + Type *EltTy = Ty->getScalarType(); + Value *UndefVal = UndefValue::get(EltTy); + unsigned EltSize = DL.getTypeAllocSize(EltTy); + Function *F = HlslOP->GetOpFunc(OpCode, EltTy); + for (unsigned EIx = 0; EIx < NumComponents;) { + // Store 4 elements or however many less than 4 are left to store. 
+ unsigned ChunkSize = std::min(NumComponents - EIx, MaxElemCount); + // For second and subsequent store calls, increment the resource-appropriate + // index or offset parameter. + if (EIx > 0) + Args[OffsetIdx] = + Builder.CreateAdd(Args[OffsetIdx], HlslOP->GetU32Const(4 * EltSize)); + // Populate all value arguments either with the vector or undefs. + uint8_t Mask = 0; + unsigned ChIx = 0; + for (; ChIx < ChunkSize; ChIx++, EIx++) { + Args[DXIL::OperandIndex::kRawBufferStoreVal0OpIdx + ChIx] = + Builder.CreateExtractElement(VecVal, EIx); + Mask |= (1 << ChIx); + } + for (; ChIx < MaxElemCount; ChIx++) + Args[DXIL::OperandIndex::kRawBufferStoreVal0OpIdx + ChIx] = UndefVal; + + Args[DXIL::OperandIndex::kRawBufferStoreMaskOpIdx] = + HlslOP->GetU8Const(Mask); + Builder.CreateCall(F, Args); + } + CI->eraseFromParent(); +} + +char DxilScalarizeVectorLoadStores::ID = 0; + +ModulePass *llvm::createDxilScalarizeVectorLoadStoresPass() { + return new DxilScalarizeVectorLoadStores(); +} + +INITIALIZE_PASS(DxilScalarizeVectorLoadStores, + "hlsl-dxil-scalarize-vector-load-stores", + "DXIL scalarize vector load/stores", false, false) diff --git a/lib/HLSL/HLMatrixBitcastLowerPass.cpp b/lib/HLSL/HLMatrixBitcastLowerPass.cpp index 93ba3b9816..db20d8a324 100644 --- a/lib/HLSL/HLMatrixBitcastLowerPass.cpp +++ b/lib/HLSL/HLMatrixBitcastLowerPass.cpp @@ -76,6 +76,7 @@ Type *TryLowerMatTy(Type *Ty) { } class MatrixBitcastLowerPass : public FunctionPass { + bool SupportsVectors = false; public: static char ID; // Pass identification, replacement for typeid @@ -83,6 +84,9 @@ class MatrixBitcastLowerPass : public FunctionPass { StringRef getPassName() const override { return "Matrix Bitcast lower"; } bool runOnFunction(Function &F) override { + DxilModule &DM = F.getParent()->GetOrCreateDxilModule(); + SupportsVectors = DM.GetShaderModel()->IsSM69Plus(); + bool bUpdated = false; std::unordered_set matCastSet; for (auto blkIt = F.begin(); blkIt != F.end(); ++blkIt) { @@ -100,7 +104,6 @@ 
class MatrixBitcastLowerPass : public FunctionPass { } } - DxilModule &DM = F.getParent()->GetOrCreateDxilModule(); // Remove bitcast which has CallInst user. if (DM.GetShaderModel()->IsLib()) { for (auto it = matCastSet.begin(); it != matCastSet.end();) { @@ -185,7 +188,7 @@ void MatrixBitcastLowerPass::lowerMatrix(Instruction *M, Value *A) { User *U = *(it++); if (GetElementPtrInst *GEP = dyn_cast(U)) { Type *EltTy = GEP->getType()->getPointerElementType(); - if (HLMatrixType::isa(EltTy)) { + if (HLMatrixType MatTy = HLMatrixType::dyn_cast(EltTy)) { // Change gep matrixArray, 0, index // into // gep oneDimArray, 0, index * matSize @@ -193,10 +196,11 @@ void MatrixBitcastLowerPass::lowerMatrix(Instruction *M, Value *A) { SmallVector idxList(GEP->idx_begin(), GEP->idx_end()); DXASSERT(idxList.size() == 2, "else not one dim matrix array index to matrix"); - - HLMatrixType MatTy = HLMatrixType::cast(EltTy); - Value *matSize = Builder.getInt32(MatTy.getNumElements()); - idxList.back() = Builder.CreateMul(idxList.back(), matSize); + unsigned NumElts = MatTy.getNumElements(); + if (!SupportsVectors || NumElts == 1) { + Value *MatSize = Builder.getInt32(NumElts); + idxList.back() = Builder.CreateMul(idxList.back(), MatSize); + } Value *NewGEP = Builder.CreateGEP(A, idxList); lowerMatrix(GEP, NewGEP); DXASSERT(GEP->user_empty(), "else lower matrix fail"); @@ -211,13 +215,23 @@ void MatrixBitcastLowerPass::lowerMatrix(Instruction *M, Value *A) { } else if (LoadInst *LI = dyn_cast(U)) { if (VectorType *Ty = dyn_cast(LI->getType())) { IRBuilder<> Builder(LI); - Value *zeroIdx = Builder.getInt32(0); - unsigned vecSize = Ty->getNumElements(); - Value *NewVec = UndefValue::get(LI->getType()); - for (unsigned i = 0; i < vecSize; i++) { - Value *GEP = CreateEltGEP(A, i, zeroIdx, Builder); - Value *Elt = Builder.CreateLoad(GEP); - NewVec = Builder.CreateInsertElement(NewVec, Elt, i); + Value *NewVec = nullptr; + unsigned VecSize = Ty->getVectorNumElements(); + if (SupportsVectors 
&& VecSize > 1) { + // Create a replacement load using the vector pointer. + Instruction *NewLd = LI->clone(); + unsigned VecIdx = NewLd->getNumOperands() - 1; + NewLd->setOperand(VecIdx, A); + Builder.Insert(NewLd); + NewVec = NewLd; + } else { + Value *zeroIdx = Builder.getInt32(0); + NewVec = UndefValue::get(LI->getType()); + for (unsigned i = 0; i < VecSize; i++) { + Value *GEP = CreateEltGEP(A, i, zeroIdx, Builder); + Value *Elt = Builder.CreateLoad(GEP); + NewVec = Builder.CreateInsertElement(NewVec, Elt, i); + } } LI->replaceAllUsesWith(NewVec); LI->eraseFromParent(); @@ -228,12 +242,20 @@ void MatrixBitcastLowerPass::lowerMatrix(Instruction *M, Value *A) { Value *V = ST->getValueOperand(); if (VectorType *Ty = dyn_cast(V->getType())) { IRBuilder<> Builder(LI); - Value *zeroIdx = Builder.getInt32(0); - unsigned vecSize = Ty->getNumElements(); - for (unsigned i = 0; i < vecSize; i++) { - Value *GEP = CreateEltGEP(A, i, zeroIdx, Builder); - Value *Elt = Builder.CreateExtractElement(V, i); - Builder.CreateStore(Elt, GEP); + if (SupportsVectors && Ty->getVectorNumElements() > 1) { + // Create a replacement store using the vector pointer. + Instruction *NewSt = ST->clone(); + unsigned VecIdx = NewSt->getNumOperands() - 1; + NewSt->setOperand(VecIdx, A); + Builder.Insert(NewSt); + } else { + Value *zeroIdx = Builder.getInt32(0); + unsigned vecSize = Ty->getNumElements(); + for (unsigned i = 0; i < vecSize; i++) { + Value *GEP = CreateEltGEP(A, i, zeroIdx, Builder); + Value *Elt = Builder.CreateExtractElement(V, i); + Builder.CreateStore(Elt, GEP); + } } ST->eraseFromParent(); } else { diff --git a/lib/HLSL/HLModule.cpp b/lib/HLSL/HLModule.cpp index 037885c9d8..bab6e23a30 100644 --- a/lib/HLSL/HLModule.cpp +++ b/lib/HLSL/HLModule.cpp @@ -604,6 +604,9 @@ MDTuple *HLModule::EmitHLResources() { void HLModule::LoadHLResources(const llvm::MDOperand &MDO) { const llvm::MDTuple *pSRVs, *pUAVs, *pCBuffers, *pSamplers; + // No resources. Nothing to do. 
+ if (MDO.get() == nullptr) + return; m_pMDHelper->GetDxilResources(MDO, pSRVs, pUAVs, pCBuffers, pSamplers); // Load SRV records. @@ -697,6 +700,7 @@ HLModule::AddResourceWithGlobalVariableAndProps(llvm::Constant *GV, Res->SetRW(true); Res->SetROV(RP.Basic.IsROV); Res->SetGloballyCoherent(RP.Basic.IsGloballyCoherent); + Res->SetReorderCoherent(RP.Basic.IsReorderCoherent); Res->SetHasCounter(RP.Basic.SamplerCmpOrHasCounter); Res->SetKind(RK); Res->SetGlobalSymbol(GV); diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index bc293357d6..be45021e41 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -7,8 +7,12 @@ // // // Lower functions to lower HL operations to DXIL operations. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // /////////////////////////////////////////////////////////////////////////////// +#include "dxc/DXIL/DxilConstants.h" #define _USE_MATH_DEFINES #include #include @@ -421,6 +425,14 @@ struct IntrinsicLower { // IOP intrinsics. namespace { +// Creates the necessary scalar calls to for a "trivial" operation where only +// call instructions to a single function type are needed. +// The overload type `Ty` determines what scalarization might be required. +// Elements of any vectors in `refArgs` are extracted into scalars for each +// call generated while the same scalar values are used unaltered in each call. +// Utility objects `HlslOp` and `Builder` are used to generate calls to the +// given `DxilFunc` for each set of scalar arguments. +// The results are reconstructed into the given `RetTy` as needed. 
Value *TrivialDxilOperation(Function *dxilFunc, OP::OpCode opcode, ArrayRef refArgs, Type *Ty, Type *RetTy, OP *hlslOP, IRBuilder<> &Builder) { @@ -456,12 +468,40 @@ Value *TrivialDxilOperation(Function *dxilFunc, OP::OpCode opcode, } } } -// Generates a DXIL operation over an overloaded type (Ty), returning a -// RetTy value; when Ty is a vector, it will replicate per-element operations -// into RetTy to rebuild it. + +// Creates a native vector call to for a "trivial" operation where only a single +// call instruction is needed. The overload and return types are the same vector +// type `Ty`. +// Utility objects `HlslOp` and `Builder` are used to create a call to the given +// `DxilFunc` with `RefArgs` arguments. +Value *TrivialDxilVectorOperation(Function *Func, OP::OpCode Opcode, + ArrayRef Args, Type *Ty, OP *OP, + IRBuilder<> &Builder) { + if (!Ty->isVoidTy()) + return Builder.CreateCall(Func, Args, OP->GetOpCodeName(Opcode)); + return Builder.CreateCall(Func, Args); // Cannot add name to void. +} + +// Generates a DXIL operation with the overloaded type based on `Ty` and return +// type `RetTy`. When Ty is a vector, it will either generate per-element calls +// for each vector element and reconstruct the vector type from those results or +// operate on and return native vectors depending on vector size and the +// legality of the vector overload. Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef refArgs, Type *Ty, Type *RetTy, OP *hlslOP, IRBuilder<> &Builder) { + + // If supported and the overload type is a vector with more than 1 element, + // create a native vector operation. 
+ if (Ty->isVectorTy() && Ty->getVectorNumElements() > 1 && + hlslOP->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus() && + OP::IsOverloadLegal(opcode, Ty)) { + Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty); + return TrivialDxilVectorOperation(dxilFunc, opcode, refArgs, Ty, hlslOP, + Builder); + } + + // Set overload type to the scalar type of `Ty` and generate call(s). Type *EltTy = Ty->getScalarType(); Function *dxilFunc = hlslOP->GetOpFunc(opcode, EltTy); @@ -481,20 +521,34 @@ Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef refArgs, return TrivialDxilOperation(opcode, refArgs, Ty, Inst->getType(), hlslOP, B); } -Value *TrivialDxilUnaryOperationRet(OP::OpCode opcode, Value *src, Type *RetTy, - hlsl::OP *hlslOP, IRBuilder<> &Builder) { - Type *Ty = src->getType(); +// Translate call that converts to a dxil unary operation with a different +// return type from the overload by passing the argument, explicit return type, +// and helper objects to the scalarizing unary dxil operation creation. 
+Value *TrivialUnaryOperationRet(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *, + bool &Translated) { + Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); + Type *Ty = Src->getType(); - Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); - Value *args[] = {opArg, src}; + IRBuilder<> Builder(CI); + hlsl::OP *OP = &Helper.hlslOP; + Type *RetTy = CI->getType(); + Constant *OpArg = OP->GetU32Const((unsigned)OpCode); + Value *Args[] = {OpArg, Src}; - return TrivialDxilOperation(opcode, args, Ty, RetTy, hlslOP, Builder); + return TrivialDxilOperation(OpCode, Args, Ty, RetTy, OP, Builder); } -Value *TrivialDxilUnaryOperation(OP::OpCode opcode, Value *src, - hlsl::OP *hlslOP, IRBuilder<> &Builder) { - return TrivialDxilUnaryOperationRet(opcode, src, src->getType(), hlslOP, - Builder); +Value *TrivialDxilUnaryOperation(OP::OpCode OpCode, Value *Src, hlsl::OP *Op, + IRBuilder<> &Builder) { + Type *Ty = Src->getType(); + + Constant *OpArg = Op->GetU32Const((unsigned)OpCode); + Value *Args[] = {OpArg, Src}; + + return TrivialDxilOperation(OpCode, Args, Ty, Ty, Op, Builder); } Value *TrivialDxilBinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, @@ -518,6 +572,9 @@ Value *TrivialDxilTrinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder); } +// Translate call that trivially converts to a dxil unary operation by passing +// argument, return type, and helper objects to either scalarizing or native +// vector dxil operation creation depending on version and vector size. 
Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, @@ -525,11 +582,13 @@ Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *src0 = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); IRBuilder<> Builder(CI); hlsl::OP *hlslOP = &helper.hlslOP; - Value *retVal = TrivialDxilUnaryOperationRet(opcode, src0, CI->getType(), - hlslOP, Builder); - return retVal; + + return TrivialDxilUnaryOperation(opcode, src0, hlslOP, Builder); } +// Translate call that trivially converts to a dxil binary operation by passing +// arguments, return type, and helper objects to either scalarizing or native +// vector dxil operation creation depending on version and vector size. Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, @@ -544,6 +603,10 @@ Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, return binOp; } +// Translate call that trivially converts to a dxil trinary (aka tertiary) +// operation by passing arguments, return type, and helper objects to either +// scalarizing or native vector dxil operation creation depending on version +// and vector size. Value *TrivialTrinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, @@ -735,6 +798,12 @@ bool CanUseFxcMulOnlyPatternForPow(IRBuilder<> &Builder, Value *x, Value *pow, } } + // Only apply on aggregates of 16 or fewer elements, + // representing the max 4x4 matrix size. + Type *Ty = x->getType(); + if (Ty->isVectorTy() && Ty->getVectorNumElements() > 16) + return false; + APFloat powAPF = isa(pow) ? 
cast(pow)->getElementAsAPFloat(0) : // should be a splat value @@ -2016,7 +2085,7 @@ Value *TranslateFirstbitHi(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { Value *firstbitHi = - TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated); + TrivialUnaryOperationRet(CI, IOP, opcode, helper, pObjHelper, Translated); // firstbitHi == -1? -1 : (bitWidth-1 -firstbitHi); IRBuilder<> Builder(CI); Constant *neg1 = Builder.getInt32(-1); @@ -2049,7 +2118,7 @@ Value *TranslateFirstbitLo(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { Value *firstbitLo = - TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated); + TrivialUnaryOperationRet(CI, IOP, opcode, helper, pObjHelper, Translated); return firstbitLo; } @@ -2428,17 +2497,22 @@ Value *TrivialDotOperation(OP::OpCode opcode, Value *src0, Value *src1, return dotOP; } -Value *TranslateIDot(Value *arg0, Value *arg1, unsigned vecSize, - hlsl::OP *hlslOP, IRBuilder<> &Builder, - bool Unsigned = false) { - auto madOpCode = Unsigned ? DXIL::OpCode::UMad : DXIL::OpCode::IMad; +// Instead of using a DXIL intrinsic, implement a dot product operation using +// multiply and add operations. Used for integer dots and long vectors. 
+Value *ExpandDot(Value *arg0, Value *arg1, unsigned vecSize, hlsl::OP *hlslOP, + IRBuilder<> &Builder, + DXIL::OpCode MadOpCode = DXIL::OpCode::IMad) { Value *Elt0 = Builder.CreateExtractElement(arg0, (uint64_t)0); Value *Elt1 = Builder.CreateExtractElement(arg1, (uint64_t)0); - Value *Result = Builder.CreateMul(Elt0, Elt1); - for (unsigned iVecElt = 1; iVecElt < vecSize; ++iVecElt) { - Elt0 = Builder.CreateExtractElement(arg0, iVecElt); - Elt1 = Builder.CreateExtractElement(arg1, iVecElt); - Result = TrivialDxilTrinaryOperation(madOpCode, Elt0, Elt1, Result, hlslOP, + Value *Result; + if (Elt0->getType()->isFloatingPointTy()) + Result = Builder.CreateFMul(Elt0, Elt1); + else + Result = Builder.CreateMul(Elt0, Elt1); + for (unsigned Elt = 1; Elt < vecSize; ++Elt) { + Elt0 = Builder.CreateExtractElement(arg0, Elt); + Elt1 = Builder.CreateExtractElement(arg1, Elt); + Result = TrivialDxilTrinaryOperation(MadOpCode, Elt0, Elt1, Result, hlslOP, Builder); } @@ -2477,12 +2551,16 @@ Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, unsigned vecSize = Ty->getVectorNumElements(); Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); IRBuilder<> Builder(CI); - if (Ty->getScalarType()->isFloatingPointTy()) { + Type *EltTy = Ty->getScalarType(); + if (EltTy->isFloatingPointTy() && Ty->getVectorNumElements() <= 4) return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); - } else { - return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder, - IOP == IntrinsicOp::IOP_udot); - } + + DXIL::OpCode MadOpCode = DXIL::OpCode::IMad; + if (IOP == IntrinsicOp::IOP_udot) + MadOpCode = DXIL::OpCode::UMad; + else if (EltTy->isFloatingPointTy()) + MadOpCode = DXIL::OpCode::FMad; + return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, MadOpCode); } Value *TranslateNormalize(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -2985,23 +3063,6 @@ static Value *ScalarizeResRet(Type *RetTy, Value *ResRet, return retVal; } -static Value *ScalarizeElements(Type 
*RetTy, ArrayRef Elts, - IRBuilder<> &Builder) { - // Extract value part. - Value *retVal = llvm::UndefValue::get(RetTy); - if (RetTy->isVectorTy()) { - unsigned vecSize = RetTy->getVectorNumElements(); - DXASSERT(vecSize <= Elts.size(), "vector size mismatch"); - for (unsigned i = 0; i < vecSize; i++) { - Value *retComp = Elts[i]; - retVal = Builder.CreateInsertElement(retVal, retComp, i); - } - } else { - retVal = Elts[0]; - } - return retVal; -} - void UpdateStatus(Value *ResRet, Value *status, IRBuilder<> &Builder, hlsl::OP *hlslOp) { if (status && !isa(status)) { @@ -3046,8 +3107,10 @@ Value *TranslateMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, if (arg0Ty->getScalarType()->isFloatingPointTy()) { return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); } else { - return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder, - IOP == IntrinsicOp::IOP_umul); + DXIL::OpCode MadOpCode = DXIL::OpCode::IMad; + if (IOP == IntrinsicOp::IOP_umul) + MadOpCode = DXIL::OpCode::UMad; + return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, MadOpCode); } } else { // mul(vector, scalar) == vector * scalar-splat @@ -3941,14 +4004,41 @@ TranslateWriteSamplerFeedback(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, } // Load/Store intrinsics. +OP::OpCode LoadOpFromResKind(DxilResource::Kind RK) { + switch (RK) { + case DxilResource::Kind::RawBuffer: + case DxilResource::Kind::StructuredBuffer: + return OP::OpCode::RawBufferLoad; + case DxilResource::Kind::TypedBuffer: + return OP::OpCode::BufferLoad; + case DxilResource::Kind::Invalid: + DXASSERT(0, "invalid resource kind"); + break; + default: + return OP::OpCode::TextureLoad; + } + return OP::OpCode::TextureLoad; +} + struct ResLoadHelper { + // Default constructor uses CI load intrinsic call + // to get the retval and various location indicators. ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC, - Value *h, IntrinsicOp IOP, bool bForSubscript = false); - // For double subscript. 
- ResLoadHelper(Instruction *ldInst, Value *h, Value *idx, Value *mip) - : opcode(OP::OpCode::TextureLoad), - intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(ldInst), - addr(idx), offset(nullptr), status(nullptr), mipLevel(mip) {} + Value *h, IntrinsicOp IOP, LoadInst *TyBufSubLoad = nullptr); + // Alternative constructor explicitly sets the index. + // Used for some subscript operators that feed the generic HL call inst + // into a load op and by the matrixload call instruction. + ResLoadHelper(Instruction *Inst, DxilResource::Kind RK, Value *h, Value *idx, + Value *Offset, Value *mip = nullptr) + : intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(Inst), + addr(idx), offset(Offset), status(nullptr), mipLevel(mip) { + opcode = LoadOpFromResKind(RK); + Type *Ty = Inst->getType(); + if (opcode == OP::OpCode::RawBufferLoad && Ty->isVectorTy() && + Ty->getVectorNumElements() > 1 && + Inst->getModule()->GetHLModule().GetShaderModel()->IsSM69Plus()) + opcode = OP::OpCode::RawBufferVectorLoad; + } OP::OpCode opcode; IntrinsicOp intrinsicOpCode; unsigned dxilMajor; @@ -3961,122 +4051,93 @@ struct ResLoadHelper { Value *mipLevel; }; +// Uses CI arguments to determine the index, offset, and mipLevel also depending +// on the RK/RC resource kind and class, which determine the opcode. +// Handle and IOP are set explicitly. +// For typed buffer loads, the call instruction feeds into a load +// represented by TyBufSubLoad which determines the instruction to replace. +// Otherwise, CI is replaced. 
ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC, Value *hdl, - IntrinsicOp IOP, bool bForSubscript) + IntrinsicOp IOP, LoadInst *TyBufSubLoad) : intrinsicOpCode(IOP), handle(hdl), offset(nullptr), status(nullptr) { - switch (RK) { - case DxilResource::Kind::RawBuffer: - case DxilResource::Kind::StructuredBuffer: - opcode = OP::OpCode::RawBufferLoad; - break; - case DxilResource::Kind::TypedBuffer: - opcode = OP::OpCode::BufferLoad; - break; - case DxilResource::Kind::Invalid: - DXASSERT(0, "invalid resource kind"); - break; - default: - opcode = OP::OpCode::TextureLoad; - break; - } - retVal = CI; + opcode = LoadOpFromResKind(RK); + bool bForSubscript = false; + if (TyBufSubLoad) { + bForSubscript = true; + retVal = TyBufSubLoad; + } else + retVal = CI; const unsigned kAddrIdx = HLOperandIndex::kBufLoadAddrOpIdx; addr = CI->getArgOperand(kAddrIdx); unsigned argc = CI->getNumArgOperands(); + Type *i32Ty = Type::getInt32Ty(CI->getContext()); + unsigned StatusIdx = HLOperandIndex::kBufLoadStatusOpIdx; + unsigned OffsetIdx = HLOperandIndex::kInvalidIdx; if (opcode == OP::OpCode::TextureLoad) { - // mip at last channel - unsigned coordSize = DxilResource::GetNumCoords(RK); - - if (RC == DxilResourceBase::Class::SRV) { - if (bForSubscript) { - // Use 0 when access by []. - mipLevel = IRBuilder<>(CI).getInt32(0); - } else { - if (coordSize == 1 && !addr->getType()->isVectorTy()) { - // Use addr when access by Load. - mipLevel = addr; - } else { - mipLevel = IRBuilder<>(CI).CreateExtractElement(addr, coordSize); - } - } - } else { - // Set mip level to undef for UAV. 
- mipLevel = UndefValue::get(Type::getInt32Ty(addr->getContext())); - } - - if (RC == DxilResourceBase::Class::SRV) { - unsigned offsetIdx = HLOperandIndex::kTexLoadOffsetOpIdx; - unsigned statusIdx = HLOperandIndex::kTexLoadStatusOpIdx; - if (RK == DxilResource::Kind::Texture2DMS || - RK == DxilResource::Kind::Texture2DMSArray) { - offsetIdx = HLOperandIndex::kTex2DMSLoadOffsetOpIdx; - statusIdx = HLOperandIndex::kTex2DMSLoadStatusOpIdx; + bool IsMS = (RK == DxilResource::Kind::Texture2DMS || + RK == DxilResource::Kind::Texture2DMSArray); + // Set mip and status index. + offset = UndefValue::get(i32Ty); + if (IsMS) { + // Retrieve appropriate MS parameters. + StatusIdx = HLOperandIndex::kTex2DMSLoadStatusOpIdx; + // MS textures keep the sample param (mipLevel) regardless of writability. + if (bForSubscript) + mipLevel = ConstantInt::get(i32Ty, 0); + else mipLevel = CI->getArgOperand(HLOperandIndex::kTex2DMSLoadSampleIdxOpIdx); - } - - if (argc > offsetIdx) - offset = CI->getArgOperand(offsetIdx); - - if (argc > statusIdx) - status = CI->getArgOperand(statusIdx); - } else if (RC == DxilResourceBase::Class::UAV && - (RK == DxilResource::Kind::Texture2DMS || - RK == DxilResource::Kind::Texture2DMSArray)) { - unsigned statusIdx = HLOperandIndex::kTex2DMSLoadStatusOpIdx; - mipLevel = CI->getArgOperand(HLOperandIndex::kTex2DMSLoadSampleIdxOpIdx); - - if (argc > statusIdx) - status = CI->getArgOperand(statusIdx); - + } else if (RC == DxilResourceBase::Class::UAV) { + // DXIL requires that non-MS UAV accesses set miplevel to undef. + mipLevel = UndefValue::get(i32Ty); + StatusIdx = HLOperandIndex::kRWTexLoadStatusOpIdx; } else { - const unsigned kStatusIdx = HLOperandIndex::kRWTexLoadStatusOpIdx; - - if (argc > kStatusIdx) - status = CI->getArgOperand(kStatusIdx); + // Non-MS SRV case. + StatusIdx = HLOperandIndex::kTexLoadStatusOpIdx; + if (bForSubscript) + // Having no miplevel param, single subscripted SRVs default to 0. 
+ mipLevel = ConstantInt::get(i32Ty, 0); + else + // Mip is stored at the last channel of the coordinate vector. + mipLevel = IRBuilder<>(CI).CreateExtractElement( + addr, DxilResource::GetNumCoords(RK)); } - } else { - const unsigned kStatusIdx = HLOperandIndex::kBufLoadStatusOpIdx; - if (argc > kStatusIdx) - status = CI->getArgOperand(kStatusIdx); - } + if (RC == DxilResourceBase::Class::SRV) + OffsetIdx = IsMS ? HLOperandIndex::kTex2DMSLoadOffsetOpIdx + : HLOperandIndex::kTexLoadOffsetOpIdx; + } else if (opcode == OP::OpCode::RawBufferLoad) { + // If native vectors are available and this load had a vector + // with more than one elements, convert the RawBufferLod to the + // native vector variant RawBufferVectorLoad. + Type *Ty = CI->getType(); + if (Ty->isVectorTy() && Ty->getVectorNumElements() > 1 && + CI->getModule()->GetHLModule().GetShaderModel()->IsSM69Plus()) + opcode = OP::OpCode::RawBufferVectorLoad; + } + + // Set offset. + if (DXIL::IsStructuredBuffer(RK)) + // Structured buffers receive no exterior offset in this constructor, + // but may need to increment it later. + offset = ConstantInt::get(i32Ty, 0U); + else if (argc > OffsetIdx) + // Textures may set the offset from an explicit argument. + offset = CI->getArgOperand(OffsetIdx); + else + // All other cases use undef. + offset = UndefValue::get(i32Ty); + + // Retrieve status value if provided. 
+ if (argc > StatusIdx) + status = CI->getArgOperand(StatusIdx); } void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status, hlsl::OP *OP, HLResource::Kind RK, const DataLayout &DL); -// Create { v0, v1 } from { v0.lo, v0.hi, v1.lo, v1.hi } -void Make64bitResultForLoad(Type *EltTy, ArrayRef resultElts32, - unsigned size, MutableArrayRef resultElts, - hlsl::OP *hlslOP, IRBuilder<> &Builder) { - Type *i64Ty = Builder.getInt64Ty(); - Type *doubleTy = Builder.getDoubleTy(); - if (EltTy == doubleTy) { - Function *makeDouble = - hlslOP->GetOpFunc(DXIL::OpCode::MakeDouble, doubleTy); - Value *makeDoubleOpArg = - Builder.getInt32((unsigned)DXIL::OpCode::MakeDouble); - for (unsigned i = 0; i < size; i++) { - Value *lo = resultElts32[2 * i]; - Value *hi = resultElts32[2 * i + 1]; - Value *V = Builder.CreateCall(makeDouble, {makeDoubleOpArg, lo, hi}); - resultElts[i] = V; - } - } else { - for (unsigned i = 0; i < size; i++) { - Value *lo = resultElts32[2 * i]; - Value *hi = resultElts32[2 * i + 1]; - lo = Builder.CreateZExt(lo, i64Ty); - hi = Builder.CreateZExt(hi, i64Ty); - hi = Builder.CreateShl(hi, 32); - resultElts[i] = Builder.CreateOr(lo, hi); - } - } -} - static Constant *GetRawBufferMaskForETy(Type *Ty, unsigned NumComponents, hlsl::OP *OP) { unsigned mask = 0; @@ -4108,183 +4169,208 @@ Value *GenerateRawBufLd(Value *handle, Value *bufIdx, Value *offset, IRBuilder<> &Builder, unsigned NumComponents, Constant *alignment); -static Value *TranslateRawBufVecLd(Type *VecEltTy, unsigned VecElemCount, - IRBuilder<> &Builder, Value *handle, - hlsl::OP *OP, Value *status, Value *bufIdx, - Value *baseOffset, const DataLayout &DL, - std::vector &bufLds, - unsigned baseAlign, bool isScalarTy = false); - -void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK, - IRBuilder<> &Builder, hlsl::OP *OP, const DataLayout &DL) { - - Type *Ty = helper.retVal->getType(); - if (Ty->isPointerTy()) { - DXASSERT(!DxilResource::IsAnyTexture(RK), - "Textures should not 
be treated as structured buffers."); - TranslateStructBufSubscript(cast(helper.retVal), helper.handle, - helper.status, OP, RK, DL); - return; - } - +// Sets up arguments for buffer load call. +static SmallVector GetBufLoadArgs(ResLoadHelper helper, + HLResource::Kind RK, + IRBuilder<> Builder, + unsigned LdSize) { OP::OpCode opcode = helper.opcode; + llvm::Constant *opArg = Builder.getInt32((uint32_t)opcode); - Type *i32Ty = Builder.getInt32Ty(); - Type *i64Ty = Builder.getInt64Ty(); - Type *doubleTy = Builder.getDoubleTy(); - Type *EltTy = Ty->getScalarType(); - unsigned numComponents = 1; - if (Ty->isVectorTy()) { - numComponents = Ty->getVectorNumElements(); - } - - if (DXIL::IsStructuredBuffer(RK) || DXIL::IsRawBuffer(RK)) { - std::vector bufLds; - const bool isBool = EltTy->isIntegerTy(1); - - // Bool are represented as i32 in memory - Type *MemReprTy = isBool ? Builder.getInt32Ty() : EltTy; - bool isScalarTy = !Ty->isVectorTy(); + unsigned alignment = RK == DxilResource::Kind::RawBuffer ? 4U : 8U; + alignment = std::min(alignment, LdSize); + Constant *alignmentVal = Builder.getInt32(alignment); - Value *retValNew = nullptr; - if (DXIL::IsStructuredBuffer(RK)) { - retValNew = TranslateRawBufVecLd( - MemReprTy, numComponents, Builder, helper.handle, OP, helper.status, - helper.addr, OP->GetU32Const(0), DL, bufLds, - /*baseAlign (in bytes)*/ 8, isScalarTy); - } else { - retValNew = - TranslateRawBufVecLd(MemReprTy, numComponents, Builder, helper.handle, - OP, helper.status, nullptr, helper.addr, DL, - bufLds, /*baseAlign (in bytes)*/ 4, isScalarTy); - } + // Assemble args specific to the type bab/struct/typed: + // - Typed needs to handle the possibility of vector coords + // - Raws need to calculate alignment and mask values. + SmallVector Args; + Args.emplace_back(opArg); // opcode @0. 
+ Args.emplace_back(helper.handle); // Resource handle @1 - DXASSERT_NOMSG(!bufLds.empty()); - dxilutil::MigrateDebugValue(helper.retVal, bufLds.front()); + // Set offsets appropriate for the load operation. + bool isVectorAddr = helper.addr->getType()->isVectorTy(); + if (opcode == OP::OpCode::TextureLoad) { + llvm::Value *undefI = llvm::UndefValue::get(Builder.getInt32Ty()); - if (isBool) { - // Convert result back to register representation. - retValNew = Builder.CreateICmpNE( - retValNew, Constant::getNullValue(retValNew->getType())); + // Set mip level or sample for MS texutures @2. + Args.emplace_back(helper.mipLevel); + // Set texture coords according to resource kind @3-5 + // Coords unused by the resource kind are undefs. + unsigned coordSize = DxilResource::GetNumCoords(RK); + for (unsigned i = 0; i < 3; i++) + if (i < coordSize) + Args.emplace_back(isVectorAddr + ? Builder.CreateExtractElement(helper.addr, i) + : helper.addr); + else + Args.emplace_back(undefI); + + // Set texture offsets according to resource kind @7-9 + // Coords unused by the resource kind are undefs. + unsigned offsetSize = DxilResource::GetNumOffsets(RK); + if (!helper.offset || isa(helper.offset)) + offsetSize = 0; + for (unsigned i = 0; i < 3; i++) + if (i < offsetSize) + Args.emplace_back(Builder.CreateExtractElement(helper.offset, i)); + else + Args.emplace_back(undefI); + } else { + // If not TextureLoad, it could be a typed or raw buffer load. + // They have mostly similar arguments. + DXASSERT(opcode == OP::OpCode::RawBufferLoad || + opcode == OP::OpCode::RawBufferVectorLoad || + opcode == OP::OpCode::BufferLoad, + "Wrong opcode in get load args"); + Args.emplace_back( + isVectorAddr ? Builder.CreateExtractElement(helper.addr, (uint64_t)0) + : helper.addr); + Args.emplace_back(helper.offset); + if (opcode == OP::OpCode::RawBufferLoad) { + // Unlike typed buffer load, raw buffer load has mask and alignment. + Args.emplace_back(nullptr); // Mask will be added later %4. 
+ Args.emplace_back(alignmentVal); // alignment @5. + } else if (opcode == OP::OpCode::RawBufferVectorLoad) { + // RawBufferVectorLoad takes just alignment, no mask. + Args.emplace_back(alignmentVal); // alignment @4 } - - helper.retVal->replaceAllUsesWith(retValNew); - helper.retVal = retValNew; - return; } + return Args; +} - bool isTyped = opcode == OP::OpCode::TextureLoad || - RK == DxilResource::Kind::TypedBuffer; - bool is64 = EltTy == i64Ty || EltTy == doubleTy; - if (is64 && isTyped) { - EltTy = i32Ty; - } - bool isBool = EltTy->isIntegerTy(1); - if (isBool) { - // Value will be loaded in its memory representation. - EltTy = i32Ty; - if (Ty->isVectorTy()) - Ty = VectorType::get(EltTy, numComponents); - } +// Emits as many calls as needed to load the full vector +// Performs any needed extractions and conversions of the results. +Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, + IRBuilder<> &Builder, hlsl::OP *OP, + const DataLayout &DL) { + OP::OpCode opcode = helper.opcode; + Type *Ty = helper.retVal->getType(); - Function *F = OP->GetOpFunc(opcode, EltTy); - llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode); + unsigned NumComponents = 1; + if (Ty->isVectorTy()) + NumComponents = Ty->getVectorNumElements(); - llvm::Value *undefI = llvm::UndefValue::get(i32Ty); + const bool isTyped = DXIL::IsTyped(RK); + Type *EltTy = Ty->getScalarType(); + const bool is64 = (EltTy->isIntegerTy(64) || EltTy->isDoubleTy()); + const bool isBool = EltTy->isIntegerTy(1); + // Values will be loaded in memory representations. + if (isBool || (is64 && isTyped)) + EltTy = Builder.getInt32Ty(); - SmallVector loadArgs; - loadArgs.emplace_back(opArg); // opcode - loadArgs.emplace_back(helper.handle); // resource handle + // Calculate load size with the scalar memory element type. 
+ unsigned LdSize = DL.getTypeAllocSize(EltTy); - if (opcode == OP::OpCode::TextureLoad) { - // set mip level - loadArgs.emplace_back(helper.mipLevel); - } - - if (opcode == OP::OpCode::TextureLoad) { - // texture coord - unsigned coordSize = DxilResource::GetNumCoords(RK); - bool isVectorAddr = helper.addr->getType()->isVectorTy(); - for (unsigned i = 0; i < 3; i++) { - if (i < coordSize) { - loadArgs.emplace_back(isVectorAddr - ? Builder.CreateExtractElement(helper.addr, i) - : helper.addr); - } else - loadArgs.emplace_back(undefI); + // Adjust number of components as needed. + if (is64 && isTyped) { + // 64-bit types are stored as int32 pairs in typed buffers. + DXASSERT(NumComponents <= 2, "Typed buffers only allow 4 dwords."); + NumComponents *= 2; + } else if (opcode == OP::OpCode::RawBufferVectorLoad) { + // Native vector loads only have a single vector element in ResRet. + EltTy = VectorType::get(EltTy, NumComponents); + NumComponents = 1; + } + + SmallVector Args = GetBufLoadArgs(helper, RK, Builder, LdSize); + + // Keep track of the first load for debug info migration. + Value *FirstLd = nullptr; + + unsigned OffsetIdx = 0; + if (RK == DxilResource::Kind::RawBuffer) + // Raw buffers can't use offset param. Add to coord index. + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadIndexOpIdx; + else if (RK == DxilResource::Kind::StructuredBuffer) + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx; + + // Create call(s) to function object and collect results in Elts. + // Typed buffer loads are limited to one load of up to 4 32-bit values. + // Raw buffer loads might need multiple loads in chunks of 4. + SmallVector Elts(NumComponents); + for (unsigned i = 0; i < NumComponents;) { + // Load 4 elements or however many less than 4 are left to load. + unsigned chunkSize = std::min(NumComponents - i, 4U); + + // Assign mask for raw buffer loads. 
+ if (opcode == OP::OpCode::RawBufferLoad) { + Args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] = + GetRawBufferMaskForETy(EltTy, chunkSize, OP); + // If we've loaded a chunk already, update offset to next chunk. + if (FirstLd != nullptr) + Args[OffsetIdx] = + Builder.CreateAdd(Args[OffsetIdx], OP->GetU32Const(4 * LdSize)); } - } else { - if (helper.addr->getType()->isVectorTy()) { - Value *scalarOffset = - Builder.CreateExtractElement(helper.addr, (uint64_t)0); - - // TODO: calculate the real address based on opcode - loadArgs.emplace_back(scalarOffset); // offset - } else { - // TODO: calculate the real address based on opcode + Function *F = OP->GetOpFunc(opcode, EltTy); + Value *Ld = Builder.CreateCall(F, Args, OP::GetOpCodeName(opcode)); - loadArgs.emplace_back(helper.addr); // offset - } - } - // offset 0 - if (opcode == OP::OpCode::TextureLoad) { - if (helper.offset && !isa(helper.offset)) { - unsigned offsetSize = DxilResource::GetNumOffsets(RK); - for (unsigned i = 0; i < 3; i++) { - if (i < offsetSize) - loadArgs.emplace_back(Builder.CreateExtractElement(helper.offset, i)); - else - loadArgs.emplace_back(undefI); + // Extract elements from returned ResRet. + // Native vector loads just have one vector element in the ResRet. + // Others have up to four scalars that need to be individually extracted. + if (opcode == OP::OpCode::RawBufferVectorLoad) + Elts[i++] = Builder.CreateExtractValue(Ld, 0); + else + for (unsigned j = 0; j < chunkSize; j++, i++) + Elts[i] = Builder.CreateExtractValue(Ld, j); + + // Update status. + UpdateStatus(Ld, helper.status, Builder, OP); + + if (!FirstLd) + FirstLd = Ld; + } + DXASSERT(FirstLd, "No loads created by TranslateBufLoad"); + + // Convert loaded 32-bit integers to intended 64-bit type representation. 
+ if (isTyped) { + Type *RegEltTy = Ty->getScalarType(); + if (RegEltTy->isDoubleTy()) { + Function *makeDouble = OP->GetOpFunc(DXIL::OpCode::MakeDouble, RegEltTy); + Value *makeDoubleOpArg = + Builder.getInt32((unsigned)DXIL::OpCode::MakeDouble); + NumComponents /= 2; // Convert back to number of doubles. + for (unsigned i = 0; i < NumComponents; i++) { + Value *lo = Elts[2 * i]; + Value *hi = Elts[2 * i + 1]; + Elts[i] = Builder.CreateCall(makeDouble, {makeDoubleOpArg, lo, hi}); } - } else { - loadArgs.emplace_back(undefI); - loadArgs.emplace_back(undefI); - loadArgs.emplace_back(undefI); + EltTy = RegEltTy; + } else if (RegEltTy->isIntegerTy(64)) { + NumComponents /= 2; // Convert back to number of int64s. + for (unsigned i = 0; i < NumComponents; i++) { + Value *lo = Elts[2 * i]; + Value *hi = Elts[2 * i + 1]; + lo = Builder.CreateZExt(lo, RegEltTy); + hi = Builder.CreateZExt(hi, RegEltTy); + hi = Builder.CreateShl(hi, 32); + Elts[i] = Builder.CreateOr(lo, hi); + } + EltTy = RegEltTy; } } - // Offset 1 - if (RK == DxilResource::Kind::TypedBuffer) { - loadArgs.emplace_back(undefI); - } - - Value *ResRet = Builder.CreateCall(F, loadArgs, OP->GetOpCodeName(opcode)); - dxilutil::MigrateDebugValue(helper.retVal, ResRet); - + // Package elements into a vector as needed. Value *retValNew = nullptr; - if (!is64 || !isTyped) { - retValNew = ScalarizeResRet(Ty, ResRet, Builder); + // Scalar or native vector loads need not construct vectors from elements. 
+ if (!Ty->isVectorTy() || opcode == OP::OpCode::RawBufferVectorLoad) { + retValNew = Elts[0]; } else { - unsigned size = numComponents; - DXASSERT(size <= 2, "typed buffer only allow 4 dwords"); - EltTy = Ty->getScalarType(); - Value *Elts[2]; - - Make64bitResultForLoad(Ty->getScalarType(), - { - Builder.CreateExtractValue(ResRet, 0), - Builder.CreateExtractValue(ResRet, 1), - Builder.CreateExtractValue(ResRet, 2), - Builder.CreateExtractValue(ResRet, 3), - }, - size, Elts, OP, Builder); - - retValNew = ScalarizeElements(Ty, Elts, Builder); + retValNew = UndefValue::get(VectorType::get(EltTy, NumComponents)); + for (unsigned i = 0; i < NumComponents; i++) + retValNew = Builder.CreateInsertElement(retValNew, Elts[i], i); } - if (isBool) { - // Convert result back to register representation. + // Convert loaded int32 bool results to i1 register representation. + if (isBool) retValNew = Builder.CreateICmpNE( retValNew, Constant::getNullValue(retValNew->getType())); - } - // replace helper.retVal->replaceAllUsesWith(retValNew); - // Save new ret val. helper.retVal = retValNew; - // get status - UpdateStatus(ResRet, helper.status, Builder, OP); + + return FirstLd; } Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -4292,6 +4378,7 @@ Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { hlsl::OP *hlslOP = &helper.hlslOP; + DataLayout &DL = helper.dataLayout; Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); IRBuilder<> Builder(CI); @@ -4299,9 +4386,19 @@ Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, DXIL::ResourceClass RC = pObjHelper->GetRC(handle); DXIL::ResourceKind RK = pObjHelper->GetRK(handle); - ResLoadHelper loadHelper(CI, RK, RC, handle, IOP); - TranslateLoad(loadHelper, RK, Builder, hlslOP, helper.dataLayout); - // CI is replaced in TranslateLoad. 
+ ResLoadHelper ldHelper(CI, RK, RC, handle, IOP); + Type *Ty = CI->getType(); + Value *Ld = nullptr; + if (Ty->isPointerTy()) { + DXASSERT(!DxilResource::IsAnyTexture(RK), + "Textures should not be treated as structured buffers."); + TranslateStructBufSubscript(cast(ldHelper.retVal), handle, + ldHelper.status, hlslOP, RK, DL); + } else { + Ld = TranslateBufLoad(ldHelper, RK, Builder, hlslOP, DL); + dxilutil::MigrateDebugValue(CI, Ld); + } + // CI is replaced by above translation calls.. return nullptr; } @@ -4345,19 +4442,20 @@ void Split64bitValForStore(Type *EltTy, ArrayRef vals, unsigned size, } void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, - Value *offset, IRBuilder<> &Builder, hlsl::OP *OP, - Value *sampIdx = nullptr) { + Value *Idx, Value *offset, IRBuilder<> &Builder, + hlsl::OP *OP, Value *sampIdx = nullptr) { Type *Ty = val->getType(); - - // This function is no longer used for lowering stores to a - // structured buffer. - DXASSERT_NOMSG(RK != DxilResource::Kind::StructuredBuffer); - OP::OpCode opcode = OP::OpCode::NumOpCodes; + bool IsTyped = true; switch (RK) { case DxilResource::Kind::RawBuffer: case DxilResource::Kind::StructuredBuffer: + IsTyped = false; opcode = OP::OpCode::RawBufferStore; + // Where shader model and type allows, use vector store intrinsic. 
+ if (OP->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus() && + Ty->isVectorTy() && Ty->getVectorNumElements() > 1) + opcode = OP::OpCode::RawBufferVectorStore; break; case DxilResource::Kind::TypedBuffer: opcode = OP::OpCode::BufferStore; @@ -4374,10 +4472,6 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, break; } - bool isTyped = opcode == OP::OpCode::TextureStore || - opcode == OP::OpCode::TextureStoreSample || - RK == DxilResource::Kind::TypedBuffer; - Type *i32Ty = Builder.getInt32Ty(); Type *i64Ty = Builder.getInt64Ty(); Type *doubleTy = Builder.getDoubleTy(); @@ -4400,11 +4494,10 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, alignValue = 4; Constant *Alignment = OP->GetI32Const(alignValue); bool is64 = EltTy == i64Ty || EltTy == doubleTy; - if (is64 && isTyped) { + if (is64 && IsTyped) { EltTy = i32Ty; } - Function *F = OP->GetOpFunc(opcode, EltTy); llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode); llvm::Value *undefI = @@ -4416,44 +4509,58 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, storeArgs.emplace_back(opArg); // opcode storeArgs.emplace_back(handle); // resource handle - unsigned offset0Idx = 0; - if (RK == DxilResource::Kind::RawBuffer || - RK == DxilResource::Kind::TypedBuffer) { - // Offset 0 - if (offset->getType()->isVectorTy()) { - Value *scalarOffset = Builder.CreateExtractElement(offset, (uint64_t)0); - storeArgs.emplace_back(scalarOffset); // offset + unsigned OffsetIdx = 0; + if (opcode == OP::OpCode::RawBufferStore || + opcode == OP::OpCode::RawBufferVectorStore || + opcode == OP::OpCode::BufferStore) { + // Append Coord0 (Index) value. + if (Idx->getType()->isVectorTy()) { + Value *ScalarIdx = Builder.CreateExtractElement(Idx, (uint64_t)0); + storeArgs.emplace_back(ScalarIdx); // Coord0 (Index). } else { - storeArgs.emplace_back(offset); // offset + storeArgs.emplace_back(Idx); // Coord0 (Index). 
} - // Store offset0 for later use - offset0Idx = storeArgs.size() - 1; + // Store OffsetIdx representing the argument that may need to be incremented + // later to load additional chunks of data. + // Only structured buffers can use the offset parameter. + // Others must increment the index. + if (RK == DxilResource::Kind::StructuredBuffer) + OffsetIdx = storeArgs.size(); + else + OffsetIdx = storeArgs.size() - 1; - // Offset 1 - storeArgs.emplace_back(undefI); + // Coord1 (Offset). + storeArgs.emplace_back(offset); } else { // texture store unsigned coordSize = DxilResource::GetNumCoords(RK); // Set x first. - if (offset->getType()->isVectorTy()) - storeArgs.emplace_back(Builder.CreateExtractElement(offset, (uint64_t)0)); + if (Idx->getType()->isVectorTy()) + storeArgs.emplace_back(Builder.CreateExtractElement(Idx, (uint64_t)0)); else - storeArgs.emplace_back(offset); - - // Store offset0 for later use - offset0Idx = storeArgs.size() - 1; + storeArgs.emplace_back(Idx); for (unsigned i = 1; i < 3; i++) { if (i < coordSize) - storeArgs.emplace_back(Builder.CreateExtractElement(offset, i)); + storeArgs.emplace_back(Builder.CreateExtractElement(Idx, i)); else storeArgs.emplace_back(undefI); } // TODO: support mip for texture ST } + // RawBufferVectorStore only takes a single value and alignment arguments. + if (opcode == DXIL::OpCode::RawBufferVectorStore) { + storeArgs.emplace_back(val); + storeArgs.emplace_back(Alignment); + Function *F = OP->GetOpFunc(DXIL::OpCode::RawBufferVectorStore, Ty); + Builder.CreateCall(F, storeArgs); + return; + } + Function *F = OP->GetOpFunc(opcode, EltTy); + constexpr unsigned MaxStoreElemCount = 4; const unsigned CompCount = Ty->isVectorTy() ? Ty->getVectorNumElements() : 1; const unsigned StoreInstCount = @@ -4474,30 +4581,24 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, } for (unsigned j = 0; j < storeArgsList.size(); j++) { - - // For second and subsequent store calls, increment the offset0 (i.e. 
store - // index) + // For second and subsequent store calls, increment the resource-appropriate + // index or offset parameter. if (j > 0) { - // Greater than four-components store is not allowed for - // TypedBuffer and Textures. So greater than four elements - // scenario should only get hit here for RawBuffer. - DXASSERT_NOMSG(RK == DxilResource::Kind::RawBuffer); unsigned EltSize = OP->GetAllocSizeForType(EltTy); - unsigned newOffset = EltSize * MaxStoreElemCount * j; - Value *newOffsetVal = ConstantInt::get(Builder.getInt32Ty(), newOffset); - newOffsetVal = - Builder.CreateAdd(storeArgsList[0][offset0Idx], newOffsetVal); - storeArgsList[j][offset0Idx] = newOffsetVal; + unsigned NewCoord = EltSize * MaxStoreElemCount * j; + Value *NewCoordVal = ConstantInt::get(Builder.getInt32Ty(), NewCoord); + NewCoordVal = Builder.CreateAdd(storeArgsList[0][OffsetIdx], NewCoordVal); + storeArgsList[j][OffsetIdx] = NewCoordVal; } - // values + // Set value parameters. uint8_t mask = 0; if (Ty->isVectorTy()) { unsigned vecSize = std::min((j + 1) * MaxStoreElemCount, Ty->getVectorNumElements()) - (j * MaxStoreElemCount); Value *emptyVal = undefVal; - if (isTyped) { + if (IsTyped) { mask = DXIL::kCompMask_All; emptyVal = Builder.CreateExtractElement(val, (uint64_t)0); } @@ -4513,7 +4614,7 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, } } else { - if (isTyped) { + if (IsTyped) { mask = DXIL::kCompMask_All; storeArgsList[j].emplace_back(val); storeArgsList[j].emplace_back(val); @@ -4528,7 +4629,7 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, } } - if (is64 && isTyped) { + if (is64 && IsTyped) { unsigned size = 1; if (Ty->isVectorTy()) { size = @@ -4586,7 +4687,8 @@ Value *TranslateResourceStore(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *val = CI->getArgOperand(HLOperandIndex::kStoreValOpIdx); Value *offset = CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx); - TranslateStore(RK, handle, val, offset, Builder, 
hlslOP); + Value *UndefI = UndefValue::get(Builder.getInt32Ty()); + TranslateStore(RK, handle, val, offset, UndefI, Builder, hlslOP); return nullptr; } @@ -5680,7 +5782,24 @@ Value *TranslateAllocateRayQuery(CallInst *CI, IntrinsicOp IOP, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { hlsl::OP *hlslOP = &helper.hlslOP; - Value *refArgs[] = {nullptr, CI->getOperand(1)}; + // upgrade to allocateRayQuery2 if there is a non-zero 2nd template arg + DXASSERT(CI->getNumArgOperands() == 3, + "hlopcode for allocaterayquery always expects 3 arguments"); + + llvm::Value *Arg = + CI->getArgOperand(HLOperandIndex::kAllocateRayQueryRayQueryFlagsIdx); + llvm::ConstantInt *ConstVal = llvm::dyn_cast(Arg); + DXASSERT(ConstVal, + "2nd argument to allocaterayquery must always be a constant value"); + if (ConstVal->getValue().getZExtValue() != 0) { + Value *refArgs[3] = { + nullptr, CI->getOperand(HLOperandIndex::kAllocateRayQueryRayFlagsIdx), + CI->getOperand(HLOperandIndex::kAllocateRayQueryRayQueryFlagsIdx)}; + opcode = OP::OpCode::AllocateRayQuery2; + return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP); + } + Value *refArgs[2] = { + nullptr, CI->getOperand(HLOperandIndex::kAllocateRayQueryRayFlagsIdx)}; return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP); } @@ -5689,7 +5808,6 @@ Value *TranslateTraceRayInline(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { hlsl::OP *hlslOP = &helper.hlslOP; - Value *opArg = hlslOP->GetU32Const(static_cast(opcode)); Value *Args[DXIL::OperandIndex::kTraceRayInlineNumOp]; @@ -6064,6 +6182,190 @@ Value *TranslateUnpack(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, } // namespace +// Shader Execution Reordering. 
+namespace { +Value *TranslateHitObjectMake(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { + hlsl::OP *HlslOP = &Helper.hlslOP; + IRBuilder<> Builder(CI); + unsigned SrcIdx = 1; + Value *HitObjectPtr = CI->getArgOperand(SrcIdx++); + if (Opcode == OP::OpCode::HitObject_MakeNop) { + Value *HitObject = TrivialDxilOperation( + Opcode, {nullptr}, Type::getVoidTy(CI->getContext()), CI, HlslOP); + Builder.CreateStore(HitObject, HitObjectPtr); + DXASSERT( + CI->use_empty(), + "Default ctor return type is a Clang artifact. Value must not be used"); + return nullptr; + } + + DXASSERT_NOMSG(CI->getNumArgOperands() == + HLOperandIndex::kHitObjectMakeMiss_NumOp); + Value *RayFlags = CI->getArgOperand(SrcIdx++); + Value *MissShaderIdx = CI->getArgOperand(SrcIdx++); + DXASSERT_NOMSG(SrcIdx == HLOperandIndex::kHitObjectMakeMissRayDescOpIdx); + Value *RayDescOrigin = CI->getArgOperand(SrcIdx++); + Value *RayDescOriginX = + Builder.CreateExtractElement(RayDescOrigin, (uint64_t)0); + Value *RayDescOriginY = + Builder.CreateExtractElement(RayDescOrigin, (uint64_t)1); + Value *RayDescOriginZ = + Builder.CreateExtractElement(RayDescOrigin, (uint64_t)2); + + Value *RayDescTMin = CI->getArgOperand(SrcIdx++); + Value *RayDescDirection = CI->getArgOperand(SrcIdx++); + Value *RayDescDirectionX = + Builder.CreateExtractElement(RayDescDirection, (uint64_t)0); + Value *RayDescDirectionY = + Builder.CreateExtractElement(RayDescDirection, (uint64_t)1); + Value *RayDescDirectionZ = + Builder.CreateExtractElement(RayDescDirection, (uint64_t)2); + + Value *RayDescTMax = CI->getArgOperand(SrcIdx++); + DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); + + Value *OutHitObject = TrivialDxilOperation( + Opcode, + {nullptr, RayFlags, MissShaderIdx, RayDescOriginX, RayDescOriginY, + RayDescOriginZ, RayDescTMin, RayDescDirectionX, RayDescDirectionY, + RayDescDirectionZ, RayDescTMax}, + Helper.voidTy, CI, 
HlslOP); + Builder.CreateStore(OutHitObject, HitObjectPtr); + return nullptr; +} + +Value *TranslateMaybeReorderThread(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + hlsl::OP *OP = &Helper.hlslOP; + + // clang-format off + // Match MaybeReorderThread overload variants: + // void MaybeReorderThread(, + // HitObject Hit); + // void MaybeReorderThread(, + // uint CoherenceHint, + // uint NumCoherenceHintBitsFromLSB ); + // void MaybeReorderThread(, + // HitObject Hit, + // uint CoherenceHint, + // uint NumCoherenceHintBitsFromLSB); + // clang-format on + const unsigned NumHLArgs = CI->getNumArgOperands(); + DXASSERT_NOMSG(NumHLArgs >= 2); + + // Use a NOP HitObject for MaybeReorderThread without HitObject. + Value *HitObject = nullptr; + unsigned HLIndex = 1; + if (3 == NumHLArgs) { + HitObject = TrivialDxilOperation(DXIL::OpCode::HitObject_MakeNop, {nullptr}, + Type::getVoidTy(CI->getContext()), CI, OP); + } else { + Value *FirstParam = CI->getArgOperand(HLIndex); + DXASSERT_NOMSG(isa(FirstParam->getType())); + IRBuilder<> Builder(CI); + HitObject = Builder.CreateLoad(FirstParam); + HLIndex++; + } + + // If there are trailing parameters, these have to be the two coherence bit + // parameters + Value *CoherenceHint = nullptr; + Value *NumCoherenceHintBits = nullptr; + if (2 != NumHLArgs) { + DXASSERT_NOMSG(HLIndex + 2 == NumHLArgs); + CoherenceHint = CI->getArgOperand(HLIndex++); + NumCoherenceHintBits = CI->getArgOperand(HLIndex++); + DXASSERT_NOMSG(Helper.i32Ty == CoherenceHint->getType()); + DXASSERT_NOMSG(Helper.i32Ty == NumCoherenceHintBits->getType()); + } else { + CoherenceHint = UndefValue::get(Helper.i32Ty); + NumCoherenceHintBits = OP->GetU32Const(0); + } + + TrivialDxilOperation( + OpCode, {nullptr, HitObject, CoherenceHint, NumCoherenceHintBits}, + Type::getVoidTy(CI->getContext()), CI, OP); + return nullptr; +} + +Value 
*TranslateHitObjectFromRayQuery(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectTraceRay(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectInvoke(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return nullptr; // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectGetAttributes(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectScalarGetter(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectVectorGetter(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectMatrixGetter(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectLoadLocalRootTableConstant( + CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, + HLOperationLowerHelper &Helper, 
HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectSetShaderTableIndex( + CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, + HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +} // namespace + // Resource Handle. namespace { Value *TranslateGetHandleFromHeap(CallInst *CI, IntrinsicOp IOP, @@ -6091,20 +6393,8 @@ Value *TranslateAnd(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, bool &Translated) { Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); - Type *Ty = CI->getType(); - Type *EltTy = Ty->getScalarType(); IRBuilder<> Builder(CI); - if (Ty != EltTy) { - Value *Result = UndefValue::get(Ty); - for (unsigned i = 0; i < Ty->getVectorNumElements(); i++) { - Value *EltX = Builder.CreateExtractElement(x, i); - Value *EltY = Builder.CreateExtractElement(y, i); - Value *tmp = Builder.CreateAnd(EltX, EltY); - Result = Builder.CreateInsertElement(Result, tmp, i); - } - return Result; - } return Builder.CreateAnd(x, y); } Value *TranslateOr(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -6112,20 +6402,8 @@ Value *TranslateOr(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); - Type *Ty = CI->getType(); - Type *EltTy = Ty->getScalarType(); IRBuilder<> Builder(CI); - if (Ty != EltTy) { - Value *Result = UndefValue::get(Ty); - for (unsigned i = 0; i < Ty->getVectorNumElements(); i++) { - Value *EltX = Builder.CreateExtractElement(x, i); - Value *EltY = Builder.CreateExtractElement(y, i); - Value *tmp = Builder.CreateOr(EltX, EltY); - Result = 
Builder.CreateInsertElement(Result, tmp, i); - } - return Result; - } return Builder.CreateOr(x, y); } Value *TranslateSelect(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -6135,21 +6413,8 @@ Value *TranslateSelect(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *cond = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); Value *t = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); Value *f = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); - Type *Ty = CI->getType(); - Type *EltTy = Ty->getScalarType(); IRBuilder<> Builder(CI); - if (Ty != EltTy) { - Value *Result = UndefValue::get(Ty); - for (unsigned i = 0; i < Ty->getVectorNumElements(); i++) { - Value *EltCond = Builder.CreateExtractElement(cond, i); - Value *EltTrue = Builder.CreateExtractElement(t, i); - Value *EltFalse = Builder.CreateExtractElement(f, i); - Value *tmp = Builder.CreateSelect(EltCond, EltTrue, EltFalse); - Result = Builder.CreateInsertElement(Result, tmp, i); - } - return Result; - } return Builder.CreateSelect(cond, t, f); } } // namespace @@ -6166,7 +6431,6 @@ Value *EmptyLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode, } // SPIRV change starts -#ifdef ENABLE_SPIRV_CODEGEN Value *UnsupportedVulkanIntrinsic(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode, HLOperationLowerHelper &helper, @@ -6176,7 +6440,6 @@ Value *UnsupportedVulkanIntrinsic(CallInst *CI, IntrinsicOp IOP, dxilutil::EmitErrorOnInstruction(CI, "Unsupported Vulkan intrinsic."); return nullptr; } -#endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends Value *StreamOutputLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode, @@ -6410,18 +6673,20 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_clip, TranslateClip, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_cos, TrivialUnaryOperation, DXIL::OpCode::Cos}, {IntrinsicOp::IOP_cosh, TrivialUnaryOperation, DXIL::OpCode::Hcos}, - {IntrinsicOp::IOP_countbits, TrivialUnaryOperation, + {IntrinsicOp::IOP_countbits, TrivialUnaryOperationRet, 
DXIL::OpCode::Countbits}, {IntrinsicOp::IOP_cross, TranslateCross, DXIL::OpCode::NumOpCodes}, - {IntrinsicOp::IOP_ddx, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseX}, - {IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddx, TrivialUnaryOperationRet, DXIL::OpCode::DerivCoarseX}, - {IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperationRet, + DXIL::OpCode::DerivCoarseX}, + {IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperationRet, DXIL::OpCode::DerivFineX}, - {IntrinsicOp::IOP_ddy, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseY}, - {IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddy, TrivialUnaryOperationRet, + DXIL::OpCode::DerivCoarseY}, + {IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperationRet, DXIL::OpCode::DerivCoarseY}, - {IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperationRet, DXIL::OpCode::DerivFineY}, {IntrinsicOp::IOP_degrees, TranslateDegrees, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_determinant, EmptyLower, DXIL::OpCode::NumOpCodes}, @@ -6521,7 +6786,6 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_unpack_s8s32, TranslateUnpack, DXIL::OpCode::Unpack4x8}, {IntrinsicOp::IOP_unpack_u8u16, TranslateUnpack, DXIL::OpCode::Unpack4x8}, {IntrinsicOp::IOP_unpack_u8u32, TranslateUnpack, DXIL::OpCode::Unpack4x8}, -#ifdef ENABLE_SPIRV_CODEGEN {IntrinsicOp::IOP_VkRawBufferLoad, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_VkRawBufferStore, UnsupportedVulkanIntrinsic, @@ -6532,7 +6796,6 @@ IntrinsicLower gLowerTable[] = { DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_Vkext_execution_mode_id, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes}, -#endif // ENABLE_SPIRV_CODEGEN {IntrinsicOp::MOP_Append, StreamOutputLower, DXIL::OpCode::EmitStream}, {IntrinsicOp::MOP_RestartStrip, StreamOutputLower, DXIL::OpCode::CutStream}, {IntrinsicOp::MOP_CalculateLevelOfDetail, TranslateCalculateLOD, 
@@ -6760,11 +7023,9 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::MOP_OutputComplete, TranslateNodeOutputComplete, DXIL::OpCode::OutputComplete}, -// SPIRV change starts -#ifdef ENABLE_SPIRV_CODEGEN + // SPIRV change starts {IntrinsicOp::MOP_SubpassLoad, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes}, -#endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends // Manually added part. @@ -6802,6 +7063,73 @@ IntrinsicLower gLowerTable[] = { DXIL::OpCode::NumOpCodes}, {IntrinsicOp::MOP_InterlockedUMin, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes}, + {IntrinsicOp::MOP_DxHitObject_MakeNop, TranslateHitObjectMake, + DXIL::OpCode::HitObject_MakeNop}, + {IntrinsicOp::IOP_DxMaybeReorderThread, TranslateMaybeReorderThread, + DXIL::OpCode::MaybeReorderThread}, + {IntrinsicOp::IOP_Vkstatic_pointer_cast, UnsupportedVulkanIntrinsic, + DXIL::OpCode::NumOpCodes}, + {IntrinsicOp::IOP_Vkreinterpret_pointer_cast, UnsupportedVulkanIntrinsic, + DXIL::OpCode::NumOpCodes}, + {IntrinsicOp::MOP_GetBufferContents, UnsupportedVulkanIntrinsic, + DXIL::OpCode::NumOpCodes}, + {IntrinsicOp::MOP_DxHitObject_FromRayQuery, TranslateHitObjectFromRayQuery, + DXIL::OpCode::HitObject_FromRayQuery}, + {IntrinsicOp::MOP_DxHitObject_GetAttributes, + TranslateHitObjectGetAttributes, DXIL::OpCode::HitObject_Attributes}, + {IntrinsicOp::MOP_DxHitObject_GetGeometryIndex, + TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_GeometryIndex}, + {IntrinsicOp::MOP_DxHitObject_GetHitKind, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_HitKind}, + {IntrinsicOp::MOP_DxHitObject_GetInstanceID, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_InstanceID}, + {IntrinsicOp::MOP_DxHitObject_GetInstanceIndex, + TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_InstanceIndex}, + {IntrinsicOp::MOP_DxHitObject_GetObjectRayDirection, + TranslateHitObjectVectorGetter, + DXIL::OpCode::HitObject_ObjectRayDirection}, + {IntrinsicOp::MOP_DxHitObject_GetObjectRayOrigin, + 
TranslateHitObjectVectorGetter, DXIL::OpCode::HitObject_ObjectRayOrigin}, + {IntrinsicOp::MOP_DxHitObject_GetObjectToWorld3x4, + TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_ObjectToWorld3x4}, + {IntrinsicOp::MOP_DxHitObject_GetObjectToWorld4x3, + TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_ObjectToWorld3x4}, + {IntrinsicOp::MOP_DxHitObject_GetPrimitiveIndex, + TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_PrimitiveIndex}, + {IntrinsicOp::MOP_DxHitObject_GetRayFlags, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_RayFlags}, + {IntrinsicOp::MOP_DxHitObject_GetRayTCurrent, + TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_RayTCurrent}, + {IntrinsicOp::MOP_DxHitObject_GetRayTMin, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_RayTMin}, + {IntrinsicOp::MOP_DxHitObject_GetShaderTableIndex, + TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_ShaderTableIndex}, + {IntrinsicOp::MOP_DxHitObject_GetWorldRayDirection, + TranslateHitObjectVectorGetter, DXIL::OpCode::HitObject_WorldRayDirection}, + {IntrinsicOp::MOP_DxHitObject_GetWorldRayOrigin, + TranslateHitObjectVectorGetter, DXIL::OpCode::HitObject_WorldRayOrigin}, + {IntrinsicOp::MOP_DxHitObject_GetWorldToObject3x4, + TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_WorldToObject3x4}, + {IntrinsicOp::MOP_DxHitObject_GetWorldToObject4x3, + TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_WorldToObject3x4}, + {IntrinsicOp::MOP_DxHitObject_Invoke, TranslateHitObjectInvoke, + DXIL::OpCode::HitObject_Invoke}, + {IntrinsicOp::MOP_DxHitObject_IsHit, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_IsHit}, + {IntrinsicOp::MOP_DxHitObject_IsMiss, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_IsMiss}, + {IntrinsicOp::MOP_DxHitObject_IsNop, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_IsNop}, + {IntrinsicOp::MOP_DxHitObject_LoadLocalRootTableConstant, + TranslateHitObjectLoadLocalRootTableConstant, + 
DXIL::OpCode::HitObject_LoadLocalRootTableConstant}, + {IntrinsicOp::MOP_DxHitObject_MakeMiss, TranslateHitObjectMake, + DXIL::OpCode::HitObject_MakeMiss}, + {IntrinsicOp::MOP_DxHitObject_SetShaderTableIndex, + TranslateHitObjectSetShaderTableIndex, + DXIL::OpCode::HitObject_SetShaderTableIndex}, + {IntrinsicOp::MOP_DxHitObject_TraceRay, TranslateHitObjectTraceRay, + DXIL::OpCode::HitObject_TraceRay}, }; } // namespace static_assert( @@ -7887,113 +8215,36 @@ void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset, Builder.CreateCall(dxilF, Args); } -static Value *TranslateRawBufVecLd(Type *VecEltTy, unsigned ElemCount, - IRBuilder<> &Builder, Value *handle, - hlsl::OP *OP, Value *status, Value *bufIdx, - Value *baseOffset, const DataLayout &DL, - std::vector &bufLds, - unsigned baseAlign, bool isScalarTy) { - - unsigned EltSize = DL.getTypeAllocSize(VecEltTy); - unsigned alignment = std::min(baseAlign, EltSize); - Constant *alignmentVal = OP->GetI32Const(alignment); - - if (baseOffset == nullptr) { - baseOffset = OP->GetU32Const(0); - } - - std::vector elts(ElemCount); - unsigned rest = (ElemCount % 4); - for (unsigned i = 0; i < ElemCount - rest; i += 4) { - Value *ResultElts[4]; - Value *bufLd = - GenerateRawBufLd(handle, bufIdx, baseOffset, status, VecEltTy, - ResultElts, OP, Builder, 4, alignmentVal); - bufLds.emplace_back(bufLd); - elts[i] = ResultElts[0]; - elts[i + 1] = ResultElts[1]; - elts[i + 2] = ResultElts[2]; - elts[i + 3] = ResultElts[3]; - - baseOffset = Builder.CreateAdd(baseOffset, OP->GetU32Const(4 * EltSize)); - } - - if (rest) { - Value *ResultElts[4]; - Value *bufLd = - GenerateRawBufLd(handle, bufIdx, baseOffset, status, VecEltTy, - ResultElts, OP, Builder, rest, alignmentVal); - bufLds.emplace_back(bufLd); - for (unsigned i = 0; i < rest; i++) - elts[ElemCount - rest + i] = ResultElts[i]; - } - - // If the expected return type is scalar then skip building a vector - if (isScalarTy) { - return elts[0]; - } - - Value *Vec = 
HLMatrixLower::BuildVector(VecEltTy, elts, Builder); - return Vec; -} - -Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder, - Value *handle, hlsl::OP *OP, Value *status, - Value *bufIdx, Value *baseOffset, +Value *TranslateStructBufMatLd(CallInst *CI, IRBuilder<> &Builder, + Value *handle, HLResource::Kind RK, hlsl::OP *OP, + Value *status, Value *bufIdx, Value *baseOffset, const DataLayout &DL) { + + ResLoadHelper helper(CI, RK, handle, bufIdx, baseOffset); +#ifndef NDEBUG + Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx); + Type *matType = ptr->getType()->getPointerElementType(); HLMatrixType MatTy = HLMatrixType::cast(matType); - Type *EltTy = MatTy.getElementTypeForMem(); - unsigned matSize = MatTy.getNumElements(); - std::vector bufLds; - Value *Vec = - TranslateRawBufVecLd(EltTy, matSize, Builder, handle, OP, status, bufIdx, - baseOffset, DL, bufLds, /*baseAlign (in bytes)*/ 8); - Vec = MatTy.emitLoweredMemToReg(Vec, Builder); - return Vec; + DXASSERT(MatTy.getLoweredVectorType(false /*MemRepr*/) == + helper.retVal->getType(), + "helper type should match vectorized matrix"); +#endif + return TranslateBufLoad(helper, RK, Builder, OP, DL); } void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle, hlsl::OP *OP, Value *bufIdx, Value *baseOffset, Value *val, const DataLayout &DL) { - HLMatrixType MatTy = HLMatrixType::cast(matType); - Type *EltTy = MatTy.getElementTypeForMem(); - - val = MatTy.emitLoweredRegToMem(val, Builder); - - unsigned EltSize = DL.getTypeAllocSize(EltTy); - Constant *Alignment = OP->GetI32Const(EltSize); - Value *offset = baseOffset; - if (baseOffset == nullptr) - offset = OP->GetU32Const(0); - - unsigned matSize = MatTy.getNumElements(); - Value *undefElt = UndefValue::get(EltTy); - - unsigned storeSize = matSize; - if (matSize % 4) { - storeSize = matSize + 4 - (matSize & 3); - } - std::vector elts(storeSize, undefElt); - for (unsigned i = 0; i < matSize; i++) - elts[i] = 
Builder.CreateExtractElement(val, i); - - for (unsigned i = 0; i < matSize; i += 4) { - uint8_t mask = 0; - for (unsigned j = 0; j < 4 && (i + j) < matSize; j++) { - if (elts[i + j] != undefElt) - mask |= (1 << j); - } - GenerateStructBufSt(handle, bufIdx, offset, EltTy, OP, Builder, - {elts[i], elts[i + 1], elts[i + 2], elts[i + 3]}, mask, - Alignment); - // Update offset by 4*4bytes. - offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * EltSize)); - } + [[maybe_unused]] HLMatrixType MatTy = HLMatrixType::cast(matType); + DXASSERT(MatTy.getLoweredVectorType(false /*MemRepr*/) == val->getType(), + "helper type should match vectorized matrix"); + TranslateStore(DxilResource::Kind::StructuredBuffer, handle, val, bufIdx, + baseOffset, Builder, OP); } -void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, hlsl::OP *OP, - Value *status, Value *bufIdx, Value *baseOffset, - const DataLayout &DL) { +void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, HLResource::Kind RK, + hlsl::OP *OP, Value *status, Value *bufIdx, + Value *baseOffset, const DataLayout &DL) { IRBuilder<> Builder(CI); HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction()); unsigned opcode = GetHLOpcode(CI); @@ -8006,13 +8257,10 @@ void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, hlsl::OP *OP, // orientation. 
switch (matOp) { case HLMatLoadStoreOpcode::RowMatLoad: - case HLMatLoadStoreOpcode::ColMatLoad: { - Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx); - Value *NewLd = TranslateStructBufMatLd( - ptr->getType()->getPointerElementType(), Builder, handle, OP, status, - bufIdx, baseOffset, DL); - CI->replaceAllUsesWith(NewLd); - } break; + case HLMatLoadStoreOpcode::ColMatLoad: + TranslateStructBufMatLd(CI, Builder, handle, RK, OP, status, bufIdx, + baseOffset, DL); + break; case HLMatLoadStoreOpcode::RowMatStore: case HLMatLoadStoreOpcode::ColMatStore: { Value *ptr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx); @@ -8136,6 +8384,9 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, GEP->eraseFromParent(); } else if (StoreInst *stUser = dyn_cast(subsUser)) { + // Store elements of matrix in a struct. Needs to be done one scalar at a + // time even for vectors in the case that matrix orientation spreads the + // indexed scalars throughout the matrix vector. IRBuilder<> stBuilder(stUser); Value *Val = stUser->getValueOperand(); if (Val->getType()->isVectorTy()) { @@ -8159,6 +8410,9 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, LoadInst *ldUser = cast(subsUser); IRBuilder<> ldBuilder(ldUser); Value *ldData = UndefValue::get(resultType); + // Load elements of matrix in a struct. Needs to be done one scalar at a + // time even for vectors in the case that matrix orientation spreads the + // indexed scalars throughout the matrix vector. 
if (resultType->isVectorTy()) { for (unsigned i = 0; i < resultSize; i++) { Value *ResultElt; @@ -8283,57 +8537,26 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle, } userCall->eraseFromParent(); } else if (group == HLOpcodeGroup::HLMatLoadStore) - TranslateStructBufMatLdSt(userCall, handle, OP, status, bufIdx, + // Load/Store matrix within a struct + TranslateStructBufMatLdSt(userCall, handle, ResKind, OP, status, bufIdx, baseOffset, DL); else if (group == HLOpcodeGroup::HLSubscript) { + // Subscript of matrix within a struct TranslateStructBufMatSubscript(userCall, handle, ResKind, bufIdx, baseOffset, status, OP, DL); } - } else if (isa(user) || isa(user)) { - LoadInst *LdInst = dyn_cast(user); - StoreInst *StInst = dyn_cast(user); - - Type *Ty = isa(user) ? LdInst->getType() - : StInst->getValueOperand()->getType(); - Type *pOverloadTy = Ty->getScalarType(); - Value *Offset = baseOffset; - - if (LdInst) { - unsigned NumComponents = 0; - if (VectorType *VTy = dyn_cast(Ty)) - NumComponents = VTy->getNumElements(); - else - NumComponents = 1; - Value *ResultElts[4]; - Constant *Alignment = - OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType())); - GenerateRawBufLd(handle, bufIdx, Offset, status, pOverloadTy, ResultElts, - OP, Builder, NumComponents, Alignment); - Value *NewLd = ScalarizeElements(Ty, ResultElts, Builder); - LdInst->replaceAllUsesWith(NewLd); - } else { - Value *val = StInst->getValueOperand(); - Value *undefVal = llvm::UndefValue::get(pOverloadTy); - Value *vals[] = {undefVal, undefVal, undefVal, undefVal}; - uint8_t mask = 0; - if (Ty->isVectorTy()) { - unsigned vectorNumElements = Ty->getVectorNumElements(); - DXASSERT(vectorNumElements <= 4, "up to 4 elements in vector"); - assert(vectorNumElements <= 4); - for (unsigned i = 0; i < vectorNumElements; i++) { - vals[i] = Builder.CreateExtractElement(val, i); - mask |= (1 << i); - } - } else { - vals[0] = val; - mask = DXIL::kCompMask_X; - } - Constant *alignment = - 
OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType())); - GenerateStructBufSt(handle, bufIdx, Offset, pOverloadTy, OP, Builder, - vals, mask, alignment); - } - user->eraseFromParent(); + } else if (LoadInst *LdInst = dyn_cast(user)) { + // Load of scalar/vector within a struct or structured raw load. + ResLoadHelper helper(LdInst, ResKind, handle, bufIdx, baseOffset); + TranslateBufLoad(helper, ResKind, Builder, OP, DL); + + LdInst->eraseFromParent(); + } else if (StoreInst *StInst = dyn_cast(user)) { + // Store of scalar/vector within a struct or structured raw store. + Value *val = StInst->getValueOperand(); + TranslateStore(DxilResource::Kind::StructuredBuffer, handle, val, bufIdx, + baseOffset, Builder, OP); + StInst->eraseFromParent(); } else if (BitCastInst *BCI = dyn_cast(user)) { // Recurse users for (auto U = BCI->user_begin(); U != BCI->user_end();) { @@ -8368,13 +8591,18 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle, DXASSERT_LOCALVAR(Ty, offset->getType() == Type::getInt32Ty(Ty->getContext()), "else bitness is wrong"); - offset = Builder.CreateAdd(offset, baseOffset); + // No offset into element for Raw buffers; byte offset is in bufIdx. 
+ if (DXIL::IsRawBuffer(ResKind)) + bufIdx = Builder.CreateAdd(offset, bufIdx); + else + baseOffset = Builder.CreateAdd(offset, baseOffset); for (auto U = GEP->user_begin(); U != GEP->user_end();) { Value *GEPUser = *(U++); TranslateStructBufSubscriptUser(cast(GEPUser), handle, - ResKind, bufIdx, offset, status, OP, DL); + ResKind, bufIdx, baseOffset, status, OP, + DL); } // delete the inst GEP->eraseFromParent(); @@ -8388,13 +8616,12 @@ void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status, CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx); Value *bufIdx = nullptr; Value *offset = nullptr; - if (ResKind == HLResource::Kind::RawBuffer) { - offset = subscriptIndex; - } else { + bufIdx = subscriptIndex; + if (ResKind == HLResource::Kind::RawBuffer) + offset = UndefValue::get(Type::getInt32Ty(CI->getContext())); + else // StructuredBuffer, TypedBuffer, etc. - bufIdx = subscriptIndex; offset = OP->GetU32Const(0); - } for (auto U = CI->user_begin(); U != CI->user_end();) { Value *user = *(U++); @@ -8408,19 +8635,14 @@ void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status, // HLSubscript. namespace { -Value *TranslateTypedBufLoad(CallInst *CI, DXIL::ResourceKind RK, - DXIL::ResourceClass RC, Value *handle, - LoadInst *ldInst, IRBuilder<> &Builder, - hlsl::OP *hlslOP, const DataLayout &DL) { - ResLoadHelper ldHelper(CI, RK, RC, handle, IntrinsicOp::MOP_Load, - /*bForSubscript*/ true); - // Default sampleIdx for 2DMS textures. 
- if (RK == DxilResource::Kind::Texture2DMS || - RK == DxilResource::Kind::Texture2DMSArray) - ldHelper.mipLevel = hlslOP->GetU32Const(0); - // use ldInst as retVal - ldHelper.retVal = ldInst; - TranslateLoad(ldHelper, RK, Builder, hlslOP, DL); +Value *TranslateTypedBufSubscript(CallInst *CI, DXIL::ResourceKind RK, + DXIL::ResourceClass RC, Value *handle, + LoadInst *ldInst, IRBuilder<> &Builder, + hlsl::OP *hlslOP, const DataLayout &DL) { + // The arguments to the call instruction are used to determine the access, + // the return value and type come from the load instruction. + ResLoadHelper ldHelper(CI, RK, RC, handle, IntrinsicOp::MOP_Load, ldInst); + TranslateBufLoad(ldHelper, RK, Builder, hlslOP, DL); // delete the ld ldInst->eraseFromParent(); return ldHelper.retVal; @@ -8463,9 +8685,9 @@ Value *UpdateVectorElt(Value *VecVal, Value *EltVal, Value *EltIdx, return VecVal; } -void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, - bool &Translated) { +void TranslateTypedBufferSubscript(CallInst *CI, HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx); hlsl::OP *hlslOP = &helper.hlslOP; @@ -8480,14 +8702,15 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, User *user = *(It++); Instruction *I = cast(user); IRBuilder<> Builder(I); + Value *UndefI = UndefValue::get(Builder.getInt32Ty()); if (LoadInst *ldInst = dyn_cast(user)) { - TranslateTypedBufLoad(CI, RK, RC, handle, ldInst, Builder, hlslOP, - helper.dataLayout); + TranslateTypedBufSubscript(CI, RK, RC, handle, ldInst, Builder, hlslOP, + helper.dataLayout); } else if (StoreInst *stInst = dyn_cast(user)) { Value *val = stInst->getValueOperand(); TranslateStore(RK, handle, val, - CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx), - Builder, hlslOP); + 
CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx), + UndefI, Builder, hlslOP); // delete the st stInst->eraseFromParent(); } else if (GetElementPtrInst *GEP = dyn_cast(user)) { @@ -8504,7 +8727,7 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, // Generate Ld. LoadInst *tmpLd = StBuilder.CreateLoad(CI); - Value *ldVal = TranslateTypedBufLoad( + Value *ldVal = TranslateTypedBufSubscript( CI, RK, RC, handle, tmpLd, StBuilder, hlslOP, helper.dataLayout); // Update vector. ldVal = UpdateVectorElt(ldVal, SI->getValueOperand(), EltIdx, @@ -8512,9 +8735,10 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, // Generate St. // Reset insert point, UpdateVectorElt may move SI to different block. StBuilder.SetInsertPoint(SI); - TranslateStore(RK, handle, ldVal, - CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx), - StBuilder, hlslOP); + TranslateStore( + RK, handle, ldVal, + CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx), UndefI, + StBuilder, hlslOP); SI->eraseFromParent(); continue; } @@ -8524,7 +8748,7 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, // Generate tmp vector load with vector type & translate it LoadInst *tmpLd = LdBuilder.CreateLoad(CI); - Value *ldVal = TranslateTypedBufLoad( + Value *ldVal = TranslateTypedBufSubscript( CI, RK, RC, handle, tmpLd, LdBuilder, hlslOP, helper.dataLayout); // get the single element @@ -8697,15 +8921,17 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode, DXASSERT(CI->hasOneUse(), "subscript should only have one use"); IRBuilder<> Builder(CI); if (LoadInst *ldInst = dyn_cast(*U)) { - ResLoadHelper ldHelper(ldInst, handle, coord, mipLevel); - TranslateLoad(ldHelper, RK, Builder, hlslOP, helper.dataLayout); + Value *Offset = UndefValue::get(Builder.getInt32Ty()); + ResLoadHelper ldHelper(ldInst, RK, handle, coord, Offset, mipLevel); + TranslateBufLoad(ldHelper, RK, Builder, hlslOP, helper.dataLayout); 
ldInst->eraseFromParent(); } else { StoreInst *stInst = cast(*U); Value *val = stInst->getValueOperand(); + Value *UndefI = UndefValue::get(Builder.getInt32Ty()); TranslateStore(RK, handle, val, - CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx), - Builder, hlslOP, mipLevel); + CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx), + UndefI, Builder, hlslOP, mipLevel); stInst->eraseFromParent(); } Translated = true; @@ -8736,7 +8962,7 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode, TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP, RK, helper.dataLayout); else - TranslateDefaultSubscript(CI, helper, pObjHelper, Translated); + TranslateTypedBufferSubscript(CI, helper, pObjHelper, Translated); return; } diff --git a/lib/Transforms/Scalar/LowerTypePasses.cpp b/lib/Transforms/Scalar/LowerTypePasses.cpp index feeb23a5da..d2438c7e22 100644 --- a/lib/Transforms/Scalar/LowerTypePasses.cpp +++ b/lib/Transforms/Scalar/LowerTypePasses.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "dxc/DXIL/DxilConstants.h" +#include "dxc/DXIL/DxilModule.h" #include "dxc/DXIL/DxilOperations.h" #include "dxc/DXIL/DxilUtil.h" #include "dxc/HLSL/HLModule.h" @@ -180,10 +181,12 @@ bool LowerTypePass::runOnModule(Module &M) { namespace { class DynamicIndexingVectorToArray : public LowerTypePass { bool ReplaceAllVectors; + bool SupportsVectors; public: explicit DynamicIndexingVectorToArray(bool ReplaceAll = false) - : LowerTypePass(ID), ReplaceAllVectors(ReplaceAll) {} + : LowerTypePass(ID), ReplaceAllVectors(ReplaceAll), + SupportsVectors(false) {} static char ID; // Pass identification, replacement for typeid void applyOptions(PassOptions O) override; void dumpConfig(raw_ostream &OS) override; @@ -194,6 +197,7 @@ class DynamicIndexingVectorToArray : public LowerTypePass { Type *lowerType(Type *Ty) override; Constant *lowerInitVal(Constant *InitVal, Type *NewTy) override; StringRef 
getGlobalPrefix() override { return ".v"; } + void initialize(Module &M) override; private: bool HasVectorDynamicIndexing(Value *V); @@ -207,6 +211,18 @@ class DynamicIndexingVectorToArray : public LowerTypePass { void ReplaceAddrSpaceCast(ConstantExpr *CE, Value *A, IRBuilder<> &Builder); }; +void DynamicIndexingVectorToArray::initialize(Module &M) { + // Set vector support according to available Dxil version. + // Use HLModule or metadata for version info. + // Otherwise retrieve from dxil module or metadata. + unsigned Major = 0, Minor = 0; + if (M.HasHLModule()) + M.GetHLModule().GetShaderModel()->GetDxilVersion(Major, Minor); + else + dxilutil::LoadDxilVersion(&M, Major, Minor); + SupportsVectors = (Major == 1 && Minor >= 9); +} + void DynamicIndexingVectorToArray::applyOptions(PassOptions O) { GetPassOptionBool(O, "ReplaceAllVectors", &ReplaceAllVectors, ReplaceAllVectors); @@ -306,9 +322,21 @@ void DynamicIndexingVectorToArray::ReplaceStaticIndexingOnVector(Value *V) { } bool DynamicIndexingVectorToArray::needToLower(Value *V) { + bool MustReplaceVector = ReplaceAllVectors; Type *Ty = V->getType()->getPointerElementType(); - if (dyn_cast(Ty)) { - if (isa(V) || ReplaceAllVectors) { + + if (ArrayType *AT = dyn_cast(Ty)) { + // Array must be replaced even without dynamic indexing to remove vector + // type in dxil. + MustReplaceVector = true; + Ty = dxilutil::GetArrayEltTy(AT); + } + + if (isa(Ty)) { + // Only needed for 2+ vectors where native vectors unsupported. + if (SupportsVectors && Ty->getVectorNumElements() > 1) + return false; + if (isa(V) || MustReplaceVector) { return true; } // Don't lower local vector which only static indexing. @@ -319,12 +347,6 @@ bool DynamicIndexingVectorToArray::needToLower(Value *V) { ReplaceStaticIndexingOnVector(V); return false; } - } else if (ArrayType *AT = dyn_cast(Ty)) { - // Array must be replaced even without dynamic indexing to remove vector - // type in dxil. - // TODO: optimize static array index in later pass. 
- Type *EltTy = dxilutil::GetArrayEltTy(AT); - return isa(EltTy); } return false; } diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index 0c3e13f608..e487079b94 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -81,16 +81,18 @@ class SROA_Helper { static bool DoScalarReplacement(Value *V, std::vector &Elts, Type *&BrokenUpTy, uint64_t &NumInstances, IRBuilder<> &Builder, bool bFlatVector, - bool hasPrecise, DxilTypeSystem &typeSys, - const DataLayout &DL, + bool SupportsVectors, bool hasPrecise, + DxilTypeSystem &typeSys, const DataLayout &DL, SmallVector &DeadInsts, DominatorTree *DT); - static bool - DoScalarReplacement(GlobalVariable *GV, std::vector &Elts, - IRBuilder<> &Builder, bool bFlatVector, bool hasPrecise, - DxilTypeSystem &typeSys, const DataLayout &DL, - SmallVector &DeadInsts, DominatorTree *DT); + static bool DoScalarReplacement(GlobalVariable *GV, + std::vector &Elts, + IRBuilder<> &Builder, bool bFlatVector, + bool SupportsVectors, bool hasPrecise, + DxilTypeSystem &typeSys, const DataLayout &DL, + SmallVector &DeadInsts, + DominatorTree *DT); static unsigned GetEltAlign(unsigned ValueAlign, const DataLayout &DL, Type *EltTy, unsigned Offset); // Lower memcpy related to V. 
@@ -1714,6 +1716,7 @@ bool isGroupShareOrConstStaticArray(GlobalVariable *GV) { bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) { Module &M = *HLM.GetModule(); + bool SupportsVectors = HLM.GetShaderModel()->IsSM69Plus(); DxilTypeSystem &typeSys = HLM.GetTypeSystem(); const DataLayout &DL = M.getDataLayout(); @@ -1878,7 +1881,8 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) { uint64_t NumInstances = 1; bool SROAed = SROA_Helper::DoScalarReplacement( AI, Elts, BrokenUpTy, NumInstances, Builder, - /*bFlatVector*/ true, hasPrecise, typeSys, DL, DeadInsts, &DT); + /*bFlatVector*/ true, SupportsVectors, hasPrecise, typeSys, DL, + DeadInsts, &DT); if (SROAed) { Type *Ty = AI->getAllocatedType(); @@ -1945,7 +1949,7 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) { continue; } - // Flat Global vector if no dynamic vector indexing. + // Flatten global vector if it has no dynamic vector indexing. bool bFlatVector = !hasDynamicVectorIndexing(GV); if (bFlatVector) { @@ -1981,7 +1985,7 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) { // SROA_Parameter_HLSL has no access to a domtree, if one is needed, // it'll be generated SROAed = SROA_Helper::DoScalarReplacement( - GV, Elts, Builder, bFlatVector, + GV, Elts, Builder, bFlatVector, SupportsVectors, // TODO: set precise. 
/*hasPrecise*/ false, typeSys, DL, DeadInsts, /*DT*/ nullptr); } @@ -2771,6 +2775,14 @@ void SROA_Helper::RewriteCall(CallInst *CI) { RewriteCallArg(CI, HLOperandIndex::kCallShaderPayloadOpIdx, /*bIn*/ true, /*bOut*/ true); } break; + case IntrinsicOp::MOP_DxHitObject_MakeMiss: { + if (OldVal == + CI->getArgOperand(HLOperandIndex::kHitObjectMakeMissRayDescOpIdx)) { + RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, + /*loadElts*/ true); + DeadInsts.push_back(CI); + } + } break; case IntrinsicOp::MOP_TraceRayInline: { if (OldVal == CI->getArgOperand(HLOperandIndex::kTraceRayInlineRayDescOpIdx)) { @@ -2920,7 +2932,8 @@ static ArrayType *CreateNestArrayTy(Type *FinalEltTy, bool SROA_Helper::DoScalarReplacement(Value *V, std::vector &Elts, Type *&BrokenUpTy, uint64_t &NumInstances, IRBuilder<> &Builder, bool bFlatVector, - bool hasPrecise, DxilTypeSystem &typeSys, + bool SupportsVectors, bool hasPrecise, + DxilTypeSystem &typeSys, const DataLayout &DL, SmallVector &DeadInsts, DominatorTree *DT) { @@ -3033,6 +3046,10 @@ bool SROA_Helper::DoScalarReplacement(Value *V, std::vector &Elts, if (!bFlatVector) return false; + // Skip vector where supported if it has more than 1 element. + if (SupportsVectors && ElTy->getVectorNumElements() > 1) + return false; + // for array of vector // split into arrays of scalar VectorType *ElVT = cast(ElTy); @@ -3114,13 +3131,11 @@ unsigned SROA_Helper::GetEltAlign(unsigned ValueAlign, const DataLayout &DL, /// DoScalarReplacement - Split V into AllocaInsts with Builder and save the new /// AllocaInsts into Elts. Then do SROA on V. 
-bool SROA_Helper::DoScalarReplacement(GlobalVariable *GV, - std::vector &Elts, - IRBuilder<> &Builder, bool bFlatVector, - bool hasPrecise, DxilTypeSystem &typeSys, - const DataLayout &DL, - SmallVector &DeadInsts, - DominatorTree *DT) { +bool SROA_Helper::DoScalarReplacement( + GlobalVariable *GV, std::vector &Elts, IRBuilder<> &Builder, + bool bFlatVector, bool SupportsVectors, bool hasPrecise, + DxilTypeSystem &typeSys, const DataLayout &DL, + SmallVector &DeadInsts, DominatorTree *DT) { DEBUG(dbgs() << "Found inst to SROA: " << *GV << '\n'); Type *Ty = GV->getType(); // Skip none pointer types. @@ -3134,6 +3149,9 @@ bool SROA_Helper::DoScalarReplacement(GlobalVariable *GV, // Skip basic types. if (Ty->isSingleValueType() && !Ty->isVectorTy()) return false; + // Skip vector where supported if it has more than 1 element. + if (Ty->isVectorTy() && SupportsVectors && Ty->getVectorNumElements() > 1) + return false; // Skip matrix types. if (HLMatrixType::isa(Ty)) return false; @@ -3240,6 +3258,10 @@ bool SROA_Helper::DoScalarReplacement(GlobalVariable *GV, if (!bFlatVector) return false; + // Skip vector where supported if it has more than 1 element. 
+ if (SupportsVectors && ElTy->getVectorNumElements() > 1) + return false; + // for array of vector // split into arrays of scalar VectorType *ElVT = cast(ElTy); @@ -5277,6 +5299,8 @@ void SROA_Parameter_HLSL::flattenArgument( std::vector &FlatAnnotationList, BasicBlock *EntryBlock, ArrayRef DDIs) { std::deque WorkList; + bool SupportsVectors = m_pHLModule->GetShaderModel()->IsSM69Plus(); + WorkList.push_back({Arg, paramAnnotation}); unsigned startArgIndex = FlatAnnotationList.size(); @@ -5351,8 +5375,8 @@ void SROA_Parameter_HLSL::flattenArgument( // DomTree isn't used by arguments SROAed = SROA_Helper::DoScalarReplacement( V, Elts, BrokenUpTy, NumInstances, Builder, - /*bFlatVector*/ false, annotation.IsPrecise(), dxilTypeSys, DL, - DeadInsts, /*DT*/ nullptr); + /*bFlatVector*/ false, SupportsVectors, annotation.IsPrecise(), + dxilTypeSys, DL, DeadInsts, /*DT*/ nullptr); } if (SROAed) { diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp index 729771c7c7..730354af99 100644 --- a/lib/Transforms/Scalar/Scalarizer.cpp +++ b/lib/Transforms/Scalar/Scalarizer.cpp @@ -14,6 +14,9 @@ // //===----------------------------------------------------------------------===// +#include "dxc/DXIL/DxilModule.h" +#include "dxc/DXIL/DxilUtil.h" + #include "llvm/ADT/STLExtras.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -151,6 +154,7 @@ class Scalarizer : public FunctionPass, // HLSL Change Begin bool AllowFolding = false; + bool SupportsVectors = false; Scalarizer(bool AllowFolding) : FunctionPass(ID), AllowFolding(AllowFolding) { @@ -290,6 +294,13 @@ bool Scalarizer::doInitialization(Module &M) { } bool Scalarizer::runOnFunction(Function &F) { + // HLSL Change start - set SupportsVectors + const Module *M = F.getParent(); + unsigned Major = 0, Minor = 0; + if (hlsl::dxilutil::LoadDxilVersion(M, Major, Minor)) + SupportsVectors = (Major == 1 && Minor >= 9); + // HLSL Change end - set SupportsVectors + for (Function::iterator 
BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) { BasicBlock *BB = BBI; for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { @@ -436,7 +447,8 @@ bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment, template bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) { VectorType *VT = dyn_cast(I.getType()); - if (!VT) + // HLSL Change - allow > 1 vectors where supported. + if (!VT || (SupportsVectors && VT->getNumElements() > 1)) return false; unsigned NumElems = VT->getNumElements(); @@ -457,7 +469,8 @@ bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) { bool Scalarizer::visitSelectInst(SelectInst &SI) { VectorType *VT = dyn_cast(SI.getType()); - if (!VT) + // HLSL Change - allow > 1 vectors where supported. + if (!VT || (SupportsVectors && VT->getNumElements() > 1)) return false; unsigned NumElems = VT->getNumElements(); @@ -500,7 +513,8 @@ bool Scalarizer::visitBinaryOperator(BinaryOperator &BO) { bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) { VectorType *VT = dyn_cast(GEPI.getType()); - if (!VT) + // HLSL Change - allow > 1 vectors where supported. + if (!VT || (SupportsVectors && VT->getNumElements() > 1)) return false; IRBuilder<> Builder(GEPI.getParent(), &GEPI); @@ -534,7 +548,8 @@ bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) { bool Scalarizer::visitCastInst(CastInst &CI) { VectorType *VT = dyn_cast(CI.getDestTy()); - if (!VT) + // HLSL Change - allow > 1 vectors where supported. + if (!VT || (SupportsVectors && VT->getNumElements() > 1)) return false; unsigned NumElems = VT->getNumElements(); @@ -559,6 +574,12 @@ bool Scalarizer::visitBitCastInst(BitCastInst &BCI) { unsigned DstNumElems = DstVT->getNumElements(); unsigned SrcNumElems = SrcVT->getNumElements(); + + // HLSL Change Begin - allow > 1 vectors where supported. + if (SupportsVectors && (DstNumElems > 1 || SrcNumElems > 1)) + return false; + // HLSL Change End - allow > 1 vectors where supported. 
+ IRBuilder<> Builder(BCI.getParent(), &BCI); Builder.AllowFolding = this->AllowFolding; // HLSL Change Scatterer Op0 = scatter(&BCI, BCI.getOperand(0)); @@ -609,7 +630,8 @@ bool Scalarizer::visitBitCastInst(BitCastInst &BCI) { bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) { VectorType *VT = dyn_cast(SVI.getType()); - if (!VT) + // HLSL Change - allow > 1 vectors where supported. + if (!VT || (SupportsVectors && VT->getNumElements() > 1)) return false; unsigned NumElems = VT->getNumElements(); @@ -643,7 +665,8 @@ bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) { bool Scalarizer::visitPHINode(PHINode &PHI) { VectorType *VT = dyn_cast(PHI.getType()); - if (!VT) + // HLSL Change - allow > 1 vectors where supported. + if (!VT || (SupportsVectors && VT->getNumElements() > 1)) return false; unsigned NumElems = VT->getNumElements(); @@ -679,6 +702,10 @@ bool Scalarizer::visitLoadInst(LoadInst &LI) { return false; unsigned NumElems = Layout.VecTy->getNumElements(); + // HLSL Change Begin - allow > 1 vectors where supported. + if (SupportsVectors && NumElems > 1) + return false; + // HLSL Change End - allow > 1 vectors where supported. IRBuilder<> Builder(LI.getParent(), &LI); Builder.AllowFolding = this->AllowFolding; // HLSL Change Scatterer Ptr = scatter(&LI, LI.getPointerOperand()); @@ -705,6 +732,10 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) { return false; unsigned NumElems = Layout.VecTy->getNumElements(); + // HLSL Change Begin - allow > 1 vectors where supported. + if (SupportsVectors && NumElems > 1) + return false; + // HLSL Change End - allow > 1 vectors where supported. 
IRBuilder<> Builder(SI.getParent(), &SI); Builder.AllowFolding = this->AllowFolding; // HLSL Change Scatterer Ptr = scatter(&SI, SI.getPointerOperand()); diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index f0d2dbcd7a..46294b3db8 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -29,7 +28,9 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include @@ -473,6 +474,9 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, const char *NameSuffix, ClonedCodeInfo *CodeInfo, CloningDirector *Director) { + TimeTraceScope TimeScope("CloneAndPruneIntoFromInst", [&] { + return (Twine(OldFunc->getName()) + "->" + NewFunc->getName()).str(); + }); assert(NameSuffix && "NameSuffix cannot be null!"); ValueMapTypeRemapper *TypeMapper = nullptr; diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index f6a255a0e4..bfa4b61fbe 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -12,10 +12,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include 
"llvm/Analysis/AssumptionCache.h" @@ -24,13 +23,13 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -38,8 +37,10 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/TimeProfiler.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" #include using namespace llvm; @@ -291,6 +292,8 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, /// non-aliasing property communicated by the metadata could have /// call-site-specific control dependencies). static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { + TimeTraceScope TimeScope("CloneAliasScopeMetadata", + [&] { return CS.getCalledFunction()->getName(); }); const Function *CalledFunc = CS.getCalledFunction(); SetVector MD; @@ -401,6 +404,8 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { /// non-derived loads, stores and memory intrinsics with the new alias scopes. static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, const DataLayout &DL, AliasAnalysis *AA) { + TimeTraceScope TimeScope("AddAliasScopeMetadata", + [&] { return CS.getCalledFunction()->getName(); }); if (!EnableNoAliasConversion) return; @@ -872,6 +877,7 @@ updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx, /// to encode location where these instructions are inlined. 
static void fixupLineNumbers(Function *Fn, Function::iterator FI, Instruction *TheCall) { + TimeTraceScope TimeScope("fixupLineNumbers", [&] { return Fn->getName(); }); DebugLoc TheCallDL = TheCall->getDebugLoc(); #if 0 // HLSL Change if (!TheCallDL) diff --git a/tools/clang/CMakeLists.txt b/tools/clang/CMakeLists.txt index 71190336ca..449e6c28b4 100644 --- a/tools/clang/CMakeLists.txt +++ b/tools/clang/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8.8) +cmake_minimum_required(VERSION 3.17.2) # HLSL Change - Require CMake 3.17.2. # FIXME: It may be removed when we use 2.8.12. if(CMAKE_VERSION VERSION_LESS 2.8.12) diff --git a/tools/clang/include/clang/AST/DeclCXX.h b/tools/clang/include/clang/AST/DeclCXX.h index 3b07576545..36e0f99c82 100644 --- a/tools/clang/include/clang/AST/DeclCXX.h +++ b/tools/clang/include/clang/AST/DeclCXX.h @@ -465,6 +465,10 @@ class CXXRecordDecl : public RecordDecl { /// \brief Whether we are currently parsing base specifiers. bool IsParsingBaseSpecifiers : 1; + /// \brief Whether this class contains at least one member or base + /// class containing an HLSL vector longer than 4 elements. + bool HasHLSLLongVector : 1; + /// \brief The number of base class specifiers in Bases. unsigned NumBases; @@ -1018,6 +1022,13 @@ class CXXRecordDecl : public RecordDecl { return data().NeedOverloadResolutionForDestructor; } + // HLSL Change add HLSL Long vector bit. + /// \brief Determine whether this class contains an HLSL long vector + /// of over 4 elements. + bool hasHLSLLongVector() { return data().HasHLSLLongVector; } + /// \brief Set that this class contains an HLSL long vector of over 4 elements + bool setHasHLSLLongVector() { return data().HasHLSLLongVector = true; } + /// \brief Determine whether this class describes a lambda function object. bool isLambda() const { // An update record can't turn a non-lambda into a lambda. 
diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index 2aa9afa5f9..3a02824b3a 100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -6,6 +6,9 @@ // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // /// /// \file // /// \brief Defines the HLSL type system interface. // @@ -31,6 +34,7 @@ namespace clang { class ASTContext; class AttributeList; +class CXXConstructorDecl; class CXXMethodDecl; class CXXRecordDecl; class ClassTemplateDecl; @@ -348,9 +352,10 @@ void AddHLSLNodeOutputRecordTemplate( _Outptr_ clang::ClassTemplateDecl **outputRecordTemplateDecl, bool isCompleteType = true); -clang::CXXRecordDecl *DeclareRecordTypeWithHandle(clang::ASTContext &context, - llvm::StringRef name, - bool isCompleteType = true); +clang::CXXRecordDecl * +DeclareRecordTypeWithHandle(clang::ASTContext &context, llvm::StringRef name, + bool isCompleteType = true, + clang::InheritableAttr *Attr = nullptr); void AddRaytracingConstants(clang::ASTContext &context); void AddSamplerFeedbackConstants(clang::ASTContext &context); @@ -381,15 +386,16 @@ clang::CXXRecordDecl *DeclareTemplateTypeWithHandleInDeclContext( clang::CXXRecordDecl *DeclareUIntTemplatedTypeWithHandle( clang::ASTContext &context, llvm::StringRef typeName, - llvm::StringRef templateParamName, - clang::TagTypeKind tagKind = clang::TagTypeKind::TTK_Class); + llvm::StringRef templateParamName, clang::InheritableAttr *Attr = nullptr); clang::CXXRecordDecl *DeclareUIntTemplatedTypeWithHandleInDeclContext( clang::ASTContext &context, clang::DeclContext *declContext, llvm::StringRef typeName, llvm::StringRef templateParamName, - clang::TagTypeKind tagKind = clang::TagTypeKind::TTK_Class); -clang::CXXRecordDecl *DeclareConstantBufferViewType(clang::ASTContext 
&context, - bool bTBuf); + clang::InheritableAttr *Attr = nullptr); +clang::CXXRecordDecl * +DeclareConstantBufferViewType(clang::ASTContext &context, + clang::InheritableAttr *Attr); clang::CXXRecordDecl *DeclareRayQueryType(clang::ASTContext &context); +clang::CXXRecordDecl *DeclareHitObjectType(clang::NamespaceDecl &NSDecl); clang::CXXRecordDecl *DeclareResourceType(clang::ASTContext &context, bool bSampler); @@ -400,6 +406,10 @@ DeclareNodeOrRecordType(clang::ASTContext &Ctx, DXIL::NodeIOKind Type, bool IsCompleteType = false); #ifdef ENABLE_SPIRV_CODEGEN +clang::CXXRecordDecl * +DeclareVkBufferPointerType(clang::ASTContext &context, + clang::DeclContext *declContext); + clang::CXXRecordDecl *DeclareInlineSpirvType(clang::ASTContext &context, clang::DeclContext *declContext, llvm::StringRef typeName, @@ -425,7 +435,7 @@ clang::VarDecl *DeclareBuiltinGlobal(llvm::StringRef name, clang::QualType Ty, /// method. AST context in which to /// work. Class in which the function template /// is declared. Function for which a -/// template is created. Declarations for templates to the /// function. Count of /// template declarations. 
A new function template declaration @@ -460,6 +470,7 @@ bool IsHLSLUnsigned(clang::QualType type); bool IsHLSLMinPrecision(clang::QualType type); bool HasHLSLUNormSNorm(clang::QualType type, bool *pIsSNorm = nullptr); bool HasHLSLGloballyCoherent(clang::QualType type); +bool HasHLSLReorderCoherent(clang::QualType type); bool IsHLSLInputPatchType(clang::QualType type); bool IsHLSLOutputPatchType(clang::QualType type); bool IsHLSLPointStreamType(clang::QualType type); @@ -471,6 +482,7 @@ bool IsHLSLNodeInputType(clang::QualType type); bool IsHLSLDynamicResourceType(clang::QualType type); bool IsHLSLDynamicSamplerType(clang::QualType type); bool IsHLSLNodeType(clang::QualType type); +bool IsHLSLHitObjectType(clang::QualType type); bool IsHLSLObjectWithImplicitMemberAccess(clang::QualType type); bool IsHLSLObjectWithImplicitROMemberAccess(clang::QualType type); @@ -530,6 +542,29 @@ bool DoesTypeDefineOverloadedOperator(clang::QualType typeWithOperator, clang::QualType paramType); bool IsPatchConstantFunctionDecl(const clang::FunctionDecl *FD); +#ifdef ENABLE_SPIRV_CODEGEN +bool IsVKBufferPointerType(clang::QualType type); +clang::QualType GetVKBufferPointerBufferType(clang::QualType type); +unsigned GetVKBufferPointerAlignment(clang::QualType type); +#endif + +/// Adds a constructor declaration to the specified class +/// record. ASTContext that owns +/// declarations. Record declaration in which +/// to add constructor. Result type for +/// constructor. Types for constructor +/// parameters. Names for constructor +/// parameters. Name for +/// constructor. Whether the constructor is a +/// const function. The method declaration for the +/// constructor. 
+clang::CXXConstructorDecl *CreateConstructorDeclarationWithParams( + clang::ASTContext &context, clang::CXXRecordDecl *recordDecl, + clang::QualType resultType, llvm::ArrayRef paramTypes, + llvm::ArrayRef paramNames, + clang::DeclarationName declarationName, bool isConst, + bool isTemplateFunction = false); + /// Adds a function declaration to the specified class /// record. ASTContext that owns /// declarations. Record declaration in which @@ -544,6 +579,7 @@ clang::CXXMethodDecl *CreateObjectFunctionDeclarationWithParams( clang::QualType resultType, llvm::ArrayRef paramTypes, llvm::ArrayRef paramNames, clang::DeclarationName declarationName, bool isConst, + clang::StorageClass SC = clang::StorageClass::SC_None, bool isTemplateFunction = false); DXIL::ResourceClass GetResourceClassForType(const clang::ASTContext &context, diff --git a/tools/clang/include/clang/AST/OperationKinds.h b/tools/clang/include/clang/AST/OperationKinds.h index 75e665a5e9..3909c8b5e8 100644 --- a/tools/clang/include/clang/AST/OperationKinds.h +++ b/tools/clang/include/clang/AST/OperationKinds.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file enumerates the different kinds of operations that can be @@ -321,6 +324,8 @@ enum CastKind { CK_HLSLCC_FloatingToIntegral, CK_HLSLCC_FloatingToBoolean, CK_HLSLCC_FloatingCast, + CK_VK_BufferPointerToIntegral, + CK_VK_IntegralToBufferPointer, // HLSL Change - Made CK_Invalid an enum case because otherwise it is UB to // assign it to a value of CastKind. 
diff --git a/tools/clang/include/clang/AST/Type.h b/tools/clang/include/clang/AST/Type.h index f393f88ce9..2c96bbc295 100644 --- a/tools/clang/include/clang/AST/Type.h +++ b/tools/clang/include/clang/AST/Type.h @@ -3652,7 +3652,8 @@ class AttributedType : public Type, public llvm::FoldingSetNode { attr_hlsl_row_major, attr_hlsl_column_major, attr_hlsl_globallycoherent, - // HLSL Change Ends + attr_hlsl_reordercoherent, + // HLSL Change Ends }; private: diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index 3a6718a339..2518423565 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// class DocumentationCategory { @@ -851,6 +854,12 @@ def HLSLGloballyCoherent : InheritableAttr { let Documentation = [Undocumented]; } +def HLSLReorderCoherent : InheritableAttr { + let Spellings = [CXX11<"", "reordercoherent", 2015>]; + let Subjects = SubjectList<[Var, Function]>; + let Documentation = [Undocumented]; +} + def HLSLShader : InheritableAttr { let Spellings = [CXX11<"", "shader", 2017>]; let Args = [StringArgument<"stage">]; // one of compute, pixel, vertex, hull, domain, geometry, node @@ -939,6 +948,52 @@ def HLSLCXXOverload : InheritableAttr { let Documentation = [Undocumented]; } +def HLSLVector : InheritableAttr { + let Spellings = []; // No spellings! + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + +def HLSLMatrix : InheritableAttr { + let Spellings = []; // No spellings! + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + +def HLSLTessPatch : InheritableAttr { + let Spellings = []; // No spellings! 
+ let Args = [BoolArgument<"IsInput">]; + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + +def HLSLStreamOutput : InheritableAttr { + let Spellings = []; // No spellings! + // PrimVertices are the number of vertices that make up the streamed + // primitive. Points have 1. Lines have 2. Triangles have 3. + let Args = [UnsignedArgument<"PrimVertices">]; + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + +def HLSLResource : InheritableAttr { + let Spellings = []; // No spellings! + let Args = [UnsignedArgument<"ResKindUint">, + UnsignedArgument<"ResClassUint">]; + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; + + // Add enum typed getters for safety and brevity. + let AdditionalMembers = [{ + hlsl::DXIL::ResourceKind getResKind() const { + return (hlsl::DXIL::ResourceKind)getResKindUint(); + } + hlsl::DXIL::ResourceClass getResClass() const { + return (hlsl::DXIL::ResourceClass)getResClassUint(); + } + }]; +} + def HLSLNodeLaunch : InheritableAttr { let Spellings = [CXX11<"", "nodelaunch", 2017>]; let Args = [StringArgument<"LaunchType">]; // one of broadcasting, coalescing, thread @@ -992,13 +1047,6 @@ def HLSLNodeTrackRWInputSharing : InheritableAttr { let Documentation = [Undocumented]; } -def HLSLResource : InheritableAttr { - let Spellings = []; // No spellings! - let Args = [UnsignedArgument<"ResKind">, UnsignedArgument<"ResClass">]; - let Subjects = SubjectList<[CXXRecord]>; - let Documentation = [Undocumented]; -} - def HLSLNodeObject : InheritableAttr { let Spellings = []; // No spellings! let Subjects = SubjectList<[CXXRecord]>; @@ -1110,6 +1158,28 @@ def HLSLNodeObject : InheritableAttr { }]; } +// HLSL Ray Query Attribute + +def HLSLRayQueryObject : InheritableAttr { + let Spellings = []; // No spellings! 
+ let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + +def HLSLSubObject : InheritableAttr { + let Spellings = []; // No spellings! + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; + let Args = [UnsignedArgument<"SubObjKindUint">, UnsignedArgument<"HitGroupType">]; +} + +// HLSL HitObject Attribute + +def HLSLHitObject : InheritableAttr { + let Spellings = []; // No spellings! + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} // HLSL Parameter Attributes @@ -1386,6 +1456,20 @@ def VKStorageClassExt : InheritableAttr { let Documentation = [Undocumented]; } +def VKBufferPointer : InheritableAttr { + let Spellings = [CXX11<"", "hlsl_vk_buffer_pointer", 2021>]; + let LangOpts = [SPIRV]; + let Documentation = [Undocumented]; +} + +def VKAliasedPointer : InheritableAttr { + let Spellings = [CXX11<"vk", "aliased_pointer">]; + let Subjects = SubjectList<[Var, ParmVar], ErrorDiag>; + let Args = []; + let LangOpts = [SPIRV]; + let Documentation = [Undocumented]; +} + // Global variables that are of struct type def StructGlobalVar : SubsetSubjecthasGlobalStorage() && S->getType()->isStructureType()}]>; diff --git a/tools/clang/include/clang/Basic/DiagnosticGroups.td b/tools/clang/include/clang/Basic/DiagnosticGroups.td index 39618aed04..ff21b34652 100644 --- a/tools/clang/include/clang/Basic/DiagnosticGroups.td +++ b/tools/clang/include/clang/Basic/DiagnosticGroups.td @@ -799,10 +799,12 @@ def HLSLPayloadAccessQualifer: DiagGroup<"payload-access-qualifier", [ HLSLPayloadAccessQualiferPerf, HLSLPayloadAccessQualiferCall ]>; +def HLSLRayQueryFlags : DiagGroup<"hlsl-rayquery-flags">; def HLSLSemanticIdentifierCollision : DiagGroup<"semantic-identifier-collision">; def HLSLStructurizeExitsLifetimeMarkersConflict: DiagGroup<"structurize-exits-lifetime-markers-conflict">; def HLSLParameterUsage : DiagGroup<"parameter-usage">; def HLSLAvailability: DiagGroup<"hlsl-availability">; +def 
HLSLAvailabilityConstant: DiagGroup<"hlsl-availability-constant">; def HLSLBarrier : DiagGroup<"hlsl-barrier">; def HLSLLegacyLiterals : DiagGroup<"hlsl-legacy-literal">; // HLSL Change Ends diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 99b6534e1f..6254e5fc71 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -7519,8 +7522,8 @@ def err_hlsl_half_load_store: Error< "LoadHalf and StoreHalf are not supported for min precision mode">; def err_hlsl_interfaces_cannot_inherit: Error< "interfaces cannot inherit from other types">; -def err_hlsl_invalid_range_1_4: Error< - "invalid value, valid range is between 1 and 4 inclusive">; +def err_hlsl_invalid_range_1_to_max + : Error<"invalid value, valid range is between 1 and %0 inclusive">; def err_hlsl_matrix_member_bad_format: Error< "invalid format for matrix subscript '%0'">; def err_hlsl_matrix_member_empty: Error< @@ -7549,6 +7552,8 @@ def err_hlsl_vector_element_index_out_of_bounds: Error< "vector element index '%0' is out of bounds">; def err_hlsl_vector_member_too_many_positions: Error< "more than four positions are referenced in '%0'">; +def err_hlsl_vector_member_on_long_vector: Error< + "invalid swizzle '%0' on vector of over 4 elements.">; def err_hlsl_missing_type_specifier : Error< // Patterened after err_missing_type_specifier "HLSL requires a type specifier for all declarations">; def err_hlsl_multiple_concrete_bases : Error< @@ -7652,8 +7657,20 @@ def 
err_payload_fields_is_payload_and_overqualified : Error< "payload field '%0' is a payload struct. Payload access qualifiers are not allowed on payload types.">; def warn_hlsl_payload_qualifer_dropped : Warning< "payload access qualifiers ignored. These are only supported for lib_6_7+ targets and lib_6_6 with with the -enable-payload-qualifiers flag.">, InGroup; +def warn_hlsl_rayquery_flags_disallowed : Warning< + "A non-zero value for the RayQueryFlags template argument requires" + " shader model 6.9 or above.">, DefaultError, InGroup; +def warn_hlsl_rayquery_flags_conflict : Warning< + "When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags" + " must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.">, DefaultError, InGroup; def err_hlsl_unsupported_builtin_op: Error< "operator cannot be used with built-in type %0">; +def warn_hlsl_builtin_constant_unavailable: Warning< + "potential misuse of built-in constant %0 in shader model %1; introduced" + " in shader model %2">, InGroup; +def warn_hlsl_builtin_type_unavailable: Warning< + "potential misuse of built-in type %0 in shader model %1; introduced" + " in shader model %2">, DefaultError, InGroup; def err_hlsl_unsupported_char_literal : Error< "unsupported style of char literal - use a single-character char-based literal">; def err_hlsl_unsupported_clipplane_argument_expression : Error< @@ -7689,8 +7706,10 @@ def err_hlsl_varmodifierna : Error< "%0 is not a valid modifier for a %1">; def err_hlsl_varmodifierna_decltype : Error< "%0 is not a valid modifier for a declaration of type %1">; -def note_hlsl_globallycoherent_applies_to : Note< - "'globallycoherent' can only be applied to UAV or RWDispatchNodeInputRecord objects">; +def note_hlsl_coherence_applies_to : Note< + "'%select{reordercoherent|globallycoherent}0' can only be applied to UAV%select{| or RWDispatchNodeInputRecord}0 objects">; +def warn_hlsl_gc_implies_rc_attribute : Warning< + "attribute 'reordercoherent' implied by 'globallycoherent' in 
%0. 'reordercoherent' ignored.">; def err_hlsl_varmodifiersna : Error< "%0 and %1 cannot be used together for a %2">; def err_hlsl_vla : Error< // Patterened after err_opencl_vla @@ -7701,8 +7720,6 @@ def err_hlsl_control_flow_cond_not_scalar : Error< "%0 statement conditional expressions must evaluate to a scalar">; def err_hlsl_unsupportedvectortype : Error< "%0 is declared with type %1, but only primitive scalar values are supported">; -def err_hlsl_unsupportedvectorsize : Error< - "%0 is declared with size %1, but only values 1 through 4 are supported">; def err_hlsl_unsupportedmatrixsize : Error< "%0 is declared with size %1x%2, but only values 1 through 4 are supported">; def err_hlsl_norm_float_only : Error< @@ -7741,9 +7758,17 @@ def warn_hlsl_semantic_attribute_position_misuse_hint: Warning< def warn_hlsl_unary_negate_unsigned : Warning< "unary negate of unsigned value is still unsigned">, InGroup, DefaultWarn; -def warn_hlsl_impcast_glc_mismatch : Warning< - "implicit conversion from %0 to %1 %select{loses|adds}2 globallycoherent annotation">, - InGroup, DefaultWarn; +def warn_hlsl_impcast_coherence_mismatch : Warning< + "implicit conversion from %0 to %1 %select{" + "demotes globallycoherent to reordercoherent|" + "promotes reordercoherent to globallycoherent|" + "loses reordercoherent|" + "loses globallycoherent|" + "adds reordercoherent|" + "adds globallycoherent}2 annotation">, + InGroup; +def warn_hlsl_glc_implies_rdc : Warning< + "attribute 'globallycoherent' implies 'reordercoherent'">, InGroup; def warn_hlsl_narrowing : Warning< "conversion from larger type %0 to smaller type %1, possible loss of data">, InGroup, DefaultWarn; @@ -7826,7 +7851,7 @@ def warn_hlsl_intrinsic_in_wrong_shader_model : Warning< "intrinsic %0 potentially used by '%1' requires shader model %2 or greater">, DefaultError, InGroup; def warn_hlsl_intrinsic_overload_in_wrong_shader_model : Warning< - "overload of intrinsic %0 requires shader model %1 or greater">, + "overload of 
intrinsic %0 requires shader model %1 or greater">, DefaultError, InGroup; def err_hlsl_intrinsic_template_arg_unsupported: Error< "Explicit template arguments on intrinsic %0 are not supported">; @@ -7853,6 +7878,14 @@ def err_hlsl_load_from_mesh_out_arrays: Error< "output arrays of a mesh shader can not be read from">; def err_hlsl_out_indices_array_incorrect_access: Error< "a vector in out indices array must be accessed as a whole">; +def err_hlsl_unsupported_long_vector + : Error<"vectors of over 4 elements in " + "%select{ConstantBuffers or TextureBuffers|" + "tessellation patches|geometry streams|node records|" + "cbuffers or tbuffers|user-defined struct parameter|" + "entry function parameters|entry function return type|" + "patch constant function parameters|patch constant function return type|" + "payload parameters}0 are not supported">; def err_hlsl_logical_binop_scalar : Error< "operands for short-circuiting logical binary operator must be scalar, for non-scalar types use '%select{and|or}0'">; def err_hlsl_ternary_scalar : Error< @@ -7953,7 +7986,7 @@ def err_hlsl_barrier_invalid_memory_flags: Error< "UAV_MEMORY, GROUP_SHARED_MEMORY, NODE_INPUT_MEMORY, NODE_OUTPUT_MEMORY flags">; def err_hlsl_barrier_invalid_semantic_flags: Error< "invalid SemanticFlags for Barrier operation; expected 0 or some combination of " - "GROUP_SYNC, GROUP_SCOPE, DEVICE_SCOPE flags">; + "GROUP_SYNC, GROUP_SCOPE, DEVICE_SCOPE%select{|, REORDER_SCOPE}0 flags">; def warn_hlsl_barrier_group_memory_requires_group: Warning< "GROUP_SHARED_MEMORY specified for Barrier operation when context has no visible group">, InGroup, DefaultError; @@ -7974,10 +8007,20 @@ def warn_hlsl_legacy_integer_literal_signedness: Warning< InGroup, DefaultIgnore; def err_hlsl_unsupported_semantic_index: Error< "'%0' is defined with semantic index %1, but only values 0 through %2 are supported">; + +// Shader Execution Reordering +def err_hlsl_reorder_unsupported_stage : Error< + "dx::MaybeReorderThread is 
unavailable in shader stage '%0' (requires 'raygeneration')">; +def err_hlsl_hitobject_unsupported_stage : Error< + "dx::HitObject is unavailable in shader stage '%0' (requires 'raygeneration', 'closesthit' or 'miss')">; // HLSL Change Ends // SPIRV Change Starts def err_hlsl_vulkan_specific_feature: Error<"%0 is a Vulkan specific feature">; +def err_hlsl_vk_pointer_cast_alignment: Error< + "Vulkan buffer pointer cannot be cast to greater alignment">; +def err_hlsl_vk_static_pointer_cast_type: Error< + "vk::static_pointer_cast() content type must be base class of argument's content type">; // SPIRV Change Ends let CategoryName = "OpenMP Issue" in { diff --git a/tools/clang/include/clang/Basic/LangOptions.h b/tools/clang/include/clang/Basic/LangOptions.h index 8dc15da5d8..433b767c8d 100644 --- a/tools/clang/include/clang/Basic/LangOptions.h +++ b/tools/clang/include/clang/Basic/LangOptions.h @@ -15,7 +15,7 @@ #ifndef LLVM_CLANG_BASIC_LANGOPTIONS_H #define LLVM_CLANG_BASIC_LANGOPTIONS_H -#include "dxc/DXIL/DxilConstants.h" // For DXIL::DefaultLinkage +#include "dxc/DXIL/DxilConstants.h" // For DXIL:: default values. #include "dxc/Support/HLSLVersion.h" #include "clang/Basic/CommentOptions.h" #include "clang/Basic/LLVM.h" @@ -168,6 +168,7 @@ class LangOptions : public LangOptionsBase { hlsl::DXIL::DefaultLinkage::Default; /// Whether use row major as default matrix major. 
bool HLSLDefaultRowMajor = false; + unsigned MaxHLSLVectorLength = hlsl::DXIL::kDefaultMaxVectorLength; // HLSL Change Ends bool SPIRV = false; // SPIRV Change diff --git a/tools/clang/include/clang/Basic/TokenKinds.def b/tools/clang/include/clang/Basic/TokenKinds.def index 2267b12b74..6933c965cf 100644 --- a/tools/clang/include/clang/Basic/TokenKinds.def +++ b/tools/clang/include/clang/Basic/TokenKinds.def @@ -508,6 +508,7 @@ KEYWORD(lineadj , KEYHLSL) KEYWORD(triangle , KEYHLSL) KEYWORD(triangleadj , KEYHLSL) KEYWORD(globallycoherent , KEYHLSL) +KEYWORD(reordercoherent , KEYHLSL) KEYWORD(interface , KEYHLSL) KEYWORD(sampler_state , KEYHLSL) KEYWORD(technique , KEYHLSL) diff --git a/tools/clang/include/clang/SPIRV/FeatureManager.h b/tools/clang/include/clang/SPIRV/FeatureManager.h index 32ee187091..3c1871df37 100644 --- a/tools/clang/include/clang/SPIRV/FeatureManager.h +++ b/tools/clang/include/clang/SPIRV/FeatureManager.h @@ -59,10 +59,12 @@ enum class Extension { KHR_physical_storage_buffer, KHR_vulkan_memory_model, NV_compute_shader_derivatives, + KHR_compute_shader_derivatives, KHR_fragment_shader_barycentric, KHR_maximal_reconvergence, KHR_float_controls, NV_shader_subgroup_partitioned, + KHR_quad_control, Unknown, }; @@ -132,6 +134,9 @@ class FeatureManager { /// Returns false otherwise. bool isTargetEnvVulkan1p3OrAbove(); + /// Return true if the target environment is a Vulkan environment. + bool isTargetEnvVulkan(); + /// Returns the spv_target_env matching the input string if possible. /// This functions matches the spv_target_env with the command-line version /// of the name ('vulkan1.1', not 'Vulkan 1.1'). diff --git a/tools/clang/include/clang/SPIRV/SpirvBuilder.h b/tools/clang/include/clang/SPIRV/SpirvBuilder.h index f03735115b..5e03d1ef96 100644 --- a/tools/clang/include/clang/SPIRV/SpirvBuilder.h +++ b/tools/clang/include/clang/SPIRV/SpirvBuilder.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. 
See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVBUILDER_H #define LLVM_CLANG_SPIRV_SPIRVBUILDER_H @@ -239,7 +242,7 @@ class SpirvBuilder { /// \brief Creates an operation with the given OpGroupNonUniform* SPIR-V /// opcode. SpirvGroupNonUniformOp *createGroupNonUniformOp( - spv::Op op, QualType resultType, spv::Scope execScope, + spv::Op op, QualType resultType, llvm::Optional execScope, llvm::ArrayRef operands, SourceLocation, llvm::Optional groupOp = llvm::None); @@ -273,6 +276,14 @@ class SpirvBuilder { SpirvInstruction *sample, SourceLocation); + /// \brief Creates an OpConverPtrToU SPIR-V instruction with the given + /// parameters. + SpirvConvertPtrToU *createConvertPtrToU(SpirvInstruction *ptr, QualType type); + + /// \brief Creates an OpConverUToPtr SPIR-V instruction with the given + /// parameters. + SpirvConvertUToPtr *createConvertUToPtr(SpirvInstruction *val, QualType type); + /// \brief Creates SPIR-V instructions for sampling the given image. /// /// If compareVal is given a non-zero value, *Dref* variants of OpImageSample* diff --git a/tools/clang/include/clang/SPIRV/SpirvContext.h b/tools/clang/include/clang/SPIRV/SpirvContext.h index e65097bedb..c18c139642 100644 --- a/tools/clang/include/clang/SPIRV/SpirvContext.h +++ b/tools/clang/include/clang/SPIRV/SpirvContext.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVCONTEXT_H #define LLVM_CLANG_SPIRV_SPIRVCONTEXT_H @@ -317,6 +320,13 @@ class SpirvContext { const HybridPointerType *getPointerType(QualType pointee, spv::StorageClass); + const ForwardPointerType *getForwardPointerType(QualType pointee); + + const SpirvPointerType *getForwardReference(QualType type); + + void registerForwardReference(QualType type, + const SpirvPointerType *pointerType); + /// Generates (or reuses an existing) OpString for the given string literal. SpirvString *getSpirvString(llvm::StringRef str); @@ -478,6 +488,8 @@ class SpirvContext { llvm::SmallVector hybridStructTypes; llvm::DenseMap pointerTypes; llvm::SmallVector hybridPointerTypes; + llvm::MapVector forwardPointerTypes; + llvm::MapVector forwardReferences; llvm::DenseSet functionTypes; llvm::DenseMap spirvIntrinsicTypesById; llvm::SmallVector spirvIntrinsicTypes; diff --git a/tools/clang/include/clang/SPIRV/SpirvInstruction.h b/tools/clang/include/clang/SPIRV/SpirvInstruction.h index 7ec1375bde..f49a295610 100644 --- a/tools/clang/include/clang/SPIRV/SpirvInstruction.h +++ b/tools/clang/include/clang/SPIRV/SpirvInstruction.h @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVINSTRUCTION_H #define LLVM_CLANG_SPIRV_SPIRVINSTRUCTION_H @@ -67,6 +71,10 @@ class SpirvInstruction { IK_ConstantComposite, IK_ConstantNull, + // Pointer <-> uint conversions. 
+ IK_ConvertPtrToU, + IK_ConvertUToPtr, + // OpUndef IK_Undef, @@ -1306,6 +1314,50 @@ class SpirvConstantNull : public SpirvConstant { bool operator==(const SpirvConstantNull &that) const; }; +class SpirvConvertPtrToU : public SpirvInstruction { +public: + SpirvConvertPtrToU(SpirvInstruction *ptr, QualType type, + SourceLocation loc = {}, SourceRange range = {}); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvConvertPtrToU) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_ConvertPtrToU; + } + + bool operator==(const SpirvConvertPtrToU &that) const; + + bool invokeVisitor(Visitor *v) override; + + SpirvInstruction *getPtr() const { return ptr; } + +private: + SpirvInstruction *ptr; +}; + +class SpirvConvertUToPtr : public SpirvInstruction { +public: + SpirvConvertUToPtr(SpirvInstruction *intValue, QualType type, + SourceLocation loc = {}, SourceRange range = {}); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvConvertUToPtr) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_ConvertUToPtr; + } + + bool operator==(const SpirvConvertUToPtr &that) const; + + bool invokeVisitor(Visitor *v) override; + + SpirvInstruction *getVal() const { return val; } + +private: + SpirvInstruction *val; +}; + class SpirvUndef : public SpirvInstruction { public: SpirvUndef(QualType type); @@ -1514,7 +1566,8 @@ class SpirvFunctionCall : public SpirvInstruction { /// \brief OpGroupNonUniform* instructions class SpirvGroupNonUniformOp : public SpirvInstruction { public: - SpirvGroupNonUniformOp(spv::Op opcode, QualType resultType, spv::Scope scope, + SpirvGroupNonUniformOp(spv::Op opcode, QualType resultType, + llvm::Optional scope, llvm::ArrayRef operands, SourceLocation loc, llvm::Optional group); @@ -1528,7 +1581,8 @@ class SpirvGroupNonUniformOp : public SpirvInstruction { bool invokeVisitor(Visitor *v) override; - spv::Scope getExecutionScope() const { return execScope; } + 
bool hasExecutionScope() const { return execScope.hasValue(); } + spv::Scope getExecutionScope() const { return execScope.getValue(); } llvm::ArrayRef getOperands() const { return operands; } @@ -1546,7 +1600,7 @@ class SpirvGroupNonUniformOp : public SpirvInstruction { } private: - spv::Scope execScope; + llvm::Optional execScope; llvm::SmallVector operands; llvm::Optional groupOp; }; diff --git a/tools/clang/include/clang/SPIRV/SpirvType.h b/tools/clang/include/clang/SPIRV/SpirvType.h index 221f01e5ff..00a00ef238 100644 --- a/tools/clang/include/clang/SPIRV/SpirvType.h +++ b/tools/clang/include/clang/SPIRV/SpirvType.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVTYPE_H #define LLVM_CLANG_SPIRV_SPIRVTYPE_H @@ -53,6 +56,7 @@ class SpirvType { TK_RuntimeArray, TK_Struct, TK_Pointer, + TK_ForwardPointer, TK_Function, TK_AccelerationStructureNV, TK_RayQueryKHR, @@ -387,6 +391,26 @@ class SpirvPointerType : public SpirvType { spv::StorageClass storageClass; }; +/// Represents a SPIR-V forwarding pointer type. +class ForwardPointerType : public SpirvType { +public: + ForwardPointerType(QualType pointee) + : SpirvType(TK_ForwardPointer), pointeeType(pointee) {} + + static bool classof(const SpirvType *t) { + return t->getKind() == TK_ForwardPointer; + } + + const QualType getPointeeType() const { return pointeeType; } + + bool operator==(const ForwardPointerType &that) const { + return pointeeType == that.pointeeType; + } + +private: + const QualType pointeeType; +}; + /// Represents a SPIR-V function type. None of the parameters nor the return /// type is allowed to be a hybrid type. 
class FunctionType : public SpirvType { diff --git a/tools/clang/include/clang/SPIRV/SpirvVisitor.h b/tools/clang/include/clang/SPIRV/SpirvVisitor.h index 303a4600a1..93682518a1 100644 --- a/tools/clang/include/clang/SPIRV/SpirvVisitor.h +++ b/tools/clang/include/clang/SPIRV/SpirvVisitor.h @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVVISITOR_H #define LLVM_CLANG_SPIRV_SPIRVVISITOR_H @@ -89,6 +93,8 @@ class Visitor { DEFINE_VISIT_METHOD(SpirvConstantFloat) DEFINE_VISIT_METHOD(SpirvConstantComposite) DEFINE_VISIT_METHOD(SpirvConstantNull) + DEFINE_VISIT_METHOD(SpirvConvertPtrToU) + DEFINE_VISIT_METHOD(SpirvConvertUToPtr) DEFINE_VISIT_METHOD(SpirvUndef) DEFINE_VISIT_METHOD(SpirvCompositeConstruct) DEFINE_VISIT_METHOD(SpirvCompositeExtract) diff --git a/tools/clang/include/clang/Sema/Sema.h b/tools/clang/include/clang/Sema/Sema.h index 42ab80b617..755c7e0755 100644 --- a/tools/clang/include/clang/Sema/Sema.h +++ b/tools/clang/include/clang/Sema/Sema.h @@ -3804,9 +3804,8 @@ class Sema { bool CheckHLSLUnaryExprOrTypeTraitOperand(QualType ExprType, SourceLocation Loc, UnaryExprOrTypeTrait ExprKind); void DiagnoseHLSLDeclAttr(const Decl *D, const Attr *A); - void DiagnoseGloballyCoherentMismatch(const Expr *SrcExpr, - QualType TargetType, - SourceLocation Loc); + void DiagnoseCoherenceMismatch(const Expr *SrcExpr, QualType TargetType, + SourceLocation Loc); void CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, const FunctionProtoType *Proto); void DiagnoseReachableHLSLCall(CallExpr *CE, const hlsl::ShaderModel *SM, diff --git a/tools/clang/include/clang/Sema/SemaHLSL.h b/tools/clang/include/clang/Sema/SemaHLSL.h index 40b030b430..59d99ab4c5 100644 --- 
a/tools/clang/include/clang/Sema/SemaHLSL.h +++ b/tools/clang/include/clang/Sema/SemaHLSL.h @@ -128,6 +128,8 @@ unsigned CaculateInitListArraySizeForHLSL(clang::Sema *sema, const clang::InitListExpr *InitList, const clang::QualType EltTy); +bool ContainsLongVector(clang::QualType); + bool IsConversionToLessOrEqualElements(clang::Sema *self, const clang::ExprResult &sourceExpr, const clang::QualType &targetType, @@ -201,7 +203,8 @@ void Indent(unsigned int Indentation, llvm::raw_ostream &Out); void GetHLSLAttributedTypes(clang::Sema *self, clang::QualType type, const clang::AttributedType **ppMatrixOrientation, const clang::AttributedType **ppNorm, - const clang::AttributedType **ppGLC); + const clang::AttributedType **ppGLC, + const clang::AttributedType **ppRDC); bool IsMatrixType(clang::Sema *self, clang::QualType type); bool IsVectorType(clang::Sema *self, clang::QualType type); diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 02125d5a84..0a688c03fa 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -6,6 +6,9 @@ // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // // This file implements the ASTContext interface for HLSL. 
// // // /////////////////////////////////////////////////////////////////////////////// @@ -23,6 +26,7 @@ #include "clang/AST/ExternalASTSource.h" #include "clang/AST/HlslBuiltinTypeDeclBuilder.h" #include "clang/AST/TypeLoc.h" +#include "clang/Basic/Specifiers.h" #include "clang/Sema/Overload.h" #include "clang/Sema/Sema.h" #include "clang/Sema/SemaDiagnostic.h" @@ -329,6 +333,9 @@ void hlsl::AddHLSLMatrixTemplate(ASTContext &context, typeDeclBuilder.addField("h", vectorArrayType); + typeDeclBuilder.getRecordDecl()->addAttr( + HLSLMatrixAttr::CreateImplicit(context)); + // Add an operator[]. The operator ranges from zero to rowcount-1, and returns // a vector of colcount elements. const unsigned int templateDepth = 0; @@ -385,6 +392,9 @@ void hlsl::AddHLSLVectorTemplate(ASTContext &context, // Add an 'h' field to hold the handle. typeDeclBuilder.addField("h", vectorType); + typeDeclBuilder.getRecordDecl()->addAttr( + HLSLVectorAttr::CreateImplicit(context)); + // Add an operator[]. The operator ranges from zero to colcount-1, and returns // a scalar. 
@@ -525,20 +535,33 @@ hlsl::DeclareRecordTypeWithHandleAndNoMemberFunctions(ASTContext &context, /// CXXRecordDecl * hlsl::DeclareRecordTypeWithHandle(ASTContext &context, StringRef name, - bool isCompleteType /*= true */) { + bool isCompleteType /*= true */, + InheritableAttr *Attr) { BuiltinTypeDeclBuilder typeDeclBuilder(context.getTranslationUnitDecl(), name, TagDecl::TagKind::TTK_Struct); typeDeclBuilder.startDefinition(); typeDeclBuilder.addField("h", GetHLSLObjectHandleType(context)); + if (Attr) + typeDeclBuilder.getRecordDecl()->addAttr(Attr); + if (isCompleteType) return typeDeclBuilder.completeDefinition(); return typeDeclBuilder.getRecordDecl(); } +AvailabilityAttr *ConstructAvailabilityAttribute(clang::ASTContext &context, + VersionTuple Introduced) { + AvailabilityAttr *AAttr = AvailabilityAttr::CreateImplicit( + context, &context.Idents.get(""), clang::VersionTuple(6, 9), + clang::VersionTuple(), clang::VersionTuple(), false, ""); + return AAttr; +} + // creates a global static constant unsigned integer with value. 
// equivalent to: static const uint name = val; static void AddConstUInt(clang::ASTContext &context, DeclContext *DC, - StringRef name, unsigned val) { + StringRef name, unsigned val, + AvailabilityAttr *AAttr = nullptr) { IdentifierInfo &Id = context.Idents.get(name, tok::TokenKind::identifier); QualType type = context.getConstType(context.UnsignedIntTy); VarDecl *varDecl = VarDecl::Create(context, DC, NoLoc, NoLoc, &Id, type, @@ -548,6 +571,9 @@ static void AddConstUInt(clang::ASTContext &context, DeclContext *DC, context, llvm::APInt(context.getIntWidth(type), val), type, NoLoc); varDecl->setInit(exprVal); varDecl->setImplicit(true); + if (AAttr) + varDecl->addAttr(AAttr); + DC->addDecl(varDecl); } @@ -560,6 +586,7 @@ static void AddConstUInt(clang::ASTContext &context, StringRef name, struct Enumerant { StringRef name; unsigned value; + AvailabilityAttr *avail = nullptr; }; static void AddTypedefPseudoEnum(ASTContext &context, StringRef name, @@ -575,33 +602,45 @@ static void AddTypedefPseudoEnum(ASTContext &context, StringRef name, enumDecl->setImplicit(true); // static const uint = ; for (const Enumerant &enumerant : enumerants) { - AddConstUInt(context, curDC, enumerant.name, enumerant.value); + AddConstUInt(context, curDC, enumerant.name, enumerant.value, + enumerant.avail); } } /// Adds all constants and enums for ray tracing void hlsl::AddRaytracingConstants(ASTContext &context) { + + // Create aversion tuple for availability attributes + // for the RAYQUERY_FLAG enum + VersionTuple VT69 = VersionTuple(6, 9); + AddTypedefPseudoEnum( context, "RAY_FLAG", - { - {"RAY_FLAG_NONE", (unsigned)DXIL::RayFlag::None}, - {"RAY_FLAG_FORCE_OPAQUE", (unsigned)DXIL::RayFlag::ForceOpaque}, - {"RAY_FLAG_FORCE_NON_OPAQUE", - (unsigned)DXIL::RayFlag::ForceNonOpaque}, - {"RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH", - (unsigned)DXIL::RayFlag::AcceptFirstHitAndEndSearch}, - {"RAY_FLAG_SKIP_CLOSEST_HIT_SHADER", - (unsigned)DXIL::RayFlag::SkipClosestHitShader}, - 
{"RAY_FLAG_CULL_BACK_FACING_TRIANGLES", - (unsigned)DXIL::RayFlag::CullBackFacingTriangles}, - {"RAY_FLAG_CULL_FRONT_FACING_TRIANGLES", - (unsigned)DXIL::RayFlag::CullFrontFacingTriangles}, - {"RAY_FLAG_CULL_OPAQUE", (unsigned)DXIL::RayFlag::CullOpaque}, - {"RAY_FLAG_CULL_NON_OPAQUE", (unsigned)DXIL::RayFlag::CullNonOpaque}, - {"RAY_FLAG_SKIP_TRIANGLES", (unsigned)DXIL::RayFlag::SkipTriangles}, - {"RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES", - (unsigned)DXIL::RayFlag::SkipProceduralPrimitives}, - }); + {{"RAY_FLAG_NONE", (unsigned)DXIL::RayFlag::None}, + {"RAY_FLAG_FORCE_OPAQUE", (unsigned)DXIL::RayFlag::ForceOpaque}, + {"RAY_FLAG_FORCE_NON_OPAQUE", (unsigned)DXIL::RayFlag::ForceNonOpaque}, + {"RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH", + (unsigned)DXIL::RayFlag::AcceptFirstHitAndEndSearch}, + {"RAY_FLAG_SKIP_CLOSEST_HIT_SHADER", + (unsigned)DXIL::RayFlag::SkipClosestHitShader}, + {"RAY_FLAG_CULL_BACK_FACING_TRIANGLES", + (unsigned)DXIL::RayFlag::CullBackFacingTriangles}, + {"RAY_FLAG_CULL_FRONT_FACING_TRIANGLES", + (unsigned)DXIL::RayFlag::CullFrontFacingTriangles}, + {"RAY_FLAG_CULL_OPAQUE", (unsigned)DXIL::RayFlag::CullOpaque}, + {"RAY_FLAG_CULL_NON_OPAQUE", (unsigned)DXIL::RayFlag::CullNonOpaque}, + {"RAY_FLAG_SKIP_TRIANGLES", (unsigned)DXIL::RayFlag::SkipTriangles}, + {"RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES", + (unsigned)DXIL::RayFlag::SkipProceduralPrimitives}, + {"RAY_FLAG_FORCE_OMM_2_STATE", (unsigned)DXIL::RayFlag::ForceOMM2State, + ConstructAvailabilityAttribute(context, VT69)}}); + + AddTypedefPseudoEnum( + context, "RAYQUERY_FLAG", + {{"RAYQUERY_FLAG_NONE", (unsigned)DXIL::RayQueryFlag::None}, + {"RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS", + (unsigned)DXIL::RayQueryFlag::AllowOpacityMicromaps, + ConstructAvailabilityAttribute(context, VT69)}}); AddTypedefPseudoEnum( context, "COMMITTED_STATUS", @@ -663,6 +702,10 @@ void hlsl::AddRaytracingConstants(ASTContext &context) { AddConstUInt( context, StringRef("RAYTRACING_PIPELINE_FLAG_SKIP_PROCEDURAL_PRIMITIVES"), 
(unsigned)DXIL::RaytracingPipelineFlags::SkipProceduralPrimitives); + AddConstUInt(context, context.getTranslationUnitDecl(), + StringRef("RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS"), + (unsigned)DXIL::RaytracingPipelineFlags::AllowOpacityMicromaps, + ConstructAvailabilityAttribute(context, VT69)); } /// Adds all constants and enums for sampler feedback @@ -675,6 +718,8 @@ void hlsl::AddSamplerFeedbackConstants(ASTContext &context) { /// Adds all enums for Barrier intrinsic void hlsl::AddBarrierConstants(ASTContext &context) { + VersionTuple VT69 = VersionTuple(6, 9); + AddTypedefPseudoEnum( context, "MEMORY_TYPE_FLAG", {{"UAV_MEMORY", (unsigned)DXIL::MemoryTypeFlag::UavMemory}, @@ -687,7 +732,9 @@ void hlsl::AddBarrierConstants(ASTContext &context) { context, "BARRIER_SEMANTIC_FLAG", {{"GROUP_SYNC", (unsigned)DXIL::BarrierSemanticFlag::GroupSync}, {"GROUP_SCOPE", (unsigned)DXIL::BarrierSemanticFlag::GroupScope}, - {"DEVICE_SCOPE", (unsigned)DXIL::BarrierSemanticFlag::DeviceScope}}); + {"DEVICE_SCOPE", (unsigned)DXIL::BarrierSemanticFlag::DeviceScope}, + {"REORDER_SCOPE", (unsigned)DXIL::BarrierSemanticFlag::ReorderScope, + ConstructAvailabilityAttribute(context, VT69)}}); } static Expr *IntConstantAsBoolExpr(clang::Sema &sema, uint64_t value) { @@ -915,6 +962,7 @@ CXXRecordDecl *hlsl::DeclareTemplateTypeWithHandleInDeclContext( ASTContext &context, DeclContext *declContext, StringRef name, uint8_t templateArgCount, TypeSourceInfo *defaultTypeArgValue, InheritableAttr *Attr) { + DXASSERT(templateArgCount != 0, "otherwise caller should be creating a class or struct"); DXASSERT(templateArgCount <= 2, "otherwise the function needs to be updated " @@ -938,11 +986,9 @@ CXXRecordDecl *hlsl::DeclareTemplateTypeWithHandleInDeclContext( QualType elementType = context.getTemplateTypeParmType( /*templateDepth*/ 0, 0, ParameterPackFalse, elementTemplateParamDecl); - if (templateArgCount > 1 && - // Only need array type for inputpatch and outputpatch. 
- // Avoid Texture2DMS which may use 0 count. - // TODO: use hlsl types to do the check. - !name.startswith("Texture") && !name.startswith("RWTexture")) { + // Only need array type for inputpatch and outputpatch. + if (Attr && isa(Attr)) { + DXASSERT(templateArgCount == 2, "Tess patches need 2 template params"); Expr *countExpr = DeclRefExpr::Create( context, NestedNameSpecifierLoc(), NoLoc, countTemplateParamDecl, false, DeclarationNameInfo(countTemplateParamDecl->getDeclName(), NoLoc), @@ -1033,10 +1079,51 @@ static void CreateConstructorDeclaration( (*constructorDecl)->setAccess(AccessSpecifier::AS_public); } +CXXConstructorDecl *hlsl::CreateConstructorDeclarationWithParams( + ASTContext &context, CXXRecordDecl *recordDecl, QualType resultType, + ArrayRef paramTypes, ArrayRef paramNames, + DeclarationName declarationName, bool isConst, bool isTemplateFunction) { + DXASSERT_NOMSG(recordDecl != nullptr); + DXASSERT_NOMSG(!resultType.isNull()); + DXASSERT_NOMSG(paramTypes.size() == paramNames.size()); + + TypeSourceInfo *tinfo; + CXXConstructorDecl *constructorDecl; + CreateConstructorDeclaration(context, recordDecl, resultType, paramTypes, + declarationName, isConst, &constructorDecl, + &tinfo); + + // Create and associate parameters to constructor. 
+ SmallVector parmVarDecls; + if (!paramTypes.empty()) { + for (unsigned int i = 0; i < paramTypes.size(); ++i) { + IdentifierInfo *argIi = &context.Idents.get(paramNames[i]); + ParmVarDecl *parmVarDecl = ParmVarDecl::Create( + context, constructorDecl, NoLoc, NoLoc, argIi, paramTypes[i], + context.getTrivialTypeSourceInfo(paramTypes[i], NoLoc), + StorageClass::SC_None, nullptr); + parmVarDecl->setScopeInfo(0, i); + DXASSERT(parmVarDecl->getFunctionScopeIndex() == i, + "otherwise failed to set correct index"); + parmVarDecls.push_back(parmVarDecl); + } + constructorDecl->setParams(ArrayRef(parmVarDecls)); + AssociateParametersToFunctionPrototype(tinfo, &parmVarDecls.front(), + parmVarDecls.size()); + } + + // If this is going to be part of a template function decl, don't add it to + // the record because the template function decl will be added instead. + if (!isTemplateFunction) + recordDecl->addDecl(constructorDecl); + + return constructorDecl; +} + static void CreateObjectFunctionDeclaration( ASTContext &context, CXXRecordDecl *recordDecl, QualType resultType, ArrayRef args, DeclarationName declarationName, bool isConst, - CXXMethodDecl **functionDecl, TypeSourceInfo **tinfo) { + StorageClass SC, CXXMethodDecl **functionDecl, TypeSourceInfo **tinfo) { DXASSERT_NOMSG(recordDecl != nullptr); DXASSERT_NOMSG(functionDecl != nullptr); @@ -1048,8 +1135,8 @@ static void CreateObjectFunctionDeclaration( *tinfo = context.getTrivialTypeSourceInfo(functionQT, NoLoc); DXASSERT_NOMSG(*tinfo != nullptr); *functionDecl = CXXMethodDecl::Create( - context, recordDecl, NoLoc, declNameInfo, functionQT, *tinfo, - StorageClass::SC_None, InlineSpecifiedFalse, IsConstexprFalse, NoLoc); + context, recordDecl, NoLoc, declNameInfo, functionQT, *tinfo, SC, + InlineSpecifiedFalse, IsConstexprFalse, NoLoc); DXASSERT_NOMSG(*functionDecl != nullptr); (*functionDecl)->setLexicalDeclContext(recordDecl); (*functionDecl)->setAccess(AccessSpecifier::AS_public); @@ -1058,7 +1145,8 @@ static void 
CreateObjectFunctionDeclaration( CXXMethodDecl *hlsl::CreateObjectFunctionDeclarationWithParams( ASTContext &context, CXXRecordDecl *recordDecl, QualType resultType, ArrayRef paramTypes, ArrayRef paramNames, - DeclarationName declarationName, bool isConst, bool isTemplateFunction) { + DeclarationName declarationName, bool isConst, StorageClass SC, + bool isTemplateFunction) { DXASSERT_NOMSG(recordDecl != nullptr); DXASSERT_NOMSG(!resultType.isNull()); DXASSERT_NOMSG(paramTypes.size() == paramNames.size()); @@ -1066,7 +1154,7 @@ CXXMethodDecl *hlsl::CreateObjectFunctionDeclarationWithParams( TypeSourceInfo *tinfo; CXXMethodDecl *functionDecl; CreateObjectFunctionDeclaration(context, recordDecl, resultType, paramTypes, - declarationName, isConst, &functionDecl, + declarationName, isConst, SC, &functionDecl, &tinfo); // Create and associate parameters to method. @@ -1098,41 +1186,50 @@ CXXMethodDecl *hlsl::CreateObjectFunctionDeclarationWithParams( CXXRecordDecl *hlsl::DeclareUIntTemplatedTypeWithHandle( ASTContext &context, StringRef typeName, StringRef templateParamName, - TagTypeKind tagKind) { + InheritableAttr *Attr) { return DeclareUIntTemplatedTypeWithHandleInDeclContext( context, context.getTranslationUnitDecl(), typeName, templateParamName, - tagKind); + Attr); } CXXRecordDecl *hlsl::DeclareUIntTemplatedTypeWithHandleInDeclContext( ASTContext &context, DeclContext *declContext, StringRef typeName, - StringRef templateParamName, TagTypeKind tagKind) { + StringRef templateParamName, InheritableAttr *Attr) { // template FeedbackTexture2D[Array] { ... } - BuiltinTypeDeclBuilder typeDeclBuilder(declContext, typeName, tagKind); + BuiltinTypeDeclBuilder typeDeclBuilder(declContext, typeName, + TagTypeKind::TTK_Class); typeDeclBuilder.addIntegerTemplateParam(templateParamName, context.UnsignedIntTy); typeDeclBuilder.startDefinition(); typeDeclBuilder.addField( "h", context.UnsignedIntTy); // Add an 'h' field to hold the handle. 
+ if (Attr) + typeDeclBuilder.getRecordDecl()->addAttr(Attr); + return typeDeclBuilder.getRecordDecl(); } clang::CXXRecordDecl * -hlsl::DeclareConstantBufferViewType(clang::ASTContext &context, bool bTBuf) { +hlsl::DeclareConstantBufferViewType(clang::ASTContext &context, + InheritableAttr *Attr) { // Create ConstantBufferView template declaration in translation unit scope // like other resource. // template ConstantBuffer { int h; } DeclContext *DC = context.getTranslationUnitDecl(); + DXASSERT(Attr, "Constbuffer types require an attribute"); - BuiltinTypeDeclBuilder typeDeclBuilder( - DC, bTBuf ? "TextureBuffer" : "ConstantBuffer", - TagDecl::TagKind::TTK_Struct); + const char *TypeName = "ConstantBuffer"; + if (IsTBuffer(cast(Attr)->getResKind())) + TypeName = "TextureBuffer"; + BuiltinTypeDeclBuilder typeDeclBuilder(DC, TypeName, + TagDecl::TagKind::TTK_Struct); (void)typeDeclBuilder.addTypeTemplateParam("T"); typeDeclBuilder.startDefinition(); CXXRecordDecl *templateRecordDecl = typeDeclBuilder.getRecordDecl(); typeDeclBuilder.addField( "h", context.UnsignedIntTy); // Add an 'h' field to hold the handle. + typeDeclBuilder.getRecordDecl()->addAttr(Attr); typeDeclBuilder.getRecordDecl(); @@ -1143,7 +1240,14 @@ CXXRecordDecl *hlsl::DeclareRayQueryType(ASTContext &context) { // template RayQuery { ... 
} BuiltinTypeDeclBuilder typeDeclBuilder(context.getTranslationUnitDecl(), "RayQuery"); - typeDeclBuilder.addIntegerTemplateParam("flags", context.UnsignedIntTy); + typeDeclBuilder.addIntegerTemplateParam("constRayFlags", + context.UnsignedIntTy); + // create an optional second template argument with default value + // that contains the value of DXIL::RayFlag::None + llvm::Optional DefaultRayQueryFlag = + static_cast(DXIL::RayFlag::None); + typeDeclBuilder.addIntegerTemplateParam( + "RayQueryFlags", context.UnsignedIntTy, DefaultRayQueryFlag); typeDeclBuilder.startDefinition(); typeDeclBuilder.addField( "h", context.UnsignedIntTy); // Add an 'h' field to hold the handle. @@ -1160,10 +1264,51 @@ CXXRecordDecl *hlsl::DeclareRayQueryType(ASTContext &context) { context.DeclarationNames.getCXXConstructorName(canQualType), false, &pConstructorDecl, &pTypeSourceInfo); typeDeclBuilder.getRecordDecl()->addDecl(pConstructorDecl); - + typeDeclBuilder.getRecordDecl()->addAttr( + HLSLRayQueryObjectAttr::CreateImplicit(context)); return typeDeclBuilder.getRecordDecl(); } +CXXRecordDecl *hlsl::DeclareHitObjectType(NamespaceDecl &NSDecl) { + ASTContext &Context = NSDecl.getASTContext(); + // HitObject { ... } + BuiltinTypeDeclBuilder TypeDeclBuilder(&NSDecl, "HitObject"); + TypeDeclBuilder.startDefinition(); + + // Add handle to mark as HLSL object. + TypeDeclBuilder.addField("h", GetHLSLObjectHandleType(Context)); + CXXRecordDecl *RecordDecl = TypeDeclBuilder.getRecordDecl(); + + CanQualType canQualType = Context.getCanonicalType( + Context.getRecordType(TypeDeclBuilder.getRecordDecl())); + + // Add constructor that will be lowered to MOP_HitObject_MakeNop. 
+ CXXConstructorDecl *pConstructorDecl = nullptr; + TypeSourceInfo *pTypeSourceInfo = nullptr; + CreateConstructorDeclaration( + Context, RecordDecl, Context.VoidTy, {}, + Context.DeclarationNames.getCXXConstructorName(canQualType), false, + &pConstructorDecl, &pTypeSourceInfo); + RecordDecl->addDecl(pConstructorDecl); + pConstructorDecl->addAttr(HLSLIntrinsicAttr::CreateImplicit( + Context, "op", "", + static_cast(hlsl::IntrinsicOp::MOP_DxHitObject_MakeNop))); + pConstructorDecl->addAttr(HLSLCXXOverloadAttr::CreateImplicit(Context)); + + // Add AvailabilityAttribute for SM6.9+ + VersionTuple VT69 = VersionTuple(6, 9); + RecordDecl->addAttr(ConstructAvailabilityAttribute(Context, VT69)); + + // Add the implicit HLSLHitObjectAttr attribute to unambiguously recognize the + // builtin HitObject type. + RecordDecl->addAttr(HLSLHitObjectAttr::CreateImplicit(Context)); + RecordDecl->setImplicit(true); + + // Add to namespace + RecordDecl->setDeclContext(&NSDecl); + return RecordDecl; +} + CXXRecordDecl *hlsl::DeclareResourceType(ASTContext &context, bool bSampler) { // struct ResourceDescriptor { uint8 desc; } StringRef Name = bSampler ? 
".Sampler" : ".Resource"; @@ -1227,6 +1372,49 @@ CXXRecordDecl *hlsl::DeclareNodeOrRecordType( } #ifdef ENABLE_SPIRV_CODEGEN +CXXRecordDecl *hlsl::DeclareVkBufferPointerType(ASTContext &context, + DeclContext *declContext) { + BuiltinTypeDeclBuilder Builder(declContext, "BufferPointer", + TagDecl::TagKind::TTK_Struct); + TemplateTypeParmDecl *TyParamDecl = + Builder.addTypeTemplateParam("recordtype"); + Builder.addIntegerTemplateParam("alignment", context.UnsignedIntTy, 0); + + Builder.startDefinition(); + + QualType paramType = QualType(TyParamDecl->getTypeForDecl(), 0); + CXXRecordDecl *recordDecl = Builder.getRecordDecl(); + + CXXMethodDecl *methodDecl = CreateObjectFunctionDeclarationWithParams( + context, recordDecl, context.getLValueReferenceType(paramType), {}, {}, + DeclarationName(&context.Idents.get("Get")), true); + CanQualType canQualType = + recordDecl->getTypeForDecl()->getCanonicalTypeUnqualified(); + auto *copyConstructorDecl = CreateConstructorDeclarationWithParams( + context, recordDecl, context.VoidTy, + {context.getRValueReferenceType(canQualType)}, {"bufferPointer"}, + context.DeclarationNames.getCXXConstructorName(canQualType), false, true); + auto *addressConstructorDecl = CreateConstructorDeclarationWithParams( + context, recordDecl, context.VoidTy, {context.UnsignedIntTy}, {"address"}, + context.DeclarationNames.getCXXConstructorName(canQualType), false, true); + hlsl::CreateFunctionTemplateDecl( + context, recordDecl, copyConstructorDecl, + Builder.getTemplateDecl()->getTemplateParameters()->begin(), 2); + hlsl::CreateFunctionTemplateDecl( + context, recordDecl, addressConstructorDecl, + Builder.getTemplateDecl()->getTemplateParameters()->begin(), 2); + + StringRef OpcodeGroup = GetHLOpcodeGroupName(HLOpcodeGroup::HLIntrinsic); + unsigned Opcode = static_cast(IntrinsicOp::MOP_GetBufferContents); + methodDecl->addAttr( + HLSLIntrinsicAttr::CreateImplicit(context, OpcodeGroup, "", Opcode)); + 
methodDecl->addAttr(HLSLCXXOverloadAttr::CreateImplicit(context)); + copyConstructorDecl->addAttr(HLSLCXXOverloadAttr::CreateImplicit(context)); + addressConstructorDecl->addAttr(HLSLCXXOverloadAttr::CreateImplicit(context)); + + return Builder.completeDefinition(); +} + CXXRecordDecl *hlsl::DeclareInlineSpirvType(clang::ASTContext &context, clang::DeclContext *declContext, llvm::StringRef typeName, diff --git a/tools/clang/lib/AST/DeclCXX.cpp b/tools/clang/lib/AST/DeclCXX.cpp index 9ef771b932..baed44667f 100644 --- a/tools/clang/lib/AST/DeclCXX.cpp +++ b/tools/clang/lib/AST/DeclCXX.cpp @@ -48,34 +48,33 @@ void LazyASTUnresolvedSet::getFromExternalSource(ASTContext &C) const { } CXXRecordDecl::DefinitionData::DefinitionData(CXXRecordDecl *D) - : UserDeclaredConstructor(false), UserDeclaredSpecialMembers(0), - Aggregate(true), PlainOldData(true), Empty(true), Polymorphic(false), - Abstract(false), IsStandardLayout(true), HasNoNonEmptyBases(true), - HasPrivateFields(false), HasProtectedFields(false), HasPublicFields(false), - HasMutableFields(false), HasVariantMembers(false), HasOnlyCMembers(true), - HasInClassInitializer(false), HasUninitializedReferenceMember(false), - NeedOverloadResolutionForMoveConstructor(false), - NeedOverloadResolutionForMoveAssignment(false), - NeedOverloadResolutionForDestructor(false), - DefaultedMoveConstructorIsDeleted(false), - DefaultedMoveAssignmentIsDeleted(false), - DefaultedDestructorIsDeleted(false), - HasTrivialSpecialMembers(SMF_All), - DeclaredNonTrivialSpecialMembers(0), - HasIrrelevantDestructor(true), - HasConstexprNonCopyMoveConstructor(false), - DefaultedDefaultConstructorIsConstexpr(true), - HasConstexprDefaultConstructor(false), - HasNonLiteralTypeFieldsOrBases(false), ComputedVisibleConversions(false), - UserProvidedDefaultConstructor(false), DeclaredSpecialMembers(0), - ImplicitCopyConstructorHasConstParam(true), - ImplicitCopyAssignmentHasConstParam(true), - HasDeclaredCopyConstructorWithConstParam(false), - 
HasDeclaredCopyAssignmentWithConstParam(false), - IsLambda(false), IsParsingBaseSpecifiers(false), NumBases(0), NumVBases(0), - Bases(), VBases(), - Definition(D), FirstFriend() { -} + // HLSL Change Begin - Add HasLongVector and clang-format + : UserDeclaredConstructor(false), UserDeclaredSpecialMembers(0), + Aggregate(true), PlainOldData(true), Empty(true), Polymorphic(false), + Abstract(false), IsStandardLayout(true), HasNoNonEmptyBases(true), + HasPrivateFields(false), HasProtectedFields(false), + HasPublicFields(false), HasMutableFields(false), HasVariantMembers(false), + HasOnlyCMembers(true), HasInClassInitializer(false), + HasUninitializedReferenceMember(false), + NeedOverloadResolutionForMoveConstructor(false), + NeedOverloadResolutionForMoveAssignment(false), + NeedOverloadResolutionForDestructor(false), + DefaultedMoveConstructorIsDeleted(false), + DefaultedMoveAssignmentIsDeleted(false), + DefaultedDestructorIsDeleted(false), HasTrivialSpecialMembers(SMF_All), + DeclaredNonTrivialSpecialMembers(0), HasIrrelevantDestructor(true), + HasConstexprNonCopyMoveConstructor(false), + DefaultedDefaultConstructorIsConstexpr(true), + HasConstexprDefaultConstructor(false), + HasNonLiteralTypeFieldsOrBases(false), ComputedVisibleConversions(false), + UserProvidedDefaultConstructor(false), DeclaredSpecialMembers(0), + ImplicitCopyConstructorHasConstParam(true), + ImplicitCopyAssignmentHasConstParam(true), + HasDeclaredCopyConstructorWithConstParam(false), + HasDeclaredCopyAssignmentWithConstParam(false), IsLambda(false), + IsParsingBaseSpecifiers(false), HasHLSLLongVector(false), NumBases(0), + NumVBases(0), Bases(), VBases(), Definition(D), FirstFriend() {} +// HLSL Change End - Add HasLongVector and clang-format CXXBaseSpecifier *CXXRecordDecl::DefinitionData::getBasesSlowCase() const { return Bases.get(Definition->getASTContext().getExternalSource()); @@ -204,6 +203,11 @@ CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases, if 
(!BaseClassDecl->isStandardLayout()) data().IsStandardLayout = false; + // HLSL Change Begin - Propagate presence of long vector to child classes. + if (BaseClassDecl->hasHLSLLongVector()) + data().HasHLSLLongVector = true; + // HLSL Change End + // Record if this base is the first non-literal field or base. if (!hasNonLiteralTypeFieldsOrBases() && !BaseType->isLiteralType(C)) data().HasNonLiteralTypeFieldsOrBases = true; @@ -385,6 +389,11 @@ void CXXRecordDecl::addedClassSubobject(CXXRecordDecl *Subobj) { data().NeedOverloadResolutionForMoveConstructor = true; data().NeedOverloadResolutionForDestructor = true; } + + // HLSL Change Begin - Propagate presence of long vector to child classes. + if (Subobj->hasHLSLLongVector()) + data().HasHLSLLongVector = true; + // HLSL Change End } /// Callback function for CXXRecordDecl::forallBases that acknowledges diff --git a/tools/clang/lib/AST/Expr.cpp b/tools/clang/lib/AST/Expr.cpp index 0e2ec8c6c2..c6dc21217e 100644 --- a/tools/clang/lib/AST/Expr.cpp +++ b/tools/clang/lib/AST/Expr.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements the Expr class and subclasses. 
@@ -1716,7 +1719,11 @@ const char *CastExpr::getCastKindName() const { return "HLSLCC_FloatingToBoolean"; case CK_HLSLCC_FloatingCast: return "HLSLCC_FloatingCast"; - // HLSL Change Ends + case CK_VK_BufferPointerToIntegral: + return "VK_BufferPointerToIntegral"; + case CK_VK_IntegralToBufferPointer: + return "VK_IntegralToBufferPointer"; + // HLSL Change Ends } llvm_unreachable("Unhandled cast kind!"); diff --git a/tools/clang/lib/AST/ExprConstant.cpp b/tools/clang/lib/AST/ExprConstant.cpp index 5e8d4700bd..69e0760bce 100644 --- a/tools/clang/lib/AST/ExprConstant.cpp +++ b/tools/clang/lib/AST/ExprConstant.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements the Expr constant evaluator. @@ -7829,6 +7832,12 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) { return false; return Success(Value, E); } + + // HLSL Change Starts + case CK_VK_BufferPointerToIntegral: { + return false; + // HLSL Change Ends + } } llvm_unreachable("unknown cast resulting in integral value"); diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index d83b307463..5b19e064a3 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -5,6 +5,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
// // /// /// \file // @@ -53,44 +56,33 @@ ConvertHLSLVecMatTypeToExtVectorType(const clang::ASTContext &context, return nullptr; } -bool IsHLSLVecMatType(clang::QualType type) { - const Type *Ty = type.getCanonicalType().getTypePtr(); - if (const RecordType *RT = dyn_cast(Ty)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast(RT->getDecl())) { - if (templateDecl->getName() == "vector") { - return true; - } else if (templateDecl->getName() == "matrix") { - return true; - } - } +template static AttrType *getAttr(clang::QualType type) { + type = type.getCanonicalType(); + if (const RecordType *RT = type->getAs()) { + if (const auto *Spec = + dyn_cast(RT->getDecl())) + if (const auto *Template = + dyn_cast(Spec->getSpecializedTemplate())) + return Template->getTemplatedDecl()->getAttr(); + if (const auto *Decl = dyn_cast(RT->getDecl())) + return Decl->getAttr(); } - return false; + return nullptr; +} + +bool IsHLSLVecMatType(clang::QualType type) { + return getAttr(type) || getAttr(type); } bool IsHLSLMatType(clang::QualType type) { - const clang::Type *Ty = type.getCanonicalType().getTypePtr(); - if (const RecordType *RT = dyn_cast(Ty)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast(RT->getDecl())) { - if (templateDecl->getName() == "matrix") { - return true; - } - } - } + if (getAttr(type)) + return true; return false; } bool IsHLSLVecType(clang::QualType type) { - const clang::Type *Ty = type.getCanonicalType().getTypePtr(); - if (const RecordType *RT = dyn_cast(Ty)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast(RT->getDecl())) { - if (templateDecl->getName() == "vector") { - return true; - } - } - } + if (getAttr(type)) + return true; return false; } @@ -286,6 +278,18 @@ bool HasHLSLGloballyCoherent(clang::QualType type) { return false; } +bool HasHLSLReorderCoherent(clang::QualType type) { + const AttributedType *AT = type->getAs(); + while (AT) { + AttributedType::Kind kind = 
AT->getAttrKind(); + if (kind == AttributedType::attr_hlsl_reordercoherent) + return true; + AT = AT->getLocallyUnqualifiedSingleStepDesugaredType() + ->getAs(); + } + return false; +} + /// Checks whether the pAttributes indicate a parameter is inout or out; if /// inout, pIsIn will be set to true. bool IsParamAttributedAsOut(clang::AttributeList *pAttributes, bool *pIsIn); @@ -474,160 +478,56 @@ clang::QualType GetHLSLMatElementType(clang::QualType type) { QualType elemTy = arg0.getAsType(); return elemTy; } + // TODO: Add type cache to ASTContext. bool IsHLSLInputPatchType(QualType type) { - type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "InputPatch") { - return true; - } - } - } + if (const HLSLTessPatchAttr *Attr = getAttr(type)) + return Attr->getIsInput(); return false; } + bool IsHLSLOutputPatchType(QualType type) { - type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "OutputPatch") { - return true; - } - } - } + if (const HLSLTessPatchAttr *Attr = getAttr(type)) + return !Attr->getIsInput(); return false; } + bool IsHLSLPointStreamType(QualType type) { - type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "PointStream") - return true; - } - } + if (const HLSLStreamOutputAttr *Attr = getAttr(type)) + return Attr->getPrimVertices() == (unsigned)DXIL::InputPrimitive::Point; return false; } + bool IsHLSLLineStreamType(QualType type) { - type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - 
dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "LineStream") - return true; - } - } + if (const HLSLStreamOutputAttr *Attr = getAttr(type)) + return Attr->getPrimVertices() == (unsigned)DXIL::InputPrimitive::Line; return false; } + bool IsHLSLTriangleStreamType(QualType type) { - type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "TriangleStream") - return true; - } - } + if (const HLSLStreamOutputAttr *Attr = getAttr(type)) + return Attr->getPrimVertices() == (unsigned)DXIL::InputPrimitive::Triangle; return false; } + bool IsHLSLStreamOutputType(QualType type) { - type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "PointStream") - return true; - if (templateDecl->getName() == "LineStream") - return true; - if (templateDecl->getName() == "TriangleStream") - return true; - } - } + if (getAttr(type)) + return true; return false; } -bool IsHLSLResourceType(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - StringRef name = RT->getDecl()->getName(); - if (name == "Texture1D" || name == "RWTexture1D") - return true; - if (name == "Texture2D" || name == "RWTexture2D") - return true; - if (name == "Texture2DMS" || name == "RWTexture2DMS") - return true; - if (name == "Texture3D" || name == "RWTexture3D") - return true; - if (name == "TextureCube" || name == "RWTextureCube") - return true; - if (name == "Texture1DArray" || name == "RWTexture1DArray") - return true; - if (name == "Texture2DArray" || name == "RWTexture2DArray") - return true; - if (name == "Texture2DMSArray" || name == "RWTexture2DMSArray") - return true; - if (name == "TextureCubeArray" || name == "RWTextureCubeArray") - return true; - - 
if (name == "FeedbackTexture2D" || name == "FeedbackTexture2DArray") - return true; - - if (name == "RasterizerOrderedTexture1D" || - name == "RasterizerOrderedTexture2D" || - name == "RasterizerOrderedTexture3D" || - name == "RasterizerOrderedTexture1DArray" || - name == "RasterizerOrderedTexture2DArray" || - name == "RasterizerOrderedBuffer" || - name == "RasterizerOrderedByteAddressBuffer" || - name == "RasterizerOrderedStructuredBuffer") - return true; - - if (name == "ByteAddressBuffer" || name == "RWByteAddressBuffer") - return true; - - if (name == "StructuredBuffer" || name == "RWStructuredBuffer") - return true; - - if (name == "AppendStructuredBuffer" || name == "ConsumeStructuredBuffer") - return true; - - if (name == "Buffer" || name == "RWBuffer") - return true; - - if (name == "SamplerState" || name == "SamplerComparisonState") - return true; - - if (name == "ConstantBuffer" || name == "TextureBuffer") - return true; - - if (name == "RaytracingAccelerationStructure") - return true; - } +bool IsHLSLResourceType(clang::QualType type) { + if (getAttr(type)) + return true; return false; } -static HLSLNodeObjectAttr *getNodeAttr(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - if (const auto *Spec = - dyn_cast(RT->getDecl())) - if (const auto *Template = - dyn_cast(Spec->getSpecializedTemplate())) - return Template->getTemplatedDecl()->getAttr(); - if (const auto *Decl = dyn_cast(RT->getDecl())) - return Decl->getAttr(); - } - return nullptr; +bool IsHLSLHitObjectType(QualType type) { + return nullptr != getAttr(type); } DXIL::NodeIOKind GetNodeIOType(clang::QualType type) { - if (const HLSLNodeObjectAttr *Attr = getNodeAttr(type)) + if (const HLSLNodeObjectAttr *Attr = getAttr(type)) return Attr->getNodeIOType(); return DXIL::NodeIOKind::Invalid; } @@ -654,27 +554,20 @@ bool IsHLSLDynamicSamplerType(clang::QualType type) { } bool IsHLSLNodeType(clang::QualType type) { - if (const HLSLNodeObjectAttr *Attr = getNodeAttr(type)) + if 
(const HLSLNodeObjectAttr *Attr = getAttr(type)) return true; return false; } bool IsHLSLObjectWithImplicitMemberAccess(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - StringRef name = RT->getDecl()->getName(); - if (name == "ConstantBuffer" || name == "TextureBuffer") - return true; - } + if (const HLSLResourceAttr *Attr = getAttr(type)) + return DXIL::IsCTBuffer(Attr->getResKind()); return false; } bool IsHLSLObjectWithImplicitROMemberAccess(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - StringRef name = RT->getDecl()->getName(); - // Read-only records - if (name == "ConstantBuffer" || name == "TextureBuffer") - return true; - } + if (const HLSLResourceAttr *Attr = getAttr(type)) + return DXIL::IsCTBuffer(Attr->getResKind()); return false; } @@ -701,14 +594,8 @@ bool IsHLSLNodeOutputType(clang::QualType type) { } bool IsHLSLStructuredBufferType(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - StringRef name = RT->getDecl()->getName(); - if (name == "StructuredBuffer" || name == "RWStructuredBuffer") - return true; - - if (name == "AppendStructuredBuffer" || name == "ConsumeStructuredBuffer") - return true; - } + if (const HLSLResourceAttr *Attr = getAttr(type)) + return Attr->getResKind() == DXIL::ResourceKind::StructuredBuffer; return false; } @@ -812,64 +699,20 @@ bool DoesTypeDefineOverloadedOperator(clang::QualType typeWithOperator, bool GetHLSLSubobjectKind(clang::QualType type, DXIL::SubobjectKind &subobjectKind, DXIL::HitGroupType &hgType) { - hgType = (DXIL::HitGroupType)(-1); type = type.getCanonicalType(); if (const RecordType *RT = type->getAs()) { - StringRef name = RT->getDecl()->getName(); - switch (name.size()) { - case 17: - return name == "StateObjectConfig" - ? (subobjectKind = DXIL::SubobjectKind::StateObjectConfig, - true) - : false; - case 18: - return name == "LocalRootSignature" - ? 
(subobjectKind = DXIL::SubobjectKind::LocalRootSignature, - true) - : false; - case 19: - return name == "GlobalRootSignature" - ? (subobjectKind = DXIL::SubobjectKind::GlobalRootSignature, - true) - : false; - case 29: - return name == "SubobjectToExportsAssociation" - ? (subobjectKind = - DXIL::SubobjectKind::SubobjectToExportsAssociation, - true) - : false; - case 22: - return name == "RaytracingShaderConfig" - ? (subobjectKind = DXIL::SubobjectKind::RaytracingShaderConfig, - true) - : false; - case 24: - return name == "RaytracingPipelineConfig" - ? (subobjectKind = - DXIL::SubobjectKind::RaytracingPipelineConfig, - true) - : false; - case 25: - return name == "RaytracingPipelineConfig1" - ? (subobjectKind = - DXIL::SubobjectKind::RaytracingPipelineConfig1, - true) - : false; - case 16: - if (name == "TriangleHitGroup") { - subobjectKind = DXIL::SubobjectKind::HitGroup; - hgType = DXIL::HitGroupType::Triangle; - return true; - } - return false; - case 27: - if (name == "ProceduralPrimitiveHitGroup") { - subobjectKind = DXIL::SubobjectKind::HitGroup; - hgType = DXIL::HitGroupType::ProceduralPrimitive; - return true; - } + RecordDecl *RD = RT->getDecl(); + if (!RD->hasAttr()) { return false; } + + HLSLSubObjectAttr *Attr = RD->getAttr(); + subobjectKind = static_cast(Attr->getSubObjKindUint()); + hgType = static_cast(Attr->getHitGroupType()); + if (subobjectKind == DXIL::SubobjectKind::HitGroup) + DXASSERT(DXIL::IsValidHitGroupType(hgType), "invalid hit group type"); + + return true; } return false; } @@ -906,6 +749,50 @@ bool IsHLSLRayQueryType(clang::QualType type) { return false; } +#ifdef ENABLE_SPIRV_CODEGEN +static llvm::Optional> +MaybeGetVKBufferPointerParams(clang::QualType type) { + const RecordType *RT = dyn_cast(type.getCanonicalType()); + if (!RT) + return llvm::None; + + const ClassTemplateSpecializationDecl *templateDecl = + dyn_cast(RT->getAsCXXRecordDecl()); + if (!templateDecl || !templateDecl->getName().equals("BufferPointer")) + return 
llvm::None; + + auto *namespaceDecl = + dyn_cast_or_null(templateDecl->getDeclContext()); + if (!namespaceDecl || !namespaceDecl->getName().equals("vk")) + return llvm::None; + + const TemplateArgumentList &argList = templateDecl->getTemplateArgs(); + QualType bufferType = argList[0].getAsType(); + unsigned align = + argList.size() > 1 ? argList[1].getAsIntegral().getLimitedValue() : 0; + return std::make_pair(bufferType, align); +} + +bool IsVKBufferPointerType(clang::QualType type) { + return MaybeGetVKBufferPointerParams(type).hasValue(); +} + +QualType GetVKBufferPointerBufferType(clang::QualType type) { + auto bpParams = MaybeGetVKBufferPointerParams(type); + assert(bpParams.hasValue() && + "cannot get pointer type for type that is not a vk::BufferPointer"); + return bpParams.getValue().first; +} + +unsigned GetVKBufferPointerAlignment(clang::QualType type) { + auto bpParams = MaybeGetVKBufferPointerParams(type); + assert( + bpParams.hasValue() && + "cannot get pointer alignment for type that is not a vk::BufferPointer"); + return bpParams.getValue().second; +} +#endif + QualType GetHLSLResourceResultType(QualType type) { // Don't canonicalize the type as to not lose snorm in Buffer const RecordType *RT = type->getAs(); @@ -914,7 +801,8 @@ QualType GetHLSLResourceResultType(QualType type) { if (const ClassTemplateSpecializationDecl *templateDecl = dyn_cast(RD)) { - if (RD->getName().startswith("FeedbackTexture")) { + const HLSLResourceAttr *Attr = getAttr(type); + if (Attr && DXIL::IsFeedbackTexture(Attr->getResKind())) { // Feedback textures are write-only and the data is opaque, // so there is no result type per se. 
return {}; diff --git a/tools/clang/lib/AST/Type.cpp b/tools/clang/lib/AST/Type.cpp index 06db4747ff..51c20218cc 100644 --- a/tools/clang/lib/AST/Type.cpp +++ b/tools/clang/lib/AST/Type.cpp @@ -2945,6 +2945,7 @@ bool AttributedType::isHLSLTypeSpec() const { case attr_hlsl_snorm: case attr_hlsl_unorm: case attr_hlsl_globallycoherent: + case attr_hlsl_reordercoherent: return true; } llvm_unreachable("invalid attr kind"); @@ -2975,7 +2976,8 @@ bool AttributedType::isCallingConv() const { case attr_hlsl_snorm: case attr_hlsl_unorm: case attr_hlsl_globallycoherent: - // HLSL Change Ends + case attr_hlsl_reordercoherent: + // HLSL Change Ends return false; case attr_pcs: diff --git a/tools/clang/lib/AST/TypePrinter.cpp b/tools/clang/lib/AST/TypePrinter.cpp index 621e1d46a0..ca9e15bfd7 100644 --- a/tools/clang/lib/AST/TypePrinter.cpp +++ b/tools/clang/lib/AST/TypePrinter.cpp @@ -1174,6 +1174,9 @@ void TypePrinter::printAttributedBefore(const AttributedType *T, case AttributedType::attr_hlsl_globallycoherent: OS << "globallycoherent "; break; + case AttributedType::attr_hlsl_reordercoherent: + OS << "reordercoherent "; + break; default: // Only HLSL attribute types are covered. 
break; diff --git a/tools/clang/lib/CodeGen/CGDebugInfo.cpp b/tools/clang/lib/CodeGen/CGDebugInfo.cpp index 206f7d9523..d947887d62 100644 --- a/tools/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/tools/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1047,8 +1047,17 @@ bool CGDebugInfo::TryCollectHLSLRecordElements(const RecordType *Ty, unsigned VecSize = hlsl::GetHLSLVecSize(QualTy); unsigned ElemSizeInBits = CGM.getContext().getTypeSize(ElemQualTy); unsigned CurrentAlignedOffset = 0; + SmallString<8> FieldNameBuf; for (unsigned ElemIdx = 0; ElemIdx < VecSize; ++ElemIdx) { - StringRef FieldName = StringRef(&"xyzw"[ElemIdx], 1); + StringRef FieldName; + if (VecSize <= 4) { + FieldName = StringRef(&"xyzw"[ElemIdx], 1); + } else { + FieldNameBuf.clear(); + llvm::raw_svector_ostream OS(FieldNameBuf); + OS << 'c' << ElemIdx; + FieldName = OS.str(); + } CurrentAlignedOffset = llvm::RoundUpToAlignment(CurrentAlignedOffset, AlignBits); llvm::DIType *FieldType = diff --git a/tools/clang/lib/CodeGen/CGExprScalar.cpp b/tools/clang/lib/CodeGen/CGExprScalar.cpp index 0cb993e6f4..530c791fcc 100644 --- a/tools/clang/lib/CodeGen/CGExprScalar.cpp +++ b/tools/clang/lib/CodeGen/CGExprScalar.cpp @@ -3713,20 +3713,7 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { llvm::Value *CondV = CGF.EmitScalarExpr(condExpr); llvm::Value *LHS = Visit(lhsExpr); llvm::Value *RHS = Visit(rhsExpr); - if (llvm::VectorType *VT = dyn_cast(CondV->getType())) { - llvm::VectorType *ResultVT = cast(LHS->getType()); - llvm::Value *result = llvm::UndefValue::get(ResultVT); - for (unsigned i = 0; i < VT->getNumElements(); i++) { - llvm::Value *EltCond = Builder.CreateExtractElement(CondV, i); - llvm::Value *EltL = Builder.CreateExtractElement(LHS, i); - llvm::Value *EltR = Builder.CreateExtractElement(RHS, i); - llvm::Value *EltSelect = Builder.CreateSelect(EltCond, EltL, EltR); - result = Builder.CreateInsertElement(result, EltSelect, i); - } - return result; - } else { - return 
Builder.CreateSelect(CondV, LHS, RHS); - } + return Builder.CreateSelect(CondV, LHS, RHS); } if (hlsl::IsHLSLMatType(E->getType())) { llvm::Value *Cond = CGF.EmitScalarExpr(condExpr); diff --git a/tools/clang/lib/CodeGen/CGHLSLMS.cpp b/tools/clang/lib/CodeGen/CGHLSLMS.cpp index 29ed954425..16ddeaec60 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMS.cpp +++ b/tools/clang/lib/CodeGen/CGHLSLMS.cpp @@ -300,7 +300,7 @@ class CGMSHLSLRuntime : public CGHLSLRuntime { clang::QualType QaulTy) override; void FinishAutoVar(CodeGenFunction &CGF, const VarDecl &D, llvm::Value *V) override; - const clang::Expr *CheckReturnStmtGLCMismatch( + const clang::Expr *CheckReturnStmtCoherenceMismatch( CodeGenFunction &CGF, const Expr *RV, const clang::ReturnStmt &S, clang::QualType FnRetTy, const std::function &TmpArgMap) @@ -2500,9 +2500,11 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) { // Type annotation for this pointer. if (const CXXMethodDecl *MFD = dyn_cast(FD)) { - const CXXRecordDecl *RD = MFD->getParent(); - QualType Ty = CGM.getContext().getTypeDeclType(RD); - AddTypeAnnotation(Ty, dxilTypeSys, arrayEltSize); + if (!MFD->isStatic()) { + const CXXRecordDecl *RD = MFD->getParent(); + QualType Ty = CGM.getContext().getTypeDeclType(RD); + AddTypeAnnotation(Ty, dxilTypeSys, arrayEltSize); + } } for (const ValueDecl *param : FD->params()) { @@ -2801,16 +2803,20 @@ void CGMSHLSLRuntime::MarkPotentialResourceTemp(CodeGenFunction &CGF, AddValToPropertyMap(V, QualTy); } -static bool isGLCMismatch(QualType Ty0, QualType Ty1, const Expr *SrcExp, - clang::SourceLocation Loc, DiagnosticsEngine &Diags) { - if (HasHLSLGloballyCoherent(Ty0) == HasHLSLGloballyCoherent(Ty1)) - return false; +static std::pair getCoherenceMismatch(QualType Ty0, QualType Ty1, + const Expr *SrcExp) { + std::pair Mismatch{ + HasHLSLGloballyCoherent(Ty0) != HasHLSLGloballyCoherent(Ty1), + HasHLSLReorderCoherent(Ty0) != HasHLSLReorderCoherent(Ty1)}; + if (!Mismatch.first && 
!Mismatch.second) + return {false, false}; + if (const CastExpr *Cast = dyn_cast(SrcExp)) { // Skip flat conversion which is for createHandleFromHeap. if (Cast->getCastKind() == CastKind::CK_FlatConversion) - return false; + return {false, false}; } - return true; + return Mismatch; } void CGMSHLSLRuntime::FinishAutoVar(CodeGenFunction &CGF, const VarDecl &D, @@ -2827,19 +2833,23 @@ void CGMSHLSLRuntime::FinishAutoVar(CodeGenFunction &CGF, const VarDecl &D, AddValToPropertyMap(V, D.getType()); if (D.hasInit()) { - if (isGLCMismatch(D.getType(), D.getInit()->getType(), D.getInit(), - D.getLocation(), CGM.getDiags())) { - objectProperties.updateGLC(V); + auto [glcMismatch, rdcMismatch] = + getCoherenceMismatch(D.getType(), D.getInit()->getType(), D.getInit()); + + if (glcMismatch || rdcMismatch) { + objectProperties.updateCoherence(V, glcMismatch, rdcMismatch); } } } -const clang::Expr *CGMSHLSLRuntime::CheckReturnStmtGLCMismatch( +const clang::Expr *CGMSHLSLRuntime::CheckReturnStmtCoherenceMismatch( CodeGenFunction &CGF, const Expr *RV, const clang::ReturnStmt &S, clang::QualType FnRetTy, const std::function &TmpArgMap) { - if (!isGLCMismatch(RV->getType(), FnRetTy, RV, S.getReturnLoc(), - CGM.getDiags())) { + auto [glcMismatch, rdcMismatch] = + getCoherenceMismatch(RV->getType(), FnRetTy, RV); + + if (!glcMismatch && !rdcMismatch) { return RV; } const FunctionDecl *FD = cast(CGF.CurFuncDecl); @@ -2911,10 +2921,11 @@ void CGMSHLSLRuntime::addResource(Decl *D) { if (VD->hasInit() && resClass != DXIL::ResourceClass::Invalid) { if (resClass == DXIL::ResourceClass::UAV) { - if (isGLCMismatch(VD->getType(), VD->getInit()->getType(), - VD->getInit(), D->getLocation(), CGM.getDiags())) { + auto [glcMismatch, rdcMismatch] = getCoherenceMismatch( + VD->getType(), VD->getInit()->getType(), VD->getInit()); + if (glcMismatch || rdcMismatch) { GlobalVariable *GV = cast(CGM.GetAddrOfGlobalVar(VD)); - objectProperties.updateGLC(GV); + objectProperties.updateCoherence(GV, 
glcMismatch, rdcMismatch); } } return; @@ -3461,8 +3472,11 @@ bool CGMSHLSLRuntime::SetUAVSRV(SourceLocation loc, } } } + // 'globallycoherent' implies 'reordercoherent' if (HasHLSLGloballyCoherent(QualTy)) { hlslRes->SetGloballyCoherent(true); + } else if (HasHLSLReorderCoherent(QualTy)) { + hlslRes->SetReorderCoherent(true); } if (resClass == hlsl::DxilResourceBase::Class::SRV) { hlslRes->SetRW(false); @@ -3495,6 +3509,8 @@ uint32_t CGMSHLSLRuntime::AddUAVSRV(VarDecl *decl, if (decl->hasAttr()) { hlslRes->SetGloballyCoherent(true); } + if (decl->hasAttr()) + hlslRes->SetReorderCoherent(true); if (!SetUAVSRV(decl->getLocation(), resClass, hlslRes.get(), VarTy)) return 0; @@ -6138,8 +6154,9 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit( bool isObject = dxilutil::IsHLSLObjectType(CGF.ConvertTypeForMem(ParamTy)); bool bAnnotResource = false; if (isObject) { - if (isGLCMismatch(Param->getType(), Arg->getType(), Arg, - Arg->getExprLoc(), CGM.getDiags())) { + auto [glcMismatch, rdcMismatch] = + getCoherenceMismatch(Param->getType(), Arg->getType(), Arg); + if (glcMismatch || rdcMismatch) { // NOTE: if function is noinline, resource parameter is not allowed. // Here assume function will be always inlined. // This can only take care resource as parameter. When parameter is diff --git a/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp b/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp index 8af96cc3cd..13edadf9df 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp +++ b/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp @@ -2795,10 +2795,12 @@ unsigned AlignBufferOffsetInLegacy(unsigned offset, unsigned size, } // Translate RayQuery constructor. 
From: -// %call = call %"RayQuery" @(%"RayQuery" %ptr) +// %call = call %"RayQuery>" +// @(%"RayQuery" %ptr) // To: -// i32 %handle = AllocateRayQuery(i32 , i32 -// %flags) %gep = GEP %"RayQuery" %ptr, 0, 0 store i32* %gep, i32 +// i32 %handle = AllocateRayQuery2(i32 , i32 +// %flags, i32 %constrayqueryflags <0 if not given>) %gep = GEP +// %"RayQuery" %ptr, 0, 0 store i32* %gep, i32 // %handle ; and replace uses of %call with %ptr void TranslateRayQueryConstructor(HLModule &HLM) { llvm::Module &M = *HLM.GetModule(); @@ -2822,9 +2824,13 @@ void TranslateRayQueryConstructor(HLModule &HLM) { llvm::IntegerType *i32Ty = llvm::Type::getInt32Ty(M.getContext()); llvm::ConstantInt *i32Zero = llvm::ConstantInt::get(i32Ty, (uint64_t)0, false); + + // the third argument will default to 0 if the rayquery constructor doesn't + // have a second template argument llvm::FunctionType *funcTy = - llvm::FunctionType::get(i32Ty, {i32Ty, i32Ty}, false); + llvm::FunctionType::get(i32Ty, {i32Ty, i32Ty, i32Ty}, false); unsigned opcode = (unsigned)IntrinsicOp::IOP_AllocateRayQuery; + llvm::ConstantInt *opVal = llvm::ConstantInt::get(i32Ty, opcode, false); Function *opFunc = GetOrCreateHLFunction(M, funcTy, HLOpcodeGroup::HLIntrinsic, opcode); @@ -2839,14 +2845,22 @@ void TranslateRayQueryConstructor(HLModule &HLM) { HLM.GetTypeSystem().GetStructAnnotation(pRQType); DXASSERT(SA, "otherwise, could not find type annoation for RayQuery " "specialization"); - DXASSERT(SA->GetNumTemplateArgs() == 1 && - SA->GetTemplateArgAnnotation(0).IsIntegral(), + DXASSERT((SA->GetNumTemplateArgs() == 1 && + SA->GetTemplateArgAnnotation(0).IsIntegral()) || + (SA->GetNumTemplateArgs() == 2 && + SA->GetTemplateArgAnnotation(0).IsIntegral() && + SA->GetTemplateArgAnnotation(1).IsIntegral()), "otherwise, RayQuery has changed, or lacks template args"); llvm::IRBuilder<> Builder(CI); llvm::Value *rayFlags = Builder.getInt32(SA->GetTemplateArgAnnotation(0).GetIntegral()); - llvm::Value *Call = - 
Builder.CreateCall(opFunc, {opVal, rayFlags}, pThis->getName()); + // the default val of 0 will be assigned if there is no 2nd template arg + llvm::Value *rayQueryFlags = + Builder.getInt32(SA->GetTemplateArgAnnotation(1).GetIntegral()); + + llvm::Value *Call = Builder.CreateCall( + opFunc, {opVal, rayFlags, rayQueryFlags}, pThis->getName()); + llvm::Value *GEP = Builder.CreateInBoundsGEP(pThis, {i32Zero, i32Zero}); Builder.CreateStore(Call, GEP); CI->replaceAllUsesWith(pThis); @@ -4020,12 +4034,17 @@ hlsl::DxilResourceProperties DxilObjectProperties::GetResource(llvm::Value *V) { return it->second; return DxilResourceProperties(); } -void DxilObjectProperties::updateGLC(llvm::Value *V) { +void DxilObjectProperties::updateCoherence(llvm::Value *V, + bool updateGloballyCoherent, + bool updateReorderCoherent) { auto it = resMap.find(V); if (it == resMap.end()) return; - it->second.Basic.IsGloballyCoherent ^= 1; + if (updateGloballyCoherent) + it->second.Basic.IsGloballyCoherent ^= 1; + if (updateReorderCoherent) + it->second.Basic.IsReorderCoherent ^= 1; } } // namespace CGHLSLMSHelper diff --git a/tools/clang/lib/CodeGen/CGHLSLMSHelper.h b/tools/clang/lib/CodeGen/CGHLSLMSHelper.h index 9058ed4f6d..7fca5d4025 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMSHelper.h +++ b/tools/clang/lib/CodeGen/CGHLSLMSHelper.h @@ -159,7 +159,8 @@ struct DxilObjectProperties { bool AddResource(llvm::Value *V, const hlsl::DxilResourceProperties &RP); bool IsResource(llvm::Value *V); hlsl::DxilResourceProperties GetResource(llvm::Value *V); - void updateGLC(llvm::Value *V); + void updateCoherence(llvm::Value *V, bool updateGloballyCoherent, + bool updateReorderCoherent); // MapVector for deterministic iteration order. 
llvm::MapVector resMap; diff --git a/tools/clang/lib/CodeGen/CGHLSLRuntime.h b/tools/clang/lib/CodeGen/CGHLSLRuntime.h index 3e27951e86..b100d93579 100644 --- a/tools/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/tools/clang/lib/CodeGen/CGHLSLRuntime.h @@ -146,7 +146,7 @@ class CGHLSLRuntime { virtual void FinishAutoVar(CodeGenFunction &CGF, const VarDecl &D, llvm::Value *V) = 0; - virtual const clang::Expr *CheckReturnStmtGLCMismatch( + virtual const clang::Expr *CheckReturnStmtCoherenceMismatch( CodeGenFunction &CGF, const clang::Expr *RV, const clang::ReturnStmt &S, clang::QualType FnRetTy, const std::function &TmpArgMap) = 0; diff --git a/tools/clang/lib/CodeGen/CGStmt.cpp b/tools/clang/lib/CodeGen/CGStmt.cpp index 080d824022..1b1f593271 100644 --- a/tools/clang/lib/CodeGen/CGStmt.cpp +++ b/tools/clang/lib/CodeGen/CGStmt.cpp @@ -525,6 +525,10 @@ void CodeGenFunction::EmitGotoStmt(const GotoStmt &S) { // HLSL Change Begins. void CodeGenFunction::EmitDiscardStmt(const DiscardStmt &S) { + // Skip unreachable discard. + if (!HaveInsertPoint()) + return; + CGM.getHLSLRuntime().EmitHLSLDiscard(*this); } // HLSL Change Ends. @@ -1174,8 +1178,8 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { auto MapTemp = [&](const VarDecl *LocalVD, llvm::Value *TmpArg) { OutParamScope.addTemp(LocalVD, TmpArg); }; - RV = CGM.getHLSLRuntime().CheckReturnStmtGLCMismatch(*this, RV, S, - FnRetTy, MapTemp); + RV = CGM.getHLSLRuntime().CheckReturnStmtCoherenceMismatch( + *this, RV, S, FnRetTy, MapTemp); // HLSL Change Ends. 
CharUnits Alignment = getContext().getTypeAlignInChars(RV->getType()); EmitAggExpr(RV, AggValueSlot::forAddr(ReturnValue, Alignment, diff --git a/tools/clang/lib/CodeGen/CodeGenModule.cpp b/tools/clang/lib/CodeGen/CodeGenModule.cpp index 73ad296d47..b274ea9d64 100644 --- a/tools/clang/lib/CodeGen/CodeGenModule.cpp +++ b/tools/clang/lib/CodeGen/CodeGenModule.cpp @@ -3376,6 +3376,12 @@ void CodeGenModule::EmitLinkageSpec(const LinkageSpecDecl *LSD) { /// EmitTopLevelDecl - Emit code for a single top level declaration. void CodeGenModule::EmitTopLevelDecl(Decl *D) { + llvm::TimeTraceScope TimeScope("CGM::EmitTopLevelDecl", [&] { + if (const auto *ND = dyn_cast(D)) + return ND->getName(); + return StringRef("Unnamed decl"); + }); + // Ignore dependent declarations. if (D->getDeclContext() && D->getDeclContext()->isDependentContext()) return; diff --git a/tools/clang/lib/CodeGen/CodeGenTypes.cpp b/tools/clang/lib/CodeGen/CodeGenTypes.cpp index d11575d359..82328c8fb5 100644 --- a/tools/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/tools/clang/lib/CodeGen/CodeGenTypes.cpp @@ -14,21 +14,23 @@ #include "CodeGenTypes.h" #include "CGCXXABI.h" #include "CGCall.h" +#include "CGHLSLRuntime.h" // HLSL Change #include "CGOpenCLRuntime.h" #include "CGRecordLayout.h" +#include "CodeGenModule.h" // HLSL Change #include "TargetInfo.h" +#include "dxc/DXIL/DxilUtil.h" // HLSL Change #include "clang/AST/ASTContext.h" #include "clang/AST/DeclCXX.h" -#include "clang/AST/DeclTemplate.h" #include "clang/AST/DeclObjC.h" +#include "clang/AST/DeclTemplate.h" // HLSL Change - clang-format #include "clang/AST/Expr.h" +#include "clang/AST/HlslTypes.h" // HLSL Change #include "clang/AST/RecordLayout.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Module.h" -#include "CodeGenModule.h" // HLSL Change -#include "CGHLSLRuntime.h" // HLSL Change using namespace clang; using namespace CodeGen; @@ -365,7 +367,8 @@ llvm::Type 
*CodeGenTypes::ConvertType(QualType T) { .getConstantArrayType(eltTy, llvm::APInt(32, count), ArrayType::ArraySizeModifier::Normal, 0) .getTypePtr(); - } + } else if (hlsl::IsHLSLHitObjectType(T)) // HLSL Change + return hlsl::dxilutil::GetHLSLHitObjectType(&TheModule); else return ConvertRecordDeclType(RT->getDecl()); } diff --git a/tools/clang/lib/Lex/PPMacroExpansion.cpp b/tools/clang/lib/Lex/PPMacroExpansion.cpp index 64ce8c9182..ebfb93df2e 100644 --- a/tools/clang/lib/Lex/PPMacroExpansion.cpp +++ b/tools/clang/lib/Lex/PPMacroExpansion.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements the top level handling of macro expansion for the @@ -1080,7 +1083,8 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("nullability", true) .Case("memory_sanitizer", LangOpts.Sanitize.has(SanitizerKind::Memory)) .Case("thread_sanitizer", LangOpts.Sanitize.has(SanitizerKind::Thread)) - .Case("dataflow_sanitizer", LangOpts.Sanitize.has(SanitizerKind::DataFlow)) + .Case("dataflow_sanitizer", + LangOpts.Sanitize.has(SanitizerKind::DataFlow)) // Objective-C features .Case("objc_arr", LangOpts.ObjCAutoRefCount) // FIXME: REMOVE? 
.Case("objc_arc", LangOpts.ObjCAutoRefCount) @@ -1180,6 +1184,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("has_trivial_constructor", LangOpts.CPlusPlus) .Case("has_trivial_destructor", LangOpts.CPlusPlus) .Case("has_virtual_destructor", LangOpts.CPlusPlus) + .Case("hlsl_vk_buffer_pointer", LangOpts.SPIRV) .Case("is_abstract", LangOpts.CPlusPlus) .Case("is_base_of", LangOpts.CPlusPlus) .Case("is_class", LangOpts.CPlusPlus) diff --git a/tools/clang/lib/Parse/ParseAST.cpp b/tools/clang/lib/Parse/ParseAST.cpp index e06a4ee09e..c8009b9b53 100644 --- a/tools/clang/lib/Parse/ParseAST.cpp +++ b/tools/clang/lib/Parse/ParseAST.cpp @@ -100,8 +100,6 @@ void clang::ParseAST(Preprocessor &PP, ASTConsumer *Consumer, void clang::ParseAST(Sema &S, bool PrintStats, bool SkipFunctionBodies) { - // HLSL Change - Support hierarchial time tracing. - llvm::TimeTraceScope TimeScope("Frontend", StringRef("")); // Collect global stats on Decls/Stmts (until we have a module streamer). if (PrintStats) { Decl::EnableStatistics(); @@ -137,6 +135,8 @@ void clang::ParseAST(Sema &S, bool PrintStats, bool SkipFunctionBodies) { External->StartTranslationUnit(Consumer); if (!S.getDiagnostics().hasUnrecoverableErrorOccurred()) { // HLSL Change: Skip if fatal error already occurred + // HLSL Change - Support hierarchial time tracing. + llvm::TimeTraceScope TimeScope("Frontend", StringRef("")); if (P.ParseTopLevelDecl(ADecl)) { if (!External && !S.getLangOpts().CPlusPlus) P.Diag(diag::ext_empty_translation_unit); @@ -151,10 +151,14 @@ void clang::ParseAST(Sema &S, bool PrintStats, bool SkipFunctionBodies) { } } // HLSL Change: Skip if fatal error already occurred - // Process any TopLevelDecls generated by #pragma weak. - for (Decl *D : S.WeakTopLevelDecls()) - Consumer->HandleTopLevelDecl(DeclGroupRef(D)); - + { + // HLSL Change - Support hierarchial time tracing. 
+ llvm::TimeTraceScope TimeScope("Frontend - Consumer", StringRef("")); + // Process any TopLevelDecls generated by #pragma weak. + for (Decl *D : S.WeakTopLevelDecls()) + Consumer->HandleTopLevelDecl(DeclGroupRef(D)); + } + // HLSL Change Starts // Provide the opportunity to generate translation-unit level validation // errors in the front-end, without relying on code generation being diff --git a/tools/clang/lib/Parse/ParseDecl.cpp b/tools/clang/lib/Parse/ParseDecl.cpp index 4ca80fcec6..59be41a484 100644 --- a/tools/clang/lib/Parse/ParseDecl.cpp +++ b/tools/clang/lib/Parse/ParseDecl.cpp @@ -3877,6 +3877,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, case tok::kw_precise: case tok::kw_sample: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_center: case tok::kw_indices: case tok::kw_vertices: @@ -5321,6 +5322,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { case tok::kw_shared: case tok::kw_groupshared: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_uniform: case tok::kw_in: case tok::kw_out: @@ -6125,6 +6127,7 @@ void Parser::ParseDirectDeclarator(Declarator &D) { switch (Tok.getKind()) { case tok::kw_center: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_precise: case tok::kw_sample: case tok::kw_indices: diff --git a/tools/clang/lib/Parse/ParseExpr.cpp b/tools/clang/lib/Parse/ParseExpr.cpp index 745b506468..8f51dd4b6c 100644 --- a/tools/clang/lib/Parse/ParseExpr.cpp +++ b/tools/clang/lib/Parse/ParseExpr.cpp @@ -795,6 +795,7 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, case tok::kw_precise: case tok::kw_sample: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_center: case tok::kw_indices: case tok::kw_vertices: @@ -1740,6 +1741,7 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) { switch (auto tk = Tok.getKind()) { case tok::kw_center: case tok::kw_globallycoherent: + case 
tok::kw_reordercoherent: case tok::kw_precise: case tok::kw_sample: case tok::kw_indices: diff --git a/tools/clang/lib/Parse/ParseStmt.cpp b/tools/clang/lib/Parse/ParseStmt.cpp index 95dea4ab2c..6fa33d7108 100644 --- a/tools/clang/lib/Parse/ParseStmt.cpp +++ b/tools/clang/lib/Parse/ParseStmt.cpp @@ -179,6 +179,7 @@ Parser::ParseStatementOrDeclarationAfterAttributes(StmtVector &Stmts, case tok::kw_precise: case tok::kw_sample: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_center: case tok::kw_indices: case tok::kw_vertices: diff --git a/tools/clang/lib/Parse/ParseTentative.cpp b/tools/clang/lib/Parse/ParseTentative.cpp index 29c6e49770..6bdef3a547 100644 --- a/tools/clang/lib/Parse/ParseTentative.cpp +++ b/tools/clang/lib/Parse/ParseTentative.cpp @@ -1275,6 +1275,7 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult, case tok::kw_precise: case tok::kw_center: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_indices: case tok::kw_vertices: case tok::kw_primitives: diff --git a/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp b/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp index 492640c493..db140f4766 100644 --- a/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp +++ b/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #include "AlignmentSizeCalculator.h" @@ -277,14 +280,20 @@ std::pair AlignmentSizeCalculator::getAlignmentAndSize( if (recordType != nullptr) { const llvm::StringRef name = recordType->getDecl()->getName(); - if (isTypeInVkNamespace(recordType) && name == "SpirvType") { - const ClassTemplateSpecializationDecl *templateDecl = - cast(recordType->getDecl()); - const uint64_t size = - templateDecl->getTemplateArgs()[1].getAsIntegral().getZExtValue(); - const uint64_t alignment = - templateDecl->getTemplateArgs()[2].getAsIntegral().getZExtValue(); - return {alignment, size}; + if (isTypeInVkNamespace(recordType)) { + if (name == "BufferPointer") { + return {8, 8}; // same as uint64_t + } + + if (name == "SpirvType") { + const ClassTemplateSpecializationDecl *templateDecl = + cast(recordType->getDecl()); + const uint64_t size = + templateDecl->getTemplateArgs()[1].getAsIntegral().getZExtValue(); + const uint64_t alignment = + templateDecl->getTemplateArgs()[2].getAsIntegral().getZExtValue(); + return {alignment, size}; + } } } diff --git a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp index 50a7ab0905..24dfdc2e9a 100644 --- a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp +++ b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #include "CapabilityVisitor.h" @@ -200,8 +203,10 @@ void CapabilityVisitor::addCapabilityForType(const SpirvType *type, } // Pointer type else if (const auto *ptrType = dyn_cast(type)) { - addCapabilityForType(ptrType->getPointeeType(), loc, sc); - if (sc == spv::StorageClass::PhysicalStorageBuffer) { + addCapabilityForType(ptrType->getPointeeType(), loc, + ptrType->getStorageClass()); + if (ptrType->getStorageClass() == + spv::StorageClass::PhysicalStorageBuffer) { addExtension(Extension::KHR_physical_storage_buffer, "SPV_KHR_physical_storage_buffer", loc); addCapability(spv::Capability::PhysicalStorageBufferAddresses); @@ -852,6 +857,12 @@ bool CapabilityVisitor::visit(SpirvModule *, Visitor::Phase phase) { spv::Capability::FragmentShaderShadingRateInterlockEXT, }); + addExtensionAndCapabilitiesIfEnabled( + Extension::KHR_compute_shader_derivatives, + { + spv::Capability::ComputeDerivativeGroupQuadsKHR, + spv::Capability::ComputeDerivativeGroupLinearKHR, + }); addExtensionAndCapabilitiesIfEnabled( Extension::NV_compute_shader_derivatives, { @@ -876,6 +887,9 @@ bool CapabilityVisitor::visit(SpirvModule *, Visitor::Phase phase) { addCapability(spv::Capability::InterpolationFunction); + addExtensionAndCapabilitiesIfEnabled(Extension::KHR_quad_control, + {spv::Capability::QuadControlKHR}); + return true; } diff --git a/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp b/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp index fd0fa8a3d0..de73d5e417 100644 --- a/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp +++ b/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp @@ -860,7 +860,7 @@ bool DeclResultIdMapper::createStageOutputVar(const DeclaratorDecl *decl, QualType arrayType = astContext.getConstantArrayType( type, llvm::APInt(32, arraySize), clang::ArrayType::Normal, 0); - stageVarInstructions[cast(decl)] = + msOutIndicesBuiltin = getBuiltinVar(builtinID, arrayType, decl->getLocation()); } else { // 
For NV_mesh_shader, the built type is PrimitiveIndicesNV @@ -871,7 +871,7 @@ bool DeclResultIdMapper::createStageOutputVar(const DeclaratorDecl *decl, astContext.UnsignedIntTy, llvm::APInt(32, arraySize), clang::ArrayType::Normal, 0); - stageVarInstructions[cast(decl)] = + msOutIndicesBuiltin = getBuiltinVar(builtinID, arrayType, decl->getLocation()); } @@ -3522,7 +3522,8 @@ SpirvVariable *DeclResultIdMapper::createSpirvInterfaceVariable( // Decorate with PerPrimitiveNV for per-primitive out variables. spvBuilder.decoratePerPrimitiveNV(varInstr, varInstr->getSourceLocation()); - } else { + } else if (stageVar.getSemanticInfo().getKind() != + hlsl::Semantic::Kind::DomainLocation) { spvBuilder.decoratePatch(varInstr, varInstr->getSourceLocation()); } } diff --git a/tools/clang/lib/SPIRV/DeclResultIdMapper.h b/tools/clang/lib/SPIRV/DeclResultIdMapper.h index 80723393ce..6ac17fde9d 100644 --- a/tools/clang/lib/SPIRV/DeclResultIdMapper.h +++ b/tools/clang/lib/SPIRV/DeclResultIdMapper.h @@ -559,6 +559,11 @@ class DeclResultIdMapper { return value; } + SpirvVariable *getMSOutIndicesBuiltin() { + assert(msOutIndicesBuiltin && "Variable usage before decl parsing."); + return msOutIndicesBuiltin; + } + /// Decorate with spirv intrinsic attributes with lamda function variable /// check void decorateWithIntrinsicAttrs( @@ -1014,6 +1019,25 @@ class DeclResultIdMapper { /// creating that stage variable, so that we don't need to query them again /// for reading and writing. llvm::DenseMap stageVarInstructions; + + /// Special case for the Indices builtin: + /// - this builtin has a different layout in HLSL & SPIR-V, meaning it + /// requires + /// the same kind of handling as classic stageVarInstructions: + /// -> load into a HLSL compatible tmp + /// -> write back into the SPIR-V compatible layout. + /// - but the builtin is shared across invocations (not only lanes). + /// -> we must only write/read from the indices requested by the user. 
+ /// - the variable can be passed to other functions as a out param + /// -> we cannot copy-in/copy-out because shared across invocations. + /// -> we cannot pass a simple pointer: layout differences between + /// HLSL/SPIR-V. + /// + /// All this means we must keep track of the builtin, and each assignment to + /// this will have to handle the layout differences. The easiest solution is + /// to keep this builtin global to the module if present. + SpirvVariable *msOutIndicesBuiltin = nullptr; + /// Vector of all defined resource variables. llvm::SmallVector resourceVars; /// Mapping from {RW|Append|Consume}StructuredBuffers to their diff --git a/tools/clang/lib/SPIRV/EmitVisitor.cpp b/tools/clang/lib/SPIRV/EmitVisitor.cpp index 6f6f5f88cd..eb00f59632 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.cpp +++ b/tools/clang/lib/SPIRV/EmitVisitor.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // Do not change the inclusion order between "dxc/Support/*" files. 
@@ -488,6 +491,7 @@ std::vector EmitVisitor::takeBinary() { debugVariableBinary.end()); result.insert(result.end(), annotationsBinary.begin(), annotationsBinary.end()); + result.insert(result.end(), fwdDeclBinary.begin(), fwdDeclBinary.end()); result.insert(result.end(), typeConstantBinary.begin(), typeConstantBinary.end()); result.insert(result.end(), globalVarsBinary.begin(), globalVarsBinary.end()); @@ -1016,6 +1020,28 @@ bool EmitVisitor::visit(SpirvConstantNull *inst) { return true; } +bool EmitVisitor::visit(SpirvConvertPtrToU *inst) { + initInstruction(inst); + curInst.push_back(inst->getResultTypeId()); + curInst.push_back(getOrAssignResultId(inst)); + curInst.push_back(getOrAssignResultId(inst->getPtr())); + finalizeInstruction(&mainBinary); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); + return true; +} + +bool EmitVisitor::visit(SpirvConvertUToPtr *inst) { + initInstruction(inst); + curInst.push_back(inst->getResultTypeId()); + curInst.push_back(getOrAssignResultId(inst)); + curInst.push_back(getOrAssignResultId(inst->getVal())); + finalizeInstruction(&mainBinary); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); + return true; +} + bool EmitVisitor::visit(SpirvUndef *inst) { typeHandler.getOrCreateUndef(inst); emitDebugNameForInstruction(getOrAssignResultId(inst), @@ -1108,9 +1134,10 @@ bool EmitVisitor::visit(SpirvGroupNonUniformOp *inst) { initInstruction(inst); curInst.push_back(inst->getResultTypeId()); curInst.push_back(getOrAssignResultId(inst)); - curInst.push_back(typeHandler.getOrCreateConstantInt( - llvm::APInt(32, static_cast(inst->getExecutionScope())), - context.getUIntType(32), /* isSpecConst */ false)); + if (inst->hasExecutionScope()) + curInst.push_back(typeHandler.getOrCreateConstantInt( + llvm::APInt(32, static_cast(inst->getExecutionScope())), + context.getUIntType(32), /* isSpecConst */ false)); if (inst->hasGroupOp()) 
curInst.push_back(static_cast(inst->getGroupOp())); for (auto *operand : inst->getOperands()) @@ -2012,10 +2039,11 @@ void EmitTypeHandler::initTypeInstruction(spv::Op op) { curTypeInst.push_back(static_cast(op)); } -void EmitTypeHandler::finalizeTypeInstruction() { +void EmitTypeHandler::finalizeTypeInstruction(bool isFwdDecl) { curTypeInst[0] |= static_cast(curTypeInst.size()) << 16; - typeConstantBinary->insert(typeConstantBinary->end(), curTypeInst.begin(), - curTypeInst.end()); + auto binarySection = isFwdDecl ? fwdDeclBinary : typeConstantBinary; + binarySection->insert(binarySection->end(), curTypeInst.begin(), + curTypeInst.end()); } uint32_t EmitTypeHandler::getResultIdForType(const SpirvType *type, @@ -2594,6 +2622,17 @@ uint32_t EmitTypeHandler::emitType(const SpirvType *type) { curTypeInst.push_back(pointeeType); finalizeTypeInstruction(); } + // Forward pointer types + else if (const auto *fwdPtrType = dyn_cast(type)) { + const SpirvPointerType *ptrType = + context.getForwardReference(fwdPtrType->getPointeeType()); + const uint32_t refId = emitType(ptrType); + initTypeInstruction(spv::Op::OpTypeForwardPointer); + curTypeInst.push_back(refId); + curTypeInst.push_back(static_cast(ptrType->getStorageClass())); + finalizeTypeInstruction(true); + return refId; + } // Function types else if (const auto *fnType = dyn_cast(type)) { const uint32_t retTypeId = emitType(fnType->getReturnType()); diff --git a/tools/clang/lib/SPIRV/EmitVisitor.h b/tools/clang/lib/SPIRV/EmitVisitor.h index 2f5d99b89d..1f9b0939e6 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.h +++ b/tools/clang/lib/SPIRV/EmitVisitor.h @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_EMITVISITOR_H #define LLVM_CLANG_SPIRV_EMITVISITOR_H @@ -49,15 +53,15 @@ class EmitTypeHandler { EmitTypeHandler(ASTContext &astCtx, SpirvContext &spvContext, const SpirvCodeGenOptions &opts, FeatureManager &featureMgr, std::vector *debugVec, - std::vector *decVec, + std::vector *decVec, std::vector *fwdVec, std::vector *typesVec, const std::function &takeNextIdFn) : astContext(astCtx), context(spvContext), featureManager(featureMgr), debugVariableBinary(debugVec), annotationsBinary(decVec), - typeConstantBinary(typesVec), takeNextIdFunction(takeNextIdFn), - emittedConstantInts({}), emittedConstantFloats({}), - emittedConstantComposites({}), emittedConstantNulls({}), - emittedUndef({}), emittedConstantBools() { + fwdDeclBinary(fwdVec), typeConstantBinary(typesVec), + takeNextIdFunction(takeNextIdFn), emittedConstantInts({}), + emittedConstantFloats({}), emittedConstantComposites({}), + emittedConstantNulls({}), emittedUndef({}), emittedConstantBools() { assert(decVec); assert(typesVec); } @@ -120,7 +124,7 @@ class EmitTypeHandler { private: void initTypeInstruction(spv::Op op); - void finalizeTypeInstruction(); + void finalizeTypeInstruction(bool isFwdDecl = false); // Returns the result-id for the given type and decorations. 
If a type with // the same decorations have already been used, it returns the existing @@ -161,6 +165,7 @@ class EmitTypeHandler { std::vector curDecorationInst; std::vector *debugVariableBinary; std::vector *annotationsBinary; + std::vector *fwdDeclBinary; std::vector *typeConstantBinary; std::function takeNextIdFunction; @@ -207,7 +212,7 @@ class EmitVisitor : public Visitor { : Visitor(opts, spvCtx), astContext(astCtx), featureManager(featureMgr), id(0), typeHandler(astCtx, spvCtx, opts, featureMgr, &debugVariableBinary, - &annotationsBinary, &typeConstantBinary, + &annotationsBinary, &fwdDeclBinary, &typeConstantBinary, [this]() -> uint32_t { return takeNextId(); }), debugMainFileId(0), debugInfoExtInstId(0), debugLineStart(0), debugLineEnd(0), debugColumnStart(0), debugColumnEnd(0), @@ -254,6 +259,8 @@ class EmitVisitor : public Visitor { bool visit(SpirvConstantFloat *) override; bool visit(SpirvConstantComposite *) override; bool visit(SpirvConstantNull *) override; + bool visit(SpirvConvertPtrToU *) override; + bool visit(SpirvConvertUToPtr *) override; bool visit(SpirvUndef *) override; bool visit(SpirvCompositeConstruct *) override; bool visit(SpirvCompositeExtract *) override; @@ -438,7 +445,9 @@ class EmitVisitor : public Visitor { // All annotation instructions: OpDecorate, OpMemberDecorate, OpGroupDecorate, // OpGroupMemberDecorate, and OpDecorationGroup. 
std::vector annotationsBinary; - // All type and constant instructions + // All forward pointer type declaration instructions + std::vector fwdDeclBinary; + // All other type and constant instructions std::vector typeConstantBinary; // All global variable declarations (all OpVariable instructions whose Storage // Class is not Function) diff --git a/tools/clang/lib/SPIRV/FeatureManager.cpp b/tools/clang/lib/SPIRV/FeatureManager.cpp index 2512984a4c..7fb449fee9 100644 --- a/tools/clang/lib/SPIRV/FeatureManager.cpp +++ b/tools/clang/lib/SPIRV/FeatureManager.cpp @@ -215,6 +215,8 @@ Extension FeatureManager::getExtensionSymbol(llvm::StringRef name) { .Case("SPV_KHR_physical_storage_buffer", Extension::KHR_physical_storage_buffer) .Case("SPV_KHR_vulkan_memory_model", Extension::KHR_vulkan_memory_model) + .Case("SPV_KHR_compute_shader_derivatives", + Extension::KHR_compute_shader_derivatives) .Case("SPV_NV_compute_shader_derivatives", Extension::NV_compute_shader_derivatives) .Case("SPV_KHR_fragment_shader_barycentric", @@ -224,6 +226,7 @@ Extension FeatureManager::getExtensionSymbol(llvm::StringRef name) { .Case("SPV_KHR_float_controls", Extension::KHR_float_controls) .Case("SPV_NV_shader_subgroup_partitioned", Extension::NV_shader_subgroup_partitioned) + .Case("SPV_KHR_quad_control", Extension::KHR_quad_control) .Default(Extension::Unknown); } @@ -283,6 +286,8 @@ const char *FeatureManager::getExtensionName(Extension symbol) { return "SPV_KHR_physical_storage_buffer"; case Extension::KHR_vulkan_memory_model: return "SPV_KHR_vulkan_memory_model"; + case Extension::KHR_compute_shader_derivatives: + return "SPV_KHR_compute_shader_derivatives"; case Extension::NV_compute_shader_derivatives: return "SPV_NV_compute_shader_derivatives"; case Extension::KHR_fragment_shader_barycentric: @@ -293,6 +298,8 @@ const char *FeatureManager::getExtensionName(Extension symbol) { return "SPV_KHR_float_controls"; case Extension::NV_shader_subgroup_partitioned: return 
"SPV_NV_shader_subgroup_partitioned"; + case Extension::KHR_quad_control: + return "SPV_KHR_quad_control"; default: break; } @@ -370,6 +377,10 @@ bool FeatureManager::enabledByDefault(Extension ext) { // KHR_ray_tracing and NV_ray_tracing are mutually exclusive so enable only // KHR extension by default case Extension::NV_ray_tracing: + return false; + // KHR_compute_shader_derivatives and NV_compute_shader_derivatives are + // mutually exclusive so enable only KHR extension by default + case Extension::NV_compute_shader_derivatives: return false; // Enabling EXT_demote_to_helper_invocation changes the code generation // behavior for the 'discard' statement. Therefore we will only enable it if @@ -405,5 +416,23 @@ bool FeatureManager::isTargetEnvVulkan1p3OrAbove() { return targetEnv >= SPV_ENV_VULKAN_1_3; } +bool FeatureManager::isTargetEnvVulkan() { + // This assert ensure that this list will be updated, if necessary, when + // a new target environment is added. + static_assert(SPV_ENV_VULKAN_1_4 + 1 == SPV_ENV_MAX); + + switch (targetEnv) { + case SPV_ENV_VULKAN_1_0: + case SPV_ENV_VULKAN_1_1: + case SPV_ENV_VULKAN_1_2: + case SPV_ENV_VULKAN_1_1_SPIRV_1_4: + case SPV_ENV_VULKAN_1_3: + case SPV_ENV_VULKAN_1_4: + return true; + default: + return false; + } +} + } // end namespace spirv } // end namespace clang diff --git a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp index 24cce9d89e..b31d19b5d8 100644 --- a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp +++ b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #include "LowerTypeVisitor.h" @@ -549,7 +552,9 @@ const SpirvType *LowerTypeVisitor::lowerType(QualType type, // checking the general struct type. if (const auto *spvType = lowerResourceType(type, rule, isRowMajor, srcLoc)) { - spvContext.registerStructDeclForSpirvType(spvType, decl); + if (!isa(spvType)) { + spvContext.registerStructDeclForSpirvType(spvType, decl); + } return spvType; } @@ -809,6 +814,32 @@ const SpirvType *LowerTypeVisitor::lowerVkTypeInVkNamespace( QualType realType = hlsl::GetHLSLResourceTemplateParamType(type); return lowerType(realType, rule, llvm::None, srcLoc); } + if (name == "BufferPointer") { + const size_t visitedTypeStackSize = visitedTypeStack.size(); + (void)visitedTypeStackSize; // suppress unused warning (used only in assert) + + for (QualType t : visitedTypeStack) { + if (t == type) { + return spvContext.getForwardPointerType(type); + } + } + + QualType realType = hlsl::GetHLSLResourceTemplateParamType(type); + if (rule == SpirvLayoutRule::Void) { + rule = spvOptions.sBufferLayoutRule; + } + visitedTypeStack.push_back(type); + + const SpirvType *spirvType = lowerType(realType, rule, llvm::None, srcLoc); + const auto *pointerType = spvContext.getPointerType( + spirvType, spv::StorageClass::PhysicalStorageBuffer); + spvContext.registerForwardReference(type, pointerType); + + assert(visitedTypeStack.back() == type); + visitedTypeStack.pop_back(); + assert(visitedTypeStack.size() == visitedTypeStackSize); + return pointerType; + } emitError("unknown type %0 in vk namespace", srcLoc) << type; return nullptr; } @@ -834,26 +865,6 @@ LowerTypeVisitor::lowerResourceType(QualType type, SpirvLayoutRule rule, // TODO: avoid string comparison once hlsl::IsHLSLResouceType() does that. - // Vulkan does not yet support true 16-bit float texture objexts. 
- if (name == "Buffer" || name == "RWBuffer" || name == "Texture1D" || - name == "Texture2D" || name == "Texture3D" || name == "TextureCube" || - name == "Texture1DArray" || name == "Texture2DArray" || - name == "Texture2DMS" || name == "Texture2DMSArray" || - name == "TextureCubeArray" || name == "RWTexture1D" || - name == "RWTexture2D" || name == "RWTexture3D" || - name == "RWTexture1DArray" || name == "RWTexture2DArray") { - const auto sampledType = hlsl::GetHLSLResourceResultType(type); - const auto loweredType = - lowerType(getElementType(astContext, sampledType), rule, - /*isRowMajor*/ llvm::None, srcLoc); - if (const auto *floatType = dyn_cast(loweredType)) { - if (floatType->getBitwidth() == 16) { - emitError("16-bit texture types not yet supported with -spirv", srcLoc); - return nullptr; - } - } - } - { // Texture types spv::Dim dim = {}; bool isArray = {}; diff --git a/tools/clang/lib/SPIRV/LowerTypeVisitor.h b/tools/clang/lib/SPIRV/LowerTypeVisitor.h index 96235d1508..5b26b67e3a 100644 --- a/tools/clang/lib/SPIRV/LowerTypeVisitor.h +++ b/tools/clang/lib/SPIRV/LowerTypeVisitor.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_LIB_SPIRV_LOWERTYPEVISITOR_H @@ -137,6 +140,7 @@ class LowerTypeVisitor : public Visitor { AlignmentSizeCalculator alignmentCalc; /// alignment calculator bool useArrayForMat1xN; /// SPIR-V array for HLSL Matrix 1xN SpirvBuilder &spvBuilder; + SmallVector visitedTypeStack; // for type recursion detection }; } // end namespace spirv diff --git a/tools/clang/lib/SPIRV/SpirvBuilder.cpp b/tools/clang/lib/SPIRV/SpirvBuilder.cpp index b1e7388f16..689fc0715f 100644 --- a/tools/clang/lib/SPIRV/SpirvBuilder.cpp +++ b/tools/clang/lib/SPIRV/SpirvBuilder.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #include "clang/SPIRV/SpirvBuilder.h" @@ -202,6 +205,14 @@ SpirvInstruction *SpirvBuilder::createLoad(QualType resultType, instruction->setLayoutRule(pointer->getLayoutRule()); instruction->setRValue(true); + if (pointer->getStorageClass() == spv::StorageClass::PhysicalStorageBuffer) { + AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); + uint32_t align, size, stride; + std::tie(align, size) = alignmentCalc.getAlignmentAndSize( + resultType, pointer->getLayoutRule(), llvm::None, &stride); + instruction->setAlignment(align); + } + if (pointer->containsAliasComponent() && isAKindOfStructuredOrByteBuffer(resultType)) { instruction->setStorageClass(spv::StorageClass::Uniform); @@ -300,6 +311,16 @@ SpirvStore *SpirvBuilder::createStore(SpirvInstruction *address, new (context) SpirvStore(loc, address, source, llvm::None, range); insertPoint->addInstruction(instruction); + if (address->getStorageClass() == spv::StorageClass::PhysicalStorageBuffer && + address->getAstResultType() != QualType()) { // exclude raw 
buffer + AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); + uint32_t align, size, stride; + std::tie(align, size) = alignmentCalc.getAlignmentAndSize( + address->getAstResultType(), address->getLayoutRule(), llvm::None, + &stride); + instruction->setAlignment(align); + } + if (address->isRasterizerOrdered()) { createEndInvocationInterlockEXT(loc, range); } @@ -432,7 +453,7 @@ SpirvSpecConstantBinaryOp *SpirvBuilder::createSpecConstantBinaryOp( } SpirvGroupNonUniformOp *SpirvBuilder::createGroupNonUniformOp( - spv::Op op, QualType resultType, spv::Scope execScope, + spv::Op op, QualType resultType, llvm::Optional execScope, llvm::ArrayRef operands, SourceLocation loc, llvm::Optional groupOp) { assert(insertPoint && "null insert point"); @@ -491,6 +512,22 @@ SpirvImageTexelPointer *SpirvBuilder::createImageTexelPointer( return instruction; } +SpirvConvertPtrToU *SpirvBuilder::createConvertPtrToU(SpirvInstruction *ptr, + QualType type) { + auto *instruction = new (context) SpirvConvertPtrToU(ptr, type); + instruction->setRValue(true); + insertPoint->addInstruction(instruction); + return instruction; +} + +SpirvConvertUToPtr *SpirvBuilder::createConvertUToPtr(SpirvInstruction *val, + QualType type) { + auto *instruction = new (context) SpirvConvertUToPtr(val, type); + instruction->setRValue(false); + insertPoint->addInstruction(instruction); + return instruction; +} + spv::ImageOperandsMask SpirvBuilder::composeImageOperandsMask( SpirvInstruction *bias, SpirvInstruction *lod, const std::pair &grad, @@ -994,6 +1031,8 @@ SpirvInstruction *SpirvBuilder::createEmulatedBitFieldExtract( rightShift->setResultType(baseType); } + rightShift->setRValue(true); + return rightShift; } diff --git a/tools/clang/lib/SPIRV/SpirvContext.cpp b/tools/clang/lib/SPIRV/SpirvContext.cpp index 6af36eb691..47dfc67433 100644 --- a/tools/clang/lib/SPIRV/SpirvContext.cpp +++ b/tools/clang/lib/SPIRV/SpirvContext.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of 
Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #include @@ -328,6 +331,29 @@ const HybridPointerType *SpirvContext::getPointerType(QualType pointee, return result; } +const ForwardPointerType * +SpirvContext::getForwardPointerType(QualType pointee) { + assert(hlsl::IsVKBufferPointerType(pointee)); + + auto foundPointee = forwardPointerTypes.find(pointee); + if (foundPointee != forwardPointerTypes.end()) { + return foundPointee->second; + } + + return forwardPointerTypes[pointee] = new (this) ForwardPointerType(pointee); +} + +const SpirvPointerType *SpirvContext::getForwardReference(QualType type) { + return forwardReferences[type]; +} + +void SpirvContext::registerForwardReference( + QualType type, const SpirvPointerType *pointerType) { + assert(pointerType->getStorageClass() == + spv::StorageClass::PhysicalStorageBuffer); + forwardReferences[type] = pointerType; +} + FunctionType * SpirvContext::getFunctionType(const SpirvType *ret, llvm::ArrayRef param) { diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 3a67257da7..cd5f860555 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// // // This file implements a SPIR-V emitter class that takes in HLSL AST and emits @@ -809,21 +813,17 @@ void SpirvEmitter::HandleTranslationUnit(ASTContext &context) { spvBuilder.setMemoryModel(spv::AddressingModel::Logical, spv::MemoryModel::GLSL450); - // Even though the 'workQueue' grows due to the above loop, the first - // 'numEntryPoints' entries in the 'workQueue' are the ones with the HLSL - // 'shader' attribute, and must therefore be entry functions. - assert(numEntryPoints <= workQueue.size()); - - for (uint32_t i = 0; i < numEntryPoints; ++i) { + for (uint32_t i = 0; i < workQueue.size(); ++i) { // TODO: assign specific StageVars w.r.t. to entry point const FunctionInfo *entryInfo = workQueue[i]; - assert(entryInfo->isEntryFunction); - spvBuilder.addEntryPoint( - getSpirvShaderStage( - entryInfo->shaderModelKind, - featureManager.isExtensionEnabled(Extension::EXT_mesh_shader)), - entryInfo->entryFunction, getEntryPointName(entryInfo), - getInterfacesForEntryPoint(entryInfo->entryFunction)); + if (entryInfo->isEntryFunction) { + spvBuilder.addEntryPoint( + getSpirvShaderStage( + entryInfo->shaderModelKind, + featureManager.isExtensionEnabled(Extension::EXT_mesh_shader)), + entryInfo->entryFunction, getEntryPointName(entryInfo), + getInterfacesForEntryPoint(entryInfo->entryFunction)); + } } // Add Location decorations to stage input/output variables. @@ -1237,12 +1237,17 @@ SpirvInstruction *SpirvEmitter::doExpr(const Expr *expr, } else if (isa(expr)) { assert(curThis); result = curThis; - } else if (isa(expr)) { + } else if (const auto *constructExpr = dyn_cast(expr)) { // For RayQuery type, we should not explicitly initialize it using // CXXConstructExpr e.g., RayQuery<0> r = RayQuery<0>() is the same as we do // not have a variable initialization. Setting nullptr for the SPIR-V // instruction used for expr will let us skip the variable initialization. 
- if (!hlsl::IsHLSLRayQueryType(expr->getType())) + if (hlsl::IsVKBufferPointerType(expr->getType())) { + const Expr *arg = constructExpr->getArg(0); + SpirvInstruction *value = loadIfGLValue(arg, arg->getSourceRange()); + result = spvBuilder.createConvertUToPtr(value, expr->getType()); + result->setRValue(); + } else if (!hlsl::IsHLSLRayQueryType(expr->getType())) result = curThis; } else if (const auto *unaryExpr = dyn_cast(expr)) { result = doUnaryExprOrTypeTraitExpr(unaryExpr); @@ -1547,7 +1552,23 @@ void SpirvEmitter::doFunctionDecl(const FunctionDecl *decl) { // Create all parameters. for (uint32_t i = 0; i < decl->getNumParams(); ++i) { const ParmVarDecl *paramDecl = decl->getParamDecl(i); - (void)declIdMapper.createFnParam(paramDecl, i + 1 + isNonStaticMemberFn); + QualType paramType = paramDecl->getType(); + auto *param = + declIdMapper.createFnParam(paramDecl, i + 1 + isNonStaticMemberFn); +#ifdef ENABLE_SPIRV_CODEGEN + if (hlsl::IsVKBufferPointerType(paramType)) { + Optional isRowMajor = llvm::None; + QualType desugaredType = desugarType(paramType, &isRowMajor); + if (hlsl::IsVKBufferPointerType(desugaredType)) { + spvBuilder.decorateWithLiterals( + param, + static_cast(paramDecl->hasAttr() + ? 
spv::Decoration::AliasedPointer + : spv::Decoration::RestrictPointer), + {}, loc); + } + } +#endif } if (decl->hasBody()) { @@ -1648,6 +1669,15 @@ bool SpirvEmitter::validateVKAttributes(const NamedDecl *decl) { loc); success = false; } + +#ifdef ENABLE_SPIRV_CODEGEN + if (hlsl::IsVKBufferPointerType(cast(decl)->getType())) { + emitError("vk::push_constant attribute cannot be used on declarations " + "with vk::BufferPointer type", + loc); + success = false; + } +#endif } // vk::shader_record_nv is supported only on cbuffer/ConstantBuffer @@ -1884,6 +1914,19 @@ void SpirvEmitter::doVarDecl(const VarDecl *decl) { } } + if (featureManager.isTargetEnvVulkan() && + (isTexture(decl->getType()) || isRWTexture(decl->getType()) || + isBuffer(decl->getType()) || isRWBuffer(decl->getType()))) { + const auto sampledType = hlsl::GetHLSLResourceResultType(decl->getType()); + if (isFloatOrVecMatOfFloatType(sampledType) && + isOrContains16BitType(sampledType, spirvOptions.enable16BitTypes)) { + emitError("The sampled type for textures cannot be a floating point type " + "smaller than 32-bits when targeting a Vulkan environment.", + loc); + return; + } + } + if (decl->hasAttr()) { // This is a VarDecl for specialization constant. createSpecConstant(decl); @@ -1942,6 +1985,11 @@ void SpirvEmitter::doVarDecl(const VarDecl *decl) { return; } + if (hlsl::IsVKBufferPointerType(decl->getType()) && !decl->hasInit()) { + emitError("vk::BufferPointer has no default constructor", loc); + return; + } + // We can have VarDecls inside cbuffer/tbuffer. For those VarDecls, we need // to emit their cbuffer/tbuffer as a whole and access each individual one // using access chains. 
@@ -2028,10 +2076,24 @@ void SpirvEmitter::doVarDecl(const VarDecl *decl) { needsLegalization = true; } - if (var != nullptr && decl->hasAttrs()) { - declIdMapper.decorateWithIntrinsicAttrs(decl, var); - if (auto attr = decl->getAttr()) { - var->setStorageClass(static_cast(attr->getStclass())); + if (var != nullptr) { + Optional isRowMajor = llvm::None; + QualType desugaredType = desugarType(decl->getType(), &isRowMajor); + if (hlsl::IsVKBufferPointerType(desugaredType)) { + spvBuilder.decorateWithLiterals( + var, + static_cast(decl->hasAttr() + ? spv::Decoration::AliasedPointer + : spv::Decoration::RestrictPointer), + {}, loc); + } + + if (decl->hasAttrs()) { + declIdMapper.decorateWithIntrinsicAttrs(decl, var); + if (auto attr = decl->getAttr()) { + var->setStorageClass( + static_cast(attr->getStclass())); + } } } @@ -3104,12 +3166,6 @@ SpirvInstruction *SpirvEmitter::processCall(const CallExpr *callExpr) { argInfo && argInfo->getStorageClass() != spv::StorageClass::Function && isResourceType(paramType); - // HLSL requires that the parameters be copied in and out from temporaries. - // This looks for cases where the copy can be elided. To generate valid - // SPIR-V, the argument must be a memory declaration. - // - // - // If argInfo is nullptr and argInst is a rvalue, we do not have a proper // pointer to pass to the function. we need a temporary variable in that // case. @@ -3118,7 +3174,7 @@ SpirvInstruction *SpirvEmitter::processCall(const CallExpr *callExpr) { // create a temporary variable for it because the function definition // expects are point-to-pointer argument for resources, which will be // resolved by legalization. 
- if ((argInfo || (argInst && argInst->getopcode() == spv::Op::OpVariable)) && + if ((argInfo || (argInst && !argInst->isRValue())) && canActAsOutParmVar(param) && !isArgGlobalVarWithResourceType && paramTypeMatchesArgType(paramType, arg->getType())) { // Based on SPIR-V spec, function parameter must be always Function @@ -3657,14 +3713,22 @@ SpirvInstruction *SpirvEmitter::doCastExpr(const CastExpr *expr, emitError("implicit cast kind '%0' unimplemented", expr->getExprLoc()) << expr->getCastKindName() << expr->getSourceRange(); expr->dump(); - return 0; + return nullptr; } } + case CastKind::CK_ToVoid: + return nullptr; + case CastKind::CK_VK_BufferPointerToIntegral: { + return spvBuilder.createConvertPtrToU(doExpr(subExpr, range), toType); + } + case CastKind::CK_VK_IntegralToBufferPointer: { + return spvBuilder.createConvertUToPtr(doExpr(subExpr, range), toType); + } default: emitError("implicit cast kind '%0' unimplemented", expr->getExprLoc()) << expr->getCastKindName() << expr->getSourceRange(); expr->dump(); - return 0; + return nullptr; } } @@ -5437,6 +5501,8 @@ SpirvEmitter::processIntrinsicMemberCall(const CXXMemberCallExpr *expr, case IntrinsicOp::MOP_WorldRayDirection: case IntrinsicOp::MOP_WorldRayOrigin: return processRayQueryIntrinsics(expr, opcode); + case IntrinsicOp::MOP_GetBufferContents: + return processIntrinsicGetBufferContents(expr); default: emitError("intrinsic '%0' method unimplemented", expr->getCallee()->getExprLoc()) @@ -7016,6 +7082,12 @@ SpirvInstruction *SpirvEmitter::reconstructValue(SpirvInstruction *srcVal, if (const auto *recordType = valType->getAs()) { assert(recordType->isStructureType()); + if (isTypeInVkNamespace(recordType) && + recordType->getDecl()->getName().equals("BufferPointer")) { + // Uniquely among structs, vk::BufferPointer lowers to a pointer type. 
+ return srcVal; + } + LowerTypeVisitor lowerTypeVisitor(astContext, spvContext, spirvOptions, spvBuilder); const StructType *spirvStructType = @@ -8128,17 +8200,21 @@ void SpirvEmitter::assignToMSOutIndices( if (indices.size() > 1) { vecComponent = indices.back(); } - auto *var = declIdMapper.getStageVarInstruction(decl); - const auto *varTypeDecl = astContext.getAsConstantArrayType(decl->getType()); - QualType varType = varTypeDecl->getElementType(); + SpirvVariable *var = declIdMapper.getMSOutIndicesBuiltin(); + uint32_t numVertices = 1; - if (!isVectorType(varType, nullptr, &numVertices)) { - assert(isScalarType(varType)); - } - QualType valueType = value->getAstResultType(); uint32_t numValues = 1; - if (!isVectorType(valueType, nullptr, &numValues)) { - assert(isScalarType(valueType)); + { + const auto *varTypeDecl = + astContext.getAsConstantArrayType(decl->getType()); + QualType varType = varTypeDecl->getElementType(); + if (!isVectorType(varType, nullptr, &numVertices)) { + assert(isScalarType(varType)); + } + QualType valueType = value->getAstResultType(); + if (!isVectorType(valueType, nullptr, &numValues)) { + assert(isScalarType(valueType)); + } } const auto loc = decl->getLocation(); @@ -8185,7 +8261,10 @@ void SpirvEmitter::assignToMSOutIndices( assert(numValues == numVertices); if (extMesh) { // create accesschain for Primitive*IndicesEXT[vertIndex]. - auto *ptr = spvBuilder.createAccessChain(varType, var, vertIndex, loc); + const ConstantArrayType *CAT = + astContext.getAsConstantArrayType(var->getAstResultType()); + auto *ptr = spvBuilder.createAccessChain(CAT->getElementType(), var, + vertIndex, loc); // finally create store for Primitive*IndicesEXT[vertIndex] = value. 
spvBuilder.createStore(ptr, value, loc); } else { @@ -9192,6 +9271,10 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) { case hlsl::IntrinsicOp::IOP_QuadReadLaneAt: retVal = processWaveQuadWideShuffle(callExpr, hlslOpcode); break; + case hlsl::IntrinsicOp::IOP_QuadAny: + case hlsl::IntrinsicOp::IOP_QuadAll: + retVal = processWaveQuadAnyAll(callExpr, hlslOpcode); + break; case hlsl::IntrinsicOp::IOP_abort: case hlsl::IntrinsicOp::IOP_GetRenderTargetSampleCount: case hlsl::IntrinsicOp::IOP_GetRenderTargetSamplePosition: { @@ -9391,6 +9474,14 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) { case hlsl::IntrinsicOp::IOP_EvaluateAttributeSnapped: { retVal = processEvaluateAttributeAt(callExpr, hlslOpcode, srcLoc, srcRange); break; + } + case hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast: { + retVal = processIntrinsicPointerCast(callExpr, false); + break; + } + case hlsl::IntrinsicOp::IOP_Vkstatic_pointer_cast: { + retVal = processIntrinsicPointerCast(callExpr, true); + break; } INTRINSIC_SPIRV_OP_CASE(ddx, DPdx, true); INTRINSIC_SPIRV_OP_CASE(ddx_coarse, DPdxCoarse, false); @@ -10146,6 +10237,53 @@ SpirvEmitter::processWaveQuadWideShuffle(const CallExpr *callExpr, opcode, retType, spv::Scope::Subgroup, {value, target}, srcLoc); } +SpirvInstruction *SpirvEmitter::processWaveQuadAnyAll(const CallExpr *callExpr, + hlsl::IntrinsicOp op) { + // Signatures: + // bool QuadAny(bool localValue) + // bool QuadAll(bool localValue) + assert(callExpr->getNumArgs() == 1); + assert(op == hlsl::IntrinsicOp::IOP_QuadAny || + op == hlsl::IntrinsicOp::IOP_QuadAll); + featureManager.requestTargetEnv(SPV_ENV_VULKAN_1_1, "Wave Operation", + callExpr->getExprLoc()); + + auto *predicate = doExpr(callExpr->getArg(0)); + const auto srcLoc = callExpr->getExprLoc(); + + if (!featureManager.isExtensionEnabled(Extension::KHR_quad_control)) { + // We can't use QuadAny/QuadAll, so implement them using QuadSwap. 
We + // will read the value at each quad invocation, then combine them. + + spv::Op reducer = op == hlsl::IntrinsicOp::IOP_QuadAny + ? spv::Op::OpLogicalOr + : spv::Op::OpLogicalAnd; + + SpirvInstruction *result = predicate; + + for (size_t i = 0; i < 3; i++) { + SpirvInstruction *invocationValue = spvBuilder.createGroupNonUniformOp( + spv::Op::OpGroupNonUniformQuadSwap, astContext.BoolTy, + spv::Scope::Subgroup, + {predicate, spvBuilder.getConstantInt(astContext.UnsignedIntTy, + llvm::APInt(32, i))}, + srcLoc); + result = spvBuilder.createBinaryOp(reducer, astContext.BoolTy, result, + invocationValue, srcLoc); + } + + return result; + } + + spv::Op opcode = op == hlsl::IntrinsicOp::IOP_QuadAny + ? spv::Op::OpGroupNonUniformQuadAnyKHR + : spv::Op::OpGroupNonUniformQuadAllKHR; + + return spvBuilder.createGroupNonUniformOp(opcode, astContext.BoolTy, + llvm::Optional(), + {predicate}, srcLoc); +} + SpirvInstruction * SpirvEmitter::processWaveActiveAllEqual(const CallExpr *callExpr) { assert(callExpr->getNumArgs() == 1); @@ -10770,6 +10908,56 @@ SpirvEmitter::processIntrinsicClamp(const CallExpr *callExpr) { loc, range); } +SpirvInstruction * +SpirvEmitter::processIntrinsicPointerCast(const CallExpr *callExpr, + bool isStatic) { + const Expr *argExpr = callExpr->getArg(0); + SpirvInstruction *ptr = doExpr(argExpr); + QualType srcType = argExpr->getType(); + QualType destType = callExpr->getType(); + QualType srcTypeArg = hlsl::GetVKBufferPointerBufferType(srcType); + QualType destTypeArg = hlsl::GetVKBufferPointerBufferType(destType); + return srcTypeArg == destTypeArg + ? 
ptr + : spvBuilder.createUnaryOp(spv::Op::OpBitcast, destType, ptr, + callExpr->getExprLoc(), + callExpr->getSourceRange()); +} + +SpirvInstruction *SpirvEmitter::processIntrinsicGetBufferContents( + const CXXMemberCallExpr *callExpr) { + LowerTypeVisitor lowerTypeVisitor(astContext, spvContext, spirvOptions, + spvBuilder); + Expr *obj = callExpr->getImplicitObjectArgument(); + SpirvInstruction *bufferPointer = doExpr(obj); + if (!bufferPointer) + return nullptr; + if (bufferPointer->isRValue()) { + bufferPointer->setRValue(false); + bufferPointer->setStorageClass(spv::StorageClass::PhysicalStorageBuffer); + return bufferPointer; + } + + unsigned align = hlsl::GetVKBufferPointerAlignment(obj->getType()); + lowerTypeVisitor.visitInstruction(bufferPointer); + + const SpirvPointerType *bufferPointerType = + dyn_cast(bufferPointer->getResultType()); + SpirvLoad *retVal = + spvBuilder.createLoad(bufferPointerType->getPointeeType(), bufferPointer, + callExpr->getLocStart()); + if (!align) { + QualType bufferType = hlsl::GetVKBufferPointerBufferType(obj->getType()); + AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); + uint32_t stride; + std::tie(align, std::ignore) = alignmentCalc.getAlignmentAndSize( + bufferType, retVal->getLayoutRule(), llvm::None, &stride); + } + retVal->setAlignment(align); + retVal->setRValue(false); + return retVal; +} + SpirvInstruction * SpirvEmitter::processIntrinsicMemoryBarrier(const CallExpr *callExpr, bool isDevice, bool groupSync, @@ -15044,6 +15232,10 @@ void SpirvEmitter::addDerivativeGroupExecutionMode() { // to 2D quad rules. Using derivative operations in any numthreads // configuration not matching either of these is invalid and will produce an // error. 
+ static_assert(spv::ExecutionMode::DerivativeGroupQuadsNV == + spv::ExecutionMode::DerivativeGroupQuadsKHR); + static_assert(spv::ExecutionMode::DerivativeGroupLinearNV == + spv::ExecutionMode::DerivativeGroupLinearKHR); spv::ExecutionMode em = spv::ExecutionMode::DerivativeGroupQuadsNV; if (numThreads[0] % 4 == 0 && numThreads[1] == 1 && numThreads[2] == 1) { em = spv::ExecutionMode::DerivativeGroupLinearNV; diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.h b/tools/clang/lib/SPIRV/SpirvEmitter.h index eca038527f..79d2c43c35 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.h +++ b/tools/clang/lib/SPIRV/SpirvEmitter.h @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file defines a SPIR-V emitter class that takes in HLSL AST and emits @@ -491,6 +495,15 @@ class SpirvEmitter : public ASTConsumer { /// Processes the 'lit' intrinsic function. SpirvInstruction *processIntrinsicLit(const CallExpr *); + /// Processes the 'vk::static_pointer_cast' and 'vk_reinterpret_pointer_cast' + /// intrinsic functions. + SpirvInstruction *processIntrinsicPointerCast(const CallExpr *, + bool isStatic); + + /// Processes the vk::BufferPointer intrinsic function 'Get'. + SpirvInstruction * + processIntrinsicGetBufferContents(const CXXMemberCallExpr *); + /// Processes the 'GroupMemoryBarrier', 'GroupMemoryBarrierWithGroupSync', /// 'DeviceMemoryBarrier', 'DeviceMemoryBarrierWithGroupSync', /// 'AllMemoryBarrier', and 'AllMemoryBarrierWithGroupSync' intrinsic @@ -657,6 +670,10 @@ class SpirvEmitter : public ASTConsumer { SpirvInstruction *processWaveQuadWideShuffle(const CallExpr *, hlsl::IntrinsicOp op); + /// Processes SM6.7 quad any/all. 
+ SpirvInstruction *processWaveQuadAnyAll(const CallExpr *, + hlsl::IntrinsicOp op); + /// Generates the Spir-V instructions needed to implement the given call to /// WaveActiveAllEqual. Returns a pointer to the instruction that produces the /// final result. diff --git a/tools/clang/lib/SPIRV/SpirvInstruction.cpp b/tools/clang/lib/SPIRV/SpirvInstruction.cpp index 21aada9e82..f41de03adc 100644 --- a/tools/clang/lib/SPIRV/SpirvInstruction.cpp +++ b/tools/clang/lib/SPIRV/SpirvInstruction.cpp @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements the in-memory representation of SPIR-V instructions. @@ -57,6 +61,8 @@ DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantInteger) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantFloat) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantComposite) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantNull) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConvertPtrToU) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConvertUToPtr) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvUndef) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvCompositeConstruct) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvCompositeExtract) @@ -620,6 +626,28 @@ bool SpirvConstantNull::operator==(const SpirvConstantNull &that) const { astResultType == that.astResultType; } +SpirvConvertPtrToU::SpirvConvertPtrToU(SpirvInstruction *ptr, QualType type, + SourceLocation loc, SourceRange range) + : SpirvInstruction(IK_ConvertPtrToU, spv::Op::OpConvertPtrToU, type, loc, + range), + ptr(ptr) {} + +bool SpirvConvertPtrToU::operator==(const SpirvConvertPtrToU &that) const { + return opcode == that.opcode && resultType == that.resultType && + astResultType == that.astResultType && ptr == that.ptr; +} + +SpirvConvertUToPtr::SpirvConvertUToPtr(SpirvInstruction 
*val, QualType type, + SourceLocation loc, SourceRange range) + : SpirvInstruction(IK_ConvertUToPtr, spv::Op::OpConvertUToPtr, type, loc, + range), + val(val) {} + +bool SpirvConvertUToPtr::operator==(const SpirvConvertUToPtr &that) const { + return opcode == that.opcode && resultType == that.resultType && + astResultType == that.astResultType && val == that.val; +} + SpirvUndef::SpirvUndef(QualType type) : SpirvInstruction(IK_Undef, spv::Op::OpUndef, type, /*SourceLocation*/ {}) {} @@ -677,7 +705,7 @@ SpirvFunctionCall::SpirvFunctionCall(QualType resultType, SourceLocation loc, function(fn), args(argsVec.begin(), argsVec.end()) {} SpirvGroupNonUniformOp::SpirvGroupNonUniformOp( - spv::Op op, QualType resultType, spv::Scope scope, + spv::Op op, QualType resultType, llvm::Optional scope, llvm::ArrayRef operandsVec, SourceLocation loc, llvm::Optional group) : SpirvInstruction(IK_GroupNonUniformOp, op, resultType, loc), @@ -709,6 +737,8 @@ SpirvGroupNonUniformOp::SpirvGroupNonUniformOp( case spv::Op::OpGroupNonUniformLogicalAnd: case spv::Op::OpGroupNonUniformLogicalOr: case spv::Op::OpGroupNonUniformLogicalXor: + case spv::Op::OpGroupNonUniformQuadAnyKHR: + case spv::Op::OpGroupNonUniformQuadAllKHR: assert(operandsVec.size() == 1); break; @@ -740,6 +770,11 @@ SpirvGroupNonUniformOp::SpirvGroupNonUniformOp( assert(false && "Unexpected Group non-uniform opcode"); break; } + + if (op != spv::Op::OpGroupNonUniformQuadAnyKHR && + op != spv::Op::OpGroupNonUniformQuadAllKHR) { + assert(scope.hasValue()); + } } SpirvImageOp::SpirvImageOp( diff --git a/tools/clang/lib/Sema/SemaCast.cpp b/tools/clang/lib/Sema/SemaCast.cpp index 10668dc388..f5a864e2b6 100644 --- a/tools/clang/lib/Sema/SemaCast.cpp +++ b/tools/clang/lib/Sema/SemaCast.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// // // This file implements semantic analysis for cast expressions, including @@ -1543,6 +1546,20 @@ TryStaticImplicitCast(Sema &Self, ExprResult &SrcExpr, QualType DestType, if (InitSeq.isConstructorInitialization()) Kind = CK_ConstructorConversion; +#ifdef ENABLE_SPIRV_CODEGEN + // Special cases for vk::BufferPointer. + else if (hlsl::IsVKBufferPointerType(SrcExpr.get()->getType()) && + DestType->isIntegerType() && CCK == Sema::CCK_CStyleCast) { + Kind = CK_VK_BufferPointerToIntegral; + SrcExpr = Result; + return TC_Success; + } else if (hlsl::IsVKBufferPointerType(DestType) && + SrcExpr.get()->getType()->isIntegerType()) { + Kind = CK_VK_IntegralToBufferPointer; + SrcExpr = Result; + return TC_Success; + } +#endif else Kind = CK_NoOp; diff --git a/tools/clang/lib/Sema/SemaChecking.cpp b/tools/clang/lib/Sema/SemaChecking.cpp index 2fde458499..9e64732336 100644 --- a/tools/clang/lib/Sema/SemaChecking.cpp +++ b/tools/clang/lib/Sema/SemaChecking.cpp @@ -6772,8 +6772,8 @@ static void AnalyzeAssignment(Sema &S, BinaryOperator *E) { // Just recurse on the LHS. AnalyzeImplicitConversions(S, E->getLHS(), E->getOperatorLoc()); - S.DiagnoseGloballyCoherentMismatch(E->getRHS(), E->getLHS()->getType(), - E->getOperatorLoc()); + S.DiagnoseCoherenceMismatch(E->getRHS(), E->getLHS()->getType(), + E->getOperatorLoc()); // We want to recurse on the RHS as normal unless we're assigning to // a bitfield. 
@@ -6887,7 +6887,7 @@ void CheckImplicitArgumentConversions(Sema &S, CallExpr *TheCall, ++ArgIdx, ++ParmIdx) { ParmVarDecl *PD = FD->getParamDecl(ParmIdx); Expr *CurrA = TheCall->getArg(ArgIdx); - S.DiagnoseGloballyCoherentMismatch(CurrA, PD->getType(), CC); + S.DiagnoseCoherenceMismatch(CurrA, PD->getType(), CC); } } // HLSL CHange End diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index 6d838fb203..36ab55ea10 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -810,6 +810,13 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, return; } + if (ContainsLongVector(Payload->getType())) { + const unsigned PayloadParametersIdx = 10; + S.Diag(Payload->getLocation(), diag::err_hlsl_unsupported_long_vector) + << PayloadParametersIdx; + return; + } + CollectNonAccessableFields(PayloadType, CallerStage, {}, {}, NonWriteableFields, NonReadableFields); diff --git a/tools/clang/lib/Sema/SemaDecl.cpp b/tools/clang/lib/Sema/SemaDecl.cpp index 06bdeb491a..e09bf4623c 100644 --- a/tools/clang/lib/Sema/SemaDecl.cpp +++ b/tools/clang/lib/Sema/SemaDecl.cpp @@ -9167,9 +9167,10 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, // HLSL Change begin // When initializing an HLSL resource type we should diagnose mismatches in - // globally coherent annotations _unless_ the source is a dynamic resource - // placeholder type where we safely infer the globallycoherent annotaiton. - DiagnoseGloballyCoherentMismatch(Init, DclT, Init->getExprLoc()); + // globally and reorder coherent annotations _unless_ the source is a dynamic + // resource placeholder type where we safely infer the coherence + // annotations. 
+ DiagnoseCoherenceMismatch(Init, DclT, Init->getExprLoc()); // HLSL Change end // Expressions default to 'id' when we're in a debugger diff --git a/tools/clang/lib/Sema/SemaDeclAttr.cpp b/tools/clang/lib/Sema/SemaDeclAttr.cpp index 723900cd07..085874a0ed 100644 --- a/tools/clang/lib/Sema/SemaDeclAttr.cpp +++ b/tools/clang/lib/Sema/SemaDeclAttr.cpp @@ -5105,6 +5105,17 @@ void Sema::ProcessDeclAttributeList(Scope *S, Decl *D, for (const AttributeList* l = AttrList; l; l = l->getNext()) ProcessDeclAttribute(*this, S, D, *l, IncludeCXX11Attributes); + // HLSL Change Starts - Warn of redundant reorder / globally coherent + // attributes + if (D->hasAttr() && + D->hasAttr()) { + Diag(AttrList->getLoc(), diag::warn_hlsl_gc_implies_rc_attribute) + << cast(D); + D->dropAttr(); + return; + } + // HLSL Change Ends + // FIXME: We should be able to handle these cases in TableGen. // GCC accepts // static int a9 __attribute__((weakref)); diff --git a/tools/clang/lib/Sema/SemaExpr.cpp b/tools/clang/lib/Sema/SemaExpr.cpp index c8c762a0a1..507b6a7508 100644 --- a/tools/clang/lib/Sema/SemaExpr.cpp +++ b/tools/clang/lib/Sema/SemaExpr.cpp @@ -2787,13 +2787,18 @@ bool Sema::UseArgumentDependentLookup(const CXXScopeSpec &SS, // Never if a scope specifier was provided. if (SS.isSet()) { // HLSL Change begins - // We want to be able to have intrinsics inside the "vk" namespace. + // We want to be able to have intrinsics inside the "vk" and "dx" + // namespaces. 
const bool isVkNamespace = SS.getScopeRep() && SS.getScopeRep()->getAsNamespace() && SS.getScopeRep()->getAsNamespace()->getName() == "vk"; - if (!isVkNamespace) - // HLSL Change ends + const bool isDxNamespace = + SS.getScopeRep() && SS.getScopeRep()->getAsNamespace() && + SS.getScopeRep()->getAsNamespace()->getName() == "dx"; + + if (!isVkNamespace && !isDxNamespace) + // HLSL Change ends return false; } diff --git a/tools/clang/lib/Sema/SemaExprCXX.cpp b/tools/clang/lib/Sema/SemaExprCXX.cpp index f46bb0ad9f..5113c56205 100644 --- a/tools/clang/lib/Sema/SemaExprCXX.cpp +++ b/tools/clang/lib/Sema/SemaExprCXX.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// /// /// \file @@ -1052,6 +1055,56 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo, // corresponding cast expression. 
if (Exprs.size() == 1 && !ListInitialization) { Expr *Arg = Exprs[0]; +#ifdef ENABLE_SPIRV_CODEGEN + if (hlsl::IsVKBufferPointerType(Ty) && Arg->getType()->isIntegerType()) { + typedef DeclContext::specific_decl_iterator ft_iter; + auto *recordDecl = Ty->getAsCXXRecordDecl(); + auto *specDecl = cast(recordDecl); + auto *templatedDecl = + specDecl->getSpecializedTemplate()->getTemplatedDecl(); + auto functionTemplateDecls = + llvm::iterator_range(ft_iter(templatedDecl->decls_begin()), + ft_iter(templatedDecl->decls_end())); + for (auto *ftd : functionTemplateDecls) { + auto *fd = ftd->getTemplatedDecl(); + if (fd->getNumParams() != 1 || + !fd->getParamDecl(0)->getType()->isIntegerType()) + continue; + + void *insertPos; + auto templateArgs = ftd->getInjectedTemplateArgs(); + auto *functionDecl = ftd->findSpecialization(templateArgs, insertPos); + if (!functionDecl) { + DeclarationNameInfo DInfo(ftd->getDeclName(), + recordDecl->getLocation()); + auto *templateArgList = TemplateArgumentList::CreateCopy( + Context, templateArgs.data(), templateArgs.size()); + functionDecl = CXXConstructorDecl::Create( + Context, recordDecl, Arg->getLocStart(), DInfo, Ty, TInfo, false, + false, false, false); + functionDecl->setFunctionTemplateSpecialization(ftd, templateArgList, + insertPos); + } else if (functionDecl->getDeclKind() != Decl::Kind::CXXConstructor) { + continue; + } + + CanQualType argType = Arg->getType()->getCanonicalTypeUnqualified(); + if (!Arg->isRValue()) { + Arg = ImpCastExprToType(Arg, argType, CK_LValueToRValue).get(); + } + if (argType != Context.UnsignedLongLongTy) { + Arg = ImpCastExprToType(Arg, Context.UnsignedLongLongTy, + CK_IntegralCast) + .get(); + } + return CXXConstructExpr::Create( + Context, Ty, TyBeginLoc, cast(functionDecl), + false, {Arg}, false, false, false, false, + CXXConstructExpr::ConstructionKind::CK_Complete, + SourceRange(LParenLoc, RParenLoc)); + } + } +#endif return BuildCXXFunctionalCastExpr(TInfo, LParenLoc, Arg, RParenLoc); } diff 
--git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index fb3937cfd5..418425a468 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -6,6 +6,9 @@ // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // // This file implements the semantic support for HLSL. // // // /////////////////////////////////////////////////////////////////////////////// @@ -14,6 +17,7 @@ #include "VkConstantsTables.h" #include "dxc/DXIL/DxilFunctionProps.h" #include "dxc/DXIL/DxilShaderModel.h" +#include "dxc/DXIL/DxilUtil.h" #include "dxc/HLSL/HLOperations.h" #include "dxc/HlslIntrinsicOp.h" #include "dxc/Support/Global.h" @@ -31,6 +35,8 @@ #include "clang/AST/HlslTypes.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/Diagnostic.h" +#include "clang/Basic/Specifiers.h" +#include "clang/Parse/ParseDiagnostic.h" #include "clang/Sema/ExternalSemaSource.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Lookup.h" @@ -40,6 +46,7 @@ #include "clang/Sema/TemplateDeduction.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include @@ -191,6 +198,7 @@ enum ArBasicKind { AR_OBJECT_VK_LITERAL, AR_OBJECT_VK_SPV_INTRINSIC_TYPE, AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID, + AR_OBJECT_VK_BUFFER_POINTER, #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -243,6 +251,9 @@ enum ArBasicKind { AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS, + // Shader Execution Reordering + AR_OBJECT_HIT_OBJECT, + AR_BASIC_MAXIMUM_COUNT }; @@ -363,6 +374,8 @@ enum ArBasicKind { #define IS_BPROP_STREAM(_Props) (((_Props)&BPROP_STREAM) != 0) +#define IS_BPROP_PATCH(_Props) (((_Props) & BPROP_PATCH) != 0) + #define 
IS_BPROP_SAMPLER(_Props) (((_Props)&BPROP_SAMPLER) != 0) #define IS_BPROP_TEXTURE(_Props) (((_Props)&BPROP_TEXTURE) != 0) @@ -541,6 +554,7 @@ const UINT g_uBasicKindProps[] = { BPROP_OBJECT, // AR_OBJECT_VK_LITERAL, BPROP_OBJECT, // AR_OBJECT_VK_SPV_INTRINSIC_TYPE use recordType BPROP_OBJECT, // AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID use recordType + BPROP_OBJECT, // AR_OBJECT_VK_BUFFER_POINTER use recordType #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -566,9 +580,9 @@ const UINT g_uBasicKindProps[] = { 0, // AR_OBJECT_PROCEDURAL_PRIMITIVE_HIT_GROUP, 0, // AR_OBJECT_RAYTRACING_PIPELINE_CONFIG1, - BPROP_OBJECT, // AR_OBJECT_RAY_QUERY, - BPROP_OBJECT, // AR_OBJECT_HEAP_RESOURCE, - BPROP_OBJECT, // AR_OBJECT_HEAP_SAMPLER, + LICOMPTYPE_RAY_QUERY, // AR_OBJECT_RAY_QUERY, + BPROP_OBJECT, // AR_OBJECT_HEAP_RESOURCE, + BPROP_OBJECT, // AR_OBJECT_HEAP_SAMPLER, BPROP_OBJECT | BPROP_RWBUFFER | BPROP_TEXTURE, // AR_OBJECT_RWTEXTURE2DMS BPROP_OBJECT | BPROP_RWBUFFER | @@ -591,6 +605,9 @@ const UINT g_uBasicKindProps[] = { BPROP_OBJECT | BPROP_RWBUFFER, // AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, BPROP_OBJECT | BPROP_RWBUFFER, // AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS, + // Shader Execution Reordering + LICOMPTYPE_HIT_OBJECT, // AR_OBJECT_HIT_OBJECT, + // AR_BASIC_MAXIMUM_COUNT }; @@ -616,6 +633,8 @@ C_ASSERT(ARRAYSIZE(g_uBasicKindProps) == AR_BASIC_MAXIMUM_COUNT); #define IS_BASIC_STREAM(_Kind) IS_BPROP_STREAM(GetBasicKindProps(_Kind)) +#define IS_BASIC_PATCH(_Kind) IS_BPROP_PATCH(GetBasicKindProps(_Kind)) + #define IS_BASIC_SAMPLER(_Kind) IS_BPROP_SAMPLER(GetBasicKindProps(_Kind)) #define IS_BASIC_TEXTURE(_Kind) IS_BPROP_TEXTURE(GetBasicKindProps(_Kind)) #define IS_BASIC_OBJECT(_Kind) IS_BPROP_OBJECT(GetBasicKindProps(_Kind)) @@ -1116,6 +1135,9 @@ static const ArBasicKind g_ResourceCT[] = {AR_OBJECT_HEAP_RESOURCE, static const ArBasicKind g_RayDescCT[] = {AR_OBJECT_RAY_DESC, AR_BASIC_UNKNOWN}; +static const ArBasicKind g_RayQueryCT[] = {AR_OBJECT_RAY_QUERY, + 
AR_BASIC_UNKNOWN}; + static const ArBasicKind g_AccelerationStructCT[] = { AR_OBJECT_ACCELERATION_STRUCT, AR_BASIC_UNKNOWN}; @@ -1214,6 +1236,15 @@ static const ArBasicKind g_AnyOutputRecordCT[] = { AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS, AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, AR_BASIC_UNKNOWN}; +// Shader Execution Reordering +static const ArBasicKind g_DxHitObjectCT[] = {AR_OBJECT_HIT_OBJECT, + AR_BASIC_UNKNOWN}; + +#ifdef ENABLE_SPIRV_CODEGEN +static const ArBasicKind g_VKBufferPointerCT[] = {AR_OBJECT_VK_BUFFER_POINTER, + AR_BASIC_UNKNOWN}; +#endif + // Basic kinds, indexed by a LEGAL_INTRINSIC_COMPTYPES value. const ArBasicKind *g_LegalIntrinsicCompTypes[] = { g_NullCT, // LICOMPTYPE_VOID @@ -1268,6 +1299,11 @@ const ArBasicKind *g_LegalIntrinsicCompTypes[] = { g_AnyOutputRecordCT, // LICOMPTYPE_ANY_NODE_OUTPUT_RECORD g_GroupNodeOutputRecordsCT, // LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS g_ThreadNodeOutputRecordsCT, // LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS + g_DxHitObjectCT, // LICOMPTYPE_HIT_OBJECT + g_RayQueryCT, // LICOMPTYPE_RAY_QUERY +#ifdef ENABLE_SPIRV_CODEGEN + g_VKBufferPointerCT, // LICOMPTYPE_VK_BUFFER_POINTER +#endif }; static_assert( ARRAYSIZE(g_LegalIntrinsicCompTypes) == LICOMPTYPE_COUNT, @@ -1326,6 +1362,7 @@ static const ArBasicKind g_ArBasicKindsAsTypes[] = { AR_OBJECT_VK_SPIRV_TYPE, AR_OBJECT_VK_SPIRV_OPAQUE_TYPE, AR_OBJECT_VK_INTEGRAL_CONSTANT, AR_OBJECT_VK_LITERAL, AR_OBJECT_VK_SPV_INTRINSIC_TYPE, AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID, + AR_OBJECT_VK_BUFFER_POINTER, #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -1356,7 +1393,10 @@ static const ArBasicKind g_ArBasicKindsAsTypes[] = { AR_OBJECT_NODE_OUTPUT, AR_OBJECT_EMPTY_NODE_OUTPUT, AR_OBJECT_NODE_OUTPUT_ARRAY, AR_OBJECT_EMPTY_NODE_OUTPUT_ARRAY, - AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS}; + AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS, + + // Shader Execution Reordering + AR_OBJECT_HIT_OBJECT}; // Count of template arguments 
for basic kind of objects that look like // templates (one or more type arguments). @@ -1429,6 +1469,7 @@ static const uint8_t g_ArBasicKindsTemplateCount[] = { 1, // AR_OBJECT_VK_LITERAL, 1, // AR_OBJECT_VK_SPV_INTRINSIC_TYPE 1, // AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID + 2, // AR_OBJECT_VK_BUFFER_POINTER #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -1472,6 +1513,9 @@ static const uint8_t g_ArBasicKindsTemplateCount[] = { 1, // AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, 1, // AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS + + // Shader Execution Reordering + 0, // AR_OBJECT_HIT_OBJECT, }; C_ASSERT(_countof(g_ArBasicKindsAsTypes) == @@ -1574,6 +1618,7 @@ static const SubscriptOperatorRecord g_ArBasicKindsSubscripts[] = { {0, MipsFalse, SampleFalse}, // AR_OBJECT_VK_LITERAL, {0, MipsFalse, SampleFalse}, // AR_OBJECT_VK_SPV_INTRINSIC_TYPE {0, MipsFalse, SampleFalse}, // AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID + {0, MipsFalse, SampleFalse}, // AR_OBJECT_VK_BUFFER_POINTER #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -1618,76 +1663,177 @@ static const SubscriptOperatorRecord g_ArBasicKindsSubscripts[] = { {1, MipsFalse, SampleFalse}, // AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS {1, MipsFalse, SampleFalse}, // AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS + + // Shader Execution Reordering + {0, MipsFalse, SampleFalse}, // AR_OBJECT_HIT_OBJECT, }; C_ASSERT(_countof(g_ArBasicKindsAsTypes) == _countof(g_ArBasicKindsSubscripts)); // Type names for ArBasicKind values. 
static const char *g_ArBasicTypeNames[] = { - "bool", "float", "half", "half", "float", "double", "int", "sbyte", "byte", - "short", "ushort", "int", "uint", "long", "ulong", "min10float", - "min16float", "min12int", "min16int", "min16uint", "int8_t4_packed", - "uint8_t4_packed", "enum", - - "", "", "", "", "", "", + "bool", + "float", + "half", + "half", + "float", + "double", + "int", + "sbyte", + "byte", + "short", + "ushort", + "int", + "uint", + "long", + "ulong", + "min10float", + "min16float", + "min12int", + "min16int", + "min16uint", + "int8_t4_packed", + "uint8_t4_packed", + "enum", + + "", + "", + "", + "", + "", + "", "enum class", - "null", "literal string", "string", + "null", + "literal string", + "string", // "texture", - "Texture1D", "Texture1DArray", "Texture2D", "Texture2DArray", "Texture3D", - "TextureCube", "TextureCubeArray", "Texture2DMS", "Texture2DMSArray", - "SamplerState", "sampler1D", "sampler2D", "sampler3D", "samplerCUBE", - "SamplerComparisonState", "Buffer", "RenderTargetView", "DepthStencilView", - "ComputeShader", "DomainShader", "GeometryShader", "HullShader", - "PixelShader", "VertexShader", "pixelfragment", "vertexfragment", - "StateBlock", "Rasterizer", "DepthStencil", "Blend", "PointStream", - "LineStream", "TriangleStream", "InputPatch", "OutputPatch", "RWTexture1D", - "RWTexture1DArray", "RWTexture2D", "RWTexture2DArray", "RWTexture3D", - "RWBuffer", "ByteAddressBuffer", "RWByteAddressBuffer", "StructuredBuffer", - "RWStructuredBuffer", "RWStructuredBuffer(Incrementable)", - "RWStructuredBuffer(Decrementable)", "AppendStructuredBuffer", + "Texture1D", + "Texture1DArray", + "Texture2D", + "Texture2DArray", + "Texture3D", + "TextureCube", + "TextureCubeArray", + "Texture2DMS", + "Texture2DMSArray", + "SamplerState", + "sampler1D", + "sampler2D", + "sampler3D", + "samplerCUBE", + "SamplerComparisonState", + "Buffer", + "RenderTargetView", + "DepthStencilView", + "ComputeShader", + "DomainShader", + "GeometryShader", + 
"HullShader", + "PixelShader", + "VertexShader", + "pixelfragment", + "vertexfragment", + "StateBlock", + "Rasterizer", + "DepthStencil", + "Blend", + "PointStream", + "LineStream", + "TriangleStream", + "InputPatch", + "OutputPatch", + "RWTexture1D", + "RWTexture1DArray", + "RWTexture2D", + "RWTexture2DArray", + "RWTexture3D", + "RWBuffer", + "ByteAddressBuffer", + "RWByteAddressBuffer", + "StructuredBuffer", + "RWStructuredBuffer", + "RWStructuredBuffer(Incrementable)", + "RWStructuredBuffer(Decrementable)", + "AppendStructuredBuffer", "ConsumeStructuredBuffer", - "ConstantBuffer", "TextureBuffer", + "ConstantBuffer", + "TextureBuffer", - "RasterizerOrderedBuffer", "RasterizerOrderedByteAddressBuffer", - "RasterizerOrderedStructuredBuffer", "RasterizerOrderedTexture1D", - "RasterizerOrderedTexture1DArray", "RasterizerOrderedTexture2D", - "RasterizerOrderedTexture2DArray", "RasterizerOrderedTexture3D", + "RasterizerOrderedBuffer", + "RasterizerOrderedByteAddressBuffer", + "RasterizerOrderedStructuredBuffer", + "RasterizerOrderedTexture1D", + "RasterizerOrderedTexture1DArray", + "RasterizerOrderedTexture2D", + "RasterizerOrderedTexture2DArray", + "RasterizerOrderedTexture3D", - "FeedbackTexture2D", "FeedbackTexture2DArray", + "FeedbackTexture2D", + "FeedbackTexture2DArray", // SPIRV change starts #ifdef ENABLE_SPIRV_CODEGEN - "SubpassInput", "SubpassInputMS", "SpirvType", "SpirvOpaqueType", - "integral_constant", "Literal", "ext_type", "ext_result_id", + "SubpassInput", + "SubpassInputMS", + "SpirvType", + "SpirvOpaqueType", + "integral_constant", + "Literal", + "ext_type", + "ext_result_id", + "BufferPointer", #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends "", - "deprecated effect object", "wave_t", "RayDesc", - "RaytracingAccelerationStructure", "user defined type", + "deprecated effect object", + "wave_t", + "RayDesc", + "RaytracingAccelerationStructure", + "user defined type", "BuiltInTriangleIntersectionAttributes", // subobjects - "StateObjectConfig", 
"GlobalRootSignature", "LocalRootSignature", - "SubobjectToExportsAssociation", "RaytracingShaderConfig", - "RaytracingPipelineConfig", "TriangleHitGroup", - "ProceduralPrimitiveHitGroup", "RaytracingPipelineConfig1", - - "RayQuery", "HEAP_Resource", "HEAP_Sampler", - - "RWTexture2DMS", "RWTexture2DMSArray", + "StateObjectConfig", + "GlobalRootSignature", + "LocalRootSignature", + "SubobjectToExportsAssociation", + "RaytracingShaderConfig", + "RaytracingPipelineConfig", + "TriangleHitGroup", + "ProceduralPrimitiveHitGroup", + "RaytracingPipelineConfig1", + + "RayQuery", + "HEAP_Resource", + "HEAP_Sampler", + + "RWTexture2DMS", + "RWTexture2DMSArray", // Workgraphs - "EmptyNodeInput", "DispatchNodeInputRecord", "RWDispatchNodeInputRecord", - "GroupNodeInputRecords", "RWGroupNodeInputRecords", "ThreadNodeInputRecord", + "EmptyNodeInput", + "DispatchNodeInputRecord", + "RWDispatchNodeInputRecord", + "GroupNodeInputRecords", + "RWGroupNodeInputRecords", + "ThreadNodeInputRecord", "RWThreadNodeInputRecord", - "NodeOutput", "EmptyNodeOutput", "NodeOutputArray", "EmptyNodeOutputArray", + "NodeOutput", + "EmptyNodeOutput", + "NodeOutputArray", + "EmptyNodeOutputArray", - "ThreadNodeOutputRecords", "GroupNodeOutputRecords"}; + "ThreadNodeOutputRecords", + "GroupNodeOutputRecords", + + // Shader Execution Reordering + "HitObject", +}; C_ASSERT(_countof(g_ArBasicTypeNames) == AR_BASIC_MAXIMUM_COUNT); @@ -1727,6 +1873,10 @@ static const char *g_DeprecatedEffectObjectNames[] = { "RenderTargetView", // 16 }; +static bool IsStaticMember(const HLSL_INTRINSIC *fn) { + return fn->Flags & INTRIN_FLAG_STATIC_MEMBER; +} + static bool IsVariadicIntrinsicFunction(const HLSL_INTRINSIC *fn) { return fn->pArgs[fn->uNumArgs - 1].uTemplateId == INTRIN_TEMPLATE_VARARGS; } @@ -1806,12 +1956,19 @@ static void AddHLSLIntrinsicAttr(FunctionDecl *FD, ASTContext &context, } FD->addAttr( HLSLIntrinsicAttr::CreateImplicit(context, tableName, lowering, opcode)); - if (pIntrinsic->bReadNone) + if 
(pIntrinsic->Flags & INTRIN_FLAG_READ_NONE) FD->addAttr(ConstAttr::CreateImplicit(context)); - if (pIntrinsic->bReadOnly) + if (pIntrinsic->Flags & INTRIN_FLAG_READ_ONLY) FD->addAttr(PureAttr::CreateImplicit(context)); - if (pIntrinsic->bIsWave) + if (pIntrinsic->Flags & INTRIN_FLAG_IS_WAVE) FD->addAttr(HLSLWaveSensitiveAttr::CreateImplicit(context)); + if (pIntrinsic->MinShaderModel) { + unsigned Major = pIntrinsic->MinShaderModel >> 4; + unsigned Minor = pIntrinsic->MinShaderModel & 0xF; + FD->addAttr(AvailabilityAttr::CreateImplicit( + context, &context.Idents.get(""), clang::VersionTuple(Major, Minor), + clang::VersionTuple(), clang::VersionTuple(), false, "")); + } } static FunctionDecl * @@ -1857,12 +2014,14 @@ AddHLSLIntrinsicFunction(ASTContext &context, NamespaceDecl *NS, const QualType fnReturnType = functionArgQualTypes[0]; std::vector fnArgTypes(functionArgQualTypes.begin() + 1, functionArgQualTypes.end()); + + StorageClass SC = IsStaticMember(pIntrinsic) ? SC_Static : SC_Extern; QualType functionType = context.getFunctionType(fnReturnType, fnArgTypes, protoInfo, paramMods); FunctionDecl *functionDecl = FunctionDecl::Create( context, currentDeclContext, NoLoc, - DeclarationNameInfo(functionName, NoLoc), functionType, nullptr, - StorageClass::SC_Extern, InlineSpecifiedFalse, HasWrittenPrototypeTrue); + DeclarationNameInfo(functionName, NoLoc), functionType, nullptr, SC, + InlineSpecifiedFalse, HasWrittenPrototypeTrue); currentDeclContext->addDecl(functionDecl); functionDecl->setLexicalDeclContext(currentDeclContext); @@ -2271,6 +2430,10 @@ static void GetIntrinsicMethods(ArBasicKind kind, *intrinsics = g_RayQueryMethods; *intrinsicCount = _countof(g_RayQueryMethods); break; + case AR_OBJECT_HIT_OBJECT: + *intrinsics = g_DxHitObjectMethods; + *intrinsicCount = _countof(g_DxHitObjectMethods); + break; case AR_OBJECT_RWTEXTURE2DMS: *intrinsics = g_RWTexture2DMSMethods; *intrinsicCount = _countof(g_RWTexture2DMSMethods); @@ -2643,13 +2806,17 @@ 
AddBuiltInTriangleIntersectionAttributes(ASTContext &context, // // Subobjects -static CXXRecordDecl *StartSubobjectDecl(ASTContext &context, - const char *name) { +static CXXRecordDecl * +StartSubobjectDecl(ASTContext &context, const char *name, + DXIL::SubobjectKind Kind, + DXIL::HitGroupType HGT = DXIL::HitGroupType::LastEntry) { IdentifierInfo &id = context.Idents.get(StringRef(name), tok::TokenKind::identifier); CXXRecordDecl *decl = CXXRecordDecl::Create( context, TagTypeKind::TTK_Struct, context.getTranslationUnitDecl(), NoLoc, NoLoc, &id, nullptr, DelayTypeCreationTrue); + decl->addAttr(HLSLSubObjectAttr::CreateImplicit( + context, static_cast(Kind), static_cast(HGT))); decl->addAttr(FinalAttr::CreateImplicit(context, FinalAttr::Keyword_final)); decl->startDefinition(); return decl; @@ -2666,7 +2833,8 @@ void FinishSubobjectDecl(ASTContext &context, CXXRecordDecl *decl) { // uint32_t Flags; // }; static CXXRecordDecl *CreateSubobjectStateObjectConfig(ASTContext &context) { - CXXRecordDecl *decl = StartSubobjectDecl(context, "StateObjectConfig"); + CXXRecordDecl *decl = StartSubobjectDecl( + context, "StateObjectConfig", DXIL::SubobjectKind::StateObjectConfig); CreateSimpleField(context, decl, "Flags", context.UnsignedIntTy, AccessSpecifier::AS_private); FinishSubobjectDecl(context, decl); @@ -2680,7 +2848,10 @@ static CXXRecordDecl *CreateSubobjectStateObjectConfig(ASTContext &context) { static CXXRecordDecl *CreateSubobjectRootSignature(ASTContext &context, bool global) { CXXRecordDecl *decl = StartSubobjectDecl( - context, global ? "GlobalRootSignature" : "LocalRootSignature"); + context, global ? "GlobalRootSignature" : "LocalRootSignature", + global ? 
DXIL::SubobjectKind::GlobalRootSignature + : DXIL::SubobjectKind::LocalRootSignature); + CreateSimpleField(context, decl, "Data", context.HLSLStringTy, AccessSpecifier::AS_private); FinishSubobjectDecl(context, decl); @@ -2695,7 +2866,8 @@ static CXXRecordDecl *CreateSubobjectRootSignature(ASTContext &context, static CXXRecordDecl * CreateSubobjectSubobjectToExportsAssoc(ASTContext &context) { CXXRecordDecl *decl = - StartSubobjectDecl(context, "SubobjectToExportsAssociation"); + StartSubobjectDecl(context, "SubobjectToExportsAssociation", + DXIL::SubobjectKind::SubobjectToExportsAssociation); CreateSimpleField(context, decl, "Subobject", context.HLSLStringTy, AccessSpecifier::AS_private); CreateSimpleField(context, decl, "Exports", context.HLSLStringTy, @@ -2711,7 +2883,9 @@ CreateSubobjectSubobjectToExportsAssoc(ASTContext &context) { // }; static CXXRecordDecl * CreateSubobjectRaytracingShaderConfig(ASTContext &context) { - CXXRecordDecl *decl = StartSubobjectDecl(context, "RaytracingShaderConfig"); + CXXRecordDecl *decl = + StartSubobjectDecl(context, "RaytracingShaderConfig", + DXIL::SubobjectKind::RaytracingShaderConfig); CreateSimpleField(context, decl, "MaxPayloadSizeInBytes", context.UnsignedIntTy, AccessSpecifier::AS_private); CreateSimpleField(context, decl, "MaxAttributeSizeInBytes", @@ -2726,7 +2900,9 @@ CreateSubobjectRaytracingShaderConfig(ASTContext &context) { // }; static CXXRecordDecl * CreateSubobjectRaytracingPipelineConfig(ASTContext &context) { - CXXRecordDecl *decl = StartSubobjectDecl(context, "RaytracingPipelineConfig"); + CXXRecordDecl *decl = + StartSubobjectDecl(context, "RaytracingPipelineConfig", + DXIL::SubobjectKind::RaytracingPipelineConfig); CreateSimpleField(context, decl, "MaxTraceRecursionDepth", context.UnsignedIntTy, AccessSpecifier::AS_private); FinishSubobjectDecl(context, decl); @@ -2741,7 +2917,8 @@ CreateSubobjectRaytracingPipelineConfig(ASTContext &context) { static CXXRecordDecl * 
CreateSubobjectRaytracingPipelineConfig1(ASTContext &context) { CXXRecordDecl *decl = - StartSubobjectDecl(context, "RaytracingPipelineConfig1"); + StartSubobjectDecl(context, "RaytracingPipelineConfig1", + DXIL::SubobjectKind::RaytracingPipelineConfig1); CreateSimpleField(context, decl, "MaxTraceRecursionDepth", context.UnsignedIntTy, AccessSpecifier::AS_private); CreateSimpleField(context, decl, "Flags", context.UnsignedIntTy, @@ -2756,7 +2933,9 @@ CreateSubobjectRaytracingPipelineConfig1(ASTContext &context) { // string ClosestHit; // }; static CXXRecordDecl *CreateSubobjectTriangleHitGroup(ASTContext &context) { - CXXRecordDecl *decl = StartSubobjectDecl(context, "TriangleHitGroup"); + CXXRecordDecl *decl = StartSubobjectDecl(context, "TriangleHitGroup", + DXIL::SubobjectKind::HitGroup, + DXIL::HitGroupType::Triangle); CreateSimpleField(context, decl, "AnyHit", context.HLSLStringTy, AccessSpecifier::AS_private); CreateSimpleField(context, decl, "ClosestHit", context.HLSLStringTy, @@ -2773,8 +2952,9 @@ static CXXRecordDecl *CreateSubobjectTriangleHitGroup(ASTContext &context) { // }; static CXXRecordDecl * CreateSubobjectProceduralPrimitiveHitGroup(ASTContext &context) { - CXXRecordDecl *decl = - StartSubobjectDecl(context, "ProceduralPrimitiveHitGroup"); + CXXRecordDecl *decl = StartSubobjectDecl( + context, "ProceduralPrimitiveHitGroup", DXIL::SubobjectKind::HitGroup, + DXIL::HitGroupType::ProceduralPrimitive); CreateSimpleField(context, decl, "AnyHit", context.HLSLStringTy, AccessSpecifier::AS_private); CreateSimpleField(context, decl, "ClosestHit", context.HLSLStringTy, @@ -2822,6 +3002,7 @@ class HLSLExternalSource : public ExternalSemaSource { ClassTemplateDecl *m_vkIntegralConstantTemplateDecl; ClassTemplateDecl *m_vkLiteralTemplateDecl; + ClassTemplateDecl *m_vkBufferPointerTemplateDecl; // Declarations for Work Graph Output Record types ClassTemplateDecl *m_GroupNodeOutputRecordsTemplateDecl; @@ -2833,6 +3014,9 @@ class HLSLExternalSource : public 
ExternalSemaSource { // Namespace decl for Vulkan-specific intrinsic functions NamespaceDecl *m_vkNSDecl; + // Namespace decl for dx intrinsic functions + NamespaceDecl *m_dxNSDecl; + // Context being processed. ASTContext *m_context; @@ -2856,8 +3040,9 @@ class HLSLExternalSource : public ExternalSemaSource { TypedefDecl *m_matrixShorthandTypes[HLSLScalarTypeCount][4][4]; // Vector types already built. - QualType m_vectorTypes[HLSLScalarTypeCount][4]; - TypedefDecl *m_vectorTypedefs[HLSLScalarTypeCount][4]; + QualType m_vectorTypes[HLSLScalarTypeCount][DXIL::kDefaultMaxVectorLength]; + TypedefDecl + *m_vectorTypedefs[HLSLScalarTypeCount][DXIL::kDefaultMaxVectorLength]; // BuiltinType for each scalar type. QualType m_baseTypes[HLSLScalarTypeCount]; @@ -3049,10 +3234,13 @@ class HLSLExternalSource : public ExternalSemaSource { IdentifierInfo *ii = &m_context->Idents.get(StringRef(intrinsic->pArgs[0].pName)); DeclarationName declarationName = DeclarationName(ii); + + StorageClass SC = IsStaticMember(intrinsic) ? 
SC_Static : SC_None; + CXXMethodDecl *functionDecl = CreateObjectFunctionDeclarationWithParams( *m_context, recordDecl, functionResultQT, ArrayRef(argsQTs, numParams), - ArrayRef(argNames, numParams), declarationName, true, + ArrayRef(argNames, numParams), declarationName, true, SC, templateParamNamedDeclsCount > 0); functionDecl->setImplicit(true); @@ -3254,7 +3442,7 @@ class HLSLExternalSource : public ExternalSemaSource { *m_context, recordDecl, resultType, ArrayRef(indexType), ArrayRef(StringRef("index")), m_context->DeclarationNames.getCXXOperatorName(OO_Subscript), true, - true); + StorageClass::SC_None, true); hlsl::CreateFunctionTemplateDecl( *m_context, recordDecl, functionDecl, reinterpret_cast(&templateTypeParmDecl), 1); @@ -3298,9 +3486,8 @@ class HLSLExternalSource : public ExternalSemaSource { return -1; } -#ifdef ENABLE_SPIRV_CODEGEN - SmallVector CreateTemplateTypeParmDeclsForVkIntrinsicFunction( - const HLSL_INTRINSIC *intrinsic) { + SmallVector CreateTemplateTypeParmDeclsForIntrinsicFunction( + const HLSL_INTRINSIC *intrinsic, NamespaceDecl *nsDecl) { SmallVector templateTypeParmDecls; auto &context = m_sema->getASTContext(); const HLSL_INTRINSIC_ARGUMENT *pArgs = intrinsic->pArgs; @@ -3311,9 +3498,8 @@ class HLSLExternalSource : public ExternalSemaSource { pArgs[i].uLegalTemplates == LITEMPLATE_ANY) { IdentifierInfo *id = &context.Idents.get("T"); TemplateTypeParmDecl *templateTypeParmDecl = - TemplateTypeParmDecl::Create(context, m_vkNSDecl, NoLoc, NoLoc, 0, - 0, id, TypenameTrue, - ParameterPackFalse); + TemplateTypeParmDecl::Create(context, nsDecl, NoLoc, NoLoc, 0, 0, + id, TypenameTrue, ParameterPackFalse); if (TInfo == nullptr) { TInfo = m_sema->getASTContext().CreateTypeSourceInfo( m_context->UnsignedIntTy, 0); @@ -3322,12 +3508,31 @@ class HLSLExternalSource : public ExternalSemaSource { templateTypeParmDecls.push_back(templateTypeParmDecl); continue; } + if (pArgs[i].uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION_2) { + if (TInfo == 
nullptr) { + TInfo = m_sema->getASTContext().CreateTypeSourceInfo( + m_context->UnsignedIntTy, 0); + } + IdentifierInfo *idT = &context.Idents.get("T"); + IdentifierInfo *idA = &context.Idents.get("A"); + TemplateTypeParmDecl *templateTypeParmDecl = + TemplateTypeParmDecl::Create(context, m_vkNSDecl, NoLoc, NoLoc, 0, + 0, idT, TypenameTrue, + ParameterPackFalse); + NonTypeTemplateParmDecl *nonTypeTemplateParmDecl = + NonTypeTemplateParmDecl::Create(context, m_vkNSDecl, NoLoc, NoLoc, + 0, 1, idA, context.UnsignedIntTy, + ParameterPackFalse, TInfo); + templateTypeParmDecl->setDefaultArgument(TInfo); + templateTypeParmDecls.push_back(templateTypeParmDecl); + templateTypeParmDecls.push_back(nonTypeTemplateParmDecl); + } } return templateTypeParmDecls; } SmallVector - CreateParmDeclsForVkIntrinsicFunction( + CreateParmDeclsForIntrinsicFunction( const HLSL_INTRINSIC *intrinsic, const SmallVectorImpl ¶mTypes, const SmallVectorImpl ¶mMods) { @@ -3352,7 +3557,7 @@ class HLSLExternalSource : public ExternalSemaSource { return paramDecls; } - SmallVector VkIntrinsicFunctionParamTypes( + SmallVector getIntrinsicFunctionParamTypes( const HLSL_INTRINSIC *intrinsic, const SmallVectorImpl &templateTypeParmDecls) { auto &context = m_sema->getASTContext(); @@ -3387,8 +3592,26 @@ class HLSLExternalSource : public ExternalSemaSource { case LICOMPTYPE_VOID: paramTypes.push_back(context.VoidTy); break; + case LICOMPTYPE_HIT_OBJECT: + paramTypes.push_back(GetBasicKindType(AR_OBJECT_HIT_OBJECT)); + break; +#ifdef ENABLE_SPIRV_CODEGEN + case LICOMPTYPE_VK_BUFFER_POINTER: { + const ArBasicKind *match = + std::find(g_ArBasicKindsAsTypes, + &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], + AR_OBJECT_VK_BUFFER_POINTER); + DXASSERT(match != + &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], + "otherwise can't find constant in basic kinds"); + size_t index = match - g_ArBasicKindsAsTypes; + paramTypes.push_back( + 
m_sema->getASTContext().getTypeDeclType(m_objectTypeDecls[index])); + break; + } +#endif default: - DXASSERT(false, "Argument type of vk:: intrinsic function is not " + DXASSERT(false, "Argument type of intrinsic function is not " "supported"); break; } @@ -3396,9 +3619,9 @@ class HLSLExternalSource : public ExternalSemaSource { return paramTypes; } - QualType - VkIntrinsicFunctionType(const SmallVectorImpl ¶mTypes, - const SmallVectorImpl ¶mMods) { + QualType getIntrinsicFunctionType( + const SmallVectorImpl ¶mTypes, + const SmallVectorImpl ¶mMods) { DXASSERT(!paramTypes.empty(), "Given param type vector is empty"); ArrayRef params({}); @@ -3411,7 +3634,7 @@ class HLSLExternalSource : public ExternalSemaSource { EmptyEPI, paramMods); } - void SetParmDeclsForVkIntrinsicFunction( + void SetParmDeclsForIntrinsicFunction( TypeSourceInfo *TInfo, FunctionDecl *functionDecl, const SmallVectorImpl ¶mDecls) { FunctionProtoTypeLoc Proto = @@ -3426,47 +3649,39 @@ class HLSLExternalSource : public ExternalSemaSource { functionDecl->setParams(paramDecls); } - // Adds intrinsic function declarations to the "vk" namespace. - // It does so only if SPIR-V code generation is being done. - // Assumes the implicit "vk" namespace has already been created. - void AddVkIntrinsicFunctions() { - // If not doing SPIR-V CodeGen, return. - if (!m_sema->getLangOpts().SPIRV) - return; - - DXASSERT(m_vkNSDecl, "caller has not created the vk namespace yet"); - + void AddIntrinsicFunctionsToNamespace(const HLSL_INTRINSIC *table, + uint32_t tableSize, + NamespaceDecl *nsDecl) { auto &context = m_sema->getASTContext(); - for (uint32_t i = 0; i < _countof(g_VkIntrinsics); ++i) { - const HLSL_INTRINSIC *intrinsic = &g_VkIntrinsics[i]; + for (uint32_t i = 0; i < tableSize; ++i) { + const HLSL_INTRINSIC *intrinsic = &table[i]; const IdentifierInfo &fnII = context.Idents.get( intrinsic->pArgs->pName, tok::TokenKind::identifier); DeclarationName functionName(&fnII); // Create TemplateTypeParmDecl. 
SmallVector templateTypeParmDecls = - CreateTemplateTypeParmDeclsForVkIntrinsicFunction(intrinsic); + CreateTemplateTypeParmDeclsForIntrinsicFunction(intrinsic, nsDecl); // Get types for parameters. SmallVector paramTypes = - VkIntrinsicFunctionParamTypes(intrinsic, templateTypeParmDecls); + getIntrinsicFunctionParamTypes(intrinsic, templateTypeParmDecls); SmallVector paramMods; InitParamMods(intrinsic, paramMods); // Create FunctionDecl. - QualType fnType = VkIntrinsicFunctionType(paramTypes, paramMods); + StorageClass SC = IsStaticMember(intrinsic) ? SC_Static : SC_Extern; + QualType fnType = getIntrinsicFunctionType(paramTypes, paramMods); TypeSourceInfo *TInfo = m_sema->getASTContext().CreateTypeSourceInfo(fnType, 0); FunctionDecl *functionDecl = FunctionDecl::Create( - context, m_vkNSDecl, NoLoc, DeclarationNameInfo(functionName, NoLoc), - fnType, TInfo, StorageClass::SC_Extern, InlineSpecifiedFalse, - HasWrittenPrototypeTrue); + context, nsDecl, NoLoc, DeclarationNameInfo(functionName, NoLoc), + fnType, TInfo, SC, InlineSpecifiedFalse, HasWrittenPrototypeTrue); // Create and set ParmVarDecl. 
SmallVector paramDecls = - CreateParmDeclsForVkIntrinsicFunction(intrinsic, paramTypes, - paramMods); - SetParmDeclsForVkIntrinsicFunction(TInfo, functionDecl, paramDecls); + CreateParmDeclsForIntrinsicFunction(intrinsic, paramTypes, paramMods); + SetParmDeclsForIntrinsicFunction(TInfo, functionDecl, paramDecls); if (!templateTypeParmDecls.empty()) { TemplateParameterList *templateParmList = TemplateParameterList::Create( @@ -3474,22 +3689,52 @@ class HLSLExternalSource : public ExternalSemaSource { templateTypeParmDecls.size(), NoLoc); functionDecl->setTemplateParameterListsInfo(context, 1, &templateParmList); - FunctionTemplateDecl *functionTemplate = FunctionTemplateDecl::Create( - context, m_vkNSDecl, NoLoc, functionName, templateParmList, - functionDecl); + FunctionTemplateDecl *functionTemplate = + FunctionTemplateDecl::Create(context, nsDecl, NoLoc, functionName, + templateParmList, functionDecl); functionDecl->setDescribedFunctionTemplate(functionTemplate); - m_vkNSDecl->addDecl(functionTemplate); - functionTemplate->setDeclContext(m_vkNSDecl); + nsDecl->addDecl(functionTemplate); + functionTemplate->setDeclContext(nsDecl); } else { - m_vkNSDecl->addDecl(functionDecl); - functionDecl->setLexicalDeclContext(m_vkNSDecl); - functionDecl->setDeclContext(m_vkNSDecl); + nsDecl->addDecl(functionDecl); + functionDecl->setLexicalDeclContext(nsDecl); + functionDecl->setDeclContext(nsDecl); } functionDecl->setImplicit(true); } } + // Adds intrinsic function declarations to the "dx" namespace. + // Assumes the implicit "vk" namespace has already been created. + void AddDxIntrinsicFunctions() { + DXASSERT(m_dxNSDecl, "caller has not created the dx namespace yet"); + + AddIntrinsicFunctionsToNamespace(g_DxIntrinsics, _countof(g_DxIntrinsics), + m_dxNSDecl); + // Eagerly declare HitObject methods. This is required to make lookup of + // 'static' HLSL member functions work without special-casing HLSL scope + // lookup. 
+ CXXRecordDecl *HitObjectDecl = + GetBasicKindType(AR_OBJECT_HIT_OBJECT)->getAsCXXRecordDecl(); + CompleteType(HitObjectDecl); + } + +#ifdef ENABLE_SPIRV_CODEGEN + // Adds intrinsic function declarations to the "vk" namespace. + // It does so only if SPIR-V code generation is being done. + // Assumes the implicit "vk" namespace has already been created. + void AddVkIntrinsicFunctions() { + // If not doing SPIR-V CodeGen, return. + if (!m_sema->getLangOpts().SPIRV) + return; + + DXASSERT(m_vkNSDecl, "caller has not created the vk namespace yet"); + + AddIntrinsicFunctionsToNamespace(g_VkIntrinsics, _countof(g_VkIntrinsics), + m_vkNSDecl); + } + // Adds implicitly defined Vulkan-specific constants to the "vk" namespace. // It does so only if SPIR-V code generation is being done. // Assumes the implicit "vk" namespace has already been created. @@ -3540,6 +3785,20 @@ class HLSLExternalSource : public ExternalSemaSource { if (kind == AR_OBJECT_LEGACY_EFFECT) effectKindIndex = i; + InheritableAttr *Attr = nullptr; + if (IS_BASIC_STREAM(kind)) + Attr = HLSLStreamOutputAttr::CreateImplicit( + *m_context, kind - AR_OBJECT_POINTSTREAM + 1); + else if (IS_BASIC_PATCH(kind)) + Attr = HLSLTessPatchAttr::CreateImplicit(*m_context, + kind == AR_OBJECT_INPUTPATCH); + else { + DXIL::ResourceKind ResKind = DXIL::ResourceKind::NumEntries; + DXIL::ResourceClass ResClass = DXIL::ResourceClass::Invalid; + if (GetBasicKindResourceKindAndClass(kind, ResKind, ResClass)) + Attr = HLSLResourceAttr::CreateImplicit(*m_context, (unsigned)ResKind, + (unsigned)ResClass); + } DXASSERT(kind < _countof(g_ArBasicTypeNames), "g_ArBasicTypeNames has the wrong number of entries"); assert(kind < _countof(g_ArBasicTypeNames)); @@ -3586,11 +3845,15 @@ class HLSLExternalSource : public ExternalSemaSource { break; } } else if (kind == AR_OBJECT_CONSTANT_BUFFER) { - recordDecl = DeclareConstantBufferViewType(*m_context, /*bTBuf*/ false); + recordDecl = DeclareConstantBufferViewType(*m_context, Attr); } else 
if (kind == AR_OBJECT_TEXTURE_BUFFER) { - recordDecl = DeclareConstantBufferViewType(*m_context, /*bTBuf*/ true); + recordDecl = DeclareConstantBufferViewType(*m_context, Attr); } else if (kind == AR_OBJECT_RAY_QUERY) { recordDecl = DeclareRayQueryType(*m_context); + } else if (kind == AR_OBJECT_HIT_OBJECT) { + // Declare 'HitObject' in '::dx' extension namespace. + DXASSERT(m_dxNSDecl, "namespace ::dx must be declared in SM6.9+"); + recordDecl = DeclareHitObjectType(*m_dxNSDecl); } else if (kind == AR_OBJECT_HEAP_RESOURCE) { recordDecl = DeclareResourceType(*m_context, /*bSampler*/ false); if (SM->IsSM66Plus()) { @@ -3609,10 +3872,10 @@ class HLSLExternalSource : public ExternalSemaSource { } } else if (kind == AR_OBJECT_FEEDBACKTEXTURE2D) { recordDecl = DeclareUIntTemplatedTypeWithHandle( - *m_context, "FeedbackTexture2D", "kind"); + *m_context, "FeedbackTexture2D", "kind", Attr); } else if (kind == AR_OBJECT_FEEDBACKTEXTURE2D_ARRAY) { recordDecl = DeclareUIntTemplatedTypeWithHandle( - *m_context, "FeedbackTexture2DArray", "kind"); + *m_context, "FeedbackTexture2DArray", "kind", Attr); } else if (kind == AR_OBJECT_EMPTY_NODE_INPUT) { recordDecl = DeclareNodeOrRecordType( *m_context, DXIL::NodeIOKind::EmptyInput, @@ -3725,23 +3988,21 @@ class HLSLExternalSource : public ExternalSemaSource { recordDecl = DeclareTemplateTypeWithHandleInDeclContext( *m_context, m_vkNSDecl, typeName, 1, nullptr); recordDecl->setImplicit(true); + } else if (kind == AR_OBJECT_VK_BUFFER_POINTER) { + if (!m_vkNSDecl) + continue; + recordDecl = DeclareVkBufferPointerType(*m_context, m_vkNSDecl); + recordDecl->setImplicit(true); + m_vkBufferPointerTemplateDecl = recordDecl->getDescribedClassTemplate(); } #endif else if (templateArgCount == 0) { - recordDecl = DeclareRecordTypeWithHandle(*m_context, typeName, - /*isCompleteType*/ false); + recordDecl = + DeclareRecordTypeWithHandle(*m_context, typeName, + /*isCompleteType*/ false, Attr); } else { DXASSERT(templateArgCount == 1 || 
templateArgCount == 2, "otherwise a new case has been added"); - - InheritableAttr *Attr = nullptr; - DXIL::ResourceKind ResKind = DXIL::ResourceKind::NumEntries; - DXIL::ResourceClass ResClass = DXIL::ResourceClass::Invalid; - if (GetBasicKindResourceKindAndClass(kind, ResKind, ResClass)) { - Attr = HLSLResourceAttr::CreateImplicit(*m_context, (unsigned)ResKind, - (unsigned)ResClass); - } - TypeSourceInfo *typeDefault = TemplateHasDefaultType(kind) ? float4TypeSourceInfo : nullptr; recordDecl = DeclareTemplateTypeWithHandle( @@ -3830,7 +4091,7 @@ class HLSLExternalSource : public ExternalSemaSource { clang::TypedefDecl *LookupVectorShorthandType(HLSLScalarType scalarType, UINT colCount) { DXASSERT_NOMSG(scalarType != HLSLScalarType::HLSLScalarType_unknown && - colCount <= 4); + colCount <= DXIL::kDefaultMaxVectorLength); TypedefDecl *qts = m_vectorTypedefs[scalarType][colCount - 1]; if (qts == nullptr) { QualType type = LookupVectorType(scalarType, colCount); @@ -3845,9 +4106,10 @@ class HLSLExternalSource : public ExternalSemaSource { HLSLExternalSource() : m_matrixTemplateDecl(nullptr), m_vectorTemplateDecl(nullptr), m_vkIntegralConstantTemplateDecl(nullptr), - m_vkLiteralTemplateDecl(nullptr), m_hlslNSDecl(nullptr), - m_vkNSDecl(nullptr), m_context(nullptr), m_sema(nullptr), - m_hlslStringTypedef(nullptr) { + m_vkLiteralTemplateDecl(nullptr), + m_vkBufferPointerTemplateDecl(nullptr), m_hlslNSDecl(nullptr), + m_vkNSDecl(nullptr), m_dxNSDecl(nullptr), m_context(nullptr), + m_sema(nullptr), m_hlslStringTypedef(nullptr) { memset(m_matrixTypes, 0, sizeof(m_matrixTypes)); memset(m_matrixShorthandTypes, 0, sizeof(m_matrixShorthandTypes)); memset(m_vectorTypes, 0, sizeof(m_vectorTypes)); @@ -3876,6 +4138,14 @@ class HLSLExternalSource : public ExternalSemaSource { m_sema = &S; S.addExternalSource(this); + m_dxNSDecl = + NamespaceDecl::Create(context, context.getTranslationUnitDecl(), + /*Inline*/ false, SourceLocation(), + SourceLocation(), &context.Idents.get("dx"), + 
/*PrevDecl*/ nullptr); + m_dxNSDecl->setImplicit(); + context.getTranslationUnitDecl()->addDecl(m_dxNSDecl); + #ifdef ENABLE_SPIRV_CODEGEN if (m_sema->getLangOpts().SPIRV) { // Create the "vk" namespace which contains Vulkan-specific intrinsics. @@ -3894,6 +4164,8 @@ class HLSLExternalSource : public ExternalSemaSource { AddIntrinsicTableMethods(intrinsic); } + AddDxIntrinsicFunctions(); + #ifdef ENABLE_SPIRV_CODEGEN if (m_sema->getLangOpts().SPIRV) { // Add Vulkan-specific intrinsics. @@ -3937,7 +4209,9 @@ class HLSLExternalSource : public ExternalSemaSource { } QualType LookupVectorType(HLSLScalarType scalarType, unsigned int colCount) { - QualType qt = m_vectorTypes[scalarType][colCount - 1]; + QualType qt; + if (colCount < DXIL::kDefaultMaxVectorLength) + qt = m_vectorTypes[scalarType][colCount - 1]; if (qt.isNull()) { if (m_scalarTypes[scalarType].isNull()) { LookupScalarTypeDef(scalarType); @@ -3945,7 +4219,8 @@ class HLSLExternalSource : public ExternalSemaSource { qt = GetOrCreateVectorSpecialization(*m_context, m_sema, m_vectorTemplateDecl, m_scalarTypes[scalarType], colCount); - m_vectorTypes[scalarType][colCount - 1] = qt; + if (colCount < DXIL::kDefaultMaxVectorLength) + m_vectorTypes[scalarType][colCount - 1] = qt; } return qt; } @@ -3969,13 +4244,6 @@ class HLSLExternalSource : public ExternalSemaSource { return IsSubobjectBasicKind(GetTypeElementKind(type)); } - bool IsRayQueryBasicKind(ArBasicKind kind) { - return kind == AR_OBJECT_RAY_QUERY; - } - bool IsRayQueryType(QualType type) { - return IsRayQueryBasicKind(GetTypeElementKind(type)); - } - void WarnMinPrecision(QualType Type, SourceLocation Loc) { Type = Type->getCanonicalTypeUnqualified(); if (IsVectorType(m_sema, Type) || IsMatrixType(m_sema, Type)) { @@ -4580,6 +4848,7 @@ class HLSLExternalSource : public ExternalSemaSource { case AR_OBJECT_WAVE: case AR_OBJECT_ACCELERATION_STRUCT: case AR_OBJECT_RAY_DESC: + case AR_OBJECT_HIT_OBJECT: case AR_OBJECT_TRIANGLE_INTERSECTION_ATTRIBUTES: case 
AR_OBJECT_RWTEXTURE2DMS: case AR_OBJECT_RWTEXTURE2DMS_ARRAY: @@ -4596,7 +4865,11 @@ class HLSLExternalSource : public ExternalSemaSource { case AR_OBJECT_NODE_OUTPUT_ARRAY: case AR_OBJECT_EMPTY_NODE_OUTPUT_ARRAY: case AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS: - case AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS: { + case AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS: +#ifdef ENABLE_SPIRV_CODEGEN + case AR_OBJECT_VK_BUFFER_POINTER: +#endif + { const ArBasicKind *match = std::find( g_ArBasicKindsAsTypes, &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], kind); @@ -4722,16 +4995,14 @@ class HLSLExternalSource : public ExternalSemaSource { ResKind = DXIL::ResourceKind::RawBuffer; ResClass = DXIL::ResourceClass::UAV; return true; - case AR_OBJECT_CONSUME_STRUCTURED_BUFFER: - case AR_OBJECT_APPEND_STRUCTURED_BUFFER: - // It may seem incorrect to make these SRV, - // but it is consistent with GetHLSLResourceProperties(). case AR_OBJECT_STRUCTURED_BUFFER: ResKind = DXIL::ResourceKind::StructuredBuffer; ResClass = DXIL::ResourceClass::SRV; return true; case AR_OBJECT_RWSTRUCTURED_BUFFER: case AR_OBJECT_ROVSTRUCTURED_BUFFER: + case AR_OBJECT_CONSUME_STRUCTURED_BUFFER: + case AR_OBJECT_APPEND_STRUCTURED_BUFFER: ResKind = DXIL::ResourceKind::StructuredBuffer; ResClass = DXIL::ResourceClass::UAV; return true; @@ -4741,7 +5012,7 @@ class HLSLExternalSource : public ExternalSemaSource { return true; case AR_OBJECT_TEXTURE_BUFFER: ResKind = DXIL::ResourceKind::TBuffer; - ResClass = DXIL::ResourceClass::CBuffer; + ResClass = DXIL::ResourceClass::SRV; return true; case AR_OBJECT_FEEDBACKTEXTURE2D: ResKind = DXIL::ResourceKind::FeedbackTexture2D; @@ -4751,6 +5022,15 @@ class HLSLExternalSource : public ExternalSemaSource { ResKind = DXIL::ResourceKind::FeedbackTexture2DArray; ResClass = DXIL::ResourceClass::SRV; return true; + case AR_OBJECT_SAMPLER: + case AR_OBJECT_SAMPLERCOMPARISON: + ResKind = DXIL::ResourceKind::Sampler; + ResClass = DXIL::ResourceClass::Sampler; + return true; + case 
AR_OBJECT_ACCELERATION_STRUCT: + ResKind = DXIL::ResourceKind::RTAccelerationStructure; + ResClass = DXIL::ResourceClass::SRV; + return true; default: return false; } @@ -4896,12 +5176,18 @@ class HLSLExternalSource : public ExternalSemaSource { ULE->getQualifier()->getKind() == NestedNameSpecifier::Namespace && ULE->getQualifier()->getAsNamespace()->getName() == "vk"; + const bool isDxNamespace = + ULE->getQualifier() && + ULE->getQualifier()->getKind() == NestedNameSpecifier::Namespace && + ULE->getQualifier()->getAsNamespace()->getName() == "dx"; + // Intrinsics live in the global namespace, so references to their names // should be either unqualified or '::'-prefixed. - // Exception: Vulkan-specific intrinsics live in the 'vk::' namespace. - if (isQualified && !isGlobalNamespace && !isVkNamespace) { + // Exceptions: + // - Vulkan-specific intrinsics live in the 'vk::' namespace. + // - DirectX-specific intrinsics live in the 'dx::' namespace. + if (isQualified && !isGlobalNamespace && !isVkNamespace && !isDxNamespace) return false; - } const DeclarationNameInfo declName = ULE->getNameInfo(); IdentifierInfo *idInfo = declName.getName().getAsIdentifierInfo(); @@ -4912,6 +5198,10 @@ class HLSLExternalSource : public ExternalSemaSource { StringRef nameIdentifier = idInfo->getName(); const HLSL_INTRINSIC *table = g_Intrinsics; auto tableCount = _countof(g_Intrinsics); + if (isDxNamespace) { + table = g_DxIntrinsics; + tableCount = _countof(g_DxIntrinsics); + } #ifdef ENABLE_SPIRV_CODEGEN if (isVkNamespace) { table = g_VkIntrinsics; @@ -4948,11 +5238,16 @@ class HLSLExternalSource : public ExternalSemaSource { m_usedIntrinsics.insert(UsedIntrinsic(pIntrinsic, functionArgTypes)); bool insertedNewValue = insertResult.second; if (insertedNewValue) { + NamespaceDecl *nsDecl = m_hlslNSDecl; + if (isVkNamespace) + nsDecl = m_vkNSDecl; + else if (isDxNamespace) + nsDecl = m_dxNSDecl; DXASSERT(tableName, "otherwise IDxcIntrinsicTable::GetTableName() failed"); - 
intrinsicFuncDecl = AddHLSLIntrinsicFunction( - *m_context, isVkNamespace ? m_vkNSDecl : m_hlslNSDecl, tableName, - lowering, pIntrinsic, &functionArgTypes); + intrinsicFuncDecl = + AddHLSLIntrinsicFunction(*m_context, nsDecl, tableName, lowering, + pIntrinsic, &functionArgTypes); insertResult.first->setFunctionDecl(intrinsicFuncDecl); } else { intrinsicFuncDecl = (*insertResult.first).getFunctionDecl(); @@ -5033,10 +5328,6 @@ class HLSLExternalSource : public ExternalSemaSource { AR_BASIC_UNKNOWN; } - /// Checks whether the specified value is a valid vector - /// size. - bool IsValidVectorSize(size_t length) { return 1 <= length && length <= 4; } - /// Checks whether the specified value is a valid matrix row or /// column size. bool IsValidMatrixColOrRowSize(size_t length) { @@ -5072,11 +5363,6 @@ class HLSLExternalSource : public ExternalSemaSource { false); } else if (objectKind == AR_TOBJ_VECTOR) { bool valid = true; - if (!IsValidVectorSize(GetHLSLVecSize(type))) { - valid = false; - m_sema->Diag(argLoc, diag::err_hlsl_unsupportedvectorsize) - << type << GetHLSLVecSize(type); - } if (!IsScalarType(GetMatrixOrVectorElementType(type))) { valid = false; m_sema->Diag(argLoc, diag::err_hlsl_unsupportedvectortype) @@ -5099,6 +5385,10 @@ class HLSLExternalSource : public ExternalSemaSource { << type << GetMatrixOrVectorElementType(type); } return valid; +#ifdef ENABLE_SPIRV_CODEGEN + } else if (hlsl::IsVKBufferPointerType(qt)) { + return true; +#endif } else if (qt->isStructureOrClassType()) { const RecordType *recordType = qt->getAs(); objectKind = ClassifyRecordType(recordType); @@ -5194,9 +5484,13 @@ class HLSLExternalSource : public ExternalSemaSource { SourceLocation Loc); bool CheckRangedTemplateArgument(SourceLocation diagLoc, - llvm::APSInt &sintValue) { - if (!sintValue.isStrictlyPositive() || sintValue.getLimitedValue() > 4) { - m_sema->Diag(diagLoc, diag::err_hlsl_invalid_range_1_4); + llvm::APSInt &sintValue, bool IsVector) { + unsigned MaxLength = 
DXIL::kDefaultMaxVectorLength; + if (IsVector) + MaxLength = m_sema->getLangOpts().MaxHLSLVectorLength; + if (!sintValue.isStrictlyPositive() || + sintValue.getLimitedValue() > MaxLength) { + m_sema->Diag(diagLoc, diag::err_hlsl_invalid_range_1_to_max) << MaxLength; return true; } @@ -5219,11 +5513,14 @@ class HLSLExternalSource : public ExternalSemaSource { return false; } // Allow object type for Constant/TextureBuffer. - if (templateName == "ConstantBuffer" || templateName == "TextureBuffer") { + HLSLResourceAttr *ResAttr = + Template->getTemplatedDecl()->getAttr(); + if (ResAttr && DXIL::IsCTBuffer(ResAttr->getResKind())) { if (TemplateArgList.size() == 1) { const TemplateArgumentLoc &argLoc = TemplateArgList[0]; const TemplateArgument &arg = argLoc.getArgument(); - DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, ""); + DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, + "cbuffer with non-type template arg"); QualType argType = arg.getAsType(); SourceLocation argSrcLoc = argLoc.getLocation(); if (IsScalarType(argType) || IsVectorType(m_sema, argType) || @@ -5233,23 +5530,14 @@ class HLSLExternalSource : public ExternalSemaSource { << argType; return true; } - if (auto *TST = dyn_cast(argType)) { - // This is a bit of a special case we need to handle. Because the - // buffer types don't use their template parameter in a way that would - // force instantiation, we need to force specialization here. 
- GetOrCreateTemplateSpecialization( - *m_context, *m_sema, - cast( - TST->getTemplateName().getAsTemplateDecl()), - llvm::ArrayRef(TST->getArgs(), - TST->getNumArgs())); - } - if (const RecordType *recordType = argType->getAs()) { - if (!recordType->getDecl()->isCompleteDefinition()) { - m_sema->Diag(argSrcLoc, diag::err_typecheck_decl_incomplete_type) - << argType; - return true; - } + m_sema->RequireCompleteType(argSrcLoc, argType, + diag::err_typecheck_decl_incomplete_type); + + if (ContainsLongVector(argType)) { + const unsigned ConstantBuffersOrTextureBuffersIdx = 0; + m_sema->Diag(argSrcLoc, diag::err_hlsl_unsupported_long_vector) + << ConstantBuffersOrTextureBuffersIdx; + return true; } } return false; @@ -5279,22 +5567,13 @@ class HLSLExternalSource : public ExternalSemaSource { // template instantiation. if (ArgTy->isDependentType()) return false; - if (auto *recordType = ArgTy->getAs()) { - if (CXXRecordDecl *cxxRecordDecl = - dyn_cast(recordType->getDecl())) { - if (ClassTemplateSpecializationDecl *templateSpecializationDecl = - dyn_cast(cxxRecordDecl)) { - if (templateSpecializationDecl->getSpecializationKind() == - TSK_Undeclared) { - // Make sure specialization is done before IsTypeNumeric. - // If not, ArgTy might be treat as empty struct. - m_sema->RequireCompleteType( - ArgLoc.getLocation(), ArgTy, - diag::err_typecheck_decl_incomplete_type); - } - } - } - } + // Make sure specialization is done before IsTypeNumeric. + // If not, ArgTy might be treat as empty struct. + m_sema->RequireCompleteType(ArgLoc.getLocation(), ArgTy, + diag::err_typecheck_decl_incomplete_type); + CXXRecordDecl *Decl = ArgTy->getAsCXXRecordDecl(); + if (Decl && !Decl->isCompleteDefinition()) + return true; // The node record type must be compound - error if it is not. 
if (GetTypeObjectKind(ArgTy) != AR_TOBJ_COMPOUND) { m_sema->Diag(ArgLoc.getLocation(), diag::err_hlsl_node_record_type) @@ -5316,6 +5595,78 @@ class HLSLExternalSource : public ExternalSemaSource { return true; } return false; + } else if (Template->getTemplatedDecl() + ->hasAttr()) { + int numArgs = TemplateArgList.size(); + DXASSERT(numArgs == 1 || numArgs == 2, + "otherwise the template has not been declared properly"); + + // first, determine if the rayquery flag AllowOpacityMicromaps is set + bool HasRayQueryFlagAllowOpacityMicromaps = false; + if (numArgs > 1) { + const TemplateArgument &Arg2 = TemplateArgList[1].getArgument(); + Expr *Expr2 = Arg2.getAsExpr(); + llvm::APSInt Arg2val; + Expr2->isIntegerConstantExpr(Arg2val, m_sema->getASTContext()); + if (Arg2val.getZExtValue() & + (unsigned)DXIL::RayQueryFlag::AllowOpacityMicromaps) + HasRayQueryFlagAllowOpacityMicromaps = true; + } + + // next, get the first template argument, to check if + // the ForceOMM2State flag is set + const TemplateArgument &Arg1 = TemplateArgList[0].getArgument(); + Expr *Expr1 = Arg1.getAsExpr(); + llvm::APSInt Arg1val; + bool HasRayFlagForceOMM2State = + Expr1->isIntegerConstantExpr(Arg1val, m_sema->getASTContext()) && + (Arg1val.getLimitedValue() & + (uint64_t)DXIL::RayFlag::ForceOMM2State) != 0; + + // finally, if ForceOMM2State is set and AllowOpacityMicromaps + // isn't, emit a warning + if (HasRayFlagForceOMM2State && !HasRayQueryFlagAllowOpacityMicromaps) + m_sema->Diag(TemplateArgList[0].getLocation(), + diag::warn_hlsl_rayquery_flags_conflict); + } else if (Template->getTemplatedDecl()->hasAttr()) { + DXASSERT(TemplateArgList.size() > 0, + "Tessellation patch should have at least one template args"); + const TemplateArgumentLoc &argLoc = TemplateArgList[0]; + const TemplateArgument &arg = argLoc.getArgument(); + DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, + "Tessellation patch requires type template arg 0"); + + 
m_sema->RequireCompleteType(argLoc.getLocation(), arg.getAsType(), + diag::err_typecheck_decl_incomplete_type); + CXXRecordDecl *Decl = arg.getAsType()->getAsCXXRecordDecl(); + if (Decl && !Decl->isCompleteDefinition()) + return true; + if (ContainsLongVector(arg.getAsType())) { + const unsigned TessellationPatchesIDx = 1; + m_sema->Diag(argLoc.getLocation(), + diag::err_hlsl_unsupported_long_vector) + << TessellationPatchesIDx; + return true; + } + } else if (Template->getTemplatedDecl()->hasAttr()) { + DXASSERT(TemplateArgList.size() > 0, + "Geometry streams should have at least one template args"); + const TemplateArgumentLoc &argLoc = TemplateArgList[0]; + const TemplateArgument &arg = argLoc.getArgument(); + DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, + "Geometry stream requires type template arg 0"); + m_sema->RequireCompleteType(argLoc.getLocation(), arg.getAsType(), + diag::err_typecheck_decl_incomplete_type); + CXXRecordDecl *Decl = arg.getAsType()->getAsCXXRecordDecl(); + if (Decl && !Decl->isCompleteDefinition()) + return true; + if (ContainsLongVector(arg.getAsType())) { + const unsigned GeometryStreamsIdx = 2; + m_sema->Diag(argLoc.getLocation(), + diag::err_hlsl_unsupported_long_vector) + << GeometryStreamsIdx; + return true; + } } bool isMatrix = Template->getCanonicalDecl() == @@ -5337,9 +5688,7 @@ class HLSLExternalSource : public ExternalSemaSource { // NOTE: IsValidTemplateArgumentType emits its own diagnostics return true; } - HLSLResourceAttr *ResAttr = - Template->getTemplatedDecl()->getAttr(); - if (ResAttr && IsTyped((DXIL::ResourceKind)ResAttr->getResKind())) { + if (ResAttr && IsTyped(ResAttr->getResKind())) { // Check vectors for being too large. 
if (IsVectorType(m_sema, argType)) { unsigned NumElt = hlsl::GetElementCount(argType); @@ -5368,17 +5717,16 @@ class HLSLExternalSource : public ExternalSemaSource { llvm::APSInt constantResult; if (expr != nullptr && expr->isIntegerConstantExpr(constantResult, *m_context)) { - if (CheckRangedTemplateArgument(argSrcLoc, constantResult)) { + if (CheckRangedTemplateArgument(argSrcLoc, constantResult, + isVector)) return true; - } } } } else if (arg.getKind() == TemplateArgument::ArgKind::Integral) { if (isMatrix || isVector) { llvm::APSInt Val = arg.getAsIntegral(); - if (CheckRangedTemplateArgument(argSrcLoc, Val)) { + if (CheckRangedTemplateArgument(argSrcLoc, Val, isVector)) return true; - } } } } @@ -5670,11 +6018,12 @@ class HLSLExternalSource : public ExternalSemaSource { Params.push_back(paramDecl); } + StorageClass SC = IsStaticMember(intrinsic) ? SC_Static : SC_Extern; QualType T = TInfo->getType(); DeclarationNameInfo NameInfo(FunctionTemplate->getDeclName(), NoLoc); CXXMethodDecl *method = CXXMethodDecl::Create( *m_context, dyn_cast(owner), NoLoc, NameInfo, T, TInfo, - SC_Extern, InlineSpecifiedFalse, IsConstexprFalse, NoLoc); + SC, InlineSpecifiedFalse, IsConstexprFalse, NoLoc); // Add intrinsic attr AddHLSLIntrinsicAttr(method, *m_context, tableName, lowering, intrinsic); @@ -6261,8 +6610,10 @@ bool HLSLExternalSource::MatchArguments( argTypes.clear(); const bool isVariadic = IsVariadicIntrinsicFunction(pIntrinsic); - static const UINT UnusedSize = 0xFF; - static const BYTE MaxIntrinsicArgs = g_MaxIntrinsicParamCount + 1; + static const uint32_t UnusedSize = std::numeric_limits::max(); + static const uint32_t MaxIntrinsicArgs = g_MaxIntrinsicParamCount + 1; + assert(MaxIntrinsicArgs < std::numeric_limits::max() && + "This should be a pretty small number"); #define CAB(cond, arg) \ { \ if (!(cond)) { \ @@ -6277,7 +6628,7 @@ bool HLSLExternalSource::MatchArguments( ArBasicKind ComponentType[MaxIntrinsicArgs]; // Component type for each argument, // 
AR_BASIC_UNKNOWN if unspecified. - UINT uSpecialSize[IA_SPECIAL_SLOTS]; // row/col matching types, UNUSED_INDEX32 + UINT uSpecialSize[IA_SPECIAL_SLOTS]; // row/col matching types, UnusedSize // if unspecified. badArgIdx = MaxIntrinsicArgs; @@ -6512,6 +6863,7 @@ bool HLSLExternalSource::MatchArguments( if (pIntrinsic->pArgs[0].qwUsage && pIntrinsic->pArgs[0].uTemplateId != INTRIN_TEMPLATE_FROM_TYPE && pIntrinsic->pArgs[0].uTemplateId != INTRIN_TEMPLATE_FROM_FUNCTION && + pIntrinsic->pArgs[0].uTemplateId != INTRIN_TEMPLATE_FROM_FUNCTION_2 && pIntrinsic->pArgs[0].uComponentTypeId != INTRIN_COMPTYPE_FROM_NODEOUTPUT) { CAB(pIntrinsic->pArgs[0].uTemplateId < MaxIntrinsicArgs, 0); @@ -6552,7 +6904,8 @@ bool HLSLExternalSource::MatchArguments( // Check template. if (pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_TYPE || - pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION) { + pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION || + pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION_2) { continue; // Already verified that this is available. } if (pArgument->uLegalComponentTypes == LICOMPTYPE_USER_DEFINED_TYPE) { @@ -6631,6 +6984,9 @@ bool HLSLExternalSource::MatchArguments( } } + std::string profile = m_sema->getLangOpts().HLSLProfile; + const ShaderModel *SM = hlsl::ShaderModel::GetByName(profile.c_str()); + // Populate argTypes. 
for (size_t i = 0; i <= Args.size(); i++) { const HLSL_INTRINSIC_ARGUMENT *pArgument = &pIntrinsic->pArgs[i]; @@ -6718,6 +7074,14 @@ bool HLSLExternalSource::MatchArguments( } else { pNewType = functionTemplateTypeArg; } + } else if (pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION_2) { + if (i == 0 && + (builtinOp == hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast || + builtinOp == hlsl::IntrinsicOp::IOP_Vkstatic_pointer_cast)) { + pNewType = Args[0]->getType(); + } else { + badArgIdx = std::min(badArgIdx, i); + } } else if (pArgument->uLegalComponentTypes == LICOMPTYPE_USER_DEFINED_TYPE) { if (objectElement.isNull()) { @@ -6801,8 +7165,9 @@ bool HLSLExternalSource::MatchArguments( } // Verify that the final results are in bounds. - CAB(uCols > 0 && uCols <= MaxVectorSize && uRows > 0 && - uRows <= MaxVectorSize, + CAB((uCols > 0 && uRows > 0 && + ((uCols <= MaxVectorSize && uRows <= MaxVectorSize) || + (SM->IsSM69Plus() && uRows == 1))), i); // Const @@ -7935,7 +8300,8 @@ void HLSLExternalSource::InitializeInitSequenceForHLSL( DXASSERT_NOMSG(initSequence != nullptr); // In HLSL there are no default initializers, eg float4x4 m(); - // Except for RayQuery constructor (also handle InitializationKind::IK_Value) + // Except for RayQuery and HitObject constructors (also handle + // InitializationKind::IK_Value) if (Kind.getKind() == InitializationKind::IK_Default || Kind.getKind() == InitializationKind::IK_Value) { QualType destBaseType = m_context->getBaseElementType(Entity.getType()); @@ -7946,7 +8312,9 @@ void HLSLExternalSource::InitializeInitSequenceForHLSL( GetRecordDeclForBuiltInOrStruct(typeRecordDecl)); DXASSERT(index != -1, "otherwise can't find type we already determined was an object"); - if (g_ArBasicKindsAsTypes[index] == AR_OBJECT_RAY_QUERY) { + + if (g_ArBasicKindsAsTypes[index] == AR_OBJECT_RAY_QUERY || + g_ArBasicKindsAsTypes[index] == AR_OBJECT_HIT_OBJECT) { CXXConstructorDecl *Constructor = *typeRecordDecl->ctor_begin(); 
initSequence->AddConstructorInitializationStep( Constructor, AccessSpecifier::AS_public, destBaseType, false, false, @@ -8571,6 +8939,9 @@ ExprResult HLSLExternalSource::LookupVectorMemberExprForHLSL( llvm_unreachable("Unknown VectorMemberAccessError value"); } + if (colCount > 4) + msg = diag::err_hlsl_vector_member_on_long_vector; + if (msg != 0) { m_sema->Diag(MemberLoc, msg) << memberText; @@ -9397,6 +9768,13 @@ bool HLSLExternalSource::CanConvert(SourceLocation loc, Expr *sourceExpr, return false; } +#ifdef ENABLE_SPIRV_CODEGEN + // Cast vk::BufferPointer to pointer address. + if (SourceInfo.EltKind == AR_OBJECT_VK_BUFFER_POINTER) { + return TargetInfo.EltKind == AR_BASIC_UINT64; + } +#endif + // Cast cbuffer to its result value. if ((SourceInfo.EltKind == AR_OBJECT_CONSTANT_BUFFER || SourceInfo.EltKind == AR_OBJECT_TEXTURE_BUFFER) && @@ -11202,7 +11580,8 @@ static bool CheckFinishedCrossGroupSharingCall(Sema &S, CXXMethodDecl *MD, return false; } -static bool CheckBarrierCall(Sema &S, FunctionDecl *FD, CallExpr *CE) { +static bool CheckBarrierCall(Sema &S, FunctionDecl *FD, CallExpr *CE, + const hlsl::ShaderModel *SM) { DXASSERT(FD->getNumParams() == 2, "otherwise, unknown Barrier overload"); // Emit error when MemoryTypeFlags are known to be invalid. 
@@ -11232,12 +11611,18 @@ static bool CheckBarrierCall(Sema &S, FunctionDecl *FD, CallExpr *CE) { llvm::APSInt SemanticFlagsVal; if (SemanticFlagsExpr->isIntegerConstantExpr(SemanticFlagsVal, S.Context)) { SemanticFlags = SemanticFlagsVal.getLimitedValue(); - if ((uint32_t)SemanticFlags & - ~(uint32_t)DXIL::BarrierSemanticFlag::ValidMask) { + uint32_t ValidMask = 0U; + if (SM->IsSM69Plus()) { + ValidMask = + static_cast(hlsl::DXIL::BarrierSemanticFlag::ValidMask); + } else { + ValidMask = + static_cast(hlsl::DXIL::BarrierSemanticFlag::LegacyFlags); + } + if ((uint32_t)SemanticFlags & ~ValidMask) { S.Diags.Report(SemanticFlagsExpr->getExprLoc(), diag::err_hlsl_barrier_invalid_semantic_flags) - << (uint32_t)SemanticFlags - << (uint32_t)DXIL::BarrierSemanticFlag::ValidMask; + << SM->IsSM69Plus(); return true; } } @@ -11245,6 +11630,32 @@ static bool CheckBarrierCall(Sema &S, FunctionDecl *FD, CallExpr *CE) { return false; } +#ifdef ENABLE_SPIRV_CODEGEN +static bool CheckVKBufferPointerCast(Sema &S, FunctionDecl *FD, CallExpr *CE, + bool isStatic) { + const Expr *argExpr = CE->getArg(0); + QualType srcType = argExpr->getType(); + QualType destType = CE->getType(); + QualType srcTypeArg = hlsl::GetVKBufferPointerBufferType(srcType); + QualType destTypeArg = hlsl::GetVKBufferPointerBufferType(destType); + + if (isStatic && srcTypeArg != destTypeArg && + !S.IsDerivedFrom(srcTypeArg, destTypeArg)) { + S.Diags.Report(CE->getExprLoc(), + diag::err_hlsl_vk_static_pointer_cast_type); + return true; + } + + if (hlsl::GetVKBufferPointerAlignment(destType) > + hlsl::GetVKBufferPointerAlignment(srcType)) { + S.Diags.Report(CE->getExprLoc(), diag::err_hlsl_vk_pointer_cast_alignment); + return true; + } + + return false; +} +#endif + // Check HLSL call constraints, not fatal to creating the AST. 
void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, const FunctionProtoType *Proto) { @@ -11254,6 +11665,9 @@ void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, if (!IsBuiltinTable(IntrinsicAttr->getGroup())) return; + const auto *SM = + hlsl::ShaderModel::GetByName(getLangOpts().HLSLProfile.c_str()); + hlsl::IntrinsicOp opCode = (hlsl::IntrinsicOp)IntrinsicAttr->getOpcode(); switch (opCode) { case hlsl::IntrinsicOp::MOP_FinishedCrossGroupSharing: @@ -11261,8 +11675,16 @@ void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, TheCall->getLocStart()); break; case hlsl::IntrinsicOp::IOP_Barrier: - CheckBarrierCall(*this, FDecl, TheCall); + CheckBarrierCall(*this, FDecl, TheCall, SM); break; +#ifdef ENABLE_SPIRV_CODEGEN + case hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast: + CheckVKBufferPointerCast(*this, FDecl, TheCall, false); + break; + case hlsl::IntrinsicOp::IOP_Vkstatic_pointer_cast: + CheckVKBufferPointerCast(*this, FDecl, TheCall, true); + break; +#endif default: break; } @@ -11522,6 +11944,52 @@ static void DiagnoseReachableBarrier(Sema &S, CallExpr *CE, } } +bool IsRayFlagForceOMM2StateSet(Sema &sema, const CallExpr *CE) { + const Expr *Expr1 = CE->getArg(1); + llvm::APSInt constantResult; + return Expr1->isIntegerConstantExpr(constantResult, sema.getASTContext()) && + (constantResult.getLimitedValue() & + (uint64_t)DXIL::RayFlag::ForceOMM2State) != 0; +} + +void DiagnoseTraceRayInline(Sema &sema, CallExpr *callExpr) { + // Validate if the RayFlag parameter has RAY_FLAG_FORCE_OMM_2_STATE set, + // the RayQuery decl must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set, + // otherwise emit a diagnostic. 
+ if (IsRayFlagForceOMM2StateSet(sema, callExpr)) { + CXXMemberCallExpr *CXXCallExpr = dyn_cast(callExpr); + if (!CXXCallExpr) { + return; + } + const DeclRefExpr *DRE = + dyn_cast(CXXCallExpr->getImplicitObjectArgument()); + assert(DRE); + QualType QT = DRE->getType(); + auto *typeRecordDecl = QT->getAsCXXRecordDecl(); + ClassTemplateSpecializationDecl *SpecDecl = + llvm::dyn_cast(typeRecordDecl); + + if (!SpecDecl) + return; + + // Guaranteed 2 arguments since the rayquery constructor + // automatically creates 2 template args + DXASSERT(SpecDecl->getTemplateArgs().size() == 2, + "else rayquery constructor template args are not 2"); + llvm::APSInt Arg2val = SpecDecl->getTemplateArgs()[1].getAsIntegral(); + bool IsRayQueryAllowOMMSet = + Arg2val.getZExtValue() & + (unsigned)DXIL::RayQueryFlag::AllowOpacityMicromaps; + if (!IsRayQueryAllowOMMSet) { + // Diagnose the call + sema.Diag(CXXCallExpr->getExprLoc(), + diag::warn_hlsl_rayquery_flags_conflict); + sema.Diag(DRE->getDecl()->getLocation(), diag::note_previous_decl) + << "RayQueryFlags"; + } + } +} + static bool isStringLiteral(QualType type) { if (!type->isConstantArrayType()) return false; @@ -11529,6 +11997,35 @@ static bool isStringLiteral(QualType type) { return eType->isSpecificBuiltinType(BuiltinType::Char_S); } +static void DiagnoseReachableSERCall(Sema &S, CallExpr *CE, + DXIL::ShaderKind EntrySK, + const FunctionDecl *EntryDecl, + bool IsReorderOperation) { + bool ValidEntry = false; + switch (EntrySK) { + default: + break; + case DXIL::ShaderKind::ClosestHit: + case DXIL::ShaderKind::Miss: + ValidEntry = !IsReorderOperation; + break; + case DXIL::ShaderKind::RayGeneration: + ValidEntry = true; + break; + } + + if (ValidEntry) + return; + + int DiagID = IsReorderOperation ? 
diag::err_hlsl_reorder_unsupported_stage + : diag::err_hlsl_hitobject_unsupported_stage; + + SourceLocation EntryLoc = EntryDecl->getLocation(); + SourceLocation Loc = CE->getExprLoc(); + S.Diag(Loc, DiagID) << ShaderModel::FullNameFromKind(EntrySK); + S.Diag(EntryLoc, diag::note_hlsl_entry_defined_here); +} + // Check HLSL member call constraints for used functions. // locallyVisited is true if this call has been visited already from any other // entry function. Used to avoid duplicate diagnostics when not dependent on @@ -11566,6 +12063,16 @@ void Sema::DiagnoseReachableHLSLCall(CallExpr *CE, const hlsl::ShaderModel *SM, DiagnoseReachableBarrier(*this, CE, SM, EntrySK, NodeLaunchTy, EntryDecl, Diags); break; + case hlsl::IntrinsicOp::MOP_TraceRayInline: + DiagnoseTraceRayInline(*this, CE); + break; + case hlsl::IntrinsicOp::MOP_DxHitObject_MakeMiss: + case hlsl::IntrinsicOp::MOP_DxHitObject_MakeNop: + DiagnoseReachableSERCall(*this, CE, EntrySK, EntryDecl, false); + break; + case hlsl::IntrinsicOp::IOP_DxMaybeReorderThread: + DiagnoseReachableSERCall(*this, CE, EntrySK, EntryDecl, true); + break; default: break; } @@ -11581,10 +12088,18 @@ bool hlsl::DiagnoseNodeStructArgument(Sema *self, TemplateArgumentLoc ArgLoc, HLSLExternalSource *source = HLSLExternalSource::FromSema(self); ArTypeObjectKind shapeKind = source->GetTypeObjectKind(ArgTy); switch (shapeKind) { + case AR_TOBJ_VECTOR: + if (GetHLSLVecSize(ArgTy) > DXIL::kDefaultMaxVectorLength) { + const unsigned NodeRecordsIdx = 3; + self->Diag(ArgLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) + << NodeRecordsIdx; + Empty = false; + return false; + } + LLVM_FALLTHROUGH; case AR_TOBJ_ARRAY: case AR_TOBJ_BASIC: case AR_TOBJ_MATRIX: - case AR_TOBJ_VECTOR: Empty = false; return false; case AR_TOBJ_OBJECT: @@ -11603,14 +12118,15 @@ bool hlsl::DiagnoseNodeStructArgument(Sema *self, TemplateArgumentLoc ArgLoc, bool ErrorFound = false; const RecordDecl *RD = ArgTy->getAs()->getDecl(); // Check the fields of 
the RecordDecl - RecordDecl::field_iterator begin = RD->field_begin(); - RecordDecl::field_iterator end = RD->field_end(); - while (begin != end) { - const FieldDecl *FD = *begin; + for (auto *FD : RD->fields()) ErrorFound |= DiagnoseNodeStructArgument(self, ArgLoc, FD->getType(), Empty, FD); - begin++; - } + if (RD->isCompleteDefinition()) + if (auto *Child = dyn_cast(RD)) + // Walk up the inheritance chain and check base class fields + for (auto &B : Child->bases()) + ErrorFound |= + DiagnoseNodeStructArgument(self, ArgLoc, B.getType(), Empty); return ErrorFound; } default: @@ -12046,6 +12562,21 @@ bool hlsl::ShouldSkipNRVO(clang::Sema &sema, clang::QualType returnType, return false; } +bool hlsl::ContainsLongVector(QualType QT) { + if (QT.isNull() || QT->isDependentType()) + return false; + + while (const ArrayType *Arr = QT->getAsArrayTypeUnsafe()) + QT = Arr->getElementType(); + + if (CXXRecordDecl *Decl = QT->getAsCXXRecordDecl()) { + if (!Decl->isCompleteDefinition()) + return false; + return Decl->hasHLSLLongVector(); + } + return false; +} + bool hlsl::IsConversionToLessOrEqualElements( clang::Sema *self, const clang::ExprResult &sourceExpr, const clang::QualType &targetType, bool explicitConversion) { @@ -13158,8 +13689,9 @@ ValidateMaxRecordsSharedWithAttributes(Sema &S, Decl *D, void Sema::DiagnoseHLSLDeclAttr(const Decl *D, const Attr *A) { HLSLExternalSource *ExtSource = HLSLExternalSource::FromSema(this); - if (const HLSLGloballyCoherentAttr *HLSLGCAttr = - dyn_cast(A)) { + const bool IsGCAttr = isa(A); + const bool IsRCAttr = isa(A); + if (IsGCAttr || IsRCAttr) { const ValueDecl *TD = cast(D); if (TD->getType()->isDependentType()) return; @@ -13168,23 +13700,25 @@ void Sema::DiagnoseHLSLDeclAttr(const Decl *D, const Attr *A) { DeclType = FD->getReturnType(); while (DeclType->isArrayType()) DeclType = QualType(DeclType->getArrayElementTypeNoTypeQual(), 0); + const bool IsAllowedNodeIO = + IsGCAttr && + GetNodeIOType(DeclType) == 
DXIL::NodeIOKind::RWDispatchNodeInputRecord; + const bool IsUAV = + hlsl::GetResourceClassForType(getASTContext(), DeclType) == + hlsl::DXIL::ResourceClass::UAV; if (ExtSource->GetTypeObjectKind(DeclType) != AR_TOBJ_OBJECT || - (hlsl::GetResourceClassForType(getASTContext(), DeclType) != - hlsl::DXIL::ResourceClass::UAV && - GetNodeIOType(DeclType) != - DXIL::NodeIOKind::RWDispatchNodeInputRecord)) { + (!IsUAV && !IsAllowedNodeIO)) { Diag(A->getLocation(), diag::err_hlsl_varmodifierna_decltype) << A << DeclType->getCanonicalTypeUnqualified() << A->getRange(); - Diag(A->getLocation(), diag::note_hlsl_globallycoherent_applies_to) - << A << A->getRange(); + Diag(A->getLocation(), diag::note_hlsl_coherence_applies_to) + << (int)IsGCAttr << A << A->getRange(); } return; } } -void Sema::DiagnoseGloballyCoherentMismatch(const Expr *SrcExpr, - QualType TargetType, - SourceLocation Loc) { +void Sema::DiagnoseCoherenceMismatch(const Expr *SrcExpr, QualType TargetType, + SourceLocation Loc) { QualType SrcTy = SrcExpr->getType(); QualType DstTy = TargetType; if (SrcTy->isArrayType() && DstTy->isArrayType()) { @@ -13196,9 +13730,39 @@ void Sema::DiagnoseGloballyCoherentMismatch(const Expr *SrcExpr, GetNodeIOType(DstTy) == DXIL::NodeIOKind::RWDispatchNodeInputRecord) { bool SrcGL = hlsl::HasHLSLGloballyCoherent(SrcTy); bool DstGL = hlsl::HasHLSLGloballyCoherent(DstTy); - if (SrcGL != DstGL) - Diag(Loc, diag::warn_hlsl_impcast_glc_mismatch) - << SrcExpr->getType() << TargetType << /*loses|adds*/ DstGL; + // 'reordercoherent' attribute dropped earlier in presence of + // 'globallycoherent' + bool SrcRD = hlsl::HasHLSLReorderCoherent(SrcTy); + bool DstRD = hlsl::HasHLSLReorderCoherent(DstTy); + + enum { + NoMismatch = -1, + DemoteToRD = 0, + PromoteToGL = 1, + LosesRD = 2, + LosesGL = 3, + AddsRD = 4, + AddsGL = 5 + } MismatchType = NoMismatch; + + if (SrcGL && DstRD) + MismatchType = DemoteToRD; + else if (SrcRD && DstGL) + MismatchType = PromoteToGL; + else if (SrcRD && !DstRD) + 
MismatchType = LosesRD; + else if (SrcGL && !DstGL) + MismatchType = LosesGL; + else if (!SrcRD && DstRD) + MismatchType = AddsRD; + else if (!SrcGL && DstGL) + MismatchType = AddsGL; + + if (MismatchType == NoMismatch) + return; + + Diag(Loc, diag::warn_hlsl_impcast_coherence_mismatch) + << SrcExpr->getType() << TargetType << MismatchType; } } @@ -13347,6 +13911,10 @@ void hlsl::HandleDeclAttributeForHLSL(Sema &S, Decl *D, const AttributeList &A, declAttr = ::new (S.Context) HLSLGloballyCoherentAttr( A.getRange(), S.Context, A.getAttributeSpellingListIndex()); break; + case AttributeList::AT_HLSLReorderCoherent: + declAttr = ::new (S.Context) HLSLReorderCoherentAttr( + A.getRange(), S.Context, A.getAttributeSpellingListIndex()); + break; case AttributeList::AT_HLSLIndices: declAttr = ::new (S.Context) HLSLIndicesAttr( A.getRange(), S.Context, A.getAttributeSpellingListIndex()); @@ -13405,6 +13973,10 @@ void hlsl::HandleDeclAttributeForHLSL(Sema &S, Decl *D, const AttributeList &A, A.getRange(), S.Context, A.getAttributeSpellingListIndex()); break; // SPIRV Change Starts + case AttributeList::AT_VKAliasedPointer: { + declAttr = ::new (S.Context) VKAliasedPointerAttr( + A.getRange(), S.Context, A.getAttributeSpellingListIndex()); + } break; case AttributeList::AT_VKDecorateIdExt: { if (A.getNumArgs() == 0 || !A.getArg(0).is()) { Handled = false; @@ -14369,6 +14941,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, *pDispatchGrid = nullptr, *pMaxDispatchGrid = nullptr; bool usageIn = false; bool usageOut = false; + bool isGroupShared = false; for (clang::AttributeList *pAttr = D.getDeclSpec().getAttributes().getList(); pAttr != NULL; pAttr = pAttr->getNext()) { @@ -14392,6 +14965,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, } break; case AttributeList::AT_HLSLGroupShared: + isGroupShared = true; if (!isGlobal) { Diag(pAttr->getLoc(), diag::err_hlsl_varmodifierna) << pAttr->getName() << declarationType 
<< pAttr->getRange(); @@ -14405,6 +14979,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, } break; case AttributeList::AT_HLSLGloballyCoherent: // Handled elsewhere + case AttributeList::AT_HLSLReorderCoherent: // Handled elsewhere break; case AttributeList::AT_HLSLUniform: if (!(isGlobal || isParameter)) { @@ -14672,6 +15247,23 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, result = false; } + // Disallow long vecs from $Global cbuffers. + if (isGlobal && !isStatic && !isGroupShared && !IS_BASIC_OBJECT(basicKind)) { + // Suppress actual emitting of errors for incompletable types here + // They are redundant to those produced in ActOnUninitializedDecl. + struct SilentDiagnoser : public TypeDiagnoser { + SilentDiagnoser() : TypeDiagnoser(true) {} + virtual void diagnose(Sema &S, SourceLocation Loc, QualType T) {} + } SD; + RequireCompleteType(D.getLocStart(), qt, SD); + if (ContainsLongVector(qt)) { + unsigned CbuffersOrTbuffersIdx = 4; + Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) + << CbuffersOrTbuffersIdx; + result = false; + } + } + // SPIRV change starts #ifdef ENABLE_SPIRV_CODEGEN // Validate that Vulkan specific feature is only used when targeting SPIR-V @@ -14783,15 +15375,17 @@ static QualType getUnderlyingType(QualType Type) { void hlsl::GetHLSLAttributedTypes( clang::Sema *self, clang::QualType type, const clang::AttributedType **ppMatrixOrientation, - const clang::AttributedType **ppNorm, const clang::AttributedType **ppGLC) { + const clang::AttributedType **ppNorm, const clang::AttributedType **ppGLC, + const clang::AttributedType **ppRDC) { AssignOpt(nullptr, ppMatrixOrientation); AssignOpt(nullptr, ppNorm); AssignOpt(nullptr, ppGLC); + AssignOpt(nullptr, ppRDC); // Note: we clear output pointers once set so we can stop searching QualType Desugared = getUnderlyingType(type); const AttributedType *AT = dyn_cast(Desugared); - while (AT && (ppMatrixOrientation || ppNorm || 
ppGLC)) { + while (AT && (ppMatrixOrientation || ppNorm || ppGLC || ppRDC)) { AttributedType::Kind Kind = AT->getAttrKind(); if (Kind == AttributedType::attr_hlsl_row_major || @@ -14811,6 +15405,11 @@ void hlsl::GetHLSLAttributedTypes( *ppGLC = AT; ppGLC = nullptr; } + } else if (Kind == AttributedType::attr_hlsl_reordercoherent) { + if (ppRDC) { + *ppRDC = AT; + ppRDC = nullptr; + } } Desugared = getUnderlyingType(AT->getEquivalentType()); @@ -15195,6 +15794,10 @@ void hlsl::CustomPrintHLSLAttr(const clang::Attr *A, llvm::raw_ostream &Out, Out << "globallycoherent "; break; + case clang::attr::HLSLReorderCoherent: + Out << "reordercoherent "; + break; + case clang::attr::HLSLIndices: Out << "indices "; break; @@ -15402,6 +16005,7 @@ bool hlsl::IsHLSLAttr(clang::attr::Kind AttrKind) { case clang::attr::HLSLNodeLocalRootArgumentsTableIndex: case clang::attr::HLSLNodeShareInputOf: case clang::attr::HLSLNodeTrackRWInputSharing: + case clang::attr::HLSLReorderCoherent: case clang::attr::VKBinding: case clang::attr::VKBuiltIn: case clang::attr::VKConstantId: @@ -15560,6 +16164,17 @@ static bool isRelatedDeclMarkedNointerpolation(Expr *E) { return false; } +// Verify that user-defined intrinsic struct args contain no long vectors +static bool CheckUDTIntrinsicArg(Sema *S, Expr *Arg) { + if (ContainsLongVector(Arg->getType())) { + const unsigned UserDefinedStructParameterIdx = 5; + S->Diag(Arg->getExprLoc(), diag::err_hlsl_unsupported_long_vector) + << UserDefinedStructParameterIdx; + return true; + } + return false; +} + static bool CheckIntrinsicGetAttributeAtVertex(Sema *S, FunctionDecl *FDecl, CallExpr *TheCall) { assert(TheCall->getNumArgs() > 0); @@ -15577,6 +16192,12 @@ static bool CheckIntrinsicGetAttributeAtVertex(Sema *S, FunctionDecl *FDecl, bool Sema::CheckHLSLIntrinsicCall(FunctionDecl *FDecl, CallExpr *TheCall) { auto attr = FDecl->getAttr(); + if (!attr) + return false; + + if (!IsBuiltinTable(attr->getGroup())) + return false; + switch 
(hlsl::IntrinsicOp(attr->getOpcode())) { case hlsl::IntrinsicOp::IOP_GetAttributeAtVertex: // See #hlsl-specs/issues/181. Feature is broken. For SPIR-V we want @@ -15588,6 +16209,22 @@ bool Sema::CheckHLSLIntrinsicCall(FunctionDecl *FDecl, CallExpr *TheCall) { // existing ones. See the ExtensionTest.EvalAttributeCollision test. assert(FDecl->getName() == "GetAttributeAtVertex"); return CheckIntrinsicGetAttributeAtVertex(this, FDecl, TheCall); + case hlsl::IntrinsicOp::IOP_DispatchMesh: + assert(TheCall->getNumArgs() > 3); + assert(FDecl->getName() == "DispatchMesh"); + return CheckUDTIntrinsicArg(this, TheCall->getArg(3)->IgnoreCasts()); + case hlsl::IntrinsicOp::IOP_CallShader: + assert(TheCall->getNumArgs() > 1); + assert(FDecl->getName() == "CallShader"); + return CheckUDTIntrinsicArg(this, TheCall->getArg(1)->IgnoreCasts()); + case hlsl::IntrinsicOp::IOP_TraceRay: + assert(TheCall->getNumArgs() > 7); + assert(FDecl->getName() == "TraceRay"); + return CheckUDTIntrinsicArg(this, TheCall->getArg(7)->IgnoreCasts()); + case hlsl::IntrinsicOp::IOP_ReportHit: + assert(TheCall->getNumArgs() > 2); + assert(FDecl->getName() == "ReportHit"); + return CheckUDTIntrinsicArg(this, TheCall->getArg(2)->IgnoreCasts()); default: break; } @@ -16268,6 +16905,23 @@ void DiagnoseEntry(Sema &S, FunctionDecl *FD) { return; } + // Check general parameter characteristics + // Would be nice to check for resources here as they crash the compiler now. + // See issue #7186. 
+ for (const auto *param : FD->params()) { + if (ContainsLongVector(param->getType())) { + const unsigned EntryFunctionParametersIdx = 6; + S.Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) + << EntryFunctionParametersIdx; + } + } + + if (ContainsLongVector(FD->getReturnType())) { + const unsigned EntryFunctionReturnIdx = 7; + S.Diag(FD->getLocation(), diag::err_hlsl_unsupported_long_vector) + << EntryFunctionReturnIdx; + } + DXIL::ShaderKind Stage = ShaderModel::KindFromFullName(shaderAttr->getStage()); llvm::StringRef StageName = shaderAttr->getStage(); diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index cf5d741541..abca7cbf86 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -9,16 +9,24 @@ // // /////////////////////////////////////////////////////////////////////////////// +#include "dxc/DXIL/DxilFunctionProps.h" #include "dxc/DXIL/DxilShaderModel.h" +#include "dxc/HLSL/HLOperations.h" #include "dxc/HlslIntrinsicOp.h" #include "dxc/Support/Global.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/HlslTypes.h" #include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/TypeLoc.h" #include "clang/Sema/SemaDiagnostic.h" #include "clang/Sema/SemaHLSL.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include using namespace clang; @@ -142,13 +150,21 @@ class CallGraphWithRecurseGuard { } public: - void BuildForEntry(FunctionDecl *EntryFnDecl) { + void BuildForEntry(FunctionDecl *EntryFnDecl, + llvm::ArrayRef GlobalsWithInit) { DXASSERT_NOMSG(EntryFnDecl); EntryFnDecl = getFunctionWithBody(EntryFnDecl); PendingFunctions pendingFunctions; FnReferenceVisitor visitor(m_visitedFunctions, pendingFunctions, m_callNodes); - 
pendingFunctions.push_back(EntryFnDecl); + + // First, traverse all initializers, then entry function. + m_visitedFunctions.insert(EntryFnDecl); + visitor.setSourceFn(EntryFnDecl); + for (VarDecl *VD : GlobalsWithInit) + visitor.TraverseDecl(VD); + visitor.TraverseDecl(EntryFnDecl); + while (!pendingFunctions.empty()) { FunctionDecl *pendingDecl = pendingFunctions.pop_back_val(); if (m_visitedFunctions.insert(pendingDecl).second == true) { @@ -284,33 +300,67 @@ std::vector GetAllExportedFDecls(clang::Sema *self) { return AllExportedFDecls; } +void GatherGlobalsWithInitializers( + DeclContext *DC, llvm::SmallVectorImpl &GlobalsWithInit, + llvm::SmallVectorImpl &SubObjects) { + for (auto *D : DC->decls()) { + // Skip built-ins and function decls. + if (D->isImplicit() || isa(D)) + continue; + if (auto *VD = dyn_cast(D)) { + // Add if user-defined static or groupshared global with initializer. + if (VD->hasInit() && VD->hasGlobalStorage() && + (VD->getStorageClass() == SC_Static || + VD->hasAttr())) { + // Place subobjects in a separate collection. + if (const RecordType *RT = VD->getType()->getAs()) { + if (RT->getDecl()->hasAttr()) { + SubObjects.push_back(VD); + continue; + } + } + GlobalsWithInit.push_back(VD); + } + } else if (auto *DC = dyn_cast(D)) { + // Recurse into DeclContexts like namespace, cbuffer, class/struct, etc. + GatherGlobalsWithInitializers(DC, GlobalsWithInit, SubObjects); + } + } +} + // in the non-library case, this function will be run only once, // but in the library case, this function will be run for each // viable top-level function declaration by // ValidateNoRecursionInTranslationUnit. 
// (viable as in, is exported) -clang::FunctionDecl *ValidateNoRecursion(CallGraphWithRecurseGuard &callGraph, - clang::FunctionDecl *FD) { +clang::FunctionDecl * +ValidateNoRecursion(CallGraphWithRecurseGuard &callGraph, + clang::FunctionDecl *FD, + llvm::ArrayRef GlobalsWithInit) { // Validate that there is no recursion reachable by this function declaration // NOTE: the information gathered here could be used to bypass code generation // on functions that are unreachable (as an early form of dead code // elimination). if (FD) { - callGraph.BuildForEntry(FD); + callGraph.BuildForEntry(FD, GlobalsWithInit); return callGraph.CheckRecursion(FD); } return nullptr; } -class HLSLCallDiagnoseVisitor - : public RecursiveASTVisitor { +class HLSLReachableDiagnoseVisitor + : public RecursiveASTVisitor { public: - explicit HLSLCallDiagnoseVisitor( + explicit HLSLReachableDiagnoseVisitor( Sema *S, const hlsl::ShaderModel *SM, DXIL::ShaderKind EntrySK, DXIL::NodeLaunchType NodeLaunchTy, const FunctionDecl *EntryDecl, - llvm::SmallPtrSetImpl &DiagnosedCalls) + llvm::SmallPtrSetImpl &DiagnosedCalls, + llvm::SmallPtrSetImpl &DeclAvailabilityChecked, + llvm::SmallSet &DiagnosedTypeLocs) : sema(S), SM(SM), EntrySK(EntrySK), NodeLaunchTy(NodeLaunchTy), - EntryDecl(EntryDecl), DiagnosedCalls(DiagnosedCalls) {} + EntryDecl(EntryDecl), DiagnosedCalls(DiagnosedCalls), + DeclAvailabilityChecked(DeclAvailabilityChecked), + DiagnosedTypeLocs(DiagnosedTypeLocs) {} bool VisitCallExpr(CallExpr *CE) { // Set flag if already diagnosed from another entry, allowing some @@ -325,6 +375,126 @@ class HLSLCallDiagnoseVisitor return true; } + bool VisitVarDecl(VarDecl *VD) { + QualType VarType = VD->getType(); + if (const TemplateSpecializationType *TST = + dyn_cast(VarType.getTypePtr())) { + const TemplateDecl *TD = TST->getTemplateName().getAsTemplateDecl(); + if (!TD) + return true; + + // verify this is a rayquery decl + if (TD->getTemplatedDecl()->hasAttr()) { + if (TST->getNumArgs() == 1) { + 
return true; + } + // now guaranteed 2 args + const TemplateArgument &Arg2 = TST->getArg(1); + Expr *Expr2 = Arg2.getAsExpr(); + llvm::APSInt Arg2val; + Expr2->isIntegerConstantExpr(Arg2val, sema->getASTContext()); + + const ShaderModel *SM = hlsl::ShaderModel::GetByName( + sema->getLangOpts().HLSLProfile.c_str()); + + if (Arg2val.getZExtValue() != 0 && !SM->IsSMAtLeast(6, 9)) { + // if it's an integer literal, emit + // warn_hlsl_rayquery_flags_disallowed + if (Arg2.getKind() == TemplateArgument::Expression) { + if (auto *castExpr = dyn_cast( + Arg2.getAsExpr()->IgnoreParens())) { + // Now check if the sub-expression is a DeclRefExpr + Expr *subExpr = castExpr->getSubExpr(); + if (auto *IL = dyn_cast(subExpr)) + sema->Diag(VD->getLocStart(), + diag::warn_hlsl_rayquery_flags_disallowed); + return true; + } + } + } + } + } + return true; + } + + bool VisitTypeLoc(TypeLoc TL) { + // Diagnose availability for used type. + if (AvailabilityAttr *AAttr = GetAvailabilityAttrOnce(TL)) { + UnqualTypeLoc UTL = TL.getUnqualifiedLoc(); + DiagnoseAvailability(AAttr, TL.getType(), UTL.getLocStart()); + } + + return true; + } + + bool VisitDeclRefExpr(DeclRefExpr *DRE) { + // Diagnose availability for referenced decl. + if (AvailabilityAttr *AAttr = GetAvailabilityAttrOnce(DRE)) { + DiagnoseAvailability(AAttr, DRE->getDecl(), DRE->getExprLoc()); + } + + return true; + } + + AvailabilityAttr *GetAvailabilityAttrOnce(TypeLoc TL) { + QualType Ty = TL.getType(); + CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); + if (!RD) + return nullptr; + AvailabilityAttr *AAttr = RD->getAttr(); + if (!AAttr) + return nullptr; + // Skip redundant availability diagnostics for the same Type. + // Use the end location to avoid diagnosing the same type multiple times. 
+ if (!DiagnosedTypeLocs.insert(TL.getEndLoc()).second) + return nullptr; + + return AAttr; + } + + AvailabilityAttr *GetAvailabilityAttrOnce(DeclRefExpr *DRE) { + AvailabilityAttr *AAttr = DRE->getDecl()->getAttr(); + if (!AAttr) + return nullptr; + // Skip redundant availability diagnostics for the same Decl. + if (!DeclAvailabilityChecked.insert(DRE).second) + return nullptr; + + return AAttr; + } + + bool CheckSMVersion(VersionTuple AAttrVT) { + VersionTuple SMVT = VersionTuple(SM->GetMajor(), SM->GetMinor()); + return SMVT >= AAttrVT; + } + + void DiagnoseAvailability(AvailabilityAttr *AAttr, QualType Ty, + SourceLocation Loc) { + VersionTuple AAttrVT = AAttr->getIntroduced(); + if (CheckSMVersion(AAttrVT)) + return; + + sema->Diag(Loc, diag::warn_hlsl_builtin_type_unavailable) + << Ty << SM->GetName() << AAttrVT.getAsString(); + } + + void DiagnoseAvailability(AvailabilityAttr *AAttr, NamedDecl *ND, + SourceLocation Loc) { + VersionTuple AAttrVT = AAttr->getIntroduced(); + if (CheckSMVersion(AAttrVT)) + return; + + if (isa(ND)) { + sema->Diag(Loc, diag::warn_hlsl_intrinsic_in_wrong_shader_model) + << ND->getQualifiedNameAsString() << EntryDecl + << AAttrVT.getAsString(); + return; + } + + sema->Diag(Loc, diag::warn_hlsl_builtin_constant_unavailable) + << ND << SM->GetName() << AAttrVT.getAsString(); + } + clang::Sema *getSema() { return sema; } private: @@ -334,6 +504,8 @@ class HLSLCallDiagnoseVisitor DXIL::NodeLaunchType NodeLaunchTy; const FunctionDecl *EntryDecl; llvm::SmallPtrSetImpl &DiagnosedCalls; + llvm::SmallPtrSetImpl &DeclAvailabilityChecked; + llvm::SmallSet &DiagnosedTypeLocs; }; std::optional @@ -428,18 +600,38 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { const auto *shaderModel = hlsl::ShaderModel::GetByName(self->getLangOpts().HLSLProfile.c_str()); - std::set DiagnosedDecls; + llvm::SmallVector GlobalsWithInit; + llvm::SmallVector SubObjects; + std::set DiagnosedRecursiveDecls; llvm::SmallPtrSet DiagnosedCalls; + 
llvm::SmallPtrSet DeclAvailabilityChecked; + llvm::SmallSet DiagnosedTypeLocs; + + GatherGlobalsWithInitializers(self->getASTContext().getTranslationUnitDecl(), + GlobalsWithInit, SubObjects); + + if (shaderModel->GetKind() == DXIL::ShaderKind::Library) { + DXIL::NodeLaunchType NodeLaunchTy = DXIL::NodeLaunchType::Invalid; + HLSLReachableDiagnoseVisitor Visitor( + self, shaderModel, shaderModel->GetKind(), NodeLaunchTy, nullptr, + DiagnosedCalls, DeclAvailabilityChecked, DiagnosedTypeLocs); + for (VarDecl *VD : SubObjects) + Visitor.TraverseDecl(VD); + } + // for each FDecl, check for recursion for (FunctionDecl *FDecl : FDeclsToCheck) { CallGraphWithRecurseGuard callGraph; - FunctionDecl *result = ValidateNoRecursion(callGraph, FDecl); + ArrayRef InitGlobals = {}; + // if entry function, include globals with initializers. + if (FDecl->hasAttr()) + InitGlobals = GlobalsWithInit; + FunctionDecl *result = ValidateNoRecursion(callGraph, FDecl, InitGlobals); if (result) { // don't emit duplicate diagnostics for the same recursive function // if A and B call recursive function C, only emit 1 diagnostic for C. 
- if (DiagnosedDecls.find(result) == DiagnosedDecls.end()) { - DiagnosedDecls.insert(result); + if (DiagnosedRecursiveDecls.insert(result).second) { self->Diag(result->getSourceRange().getBegin(), diag::err_hlsl_no_recursion) << FDecl->getQualifiedNameAsString() @@ -463,12 +655,12 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { } if (pPatchFnDecl) { - FunctionDecl *patchResult = ValidateNoRecursion(callGraph, pPatchFnDecl); + FunctionDecl *patchResult = + ValidateNoRecursion(callGraph, pPatchFnDecl, GlobalsWithInit); // In this case, recursion was detected in the patch-constant function if (patchResult) { - if (DiagnosedDecls.find(patchResult) == DiagnosedDecls.end()) { - DiagnosedDecls.insert(patchResult); + if (DiagnosedRecursiveDecls.insert(patchResult).second) { self->Diag(patchResult->getSourceRange().getBegin(), diag::err_hlsl_no_recursion) << pPatchFnDecl->getQualifiedNameAsString() @@ -482,15 +674,12 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { // disconnected with respect to the call graph. 
// Only check this if neither function decl is recursive if (!result && !patchResult) { - CallGraphWithRecurseGuard CG; - CG.BuildForEntry(pPatchFnDecl); - if (CG.CheckReachability(pPatchFnDecl, FDecl)) { + if (callGraph.CheckReachability(pPatchFnDecl, FDecl)) { self->Diag(FDecl->getSourceRange().getBegin(), diag::err_hlsl_patch_reachability_not_allowed) << 1 << FDecl->getName() << 0 << pPatchFnDecl->getName(); } - CG.BuildForEntry(FDecl); - if (CG.CheckReachability(FDecl, pPatchFnDecl)) { + if (callGraph.CheckReachability(FDecl, pPatchFnDecl)) { self->Diag(FDecl->getSourceRange().getBegin(), diag::err_hlsl_patch_reachability_not_allowed) << 0 << pPatchFnDecl->getName() << 1 << FDecl->getName(); @@ -520,8 +709,21 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { << hullPatchCount.value(); } } - } + for (const auto *param : pPatchFnDecl->params()) + if (ContainsLongVector(param->getType())) { + const unsigned PatchConstantFunctionParametersIdx = 8; + self->Diag(param->getLocation(), + diag::err_hlsl_unsupported_long_vector) + << PatchConstantFunctionParametersIdx; + } + if (ContainsLongVector(pPatchFnDecl->getReturnType())) { + const unsigned PatchConstantFunctionReturnIdx = 9; + self->Diag(pPatchFnDecl->getLocation(), + diag::err_hlsl_unsupported_long_vector) + << PatchConstantFunctionReturnIdx; + } + } DXIL::ShaderKind EntrySK = shaderModel->GetKind(); DXIL::NodeLaunchType NodeLaunchTy = DXIL::NodeLaunchType::Invalid; if (EntrySK == DXIL::ShaderKind::Library) { @@ -537,12 +739,16 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { NodeLaunchTy = DXIL::NodeLaunchType::Broadcasting; } } + // Visit all visited functions in call graph to collect illegal intrinsic // calls. 
- for (FunctionDecl *FD : callGraph.GetVisitedFunctions()) { - HLSLCallDiagnoseVisitor Visitor(self, shaderModel, EntrySK, NodeLaunchTy, - FDecl, DiagnosedCalls); + HLSLReachableDiagnoseVisitor Visitor( + self, shaderModel, EntrySK, NodeLaunchTy, FDecl, DiagnosedCalls, + DeclAvailabilityChecked, DiagnosedTypeLocs); + // Visit globals with initializers when processing entry point. + for (VarDecl *VD : InitGlobals) + Visitor.TraverseDecl(VD); + for (FunctionDecl *FD : callGraph.GetVisitedFunctions()) Visitor.TraverseDecl(FD); - } } } diff --git a/tools/clang/lib/Sema/SemaOverload.cpp b/tools/clang/lib/Sema/SemaOverload.cpp index 650fe38adc..636eaf0213 100644 --- a/tools/clang/lib/Sema/SemaOverload.cpp +++ b/tools/clang/lib/Sema/SemaOverload.cpp @@ -10936,7 +10936,13 @@ bool Sema::buildOverloadedCallSet(Scope *S, Expr *Fn, ULE->getQualifier()->getKind() == NestedNameSpecifier::Namespace && ULE->getQualifier()->getAsNamespace()->getName() == "vk"; - assert((!ULE->getQualifier() || isVkNamespace) && "non-vk qualified name with ADL"); + bool isDxNamespace = + ULE->getQualifier() && + ULE->getQualifier()->getKind() == NestedNameSpecifier::Namespace && + ULE->getQualifier()->getAsNamespace()->getName() == "dx"; + + assert((!ULE->getQualifier() || isVkNamespace || isDxNamespace) && + "expected vk or dx qualified name with ADL"); // HLSL Change Ends // We don't perform ADL for implicit declarations of builtins. diff --git a/tools/clang/lib/Sema/SemaStmt.cpp b/tools/clang/lib/Sema/SemaStmt.cpp index ce1e55bb0e..4e47a68888 100644 --- a/tools/clang/lib/Sema/SemaStmt.cpp +++ b/tools/clang/lib/Sema/SemaStmt.cpp @@ -3184,7 +3184,7 @@ StmtResult Sema::BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) { // HLSL Change begin - Diagnose mismatched globallycoherent attrs on return. 
if (RetValExp) - DiagnoseGloballyCoherentMismatch(RetValExp, FnRetType, ReturnLoc); + DiagnoseCoherenceMismatch(RetValExp, FnRetType, ReturnLoc); // HLSL Change end bool HasDependentReturnType = FnRetType->isDependentType(); diff --git a/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp b/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp index a6ae05faa5..1eacedbb0b 100644 --- a/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -2139,6 +2139,18 @@ Sema::InstantiateClass(SourceLocation PointOfInstantiation, SourceLocation(), SourceLocation(), nullptr); CheckCompletedCXXClass(Instantiation); + // HLSL Change Begin - set longvec bit for vectors of over 4 elements + ClassTemplateSpecializationDecl *Spec = + dyn_cast(Instantiation); + if (Spec && Spec->hasAttr()) { + const TemplateArgumentList &argList = Spec->getTemplateArgs(); + const TemplateArgument &arg1 = argList[1]; + llvm::APSInt vecSize = arg1.getAsIntegral(); + if (vecSize.getLimitedValue() > hlsl::DXIL::kDefaultMaxVectorLength) + Instantiation->setHasHLSLLongVector(); + } + // HLSL Change End - set longvec bit for vectors of over 4 elements + // Default arguments are parsed, if not instantiated. We can go instantiate // default arg exprs for default constructors if necessary now. 
ActOnFinishCXXMemberDefaultArgs(Instantiation); diff --git a/tools/clang/lib/Sema/SemaType.cpp b/tools/clang/lib/Sema/SemaType.cpp index 5a8f9d13b3..ff3b0dbac7 100644 --- a/tools/clang/lib/Sema/SemaType.cpp +++ b/tools/clang/lib/Sema/SemaType.cpp @@ -4528,7 +4528,9 @@ static AttributeList::Kind getAttrListKind(AttributedType::Kind kind) { return AttributeList::AT_HLSLColumnMajor; case AttributedType::attr_hlsl_globallycoherent: return AttributeList::AT_HLSLGloballyCoherent; - // HLSL Change Ends + case AttributedType::attr_hlsl_reordercoherent: + return AttributeList::AT_HLSLReorderCoherent; + // HLSL Change Ends } llvm_unreachable("unexpected attribute kind!"); } @@ -5771,6 +5773,7 @@ static bool isHLSLTypeAttr(AttributeList::Kind Kind) { case AttributeList::AT_HLSLSnorm: case AttributeList::AT_HLSLUnorm: case AttributeList::AT_HLSLGloballyCoherent: + case AttributeList::AT_HLSLReorderCoherent: return true; default: // Only meant to catch attr handled by handleHLSLTypeAttr, ignore the rest @@ -5802,7 +5805,9 @@ static bool handleHLSLTypeAttr(TypeProcessingState &State, const AttributedType *pMatrixOrientation = nullptr; const AttributedType *pNorm = nullptr; const AttributedType *pGLC = nullptr; - hlsl::GetHLSLAttributedTypes(&S, Type, &pMatrixOrientation, &pNorm, &pGLC); + const AttributedType *pRDC = nullptr; + hlsl::GetHLSLAttributedTypes(&S, Type, &pMatrixOrientation, &pNorm, &pGLC, + &pRDC); if (pMatrixOrientation && (Kind == AttributeList::AT_HLSLColumnMajor || @@ -5836,13 +5841,18 @@ static bool handleHLSLTypeAttr(TypeProcessingState &State, return true; } - if (pGLC && Kind == AttributeList::AT_HLSLGloballyCoherent) { - AttributedType::Kind CurAttrKind = pGLC->getAttrKind(); - if (Kind == getAttrListKind(CurAttrKind)) { - S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute_exact) - << Attr.getName() << Attr.getRange(); - } - } + const bool hasGLC = pGLC; + const bool addsGLC = Kind == AttributeList::AT_HLSLGloballyCoherent; + const bool hasRDC = pRDC; + 
const bool addsRDC = Kind == AttributeList::AT_HLSLReorderCoherent; + + const bool hasMismatchingAttrs = hasGLC && hasRDC; + const bool addsMismatchingAttr = (hasGLC && addsRDC) || (hasRDC && addsGLC); + if ((hasGLC && addsGLC) || (hasRDC && addsRDC)) + S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute_exact) + << Attr.getName() << Attr.getRange(); + else if (!hasMismatchingAttrs && addsMismatchingAttr) + S.Diag(Attr.getLoc(), diag::warn_hlsl_glc_implies_rdc) << Attr.getRange(); AttributedType::Kind TAK; switch (Kind) { @@ -5853,6 +5863,9 @@ static bool handleHLSLTypeAttr(TypeProcessingState &State, case AttributeList::AT_HLSLSnorm: TAK = AttributedType::attr_hlsl_snorm; break; case AttributeList::AT_HLSLGloballyCoherent: TAK = AttributedType::attr_hlsl_globallycoherent; break; + case AttributeList::AT_HLSLReorderCoherent: + TAK = AttributedType::attr_hlsl_reordercoherent; + break; } Type = S.Context.getAttributedType(TAK, Type, Type); diff --git a/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_for_arg.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_for_arg.hlsl new file mode 100644 index 0000000000..d92ce7b9ca --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_for_arg.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxc -E main -T lib_6_9 %s | FileCheck %s +// REQUIRES: dxil-1-9 + +// CHECK: %[[uH:[^ ]+]] = load %dx.types.Handle, %dx.types.Handle* @"\01?u@@3V?$RWBuffer@M@@A", align 4 +// CHECK: %[[uLIBH:[^ ]+]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %[[uH]]) ; CreateHandleForLib(Resource) +// CHECK: %[[uANNOT:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[uLIBH]], %dx.types.ResourceProperties { i32 69642, i32 265 }) ; AnnotateHandle(res,props) resource: reordercoherent RWTypedBuffer +// CHECK: %{{[^ ]+}} = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle %[[uANNOT]], i32 0, i32 undef) ; 
BufferLoad(srv,index,wot) + +RWBuffer OutBuf : register(u1); +reordercoherent RWBuffer u : register(u2); + +float read(RWBuffer buf) { + return buf[0]; +} + +[shader("raygeneration")] +void main() { + OutBuf[0] = read(u); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav.hlsl new file mode 100644 index 0000000000..ea47281d0d --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav.hlsl @@ -0,0 +1,17 @@ +// RUN: %dxc -E main -T lib_6_9 %s | FileCheck %s +// REQUIRES: dxil-1-9 + +// CHECK: !"uav1", {{.+}}, ![[TAGMD:[0-9]+]]} +// CHECK: ![[TAGMD]] = !{i32 0, i32 9, i32 4, i1 true + +reordercoherent RWTexture1D uav1 : register(u3); +RWBuffer uav2; + +[shader("raygeneration")] +void main() +{ + reordercoherent RWTexture1D uav3 = uav1; + uav3[0] = 5; + uav1[0] = 2; + uav2[1] = 3; +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav_array.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav_array.hlsl new file mode 100644 index 0000000000..8b60c0cd67 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav_array.hlsl @@ -0,0 +1,16 @@ +// RUN: %dxc -E main -T lib_6_9 %s | FileCheck %s +// REQUIRES: dxil-1-9 + +// Make sure uav array can have reordercoherent. 
+// CHECK: !{{.*}} = !{i32 1, [12 x %"class.RWTexture2D"]* bitcast ([12 x %dx.types.Handle]* @"\01?tex@@3PAV?$RWTexture2D@M@@A" to [12 x %"class.RWTexture2D"]*), !"tex", i32 0, i32 2, i32 12, i32 2, i1 false, i1 false, i1 false, ![[TAGMD:.*]]} +// CHECK: ![[TAGMD]] = !{i32 0, i32 9, i32 4, i1 true} + + +RWBuffer OutBuf: register(u1); +reordercoherent RWTexture2D tex[12] : register(u2); + +[shader("raygeneration")] +void main() { + int2 c = DispatchRaysIndex().xy; + OutBuf[0] = tex[0][c]; +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl index e6246845b3..9f7a487a05 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl @@ -3,14 +3,34 @@ // RUN: %dxc -T vs_6_6 -DETY=uint64_t -DCOLS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DETY=double -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=float1 -DCOLS=4 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=bool1 -DCOLS=4 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=uint64_t1 -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=double1 -DCOLS=2 %s | FileCheck %s + +// RUN: %dxc -T vs_6_6 -DETY=float4 -DCOLS=4 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=bool4 -DCOLS=4 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=uint64_t4 -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=double4 -DCOLS=2 %s | FileCheck %s + // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=float -DCOLS=2 -DROWS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=bool -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=uint64_t -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=double -DCOLS=2 -DROWS=2 %s | FileCheck %s + // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT // RUN: %dxc -T vs_6_6 
-DATY=matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=float -DCOLS=2 -DROWS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=uint64_t -DCOLS=2 -DROWS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=double -DCOLS=2 -DROWS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT + // RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=float -DCOLS=4 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=bool -DCOLS=4 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=uint64_t -DCOLS=2 %s | FileCheck %s @@ -26,8 +46,6 @@ // for different aggregate buffer types and indices. 
/////////////////////////////////////////////////////////////////////// - - // CHECK: %dx.types.ResRet.[[TY:[a-z][0-9][0-9]]] = type { [[TYPE:[a-z0-9]*]], #if !defined(ATY) @@ -68,6 +86,16 @@ struct OffVector { } }; +template +struct Matrix { + matrix m; + Matrix operator+(Matrix mat) { + Matrix ret; + ret.m = m + mat.m; + return ret; + } +}; + ByteAddressBuffer RoByBuf : register(t1); RWByteAddressBuffer RwByBuf : register(u1); @@ -156,6 +184,8 @@ void main(uint ix[2] : IX) { // StructuredBuffer Tests // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[BOFF]] + // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p4]] + // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 @@ -163,6 +193,8 @@ void main(uint ix[2] : IX) { TYPE stbElt1 SS = RwStBuf.Load(ix[0]); // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[BOFF]] + // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[p4]] + // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-scalars.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-scalars.hlsl new file mode 100644 index 0000000000..03735cb968 --- /dev/null +++ 
b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-scalars.hlsl @@ -0,0 +1,162 @@ +// RUN: %dxc -DTYPE=float -T vs_6_6 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,I64 +// RUN: %dxc -DTYPE=double -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,F64 + +// RUN: %dxc -DTYPE=float1 -T vs_6_6 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool1 -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t1 -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,I64 +// RUN: %dxc -DTYPE=double1 -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,F64 + +// Confirm that 6.9 doesn't use vector loads for scalars and vec1s +// RUN: %dxc -DTYPE=float -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I64 +// RUN: %dxc -DTYPE=double -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,F64 + +// RUN: %dxc -DTYPE=float1 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool1 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t1 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I64 +// RUN: %dxc -DTYPE=double1 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,F64 + +/////////////////////////////////////////////////////////////////////// +// Test codegen for various load and store operations and conversions +// for different scalar buffer types and confirm that the proper +// loads, stores, and conversion operations take place. +/////////////////////////////////////////////////////////////////////// + + +// These -DAGs must match the same line. That is the only reason for the -DAG. +// The first match will assign [[TY]] to the native type +// For most runs, the second match will assign [[TY32]] to the same thing.
+// For 64-bit types, the memory representation is i32 and a separate variable is needed. +// For these cases, there is another line that will always match i32. +// This line will also force the previous -DAGs to match the same line since the most +// This shader can produce is two ResRet types. +// CHECK-DAG: %dx.types.ResRet.[[TY:[a-z][0-9][0-9]]] = type { [[TYPE:[a-z0-9]*]], +// CHECK-DAG: %dx.types.ResRet.[[TY32:[a-z][0-9][0-9]]] = type { [[TYPE]], +// I64: %dx.types.ResRet.[[TY32:i32]] +// F64: %dx.types.ResRet.[[TY32:i32]] + + ByteAddressBuffer RoByBuf : register(t1); +RWByteAddressBuffer RwByBuf : register(u1); + + StructuredBuffer< TYPE > RoStBuf : register(t2); +RWStructuredBuffer< TYPE > RwStBuf : register(u2); + + Buffer< TYPE > RoTyBuf : register(t3); +RWBuffer< TYPE > RwTyBuf : register(u3); + +ConsumeStructuredBuffer CnStBuf : register(u4); +AppendStructuredBuffer ApStBuf : register(u5); + +void main(uint ix[2] : IX) { + // ByteAddressBuffer Tests + + // CHECK-DAG: [[HDLROBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) + // CHECK-DAG: [[HDLRWBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) + + // CHECK-DAG: [[HDLROST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 0 }, i32 2, i1 false) + // CHECK-DAG: [[HDLRWST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 1 }, i32 2, i1 false) + + // CHECK-DAG: [[HDLROTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 0 }, i32 3, i1 false) + // CHECK-DAG: [[HDLRWTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 1 }, i32 3, i1 false) + + // CHECK-DAG: [[HDLCON:%.*]] = call 
%dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 4, i32 4, i32 0, i8 1 }, i32 4, i1 false) + // CHECK-DAG: [[HDLAPP:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 1 }, i32 5, i1 false) + + // CHECK: [[IX0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE babElt1 = RwByBuf.Load< TYPE >(ix[0]); + + // CHECK: [[ANHDLROBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROBY]] + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE babElt2 = RoByBuf.Load< TYPE >(ix[0]); + + // I1: zext i1 %{{.*}} to i32 + // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] + RwByBuf.Store< TYPE >(ix[0], babElt1 + babElt2); + + // StructuredBuffer Tests + // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt1 = RwStBuf.Load(ix[0]); + // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt2 = RwStBuf[ix[1]]; + + // CHECK: [[ANHDLROST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROST]] + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]] + // 
I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt3 = RoStBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt4 = RoStBuf[ix[1]]; + + // I1: zext i1 %{{.*}} to i32 + // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] + RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4; + + // {Append/Consume}StructuredBuffer Tests + // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] + // CHECK: [[CONIX:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[ANHDLCON]], i8 -1) + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE cnElt = CnStBuf.Consume(); + + // CHECK: [[ANHDLAPP:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLAPP]] + // CHECK: [[APPIX:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[ANHDLAPP]], i8 1) + // I1: zext i1 %{{.*}} to i32 + // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]] + ApStBuf.Append(cnElt); + + // TypedBuffer Tests + // CHECK: [[ANHDLRWTY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTY]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt1 = RwTyBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX1]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: 
zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt2 = RwTyBuf[ix[1]]; + // CHECK: [[ANHDLROTY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTY]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX0]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt3 = RoTyBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX1]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt4 = RoTyBuf[ix[1]]; + + // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 + // I64: trunc i64 %{{.*}} to i32 + // I64: lshr i64 %{{.*}}, 32 + // I64: trunc i64 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // CHECK: all void @dx.op.bufferStore.[[TY32]](i32 69, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] + RwTyBuf[ix[0]] = typElt1 + typElt2 + typElt3 + typElt4; +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl new file mode 100644 index 0000000000..5305ee495b --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl @@ -0,0 +1,91 @@ +// RUN: %dxc -DTYPE=float -DNUM=4 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool -DNUM=4 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t -DNUM=2 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=double -DNUM=2 -T vs_6_9 %s | FileCheck %s + +// RUN: %dxc -DTYPE=float -DNUM=6 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool 
-DNUM=13 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t -DNUM=24 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=double -DNUM=32 -T vs_6_9 %s | FileCheck %s + +/////////////////////////////////////////////////////////////////////// +// Test codegen for various load and store operations and conversions +// for different scalar/vector buffer types and indices. +/////////////////////////////////////////////////////////////////////// + +// CHECK: %dx.types.ResRet.[[VTY:v[0-9]*[a-z][0-9][0-9]]] = type { <[[NUM:[0-9]*]] x [[TYPE:[a-z_0-9]*]]>, i32 } + +ByteAddressBuffer RoByBuf : register(t1); +RWByteAddressBuffer RwByBuf : register(u1); + +StructuredBuffer > RoStBuf : register(t2); +RWStructuredBuffer > RwStBuf : register(u2); + +ConsumeStructuredBuffer > CnStBuf : register(u4); +AppendStructuredBuffer > ApStBuf : register(u5); + +// CHECK-LABEL: define void @main +[shader("vertex")] +void main(uint ix[2] : IX) { + // ByteAddressBuffer Tests + + // CHECK-DAG: [[HDLROBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) + // CHECK-DAG: [[HDLRWBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) + + // CHECK-DAG: [[HDLROST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 0 }, i32 2, i1 false) + // CHECK-DAG: [[HDLRWST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 1 }, i32 2, i1 false) + + // CHECK-DAG: [[HDLCON:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 4, i32 4, i32 0, i8 1 }, i32 4, i1 false) + // CHECK-DAG: [[HDLAPP:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 1 }, i32 5, i1 false) + + // CHECK: [[IX0:%.*]] = call 
i32 @dx.op.loadInput.i32(i32 4, + + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector babElt1 = RwByBuf.Load< vector >(ix[0]); + + // CHECK: [[ANHDLROBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROBY]] + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector babElt2 = RoByBuf.Load< vector >(ix[0]); + + // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> + // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] + RwByBuf.Store< vector >(ix[0], babElt1 + babElt2); + + // StructuredBuffer Tests + // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt1 = RwStBuf.Load(ix[0]); + // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt2 = RwStBuf[ix[1]]; + + // CHECK: [[ANHDLROST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROST]] + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt3 = RoStBuf.Load(ix[0]); + // CHECK: call 
%dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt4 = RoStBuf[ix[1]]; + + // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> + // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] + RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4; + + // {Append/Consume}StructuredBuffer Tests + // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] + // CHECK: [[CONIX:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[ANHDLCON]], i8 -1) + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector cnElt = CnStBuf.Consume(); + + // CHECK: [[ANHDLAPP:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLAPP]] + // CHECK: [[APPIX:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[ANHDLAPP]], i8 1) + // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> + // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]] + ApStBuf.Append(cnElt); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl index ea44fef604..8dcf5ead1c 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl @@ -27,13 +27,20 @@ RWByteAddressBuffer RwByBuf : register(u1); StructuredBuffer< TYPE > RoStBuf : register(t2); RWStructuredBuffer< TYPE > RwStBuf : register(u2); - Buffer< TYPE > RoTyBuf : register(t3); -RWBuffer< TYPE > RwTyBuf : register(u3); +ConsumeStructuredBuffer CnStBuf : register(u3); 
+AppendStructuredBuffer ApStBuf : register(u4); -ConsumeStructuredBuffer CnStBuf : register(u4); -AppendStructuredBuffer ApStBuf : register(u5); + Buffer< TYPE > RoTyBuf : register(t5); +RWBuffer< TYPE > RwTyBuf : register(u5); -void main(uint ix[2] : IX) { + Texture1D< TYPE > RoTex1d : register(t6); +RWTexture1D< TYPE > RwTex1d : register(u6); + Texture2D< TYPE > RoTex2d : register(t7); +RWTexture2D< TYPE > RwTex2d : register(u7); + Texture3D< TYPE > RoTex3d : register(t8); +RWTexture3D< TYPE > RwTex3d : register(u8); + +void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // ByteAddressBuffer Tests // CHECK-DAG: [[HDLROBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) @@ -42,13 +49,27 @@ void main(uint ix[2] : IX) { // CHECK-DAG: [[HDLROST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 0 }, i32 2, i1 false) // CHECK-DAG: [[HDLRWST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 1 }, i32 2, i1 false) - // CHECK-DAG: [[HDLROTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 0 }, i32 3, i1 false) - // CHECK-DAG: [[HDLRWTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 1 }, i32 3, i1 false) + // CHECK-DAG: [[HDLCON:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 1 }, i32 3, i1 false) + // CHECK-DAG: [[HDLAPP:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 4, i32 4, i32 0, i8 1 }, i32 4, i1 false) + + // CHECK-DAG: [[HDLROTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 0 }, i32 5, i1 false) + // CHECK-DAG: 
[[HDLRWTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 1 }, i32 5, i1 false) - // CHECK-DAG: [[HDLCON:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 4, i32 4, i32 0, i8 1 }, i32 4, i1 false) - // CHECK-DAG: [[HDLAPP:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 1 }, i32 5, i1 false) + // CHECK-DAG: [[HDLROTX1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 6, i32 6, i32 0, i8 0 }, i32 6, i1 false) + // CHECK-DAG: [[HDLRWTX1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 6, i32 6, i32 0, i8 1 }, i32 6, i1 false) + // CHECK-DAG: [[HDLROTX2:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 7, i32 7, i32 0, i8 0 }, i32 7, i1 false) + // CHECK-DAG: [[HDLRWTX2:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 7, i32 7, i32 0, i8 1 }, i32 7, i1 false) + // CHECK-DAG: [[HDLROTX3:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 8, i32 8, i32 0, i8 0 }, i32 8, i1 false) + // CHECK-DAG: [[HDLRWTX3:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 8, i32 8, i32 0, i8 1 }, i32 8, i1 false) - // CHECK: [[IX0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + + // CHECK-DAG: [[IX0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0 + // CHECK-DAG: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0 + // CHECK-DAG: [[IX20:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 0 + // CHECK-DAG: [[IX21:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 1 + // CHECK-DAG: [[IX30:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 3, i32 0, i8 0 + // CHECK-DAG: [[IX31:%.*]] = call i32 
@dx.op.loadInput.i32(i32 4, i32 3, i32 0, i8 1 + // CHECK-DAG: [[IX32:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 3, i32 0, i8 2 // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] @@ -56,7 +77,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE babElt1 = RwByBuf.Load< TYPE >(ix[0]); + TYPE babElt1 = RwByBuf.Load< TYPE >(ix0); // CHECK: [[ANHDLROBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROBY]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0]] @@ -64,14 +85,14 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE babElt2 = RoByBuf.Load< TYPE >(ix[0]); + TYPE babElt2 = RoByBuf.Load< TYPE >(ix0); // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] - RwByBuf.Store< TYPE >(ix[0], babElt1 + babElt2); + RwByBuf.Store< TYPE >(ix0, babElt1 + babElt2); // StructuredBuffer Tests // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] @@ -80,14 +101,13 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE stbElt1 = RwStBuf.Load(ix[0]); - // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + TYPE stbElt1 = RwStBuf.Load(ix0); // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 
// I1: icmp ne i32 %{{.*}}, 0 - TYPE stbElt2 = RwStBuf[ix[1]]; + TYPE stbElt2 = RwStBuf[ix1]; // CHECK: [[ANHDLROST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROST]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]] @@ -95,20 +115,20 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE stbElt3 = RoStBuf.Load(ix[0]); + TYPE stbElt3 = RoStBuf.Load(ix0); // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE stbElt4 = RoStBuf[ix[1]]; + TYPE stbElt4 = RoStBuf[ix1]; // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] - RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4; + RwStBuf[ix0] = stbElt1 + stbElt2 + stbElt3 + stbElt4; // {Append/Consume}StructuredBuffer Tests // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] @@ -146,7 +166,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE typElt1 = RwTyBuf.Load(ix[0]); + TYPE typElt1 = RwTyBuf.Load(ix0); // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX1]] // F64: call double @dx.op.makeDouble.f64(i32 101 // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -162,7 +182,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE typElt2 = RwTyBuf[ix[1]]; + TYPE typElt2 = RwTyBuf[ix1]; // CHECK: [[ANHDLROTY:%.*]] = 
call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTY]] // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX0]] // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -179,7 +199,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE typElt3 = RoTyBuf.Load(ix[0]); + TYPE typElt3 = RoTyBuf.Load(ix0); // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX1]] // F64: call double @dx.op.makeDouble.f64(i32 101 // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -195,7 +215,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE typElt4 = RoTyBuf[ix[1]]; + TYPE typElt4 = RoTyBuf[ix1]; // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 @@ -210,5 +230,126 @@ void main(uint ix[2] : IX) { // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.bufferStore.[[TY32]](i32 69, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] - RwTyBuf[ix[0]] = typElt1 + typElt2 + typElt3 + typElt4; + RwTyBuf[ix0] = typElt1 + typElt2 + typElt3 + typElt4; + + // Texture Tests + // CHECK: [[ANHDLROTX1:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTX1]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLROTX1]], i32 0, i32 [[IX0]], i32 undef, i32 undef + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp 
ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt1 = RoTex1d[ix0]; + // CHECK: [[ANHDLRWTX1:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX1]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX1]], i32 undef, i32 [[IX0]], i32 undef, i32 undef + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt2 = RwTex1d[ix0]; + + // CHECK: [[ANHDLROTX2:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTX2]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLROTX2]], i32 0, i32 [[IX20]], i32 [[IX21]], i32 undef + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt3 = RoTex2d[ix2]; + // CHECK: [[ANHDLRWTX2:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX2]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX2]], i32 undef, i32 [[IX20]], i32 [[IX21]], i32 undef + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + 
// I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt4 = RwTex2d[ix2]; + + // CHECK: [[ANHDLROTX3:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTX3]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLROTX3]], i32 0, i32 [[IX30]], i32 [[IX31]], i32 [[IX32]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt5 = RoTex3d[ix3]; + // CHECK: [[ANHDLRWTX3:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX3]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX3]], i32 undef, i32 [[IX30]], i32 [[IX31]], i32 [[IX32]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt6 = RwTex3d[ix3]; + + // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 + // F64: call %dx.types.splitdouble 
@dx.op.splitDouble.f64(i32 102 + // I64: trunc i64 %{{.*}} to i32 + // lshr i64 %{{.*}}, 32 + // I64: trunc i64 %{{.*}} to i32 + // I64: trunc i64 %{{.*}} to i32 + // lshr i64 %{{.*}}, 32 + // I64: trunc i64 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // CHECK: call void @dx.op.textureStore.[[TY32]](i32 67, %dx.types.Handle [[ANHDLRWTX3]], i32 [[IX30]], i32 [[IX31]], i32 [[IX32]] + RwTex3d[ix3] = texElt1 + texElt2 + texElt3 + texElt4 + texElt5 + texElt6; } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.hlsl new file mode 100644 index 0000000000..7cd54e0387 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.hlsl @@ -0,0 +1,152 @@ +// RUN: %dxc -fcgl -T vs_6_6 %s | FileCheck %s + +// Source file for DxilGen IR test for buffer load lowering +// Much of this mirrors buffer-load-store and buffer-agg-load-store + +template +struct Vector { + float4 pad1; + double pad2; + vector v; + Vector operator+(Vector vec) { + Vector ret; + ret.pad1 = 0.0; + ret.pad2 = 0.0; + ret.v = v + vec.v; + return ret; + } +}; + +template +struct Matrix { + float4 pad1; + matrix m; + Matrix operator+(Matrix mat) { + Matrix ret; + ret.m = m + mat.m; + return ret; + } +}; + +RWByteAddressBuffer BabBuf : register(u1); +RWStructuredBuffer< float2 > VecBuf : register(u2); + StructuredBuffer< float[2] > ArrBuf : register(t3); + StructuredBuffer< Vector > SVecBuf : register(t4); + StructuredBuffer< float2x2 > MatBuf : register(t5); + StructuredBuffer< Matrix > SMatBuf : register(t6); + +void main(uint ix0 : IX0) { + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 0 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + bool2 Bab0 = BabBuf.Load< bool2 >(ix0 + 0); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Bab1 = (float2)BabBuf.Load< float[2] >(ix0 + 1); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Bab2 = BabBuf.Load< Vector >(ix0 + 2).v; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 3 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, 
%struct.RWByteAddressBuffer + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call %class.matrix.float.2.2 @"dx.hl.op.ro.%class.matrix.float.2.2 (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Bab3 = BabBuf.Load< float2x2 >(ix0 + 3)[1]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 4 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: [[MSS:%.*]] = call %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Bab4 = BabBuf.Load< Matrix >(ix0 + 4).m[1]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 5 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x float>)"(i32 277, %dx.types.Handle [[ANHDL]], i32 [[IX]], <2 x float> + BabBuf.Store< float2 >(ix0+5, select(Bab0, Bab1+Bab2, Bab3+Bab4)); + + // CHECK: [[IX:%.*]] = add i32 
{{%.*}}, 0 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Sld0 = VecBuf.Load(ix0 + 0); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer\22)"(i32 0, %"class.StructuredBuffer" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 12, i32 8 }, %"class.StructuredBuffer" undef) + // CHECK: call [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Sld1 = (float2)ArrBuf.Load(ix0 + 1); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 780, i32 32 }, %"class.StructuredBuffer >" undef) + // CHECK: call %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Sld2 
= SVecBuf.Load(ix0 + 2).v; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 3 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 16 }, %"class.StructuredBuffer >" undef) + // CHECK: call %class.matrix.float.2.2 @"dx.hl.op.ro.%class.matrix.float.2.2 (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Sld3 = MatBuf.Load(ix0 + 3)[1]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 4 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 32 }, %"class.StructuredBuffer >" undef) + // CHECK: [[MSS:%.*]] = call %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + // CHECK: [[GEP:%.*]] = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* [[MSS]], i32 0, i32 1 + // CHECK: call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* [[GEP]], i32 1, i32 3) + float2 Sld4 = SMatBuf.Load(ix0 + 4).m[1]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 5 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + VecBuf[ix0+5] = select(Sld0, Sld1+Sld2, Sld3+Sld4); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 6 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]] + float2 Sss0 = VecBuf[ix0 + 6]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 7 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer\22)"(i32 0, %"class.StructuredBuffer" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 12, i32 8 }, %"class.StructuredBuffer" undef) + // CHECK: call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Sss1 = (float2)ArrBuf[ix0 + 7]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 8 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 
0, %"class.StructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 780, i32 32 }, %"class.StructuredBuffer >" undef) + // CHECK: call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Sss2 = SVecBuf[ix0 + 8].v; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 9 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 16 }, %"class.StructuredBuffer >" undef) + // CHECK: [[SS:%.*]] = call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + // CHECK: call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* [[SS]], i32 1, i32 3) + float2 Sss3 = MatBuf[ix0 + 9][1]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 10 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 32 }, %"class.StructuredBuffer >" undef) + // CHECK: [[MSS:%.*]] = call %"struct.Matrix"* 
@"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + // CHECK: [[GEP:%.*]] = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* [[MSS]], i32 0, i32 1 + // CHECK: call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* [[GEP]], i32 1, i32 3) + float2 Sss4 = SMatBuf[ix0 + 10].m[1]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 11 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + VecBuf[ix0+11] = select(Sss0, Sss1+Sss2, Sss3+Sss4); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.ll b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.ll new file mode 100644 index 0000000000..6b01120f7b --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.ll @@ -0,0 +1,404 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RWByteAddressBuffer = type { i32 } +%"class.RWStructuredBuffer >" = type { <2 x float> } +%"class.StructuredBuffer" = type { [2 x float] } +%"class.StructuredBuffer >" = type { %"struct.Vector" } +%"struct.Vector" = type { <4 x float>, double, <2 x float> } +%"class.StructuredBuffer >" = type { %class.matrix.float.2.2 } 
+%class.matrix.float.2.2 = type { [2 x <2 x float>] } +%"class.StructuredBuffer >" = type { %"struct.Matrix" } +%"struct.Matrix" = type { <4 x float>, %class.matrix.float.2.2 } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?BabBuf@@3URWByteAddressBuffer@@A" = external global %struct.RWByteAddressBuffer, align 4 +@"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?ArrBuf@@3V?$StructuredBuffer@$$BY01M@@A" = external global %"class.StructuredBuffer", align 4 +@"\01?SVecBuf@@3V?$StructuredBuffer@U?$Vector@M$01@@@@A" = external global %"class.StructuredBuffer >", align 8 +@"\01?MatBuf@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A" = external global %"class.StructuredBuffer >", align 4 +@"\01?SMatBuf@@3V?$StructuredBuffer@U?$Matrix@M$01$01@@@@A" = external global %"class.StructuredBuffer >", align 4 + +; Function Attrs: nounwind +define void @main(i32 %ix0) #0 { + %1 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" + + ; Booleans require some conversion after being loaded + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %10, i32 %7, i32 undef, i8 3, i32 4) + ; CHECK: [[EL0:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 0 + ; CHECK: [[EL1:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 1 + ; CHECK: [[VEC0:%.*]] = insertelement <2 x i32> undef, i32 [[EL0]], i64 0 + ; CHECK: [[VEC1:%.*]] = insertelement <2 x i32> [[VEC0]], i32 [[EL1]], i64 1 + ; CHECK: {{%.*}} = icmp ne <2 x i32> [[VEC1]], zeroinitializer + %2 = call %dx.types.Handle 
@"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %1) + %3 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) + %4 = call <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %3, i32 %ix0) + %5 = zext <2 x i1> %4 to <2 x i32> + %6 = add i32 %ix0, 1 + %7 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" + + ; Array loads do so one element at a time. + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %8 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %7) + %9 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %8, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) + %10 = call [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %9, i32 %6) + + %11 = 
getelementptr inbounds [2 x float], [2 x float]* %10, i32 0, i32 0 + %12 = load float, float* %11 + %13 = getelementptr inbounds [2 x float], [2 x float]* %10, i32 0, i32 1 + %14 = load float, float* %13 + %15 = insertelement <2 x float> undef, float %12, i32 0 + %16 = insertelement <2 x float> %15, float %14, i32 1 + %17 = add i32 %ix0, 3 + %18 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" + + ; Vector inside a struct is a simple load. + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 3, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + %19 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %18) + %20 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %19, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) + %21 = call %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %20, i32 %17) + %22 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %21, i32 0, i32 2 + %23 = load <2 x float>, <2 x float>* %22, align 4 + %24 = add i32 %ix0, 4 + %25 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" + + ; 2x2 matrix loads the full storage vector and converts the orientation. 
+ ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 15, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 2 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 3 + %26 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %25) + %27 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %26, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) + %28 = call <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %27, i32 %24) + %row2col = shufflevector <4 x float> %28, <4 x float> %28, <4 x i32> + %29 = shufflevector <4 x float> %row2col, <4 x float> %row2col, <2 x i32> + %30 = add i32 %ix0, 5 + %31 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" + + ; Matrix struct members get their elements extracted with individual loads on account of already dealing with GEPs + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 
@dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %32 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %31) + %33 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %32, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) + %34 = call %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %33, i32 %30) + %35 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %34, i32 0, i32 1 + %36 = call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* %35, i32 1, i32 3) + %37 = load <2 x float>, <2 x float>* %36 + %38 = fadd <2 x float> %29, %37 + %39 = fadd <2 x float> %16, %23 + %40 = icmp ne <2 x i32> %5, zeroinitializer + %41 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %40, <2 x float> %39, <2 x float> %38) + %42 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" + + %43 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %42) + %44 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %43, %dx.types.ResourceProperties { i32 4107, i32 0 }, 
%struct.RWByteAddressBuffer zeroinitializer) + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x float>)"(i32 277, %dx.types.Handle %44, i32 %ix0, <2 x float> %41) + %45 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" + + ; Normal vector. Standard load. + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 0, i8 3, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + %46 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %45) + %47 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %46, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) + %48 = call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %47, i32 %ix0) + %49 = add i32 %ix0, 1 + %50 = load %"class.StructuredBuffer", %"class.StructuredBuffer"* @"\01?ArrBuf@@3V?$StructuredBuffer@$$BY01M@@A" + + ; Array loads do so one element at a time. 
+ ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer"(i32 160, %"class.StructuredBuffer" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 12, i32 8 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 0, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 4, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %51 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer\22)"(i32 0, %"class.StructuredBuffer" %50) + %52 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer\22)"(i32 14, %dx.types.Handle %51, %dx.types.ResourceProperties { i32 12, i32 8 }, %"class.StructuredBuffer" zeroinitializer) + %53 = call [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %52, i32 %49) + %54 = getelementptr inbounds [2 x float], [2 x float]* %53, i32 0, i32 0 + %55 = load float, float* %54 + %56 = getelementptr inbounds [2 x float], [2 x float]* %53, i32 0, i32 1 + %57 = load float, float* %56 + %58 = insertelement <2 x float> undef, float %55, i32 0 + %59 = insertelement <2 x float> %58, float %57, i32 1 + %60 = add i32 %ix0, 3 + %61 = load %"class.StructuredBuffer >", %"class.StructuredBuffer >"* @"\01?SVecBuf@@3V?$StructuredBuffer@U?$Vector@M$01@@@@A" + + ; Vector inside a struct is a simple load. 
+ ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer >"(i32 160, %"class.StructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 780, i32 32 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 24, i8 3, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + %62 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" %61) + %63 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle %62, %dx.types.ResourceProperties { i32 780, i32 32 }, %"class.StructuredBuffer >" zeroinitializer) + %64 = call %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %63, i32 %60) + %65 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %64, i32 0, i32 2 + %66 = load <2 x float>, <2 x float>* %65, align 4 + %67 = add i32 %ix0, 4 + %68 = load %"class.StructuredBuffer >", %"class.StructuredBuffer >"* @"\01?MatBuf@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A" + + ; 2x2 matrix loads the full storage vector and converts the orientation. 
+ ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer >"(i32 160, %"class.StructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 16 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 0, i8 15, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 2 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 3 + %69 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" %68) + %70 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle %69, %dx.types.ResourceProperties { i32 524, i32 16 }, %"class.StructuredBuffer >" zeroinitializer) + %71 = call <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %70, i32 %67) + %row2col1 = shufflevector <4 x float> %71, <4 x float> %71, <4 x i32> + %72 = shufflevector <4 x float> %row2col1, <4 x float> %row2col1, <2 x i32> + %73 = add i32 %ix0, 5 + %74 = load %"class.StructuredBuffer >", %"class.StructuredBuffer >"* @"\01?SMatBuf@@3V?$StructuredBuffer@U?$Matrix@M$01$01@@@@A" + + ; Matrix struct members get their elements extracted with individual loads on account of already dealing with GEPs + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer >"(i32 160, %"class.StructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 32 }) + ; CHECK: [[LD:%.*]] = call 
%dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 20, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 28, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %75 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" %74) + %76 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle %75, %dx.types.ResourceProperties { i32 524, i32 32 }, %"class.StructuredBuffer >" zeroinitializer) + %77 = call %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %76, i32 %73) + %78 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %77, i32 0, i32 1 + %79 = call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* %78, i32 1, i32 3) + %80 = load <2 x float>, <2 x float>* %79 + %81 = fadd <2 x float> %72, %80 + %82 = fadd <2 x float> %59, %66 + %83 = fcmp une <2 x float> %48, zeroinitializer + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[ANHDL]] + %84 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %83, <2 x float> %82, <2 x float> %81) + %85 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* 
@"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" + + ; Normal vector. Standard load. + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 0, i8 3, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + %86 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %85) + %87 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %86, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) + %88 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %87, i32 %ix0) + store <2 x float> %84, <2 x float>* %88 + %89 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" + + %90 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %89) + %91 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %90, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) + %92 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %91, i32 %ix0) + %93 = 
load <2 x float>, <2 x float>* %92 + %94 = add i32 %ix0, 1 + %95 = load %"class.StructuredBuffer", %"class.StructuredBuffer"* @"\01?ArrBuf@@3V?$StructuredBuffer@$$BY01M@@A" + + ; Array loads do so one element at a time. + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer"(i32 160, %"class.StructuredBuffer" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 12, i32 8 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 0, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 4, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %96 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer\22)"(i32 0, %"class.StructuredBuffer" %95) + %97 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer\22)"(i32 14, %dx.types.Handle %96, %dx.types.ResourceProperties { i32 12, i32 8 }, %"class.StructuredBuffer" zeroinitializer) + %98 = call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %97, i32 %94) + %99 = getelementptr inbounds [2 x float], [2 x float]* %98, i32 0, i32 0 + %100 = load float, float* %99 + %101 = getelementptr inbounds [2 x float], [2 x float]* %98, i32 0, i32 1 + %102 = load float, float* %101 + %103 = insertelement <2 x float> undef, float %100, i32 0 + %104 = insertelement <2 x float> %103, float %102, i32 1 + %105 = add i32 %ix0, 3 + %106 = load %"class.StructuredBuffer >", %"class.StructuredBuffer >"* @"\01?SVecBuf@@3V?$StructuredBuffer@U?$Vector@M$01@@@@A" + + ; Vector inside a struct 
is a simple load. + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer >"(i32 160, %"class.StructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 780, i32 32 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 24, i8 3, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + %107 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" %106) + %108 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle %107, %dx.types.ResourceProperties { i32 780, i32 32 }, %"class.StructuredBuffer >" zeroinitializer) + %109 = call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %108, i32 %105) + %110 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %109, i32 0, i32 2 + %111 = load <2 x float>, <2 x float>* %110, align 4 + %112 = add i32 %ix0, 4 + %113 = load %"class.StructuredBuffer >", %"class.StructuredBuffer >"* @"\01?MatBuf@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A" + + ; Subscripted matrices get their elements extracted with individual loads on account of already dealing with GEPs + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer >"(i32 160, %"class.StructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 16 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 
{{%.*}}, i32 4, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 12, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %114 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" %113) + %115 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle %114, %dx.types.ResourceProperties { i32 524, i32 16 }, %"class.StructuredBuffer >" zeroinitializer) + %116 = call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %115, i32 %112) + %117 = call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* %116, i32 1, i32 3) + %118 = load <2 x float>, <2 x float>* %117 + %119 = add i32 %ix0, 5 + %120 = load %"class.StructuredBuffer >", %"class.StructuredBuffer >"* @"\01?SMatBuf@@3V?$StructuredBuffer@U?$Matrix@M$01$01@@@@A" + + ; Matrix struct members get their elements extracted with individual loads on account of already dealing with GEPs + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer >"(i32 160, %"class.StructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 32 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 20, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle 
[[ANHDL]], i32 {{%.*}}, i32 28, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %121 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" %120) + %122 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle %121, %dx.types.ResourceProperties { i32 524, i32 32 }, %"class.StructuredBuffer >" zeroinitializer) + %123 = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %122, i32 %119) + %124 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %123, i32 0, i32 1 + %125 = call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* %124, i32 1, i32 3) + %126 = load <2 x float>, <2 x float>* %125 + %127 = fadd <2 x float> %118, %126 + %128 = fadd <2 x float> %104, %111 + %129 = fcmp une <2 x float> %93, zeroinitializer + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[ANHDL]] + %130 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %129, <2 x float> %128, <2 x float> %127) + %131 = add i32 %ix0, 1 + %132 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" + + %133 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %132) + %134 = call 
%dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %133, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) + %135 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %134, i32 %131) + store <2 x float> %130, <2 x float>* %135 + ret void +} + +declare <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32, %class.matrix.float.2.2*, i32, i32) #1 +declare <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32, %struct.RWByteAddressBuffer) #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer) #1 +declare [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x float>)"(i32, %dx.types.Handle, i32, <2 x float>) #0 +declare <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32, <2 x i1>, <2 x float>, <2 x float>) #1 +declare <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer\22)"(i32, %"class.StructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.StructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32, %"class.StructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.StructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32, %"class.StructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.StructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32, %"class.StructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.StructuredBuffer >") #1 +declare <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare 
%"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !43} +!dx.entryPoints = !{!50} +!dx.fnprops = !{!63} +!dx.options = !{!64, !65} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4807 (longvec_bab_ldst, 88cfe61c3-dirty)"} +!3 = !{i32 1, i32 6} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 6} +!6 = !{i32 0, %"class.RWStructuredBuffer >" undef, !7, %"class.StructuredBuffer" undef, !12, %"class.StructuredBuffer >" undef, !16, %"struct.Vector" undef, !21, %"class.StructuredBuffer >" undef, !29, %"class.StructuredBuffer >" undef, !35, %"struct.Matrix" undef, !39} +!7 = !{i32 8, !8, !9} +!8 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9} +!9 = !{i32 0, !10} +!10 = !{!11} +!11 = !{i32 0, <2 x float> undef} +!12 = !{i32 20, !8, !13} +!13 = !{i32 0, !14} +!14 = !{!15} +!15 = !{i32 0, [2 x float] undef} +!16 = !{i32 32, !17, !18} +!17 = !{i32 6, !"h", i32 3, i32 0} +!18 = !{i32 0, !19} +!19 = !{!20} +!20 = !{i32 0, %"struct.Vector" undef} +!21 = !{i32 32, !22, !23, !24, !25} +!22 = !{i32 6, !"pad1", i32 3, i32 0, i32 7, i32 9} +!23 = !{i32 6, !"pad2", i32 3, i32 16, i32 7, i32 10} +!24 = !{i32 6, !"v", i32 3, i32 24, i32 7, i32 9} +!25 = !{i32 0, !26} +!26 = !{!27, !28} +!27 = !{i32 0, float undef} +!28 = !{i32 1, i64 2} +!29 = !{i32 24, !30, !32} +!30 
= !{i32 6, !"h", i32 2, !31, i32 3, i32 0, i32 7, i32 9} +!31 = !{i32 2, i32 2, i32 2} +!32 = !{i32 0, !33} +!33 = !{!34} +!34 = !{i32 0, %class.matrix.float.2.2 undef} +!35 = !{i32 40, !17, !36} +!36 = !{i32 0, !37} +!37 = !{!38} +!38 = !{i32 0, %"struct.Matrix" undef} +!39 = !{i32 40, !22, !40, !41} +!40 = !{i32 6, !"m", i32 2, !31, i32 3, i32 16, i32 7, i32 9} +!41 = !{i32 0, !42} +!42 = !{!27, !28, !28} +!43 = !{i32 1, void (i32)* @main, !44} +!44 = !{!45, !47} +!45 = !{i32 1, !46, !46} +!46 = !{} +!47 = !{i32 0, !48, !49} +!48 = !{i32 4, !"IX0", i32 7, i32 5} +!49 = !{i32 0} +!50 = !{void (i32)* @main, !"main", null, !51, null} +!51 = !{!52, !60, null, null} +!52 = !{!53, !55, !57, !59} +!53 = !{i32 0, %"class.StructuredBuffer"* @"\01?ArrBuf@@3V?$StructuredBuffer@$$BY01M@@A", !"ArrBuf", i32 0, i32 3, i32 1, i32 12, i32 0, !54} +!54 = !{i32 1, i32 8} +!55 = !{i32 1, %"class.StructuredBuffer >"* @"\01?SVecBuf@@3V?$StructuredBuffer@U?$Vector@M$01@@@@A", !"SVecBuf", i32 0, i32 4, i32 1, i32 12, i32 0, !56} +!56 = !{i32 1, i32 32} +!57 = !{i32 2, %"class.StructuredBuffer >"* @"\01?MatBuf@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A", !"MatBuf", i32 0, i32 5, i32 1, i32 12, i32 0, !58} +!58 = !{i32 1, i32 16} +!59 = !{i32 3, %"class.StructuredBuffer >"* @"\01?SMatBuf@@3V?$StructuredBuffer@U?$Matrix@M$01$01@@@@A", !"SMatBuf", i32 0, i32 6, i32 1, i32 12, i32 0, !56} +!60 = !{!61, !62} +!61 = !{i32 0, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A", !"BabBuf", i32 0, i32 1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!62 = !{i32 1, %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A", !"VecBuf", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !54} +!63 = !{void (i32)* @main, i32 1} +!64 = !{i32 64} +!65 = !{i32 -1} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.hlsl new file mode 100644 index 
0000000000..fa070ceca5 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.hlsl @@ -0,0 +1,192 @@ +// RUN: %dxc -fcgl -T vs_6_6 %s | FileCheck %s + +// Source file for DxilGen IR test for buffer store lowering + +template +struct Vector { + float4 pad1; + double pad2; + vector v; + Vector operator+(Vector vec) { + Vector ret; + ret.pad1 = 0.0; + ret.pad2 = 0.0; + ret.v = v + vec.v; + return ret; + } +}; + +template +struct Matrix { + float4 pad1; + matrix m; + Matrix operator+(Matrix mat) { + Matrix ret; + ret.m = m + mat.m; + return ret; + } +}; + +RWByteAddressBuffer BabBuf : register(u1); +RWStructuredBuffer< float2 > VecBuf : register(u2); +RWStructuredBuffer< float[2] > ArrBuf : register(u3); +RWStructuredBuffer< Vector > SVecBuf : register(u4); +RWStructuredBuffer< float2x2 > MatBuf : register(u5); +RWStructuredBuffer< Matrix > SMatBuf : register(u6); + +ConsumeStructuredBuffer< float2 > CVecBuf : register(u7); +ConsumeStructuredBuffer< float[2] > CArrBuf : register(u8); +ConsumeStructuredBuffer< Vector > CSVecBuf : register(u9); +ConsumeStructuredBuffer< float2x2 > CMatBuf : register(u10); +ConsumeStructuredBuffer< Matrix > CSMatBuf : register(u11); + +AppendStructuredBuffer< float2 > AVecBuf : register(u12); +AppendStructuredBuffer< float[2] > AArrBuf : register(u13); +AppendStructuredBuffer< Vector > ASVecBuf : register(u14); +AppendStructuredBuffer< float2x2 > AMatBuf : register(u15); +AppendStructuredBuffer< Matrix > ASMatBuf : register(u16); + +void main(uint ix0 : IX0) { + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 0 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 
4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x i1>)"(i32 277, %dx.types.Handle [[anhdl]], i32 [[ix]], <2 x i1> + BabBuf.Store(ix0 + 1, BabBuf.Load< bool2 >(ix0 + 0)); + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], 
%dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, [2 x float]*)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]], [2 x float] + BabBuf.Store(ix0 + 2, BabBuf.Load< float[2] >(ix0 + 1)); + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 3 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, %\22struct.Vector\22*)"(i32 277, %dx.types.Handle [[anhdl]], i32 [[ix]], %"struct.Vector" + BabBuf.Store >(ix0 + 3, BabBuf.Load< Vector >(ix0 + 2)); + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 3 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call %class.matrix.float.2.2 @"dx.hl.op.ro.%class.matrix.float.2.2 (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 4 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, %class.matrix.float.2.2)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]], %class.matrix.float.2.2 + BabBuf.Store(ix0 + 4, BabBuf.Load< float2x2 >(ix0 + 3)); + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 4 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: [[MSS:%.*]] = call %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 5 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, 
%struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, %\22struct.Matrix\22*)"(i32 277, %dx.types.Handle [[anhdl]], i32 [[ix]], %"struct.Matrix" + BabBuf.Store >(ix0 + 5, BabBuf.Load< Matrix >(ix0 + 4)); + + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 0 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]] + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + VecBuf[ix0 + 1] = VecBuf[ix0 + 0]; + 
+ // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer" undef) + // CHECK: call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer" undef) + // CHECK: call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + ArrBuf[ix0 + 2] = ArrBuf[ix0 + 1]; + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 3 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 
{{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + SVecBuf[ix0 + 3] = SVecBuf[ix0 + 2]; + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 4 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.RWStructuredBuffer >" undef) + // CHECK: [[SS:%.*]] = call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 3 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.RWStructuredBuffer >" 
undef) + // CHECK: call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + MatBuf[ix0 + 4] = MatBuf[ix0 + 3]; + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 5 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.RWStructuredBuffer >" undef) + // CHECK: [[MSS:%.*]] = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 4 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + SMatBuf[ix0 + 5] = SMatBuf[ix0 + 4]; + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, 
%\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.ConsumeStructuredBuffer >" undef) + // CHECK: [[cn:%.*]] = call <2 x float> @"dx.hl.op..consume<2 x float> (i32, %dx.types.Handle)"(i32 283, %dx.types.Handle [[anhdl]]) + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.AppendStructuredBuffer >" undef) + // CHECK: call void @"dx.hl.op..appendvoid (i32, %dx.types.Handle, <2 x float>)"(i32 226, %dx.types.Handle [[anhdl]], <2 x float> [[cn]]) + AVecBuf.Append(CVecBuf.Consume()); + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer\22)"(i32 0, %"class.ConsumeStructuredBuffer" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.ConsumeStructuredBuffer" undef) + // CHECK: [[cn:%.*]] = call [2 x float]* @"dx.hl.op..consume[2 x float]* (i32, %dx.types.Handle)"(i32 283, %dx.types.Handle [[anhdl]]) + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer\22)"(i32 0, %"class.AppendStructuredBuffer" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { 
i32 4108, i32 8 }, %"class.AppendStructuredBuffer" undef) + // CHECK: call void @"dx.hl.op..appendvoid (i32, %dx.types.Handle, [2 x float]*)"(i32 226, %dx.types.Handle [[anhdl]], [2 x float]* + AArrBuf.Append(CArrBuf.Consume()); + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.ConsumeStructuredBuffer >" undef) + // CHECK: [[cn:%.*]] = call %"struct.Vector"* @"dx.hl.op..consume%\22struct.Vector\22* (i32, %dx.types.Handle)"(i32 283, %dx.types.Handle [[anhdl]]) + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.AppendStructuredBuffer >" undef) + // CHECK: call void @"dx.hl.op..appendvoid (i32, %dx.types.Handle, %\22struct.Vector\22*)"(i32 226, %dx.types.Handle [[anhdl]], %"struct.Vector"* + ASVecBuf.Append(CSVecBuf.Consume()); + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 16 }, 
%"class.ConsumeStructuredBuffer >" undef) + // CHECK: [[cn:%.*]] = call %class.matrix.float.2.2 @"dx.hl.op..consume%class.matrix.float.2.2 (i32, %dx.types.Handle)"(i32 283, %dx.types.Handle [[anhdl]]) + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.AppendStructuredBuffer >" undef) + // CHECK: call void @"dx.hl.op..appendvoid (i32, %dx.types.Handle, %class.matrix.float.2.2)"(i32 226, %dx.types.Handle [[anhdl]], %class.matrix.float.2.2 [[cn]]) + AMatBuf.Append(CMatBuf.Consume()); + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.ConsumeStructuredBuffer >" undef) + // CHECK: [[cn:%.*]] = call %"struct.Matrix"* @"dx.hl.op..consume%\22struct.Matrix\22* (i32, %dx.types.Handle)"(i32 283, %dx.types.Handle [[anhdl]]) + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 32 }, 
%"class.AppendStructuredBuffer >" undef) + // CHECK: call void @"dx.hl.op..appendvoid (i32, %dx.types.Handle, %\22struct.Matrix\22*)"(i32 226, %dx.types.Handle [[anhdl]], %"struct.Matrix"* + ASMatBuf.Append(CSMatBuf.Consume()); + +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.ll b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.ll new file mode 100644 index 0000000000..540ab85819 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.ll @@ -0,0 +1,822 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RWByteAddressBuffer = type { i32 } +%"class.RWStructuredBuffer >" = type { <2 x float> } +%"class.RWStructuredBuffer" = type { [2 x float] } +%"class.RWStructuredBuffer >" = type { %"struct.Vector" } +%"struct.Vector" = type { <4 x float>, double, <2 x float> } +%"class.RWStructuredBuffer >" = type { %class.matrix.float.2.2 } +%class.matrix.float.2.2 = type { [2 x <2 x float>] } +%"class.RWStructuredBuffer >" = type { %"struct.Matrix" } +%"struct.Matrix" = type { <4 x float>, %class.matrix.float.2.2 } +%"class.ConsumeStructuredBuffer >" = type { <2 x float> } +%"class.ConsumeStructuredBuffer" = type { [2 x float] } +%"class.ConsumeStructuredBuffer >" = type { %"struct.Vector" } +%"class.ConsumeStructuredBuffer >" = type { %class.matrix.float.2.2 } +%"class.ConsumeStructuredBuffer >" = type { %"struct.Matrix" } +%"class.AppendStructuredBuffer >" = type { <2 x float> } +%"class.AppendStructuredBuffer" = type { [2 x float] } +%"class.AppendStructuredBuffer >" = type { %"struct.Vector" } +%"class.AppendStructuredBuffer >" = type { %class.matrix.float.2.2 } +%"class.AppendStructuredBuffer >" = type { %"struct.Matrix" } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?BabBuf@@3URWByteAddressBuffer@@A" = external 
global %struct.RWByteAddressBuffer, align 4 +@"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?ArrBuf@@3V?$RWStructuredBuffer@$$BY01M@@A" = external global %"class.RWStructuredBuffer", align 4 +@"\01?SVecBuf@@3V?$RWStructuredBuffer@U?$Vector@M$01@@@@A" = external global %"class.RWStructuredBuffer >", align 8 +@"\01?MatBuf@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?SMatBuf@@3V?$RWStructuredBuffer@U?$Matrix@M$01$01@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?CVecBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$01@@@@A" = external global %"class.ConsumeStructuredBuffer >", align 4 +@"\01?CArrBuf@@3V?$ConsumeStructuredBuffer@$$BY01M@@A" = external global %"class.ConsumeStructuredBuffer", align 4 +@"\01?CSVecBuf@@3V?$ConsumeStructuredBuffer@U?$Vector@M$01@@@@A" = external global %"class.ConsumeStructuredBuffer >", align 8 +@"\01?CMatBuf@@3V?$ConsumeStructuredBuffer@V?$matrix@M$01$01@@@@A" = external global %"class.ConsumeStructuredBuffer >", align 4 +@"\01?CSMatBuf@@3V?$ConsumeStructuredBuffer@U?$Matrix@M$01$01@@@@A" = external global %"class.ConsumeStructuredBuffer >", align 4 +@"\01?AVecBuf@@3V?$AppendStructuredBuffer@V?$vector@M$01@@@@A" = external global %"class.AppendStructuredBuffer >", align 4 +@"\01?AArrBuf@@3V?$AppendStructuredBuffer@$$BY01M@@A" = external global %"class.AppendStructuredBuffer", align 4 +@"\01?ASVecBuf@@3V?$AppendStructuredBuffer@U?$Vector@M$01@@@@A" = external global %"class.AppendStructuredBuffer >", align 8 +@"\01?AMatBuf@@3V?$AppendStructuredBuffer@V?$matrix@M$01$01@@@@A" = external global %"class.AppendStructuredBuffer >", align 4 +@"\01?ASMatBuf@@3V?$AppendStructuredBuffer@U?$Matrix@M$01$01@@@@A" = external global %"class.AppendStructuredBuffer >", align 4 + +; CHECK-LABEL: define void @main(i32 %ix0) +; Function Attrs: nounwind +define void @main(i32 %ix0) #0 { +bb: + ; CHECK: 
[[pix:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef) + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle [[anhdl]], i32 [[pix]], i32 undef, i8 3, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: [[bvec:%.*]] = icmp ne <2 x i32> [[pong]], zeroinitializer + + %tmp = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:60 col:32 + %tmp1 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp) ; line:60 col:32 + %tmp2 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:60 col:32 + %tmp3 = call <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp2, i32 %ix0) ; line:60 col:32 + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 1 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[vec2:%.*]] = zext 
<2 x i1> [[bvec]] to <2 x i32> + ; CHECK: [[val0:%.*]] = extractelement <2 x i32> [[vec2]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i32> [[vec2]], i64 1 + ; CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 undef, i32 [[val0]], i32 [[val1]], i32 undef, i32 undef, i8 3, i32 4) + %tmp4 = add i32 %ix0, 1 ; line:60 col:27 + %tmp5 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:60 col:3 + %tmp6 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp5) ; line:60 col:3 + %tmp7 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp6, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:60 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x i1>)"(i32 277, %dx.types.Handle %tmp7, i32 %tmp4, <2 x i1> %tmp3) ; line:60 col:3 + + ; CHECK: [[ix:%.*]] = add i32 [[pix]], 1 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[lix:%.*]] = add i32 0, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 1, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[lix:%.*]] = add i32 4, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 1, i32 4) + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + + %tmp8 = add 
i32 %ix0, 1 ; line:70 col:63 + %tmp9 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:70 col:35 + %tmp10 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp9) ; line:70 col:35 + %tmp11 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp10, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:70 col:35 + %tmp12 = call [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp11, i32 %tmp8) ; line:70 col:35 + %tmp13 = getelementptr inbounds [2 x float], [2 x float]* %tmp12, i32 0, i32 0 ; line:70 col:3 + %tmp14 = load float, float* %tmp13 ; line:70 col:3 + %tmp15 = getelementptr inbounds [2 x float], [2 x float]* %tmp12, i32 0, i32 1 ; line:70 col:3 + %tmp16 = load float, float* %tmp15 ; line:70 col:3 + + + ; CHECK: [[ix:%.*]] = add i32 [[pix]], 2 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, float [[val0]], float undef, float undef, float undef, i8 1, i32 4) + ; CHECK: [[stix:%.*]] = add i32 [[ix]], 4 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 undef, float [[val1]], float undef, float undef, float undef, i8 1, i32 4) + + %tmp17 = add i32 %ix0, 2 ; line:70 col:30 + %tmp18 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:70 col:3 + %tmp19 = 
call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp18) ; line:70 col:3 + %tmp20 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp19, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:70 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32 277, %dx.types.Handle %tmp20, i32 %tmp17, float %tmp14) ; line:70 col:3 + %tmp21 = add i32 %tmp17, 4 ; line:70 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32 277, %dx.types.Handle %tmp20, i32 %tmp21, float %tmp16) ; line:70 col:3 + + ; CHECK: [[ix:%.*]] = add i32 [[pix]], 2 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[lix:%.*]] = add i32 0, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[lix:%.*]] = add i32 16, [[ix]] + ; CHECK: [[ld:%.*]] = 
call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 1, i32 4) + ; CHECK: [[dval:%.*]] = extractvalue %dx.types.ResRet.f64 [[ld]], 0 + ; CHECK: [[lix:%.*]] = add i32 24, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 3, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec2:%.*]] = insertelement <2 x float> [[ping]], float [[val1]], i64 1 + %tmp22 = add i32 %ix0, 2 ; line:80 col:78 + %tmp23 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:80 col:43 + %tmp24 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp23) ; line:80 col:43 + %tmp25 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp24, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:80 col:43 + %tmp26 = call %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp25, i32 %tmp22) ; line:80 col:43 + %tmp27 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp26, i32 0, i32 0 ; line:80 col:3 + %tmp28 = load <4 x float>, <4 x float>* %tmp27 ; line:80 col:3 + %tmp29 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp26, i32 0, i32 1 ; line:80 col:3 + %tmp30 = load double, double* %tmp29 ; line:80 col:3 + %tmp31 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp26, i32 0, i32 2 ; line:80 col:3 + %tmp32 = load <2 x float>, <2 x float>* %tmp31 ; 
line:80 col:3 + + ; CHECK: [[ix:%.*]] = add i32 [[pix]], 3 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + ; CHECK: [[stix:%.*]] = add i32 [[ix]], 16 + ; CHECK: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 undef, double [[dval]] + ; CHECK: [[stix:%.*]] = add i32 [[ix]], 24 + ; CHECK: [[val0:%.*]] = extractelement <2 x float> [[vec2]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x float> [[vec2]], i64 1 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 undef, float [[val0]], float [[val1]], float undef, float undef, i8 3, i32 4) + %tmp33 = add i32 %ix0, 3 ; line:80 col:38 + %tmp34 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:80 col:3 + %tmp35 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp34) ; line:80 col:3 + %tmp36 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp35, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:80 col:3 + call void 
@"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32 277, %dx.types.Handle %tmp36, i32 %tmp33, <4 x float> %tmp28) ; line:80 col:3 + %tmp37 = add i32 %tmp33, 16 ; line:80 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, double)"(i32 277, %dx.types.Handle %tmp36, i32 %tmp37, double %tmp30) ; line:80 col:3 + %tmp38 = add i32 %tmp33, 24 ; line:80 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x float>)"(i32 277, %dx.types.Handle %tmp36, i32 %tmp38, <2 x float> %tmp32) ; line:80 col:3 + + + ; CHECK: [[lix:%.*]] = add i32 [[pix]], 3 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[rvec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + %tmp39 = add i32 %ix0, 3 ; line:90 col:63 + %tmp40 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:90 col:35 + %tmp41 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp40) ; line:90 col:35 + %tmp42 = call 
%dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp41, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:90 col:35 + %tmp43 = call <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp42, i32 %tmp39) ; line:90 col:35 + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 4 + ; CHECK: [[cvec4:%.*]] = shufflevector <4 x float> [[rvec4]], <4 x float> [[rvec4]], <4 x i32> + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[cvec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[cvec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[cvec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[cvec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + %tmp44 = add i32 %ix0, 4 ; line:90 col:30 + %row2col = shufflevector <4 x float> %tmp43, <4 x float> %tmp43, <4 x i32> ; line:90 col:3 + %tmp45 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:90 col:3 + %tmp46 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp45) ; line:90 col:3 + %tmp47 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp46, %dx.types.ResourceProperties { i32 4107, i32 0 }, 
%struct.RWByteAddressBuffer zeroinitializer) ; line:90 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32 277, %dx.types.Handle %tmp47, i32 %tmp44, <4 x float> %row2col) ; line:90 col:3 + + + ; CHECK: [[ix:%.*]] = add i32 [[pix]], 4 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[lix:%.*]] = add i32 0, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[lix:%.*]] = add i32 16, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 
1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[mat:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + %tmp48 = add i32 %ix0, 4 ; line:100 col:82 + %tmp49 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:100 col:45 + %tmp50 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp49) ; line:100 col:45 + %tmp51 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp50, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:100 col:45 + %tmp52 = call %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp51, i32 %tmp48) ; line:100 col:45 + %tmp53 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp52, i32 0, i32 0 ; line:100 col:3 + %tmp54 = load <4 x float>, <4 x float>* %tmp53 ; line:100 col:3 + %tmp55 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp52, i32 0, i32 1 ; line:100 col:3 + %tmp56 = call <4 x float> @"dx.hl.matldst.colLoad.<4 x float> (i32, %class.matrix.float.2.2*)"(i32 0, %class.matrix.float.2.2* %tmp55) ; line:100 col:3 + + ; CHECK: [[ix:%.*]] = add i32 [[pix]], 5 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = 
extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + ; CHECK: [[stix:%.*]] = add i32 [[ix]], 16 + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[mat]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[mat]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[mat]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[mat]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + %tmp57 = add i32 %ix0, 5 ; line:100 col:40 + %tmp58 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:100 col:3 + %tmp59 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp58) ; line:100 col:3 + %tmp60 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp59, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:100 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32 277, %dx.types.Handle %tmp60, i32 %tmp57, <4 x float> %tmp54) ; line:100 col:3 + %tmp61 = add i32 %tmp57, 16 ; line:100 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32 277, %dx.types.Handle %tmp60, i32 %tmp61, <4 x float> %tmp56) ; line:100 col:3 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 
8 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[pix]], i32 0, i8 3, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec2:%.*]] = insertelement <2 x float> [[ping]], float [[val1]], i64 1 + %tmp62 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" ; line:111 col:21 + %tmp63 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp62) ; line:111 col:21 + %tmp64 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp63, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:111 col:21 + %tmp65 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp64, i32 %ix0) ; line:111 col:21 + %tmp66 = load <2 x float>, <2 x float>* %tmp65 ; line:111 col:21 + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 1 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: [[val0:%.*]] = extractelement <2 x float> [[vec2]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x float> [[vec2]], i64 1 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 0, float [[val0]], float [[val1]], float undef, float undef, i8 3, i32 4) + %tmp67 = add i32 
%ix0, 1 ; line:111 col:14 + %tmp68 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" ; line:111 col:3 + %tmp69 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp68) ; line:111 col:3 + %tmp70 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp69, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:111 col:3 + %tmp71 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp70, i32 %tmp67) ; line:111 col:3 + store <2 x float> %tmp66, <2 x float>* %tmp71 ; line:111 col:19 + + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 2 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" + ; CHECK: [[sthdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: [[lix:%.*]] = add i32 [[pix]], 1 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" + ; CHECK: [[ldhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 0, i8 1, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 0, float [[val0]], float undef, float undef, float undef, i8 1, i32 4) + ; CHECK: [[ld:%.*]] = call 
%dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 4, i8 1, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 4, float [[val0]], float undef, float undef, float undef, i8 1, i32 4) + %tmp72 = add i32 %ix0, 2 ; line:121 col:14 + %tmp73 = load %"class.RWStructuredBuffer", %"class.RWStructuredBuffer"* @"\01?ArrBuf@@3V?$RWStructuredBuffer@$$BY01M@@A" ; line:121 col:3 + %tmp74 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer\22)"(i32 0, %"class.RWStructuredBuffer" %tmp73) ; line:121 col:3 + %tmp75 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer\22)"(i32 14, %dx.types.Handle %tmp74, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer" zeroinitializer) ; line:121 col:3 + %tmp76 = call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp75, i32 %tmp72) ; line:121 col:3 + %tmp77 = add i32 %ix0, 1 ; line:121 col:32 + %tmp78 = load %"class.RWStructuredBuffer", %"class.RWStructuredBuffer"* @"\01?ArrBuf@@3V?$RWStructuredBuffer@$$BY01M@@A" ; line:121 col:21 + %tmp79 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer\22)"(i32 0, %"class.RWStructuredBuffer" %tmp78) ; line:121 col:21 + %tmp80 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer\22)"(i32 14, %dx.types.Handle %tmp79, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer" zeroinitializer) ; line:121 col:21 + %tmp81 = call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp80, i32 %tmp77) ; line:121 
col:21 + %tmp82 = getelementptr inbounds [2 x float], [2 x float]* %tmp76, i32 0, i32 0 ; line:121 col:21 + %tmp83 = getelementptr inbounds [2 x float], [2 x float]* %tmp81, i32 0, i32 0 ; line:121 col:21 + %tmp84 = load float, float* %tmp83 ; line:121 col:21 + store float %tmp84, float* %tmp82 ; line:121 col:21 + %tmp85 = getelementptr inbounds [2 x float], [2 x float]* %tmp76, i32 0, i32 1 ; line:121 col:21 + %tmp86 = getelementptr inbounds [2 x float], [2 x float]* %tmp81, i32 0, i32 1 ; line:121 col:21 + %tmp87 = load float, float* %tmp86 ; line:121 col:21 + store float %tmp87, float* %tmp85 ; line:121 col:21 + + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 3 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[sthdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4876, i32 32 }) + ; CHECK: [[lix:%.*]] = add i32 [[pix]], 2 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ldhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4876, i32 32 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: 
[[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 16, i8 1, i32 8) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f64 [[ld]], 0 + ; CHECK: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 16, double [[val0]], double undef, double undef, double undef, i8 1, i32 8) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 24, i8 3, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec2:%.*]] = insertelement <2 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[val0:%.*]] = extractelement <2 x float> [[vec2]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x float> [[vec2]], i64 1 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 24, float [[val0]], float [[val1]], float undef, float undef, i8 3, i32 4) + %tmp88 = add i32 %ix0, 3 ; line:131 col:15 + %tmp89 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?SVecBuf@@3V?$RWStructuredBuffer@U?$Vector@M$01@@@@A" ; line:131 col:3 + %tmp90 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, 
%"class.RWStructuredBuffer >" %tmp89) ; line:131 col:3 + %tmp91 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp90, %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:131 col:3 + %tmp92 = call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp91, i32 %tmp88) ; line:131 col:3 + %tmp93 = add i32 %ix0, 2 ; line:131 col:34 + %tmp94 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?SVecBuf@@3V?$RWStructuredBuffer@U?$Vector@M$01@@@@A" ; line:131 col:22 + %tmp95 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp94) ; line:131 col:22 + %tmp96 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp95, %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:131 col:22 + %tmp97 = call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp96, i32 %tmp93) ; line:131 col:22 + %tmp98 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp92, i32 0, i32 0 ; line:131 col:22 + %tmp99 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp97, i32 0, i32 0 ; line:131 col:22 + %tmp100 = load <4 x float>, <4 x float>* %tmp99 ; line:131 col:22 + store <4 x float> %tmp100, <4 x float>* %tmp98 ; line:131 col:22 + %tmp101 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp92, i32 0, i32 1 ; line:131 col:22 + %tmp102 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp97, i32 0, i32 1 ; line:131 col:22 + %tmp103 = load double, 
double* %tmp102 ; line:131 col:22 + store double %tmp103, double* %tmp101 ; line:131 col:22 + %tmp104 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp92, i32 0, i32 2 ; line:131 col:22 + %tmp105 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp97, i32 0, i32 2 ; line:131 col:22 + %tmp106 = load <2 x float>, <2 x float>* %tmp105 ; line:131 col:22 + store <2 x float> %tmp106, <2 x float>* %tmp104 ; line:131 col:22 + + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 4 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[sthdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 16 }) + ; CHECK: [[lix:%.*]] = add i32 [[pix]], 3 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ldhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 16 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = 
extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + %tmp107 = add i32 %ix0, 4 ; line:141 col:14 + %tmp108 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?MatBuf@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A" ; line:141 col:3 + %tmp109 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp108) ; line:141 col:3 + %tmp110 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp109, %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:141 col:3 + %tmp111 = call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp110, i32 %tmp107) ; line:141 col:3 + %tmp112 = add i32 %ix0, 3 ; line:141 col:32 + %tmp113 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?MatBuf@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A" ; line:141 col:21 + %tmp114 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp113) ; line:141 col:21 + %tmp115 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp114, %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:141 col:21 + %tmp116 = call %class.matrix.float.2.2* 
@"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp115, i32 %tmp112) ; line:141 col:21 + %tmp117 = call <4 x float> @"dx.hl.matldst.colLoad.<4 x float> (i32, %class.matrix.float.2.2*)"(i32 0, %class.matrix.float.2.2* %tmp116) ; line:141 col:21 + %tmp118 = call <4 x float> @"dx.hl.matldst.colStore.<4 x float> (i32, %class.matrix.float.2.2*, <4 x float>)"(i32 1, %class.matrix.float.2.2* %tmp111, <4 x float> %tmp117) ; line:141 col:19 + + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 5 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[sthdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 32 }) + ; CHECK: [[lix:%.*]] = add i32 [[pix]], 4 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ldhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 32 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[val0:%.*]] = extractelement <4 x float> 
[[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 16, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 16, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + %tmp119 = add i32 %ix0, 5 ; line:151 col:15 + %tmp120 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?SMatBuf@@3V?$RWStructuredBuffer@U?$Matrix@M$01$01@@@@A" ; line:151 col:3 + %tmp121 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp120) ; line:151 col:3 + %tmp122 = call 
%dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp121, %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:151 col:3 + %tmp123 = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp122, i32 %tmp119) ; line:151 col:3 + %tmp124 = add i32 %ix0, 4 ; line:151 col:34 + %tmp125 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?SMatBuf@@3V?$RWStructuredBuffer@U?$Matrix@M$01$01@@@@A" ; line:151 col:22 + %tmp126 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp125) ; line:151 col:22 + %tmp127 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp126, %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:151 col:22 + %tmp128 = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp127, i32 %tmp124) ; line:151 col:22 + %tmp129 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp123, i32 0, i32 0 ; line:151 col:22 + %tmp130 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp128, i32 0, i32 0 ; line:151 col:22 + %tmp131 = load <4 x float>, <4 x float>* %tmp130 ; line:151 col:22 + store <4 x float> %tmp131, <4 x float>* %tmp129 ; line:151 col:22 + %tmp132 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp123, i32 0, i32 1 ; line:151 col:22 + %tmp133 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp128, i32 0, i32 1 ; line:151 col:22 + %tmp134 = call <4 x float> @"dx.hl.matldst.colLoad.<4 x float> (i32, 
%class.matrix.float.2.2*)"(i32 0, %class.matrix.float.2.2* %tmp133) ; line:151 col:22 + %tmp135 = call <4 x float> @"dx.hl.matldst.colStore.<4 x float> (i32, %class.matrix.float.2.2*, <4 x float>)"(i32 1, %class.matrix.float.2.2* %tmp132, <4 x float> %tmp134) ; line:151 col:22 + + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.ConsumeStructuredBuffer >"(i32 160, %"class.ConsumeStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 36876, i32 8 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 -1) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, i8 3, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec2:%.*]] = insertelement <2 x float> [[ping]], float [[val1]], i64 1 + %tmp136 = load %"class.ConsumeStructuredBuffer >", %"class.ConsumeStructuredBuffer >"* @"\01?CVecBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$01@@@@A" ; line:159 col:18 + %tmp137 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" %tmp136) ; line:159 col:18 + %tmp138 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp137, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.ConsumeStructuredBuffer >" zeroinitializer) ; line:159 col:18 + %tmp139 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 281, %dx.types.Handle %tmp138) #0 ; line:159 col:18 + %tmp140 = call <2 x float>* 
@"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp138, i32 %tmp139) #0 ; line:159 col:18 + %tmp141 = load <2 x float>, <2 x float>* %tmp140 ; line:159 col:18 + + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.AppendStructuredBuffer >"(i32 160, %"class.AppendStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 36876, i32 8 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 1) + ; CHECK: [[val0:%.*]] = extractelement <2 x float> [[vec2]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x float> [[vec2]], i64 1 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, float [[val0]], float [[val1]], float undef, float undef, i8 3, i32 4) + %tmp142 = load %"class.AppendStructuredBuffer >", %"class.AppendStructuredBuffer >"* @"\01?AVecBuf@@3V?$AppendStructuredBuffer@V?$vector@M$01@@@@A" ; line:159 col:3 + %tmp143 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" %tmp142) ; line:159 col:3 + %tmp144 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp143, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.AppendStructuredBuffer >" zeroinitializer) ; line:159 col:3 + %tmp145 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 282, %dx.types.Handle %tmp144) #0 ; line:159 col:3 + %tmp146 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp144, i32 %tmp145) #0 ; line:159 col:3 + store <2 x float> %tmp141, <2 x float>* %tmp146 ; line:159 col:3 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle 
@"dx.op.createHandleForLib.class.ConsumeStructuredBuffer"(i32 160, %"class.ConsumeStructuredBuffer" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 36876, i32 8 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 -1) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, i8 1, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 4, i8 1, i32 4) + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + + %tmp147 = load %"class.ConsumeStructuredBuffer", %"class.ConsumeStructuredBuffer"* @"\01?CArrBuf@@3V?$ConsumeStructuredBuffer@$$BY01M@@A" ; line:167 col:18 + %tmp148 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer\22)"(i32 0, %"class.ConsumeStructuredBuffer" %tmp147) ; line:167 col:18 + %tmp149 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer\22)"(i32 14, %dx.types.Handle %tmp148, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.ConsumeStructuredBuffer" zeroinitializer) ; line:167 col:18 + %tmp150 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 281, %dx.types.Handle %tmp149) #0 ; line:167 col:18 + %tmp151 = call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp149, i32 %tmp150) #0 ; line:167 col:18 + %tmp152 = getelementptr inbounds [2 x float], [2 x float]* %tmp151, i32 0, i32 0 ; line:167 col:3 + %tmp153 = load float, float* %tmp152 ; line:167 col:3 + %tmp154 = getelementptr inbounds [2 x float], [2 x float]* %tmp151, i32 0, i32 1 ; line:167 col:3 + 
%tmp155 = load float, float* %tmp154 ; line:167 col:3 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.AppendStructuredBuffer"(i32 160, %"class.AppendStructuredBuffer" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 36876, i32 8 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 1) + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, float [[val0]], float undef, float undef, float undef, i8 1, i32 4) + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 4, float [[val1]], float undef, float undef, float undef, i8 1, i32 4) + + %tmp156 = load %"class.AppendStructuredBuffer", %"class.AppendStructuredBuffer"* @"\01?AArrBuf@@3V?$AppendStructuredBuffer@$$BY01M@@A" ; line:167 col:3 + %tmp157 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer\22)"(i32 0, %"class.AppendStructuredBuffer" %tmp156) ; line:167 col:3 + %tmp158 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer\22)"(i32 14, %dx.types.Handle %tmp157, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.AppendStructuredBuffer" zeroinitializer) ; line:167 col:3 + %tmp159 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 282, %dx.types.Handle %tmp158) #0 ; line:167 col:3 + %tmp160 = call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp158, i32 %tmp159) #0 ; line:167 col:3 + %tmp161 = getelementptr inbounds [2 x float], [2 x float]* %tmp160, i32 0, i32 0 ; line:167 col:3 + store float %tmp153, float* %tmp161 ; line:167 col:3 + %tmp162 = getelementptr inbounds [2 x float], [2 x float]* %tmp160, i32 0, i32 1 ; line:167 col:3 + store 
float %tmp155, float* %tmp162 ; line:167 col:3 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.ConsumeStructuredBuffer >"(i32 160, %"class.ConsumeStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 37644, i32 32 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 -1) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 16, i8 1, i32 8) + ; CHECK: [[dval:%.*]] = extractvalue %dx.types.ResRet.f64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 24, i8 3, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec2:%.*]] = insertelement <2 x float> [[ping]], float [[val1]], i64 1 + + %tmp163 = load %"class.ConsumeStructuredBuffer >", %"class.ConsumeStructuredBuffer >"* 
@"\01?CSVecBuf@@3V?$ConsumeStructuredBuffer@U?$Vector@M$01@@@@A" ; line:175 col:19 + %tmp164 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" %tmp163) ; line:175 col:19 + %tmp165 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp164, %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.ConsumeStructuredBuffer >" zeroinitializer) ; line:175 col:19 + %tmp166 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 281, %dx.types.Handle %tmp165) #0 ; line:175 col:19 + %tmp167 = call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp165, i32 %tmp166) #0 ; line:175 col:19 + %tmp168 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp167, i32 0, i32 0 ; line:175 col:3 + %tmp169 = load <4 x float>, <4 x float>* %tmp168 ; line:175 col:3 + %tmp170 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp167, i32 0, i32 1 ; line:175 col:3 + %tmp171 = load double, double* %tmp170 ; line:175 col:3 + %tmp172 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp167, i32 0, i32 2 ; line:175 col:3 + %tmp173 = load <2 x float>, <2 x float>* %tmp172 ; line:175 col:3 + + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.AppendStructuredBuffer >"(i32 160, %"class.AppendStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 37644, i32 32 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 1) + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = 
extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + ; CHECK: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 16, double [[dval]], double undef, double undef, double undef, i8 1, i32 8) + ; CHECK: [[val0:%.*]] = extractelement <2 x float> [[vec2]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x float> [[vec2]], i64 1 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 24, float [[val0]], float [[val1]], float undef, float undef, i8 3, i32 4) + %tmp174 = load %"class.AppendStructuredBuffer >", %"class.AppendStructuredBuffer >"* @"\01?ASVecBuf@@3V?$AppendStructuredBuffer@U?$Vector@M$01@@@@A" ; line:175 col:3 + %tmp175 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" %tmp174) ; line:175 col:3 + %tmp176 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp175, %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.AppendStructuredBuffer >" zeroinitializer) ; line:175 col:3 + %tmp177 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 282, %dx.types.Handle %tmp176) #0 ; line:175 col:3 + %tmp178 = call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp176, i32 %tmp177) #0 ; line:175 col:3 + %tmp179 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp178, i32 0, i32 0 ; line:175 col:3 + store <4 x float> %tmp169, <4 x float>* %tmp179 ; line:175 col:3 + %tmp180 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp178, i32 
0, i32 1 ; line:175 col:3 + store double %tmp171, double* %tmp180 ; line:175 col:3 + %tmp181 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp178, i32 0, i32 2 ; line:175 col:3 + store <2 x float> %tmp173, <2 x float>* %tmp181 ; line:175 col:3 + + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.ConsumeStructuredBuffer >"(i32 160, %"class.ConsumeStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 37388, i32 16 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 -1) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[rvec4:%.*]] = shufflevector <4 x float> [[vec4]], <4 x float> [[vec4]], <4 x i32> + %tmp182 = load %"class.ConsumeStructuredBuffer >", %"class.ConsumeStructuredBuffer >"* @"\01?CMatBuf@@3V?$ConsumeStructuredBuffer@V?$matrix@M$01$01@@@@A" ; line:183 col:18 + %tmp183 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" %tmp182) ; line:183 col:18 + %tmp184 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, 
%dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp183, %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.ConsumeStructuredBuffer >" zeroinitializer) ; line:183 col:18 + %tmp185 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 281, %dx.types.Handle %tmp184) #0 ; line:183 col:18 + %tmp186 = call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp184, i32 %tmp185) #0 ; line:183 col:18 + %tmp187 = call <4 x float> @"dx.hl.matldst.colLoad.<4 x float> (i32, %class.matrix.float.2.2*)"(i32 0, %class.matrix.float.2.2* %tmp186) ; line:183 col:18 + %col2row10 = shufflevector <4 x float> %tmp187, <4 x float> %tmp187, <4 x i32> ; line:183 col:18 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.AppendStructuredBuffer >"(i32 160, %"class.AppendStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 37388, i32 16 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 1) + ; CHECK: [[cvec4:%.*]] = shufflevector <4 x float> [[rvec4]], <4 x float> [[rvec4]], <4 x i32> + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[cvec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[cvec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[cvec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[cvec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + + %tmp188 = load %"class.AppendStructuredBuffer >", %"class.AppendStructuredBuffer >"* @"\01?AMatBuf@@3V?$AppendStructuredBuffer@V?$matrix@M$01$01@@@@A" ; line:183 col:3 + %tmp189 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, 
%\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" %tmp188) ; line:183 col:3 + %tmp190 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp189, %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.AppendStructuredBuffer >" zeroinitializer) ; line:183 col:3 + %tmp191 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 282, %dx.types.Handle %tmp190) #0 ; line:183 col:3 + %tmp192 = call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp190, i32 %tmp191) #0 ; line:183 col:3 + %row2col11 = shufflevector <4 x float> %col2row10, <4 x float> %col2row10, <4 x i32> ; line:183 col:3 + call void @"dx.hl.matldst.colStore.void (i32, %class.matrix.float.2.2*, <4 x float>)"(i32 1, %class.matrix.float.2.2* %tmp192, <4 x float> %row2col11) ; line:183 col:3 + + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.ConsumeStructuredBuffer >"(i32 160, %"class.ConsumeStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 37388, i32 32 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 -1) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float 
[[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 16, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[mat:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + %tmp193 = load %"class.ConsumeStructuredBuffer >", %"class.ConsumeStructuredBuffer >"* @"\01?CSMatBuf@@3V?$ConsumeStructuredBuffer@U?$Matrix@M$01$01@@@@A" ; line:191 col:19 + %tmp194 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" %tmp193) ; line:191 col:19 + %tmp195 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp194, %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.ConsumeStructuredBuffer >" zeroinitializer) ; line:191 col:19 + %tmp196 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 281, %dx.types.Handle %tmp195) #0 ; line:191 col:19 + %tmp197 = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp195, i32 %tmp196) #0 ; line:191 col:19 + %tmp198 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* 
%tmp197, i32 0, i32 0 ; line:191 col:3 + %tmp199 = load <4 x float>, <4 x float>* %tmp198 ; line:191 col:3 + %tmp200 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp197, i32 0, i32 1 ; line:191 col:3 + %tmp201 = call <4 x float> @"dx.hl.matldst.colLoad.<4 x float> (i32, %class.matrix.float.2.2*)"(i32 0, %class.matrix.float.2.2* %tmp200) ; line:191 col:3 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.AppendStructuredBuffer >"(i32 160, %"class.AppendStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 37388, i32 32 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 1) + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[mat]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[mat]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[mat]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[mat]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 16, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + + %tmp202 = load %"class.AppendStructuredBuffer >", %"class.AppendStructuredBuffer >"* @"\01?ASMatBuf@@3V?$AppendStructuredBuffer@U?$Matrix@M$01$01@@@@A" ; line:191 col:3 + %tmp203 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" 
%tmp202) ; line:191 col:3 + %tmp204 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp203, %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.AppendStructuredBuffer >" zeroinitializer) ; line:191 col:3 + %tmp205 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 282, %dx.types.Handle %tmp204) #0 ; line:191 col:3 + %tmp206 = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp204, i32 %tmp205) #0 ; line:191 col:3 + %tmp207 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp206, i32 0, i32 0 ; line:191 col:3 + store <4 x float> %tmp199, <4 x float>* %tmp207 ; line:191 col:3 + %tmp208 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp206, i32 0, i32 1 ; line:191 col:3 + %tmp209 = call <4 x float> @"dx.hl.matldst.colStore.<4 x float> (i32, %class.matrix.float.2.2*, <4 x float>)"(i32 1, %class.matrix.float.2.2* %tmp208, <4 x float> %tmp201) ; line:191 col:3 + + + ; CHECK: ret void + ret void ; line:193 col:1 +} + +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x i1>)"(i32, %dx.types.Handle, i32, <2 x i1>) #0 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32, %struct.RWByteAddressBuffer) #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer) #1 +declare <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 
+declare %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer\22)"(i32, %"class.RWStructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer") #1 +declare %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32, %dx.types.Handle) #0 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer\22)"(i32, %"class.AppendStructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, 
%"class.AppendStructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer\22)"(i32, %"class.ConsumeStructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.ConsumeStructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32, 
%dx.types.Handle, %dx.types.ResourceProperties, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.ConsumeStructuredBuffer >") #1 +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32, %dx.types.Handle, i32, float) #0 +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32, %dx.types.Handle, i32, <4 x float>) #0 +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, double)"(i32, %dx.types.Handle, i32, double) #0 +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x float>)"(i32, %dx.types.Handle, i32, <2 x float>) #0 +declare <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare <4 x float> @"dx.hl.matldst.colLoad.<4 x float> (i32, %class.matrix.float.2.2*)"(i32, %class.matrix.float.2.2*) #2 +declare <4 x float> @"dx.hl.matldst.colStore.<4 x float> (i32, %class.matrix.float.2.2*, <4 x float>)"(i32, %class.matrix.float.2.2*, <4 x float>) #0 +declare void @"dx.hl.matldst.colStore.void (i32, %class.matrix.float.2.2*, <4 x float>)"(i32, %class.matrix.float.2.2*, <4 x float>) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.version = 
!{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !43} +!dx.entryPoints = !{!50} +!dx.fnprops = !{!72} +!dx.options = !{!73, !74} + +!3 = !{i32 1, i32 6} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 6} +!6 = !{i32 0, %"class.RWStructuredBuffer >" undef, !7, %"class.RWStructuredBuffer" undef, !12, %"class.RWStructuredBuffer >" undef, !16, %"struct.Vector" undef, !21, %"class.RWStructuredBuffer >" undef, !29, %"class.RWStructuredBuffer >" undef, !35, %"struct.Matrix" undef, !39, %"class.ConsumeStructuredBuffer >" undef, !7, %"class.ConsumeStructuredBuffer" undef, !12, %"class.ConsumeStructuredBuffer >" undef, !16, %"class.ConsumeStructuredBuffer >" undef, !29, %"class.ConsumeStructuredBuffer >" undef, !35, %"class.AppendStructuredBuffer >" undef, !7, %"class.AppendStructuredBuffer" undef, !12, %"class.AppendStructuredBuffer >" undef, !16, %"class.AppendStructuredBuffer >" undef, !29, %"class.AppendStructuredBuffer >" undef, !35} +!7 = !{i32 8, !8, !9} +!8 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9} +!9 = !{i32 0, !10} +!10 = !{!11} +!11 = !{i32 0, <2 x float> undef} +!12 = !{i32 20, !8, !13} +!13 = !{i32 0, !14} +!14 = !{!15} +!15 = !{i32 0, [2 x float] undef} +!16 = !{i32 32, !17, !18} +!17 = !{i32 6, !"h", i32 3, i32 0} +!18 = !{i32 0, !19} +!19 = !{!20} +!20 = !{i32 0, %"struct.Vector" undef} +!21 = !{i32 32, !22, !23, !24, !25} +!22 = !{i32 6, !"pad1", i32 3, i32 0, i32 7, i32 9} +!23 = !{i32 6, !"pad2", i32 3, i32 16, i32 7, i32 10} +!24 = !{i32 6, !"v", i32 3, i32 24, i32 7, i32 9} +!25 = !{i32 0, !26} +!26 = !{!27, !28} +!27 = !{i32 0, float undef} +!28 = !{i32 1, i64 2} +!29 = !{i32 24, !30, !32} +!30 = !{i32 6, !"h", i32 2, !31, i32 3, i32 0, i32 7, i32 9} +!31 = !{i32 2, i32 2, i32 2} +!32 = !{i32 0, !33} +!33 = !{!34} +!34 = !{i32 0, %class.matrix.float.2.2 undef} +!35 = !{i32 40, !17, !36} +!36 = !{i32 0, !37} +!37 = !{!38} +!38 = !{i32 0, %"struct.Matrix" undef} +!39 = !{i32 40, !22, !40, !41} +!40 = !{i32 6, !"m", i32 
2, !31, i32 3, i32 16, i32 7, i32 9} +!41 = !{i32 0, !42} +!42 = !{!27, !28, !28} +!43 = !{i32 1, void (i32)* @main, !44} +!44 = !{!45, !47} +!45 = !{i32 1, !46, !46} +!46 = !{} +!47 = !{i32 0, !48, !49} +!48 = !{i32 4, !"IX0", i32 7, i32 5} +!49 = !{i32 0} +!50 = !{void (i32)* @main, !"main", null, !51, null} +!51 = !{null, !52, null, null} +!52 = !{!53, !54, !56, !57, !59, !61, !62, !63, !64, !65, !66, !67, !68, !69, !70, !71} +!53 = !{i32 0, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A", !"BabBuf", i32 0, i32 1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!54 = !{i32 1, %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A", !"VecBuf", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !55} +!55 = !{i32 1, i32 8} +!56 = !{i32 2, %"class.RWStructuredBuffer"* @"\01?ArrBuf@@3V?$RWStructuredBuffer@$$BY01M@@A", !"ArrBuf", i32 0, i32 3, i32 1, i32 12, i1 false, i1 false, i1 false, !55} +!57 = !{i32 3, %"class.RWStructuredBuffer >"* @"\01?SVecBuf@@3V?$RWStructuredBuffer@U?$Vector@M$01@@@@A", !"SVecBuf", i32 0, i32 4, i32 1, i32 12, i1 false, i1 false, i1 false, !58} +!58 = !{i32 1, i32 32} +!59 = !{i32 4, %"class.RWStructuredBuffer >"* @"\01?MatBuf@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A", !"MatBuf", i32 0, i32 5, i32 1, i32 12, i1 false, i1 false, i1 false, !60} +!60 = !{i32 1, i32 16} +!61 = !{i32 5, %"class.RWStructuredBuffer >"* @"\01?SMatBuf@@3V?$RWStructuredBuffer@U?$Matrix@M$01$01@@@@A", !"SMatBuf", i32 0, i32 6, i32 1, i32 12, i1 false, i1 false, i1 false, !58} +!62 = !{i32 6, %"class.ConsumeStructuredBuffer >"* @"\01?CVecBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$01@@@@A", !"CVecBuf", i32 0, i32 7, i32 1, i32 12, i1 false, i1 false, i1 false, !55} +!63 = !{i32 7, %"class.ConsumeStructuredBuffer"* @"\01?CArrBuf@@3V?$ConsumeStructuredBuffer@$$BY01M@@A", !"CArrBuf", i32 0, i32 8, i32 1, i32 12, i1 false, i1 false, i1 false, !55} +!64 = !{i32 8, %"class.ConsumeStructuredBuffer >"* 
@"\01?CSVecBuf@@3V?$ConsumeStructuredBuffer@U?$Vector@M$01@@@@A", !"CSVecBuf", i32 0, i32 9, i32 1, i32 12, i1 false, i1 false, i1 false, !58} +!65 = !{i32 9, %"class.ConsumeStructuredBuffer >"* @"\01?CMatBuf@@3V?$ConsumeStructuredBuffer@V?$matrix@M$01$01@@@@A", !"CMatBuf", i32 0, i32 10, i32 1, i32 12, i1 false, i1 false, i1 false, !60} +!66 = !{i32 10, %"class.ConsumeStructuredBuffer >"* @"\01?CSMatBuf@@3V?$ConsumeStructuredBuffer@U?$Matrix@M$01$01@@@@A", !"CSMatBuf", i32 0, i32 11, i32 1, i32 12, i1 false, i1 false, i1 false, !58} +!67 = !{i32 11, %"class.AppendStructuredBuffer >"* @"\01?AVecBuf@@3V?$AppendStructuredBuffer@V?$vector@M$01@@@@A", !"AVecBuf", i32 0, i32 12, i32 1, i32 12, i1 false, i1 false, i1 false, !55} +!68 = !{i32 12, %"class.AppendStructuredBuffer"* @"\01?AArrBuf@@3V?$AppendStructuredBuffer@$$BY01M@@A", !"AArrBuf", i32 0, i32 13, i32 1, i32 12, i1 false, i1 false, i1 false, !55} +!69 = !{i32 13, %"class.AppendStructuredBuffer >"* @"\01?ASVecBuf@@3V?$AppendStructuredBuffer@U?$Vector@M$01@@@@A", !"ASVecBuf", i32 0, i32 14, i32 1, i32 12, i1 false, i1 false, i1 false, !58} +!70 = !{i32 14, %"class.AppendStructuredBuffer >"* @"\01?AMatBuf@@3V?$AppendStructuredBuffer@V?$matrix@M$01$01@@@@A", !"AMatBuf", i32 0, i32 15, i32 1, i32 12, i1 false, i1 false, i1 false, !60} +!71 = !{i32 15, %"class.AppendStructuredBuffer >"* @"\01?ASMatBuf@@3V?$AppendStructuredBuffer@U?$Matrix@M$01$01@@@@A", !"ASMatBuf", i32 0, i32 16, i32 1, i32 12, i1 false, i1 false, i1 false, !58} +!72 = !{void (i32)* @main, i32 1} +!73 = !{i32 64} +!74 = !{i32 -1} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.hlsl new file mode 100644 index 0000000000..47355d633f --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.hlsl @@ -0,0 +1,112 @@ +// RUN: %dxc -fcgl -T vs_6_6 %s | FileCheck %s + +// Source file for DxilGen IR test for typed buffer/texture load 
lowering + +RWBuffer< bool2 > TyBuf : register(u1); +Texture2DMS< bool2 > Tex2dMs : register(t2); + +Texture1D< float2 > Tex1d : register(t3); +Texture2D< float2 > Tex2d : register(t4); +Texture3D< float2 > Tex3d : register(t5); +Texture2DArray< float2 > Tex2dArr : register(t6); + +RWBuffer< float2 > OutBuf : register(u7); + +void main(uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3, uint4 ix4 : IX4) { + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" undef) + // CHECK: call <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + bool2 Tyb0 = TyBuf.Load(ix1 + 1); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" undef) + // CHECK: call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + bool2 Tyb1 = TyBuf[ix1 + 2]; + + // CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DMS, 0>\22)"(i32 0, %"class.Texture2DMS, 0>" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, 
%dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 3, i32 517 }, %"class.Texture2DMS, 0>" undef), + // CHECK: call <2 x i1> @"dx.hl.op..<2 x i1> (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 231, %dx.types.Handle [[ANHDL]], <2 x i32> [[IX]] + bool2 TxMs0 = Tex2dMs.Load(ix2 + 3, ix1); + + // CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DMS, 0>\22)"(i32 0, %"class.Texture2DMS, 0>" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 3, i32 517 }, %"class.Texture2DMS, 0>" undef) + // CHECK: call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[ANHDL]], <2 x i32> [[IX]]) + bool2 TxMs1 = Tex2dMs[ix2 + 4]; + + // CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture1D >\22)"(i32 0, %"class.Texture1D >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture1D >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 1, i32 521 }, %"class.Texture1D >" undef) + // CHECK: call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <2 x i32>)"(i32 231, %dx.types.Handle [[ANHDL]], <2 x i32> [[IX]]) + float2 Tx1d0 = Tex1d.Load(ix2 + 5); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 6 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture1D >\22)"(i32 0, %"class.Texture1D >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture1D >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 1, i32 521 }, %"class.Texture1D >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Tx1d1 = Tex1d[ix1 + 6]; + + // CHECK: [[IX:%.*]] = add <3 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2D >\22)"(i32 0, %"class.Texture2D >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2D >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 2, i32 521 }, %"class.Texture2D >" undef) + // CHECK: call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <3 x i32>)"(i32 231, %dx.types.Handle [[ANHDL]], <3 x i32> [[IX]]) + float2 Tx2d0 = Tex2d.Load(ix3 + 7); + + // CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2D >\22)"(i32 0, %"class.Texture2D >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2D >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 2, i32 521 }, %"class.Texture2D >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[ANHDL]], <2 x i32> [[IX]]) + float2 Tx2d1 = Tex2d[ix2 + 8]; + + // CHECK: [[IX:%.*]] = add <4 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture3D >\22)"(i32 0, %"class.Texture3D >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture3D >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4, i32 521 }, %"class.Texture3D >" undef) + // CHECK: call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <4 x i32>)"(i32 231, %dx.types.Handle [[ANHDL]], <4 x i32> [[IX]]) + float2 Tx3d0 = Tex3d.Load(ix4 + 9); + + // CHECK: [[IX:%.*]] = add <3 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture3D >\22)"(i32 0, %"class.Texture3D >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture3D >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4, i32 521 }, %"class.Texture3D >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[ANHDL]], <3 x i32> [[IX]]) + float2 Tx3d1 = Tex3d[ix3 + 10]; + + // CHECK: [[IX:%.*]] = add <4 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DArray >\22)"(i32 0, %"class.Texture2DArray >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DArray >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 7, i32 521 }, %"class.Texture2DArray >" undef) + // CHECK: call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <4 x i32>)"(i32 231, %dx.types.Handle [[ANHDL]], <4 x i32> [[IX]]) + float2 Tx2da0 = Tex2dArr.Load(ix4 + 11); + + // CHECK: [[IX:%.*]] = add <3 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DArray >\22)"(i32 0, %"class.Texture2DArray >" + // CHECK: 
[[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DArray >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 7, i32 521 }, %"class.Texture2DArray >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[ANHDL]], <3 x i32> [[IX]]) + float2 Tx2da1 = Tex2dArr[ix3 + 12]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 13 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + OutBuf[ix1+13] = select(Tyb0, Tx1d0, Tx1d1); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 14 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + OutBuf[ix1+14] = select(Tyb1, Tx2d0, Tx2d1); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 15 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, 
%"class.RWBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + OutBuf[ix1+15] = select(TxMs0, Tx3d0, Tx3d1); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 16 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + OutBuf[ix1+16] = select(TxMs1, Tx2da0, Tx2da1); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.ll b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.ll new file mode 100644 index 0000000000..3ecb28644c --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.ll @@ -0,0 +1,346 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"class.RWBuffer >" = type { <2 x i32> } +%"class.Texture2DMS, 0>" = type { <2 x i32>, %"class.Texture2DMS, 0>::sample_type" } +%"class.Texture2DMS, 0>::sample_type" = type { i32 } +%"class.Texture1D >" = type { <2 x float>, %"class.Texture1D >::mips_type" } +%"class.Texture1D >::mips_type" = type { i32 } +%"class.Texture2D 
>" = type { <2 x float>, %"class.Texture2D >::mips_type" } +%"class.Texture2D >::mips_type" = type { i32 } +%"class.Texture3D >" = type { <2 x float>, %"class.Texture3D >::mips_type" } +%"class.Texture3D >::mips_type" = type { i32 } +%"class.Texture2DArray >" = type { <2 x float>, %"class.Texture2DArray >::mips_type" } +%"class.Texture2DArray >::mips_type" = type { i32 } +%"class.RWBuffer >" = type { <2 x float> } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?TyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A" = external global %"class.RWBuffer >", align 4 +@"\01?Tex2dMs@@3V?$Texture2DMS@V?$vector@_N$01@@$0A@@@A" = external global %"class.Texture2DMS, 0>", align 4 +@"\01?Tex1d@@3V?$Texture1D@V?$vector@M$01@@@@A" = external global %"class.Texture1D >", align 4 +@"\01?Tex2d@@3V?$Texture2D@V?$vector@M$01@@@@A" = external global %"class.Texture2D >", align 4 +@"\01?Tex3d@@3V?$Texture3D@V?$vector@M$01@@@@A" = external global %"class.Texture3D >", align 4 +@"\01?Tex2dArr@@3V?$Texture2DArray@V?$vector@M$01@@@@A" = external global %"class.Texture2DArray >", align 4 +@"\01?OutBuf@@3V?$RWBuffer@V?$vector@M$01@@@@A" = external global %"class.RWBuffer >", align 4 + +; Function Attrs: nounwind +define void @main(i32 %ix1, <2 x i32> %ix2, <3 x i32> %ix3, <4 x i32> %ix4) #0 { + ; CHECK: [[PIX:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 1 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 517 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[ANHDL]], i32 [[IX]], i32 undef) + ; CHECK-DAG: [[V0:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 0 + ; CHECK-DAG: [[V1:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 1 + 
; CHECK-DAG: [[VEC0:%.*]] = insertelement <2 x i32> undef, i32 [[V0]], i64 0 + ; CHECK-DAG: [[VEC1:%.*]] = insertelement <2 x i32> [[VEC0]], i32 [[V1]], i64 1 + ; CHECK: icmp ne <2 x i32> [[VEC1]], zeroinitializer + %1 = add i32 %ix1, 1 + %2 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?TyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A" + %3 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %2) + %4 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" zeroinitializer) + %5 = call <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %4, i32 %1) + + %6 = zext <2 x i1> %5 to <2 x i32> + + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 2 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 517 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[ANHDL]], i32 [[IX]], i32 undef) + ; CHECK-DAG: [[V0:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 0 + ; CHECK-DAG: [[V1:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 1 + ; CHECK-DAG: [[VEC0:%.*]] = insertelement <2 x i32> undef, i32 [[V0]], i64 0 + ; CHECK-DAG: [[VEC1:%.*]] = insertelement <2 x i32> [[VEC0]], i32 [[V1]], i64 1 + ; CHECK: icmp ne <2 x i32> [[VEC1]], zeroinitializer + %7 = add i32 %ix1, 2 + %8 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?TyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A" + %9 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %8) + %10 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %9, %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" zeroinitializer) + %11 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %10, i32 %7) + %12 = load <2 x i32>, <2 x i32>* %11 + + %13 = icmp ne <2 x i32> %12, zeroinitializer + %14 = zext <2 x i1> %13 to <2 x i32> + + ; CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture2DMS, 0>"(i32 160, %"class.Texture2DMS, 0>" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 3, i32 517 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <2 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <2 x i32> [[IX]], i64 1 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[ANHDL]], i32 [[PIX]], i32 [[IX0]], i32 [[IX1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK-DAG: [[V0:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 0 + ; CHECK-DAG: [[V1:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 1 + ; CHECK-DAG: [[VEC0:%.*]] = insertelement <2 x i32> undef, i32 [[V0]], i64 0 + ; CHECK-DAG: [[VEC1:%.*]] = insertelement <2 x i32> [[VEC0]], i32 [[V1]], i64 1 + ; CHECK: icmp ne <2 x i32> [[VEC1]], zeroinitializer + %15 = add <2 x i32> %ix2, + %16 = load %"class.Texture2DMS, 0>", %"class.Texture2DMS, 0>"* @"\01?Tex2dMs@@3V?$Texture2DMS@V?$vector@_N$01@@$0A@@@A" + %17 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DMS, 0>\22)"(i32 0, %"class.Texture2DMS, 0>" %16) + %18 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DMS, 0>\22)"(i32 14, %dx.types.Handle %17, 
%dx.types.ResourceProperties { i32 3, i32 517 }, %"class.Texture2DMS, 0>" zeroinitializer) + %19 = call <2 x i1> @"dx.hl.op..<2 x i1> (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 231, %dx.types.Handle %18, <2 x i32> %15, i32 %ix1) + %20 = zext <2 x i1> %19 to <2 x i32> + + ; CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture2DMS, 0>"(i32 160, %"class.Texture2DMS, 0>" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 3, i32 517 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <2 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <2 x i32> [[IX]], i64 1 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[ANHDL]], i32 0, i32 [[IX0]], i32 [[IX1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK-DAG: [[V0:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 0 + ; CHECK-DAG: [[V1:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 1 + ; CHECK-DAG: [[VEC0:%.*]] = insertelement <2 x i32> undef, i32 [[V0]], i64 0 + ; CHECK-DAG: [[VEC1:%.*]] = insertelement <2 x i32> [[VEC0]], i32 [[V1]], i64 1 + ; CHECK: icmp ne <2 x i32> [[VEC1]], zeroinitializer + %21 = add <2 x i32> %ix2, + %22 = load %"class.Texture2DMS, 0>", %"class.Texture2DMS, 0>"* @"\01?Tex2dMs@@3V?$Texture2DMS@V?$vector@_N$01@@$0A@@@A" + %23 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DMS, 0>\22)"(i32 0, %"class.Texture2DMS, 0>" %22) + %24 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DMS, 0>\22)"(i32 14, %dx.types.Handle %23, %dx.types.ResourceProperties { i32 3, i32 517 }, %"class.Texture2DMS, 0>" zeroinitializer) + %25 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %24, <2 x i32> %21) + 
%26 = load <2 x i32>, <2 x i32>* %25 + + %27 = icmp ne <2 x i32> %26, zeroinitializer + %28 = zext <2 x i1> %27 to <2 x i32> + + ; CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture1D >"(i32 160, %"class.Texture1D >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 1, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <2 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <2 x i32> [[IX]], i64 1 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 [[IX1]], i32 [[IX0]], i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) + %29 = add <2 x i32> %ix2, + %30 = load %"class.Texture1D >", %"class.Texture1D >"* @"\01?Tex1d@@3V?$Texture1D@V?$vector@M$01@@@@A" + %31 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture1D >\22)"(i32 0, %"class.Texture1D >" %30) + %32 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture1D >\22)"(i32 14, %dx.types.Handle %31, %dx.types.ResourceProperties { i32 1, i32 521 }, %"class.Texture1D >" zeroinitializer) + %33 = call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <2 x i32>)"(i32 231, %dx.types.Handle %32, <2 x i32> %29) + + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 6 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture1D >"(i32 160, %"class.Texture1D >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 1, i32 521 }) + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 0, i32 [[IX]], i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) + %34 = add i32 %ix1, 6 + %35 = load %"class.Texture1D >", 
%"class.Texture1D >"* @"\01?Tex1d@@3V?$Texture1D@V?$vector@M$01@@@@A" + %36 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture1D >\22)"(i32 0, %"class.Texture1D >" %35) + %37 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture1D >\22)"(i32 14, %dx.types.Handle %36, %dx.types.ResourceProperties { i32 1, i32 521 }, %"class.Texture1D >" zeroinitializer) + %38 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %37, i32 %34) + %39 = load <2 x float>, <2 x float>* %38 + + ; CHECK: [[IX:%.*]] = add <3 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture2D >"(i32 160, %"class.Texture2D >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 2, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <3 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <3 x i32> [[IX]], i64 1 + ; CHECK-DAG: [[IX2:%.*]] = extractelement <3 x i32> [[IX]], i64 2 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 [[IX2]], i32 [[IX0]], i32 [[IX1]], i32 undef, i32 undef, i32 undef, i32 undef) + %40 = add <3 x i32> %ix3, + %41 = load %"class.Texture2D >", %"class.Texture2D >"* @"\01?Tex2d@@3V?$Texture2D@V?$vector@M$01@@@@A" + %42 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2D >\22)"(i32 0, %"class.Texture2D >" %41) + %43 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2D >\22)"(i32 14, %dx.types.Handle %42, %dx.types.ResourceProperties { i32 2, i32 521 }, %"class.Texture2D >" zeroinitializer) + %44 = call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <3 x i32>)"(i32 231, 
%dx.types.Handle %43, <3 x i32> %40) + + ; CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture2D >"(i32 160, %"class.Texture2D >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 2, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <2 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <2 x i32> [[IX]], i64 1 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 0, i32 [[IX0]], i32 [[IX1]], i32 undef, i32 undef, i32 undef, i32 undef) + %45 = add <2 x i32> %ix2, + %46 = load %"class.Texture2D >", %"class.Texture2D >"* @"\01?Tex2d@@3V?$Texture2D@V?$vector@M$01@@@@A" + %47 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2D >\22)"(i32 0, %"class.Texture2D >" %46) + %48 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2D >\22)"(i32 14, %dx.types.Handle %47, %dx.types.ResourceProperties { i32 2, i32 521 }, %"class.Texture2D >" zeroinitializer) + %49 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %48, <2 x i32> %45) + %50 = load <2 x float>, <2 x float>* %49 + + ; CHECK: [[IX:%.*]] = add <4 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture3D >"(i32 160, %"class.Texture3D >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <4 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <4 x i32> [[IX]], i64 1 + ; CHECK-DAG: [[IX2:%.*]] = extractelement <4 x i32> [[IX]], i64 2 + ; CHECK-DAG: [[IX3:%.*]] = extractelement <4 x i32> [[IX]], i64 3 + ; 
CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 [[IX3]], i32 [[IX0]], i32 [[IX1]], i32 [[IX2]], i32 undef, i32 undef, i32 undef) + %51 = add <4 x i32> %ix4, + %52 = load %"class.Texture3D >", %"class.Texture3D >"* @"\01?Tex3d@@3V?$Texture3D@V?$vector@M$01@@@@A" + %53 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture3D >\22)"(i32 0, %"class.Texture3D >" %52) + %54 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture3D >\22)"(i32 14, %dx.types.Handle %53, %dx.types.ResourceProperties { i32 4, i32 521 }, %"class.Texture3D >" zeroinitializer) + %55 = call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <4 x i32>)"(i32 231, %dx.types.Handle %54, <4 x i32> %51) + + ; CHECK: [[IX:%.*]] = add <3 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture3D >"(i32 160, %"class.Texture3D >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <3 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <3 x i32> [[IX]], i64 1 + ; CHECK-DAG: [[IX2:%.*]] = extractelement <3 x i32> [[IX]], i64 2 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 0, i32 [[IX0]], i32 [[IX1]], i32 [[IX2]], i32 undef, i32 undef, i32 undef) + %56 = add <3 x i32> %ix3, + %57 = load %"class.Texture3D >", %"class.Texture3D >"* @"\01?Tex3d@@3V?$Texture3D@V?$vector@M$01@@@@A" + %58 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture3D >\22)"(i32 0, %"class.Texture3D >" %57) + %59 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture3D >\22)"(i32 14, %dx.types.Handle 
%58, %dx.types.ResourceProperties { i32 4, i32 521 }, %"class.Texture3D >" zeroinitializer) + %60 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %59, <3 x i32> %56) + %61 = load <2 x float>, <2 x float>* %60 + + ; CHECK: [[IX:%.*]] = add <4 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture2DArray >"(i32 160, %"class.Texture2DArray >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 7, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <4 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <4 x i32> [[IX]], i64 1 + ; CHECK-DAG: [[IX2:%.*]] = extractelement <4 x i32> [[IX]], i64 2 + ; CHECK-DAG: [[IX3:%.*]] = extractelement <4 x i32> [[IX]], i64 3 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 [[IX3]], i32 [[IX0]], i32 [[IX1]], i32 [[IX2]], i32 undef, i32 undef, i32 undef) + %62 = add <4 x i32> %ix4, + %63 = load %"class.Texture2DArray >", %"class.Texture2DArray >"* @"\01?Tex2dArr@@3V?$Texture2DArray@V?$vector@M$01@@@@A" + %64 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DArray >\22)"(i32 0, %"class.Texture2DArray >" %63) + %65 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DArray >\22)"(i32 14, %dx.types.Handle %64, %dx.types.ResourceProperties { i32 7, i32 521 }, %"class.Texture2DArray >" zeroinitializer) + %66 = call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <4 x i32>)"(i32 231, %dx.types.Handle %65, <4 x i32> %62) + + ; CHECK: [[IX:%.*]] = add <3 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture2DArray >"(i32 160, %"class.Texture2DArray >" + ; CHECK: [[ANHDL:%.*]] = call 
%dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 7, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <3 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <3 x i32> [[IX]], i64 1 + ; CHECK-DAG: [[IX2:%.*]] = extractelement <3 x i32> [[IX]], i64 2 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 0, i32 [[IX0]], i32 [[IX1]], i32 [[IX2]], i32 undef, i32 undef, i32 undef) + %67 = add <3 x i32> %ix3, + %68 = load %"class.Texture2DArray >", %"class.Texture2DArray >"* @"\01?Tex2dArr@@3V?$Texture2DArray@V?$vector@M$01@@@@A" + %69 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DArray >\22)"(i32 0, %"class.Texture2DArray >" %68) + %70 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DArray >\22)"(i32 14, %dx.types.Handle %69, %dx.types.ResourceProperties { i32 7, i32 521 }, %"class.Texture2DArray >" zeroinitializer) + %71 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %70, <3 x i32> %67) + %72 = load <2 x float>, <2 x float>* %71 + + %73 = icmp ne <2 x i32> %6, zeroinitializer + %74 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %73, <2 x float> %33, <2 x float> %39) + + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 13 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }) + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[ANHDL]], i32 [[IX]], i32 undef, + %75 = add i32 %ix1, 13 + %76 = load %"class.RWBuffer >", %"class.RWBuffer >"* 
@"\01?OutBuf@@3V?$RWBuffer@V?$vector@M$01@@@@A" + %77 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %76) + %78 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %77, %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" zeroinitializer) + %79 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %78, i32 %75) + store <2 x float> %74, <2 x float>* %79 + + %80 = icmp ne <2 x i32> %14, zeroinitializer + %81 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %80, <2 x float> %44, <2 x float> %50) + + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 14 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }) + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[ANHDL]], i32 [[IX]], i32 undef + %82 = add i32 %ix1, 14 + %83 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?OutBuf@@3V?$RWBuffer@V?$vector@M$01@@@@A" + %84 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %83) + %85 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %84, %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" zeroinitializer) + %86 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %85, i32 %82) + store <2 x float> %81, <2 x float>* %86 + + %87 = icmp ne <2 x i32> %20, 
zeroinitializer + %88 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %87, <2 x float> %55, <2 x float> %61) + + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 15 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }) + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[ANHDL]], i32 [[IX]], i32 undef + %89 = add i32 %ix1, 15 + %90 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?OutBuf@@3V?$RWBuffer@V?$vector@M$01@@@@A" + %91 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %90) + %92 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %91, %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" zeroinitializer) + %93 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %92, i32 %89) + store <2 x float> %88, <2 x float>* %93 + + %94 = icmp ne <2 x i32> %28, zeroinitializer + %95 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %94, <2 x float> %66, <2 x float> %72) + + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 16 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }) + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[ANHDL]], i32 [[IX]], i32 undef + %96 = add i32 %ix1, 16 + %97 = load %"class.RWBuffer >", 
%"class.RWBuffer >"* @"\01?OutBuf@@3V?$RWBuffer@V?$vector@M$01@@@@A" + %98 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %97) + %99 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %98, %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" zeroinitializer) + %100 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %99, i32 %96) + store <2 x float> %95, <2 x float>* %100 + + ret void +} + +declare <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32, %"class.RWBuffer >") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWBuffer >") #2 +declare <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare <2 x i1> @"dx.hl.op..<2 x i1> (i32, %dx.types.Handle, <2 x i32>, i32)"(i32, %dx.types.Handle, <2 x i32>, i32) #0 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DMS, 0>\22)"(i32, %"class.Texture2DMS, 0>") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DMS, 0>\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.Texture2DMS, 0>") #2 +declare <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #2 +declare <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #1 +declare %dx.types.Handle 
@"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture1D >\22)"(i32, %"class.Texture1D >") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture1D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.Texture1D >") #2 +declare <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <3 x i32>)"(i32, %dx.types.Handle, <3 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2D >\22)"(i32, %"class.Texture2D >") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.Texture2D >") #2 +declare <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #2 +declare <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <4 x i32>)"(i32, %dx.types.Handle, <4 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture3D >\22)"(i32, %"class.Texture3D >") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture3D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.Texture3D >") #2 +declare <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32, %dx.types.Handle, <3 x i32>) #2 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DArray >\22)"(i32, %"class.Texture2DArray >") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DArray >\22)"(i32, %dx.types.Handle, 
%dx.types.ResourceProperties, %"class.Texture2DArray >") #2 +declare <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32, <2 x i1>, <2 x float>, <2 x float>) #2 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32, %"class.RWBuffer >") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWBuffer >") #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind readnone } + +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6} +!dx.entryPoints = !{!22} +!dx.fnprops = !{!35} +!dx.options = !{!36, !37} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4807 (longvec_bab_ldst, 88cfe61c3-dirty)"} +!3 = !{i32 1, i32 6} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 6} +!6 = !{i32 1, void (i32, <2 x i32>, <3 x i32>, <4 x i32>)* @main, !7} +!7 = !{!8, !10, !13, !16, !19} +!8 = !{i32 1, !9, !9} +!9 = !{} +!10 = !{i32 0, !11, !12} +!11 = !{i32 4, !"IX1", i32 7, i32 5} +!12 = !{i32 1} +!13 = !{i32 0, !14, !15} +!14 = !{i32 4, !"IX2", i32 7, i32 5} +!15 = !{i32 2} +!16 = !{i32 0, !17, !18} +!17 = !{i32 4, !"IX3", i32 7, i32 5} +!18 = !{i32 3} +!19 = !{i32 0, !20, !21} +!20 = !{i32 4, !"IX4", i32 7, i32 5} +!21 = !{i32 4} +!22 = !{void (i32, <2 x i32>, <3 x i32>, <4 x i32>)* @main, !"main", null, !23, null} +!23 = !{!24, !32, null, null} +!24 = !{!25, !27, !29, !30, !31} +!25 = !{i32 0, %"class.Texture2DMS, 0>"* @"\01?Tex2dMs@@3V?$Texture2DMS@V?$vector@_N$01@@$0A@@@A", !"Tex2dMs", i32 0, i32 2, i32 1, i32 3, i32 0, !26} +!26 = !{i32 0, i32 5} +!27 = !{i32 1, %"class.Texture1D >"* @"\01?Tex1d@@3V?$Texture1D@V?$vector@M$01@@@@A", !"Tex1d", i32 0, i32 3, i32 1, i32 1, i32 0, !28} +!28 = !{i32 0, i32 
9} +!29 = !{i32 2, %"class.Texture2D >"* @"\01?Tex2d@@3V?$Texture2D@V?$vector@M$01@@@@A", !"Tex2d", i32 0, i32 4, i32 1, i32 2, i32 0, !28} +!30 = !{i32 3, %"class.Texture3D >"* @"\01?Tex3d@@3V?$Texture3D@V?$vector@M$01@@@@A", !"Tex3d", i32 0, i32 5, i32 1, i32 4, i32 0, !28} +!31 = !{i32 4, %"class.Texture2DArray >"* @"\01?Tex2dArr@@3V?$Texture2DArray@V?$vector@M$01@@@@A", !"Tex2dArr", i32 0, i32 6, i32 1, i32 7, i32 0, !28} +!32 = !{!33, !34} +!33 = !{i32 0, %"class.RWBuffer >"* @"\01?TyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A", !"TyBuf", i32 0, i32 1, i32 1, i32 10, i1 false, i1 false, i1 false, !26} +!34 = !{i32 1, %"class.RWBuffer >"* @"\01?OutBuf@@3V?$RWBuffer@V?$vector@M$01@@@@A", !"OutBuf", i32 0, i32 7, i32 1, i32 10, i1 false, i1 false, i1 false, !28} +!35 = !{void (i32, <2 x i32>, <3 x i32>, <4 x i32>)* @main, i32 1} +!36 = !{i32 64} +!37 = !{i32 -1} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.hlsl new file mode 100644 index 0000000000..9ff6039127 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.hlsl @@ -0,0 +1,404 @@ +// RUN: %dxc -fcgl -T vs_6_6 %s | FileCheck %s + +// Source file for DxilGen IR test for typed buffer store lowering +// Focuses on converted types in addition to common float type. 
+ +RWBuffer FTyBuf; +RWBuffer BTyBuf; +RWBuffer LTyBuf; +RWBuffer DTyBuf; + +RWTexture1D FTex1d; +RWTexture1D BTex1d; +RWTexture1D LTex1d; +RWTexture1D DTex1d; + +RWTexture2D FTex2d; +RWTexture2D BTex2d; +RWTexture2D LTex2d; +RWTexture2D DTex2d; + +RWTexture3D FTex3d; +RWTexture3D BTex3d; +RWTexture3D LTex3d; +RWTexture3D DTex3d; + +RWTexture2DMS FTex2dMs; +RWTexture2DMS BTex2dMs; +RWTexture2DMS LTex2dMs; +RWTexture2DMS DTex2dMs; + +// CHECK: define void @main(i32 %ix1, <2 x i32> %ix2, <3 x i32> %ix3) +void main(uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { + + // CHECK-DAG: [[ix3adr:%.*]] = alloca <3 x i32>, align 4 + // CHECK-DAG: [[ix2adr:%.*]] = alloca <2 x i32>, align 4 + // CHECK-DAG: [[ix1adr:%.*]] = alloca i32, align 4 + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 0 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 777 }, %"class.RWBuffer >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load <3 x float>, <3 x float>* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 1 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, 
i32 777 }, %"class.RWBuffer >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: store <3 x float> [[ld]], <3 x float>* [[sub]] + FTyBuf[ix1 + 1] = FTyBuf[ix1 + 0]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 2 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i32>, <2 x i32>* [[sub]] + // CHECK: [[bld:%.*]] = icmp ne <2 x i32> [[ld]], zeroinitializer + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 3 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = zext <2 x i1> [[bld]] to <2 x i32> + // CHECK: store <2 x i32> [[ld]], <2 x i32>* [[sub]] + BTyBuf[ix1 + 3] = BTyBuf[ix1 + 2]; + + // CHECK: [[ix1:%.*]] = load i32, i32* 
[[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 4 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i64>, <2 x i64>* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 5 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: store <2 x i64> [[ld]], <2 x i64>* [[sub]] + LTyBuf[ix1 + 5] = LTyBuf[ix1 + 4]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 6 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer\22)"(i32 0, %"class.RWBuffer" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 261 }, 
%"class.RWBuffer" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load double, double* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 7 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer\22)"(i32 0, %"class.RWBuffer" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 261 }, %"class.RWBuffer" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: store double [[ld]], double* [[sub]] + DTyBuf[ix1 + 7] = DTyBuf[ix1 + 6]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 8 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 777 }, %"class.RWTexture1D >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load <3 x float>, <3 x float>* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 9 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, 
%"class.RWTexture1D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 777 }, %"class.RWTexture1D >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: store <3 x float> [[ld]], <3 x float>* [[sub]] + FTex1d[ix1 + 9] = FTex1d[ix1 + 8]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 10 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i32>, <2 x i32>* [[sub]] + // CHECK: [[bld:%.*]] = icmp ne <2 x i32> [[ld]], zeroinitializer + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 11 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* 
@"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = zext <2 x i1> [[bld]] to <2 x i32> + // CHECK: store <2 x i32> [[ld]], <2 x i32>* [[sub]] + BTex1d[ix1 + 11] = BTex1d[ix1 + 10]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 12 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i64>, <2 x i64>* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 13 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: store <2 x i64> [[ld]], <2 x i64>* [[sub]] + LTex1d[ix1 + 13] = LTex1d[ix1 + 12]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 14 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle 
@"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D\22)"(i32 0, %"class.RWTexture1D" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 261 }, %"class.RWTexture1D" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load double, double* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 15 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D\22)"(i32 0, %"class.RWTexture1D" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 261 }, %"class.RWTexture1D" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: store double [[ld]], double* [[sub]] + DTex1d[ix1 + 15] = DTex1d[ix1 + 14]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 777 }, %"class.RWTexture2D >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* 
@"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <3 x float>, <3 x float>* [[sub]] + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 777 }, %"class.RWTexture2D >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: store <3 x float> [[ld]], <3 x float>* [[sub]] + FTex2d[ix2 + 17] = FTex2d[ix2 + 16]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i32>, <2 x i32>* [[sub]] + // CHECK: [[bld:%.*]] = icmp ne <2 x i32> [[ld]], zeroinitializer + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> 
[[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = zext <2 x i1> [[bld]] to <2 x i32> + // CHECK: store <2 x i32> [[ld]], <2 x i32>* [[sub]] + BTex2d[ix2 + 19] = BTex2d[ix2 + 18]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i64>, <2 x i64>* [[sub]] + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, 
%\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: store <2 x i64> [[ld]], <2 x i64>* [[sub]] + LTex2d[ix2 + 21] = LTex2d[ix2 + 20]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D\22)"(i32 0, %"class.RWTexture2D" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 261 }, %"class.RWTexture2D" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load double, double* [[sub]] + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D\22)"(i32 0, %"class.RWTexture2D" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 261 }, %"class.RWTexture2D" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: store double [[ld]], double* [[sub]] + DTex2d[ix2 + 23] = DTex2d[ix2 + 22]; + + // 
CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 777 }, %"class.RWTexture3D >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <3 x float>, <3 x float>* [[sub]] + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 777 }, %"class.RWTexture3D >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: store <3 x float> [[ld]], <3 x float>* [[sub]] + FTex3d[ix3 + 25] = FTex3d[ix3 + 24]; + + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, 
%dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i32>, <2 x i32>* [[sub]] + // CHECK: [[bld:%.*]] = icmp ne <2 x i32> [[ld]], zeroinitializer + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = zext <2 x i1> [[bld]] to <2 x i32> + // CHECK: store <2 x i32> [[ld]], <2 x i32>* [[sub]] + BTex3d[ix3 + 27] = BTex3d[ix3 + 26]; + + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* 
@"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i64>, <2 x i64>* [[sub]] + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: store <2 x i64> [[ld]], <2 x i64>* [[sub]] + LTex3d[ix3 + 29] = LTex3d[ix3 + 28]; + + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D\22)"(i32 0, %"class.RWTexture3D" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 261 }, %"class.RWTexture3D" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load double, double* [[sub]] + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, 
%\22class.RWTexture3D\22)"(i32 0, %"class.RWTexture3D" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 261 }, %"class.RWTexture3D" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: store double [[ld]], double* [[sub]] + DTex3d[ix3 + 31] = DTex3d[ix3 + 30]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <3 x float>, <3 x float>* [[sub]] + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" undef) + // 
CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: store <3 x float> [[ld]], <3 x float>* [[sub]] + FTex2dMs[ix2 + 33] = FTex2dMs[ix2 + 32]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i32>, <2 x i32>* [[sub]] + // CHECK: [[bld:%.*]] = icmp ne <2 x i32> [[ld]], zeroinitializer + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = zext <2 x i1> [[bld]] to <2 x i32> + // CHECK: store <2 x i32> [[ld]], <2 x 
i32>* [[sub]] + BTex2dMs[ix2 + 35] = BTex2dMs[ix2 + 34]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i64>, <2 x i64>* [[sub]] + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: store <2 x i64> [[ld]], <2 x i64>* [[sub]] + LTex2dMs[ix2 + 37] = LTex2dMs[ix2 + 36]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" + // 
CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load double, double* [[sub]] + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: store double [[ld]], double* [[sub]] + DTex2dMs[ix2 + 39] = DTex2dMs[ix2 + 38]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 0 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> 
[[ix2:%.*]], + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[][].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: [[ld:%.*]] = load <3 x float>, <3 x float>* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 1 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[][].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: store <3 x float> [[ld]], <3 x float>* [[sub]] + FTex2dMs.sample[ix1 + 1][ix2 + 41] = FTex2dMs.sample[ix1 + 0][ix2 + 40]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 2 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = 
call <2 x i32>* @"dx.hl.subscript.[][].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: [[ld:%.*]] = load <2 x i32>, <2 x i32>* [[sub]] + // CHECK: [[bld:%.*]] = icmp ne <2 x i32> [[ld]], zeroinitializer + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 3 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[][].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: [[ld:%.*]] = zext <2 x i1> [[bld]] to <2 x i32> + // CHECK: store <2 x i32> [[ld]], <2 x i32>* [[sub]] + BTex2dMs.sample[ix1 + 3][ix2 + 43] = BTex2dMs.sample[ix1 + 2][ix2 + 42]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 4 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // 
CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[][].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: [[ld:%.*]] = load <2 x i64>, <2 x i64>* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 5 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[][].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: store <2 x i64> [[ld]], <2 x i64>* [[sub]] + LTex2dMs.sample[ix1 + 5][ix2 + 45] = LTex2dMs.sample[ix1 + 4][ix2 + 44]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 6 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = 
call double* @"dx.hl.subscript.[][].rn.double* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: [[ld:%.*]] = load double, double* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 7 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[][].rn.double* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: store double [[ld]], double* [[sub]] + DTex2dMs.sample[ix1 + 7][ix2 + 47] = DTex2dMs.sample[ix1 + 6][ix2 + 46]; + + // CHECK: ret void + +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.ll b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.ll new file mode 100644 index 0000000000..ac5c6182e1 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.ll @@ -0,0 +1,1079 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"class.RWBuffer >" = type { <3 x float> } +%"class.RWBuffer >" = type { <2 x i32> } +%"class.RWBuffer >" = type { <2 x i64> } +%"class.RWBuffer" = type { double } +%"class.RWTexture1D >" = type { <3 x float> } +%"class.RWTexture1D >" = type { <2 x i32> } +%"class.RWTexture1D 
>" = type { <2 x i64> } +%"class.RWTexture1D" = type { double } +%"class.RWTexture2D >" = type { <3 x float> } +%"class.RWTexture2D >" = type { <2 x i32> } +%"class.RWTexture2D >" = type { <2 x i64> } +%"class.RWTexture2D" = type { double } +%"class.RWTexture3D >" = type { <3 x float> } +%"class.RWTexture3D >" = type { <2 x i32> } +%"class.RWTexture3D >" = type { <2 x i64> } +%"class.RWTexture3D" = type { double } +%"class.RWTexture2DMS, 0>" = type { <3 x float>, %"class.RWTexture2DMS, 0>::sample_type" } +%"class.RWTexture2DMS, 0>::sample_type" = type { i32 } +%"class.RWTexture2DMS, 0>" = type { <2 x i32>, %"class.RWTexture2DMS, 0>::sample_type" } +%"class.RWTexture2DMS, 0>::sample_type" = type { i32 } +%"class.RWTexture2DMS, 0>" = type { <2 x i64>, %"class.RWTexture2DMS, 0>::sample_type" } +%"class.RWTexture2DMS, 0>::sample_type" = type { i32 } +%"class.RWTexture2DMS" = type { double, %"class.RWTexture2DMS::sample_type" } +%"class.RWTexture2DMS::sample_type" = type { i32 } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?FTyBuf@@3V?$RWBuffer@V?$vector@M$02@@@@A" = external global %"class.RWBuffer >", align 4 +@"\01?BTyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A" = external global %"class.RWBuffer >", align 4 +@"\01?LTyBuf@@3V?$RWBuffer@V?$vector@_K$01@@@@A" = external global %"class.RWBuffer >", align 8 +@"\01?DTyBuf@@3V?$RWBuffer@N@@A" = external global %"class.RWBuffer", align 8 +@"\01?FTex1d@@3V?$RWTexture1D@V?$vector@M$02@@@@A" = external global %"class.RWTexture1D >", align 4 +@"\01?BTex1d@@3V?$RWTexture1D@V?$vector@_N$01@@@@A" = external global %"class.RWTexture1D >", align 4 +@"\01?LTex1d@@3V?$RWTexture1D@V?$vector@_K$01@@@@A" = external global %"class.RWTexture1D >", align 8 +@"\01?DTex1d@@3V?$RWTexture1D@N@@A" = external global %"class.RWTexture1D", align 8 +@"\01?FTex2d@@3V?$RWTexture2D@V?$vector@M$02@@@@A" = external global %"class.RWTexture2D >", align 4 +@"\01?BTex2d@@3V?$RWTexture2D@V?$vector@_N$01@@@@A" = 
external global %"class.RWTexture2D >", align 4 +@"\01?LTex2d@@3V?$RWTexture2D@V?$vector@_K$01@@@@A" = external global %"class.RWTexture2D >", align 8 +@"\01?DTex2d@@3V?$RWTexture2D@N@@A" = external global %"class.RWTexture2D", align 8 +@"\01?FTex3d@@3V?$RWTexture3D@V?$vector@M$02@@@@A" = external global %"class.RWTexture3D >", align 4 +@"\01?BTex3d@@3V?$RWTexture3D@V?$vector@_N$01@@@@A" = external global %"class.RWTexture3D >", align 4 +@"\01?LTex3d@@3V?$RWTexture3D@V?$vector@_K$01@@@@A" = external global %"class.RWTexture3D >", align 8 +@"\01?DTex3d@@3V?$RWTexture3D@N@@A" = external global %"class.RWTexture3D", align 8 +@"\01?FTex2dMs@@3V?$RWTexture2DMS@V?$vector@M$02@@$0A@@@A" = external global %"class.RWTexture2DMS, 0>", align 4 +@"\01?BTex2dMs@@3V?$RWTexture2DMS@V?$vector@_N$01@@$0A@@@A" = external global %"class.RWTexture2DMS, 0>", align 4 +@"\01?LTex2dMs@@3V?$RWTexture2DMS@V?$vector@_K$01@@$0A@@@A" = external global %"class.RWTexture2DMS, 0>", align 8 +@"\01?DTex2dMs@@3V?$RWTexture2DMS@N$0A@@@A" = external global %"class.RWTexture2DMS", align 8 + +; Function Attrs: nounwind +; CHECK-LABEL: define void @main(i32 %ix1, <2 x i32> %ix2, <3 x i32> %ix3) +define void @main(i32 %ix1, <2 x i32> %ix2, <3 x i32> %ix3) #0 { +bb: + ; CHECK: [[ix3_0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 0, i32 undef) + ; CHECK: [[ix3:%.*]] = insertelement <3 x i32> undef, i32 [[ix3_0]], i64 0 + ; CHECK: [[ix3_1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 1, i32 undef) + ; CHECK: [[vec3:%.*]] = insertelement <3 x i32> [[ix3]], i32 [[ix3_1]], i64 1 + ; CHECK: [[ix3_2:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 2, i32 undef) + ; CHECK: [[ix3:%.*]] = insertelement <3 x i32> [[vec3]], i32 [[ix3_2]], i64 2 + ; CHECK: [[ix2_0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0, i32 undef) + ; CHECK: [[vec2:%.*]] = insertelement <2 x i32> undef, i32 [[ix2_0]], i64 0 + ; CHECK: [[ix2_1:%.*]] = call i32 
@dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 1, i32 undef) + ; CHECK: [[ix2:%.*]] = insertelement <2 x i32> [[vec2]], i32 [[ix2_1]], i64 1 + ; CHECK: [[ix1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef) + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 777 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[anhdl]], i32 [[ix1]], i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[ping:%.*]] = insertelement <3 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <3 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[vec:%.*]] = insertelement <3 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 1 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 777 }) + ; CHECK: [[val3:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <3 x float> [[vec]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <3 x float> [[vec]], i64 2 + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15) + %tmp = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?FTyBuf@@3V?$RWBuffer@V?$vector@M$02@@@@A" + %tmp1 = call 
%dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %tmp) + %tmp2 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4106, i32 777 }, %"class.RWBuffer >" zeroinitializer) + %tmp3 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp2, i32 %ix1) + %tmp4 = load <3 x float>, <3 x float>* %tmp3 + %tmp5 = add i32 %ix1, 1 + %tmp6 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?FTyBuf@@3V?$RWBuffer@V?$vector@M$02@@@@A" + %tmp7 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %tmp6) + %tmp8 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %tmp7, %dx.types.ResourceProperties { i32 4106, i32 777 }, %"class.RWBuffer >" zeroinitializer) + %tmp9 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp8, i32 %tmp5) + store <3 x float> %tmp4, <3 x float>* %tmp9 + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 2 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] 
= insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: [[bvec:%.*]] = icmp ne <2 x i32> [[pong]], zeroinitializer + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 3 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }) + ; CHECK: [[vec:%.*]] = zext <2 x i1> [[bvec]] to <2 x i32> + ; CHECK: [[val3:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i32> [[vec]], i64 1 + ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 [[val0]], i32 [[val1]], i32 [[val3]], i32 [[val3]], i8 15) + %tmp10 = add i32 %ix1, 2 + %tmp11 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?BTyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A" + %tmp12 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %tmp11) + %tmp13 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %tmp12, %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" zeroinitializer) + %tmp14 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp13, i32 %tmp10) + %tmp15 = load <2 x i32>, <2 x i32>* %tmp14 + %tmp16 = icmp ne <2 x i32> %tmp15, zeroinitializer + %tmp17 = add i32 %ix1, 3 + %tmp18 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?BTyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A" + %tmp19 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %tmp18) + %tmp20 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %tmp19, %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" zeroinitializer) + %tmp21 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp20, i32 %tmp17) + %tmp22 = zext <2 x i1> %tmp16 to <2 x i32> + store <2 x i32> %tmp22, <2 x i32>* %tmp21 + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 4 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 3 + ; CHECK: [[loval:%.*]] = zext i32 [[val0]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val1]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val0:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[loval:%.*]] = zext i32 [[val2]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val3]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val1:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[ping:%.*]] = insertelement <2 x i64> undef, i64 [[val0]], i64 0 + ; CHECK: [[vec:%.*]] = insertelement <2 x i64> [[ping]], i64 [[val1]], i64 1 + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 5 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle 
@dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }) + ; CHECK: [[val3:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i64> [[vec]], i64 1 + ; CHECK: [[loval0:%.*]] = trunc i64 [[val0]] to i32 + ; CHECK: [[msk0:%.*]] = lshr i64 [[val0]], 32 + ; CHECK: [[hival0:%.*]] = trunc i64 [[msk0]] to i32 + ; CHECK: [[loval1:%.*]] = trunc i64 [[val1]] to i32 + ; CHECK: [[msk1:%.*]] = lshr i64 [[val1]], 32 + ; CHECK: [[hival1:%.*]] = trunc i64 [[msk1]] to i32 + ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 [[loval0]], i32 [[hival0]], i32 [[loval1]], i32 [[hival1]], i8 15) + %tmp23 = add i32 %ix1, 4 + %tmp24 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?LTyBuf@@3V?$RWBuffer@V?$vector@_K$01@@@@A" + %tmp25 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %tmp24) + %tmp26 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %tmp25, %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" zeroinitializer) + %tmp27 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp26, i32 %tmp23) + %tmp28 = load <2 x i64>, <2 x i64>* %tmp27 + %tmp29 = add i32 %ix1, 5 + %tmp30 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?LTyBuf@@3V?$RWBuffer@V?$vector@_K$01@@@@A" + %tmp31 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %tmp30) + %tmp32 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %tmp31, 
%dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" zeroinitializer) + %tmp33 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp32, i32 %tmp29) + store <2 x i64> %tmp28, <2 x i64>* %tmp33 + + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 6 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer"(i32 160, %"class.RWBuffer" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 261 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[dval:%.*]] = call double @dx.op.makeDouble.f64(i32 101, i32 [[val0]], i32 [[val1]]) + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 7 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer"(i32 160, %"class.RWBuffer" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 261 }) + ; CHECK: [[dvec:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[dval]]) + ; CHECK: [[lodbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 0 + ; CHECK: [[hidbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 1 + ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 [[lodbl]], i32 [[hidbl]], i32 [[lodbl]], i32 [[hidbl]], i8 15) + %tmp34 = add i32 %ix1, 6 + %tmp35 = load %"class.RWBuffer", %"class.RWBuffer"* @"\01?DTyBuf@@3V?$RWBuffer@N@@A" + %tmp36 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer\22)"(i32 0, %"class.RWBuffer" %tmp35) + %tmp37 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer\22)"(i32 14, %dx.types.Handle %tmp36, %dx.types.ResourceProperties { i32 4106, i32 261 }, %"class.RWBuffer" zeroinitializer) + %tmp38 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp37, i32 %tmp34) + %tmp39 = load double, double* %tmp38 + %tmp40 = add i32 %ix1, 7 + %tmp41 = load %"class.RWBuffer", %"class.RWBuffer"* @"\01?DTyBuf@@3V?$RWBuffer@N@@A" + %tmp42 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer\22)"(i32 0, %"class.RWBuffer" %tmp41) + %tmp43 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer\22)"(i32 14, %dx.types.Handle %tmp42, %dx.types.ResourceProperties { i32 4106, i32 261 }, %"class.RWBuffer" zeroinitializer) + %tmp44 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp43, i32 %tmp40) + store double %tmp39, double* %tmp44 + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 8 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D >"(i32 160, %"class.RWTexture1D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 777 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix]], i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[ping:%.*]] = insertelement <3 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <3 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: 
[[vec:%.*]] = insertelement <3 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 9 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D >"(i32 160, %"class.RWTexture1D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 777 }) + ; CHECK: [[val3:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <3 x float> [[vec]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <3 x float> [[vec]], i64 2 + ; CHECK: call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15) + %tmp45 = add i32 %ix1, 8 + %tmp46 = load %"class.RWTexture1D >", %"class.RWTexture1D >"* @"\01?FTex1d@@3V?$RWTexture1D@V?$vector@M$02@@@@A" + %tmp47 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" %tmp46) + %tmp48 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle %tmp47, %dx.types.ResourceProperties { i32 4097, i32 777 }, %"class.RWTexture1D >" zeroinitializer) + %tmp49 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp48, i32 %tmp45) + %tmp50 = load <3 x float>, <3 x float>* %tmp49 + %tmp51 = add i32 %ix1, 9 + %tmp52 = load %"class.RWTexture1D >", %"class.RWTexture1D >"* @"\01?FTex1d@@3V?$RWTexture1D@V?$vector@M$02@@@@A" + %tmp53 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" %tmp52) + %tmp54 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, 
%dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle %tmp53, %dx.types.ResourceProperties { i32 4097, i32 777 }, %"class.RWTexture1D >" zeroinitializer) + %tmp55 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp54, i32 %tmp51) + store <3 x float> %tmp50, <3 x float>* %tmp55 + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 10 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D >"(i32 160, %"class.RWTexture1D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix]], i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: [[bvec:%.*]] = icmp ne <2 x i32> [[pong]], zeroinitializer + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 11 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D >"(i32 160, %"class.RWTexture1D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }) + ; CHECK: [[vec:%.*]] = zext <2 x i1> [[bvec]] to <2 x i32> + ; CHECK: [[val3:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i32> [[vec]], i64 1 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 undef, i32 [[val0]], i32 
[[val1]], i32 [[val3]], i32 [[val3]], i8 15) + %tmp56 = add i32 %ix1, 10 + %tmp57 = load %"class.RWTexture1D >", %"class.RWTexture1D >"* @"\01?BTex1d@@3V?$RWTexture1D@V?$vector@_N$01@@@@A" + %tmp58 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" %tmp57) + %tmp59 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle %tmp58, %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" zeroinitializer) + %tmp60 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp59, i32 %tmp56) + %tmp61 = load <2 x i32>, <2 x i32>* %tmp60 + %tmp62 = icmp ne <2 x i32> %tmp61, zeroinitializer + %tmp63 = add i32 %ix1, 11 + %tmp64 = load %"class.RWTexture1D >", %"class.RWTexture1D >"* @"\01?BTex1d@@3V?$RWTexture1D@V?$vector@_N$01@@@@A" + %tmp65 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" %tmp64) + %tmp66 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle %tmp65, %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" zeroinitializer) + %tmp67 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp66, i32 %tmp63) + %tmp68 = zext <2 x i1> %tmp62 to <2 x i32> + store <2 x i32> %tmp68, <2 x i32>* %tmp67 + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 12 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D >"(i32 160, %"class.RWTexture1D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }) + ; CHECK: [[ld:%.*]] 
= call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix]], i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 3 + ; CHECK: [[loval:%.*]] = zext i32 [[val0]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val1]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val0:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[loval:%.*]] = zext i32 [[val2]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val3]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val1:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[ping:%.*]] = insertelement <2 x i64> undef, i64 [[val0]], i64 0 + ; CHECK: [[vec:%.*]] = insertelement <2 x i64> [[ping]], i64 [[val1]], i64 1 + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 13 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D >"(i32 160, %"class.RWTexture1D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }) + ; CHECK: [[val3:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i64> [[vec]], i64 1 + ; CHECK: [[loval0:%.*]] = trunc i64 [[val0]] to i32 + ; CHECK: [[msk0:%.*]] = lshr i64 [[val0]], 32 + ; CHECK: [[hival0:%.*]] = trunc i64 [[msk0]] to i32 + ; CHECK: [[loval1:%.*]] = trunc i64 [[val1]] to i32 + ; CHECK: [[msk1:%.*]] = lshr i64 [[val1]], 32 + ; CHECK: [[hival1:%.*]] = trunc i64 [[msk1]] to i32 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 undef, i32 [[loval0]], i32 [[hival0]], i32 
[[loval1]], i32 [[hival1]], i8 15) + %tmp69 = add i32 %ix1, 12 + %tmp70 = load %"class.RWTexture1D >", %"class.RWTexture1D >"* @"\01?LTex1d@@3V?$RWTexture1D@V?$vector@_K$01@@@@A" + %tmp71 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" %tmp70) + %tmp72 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle %tmp71, %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" zeroinitializer) + %tmp73 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp72, i32 %tmp69) + %tmp74 = load <2 x i64>, <2 x i64>* %tmp73 + %tmp75 = add i32 %ix1, 13 + %tmp76 = load %"class.RWTexture1D >", %"class.RWTexture1D >"* @"\01?LTex1d@@3V?$RWTexture1D@V?$vector@_K$01@@@@A" + %tmp77 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" %tmp76) + %tmp78 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle %tmp77, %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" zeroinitializer) + %tmp79 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp78, i32 %tmp75) + store <2 x i64> %tmp74, <2 x i64>* %tmp79 + + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 14 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D"(i32 160, %"class.RWTexture1D" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 261 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix]], 
i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[dval:%.*]] = call double @dx.op.makeDouble.f64(i32 101, i32 [[val0]], i32 [[val1]]) + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 15 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D"(i32 160, %"class.RWTexture1D" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 261 }) + ; CHECK: [[dvec:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[dval]]) + ; CHECK: [[lodbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 0 + ; CHECK: [[hidbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 1 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 undef, i32 [[lodbl]], i32 [[hidbl]], i32 [[lodbl]], i32 [[hidbl]], i8 15) + %tmp80 = add i32 %ix1, 14 + %tmp81 = load %"class.RWTexture1D", %"class.RWTexture1D"* @"\01?DTex1d@@3V?$RWTexture1D@N@@A" + %tmp82 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D\22)"(i32 0, %"class.RWTexture1D" %tmp81) + %tmp83 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D\22)"(i32 14, %dx.types.Handle %tmp82, %dx.types.ResourceProperties { i32 4097, i32 261 }, %"class.RWTexture1D" zeroinitializer) + %tmp84 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp83, i32 %tmp80) + %tmp85 = load double, double* %tmp84 + %tmp86 = add i32 %ix1, 15 + %tmp87 = load %"class.RWTexture1D", %"class.RWTexture1D"* @"\01?DTex1d@@3V?$RWTexture1D@N@@A" + %tmp88 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D\22)"(i32 0, 
%"class.RWTexture1D" %tmp87) + %tmp89 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D\22)"(i32 14, %dx.types.Handle %tmp88, %dx.types.ResourceProperties { i32 4097, i32 261 }, %"class.RWTexture1D" zeroinitializer) + %tmp90 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp89, i32 %tmp86) + store double %tmp85, double* %tmp90 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D >"(i32 160, %"class.RWTexture2D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 777 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[ping:%.*]] = insertelement <3 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <3 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[vec:%.*]] = insertelement <3 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D >"(i32 160, %"class.RWTexture2D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 777 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; 
CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <3 x float> [[vec]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <3 x float> [[vec]], i64 2 + ; CHECK: call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15) + %tmp91 = add <2 x i32> %ix2, + %tmp92 = load %"class.RWTexture2D >", %"class.RWTexture2D >"* @"\01?FTex2d@@3V?$RWTexture2D@V?$vector@M$02@@@@A" + %tmp93 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" %tmp92) + %tmp94 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle %tmp93, %dx.types.ResourceProperties { i32 4098, i32 777 }, %"class.RWTexture2D >" zeroinitializer) + %tmp95 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp94, <2 x i32> %tmp91) + %tmp96 = load <3 x float>, <3 x float>* %tmp95 + %tmp97 = add <2 x i32> %ix2, + %tmp98 = load %"class.RWTexture2D >", %"class.RWTexture2D >"* @"\01?FTex2d@@3V?$RWTexture2D@V?$vector@M$02@@@@A" + %tmp99 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" %tmp98) + %tmp100 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle %tmp99, %dx.types.ResourceProperties { i32 4098, i32 777 }, %"class.RWTexture2D >" zeroinitializer) + %tmp101 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, 
%dx.types.Handle %tmp100, <2 x i32> %tmp97) + store <3 x float> %tmp96, <3 x float>* %tmp101 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D >"(i32 160, %"class.RWTexture2D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: [[bvec:%.*]] = icmp ne <2 x i32> [[pong]], zeroinitializer + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D >"(i32 160, %"class.RWTexture2D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }) + ; CHECK: [[vec:%.*]] = zext <2 x i1> [[bvec]] to <2 x i32> + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i32> [[vec]], i64 1 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[val0]], i32 [[val1]], i32 
[[val3]], i32 [[val3]], i8 15) + %tmp102 = add <2 x i32> %ix2, + %tmp103 = load %"class.RWTexture2D >", %"class.RWTexture2D >"* @"\01?BTex2d@@3V?$RWTexture2D@V?$vector@_N$01@@@@A" + %tmp104 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" %tmp103) + %tmp105 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle %tmp104, %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" zeroinitializer) + %tmp106 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp105, <2 x i32> %tmp102) + %tmp107 = load <2 x i32>, <2 x i32>* %tmp106 + %tmp108 = icmp ne <2 x i32> %tmp107, zeroinitializer + %tmp109 = add <2 x i32> %ix2, + %tmp110 = load %"class.RWTexture2D >", %"class.RWTexture2D >"* @"\01?BTex2d@@3V?$RWTexture2D@V?$vector@_N$01@@@@A" + %tmp111 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" %tmp110) + %tmp112 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle %tmp111, %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" zeroinitializer) + %tmp113 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp112, <2 x i32> %tmp109) + %tmp114 = zext <2 x i1> %tmp108 to <2 x i32> + store <2 x i32> %tmp114, <2 x i32>* %tmp113 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D >"(i32 160, %"class.RWTexture2D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties 
{ i32 4098, i32 517 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 3 + ; CHECK: [[loval:%.*]] = zext i32 [[val0]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val1]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val0:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[loval:%.*]] = zext i32 [[val2]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val3]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val1:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[ping:%.*]] = insertelement <2 x i64> undef, i64 [[val0]], i64 0 + ; CHECK: [[vec:%.*]] = insertelement <2 x i64> [[ping]], i64 [[val1]], i64 1 + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D >"(i32 160, %"class.RWTexture2D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i64> [[vec]], i64 1 + ; CHECK: [[loval0:%.*]] = trunc i64 [[val0]] to i32 + ; CHECK: [[msk0:%.*]] = lshr i64 [[val0]], 32 + ; CHECK: [[hival0:%.*]] = trunc i64 
[[msk0]] to i32 + ; CHECK: [[loval1:%.*]] = trunc i64 [[val1]] to i32 + ; CHECK: [[msk1:%.*]] = lshr i64 [[val1]], 32 + ; CHECK: [[hival1:%.*]] = trunc i64 [[msk1]] to i32 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[loval0]], i32 [[hival0]], i32 [[loval1]], i32 [[hival1]], i8 15) + %tmp115 = add <2 x i32> %ix2, + %tmp116 = load %"class.RWTexture2D >", %"class.RWTexture2D >"* @"\01?LTex2d@@3V?$RWTexture2D@V?$vector@_K$01@@@@A" + %tmp117 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" %tmp116) + %tmp118 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle %tmp117, %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" zeroinitializer) + %tmp119 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp118, <2 x i32> %tmp115) + %tmp120 = load <2 x i64>, <2 x i64>* %tmp119 + %tmp121 = add <2 x i32> %ix2, + %tmp122 = load %"class.RWTexture2D >", %"class.RWTexture2D >"* @"\01?LTex2d@@3V?$RWTexture2D@V?$vector@_K$01@@@@A" + %tmp123 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" %tmp122) + %tmp124 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle %tmp123, %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" zeroinitializer) + %tmp125 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp124, <2 x i32> %tmp121) + store <2 x i64> %tmp120, <2 x i64>* %tmp125 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call 
%dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D"(i32 160, %"class.RWTexture2D" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 261 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[dval:%.*]] = call double @dx.op.makeDouble.f64(i32 101, i32 [[val0]], i32 [[val1]]) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D"(i32 160, %"class.RWTexture2D" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 261 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[dvec:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[dval]]) + ; CHECK: [[lodbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 0 + ; CHECK: [[hidbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 1 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[lodbl]], i32 [[hidbl]], i32 [[lodbl]], i32 [[hidbl]], i8 15) + %tmp126 = add <2 x i32> %ix2, + %tmp127 = load %"class.RWTexture2D", %"class.RWTexture2D"* @"\01?DTex2d@@3V?$RWTexture2D@N@@A" + %tmp128 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D\22)"(i32 0, %"class.RWTexture2D" %tmp127) + %tmp129 = 
call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D\22)"(i32 14, %dx.types.Handle %tmp128, %dx.types.ResourceProperties { i32 4098, i32 261 }, %"class.RWTexture2D" zeroinitializer) + %tmp130 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp129, <2 x i32> %tmp126) + %tmp131 = load double, double* %tmp130 + %tmp132 = add <2 x i32> %ix2, + %tmp133 = load %"class.RWTexture2D", %"class.RWTexture2D"* @"\01?DTex2d@@3V?$RWTexture2D@N@@A" + %tmp134 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D\22)"(i32 0, %"class.RWTexture2D" %tmp133) + %tmp135 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D\22)"(i32 14, %dx.types.Handle %tmp134, %dx.types.ResourceProperties { i32 4098, i32 261 }, %"class.RWTexture2D" zeroinitializer) + %tmp136 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp135, <2 x i32> %tmp132) + store double %tmp131, double* %tmp136 + + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D >"(i32 160, %"class.RWTexture3D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 777 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; 
CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[ping:%.*]] = insertelement <3 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <3 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[vec:%.*]] = insertelement <3 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D >"(i32 160, %"class.RWTexture3D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 777 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <3 x float> [[vec]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <3 x float> [[vec]], i64 2 + ; CHECK: call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15) + %tmp137 = add <3 x i32> %ix3, + %tmp138 = load %"class.RWTexture3D >", %"class.RWTexture3D >"* @"\01?FTex3d@@3V?$RWTexture3D@V?$vector@M$02@@@@A" + %tmp139 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" %tmp138) + %tmp140 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle %tmp139, %dx.types.ResourceProperties { i32 4100, i32 777 }, %"class.RWTexture3D >" zeroinitializer) + %tmp141 = call <3 x float>* 
@"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp140, <3 x i32> %tmp137) + %tmp142 = load <3 x float>, <3 x float>* %tmp141 + %tmp143 = add <3 x i32> %ix3, + %tmp144 = load %"class.RWTexture3D >", %"class.RWTexture3D >"* @"\01?FTex3d@@3V?$RWTexture3D@V?$vector@M$02@@@@A" + %tmp145 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" %tmp144) + %tmp146 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle %tmp145, %dx.types.ResourceProperties { i32 4100, i32 777 }, %"class.RWTexture3D >" zeroinitializer) + %tmp147 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp146, <3 x i32> %tmp143) + store <3 x float> %tmp142, <3 x float>* %tmp147 + + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D >"(i32 160, %"class.RWTexture3D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: 
[[bvec:%.*]] = icmp ne <2 x i32> [[pong]], zeroinitializer + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D >"(i32 160, %"class.RWTexture3D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }) + ; CHECK: [[vec:%.*]] = zext <2 x i1> [[bvec]] to <2 x i32> + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i32> [[vec]], i64 1 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 [[val0]], i32 [[val1]], i32 [[val3]], i32 [[val3]], i8 15) + %tmp148 = add <3 x i32> %ix3, + %tmp149 = load %"class.RWTexture3D >", %"class.RWTexture3D >"* @"\01?BTex3d@@3V?$RWTexture3D@V?$vector@_N$01@@@@A" + %tmp150 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" %tmp149) + %tmp151 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle %tmp150, %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" zeroinitializer) + %tmp152 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp151, <3 x i32> %tmp148) + %tmp153 = load <2 x i32>, <2 x i32>* %tmp152 + %tmp154 = icmp ne <2 x i32> %tmp153, zeroinitializer + %tmp155 = add <3 x i32> %ix3, + %tmp156 = load %"class.RWTexture3D >", %"class.RWTexture3D >"* 
@"\01?BTex3d@@3V?$RWTexture3D@V?$vector@_N$01@@@@A" + %tmp157 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" %tmp156) + %tmp158 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle %tmp157, %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" zeroinitializer) + %tmp159 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp158, <3 x i32> %tmp155) + %tmp160 = zext <2 x i1> %tmp154 to <2 x i32> + store <2 x i32> %tmp160, <2 x i32>* %tmp159 + + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D >"(i32 160, %"class.RWTexture3D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 3 + ; CHECK: [[loval:%.*]] = zext i32 [[val0]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val1]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val0:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[loval:%.*]] = zext i32 [[val2]] to i64 + ; 
CHECK: [[hival:%.*]] = zext i32 [[val3]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val1:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[ping:%.*]] = insertelement <2 x i64> undef, i64 [[val0]], i64 0 + ; CHECK: [[vec:%.*]] = insertelement <2 x i64> [[ping]], i64 [[val1]], i64 1 + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D >"(i32 160, %"class.RWTexture3D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i64> [[vec]], i64 1 + ; CHECK: [[loval0:%.*]] = trunc i64 [[val0]] to i32 + ; CHECK: [[msk0:%.*]] = lshr i64 [[val0]], 32 + ; CHECK: [[hival0:%.*]] = trunc i64 [[msk0]] to i32 + ; CHECK: [[loval1:%.*]] = trunc i64 [[val1]] to i32 + ; CHECK: [[msk1:%.*]] = lshr i64 [[val1]], 32 + ; CHECK: [[hival1:%.*]] = trunc i64 [[msk1]] to i32 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 [[loval0]], i32 [[hival0]], i32 [[loval1]], i32 [[hival1]], i8 15) + %tmp161 = add <3 x i32> %ix3, + %tmp162 = load %"class.RWTexture3D >", %"class.RWTexture3D >"* @"\01?LTex3d@@3V?$RWTexture3D@V?$vector@_K$01@@@@A" + %tmp163 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" %tmp162) + %tmp164 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 
14, %dx.types.Handle %tmp163, %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" zeroinitializer) + %tmp165 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp164, <3 x i32> %tmp161) + %tmp166 = load <2 x i64>, <2 x i64>* %tmp165 + %tmp167 = add <3 x i32> %ix3, + %tmp168 = load %"class.RWTexture3D >", %"class.RWTexture3D >"* @"\01?LTex3d@@3V?$RWTexture3D@V?$vector@_K$01@@@@A" + %tmp169 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" %tmp168) + %tmp170 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle %tmp169, %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" zeroinitializer) + %tmp171 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp170, <3 x i32> %tmp167) + store <2 x i64> %tmp166, <2 x i64>* %tmp171 + + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D"(i32 160, %"class.RWTexture3D" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 261 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[dval:%.*]] = call 
double @dx.op.makeDouble.f64(i32 101, i32 [[val0]], i32 [[val1]]) + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D"(i32 160, %"class.RWTexture3D" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 261 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[dvec:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[dval]]) + ; CHECK: [[lodbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 0 + ; CHECK: [[hidbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 1 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 [[lodbl]], i32 [[hidbl]], i32 [[lodbl]], i32 [[hidbl]], i8 15) + %tmp172 = add <3 x i32> %ix3, + %tmp173 = load %"class.RWTexture3D", %"class.RWTexture3D"* @"\01?DTex3d@@3V?$RWTexture3D@N@@A" + %tmp174 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D\22)"(i32 0, %"class.RWTexture3D" %tmp173) + %tmp175 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D\22)"(i32 14, %dx.types.Handle %tmp174, %dx.types.ResourceProperties { i32 4100, i32 261 }, %"class.RWTexture3D" zeroinitializer) + %tmp176 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp175, <3 x i32> %tmp172) + %tmp177 = load double, double* %tmp176 + %tmp178 = add <3 x i32> %ix3, + %tmp179 = load %"class.RWTexture3D", %"class.RWTexture3D"* @"\01?DTex3d@@3V?$RWTexture3D@N@@A" + %tmp180 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, 
%\22class.RWTexture3D\22)"(i32 0, %"class.RWTexture3D" %tmp179) + %tmp181 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D\22)"(i32 14, %dx.types.Handle %tmp180, %dx.types.ResourceProperties { i32 4100, i32 261 }, %"class.RWTexture3D" zeroinitializer) + %tmp182 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp181, <3 x i32> %tmp178) + store double %tmp177, double* %tmp182 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[anhdl]], i32 0, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[ping:%.*]] = insertelement <3 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <3 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[vec:%.*]] = insertelement <3 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }) + ; 
CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <3 x float> [[vec]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <3 x float> [[vec]], i64 2 + ; CHECK: call void @dx.op.textureStoreSample.f32(i32 225, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15, i32 0) + %tmp183 = add <2 x i32> %ix2, + %tmp184 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?FTex2dMs@@3V?$RWTexture2DMS@V?$vector@M$02@@$0A@@@A" + %tmp185 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp184) + %tmp186 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp185, %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp187 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp186, <2 x i32> %tmp183) + %tmp188 = load <3 x float>, <3 x float>* %tmp187 + %tmp189 = add <2 x i32> %ix2, + %tmp190 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?FTex2dMs@@3V?$RWTexture2DMS@V?$vector@M$02@@$0A@@@A" + %tmp191 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp190) + %tmp192 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp191, %dx.types.ResourceProperties { i32 4099, i32 777 }, 
%"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp193 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp192, <2 x i32> %tmp189) + store <3 x float> %tmp188, <3 x float>* %tmp193 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 0, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: [[bvec:%.*]] = icmp ne <2 x i32> [[pong]], zeroinitializer + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[vec:%.*]] = zext <2 x i1> [[bvec]] to <2 x i32> + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i32> 
[[vec]], i64 1 + ; CHECK: call void @dx.op.textureStoreSample.i32(i32 225, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[val0]], i32 [[val1]], i32 [[val3]], i32 [[val3]], i8 15, i32 0) + %tmp194 = add <2 x i32> %ix2, + %tmp195 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?BTex2dMs@@3V?$RWTexture2DMS@V?$vector@_N$01@@$0A@@@A" + %tmp196 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp195) + %tmp197 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp196, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp198 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp197, <2 x i32> %tmp194) + %tmp199 = load <2 x i32>, <2 x i32>* %tmp198 + %tmp200 = icmp ne <2 x i32> %tmp199, zeroinitializer + %tmp201 = add <2 x i32> %ix2, + %tmp202 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?BTex2dMs@@3V?$RWTexture2DMS@V?$vector@_N$01@@$0A@@@A" + %tmp203 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp202) + %tmp204 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp203, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp205 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp204, <2 x i32> %tmp201) + %tmp206 = zext <2 x i1> %tmp200 to <2 x i32> + store <2 x i32> %tmp206, <2 x i32>* %tmp205 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: 
[[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 0, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 3 + ; CHECK: [[loval:%.*]] = zext i32 [[val0]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val1]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val0:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[loval:%.*]] = zext i32 [[val2]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val3]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val1:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[ping:%.*]] = insertelement <2 x i64> undef, i64 [[val0]], i64 0 + ; CHECK: [[vec:%.*]] = insertelement <2 x i64> [[ping]], i64 [[val1]], i64 1 + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: 
[[val0:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i64> [[vec]], i64 1 + ; CHECK: [[loval0:%.*]] = trunc i64 [[val0]] to i32 + ; CHECK: [[msk0:%.*]] = lshr i64 [[val0]], 32 + ; CHECK: [[hival0:%.*]] = trunc i64 [[msk0]] to i32 + ; CHECK: [[loval1:%.*]] = trunc i64 [[val1]] to i32 + ; CHECK: [[msk1:%.*]] = lshr i64 [[val1]], 32 + ; CHECK: [[hival1:%.*]] = trunc i64 [[msk1]] to i32 + ; CHECK: call void @dx.op.textureStoreSample.i32(i32 225, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[loval0]], i32 [[hival0]], i32 [[loval1]], i32 [[hival1]], i8 15, i32 0) + + %tmp207 = add <2 x i32> %ix2, + %tmp208 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?LTex2dMs@@3V?$RWTexture2DMS@V?$vector@_K$01@@$0A@@@A" + %tmp209 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp208) + %tmp210 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp209, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp211 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp210, <2 x i32> %tmp207) + %tmp212 = load <2 x i64>, <2 x i64>* %tmp211 + %tmp213 = add <2 x i32> %ix2, + %tmp214 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?LTex2dMs@@3V?$RWTexture2DMS@V?$vector@_K$01@@$0A@@@A" + %tmp215 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp214) + %tmp216 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp215, %dx.types.ResourceProperties { 
i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp217 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp216, <2 x i32> %tmp213) + store <2 x i64> %tmp212, <2 x i64>* %tmp217 + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS"(i32 160, %"class.RWTexture2DMS" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 0, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[dval:%.*]] = call double @dx.op.makeDouble.f64(i32 101, i32 [[val0]], i32 [[val1]]) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS"(i32 160, %"class.RWTexture2DMS" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[dvec:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[dval]]) + ; CHECK: [[lodbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 0 + ; CHECK: [[hidbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 1 + ; CHECK: call void @dx.op.textureStoreSample.i32(i32 225, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, 
i32 [[lodbl]], i32 [[hidbl]], i32 [[lodbl]], i32 [[hidbl]], i8 15, i32 0) + + %tmp218 = add <2 x i32> %ix2, + %tmp219 = load %"class.RWTexture2DMS", %"class.RWTexture2DMS"* @"\01?DTex2dMs@@3V?$RWTexture2DMS@N$0A@@@A" + %tmp220 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" %tmp219) + %tmp221 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle %tmp220, %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" zeroinitializer) + %tmp222 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp221, <2 x i32> %tmp218) + %tmp223 = load double, double* %tmp222 + %tmp224 = add <2 x i32> %ix2, + %tmp225 = load %"class.RWTexture2DMS", %"class.RWTexture2DMS"* @"\01?DTex2dMs@@3V?$RWTexture2DMS@N$0A@@@A" + %tmp226 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" %tmp225) + %tmp227 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle %tmp226, %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" zeroinitializer) + %tmp228 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp227, <2 x i32> %tmp224) + store double %tmp223, double* %tmp228 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 
+ ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[anhdl]], i32 [[ix1]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[ping:%.*]] = insertelement <3 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <3 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[vec:%.*]] = insertelement <3 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 1 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <3 x float> [[vec]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <3 x float> [[vec]], i64 2 + ; CHECK: call void @dx.op.textureStoreSample.f32(i32 225, %dx.types.Handle %388, i32 %389, i32 %390, i32 undef, float %392, float %393, float %394, float %391, i8 15, i32 %tmp235) + %tmp229 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?FTex2dMs@@3V?$RWTexture2DMS@V?$vector@M$02@@$0A@@@A" + %tmp230 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp229) + %tmp231 = call 
%dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp230, %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp232 = add <2 x i32> %ix2, + %tmp233 = call <3 x float>* @"dx.hl.subscript.[][].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp231, <2 x i32> %tmp232, i32 %ix1) + %tmp234 = load <3 x float>, <3 x float>* %tmp233 + %tmp235 = add i32 %ix1, 1 + %tmp236 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?FTex2dMs@@3V?$RWTexture2DMS@V?$vector@M$02@@$0A@@@A" + %tmp237 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp236) + %tmp238 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp237, %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp239 = add <2 x i32> %ix2, + %tmp240 = call <3 x float>* @"dx.hl.subscript.[][].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp238, <2 x i32> %tmp239, i32 %tmp235) + store <3 x float> %tmp234, <3 x float>* %tmp240 + + ; CHECK: [[sax:%.*]] = add i32 [[ix1]], 2 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, 
%dx.types.Handle [[anhdl]], i32 [[sax]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: %tmp248 = icmp ne <2 x i32> %402, zeroinitializer + ; CHECK: [[sax:%.*]] = add i32 [[ix1]], 3 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: %407 = extractelement <2 x i32> %tmp255, i64 0 + ; CHECK: %408 = extractelement <2 x i32> %tmp255, i64 0 + ; CHECK: %409 = extractelement <2 x i32> %tmp255, i64 1 + ; CHECK: call void @dx.op.textureStoreSample.i32(i32 225, %dx.types.Handle %404, i32 %405, i32 %406, i32 undef, i32 %408, i32 %409, i32 %407, i32 %407, i8 15, i32 %tmp249) + ; CHECK: %tmp255 = zext <2 x i1> %tmp248 to <2 x i32> + %tmp241 = add i32 %ix1, 2 + %tmp242 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?BTex2dMs@@3V?$RWTexture2DMS@V?$vector@_N$01@@$0A@@@A" + %tmp243 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp242) + %tmp244 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp243, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" 
zeroinitializer) + %tmp245 = add <2 x i32> %ix2, + %tmp246 = call <2 x i32>* @"dx.hl.subscript.[][].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp244, <2 x i32> %tmp245, i32 %tmp241) + %tmp247 = load <2 x i32>, <2 x i32>* %tmp246 + %tmp248 = icmp ne <2 x i32> %tmp247, zeroinitializer + %tmp249 = add i32 %ix1, 3 + %tmp250 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?BTex2dMs@@3V?$RWTexture2DMS@V?$vector@_N$01@@$0A@@@A" + %tmp251 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp250) + %tmp252 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp251, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp253 = add <2 x i32> %ix2, + %tmp254 = call <2 x i32>* @"dx.hl.subscript.[][].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp252, <2 x i32> %tmp253, i32 %tmp249) + %tmp255 = zext <2 x i1> %tmp248 to <2 x i32> + store <2 x i32> %tmp255, <2 x i32>* %tmp254 + + ; CHECK: [[sax:%.*]] = add i32 [[ix1]], 4 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 [[sax]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 
[[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 3 + ; CHECK: [[loval:%.*]] = zext i32 [[val0]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val1]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val0:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[loval:%.*]] = zext i32 [[val2]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val3]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val1:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[ping:%.*]] = insertelement <2 x i64> undef, i64 [[val0]], i64 0 + ; CHECK: [[vec:%.*]] = insertelement <2 x i64> [[ping]], i64 [[val1]], i64 1 + ; CHECK: [[sax:%.*]] = add i32 [[ix1]], 5 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i64> [[vec]], i64 1 + ; CHECK: [[loval0:%.*]] = trunc i64 [[val0]] to i32 + ; CHECK: [[msk0:%.*]] = lshr i64 [[val0]], 32 + ; CHECK: [[hival0:%.*]] = trunc i64 [[msk0]] to i32 + ; CHECK: [[loval1:%.*]] = trunc i64 [[val1]] to i32 + ; CHECK: [[msk1:%.*]] = lshr i64 [[val1]], 32 + ; CHECK: [[hival1:%.*]] = trunc i64 [[msk1]] to i32 + ; CHECK: call void @dx.op.textureStoreSample.i32(i32 225, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[loval0]], i32 [[hival0]], i32 [[loval1]], i32 
[[hival1]], i8 15, i32 [[sax]]) + %tmp256 = add i32 %ix1, 4 + %tmp257 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?LTex2dMs@@3V?$RWTexture2DMS@V?$vector@_K$01@@$0A@@@A" + %tmp258 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp257) + %tmp259 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp258, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp260 = add <2 x i32> %ix2, + %tmp261 = call <2 x i64>* @"dx.hl.subscript.[][].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp259, <2 x i32> %tmp260, i32 %tmp256) + %tmp262 = load <2 x i64>, <2 x i64>* %tmp261 + %tmp263 = add i32 %ix1, 5 + %tmp264 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?LTex2dMs@@3V?$RWTexture2DMS@V?$vector@_K$01@@$0A@@@A" + %tmp265 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp264) + %tmp266 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp265, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp267 = add <2 x i32> %ix2, + %tmp268 = call <2 x i64>* @"dx.hl.subscript.[][].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp266, <2 x i32> %tmp267, i32 %tmp263) + store <2 x i64> %tmp262, <2 x i64>* %tmp268 + + ; CHECK: [[sax:%.*]] = add i32 [[ix1]], 6 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS"(i32 160, %"class.RWTexture2DMS" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, 
%dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 [[sax]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: %447 = call double @dx.op.makeDouble.f64(i32 101, i32 %445, i32 %446) + ; CHECK: [[sax:%.*]] = add i32 [[ix1]], 7 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS"(i32 160, %"class.RWTexture2DMS" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: %452 = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double %447) + ; CHECK: %453 = extractvalue %dx.types.splitdouble %452, 0 + ; CHECK: %454 = extractvalue %dx.types.splitdouble %452, 1 + ; CHECK: call void @dx.op.textureStoreSample.i32(i32 225, %dx.types.Handle %449, i32 %450, i32 %451, i32 undef, i32 %453, i32 %454, i32 %453, i32 %454, i8 15, i32 %tmp276) + %tmp269 = add i32 %ix1, 6 + %tmp270 = load %"class.RWTexture2DMS", %"class.RWTexture2DMS"* @"\01?DTex2dMs@@3V?$RWTexture2DMS@N$0A@@@A" + %tmp271 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" %tmp270) + %tmp272 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, 
%\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle %tmp271, %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" zeroinitializer) + %tmp273 = add <2 x i32> %ix2, + %tmp274 = call double* @"dx.hl.subscript.[][].rn.double* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp272, <2 x i32> %tmp273, i32 %tmp269) + %tmp275 = load double, double* %tmp274 + %tmp276 = add i32 %ix1, 7 + %tmp277 = load %"class.RWTexture2DMS", %"class.RWTexture2DMS"* @"\01?DTex2dMs@@3V?$RWTexture2DMS@N$0A@@@A" + %tmp278 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" %tmp277) + %tmp279 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle %tmp278, %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" zeroinitializer) + %tmp280 = add <2 x i32> %ix2, + %tmp281 = call double* @"dx.hl.subscript.[][].rn.double* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp279, <2 x i32> %tmp280, i32 %tmp276) + store double %tmp275, double* %tmp281 + + + ; CHECK: ret void + ret void +} + + +declare <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32, %"class.RWBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWBuffer >") #1 +declare <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32, %"class.RWBuffer >") #1 +declare %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWBuffer >") #1 +declare <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32, %"class.RWBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWBuffer >") #1 +declare double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer\22)"(i32, %"class.RWBuffer") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWBuffer") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32, %"class.RWTexture1D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture1D >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32, %"class.RWTexture1D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture1D >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32, %"class.RWTexture1D >") #1 +declare %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture1D >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D\22)"(i32, %"class.RWTexture1D") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture1D") #1 +declare <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32, %"class.RWTexture2D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2D >") #1 +declare <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32, %"class.RWTexture2D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2D >") #1 +declare <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32, %"class.RWTexture2D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32, %dx.types.Handle, 
%dx.types.ResourceProperties, %"class.RWTexture2D >") #1 +declare double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D\22)"(i32, %"class.RWTexture2D") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2D") #1 +declare <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32, %dx.types.Handle, <3 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32, %"class.RWTexture3D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture3D >") #1 +declare <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <3 x i32>)"(i32, %dx.types.Handle, <3 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32, %"class.RWTexture3D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture3D >") #1 +declare <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <3 x i32>)"(i32, %dx.types.Handle, <3 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32, %"class.RWTexture3D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture3D 
>") #1 +declare double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <3 x i32>)"(i32, %dx.types.Handle, <3 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D\22)"(i32, %"class.RWTexture3D") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture3D") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32, %"class.RWTexture2DMS, 0>") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2DMS, 0>") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32, %"class.RWTexture2DMS, 0>") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2DMS, 0>") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32, %"class.RWTexture2DMS, 0>") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2DMS, 0>") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32, %"class.RWTexture2DMS") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2DMS") #1 +declare <3 x 
float>* @"dx.hl.subscript.[][].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32, %dx.types.Handle, <2 x i32>, i32) #1 +declare <2 x i32>* @"dx.hl.subscript.[][].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32, %dx.types.Handle, <2 x i32>, i32) #1 +declare <2 x i64>* @"dx.hl.subscript.[][].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32, %dx.types.Handle, <2 x i32>, i32) #1 +declare double* @"dx.hl.subscript.[][].rn.double* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32, %dx.types.Handle, <2 x i32>, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6} +!dx.entryPoints = !{!19} +!dx.fnprops = !{!44} +!dx.options = !{!45, !46} + +!3 = !{i32 1, i32 6} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 6} +!6 = !{i32 1, void (i32, <2 x i32>, <3 x i32>)* @main, !7} +!7 = !{!8, !10, !13, !16} +!8 = !{i32 1, !9, !9} +!9 = !{} +!10 = !{i32 0, !11, !12} +!11 = !{i32 4, !"IX1", i32 7, i32 5} +!12 = !{i32 1} +!13 = !{i32 0, !14, !15} +!14 = !{i32 4, !"IX2", i32 7, i32 5} +!15 = !{i32 2} +!16 = !{i32 0, !17, !18} +!17 = !{i32 4, !"IX3", i32 7, i32 5} +!18 = !{i32 3} +!19 = !{void (i32, <2 x i32>, <3 x i32>)* @main, !"main", null, !20, null} +!20 = !{null, !21, null, null} +!21 = !{!22, !24, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43} +!22 = !{i32 0, %"class.RWBuffer >"* @"\01?FTyBuf@@3V?$RWBuffer@V?$vector@M$02@@@@A", !"FTyBuf", i32 -1, i32 -1, i32 1, i32 10, i1 false, i1 false, i1 false, !23} +!23 = !{i32 0, i32 9} +!24 = !{i32 1, %"class.RWBuffer >"* @"\01?BTyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A", !"BTyBuf", i32 -1, i32 -1, i32 1, i32 10, i1 false, i1 false, i1 false, !25} +!25 = !{i32 0, i32 5} +!26 = !{i32 2, %"class.RWBuffer >"* @"\01?LTyBuf@@3V?$RWBuffer@V?$vector@_K$01@@@@A", !"LTyBuf", i32 -1, i32 -1, i32 1, i32 10, i1 false, i1 false, i1 false, !25} +!27 = !{i32 3, 
%"class.RWBuffer"* @"\01?DTyBuf@@3V?$RWBuffer@N@@A", !"DTyBuf", i32 -1, i32 -1, i32 1, i32 10, i1 false, i1 false, i1 false, !25} +!28 = !{i32 4, %"class.RWTexture1D >"* @"\01?FTex1d@@3V?$RWTexture1D@V?$vector@M$02@@@@A", !"FTex1d", i32 -1, i32 -1, i32 1, i32 1, i1 false, i1 false, i1 false, !23} +!29 = !{i32 5, %"class.RWTexture1D >"* @"\01?BTex1d@@3V?$RWTexture1D@V?$vector@_N$01@@@@A", !"BTex1d", i32 -1, i32 -1, i32 1, i32 1, i1 false, i1 false, i1 false, !25} +!30 = !{i32 6, %"class.RWTexture1D >"* @"\01?LTex1d@@3V?$RWTexture1D@V?$vector@_K$01@@@@A", !"LTex1d", i32 -1, i32 -1, i32 1, i32 1, i1 false, i1 false, i1 false, !25} +!31 = !{i32 7, %"class.RWTexture1D"* @"\01?DTex1d@@3V?$RWTexture1D@N@@A", !"DTex1d", i32 -1, i32 -1, i32 1, i32 1, i1 false, i1 false, i1 false, !25} +!32 = !{i32 8, %"class.RWTexture2D >"* @"\01?FTex2d@@3V?$RWTexture2D@V?$vector@M$02@@@@A", !"FTex2d", i32 -1, i32 -1, i32 1, i32 2, i1 false, i1 false, i1 false, !23} +!33 = !{i32 9, %"class.RWTexture2D >"* @"\01?BTex2d@@3V?$RWTexture2D@V?$vector@_N$01@@@@A", !"BTex2d", i32 -1, i32 -1, i32 1, i32 2, i1 false, i1 false, i1 false, !25} +!34 = !{i32 10, %"class.RWTexture2D >"* @"\01?LTex2d@@3V?$RWTexture2D@V?$vector@_K$01@@@@A", !"LTex2d", i32 -1, i32 -1, i32 1, i32 2, i1 false, i1 false, i1 false, !25} +!35 = !{i32 11, %"class.RWTexture2D"* @"\01?DTex2d@@3V?$RWTexture2D@N@@A", !"DTex2d", i32 -1, i32 -1, i32 1, i32 2, i1 false, i1 false, i1 false, !25} +!36 = !{i32 12, %"class.RWTexture3D >"* @"\01?FTex3d@@3V?$RWTexture3D@V?$vector@M$02@@@@A", !"FTex3d", i32 -1, i32 -1, i32 1, i32 4, i1 false, i1 false, i1 false, !23} +!37 = !{i32 13, %"class.RWTexture3D >"* @"\01?BTex3d@@3V?$RWTexture3D@V?$vector@_N$01@@@@A", !"BTex3d", i32 -1, i32 -1, i32 1, i32 4, i1 false, i1 false, i1 false, !25} +!38 = !{i32 14, %"class.RWTexture3D >"* @"\01?LTex3d@@3V?$RWTexture3D@V?$vector@_K$01@@@@A", !"LTex3d", i32 -1, i32 -1, i32 1, i32 4, i1 false, i1 false, i1 false, !25} +!39 = !{i32 15, %"class.RWTexture3D"* 
@"\01?DTex3d@@3V?$RWTexture3D@N@@A", !"DTex3d", i32 -1, i32 -1, i32 1, i32 4, i1 false, i1 false, i1 false, !25} +!40 = !{i32 16, %"class.RWTexture2DMS, 0>"* @"\01?FTex2dMs@@3V?$RWTexture2DMS@V?$vector@M$02@@$0A@@@A", !"FTex2dMs", i32 -1, i32 -1, i32 1, i32 3, i1 false, i1 false, i1 false, !23} +!41 = !{i32 17, %"class.RWTexture2DMS, 0>"* @"\01?BTex2dMs@@3V?$RWTexture2DMS@V?$vector@_N$01@@$0A@@@A", !"BTex2dMs", i32 -1, i32 -1, i32 1, i32 3, i1 false, i1 false, i1 false, !25} +!42 = !{i32 18, %"class.RWTexture2DMS, 0>"* @"\01?LTex2dMs@@3V?$RWTexture2DMS@V?$vector@_K$01@@$0A@@@A", !"LTex2dMs", i32 -1, i32 -1, i32 1, i32 3, i1 false, i1 false, i1 false, !25} +!43 = !{i32 19, %"class.RWTexture2DMS"* @"\01?DTex2dMs@@3V?$RWTexture2DMS@N$0A@@@A", !"DTex2dMs", i32 -1, i32 -1, i32 1, i32 3, i1 false, i1 false, i1 false, !25} +!44 = !{void (i32, <2 x i32>, <3 x i32>)* @main, i32 1} +!45 = !{i32 64} +!46 = !{i32 -1} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/maybereorder.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/maybereorder.hlsl new file mode 100644 index 0000000000..08836dfbaf --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/maybereorder.hlsl @@ -0,0 +1,37 @@ +// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s --check-prefix DXIL +// RUN: %dxc -T lib_6_9 -E main %s -fcgl | FileCheck %s --check-prefix FCGL +// RUN: %dxc -T lib_6_9 -E main %s -ast-dump-implicit | FileCheck %s --check-prefix AST + +// AST: |-FunctionDecl {{[^ ]+}} <> implicit used MaybeReorderThread 'void (dx::HitObject)' extern +// AST-NEXT: | |-ParmVarDecl {{[^ ]+}} <> HitObject 'dx::HitObject':'dx::HitObject' +// AST-NEXT: | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 359 +// AST-NEXT: | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// AST: |-FunctionDecl {{[^ ]+}} <> implicit used MaybeReorderThread 'void (dx::HitObject, unsigned int, unsigned int)' extern +// AST-NEXT: | |-ParmVarDecl {{[^ ]+}} <> HitObject 'dx::HitObject':'dx::HitObject' +// 
AST-NEXT: | |-ParmVarDecl {{[^ ]+}} <> CoherenceHint 'unsigned int' +// AST-NEXT: | |-ParmVarDecl {{[^ ]+}} <> NumCoherenceHintBitsFromLSB 'unsigned int' +// AST-NEXT: | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 359 +// AST-NEXT: | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// AST: `-FunctionDecl {{[^ ]+}} <> implicit used MaybeReorderThread 'void (unsigned int, unsigned int)' extern +// AST-NEXT: |-ParmVarDecl {{[^ ]+}} <> CoherenceHint 'unsigned int' +// AST-NEXT: |-ParmVarDecl {{[^ ]+}} <> NumCoherenceHintBitsFromLSB 'unsigned int' +// AST-NEXT: |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 359 +// AST-NEXT: `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 359, %dx.types.HitObject* %[[NOP:[^ ]+]]) +// FCGL-NEXT: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32)"(i32 359, %dx.types.HitObject* %[[NOP]], i32 241, i32 3) +// FCGL-NEXT: call void @"dx.hl.op..void (i32, i32, i32)"(i32 359, i32 242, i32 7) + +// DXIL: call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %[[NOP:[^ ]+]], i32 undef, i32 0) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) +// DXIL-NEXT: call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %[[NOP]], i32 241, i32 3) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) +// DXIL-NEXT: call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %[[NOP]], i32 242, i32 7) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + +[shader("raygeneration")] +void main() { + dx::HitObject hit; + dx::MaybeReorderThread(hit); + dx::MaybeReorderThread(hit, 0xf1, 3); + dx::MaybeReorderThread(0xf2, 7); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_make.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_make.hlsl new file mode 100644 index 0000000000..1e947b2296 --- /dev/null +++ 
b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_make.hlsl @@ -0,0 +1,75 @@ +// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s --check-prefix DXIL +// RUN: %dxc -T lib_6_9 -E main %s -fcgl | FileCheck %s --check-prefix FCGL +// RUN: %dxc -T lib_6_9 -E main %s -ast-dump-implicit | FileCheck %s --check-prefix AST + +// AST: | |-CXXRecordDecl {{[^ ]+}} <> implicit referenced class HitObject definition +// AST-NEXT: | | |-FinalAttr {{[^ ]+}} <> Implicit final +// AST-NEXT: | | |-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST-NEXT: | | |-HLSLHitObjectAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | |-FieldDecl {{[^ ]+}} <> implicit h 'int' +// AST-NEXT: | | |-CXXConstructorDecl {{[^ ]+}} <> used HitObject 'void ()' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 358 +// AST-NEXT: | | | `-HLSLCXXOverloadAttr {{[^ ]+}} <> Implicit + +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> MakeMiss +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TRayFlags +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TMissShaderIndex +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TRay +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit MakeMiss 'TResult (TRayFlags, TMissShaderIndex, TRay) const' static +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> RayFlags 'TRayFlags' +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> MissShaderIndex 'TMissShaderIndex' +// AST-NEXT: | | | | `-ParmVarDecl {{[^ ]+}} <> Ray 'TRay' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used MakeMiss 'dx::HitObject (unsigned int, unsigned int, RayDesc)' static +// AST-NEXT: | | | |-TemplateArgument type 'dx::HitObject' +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-TemplateArgument type 'RayDesc' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> MakeMiss 'unsigned int' +// 
AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> RayFlags 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> MissShaderIndex 'RayDesc' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 387 +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> MakeNop +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit MakeNop 'TResult () const' static +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used MakeNop 'dx::HitObject ()' static +// AST-NEXT: | | | |-TemplateArgument type 'dx::HitObject' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 358 +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// FCGL: %{{[^ ]+}} = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %{{[^ ]+}}) +// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %{{[^ ]+}}) +// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.RayDesc*)"(i32 387, %dx.types.HitObject* %{{[^ ]+}}, i32 0, i32 1, %struct.RayDesc* %{{[^ ]+}}) +// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.RayDesc*)"(i32 387, %dx.types.HitObject* %{{[^ ]+}}, i32 0, i32 2, %struct.RayDesc* %{{[^ ]+}}) + +// Expect HitObject_Make* calls with identical parameters to be folded. 
+// DXIL: {{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() +// DXIL-NOT: {{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeNop +// DXIL: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32 265, i32 0, i32 1, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0x3FA99999A0000000, float 1.000000e+03) ; HitObject_MakeMiss(RayFlags,MissShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax) +// DXIL-NOT: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32 265, i32 0, i32 1 +// DXIL: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32 265, i32 0, i32 2, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0x3FA99999A0000000, float 1.000000e+03) ; HitObject_MakeMiss(RayFlags,MissShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax) + +void Use(in dx::HitObject hit) { + dx::MaybeReorderThread(hit); +} + +[shader("raygeneration")] +void main() { + dx::HitObject nop; + Use(nop); + + dx::HitObject nop2 = dx::HitObject::MakeNop(); + Use(nop2); + + RayDesc ray = {{0,0,0}, {0,0,1}, 0.05, 1000.0}; + dx::HitObject miss = dx::HitObject::MakeMiss(0, 1, ray); + Use(miss); + + dx::HitObject miss2 = dx::HitObject::MakeMiss(0, 1, ray); + Use(miss2); + + dx::HitObject miss3 = dx::HitObject::MakeMiss(0, 2, ray); + Use(miss3); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/lit.local.cfg b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/lit.local.cfg new file mode 100644 index 0000000000..ba86568f9a --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/lit.local.cfg @@ -0,0 +1 @@ +config.unsupported = 'dxil-1-9' not in config.available_features diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/allocateRayQuery2.hlsl 
b/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/allocateRayQuery2.hlsl new file mode 100644 index 0000000000..de79a2f481 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/allocateRayQuery2.hlsl @@ -0,0 +1,23 @@ +// REQUIRES: dxil-1-9 +// RUN: %dxc -T lib_6_9 %s | FileCheck %s +// RUN: %dxc -T lib_6_9 -fcgl %s | FileCheck -check-prefix=FCGL %s + +// RUN: %dxc -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -T vs_6_9 -fcgl %s | FileCheck -check-prefix=FCGL %s + + +RaytracingAccelerationStructure RTAS; +[shader("vertex")] +void main(RayDesc rayDesc : RAYDESC) { + + // CHECK: call i32 @dx.op.allocateRayQuery2(i32 258, i32 1024, i32 1) + // FCGL: call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 1024, i32 1) + RayQuery rayQuery1; + + rayQuery1.TraceRayInline(RTAS, RAY_FLAG_FORCE_OMM_2_STATE, 2, rayDesc); + + // CHECK: call i32 @dx.op.allocateRayQuery(i32 178, i32 1) + // FCGL: call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 1, i32 0) + RayQuery rayQuery2; + rayQuery2.TraceRayInline(RTAS, 0, 2, rayDesc); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-decls.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-decls.hlsl new file mode 100644 index 0000000000..8bc7b9e73d --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-decls.hlsl @@ -0,0 +1,322 @@ +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=float -DNUM=5 %s | FileCheck %s -check-prefixes=CHECK,F5 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=bool -DNUM=7 %s | FileCheck %s -check-prefixes=CHECK,B7 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=uint64_t -DNUM=9 %s | FileCheck %s -check-prefixes=CHECK,L9 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=double -DNUM=17 %s | FileCheck %s -check-prefixes=CHECK,D17 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=float16_t -DNUM=256 -enable-16bit-types %s | FileCheck %s -check-prefixes=CHECK,H256 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=int16_t -DNUM=1024 -enable-16bit-types %s | FileCheck %s -check-prefixes=CHECK,S1024 + +// A test to verify that declarations of 
longvecs are permitted in all the accepted places. +// Only tests for acceptance, most codegen is ignored for now. + +// CHECK: %struct.LongVec = type { <4 x float>, <[[NUM:[0-9]*]] x [[STY:[a-z0-9]*]]> } +struct LongVec { + float4 f; + vector vec; +}; + +struct LongVecSub : LongVec { + int3 is; +}; + +template +struct LongVecTpl { + float4 f; + vector vec; +}; + +// Just some dummies to capture the types and mangles. +// CHECK: @"\01?dummy@@3[[MNG:F|M|N|_N|_K|\$f16@]]A" = external addrspace(3) global [[STY]] +groupshared TYPE dummy; + +// Use the first groupshared to establish mangles and sizes +// F5-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:M]]$[[VS:04]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// B7-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:_N]]$[[VS:06]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// L9-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:_K]]$[[VS:08]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// D17-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:N]]$[[VS:0BB@]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// H256-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:\$f16@]]$[[VS:0BAA@]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// S1024-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:F]]$[[VS:0EAA@]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +groupshared vector gs_vec; + +// CHECK-DAG: @"\01?gs_vec_arr@@3PAV?$vector@[[MNG]]$[[VS]]@@A" = external addrspace(3) global [10 x <[[NUM]] x [[STY]]>] +groupshared vector gs_vec_arr[10]; +// CHECK-DAG: @"\01?gs_vec_rec@@3ULongVec@@A" = external addrspace(3) global %struct.LongVec +groupshared LongVec gs_vec_rec; +// CHECK-DAG: @"\01?gs_vec_sub@@3ULongVecSub@@A" = external addrspace(3) global %struct.LongVecSub +groupshared LongVecSub gs_vec_sub; +// CHECK-DAG: @"\01?gs_vec_tpl@@3U?$LongVecTpl@$[[VS]]@@A" = external addrspace(3) global %"struct.LongVecTpl<[[NUM]]>" +groupshared LongVecTpl gs_vec_tpl; + +// CHECK-DAG: @static_vec = internal global <[[NUM]] x [[STY]]> +static vector static_vec; 
+// CHECK-DAG: @static_vec_arr = internal global [10 x <[[NUM]] x [[STY]]>] zeroinitializer +static vector static_vec_arr[10]; +// CHECK-DAG: @static_vec_rec = internal global %struct.LongVec +static LongVec static_vec_rec; +// CHECK-DAG: @static_vec_sub = internal global %struct.LongVecSub +static LongVecSub static_vec_sub; +// CHECK-DAG: @static_vec_tpl = internal global %"struct.LongVecTpl<[[NUM]]>" +static LongVecTpl static_vec_tpl; + +// CHECK: define [[RTY:[a-z0-9]*]] @"\01?getVal@@YA[[MNG]][[MNG]]@Z"([[RTY]] {{.*}}%t) +export TYPE getVal(TYPE t) {TYPE ret = dummy; dummy = t; return ret;} + +// CHECK: define <[[NUM]] x [[RTY]]> +// CHECK-LABEL: @"\01?lv_param_passthru +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@V1@@Z"(<[[NUM]] x [[RTY]]> %vec1) +// CHECK: ret <[[NUM]] x [[RTY]]> +export vector lv_param_passthru(vector vec1) { + return vec1; +} + +// CHECK-LABEL: define void @"\01?lv_param_arr_passthru +// CHECK-SAME: @@YA$$BY09V?$vector@[[MNG]]$[[VS]]@@Y09V1@@Z"([10 x <[[NUM]] x [[STY]]>]* noalias sret %agg.result, [10 x <[[NUM]] x [[STY]]>]* %vec) +// CHECK: ret void +export vector lv_param_arr_passthru(vector vec[10])[10] { + return vec; +} + +// CHECK-LABEL: define void @"\01?lv_param_rec_passthru@@YA?AULongVec@@U1@@Z"(%struct.LongVec* noalias sret %agg.result, %struct.LongVec* %vec) +// CHECK: memcpy +// CHECK: ret void +export LongVec lv_param_rec_passthru(LongVec vec) { + return vec; +} + +// CHECK-LABEL: define void @"\01?lv_param_sub_passthru@@YA?AULongVec@@U1@@Z"(%struct.LongVec* noalias sret %agg.result, %struct.LongVec* %vec) +// CHECK: memcpy +// CHECK: ret void +export LongVec lv_param_sub_passthru(LongVec vec) { + return vec; +} + +// CHECK-LABEL: define void @"\01?lv_param_tpl_passthru@@YA?AULongVec@@U1@@Z"(%struct.LongVec* noalias sret %agg.result, %struct.LongVec* %vec) +// CHECK: memcpy +// CHECK: ret void +export LongVec lv_param_tpl_passthru(LongVec vec) { + return vec; +} + +// CHECK-LABEL: define void @"\01?lv_param_in_out +// 
CHECK-SAME: @@YAXV?$vector@[[MNG]]$[[VS]]@@AIAV1@@Z"(<[[NUM]] x [[RTY]]> %vec1, <[[NUM]] x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec2) +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]>* %vec2, align 4 +// CHECK: ret void +export void lv_param_in_out(in vector vec1, out vector vec2) { + vec2 = vec1; +} + +// CHECK-LABEL: define void @"\01?lv_param_in_out_rec@@YAXULongVec@@U1@@Z"(%struct.LongVec* %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_in_out_rec(in LongVec vec1, out LongVec vec2) { + vec2 = vec1; +} + +// CHECK-LABEL: define void @"\01?lv_param_in_out_sub@@YAXULongVec@@U1@@Z"(%struct.LongVec* %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_in_out_sub(in LongVec vec1, out LongVec vec2) { + vec2 = vec1; +} + +// CHECK-LABEL: define void @"\01?lv_param_in_out_tpl@@YAXULongVec@@U1@@Z"(%struct.LongVec* %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_in_out_tpl(in LongVec vec1, out LongVec vec2) { + vec2 = vec1; +} + + +// CHECK-LABEL: define void @"\01?lv_param_inout +// CHECK-SAME: @@YAXAIAV?$vector@[[MNG]]$[[VS]]@@0@Z"(<[[NUM]] x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec1, <[[NUM]] x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec2) +// CHECK: load <[[NUM]] x [[STY]]>, <[[NUM]] x [[STY]]>* %vec1, align 4 +// CHECK: load <[[NUM]] x [[STY]]>, <[[NUM]] x [[STY]]>* %vec2, align 4 +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]>* %vec1, align 4 +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]>* %vec2, align 4 +// CHECK: ret void +export void lv_param_inout(inout vector vec1, inout vector vec2) { + vector tmp = vec1; + vec1 = vec2; + vec2 = tmp; +} + +// CHECK-LABEL: define void @"\01?lv_param_inout_rec@@YAXULongVec@@0@Z"(%struct.LongVec* noalias %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void 
lv_param_inout_rec(inout LongVec vec1, inout LongVec vec2) { + LongVec tmp = vec1; + vec1 = vec2; + vec2 = tmp; +} + +// CHECK-LABEL: define void @"\01?lv_param_inout_sub@@YAXULongVec@@0@Z"(%struct.LongVec* noalias %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_inout_sub(inout LongVec vec1, inout LongVec vec2) { + LongVec tmp = vec1; + vec1 = vec2; + vec2 = tmp; +} + +// CHECK-LABEL: define void @"\01?lv_param_inout_tpl@@YAXULongVec@@0@Z"(%struct.LongVec* noalias %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_inout_tpl(inout LongVec vec1, inout LongVec vec2) { + LongVec tmp = vec1; + vec1 = vec2; + vec2 = tmp; +} + +// CHECK-LABEL: define void @"\01?lv_global_assign +// CHECK-SAME: @@YAXV?$vector@[[MNG]]$[[VS]]@@Y09V1@ULongVec@@ULongVecSub@@U?$LongVecTpl@$[[VS]]@@@Z"(<[[NUM]] x [[RTY]]> %vec, [10 x <[[NUM]] x [[STY]]>]* %arr, %struct.LongVec* %rec, %struct.LongVecSub* %sub, %"struct.LongVecTpl<[[NUM]]>"* %tpl) +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]>* @static_vec +// CHECK: ret void +export void lv_global_assign(vector vec, vector arr[10], + LongVec rec, LongVecSub sub, LongVecTpl tpl) { + static_vec = vec; + static_vec_arr = arr; + static_vec_rec = rec; + static_vec_sub = sub; + static_vec_tpl = tpl; +} + +// CHECK-LABEL: define void @"\01?lv_gs_assign +// CHECK-SAME: @@YAXV?$vector@[[MNG]]$[[VS]]@@Y09V1@ULongVec@@ULongVecSub@@U?$LongVecTpl@$[[VS]]@@@Z"(<[[NUM]] x [[RTY]]> %vec, [10 x <[[NUM]] x [[STY]]>]* %arr, %struct.LongVec* %rec, %struct.LongVecSub* %sub, %"struct.LongVecTpl<[[NUM]]>"* %tpl) +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]> addrspace(3)* @"\01?gs_vec@@3V?$vector@[[MNG]]$[[VS]]@@A" +// CHECK: ret void +export void lv_gs_assign(vector vec, vector arr[10], + LongVec rec, LongVecSub sub, LongVecTpl tpl) { + gs_vec = vec; + gs_vec_arr = arr; + gs_vec_rec = sub; + gs_vec_tpl = tpl; +} + +// CHECK: define 
<[[NUM]] x [[RTY]]> +// CHECK-LABEL: @"\01?lv_global_ret +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@XZ"() +// CHECK: load <[[NUM]] x [[STY]]>, <[[NUM]] x [[STY]]>* @static_vec +// CHECK: ret <[[NUM]] x [[RTY]]> +export vector lv_global_ret() { + return static_vec; +} + +// CHECK-LABEL: define void @"\01?lv_global_arr_ret +// CHECK-SAME: @@YA$$BY09V?$vector@[[MNG]]$[[VS]]@@XZ"([10 x <[[NUM]] x [[STY]]>]* noalias sret %agg.result) +// CHECK: ret void +export vector lv_global_arr_ret()[10] { + return static_vec_arr; +} + +// CHECK-LABEL: define void @"\01?lv_global_rec_ret@@YA?AULongVec@@XZ"(%struct.LongVec* noalias sret %agg.result) +// CHECK: memcpy +// CHECK: ret void +export LongVec lv_global_rec_ret() { + return static_vec_rec; +} + +// CHECK-LABEL: define void @"\01?lv_global_sub_ret@@YA?AULongVecSub@@XZ"(%struct.LongVecSub* noalias sret %agg.result) +// CHECK: memcpy +// CHECK: ret void +export LongVecSub lv_global_sub_ret() { + return static_vec_sub; +} + +// CHECK-LABEL: define void @"\01?lv_global_tpl_ret +// CHECK-SAME: @@YA?AU?$LongVecTpl@$[[VS]]@@XZ"(%"struct.LongVecTpl<[[NUM]]>"* noalias sret %agg.result) +// CHECK: memcpy +// CHECK: ret void +export LongVecTpl lv_global_tpl_ret() { + return static_vec_tpl; +} + +// CHECK: define <[[NUM]] x [[RTY]]> +// CHECK-LABEL: @"\01?lv_gs_ret +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@XZ"() +// CHECK: load <[[NUM]] x [[STY]]>, <[[NUM]] x [[STY]]> addrspace(3)* @"\01?gs_vec@@3V?$vector@[[MNG]]$[[VS]]@@A" +// CHECK: ret <[[NUM]] x [[RTY]]> +export vector lv_gs_ret() { + return gs_vec; +} + +// CHECK-LABEL: define void @"\01?lv_gs_arr_ret +// CHECK-SAME: @@YA$$BY09V?$vector@[[MNG]]$[[VS]]@@XZ"([10 x <[[NUM]] x [[STY]]>]* noalias sret %agg.result) +// CHECK: ret void +export vector lv_gs_arr_ret()[10] { + return gs_vec_arr; +} + +// CHECK-LABEL: define void @"\01?lv_gs_rec_ret@@YA?AULongVec@@XZ"(%struct.LongVec* noalias sret %agg.result) +// CHECK: memcpy +// CHECK: ret void +export LongVec lv_gs_rec_ret() { 
+ return gs_vec_rec; +} + +// CHECK-LABEL: define void @"\01?lv_gs_sub_ret@@YA?AULongVecSub@@XZ"(%struct.LongVecSub* noalias sret %agg.result) +// CHECK: memcpy +// CHECK: ret void +export LongVecSub lv_gs_sub_ret() { + return gs_vec_sub; +} + +// CHECK-LABEL: define void @"\01?lv_gs_tpl_ret +// CHECK-SAME: @@YA?AU?$LongVecTpl@$[[VS]]@@XZ"(%"struct.LongVecTpl<[[NUM]]>"* noalias sret %agg.result) +// CHECK: memcpy +// CHECK: ret void +export LongVecTpl lv_gs_tpl_ret() { + return gs_vec_tpl; +} + +// CHECK: define <[[NUM]] x [[RTY]]> +// CHECK-LABEL: @"\01?lv_splat +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@[[MNG]]@Z"([[RTY]] {{.*}}%scalar) +// CHECK: ret <[[NUM]] x [[RTY]]> +export vector lv_splat(TYPE scalar) { + vector ret = scalar; + return ret; +} + +// CHECK: define <6 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_initlist +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$05@@XZ"() +// CHECK: ret <6 x [[RTY]]> +export vector lv_initlist() { + vector ret = {1, 2, 3, 4, 5, 6}; + return ret; +} + +// CHECK: define <6 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_initlist_vec +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$05@@V?$vector@[[MNG]]$02@@@Z"(<3 x [[RTY]]> %vec) +// CHECK: ret <6 x [[RTY]]> +export vector lv_initlist_vec(vector vec) { + vector ret = {vec, 4.0, 5.0, 6.0}; + return ret; +} + +// CHECK: define <6 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_vec_vec +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$05@@V?$vector@[[MNG]]$02@@0@Z"(<3 x [[RTY]]> %vec1, <3 x [[RTY]]> %vec2) +// CHECK: ret <6 x [[RTY]]> +export vector lv_vec_vec(vector vec1, vector vec2) { + vector ret = {vec1, vec2}; + return ret; +} + +// CHECK: define <[[NUM]] x [[RTY]]> +// CHECK-LABEL: @"\01?lv_array_cast +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@Y[[VS]][[MNG]]@Z"({{\[}}[[NUM]] x [[STY]]]* %arr) +// CHECK: ret <[[NUM]] x [[RTY]]> +export vector lv_array_cast(TYPE arr[NUM]) { + vector ret = (vector)arr; + return ret; +} + +// CHECK: define <6 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_ctor +// CHECK-SAME: 
@@YA?AV?$vector@[[MNG]]$05@@[[MNG]]@Z"([[RTY]] {{.*}}%s) +// CHECK: ret <6 x [[RTY]]> +export vector lv_ctor(TYPE s) { + vector ret = vector(1.0, 2.0, 3.0, 4.0, 5.0, s); + return ret; +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-field-di.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-field-di.hlsl new file mode 100644 index 0000000000..935ec3cc13 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-field-di.hlsl @@ -0,0 +1,33 @@ +// RUN: %dxc -Zi -Qembed_debug -T lib_6_9 %s -DNUM=8 | FileCheck %s --check-prefix=CHECK-LONG +// RUN: %dxc -Zi -Qembed_debug -T lib_6_9 %s -DNUM=4 | FileCheck %s --check-prefix=CHECK-SHORT + +// Test debug info for short and long vector types + +RWByteAddressBuffer buf; + +export vector lv_global_arr_ret() { + vector d = buf.Load >(0); + return d; +} + +// CHECK-LONG: ![[TYDI:[^ ]+]] = !DICompositeType(tag: DW_TAG_class_type, name: "vector", file: !{{[^ ]+}}, size: 256, align: 32, elements: ![[ELEMDI:[^ ]+]], +// CHECK-LONG: ![[ELEMDI]] = !{![[C0:[^ ]+]], ![[C1:[^ ]+]], ![[C2:[^ ]+]], ![[C3:[^ ]+]], ![[C4:[^ ]+]], ![[C5:[^ ]+]], ![[C6:[^ ]+]], ![[C7:[^ ]+]]} +// CHECK-LONG: ![[C0]] = !DIDerivedType(tag: DW_TAG_member, name: "c0", scope: !{{[^ ]+}} file: !{{[^ ]+}}, baseType: ![[BASETY:[^ ]+]], size: 32, align: 32, flags: DIFlagPublic) +// CHECK-LONG: ![[BASETY]] = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float) +// CHECK-LONG: ![[C1]] = !DIDerivedType(tag: DW_TAG_member, name: "c1", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 32, flags: DIFlagPublic) +// CHECK-LONG: ![[C2]] = !DIDerivedType(tag: DW_TAG_member, name: "c2", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 64, flags: DIFlagPublic) +// CHECK-LONG: ![[C3]] = !DIDerivedType(tag: DW_TAG_member, name: "c3", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 96, flags: DIFlagPublic) +// 
CHECK-LONG: ![[C4]] = !DIDerivedType(tag: DW_TAG_member, name: "c4", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 128, flags: DIFlagPublic) +// CHECK-LONG: ![[C5]] = !DIDerivedType(tag: DW_TAG_member, name: "c5", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 160, flags: DIFlagPublic) +// CHECK-LONG: ![[C6]] = !DIDerivedType(tag: DW_TAG_member, name: "c6", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 192, flags: DIFlagPublic) +// CHECK-LONG: ![[C7]] = !DIDerivedType(tag: DW_TAG_member, name: "c7", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 224, flags: DIFlagPublic) +// CHECK-LONG: !{{[^ ]+}} = !DILocalVariable(tag: DW_TAG_auto_variable, name: "d", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, line: 9, type: ![[TYDI]]) + +// CHECK-SHORT: ![[TYDI:[^ ]+]] = !DICompositeType(tag: DW_TAG_class_type, name: "vector", file: !{{[^ ]+}}, size: 128, align: 32, elements: ![[ELEMDI:[^ ]+]], +// CHECK-SHORT: ![[ELEMDI]] = !{![[X:[^ ]+]], ![[Y:[^ ]+]], ![[Z:[^ ]+]], ![[W:[^ ]+]]} +// CHECK-SHORT: ![[X]] = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY:[^ ]+]], size: 32, align: 32, flags: DIFlagPublic) +// CHECK-SHORT: ![[BASETY]] = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float) +// CHECK-SHORT: ![[Y]] = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 32, flags: DIFlagPublic) +// CHECK-SHORT: ![[Z]] = !DIDerivedType(tag: DW_TAG_member, name: "z", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 64, flags: DIFlagPublic) +// CHECK-SHORT: ![[W]] = !DIDerivedType(tag: DW_TAG_member, name: "w", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 96, flags: DIFlagPublic) +// 
CHECK-SHORT: !{{[^ ]+}} = !DILocalVariable(tag: DW_TAG_auto_variable, name: "d", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, line: 9, type: ![[TYDI]]) \ No newline at end of file diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl new file mode 100644 index 0000000000..0b7f0d6b2f --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl @@ -0,0 +1,394 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=2 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=125 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=256 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=1024 %s | FileCheck %s + +// Test vector-enabled non-trivial intrinsics that take parameters of various types. + +RWByteAddressBuffer buf; +RWByteAddressBuffer ibuf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> +// CHECK-DAG: %dx.types.ResRet.[[DTY:v[0-9]*f64]] = type { <[[NUM]] x double> + +[numthreads(8,1,1)] +void main() { + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle {{%.*}}, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: 
[[hvec2:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec1 = buf.Load >(0); + vector hVec2 = buf.Load >(512); + vector hVec3 = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec1 = buf.Load >(2048); + vector fVec2 = buf.Load >(2560); + vector fVec3 = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + vector dVec1 = buf.Load >(4096); + vector dVec2 = buf.Load >(4608); + vector dVec3 = buf.Load >(5120); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle {{%.*}}, %dx.types.ResourceProperties { i32 4107, 
i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[svec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[svec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec1 = ibuf.Load >(0); + vector sVec2 = ibuf.Load >(512); + vector sVec3 = ibuf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1025 + // CHECK: [[usvec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1536 + // CHECK: [[usvec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[usvec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec1 = ibuf.Load >(1025); + vector usVec2 = ibuf.Load >(1536); + vector usVec3 = ibuf.Load >(2048); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2049 + // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] 
@dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[ivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec1 = ibuf.Load >(2049); + vector iVec2 = ibuf.Load >(2560); + vector iVec3 = ibuf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3073 + // CHECK: [[uivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3584 + // CHECK: [[uivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[uivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector uiVec1 = ibuf.Load >(3073); + vector uiVec2 = ibuf.Load >(3584); + vector uiVec3 = ibuf.Load >(4096); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4097 + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[lvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec1 = ibuf.Load >(4097); + vector lVec2 = ibuf.Load >(4608); + vector lVec3 = ibuf.Load >(5120); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5121 + // CHECK: [[ulvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call 
%dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5632 + // CHECK: [[ulvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 6144 + // CHECK: [[ulvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec1 = ibuf.Load >(5121); + vector ulVec2 = ibuf.Load >(5632); + vector ulVec3 = ibuf.Load >(6144); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x half> @dx.op.binary.[[HTY]](i32 35, <[[NUM]] x half> [[hvec1]], <[[NUM]] x half> [[hvec2]]) ; FMax(a,b) + // CHECK: call <[[NUM]] x half> @dx.op.binary.[[HTY]](i32 36, <[[NUM]] x half> [[tmp]], <[[NUM]] x half> [[hvec3]]) ; FMin(a,b) + vector hRes = clamp(hVec1, hVec2, hVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x float> @dx.op.binary.[[FTY]](i32 35, <[[NUM]] x float> [[fvec1]], <[[NUM]] x float> [[fvec2]]) ; FMax(a,b) + // CHECK: call <[[NUM]] x float> @dx.op.binary.[[FTY]](i32 36, <[[NUM]] x float> [[tmp]], <[[NUM]] x float> [[fvec3]]) ; FMin(a,b) + vector fRes = clamp(fVec1, fVec2, fVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x double> @dx.op.binary.[[DTY]](i32 35, <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]]) ; FMax(a,b) + // CHECK: call <[[NUM]] x double> @dx.op.binary.[[DTY]](i32 36, <[[NUM]] x double> [[tmp]], <[[NUM]] x double> [[dvec3]]) ; FMin(a,b) + vector dRes = clamp(dVec1, dVec2, dVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 37, <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[svec2]]) ; IMax(a,b) + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 38, <[[NUM]] x i16> [[tmp]], <[[NUM]] x i16> [[svec3]]) ; 
IMin(a,b) + vector sRes = clamp(sVec1, sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 39, <[[NUM]] x i16> [[usvec1]], <[[NUM]] x i16> [[usvec2]]) ; UMax(a,b) + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 40, <[[NUM]] x i16> [[tmp]], <[[NUM]] x i16> [[usvec3]]) ; UMin(a,b) + vector usRes = clamp(usVec1, usVec2, usVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 37, <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[ivec2]]) ; IMax(a,b) + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 38, <[[NUM]] x i32> [[tmp]], <[[NUM]] x i32> [[ivec3]]) ; IMin(a,b) + vector iRes = clamp(iVec1, iVec2, iVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 39, <[[NUM]] x i32> [[uivec1]], <[[NUM]] x i32> [[uivec2]]) ; UMax(a,b) + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 40, <[[NUM]] x i32> [[tmp]], <[[NUM]] x i32> [[uivec3]]) ; UMin(a,b) + vector uiRes = clamp(uiVec1, uiVec2, uiVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 37, <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[lvec2]]) ; IMax(a,b) + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 38, <[[NUM]] x i64> [[tmp]], <[[NUM]] x i64> [[lvec3]]) ; IMin(a,b) + vector lRes = clamp(lVec1, lVec2, lVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 39, <[[NUM]] x i64> [[ulvec1]], <[[NUM]] x i64> [[ulvec2]]) ; UMax(a,b) + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 40, <[[NUM]] x i64> [[tmp]], <[[NUM]] x i64> [[ulvec3]]) ; UMin(a,b) + vector ulRes = clamp(ulVec1, ulVec2, ulVec3); + + // CHECK-NOT: extractelement + // 
CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fcmp fast olt <[[NUM]] x half> [[hvec2]], [[hvec1]] + // CHECK: select <[[NUM]] x i1> [[tmp]], <[[NUM]] x half> zeroinitializer, <[[NUM]] x half> [[fvec2]], [[fvec1]] + // CHECK: select <[[NUM]] x i1> [[tmp]], <[[NUM]] x float> zeroinitializer, <[[NUM]] x float> [[hvec1]], @dx.op.unary.[[HTY]](i32 21, <[[NUM]] x half> [[tmp]]) ; Exp(value) + hRes += exp(hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fmul fast <[[NUM]] x float> [[fvec1]], @dx.op.unary.[[FTY]](i32 21, <[[NUM]] x float> [[tmp]]) ; Exp(value) + fRes += exp(fVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 23, <[[NUM]] x half> [[hvec1]]) ; Log(value) + // CHECK: fmul fast <[[NUM]] x half> [[tmp]], @dx.op.unary.[[FTY]](i32 23, <[[NUM]] x float> [[fvec1]]) ; Log(value) + // CHECK: fmul fast <[[NUM]] x float> [[tmp]], [[hvec2]], [[hvec1]] + // CHECK: [[xsub:%.*]] = fsub fast <[[NUM]] x half> [[hvec3]], [[hvec1]] + // CHECK: [[div:%.*]] = fdiv fast <[[NUM]] x half> [[xsub]], [[sub]] + // CHECK: [[sat:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 7, <[[NUM]] x half> [[div]]) ; Saturate(value) + // CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x half> [[sat]], , [[mul]] + // CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x half> [[sat]], [[sat]] + // CHECK: fmul fast <[[NUM]] x half> [[mul]], [[sub]] + hRes += smoothstep(hVec1, hVec2, hVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[sub:%.*]] = fsub fast <[[NUM]] x float> [[fvec2]], [[fvec1]] + // CHECK: [[xsub:%.*]] = fsub fast <[[NUM]] x float> [[fvec3]], [[fvec1]] + // CHECK: [[div:%.*]] = fdiv fast <[[NUM]] x float> [[xsub]], [[sub]] + // CHECK: [[sat:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 7, <[[NUM]] x float> [[div]]) ; Saturate(value) + // CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x float> [[sat]], , [[mul]] + // 
CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x float> [[sat]], [[sat]] + // CHECK: fmul fast <[[NUM]] x float> [[mul]], [[sub]] + fRes += smoothstep(fVec1, fVec2, fVec3); + + // Intrinsics that expand into llvm ops. + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: fmul fast <[[NUM]] x half> [[hvec2]], [[fvec2]], [[hvec3]], [[fvec3]], [[fvec1]], zeroinitializer + // CHECK: [[f2i:%.*]] = bitcast <[[NUM]] x float> [[fvec1]] to <[[NUM]] x i32> + // CHECK: [[and:%.*]] = and <[[NUM]] x i32> [[f2i]], [[and]], [[add]], [[shr]] to <[[NUM]] x float> + // CHECK: [[sel:%.*]] = select <[[NUM]] x i1> [[cmp]], <[[NUM]] x float> [[i2f]], <[[NUM]] x float> zeroinitializer + // CHECK: [[and:%.*]] = and <[[NUM]] x i32> [[f2i]], [[and]], exp = fVec3; + fRes += frexp(fVec1, exp); + fRes += exp; + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fsub fast <[[NUM]] x half> [[hvec3]], [[hvec2]] + // CHECK: fmul fast <[[NUM]] x half> [[tmp]], [[hvec1]] + hRes += lerp(hVec2, hVec3, hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fsub fast <[[NUM]] x float> [[fvec3]], [[fvec2]] + // CHECK: fmul fast <[[NUM]] x float> [[tmp]], [[fvec1]] + fRes += lerp(fVec2, fVec3, fVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: fdiv fast <[[NUM]] x half> , [[hvec1]] + hRes += rcp(hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: fdiv fast <[[NUM]] x float> , [[fvec1]] + fRes += rcp(fVec1); + + vector signs = 1; + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = fcmp fast ogt <[[NUM]] x half> [[hvec1]], zeroinitializer + // CHECK: [[lt:%.*]] = fcmp fast olt <[[NUM]] x half> [[hvec1]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs 
*= sign(hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = fcmp fast ogt <[[NUM]] x float> [[fvec1]], zeroinitializer + // CHECK: [[lt:%.*]] = fcmp fast olt <[[NUM]] x float> [[fvec1]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(fVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = fcmp fast ogt <[[NUM]] x double> [[dvec1]], zeroinitializer + // CHECK: [[lt:%.*]] = fcmp fast olt <[[NUM]] x double> [[dvec1]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(dVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = icmp sgt <[[NUM]] x i16> [[svec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <[[NUM]] x i16> [[svec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(sVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[cmp:%.*]] = icmp ne <[[NUM]] x i16> [[usvec2]], zeroinitializer + // CHECK: zext <[[NUM]] x i1> [[cmp]] to <[[NUM]] x i32> + signs *= sign(usVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = icmp sgt <[[NUM]] x i32> [[ivec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <[[NUM]] x i32> [[ivec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: [[sub:%.*]] = sub nsw <[[NUM]] x i32> 
[[igt]], [[ilt]] + signs *= sign(iVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[cmp:%.*]] = icmp ne <[[NUM]] x i32> [[uivec2]], zeroinitializer + // CHECK: zext <[[NUM]] x i1> [[cmp]] to <[[NUM]] x i32> + signs *= sign(uiVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = icmp sgt <[[NUM]] x i64> [[lvec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <[[NUM]] x i64> [[lvec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(lVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[cmp:%.*]] = icmp ne <[[NUM]] x i64> [[ulvec2]], zeroinitializer + // CHECK: zext <[[NUM]] x i1> [[cmp]] to <[[NUM]] x i32> + signs *= sign(ulVec2); + + iRes += signs; + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[bvec2:%.*]] = icmp ne <[[NUM]] x i16> [[svec2]], zeroinitializer + // CHECK: [[bvec1:%.*]] = icmp ne <[[NUM]] x i16> [[svec1]], zeroinitializer + // CHECK: or <[[NUM]] x i1> [[bvec2]], [[bvec1]] + sRes += or(sVec1, sVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[bvec3:%.*]] = icmp ne <[[NUM]] x i16> [[svec3]], zeroinitializer + // CHECK: and <[[NUM]] x i1> [[bvec3]], [[bvec2]] + sRes += and(sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: select <[[NUM]] x i1> [[bvec1]], <[[NUM]] x i16> [[svec2]], <[[NUM]] x i16> [[svec3]] + sRes += select(sVec1, sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(2048, fRes); + buf.Store >(4096, dRes); + + ibuf.Store >(0, sRes); + ibuf.Store >(1024, usRes); + ibuf.Store >(2048, iRes); + ibuf.Store >(3072, uiRes); + ibuf.Store >(4096, lRes); + ibuf.Store >(5120, ulRes); +} diff --git 
a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl new file mode 100644 index 0000000000..12955c87f9 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl @@ -0,0 +1,464 @@ +// RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=2 %s | FileCheck %s +// RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=5 %s | FileCheck %s +// RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=3 %s | FileCheck %s +// RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=9 %s | FileCheck %s + +// Test relevant operators on an assortment of bool vector sizes with 6.9 native vectors. +// Bools have a different representation in memory and a smaller set of interesting ops. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. +// Uses non vector buffer to avoid interacting with that implementation. +// CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[TYPE:[a-z_0-9]*]] +RWStructuredBuffer< bool > buf; + +groupshared vector gs_vec1, gs_vec2; +groupshared vector gs_vec3; + + +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. +// CHECK-LABEL: define void @"\01?assignments +export void assignments(inout vector things[10], bool scales[10]) { + + // Another trick to capture the size.
+ // CHECK: [[res:%[0-9]*]] = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{[^,]*}}, i32 [[NUM:[0-9]*]] + // CHECK: [[scl:%[0-9]*]] = extractvalue %dx.types.ResRet.i32 [[res]], 0 + // CHECK: [[bscl:%[0-9]*]] = icmp ne i32 [[scl]], 0 + bool scalar = buf.Load(NUM); + + // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 9 + // CHECK: [[vec9:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add9]] + // CHECK: [[bvec9:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec9]], zeroinitializer + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 0 + // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec9]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + things[0] = things[9]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i1> undef, i1 [[bscl]], i32 0 + // CHECK: [[res:%[0-9]*]] = shufflevector <[[NUM]] x i1> [[spt]], <[[NUM]] x i1> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 5 + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[res]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]] + things[5] = scalar; + +} + +// Test arithmetic operators. 
+// CHECK-LABEL: define void @"\01?arithmetic +export vector arithmetic(inout vector things[10])[10] { + vector res[10]; + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add6]] + + // CHECK: [[bvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + // CHECK: [[svec0:%[0-9]*]] = sext <[[NUM]] x i1> [[bvec0]] to <[[NUM]] x i32> + // CHECK: [[bsvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[svec0]], zeroinitializer + // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[bsvec0]] to <[[NUM]] x i32> + res[0] = -things[0]; + + // CHECK: [[vec0:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec0]] to <[[NUM]] x i32> + // CHECK: 
[[bvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec0]] to <[[NUM]] x i32> + res[1] = +things[0]; + + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer + // CHECK: [[vec1:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec1]] to <[[NUM]] x i32> + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[vec2:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec2]] to <[[NUM]] x i32> + // CHECK: [[res2:%[0-9]*]] = add nuw nsw <[[NUM]] x i32> [[vec2]], [[vec1]] + // CHECK: [[bres2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res2]], zeroinitializer + // CHECK: [[res2:%[0-9][0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + res[2] = things[1] + things[2]; + + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[vec3:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec3]] to <[[NUM]] x i32> + // CHECK: [[res3:%[0-9]*]] = sub nsw <[[NUM]] x i32> [[vec2]], [[vec3]] + // CHECK: [[bres3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res3]], zeroinitializer + // CHECK: [[res3:%[0-9][0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + res[3] = things[2] - things[3]; + + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[vec4:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec4]] to <[[NUM]] x i32> + // CHECK: [[res4:%[0-9]*]] = mul nuw nsw <[[NUM]] x i32> [[vec4]], [[vec3]] + // CHECK: [[bres4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res4]], zeroinitializer + // CHECK: [[res4:%[0-9][0-9]*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32> + res[4] = things[3] * things[4]; + + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // CHECK: [[vec5:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec5]] to <[[NUM]] x i32> + // CHECK: [[res5:%[0-9]*]] = sdiv <[[NUM]] x i32> [[vec4]], [[vec5]] + // CHECK: [[bres5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res5]], zeroinitializer + // CHECK: 
[[res5:%[0-9][0-9]*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32> + res[5] = things[4] / things[5]; + + // CHECK: [[bvec6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec6]], zeroinitializer + // CHECK: [[vec6:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec6]] to <[[NUM]] x i32> + // CHECK: [[res6:%[0-9]*]] = {{[ufs]?rem( fast)?}} <[[NUM]] x i32> [[vec5]], [[vec6]] + res[6] = things[5] % things[6]; + + // Stores into res[]. Previous were for things[] inout. + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 1 + // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 2 + // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 3 + // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]] + // CHECK: ret void + + + return res; +} + +// Test arithmetic operators with scalars. 
+// CHECK-LABEL: define void @"\01?scarithmetic +export vector scarithmetic(inout vector things[10], bool scales[10])[10] { + vector res[10]; + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add6]] + + // CHECK: [[bvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + // CHECK: [[vec0:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec0]] to <[[NUM]] x i32> + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 0 + // CHECK: [[scl0:%[0-9]*]] = load i32, i32* [[add0]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl0]], i32 0 + // CHECK: [[spt0:%[0-9]*]] = 
shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res0:%[0-9]*]] = add <[[NUM]] x i32> [[spt0]], [[vec0]] + // CHECK: [[bres0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res0]], zeroinitializer + // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[bres0]] to <[[NUM]] x i32> + res[0] = things[0] + scales[0]; + + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer + // CHECK: [[vec1:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec1]] to <[[NUM]] x i32> + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 1 + // CHECK: [[scl1:%[0-9]*]] = load i32, i32* [[add1]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl1]], i32 0 + // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res1:%[0-9]*]] = sub <[[NUM]] x i32> [[vec1]], [[spt1]] + // CHECK: [[bres1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res1]], zeroinitializer + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32> + res[1] = things[1] - scales[1]; + + + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[vec2:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec2]] to <[[NUM]] x i32> + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 2 + // CHECK: [[scl2:%[0-9]*]] = load i32, i32* [[add2]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl2]], i32 0 + // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res2:%[0-9]*]] = mul nuw <[[NUM]] x i32> [[spt2]], [[vec2]] + // CHECK: [[bres2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res2]], zeroinitializer + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + res[2] = things[2] * scales[2]; + + // CHECK: [[bvec3:%[0-9]*]] = icmp 
ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[vec3:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec3]] to <[[NUM]] x i32> + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 3 + // CHECK: [[scl3:%[0-9]*]] = load i32, i32* [[add3]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl3]], i32 0 + // CHECK: [[spt3:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res3:%[0-9]*]] = sdiv <[[NUM]] x i32> [[vec3]], [[spt3]] + // CHECK: [[bres3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res3]], zeroinitializer + // CHECK: [[res3:%[0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + res[3] = things[3] / scales[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 4 + // CHECK: [[scl4:%[0-9]*]] = load i32, i32* [[add4]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl4]], i32 0 + // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[vec4:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec4]] to <[[NUM]] x i32> + // CHECK: [[res4:%[0-9]*]] = add <[[NUM]] x i32> [[spt4]], [[vec4]] + // CHECK: [[bres4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res4]], zeroinitializer + // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32> + res[4] = scales[4] + things[4]; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 5 + // CHECK: [[scl5:%[0-9]*]] = load i32, i32* [[add5]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl5]], i32 0 + // CHECK: [[spt5:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], 
zeroinitializer + // CHECK: [[vec5:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec5]] to <[[NUM]] x i32> + // CHECK: [[res5:%[0-9]*]] = sub <[[NUM]] x i32> [[spt5]], [[vec5]] + // CHECK: [[bres5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res5]], zeroinitializer + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32> + res[5] = scales[5] - things[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 6 + // CHECK: [[scl6:%[0-9]*]] = load i32, i32* [[add6]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl6]], i32 0 + // CHECK: [[spt6:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[bvec6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec6]], zeroinitializer + // CHECK: [[vec6:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec6]] to <[[NUM]] x i32> + // CHECK: [[res6:%[0-9]*]] = mul nuw <[[NUM]] x i32> [[spt6]], [[vec6]] + // CHECK: [[bres6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res6]], zeroinitializer + // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[bres6]] to <[[NUM]] x i32> + res[6] = scales[6] * things[6]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 1 + // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 2 + // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 3 + // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = 
getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 6 + // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[add6]] + // CHECK: ret void + + + return res; +} + +// Test logic operators. +// Only permissable in pre-HLSL2021 +// CHECK-LABEL: define void @"\01?logic +export vector logic(vector truth[10], vector consequences[10])[10] { + vector res[10]; + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[cmp:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + // CHECK: [[cmp0:%[0-9]*]] = icmp eq <[[NUM]] x i1> [[cmp]], zeroinitializer + // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp0]] to <[[NUM]] x i32> + res[0] = !truth[0]; + + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]] + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[bres1:%[0-9]*]] = or <[[NUM]] x i1> [[bvec2]], [[bvec1]] + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32> + res[1] = 
truth[1] || truth[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]] + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[bres2:%[0-9]*]] = and <[[NUM]] x i1> [[bvec3]], [[bvec2]] + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + res[2] = truth[2] && truth[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]] + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]] + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // MORE STUFF + + res[3] = truth[3] ? 
truth[4] : truth[5]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[bvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]] + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer + // CHECK: [[bres4:%[0-9]*]] = icmp eq <[[NUM]] x i1> [[bvec0]], [[bvec1]] + // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32> + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[bres5:%[0-9]*]] = icmp {{u?}}ne <[[NUM]] x i1> [[bvec1]], [[bvec2]] + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32> + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]] + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[bres6:%[0-9]*]] = icmp {{[osu]?}}lt <[[NUM]] x i1> [[bvec2]], [[bvec3]] + // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[bres6]] to <[[NUM]] x i32> + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x 
i32>, <[[NUM]] x i32>* [[add4]] + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[bres7:%[0-9]*]] = icmp {{[osu]]?}}gt <[[NUM]] x i1> [[bvec3]], [[bvec4]] + // CHECK: [[res7:%[0-9]*]] = zext <[[NUM]] x i1> [[bres7]] to <[[NUM]] x i32> + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]] + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // CHECK: [[bres8:%[0-9]*]] = icmp {{[osu]]?}}le <[[NUM]] x i1> [[bvec4]], [[bvec5]] + // CHECK: [[res8:%[0-9]*]] = zext <[[NUM]] x i1> [[bres8]] to <[[NUM]] x i32> + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add6]] + // CHECK: [[bvec6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec6]], zeroinitializer + // CHECK: [[bres9:%[0-9]*]] = icmp {{[osu]?}}ge <[[NUM]] x i1> [[bvec5]], [[bvec6]] + // CHECK: [[res9:%[0-9]*]] = zext <[[NUM]] x i1> [[bres9]] to <[[NUM]] x i32> + res[9] = consequences[5] >= consequences[6]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x 
<[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 6 + // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[add6]] + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 7 + // CHECK: store <[[NUM]] x i32> [[res7]], <[[NUM]] x i32>* [[add7]] + // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 8 + // CHECK: store <[[NUM]] x i32> [[res8]], <[[NUM]] x i32>* [[add8]] + // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 9 + // CHECK: store <[[NUM]] x i32> [[res9]], <[[NUM]] x i32>* [[add9]] + // CHECK: ret void + + return res; +} + +static const int Ix = 2; + +// Test indexing operators +// CHECK-LABEL: define void @"\01?index +export vector index(vector things[10], int i, bool val)[10] { + vector res[10]; + + // CHECK: [[res:%[0-9]*]] = alloca [10 x <[[NUM]] x i32>] + // CHECK: [[res0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> zeroinitializer, <[[NUM]] x i32>* [[res0]] + res[0] = 0; + + // CHECK: [[resi:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 %i + // CHECK: store <[[NUM]] x i32> , <[[NUM]] x i32>* [[resi]] + res[i] = 1; + + // CHECK: [[res2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 2 + // CHECK: store <[[NUM]] x i32> , <[[NUM]] x i32>* [[res2]] + res[Ix] = true; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 0 + // CHECK: [[thg0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[bthg0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[thg0]], zeroinitializer + // CHECK: [[res3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x 
i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 3 + // CHECK: [[thg0:%[0-9]*]] = zext <[[NUM]] x i1> [[bthg0]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[thg0]], <[[NUM]] x i32>* [[res3]] + res[3] = things[0]; + + // CHECK: [[addi:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 %i + // CHECK: [[thgi:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[addi]] + // CHECK: [[bthgi:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[thgi]], zeroinitializer + // CHECK: [[res4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 4 + // CHECK: [[thgi:%[0-9]*]] = zext <[[NUM]] x i1> [[bthgi]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[thgi]], <[[NUM]] x i32>* [[res4]] + res[4] = things[i]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 2 + // CHECK: [[thg2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[bthg2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[thg2]], zeroinitializer + // CHECK: [[res5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 5 + // CHECK: [[thg2:%[0-9]*]] = zext <[[NUM]] x i1> [[bthg2]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[thg2]], <[[NUM]] x i32>* [[res5]] + res[5] = things[Ix]; + // CHECK: ret void + return res; + +} + +// Test bit twiddling operators. 
+// CHECK-LABEL: define void @"\01?bittwiddlers +export void bittwiddlers(inout vector things[10]) { + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[vec2:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec2]] to <[[NUM]] x i32> + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[vec3:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec3]] to <[[NUM]] x i32> + // CHECK: [[res1:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec3]], [[vec2]] + // CHECK: [[bres1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res1]], zeroinitializer + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 1 + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]] + things[1] = things[2] | things[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[bres2:%[0-9]*]] = and <[[NUM]] x i1> [[bvec4]], [[bvec3]] + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + // CHECK: [[bres2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res2]], zeroinitializer + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x 
[[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[add2]] + things[2] = things[3] & things[4]; + + // CHECK: [[vec4:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec4]] to <[[NUM]] x i32> + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // CHECK: [[vec5:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec5]] to <[[NUM]] x i32> + // CHECK: [[res3:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + // CHECK: [[bres3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res3]], zeroinitializer + // CHECK: [[res3:%[0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]] + things[3] = things[4] ^ things[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]] + // CHECK: [[bvec6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec6]], zeroinitializer + // CHECK: [[bres4:%[0-9]*]] = or <[[NUM]] x i1> [[bvec6]], [[bvec4]] + // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]] + things[4] |= things[6]; + + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7 + // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add7]] + // CHECK: [[bvec7:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec7]], zeroinitializer + // CHECK: [[bres5:%[0-9]*]] = and <[[NUM]] x i1> [[bvec7]], [[bvec5]] + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x 
[[TYPE]]>* [[add5]] + things[5] &= things[7]; + + // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8 + // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add8]] + // CHECK: [[bvec8:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec8]], zeroinitializer + // CHECK: [[bres6:%[0-9]*]] = xor <[[NUM]] x i1> [[bvec6]], [[bvec8]] + // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[bres6]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]] + things[6] ^= things[8]; + + // CHECK: ret void +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-cs.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-cs.hlsl new file mode 100644 index 0000000000..0a115bd709 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-cs.hlsl @@ -0,0 +1,719 @@ +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float -DNUM=2 %s | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float -DNUM=17 %s | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=int -DNUM=2 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=uint -DNUM=5 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=double -DNUM=3 -DDBL %s | FileCheck %s --check-prefixes=CHECK,DBL,NOINT +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=uint64_t -DNUM=9 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float16_t -DNUM=17 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=int16_t -DNUM=33 -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG + +// Linking tests. 
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=6 -Fo %t.1 %s +// RUN: %dxl -T cs_6_9 %t.1 | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DNUM=3 -DDBL -Fo %t.2 %s +// RUN: %dxl -T cs_6_9 %t.2 | FileCheck %s --check-prefixes=CHECK,DBL,NOINT +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint16_t -DNUM=12 -DINT -enable-16bit-types -Fo %t.3 %s +// RUN: %dxl -T cs_6_9 %t.3 | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG + +// Test relevant operators on an assortment vector sizes and types with 6.9 native vectors. +// Tests in a CS environment where vector operations were previously disallowed to confirm that they are retained. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. +// Uses non vector buffer to avoid interacting with that implementation. +// CHECK-DAG: %dx.types.ResRet.[[TY:v[0-9]*[a-z][0-9]*]] = type { <[[NUM:[0-9]*]] x [[TYPE:[a-z_0-9]*]]> +// CHECK-DAG: %dx.types.ResRet.[[STY:[a-z][0-9]*]] = type { [[STYPE:[a-z0-9_]*]] +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> + +void assignments(inout vector things[11], TYPE scales[10]); +vector arithmetic(inout vector things[11])[11]; +vector scarithmetic(vector things[11], TYPE scales[10])[11]; +vector logic(vector truth[10], vector consequences[11])[10]; +vector index(vector things[11], int i)[11]; +void bittwiddlers(inout vector things[13]); + +struct Viface { + vector values[11]; +}; + +struct Siface { + TYPE values[10]; +}; + +struct Liface { + vector values[10]; +}; + +struct Binface { + vector values[13]; +}; + +RWStructuredBuffer Input : register(u11); +RWStructuredBuffer Output : register(u12); +RWStructuredBuffer Scales : register(u13); +RWStructuredBuffer Truths : register(u14); +RWStructuredBuffer Bits : register(u15); +RWStructuredBuffer > Offsets : register(u16); + +[shader("compute")] +[numthreads(8,1,1)] +// CHECK-LABEL: define void @main +void 
main(uint3 GID : SV_GroupThreadID) { + + // CHECK-DAG: [[Input:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 11, i32 11, i32 0, i8 1 }, i32 11 + // CHECK-DAG: [[Output:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 12, i32 12, i32 0, i8 1 }, i32 12 + // CHECK-DAG: [[Scales:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 13, i32 13, i32 0, i8 1 }, i32 13 + // CHECK-DAG: [[Truths:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 14, i32 14, i32 0, i8 1 }, i32 14 + // INT-DAG: [[Bits:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 15, i32 15, i32 0, i8 1 }, i32 15 + + // CHECK: [[InIx1:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 0) + // CHECK: [[InIx2:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 1) + // CHECK: [[OutIx:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 2) + // CHECK: [[scratch1:%.*]] = alloca [11 x <[[NUM]] x [[TYPE]]>] + // CHECK: [[scratch2:%.*]] = alloca [11 x <[[NUM]] x [[TYPE]]>] + + uint InIx1 = GID[0]; + uint InIx2 = GID[1]; + uint OutIx = GID[2]; + + // Assign vector offsets to capture the expected values. + // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 0, i32 0, <13 x i32> + Offsets[0] = vector(sizeof(vector)*0, + sizeof(vector)*1, + sizeof(vector)*2, + sizeof(vector)*3, + sizeof(vector)*4, + sizeof(vector)*5, + sizeof(vector)*6, + sizeof(vector)*7, + sizeof(vector)*8, + sizeof(vector)*9, + sizeof(vector)*10, + sizeof(vector)*11, + sizeof(vector)*12); + + // Assign scalar offsets to capture the expected values. 
+ // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 1, i32 0, <13 x i32> + Offsets[1] = vector(sizeof(TYPE)*0, + sizeof(TYPE)*1, + sizeof(TYPE)*2, + sizeof(TYPE)*3, + sizeof(TYPE)*4, + sizeof(TYPE)*5, + sizeof(TYPE)*6, + sizeof(TYPE)*7, + sizeof(TYPE)*8, + sizeof(TYPE)*9, + sizeof(TYPE)*10, + sizeof(TYPE),// Effectively alignof. + sizeof(int));// Effectively integer alignof. + + // Assign boolean offsets to capture the expected values. + // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 2, i32 0, <13 x i32> + Offsets[2] = vector(sizeof(vector)*0, + sizeof(vector)*1, + sizeof(vector)*2, + sizeof(vector)*3, + sizeof(vector)*4, + sizeof(vector)*5, + sizeof(vector)*6, + sizeof(vector)*7, + sizeof(vector)*8, + sizeof(vector)*9, + sizeof(vector)*10, + sizeof(vector)*11, + sizeof(vector)*12); + + assignments(Input[InIx1+1].values, Scales[InIx2+1].values); + Output[OutIx+2].values = arithmetic(Input[InIx1+2].values); + Output[OutIx+3].values = scarithmetic(Input[InIx1+3].values, Scales[InIx2+3].values); + Truths[OutIx+4].values = logic(Truths[InIx2+4].values, Input[InIx1+4].values); + Output[OutIx+5].values = index(Input[InIx1+5].values, InIx2+5); +#ifdef INT + bittwiddlers(Bits[InIx1+6].values); +#endif +} + +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. 
+void assignments(inout vector things[11], TYPE scales[10]) { + + // CHECK: [[VcIx:%.*]] = add i32 [[InIx1]], 1 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], i32 [[ALN]]) + // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], i32 [[ALN]]) + // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF3]], i32 [[ALN]]) + // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], i32 [[ALN]]) + // CHECK: [[vec7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, 
%dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF8]], i32 [[ALN]]) + // CHECK: [[vec8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], i32 [[ALN]]) + // CHECK: [[vec9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[ScIx:%.*]] = add i32 [[InIx2]], 1 + // CHECK: [[ScHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Scales]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[scl0:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[SOFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[scl1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[SOFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[scl2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[SOFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[scl3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[SOFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[scl4:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl0]], i32 0 + // CHECK: [[res0:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + 
things[0] = scales[0]; + + // CHECK: [[res1:%[0-9]*]] = [[ADD:f?add( fast)?]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec1]] + things[1] += things[5]; + + // CHECK: [[res2:%[0-9]*]] = [[SUB:f?sub( fast)?]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec6]] + things[2] -= things[6]; + + // CHECK: [[res3:%[0-9]*]] = [[MUL:f?mul( fast)?]] <[[NUM]] x [[TYPE]]> [[vec7]], [[vec3]] + things[3] *= things[7]; + + // CHECK: [[res4:%[0-9]*]] = [[DIV:[ufs]?div( fast)?]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec8]] + things[4] /= things[8]; + +#ifdef DBL + // DBL can't use remainder operator, do something anyway to keep the rest consistent. + // DBL: [[fvec9:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec9]] to <[[NUM]] x float> + // DBL: [[fvec5:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec5]] to <[[NUM]] x float> + // DBL: [[fres5:%[0-9]*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x float> [[fvec5]], [[fvec9]] + // DBL: [[res5:%[0-9]*]] = fpext <[[NUM]] x float> [[fres5]] to <[[NUM]] x double> + vector f9 = (vector)things[9]; + vector f5 = (vector)things[5]; + f5 %= f9; + things[5] = f5; +#else + // NODBL: [[res5:%[0-9]*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec9]] + things[5] %= things[9]; +#endif + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl1]], i32 0 + // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res6:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt1]], [[vec6]] + things[6] += scales[1]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl2]], i32 0 + // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res7:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec7]], [[spt2]] + things[7] -= scales[2]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl3]], i32 0 + // CHECK: 
[[spt3:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res8:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt3]], [[vec8]] + things[8] *= scales[3]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl4]], i32 0 + // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res9:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec9]], [[spt4]] + things[9] /= scales[4]; + + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF0]], <[[NUM]] x [[TYPE]]> [[res0]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[res1]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> [[res2]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF3]], <[[NUM]] x [[TYPE]]> [[res3]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], <[[NUM]] x [[TYPE]]> [[res4]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[res5]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[res6]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[res7]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 
[[VcIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[res8]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[res9]], i32 [[ALN]]) + +} + +// Test arithmetic operators. +vector arithmetic(inout vector things[11])[11] { + vector res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 2 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[VecIx:%.*]] = add i32 [[InIx1]], 2 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 [[ALN]]) + // CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 [[ALN]]) + // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 [[ALN]]) + // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 [[ALN]]) + // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 
[[VecIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF7]], i32 [[ALN]]) + // CHECK: [[vec7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF8]], i32 [[ALN]]) + // CHECK: [[vec8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF9]], i32 [[ALN]]) + // CHECK: [[vec9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF10]], i32 [[ALN]]) + // CHECK: [[vec10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // NOINT: [[res0:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> <[[TYPE]] {{-?(0|0\.0*e\+0*|0xH8000),.*}}>, [[vec0]] + // INT: [[res0:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> zeroinitializer, [[vec0]] + res[0] = -things[0]; + res[1] = +things[0]; + + // CHECK: [[res2:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec1]] + res[2] = things[1] + things[2]; + + // CHECK: [[res3:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]] + res[3] = things[2] - things[3]; + + // CHECK: [[res4:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec3]] + res[4] = things[3] * things[4]; + + // CHECK: [[res5:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + res[5] 
= things[4] / things[5]; + + // DBL: [[fvec5:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec5]] to <[[NUM]] x float> +#ifdef DBL + // DBL can't use remainder operator, do something anyway to keep the rest consistent. + // DBL: [[fvec6:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec6]] to <[[NUM]] x float> + // DBL: [[fres6:%[0-9]*]] = [[REM]] <[[NUM]] x float> [[fvec5]], [[fvec6]] + // DBL: [[res6:%[0-9]*]] = fpext <[[NUM]] x float> [[fres6]] to <[[NUM]] x double> + res[6] = (vector)things[5] % (vector)things[6]; +#else + // NODBL: [[res6:%[0-9]*]] = [[REM]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[res7:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec7]], <[[TYPE]] [[POS1:(1|1\.0*e\+0*|0xH3C00)]] + res[7] = things[7]++; + + // CHECK: [[res8:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec8]], <[[TYPE]] [[NEG1:(-1|-1\.0*e\+0*|0xHBC00)]] + res[8] = things[8]--; + + // CHECK: [[res9:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec9]], <[[TYPE]] [[POS1]] + res[9] = ++things[9]; + + // CHECK: [[res10:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec10]], <[[TYPE]] [[NEG1]] + res[10] = --things[10]; + + // Things[] input gets all the result values since pre/post inc/decrements don't change the end result. 
+ // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[res7]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[res8]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[res9]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF10]], <[[NUM]] x [[TYPE]]> [[res10]], i32 [[ALN]]) + + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF0]], <[[NUM]] x [[TYPE]]> [[res0]], i32 [[ALN]]) + // res1 is just vec0 since it was just the unary + operator. + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[vec0]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> [[res2]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], <[[NUM]] x [[TYPE]]> [[res3]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], <[[NUM]] x [[TYPE]]> [[res4]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[res5]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[res6]], i32 [[ALN]]) + // res[] input gets either the original or the preincremented value. 
+ // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[vec7]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[vec8]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[res9]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF10]], <[[NUM]] x [[TYPE]]> [[res10]], i32 [[ALN]]) + + return res; +} + +// Test arithmetic operators with scalars. +vector scarithmetic(vector things[11], TYPE scales[10])[11] { + vector res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 3 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[VecIx:%.*]] = add i32 [[InIx1]], 3 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 [[ALN]]) + // CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 [[ALN]]) + // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 [[ALN]]) + // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 
[[VecIx]], i32 [[OFF3]], i32 [[ALN]]) + // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[SclIx:%.*]] = add i32 [[InIx2]], 3 + // CHECK: [[SclHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Scales]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[scl0:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[scl1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[scl2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[scl3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: 
[[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[scl4:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[scl5:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[scl6:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl0]], i32 0 + // CHECK: [[spt0:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res0:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt0]], [[vec0]] + res[0] = things[0] + scales[0]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl1]], i32 0 + // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res1:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec1]], [[spt1]] + res[1] = things[1] - scales[1]; + + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl2]], i32 0 + // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res2:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt2]], [[vec2]] + res[2] = things[2] * scales[2]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl3]], i32 0 + // CHECK: [[spt3:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> 
zeroinitializer + // CHECK: [[res3:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec3]], [[spt3]] + res[3] = things[3] / scales[3]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl4]], i32 0 + // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res4:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt4]], [[vec4]] + res[4] = scales[4] + things[4]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl5]], i32 0 + // CHECK: [[spt5:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res5:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[spt5]], [[vec5]] + res[5] = scales[5] - things[5]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl6]], i32 0 + // CHECK: [[spt6:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res6:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt6]], [[vec6]] + res[6] = scales[6] * things[6]; + res[7] = res[8] = res[9] = res[10] = 0; + + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF0]], <[[NUM]] x [[TYPE]]> [[res0]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[res1]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> [[res2]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], <[[NUM]] x [[TYPE]]> [[res3]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 
[[OFF4]], <[[NUM]] x [[TYPE]]> [[res4]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[res5]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[res6]], i32 [[ALN]]) + + return res; +} + +// Test logic operators. +// Only permissable in pre-HLSL2021 +vector logic(vector truth[10], vector consequences[11])[10] { + vector res[10]; + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 4 + // CHECK: [[TruHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Truths]] + // CHECK: [[TruIx:%.*]] = add i32 [[InIx2]], 4 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF0]], i32 [[IALN]]) + // CHECK: [[ivec0:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF1]], i32 [[IALN]]) + // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF2]], i32 [[IALN]]) + // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF3]], i32 [[IALN]]) + // CHECK: [[ivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF4]], i32 [[IALN]]) + // CHECK: [[ivec4:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] 
[[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF5]], i32 [[IALN]]) + // CHECK: [[ivec5:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + + // CHECK: [[VecIx:%.*]] = add i32 [[InIx1]], 4 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 [[ALN]]) + // CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 [[ALN]]) + // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 [[ALN]]) + // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 [[ALN]]) + // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 
[[VecIx]], i32 [[OFF6]], i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + + // CHECK: [[cmp:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec0]], zeroinitializer + // CHECK: [[cmp0:%[0-9]*]] = icmp eq <[[NUM]] x i1> [[cmp]], zeroinitializer + // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp0]] to <[[NUM]] x i32> + res[0] = !truth[0]; + + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec1]], zeroinitializer + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec2]], zeroinitializer + // CHECK: [[bres1:%[0-9]*]] = or <[[NUM]] x i1> [[bvec2]], [[bvec1]] + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32> + res[1] = truth[1] || truth[2]; + + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec3]], zeroinitializer + // CHECK: [[bres2:%[0-9]*]] = and <[[NUM]] x i1> [[bvec3]], [[bvec2]] + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + res[2] = truth[2] && truth[3]; + + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec4]], zeroinitializer + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec5]], zeroinitializer + // CHECK: [[bres3:%[0-9]*]] = select <[[NUM]] x i1> [[bvec3]], <[[NUM]] x i1> [[bvec4]], <[[NUM]] x i1> [[bvec5]] + // CHECK: [[res3:%[0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + res[3] = truth[3] ? 
truth[4] : truth[5]; + + // CHECK: [[cmp4:%[0-9]*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq <[[NUM]] x [[TYPE]]> [[vec0]], [[vec1]] + // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp4]] to <[[NUM]] x i32> + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[cmp5:%[0-9]*]] = [[CMP]] {{u?}}ne <[[NUM]] x [[TYPE]]> [[vec1]], [[vec2]] + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp5]] to <[[NUM]] x i32> + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[cmp6:%[0-9]*]] = [[CMP]] {{[osu]?}}lt <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]] + // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp6]] to <[[NUM]] x i32> + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[cmp7:%[0-9]*]] = [[CMP]] {{[osu]]?}}gt <[[NUM]] x [[TYPE]]> [[vec3]], [[vec4]] + // CHECK: [[res7:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp7]] to <[[NUM]] x i32> + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[cmp8:%[0-9]*]] = [[CMP]] {{[osu]]?}}le <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + // CHECK: [[res8:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp8]] to <[[NUM]] x i32> + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[cmp9:%[0-9]*]] = [[CMP]] {{[osu]?}}ge <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]] + // CHECK: [[res9:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp9]] to <[[NUM]] x i32> + res[9] = consequences[5] >= consequences[6]; + + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF0]], <[[NUM]] x i32> [[res0]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF1]], <[[NUM]] x i32> [[res1]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF2]], <[[NUM]] x i32> [[res2]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF3]], <[[NUM]] x i32> [[res3]], i32 4) + // CHECK: 
call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF4]], <[[NUM]] x i32> [[res4]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF5]], <[[NUM]] x i32> [[res5]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF6]], <[[NUM]] x i32> [[res6]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF7]], <[[NUM]] x i32> [[res7]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF8]], <[[NUM]] x i32> [[res8]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF9]], <[[NUM]] x i32> [[res9]], i32 4) + + return res; +} + +static const int Ix = 2; + +// Test indexing operators +vector index(vector things[11], int i)[11] { + vector res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 5 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[VecIx:%.*]] = add i32 [[InIx1]], 5 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 [[ALN]]) + // CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec0]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, 
i32 1 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 [[ALN]]) + // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec1]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 2 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 [[ALN]]) + // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec2]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 3 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 [[ALN]]) + // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec3]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 4 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec4]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 5 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle 
[[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec5]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 6 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec6]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 7 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF7]], i32 [[ALN]]) + // CHECK: [[vec7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec7]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 8 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF8]], i32 [[ALN]]) + // CHECK: [[vec8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec8]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 9 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF9]], i32 [[ALN]]) + // CHECK: [[vec9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] 
[[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec9]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 10 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF10]], i32 [[ALN]]) + // CHECK: [[vec10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec10]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + + // CHECK: [[Ix:%.*]] = add i32 [[InIx2]], 5 + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> zeroinitializer, <[[NUM]] x [[TYPE]]>* [[adr0]], align [[ALN]] + res[0] = 0; + + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 [[Ix]] + // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] [[POS1]],{{[^>]*}}>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[i] = 1; + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 2 + // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] [[TWO:(2|2\.?0*e?\+?0*|0xH4000)]],{{[^>]*}}>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[Ix] = 2; + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 3 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec0]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[3] = things[0]; + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 [[Ix]] + // CHECK: [[ldix:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x 
[[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 4 + // CHECK: store <[[NUM]] x [[TYPE]]> [[ldix]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[4] = things[i]; + + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 5 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec2]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[5] = things[Ix]; + + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr0]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 0, <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 1 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> <[[TYPE]] [[TWO]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], <[[NUM]] x [[TYPE]]> [[vec0]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], <[[NUM]] x [[TYPE]]> [[ldix]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[vec2]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 6 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // 
CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 7 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 8 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 9 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 10 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF10]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + + return res; +} + +#ifdef INT +// Test bit twiddling operators. 
+void bittwiddlers(inout vector things[13]) { + // INT: [[VcIx:%.*]] = add i32 [[InIx1]], 6 + // INT: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Bits]] + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], i32 [[ALN]]) + // INT: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], i32 [[ALN]]) + // INT: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF3]], i32 [[ALN]]) + // INT: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], i32 [[ALN]]) + // INT: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], i32 [[ALN]]) + // INT: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], i32 [[ALN]]) + // INT: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], i32 [[ALN]]) + // INT: [[vec7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF8]], 
i32 [[ALN]]) + // INT: [[vec8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], i32 [[ALN]]) + // INT: [[vec9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF10]], i32 [[ALN]]) + // INT: [[vec10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF11]], i32 [[ALN]]) + // INT: [[vec11:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF12]], i32 [[ALN]]) + // INT: [[vec12:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // INT: [[res0:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec1]], <[[TYPE]] -1 + things[0] = ~things[1]; + + // INT: [[res1:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec3]], [[vec2]] + things[1] = things[2] | things[3]; + + // INT: [[res2:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec4]], [[vec3]] + things[2] = things[3] & things[4]; + + // INT: [[res3:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + things[3] = things[4] ^ things[5]; + + // INT: [[shv6:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec6]] + // INT: [[res4:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[vec5]], [[shv6]] + things[4] = things[5] << things[6]; + + // INT: [[shv7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec7]] + // UNSIG: [[res5:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[vec6]], [[shv7]] + // SIG: [[res5:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[vec6]], [[shv7]] + things[5] = things[6] >> things[7]; + + // INT: [[res6:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec8]], [[vec6]] + things[6] |= 
things[8]; + + // INT: [[res7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec9]], [[vec7]] + things[7] &= things[9]; + + // INT: [[res8:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec8]], [[vec10]] + things[8] ^= things[10]; + + // INT: [[shv11:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec11]] + // INT: [[res9:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[vec9]], [[shv11]] + things[9] <<= things[11]; + + // INT: [[shv12:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec12]] + // UNSIG: [[res10:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[vec10]], [[shv12]] + // SIG: [[res10:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[vec10]], [[shv12]] + things[10] >>= things[12]; + + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF0]], <[[NUM]] x [[TYPE]]> [[res0]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[res1]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> [[res2]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF3]], <[[NUM]] x [[TYPE]]> [[res3]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], <[[NUM]] x [[TYPE]]> [[res4]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[res5]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[res6]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[res7]], i32 [[ALN]]) + // INT: call void 
@dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[res8]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[res9]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF10]], <[[NUM]] x [[TYPE]]> [[res10]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF11]], <[[NUM]] x [[TYPE]]> [[vec11]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF12]], <[[NUM]] x [[TYPE]]> [[vec12]], i32 [[ALN]]) + + // CHECK-LABEL: ret void +} +#endif // INT diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl new file mode 100644 index 0000000000..b749a3b255 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl @@ -0,0 +1,73 @@ +// RUN: %dxc -T lib_6_9 -DTYPE=uint -DNUM=5 %s | FileCheck %s --check-prefixes=CHECK,UNSIG +// RUN: %dxc -T lib_6_9 -DTYPE=int64_t -DNUM=3 %s | FileCheck %s --check-prefixes=CHECK,SIG +// RUN: %dxc -T lib_6_9 -DTYPE=uint16_t -DNUM=9 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,UNSIG + +// Test bitwise operators on an assortment vector sizes and integer types with 6.9 native vectors. + +// Test bit twiddling operators. 
+// CHECK-LABEL: define void @"\01?bittwiddlers +// CHECK-SAME: ([11 x <[[NUM:[0-9][0-9]*]] x [[TYPE:[a-z0-9]*]]>]* +export void bittwiddlers(inout vector things[11]) { + // CHECK: [[adr1:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]] + // CHECK: [[res1:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec1]], <[[TYPE]] -1, + // CHECK: [[adr0:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr0]] + things[0] = ~things[1]; + + // CHECK: [[adr2:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]] + + // CHECK: [[adr3:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]] + // CHECK: [[res1:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec3]], [[vec2]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr1]] + things[1] = things[2] | things[3]; + + // CHECK: [[adr4:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]] + // CHECK: [[res2:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec4]], [[vec3]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[adr2]] + things[2] = things[3] & things[4]; + + // CHECK: [[adr5:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]] + // 
CHECK: [[res3:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[adr3]] + things[3] = things[4] ^ things[5]; + + // CHECK: [[adr6:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr6]] + // CHECK: [[shv6:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec6]], <[[TYPE]] + // CHECK: [[res4:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[vec5]], [[shv6]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[adr4]] + things[4] = things[5] << things[6]; + + // CHECK: [[adr7:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7 + // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr7]] + // CHECK: [[shv7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec7]], <[[TYPE]] + // UNSIG: [[res5:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[vec6]], [[shv7]] + // SIG: [[res5:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[vec6]], [[shv7]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[adr5]] + things[5] = things[6] >> things[7]; + + // CHECK: [[adr8:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8 + // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr8]] + // CHECK: [[res6:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec8]], [[vec6]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[adr6]] + things[6] |= things[8]; + + // CHECK: [[adr9:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9 + // CHECK: [[vec9:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr9]] + // CHECK: [[res7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec9]], [[vec7]] + // CHECK: store <[[NUM]] x [[TYPE]]> 
[[res7]], <[[NUM]] x [[TYPE]]>* [[adr7]] + things[7] &= things[9]; + + // CHECK: [[adr10:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 10 + // CHECK: [[vec10:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr10]] + // CHECK: [[res8:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec8]], [[vec10]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[adr8]] + things[8] ^= things[10]; + + // CHECK: ret void +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-scalars.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-scalars.hlsl new file mode 100644 index 0000000000..8b12b96c80 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-scalars.hlsl @@ -0,0 +1,342 @@ +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DDBL %s | FileCheck %s --check-prefixes=CHECK,DBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int64_t %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL + +// Test relevant operators on an assortment bool vector sizes and types with 6.9 native vectors. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. 
+// CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[TYPE:[a-z0-9_]*]] +RWStructuredBuffer buf; + +export void assignments(inout TYPE things[10], TYPE scales[10]); +export TYPE arithmetic(inout TYPE things[11])[11]; +export bool logic(bool truth[10], TYPE consequences[10])[10]; +export TYPE index(TYPE things[10], int i, TYPE val)[10]; + +struct Interface { + TYPE assigned[10]; + TYPE arithmeticked[11]; + bool logicked[10]; + TYPE indexed[10]; + TYPE scales[10]; +}; + +#if 0 +// Requires vector loading support. Enable when available. +RWStructuredBuffer Input; +RWStructuredBuffer Output; + +TYPE g_val; + +[shader("compute")] +[numthreads(8,1,1)] +void main(uint GI : SV_GroupIndex) { + assignments(Output[GI].assigned, Input[GI].scales); + Output[GI].arithmeticked = arithmetic(Input[GI].arithmeticked); + Output[GI].logicked = logic(Input[GI].logicked, Input[GI].assigned); + Output[GI].indexed = index(Input[GI].indexed, GI, g_val); +} +#endif + +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. 
+// CHECK-LABEL: define void @"\01?assignments +export void assignments(inout TYPE things[10]) { + + // CHECK: [[buf:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle {{%.*}}, i32 1, i32 0, i8 1, i32 {{(8|4|2)}}) + // CHECK: [[res0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[buf]], 0 + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]] + things[0] = buf.Load(1); + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 5 + // CHECK: [[val5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 1 + // CHECK: [[val1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[res1:%.*]] = [[ADD:f?add( fast| nsw)?]] [[TYPE]] [[val1]], [[val5]] + // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]] + things[1] += things[5]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 6 + // CHECK: [[val6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[val2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[res2:%.*]] = [[SUB:f?sub( fast| nsw)?]] [[TYPE]] [[val2]], [[val6]] + // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + things[2] -= things[6]; + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 7 + // CHECK: [[val7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[val3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[res3:%.*]] = [[MUL:f?mul( fast| nsw)?]] [[TYPE]] [[val3]], [[val7]] + // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + 
things[3] *= things[7]; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 8 + // CHECK: [[val8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[val4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[res4:%.*]] = [[DIV:[ufs]?div( fast| nsw)?]] [[TYPE]] [[val4]], [[val8]] + // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + things[4] /= things[8]; + + // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 9 + // CHECK: [[val9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]] +#ifdef DBL + // DBL: [[fvec9:%.*]] = fptrunc double [[val9]] to float + // DBL: [[fvec5:%.*]] = fptrunc double [[val5]] to float + // DBL: [[fres5:%.*]] = [[REM:[ufs]?rem( fast| nsw)?]] float [[fvec5]], [[fvec9]] + // DBL: [[res5:%.*]] = fpext float [[fres5]] to double + float f9 = things[9]; + float f5 = things[5]; + f5 %= f9; + things[5] = f5; +#else + // NODBL: [[res5:%.*]] = [[REM:[ufs]?rem( fast| nsw)?]] [[TYPE]] [[val5]], [[val9]] + things[5] %= things[9]; +#endif + // CHECK: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] +} + +// Test arithmetic operators. 
+// CHECK-LABEL: define void @"\01?arithmetic +export TYPE arithmetic(inout TYPE things[11])[11] { + TYPE res[11]; + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: [[res0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[res1:%.*]] = [[SUB]] [[TYPE]] {{-?(0|0\.0*e\+0*|0xH8000)}}, [[res0]] + res[0] = +things[0]; + res[1] = -things[0]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 1 + // CHECK: [[val1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[val2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[res2:%.*]] = [[ADD]] [[TYPE]] [[val2]], [[val1]] + res[2] = things[1] + things[2]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[val3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[res3:%.*]] = [[SUB]] [[TYPE]] [[val2]], [[val3]] + res[3] = things[2] - things[3]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[val4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[res4:%.*]] = [[MUL]] [[TYPE]] [[val4]], [[val3]] + res[4] = things[3] * things[4]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 5 + // CHECK: [[val5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[res5:%.*]] = [[DIV]] [[TYPE]] [[val4]], [[val5]] + res[5] = things[4] / things[5]; + + // DBL: [[fvec5:%.*]] = fptrunc double [[val5]] to float + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 6 + // CHECK: [[val6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] +#ifdef DBL + // DBL: [[fvec6:%.*]] = fptrunc double [[val6]] to float + // DBL: [[fres6:%.*]] = [[REM]] float [[fvec5]], 
[[fvec6]] + // DBL: [[res6:%.*]] = fpext float [[fres6]] to double + res[6] = (float)things[5] % (float)things[6]; +#else + // NODBL: [[res6:%.*]] = [[REM]] [[TYPE]] [[val5]], [[val6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 7 + // CHECK: [[val7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // CHECK: [[res7:%.*]] = [[ADD:f?add( fast| nsw)?]] [[TYPE]] [[val7]], {{(1|1\.?0*e?\+?0*|0xH3C00)}} + // CHECK: store [[TYPE]] [[res7]], [[TYPE]]* [[adr7]] + res[7] = things[7]++; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 8 + // CHECK: [[val8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // CHECK: [[res8:%.*]] = [[ADD]] [[TYPE]] [[val8]] + // CHECK: store [[TYPE]] [[res8]], [[TYPE]]* [[adr8]] + res[8] = things[8]--; + + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 9 + // CHECK: [[val9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]] + // CHECK: [[res9:%.*]] = [[ADD]] [[TYPE]] [[val9]] + // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + res[9] = ++things[9]; + + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 10 + // CHECK: [[val10:%.*]] = load [[TYPE]], [[TYPE]]* [[adr10]] + // CHECK: [[res10:%.*]] = [[ADD]] [[TYPE]] [[val10]] + // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + res[10] = --things[10]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 0 + // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 1 + // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 2 + // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + // 
CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 3 + // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 4 + // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 5 + // CHECK: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 6 + // CHECK: store [[TYPE]] [[res6]], [[TYPE]]* [[adr6]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 7 + // This is a post op, so the original value goes into res[]. + // CHECK: store [[TYPE]] [[val7]], [[TYPE]]* [[adr7]] + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 8 + // This is a post op, so the original value goes into res[]. + // CHECK: store [[TYPE]] [[val8]], [[TYPE]]* [[adr8]] + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 9 + // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 10 + // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + // CHECK: ret void + return res; +} + +// Test logic operators. 
+// Only permissable in pre-HLSL2021 +// CHECK-LABEL: define void @"\01?logic +export bool logic(bool truth[10], TYPE consequences[10])[10] { + bool res[10]; + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0 + // CHECK: [[val0:%.*]] = load i32, i32* [[adr0]] + // CHECK: [[res0:%.*]] = xor i32 [[val0]], 1 + res[0] = !truth[0]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1 + // CHECK: [[val1:%.*]] = load i32, i32* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + // CHECK: [[val2:%.*]] = load i32, i32* [[adr2]] + // CHECK: [[res1:%.*]] = or i32 [[val2]], [[val1]] + res[1] = truth[1] || truth[2]; + + // CHECK: [[bvec2:%.*]] = icmp ne i32 [[val2]], 0 + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + // CHECK: [[val3:%.*]] = load i32, i32* [[adr3]] + // CHECK: [[bvec3:%.*]] = icmp ne i32 [[val3]], 0 + // CHECK: [[bres2:%.*]] = and i1 [[bvec2]], [[bvec3]] + // CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32 + res[2] = truth[2] && truth[3]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4 + // CHECK: [[val4:%.*]] = load i32, i32* [[adr4]] + // CHECK: [[bvec4:%.*]] = icmp ne i32 [[val4]], 0 + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5 + // CHECK: [[val5:%.*]] = load i32, i32* [[adr5]] + // CHECK: [[bvec5:%.*]] = icmp ne i32 [[val5]], 0 + // CHECK: [[bres3:%.*]] = select i1 [[bvec3]], i1 [[bvec4]], i1 [[bvec5]] + // CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32 + res[3] = truth[3] ? 
truth[4] : truth[5]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 0 + // CHECK: [[val0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 1 + // CHECK: [[val1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[cmp4:%.*]] = [[CMP:[fi]?cmp( fast| nsw)?]] {{o?}}eq [[TYPE]] [[val0]], [[val1]] + // CHECK: [[res4:%.*]] = zext i1 [[cmp4]] to i32 + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 2 + // CHECK: [[val2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[cmp5:%.*]] = [[CMP]] {{u?}}ne [[TYPE]] [[val1]], [[val2]] + // CHECK: [[res5:%.*]] = zext i1 [[cmp5]] to i32 + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 3 + // CHECK: [[val3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[cmp6:%.*]] = [[CMP]] {{[osu]?}}lt [[TYPE]] [[val2]], [[val3]] + // CHECK: [[res6:%.*]] = zext i1 [[cmp6]] to i32 + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 4 + // CHECK: [[val4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[cmp7:%.*]] = [[CMP]] {{[osu]]?}}gt [[TYPE]] [[val3]], [[val4]] + // CHECK: [[res7:%.*]] = zext i1 [[cmp7]] to i32 + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 5 + // CHECK: [[val5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[cmp8:%.*]] = [[CMP]] {{[osu]]?}}le [[TYPE]] [[val4]], [[val5]] + // CHECK: [[res8:%.*]] = zext i1 [[cmp8]] to i32 + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[adr6:%.*]] = getelementptr 
inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 6 + // CHECK: [[val6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // CHECK: [[cmp9:%.*]] = [[CMP]] {{[osu]?}}ge [[TYPE]] [[val5]], [[val6]] + // CHECK: [[res9:%.*]] = zext i1 [[cmp9]] to i32 + res[9] = consequences[5] >= consequences[6]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 0 + // CHECK: store i32 [[res0]], i32* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 1 + // CHECK: store i32 [[res1]], i32* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 2 + // CHECK: store i32 [[res2]], i32* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 3 + // CHECK: store i32 [[res3]], i32* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 4 + // CHECK: store i32 [[res4]], i32* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 5 + // CHECK: store i32 [[res5]], i32* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 6 + // CHECK: store i32 [[res6]], i32* [[adr6]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 7 + // CHECK: store i32 [[res7]], i32* [[adr7]] + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 8 + // CHECK: store i32 [[res8]], i32* [[adr8]] + // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 9 + // CHECK: store i32 [[res9]], i32* [[adr9]] + + // CHECK: ret void + return res; +} + +static const int Ix = 2; + +// Test indexing operators +// CHECK-LABEL: define void @"\01?index +export TYPE index(TYPE things[10], int i)[10] { + // CHECK: [[res:%.*]] = alloca [10 x 
[[TYPE]]] + TYPE res[10]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 0 + // CHECK: store [[TYPE]] {{(0|0*\.?0*e?\+?0*|0xH0000)}}, [[TYPE]]* [[adr0]] + res[0] = 0; + + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 %i + // CHECK: store [[TYPE]] {{(1|1\.?0*e?\+?0*|0xH3C00)}}, [[TYPE]]* [[adri]] + res[i] = 1; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 2 + // CHECK: store [[TYPE]] {{(2|2\.?0*e?\+?0*|0xH4000)}}, [[TYPE]]* [[adr2]] + res[Ix] = 2; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: [[thg0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 3 + // CHECK: store [[TYPE]] [[thg0]], [[TYPE]]* [[adr3]] + res[3] = things[0]; + + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 %i + // CHECK: [[thgi:%.*]] = load [[TYPE]], [[TYPE]]* [[adri]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 4 + // CHECK: store [[TYPE]] [[thgi]], [[TYPE]]* [[adr4]] + res[4] = things[i]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[thg2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 5 + // CHECK: store [[TYPE]] [[thg2]], [[TYPE]]* [[adr5]] + res[5] = things[Ix]; + // CHECK: ret void + return res; +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-shortcircuit.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-shortcircuit.hlsl new file mode 100644 index 0000000000..cb2fd5f781 --- /dev/null +++ 
b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-shortcircuit.hlsl @@ -0,0 +1,57 @@ +// RUN: %dxc -HV 2018 -T lib_6_9 %s | FileCheck %s +// RUN: %dxc -HV 2018 -T lib_6_9 %s | FileCheck %s --check-prefix=NOBR + +// Test that no short-circuiting takes place for logic ops with native vectors. +// First run verifies that side effects result in stores. +// Second runline just makes sure there are no branches nor phis at all. + +// NOBR-NOT: br i1 +// NOBR-NOT: = phi + +export int4 logic(inout bool4 truth[5], inout int4 consequences[4]) { + // CHECK: [[adr0:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 0 + // CHECK: [[vec0:%.*]] = load <4 x i32>, <4 x i32>* [[adr0]] + // CHECK: [[bvec0:%.*]] = icmp ne <4 x i32> [[vec0]], zeroinitializer + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load <4 x i32>, <4 x i32>* [[adr1]] + // CHECK: [[add:%.*]] = add <4 x i32> [[vec1]], + // CHECK: store <4 x i32> [[add]], <4 x i32>* [[adr1]] + // CHECK: [[bvec1:%.*]] = icmp ne <4 x i32> [[vec1]], zeroinitializer + // CHECK: [[bres3:%.*]] = or <4 x i1> [[bvec1]], [[bvec0]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 3 + // CHECK: [[res3:%.*]] = zext <4 x i1> [[bres3]] to <4 x i32> + // CHECK: store <4 x i32> [[res3]], <4 x i32>* [[adr3]] + truth[3] = truth[0] || consequences[1]++; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load <4 x i32>, <4 x i32>* [[adr1]] + // CHECK: [[bvec1:%.*]] = icmp ne <4 x i32> [[vec1]], zeroinitializer + // CHECK: [[adr0:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 0 + // CHECK: [[vec0:%.*]] = load <4 x i32>, <4 x i32>* [[adr0]] + // CHECK: [[sub:%.*]] = add <4 x i32> [[vec0]], + // CHECK: store <4 x i32> [[sub]], <4 x i32>* 
[[adr0]] + // CHECK: [[bvec0:%.*]] = icmp ne <4 x i32> [[vec0]], zeroinitializer + // CHECK: [[bres4:%.*]] = and <4 x i1> [[bvec0]], [[bvec1]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 4 + // CHECK: [[res4:%.*]] = zext <4 x i1> [[bres4]] to <4 x i32> + // CHECK: store <4 x i32> [[res4]], <4 x i32>* [[adr4]] + truth[4] = truth[1] && consequences[0]--; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <4 x i32>, <4 x i32>* [[adr2]] + // CHECK: [[bcond:%.*]] = icmp ne <4 x i32> [[vec2]], zeroinitializer + // CHECK: [[adr2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <4 x i32>, <4 x i32>* [[adr2]] + // CHECK: [[add:%.*]] = add <4 x i32> %25, + // CHECK: store <4 x i32> [[add]], <4 x i32>* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <4 x i32>, <4 x i32>* [[adr3]] + // CHECK: [[sub:%.*]] = add <4 x i32> [[vec3]], + // CHECK: store <4 x i32> [[sub]], <4 x i32>* [[adr3]] + // CHECK: [[res:%.*]] = select <4 x i1> [[bcond]], <4 x i32> [[vec2]], <4 x i32> [[vec3]] + int4 res = truth[2] ? 
consequences[2]++ : consequences[3]--; + + // CHECK: ret <4 x i32> %30 + return res; +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s-cs.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s-cs.hlsl new file mode 100644 index 0000000000..ca239a5b22 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s-cs.hlsl @@ -0,0 +1,680 @@ +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=int -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=double -DDBL %s | FileCheck %s +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=uint64_t -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=int16_t -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG + +// Scalar variants to confirm they match. +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=int -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=double -DDBL %s | FileCheck %s +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=uint64_t -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=float16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=int16_t -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG + +// Linking tests. 
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -Fo %t.1 %s +// RUN: %dxl -T cs_6_9 %t.1 | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DDBL -Fo %t.2 %s +// RUN: %dxl -T cs_6_9 %t.2 | FileCheck %s --check-prefixes=CHECK,DBL,NOINT +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint16_t -DINT -enable-16bit-types -Fo %t.3 %s +// RUN: %dxl -T cs_6_9 %t.3 | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG + +// Test relevant operators on vec1s in a 6.9 compute shader to ensure they continue to be treated as scalars. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. +// CHECK-DAG: %dx.types.ResRet.[[TY:[a-z][0-9]*]] = type { [[TYPE:[a-z0-9_]*]] +// CHECK-DAG: %dx.types.ResRet.[[ITY:i32]] = type { i32 + +#ifdef SCL +#define VTYPE TYPE +#else +#define VTYPE vector +#endif + +void assignments(inout VTYPE things[11], TYPE scales[10]); +VTYPE arithmetic(inout VTYPE things[11])[11]; +VTYPE scarithmetic(VTYPE things[11], TYPE scales[10])[11]; +bool1 logic(bool1 truth[10], VTYPE consequences[11])[10]; +VTYPE index(VTYPE things[11], int i)[11]; +void bittwiddlers(inout VTYPE things[13]); + +struct Viface { + VTYPE values[11]; +}; + +struct Siface { + TYPE values[10]; +}; + +struct Liface { + bool1 values[10]; +}; + +struct Binface { + VTYPE values[13]; +}; + +RWStructuredBuffer Input : register(u11); +RWStructuredBuffer Output : register(u12); +RWStructuredBuffer Scales : register(u13); +RWStructuredBuffer Truths : register(u14); +RWStructuredBuffer Bits : register(u15); +RWStructuredBuffer > Offsets : register(u16); + +[shader("compute")] +[numthreads(8,1,1)] +// CHECK-LABEL: define void @main +void main(uint3 GID : SV_GroupThreadID) { + + // CHECK-DAG: [[Input:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 11, i32 11, i32 0, i8 1 }, i32 11 + // CHECK-DAG: [[Output:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 12, i32 12, i32 0, i8 1 }, i32 12 + // CHECK-DAG: [[Scales:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 13, i32 13, i32 0, i8 1 }, i32 13 + // CHECK-DAG: [[Truths:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 14, i32 14, i32 0, i8 1 }, i32 14 + // INT-DAG: [[Bits:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 15, i32 15, i32 0, i8 1 }, i32 15 + + // CHECK: [[InIx1:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 0) + // CHECK: [[InIx2:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 1) + // CHECK: [[OutIx:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 2) + + uint InIx1 = GID[0]; + uint InIx2 = GID[1]; + uint OutIx = GID[2]; + + // Assign vector offsets to capture the expected values. + // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 0, i32 0, <13 x i32> + Offsets[0] = vector(sizeof(TYPE)*0, + sizeof(TYPE)*1, + sizeof(TYPE)*2, + sizeof(TYPE)*3, + sizeof(TYPE)*4, + sizeof(TYPE)*5, + sizeof(TYPE)*6, + sizeof(TYPE)*7, + sizeof(TYPE)*8, + sizeof(TYPE)*9, + sizeof(TYPE)*10, + sizeof(TYPE)*11, + sizeof(TYPE)*12); + + // Assign boolean offsets to capture the expected values. + // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 1, i32 0, <13 x i32> + Offsets[1] = vector(sizeof(int)*0, + sizeof(int)*1, + sizeof(int)*2, + sizeof(int)*3, + sizeof(int)*4, + sizeof(int)*5, + sizeof(int)*6, + sizeof(int)*7, + sizeof(int)*8, + sizeof(int)*9, + sizeof(int)*10, + sizeof(TYPE),// Effectively alignof. + sizeof(int));// Effectively integer alignof. 
+ + assignments(Input[InIx1+1].values, Scales[InIx2+1].values); + Output[OutIx+2].values = arithmetic(Input[InIx1+2].values); + Output[OutIx+3].values = scarithmetic(Input[InIx1+3].values, Scales[InIx2+3].values); + Truths[OutIx+4].values = logic(Truths[InIx2+4].values, Input[InIx1+4].values); + Output[OutIx+5].values = index(Input[InIx1+5].values, InIx2+5); +#ifdef INT + bittwiddlers(Bits[InIx1+6].values); +#endif +} +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. +void assignments(inout VTYPE things[11], TYPE scales[10]) { + + // CHECK: [[InIx:%.*]] = add i32 [[InIx1]], 1 + + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] 
= call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF7]], i8 1, i32 [[ALN]]) + // CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF8]], i8 1, i32 [[ALN]]) + // CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF9]], i8 1, i32 [[ALN]]) + // CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF10]], i8 1, i32 [[ALN]]) + // CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + + // CHECK: [[ScIx:%.*]] = add i32 [[InIx2]], 1 + // CHECK: [[ScHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Scales]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[scl0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // Nothing to check. Just a copy over. 
+ things[0] = scales[0]; + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[scl1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[scl2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[scl3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[scl4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[res1:%.*]] = [[ADD:f?add( fast)?]]{{( nsw)?}} [[TYPE]] [[val5]], [[val1]] + things[1] += things[5]; + + // CHECK: [[res2:%.*]] = [[SUB:f?sub( fast)?]]{{( nsw)?}} [[TYPE]] [[val2]], [[val6]] + things[2] -= things[6]; + + // CHECK: [[res3:%.*]] = [[MUL:f?mul( fast)?]]{{( nsw)?}} [[TYPE]] [[val7]], [[val3]] + things[3] *= things[7]; + + // CHECK: [[res4:%.*]] = [[DIV:[ufs]?div( fast)?]]{{( nsw)?}} [[TYPE]] [[val4]], [[val8]] + things[4] /= things[8]; + +#ifdef DBL + things[5] = 0; // Gotta give it something in any case for validation. 
+#else + // NODBL: [[res5:%.*]] = [[REM:[ufs]?rem( fast)?]] [[TYPE]] [[val5]], [[val9]] + things[5] %= things[9]; +#endif + + // CHECK: [[res6:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[scl1]], [[val6]] + things[6] += scales[1]; + + // CHECK: [[res7:%[0-9]*]] = [[SUB]]{{( nsw)?}} [[TYPE]] [[val7]], [[scl2]] + things[7] -= scales[2]; + + // CHECK: [[res8:%[0-9]*]] = [[MUL]]{{( nsw)?}} [[TYPE]] [[scl3]], [[val8]] + things[8] *= scales[3]; + + // CHECK: [[res9:%[0-9]*]] = [[DIV]]{{( nsw)?}} [[TYPE]] [[val9]], [[scl4]] + things[9] /= scales[4]; + + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF0]], [[TYPE]] [[scl0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], [[TYPE]] [[res1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], [[TYPE]] [[res2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], [[TYPE]] [[res3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], [[TYPE]] [[res4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // NODBL: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], [[TYPE]] [[res5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], [[TYPE]] [[res6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void 
@dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF7]], [[TYPE]] [[res7]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF8]], [[TYPE]] [[res8]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF9]], [[TYPE]] [[res9]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF10]], [[TYPE]] [[val10]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + +} + +// Test arithmetic operators. +VTYPE arithmetic(inout VTYPE things[11])[11] { + TYPE res[11]; + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 2 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[InIx:%.*]] = add i32 [[InIx1]], 2 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + res[0] = +things[0]; + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call 
%dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF7]], i8 1, i32 [[ALN]]) + // CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF8]], i8 1, i32 [[ALN]]) + // CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF9]], i8 1, i32 [[ALN]]) + // CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF10]], i8 1, i32 [[ALN]]) + // CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + + // CHECK: [[res1:%.*]] = [[SUB]]{{( nsw)?}} [[TYPE]] {{-?(0|0\.?0*e?\+?0*|0xH8000)}}, [[val0]] + 
res[1] = -things[0]; + + // CHECK: [[res2:%.*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val2]], [[val1]] + res[2] = things[1] + things[2]; + + // CHECK: [[res3:%.*]] = [[SUB]]{{( nsw)?}} [[TYPE]] [[val2]], [[val3]] + res[3] = things[2] - things[3]; + + // CHECK: [[res4:%.*]] = [[MUL]]{{( nsw)?}} [[TYPE]] [[val4]], [[val3]] + res[4] = things[3] * things[4]; + + // CHECK: [[res5:%.*]] = [[DIV]]{{( nsw)?}} [[TYPE]] [[val4]], [[val5]] + res[5] = things[4] / things[5]; + +#ifdef DBL + res[6] = 0; // Gotta give it something in any case for validation. +#else + // NODBL: [[res6:%.*]] = [[REM]] [[TYPE]] [[val5]], [[val6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[res7:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val7]], [[POS1:(1|1\.0*e\+0*|0xH3C00)]] + res[7] = things[7]++; + + // CHECK: [[res8:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val8]], [[NEG1:(-1|-1\.0*e\+0*|0xHBC00)]] + res[8] = things[8]--; + + // CHECK: [[res9:%.*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val9]], [[POS1]] + res[9] = ++things[9]; + + // CHECK: [[res10:%.*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val10]], [[NEG1]] + res[10] = --things[10]; + + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF0]], [[TYPE]] [[val0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], [[TYPE]] [[val1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], [[TYPE]] [[val2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], [[TYPE]] [[val3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, 
%dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], [[TYPE]] [[val4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], [[TYPE]] [[val5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], [[TYPE]] [[val6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF7]], [[TYPE]] [[res7]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF8]], [[TYPE]] [[res8]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF9]], [[TYPE]] [[res9]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF10]], [[TYPE]] [[res10]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF0]], [[TYPE]] [[val0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], [[TYPE]] [[res1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], [[TYPE]] [[res2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void 
@dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], [[TYPE]] [[res3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], [[TYPE]] [[res4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], [[TYPE]] [[res5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // NODBL: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], [[TYPE]] [[res6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // Postincrement/decrements get the original value. + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF7]], [[TYPE]] [[val7]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF8]], [[TYPE]] [[val8]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF9]], [[TYPE]] [[res9]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF10]], [[TYPE]] [[res10]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + return res; +} + +// Test arithmetic operators with scalars. 
+VTYPE scarithmetic(VTYPE things[11], TYPE scales[10])[11] { + VTYPE res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 3 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[InIx:%.*]] = add i32 [[InIx1]], 3 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], i8 1, i32 
[[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[SclIx:%.*]] = add i32 [[InIx2]], 3 + // CHECK: [[SclHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Scales]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[scl0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[scl1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[scl2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[scl3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[scl4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[scl5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[scl6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[res0:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] 
[[scl0]], [[val0]] + res[0] = things[0] + scales[0]; + + // CHECK: [[res1:%[0-9]*]] = [[SUB]]{{( nsw)?}} [[TYPE]] [[val1]], [[scl1]] + res[1] = things[1] - scales[1]; + + // CHECK: [[res2:%[0-9]*]] = [[MUL]]{{( nsw)?}} [[TYPE]] [[scl2]], [[val2]] + res[2] = things[2] * scales[2]; + + // CHECK: [[res3:%[0-9]*]] = [[DIV]]{{( nsw)?}} [[TYPE]] [[val3]], [[scl3]] + res[3] = things[3] / scales[3]; + + // CHECK: [[res4:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[scl4]], [[val4]] + res[4] = scales[4] + things[4]; + + // CHECK: [[res5:%[0-9]*]] = [[SUB]]{{( nsw)?}} [[TYPE]] [[scl5]], [[val5]] + res[5] = scales[5] - things[5]; + + // CHECK: [[res6:%[0-9]*]] = [[MUL]]{{( nsw)?}} [[TYPE]] [[scl6]], [[val6]] + res[6] = scales[6] * things[6]; + res[7] = res[8] = res[9] = res[10] = 0; + + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF0]], [[TYPE]] [[res0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], [[TYPE]] [[res1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], [[TYPE]] [[res2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], [[TYPE]] [[res3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], [[TYPE]] [[res4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], [[TYPE]] [[res5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 
[[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], [[TYPE]] [[res6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + return res; +} + + +// Test logic operators. +// Only permissable in pre-HLSL2021 +bool1 logic(bool1 truth[10], VTYPE consequences[11])[10] { + bool1 res[10]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 4 + // CHECK: [[TruHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Truths]] + // CHECK: [[TruIx:%.*]] = add i32 [[InIx2]], 4 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF0]], i8 1, i32 [[IALN]]) + // CHECK: [[ival0:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF1]], i8 1, i32 [[IALN]]) + // CHECK: [[ival1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF2]], i8 1, i32 [[IALN]]) + // CHECK: [[ival2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF3]], i8 1, i32 [[IALN]]) + // CHECK: [[ival3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF4]], i8 1, i32 [[IALN]]) + // CHECK: [[ival4:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF5]], i8 1, i32 [[IALN]]) + 
// CHECK: [[ival5:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + + // CHECK: [[valIx:%.*]] = add i32 [[InIx1]], 4 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + + // CHECK: [[bres0:%.*]] = icmp eq i32 [[ival0]], 0 + // CHECK: 
[[res0:%.*]] = zext i1 [[bres0]] to i32 + res[0] = !truth[0]; + + // CHECK: [[res1:%.*]] = or i32 [[ival2]], [[ival1]] + // CHECK: [[bres1:%.*]] = icmp ne i32 [[res1]], 0 + // CHECK: [[res1:%.*]] = zext i1 [[bres1]] to i32 + res[1] = truth[1] || truth[2]; + + // CHECK: [[bval2:%.*]] = icmp ne i32 [[ival2]], 0 + // CHECK: [[bval3:%.*]] = icmp ne i32 [[ival3]], 0 + // CHECK: [[bres2:%.*]] = and i1 [[bval2]], [[bval3]] + // CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32 + res[2] = truth[2] && truth[3]; + + // CHECK: [[bval4:%.*]] = icmp ne i32 [[ival4]], 0 + // CHECK: [[bval5:%.*]] = icmp ne i32 [[ival5]], 0 + // CHECK: [[bres3:%.*]] = select i1 [[bval3]], i1 [[bval4]], i1 [[bval5]] + // CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32 + res[3] = truth[3] ? truth[4] : truth[5]; + + // CHECK: [[cmp4:%.*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq [[TYPE]] [[val0]], [[val1]] + // CHECK: [[res4:%.*]] = zext i1 [[cmp4]] to i32 + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[cmp5:%.*]] = [[CMP]] {{u?}}ne [[TYPE]] [[val1]], [[val2]] + // CHECK: [[res5:%.*]] = zext i1 [[cmp5]] to i32 + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[cmp6:%.*]] = [[CMP]] {{[osu]?}}lt [[TYPE]] [[val2]], [[val3]] + // CHECK: [[res6:%.*]] = zext i1 [[cmp6]] to i32 + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[cmp7:%.*]] = [[CMP]] {{[osu]?}}gt [[TYPE]] [[val3]], [[val4]] + // CHECK: [[res7:%.*]] = zext i1 [[cmp7]] to i32 + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[cmp8:%.*]] = [[CMP]] {{[osu]?}}le [[TYPE]] [[val4]], [[val5]] + // CHECK: [[res8:%.*]] = zext i1 [[cmp8]] to i32 + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[cmp9:%.*]] = [[CMP]] {{[osu]?}}ge [[TYPE]] [[val5]], [[val6]] + // CHECK: [[res9:%.*]] = zext i1 [[cmp9]] to i32 + res[9] = consequences[5] >= consequences[6]; + + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF0]], i32 [[res0]], i32 undef, i32 
undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF1]], i32 [[res1]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF2]], i32 [[res2]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF3]], i32 [[res3]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF4]], i32 [[res4]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF5]], i32 [[res5]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF6]], i32 [[res6]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF7]], i32 [[res7]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF8]], i32 [[res8]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF9]], i32 [[res9]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + + return res; +} + +static const int Ix = 2; + +// Test indexing operators +VTYPE index(VTYPE things[11], int i)[11] { + VTYPE res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 5 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[valIx:%.*]] = add i32 [[InIx1]], 5 
+ // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1:%.*]], i32 0, i32 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val0]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 1 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val1]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 2 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val2]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 3 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val3]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 4 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] 
@dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val4]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 5 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val5]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 6 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val6]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 7 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF7]], i8 1, i32 [[ALN]]) + // CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val7]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 8 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF8]], i8 1, i32 [[ALN]]) + // CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val8]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( 
inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 9 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF9]], i8 1, i32 [[ALN]]) + // CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val9]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 10 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF10]], i8 1, i32 [[ALN]]) + // CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val10]], [[TYPE]]* [[adr]], align [[ALN]] + + // CHECK: [[Ix:%.*]] = add i32 [[InIx2]], 5 + + // CHECK: [[adr0:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2:%.*]], i32 0, i32 0 + // CHECK: store [[TYPE]] {{(0|0\.?0*e?\+?0*|0xH0000)}}, [[TYPE]]* [[adr0]], align [[ALN]] + res[0] = 0; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 [[Ix]] + // CHECK: store [[TYPE]] [[POS1]], [[TYPE]]* [[adr]] + res[i] = 1; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 2 + // CHECK: store [[TYPE]] [[TWO:(2|2\.?0*e?\+?0*|0xH4000)]], [[TYPE]]* [[adr]] + res[Ix] = 2; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 3 + // CHECK: store [[TYPE]] [[val0]], [[TYPE]]* [[adr]] + res[3] = things[0]; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 [[Ix]] + // CHECK: [[vali:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 4 + // CHECK: store 
[[TYPE]] [[vali]], [[TYPE]]* [[adr]] + res[4] = things[i]; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 5 + // CHECK: store [[TYPE]] [[val2]], [[TYPE]]* [[adr]] + res[5] = things[Ix]; + + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 0, [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 1 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], [[TYPE]] [[TWO]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], [[TYPE]] [[val0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], [[TYPE]] [[vali]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], [[TYPE]] [[val2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 6 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle 
[[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 7 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF7]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 8 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF8]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 9 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF9]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 10 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF10]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + return res; +} + +#ifdef INT +// Test bit twiddling operators. 
+void bittwiddlers(inout VTYPE things[13]) { + // INT: [[ValIx:%.*]] = add i32 [[InIx1]], 6 + // INT: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Bits]] + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // INT: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // INT: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // INT: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // INT: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // INT: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // INT: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF7]], i8 1, i32 [[ALN]]) + // INT: [[val7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 
[[OFF8]], i8 1, i32 [[ALN]]) + // INT: [[val8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF9]], i8 1, i32 [[ALN]]) + // INT: [[val9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF10]], i8 1, i32 [[ALN]]) + // INT: [[val10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF11]], i8 1, i32 [[ALN]]) + // INT: [[val11:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF12]], i8 1, i32 [[ALN]]) + // INT: [[val12:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // INT: [[res0:%[0-9]*]] = xor [[TYPE]] [[val1]], -1 + things[0] = ~things[1]; + + // INT: [[res1:%[0-9]*]] = or [[TYPE]] [[val3]], [[val2]] + things[1] = things[2] | things[3]; + + // INT: [[res2:%[0-9]*]] = and [[TYPE]] [[val4]], [[val3]] + things[2] = things[3] & things[4]; + + // INT: [[res3:%[0-9]*]] = xor [[TYPE]] [[val5]], [[val4]] + things[3] = things[4] ^ things[5]; + + // INT: [[shv6:%[0-9]*]] = and [[TYPE]] [[val6]] + // INT: [[res4:%[0-9]*]] = shl [[TYPE]] [[val5]], [[shv6]] + things[4] = things[5] << things[6]; + + // INT: [[shv7:%[0-9]*]] = and [[TYPE]] [[val7]] + // UNSIG: [[res5:%[0-9]*]] = lshr [[TYPE]] [[val6]], [[shv7]] + // SIG: [[res5:%[0-9]*]] = ashr [[TYPE]] [[val6]], [[shv7]] + things[5] = things[6] >> things[7]; + + // INT: [[res6:%[0-9]*]] = or [[TYPE]] [[val8]], [[val6]] + things[6] |= things[8]; + + // INT: [[res7:%[0-9]*]] = and [[TYPE]] [[val9]], [[val7]] + things[7] &= things[9]; + + // INT: 
[[res8:%[0-9]*]] = xor [[TYPE]] [[val10]], [[val8]] + things[8] ^= things[10]; + + // INT: [[shv11:%[0-9]*]] = and [[TYPE]] [[val11]] + // INT: [[res9:%[0-9]*]] = shl [[TYPE]] [[val9]], [[shv11]] + things[9] <<= things[11]; + + // INT: [[shv12:%[0-9]*]] = and [[TYPE]] [[val12]] + // UNSIG: [[res10:%[0-9]*]] = lshr [[TYPE]] [[val10]], [[shv12]] + // SIG: [[res10:%[0-9]*]] = ashr [[TYPE]] [[val10]], [[shv12]] + things[10] >>= things[12]; + + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF0]], [[TYPE]] [[res0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF1]], [[TYPE]] [[res1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF2]], [[TYPE]] [[res2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF3]], [[TYPE]] [[res3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF4]], [[TYPE]] [[res4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF5]], [[TYPE]] [[res5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF6]], [[TYPE]] [[res6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF7]], [[TYPE]] [[res7]], [[TYPE]] undef, 
[[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF8]], [[TYPE]] [[res8]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF9]], [[TYPE]] [[res9]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF10]], [[TYPE]] [[res10]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF11]], [[TYPE]] [[val11]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF12]], [[TYPE]] [[val12]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + // CHECK-LABEL: ret void +} +#endif // INT diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl new file mode 100644 index 0000000000..44c9be17d4 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl @@ -0,0 +1,451 @@ +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DDBL %s | FileCheck %s --check-prefixes=CHECK +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t -DINT 
-enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG + +// Test relevant operators on vec1s in 6.9 to ensure they continue to be treated as scalars. + +#define VTYPE vector + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. +// CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[ELTY:[a-z0-9_]*]] +// CHECK: %"class.RWStructuredBuffer<{{.*}}>" = type { [[TYPE:.*]] } +RWStructuredBuffer buf; + +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. +// CHECK-LABEL: define void @"\01?assignments +export void assignments(inout VTYPE things[10]) { + + // CHECK: [[buf:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle {{%.*}}, i32 1, i32 0, i8 1, i32 {{8|4|2}}) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[buf]], 0 + // CHECK: [[res0:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[val0]], i64 0 + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]] + things[0] = buf.Load(1); + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 5 + // CHECK: [[ld5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[val5:%.*]] = extractelement [[TYPE]] [[ld5]], i32 0 + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 1 + // CHECK: [[ld1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[val1:%.*]] = extractelement [[TYPE]] [[ld1]], i32 0 + // CHECK: [[add1:%.*]] = [[ADD:f?add( fast)?]] [[ELTY]] [[val1]], [[val5]] + // CHECK: [[res1:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add1]], i32 0 + // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]] + things[1] += things[5]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x 
[[TYPE]]]* %things, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // CHECK: [[val6:%.*]] = extractelement [[TYPE]] [[ld6]], i32 0 + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[ld2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[val2:%.*]] = extractelement [[TYPE]] [[ld2]], i32 0 + // CHECK: [[sub2:%.*]] = [[SUB:f?sub( fast)?]] [[ELTY]] [[val2]], [[val6]] + // CHECK: [[res2:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[sub2]], i32 0 + // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + things[2] -= things[6]; + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 7 + // CHECK: [[ld7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // CHECK: [[val7:%.*]] = extractelement [[TYPE]] [[ld7]], i32 0 + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[ld3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[val3:%.*]] = extractelement [[TYPE]] [[ld3]], i32 0 + // CHECK: [[mul3:%.*]] = [[MUL:f?mul( fast)?]] [[ELTY]] [[val3]], [[val7]] + // CHECK: [[res3:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[mul3]], i32 0 + // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + things[3] *= things[7]; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 8 + // CHECK: [[ld8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // CHECK: [[val8:%.*]] = extractelement [[TYPE]] [[ld8]], i32 0 + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[ld4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[val4:%.*]] = extractelement [[TYPE]] [[ld4]], i32 0 + // CHECK: [[div4:%.*]] = [[DIV:[ufs]?div( fast)?]] [[ELTY]] [[val4]], [[val8]] + // CHECK: [[res4:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[div4]], i32 0 + // CHECK: store [[TYPE]] 
[[res4]], [[TYPE]]* [[adr4]] + things[4] /= things[8]; + +#ifndef DBL + // NODBL: [[adr9:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 9 + // NODBL: [[ld9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]] + // NODBL: [[val9:%.*]] = extractelement [[TYPE]] [[ld9]] + // NODBL: [[rem5:%.*]] = [[REM:[ufs]?rem( fast)?]] [[ELTY]] [[val5]], [[val9]] + // NODBL: [[res5:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[rem5]], i32 0 + // NODBL: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] + things[5] %= things[9]; +#endif +} + +// Test arithmetic operators. +// CHECK-LABEL: define void @"\01?arithmetic +export VTYPE arithmetic(inout VTYPE things[11])[11] { + VTYPE res[11]; + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: [[res0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[val0:%.*]] = extractelement [[TYPE]] [[res0]], i32 0 + // CHECK: [[sub1:%.*]] = [[SUB]] [[ELTY]] {{-?(0|0\.?0*e?\+?0*|0xH8000)}}, [[val0]] + res[0] = +things[0]; + res[1] = -things[0]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 1 + // CHECK: [[ld1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[val1:%.*]] = extractelement [[TYPE]] [[ld1]], i32 0 + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[ld2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[val2:%.*]] = extractelement [[TYPE]] [[ld2]], i32 0 + // CHECK: [[add2:%.*]] = [[ADD]] [[ELTY]] [[val2]], [[val1]] + res[2] = things[1] + things[2]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[ld3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[val3:%.*]] = extractelement [[TYPE]] [[ld3]], i32 0 + // CHECK: [[sub3:%.*]] = [[SUB]] [[ELTY]] [[val2]], [[val3]] + res[3] = things[2] - things[3]; + + // CHECK: [[adr4:%.*]] = 
getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[ld4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[val4:%.*]] = extractelement [[TYPE]] [[ld4]], i32 0 + // CHECK: [[mul4:%.*]] = [[MUL]] [[ELTY]] [[val4]], [[val3]] + res[4] = things[3] * things[4]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 5 + // CHECK: [[ld5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[val5:%.*]] = extractelement [[TYPE]] [[ld5]], i32 0 + // CHECK: [[div5:%.*]] = [[DIV]] [[ELTY]] [[val4]], [[val5]] + res[5] = things[4] / things[5]; + +#ifndef DBL + // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 6 + // NODBL: [[ld6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // NODBL: [[val6:%.*]] = extractelement [[TYPE]] [[ld6]] + // NODBL: [[rem6:%.*]] = [[REM]] [[ELTY]] [[val5]], [[val6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 7 + // CHECK: [[ld7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // CHECK: [[val7:%.*]] = extractelement [[TYPE]] [[ld7]], i32 0 + // CHECK: [[add7:%.*]] = [[ADD]] [[ELTY]] [[val7]], [[POS1:(1|1\.0*e\+0*|0xH3C00)]] + // CHECK: [[res7:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add7]], i32 0 + // CHECK: store [[TYPE]] [[res7]], [[TYPE]]* [[adr7]] + res[7] = things[7]++; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 8 + // CHECK: [[ld8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // CHECK: [[val8:%.*]] = extractelement [[TYPE]] [[ld8]], i32 0 + // CHECK: [[add8:%.*]] = [[ADD]] [[ELTY]] [[val8]], [[NEG1:(-1|-1\.0*e\+0*|0xHBC00)]] + // CHECK: [[res8:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add8]], i32 0 + // CHECK: store [[TYPE]] [[res8]], [[TYPE]]* [[adr8]] + res[8] = things[8]--; + + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 
x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 9 + // CHECK: [[ld9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]] + // CHECK: [[val9:%.*]] = extractelement [[TYPE]] [[ld9]], i32 0 + // CHECK: [[add9:%.*]] = [[ADD]] [[ELTY]] [[val9]], [[POS1]] + // CHECK: [[res9:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add9]], i32 0 + // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + res[9] = ++things[9]; + + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 10 + // CHECK: [[ld10:%.*]] = load [[TYPE]], [[TYPE]]* [[adr10]] + // CHECK: [[val10:%.*]] = extractelement [[TYPE]] [[ld10]], i32 0 + // CHECK: [[add10:%.*]] = [[ADD]] [[ELTY]] [[val10]], [[NEG1]] + // CHECK: [[res10:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add10]], i32 0 + // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + res[10] = --things[10]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 0 + // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 1 + // CHECK: [[res1:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[sub1]], i64 0 + // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 2 + // CHECK: [[res2:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add2]], i64 0 + // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 3 + // CHECK: [[res3:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[sub3]], i64 0 + // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 4 + // CHECK: [[res4:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[mul4]], i64 0 + // CHECK: store 
[[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 5 + // CHECK: [[res5:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[div5]], i64 0 + // CHECK: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] + // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 6 + // NODBL: [[res6:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[rem6]], i64 0 + // NODBL: store [[TYPE]] [[res6]], [[TYPE]]* [[adr6]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 7 + // This is a post op, so the original value goes into res[]. + // CHECK: store [[TYPE]] [[ld7]], [[TYPE]]* [[adr7]] + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 8 + // This is a post op, so the original value goes into res[]. + // CHECK: store [[TYPE]] [[ld8]], [[TYPE]]* [[adr8]] + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 9 + // CHECK: [[res9:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add9]], i64 0 + // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 10 + // CHECK: [[res10:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add10]], i64 0 + // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + // CHECK: ret void + return res; +} + +// Test logic operators. 
+// Only permissible in pre-HLSL2021 +// CHECK-LABEL: define void @"\01?logic +export bool logic(bool truth[10], VTYPE consequences[10])[10] { + bool res[10]; + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0 + // CHECK: [[val0:%.*]] = load i32, i32* [[adr0]] + // CHECK: [[res0:%.*]] = xor i32 [[val0]], 1 + res[0] = !truth[0]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1 + // CHECK: [[val1:%.*]] = load i32, i32* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + // CHECK: [[val2:%.*]] = load i32, i32* [[adr2]] + // CHECK: [[res1:%.*]] = or i32 [[val2]], [[val1]] + res[1] = truth[1] || truth[2]; + + // CHECK: [[bval2:%.*]] = icmp ne i32 [[val2]], 0 + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + // CHECK: [[val3:%.*]] = load i32, i32* [[adr3]] + // CHECK: [[bval3:%.*]] = icmp ne i32 [[val3]], 0 + // CHECK: [[bres2:%.*]] = and i1 [[bval2]], [[bval3]] + // CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32 + res[2] = truth[2] && truth[3]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4 + // CHECK: [[val4:%.*]] = load i32, i32* [[adr4]] + // CHECK: [[bval4:%.*]] = icmp ne i32 [[val4]], 0 + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5 + // CHECK: [[val5:%.*]] = load i32, i32* [[adr5]] + // CHECK: [[bval5:%.*]] = icmp ne i32 [[val5]], 0 + // CHECK: [[bres3:%.*]] = select i1 [[bval3]], i1 [[bval4]], i1 [[bval5]] + // CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32 + res[3] = truth[3] ?
truth[4] : truth[5]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 0 + // CHECK: [[ld0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[val0:%.*]] = extractelement [[TYPE]] [[ld0]], i32 0 + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 1 + // CHECK: [[ld1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[val1:%.*]] = extractelement [[TYPE]] [[ld1]], i32 0 + // CHECK: [[cmp4:%.*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq [[ELTY]] [[val0]], [[val1]] + // CHECK: [[res4:%.*]] = zext i1 [[cmp4]] to i32 + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 2 + // CHECK: [[ld2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[val2:%.*]] = extractelement [[TYPE]] [[ld2]], i32 0 + // CHECK: [[cmp5:%.*]] = [[CMP]] {{u?}}ne [[ELTY]] [[val1]], [[val2]] + // CHECK: [[res5:%.*]] = zext i1 [[cmp5]] to i32 + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 3 + // CHECK: [[ld3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[val3:%.*]] = extractelement [[TYPE]] [[ld3]], i32 0 + // CHECK: [[cmp6:%.*]] = [[CMP]] {{[osu]?}}lt [[ELTY]] [[val2]], [[val3]] + // CHECK: [[res6:%.*]] = zext i1 [[cmp6]] to i32 + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 4 + // CHECK: [[ld4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[val4:%.*]] = extractelement [[TYPE]] [[ld4]], i32 0 + // CHECK: [[cmp7:%.*]] = [[CMP]] {{[osu]?}}gt [[ELTY]] [[val3]], [[val4]] + // CHECK: [[res7:%.*]] = zext i1 [[cmp7]] to i32 + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x
[[TYPE]]]* %consequences, i32 0, i32 5 + // CHECK: [[ld5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[val5:%.*]] = extractelement [[TYPE]] [[ld5]], i32 0 + // CHECK: [[cmp8:%.*]] = [[CMP]] {{[osu]?}}le [[ELTY]] [[val4]], [[val5]] + // CHECK: [[res8:%.*]] = zext i1 [[cmp8]] to i32 + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // CHECK: [[val6:%.*]] = extractelement [[TYPE]] [[ld6]], i32 0 + // CHECK: [[cmp9:%.*]] = [[CMP]] {{[osu]?}}ge [[ELTY]] [[val5]], [[val6]] + // CHECK: [[res9:%.*]] = zext i1 [[cmp9]] to i32 + res[9] = consequences[5] >= consequences[6]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 0 + // CHECK: store i32 [[res0]], i32* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 1 + // CHECK: store i32 [[res1]], i32* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 2 + // CHECK: store i32 [[res2]], i32* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 3 + // CHECK: store i32 [[res3]], i32* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 4 + // CHECK: store i32 [[res4]], i32* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 5 + // CHECK: store i32 [[res5]], i32* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 6 + // CHECK: store i32 [[res6]], i32* [[adr6]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 7 + // CHECK: store i32 [[res7]], i32* [[adr7]] + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]*
%agg.result, i32 0, i32 8 + // CHECK: store i32 [[res8]], i32* [[adr8]] + // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 9 + // CHECK: store i32 [[res9]], i32* [[adr9]] + + // CHECK: ret void + return res; +} + +static const int Ix = 2; + +// Test indexing operators +// CHECK-LABEL: define void @"\01?index +export VTYPE index(VTYPE things[10], int i)[10] { + // CHECK: [[res:%.*]] = alloca [10 x [[ELTY]]] + VTYPE res[10]; + + // CHECK: [[res0:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 0 + // CHECK: store [[ELTY]] {{(0|0*\.?0*e?\+?0*|0xH0000)}}, [[ELTY]]* [[res0]] + res[0] = 0; + + // CHECK: [[adri:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 %i + // CHECK: store [[ELTY]] [[POS1]], [[ELTY]]* [[adri]] + res[i] = 1; + + // CHECK: [[adr2:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 2 + // CHECK: store [[ELTY]] {{(2|2\.?0*e?\+?0*|0xH4000)}}, [[ELTY]]* [[adr2]] + res[Ix] = 2; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: [[ld0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[adr3:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 3 + // CHECK: [[thg0:%.*]] = extractelement [[TYPE]] [[ld0]], i64 0 + // CHECK: store [[ELTY]] [[thg0]], [[ELTY]]* [[adr3]] + res[3] = things[0]; + + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 %i + // CHECK: [[ldi:%.*]] = load [[TYPE]], [[TYPE]]* [[adri]] + // CHECK: [[adr4:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 4 + // CHECK: [[thgi:%.*]] = extractelement [[TYPE]] [[ldi]], i64 0 + // CHECK: store [[ELTY]] [[thgi]], [[ELTY]]* [[adr4]] + res[4] = things[i]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[ld2:%.*]] = load [[TYPE]], 
[[TYPE]]* [[adr2]] + // CHECK: [[adr5:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 5 + // CHECK: [[thg2:%.*]] = extractelement [[TYPE]] [[ld2]], i64 0 + // CHECK: store [[ELTY]] [[thg2]], [[ELTY]]* [[adr5]] + res[5] = things[Ix]; + // CHECK: ret void + return res; +} + +#ifdef INT +// Test bit twiddling operators. +// INT-LABEL: define void @"\01?bittwiddlers +export void bittwiddlers(inout VTYPE things[13]) { + // INT: [[adr1:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 1 + // INT: [[ld1:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // INT: [[val1:%[0-9]*]] = extractelement [[TYPE]] [[ld1]], i32 0 + // INT: [[xor1:%[0-9]*]] = xor [[ELTY]] [[val1]], -1 + // INT: [[res1:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[xor1]], i32 0 + // INT: [[adr0:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 0 + // INT: store [[TYPE]] [[res1]], [[TYPE]]* [[adr0]] + things[0] = ~things[1]; + + // INT: [[adr2:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 2 + // INT: [[ld2:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // INT: [[val2:%[0-9]*]] = extractelement [[TYPE]] [[ld2]], i32 0 + // INT: [[adr3:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 3 + // INT: [[ld3:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // INT: [[val3:%[0-9]*]] = extractelement [[TYPE]] [[ld3]], i32 0 + // INT: [[or1:%[0-9]*]] = or [[ELTY]] [[val3]], [[val2]] + // INT: [[res1:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[or1]], i32 0 + // INT: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]] + things[1] = things[2] | things[3]; + + // INT: [[adr4:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 4 + // INT: [[ld4:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // INT: [[val4:%[0-9]*]] = extractelement [[TYPE]] [[ld4]], i32 0 + // INT: [[and2:%[0-9]*]] = and [[ELTY]] 
[[val4]], [[val3]] + // INT: [[res2:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[and2]], i32 0 + // INT: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + things[2] = things[3] & things[4]; + + // INT: [[adr5:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 5 + // INT: [[ld5:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // INT: [[val5:%[0-9]*]] = extractelement [[TYPE]] [[ld5]], i32 0 + // INT: [[xor3:%[0-9]*]] = xor [[ELTY]] [[val5]], [[val4]] + // INT: [[res3:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[xor3]], i32 0 + // INT: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + things[3] = things[4] ^ things[5]; + + // INT: [[adr6:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 6 + // INT: [[ld6:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // INT: [[val6:%[0-9]*]] = extractelement [[TYPE]] [[ld6]], i32 0 + // INT: [[shv6:%[0-9]*]] = and [[ELTY]] [[val6]] + // INT: [[shl4:%[0-9]*]] = shl [[ELTY]] [[val5]], [[shv6]] + // INT: [[res4:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[shl4]], i32 0 + // INT: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + things[4] = things[5] << things[6]; + + // INT: [[adr7:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 7 + // INT: [[ld7:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // INT: [[val7:%[0-9]*]] = extractelement [[TYPE]] [[ld7]], i32 0 + // INT: [[shv7:%[0-9]*]] = and [[ELTY]] [[val7]] + // UNSIG: [[shr5:%[0-9]*]] = lshr [[ELTY]] [[val6]], [[shv7]] + // SIG: [[shr5:%[0-9]*]] = ashr [[ELTY]] [[val6]], [[shv7]] + // INT: [[res5:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[shr5]], i32 0 + // INT: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] + things[5] = things[6] >> things[7]; + + // INT: [[adr8:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 8 + // INT: [[ld8:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // INT: [[val8:%[0-9]*]] = extractelement [[TYPE]] 
[[ld8]], i32 0 + // INT: [[or6:%[0-9]*]] = or [[ELTY]] [[val8]], [[val6]] + // INT: [[res6:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[or6]], i32 0 + // INT: store [[TYPE]] [[res6]], [[TYPE]]* [[adr6]] + things[6] |= things[8]; + + // INT: [[adr9:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 9 + // INT: [[ld9:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr9]] + // INT: [[val9:%[0-9]*]] = extractelement [[TYPE]] [[ld9]], i32 0 + // INT: [[and7:%[0-9]*]] = and [[ELTY]] [[val9]], [[val7]] + // INT: [[res7:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[and7]], i32 0 + // INT: store [[TYPE]] [[res7]], [[TYPE]]* [[adr7]] + things[7] &= things[9]; + + // INT: [[adr10:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 10 + // INT: [[ld10:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr10]] + // INT: [[val10:%[0-9]*]] = extractelement [[TYPE]] [[ld10]], i32 0 + // INT: [[xor8:%[0-9]*]] = xor [[ELTY]] [[val10]], [[val8]] + // INT: [[res8:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[xor8]], i32 0 + // INT: store [[TYPE]] [[res8]], [[TYPE]]* [[adr8]] + things[8] ^= things[10]; + + // INT: [[adr11:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 11 + // INT: [[ld11:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr11]] + // INT: [[val11:%[0-9]*]] = extractelement [[TYPE]] [[ld11]], i32 0 + // INT: [[shv11:%[0-9]*]] = and [[ELTY]] [[val11]] + // INT: [[shl9:%[0-9]*]] = shl [[ELTY]] [[val9]], [[shv11]] + // INT: [[res9:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[shl9]], i32 0 + // INT: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + things[9] <<= things[11]; + + // INT: [[adr12:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 12 + // INT: [[ld12:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr12]] + // INT: [[val12:%[0-9]*]] = extractelement [[TYPE]] [[ld12]], i32 0 + // INT: [[shv12:%[0-9]*]] = and [[ELTY]] [[val12]] + // UNSIG: 
[[shr10:%[0-9]*]] = lshr [[ELTY]] [[val10]], [[shv12]] + // SIG: [[shr10:%[0-9]*]] = ashr [[ELTY]] [[val10]], [[shv12]] + // INT: [[res10:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[shr10]], i32 0 + // INT: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + things[10] >>= things[12]; + + // INT: ret void +} +#endif // INT diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl new file mode 100644 index 0000000000..ba76eca619 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl @@ -0,0 +1,563 @@ +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=2 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=3 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=4 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=5 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=6 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=7 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=8 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=9 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=10 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=11 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=12 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=13 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=14 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float 
-DNUM=15 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=16 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=17 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=18 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=128 %s | FileCheck %s --check-prefixes=CHECK,NODBL + +// Less exhaustive testing for some other types. +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int -DNUM=2 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint -DNUM=5 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DNUM=3 -DDBL %s | FileCheck %s --check-prefixes=CHECK,DBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t -DNUM=9 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t -DNUM=17 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t -DNUM=177 -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG + +// Test relevant operators on an assortment vector sizes and types with 6.9 native vectors. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. +// Uses non vector buffer to avoid interacting with that implementation. 
+// CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[TYPE:[a-z_0-9]*]] + +RWStructuredBuffer< TYPE > buf; + +export void assignments(inout vector things[10], TYPE scales[10]); +export vector arithmetic(inout vector things[11])[11]; +export vector scarithmetic(inout vector things[10], TYPE scales[10])[10]; +export vector logic(vector truth[10], vector consequences[10])[10]; +export vector index(vector things[10], int i, TYPE val)[10]; + +struct Interface { + vector assigned[10]; + vector arithmeticked[11]; + vector scarithmeticked[10]; + vector logicked[10]; + vector indexed[10]; + TYPE scales[10]; +}; + +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. +// CHECK-LABEL: define void @"\01?assignments +export void assignments(inout vector things[10], TYPE scales[10]) { + + // Another trick to capture the size. + // CHECK: [[res:%[0-9]*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle %{{[^,]*}}, i32 [[NUM:[0-9]*]] + // CHECK: [[scl:%[0-9]*]] = extractvalue %dx.types.ResRet.[[TY]] [[res]], 0 + TYPE scalar = buf.Load(NUM); + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl]], i32 0 + // CHECK: [[res0:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res0]], <[[NUM]] x [[TYPE]]>* [[add0]] + things[0] = scalar; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1 + // CHECK: 
[[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]] + // CHECK: [[res1:%[0-9]*]] = [[ADD:f?add( fast)?]] <[[NUM]] x [[TYPE]]> [[vec1]], [[vec5]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]] + things[1] += things[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[res2:%[0-9]*]] = [[SUB:f?sub( fast)?]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec6]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[add2]] + things[2] -= things[6]; + + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7 + // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add7]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[res3:%[0-9]*]] = [[MUL:f?mul( fast)?]] <[[NUM]] x [[TYPE]]> [[vec3]], [[vec7]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]] + things[3] *= things[7]; + + // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8 + // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add8]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x 
[[TYPE]]>* [[add4]] + // CHECK: [[res4:%[0-9]*]] = [[DIV:[ufs]?div( fast)?]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec8]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]] + things[4] /= things[8]; + + // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9 + // CHECK: [[vec9:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add9]] +#ifdef DBL + // DBL can't use remainder operator, do something anyway to keep the rest consistent. + // DBL: [[fvec9:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec9]] to <[[NUM]] x float> + // DBL: [[fvec5:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec5]] to <[[NUM]] x float> + // DBL: [[fres5:%[0-9]*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x float> [[fvec5]], [[fvec9]] + // DBL: [[res5:%[0-9]*]] = fpext <[[NUM]] x float> [[fres5]] to <[[NUM]] x double> + vector f9 = things[9]; + vector f5 = things[5]; + f5 %= f9; + things[5] = f5; +#else + // NODBL: [[res5:%[0-9]*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec9]] + things[5] %= things[9]; +#endif + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[add5]] + + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 1 + // CHECK: [[scl1:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add1]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl1]], i32 0 + // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res6:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt1]], [[vec6]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]] + things[6] += scales[1]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 2 + // CHECK: [[scl2:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add2]] + // CHECK: [[spt:%[0-9]*]] 
= insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl2]], i32 0 + // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res7:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec7]], [[spt2]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[add7]] + things[7] -= scales[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 3 + // CHECK: [[scl3:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add3]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl3]], i32 0 + // CHECK: [[spt3:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res8:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt3]], [[vec8]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[add8]] + things[8] *= scales[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 4 + // CHECK: [[scl4:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add4]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl4]], i32 0 + // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res9:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec9]], [[spt4]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[add9]] + things[9] /= scales[4]; + +} + +// Test arithmetic operators. 
+// CHECK-LABEL: define void @"\01?arithmetic +export vector arithmetic(inout vector things[11])[11] { + vector res[11]; + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: [[res1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add0]] + // CHECK: [[res0:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]] + res[0] = -things[0]; + res[1] = +things[0]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[res2:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec1]] + res[2] = things[1] + things[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[res3:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]] + res[3] = things[2] - things[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[res4:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec3]] + res[4] = things[3] * things[4]; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[res5:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + res[5] 
= things[4] / things[5]; + + // DBL: [[fvec5:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec5]] to <[[NUM]] x float> + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]] +#ifdef DBL + // DBL can't use remainder operator, do something anyway to keep the rest consistent. + // DBL: [[fvec6:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec6]] to <[[NUM]] x float> + // DBL: [[fres6:%[0-9]*]] = [[REM]] <[[NUM]] x float> [[fvec5]], [[fvec6]] + // DBL: [[res6:%[0-9]*]] = fpext <[[NUM]] x float> [[fres6]] to <[[NUM]] x double> + res[6] = (vector)things[5] % (vector)things[6]; +#else + // NODBL: [[res6:%[0-9]*]] = [[REM]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7 + // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add7]] + // CHECK: [[res7:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec7]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[add7]] + res[7] = things[7]++; + + // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8 + // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add8]] + // CHECK: [[res8:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec8]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[add8]] + res[8] = things[8]--; + + // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9 + // CHECK: [[vec9:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add9]] + // CHECK: [[res9:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec9]] + // CHECK: store <[[NUM]] x [[TYPE]]> 
[[res9]], <[[NUM]] x [[TYPE]]>* [[add9]] + res[9] = ++things[9]; + + // CHECK: [[add10:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 10 + // CHECK: [[vec10:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add10]] + // CHECK: [[res10:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec10]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[add10]] + res[10] = --things[10]; + + // Stores into res[]. Previous were for things[] inout. + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res0]], <[[NUM]] x [[TYPE]]>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 1 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 2 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 3 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 6 + // CHECK: store 
<[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]] + // These two were post ops, so the original value goes into res[]. + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 7 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec7]], <[[NUM]] x [[TYPE]]>* [[add7]] + // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 8 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec8]], <[[NUM]] x [[TYPE]]>* [[add8]] + // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 9 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[add9]] + // CHECK: [[add10:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 10 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[add10]] + // CHECK: ret void + + + return res; +} + +// Test arithmetic operators with scalars. 
+// CHECK-LABEL: define void @"\01?scarithmetic +export vector scarithmetic(inout vector things[10], TYPE scales[10])[10] { + vector res[10]; + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]] + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 0 + // CHECK: [[scl0:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add0]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl0]], i32 0 + // CHECK: 
[[spt0:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res0:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt0]], [[vec0]] + res[0] = things[0] + scales[0]; + + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 1 + // CHECK: [[scl1:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add1]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl1]], i32 0 + // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res1:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec1]], [[spt1]] + res[1] = things[1] - scales[1]; + + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 2 + // CHECK: [[scl2:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add2]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl2]], i32 0 + // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res2:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt2]], [[vec2]] + res[2] = things[2] * scales[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 3 + // CHECK: [[scl3:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add3]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl3]], i32 0 + // CHECK: [[spt3:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res3:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec3]], [[spt3]] + res[3] = things[3] / scales[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 4 + // CHECK: [[scl4:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add4]] + // CHECK: 
[[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl4]], i32 0 + // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res4:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt4]], [[vec4]] + res[4] = scales[4] + things[4]; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 5 + // CHECK: [[scl5:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add5]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl5]], i32 0 + // CHECK: [[spt5:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res5:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[spt5]], [[vec5]] + res[5] = scales[5] - things[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 6 + // CHECK: [[scl6:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add6]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl6]], i32 0 + // CHECK: [[spt6:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res6:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt6]], [[vec6]] + res[6] = scales[6] * things[6]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res0]], <[[NUM]] x [[TYPE]]>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 1 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 2 + // CHECK: store <[[NUM]] x [[TYPE]]> 
[[res2]], <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 3 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 6 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]] + // CHECK: ret void + + + return res; +} + +// Test logic operators. +// Only permissable in pre-HLSL2021 +// CHECK-LABEL: define void @"\01?logic +export vector logic(vector truth[10], vector consequences[10])[10] { + vector res[10]; + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[cmp:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + // CHECK: [[cmp0:%[0-9]*]] = icmp eq <[[NUM]] x i1> [[cmp]], zeroinitializer + // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp0]] to <[[NUM]] x i32> + res[0] = !truth[0]; + + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]] + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* 
%truth, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[bres1:%[0-9]*]] = or <[[NUM]] x i1> [[bvec2]], [[bvec1]] + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32> + res[1] = truth[1] || truth[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]] + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[bres2:%[0-9]*]] = and <[[NUM]] x i1> [[bvec3]], [[bvec2]] + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + res[2] = truth[2] && truth[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]] + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]] + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // CHECK: [[bres3:%[0-9]*]] = select <[[NUM]] x i1> [[bvec3]], <[[NUM]] x i1> [[bvec4]], <[[NUM]] x i1> [[bvec5]] + // CHECK: [[res3:%[0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + res[3] = truth[3] ? 
truth[4] : truth[5]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]] + // CHECK: [[cmp4:%[0-9]*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq <[[NUM]] x [[TYPE]]> [[vec0]], [[vec1]] + // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp4]] to <[[NUM]] x i32> + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[cmp5:%[0-9]*]] = [[CMP]] {{u?}}ne <[[NUM]] x [[TYPE]]> [[vec1]], [[vec2]] + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp5]] to <[[NUM]] x i32> + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[cmp6:%[0-9]*]] = [[CMP]] {{[osu]?}}lt <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]] + // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp6]] to <[[NUM]] x i32> + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[cmp7:%[0-9]*]] = [[CMP]] {{[osu]]?}}gt <[[NUM]] x [[TYPE]]> [[vec3]], [[vec4]] + // CHECK: [[res7:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp7]] to 
<[[NUM]] x i32> + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[cmp8:%[0-9]*]] = [[CMP]] {{[osu]]?}}le <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + // CHECK: [[res8:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp8]] to <[[NUM]] x i32> + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]] + // CHECK: [[cmp9:%[0-9]*]] = [[CMP]] {{[osu]?}}ge <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]] + // CHECK: [[res9:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp9]] to <[[NUM]] x i32> + res[9] = consequences[5] >= consequences[6]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 1 + // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 2 + // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 3 + // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr 
inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 6 + // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[add6]] + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 7 + // CHECK: store <[[NUM]] x i32> [[res7]], <[[NUM]] x i32>* [[add7]] + // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 8 + // CHECK: store <[[NUM]] x i32> [[res8]], <[[NUM]] x i32>* [[add8]] + // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 9 + // CHECK: store <[[NUM]] x i32> [[res9]], <[[NUM]] x i32>* [[add9]] + // CHECK: ret void + + return res; +} + +static const int Ix = 2; + +// Test indexing operators +// CHECK-LABEL: define void @"\01?index +export vector index(vector things[10], int i, TYPE val)[10] { + vector res[10]; + + // CHECK: [[res:%[0-9]*]] = alloca [10 x <[[NUM]] x [[TYPE]]>] + // CHECK: [[res0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> zeroinitializer, <[[NUM]] x [[TYPE]]>* [[res0]] + res[0] = 0; + + // CHECK: [[resi:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 %i + // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] {{(1|0xH3C00).*}}>, <[[NUM]] x [[TYPE]]>* [[resi]] + res[i] = 1; + + // CHECK: [[res2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 2 + // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] {{(2|0xH4000).*}}>, <[[NUM]] x [[TYPE]]>* [[res2]] + res[Ix] = 2; + + // CHECK: [[add0:%[0-9]*]] 
= getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: [[thg0:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add0]] + // CHECK: [[res3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 3 + // CHECK: store <[[NUM]] x [[TYPE]]> [[thg0]], <[[NUM]] x [[TYPE]]>* [[res3]] + res[3] = things[0]; + + // CHECK: [[addi:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 %i + // CHECK: [[thgi:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[addi]] + // CHECK: [[res4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 4 + // CHECK: store <[[NUM]] x [[TYPE]]> [[thgi]], <[[NUM]] x [[TYPE]]>* [[res4]] + res[4] = things[i]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[thg2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[res5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 5 + // CHECK: store <[[NUM]] x [[TYPE]]> [[thg2]], <[[NUM]] x [[TYPE]]>* [[res5]] + res[5] = things[Ix]; + // CHECK: ret void + return res; +} + +#ifdef INT +// Test bit twiddling operators. 
+// INT-LABEL: define void @"\01?bittwiddlers +export void bittwiddlers(inout vector things[13]) { + // INT: [[adr1:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1 + // INT: [[ld1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]] + // INT: [[res1:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[ld1]], <[[TYPE]] -1 + // INT: [[adr0:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // INT: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr0]] + things[0] = ~things[1]; + + // INT: [[adr2:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // INT: [[ld2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]] + // INT: [[adr3:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // INT: [[ld3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]] + // INT: [[res1:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[ld3]], [[ld2]] + // INT: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr1]] + things[1] = things[2] | things[3]; + + // INT: [[adr4:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // INT: [[ld4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]] + // INT: [[res2:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld4]], [[ld3]] + // INT: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[adr2]] + things[2] = things[3] & things[4]; + + // INT: [[adr5:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // INT: [[ld5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]] + // INT: [[res3:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[ld4]], [[ld5]] + // INT: store <[[NUM]] x [[TYPE]]> [[res3]], 
<[[NUM]] x [[TYPE]]>* [[adr3]] + things[3] = things[4] ^ things[5]; + + // INT: [[adr6:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // INT: [[ld6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr6]] + // INT: [[shv6:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld6]] + // INT: [[res4:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[ld5]], [[shv6]] + // INT: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[adr4]] + things[4] = things[5] << things[6]; + + // INT: [[adr7:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7 + // INT: [[ld7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr7]] + // INT: [[shv7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld7]] + // UNSIG: [[res5:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[ld6]], [[shv7]] + // SIG: [[res5:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[ld6]], [[shv7]] + // INT: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[adr5]] + things[5] = things[6] >> things[7]; + + // INT: [[adr8:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8 + // INT: [[ld8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr8]] + // INT: [[res6:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[ld8]], [[ld6]] + // INT: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[adr6]] + things[6] |= things[8]; + + // INT: [[adr9:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9 + // INT: [[ld9:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr9]] + // INT: [[res7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld9]], [[ld7]] + // INT: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[adr7]] + things[7] &= things[9]; + + // INT: [[adr10:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 
0, i32 10 + // INT: [[ld10:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr10]] + // INT: [[res8:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[ld8]], [[ld10]] + // INT: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[adr8]] + things[8] ^= things[10]; + + // INT: [[adr11:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 11 + // INT: [[ld11:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr11]] + // INT: [[shv11:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld11]] + // INT: [[res9:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[ld9]], [[shv11]] + // INT: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[adr9]] + things[9] <<= things[11]; + + // INT: [[adr12:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 12 + // INT: [[ld12:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr12]] + // INT: [[shv12:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld12]] + // UNSIG: [[res10:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[ld10]], [[shv12]] + // SIG: [[res10:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[ld10]], [[shv12]] + // INT: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[adr10]] + things[10] >>= things[12]; + + // INT: ret void +} +#endif // INT diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl new file mode 100644 index 0000000000..2ae3c92e85 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl @@ -0,0 +1,115 @@ +// RUN: %dxc -T lib_6_9 %s | FileCheck %s + +// Long vector tests for vec ops that scalarize to something more complex +// than a simple repetition of the same dx.op calls. 
+ +// CHECK-LABEL: test_atan2 +// CHECK: fdiv fast <8 x float> +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 17, <8 x float> %{{.*}}) ; Atan(value) +// CHECK: fadd fast <8 x float> %{{.*}}, %{{.*}}, +// CHECK: fcmp fast oeq <8 x float> +// CHECK: fcmp fast oge <8 x float> +// CHECK: fcmp fast olt <8 x float> +// CHECK: and <8 x i1> +// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> +// CHECK: and <8 x i1> +// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> +// CHECK: and <8 x i1> +// CHECK: select <8 x i1> %{{.*}}, <8 x float> +// CHECK: select <8 x i1> %{{.*}}, <8 x float> vec1, vector vec2) { + vec1 = atan2(vec1, vec2); +} + +// CHECK-LABEL: test_fmod +// CHECK: fdiv fast <8 x float> +// CHECK: fsub fast <8 x float> +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 6, <8 x float> %{{.*}}) ; FAbs(value) +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 22, <8 x float> %{{.*}}) ; Frc(value) + +// CHECK: fsub fast <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> +// CHECK: fmul fast <8 x float> +export void test_fmod(inout vector vec1, vector vec2) { + vec1 = fmod(vec1, vec2); +} + +// CHECK-LABEL: test_ldexp +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 21, <8 x float> %{{.*}}) ; Exp(value) +// CHECK: fmul fast <8 x float> + +export void test_ldexp(inout vector vec1, vector vec2) { + vec1 = ldexp(vec1, vec2); +} + + +// CHECK-LABEL: test_pow +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 23, <8 x float> %{{.*}}) ; Log(value) +// CHECK: fmul fast <8 x float> +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 21, <8 x float> %{{.*}}) ; Exp(value) +export void test_pow(inout vector vec1, vector vec2) { + vec1 = pow(vec1, vec2); +} + +// CHECK-LABEL: test_modf +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 29, <8 x float> %{{.*}}) ; Round_z(value) +// CHECK: fsub fast <8 x float> +export void test_modf(inout vector vec1, vector vec2) { + vec1 = modf(vec1, vec2); +} + +// CHECK-LABEL: test_dot +// CHECK: 
[[el:%.*]] = extractelement <8 x float> +// CHECK: [[mul:%.*]] = fmul fast float [[el]] +// CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mul]]) ; FMad(a,b,c) +// CHECK: [[pong:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[ping]]) ; FMad(a,b,c) +// CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[pong]]) ; FMad(a,b,c) +// CHECK: [[pong:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[ping]]) ; FMad(a,b,c) +// CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[pong]]) ; FMad(a,b,c) +// CHECK: [[pong:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[ping]]) ; FMad(a,b,c) +// CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[pong]]) ; FMad(a,b,c) +export void test_dot(inout vector vec1, vector vec2) { + vec1 = dot(vec1, vec2); +} + +// CHECK-LABEL: test_any +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +export void test_any(vector vec1, inout vector bvec) { + bvec &= any(vec1); +} + +// CHECK-LABEL: test_all +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +export void test_all(vector vec1, inout vector bvec) { + bvec &= all(vec1); +} + +// CHECK-LABEL: test_WaveMatch +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +export uint4 test_WaveMatch(vector bvec) { + return WaveMatch(bvec); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-float-intrinsics.hlsl 
b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-float-intrinsics.hlsl new file mode 100644 index 0000000000..02cad5b894 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-float-intrinsics.hlsl @@ -0,0 +1,69 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=35 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=35 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=36 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=36 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled binary intrinsics that take float-like parameters and +// and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> +// CHECK-DAG: %dx.types.ResRet.[[DTY:v[0-9]*f64]] = type { <[[NUM]] x double> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode number. 
+ // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec1 = buf.Load >(0); + vector hVec2 = buf.Load >(512); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec1 = buf.Load >(2048); + vector fVec2 = buf.Load >(2560); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + vector dVec1 = buf.Load >(4096); + vector dVec2 = buf.Load >(4608); + + // Test simple matching type overloads. 
+ + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x half> @dx.op.binary.[[HTY]](i32 [[OP]], <[[NUM]] x half> [[hvec1]], <[[NUM]] x half> [[hvec2]]) + vector hRes = FUNC(hVec1, hVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x float> @dx.op.binary.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[fvec1]], <[[NUM]] x float> [[fvec2]]) + vector fRes = FUNC(fVec1, fVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x double> @dx.op.binary.[[DTY]](i32 [[OP]], <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]]) + vector dRes = FUNC(dVec1, dVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(2048, fRes); + buf.Store >(4096, dRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-int-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-int-intrinsics.hlsl new file mode 100644 index 0000000000..994246b753 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-int-intrinsics.hlsl @@ -0,0 +1,116 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=37 -DUOP=39 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=37 -DUOP=39 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=38 -DUOP=40 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=38 -DUOP=40 -DNUM=1022 %s | FileCheck %s + +#ifndef UOP +#define UOP OP +#endif + +// Test vector-enabled binary intrinsics that take signed and unsigned integer parameters of +// different widths and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. 
+ +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode numbers. + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 888, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(888, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[UOP:[0-9]*]] + buf.Store(999, UOP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[svec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec1 = buf.Load >(0); + vector sVec2 = buf.Load >(512); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[usvec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1536 + // CHECK: [[usvec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec1 
= buf.Load >(1024); + vector usVec2 = buf.Load >(1536); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec1 = buf.Load >(2048); + vector iVec2 = buf.Load >(2560); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[uivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3584 + // CHECK: [[uivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector uiVec1 = buf.Load >(3072); + vector uiVec2 = buf.Load >(3584); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec1 = buf.Load >(4096); + vector lVec2 = buf.Load >(4608); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[ulvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5632 + // CHECK: [[ulvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec1 = buf.Load >(5120); + 
vector ulVec2 = buf.Load >(5632); + + // Test simple matching type overloads. + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[svec2]]) + vector sRes = FUNC(sVec1, sVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 [[UOP]], <[[NUM]] x i16> [[usvec1]], <[[NUM]] x i16> [[usvec2]]) + vector usRes = FUNC(usVec1, usVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[ivec2]]) + vector iRes = FUNC(iVec1, iVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 [[UOP]], <[[NUM]] x i32> [[uivec1]], <[[NUM]] x i32> [[uivec2]]) + vector uiRes = FUNC(uiVec1, uiVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[lvec2]]) + vector lRes = FUNC(lVec1, lVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 [[UOP]], <[[NUM]] x i64> [[ulvec1]], <[[NUM]] x i64> [[ulvec2]]) + vector ulRes = FUNC(ulVec1, ulVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, sRes); + buf.Store >(1024, usRes); + buf.Store >(2048, iRes); + buf.Store >(3072, uiRes); + buf.Store >(4096, lRes); + buf.Store >(5120, ulRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl new file mode 100644 index 0000000000..6ebb511b00 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl @@ -0,0 +1,77 @@ +// 
The binary part of some of these is all just a vector math ops with as many unary dxops as elements. +// These will have apparent mismatches between the ARITY define and the check prefix. + +// RUN: %dxc -DFUNC=f16tof32 -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,LEGACY +// RUN: %dxc -DFUNC=f32tof16 -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,LEGACY +// RUN: %dxc -DFUNC=isfinite -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,SPECFLT +// RUN: %dxc -DFUNC=isinf -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,SPECFLT +// RUN: %dxc -DFUNC=isnan -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,SPECFLT +// RUN: %dxc -DFUNC=countbits -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=firstbithigh -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=firstbitlow -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=QuadReadLaneAt -DARITY=4 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=QuadReadAcrossX -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=QuadReadAcrossY -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=QuadReadAcrossDiagonal -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=WaveActiveBitAnd -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveBitOr -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveBitXor -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveProduct -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveSum -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc 
-DFUNC=WaveActiveMin -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveMax -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixBitAnd -DARITY=5 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixBitOr -DARITY=5 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixBitXor -DARITY=5 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixProduct -DARITY=5 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixSum -DARITY=5 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WavePrefixSum -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WavePrefixProduct -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveReadLaneAt -DARITY=4 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveReadLaneFirst -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveAllEqual -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE + +#ifndef TYPE +#define TYPE float +#endif + +#if ARITY == 1 +#define CALLARGS(x,y,z) x +#elif ARITY == 2 +#define CALLARGS(x,y,z) x, y +#elif ARITY == 3 +#define CALLARGS(x,y,z) x, y, z +// ARITY 4 is used for 1 vec + scalar +#elif ARITY == 4 +#define CALLARGS(x,y,z) x, i +// ARITY 5 is used for 1 vec + uint4 mask for wavemultiprefix* +#elif ARITY == 5 +#define CALLARGS(x,y,z) x, m +#endif + +StructuredBuffer< vector > buf; +ByteAddressBuffer rbuf; + +float4 main(uint i : SV_PrimitiveID, uint4 m : M) : SV_Target { + vector arg1 = rbuf.Load< vector >(i++*32); + vector arg2 = rbuf.Load< vector >(i++*32); + vector arg3 = rbuf.Load< vector >(i++*32); + + // UNARY: call {{.*}} [[DXOP:@dx.op.unary]] + // 
BINARY: call {{.*}} [[DXOP:@dx.op.binary]] + // TERTIARY: call {{.*}} [[DXOP:@dx.op.tertiary]] + // LEGACY: call {{.*}} [[DXOP:@dx.op.legacy]] + // SPECFLT: call {{.*}} [[DXOP:@dx.op.isSpecialFloat]] + // QUAD: call {{.*}} [[DXOP:@dx.op.quad]] + // WAVE: call {{.*}} [[DXOP:@dx.op.wave]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + + vector ret = FUNC(CALLARGS(arg1, arg2, arg3)); + return float4(ret[0] + ret[1], ret[2] + ret[3], ret[4] + ret[5], ret[6] + ret[7]); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-float-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-float-intrinsics.hlsl new file mode 100644 index 0000000000..e32ebc1db2 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-float-intrinsics.hlsl @@ -0,0 +1,86 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=46 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=46 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled ternary intrinsics that take float-like parameters and +// and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +// Given that all we have at the moment are fmad and fma and the latter only takes doubles, +// fma is tacked on as an additional check. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> +// CHECK-DAG: %dx.types.ResRet.[[DTY:v[0-9]*f64]] = type { <[[NUM]] x double> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode number. 
+ // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec1 = buf.Load >(0); + vector hVec2 = buf.Load >(512); + vector hVec3 = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec1 = buf.Load >(2048); + vector fVec2 = buf.Load >(2560); + vector fVec3 = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] 
@dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + vector dVec1 = buf.Load >(4096); + vector dVec2 = buf.Load >(4608); + vector dVec3 = buf.Load >(5120); + + // Test simple matching type overloads. + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x half> @dx.op.tertiary.[[HTY]](i32 [[OP]], <[[NUM]] x half> [[hvec1]], <[[NUM]] x half> [[hvec2]], <[[NUM]] x half> [[hvec3]]) + vector hRes = FUNC(hVec1, hVec2, hVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x float> @dx.op.tertiary.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[fvec1]], <[[NUM]] x float> [[fvec2]], <[[NUM]] x float> [[fvec3]]) + vector fRes = FUNC(fVec1, fVec2, fVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x double> @dx.op.tertiary.[[DTY]](i32 [[OP]], <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]], <[[NUM]] x double> [[dvec3]]) + vector dRes = FUNC(dVec1, dVec2, dVec3); + + // Tacked on fma() check since it only takes doubles. 
+ // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x double> @dx.op.tertiary.[[DTY]](i32 47, <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]], <[[NUM]] x double> [[dvec3]]) + vector dRes2 = fma(dVec1, dVec2, dVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(2048, fRes); + buf.Store >(4096, dRes); + buf.Store >(5120, dRes2); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-int-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-int-intrinsics.hlsl new file mode 100644 index 0000000000..50f98715e4 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-int-intrinsics.hlsl @@ -0,0 +1,131 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=48 -DUOP=49 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=48 -DUOP=49 -DNUM=1022 %s | FileCheck %s + +#ifndef UOP +#define UOP OP +#endif + +// Test vector-enabled tertiary intrinsics that take signed and unsigned integer parameters of +// different widths and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode numbers. 
+ // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 888, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(888, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[UOP:[0-9]*]] + buf.Store(999, UOP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[svec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[svec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec1 = buf.Load >(0); + vector sVec2 = buf.Load >(512); + vector sVec3 = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1025 + // CHECK: [[usvec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1536 + // CHECK: [[usvec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, 
%dx.types.Handle [[buf]], i32 2048 + // CHECK: [[usvec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec1 = buf.Load >(1025); + vector usVec2 = buf.Load >(1536); + vector usVec3 = buf.Load >(2048); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2049 + // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[ivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec1 = buf.Load >(2049); + vector iVec2 = buf.Load >(2560); + vector iVec3 = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3073 + // CHECK: [[uivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3584 + // CHECK: [[uivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[uivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector uiVec1 = buf.Load >(3073); + vector uiVec2 = buf.Load >(3584); + vector uiVec3 = buf.Load >(4096); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4097 + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] 
@dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[lvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec1 = buf.Load >(4097); + vector lVec2 = buf.Load >(4608); + vector lVec3 = buf.Load >(5120); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5121 + // CHECK: [[ulvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5632 + // CHECK: [[ulvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 6144 + // CHECK: [[ulvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec1 = buf.Load >(5121); + vector ulVec2 = buf.Load >(5632); + vector ulVec3 = buf.Load >(6144); + + // Test simple matching type overloads. 
+ // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.tertiary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[svec2]], <[[NUM]] x i16> [[svec3]]) + vector sRes = FUNC(sVec1, sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.tertiary.[[STY]](i32 [[UOP]], <[[NUM]] x i16> [[usvec1]], <[[NUM]] x i16> [[usvec2]], <[[NUM]] x i16> [[usvec3]]) + vector usRes = FUNC(usVec1, usVec2, usVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.tertiary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[ivec2]], <[[NUM]] x i32> [[ivec3]]) + vector iRes = FUNC(iVec1, iVec2, iVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.tertiary.[[ITY]](i32 [[UOP]], <[[NUM]] x i32> [[uivec1]], <[[NUM]] x i32> [[uivec2]], <[[NUM]] x i32> [[uivec3]]) + vector uiRes = FUNC(uiVec1, uiVec2, uiVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.tertiary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[lvec2]], <[[NUM]] x i64> [[lvec3]]) + vector lRes = FUNC(lVec1, lVec2, lVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.tertiary.[[LTY]](i32 [[UOP]], <[[NUM]] x i64> [[ulvec1]], <[[NUM]] x i64> [[ulvec2]], <[[NUM]] x i64> [[ulvec3]]) + vector ulRes = FUNC(ulVec1, ulVec2, ulVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, sRes); + buf.Store >(1024, usRes); + buf.Store >(2048, iRes); + buf.Store >(3072, uiRes); + buf.Store >(4096, lRes); + buf.Store >(5120, ulRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl new file mode 100644 index 
0000000000..91ab631a7e --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl @@ -0,0 +1,83 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=saturate -DOP=7 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=saturate -DOP=7 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cos -DOP=12 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cos -DOP=12 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sin -DOP=13 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sin -DOP=13 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tan -DOP=14 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tan -DOP=14 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=acos -DOP=15 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=acos -DOP=15 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=asin -DOP=16 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=asin -DOP=16 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=atan -DOP=17 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=atan -DOP=17 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cosh -DOP=18 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cosh -DOP=18 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sinh -DOP=19 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sinh -DOP=19 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tanh -DOP=20 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tanh -DOP=20 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 
-enable-16bit-types -DFUNC=exp2 -DOP=21 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=exp2 -DOP=21 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=frac -DOP=22 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=frac -DOP=22 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log2 -DOP=23 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log2 -DOP=23 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log10 -DOP=23 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log10 -DOP=23 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sqrt -DOP=24 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sqrt -DOP=24 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=rsqrt -DOP=25 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=rsqrt -DOP=25 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=round -DOP=26 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=round -DOP=26 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=floor -DOP=27 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=floor -DOP=27 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ceil -DOP=28 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ceil -DOP=28 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=trunc -DOP=29 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=trunc -DOP=29 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled unary intrinsics that take float-like parameters and +// and are "trivial" in that they can be implemented with a single call +// instruction with the same 
parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode number. + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec = buf.Load >(0); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[fvec:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec = buf.Load >(1024); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 [[OP]], <[[NUM]] x half> [[hvec]]) + vector hRes = FUNC(hVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[fvec]]) + vector fRes = FUNC(fVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(1024, fRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-int-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-int-intrinsics.hlsl new file mode 100644 index 0000000000..ef0b250745 --- /dev/null +++ 
b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-int-intrinsics.hlsl @@ -0,0 +1,86 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=reversebits -DOP=30 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=reversebits -DOP=30 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled unary intrinsics that take signed and unsigned integer parameters of +// different widths and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +[numthreads(8,1,1)] +void main() { + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // Capture opcode number. 
+ // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec = buf.Load >(0); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[usvec:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[ivec:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec = buf.Load >(2048); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[uivec:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector uiVec = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[lvec:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec = buf.Load >(4096); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[ulvec:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec = buf.Load >(5120); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.unary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[svec]]) + vector sRes = FUNC(sVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: 
insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.unary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[usvec]]) + vector usRes = FUNC(usVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.unary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[ivec]]) + vector iRes = FUNC(iVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.unary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[uivec]]) + vector uiRes = FUNC(uiVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.unary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[lvec]]) + vector lRes = FUNC(lVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.unary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[ulvec]]) + vector ulRes = FUNC(ulVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, sRes); + buf.Store >(1024, usRes); + buf.Store >(2048, iRes); + buf.Store >(3072, uiRes); + buf.Store >(4096, lRes); + buf.Store >(5120, ulRes); +} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll new file mode 100644 index 0000000000..987f997a2a --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll @@ -0,0 +1,269 @@ +; RUN: %dxopt %s -dynamic-vector-to-array,ReplaceAllVectors=0 -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.VectRec1 = type { <1 x float> } +%struct.VectRec2 = type { <2 x float> } + +; Vec2s should be preserved. 
+; CHECK-DAG: @dyglob2 = internal global <2 x float> zeroinitializer, align 4 +; CHECK-DAG: @dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +; CHECK-DAG: @dygrec2.0 = internal global <2 x float> zeroinitializer, align 4 + +; CHECK-DAG: @stgrec2.0 = internal global <2 x float> zeroinitializer, align 4 +; CHECK-DAG: @stglob2 = internal global <2 x float> zeroinitializer, align 4 +; CHECK-DAG: @stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 + +; Dynamic Vec1s should be reduced. +; CHECK-DAG: @dygar1.v = internal global [2 x [1 x float]] zeroinitializer, align 4 +; CHECK-DAG: @dygrec1.0.v = internal global [1 x float] zeroinitializer, align 4 +; CHECK-DAG: @dyglob1.v = internal global [1 x float] zeroinitializer, align 4 + +; These static accessed Vec1s were already reduced by SROA +; CHECK-DAG: @stgar1.0 = internal global [2 x float] zeroinitializer, align 4 +; CHECK-DAG: @stglob1.0 = internal global float 0.000000e+00, align 4 +; CHECK-DAG: @stgrec1.0.0 = internal global float 0.000000e+00, align 4 + +@dyglob1 = internal global <1 x float> zeroinitializer, align 4 +@dyglob2 = internal global <2 x float> zeroinitializer, align 4 +@stglob2 = internal global <2 x float> zeroinitializer, align 4 +@dygar1 = internal global [2 x <1 x float>] zeroinitializer, align 4 +@dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +@stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +@dygrec2.0 = internal global <2 x float> zeroinitializer, align 4 +@stgrec2.0 = internal global <2 x float> zeroinitializer, align 4 +@stgar1.0 = internal global [2 x float] zeroinitializer, align 4 +@dygrec1.0 = internal global <1 x float> zeroinitializer, align 4 +@stglob1.0 = internal global float 0.000000e+00, align 4 +@stgrec1.0.0 = internal global float 0.000000e+00, align 4 + +; Function Attrs: nounwind +; CHECK-LOCAL: define <4 x float> @"\01?tester +define <4 x float> @"\01?tester@@YA?AV?$vector@M$03@@HY0M@M@Z"(i32 %ix, 
[12 x float]* %vals) #0 { +bb: + ; Vec2s are preserved. + ; CHECK-DAG: %dyloc2 = alloca <2 x float> + ; CHECK-DAG: %dylar2 = alloca [4 x <2 x float>] + ; CHECK-DAG: %dylorc2.0 = alloca <2 x float> + + ; CHECK-DAG: %stloc2 = alloca <2 x float> + ; CHECK-DAG: %stlar2 = alloca [4 x <2 x float>] + ; CHECK-DAG: %stlorc2.0 = alloca <2 x float> + + ; Statics vec1s are unaltered by dynamic vector to array. + ; CHECK-DAG: %stloc1 = alloca <1 x float> + ; CHECK-DAG: %stlar1.0 = alloca [3 x float] + ; CHECK-DAG: %stlorc1.0 = alloca <1 x float> + + ; Dynamic vec1s are removed and lose their names. + ; CHECK-DAG: alloca [1 x float] + ; CHECK-DAG: alloca [3 x [1 x float]] + ; CHECK-DAG: alloca [1 x float] + + %dylorc1.0 = alloca <1 x float> + %stlorc1.0 = alloca <1 x float> + %dylorc2.0 = alloca <2 x float> + %stlorc2.0 = alloca <2 x float> + %stlar1.0 = alloca [3 x float] + %tmp = alloca i32, align 4 + %dyloc1 = alloca <1 x float>, align 4 + %dyloc2 = alloca <2 x float>, align 4 + %dylar1 = alloca [3 x <1 x float>], align 4 + %dylar2 = alloca [4 x <2 x float>], align 4 + %stloc1 = alloca <1 x float>, align 4 + %stloc2 = alloca <2 x float>, align 4 + %stlar2 = alloca [4 x <2 x float>], align 4 + store i32 %ix, i32* %tmp, align 4 + + %tmp13 = load i32, i32* %tmp, align 4 ; line:53 col:7 + %tmp14 = icmp sgt i32 %tmp13, 0 ; line:53 col:10 + %tmp15 = icmp ne i1 %tmp14, false ; line:53 col:10 + %tmp16 = icmp ne i1 %tmp15, false ; line:53 col:10 + br i1 %tmp16, label %bb17, label %bb76 ; line:53 col:7 + +bb17: ; preds = %bb + %tmp18 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 0 ; line:54 col:30 + %tmp19 = load float, float* %tmp18, align 4 ; line:54 col:30 + %tmp20 = load i32, i32* %tmp, align 4 ; line:54 col:24 + %tmp21 = getelementptr <1 x float>, <1 x float>* %dyloc1, i32 0, i32 %tmp20 ; line:54 col:17 + store float %tmp19, float* %tmp21 ; line:54 col:28 + %tmp22 = getelementptr <1 x float>, <1 x float>* %stloc1, i32 0, i32 0 ; line:54 col:5 + store float 
%tmp19, float* %tmp22 ; line:54 col:15 + %tmp23 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 1 ; line:55 col:30 + %tmp24 = load float, float* %tmp23, align 4 ; line:55 col:30 + %tmp25 = load i32, i32* %tmp, align 4 ; line:55 col:24 + %tmp26 = getelementptr <2 x float>, <2 x float>* %dyloc2, i32 0, i32 %tmp25 ; line:55 col:17 + store float %tmp24, float* %tmp26 ; line:55 col:28 + %tmp27 = getelementptr <2 x float>, <2 x float>* %stloc2, i32 0, i32 1 ; line:55 col:5 + store float %tmp24, float* %tmp27 ; line:55 col:15 + %tmp28 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 2 ; line:56 col:37 + %tmp29 = load float, float* %tmp28, align 4 ; line:56 col:37 + %tmp30 = load i32, i32* %tmp, align 4 ; line:56 col:27 + %tmp31 = load i32, i32* %tmp, align 4 ; line:56 col:31 + %tmp32 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %dylar1, i32 0, i32 %tmp30, i32 %tmp31 ; line:56 col:20 + store float %tmp29, float* %tmp32 ; line:56 col:35 + %tmp33 = getelementptr inbounds [3 x float], [3 x float]* %stlar1.0, i32 0, i32 1 ; line:56 col:5 + store float %tmp29, float* %tmp33 ; line:56 col:18 + %tmp34 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 3 ; line:57 col:37 + %tmp35 = load float, float* %tmp34, align 4 ; line:57 col:37 + %tmp36 = load i32, i32* %tmp, align 4 ; line:57 col:27 + %tmp37 = load i32, i32* %tmp, align 4 ; line:57 col:31 + %tmp38 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %dylar2, i32 0, i32 %tmp36, i32 %tmp37 ; line:57 col:20 + store float %tmp35, float* %tmp38 ; line:57 col:35 + %tmp39 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %stlar2, i32 0, i32 1, i32 0 ; line:57 col:5 + store float %tmp35, float* %tmp39 ; line:57 col:18 + %tmp40 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 4 ; line:58 col:36 + %tmp41 = load float, float* %tmp40, align 4 ; line:58 col:36 + %tmp42 = load i32, i32* %tmp, align 4 ; line:58 
col:30 + %tmp43 = getelementptr inbounds <1 x float>, <1 x float>* %dylorc1.0, i32 0, i32 %tmp42 ; line:58 col:20 + store float %tmp41, float* %tmp43 ; line:58 col:34 + %tmp44 = getelementptr inbounds <1 x float>, <1 x float>* %stlorc1.0, i32 0, i32 0 ; line:58 col:5 + store float %tmp41, float* %tmp44 ; line:58 col:18 + %tmp45 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 5 ; line:59 col:36 + %tmp46 = load float, float* %tmp45, align 4 ; line:59 col:36 + %tmp47 = load i32, i32* %tmp, align 4 ; line:59 col:30 + %tmp48 = getelementptr inbounds <2 x float>, <2 x float>* %dylorc2.0, i32 0, i32 %tmp47 ; line:59 col:20 + store float %tmp46, float* %tmp48 ; line:59 col:34 + %tmp49 = getelementptr inbounds <2 x float>, <2 x float>* %stlorc2.0, i32 0, i32 1 ; line:59 col:5 + store float %tmp46, float* %tmp49 ; line:59 col:18 + %tmp50 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 6 ; line:61 col:32 + %tmp51 = load float, float* %tmp50, align 4 ; line:61 col:32 + %tmp52 = load i32, i32* %tmp, align 4 ; line:61 col:26 + %tmp53 = getelementptr <1 x float>, <1 x float>* @dyglob1, i32 0, i32 %tmp52 ; line:61 col:18 + store float %tmp51, float* %tmp53 ; line:61 col:30 + store float %tmp51, float* @stglob1.0 ; line:61 col:16 + %tmp54 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 7 ; line:62 col:32 + %tmp55 = load float, float* %tmp54, align 4 ; line:62 col:32 + %tmp56 = load i32, i32* %tmp, align 4 ; line:62 col:26 + %tmp57 = getelementptr <2 x float>, <2 x float>* @dyglob2, i32 0, i32 %tmp56 ; line:62 col:18 + store float %tmp55, float* %tmp57 ; line:62 col:30 + store float %tmp55, float* getelementptr inbounds (<2 x float>, <2 x float>* @stglob2, i32 0, i32 1) ; line:62 col:16 + %tmp58 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 8 ; line:63 col:37 + %tmp59 = load float, float* %tmp58, align 4 ; line:63 col:37 + %tmp60 = load i32, i32* %tmp, align 4 ; line:63 col:27 + %tmp61 = 
load i32, i32* %tmp, align 4 ; line:63 col:31 + %tmp62 = getelementptr inbounds [2 x <1 x float>], [2 x <1 x float>]* @dygar1, i32 0, i32 %tmp60, i32 %tmp61 ; line:63 col:20 + store float %tmp59, float* %tmp62 ; line:63 col:35 + store float %tmp59, float* getelementptr inbounds ([2 x float], [2 x float]* @stgar1.0, i32 0, i32 1) ; line:63 col:18 + %tmp63 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 9 ; line:64 col:37 + %tmp64 = load float, float* %tmp63, align 4 ; line:64 col:37 + %tmp65 = load i32, i32* %tmp, align 4 ; line:64 col:27 + %tmp66 = load i32, i32* %tmp, align 4 ; line:64 col:31 + %tmp67 = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* @dygar2, i32 0, i32 %tmp65, i32 %tmp66 ; line:64 col:20 + store float %tmp64, float* %tmp67 ; line:64 col:35 + store float %tmp64, float* getelementptr inbounds ([3 x <2 x float>], [3 x <2 x float>]* @stgar2, i32 0, i32 1, i32 1) ; line:64 col:18 + %tmp68 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 10 ; line:65 col:36 + %tmp69 = load float, float* %tmp68, align 4 ; line:65 col:36 + %tmp70 = load i32, i32* %tmp, align 4 ; line:65 col:30 + %tmp71 = getelementptr inbounds <1 x float>, <1 x float>* @dygrec1.0, i32 0, i32 %tmp70 ; line:65 col:20 + store float %tmp69, float* %tmp71 ; line:65 col:34 + store float %tmp69, float* @stgrec1.0.0 ; line:65 col:18 + %tmp72 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 11 ; line:66 col:36 + %tmp73 = load float, float* %tmp72, align 4 ; line:66 col:36 + %tmp74 = load i32, i32* %tmp, align 4 ; line:66 col:30 + %tmp75 = getelementptr inbounds <2 x float>, <2 x float>* @dygrec2.0, i32 0, i32 %tmp74 ; line:66 col:20 + store float %tmp73, float* %tmp75 ; line:66 col:34 + store float %tmp73, float* getelementptr inbounds (<2 x float>, <2 x float>* @stgrec2.0, i32 0, i32 1) ; line:66 col:18 + br label %bb76 ; line:67 col:3 + +bb76: ; preds = %bb17, %bb + %tmp77 = load <1 x float>, <1 x float>* %dyloc1, 
align 4 ; line:68 col:17 + %tmp78 = extractelement <1 x float> %tmp77, i32 0 ; line:68 col:17 + %tmp79 = load <2 x float>, <2 x float>* %dyloc2, align 4 ; line:68 col:27 + %tmp80 = extractelement <2 x float> %tmp79, i32 1 ; line:68 col:27 + %tmp81 = load <1 x float>, <1 x float>* %stloc1, align 4 ; line:68 col:37 + %tmp82 = extractelement <1 x float> %tmp81, i32 0 ; line:68 col:37 + %tmp83 = load <2 x float>, <2 x float>* %stloc2, align 4 ; line:68 col:47 + %tmp84 = extractelement <2 x float> %tmp83, i32 1 ; line:68 col:47 + %tmp85 = insertelement <4 x float> undef, float %tmp78, i64 0 ; line:68 col:16 + %tmp86 = insertelement <4 x float> %tmp85, float %tmp80, i64 1 ; line:68 col:16 + %tmp87 = insertelement <4 x float> %tmp86, float %tmp82, i64 2 ; line:68 col:16 + %tmp88 = insertelement <4 x float> %tmp87, float %tmp84, i64 3 ; line:68 col:16 + %tmp89 = load i32, i32* %tmp, align 4 ; line:68 col:73 + %tmp90 = load i32, i32* %tmp, align 4 ; line:68 col:77 + %tmp91 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %dylar1, i32 0, i32 %tmp89, i32 %tmp90 ; line:68 col:66 + %tmp92 = load float, float* %tmp91 ; line:68 col:66 + %tmp93 = load i32, i32* %tmp, align 4 ; line:68 col:89 + %tmp94 = load i32, i32* %tmp, align 4 ; line:68 col:93 + %tmp95 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %dylar2, i32 0, i32 %tmp93, i32 %tmp94 ; line:68 col:82 + %tmp96 = load float, float* %tmp95 ; line:68 col:82 + %tmp97 = getelementptr [3 x float], [3 x float]* %stlar1.0, i32 0, i32 0 ; line:68 col:98 + %load = load float, float* %tmp97 ; line:68 col:98 + %insert = insertelement <1 x float> undef, float %load, i64 0 ; line:68 col:98 + %tmp98 = extractelement <1 x float> %insert, i32 0 ; line:68 col:98 + %tmp99 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %stlar2, i32 0, i32 0 ; line:68 col:111 + %tmp100 = load <2 x float>, <2 x float>* %tmp99, align 4 ; line:68 col:111 + %tmp101 = extractelement <2 x float> %tmp100, i32 1 ; line:68 
col:111 + %tmp102 = insertelement <4 x float> undef, float %tmp92, i64 0 ; line:68 col:65 + %tmp103 = insertelement <4 x float> %tmp102, float %tmp96, i64 1 ; line:68 col:65 + %tmp104 = insertelement <4 x float> %tmp103, float %tmp98, i64 2 ; line:68 col:65 + %tmp105 = insertelement <4 x float> %tmp104, float %tmp101, i64 3 ; line:68 col:65 + %tmp106 = fadd <4 x float> %tmp88, %tmp105 ; line:68 col:57 + %tmp107 = load <1 x float>, <1 x float>* @dyglob1, align 4 ; line:69 col:10 + %tmp108 = extractelement <1 x float> %tmp107, i32 0 ; line:69 col:10 + %tmp109 = load <2 x float>, <2 x float>* @dyglob2, align 4 ; line:69 col:21 + %tmp110 = extractelement <2 x float> %tmp109, i32 1 ; line:69 col:21 + %load3 = load float, float* @stglob1.0 ; line:69 col:32 + %insert4 = insertelement <1 x float> undef, float %load3, i64 0 ; line:69 col:32 + %tmp111 = extractelement <1 x float> %insert4, i32 0 ; line:69 col:32 + %tmp112 = load <2 x float>, <2 x float>* @stglob2, align 4 ; line:69 col:43 + %tmp113 = extractelement <2 x float> %tmp112, i32 1 ; line:69 col:43 + %tmp114 = insertelement <4 x float> undef, float %tmp108, i64 0 ; line:69 col:9 + %tmp115 = insertelement <4 x float> %tmp114, float %tmp110, i64 1 ; line:69 col:9 + %tmp116 = insertelement <4 x float> %tmp115, float %tmp111, i64 2 ; line:69 col:9 + %tmp117 = insertelement <4 x float> %tmp116, float %tmp113, i64 3 ; line:69 col:9 + %tmp118 = fadd <4 x float> %tmp106, %tmp117 ; line:68 col:124 + %tmp119 = load i32, i32* %tmp, align 4 ; line:69 col:70 + %tmp120 = load i32, i32* %tmp, align 4 ; line:69 col:74 + %tmp121 = getelementptr inbounds [2 x <1 x float>], [2 x <1 x float>]* @dygar1, i32 0, i32 %tmp119, i32 %tmp120 ; line:69 col:63 + %tmp122 = load float, float* %tmp121 ; line:69 col:63 + %tmp123 = load i32, i32* %tmp, align 4 ; line:69 col:86 + %tmp124 = load i32, i32* %tmp, align 4 ; line:69 col:90 + %tmp125 = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* @dygar2, i32 0, i32 %tmp123, i32 %tmp124 ; 
line:69 col:79 + %tmp126 = load float, float* %tmp125 ; line:69 col:79 + %load1 = load float, float* getelementptr inbounds ([2 x float], [2 x float]* @stgar1.0, i32 0, i32 0) ; line:69 col:95 + %insert2 = insertelement <1 x float> undef, float %load1, i64 0 ; line:69 col:95 + %tmp127 = extractelement <1 x float> %insert2, i32 0 ; line:69 col:95 + %tmp128 = load <2 x float>, <2 x float>* getelementptr inbounds ([3 x <2 x float>], [3 x <2 x float>]* @stgar2, i32 0, i32 0), align 4 ; line:69 col:108 + %tmp129 = extractelement <2 x float> %tmp128, i32 1 ; line:69 col:108 + %tmp130 = insertelement <4 x float> undef, float %tmp122, i64 0 ; line:69 col:62 + %tmp131 = insertelement <4 x float> %tmp130, float %tmp126, i64 1 ; line:69 col:62 + %tmp132 = insertelement <4 x float> %tmp131, float %tmp127, i64 2 ; line:69 col:62 + %tmp133 = insertelement <4 x float> %tmp132, float %tmp129, i64 3 ; line:69 col:62 + %tmp134 = fadd <4 x float> %tmp118, %tmp133 ; line:69 col:54 + %tmp135 = load <1 x float>, <1 x float>* %stlorc1.0, align 4 ; line:70 col:20 + %tmp136 = extractelement <1 x float> %tmp135, i64 0 ; line:70 col:11 + %tmp137 = getelementptr inbounds <2 x float>, <2 x float>* %stlorc2.0, i32 0, i32 1 ; line:70 col:23 + %tmp138 = load float, float* %tmp137 ; line:70 col:23 + %tmp139 = load <1 x float>, <1 x float>* %dylorc1.0, align 4 ; line:70 col:45 + %tmp140 = extractelement <1 x float> %tmp139, i64 0 ; line:70 col:11 + %tmp141 = load i32, i32* %tmp, align 4 ; line:70 col:58 + %tmp142 = getelementptr inbounds <2 x float>, <2 x float>* %dylorc2.0, i32 0, i32 %tmp141 ; line:70 col:48 + %tmp143 = load float, float* %tmp142 ; line:70 col:48 + %tmp144 = insertelement <4 x float> undef, float %tmp136, i64 0 ; line:70 col:11 + %tmp145 = insertelement <4 x float> %tmp144, float %tmp138, i64 1 ; line:70 col:11 + %tmp146 = insertelement <4 x float> %tmp145, float %tmp140, i64 2 ; line:70 col:11 + %tmp147 = insertelement <4 x float> %tmp146, float %tmp143, i64 3 ; line:70 col:11 + 
%tmp148 = fadd <4 x float> %tmp134, %tmp147 ; line:69 col:121 + %load5 = load float, float* @stgrec1.0.0 ; line:70 col:80 + %insert6 = insertelement <1 x float> undef, float %load5, i64 0 ; line:70 col:80 + %tmp149 = extractelement <1 x float> %insert6, i64 0 ; line:70 col:71 + %tmp150 = load float, float* getelementptr inbounds (<2 x float>, <2 x float>* @stgrec2.0, i32 0, i32 1) ; line:70 col:83 + %tmp151 = load <1 x float>, <1 x float>* @dygrec1.0, align 4 ; line:70 col:105 + %tmp152 = extractelement <1 x float> %tmp151, i64 0 ; line:70 col:71 + %tmp153 = load i32, i32* %tmp, align 4 ; line:70 col:118 + %tmp154 = getelementptr inbounds <2 x float>, <2 x float>* @dygrec2.0, i32 0, i32 %tmp153 ; line:70 col:108 + %tmp155 = load float, float* %tmp154 ; line:70 col:108 + %tmp156 = insertelement <4 x float> undef, float %tmp149, i64 0 ; line:70 col:71 + %tmp157 = insertelement <4 x float> %tmp156, float %tmp150, i64 1 ; line:70 col:71 + %tmp158 = insertelement <4 x float> %tmp157, float %tmp152, i64 2 ; line:70 col:71 + %tmp159 = insertelement <4 x float> %tmp158, float %tmp155, i64 3 ; line:70 col:71 + %tmp160 = fadd <4 x float> %tmp148, %tmp159 ; line:70 col:63 + ret <4 x float> %tmp160 ; line:68 col:3 +} + +attributes #0 = { nounwind } + +!dx.version = !{!3} +!3 = !{i32 1, i32 9} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-sroa.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-sroa.ll new file mode 100644 index 0000000000..95a64a17d4 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-sroa.ll @@ -0,0 +1,324 @@ +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; Test for SROA reduction of globals and allocas. 
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.VectRec1 = type { <1 x float> } +%struct.VectRec2 = type { <2 x float> } +%ConstantBuffer = type opaque + +; Confirm that the dynamic globals are untouched and the statics are scalarized. +; DAG used to preserve the convenient ordering. + +; Dynamic access preserves even vec1s in SROA. +; CHECK-DAG: @dyglob1 = internal global <1 x float> zeroinitializer, align 4 +; CHECK-DAG: @dygar1 = internal global [2 x <1 x float>] zeroinitializer, align 4 +; CHECK-DAG: @dygrec1.0 = internal global <1 x float> zeroinitializer, align 4 +; CHECK-DAG: @dyglob2 = internal global <2 x float> zeroinitializer, align 4 +; CHECK-DAG: @dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +; CHECK-DAG: @dygrec2.0 = internal global <2 x float> zeroinitializer, align 4 + +; Having >1 elements preserves even statically-accessed vec2s. +; CHECK-DAG: @stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +; CHECK-DAG: @stglob2 = internal global <2 x float> zeroinitializer, align 4 +; CHECK-DAG: @stgrec2.0 = internal global <2 x float> zeroinitializer, align 4 + +; Statically-accessed vec1s should get scalarized. 
+; CHECK-DAG: @stgar1.0 = internal global [2 x float] zeroinitializer, align 4 +; CHECK-DAG: @stglob1.0 = internal global float 0.000000e+00, align 4 +; CHECK-DAG: @stgrec1.0.0 = internal global float 0.000000e+00, align 4 + +@dyglob2 = internal global <2 x float> zeroinitializer, align 4 +@dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +@dygrec2 = internal global %struct.VectRec2 zeroinitializer, align 4 +@dyglob1 = internal global <1 x float> zeroinitializer, align 4 +@dygar1 = internal global [2 x <1 x float>] zeroinitializer, align 4 +@dygrec1 = internal global %struct.VectRec1 zeroinitializer, align 4 + +@stglob2 = internal global <2 x float> zeroinitializer, align 4 +@stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +@stgrec2 = internal global %struct.VectRec2 zeroinitializer, align 4 + +@stglob1 = internal global <1 x float> zeroinitializer, align 4 +@stgar1 = internal global [2 x <1 x float>] zeroinitializer, align 4 +@stgrec1 = internal global %struct.VectRec1 zeroinitializer, align 4 + +@"$Globals" = external constant %ConstantBuffer + +; Function Attrs: nounwind +define <4 x float> @"\01?tester@@YA?AV?$vector@M$03@@HY0M@M@Z"(i32 %ix, [12 x float]* %vals) #0 { +bb: + ; Dynamic access preserves even vec1s in SROA. + ; CHECK-DAG: %dylorc1.0 = alloca <1 x float> + ; CHECK-DAG: %dylorc2.0 = alloca <2 x float> + ; CHECK-DAG: %dylorc1.0 = alloca <1 x float> + ; CHECK-DAG: %dylorc2.0 = alloca <2 x float> + ; CHECK-DAG: %dylar1 = alloca [3 x <1 x float>] + ; CHECK-DAG: %dylar2 = alloca [4 x <2 x float>] + + ; SROA doesn't reduce non-array allocas because scalarizer should get them. + ; CHECK-DAG: %stlorc1.0 = alloca <1 x float> + ; CHECK-DAG: %stlorc2.0 = alloca <2 x float> + ; CHECK-DAG: %stloc1 = alloca <1 x float>, align 4 + ; CHECK-DAG: %stloc2 = alloca <2 x float>, align 4 + + ; Statically-accessed arrays should get reduced. 
+ ; CHECK-DAG: %stlar2 = alloca [4 x <2 x float>] + ; CHECK-DAG: %stlar1.0 = alloca [3 x float] + + %tmp = alloca i32, align 4, !dx.temp !14 + %dyloc1 = alloca <1 x float>, align 4 + %dyloc2 = alloca <2 x float>, align 4 + %dylar1 = alloca [3 x <1 x float>], align 4 + %dylar2 = alloca [4 x <2 x float>], align 4 + %dylorc1 = alloca %struct.VectRec1, align 4 + %dylorc2 = alloca %struct.VectRec2, align 4 + %stloc1 = alloca <1 x float>, align 4 + %stloc2 = alloca <2 x float>, align 4 + %stlar1 = alloca [3 x <1 x float>], align 4 + %stlar2 = alloca [4 x <2 x float>], align 4 + %stlorc1 = alloca %struct.VectRec1, align 4 + %stlorc2 = alloca %struct.VectRec2, align 4 + + store i32 %ix, i32* %tmp, align 4 + %tmp13 = load i32, i32* %tmp, align 4 ; line:53 col:7 + %tmp14 = icmp sgt i32 %tmp13, 0 ; line:53 col:10 + %tmp15 = icmp ne i1 %tmp14, false ; line:53 col:10 + %tmp16 = icmp ne i1 %tmp15, false ; line:53 col:10 + br i1 %tmp16, label %bb17, label %bb86 ; line:53 col:7 + +bb17: ; preds = %bb + %tmp18 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 0 ; line:54 col:30 + %tmp19 = load float, float* %tmp18, align 4 ; line:54 col:30 + %tmp20 = load i32, i32* %tmp, align 4 ; line:54 col:24 + %tmp21 = getelementptr <1 x float>, <1 x float>* %dyloc1, i32 0, i32 %tmp20 ; line:54 col:17 + store float %tmp19, float* %tmp21 ; line:54 col:28 + %tmp22 = getelementptr <1 x float>, <1 x float>* %stloc1, i32 0, i32 0 ; line:54 col:5 + store float %tmp19, float* %tmp22 ; line:54 col:15 + %tmp23 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 1 ; line:55 col:30 + %tmp24 = load float, float* %tmp23, align 4 ; line:55 col:30 + %tmp25 = load i32, i32* %tmp, align 4 ; line:55 col:24 + %tmp26 = getelementptr <2 x float>, <2 x float>* %dyloc2, i32 0, i32 %tmp25 ; line:55 col:17 + store float %tmp24, float* %tmp26 ; line:55 col:28 + %tmp27 = getelementptr <2 x float>, <2 x float>* %stloc2, i32 0, i32 1 ; line:55 col:5 + store float %tmp24, float* 
%tmp27 ; line:55 col:15 + %tmp28 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 2 ; line:56 col:37 + %tmp29 = load float, float* %tmp28, align 4 ; line:56 col:37 + %tmp30 = load i32, i32* %tmp, align 4 ; line:56 col:27 + %tmp31 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %dylar1, i32 0, i32 %tmp30 ; line:56 col:20 + %tmp32 = load i32, i32* %tmp, align 4 ; line:56 col:31 + %tmp33 = getelementptr <1 x float>, <1 x float>* %tmp31, i32 0, i32 %tmp32 ; line:56 col:20 + store float %tmp29, float* %tmp33 ; line:56 col:35 + %tmp34 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %stlar1, i32 0, i32 1 ; line:56 col:5 + %tmp35 = getelementptr <1 x float>, <1 x float>* %tmp34, i32 0, i32 0 ; line:56 col:5 + store float %tmp29, float* %tmp35 ; line:56 col:18 + %tmp36 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 3 ; line:57 col:37 + %tmp37 = load float, float* %tmp36, align 4 ; line:57 col:37 + %tmp38 = load i32, i32* %tmp, align 4 ; line:57 col:27 + %tmp39 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %dylar2, i32 0, i32 %tmp38 ; line:57 col:20 + %tmp40 = load i32, i32* %tmp, align 4 ; line:57 col:31 + %tmp41 = getelementptr <2 x float>, <2 x float>* %tmp39, i32 0, i32 %tmp40 ; line:57 col:20 + store float %tmp37, float* %tmp41 ; line:57 col:35 + %tmp42 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %stlar2, i32 0, i32 1 ; line:57 col:5 + %tmp43 = getelementptr <2 x float>, <2 x float>* %tmp42, i32 0, i32 0 ; line:57 col:5 + store float %tmp37, float* %tmp43 ; line:57 col:18 + %tmp44 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 4 ; line:58 col:36 + %tmp45 = load float, float* %tmp44, align 4 ; line:58 col:36 + %tmp46 = getelementptr inbounds %struct.VectRec1, %struct.VectRec1* %dylorc1, i32 0, i32 0 ; line:58 col:28 + %tmp47 = load i32, i32* %tmp, align 4 ; line:58 col:30 + %tmp48 = getelementptr <1 x float>, <1 x float>* %tmp46, i32 0, 
i32 %tmp47 ; line:58 col:20 + store float %tmp45, float* %tmp48 ; line:58 col:34 + %tmp49 = getelementptr inbounds %struct.VectRec1, %struct.VectRec1* %stlorc1, i32 0, i32 0 ; line:58 col:13 + %tmp50 = getelementptr <1 x float>, <1 x float>* %tmp49, i32 0, i32 0 ; line:58 col:5 + store float %tmp45, float* %tmp50 ; line:58 col:18 + %tmp51 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 5 ; line:59 col:36 + %tmp52 = load float, float* %tmp51, align 4 ; line:59 col:36 + %tmp53 = getelementptr inbounds %struct.VectRec2, %struct.VectRec2* %dylorc2, i32 0, i32 0 ; line:59 col:28 + %tmp54 = load i32, i32* %tmp, align 4 ; line:59 col:30 + %tmp55 = getelementptr <2 x float>, <2 x float>* %tmp53, i32 0, i32 %tmp54 ; line:59 col:20 + store float %tmp52, float* %tmp55 ; line:59 col:34 + %tmp56 = getelementptr inbounds %struct.VectRec2, %struct.VectRec2* %stlorc2, i32 0, i32 0 ; line:59 col:13 + %tmp57 = getelementptr <2 x float>, <2 x float>* %tmp56, i32 0, i32 1 ; line:59 col:5 + store float %tmp52, float* %tmp57 ; line:59 col:18 + %tmp58 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 6 ; line:61 col:32 + %tmp59 = load float, float* %tmp58, align 4 ; line:61 col:32 + %tmp60 = load i32, i32* %tmp, align 4 ; line:61 col:26 + %tmp61 = getelementptr <1 x float>, <1 x float>* @dyglob1, i32 0, i32 %tmp60 ; line:61 col:18 + store float %tmp59, float* %tmp61 ; line:61 col:30 + store float %tmp59, float* getelementptr inbounds (<1 x float>, <1 x float>* @stglob1, i32 0, i32 0) ; line:61 col:16 + %tmp62 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 7 ; line:62 col:32 + %tmp63 = load float, float* %tmp62, align 4 ; line:62 col:32 + %tmp64 = load i32, i32* %tmp, align 4 ; line:62 col:26 + %tmp65 = getelementptr <2 x float>, <2 x float>* @dyglob2, i32 0, i32 %tmp64 ; line:62 col:18 + store float %tmp63, float* %tmp65 ; line:62 col:30 + store float %tmp63, float* getelementptr inbounds (<2 x float>, <2 x float>* 
@stglob2, i32 0, i32 1) ; line:62 col:16 + %tmp66 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 8 ; line:63 col:37 + %tmp67 = load float, float* %tmp66, align 4 ; line:63 col:37 + %tmp68 = load i32, i32* %tmp, align 4 ; line:63 col:27 + %tmp69 = getelementptr inbounds [2 x <1 x float>], [2 x <1 x float>]* @dygar1, i32 0, i32 %tmp68 ; line:63 col:20 + %tmp70 = load i32, i32* %tmp, align 4 ; line:63 col:31 + %tmp71 = getelementptr <1 x float>, <1 x float>* %tmp69, i32 0, i32 %tmp70 ; line:63 col:20 + store float %tmp67, float* %tmp71 ; line:63 col:35 + store float %tmp67, float* getelementptr inbounds ([2 x <1 x float>], [2 x <1 x float>]* @stgar1, i32 0, i32 1, i32 0) ; line:63 col:18 + %tmp72 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 9 ; line:64 col:37 + %tmp73 = load float, float* %tmp72, align 4 ; line:64 col:37 + %tmp74 = load i32, i32* %tmp, align 4 ; line:64 col:27 + %tmp75 = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* @dygar2, i32 0, i32 %tmp74 ; line:64 col:20 + %tmp76 = load i32, i32* %tmp, align 4 ; line:64 col:31 + %tmp77 = getelementptr <2 x float>, <2 x float>* %tmp75, i32 0, i32 %tmp76 ; line:64 col:20 + store float %tmp73, float* %tmp77 ; line:64 col:35 + store float %tmp73, float* getelementptr inbounds ([3 x <2 x float>], [3 x <2 x float>]* @stgar2, i32 0, i32 1, i32 1) ; line:64 col:18 + %tmp78 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 10 ; line:65 col:36 + %tmp79 = load float, float* %tmp78, align 4 ; line:65 col:36 + %tmp80 = load i32, i32* %tmp, align 4 ; line:65 col:30 + %tmp81 = getelementptr <1 x float>, <1 x float>* getelementptr inbounds (%struct.VectRec1, %struct.VectRec1* @dygrec1, i32 0, i32 0), i32 0, i32 %tmp80 ; line:65 col:20 + store float %tmp79, float* %tmp81 ; line:65 col:34 + store float %tmp79, float* getelementptr inbounds (%struct.VectRec1, %struct.VectRec1* @stgrec1, i32 0, i32 0, i32 0) ; line:65 col:18 + %tmp82 = 
getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 11 ; line:66 col:36 + %tmp83 = load float, float* %tmp82, align 4 ; line:66 col:36 + %tmp84 = load i32, i32* %tmp, align 4 ; line:66 col:30 + %tmp85 = getelementptr <2 x float>, <2 x float>* getelementptr inbounds (%struct.VectRec2, %struct.VectRec2* @dygrec2, i32 0, i32 0), i32 0, i32 %tmp84 ; line:66 col:20 + store float %tmp83, float* %tmp85 ; line:66 col:34 + store float %tmp83, float* getelementptr inbounds (%struct.VectRec2, %struct.VectRec2* @stgrec2, i32 0, i32 0, i32 1) ; line:66 col:18 + br label %bb86 ; line:67 col:3 + +bb86: ; preds = %bb17, %bb + %tmp87 = load <1 x float>, <1 x float>* %dyloc1, align 4 ; line:68 col:17 + %tmp88 = extractelement <1 x float> %tmp87, i32 0 ; line:68 col:17 + %tmp89 = load <2 x float>, <2 x float>* %dyloc2, align 4 ; line:68 col:27 + %tmp90 = extractelement <2 x float> %tmp89, i32 1 ; line:68 col:27 + %tmp91 = load <1 x float>, <1 x float>* %stloc1, align 4 ; line:68 col:37 + %tmp92 = extractelement <1 x float> %tmp91, i32 0 ; line:68 col:37 + %tmp93 = load <2 x float>, <2 x float>* %stloc2, align 4 ; line:68 col:47 + %tmp94 = extractelement <2 x float> %tmp93, i32 1 ; line:68 col:47 + %tmp95 = insertelement <4 x float> undef, float %tmp88, i64 0 ; line:68 col:16 + %tmp96 = insertelement <4 x float> %tmp95, float %tmp90, i64 1 ; line:68 col:16 + %tmp97 = insertelement <4 x float> %tmp96, float %tmp92, i64 2 ; line:68 col:16 + %tmp98 = insertelement <4 x float> %tmp97, float %tmp94, i64 3 ; line:68 col:16 + %tmp99 = load i32, i32* %tmp, align 4 ; line:68 col:73 + %tmp100 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %dylar1, i32 0, i32 %tmp99 ; line:68 col:66 + %tmp101 = load i32, i32* %tmp, align 4 ; line:68 col:77 + %tmp102 = getelementptr <1 x float>, <1 x float>* %tmp100, i32 0, i32 %tmp101 ; line:68 col:66 + %tmp103 = load float, float* %tmp102 ; line:68 col:66 + %tmp104 = load i32, i32* %tmp, align 4 ; line:68 col:89 + %tmp105 = 
getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %dylar2, i32 0, i32 %tmp104 ; line:68 col:82 + %tmp106 = load i32, i32* %tmp, align 4 ; line:68 col:93 + %tmp107 = getelementptr <2 x float>, <2 x float>* %tmp105, i32 0, i32 %tmp106 ; line:68 col:82 + %tmp108 = load float, float* %tmp107 ; line:68 col:82 + %tmp109 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %stlar1, i32 0, i32 0 ; line:68 col:98 + %tmp110 = load <1 x float>, <1 x float>* %tmp109, align 4 ; line:68 col:98 + %tmp111 = extractelement <1 x float> %tmp110, i32 0 ; line:68 col:98 + %tmp112 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %stlar2, i32 0, i32 0 ; line:68 col:111 + %tmp113 = load <2 x float>, <2 x float>* %tmp112, align 4 ; line:68 col:111 + %tmp114 = extractelement <2 x float> %tmp113, i32 1 ; line:68 col:111 + %tmp115 = insertelement <4 x float> undef, float %tmp103, i64 0 ; line:68 col:65 + %tmp116 = insertelement <4 x float> %tmp115, float %tmp108, i64 1 ; line:68 col:65 + %tmp117 = insertelement <4 x float> %tmp116, float %tmp111, i64 2 ; line:68 col:65 + %tmp118 = insertelement <4 x float> %tmp117, float %tmp114, i64 3 ; line:68 col:65 + %tmp119 = fadd <4 x float> %tmp98, %tmp118 ; line:68 col:57 + %tmp120 = load <1 x float>, <1 x float>* @dyglob1, align 4 ; line:69 col:10 + %tmp121 = extractelement <1 x float> %tmp120, i32 0 ; line:69 col:10 + %tmp122 = load <2 x float>, <2 x float>* @dyglob2, align 4 ; line:69 col:21 + %tmp123 = extractelement <2 x float> %tmp122, i32 1 ; line:69 col:21 + %tmp124 = load <1 x float>, <1 x float>* @stglob1, align 4 ; line:69 col:32 + %tmp125 = extractelement <1 x float> %tmp124, i32 0 ; line:69 col:32 + %tmp126 = load <2 x float>, <2 x float>* @stglob2, align 4 ; line:69 col:43 + %tmp127 = extractelement <2 x float> %tmp126, i32 1 ; line:69 col:43 + %tmp128 = insertelement <4 x float> undef, float %tmp121, i64 0 ; line:69 col:9 + %tmp129 = insertelement <4 x float> %tmp128, float %tmp123, i64 1 ; line:69 
col:9 + %tmp130 = insertelement <4 x float> %tmp129, float %tmp125, i64 2 ; line:69 col:9 + %tmp131 = insertelement <4 x float> %tmp130, float %tmp127, i64 3 ; line:69 col:9 + %tmp132 = fadd <4 x float> %tmp119, %tmp131 ; line:68 col:124 + %tmp133 = load i32, i32* %tmp, align 4 ; line:69 col:70 + %tmp134 = getelementptr inbounds [2 x <1 x float>], [2 x <1 x float>]* @dygar1, i32 0, i32 %tmp133 ; line:69 col:63 + %tmp135 = load i32, i32* %tmp, align 4 ; line:69 col:74 + %tmp136 = getelementptr <1 x float>, <1 x float>* %tmp134, i32 0, i32 %tmp135 ; line:69 col:63 + %tmp137 = load float, float* %tmp136 ; line:69 col:63 + %tmp138 = load i32, i32* %tmp, align 4 ; line:69 col:86 + %tmp139 = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* @dygar2, i32 0, i32 %tmp138 ; line:69 col:79 + %tmp140 = load i32, i32* %tmp, align 4 ; line:69 col:90 + %tmp141 = getelementptr <2 x float>, <2 x float>* %tmp139, i32 0, i32 %tmp140 ; line:69 col:79 + %tmp142 = load float, float* %tmp141 ; line:69 col:79 + %tmp143 = load <1 x float>, <1 x float>* getelementptr inbounds ([2 x <1 x float>], [2 x <1 x float>]* @stgar1, i32 0, i32 0), align 4 ; line:69 col:95 + %tmp144 = extractelement <1 x float> %tmp143, i32 0 ; line:69 col:95 + %tmp145 = load <2 x float>, <2 x float>* getelementptr inbounds ([3 x <2 x float>], [3 x <2 x float>]* @stgar2, i32 0, i32 0), align 4 ; line:69 col:108 + %tmp146 = extractelement <2 x float> %tmp145, i32 1 ; line:69 col:108 + %tmp147 = insertelement <4 x float> undef, float %tmp137, i64 0 ; line:69 col:62 + %tmp148 = insertelement <4 x float> %tmp147, float %tmp142, i64 1 ; line:69 col:62 + %tmp149 = insertelement <4 x float> %tmp148, float %tmp144, i64 2 ; line:69 col:62 + %tmp150 = insertelement <4 x float> %tmp149, float %tmp146, i64 3 ; line:69 col:62 + %tmp151 = fadd <4 x float> %tmp132, %tmp150 ; line:69 col:54 + %tmp152 = getelementptr inbounds %struct.VectRec1, %struct.VectRec1* %stlorc1, i32 0, i32 0 ; line:70 col:20 + %tmp153 = load <1 x 
float>, <1 x float>* %tmp152, align 4 ; line:70 col:20 + %tmp154 = extractelement <1 x float> %tmp153, i64 0 ; line:70 col:11 + %tmp155 = getelementptr inbounds %struct.VectRec2, %struct.VectRec2* %stlorc2, i32 0, i32 0 ; line:70 col:31 + %tmp156 = getelementptr <2 x float>, <2 x float>* %tmp155, i32 0, i32 1 ; line:70 col:23 + %tmp157 = load float, float* %tmp156 ; line:70 col:23 + %tmp158 = getelementptr inbounds %struct.VectRec1, %struct.VectRec1* %dylorc1, i32 0, i32 0 ; line:70 col:45 + %tmp159 = load <1 x float>, <1 x float>* %tmp158, align 4 ; line:70 col:45 + %tmp160 = extractelement <1 x float> %tmp159, i64 0 ; line:70 col:11 + %tmp161 = getelementptr inbounds %struct.VectRec2, %struct.VectRec2* %dylorc2, i32 0, i32 0 ; line:70 col:56 + %tmp162 = load i32, i32* %tmp, align 4 ; line:70 col:58 + %tmp163 = getelementptr <2 x float>, <2 x float>* %tmp161, i32 0, i32 %tmp162 ; line:70 col:48 + %tmp164 = load float, float* %tmp163 ; line:70 col:48 + %tmp165 = insertelement <4 x float> undef, float %tmp154, i64 0 ; line:70 col:11 + %tmp166 = insertelement <4 x float> %tmp165, float %tmp157, i64 1 ; line:70 col:11 + %tmp167 = insertelement <4 x float> %tmp166, float %tmp160, i64 2 ; line:70 col:11 + %tmp168 = insertelement <4 x float> %tmp167, float %tmp164, i64 3 ; line:70 col:11 + %tmp169 = fadd <4 x float> %tmp151, %tmp168 ; line:69 col:121 + %tmp170 = load <1 x float>, <1 x float>* getelementptr inbounds (%struct.VectRec1, %struct.VectRec1* @stgrec1, i32 0, i32 0), align 4 ; line:70 col:80 + %tmp171 = extractelement <1 x float> %tmp170, i64 0 ; line:70 col:71 + %tmp172 = load float, float* getelementptr inbounds (%struct.VectRec2, %struct.VectRec2* @stgrec2, i32 0, i32 0, i32 1) ; line:70 col:83 + %tmp173 = load <1 x float>, <1 x float>* getelementptr inbounds (%struct.VectRec1, %struct.VectRec1* @dygrec1, i32 0, i32 0), align 4 ; line:70 col:105 + %tmp174 = extractelement <1 x float> %tmp173, i64 0 ; line:70 col:71 + %tmp175 = load i32, i32* %tmp, align 4 ; 
line:70 col:118 + %tmp176 = getelementptr <2 x float>, <2 x float>* getelementptr inbounds (%struct.VectRec2, %struct.VectRec2* @dygrec2, i32 0, i32 0), i32 0, i32 %tmp175 ; line:70 col:108 + %tmp177 = load float, float* %tmp176 ; line:70 col:108 + %tmp178 = insertelement <4 x float> undef, float %tmp171, i64 0 ; line:70 col:71 + %tmp179 = insertelement <4 x float> %tmp178, float %tmp172, i64 1 ; line:70 col:71 + %tmp180 = insertelement <4 x float> %tmp179, float %tmp174, i64 2 ; line:70 col:71 + %tmp181 = insertelement <4 x float> %tmp180, float %tmp177, i64 3 ; line:70 col:71 + %tmp182 = fadd <4 x float> %tmp169, %tmp181 ; line:70 col:63 + ret <4 x float> %tmp182 ; line:68 col:3 +} + +attributes #0 = { nounwind } + +!pauseresume = !{!1} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !10} +!dx.entryPoints = !{!19} +!dx.fnprops = !{} +!dx.options = !{!23, !24} + +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!3 = !{i32 1, i32 9} +!4 = !{!"lib", i32 6, i32 9} +!5 = !{i32 0, %struct.VectRec1 undef, !6, %struct.VectRec2 undef, !8} +!6 = !{i32 4, !7} +!7 = !{i32 6, !"f", i32 3, i32 0, i32 4, !"REC1", i32 7, i32 9, i32 13, i32 1} +!8 = !{i32 8, !9} +!9 = !{i32 6, !"f", i32 3, i32 0, i32 4, !"REC2", i32 7, i32 9, i32 13, i32 2} +!10 = !{i32 1, <4 x float> (i32, [12 x float]*)* @"\01?tester@@YA?AV?$vector@M$03@@HY0M@M@Z", !11} +!11 = !{!12, !15, !17} +!12 = !{i32 1, !13, !14} +!13 = !{i32 7, i32 9, i32 13, i32 4} +!14 = !{} +!15 = !{i32 0, !16, !14} +!16 = !{i32 4, !"IX", i32 7, i32 4} +!17 = !{i32 0, !18, !14} +!18 = !{i32 4, !"VAL", i32 7, i32 9} +!19 = !{null, !"", null, !20, null} +!20 = !{null, null, !21, null} +!21 = !{!22} +!22 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!23 = !{i32 64} +!24 = !{i32 -1} +!25 = !{!26, !26, i64 0} +!26 = !{!"int", !27, i64 0} +!27 = !{!"omnipotent char", !28, i64 0} +!28 = !{!"Simple C/C++ TBAA"} diff --git 
a/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv.hlsl b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv.hlsl new file mode 100644 index 0000000000..7641cb4f39 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv.hlsl @@ -0,0 +1,112 @@ +// RUN: %dxc -fcgl -T lib_6_9 %s | FileCheck %s + +// Mainly a source for the ScalarReductionOfAggregatesHLSL(SROA) +// and DynamicIndexingVectorToArray(DIVA) IR tests with native vectors +// using allocas, static globals, and parameters. +// Dynamically accessed 1-element vectors should get skipped by SROA, +// but addressed by DynamicIndexingVectorToArray (hence the name). +// Larger vectors should be untouched. +// Arrays of vectors get some special treatment as well. +// Verifies that the original code is as expected for the IR tests. + +struct VectRec1 { + float1 f : REC1; +}; +struct VectRec2 { + float2 f : REC2; +}; + +// Vec2s will be preserved. +// CHECK-DAG: @dyglob2 = internal global <2 x float> zeroinitializer, align 4 +// CHECK-DAG: @dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +// CHECK-DAG: @dygrec2 = internal global %struct.VectRec2 zeroinitializer, align 4 + +// Dynamic vec1s will get replaced with dynamic vector to array. +// CHECK-DAG: @dyglob1 = internal global <1 x float> zeroinitializer, align 4 +// CHECK-DAG: @dygar1 = internal global [2 x <1 x float>] zeroinitializer, align 4 +// CHECK-DAG: @dygrec1 = internal global %struct.VectRec1 zeroinitializer, align 4 + +// Vec2s will be preserved. +// CHECK-DAG: @stglob2 = internal global <2 x float> zeroinitializer, align 4 +// CHECK-DAG: @stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +// CHECK-DAG: @stgrec2 = internal global %struct.VectRec2 zeroinitializer, align 4 + +// Static vec1s will get replaced with SROA. 
+// CHECK-DAG: @stglob1 = internal global <1 x float> zeroinitializer, align 4 +// CHECK-DAG: @stgar1 = internal global [2 x <1 x float>] zeroinitializer, align 4 +// CHECK-DAG: @stgrec1 = internal global %struct.VectRec1 zeroinitializer, align 4 + +static float1 dyglob1; +static float2 dyglob2; +static float1 dygar1[2]; +static float2 dygar2[3]; +static VectRec1 dygrec1; +static VectRec2 dygrec2; + +static float1 stglob1; +static float2 stglob2; +static float1 stgar1[2]; +static float2 stgar2[3]; +static VectRec1 stgrec1; +static VectRec2 stgrec2; + +// Test assignment operators. +// Vec2s should be skipped by SROA and DIVA +// DIVA will lower statically-indexed vectors and vectors in an array. +// CHECK-LABEL: define <4 x float> @"\01?tester +export float4 tester(int ix : IX, float vals[12] : VAL) { + + // Vec2s will be preserved. + // CHECK-DAG: %dyloc2 = alloca <2 x float>, align 4 + // CHECK-DAG: %dylar2 = alloca [4 x <2 x float>], align 4 + // CHECK-DAG: %dylorc2 = alloca %struct.VectRec2, align 4 + + // Dynamic local vec1s will get replaced with dynamic vector to array. + // CHECK-DAG: %dyloc1 = alloca <1 x float>, align 4 + // CHECK-DAG: %dylar1 = alloca [3 x <1 x float>], align 4 + // CHECK-DAG: %dylorc1 = alloca %struct.VectRec1, align 4 + + // Vec2s will be preserved. + // CHECK-DAG: %stloc2 = alloca <2 x float>, align 4 + // CHECK-DAG: %stlar2 = alloca [4 x <2 x float>], align 4 + // CHECK-DAG: %stlorc2 = alloca %struct.VectRec2, align 4 + + // Static local vec1s will get replaced by various passes. 
+ // CHECK-DAG: %stloc1 = alloca <1 x float>, align 4 + // CHECK-DAG: %stlar1 = alloca [3 x <1 x float>], align 4 + // CHECK-DAG: %stlorc1 = alloca %struct.VectRec1, align 4 + + float1 dyloc1; + float2 dyloc2; + float1 dylar1[3]; + float2 dylar2[4]; + VectRec1 dylorc1; + VectRec2 dylorc2; + + float1 stloc1; + float2 stloc2; + float1 stlar1[3]; + float2 stlar2[4]; + VectRec1 stlorc1; + VectRec2 stlorc2; + + if (ix > 0) { + stloc1[0] = dyloc1[ix] = vals[0]; + stloc2[1] = dyloc2[ix] = vals[1]; + stlar1[1][0] = dylar1[ix][ix] = vals[2]; + stlar2[1][0] = dylar2[ix][ix] = vals[3]; + stlorc1.f[0] = dylorc1.f[ix] = vals[4]; + stlorc2.f[1] = dylorc2.f[ix] = vals[5]; + + stglob1[0] = dyglob1[ix] = vals[6]; + stglob2[1] = dyglob2[ix] = vals[7]; + stgar1[1][0] = dygar1[ix][ix] = vals[8]; + stgar2[1][1] = dygar2[ix][ix] = vals[9]; + stgrec1.f[0] = dygrec1.f[ix] = vals[10]; + stgrec2.f[1] = dygrec2.f[ix] = vals[11]; + } + return float4(dyloc1.x, dyloc2.y, stloc1.x, stloc2.y) + float4(dylar1[ix][ix], dylar2[ix][ix], stlar1[0].x, stlar2[0].y) + + float4(dyglob1.x, dyglob2.y, stglob1.x, stglob2.y) + float4(dygar1[ix][ix], dygar2[ix][ix], stgar1[0].x, stgar2[0].y) + + float4(stlorc1.f, stlorc2.f[1], dylorc1.f, dylorc2.f[ix]) + float4(stgrec1.f, stgrec2.f[1], dygrec1.f, dygrec2.f[ix]); +} + diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl new file mode 100644 index 0000000000..11d705305d --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl @@ -0,0 +1,186 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=13 %s | FileCheck %s + +// Source for dxilgen test CodeGenDXIL/passes/longvec-intrinsics.ll. +// Some targetted filecheck testing as an incidental. 
+ +RWStructuredBuffer > hBuf; +RWStructuredBuffer > fBuf; +RWStructuredBuffer > dBuf; + +RWStructuredBuffer > bBuf; +RWStructuredBuffer > uBuf; +RWStructuredBuffer > lBuf; + +[numthreads(8,1,1)] +void main() { + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f32 @dx.op.rawBufferVectorLoad.v13f32(i32 303, %dx.types.Handle {{%.*}}, i32 11, i32 0, i32 4) + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.v13f32 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f32 @dx.op.rawBufferVectorLoad.v13f32(i32 303, %dx.types.Handle {{%.*}}, i32 12, i32 0, i32 4) + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.v13f32 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f32 @dx.op.rawBufferVectorLoad.v13f32(i32 303, %dx.types.Handle {{%.*}}, i32 13, i32 0, i32 4) + // CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.v13f32 [[ld]], 0 + vector fVec1 = fBuf[11]; + vector fVec2 = fBuf[12]; + vector fVec3 = fBuf[13]; + + // CHECK: [[tmp:%.*]] = call <13 x float> @dx.op.binary.v13f32(i32 35, <13 x float> [[fvec1]], <13 x float> [[fvec2]]) ; FMax(a,b) + // CHECK: call <13 x float> @dx.op.binary.v13f32(i32 36, <13 x float> [[tmp]], <13 x float> [[fvec3]]) ; FMin(a,b) + vector fRes = clamp(fVec1, fVec2, fVec3); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f16 @dx.op.rawBufferVectorLoad.v13f16(i32 303, %dx.types.Handle {{%.*}}, i32 14, i32 0, i32 2) + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.v13f16 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f16 @dx.op.rawBufferVectorLoad.v13f16(i32 303, %dx.types.Handle {{%.*}}, i32 15, i32 0, i32 2) + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.v13f16 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f16 @dx.op.rawBufferVectorLoad.v13f16(i32 303, %dx.types.Handle {{%.*}}, i32 16, i32 0, i32 2) + // CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.v13f16 [[ld]], 0 + vector hVec1 = hBuf[14]; + vector hVec2 = hBuf[15]; + vector hVec3 = hBuf[16]; + + // 
CHECK: [[tmp:%.*]] = fcmp fast olt <13 x half> [[hvec2]], [[hvec1]] + // CHECK: select <13 x i1> [[tmp]], <13 x half> zeroinitializer, <13 x half> hRes = step(hVec1, hVec2); + + // CHECK: [[tmp:%.*]] = fmul fast <13 x float> [[fvec1]], @dx.op.unary.v13f32(i32 21, <13 x float> [[tmp]]) ; Exp(value) + fRes += exp(fVec1); + + // CHECK: [[tmp:%.*]] = call <13 x half> @dx.op.unary.v13f16(i32 23, <13 x half> [[hvec1]]) ; Log(value) + // CHECK: fmul fast <13 x half> [[tmp]], [[fvec2]], [[fvec1]] + // CHECK: [[xsub:%.*]] = fsub fast <13 x float> [[fvec3]], [[fvec1]] + // CHECK: [[div:%.*]] = fdiv fast <13 x float> [[xsub]], [[sub]] + // CHECK: [[sat:%.*]] = call <13 x float> @dx.op.unary.v13f32(i32 7, <13 x float> [[div]]) ; Saturate(value) + // CHECK: [[mul:%.*]] = fmul fast <13 x float> [[sat]], , [[mul]] + // CHECK: [[mul:%.*]] = fmul fast <13 x float> [[sat]], [[sat]] + // CHECK: fmul fast <13 x float> [[mul]], [[sub]] + fRes += smoothstep(fVec1, fVec2, fVec3); + + // Intrinsics that expand into llvm ops. 
+ + // CHECK: fmul fast <13 x float> [[fvec3]], [[fvec1]], zeroinitializer + // CHECK: [[f2i:%.*]] = bitcast <13 x float> [[fvec1]] to <13 x i32> + // CHECK: [[and:%.*]] = and <13 x i32> [[f2i]], [[and]], [[add]], [[shr]] to <13 x float> + // CHECK: [[sel:%.*]] = select <13 x i1> [[cmp]], <13 x float> [[i2f]], <13 x float> zeroinitializer + // CHECK: [[and:%.*]] = and <13 x i32> [[f2i]], [[and]], exp = fVec3; + fRes += frexp(fVec1, exp); + fRes += exp; + + // CHECK: [[tmp:%.*]] = fsub fast <13 x half> [[hvec3]], [[hvec2]] + // CHECK: fmul fast <13 x half> [[tmp]], [[hvec1]] + hRes += lerp(hVec2, hVec3, hVec1); + + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 17, i32 0, i32 4) + // CHECK: [[uvec1:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 18, i32 0, i32 4) + // CHECK: [[uvec2:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + vector uVec1 = uBuf[17]; + vector uVec2 = uBuf[18]; + + vector signs = 1; + // CHECK: [[cmp:%.*]] = icmp ne <13 x i32> [[uvec2]], zeroinitializer + // CHECK: zext <13 x i1> [[cmp]] to <13 x i32> + signs *= sign(uVec2); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i64 @dx.op.rawBufferVectorLoad.v13i64(i32 303, %dx.types.Handle {{%.*}}, i32 19, i32 0, i32 8) + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.v13i64 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i64 @dx.op.rawBufferVectorLoad.v13i64(i32 303, %dx.types.Handle {{%.*}}, i32 20, i32 0, i32 8) + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.v13i64 [[ld]], 0 + vector lVec1 = lBuf[19]; + vector lVec2 = lBuf[20]; + + // CHECK: [[gt:%.*]] = icmp sgt <13 x i64> [[lvec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <13 x i64> [[lvec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <13 x i1> [[gt]] to <13 x i32> + // CHECK: 
[[ilt:%.*]] = zext <13 x i1> [[lt]] to <13 x i32> + // CHECK: sub nsw <13 x i32> [[igt]], [[ilt]] + signs *= sign(lVec2); + + vector uRes = signs; + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 21, i32 0, i32 4) + // CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[bvec:%.*]] = icmp ne <13 x i32> [[vec]], zeroinitializer + // CHECK: [[vec1:%.*]] = zext <13 x i1> [[bvec]] to <13 x i32> + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 22, i32 0, i32 4) + // CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[bvec:%.*]] = icmp ne <13 x i32> [[vec]], zeroinitializer + // CHECK: [[vec2:%.*]] = zext <13 x i1> [[bvec]] to <13 x i32> + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 23, i32 0, i32 4) + // CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[bvec:%.*]] = icmp ne <13 x i32> [[vec]], zeroinitializer + // CHECK: [[vec3:%.*]] = zext <13 x i1> [[bvec]] to <13 x i32> + vector bVec1 = bBuf[21]; + vector bVec2 = bBuf[22]; + vector bVec3 = bBuf[23]; + + // CHECK: [[bvec2:%.*]] = icmp ne <13 x i32> [[vec2]], zeroinitializer + // CHECK: [[bvec1:%.*]] = icmp ne <13 x i32> [[vec1]], zeroinitializer + // CHECK: or <13 x i1> [[bvec2]], [[bvec1]] + uRes += or(bVec1, bVec2); + + // CHECK: [[bvec3:%.*]] = icmp ne <13 x i32> [[vec3]], zeroinitializer + // CHECK: and <13 x i1> [[bvec3]], [[bvec2]] + uRes += and(bVec2, bVec3); + + // CHECK: select <13 x i1> [[bvec3]], <13 x i64> [[lvec1]], <13 x i64> [[lvec2]] + vector lRes = select(bVec3, lVec1, lVec2); + + // CHECK: [[el1:%.*]] = extractelement <13 x float> [[fvec1]] + // CHECK: [[el2:%.*]] = extractelement <13 x float> [[fvec2]] + // CHECK: [[mul:%.*]] = fmul fast float [[el2]], [[el1]] + // CHECK: 
[[mad1:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mul]]) ; FMad(a,b,c) + // CHECK: [[mad2:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad1]]) ; FMad(a,b,c) + // CHECK: [[mad3:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad2]]) ; FMad(a,b,c) + // CHECK: [[mad4:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad3]]) ; FMad(a,b,c) + // CHECK: [[mad5:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad4]]) ; FMad(a,b,c) + // CHECK: [[mad6:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad5]]) ; FMad(a,b,c) + // CHECK: [[mad7:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad6]]) ; FMad(a,b,c) + // CHECK: [[mad8:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad7]]) ; FMad(a,b,c) + // CHECK: [[mad9:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad8]]) ; FMad(a,b,c) + // CHECK: [[mad10:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad9]]) ; FMad(a,b,c) + // CHECK: [[mad11:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad10]]) ; FMad(a,b,c) + // CHECK: [[mad12:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad11]]) ; FMad(a,b,c) + fRes += dot(fVec1, fVec2); + + // CHECK: call <13 x float> @dx.op.unary.v13f32(i32 17, <13 x float> [[fvec1]]) ; Atan(value) + fRes += atan(fVec1); + + // CHECK: call <13 x i32> @dx.op.binary.v13i32(i32 40, <13 x i32> [[uvec1]], <13 x i32> [[uvec2]]) ; UMin(a,b) + uRes += min(uVec1, uVec2); + + // CHECK: call <13 x float> @dx.op.tertiary.v13f32(i32 46, <13 x float> [[fvec1]], <13 x float> [[fvec2]], <13 x float> [[fvec3]]) ; FMad(a,b,c) + fRes += mad(fVec1, fVec2, fVec3); + + // CHECK: 
[[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 24, i32 0, i32 8) + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 25, i32 0, i32 8) + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 26, i32 0, i32 8) + // CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0 + vector dVec1 = dBuf[24]; + vector dVec2 = dBuf[25]; + vector dVec3 = dBuf[26]; + + // CHECK: call <13 x double> @dx.op.tertiary.v13f64(i32 47, <13 x double> [[dvec1]], <13 x double> [[dvec2]], <13 x double> [[dvec3]]) + vector dRes = fma(dVec1, dVec2, dVec3); + + hBuf[0] = hRes; + fBuf[0] = fRes; + dBuf[0] = dRes; + uBuf[0] = uRes; + lBuf[0] = lRes; +} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll new file mode 100644 index 0000000000..8f9dcbbdbc --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll @@ -0,0 +1,434 @@ +; RUN: %dxopt %s -dxilgen -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"class.RWStructuredBuffer >" = type { <7 x half> } +%"class.RWStructuredBuffer >" = type { <7 x float> } +%"class.RWStructuredBuffer >" = type { <7 x double> } +%"class.RWStructuredBuffer >" = type { <7 x i32> } +%"class.RWStructuredBuffer >" = type { <7 x i32> } +%"class.RWStructuredBuffer >" = type { <7 x i64> } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" = external global %"class.RWStructuredBuffer >", align 2 
+@"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" = external global %"class.RWStructuredBuffer >", align 8 +@"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" = external global %"class.RWStructuredBuffer >", align 8 + +; CHECK-LABEL: define void @main() +define void @main() #0 { +bb: + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f32 @dx.op.rawBufferVectorLoad.v7f32(i32 303, %dx.types.Handle {{%.*}}, i32 11, i32 0, i32 4) + ; CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.v7f32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f32 @dx.op.rawBufferVectorLoad.v7f32(i32 303, %dx.types.Handle {{%.*}}, i32 12, i32 0, i32 4) + ; CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.v7f32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f32 @dx.op.rawBufferVectorLoad.v7f32(i32 303, %dx.types.Handle {{%.*}}, i32 13, i32 0, i32 4) + ; CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.v7f32 [[ld]], 0 + + %exp = alloca <7 x float>, align 4 + %tmp = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:23 col:30 + %tmp1 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp) ; line:23 col:30 + %tmp2 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:23 col:30 + %tmp3 = call <7 x float>* 
@"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp2, i32 11) ; line:23 col:30 + %tmp4 = load <7 x float>, <7 x float>* %tmp3 ; line:23 col:30 + %tmp5 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:24 col:30 + %tmp6 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp5) ; line:24 col:30 + %tmp7 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp6, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:24 col:30 + %tmp8 = call <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp7, i32 12) ; line:24 col:30 + %tmp9 = load <7 x float>, <7 x float>* %tmp8 ; line:24 col:30 + %tmp10 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:25 col:30 + %tmp11 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp10) ; line:25 col:30 + %tmp12 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp11, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:25 col:30 + %tmp13 = call <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp12, i32 13) ; line:25 col:30 + %tmp14 = load <7 x float>, <7 x float>* %tmp13 ; line:25 col:30 + + ; Clamp operation. 
+ ; CHECK: [[max:%.*]] = call <7 x float> @dx.op.binary.v7f32(i32 35, <7 x float> [[fvec1]], <7 x float> [[fvec2]]) + ; CHECK: call <7 x float> @dx.op.binary.v7f32(i32 36, <7 x float> [[max]], <7 x float> [[fvec3]]) + %tmp15 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32 119, <7 x float> %tmp4, <7 x float> %tmp9, <7 x float> %tmp14) ; line:29 col:29 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle {{%.*}}, i32 14, i32 0, i32 2) + ; CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.v7f16 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle {{%.*}}, i32 15, i32 0, i32 2) + ; CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.v7f16 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle {{%.*}}, i32 16, i32 0, i32 2) + ; CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.v7f16 [[ld]], 0 + %tmp16 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:37 col:34 + %tmp17 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp16) ; line:37 col:34 + %tmp18 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp17, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:37 col:34 + %tmp19 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp18, i32 14) ; line:37 col:34 + %tmp20 = load <7 x half>, <7 x half>* %tmp19 ; line:37 col:34 + %tmp21 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* 
@"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:38 col:34 + %tmp22 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp21) ; line:38 col:34 + %tmp23 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp22, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:38 col:34 + %tmp24 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp23, i32 15) ; line:38 col:34 + %tmp25 = load <7 x half>, <7 x half>* %tmp24 ; line:38 col:34 + %tmp26 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:39 col:34 + %tmp27 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp26) ; line:39 col:34 + %tmp28 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp27, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:39 col:34 + %tmp29 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp28, i32 16) ; line:39 col:34 + %tmp30 = load <7 x half>, <7 x half>* %tmp29 ; line:39 col:34 + + ; Step operation. + ; CHECK: [[cmp:%.*]] = fcmp fast olt <7 x half> [[hvec2]], [[hvec1]] + ; CHECK: select <7 x i1> [[cmp]], <7 x half> zeroinitializer, <7 x half> + %tmp31 = call <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>)"(i32 192, <7 x half> %tmp20, <7 x half> %tmp25) ; line:43 col:33 + + ; Exp operation. 
+ ; CHECK: [[mul:%.*]] = fmul fast <7 x float> , [[fvec1]] + ; CHECK call <7 x float> @dx.op.unary.v7f32(i32 21, <7 x float> [[mul]]) + %tmp32 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32 139, <7 x float> %tmp4) ; line:47 col:11 + %tmp33 = fadd <7 x float> %tmp15, %tmp32 ; line:47 col:8 + + ; Log operation. + ; CHECK: [[log:%.*]] = call <7 x half> @dx.op.unary.v7f16(i32 23, <7 x half> [[hvec1]]) + ; CHECK: fmul fast <7 x half> , [[log]] + %tmp34 = call <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>)"(i32 159, <7 x half> %tmp20) ; line:51 col:11 + %tmp35 = fadd <7 x half> %tmp31, %tmp34 ; line:51 col:8 + + ; Smoothstep operation. + ; CHECK: [[sub1:%.*]] = fsub fast <7 x float> [[fvec2]], [[fvec1]] + ; CHECK: [[sub2:%.*]] = fsub fast <7 x float> [[fvec3]], [[fvec1]] + ; CHECK: [[div:%.*]] = fdiv fast <7 x float> [[sub2]], [[sub1]] + ; CHECK: [[sat:%.*]] = call <7 x float> @dx.op.unary.v7f32(i32 7, <7 x float> [[div]]) + ; CHECK: [[mul:%.*]] = fmul fast <7 x float> [[sat]], + ; CHECK: [[sub:%.*]] = fsub fast <7 x float> , [[mul]] + ; CHECK: [[mul:%.*]] = fmul fast <7 x float> [[sat]], [[sub]] + ; CHECK: fmul fast <7 x float> %Saturate, [[mul]] + %tmp36 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32 189, <7 x float> %tmp4, <7 x float> %tmp9, <7 x float> %tmp14) ; line:61 col:11 + %tmp37 = fadd <7 x float> %tmp33, %tmp36 ; line:61 col:8 + + ; Radians operation. + ; CHECK: fmul fast <7 x float> , [[fvec3]] + %tmp38 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32 176, <7 x float> %tmp14) ; line:66 col:11 + %tmp39 = fadd <7 x float> %tmp37, %tmp38 ; line:66 col:8 + store <7 x float> %tmp14, <7 x float>* %exp, align 4 ; line:77 col:22 + + ; Frexp operation. 
+ ; CHECK: [[cmp:%.*]] = fcmp fast une <7 x float> [[fvec1]], zeroinitializer + ; CHECK: [[ext:%.*]] = sext <7 x i1> [[cmp]] to <7 x i32> + ; CHECK: [[bct:%.*]] = bitcast <7 x float> [[fvec1]] to <7 x i32> + ; CHECK: [[and:%.*]] = and <7 x i32> [[bct]], + ; CHECK: [[add:%.*]] = add <7 x i32> [[and]], + ; CHECK: [[and:%.*]] = and <7 x i32> [[add]], [[ext]] + ; CHECK: [[shr:%.*]] = ashr <7 x i32> [[and]], + ; CHECK: [[i2f:%.*]] = sitofp <7 x i32> [[shr]] to <7 x float> + ; CHECK: store <7 x float> [[i2f]], <7 x float>* %exp + ; CHECK: [[and:%.*]] = and <7 x i32> [[bct]], + ; CHECK: [[or:%.*]] = or <7 x i32> [[and]], + ; CHECK: [[and:%.*]] = and <7 x i32> [[or]], [[ext]] + ; CHECK: bitcast <7 x i32> [[and]] to <7 x float> + %tmp41 = call <7 x float> @"dx.hl.op..<7 x float> (i32, <7 x float>, <7 x float>*)"(i32 150, <7 x float> %tmp4, <7 x float>* %exp) ; line:78 col:11 + %tmp42 = fadd <7 x float> %tmp39, %tmp41 ; line:78 col:8 + %tmp43 = load <7 x float>, <7 x float>* %exp, align 4 ; line:79 col:11 + %tmp44 = fadd <7 x float> %tmp42, %tmp43 ; line:79 col:8 + + ; Lerp operation. 
+ ; CHECK: [[sub:%.*]] = fsub fast <7 x half> [[hvec3]], [[hvec2]] + ; CHECK: fmul fast <7 x half> [[hvec1]], [[sub]] + %tmp45 = call <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>, <7 x half>)"(i32 157, <7 x half> %tmp25, <7 x half> %tmp30, <7 x half> %tmp20) ; line:83 col:11 + %tmp46 = fadd <7 x half> %tmp35, %tmp45 ; line:83 col:8 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 17, i32 0, i32 4) + ; CHECK: [[uvec1:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 18, i32 0, i32 4) + ; CHECK: [[uvec2:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + %tmp47 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" ; line:90 col:29 + %tmp48 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp47) ; line:90 col:29 + %tmp49 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp48, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:90 col:29 + %tmp50 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp49, i32 17) ; line:90 col:29 + %tmp51 = load <7 x i32>, <7 x i32>* %tmp50 ; line:90 col:29 + %tmp52 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" ; line:91 col:29 + %tmp53 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp52) ; line:91 col:29 + %tmp54 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp53, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:91 col:29 + %tmp55 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp54, i32 18) ; line:91 col:29 + %tmp56 = load <7 x i32>, <7 x i32>* %tmp55 ; line:91 col:29 + + ; Unsigned int sign operation. + ; CHECK: [[cmp:%.*]] = icmp ne <7 x i32> [[uvec2]], zeroinitializer + ; CHECK: zext <7 x i1> [[cmp]] to <7 x i32> + %tmp57 = call <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>)"(i32 355, <7 x i32> %tmp56) ; line:96 col:12 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i64 @dx.op.rawBufferVectorLoad.v7i64(i32 303, %dx.types.Handle {{%.*}}, i32 19, i32 0, i32 8) + ; CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.v7i64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i64 @dx.op.rawBufferVectorLoad.v7i64(i32 303, %dx.types.Handle {{%.*}}, i32 20, i32 0, i32 8) + ; CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.v7i64 [[ld]], 0 + %tmp58 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" ; line:102 col:32 + %tmp59 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp58) ; line:102 col:32 + %tmp60 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp59, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:102 col:32 + %tmp61 = call <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp60, i32 19) ; line:102 col:32 + %tmp62 = load <7 x 
i64>, <7 x i64>* %tmp61 ; line:102 col:32 + %tmp63 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" ; line:103 col:32 + %tmp64 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp63) ; line:103 col:32 + %tmp65 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp64, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:103 col:32 + %tmp66 = call <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp65, i32 20) ; line:103 col:32 + %tmp67 = load <7 x i64>, <7 x i64>* %tmp66 ; line:103 col:32 + + ; Signed int sign operation. + ; CHECK: [[lt1:%.*]] = icmp slt <7 x i64> zeroinitializer, [[lvec2]] + ; CHECK: [[lt2:%.*]] = icmp slt <7 x i64> [[lvec2]], zeroinitializer + ; CHECK: [[ilt1:%.*]] = zext <7 x i1> [[lt1]] to <7 x i32> + ; CHECK: [[ilt2:%.*]] = zext <7 x i1> [[lt2]] to <7 x i32> + ; CHECK: sub <7 x i32> [[ilt1]], [[ilt2]] + %tmp68 = call <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i64>)"(i32 185, <7 x i64> %tmp67) ; line:110 col:12 + %tmp69 = mul <7 x i32> %tmp57, %tmp68 ; line:110 col:9 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 21, i32 0, i32 4) + ; CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[bvec:%.*]] = icmp ne <7 x i32> [[vec]], zeroinitializer + ; CHECK: [[vec1:%.*]] = zext <7 x i1> [[bvec]] to <7 x i32> + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 22, i32 0, i32 4) + ; CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[bvec:%.*]] = icmp 
ne <7 x i32> [[vec]], zeroinitializer + ; CHECK: [[vec2:%.*]] = zext <7 x i1> [[bvec]] to <7 x i32> + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 23, i32 0, i32 4) + ; CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[bvec:%.*]] = icmp ne <7 x i32> [[vec]], zeroinitializer + ; CHECK: [[vec3:%.*]] = zext <7 x i1> [[bvec]] to <7 x i32> + %tmp70 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" ; line:126 col:29 + %tmp71 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp70) ; line:126 col:29 + %tmp72 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp71, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:126 col:29 + %tmp73 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp72, i32 21) ; line:126 col:29 + %tmp74 = load <7 x i32>, <7 x i32>* %tmp73 ; line:126 col:29 + %tmp75 = icmp ne <7 x i32> %tmp74, zeroinitializer ; line:126 col:29 + %tmp76 = zext <7 x i1> %tmp75 to <7 x i32> ; line:126 col:21 + %tmp77 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" ; line:127 col:29 + %tmp78 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp77) ; line:127 col:29 + %tmp79 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp78, %dx.types.ResourceProperties { i32 
4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:127 col:29 + %tmp80 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp79, i32 22) ; line:127 col:29 + %tmp81 = load <7 x i32>, <7 x i32>* %tmp80 ; line:127 col:29 + %tmp82 = icmp ne <7 x i32> %tmp81, zeroinitializer ; line:127 col:29 + %tmp83 = zext <7 x i1> %tmp82 to <7 x i32> ; line:127 col:21 + %tmp84 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" ; line:128 col:29 + %tmp85 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp84) ; line:128 col:29 + %tmp86 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp85, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:128 col:29 + %tmp87 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp86, i32 23) ; line:128 col:29 + %tmp88 = load <7 x i32>, <7 x i32>* %tmp87 ; line:128 col:29 + %tmp89 = icmp ne <7 x i32> %tmp88, zeroinitializer ; line:128 col:29 + %tmp90 = zext <7 x i1> %tmp89 to <7 x i32> ; line:128 col:21 + + + ; Or() operation. 
+ ; CHECK: [[bvec2:%.*]] = icmp ne <7 x i32> [[vec2]], zeroinitializer + ; CHECK: [[bvec1:%.*]] = icmp ne <7 x i32> [[vec1]], zeroinitializer + ; CHECK: or <7 x i1> [[bvec1]], [[bvec2]] + %tmp91 = icmp ne <7 x i32> %tmp83, zeroinitializer ; line:133 col:21 + %tmp92 = icmp ne <7 x i32> %tmp76, zeroinitializer ; line:133 col:14 + %tmp93 = call <7 x i1> @"dx.hl.op.rn.<7 x i1> (i32, <7 x i1>, <7 x i1>)"(i32 169, <7 x i1> %tmp92, <7 x i1> %tmp91) ; line:133 col:11 + %tmp94 = zext <7 x i1> %tmp93 to <7 x i32> ; line:133 col:11 + %tmp95 = add <7 x i32> %tmp69, %tmp94 ; line:133 col:8 + + ; And() operation. + ; CHECK: [[bvec3:%.*]] = icmp ne <7 x i32> [[vec3]], zeroinitializer + ; CHECK: [[bvec2:%.*]] = icmp ne <7 x i32> [[vec2]], zeroinitializer + ; CHECK: and <7 x i1> [[bvec2]], [[bvec3]] + %tmp96 = icmp ne <7 x i32> %tmp90, zeroinitializer ; line:137 col:22 + %tmp97 = icmp ne <7 x i32> %tmp83, zeroinitializer ; line:137 col:15 + %tmp98 = call <7 x i1> @"dx.hl.op.rn.<7 x i1> (i32, <7 x i1>, <7 x i1>)"(i32 106, <7 x i1> %tmp97, <7 x i1> %tmp96) ; line:137 col:11 + %tmp99 = zext <7 x i1> %tmp98 to <7 x i32> ; line:137 col:11 + %tmp100 = add <7 x i32> %tmp95, %tmp99 ; line:137 col:8 + + ; Select() operation. + ; CHECK: [[bvec3:%.*]] = icmp ne <7 x i32> [[vec3]], zeroinitializer + ; CHECK: select <7 x i1> [[bvec3]], <7 x i64> [[lvec1]], <7 x i64> [[lvec2]] + %tmp101 = icmp ne <7 x i32> %tmp90, zeroinitializer ; line:140 col:38 + %tmp102 = call <7 x i64> @"dx.hl.op.rn.<7 x i64> (i32, <7 x i1>, <7 x i64>, <7 x i64>)"(i32 184, <7 x i1> %tmp101, <7 x i64> %tmp62, <7 x i64> %tmp67) ; line:140 col:31 + %tmp103 = call float @"dx.hl.op.rn.float (i32, <7 x float>, <7 x float>)"(i32 134, <7 x float> %tmp4, <7 x float> %tmp9) ; line:152 col:11 + + ; Dot operation. 
+ ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 0 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 0 + ; CHECK: [[mul:%.*]] = fmul fast float [[el1]], [[el2]] + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 1 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 1 + ; CHECK: [[mad1:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mul]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 2 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 2 + ; CHECK: [[mad2:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad1]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 3 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 3 + ; CHECK: [[mad3:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad2]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 4 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 4 + ; CHECK: [[mad4:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad3]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 5 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 5 + ; CHECK: [[mad5:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad4]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 6 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 6 + ; CHECK: call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad5]]) + %tmp104 = insertelement <7 x float> undef, float %tmp103, i32 0 ; line:152 col:11 + %tmp105 = shufflevector <7 x float> %tmp104, <7 x float> undef, <7 x i32> zeroinitializer ; line:152 col:11 + %tmp106 = fadd <7 x float> %tmp44, %tmp105 ; line:152 col:8 + + ; Atan operation. 
+ ; CHECK: call <7 x float> @dx.op.unary.v7f32(i32 17, <7 x float> [[fvec1]]) + %tmp107 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32 116, <7 x float> %tmp4) ; line:155 col:11 + %tmp108 = fadd <7 x float> %tmp106, %tmp107 ; line:155 col:8 + + ; Min operation. + ; CHECK: call <7 x i32> @dx.op.binary.v7i32(i32 40, <7 x i32> [[uvec1]], <7 x i32> [[uvec2]]) + %tmp109 = call <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>, <7 x i32>)"(i32 353, <7 x i32> %tmp51, <7 x i32> %tmp56) ; line:158 col:11 + %tmp110 = add <7 x i32> %tmp100, %tmp109 ; line:158 col:8 + + ; Mad operation. + ; CHECK: call <7 x float> @dx.op.tertiary.v7f32(i32 46, <7 x float> [[fvec1]], <7 x float> [[fvec2]], <7 x float> [[fvec3]]) + %tmp111 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32 162, <7 x float> %tmp4, <7 x float> %tmp9, <7 x float> %tmp14) ; line:161 col:11 + %tmp112 = fadd <7 x float> %tmp108, %tmp111 ; line:161 col:8 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f64 @dx.op.rawBufferVectorLoad.v7f64(i32 303, %dx.types.Handle {{%.*}}, i32 24, i32 0, i32 8) + ; CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.v7f64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f64 @dx.op.rawBufferVectorLoad.v7f64(i32 303, %dx.types.Handle {{%.*}}, i32 25, i32 0, i32 8) + ; CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.v7f64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f64 @dx.op.rawBufferVectorLoad.v7f64(i32 303, %dx.types.Handle {{%.*}}, i32 26, i32 0, i32 8) + ; CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.v7f64 [[ld]], 0 + %tmp113 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:169 col:31 + %tmp114 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp113) ; line:169 col:31 + %tmp115 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp114, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:169 col:31 + %tmp116 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp115, i32 24) ; line:169 col:31 + %tmp117 = load <7 x double>, <7 x double>* %tmp116 ; line:169 col:31 + %tmp118 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:170 col:31 + %tmp119 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp118) ; line:170 col:31 + %tmp120 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp119, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:170 col:31 + %tmp121 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp120, i32 25) ; line:170 col:31 + %tmp122 = load <7 x double>, <7 x double>* %tmp121 ; line:170 col:31 + %tmp123 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:171 col:31 + %tmp124 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp123) ; line:171 col:31 + %tmp125 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp124, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" 
zeroinitializer) ; line:171 col:31 + %tmp126 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp125, i32 26) ; line:171 col:31 + %tmp127 = load <7 x double>, <7 x double>* %tmp126 ; line:171 col:31 + + ; FMA operation. + ; CHECK: call <7 x double> @dx.op.tertiary.v7f64(i32 47, <7 x double> [[dvec1]], <7 x double> [[dvec2]], <7 x double> [[dvec3]]) + %tmp128 = call <7 x double> @"dx.hl.op.rn.<7 x double> (i32, <7 x double>, <7 x double>, <7 x double>)"(i32 147, <7 x double> %tmp117, <7 x double> %tmp122, <7 x double> %tmp127) ; line:174 col:30 + %tmp129 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:176 col:3 + %tmp130 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp129) ; line:176 col:3 + %tmp131 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp130, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:176 col:3 + %tmp132 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp131, i32 0) ; line:176 col:3 + store <7 x half> %tmp46, <7 x half>* %tmp132 ; line:176 col:11 + %tmp133 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:177 col:3 + %tmp134 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp133) ; line:177 col:3 + %tmp135 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle 
%tmp134, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:177 col:3 + %tmp136 = call <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp135, i32 0) ; line:177 col:3 + store <7 x float> %tmp112, <7 x float>* %tmp136 ; line:177 col:11 + %tmp137 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:178 col:3 + %tmp138 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp137) ; line:178 col:3 + %tmp139 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp138, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:178 col:3 + %tmp140 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp139, i32 0) ; line:178 col:3 + store <7 x double> %tmp128, <7 x double>* %tmp140 ; line:178 col:11 + %tmp141 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" ; line:179 col:3 + %tmp142 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp141) ; line:179 col:3 + %tmp143 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp142, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:179 col:3 + %tmp144 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp143, i32 0) ; 
line:179 col:3 + store <7 x i32> %tmp110, <7 x i32>* %tmp144 ; line:179 col:11 + %tmp145 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" ; line:180 col:3 + %tmp146 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp145) ; line:180 col:3 + %tmp147 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp146, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:180 col:3 + %tmp148 = call <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp147, i32 0) ; line:180 col:3 + store <7 x i64> %tmp102, <7 x i64>* %tmp148 ; line:180 col:11 + ret void ; line:181 col:1 +} + +declare <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32, <7 x float>, <7 x float>, <7 x float>) #1 +declare <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, 
%dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>)"(i32, <7 x half>, <7 x half>) #1 +declare <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32, <7 x float>) #1 +declare <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>)"(i32, <7 x half>) #1 +declare <7 x float> @"dx.hl.op..<7 x float> (i32, <7 x float>, <7 x float>*)"(i32, <7 x float>, <7 x float>*) #0 +declare <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>, <7 x half>)"(i32, <7 x half>, <7 x half>, <7 x half>) #1 +declare <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>)"(i32, <7 x i32>) #1 +declare <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i64>)"(i32, <7 x i64>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 
+declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x i1> @"dx.hl.op.rn.<7 x i1> (i32, <7 x i1>, <7 x i1>)"(i32, <7 x i1>, <7 x i1>) #1 +declare <7 x i64> @"dx.hl.op.rn.<7 x i64> (i32, <7 x i1>, <7 x i64>, <7 x i64>)"(i32, <7 x i1>, <7 x i64>, <7 x i64>) #1 +declare float @"dx.hl.op.rn.float (i32, <7 x float>, <7 x float>)"(i32, <7 x float>, <7 x float>) #1 +declare <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>, <7 x i32>)"(i32, <7 x i32>, <7 x i32>) #1 +declare <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x double> @"dx.hl.op.rn.<7 x double> (i32, <7 x double>, <7 x double>, <7 x double>)"(i32, <7 x double>, <7 x double>, <7 x double>) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!pauseresume = !{!1} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !36} +!dx.entryPoints = !{!40} +!dx.fnprops = !{!52} +!dx.options = !{!53, !54} + +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!3 = !{i32 1, i32 9} +!4 = !{!"cs", i32 6, i32 9} +!5 = !{i32 0, %"class.RWStructuredBuffer >" undef, !6, %"class.RWStructuredBuffer >" undef, !11, %"class.RWStructuredBuffer >" undef, !16, %"class.RWStructuredBuffer >" undef, !21, %"class.RWStructuredBuffer >" undef, !26, %"class.RWStructuredBuffer >" undef, !31} +!6 = !{i32 14, !7, !8} +!7 = !{i32 6, 
!"h", i32 3, i32 0, i32 7, i32 8, i32 13, i32 7} +!8 = !{i32 0, !9} +!9 = !{!10} +!10 = !{i32 0, <7 x half> undef} +!11 = !{i32 28, !12, !13} +!12 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9, i32 13, i32 7} +!13 = !{i32 0, !14} +!14 = !{!15} +!15 = !{i32 0, <7 x float> undef} +!16 = !{i32 56, !17, !18} +!17 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 10, i32 13, i32 7} +!18 = !{i32 0, !19} +!19 = !{!20} +!20 = !{i32 0, <7 x double> undef} +!21 = !{i32 28, !22, !23} +!22 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 1, i32 13, i32 7} +!23 = !{i32 0, !24} +!24 = !{!25} +!25 = !{i32 0, <7 x i1> undef} +!26 = !{i32 28, !27, !28} +!27 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5, i32 13, i32 7} +!28 = !{i32 0, !29} +!29 = !{!30} +!30 = !{i32 0, <7 x i32> undef} +!31 = !{i32 56, !32, !33} +!32 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 6, i32 13, i32 7} +!33 = !{i32 0, !34} +!34 = !{!35} +!35 = !{i32 0, <7 x i64> undef} +!36 = !{i32 1, void ()* @main, !37} +!37 = !{!38} +!38 = !{i32 1, !39, !39} +!39 = !{} +!40 = !{void ()* @main, !"main", null, !41, null} +!41 = !{null, !42, null, null} +!42 = !{!43, !45, !47, !49, !50, !51} +!43 = !{i32 0, %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A", !"hBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !44} +!44 = !{i32 1, i32 14} +!45 = !{i32 1, %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A", !"fBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !46} +!46 = !{i32 1, i32 28} +!47 = !{i32 2, %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A", !"dBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !48} +!48 = !{i32 1, i32 56} +!49 = !{i32 3, %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A", !"bBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !46} +!50 = !{i32 4, %"class.RWStructuredBuffer >"* 
@"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A", !"uBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !46} +!51 = !{i32 5, %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A", !"lBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !48} +!52 = !{void ()* @main, i32 5, i32 8, i32 1, i32 1} +!53 = !{i32 0} +!54 = !{i32 -1} +!59 = !{!60, !60, i64 0} +!60 = !{!"omnipotent char", !61, i64 0} +!61 = !{!"Simple C/C++ TBAA"} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-load-stores-scalarizevecldst.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-load-stores-scalarizevecldst.ll new file mode 100644 index 0000000000..f9a9b3d677 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-load-stores-scalarizevecldst.ll @@ -0,0 +1,478 @@ +; RUN: %dxopt %s -hlsl-passes-resume -hlsl-dxil-scalarize-vector-load-stores -S | FileCheck %s + +; Verify that scalarize vector load stores pass will convert raw buffer vector operations +; into the equivalent collection of scalar load store calls. +; Sourced from buffer-load-stores-sm69.hlsl. 
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.v17f32 = type { <17 x float>, i32 } +%struct.ByteAddressBuffer = type { i32 } +%"class.StructuredBuffer >" = type { <17 x float> } +%struct.RWByteAddressBuffer = type { i32 } +%"class.RWStructuredBuffer >" = type { <17 x float> } +%"class.ConsumeStructuredBuffer >" = type { <17 x float> } +%"class.AppendStructuredBuffer >" = type { <17 x float> } + +@"\01?RoByBuf@@3UByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 +@"\01?RwByBuf@@3URWByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 +@"\01?RoStBuf@@3V?$StructuredBuffer@V?$vector@M$0BB@@@@@A" = external constant %dx.types.Handle, align 4 +@"\01?RwStBuf@@3V?$RWStructuredBuffer@V?$vector@M$0BB@@@@@A" = external constant %dx.types.Handle, align 4 +@"\01?CnStBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$0BB@@@@@A" = external constant %dx.types.Handle, align 4 +@"\01?ApStBuf@@3V?$AppendStructuredBuffer@V?$vector@M$0BB@@@@@A" = external constant %dx.types.Handle, align 4 + +define void @main() { +bb: + %tmp = load %dx.types.Handle, %dx.types.Handle* @"\01?RoStBuf@@3V?$StructuredBuffer@V?$vector@M$0BB@@@@@A", align 4 + %tmp1 = load %dx.types.Handle, %dx.types.Handle* @"\01?RoByBuf@@3UByteAddressBuffer@@A", align 4 + %tmp2 = load %dx.types.Handle, %dx.types.Handle* @"\01?ApStBuf@@3V?$AppendStructuredBuffer@V?$vector@M$0BB@@@@@A", align 4 + %tmp3 = load %dx.types.Handle, %dx.types.Handle* @"\01?CnStBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$0BB@@@@@A", align 4 + %tmp4 = load %dx.types.Handle, %dx.types.Handle* @"\01?RwStBuf@@3V?$RWStructuredBuffer@V?$vector@M$0BB@@@@@A", align 4 + %tmp5 = load %dx.types.Handle, %dx.types.Handle* @"\01?RwByBuf@@3URWByteAddressBuffer@@A", align 4 + %tmp6 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef) 
+ %tmp7 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp5) + %tmp8 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp7, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 %tmp6, i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix1:%.*]] = add i32 %tmp6, 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 [[ix1]], i32 undef, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix2:%.*]] = add i32 [[ix1]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 [[ix2]], i32 undef, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix3:%.*]] = add i32 [[ix2]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 [[ix3]], i32 undef, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue 
%dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix4:%.*]] = add i32 [[ix3]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 [[ix4]], i32 undef, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp9 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp8, i32 %tmp6, i32 undef, i32 4) + %tmp10 = extractvalue 
%dx.types.ResRet.v17f32 %tmp9, 0 + %tmp11 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp1) + %tmp12 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp11, %dx.types.ResourceProperties { i32 11, i32 0 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 %tmp6, i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix1:%.*]] = add i32 %tmp6, 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 [[ix1]], i32 undef, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix2:%.*]] = add i32 [[ix1]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 [[ix2]], i32 undef, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix3:%.*]] = add i32 [[ix2]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 [[ix3]], i32 undef, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: 
[[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix4:%.*]] = add i32 [[ix3]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 [[ix4]], i32 undef, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp13 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp12, i32 %tmp6, i32 undef, i32 4) 
+ %tmp14 = extractvalue %dx.types.ResRet.v17f32 %tmp13, 0 + %tmp15 = fadd fast <17 x float> %tmp14, %tmp10 + + ; CHECK: [[val0:%.*]] = extractelement <17 x float> %tmp15, i64 0 + ; CHECK: [[val1:%.*]] = extractelement <17 x float> %tmp15, i64 1 + ; CHECK: [[val2:%.*]] = extractelement <17 x float> %tmp15, i64 2 + ; CHECK: [[val3:%.*]] = extractelement <17 x float> %tmp15, i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 %tmp6, i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15, i32 4) + ; CHECK: [[ix1:%.*]] = add i32 %tmp6, 16 + ; CHECK: [[val4:%.*]] = extractelement <17 x float> %tmp15, i64 4 + ; CHECK: [[val5:%.*]] = extractelement <17 x float> %tmp15, i64 5 + ; CHECK: [[val6:%.*]] = extractelement <17 x float> %tmp15, i64 6 + ; CHECK: [[val7:%.*]] = extractelement <17 x float> %tmp15, i64 7 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 [[ix1]], i32 undef, float [[val4]], float [[val5]], float [[val6]], float [[val7]], i8 15, i32 4) + ; CHECK: [[ix2:%.*]] = add i32 [[ix1]], 16 + ; CHECK: [[val8:%.*]] = extractelement <17 x float> %tmp15, i64 8 + ; CHECK: [[val9:%.*]] = extractelement <17 x float> %tmp15, i64 9 + ; CHECK: [[val10:%.*]] = extractelement <17 x float> %tmp15, i64 10 + ; CHECK: [[val11:%.*]] = extractelement <17 x float> %tmp15, i64 11 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 [[ix2]], i32 undef, float [[val8]], float [[val9]], float [[val10]], float [[val11]], i8 15, i32 4) + ; CHECK: [[ix3:%.*]] = add i32 [[ix2]], 16 + ; CHECK: [[val12:%.*]] = extractelement <17 x float> %tmp15, i64 12 + ; CHECK: [[val13:%.*]] = extractelement <17 x float> %tmp15, i64 13 + ; CHECK: [[val14:%.*]] = extractelement <17 x float> %tmp15, i64 14 + ; CHECK: [[val15:%.*]] = extractelement <17 x float> %tmp15, i64 15 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 [[ix3]], i32 undef, float 
[[val12]], float [[val13]], float [[val14]], float [[val15]], i8 15, i32 4) + ; CHECK: [[ix4:%.*]] = add i32 [[ix3]], 16 + ; CHECK: [[val16:%.*]] = extractelement <17 x float> %tmp15, i64 16 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 [[ix4]], i32 undef, float [[val16]], float undef, float undef, float undef, i8 1, i32 4) + call void @dx.op.rawBufferVectorStore.v17f32(i32 304, %dx.types.Handle %tmp8, i32 %tmp6, i32 undef, <17 x float> %tmp15, i32 4) + %tmp16 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp4) + %tmp17 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp16, %dx.types.ResourceProperties { i32 4108, i32 68 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 
[[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> 
[[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp18 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp17, i32 %tmp6, i32 0, i32 4) + %tmp19 = extractvalue %dx.types.ResRet.v17f32 %tmp18, 0 + %tmp20 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 1, i8 0, i32 undef) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue 
%dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp21 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp17, i32 %tmp20, i32 0, i32 4) + %tmp22 = extractvalue %dx.types.ResRet.v17f32 %tmp21, 0 + %tmp23 = call 
%dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp) + %tmp24 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp23, %dx.types.ResourceProperties { i32 12, i32 68 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 
@dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp25 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp24, i32 %tmp6, i32 0, i32 4) + %tmp26 = extractvalue %dx.types.ResRet.v17f32 %tmp25, 0 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = 
extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; 
CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp27 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp24, i32 %tmp20, i32 0, i32 4) + %tmp28 = extractvalue %dx.types.ResRet.v17f32 %tmp27, 0 + %tmp29 = fadd fast <17 x float> %tmp22, %tmp19 + %tmp30 = fadd fast <17 x float> %tmp29, %tmp26 + %tmp31 = fadd fast <17 x float> %tmp30, %tmp28 + + ; CHECK: [[val0:%.*]] = extractelement <17 x float> %tmp31, i64 0 + ; CHECK: [[val1:%.*]] = extractelement <17 x float> %tmp31, i64 1 + ; CHECK: [[val2:%.*]] = extractelement <17 x float> %tmp31, i64 2 + ; CHECK: [[val3:%.*]] = extractelement <17 x float> %tmp31, i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15, i32 
4) + ; CHECK: [[val4:%.*]] = extractelement <17 x float> %tmp31, i64 4 + ; CHECK: [[val5:%.*]] = extractelement <17 x float> %tmp31, i64 5 + ; CHECK: [[val6:%.*]] = extractelement <17 x float> %tmp31, i64 6 + ; CHECK: [[val7:%.*]] = extractelement <17 x float> %tmp31, i64 7 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 16, float [[val4]], float [[val5]], float [[val6]], float [[val7]], i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractelement <17 x float> %tmp31, i64 8 + ; CHECK: [[val9:%.*]] = extractelement <17 x float> %tmp31, i64 9 + ; CHECK: [[val10:%.*]] = extractelement <17 x float> %tmp31, i64 10 + ; CHECK: [[val11:%.*]] = extractelement <17 x float> %tmp31, i64 11 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 32, float [[val8]], float [[val9]], float [[val10]], float [[val11]], i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractelement <17 x float> %tmp31, i64 12 + ; CHECK: [[val13:%.*]] = extractelement <17 x float> %tmp31, i64 13 + ; CHECK: [[val14:%.*]] = extractelement <17 x float> %tmp31, i64 14 + ; CHECK: [[val15:%.*]] = extractelement <17 x float> %tmp31, i64 15 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 48, float [[val12]], float [[val13]], float [[val14]], float [[val15]], i8 15, i32 4) + ; CHECK: [[val16:%.*]] = extractelement <17 x float> %tmp31, i64 16 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 64, float [[val16]], float undef, float undef, float undef, i8 1, i32 4) + call void @dx.op.rawBufferVectorStore.v17f32(i32 304, %dx.types.Handle %tmp17, i32 %tmp6, i32 0, <17 x float> %tmp31, i32 4) + %tmp32 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp3) + %tmp33 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp32, %dx.types.ResourceProperties { i32 36876, i32 68 }) + %tmp34 = 
call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %tmp33, i8 -1) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: 
[[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp35 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp33, i32 %tmp34, i32 0, i32 4) + %tmp36 = extractvalue %dx.types.ResRet.v17f32 %tmp35, 0 + %tmp37 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp2) + %tmp38 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp37, %dx.types.ResourceProperties { i32 36876, i32 68 }) + %tmp39 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %tmp38, i8 1) + + ; CHECK: 
[[val0:%.*]] = extractelement <17 x float> [[vec16]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <17 x float> [[vec16]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <17 x float> [[vec16]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <17 x float> [[vec16]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractelement <17 x float> [[vec16]], i64 4 + ; CHECK: [[val5:%.*]] = extractelement <17 x float> [[vec16]], i64 5 + ; CHECK: [[val6:%.*]] = extractelement <17 x float> [[vec16]], i64 6 + ; CHECK: [[val7:%.*]] = extractelement <17 x float> [[vec16]], i64 7 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 16, float [[val4]], float [[val5]], float [[val6]], float [[val7]], i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractelement <17 x float> [[vec16]], i64 8 + ; CHECK: [[val9:%.*]] = extractelement <17 x float> [[vec16]], i64 9 + ; CHECK: [[val10:%.*]] = extractelement <17 x float> [[vec16]], i64 10 + ; CHECK: [[val11:%.*]] = extractelement <17 x float> [[vec16]], i64 11 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 32, float [[val8]], float [[val9]], float [[val10]], float [[val11]], i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractelement <17 x float> [[vec16]], i64 12 + ; CHECK: [[val13:%.*]] = extractelement <17 x float> [[vec16]], i64 13 + ; CHECK: [[val14:%.*]] = extractelement <17 x float> [[vec16]], i64 14 + ; CHECK: [[val15:%.*]] = extractelement <17 x float> [[vec16]], i64 15 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 48, float [[val12]], float [[val13]], float [[val14]], float [[val15]], i8 15, i32 4) + ; CHECK: [[val16:%.*]] = extractelement <17 x float> [[vec16]], i64 16 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, 
%dx.types.Handle %tmp38, i32 %tmp39, i32 64, float [[val16]], float undef, float undef, float undef, i8 1, i32 4) + call void @dx.op.rawBufferVectorStore.v17f32(i32 304, %dx.types.Handle %tmp38, i32 %tmp39, i32 0, <17 x float> %tmp36, i32 4) + ret void +} + +declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #0 +declare %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32, %dx.types.Handle, i32, i32, i32) #1 +declare void @dx.op.rawBufferVectorStore.v17f32(i32, %dx.types.Handle, i32, i32, <17 x float>, i32) #2 +declare i32 @dx.op.bufferUpdateCounter(i32, %dx.types.Handle, i8) #2 +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #0 +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #1 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind } + +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.resources = !{!3} +!dx.typeAnnotations = !{!13} +!dx.entryPoints = !{!17, !19} + +!1 = !{i32 1, i32 8} +!2 = !{!"lib", i32 6, i32 8} +!3 = !{!4, !8, null, null} +!4 = !{!5, !6} +!5 = !{i32 0, %struct.ByteAddressBuffer* bitcast (%dx.types.Handle* @"\01?RoByBuf@@3UByteAddressBuffer@@A" to %struct.ByteAddressBuffer*), !"RoByBuf", i32 0, i32 1, i32 1, i32 11, i32 0, null} +!6 = !{i32 1, %"class.StructuredBuffer >"* bitcast (%dx.types.Handle* @"\01?RoStBuf@@3V?$StructuredBuffer@V?$vector@M$0BB@@@@@A" to %"class.StructuredBuffer >"*), !"RoStBuf", i32 0, i32 2, i32 1, i32 12, i32 0, !7} +!7 = !{i32 1, i32 68} +!8 = !{!9, !10, !11, !12} +!9 = !{i32 0, %struct.RWByteAddressBuffer* bitcast (%dx.types.Handle* @"\01?RwByBuf@@3URWByteAddressBuffer@@A" to %struct.RWByteAddressBuffer*), !"RwByBuf", i32 0, i32 1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!10 = !{i32 1, %"class.RWStructuredBuffer >"* bitcast (%dx.types.Handle* @"\01?RwStBuf@@3V?$RWStructuredBuffer@V?$vector@M$0BB@@@@@A" to 
%"class.RWStructuredBuffer >"*), !"RwStBuf", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !7} +!11 = !{i32 2, %"class.ConsumeStructuredBuffer >"* bitcast (%dx.types.Handle* @"\01?CnStBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$0BB@@@@@A" to %"class.ConsumeStructuredBuffer >"*), !"CnStBuf", i32 0, i32 4, i32 1, i32 12, i1 false, i1 true, i1 false, !7} +!12 = !{i32 3, %"class.AppendStructuredBuffer >"* bitcast (%dx.types.Handle* @"\01?ApStBuf@@3V?$AppendStructuredBuffer@V?$vector@M$0BB@@@@@A" to %"class.AppendStructuredBuffer >"*), !"ApStBuf", i32 0, i32 5, i32 1, i32 12, i1 false, i1 true, i1 false, !7} +!13 = !{i32 1, void ()* @main, !14} +!14 = !{!15} +!15 = !{i32 0, !16, !16} +!16 = !{} +!17 = !{null, !"", null, !3, !18} +!18 = !{i32 0, i64 8589934608} +!19 = !{void ()* @main, !"main", !20, null, !24} +!20 = !{!21, null, null} +!21 = !{!22} +!22 = !{i32 0, !"IX", i8 5, i8 0, !23, i8 0, i32 2, i8 1, i32 0, i8 0, null} +!23 = !{i32 0, i32 1} +!24 = !{i32 8, i32 1, i32 5, !25} +!25 = !{i32 0} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll new file mode 100644 index 0000000000..1fe7c17621 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll @@ -0,0 +1,660 @@ +; RUN: %dxopt %s -scalarizer -S | FileCheck %s + +; Vectors of length greather than 1 should get no changes from scalarizer, +; so this unusual test, verifies that the pass makes no changes at all. +; Still justified because prior to 6.9, many changes would result. +; Compiled mostly for float7 vectors with int7 for the integer specific parts. 
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"class.RWStructuredBuffer" = type { float } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.f32 = type { float, float, float, float, i32 } + +@"\01?buf@@3PAV?$RWStructuredBuffer@M@@A" = external global [7 x %"class.RWStructuredBuffer"], align 4 +@llvm.used = appending global [1 x i8*] [i8* bitcast ([7 x %"class.RWStructuredBuffer"]* @"\01?buf@@3PAV?$RWStructuredBuffer@M@@A" to i8*)], section "llvm.metadata" + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?assignments +define void @"\01?assignments@@YAXY09$$CAV?$vector@M$06@@@Z"([10 x <7 x float>]* noalias %things) #0 { +bb: + %tmp = load %"class.RWStructuredBuffer", %"class.RWStructuredBuffer"* getelementptr inbounds ([7 x %"class.RWStructuredBuffer"], [7 x %"class.RWStructuredBuffer"]* @"\01?buf@@3PAV?$RWStructuredBuffer@M@@A", i32 0, i32 0) + %tmp1 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %tmp) + %tmp2 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + + ; CHECK: [[buf:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 1, i32 0, i8 1, i32 4) + ; CHECK: [[val:%.*]] = extractvalue %dx.types.ResRet.f32 [[buf]], 0 + ; CHECK: [[vec:%.*]] = insertelement <7 x float> undef, float [[val]], i32 0 + ; CHECK: [[res0:%.*]] = shufflevector <7 x float> [[vec]], <7 x float> undef, <7 x i32> zeroinitializer + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 0 + ; CHECK: store <7 x float> [[res0]], <7 x float>* [[adr0]], align 4 + %RawBufferLoad = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp2, i32 1, i32 0, i8 1, i32 4) + %tmp3 = 
extractvalue %dx.types.ResRet.f32 %RawBufferLoad, 0 + %tmp4 = insertelement <7 x float> undef, float %tmp3, i32 0 + %tmp5 = shufflevector <7 x float> %tmp4, <7 x float> undef, <7 x i32> zeroinitializer + %tmp6 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 0 + store <7 x float> %tmp5, <7 x float>* %tmp6, align 4 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4 + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <7 x float>, <7 x float>* [[adr1]], align 4 + ; CHECK: [[res1:%.*]] = fadd fast <7 x float> [[ld1]], [[ld5]] + ; CHECK: store <7 x float> [[res1]], <7 x float>* [[adr1]], align 4 + %tmp7 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 5 + %tmp8 = load <7 x float>, <7 x float>* %tmp7, align 4 + %tmp9 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 1 + %tmp10 = load <7 x float>, <7 x float>* %tmp9, align 4 + %tmp11 = fadd fast <7 x float> %tmp10, %tmp8 + store <7 x float> %tmp11, <7 x float>* %tmp9, align 4 + + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <7 x float>, <7 x float>* [[adr6]], align 4 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4 + ; CHECK: [[res2:%.*]] = fsub fast <7 x float> [[ld2]], [[ld6]] + ; CHECK: store <7 x float> [[res2]], <7 x float>* [[adr2]], align 4 + %tmp12 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 6 + %tmp13 = load <7 x float>, <7 x float>* %tmp12, align 4 + %tmp14 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* 
%things, i32 0, i32 2 + %tmp15 = load <7 x float>, <7 x float>* %tmp14, align 4 + %tmp16 = fsub fast <7 x float> %tmp15, %tmp13 + store <7 x float> %tmp16, <7 x float>* %tmp14, align 4 + + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load <7 x float>, <7 x float>* [[adr7]], align 4 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4 + ; CHECK: [[res3:%.*]] = fmul fast <7 x float> [[ld3]], [[ld7]] + ; CHECK: store <7 x float> [[res3]], <7 x float>* [[adr3]], align 4 + %tmp17 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 7 + %tmp18 = load <7 x float>, <7 x float>* %tmp17, align 4 + %tmp19 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 3 + %tmp20 = load <7 x float>, <7 x float>* %tmp19, align 4 + %tmp21 = fmul fast <7 x float> %tmp20, %tmp18 + store <7 x float> %tmp21, <7 x float>* %tmp19, align 4 + + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load <7 x float>, <7 x float>* [[adr8]], align 4 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4 + ; CHECK: [[res4:%.*]] = fdiv fast <7 x float> [[ld4]], [[ld8]] + ; CHECK: store <7 x float> [[res4]], <7 x float>* [[adr4]], align 4 + %tmp22 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 8 + %tmp23 = load <7 x float>, <7 x float>* %tmp22, align 4 + %tmp24 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 4 + %tmp25 = load <7 x float>, <7 x float>* %tmp24, align 4 + %tmp26 = fdiv fast <7 x float> %tmp25, %tmp23 + store <7 x float> %tmp26, <7 
x float>* %tmp24, align 4 + + ; CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 9 + ; CHECK: [[ld9:%.*]] = load <7 x float>, <7 x float>* [[adr9]], align 4 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4 + ; CHECK: [[res5:%.*]] = frem fast <7 x float> [[ld5]], [[ld9]] + ; CHECK: store <7 x float> [[res5]], <7 x float>* [[adr5]], align 4 + %tmp27 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 9 + %tmp28 = load <7 x float>, <7 x float>* %tmp27, align 4 + %tmp29 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 5 + %tmp30 = load <7 x float>, <7 x float>* %tmp29, align 4 + %tmp31 = frem fast <7 x float> %tmp30, %tmp28 + store <7 x float> %tmp31, <7 x float>* %tmp29, align 4 + + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?arithmetic +define void @"\01?arithmetic@@YA$$BY0L@V?$vector@M$06@@Y0L@$$CAV1@@Z"([11 x <7 x float>]* noalias sret %agg.result, [11 x <7 x float>]* noalias %things) #0 { +bb: + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load <7 x float>, <7 x float>* [[adr0]], align 4 + ; CHECK: [[res0:%.*]] = fsub fast <7 x float> , [[ld0]] + %tmp = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 0 + %tmp1 = load <7 x float>, <7 x float>* %tmp, align 4 + %tmp2 = fsub fast <7 x float> , %tmp1 + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 0 + ; CHECK: [[res1:%.*]] = load <7 x float>, <7 x float>* [[adr0]], align 4 + %tmp3 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 0 + %tmp4 = load <7 x float>, <7 x float>* %tmp3, align 4 + + ; CHECK: [[adr1:%.*]] = 
getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <7 x float>, <7 x float>* [[adr1]], align 4 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4 + ; CHECK: [[res2:%.*]] = fadd fast <7 x float> [[ld1]], [[ld2]] + %tmp5 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 1 + %tmp6 = load <7 x float>, <7 x float>* %tmp5, align 4 + %tmp7 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 2 + %tmp8 = load <7 x float>, <7 x float>* %tmp7, align 4 + %tmp9 = fadd fast <7 x float> %tmp6, %tmp8 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4 + ; CHECK: [[res3:%.*]] = fsub fast <7 x float> [[ld2]], [[ld3]] + %tmp10 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 2 + %tmp11 = load <7 x float>, <7 x float>* %tmp10, align 4 + %tmp12 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 3 + %tmp13 = load <7 x float>, <7 x float>* %tmp12, align 4 + %tmp14 = fsub fast <7 x float> %tmp11, %tmp13 + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4 + ; CHECK: [[res4:%.*]] = fmul fast <7 x float> [[ld3]], [[ld4]] + %tmp15 = 
getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 3 + %tmp16 = load <7 x float>, <7 x float>* %tmp15, align 4 + %tmp17 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 4 + %tmp18 = load <7 x float>, <7 x float>* %tmp17, align 4 + %tmp19 = fmul fast <7 x float> %tmp16, %tmp18 + + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4 + ; CHECK: [[res5:%.*]] = fdiv fast <7 x float> [[ld4]], [[ld5]] + %tmp20 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 4 + %tmp21 = load <7 x float>, <7 x float>* %tmp20, align 4 + %tmp22 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 5 + %tmp23 = load <7 x float>, <7 x float>* %tmp22, align 4 + %tmp24 = fdiv fast <7 x float> %tmp21, %tmp23 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <7 x float>, <7 x float>* [[adr6]], align 4 + ; CHECK: [[res6:%.*]] = frem fast <7 x float> [[ld5]], [[ld6]] + %tmp25 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 5 + %tmp26 = load <7 x float>, <7 x float>* %tmp25, align 4 + %tmp27 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 6 + %tmp28 = load <7 x float>, <7 x float>* %tmp27, align 4 + %tmp29 = frem fast <7 x float> %tmp26, %tmp28 + + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <7 x 
float>], [11 x <7 x float>]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load <7 x float>, <7 x float>* [[adr7]], align 4 + ; CHECK: [[res7:%.*]] = fadd fast <7 x float> [[ld7]], + ; CHECK: store <7 x float> [[res7]], <7 x float>* [[adr7]], align 4 + %tmp30 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 7 + %tmp31 = load <7 x float>, <7 x float>* %tmp30, align 4 + %tmp32 = fadd fast <7 x float> %tmp31, + store <7 x float> %tmp32, <7 x float>* %tmp30, align 4 + + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load <7 x float>, <7 x float>* [[adr8]], align 4 + ; CHECK: [[res8:%.*]] = fadd fast <7 x float> [[ld8]], + ; CHECK: store <7 x float> [[res8]], <7 x float>* [[adr8]], align 4 + %tmp33 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 8 + %tmp34 = load <7 x float>, <7 x float>* %tmp33, align 4 + %tmp35 = fadd fast <7 x float> %tmp34, + store <7 x float> %tmp35, <7 x float>* %tmp33, align 4 + + ; CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 9 + ; CHECK: [[ld9:%.*]] = load <7 x float>, <7 x float>* [[adr9]], align 4 + ; CHECK: [[res9:%.*]] = fadd fast <7 x float> [[ld9]], + ; CHECK: store <7 x float> [[res9]], <7 x float>* [[adr9]], align 4 + %tmp36 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 9 + %tmp37 = load <7 x float>, <7 x float>* %tmp36, align 4 + %tmp38 = fadd fast <7 x float> %tmp37, + store <7 x float> %tmp38, <7 x float>* %tmp36, align 4 + + ; CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 10 + ; CHECK: [[ld10:%.*]] = load <7 x float>, <7 x float>* [[adr10]], align 4 + ; CHECK: [[res10:%.*]] = fadd fast <7 x float> [[ld10]], + ; CHECK: store <7 x float> [[res10]], <7 x float>* [[adr10]], align 4 + %tmp39 = getelementptr inbounds [11 x 
<7 x float>], [11 x <7 x float>]* %things, i32 0, i32 10 + %tmp40 = load <7 x float>, <7 x float>* %tmp39, align 4 + %tmp41 = fadd fast <7 x float> %tmp40, + store <7 x float> %tmp41, <7 x float>* %tmp39, align 4 + + %tmp42 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 0 + store <7 x float> %tmp2, <7 x float>* %tmp42 + %tmp43 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 1 + store <7 x float> %tmp4, <7 x float>* %tmp43 + %tmp44 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 2 + store <7 x float> %tmp9, <7 x float>* %tmp44 + %tmp45 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 3 + store <7 x float> %tmp14, <7 x float>* %tmp45 + %tmp46 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 4 + store <7 x float> %tmp19, <7 x float>* %tmp46 + %tmp47 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 5 + store <7 x float> %tmp24, <7 x float>* %tmp47 + %tmp48 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 6 + store <7 x float> %tmp29, <7 x float>* %tmp48 + %tmp49 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 7 + store <7 x float> %tmp31, <7 x float>* %tmp49 + %tmp50 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 8 + store <7 x float> %tmp34, <7 x float>* %tmp50 + %tmp51 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 9 + store <7 x float> %tmp38, <7 x float>* %tmp51 + %tmp52 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 10 + store <7 x float> %tmp41, <7 x float>* %tmp52 + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?logic +define void 
@"\01?logic@@YA$$BY09V?$vector@_N$06@@Y09V1@Y09V?$vector@M$06@@@Z"([10 x <7 x i32>]* noalias sret %agg.result, [10 x <7 x i32>]* %truth, [10 x <7 x float>]* %consequences) #0 { +bb: + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load <7 x i32>, <7 x i32>* [[adr0]], align 4 + ; CHECK: [[nres0:%.*]] = icmp ne <7 x i32> [[ld0]], zeroinitializer + ; CHECK: [[bres0:%.*]] = icmp eq <7 x i1> [[nres0:%.*]], zeroinitializer + ; CHECK: [[res0:%.*]] = zext <7 x i1> [[bres0]] to <7 x i32> + %tmp = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 0 + %tmp1 = load <7 x i32>, <7 x i32>* %tmp, align 4 + %tmp2 = icmp ne <7 x i32> %tmp1, zeroinitializer + %tmp3 = icmp eq <7 x i1> %tmp2, zeroinitializer + %tmp4 = zext <7 x i1> %tmp3 to <7 x i32> + + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <7 x i32>, <7 x i32>* [[adr1]], align 4 + ; CHECK: [[bld1:%.*]] = icmp ne <7 x i32> [[ld1]], zeroinitializer + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x i32>, <7 x i32>* [[adr2]], align 4 + ; CHECK: [[bld2:%.*]] = icmp ne <7 x i32> [[ld2]], zeroinitializer + ; CHECK: [[bres1:%.*]] = or <7 x i1> [[bld1]], [[bld2]] + ; CHECK: [[res1:%.*]] = zext <7 x i1> [[bres1]] to <7 x i32> + %tmp5 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 1 + %tmp6 = load <7 x i32>, <7 x i32>* %tmp5, align 4 + %tmp7 = icmp ne <7 x i32> %tmp6, zeroinitializer + %tmp8 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 2 + %tmp9 = load <7 x i32>, <7 x i32>* %tmp8, align 4 + %tmp10 = icmp ne <7 x i32> %tmp9, zeroinitializer + %tmp11 = or <7 x i1> %tmp7, %tmp10 + %tmp12 = zext <7 x i1> %tmp11 to <7 x i32> + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x 
i32>], [10 x <7 x i32>]* %truth, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x i32>, <7 x i32>* [[adr2]], align 4 + ; CHECK: [[bld2:%.*]] = icmp ne <7 x i32> [[ld2]], zeroinitializer + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x i32>, <7 x i32>* [[adr3]], align 4 + ; CHECK: [[bld3:%.*]] = icmp ne <7 x i32> [[ld3]], zeroinitializer + ; CHECK: [[bres2:%.*]] = and <7 x i1> [[bld2]], [[bld3]] + ; CHECK: [[res2:%.*]] = zext <7 x i1> [[bres2]] to <7 x i32> + %tmp13 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 2 + %tmp14 = load <7 x i32>, <7 x i32>* %tmp13, align 4 + %tmp15 = icmp ne <7 x i32> %tmp14, zeroinitializer + %tmp16 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 3 + %tmp17 = load <7 x i32>, <7 x i32>* %tmp16, align 4 + %tmp18 = icmp ne <7 x i32> %tmp17, zeroinitializer + %tmp19 = and <7 x i1> %tmp15, %tmp18 + %tmp20 = zext <7 x i1> %tmp19 to <7 x i32> + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x i32>, <7 x i32>* [[adr3]], align 4 + ; CHECK: [[bld3:%.*]] = icmp ne <7 x i32> [[ld3]], zeroinitializer + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x i32>, <7 x i32>* [[adr4]], align 4 + ; CHECK: [[bld4:%.*]] = icmp ne <7 x i32> [[ld4]], zeroinitializer + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x i32>, <7 x i32>* [[adr5]], align 4 + ; CHECK: [[bld5:%.*]] = icmp ne <7 x i32> [[ld5]], zeroinitializer + ; CHECK: [[bres3:%.*]] = select <7 x i1> [[bld3]], <7 x i1> [[bld4]], <7 x i1> [[bld5]] + ; CHECK: [[res3:%.*]] = zext <7 x i1> [[bres3]] to <7 x i32> + %tmp21 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, 
i32 0, i32 3 + %tmp22 = load <7 x i32>, <7 x i32>* %tmp21, align 4 + %tmp23 = icmp ne <7 x i32> %tmp22, zeroinitializer + %tmp24 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 4 + %tmp25 = load <7 x i32>, <7 x i32>* %tmp24, align 4 + %tmp26 = icmp ne <7 x i32> %tmp25, zeroinitializer + %tmp27 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 5 + %tmp28 = load <7 x i32>, <7 x i32>* %tmp27, align 4 + %tmp29 = icmp ne <7 x i32> %tmp28, zeroinitializer + %tmp30 = select <7 x i1> %tmp23, <7 x i1> %tmp26, <7 x i1> %tmp29 + %tmp31 = zext <7 x i1> %tmp30 to <7 x i32> + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load <7 x float>, <7 x float>* [[adr0]], align 4 + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <7 x float>, <7 x float>* [[adr1]], align 4 + ; CHECK: [[bres1:%.*]] = fcmp fast oeq <7 x float> [[ld0]], [[ld1]] + ; CHECK: [[res1:%.*]] = zext <7 x i1> [[bres1]] to <7 x i32> + %tmp32 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 0 + %tmp33 = load <7 x float>, <7 x float>* %tmp32, align 4 + %tmp34 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 1 + %tmp35 = load <7 x float>, <7 x float>* %tmp34, align 4 + %tmp36 = fcmp fast oeq <7 x float> %tmp33, %tmp35 + %tmp37 = zext <7 x i1> %tmp36 to <7 x i32> + + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <7 x float>, <7 x float>* [[adr1]], align 4 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4 + ; CHECK: [[bres2:%.*]] = fcmp fast une <7 x float> 
[[ld1]], [[ld2]] + ; CHECK: [[res2:%.*]] = zext <7 x i1> [[bres2]] to <7 x i32> + %tmp38 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 1 + %tmp39 = load <7 x float>, <7 x float>* %tmp38, align 4 + %tmp40 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 2 + %tmp41 = load <7 x float>, <7 x float>* %tmp40, align 4 + %tmp42 = fcmp fast une <7 x float> %tmp39, %tmp41 + %tmp43 = zext <7 x i1> %tmp42 to <7 x i32> + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4 + ; CHECK: [[bres3:%.*]] = fcmp fast olt <7 x float> [[ld2]], [[ld3]] + ; CHECK: [[res3:%.*]] = zext <7 x i1> [[bres3]] to <7 x i32> + %tmp44 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 2 + %tmp45 = load <7 x float>, <7 x float>* %tmp44, align 4 + %tmp46 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 3 + %tmp47 = load <7 x float>, <7 x float>* %tmp46, align 4 + %tmp48 = fcmp fast olt <7 x float> %tmp45, %tmp47 + %tmp49 = zext <7 x i1> %tmp48 to <7 x i32> + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4 + ; CHECK: [[bres4:%.*]] = fcmp fast ogt <7 x float> [[ld3]], [[ld4]] + ; CHECK: [[res4:%.*]] = zext <7 x i1> [[bres4]] to <7 x i32> + %tmp50 = getelementptr inbounds [10 x <7 x 
float>], [10 x <7 x float>]* %consequences, i32 0, i32 3 + %tmp51 = load <7 x float>, <7 x float>* %tmp50, align 4 + %tmp52 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 4 + %tmp53 = load <7 x float>, <7 x float>* %tmp52, align 4 + %tmp54 = fcmp fast ogt <7 x float> %tmp51, %tmp53 + %tmp55 = zext <7 x i1> %tmp54 to <7 x i32> + + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4 + ; CHECK: [[bres5:%.*]] = fcmp fast ole <7 x float> [[ld4]], [[ld5]] + ; CHECK: [[res5:%.*]] = zext <7 x i1> [[bres5]] to <7 x i32> + %tmp56 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 4 + %tmp57 = load <7 x float>, <7 x float>* %tmp56, align 4 + %tmp58 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 5 + %tmp59 = load <7 x float>, <7 x float>* %tmp58, align 4 + %tmp60 = fcmp fast ole <7 x float> %tmp57, %tmp59 + %tmp61 = zext <7 x i1> %tmp60 to <7 x i32> + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <7 x float>, <7 x float>* [[adr6]], align 4 + ; CHECK: [[bres6:%.*]] = fcmp fast oge <7 x float> [[ld5]], [[ld6]] + ; CHECK: [[res6:%.*]] = zext <7 x i1> [[bres6]] to <7 x i32> + %tmp62 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 5 + %tmp63 = load <7 x float>, <7 x float>* %tmp62, align 4 + %tmp64 = 
getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 6 + %tmp65 = load <7 x float>, <7 x float>* %tmp64, align 4 + %tmp66 = fcmp fast oge <7 x float> %tmp63, %tmp65 + %tmp67 = zext <7 x i1> %tmp66 to <7 x i32> + + %tmp68 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 0 + store <7 x i32> %tmp4, <7 x i32>* %tmp68 + %tmp69 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 1 + store <7 x i32> %tmp12, <7 x i32>* %tmp69 + %tmp70 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 2 + store <7 x i32> %tmp20, <7 x i32>* %tmp70 + %tmp71 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 3 + store <7 x i32> %tmp31, <7 x i32>* %tmp71 + %tmp72 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 4 + store <7 x i32> %tmp37, <7 x i32>* %tmp72 + %tmp73 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 5 + store <7 x i32> %tmp43, <7 x i32>* %tmp73 + %tmp74 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 6 + store <7 x i32> %tmp49, <7 x i32>* %tmp74 + %tmp75 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 7 + store <7 x i32> %tmp55, <7 x i32>* %tmp75 + %tmp76 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 8 + store <7 x i32> %tmp61, <7 x i32>* %tmp76 + %tmp77 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 9 + store <7 x i32> %tmp67, <7 x i32>* %tmp77 + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?index +define void @"\01?index@@YA$$BY09V?$vector@M$06@@Y09V1@H@Z"([10 x <7 x float>]* noalias sret %agg.result, [10 x <7 x float>]* %things, i32 %i) #0 { +bb: + %res = alloca [10 x <7 x float>], align 4 + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x float>], 
[10 x <7 x float>]* %res, i32 0, i32 0 + ; CHECK: store <7 x float> zeroinitializer, <7 x float>* [[adr0]], align 4 + %tmp1 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 0 + store <7 x float> zeroinitializer, <7 x float>* %tmp1, align 4 + + ; CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 %i + ; CHECK: store <7 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <7 x float>* [[adri]], align 4 + %tmp2 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 %i + store <7 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <7 x float>* %tmp2, align 4 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 2 + ; CHECK: store <7 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <7 x float>* [[adr2]], align 4 + %tmp3 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 2 + store <7 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <7 x float>* %tmp3, align 4 + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 0 + ; CHECK: [[res3:%.*]] = load <7 x float>, <7 x float>* [[adr0]], align 4 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 3 + ; CHECK: store <7 x float> [[res3]], <7 x float>* [[adr3]], align 4 + %tmp4 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 0 + %tmp5 = load <7 x float>, <7 x float>* %tmp4, align 4 + %tmp6 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 3 + store <7 x float> %tmp5, <7 x float>* %tmp6, align 4 + + ; CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 %i + ; CHECK: [[res4:%.*]] = load <7 x float>, <7 x float>* [[adri]], align 4 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 4 + ; CHECK: store <7 x float> [[res4]], <7 x float>* [[adr4]], align 4 + %tmp7 = getelementptr inbounds [10 x <7 x
float>]* %things, i32 0, i32 %i + %tmp8 = load <7 x float>, <7 x float>* %tmp7, align 4 + %tmp9 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 4 + store <7 x float> %tmp8, <7 x float>* %tmp9, align 4 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 2 + ; CHECK: [[res5:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 5 + ; CHECK: store <7 x float> [[res5]], <7 x float>* [[adr5]], align 4 + %tmp10 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 2 + %tmp11 = load <7 x float>, <7 x float>* %tmp10, align 4 + %tmp12 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 5 + store <7 x float> %tmp11, <7 x float>* %tmp12, align 4 + + %tmp13 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 0 + %tmp14 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 0 + %tmp15 = load <7 x float>, <7 x float>* %tmp14 + store <7 x float> %tmp15, <7 x float>* %tmp13 + + %tmp16 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 1 + %tmp17 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 1 + %tmp18 = load <7 x float>, <7 x float>* %tmp17 + store <7 x float> %tmp18, <7 x float>* %tmp16 + + %tmp19 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 2 + %tmp20 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 2 + %tmp21 = load <7 x float>, <7 x float>* %tmp20 + store <7 x float> %tmp21, <7 x float>* %tmp19 + + %tmp22 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 3 + %tmp23 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 3 + %tmp24 = load <7 x 
float>, <7 x float>* %tmp23 + store <7 x float> %tmp24, <7 x float>* %tmp22 + + %tmp25 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 4 + %tmp26 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 4 + %tmp27 = load <7 x float>, <7 x float>* %tmp26 + store <7 x float> %tmp27, <7 x float>* %tmp25 + + %tmp28 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 5 + %tmp29 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 5 + %tmp30 = load <7 x float>, <7 x float>* %tmp29 + store <7 x float> %tmp30, <7 x float>* %tmp28 + + %tmp31 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 6 + %tmp32 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 6 + %tmp33 = load <7 x float>, <7 x float>* %tmp32 + store <7 x float> %tmp33, <7 x float>* %tmp31 + + %tmp34 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 7 + %tmp35 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 7 + %tmp36 = load <7 x float>, <7 x float>* %tmp35 + store <7 x float> %tmp36, <7 x float>* %tmp34 + + %tmp37 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 8 + %tmp38 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 8 + %tmp39 = load <7 x float>, <7 x float>* %tmp38 + store <7 x float> %tmp39, <7 x float>* %tmp37 + + %tmp40 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 9 + %tmp41 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 9 + %tmp42 = load <7 x float>, <7 x float>* %tmp41 + store <7 x float> %tmp42, <7 x float>* %tmp40 + + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?bittwiddlers +define void @"\01?bittwiddlers@@YAXY0L@$$CAV?$vector@I$06@@@Z"([11 x <7 x 
i32>]* noalias %things) #0 { +bb: + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <7 x i32>, <7 x i32>* [[adr1]], align 4 + ; CHECK: [[res0:%.*]] = xor <7 x i32> [[ld1]], + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 0 + ; CHECK: store <7 x i32> [[res0]], <7 x i32>* [[adr0]], align 4 + %tmp = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 1 + %tmp1 = load <7 x i32>, <7 x i32>* %tmp, align 4 + %tmp2 = xor <7 x i32> %tmp1, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> + %tmp3 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 0 + store <7 x i32> %tmp2, <7 x i32>* %tmp3, align 4 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x i32>, <7 x i32>* [[adr2]], align 4 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x i32>, <7 x i32>* [[adr3]], align 4 + ; CHECK: [[res1:%.*]] = or <7 x i32> [[ld2]], [[ld3]] + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 1 + ; CHECK: store <7 x i32> [[res1]], <7 x i32>* [[adr1]], align 4 + %tmp4 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 2 + %tmp5 = load <7 x i32>, <7 x i32>* %tmp4, align 4 + %tmp6 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3 + %tmp7 = load <7 x i32>, <7 x i32>* %tmp6, align 4 + %tmp8 = or <7 x i32> %tmp5, %tmp7 + %tmp9 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 1 + store <7 x i32> %tmp8, <7 x i32>* %tmp9, align 4 + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x i32>, <7 x i32>* [[adr3]], align 4 + ; CHECK: [[adr4:%.*]]
= getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x i32>, <7 x i32>* [[adr4]], align 4 + ; CHECK: [[res2:%.*]] = and <7 x i32> [[ld3]], [[ld4]] + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 2 + ; CHECK: store <7 x i32> [[res2]], <7 x i32>* [[adr2]], align 4 + %tmp10 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3 + %tmp11 = load <7 x i32>, <7 x i32>* %tmp10, align 4 + %tmp12 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4 + %tmp13 = load <7 x i32>, <7 x i32>* %tmp12, align 4 + %tmp14 = and <7 x i32> %tmp11, %tmp13 + %tmp15 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 2 + store <7 x i32> %tmp14, <7 x i32>* %tmp15, align 4 + + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x i32>, <7 x i32>* [[adr4]], align 4 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x i32>, <7 x i32>* [[adr5]], align 4 + ; CHECK: [[res3:%.*]] = xor <7 x i32> [[ld4]], [[ld5]] + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3 + ; CHECK: store <7 x i32> [[res3]], <7 x i32>* [[adr3]], align 4 + %tmp16 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4 + %tmp17 = load <7 x i32>, <7 x i32>* %tmp16, align 4 + %tmp18 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5 + %tmp19 = load <7 x i32>, <7 x i32>* %tmp18, align 4 + %tmp20 = xor <7 x i32> %tmp17, %tmp19 + %tmp21 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3 + store <7 x i32> %tmp20, <7 x i32>* %tmp21, align 4 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x 
<7 x i32>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x i32>, <7 x i32>* [[adr5]], align 4 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <7 x i32>, <7 x i32>* [[adr6]], align 4 + ; CHECK: [[shv6:%.*]] = and <7 x i32> [[ld6]], + ; CHECK: [[res4:%.*]] = shl <7 x i32> [[ld5]], [[shv6]] + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4 + ; CHECK: store <7 x i32> [[res4]], <7 x i32>* [[adr4]], align 4 + %tmp22 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5 + %tmp23 = load <7 x i32>, <7 x i32>* %tmp22, align 4 + %tmp24 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6 + %tmp25 = load <7 x i32>, <7 x i32>* %tmp24, align 4 + %tmp26 = and <7 x i32> %tmp25, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> + %tmp27 = shl <7 x i32> %tmp23, %tmp26 + %tmp28 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4 + store <7 x i32> %tmp27, <7 x i32>* %tmp28, align 4 + + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <7 x i32>, <7 x i32>* [[adr6]], align 4 + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load <7 x i32>, <7 x i32>* [[adr7]], align 4 + ; CHECK: [[shv7:%.*]] = and <7 x i32> [[ld7]], + ; CHECK: [[res5:%.*]] = lshr <7 x i32> [[ld6]], [[shv7]] + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5 + ; CHECK: store <7 x i32> [[res5]], <7 x i32>* [[adr5]], align 4 + %tmp29 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6 + %tmp30 = load <7 x i32>, <7 x i32>* %tmp29, align 4 + %tmp31 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 7 + %tmp32 = load <7 x i32>, <7 x i32>* %tmp31, align 4
+ %tmp33 = and <7 x i32> %tmp32, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> + %tmp34 = lshr <7 x i32> %tmp30, %tmp33 + %tmp35 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5 + store <7 x i32> %tmp34, <7 x i32>* %tmp35, align 4 + + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load <7 x i32>, <7 x i32>* [[adr8]], align 4 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <7 x i32>, <7 x i32>* [[adr6]], align 4 + ; CHECK: [[res6:%.*]] = or <7 x i32> [[ld6]], [[ld8]] + ; CHECK: store <7 x i32> [[res6]], <7 x i32>* [[adr6]], align 4 + %tmp36 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 8 + %tmp37 = load <7 x i32>, <7 x i32>* %tmp36, align 4 + %tmp38 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6 + %tmp39 = load <7 x i32>, <7 x i32>* %tmp38, align 4 + %tmp40 = or <7 x i32> %tmp39, %tmp37 + store <7 x i32> %tmp40, <7 x i32>* %tmp38, align 4 + + ; CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 9 + ; CHECK: [[ld9:%.*]] = load <7 x i32>, <7 x i32>* [[adr9]], align 4 + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load <7 x i32>, <7 x i32>* [[adr7]], align 4 + ; CHECK: [[res7:%.*]] = and <7 x i32> [[ld7]], [[ld9]] + ; CHECK: store <7 x i32> [[res7]], <7 x i32>* [[adr7]], align 4 + %tmp41 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 9 + %tmp42 = load <7 x i32>, <7 x i32>* %tmp41, align 4 + %tmp43 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 7 + %tmp44 = load <7 x i32>, <7 x i32>* %tmp43, align 4 + %tmp45 = and <7 x i32> %tmp44, %tmp42 + store <7 x i32> %tmp45, <7 x i32>* %tmp43, align 4 + + ; CHECK: [[adr10:%.*]] = getelementptr inbounds
[11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 10 + ; CHECK: [[ld10:%.*]] = load <7 x i32>, <7 x i32>* [[adr10]], align 4 + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load <7 x i32>, <7 x i32>* [[adr8]], align 4 + ; CHECK: [[res8:%.*]] = xor <7 x i32> [[ld8]], [[ld10]] + ; CHECK: store <7 x i32> [[res8]], <7 x i32>* [[adr8]], align 4 + %tmp46 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 10 + %tmp47 = load <7 x i32>, <7 x i32>* %tmp46, align 4 + %tmp48 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 8 + %tmp49 = load <7 x i32>, <7 x i32>* %tmp48, align 4 + %tmp50 = xor <7 x i32> %tmp49, %tmp47 + store <7 x i32> %tmp50, <7 x i32>* %tmp48, align 4 + + ret void +} + +declare %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32, %dx.types.Handle, i32, i32, i8, i32) #1 +declare %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32, %"class.RWStructuredBuffer") #1 +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind readnone } + +!dx.version = !{!3} + +!3 = !{i32 1, i32 9} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll new file mode 100644 index 0000000000..9734b85b12 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll @@ -0,0 +1,745 @@ +; RUN: %dxopt %s -scalarizer -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"class.RWStructuredBuffer >" = type { <1 x float> } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.f32 = type { float, 
float, float, float, i32 } + +@"\01?buf@@3V?$RWStructuredBuffer@V?$vector@M$00@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@llvm.used = appending global [1 x i8*] [i8* bitcast (%"class.RWStructuredBuffer >"* @"\01?buf@@3V?$RWStructuredBuffer@V?$vector@M$00@@@@A" to i8*)], section "llvm.metadata" + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?assignments +define void @"\01?assignments@@YAXY09$$CAV?$vector@M$00@@@Z"([10 x <1 x float>]* noalias %things) #0 { +bb: + %tmp = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?buf@@3V?$RWStructuredBuffer@V?$vector@M$00@@@@A" + %tmp1 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" %tmp) + %tmp2 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + %RawBufferLoad = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp2, i32 1, i32 0, i8 1, i32 4) + %tmp3 = extractvalue %dx.types.ResRet.f32 %RawBufferLoad, 0 + %tmp4 = insertelement <1 x float> undef, float %tmp3, i64 0 + %tmp5 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 0 + store <1 x float> %tmp4, <1 x float>* %tmp5, align 4 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]] + ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0 + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <1 x float>, <1 x float>* [[adr1]] + ; CHECK: [[val1:%.*]] = extractelement <1 x float> [[ld1]], i32 0 + ; CHECK: [[res1:%.*]] = fadd fast float [[val1]], [[val5]] + ; CHECK: [[vec1:%.*]] = insertelement <1 x float> undef, float [[res1]], i32 0 + ; CHECK: store <1 x float> [[vec1]], <1 x float>* [[adr1]], 
align 4 + %tmp6 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 5 + %tmp7 = load <1 x float>, <1 x float>* %tmp6, align 4 + %tmp8 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 1 + %tmp9 = load <1 x float>, <1 x float>* %tmp8, align 4 + %tmp10 = fadd fast <1 x float> %tmp9, %tmp7 + store <1 x float> %tmp10, <1 x float>* %tmp8, align 4 + + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <1 x float>, <1 x float>* [[adr6]] + ; CHECK: [[val6:%.*]] = extractelement <1 x float> [[ld6]], i32 0 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]] + ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0 + ; CHECK: [[res2:%.*]] = fsub fast float [[val2]], [[val6]] + ; CHECK: [[vec2:%.*]] = insertelement <1 x float> undef, float [[res2]], i32 0 + ; CHECK: store <1 x float> [[vec2]], <1 x float>* [[adr2]], align 4 + %tmp11 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 6 + %tmp12 = load <1 x float>, <1 x float>* %tmp11, align 4 + %tmp13 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 2 + %tmp14 = load <1 x float>, <1 x float>* %tmp13, align 4 + %tmp15 = fsub fast <1 x float> %tmp14, %tmp12 + store <1 x float> %tmp15, <1 x float>* %tmp13, align 4 + + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load <1 x float>, <1 x float>* [[adr7]] + ; CHECK: [[val7:%.*]] = extractelement <1 x float> [[ld7]], i32 0 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]] + ; CHECK: [[val3:%.*]] = extractelement <1 x 
float> [[ld3]], i32 0 + ; CHECK: [[res3:%.*]] = fmul fast float [[val3]], [[val7]] + ; CHECK: [[vec3:%.*]] = insertelement <1 x float> undef, float [[res3]], i32 0 + ; CHECK: store <1 x float> [[vec3]], <1 x float>* [[adr3]], align 4 + %tmp16 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 7 + %tmp17 = load <1 x float>, <1 x float>* %tmp16, align 4 + %tmp18 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 3 + %tmp19 = load <1 x float>, <1 x float>* %tmp18, align 4 + %tmp20 = fmul fast <1 x float> %tmp19, %tmp17 + store <1 x float> %tmp20, <1 x float>* %tmp18, align 4 + + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load <1 x float>, <1 x float>* [[adr8]] + ; CHECK: [[val8:%.*]] = extractelement <1 x float> [[ld8]], i32 0 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]] + ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0 + ; CHECK: [[res4:%.*]] = fdiv fast float [[val4]], [[val8]] + ; CHECK: [[vec4:%.*]] = insertelement <1 x float> undef, float [[res4]], i32 0 + ; CHECK: store <1 x float> [[vec4]], <1 x float>* [[adr4]], align 4 + %tmp21 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 8 + %tmp22 = load <1 x float>, <1 x float>* %tmp21, align 4 + %tmp23 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 4 + %tmp24 = load <1 x float>, <1 x float>* %tmp23, align 4 + %tmp25 = fdiv fast <1 x float> %tmp24, %tmp22 + store <1 x float> %tmp25, <1 x float>* %tmp23, align 4 + + ; CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 9 + ; CHECK: [[ld9:%.*]] = load <1 x float>, <1 x float>* [[adr9]] + ; CHECK: [[val9:%.*]] = extractelement <1 x float> 
[[ld9]], i32 0 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]] + ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0 + ; CHECK: [[res5:%.*]] = frem fast float [[val5]], [[val9]] + ; CHECK: [[vec5:%.*]] = insertelement <1 x float> undef, float [[res5]], i32 0 + ; CHECK: store <1 x float> [[vec5]], <1 x float>* [[adr5]], align 4 + %tmp26 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 9 + %tmp27 = load <1 x float>, <1 x float>* %tmp26, align 4 + %tmp28 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 5 + %tmp29 = load <1 x float>, <1 x float>* %tmp28, align 4 + %tmp30 = frem fast <1 x float> %tmp29, %tmp27 + store <1 x float> %tmp30, <1 x float>* %tmp28, align 4 + + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?arithmetic +define void @"\01?arithmetic@@YA$$BY0L@V?$vector@M$00@@Y0L@$$CAV1@@Z"([11 x <1 x float>]* noalias sret %agg.result, [11 x <1 x float>]* noalias %things) #0 { +bb: + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load <1 x float>, <1 x float>* [[adr0]], align 4 + ; CHECK-DAG: [[zero:%.*]] = extractelement <1 x float> , i32 0 + ; CHECK-DAG: [[val0:%.*]] = extractelement <1 x float> [[ld0]], i32 0 + ; CHECK: [[sub0:%.*]] = fsub fast float [[zero]], [[val0]] + ; CHECK: [[res0:%.*]] = insertelement <1 x float> undef, float [[sub0]], i32 0 + %tmp = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 0 + %tmp1 = load <1 x float>, <1 x float>* %tmp, align 4 + %tmp2 = fsub fast <1 x float> , %tmp1 + %tmp3 = extractelement <1 x float> %tmp2, i64 0 + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 0 + ; CHECK: [[res1:%.*]] = load <1 x float>, 
<1 x float>* [[adr0]], align 4 + ; CHECK: [[val0:%.*]] = extractelement <1 x float> [[res1]], i64 0 + %tmp4 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 0 + %tmp5 = load <1 x float>, <1 x float>* %tmp4, align 4 + %tmp6 = extractelement <1 x float> %tmp5, i64 0 + + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <1 x float>, <1 x float>* [[adr1]], align 4 + ; CHECK: [[val1:%.*]] = extractelement <1 x float> [[ld1]], i32 0 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]], align 4 + ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0 + ; CHECK: [[add1:%.*]] = fadd fast float [[val1]], [[val2]] + ; CHECK: [[res1:%.*]] = insertelement <1 x float> undef, float [[add1]], i32 0 + %tmp7 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 1 + %tmp8 = load <1 x float>, <1 x float>* %tmp7, align 4 + %tmp9 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 2 + %tmp10 = load <1 x float>, <1 x float>* %tmp9, align 4 + %tmp11 = fadd fast <1 x float> %tmp8, %tmp10 + %tmp12 = extractelement <1 x float> %tmp11, i64 0 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]], align 4 + ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]], align 4 + ; CHECK: [[val3:%.*]] = extractelement <1 x float> [[ld3]], i32 0 + ; CHECK: [[sub2:%.*]] = fsub fast float [[val2]], [[val3]] + ; CHECK: [[res2:%.*]] = insertelement <1 x float> undef, float [[sub2]], 
i32 0 + %tmp13 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 2 + %tmp14 = load <1 x float>, <1 x float>* %tmp13, align 4 + %tmp15 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 3 + %tmp16 = load <1 x float>, <1 x float>* %tmp15, align 4 + %tmp17 = fsub fast <1 x float> %tmp14, %tmp16 + %tmp18 = extractelement <1 x float> %tmp17, i64 0 + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]], align 4 + ; CHECK: [[val3:%.*]] = extractelement <1 x float> [[ld3]], i32 0 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]], align 4 + ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0 + ; CHECK: [[mul3:%.*]] = fmul fast float [[val3]], [[val4]] + ; CHECK: [[res3:%.*]] = insertelement <1 x float> undef, float [[mul3]], i32 0 + %tmp19 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 3 + %tmp20 = load <1 x float>, <1 x float>* %tmp19, align 4 + %tmp21 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 4 + %tmp22 = load <1 x float>, <1 x float>* %tmp21, align 4 + %tmp23 = fmul fast <1 x float> %tmp20, %tmp22 + %tmp24 = extractelement <1 x float> %tmp23, i64 0 + + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]], align 4 + ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]], align 4 + ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0 + ; CHECK: 
[[div4:%.*]] = fdiv fast float [[val4]], [[val5]] + ; CHECK: [[res4:%.*]] = insertelement <1 x float> undef, float [[div4]], i32 0 + %tmp25 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 4 + %tmp26 = load <1 x float>, <1 x float>* %tmp25, align 4 + %tmp27 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 5 + %tmp28 = load <1 x float>, <1 x float>* %tmp27, align 4 + %tmp29 = fdiv fast <1 x float> %tmp26, %tmp28 + %tmp30 = extractelement <1 x float> %tmp29, i64 0 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]], align 4 + ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <1 x float>, <1 x float>* [[adr6]], align 4 + ; CHECK: [[val6:%.*]] = extractelement <1 x float> [[ld6]], i32 0 + ; CHECK: [[rem5:%.*]] = frem fast float [[val5]], [[val6]] + ; CHECK: [[res5:%.*]] = insertelement <1 x float> undef, float [[rem5]], i32 0 + %tmp31 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 5 + %tmp32 = load <1 x float>, <1 x float>* %tmp31, align 4 + %tmp33 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 6 + %tmp34 = load <1 x float>, <1 x float>* %tmp33, align 4 + %tmp35 = frem fast <1 x float> %tmp32, %tmp34 + %tmp36 = extractelement <1 x float> %tmp35, i64 0 + + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load <1 x float>, <1 x float>* [[adr7]], align 4 + ; CHECK-DAG: [[val7:%.*]] = extractelement <1 x float> [[ld7]], i32 0 + ; CHECK-DAG: [[pos1:%.*]] = extractelement <1 x float> , i32 0 + ; CHECK: [[add6:%.*]] = fadd fast float [[val7]], [[pos1]] + ; CHECK: 
[[res6:%.*]] = insertelement <1 x float> undef, float [[add6]], i32 0 + %tmp37 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 7 + %tmp38 = load <1 x float>, <1 x float>* %tmp37, align 4 + %tmp39 = fadd fast <1 x float> %tmp38, + store <1 x float> %tmp39, <1 x float>* %tmp37, align 4 + + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load <1 x float>, <1 x float>* [[adr8]], align 4 + ; CHECK-DAG: [[val8:%.*]] = extractelement <1 x float> [[ld8]], i32 0 + ; CHECK-DAG: [[neg1:%.*]] = extractelement <1 x float> , i32 0 + ; CHECK: [[add7:%.*]] = fadd fast float [[val8]], [[neg1]] + ; CHECK: [[res7:%.*]] = insertelement <1 x float> undef, float [[add7]], i32 0 + %tmp40 = extractelement <1 x float> %tmp38, i64 0 + %tmp41 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 8 + %tmp42 = load <1 x float>, <1 x float>* %tmp41, align 4 + %tmp43 = fadd fast <1 x float> %tmp42, + store <1 x float> %tmp43, <1 x float>* %tmp41, align 4 + + ; CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 9 + ; CHECK: [[ld9:%.*]] = load <1 x float>, <1 x float>* [[adr9]], align 4 + ; CHECK-DAG: [[val9:%.*]] = extractelement <1 x float> [[ld9]], i32 0 + ; CHECK-DAG: [[pos1:%.*]] = extractelement <1 x float> , i32 0 + ; CHECK: [[add8:%.*]] = fadd fast float [[val9]], [[pos1]] + ; CHECK: [[res8:%.*]] = insertelement <1 x float> undef, float [[add8]], i32 0 + %tmp44 = extractelement <1 x float> %tmp42, i64 0 + %tmp45 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 9 + %tmp46 = load <1 x float>, <1 x float>* %tmp45, align 4 + %tmp47 = fadd fast <1 x float> %tmp46, + store <1 x float> %tmp47, <1 x float>* %tmp45, align 4 + + ; CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 10 + ; CHECK: 
[[ld10:%.*]] = load <1 x float>, <1 x float>* [[adr10]], align 4 + ; CHECK-DAG: [[val10:%.*]] = extractelement <1 x float> [[ld10]], i32 0 + ; CHECK-DAG: [[neg1:%.*]] = extractelement <1 x float> , i32 0 + ; CHECK: [[add9:%.*]] = fadd fast float [[val10]], [[neg1]] + ; CHECK: [[res9:%.*]] = insertelement <1 x float> undef, float [[add9]], i32 0 + %tmp48 = extractelement <1 x float> %tmp47, i64 0 + %tmp49 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 10 + %tmp50 = load <1 x float>, <1 x float>* %tmp49, align 4 + %tmp51 = fadd fast <1 x float> %tmp50, + store <1 x float> %tmp51, <1 x float>* %tmp49, align 4 + + %tmp52 = extractelement <1 x float> %tmp51, i64 0 + %tmp53 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 0 + %insert20 = insertelement <1 x float> undef, float %tmp3, i64 0 + store <1 x float> %insert20, <1 x float>* %tmp53 + %tmp54 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 1 + %insert18 = insertelement <1 x float> undef, float %tmp6, i64 0 + store <1 x float> %insert18, <1 x float>* %tmp54 + %tmp55 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 2 + %insert16 = insertelement <1 x float> undef, float %tmp12, i64 0 + store <1 x float> %insert16, <1 x float>* %tmp55 + %tmp56 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 3 + %insert14 = insertelement <1 x float> undef, float %tmp18, i64 0 + store <1 x float> %insert14, <1 x float>* %tmp56 + %tmp57 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 4 + %insert12 = insertelement <1 x float> undef, float %tmp24, i64 0 + store <1 x float> %insert12, <1 x float>* %tmp57 + %tmp58 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 5 + %insert10 = insertelement <1 x float> undef, float %tmp30, i64 0 + store <1 x float> %insert10, <1 x 
float>* %tmp58 + %tmp59 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 6 + %insert8 = insertelement <1 x float> undef, float %tmp36, i64 0 + store <1 x float> %insert8, <1 x float>* %tmp59 + %tmp60 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 7 + %insert6 = insertelement <1 x float> undef, float %tmp40, i64 0 + store <1 x float> %insert6, <1 x float>* %tmp60 + %tmp61 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 8 + %insert4 = insertelement <1 x float> undef, float %tmp44, i64 0 + store <1 x float> %insert4, <1 x float>* %tmp61 + %tmp62 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 9 + %insert2 = insertelement <1 x float> undef, float %tmp48, i64 0 + store <1 x float> %insert2, <1 x float>* %tmp62 + %tmp63 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 10 + %insert = insertelement <1 x float> undef, float %tmp52, i64 0 + store <1 x float> %insert, <1 x float>* %tmp63 + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?logic +define void @"\01?logic@@YA$$BY09_NY09_NY09V?$vector@M$00@@@Z"([10 x i32]* noalias sret %agg.result, [10 x i32]* %truth, [10 x <1 x float>]* %consequences) #0 { +bb: + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load i32, i32* [[adr0]], align 4 + ; CHECK: [[cmp0:%.*]] = icmp ne i32 [[ld0]], 0 + ; CHECK: [[bres0:%.*]] = xor i1 [[cmp0]], true + ; CHECK: [[res0:%.*]] = zext i1 [[bres0]] to i32 + %tmp = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = icmp ne i32 %tmp1, 0 + %tmp3 = xor i1 %tmp2, true + %tmp4 = zext i1 %tmp3 to i32 + + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load i32, i32* [[adr1]], 
align 4 + ; CHECK: [[cmp1:%.*]] = icmp ne i32 [[ld1]], 0 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load i32, i32* [[adr2]], align 4 + ; CHECK: [[cmp2:%.*]] = icmp ne i32 [[ld2]], 0 + ; CHECK: [[bres1:%.*]] = or i1 [[cmp1]], [[cmp2]] + ; CHECK: [[res1:%.*]] = zext i1 [[bres1]] to i32 + %tmp5 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1 + %tmp6 = load i32, i32* %tmp5, align 4 + %tmp7 = icmp ne i32 %tmp6, 0 + %tmp9 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + %tmp10 = load i32, i32* %tmp9, align 4 + %tmp11 = icmp ne i32 %tmp10, 0 + %tmp13 = or i1 %tmp7, %tmp11 + %tmp14 = zext i1 %tmp13 to i32 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load i32, i32* [[adr2]], align 4 + ; CHECK: [[cmp2:%.*]] = icmp ne i32 [[ld2]], 0 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4 + ; CHECK: [[cmp3:%.*]] = icmp ne i32 [[ld3]], 0 + ; CHECK: [[bres2:%.*]] = and i1 [[cmp2]], [[cmp3]] + ; CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32 + %tmp15 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + %tmp16 = load i32, i32* %tmp15, align 4 + %tmp17 = icmp ne i32 %tmp16, 0 + %tmp19 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + %tmp20 = load i32, i32* %tmp19, align 4 + %tmp21 = icmp ne i32 %tmp20, 0 + %tmp23 = and i1 %tmp17, %tmp21 + %tmp24 = zext i1 %tmp23 to i32 + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4 + ; CHECK: [[cmp3:%.*]] = icmp ne i32 [[ld3]], 0 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]], align 4 + ; CHECK: [[cmp4:%.*]] = icmp ne 
i32 [[ld4]], 0 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]], align 4 + ; CHECK: [[cmp5:%.*]] = icmp ne i32 [[ld5]], 0 + ; CHECK: [[bres3:%.*]] = select i1 [[cmp3]], i1 [[cmp4]], i1 [[cmp5]] + ; CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32 + %tmp25 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + %tmp26 = load i32, i32* %tmp25, align 4 + %tmp27 = icmp ne i32 %tmp26, 0 + %tmp29 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4 + %tmp30 = load i32, i32* %tmp29, align 4 + %tmp31 = icmp ne i32 %tmp30, 0 + %tmp32 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5 + %tmp33 = load i32, i32* %tmp32, align 4 + %tmp34 = icmp ne i32 %tmp33, 0 + %tmp35 = select i1 %tmp27, i1 %tmp31, i1 %tmp34 + %tmp36 = zext i1 %tmp35 to i32 + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load <1 x float>, <1 x float>* [[adr0]] + ; CHECK: [[val0:%.*]] = extractelement <1 x float> [[ld0]], i32 0 + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <1 x float>, <1 x float>* [[adr1]] + ; CHECK: [[val1:%.*]] = extractelement <1 x float> [[ld1]], i32 0 + ; CHECK: [[bres4:%.*]] = fcmp fast oeq float [[val0]], [[val1]] + ; CHECK: [[res4:%.*]] = zext i1 [[bres4]] to i32 + %tmp37 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 0 + %tmp38 = load <1 x float>, <1 x float>* %tmp37, align 4 + %tmp39 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 1 + %tmp40 = load <1 x float>, <1 x float>* %tmp39, align 4 + %tmp41 = fcmp fast oeq <1 x float> %tmp38, %tmp40 + %tmp42 = extractelement <1 x i1> %tmp41, i64 0 + %tmp43 = zext i1 %tmp42 to i32 + + ; CHECK: [[adr1:%.*]] = getelementptr 
inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <1 x float>, <1 x float>* [[adr1]] + ; CHECK: [[val1:%.*]] = extractelement <1 x float> [[ld1]], i32 0 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]] + ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0 + ; CHECK: [[bres5:%.*]] = fcmp fast une float [[val1]], [[val2]] + ; CHECK: [[res5:%.*]] = zext i1 [[bres5]] to i32 + %tmp44 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 1 + %tmp45 = load <1 x float>, <1 x float>* %tmp44, align 4 + %tmp46 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 2 + %tmp47 = load <1 x float>, <1 x float>* %tmp46, align 4 + %tmp48 = fcmp fast une <1 x float> %tmp45, %tmp47 + %tmp49 = extractelement <1 x i1> %tmp48, i64 0 + %tmp50 = zext i1 %tmp49 to i32 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]] + ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]] + ; CHECK: [[val3:%.*]] = extractelement <1 x float> [[ld3]], i32 0 + ; CHECK: [[bres6:%.*]] = fcmp fast olt float [[val2]], [[val3]] + ; CHECK: [[res6:%.*]] = zext i1 [[bres6]] to i32 + %tmp51 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 2 + %tmp52 = load <1 x float>, <1 x float>* %tmp51, align 4 + %tmp53 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 3 + %tmp54 = load <1 x float>, <1 x float>* %tmp53, align 4 + %tmp55 = fcmp fast olt <1 x 
float> %tmp52, %tmp54 + %tmp56 = extractelement <1 x i1> %tmp55, i64 0 + %tmp57 = zext i1 %tmp56 to i32 + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]] + ; CHECK: [[val3:%.*]] = extractelement <1 x float> [[ld3]], i32 0 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]] + ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0 + ; CHECK: [[bres7:%.*]] = fcmp fast ogt float [[val3]], [[val4]] + ; CHECK: [[res7:%.*]] = zext i1 [[bres7]] to i32 + %tmp58 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 3 + %tmp59 = load <1 x float>, <1 x float>* %tmp58, align 4 + %tmp60 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 4 + %tmp61 = load <1 x float>, <1 x float>* %tmp60, align 4 + %tmp62 = fcmp fast ogt <1 x float> %tmp59, %tmp61 + %tmp63 = extractelement <1 x i1> %tmp62, i64 0 + %tmp64 = zext i1 %tmp63 to i32 + + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]] + ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]] + ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0 + ; CHECK: [[bres8:%.*]] = fcmp fast ole float [[val4]], [[val5]] + ; CHECK: [[res8:%.*]] = zext i1 [[bres8]] to i32 + %tmp65 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 4 + %tmp66 = load <1 x float>, <1 x float>* %tmp65, align 4 + %tmp67 = getelementptr inbounds [10 x <1 
x float>], [10 x <1 x float>]* %consequences, i32 0, i32 5 + %tmp68 = load <1 x float>, <1 x float>* %tmp67, align 4 + %tmp69 = fcmp fast ole <1 x float> %tmp66, %tmp68 + %tmp70 = extractelement <1 x i1> %tmp69, i64 0 + %tmp71 = zext i1 %tmp70 to i32 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]] + ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <1 x float>, <1 x float>* [[adr6]] + ; CHECK: [[val6:%.*]] = extractelement <1 x float> [[ld6]], i32 0 + ; CHECK: [[bres9:%.*]] = fcmp fast oge float [[val5]], [[val6]] + ; CHECK: [[res9:%.*]] = zext i1 [[bres9]] to i32 + %tmp72 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 5 + %tmp73 = load <1 x float>, <1 x float>* %tmp72, align 4 + %tmp74 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 6 + %tmp75 = load <1 x float>, <1 x float>* %tmp74, align 4 + %tmp76 = fcmp fast oge <1 x float> %tmp73, %tmp75 + %tmp77 = extractelement <1 x i1> %tmp76, i64 0 + %tmp78 = zext i1 %tmp77 to i32 + + %tmp79 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 0 + store i32 %tmp4, i32* %tmp79 + %tmp80 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 1 + store i32 %tmp14, i32* %tmp80 + %tmp81 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 2 + store i32 %tmp24, i32* %tmp81 + %tmp82 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 3 + store i32 %tmp36, i32* %tmp82 + %tmp83 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 4 + store i32 %tmp43, i32* %tmp83 + %tmp84 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 5 + store 
i32 %tmp50, i32* %tmp84 + %tmp85 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 6 + store i32 %tmp57, i32* %tmp85 + %tmp86 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 7 + store i32 %tmp64, i32* %tmp86 + %tmp87 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 8 + store i32 %tmp71, i32* %tmp87 + %tmp88 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 9 + store i32 %tmp78, i32* %tmp88 + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?index +define void @"\01?index@@YA$$BY09V?$vector@M$00@@Y09V1@H@Z"([10 x <1 x float>]* noalias sret %agg.result, [10 x <1 x float>]* %things, i32 %i) #0 { +bb: + ; CHECK: %res.0 = alloca [10 x float] + %res.0 = alloca [10 x float] + + ; CHECK: [[adr0:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 0 + ; CHECK: store float 0.000000e+00, float* [[adr0]] + %tmp1 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 0 + store float 0.000000e+00, float* %tmp1 + + ; CHECK: [[adri:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 %i + ; CHECK: store float 1.000000e+00, float* [[adri]] + %tmp2 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 %i + store float 1.000000e+00, float* %tmp2 + + ; CHECK: [[adr2:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 2 + ; CHECK: store float 2.000000e+00, float* [[adr2]] + %tmp3 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 2 + store float 2.000000e+00, float* %tmp3 + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load <1 x float>, <1 x float>* [[adr0]] + ; CHECK: [[adr3:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 3 + ; CHECK: [[val0:%.*]] = extractelement <1 x float> [[ld0]], i64 0 + ; CHECK: store float [[val0]], float* [[adr3]] + %tmp4 = getelementptr inbounds [10 x <1 
x float>], [10 x <1 x float>]* %things, i32 0, i32 0 + %tmp5 = load <1 x float>, <1 x float>* %tmp4, align 4 + %tmp6 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 3 + %tmp7 = extractelement <1 x float> %tmp5, i64 0 + store float %tmp7, float* %tmp6 + + ; CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 %i + ; CHECK: [[ldi:%.*]] = load <1 x float>, <1 x float>* [[adri]] + ; CHECK: [[adr4:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 4 + ; CHECK: [[vali:%.*]] = extractelement <1 x float> [[ldi]], i64 0 + ; CHECK: store float [[vali]], float* [[adr4]] + %tmp8 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 %i + %tmp9 = load <1 x float>, <1 x float>* %tmp8, align 4 + %tmp10 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 4 + %tmp11 = extractelement <1 x float> %tmp9, i64 0 + store float %tmp11, float* %tmp10 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]] + ; CHECK: [[adr5:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 5 + ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i64 0 + ; CHECK: store float [[val2]], float* [[adr5]] + %tmp12 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 2 + %tmp13 = load <1 x float>, <1 x float>* %tmp12, align 4 + %tmp14 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 5 + %tmp15 = extractelement <1 x float> %tmp13, i64 0 + store float %tmp15, float* %tmp14 + + %tmp16 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 0 + %tmp17 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 0 + %load17 = load float, float* %tmp17 + %insert18 = insertelement <1 x float> undef, float %load17, i64 0 + store <1 x float> %insert18, <1 x float>* %tmp16 + 
+ %tmp18 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 1 + %tmp19 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 1 + %load15 = load float, float* %tmp19 + %insert16 = insertelement <1 x float> undef, float %load15, i64 0 + store <1 x float> %insert16, <1 x float>* %tmp18 + + %tmp20 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 2 + %tmp21 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 2 + %load13 = load float, float* %tmp21 + %insert14 = insertelement <1 x float> undef, float %load13, i64 0 + store <1 x float> %insert14, <1 x float>* %tmp20 + + %tmp22 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 3 + %tmp23 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 3 + %load11 = load float, float* %tmp23 + %insert12 = insertelement <1 x float> undef, float %load11, i64 0 + store <1 x float> %insert12, <1 x float>* %tmp22 + + %tmp24 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 4 + %tmp25 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 4 + %load9 = load float, float* %tmp25 + %insert10 = insertelement <1 x float> undef, float %load9, i64 0 + store <1 x float> %insert10, <1 x float>* %tmp24 + + %tmp26 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 5 + %tmp27 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 5 + %load7 = load float, float* %tmp27 + %insert8 = insertelement <1 x float> undef, float %load7, i64 0 + store <1 x float> %insert8, <1 x float>* %tmp26 + + %tmp28 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 6 + %tmp29 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 6 + %load5 = load float, float* %tmp29 + %insert6 = insertelement <1 x float> undef, float %load5, i64 0 + store <1 x float> %insert6, <1 x float>* %tmp28 + + %tmp30 = 
getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 7 + %tmp31 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 7 + %load3 = load float, float* %tmp31 + %insert4 = insertelement <1 x float> undef, float %load3, i64 0 + store <1 x float> %insert4, <1 x float>* %tmp30 + + %tmp32 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 8 + %tmp33 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 8 + %load1 = load float, float* %tmp33 + %insert2 = insertelement <1 x float> undef, float %load1, i64 0 + store <1 x float> %insert2, <1 x float>* %tmp32 + + %tmp34 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 9 + %tmp35 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 9 + %load = load float, float* %tmp35 + %insert = insertelement <1 x float> undef, float %load, i64 0 + store <1 x float> %insert, <1 x float>* %tmp34 + + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?bittwiddlers +define void @"\01?bittwiddlers@@YAXY0L@$$CAI@Z"([11 x i32]* noalias %things) #0 { +bb: + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load i32, i32* [[adr1]], align 4 + ; CHECK: [[res0:%.*]] = xor i32 [[ld1]], -1 + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 0 + ; CHECK: store i32 [[res0]], i32* [[adr0]], align 4 + %tmp = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = xor i32 %tmp1, -1 + %tmp3 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 0 + store i32 %tmp2, i32* %tmp3, align 4 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load i32, i32* [[adr2]], align 4 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, 
i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4 + ; CHECK: [[res1:%.*]] = or i32 [[ld2]], [[ld3]] + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1 + ; CHECK: store i32 [[res1]], i32* [[adr1]], align 4 + %tmp4 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2 + %tmp5 = load i32, i32* %tmp4, align 4 + %tmp6 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + %tmp7 = load i32, i32* %tmp6, align 4 + %tmp8 = or i32 %tmp5, %tmp7 + %tmp9 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1 + store i32 %tmp8, i32* %tmp9, align 4 + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]], align 4 + ; CHECK: [[res2:%.*]] = and i32 [[ld3]], [[ld4]] + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2 + ; CHECK: store i32 [[res2]], i32* [[adr2]], align 4 + %tmp10 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + %tmp11 = load i32, i32* %tmp10, align 4 + %tmp12 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + %tmp13 = load i32, i32* %tmp12, align 4 + %tmp14 = and i32 %tmp11, %tmp13 + %tmp15 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2 + store i32 %tmp14, i32* %tmp15, align 4 + + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]], align 4 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]], align 4 + ; CHECK: [[res3:%.*]] = xor i32 [[ld4]], [[ld5]] + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* 
%things, i32 0, i32 3 + ; CHECK: store i32 [[res3]], i32* [[adr3]], align 4 + %tmp16 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + %tmp17 = load i32, i32* %tmp16, align 4 + %tmp18 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + %tmp19 = load i32, i32* %tmp18, align 4 + %tmp20 = xor i32 %tmp17, %tmp19 + %tmp21 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + store i32 %tmp20, i32* %tmp21, align 4 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]], align 4 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]], align 4 + ; CHECK: [[and4:%.*]] = and i32 [[ld6]], 31 + ; CHECK: [[res4:%.*]] = shl i32 [[ld5]], [[and4]] + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + ; CHECK: store i32 [[res4]], i32* [[adr4]], align 4 + %tmp22 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + %tmp23 = load i32, i32* %tmp22, align 4 + %tmp24 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + %tmp25 = load i32, i32* %tmp24, align 4 + %tmp26 = and i32 %tmp25, 31 + %tmp27 = shl i32 %tmp23, %tmp26 + %tmp28 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + store i32 %tmp27, i32* %tmp28, align 4 + + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]], align 4 + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load i32, i32* [[adr7]], align 4 + ; CHECK: [[and5:%.*]] = and i32 [[ld7]], 31 + ; CHECK: [[res5:%.*]] = lshr i32 [[ld6]], [[and5]] + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + ; CHECK: store i32 [[res5]], i32* 
[[adr5]], align 4 + %tmp29 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + %tmp30 = load i32, i32* %tmp29, align 4 + %tmp31 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7 + %tmp32 = load i32, i32* %tmp31, align 4 + %tmp33 = and i32 %tmp32, 31 + %tmp34 = lshr i32 %tmp30, %tmp33 + %tmp35 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + store i32 %tmp34, i32* %tmp35, align 4 + + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load i32, i32* [[adr8]], align 4 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]], align 4 + ; CHECK: [[res6:%.*]] = or i32 [[ld6]], [[ld8]] + ; CHECK: store i32 [[res6]], i32* [[adr6]], align 4 + %tmp36 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8 + %tmp37 = load i32, i32* %tmp36, align 4 + %tmp38 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + %tmp39 = load i32, i32* %tmp38, align 4 + %tmp40 = or i32 %tmp39, %tmp37 + store i32 %tmp40, i32* %tmp38, align 4 + + ; CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 9 + ; CHECK: [[ld9:%.*]] = load i32, i32* [[adr9]], align 4 + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load i32, i32* [[adr7]], align 4 + ; CHECK: [[res7:%.*]] = and i32 [[ld7]], [[ld9]] + ; CHECK: store i32 [[res7]], i32* [[adr7]], align 4 + %tmp41 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 9 + %tmp42 = load i32, i32* %tmp41, align 4 + %tmp43 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7 + %tmp44 = load i32, i32* %tmp43, align 4 + %tmp45 = and i32 %tmp44, %tmp42 + store i32 %tmp45, i32* %tmp43, align 4 + + ; CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 
0, i32 10 + ; CHECK: [[ld10:%.*]] = load i32, i32* [[adr10]], align 4 + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load i32, i32* [[adr8]], align 4 + ; CHECK: [[res8:%.*]] = xor i32 [[ld8]], [[ld10]] + ; CHECK: store i32 [[res8]], i32* [[adr8]], align 4 + %tmp46 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 10 + %tmp47 = load i32, i32* %tmp46, align 4 + %tmp48 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8 + %tmp49 = load i32, i32* %tmp48, align 4 + %tmp50 = xor i32 %tmp49, %tmp47 + store i32 %tmp50, i32* %tmp48, align 4 + + ret void +} + +declare %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32, %dx.types.Handle, i32, i32, i8, i32) #2 +declare %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32, %"class.RWStructuredBuffer >") #2 +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.version = !{!3} +!3 = !{i32 1, i32 9} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1.hlsl b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1.hlsl new file mode 100644 index 0000000000..66382af2d5 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1.hlsl @@ -0,0 +1,425 @@ +// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=float1 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=int1 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=double1 -DDBL %s | FileCheck %s --check-prefixes=CHECK +// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=uint64_t1 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=float16_t1 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -fcgl -HV 
2018 -T lib_6_9 -DTYPE=int16_t1 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL + +// Mainly a source for the vec1 scalarizer IR test. +// Serves to verify some codegen as well. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. +// Need to capture once for the full vector type, again for the element type. +// CHECK-DAG: %"class.RWStructuredBuffer<{{.*}}>" = type { [[TYPE:<[0-9]* x [a-z0-9_]*>]] } +// CHECK-DAG: %"class.RWStructuredBuffer<{{.*}}>" = type { <{{[0-9]*}} x [[ELTY:[a-z0-9_]*]]> } +RWStructuredBuffer buf; + +export void assignments(inout TYPE things[10], TYPE scales[10]); +export TYPE arithmetic(inout TYPE things[11])[11]; +export bool logic(bool truth[10], TYPE consequences[10])[10]; +export TYPE index(TYPE things[10], int i, TYPE val)[10]; + +// Test assignment operators. +// CHECK-LABEL: define void @"\01?assignments +export void assignments(inout TYPE things[10]) { + + // CHECK: [[res0:%.*]] = call [[TYPE]] @"dx.hl.op.ro.[[TYPE]] (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle {{%.*}}, i32 1) + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]] + things[0] = buf.Load(1); + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[res1:%.*]] = [[ADD:f?add( fast)?]] [[TYPE]] [[vec1]], [[vec5]] + // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]] + things[1] += things[5]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 6 + // CHECK: [[vec6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // CHECK: [[adr2:%.*]] = 
getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[res2:%.*]] = [[SUB:f?sub( fast)?]] [[TYPE]] [[vec2]], [[vec6]] + // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + things[2] -= things[6]; + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 7 + // CHECK: [[vec7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[res3:%.*]] = [[MUL:f?mul( fast)?]] [[TYPE]] [[vec3]], [[vec7]] + // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + things[3] *= things[7]; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 8 + // CHECK: [[vec8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[res4:%.*]] = [[DIV:[ufs]?div( fast)?]] [[TYPE]] [[vec4]], [[vec8]] + // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + things[4] /= things[8]; + +#ifndef DBL + // NODBL: [[adr9:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 9 + // NODBL: [[vec9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]] + // NODBL: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 5 + // NODBL: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // NODBL: [[res5:%.*]] = [[REM:[ufs]?rem( fast)?]] [[TYPE]] [[vec5]], [[vec9]] + // NODBL: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] + things[5] %= things[9]; +#endif +} + +// Test arithmetic operators. 
+// CHECK-LABEL: define void @"\01?arithmetic +export TYPE arithmetic(inout TYPE things[11])[11] { + TYPE res[11]; + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: [[res1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[res0:%.*]] = [[SUB]] [[TYPE]] + res[0] = -things[0]; + res[1] = +things[0]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[res2:%.*]] = [[ADD]] [[TYPE]] [[vec1]], [[vec2]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 2 + // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + res[2] = things[1] + things[2]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[res3:%.*]] = [[SUB]] [[TYPE]] [[vec2]], [[vec3]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 3 + // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + res[3] = things[2] - things[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[res4:%.*]] = [[MUL]] [[TYPE]] [[vec3]], [[vec4]] + // CHECK: [[adr4:%.*]] = getelementptr 
inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 4 + // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + res[4] = things[3] * things[4]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[res5:%.*]] = [[DIV]] [[TYPE]] [[vec4]], [[vec5]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 5 + // CHECK: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] + res[5] = things[4] / things[5]; + +#ifndef DBL + // NODBL: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 5 + // NODBL: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 6 + // NODBL: [[vec6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // NODBL: [[res6:%.*]] = [[REM]] [[TYPE]] [[vec5]], [[vec6]] + // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 6 + // NODBL: store [[TYPE]] [[res6]], [[TYPE]]* [[adr6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 7 + // CHECK: [[vec7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // CHECK: [[res7:%.*]] = [[ADD]] [[TYPE]] [[vec7]], <[[ELTY]] [[POS1:(1|1\.0*e\+0*|0xH3C00)]]> + // CHECK: store [[TYPE]] [[res7]], [[TYPE]]* [[adr7]] + // This is a post op, so the original value goes into res[]. 
+ // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 7 + // CHECK: store [[TYPE]] [[vec7]], [[TYPE]]* [[adr7]] + res[7] = things[7]++; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 8 + // CHECK: [[vec8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // CHECK: [[res8:%.*]] = [[ADD]] [[TYPE]] [[vec8]] + // CHECK: store [[TYPE]] [[res8]], [[TYPE]]* [[adr8]] + // This is a post op, so the original value goes into res[]. + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 8 + // CHECK: store [[TYPE]] [[vec8]], [[TYPE]]* [[adr8]] + res[8] = things[8]--; + + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 9 + // CHECK: [[vec9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]] + // CHECK: [[res9:%.*]] = [[ADD]] [[TYPE]] [[vec9]] + // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 9 + // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + res[9] = ++things[9]; + + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 10 + // CHECK: [[vec10:%.*]] = load [[TYPE]], [[TYPE]]* [[adr10]] + // CHECK: [[res10:%.*]] = [[ADD]] [[TYPE]] [[vec10]] + // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 10 + // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + res[10] = --things[10]; + + // Memcpy res into return value. + // CHECK: [[retptr:%.*]] = bitcast [11 x [[TYPE]]]* %agg.result to i8* + // CHECK: [[resptr:%.*]] = bitcast [11 x [[TYPE]]]* %res to i8* + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[retptr]], i8* [[resptr]] + // CHECK: ret void + return res; +} + +// Test logic operators. 
+// Only permissible in pre-HLSL2021 +// CHECK-LABEL: define void @"\01?logic +export bool logic(bool truth[10], TYPE consequences[10])[10] { + bool res[10]; + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0 + // CHECK: [[vec0:%.*]] = load i32, i32* [[adr0]] + // CHECK: [[bvec0:%.*]] = icmp ne i32 [[vec0]], 0 + // CHECK: [[bres0:%.*]] = xor i1 [[bvec0]], true + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 0 + // CHECK: [[res0:%.*]] = zext i1 [[bres0]] to i32 + // CHECK: store i32 [[res0]], i32* [[adr0]] + res[0] = !truth[0]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load i32, i32* [[adr1]] + // CHECK: [[bvec1:%.*]] = icmp ne i32 [[vec1]], 0 + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load i32, i32* [[adr2]] + // CHECK: [[bvec2:%.*]] = icmp ne i32 [[vec2]], 0 + // CHECK: [[bres1:%.*]] = or i1 [[bvec1]], [[bvec2]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 1 + // CHECK: [[res1:%.*]] = zext i1 [[bres1]] to i32 + // CHECK: store i32 [[res1]], i32* [[adr1]] + res[1] = truth[1] || truth[2]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load i32, i32* [[adr2]] + // CHECK: [[bvec2:%.*]] = icmp ne i32 [[vec2]], 0 + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load i32, i32* [[adr3]] + // CHECK: [[bvec3:%.*]] = icmp ne i32 [[vec3]], 0 + // CHECK: [[bres2:%.*]] = and i1 [[bvec2]], [[bvec3]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 2 + // CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32 + // CHECK: store i32 [[res2]], i32* [[adr2]] + res[2] = truth[2] && truth[3]; + + // CHECK: 
[[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load i32, i32* [[adr3]] + // CHECK: [[bvec3:%.*]] = icmp ne i32 [[vec3]], 0 + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load i32, i32* [[adr4]] + // CHECK: [[bvec4:%.*]] = icmp ne i32 [[vec4]], 0 + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load i32, i32* [[adr5]] + // CHECK: [[bvec5:%.*]] = icmp ne i32 [[vec5]], 0 + // CHECK: [[bres3:%.*]] = select i1 [[bvec3]], i1 [[bvec4]], i1 [[bvec5]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 3 + // CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32 + // CHECK: store i32 [[res3]], i32* [[adr3]] + res[3] = truth[3] ? truth[4] : truth[5]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 0 + // CHECK: [[vec0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[cmp4:%.*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq [[TYPE]] [[vec0]], [[vec1]] + // CHECK: [[bres4:%.*]] = extractelement <1 x i1> [[cmp4]], i64 0 + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 4 + // CHECK: [[res4:%.*]] = zext i1 [[bres4]] to i32 + // CHECK: store i32 [[res4]], i32* [[adr4]] + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: 
[[cmp5:%.*]] = [[CMP]] {{u?}}ne [[TYPE]] [[vec1]], [[vec2]] + // CHECK: [[bres5:%.*]] = extractelement <1 x i1> [[cmp5]], i64 0 + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 5 + // CHECK: [[res5:%.*]] = zext i1 [[bres5]] to i32 + // CHECK: store i32 [[res5]], i32* [[adr5]] + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[cmp6:%.*]] = [[CMP]] {{[osu]?}}lt [[TYPE]] [[vec2]], [[vec3]] + // CHECK: [[bres6:%.*]] = extractelement <1 x i1> [[cmp6]], i64 0 + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 6 + // CHECK: [[res6:%.*]] = zext i1 [[bres6]] to i32 + // CHECK: store i32 [[res6]], i32* [[adr6]] + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[cmp7:%.*]] = [[CMP]] {{[osu]?}}gt [[TYPE]] [[vec3]], [[vec4]] + // CHECK: [[bres7:%.*]] = extractelement <1 x i1> [[cmp7]], i64 0 + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 7 + // CHECK: [[res7:%.*]] = zext i1 [[bres7]] to i32 + // CHECK: store i32 [[res7]], i32* [[adr7]] + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] 
+ // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[cmp8:%.*]] = [[CMP]] {{[osu]?}}le [[TYPE]] [[vec4]], [[vec5]] + // CHECK: [[bres8:%.*]] = extractelement <1 x i1> [[cmp8]], i64 0 + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 8 + // CHECK: [[res8:%.*]] = zext i1 [[bres8]] to i32 + // CHECK: store i32 [[res8]], i32* [[adr8]] + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 6 + // CHECK: [[vec6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // CHECK: [[cmp9:%.*]] = [[CMP]] {{[osu]?}}ge [[TYPE]] [[vec5]], [[vec6]] + // CHECK: [[bres9:%.*]] = extractelement <1 x i1> [[cmp9]], i64 0 + // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 9 + // CHECK: [[res9:%.*]] = zext i1 [[bres9]] to i32 + // CHECK: store i32 [[res9]], i32* [[adr9]] + res[9] = consequences[5] >= consequences[6]; + + // Memcpy res into return value. 
+ // CHECK: [[retptr:%.*]] = bitcast [10 x i32]* %agg.result to i8* + // CHECK: [[resptr:%.*]] = bitcast [10 x i32]* %res to i8* + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[retptr]], i8* [[resptr]] + // CHECK: ret void + return res; +} + +static const int Ix = 2; + +// Test indexing operators +// CHECK-LABEL: define void @"\01?index +export TYPE index(TYPE things[10], int i)[10] { + // CHECK: [[res:%.*]] = alloca [10 x [[TYPE]]] + // CHECK: store i32 %i, i32* [[iadd:%.[0-9]*]] + TYPE res[10]; + + // CHECK: [[res0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 0 + // CHECK: store [[TYPE]] zeroinitializer, [[TYPE]]* [[res0]] + res[0] = 0; + + // CHECK: [[i:%.*]] = load i32, i32* [[iadd]] + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 [[i]] + // CHECK: store [[TYPE]] <[[ELTY]] {{(1|1\.0*e\+0*|0xH3C00).*}}>, [[TYPE]]* [[adri]] + res[i] = 1; + + // CHECK: [[res2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 2 + // CHECK: store [[TYPE]] <[[ELTY]] {{(2|2\.0*e\+0*|0xH4000).*}}>, [[TYPE]]* [[res2]] + res[Ix] = 2; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: [[thg0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[res3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 3 + // CHECK: store [[TYPE]] [[thg0]], [[TYPE]]* [[res3]] + res[3] = things[0]; + + // CHECK: [[i:%.*]] = load i32, i32* [[iadd]] + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 [[i]] + // CHECK: [[thgi:%.*]] = load [[TYPE]], [[TYPE]]* [[adri]] + // CHECK: [[res4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 4 + // CHECK: store [[TYPE]] [[thgi]], [[TYPE]]* [[res4]] + res[4] = things[i]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x 
[[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[thg2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[res5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 5 + // CHECK: store [[TYPE]] [[thg2]], [[TYPE]]* [[res5]] + res[5] = things[Ix]; + // CHECK: ret void + return res; +} + +// Test bit twiddling operators. +// INT-LABEL: define void @"\01?bittwiddlers +export void bittwiddlers(inout uint things[11]) { + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1 + // CHECK: [[ld1:%.*]] = load i32, i32* [[adr1]] + // CHECK: [[res1:%.*]] = xor i32 [[ld1]], -1 + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 0 + // CHECK: store i32 [[res1]], i32* [[adr0]] + things[0] = ~things[1]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2 + // CHECK: [[ld2:%.*]] = load i32, i32* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + // CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]] + // CHECK: [[res1:%.*]] = or i32 [[ld2]], [[ld3]] + // CHECK: store i32 [[res1]], i32* [[adr1]] + things[1] = things[2] | things[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + // CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + // CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]] + // CHECK: [[res2:%.*]] = and i32 [[ld3]], [[ld4]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2 + // CHECK: store i32 [[res2]], i32* [[adr2]] + things[2] = things[3] & things[4]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + // CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 
0, i32 5 + // CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]] + // CHECK: [[res3:%.*]] = xor i32 [[ld4]], [[ld5]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + // CHECK: store i32 [[res3]], i32* [[adr3]] + things[3] = things[4] ^ things[5]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + // CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]] + // CHECK: [[shv6:%.*]] = and i32 [[ld6]], 31 + // CHECK: [[res4:%.*]] = shl i32 [[ld5]], [[shv6]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + // CHECK: store i32 [[res4]], i32* [[adr4]] + things[4] = things[5] << things[6]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7 + // CHECK: [[ld7:%.*]] = load i32, i32* [[adr7]] + // CHECK: [[shv7:%.*]] = and i32 [[ld7]], 31 + // CHECK: [[res5:%.*]] = lshr i32 [[ld6]], [[shv7]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + // CHECK: store i32 [[res5]], i32* [[adr5]] + things[5] = things[6] >> things[7]; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8 + // CHECK: [[ld8:%.*]] = load i32, i32* [[adr8]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]] + // CHECK: [[res6:%.*]] = or i32 [[ld6]], [[ld8]] + // CHECK: store i32 [[res6]], i32* [[adr6]] + things[6] |= things[8]; + + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 9 + // CHECK: [[ld9:%.*]] = load i32, i32* 
[[adr9]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7 + // CHECK: [[ld7:%.*]] = load i32, i32* [[adr7]] + // CHECK: [[res7:%.*]] = and i32 [[ld7]], [[ld9]] + // CHECK: store i32 [[res7]], i32* [[adr7]] + things[7] &= things[9]; + + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 10 + // CHECK: [[ld10:%.*]] = load i32, i32* [[adr10]] + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8 + // CHECK: [[ld8:%.*]] = load i32, i32* [[adr8]] + // CHECK: [[res8:%.*]] = xor i32 [[ld8]], [[ld10]] + // CHECK: store i32 [[res8]], i32* [[adr8]] + things[8] ^= things[10]; + + // CHECK: ret void +} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators.hlsl b/tools/clang/test/CodeGenDXIL/passes/longvec-operators.hlsl new file mode 100644 index 0000000000..2c2ef01b8a --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators.hlsl @@ -0,0 +1,420 @@ +// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=4 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=int -DNUM=7 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=double -DNUM=16 -DDBL %s | FileCheck %s --check-prefixes=CHECK +// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=uint64_t -DNUM=17 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=float16_t -DNUM=34 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=int16_t -DNUM=129 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL + +// Mainly a source for the longvec scalarizer IR test. +// Serves to verify some codegen as well. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. 
+// CHECK: %"class.RWStructuredBuffer<{{.*}}>" = type { [[TYPE:[a-z0-9]*]] } +// CHECK: external global {{\[}}[[NUM:[0-9]*]] x %"class.RWStructuredBuffer +RWStructuredBuffer buf[NUM]; + + +// Test assignment operators. +// CHECK-LABEL: define void @"\01?assignments +export void assignments(inout vector things[10]) { + + // CHECK: [[res0:%.*]] = call [[TYPE]] @"dx.hl.op.ro.[[TYPE]] (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle {{%.*}}, i32 1) + // CHECK: [[vec0:%.*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[res0]], i32 0 + // CHECK: [[res0:%.*]] = shufflevector <[[NUM]] x [[TYPE]]> [[vec0]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res0]], <[[NUM]] x [[TYPE]]>* [[adr0]] + things[0] = buf[0].Load(1); + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]] + // CHECK: [[res1:%.*]] = [[ADD:f?add( fast)?]] <[[NUM]] x [[TYPE]]> [[vec1]], [[vec5]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr1]] + things[1] += things[5]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr6]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]] + // CHECK: 
[[res2:%.*]] = [[SUB:f?sub( fast)?]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec6]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[adr2]] + things[2] -= things[6]; + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7 + // CHECK: [[vec7:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr7]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]] + // CHECK: [[res3:%.*]] = [[MUL:f?mul( fast)?]] <[[NUM]] x [[TYPE]]> [[vec3]], [[vec7]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[adr3]] + things[3] *= things[7]; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8 + // CHECK: [[vec8:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr8]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]] + // CHECK: [[res4:%.*]] = [[DIV:[ufs]?div( fast)?]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec8]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[adr4]] + things[4] /= things[8]; + +#ifndef DBL + // NODBL: [[adr9:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9 + // NODBL: [[vec9:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr9]] + // NODBL: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // NODBL: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]] + // NODBL: [[res5:%.*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec9]] + // NODBL: store 
<[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[adr5]] + things[5] %= things[9]; +#endif +} + +// Test arithmetic operators. +// CHECK-LABEL: define void @"\01?arithmetic +export vector arithmetic(inout vector things[11])[11] { + vector res[11]; + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: [[res1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr0]] + // CHECK: [[res0:%.*]] = [[SUB]] <[[NUM]] x [[TYPE]]> + res[0] = -things[0]; + res[1] = +things[0]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]] + // CHECK: [[res2:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec1]], [[vec2]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 2 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[adr2]] + res[2] = things[1] + things[2]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]] + // CHECK: [[res3:%.*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 3 + // CHECK: store <[[NUM]] x [[TYPE]]> 
[[res3]], <[[NUM]] x [[TYPE]]>* [[adr3]] + res[3] = things[2] - things[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]] + // CHECK: [[res4:%.*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[vec3]], [[vec4]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 4 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[adr4]] + res[4] = things[3] * things[4]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]] + // CHECK: [[res5:%.*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 5 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[adr5]] + res[5] = things[4] / things[5]; + +#ifndef DBL + // NODBL: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // NODBL: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]] + // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // NODBL: [[vec6:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x 
[[TYPE]]>* [[adr6]] + // NODBL: [[res6:%.*]] = [[REM]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]] + // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 6 + // NODBL: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[adr6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7 + // CHECK: [[vec7:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr7]] + // CHECK: [[res7:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec7]], <[[TYPE]] [[POS1:(1|1\.0*e\+0*|0xH3C00)]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[adr7]] + // This is a post op, so the original value goes into res[]. + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 7 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec7]], <[[NUM]] x [[TYPE]]>* [[adr7]] + res[7] = things[7]++; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8 + // CHECK: [[vec8:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr8]] + // CHECK: [[res8:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec8]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[adr8]] + // This is a post op, so the original value goes into res[]. 
+ // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 8 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec8]], <[[NUM]] x [[TYPE]]>* [[adr8]] + res[8] = things[8]--; + + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9 + // CHECK: [[vec9:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr9]] + // CHECK: [[res9:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec9]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[adr9]] + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 9 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[adr9]] + res[9] = ++things[9]; + + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 10 + // CHECK: [[vec10:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr10]] + // CHECK: [[res10:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec10]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[adr10]] + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 10 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[adr10]] + res[10] = --things[10]; + + // Memcpy res into return value. + // CHECK: [[retptr:%.*]] = bitcast [11 x <[[NUM]] x [[TYPE]]>]* %agg.result to i8* + // CHECK: [[resptr:%.*]] = bitcast [11 x <[[NUM]] x [[TYPE]]>]* %res to i8* + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[retptr]], i8* [[resptr]] + // CHECK: ret void + return res; +} + +// Test logic operators. 
+// Only permissable in pre-HLSL2021 +// CHECK-LABEL: define void @"\01?logic +export vector logic(vector truth[10], vector consequences[10])[10] { + vector res[10]; + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 0 + // CHECK: [[vec0:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr0]] + // CHECK: [[bvec0:%.*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + // CHECK: [[bres0:%.*]] = icmp eq <[[NUM]] x i1> [[bvec0]], zeroinitializer + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 0 + // CHECK: [[res0:%.*]] = zext <[[NUM]] x i1> [[bres0]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[adr0]] + res[0] = !truth[0]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr1]] + // CHECK: [[bvec1:%.*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr2]] + // CHECK: [[bvec2:%.*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[val1:%.*]] = icmp ne <[[NUM]] x i1> [[bvec1]], zeroinitializer + // CHECK: [[val2:%.*]] = icmp ne <[[NUM]] x i1> [[bvec2]], zeroinitializer + // CHECK: [[bres1:%.*]] = or <[[NUM]] x i1> [[val1]], [[val2]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 1 + // CHECK: [[res1:%.*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[adr1]] + res[1] = truth[1] || truth[2]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 2 + // CHECK: 
[[vec2:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr2]] + // CHECK: [[bvec2:%.*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr3]] + // CHECK: [[bvec3:%.*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[val2:%.*]] = icmp ne <[[NUM]] x i1> [[bvec2]], zeroinitializer + // CHECK: [[val3:%.*]] = icmp ne <[[NUM]] x i1> [[bvec3]], zeroinitializer + // CHECK: [[bres2:%.*]] = and <[[NUM]] x i1> [[val2]], [[val3]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 2 + // CHECK: [[res2:%.*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[adr2]] + res[2] = truth[2] && truth[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr3]] + // CHECK: [[bvec3:%.*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr4]] + // CHECK: [[bvec4:%.*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr5]] + // CHECK: [[bvec5:%.*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // CHECK: [[bres3:%.*]] = select <[[NUM]] x i1> [[bvec3]], <[[NUM]] x i1> [[bvec4]], <[[NUM]] x i1> [[bvec5]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 3 + // CHECK: 
[[res3:%.*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[adr3]] + res[3] = truth[3] ? truth[4] : truth[5]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 0 + // CHECK: [[vec0:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]] + // CHECK: [[bres4:%.*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq <[[NUM]] x [[TYPE]]> [[vec0]], [[vec1]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 4 + // CHECK: [[res4:%.*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[adr4]] + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]] + // CHECK: [[bres5:%.*]] = [[CMP]] {{u?}}ne <[[NUM]] x [[TYPE]]> [[vec1]], [[vec2]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 5 + // CHECK: [[res5:%.*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[adr5]] + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, 
i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]] + // CHECK: [[bres6:%.*]] = [[CMP]] {{[osu]?}}lt <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 6 + // CHECK: [[res6:%.*]] = zext <[[NUM]] x i1> [[bres6]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[adr6]] + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]] + // CHECK: [[bres7:%.*]] = [[CMP]] {{[osu]]?}}gt <[[NUM]] x [[TYPE]]> [[vec3]], [[vec4]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 7 + // CHECK: [[res7:%.*]] = zext <[[NUM]] x i1> [[bres7]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res7]], <[[NUM]] x i32>* [[adr7]] + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* 
[[adr5]] + // CHECK: [[bres8:%.*]] = [[CMP]] {{[osu]]?}}le <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 8 + // CHECK: [[res8:%.*]] = zext <[[NUM]] x i1> [[bres8]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res8]], <[[NUM]] x i32>* [[adr8]] + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 6 + // CHECK: [[vec6:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr6]] + // CHECK: [[bres9:%.*]] = [[CMP]] {{[osu]?}}ge <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]] + // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 9 + // CHECK: [[res9:%.*]] = zext <[[NUM]] x i1> [[bres9]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res9]], <[[NUM]] x i32>* [[adr9]] + res[9] = consequences[5] >= consequences[6]; + + // Memcpy res into return value. 
+ // CHECK: [[retptr:%.*]] = bitcast [10 x <[[NUM]] x i32>]* %agg.result to i8* + // CHECK: [[resptr:%.*]] = bitcast [10 x <[[NUM]] x i32>]* %res to i8* + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[retptr]], i8* [[resptr]] + // CHECK: ret void + return res; +} + +static const int Ix = 2; + +// Test indexing operators +// CHECK-LABEL: define void @"\01?index +export vector index(vector things[10], int i)[10] { + // CHECK: [[res:%.*]] = alloca [10 x <[[NUM]] x [[TYPE]]>] + // CHECK: store i32 %i, i32* [[iadd:%.[0-9]*]] + vector res[10]; + + // CHECK: [[res0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> zeroinitializer, <[[NUM]] x [[TYPE]]>* [[res0]] + res[0] = 0; + + // CHECK: [[i:%.*]] = load i32, i32* [[iadd]] + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 [[i]] + // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] {{(1|1\.0*e\+0*|0xH3C00).*}}, <[[NUM]] x [[TYPE]]>* [[adri]] + res[i] = 1; + + // CHECK: [[res2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 2 + // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] {{(2|2\.0*e\+0*|0xH4000).*}}, <[[NUM]] x [[TYPE]]>* [[res2]] + res[Ix] = 2; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: [[thg0:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr0]] + // CHECK: [[res3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 3 + // CHECK: store <[[NUM]] x [[TYPE]]> [[thg0]], <[[NUM]] x [[TYPE]]>* [[res3]] + res[3] = things[0]; + + // CHECK: [[i:%.*]] = load i32, i32* [[iadd]] + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 [[i]] + // CHECK: 
[[thgi:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adri]] + // CHECK: [[res4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 4 + // CHECK: store <[[NUM]] x [[TYPE]]> [[thgi]], <[[NUM]] x [[TYPE]]>* [[res4]] + res[4] = things[i]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[thg2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]] + // CHECK: [[res5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 5 + // CHECK: store <[[NUM]] x [[TYPE]]> [[thg2]], <[[NUM]] x [[TYPE]]>* [[res5]] + res[5] = things[Ix]; + // CHECK: ret void + return res; +} + +// Test bit twiddling operators. +// INT-LABEL: define void @"\01?bittwiddlers +export void bittwiddlers(inout vector things[11]) { + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 1 + // CHECK: [[ld1:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr1]] + // CHECK: [[res1:%.*]] = xor <[[NUM]] x i32> [[ld1]], ], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[adr0]] + things[0] = ~things[1]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 2 + // CHECK: [[ld2:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 3 + // CHECK: [[ld3:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr3]] + // CHECK: [[res1:%.*]] = or <[[NUM]] x i32> [[ld2]], [[ld3]] + // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[adr1]] + things[1] = things[2] | things[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, 
i32 3 + // CHECK: [[ld3:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 4 + // CHECK: [[ld4:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr4]] + // CHECK: [[res2:%.*]] = and <[[NUM]] x i32> [[ld3]], [[ld4]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 2 + // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[adr2]] + things[2] = things[3] & things[4]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 4 + // CHECK: [[ld4:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 5 + // CHECK: [[ld5:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr5]] + // CHECK: [[res3:%.*]] = xor <[[NUM]] x i32> [[ld4]], [[ld5]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 3 + // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[adr3]] + things[3] = things[4] ^ things[5]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 5 + // CHECK: [[ld5:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr6]] + // CHECK: [[shv6:%.*]] = and <[[NUM]] x i32> [[ld6]], [[ld5]], [[shv6]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 4 + // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[adr4]] + things[4] = things[5] << things[6]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x 
<[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr6]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 7 + // CHECK: [[ld7:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr7]] + // CHECK: [[shv7:%.*]] = and <[[NUM]] x i32> [[ld7]], [[ld6]], [[shv7]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 5 + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[adr5]] + things[5] = things[6] >> things[7]; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 8 + // CHECK: [[ld8:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr8]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr6]] + // CHECK: [[res6:%.*]] = or <[[NUM]] x i32> [[ld6]], [[ld8]] + // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[adr6]] + things[6] |= things[8]; + + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 9 + // CHECK: [[ld9:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr9]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 7 + // CHECK: [[ld7:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr7]] + // CHECK: [[res7:%.*]] = and <[[NUM]] x i32> [[ld7]], [[ld9]] + // CHECK: store <[[NUM]] x i32> [[res7]], <[[NUM]] x i32>* [[adr7]] + things[7] &= things[9]; + + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 10 + // CHECK: [[ld10:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr10]] + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <[[NUM]] x 
i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 8 + // CHECK: [[ld8:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr8]] + // CHECK: [[res8:%.*]] = xor <[[NUM]] x i32> [[ld8]], [[ld10]] + // CHECK: store <[[NUM]] x i32> [[res8]], <[[NUM]] x i32>* [[adr8]] + things[8] ^= things[10]; + + // CHECK: ret void +} diff --git a/tools/clang/test/CodeGenSPIRV/bezier.domain.hlsl2spv b/tools/clang/test/CodeGenSPIRV/bezier.domain.hlsl2spv index 3b0c060a0d..9d915a84f2 100644 --- a/tools/clang/test/CodeGenSPIRV/bezier.domain.hlsl2spv +++ b/tools/clang/test/CodeGenSPIRV/bezier.domain.hlsl2spv @@ -96,7 +96,6 @@ DS_OUTPUT BezierEvalDS( HS_CONSTANT_DATA_OUTPUT input, // CHECK-NEXT: OpDecorate %in_var_TANVCORNER Patch // CHECK-NEXT: OpDecorate %in_var_TANWEIGHTS Patch // CHECK-NEXT: OpDecorate %gl_TessCoord BuiltIn TessCoord -// CHECK-NEXT: OpDecorate %gl_TessCoord Patch // CHECK-NEXT: OpDecorate %gl_Position BuiltIn Position // CHECK-NEXT: OpDecorate %in_var_BEZIERPOS Location 0 // CHECK-NEXT: OpDecorate %in_var_TANGENT Location 1 diff --git a/tools/clang/test/CodeGenSPIRV/cast.to.void.hlsl b/tools/clang/test/CodeGenSPIRV/cast.to.void.hlsl new file mode 100644 index 0000000000..19a37d071c --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/cast.to.void.hlsl @@ -0,0 +1,18 @@ +// RUN: %dxc dxc -T cs_6_6 -E Main -spirv %s -fcgl | FileCheck %s + + +// Make sure no code is generated for the cast to void. 
+ +// CHECK: %src_Main = OpFunction %void None +// CHECK-NEXT: OpLabel +// CHECK-NEXT: %x = OpVariable +// CHECK-NEXT: OpStore %x %false +// CHECK-NEXT: OpReturn +// CHECK-NEXT: OpFunctionEnd + +[numthreads(1, 1, 1)] +void Main() +{ + bool x = false; + (void)x; +} diff --git a/tools/clang/test/CodeGenSPIRV/cs.groupshared.function-param.out.hlsl b/tools/clang/test/CodeGenSPIRV/cs.groupshared.function-param.out.hlsl index 3ec0ad447e..8d0195d672 100644 --- a/tools/clang/test/CodeGenSPIRV/cs.groupshared.function-param.out.hlsl +++ b/tools/clang/test/CodeGenSPIRV/cs.groupshared.function-param.out.hlsl @@ -28,14 +28,10 @@ groupshared S D; [numthreads(1,1,1)] void main() { // CHECK: %E = OpVariable %_ptr_Function_int Function -// CHECK-NEXT: [[TempVar:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_int Function - int E; // CHECK: [[A:%[0-9]+]] = OpAccessChain %_ptr_Uniform_int %A %int_0 %uint_0 -// CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad %int [[A]] -// CHECK-NEXT: OpStore [[TempVar]] [[ld]] -// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %foo [[TempVar]] %B %C %D %E +// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %foo [[A]] %B %C %D %E foo(A[0], B, C, D, E); A[0] = A[0] | B | C | D.a | E; } diff --git a/tools/clang/test/CodeGenSPIRV/ddx.compute.khr.hlsl b/tools/clang/test/CodeGenSPIRV/ddx.compute.khr.hlsl new file mode 100644 index 0000000000..9e2246e6a5 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/ddx.compute.khr.hlsl @@ -0,0 +1,29 @@ +// RUN: %dxc -T cs_6_6 -E main -fspv-extension=SPV_KHR_compute_shader_derivatives -fcgl %s -spirv 2>&1 | FileCheck %s + +// CHECK: OpCapability ComputeDerivativeGroupQuadsKHR +// CHECK: OpExtension "SPV_KHR_compute_shader_derivatives" +// CHECK: OpExecutionMode %main DerivativeGroupQuadsKHR + + +SamplerState ss : register(s2); +SamplerComparisonState scs; + +RWStructuredBuffer o; +Texture1D t1; + +[numthreads(2,2,1)] +void main(uint3 id : SV_GroupThreadID) +{ + // CHECK: OpDPdx %float %float_0_5 + o[0] = ddx(0.5); + // CHECK: 
OpDPdxCoarse %float %float_0_5 + o[1] = ddx_coarse(0.5); + // CHECK: OpDPdy %float %float_0_5 + o[2] = ddy(0.5); + // CHECK: OpDPdyCoarse %float %float_0_5 + o[3] = ddy_coarse(0.5); + // CHECK: OpDPdxFine %float %float_0_5 + o[4] = ddx_fine(0.5); + // CHECK: OpDPdyFine %float %float_0_5 + o[5] = ddy_fine(0.5); +} \ No newline at end of file diff --git a/tools/clang/test/CodeGenSPIRV/decoration.coherent.hlsl b/tools/clang/test/CodeGenSPIRV/decoration.coherent.hlsl index a8578f7377..5815981057 100644 --- a/tools/clang/test/CodeGenSPIRV/decoration.coherent.hlsl +++ b/tools/clang/test/CodeGenSPIRV/decoration.coherent.hlsl @@ -1,5 +1,5 @@ // RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s -check-prefix=GLSL450 -// RUN: %dxc -T ps_6_0 -E main -fcgl -fspv-use-vulkan-memory-model %s -spirv | FileCheck %s -check-prefix=VULKAN +// RUN: %dxc -T ps_6_0 -E main -fcgl -fspv-use-vulkan-memory-model -fspv-target-env=vulkan1.1 %s -spirv | FileCheck %s -check-prefix=VULKAN // When the GLSL450 memory model is used, there should be no memory operands on the loads and stores. // When the Vulkan memory model is used, there should be no decorations. There should be memory operands on the loads and stores instead. 
diff --git a/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-compute.hlsl b/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-compute.hlsl index 70bf50abc6..dba7cd00ce 100644 --- a/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-compute.hlsl +++ b/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-compute.hlsl @@ -7,19 +7,19 @@ float4 foo(inout float f0, inout int f1) return 0; } -// CHECK-DAG: [[s39:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_int Function -// CHECK-DAG: [[s36:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_float Function +// CHECK: [[s39:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_int Function +// CHECK: [[s36:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_float Function // CHECK: [[s33:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_Uniform_float {{%[a-zA-Z0-9_]+}} %int_0 +// CHECK: [[s34:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_Function_int {{%[a-zA-Z0-9_]+}} %int_1 // CHECK: [[s37:%[a-zA-Z0-9_]+]] = OpLoad %float [[s33]] // CHECK: OpStore [[s36]] [[s37]] -// CHECK: [[s34:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_Function_int {{%[a-zA-Z0-9_]+}} %int_1 // CHECK: [[s40:%[a-zA-Z0-9_]+]] = OpLoad %int [[s34]] // CHECK: OpStore [[s39]] [[s40]] // CHECK: {{%[a-zA-Z0-9_]+}} = OpFunctionCall %v4float %foo [[s36]] [[s39]] -// CHECK: [[s38:%[a-zA-Z0-9_]+]] = OpLoad %float [[s36]] -// CHECK: OpStore [[s33]] [[s38]] // CHECK: [[s41:%[a-zA-Z0-9_]+]] = OpLoad %int [[s39]] // CHECK: OpStore [[s34]] [[s41]] +// CHECK: [[s38:%[a-zA-Z0-9_]+]] = OpLoad %float [[s36]] +// CHECK: OpStore [[s33]] [[s38]] struct Stru { int x; diff --git a/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-linkage.hlsl b/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-linkage.hlsl index 6acd104aa3..5977fc454a 100644 --- a/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-linkage.hlsl +++ b/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-linkage.hlsl @@ -6,19 +6,19 @@ RWStructuredBuffer< float4 > output : register(u1); // CHECK: OpDecorate %main LinkageAttributes "main" Export // CHECK: %main = OpFunction %int None -// CHECK: 
[[s36:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_float Function // CHECK: [[s39:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_int Function +// CHECK: [[s36:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_float Function // CHECK: [[s33:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_StorageBuffer_float {{%[a-zA-Z0-9_]+}} %int_0 +// CHECK: [[s34:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_Function_int %stru %int_1 // CHECK: [[s37:%[a-zA-Z0-9_]+]] = OpLoad %float [[s33]] // CHECK: OpStore [[s36]] [[s37]] -// CHECK: [[s34:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_Function_int %stru %int_1 // CHECK: [[s40:%[a-zA-Z0-9_]+]] = OpLoad %int [[s34]] // CHECK: OpStore [[s39]] [[s40]] // CHECK: {{%[a-zA-Z0-9_]+}} = OpFunctionCall %void %func [[s36]] [[s39]] -// CHECK: [[s38:%[a-zA-Z0-9_]+]] = OpLoad %float [[s36]] -// CHECK: OpStore [[s33]] [[s38]] // CHECK: [[s41:%[a-zA-Z0-9_]+]] = OpLoad %int [[s39]] // CHECK: OpStore [[s34]] [[s41]] +// CHECK: [[s38:%[a-zA-Z0-9_]+]] = OpLoad %float [[s36]] +// CHECK: OpStore [[s33]] [[s38]] [noinline] void func(inout float f0, inout int f1) { diff --git a/tools/clang/test/CodeGenSPIRV/fn.param.inout.storage-class.hlsl b/tools/clang/test/CodeGenSPIRV/fn.param.inout.storage-class.hlsl index 4d75d27fa8..d0e771e834 100644 --- a/tools/clang/test/CodeGenSPIRV/fn.param.inout.storage-class.hlsl +++ b/tools/clang/test/CodeGenSPIRV/fn.param.inout.storage-class.hlsl @@ -11,13 +11,10 @@ void main(float input : INPUT) { // CHECK: %param_var_a = OpVariable %_ptr_Function_float Function // CHECK: [[val:%[0-9]+]] = OpLoad %float %input -// CHECK: OpStore %param_var_a [[val]] +// CHECK: OpStore %param_var_a [[val]] // CHECK: [[p0:%[0-9]+]] = OpAccessChain %_ptr_Uniform_float %Data %int_0 %uint_0 -// CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad %float [[p0]] -// CHECK-NEXT: OpStore [[temp0:%[a-zA-Z0-9_]+]] [[ld]] // CHECK: [[p1:%[0-9]+]] = OpAccessChain %_ptr_Uniform_float %Data %int_0 %uint_1 -// CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad %float %32 -// CHECK-NEXT: OpStore 
[[temp1:%[a-zA-Z0-9_]+]] [[ld]] -// CHECK: OpFunctionCall %void %foo %param_var_a [[temp0]] [[temp1]] + +// CHECK: OpFunctionCall %void %foo %param_var_a [[p0]] [[p1]] foo(input, Data[0], Data[1]); } diff --git a/tools/clang/test/CodeGenSPIRV/fn.param.inout.vector.hlsl b/tools/clang/test/CodeGenSPIRV/fn.param.inout.vector.hlsl index 5641923aaa..bda2183057 100644 --- a/tools/clang/test/CodeGenSPIRV/fn.param.inout.vector.hlsl +++ b/tools/clang/test/CodeGenSPIRV/fn.param.inout.vector.hlsl @@ -18,9 +18,7 @@ float4 main() : C { float4 val; // CHECK: [[z_ptr:%[0-9]+]] = OpAccessChain %_ptr_Function_float %val %int_2 -// CHECK: [[ld:%[0-9]+]] = OpLoad %float [[z_ptr]] -// CHECK: OpStore %param_var_w [[ld]] -// CHECK: {{%[0-9]+}} = OpFunctionCall %void %bar %val %param_var_y %param_var_z %param_var_w +// CHECK: {{%[0-9]+}} = OpFunctionCall %void %bar %val %param_var_y %param_var_z [[z_ptr]] // CHECK-NEXT: [[y:%[0-9]+]] = OpLoad %v3float %param_var_y // CHECK-NEXT: [[old:%[0-9]+]] = OpLoad %v4float %val // Write to val.zwx: @@ -39,10 +37,6 @@ float4 main() : C { // CHECK-NEXT: [[old_0:%[0-9]+]] = OpLoad %v4float %val // CHECK-NEXT: [[new_0:%[0-9]+]] = OpVectorShuffle %v4float [[old_0]] [[z]] 4 5 2 3 // CHECK-NEXT: OpStore %val [[new_0]] - // Write to val.z: -// CHECK-NEXT: [[new:%[0-9]+]] = OpLoad %float %param_var_w -// CHECK-NEXT: OpStore [[z_ptr]] [[new]] - bar(val, val.zwx, val.xy, val.z); return MyRWBuffer[0]; diff --git a/tools/clang/test/CodeGenSPIRV/fn.param.isomorphism.hlsl b/tools/clang/test/CodeGenSPIRV/fn.param.isomorphism.hlsl index 3f890099f5..a4ad925f77 100644 --- a/tools/clang/test/CodeGenSPIRV/fn.param.isomorphism.hlsl +++ b/tools/clang/test/CodeGenSPIRV/fn.param.isomorphism.hlsl @@ -62,11 +62,7 @@ void main() { fn.incr(); // CHECK: [[rwsb_0:%[0-9]+]] = OpAccessChain %_ptr_Uniform_R %rwsb %int_0 %uint_0 -// CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad %R [[rwsb_0]] -// CHECK-NEXT: [[ex:%[0-9]+]] = OpCompositeExtract %int [[ld]] 0 -// CHECK-NEXT: [[v:%[0-9]+]] = 
OpCompositeConstruct %R_0 [[ex]] -// CHECK-NEXT: OpStore [[TempVar:%[a-zA-Z0-9_]+]] [[v]] -// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr [[TempVar]] +// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr [[rwsb_0]] decr(rwsb[0]); // CHECK: OpFunctionCall %void %decr2 %gs @@ -91,29 +87,21 @@ void main() { fnarr[0].incr(); // CHECK: [[gsarr_0:%[0-9]+]] = OpAccessChain %_ptr_Workgroup_S %gsarr %int_0 -// CHECK: [[ld:%[0-9]+]] = OpLoad %S [[gsarr_0]] -// CHECK: OpStore [[TempVar:%[a-zA-Z0-9_]+]] [[ld]] -// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr2 [[TempVar]] +// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr2 [[gsarr_0]] decr2(gsarr[0]); // CHECK: [[starr_0:%[0-9]+]] = OpAccessChain %_ptr_Private_S %starr %int_0 -// CHECK: [[ld:%[0-9]+]] = OpLoad %S [[starr_0]] -// CHECK: OpStore [[TempVar:%[a-zA-Z0-9_]+]] [[ld]] -// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr2 [[TempVar]] +// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr2 [[starr_0]] decr2(starr[0]); // CHECK: [[fnarr_0:%[0-9]+]] = OpAccessChain %_ptr_Function_S %fnarr %int_0 -// CHECK: [[ld:%[0-9]+]] = OpLoad %S [[fnarr_0]] -// CHECK: OpStore [[TempVar:%[a-zA-Z0-9_]+]] [[ld]] -// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr2 [[TempVar]] +// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr2 [[fnarr_0]] decr2(fnarr[0]); // CHECK: [[arr:%[0-9]+]] = OpAccessChain %_ptr_Function_int %arr %int_0 // CHECK-NEXT: [[arr_0:%[0-9]+]] = OpLoad %int [[arr]] // CHECK-NEXT: [[arr_1:%[0-9]+]] = OpIAdd %int [[arr_0]] %int_1 -// CHECK-NEXT: OpStore [[arr]] [[arr_1]] -// CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad %int [[arr]] -// CHECK-NEXT: OpStore [[TempVar:%[0-9a-zA-Z_]+]] [[ld]] -// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %int_decr [[TempVar]] +// CHECK-NEXT: OpStore [[arr]] [[arr_1]] +// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %int_decr [[arr]] int_decr(++arr[0]); } diff --git a/tools/clang/test/CodeGenSPIRV/intrinsics.interlocked-methods.ps.hlsl 
b/tools/clang/test/CodeGenSPIRV/intrinsics.interlocked-methods.ps.hlsl index e9a1813f31..a0b2ab7207 100644 --- a/tools/clang/test/CodeGenSPIRV/intrinsics.interlocked-methods.ps.hlsl +++ b/tools/clang/test/CodeGenSPIRV/intrinsics.interlocked-methods.ps.hlsl @@ -1,5 +1,5 @@ // RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s -check-prefix=CHECK -check-prefix=GLSL450 -// RUN: %dxc -T ps_6_0 -E main -fcgl -fspv-use-vulkan-memory-model %s -spirv | FileCheck %s -check-prefix=CHECK -check-prefix=VULKAN +// RUN: %dxc -T ps_6_0 -E main -fcgl -fspv-use-vulkan-memory-model -fspv-target-env=vulkan1.1 %s -spirv | FileCheck %s -check-prefix=CHECK -check-prefix=VULKAN RWTexture1D g_tTex1di1; RWTexture1D g_tTex1du1; diff --git a/tools/clang/test/CodeGenSPIRV/lib.fn.export.with.entrypoint.hlsl b/tools/clang/test/CodeGenSPIRV/lib.fn.export.with.entrypoint.hlsl new file mode 100644 index 0000000000..0ab965aded --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/lib.fn.export.with.entrypoint.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxc -T lib_6_6 -E main -fspv-target-env=universal1.5 -fcgl %s -spirv | FileCheck %s + +// CHECK: OpEntryPoint MissKHR %miss "miss" %payload +// CHECK: OpDecorate %func LinkageAttributes "func" Export + + +struct RayPayload +{ + uint a; +}; + +export void func() +{ +} + +[shader("miss")] +void miss(inout RayPayload payload) +{ +} diff --git a/tools/clang/test/CodeGenSPIRV/meshshading.nv.triangle.indices.out.hlsl b/tools/clang/test/CodeGenSPIRV/meshshading.nv.triangle.indices.out.hlsl new file mode 100644 index 0000000000..05d9d8fb1c --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/meshshading.nv.triangle.indices.out.hlsl @@ -0,0 +1,65 @@ +// RUN: %dxc -T ms_6_5 -E outie -fcgl %s -spirv | FileCheck %s +// RUN: %dxc -T ms_6_5 -E innie -fcgl %s -spirv | FileCheck %s + +// CHECK-DAG: [[v4_n05_05_0_1:%[0-9]+]] = OpConstantComposite %v4float %float_n0_5 %float_0_5 %float_0 %float_1 +// CHECK-DAG: [[v4_05_05_0_1:%[0-9]+]] = OpConstantComposite %v4float %float_0_5 
%float_0_5 %float_0 %float_1 +// CHECK-DAG: [[v4_0_n05_0_1:%[0-9]+]] = OpConstantComposite %v4float %float_0 %float_n0_5 %float_0 %float_1 +// CHECK-DAG: [[v3_1_0_0:%[0-9]+]] = OpConstantComposite %v3float %float_1 %float_0 %float_0 +// CHECK-DAG: [[v3_0_1_0:%[0-9]+]] = OpConstantComposite %v3float %float_0 %float_1 %float_0 +// CHECK-DAG: [[v3_0_0_1:%[0-9]+]] = OpConstantComposite %v3float %float_0 %float_0 %float_1 +// CHECK-DAG: [[u3_0_1_2:%[0-9]+]] = OpConstantComposite %v3uint %uint_0 %uint_1 %uint_2 + +// CHECK-DAG: OpDecorate [[indices:%[0-9]+]] BuiltIn PrimitiveIndicesNV + +struct MeshOutput { + float4 position : SV_Position; + float3 color : COLOR0; +}; + +[outputtopology("triangle")] +[numthreads(1, 1, 1)] +void innie(out indices uint3 triangles[1], out vertices MeshOutput verts[3]) { + SetMeshOutputCounts(3, 2); + + triangles[0] = uint3(0, 1, 2); +// CHECK: [[off:%[0-9]+]] = OpIMul %uint %uint_0 %uint_3 +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_uint [[indices]] [[off]] +// CHECK: [[tmp:%[0-9]+]] = OpCompositeExtract %uint [[u3_0_1_2]] 0 +// CHECK: OpStore [[ptr]] [[tmp]] +// CHECK: [[idx:%[0-9]+]] = OpIAdd %uint [[off]] %uint_1 +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_uint [[indices]] [[idx]] +// CHECK: [[tmp:%[0-9]+]] = OpCompositeExtract %uint [[u3_0_1_2]] 1 +// CHECK: OpStore [[ptr]] [[tmp]] +// CHECK: [[idx:%[0-9]+]] = OpIAdd %uint [[off]] %uint_2 +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_uint [[indices]] [[idx]] +// CHECK: [[tmp:%[0-9]+]] = OpCompositeExtract %uint [[u3_0_1_2]] 2 +// CHECK: OpStore [[ptr]] [[tmp]] + + verts[0].position = float4(-0.5, 0.5, 0.0, 1.0); +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_v4float %gl_Position %int_0 +// CHECK: OpStore [[ptr]] [[v4_n05_05_0_1]] + verts[0].color = float3(1.0, 0.0, 0.0); +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_v3float %out_var_COLOR0 %int_0 +// CHECK: OpStore [[ptr]] [[v3_1_0_0]] + + verts[1].position = float4(0.5, 0.5, 0.0, 
1.0); +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_v4float %gl_Position %int_1 +// CHECK: OpStore [[ptr]] [[v4_05_05_0_1]] + verts[1].color = float3(0.0, 1.0, 0.0); +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_v3float %out_var_COLOR0 %int_1 +// CHECK: OpStore [[ptr]] [[v3_0_1_0]] + + verts[2].position = float4(0.0, -0.5, 0.0, 1.0); +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_v4float %gl_Position %int_2 +// CHECK: OpStore [[ptr]] [[v4_0_n05_0_1]] + verts[2].color = float3(0.0, 0.0, 1.0); +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_v3float %out_var_COLOR0 %int_2 +// CHECK: OpStore [[ptr]] [[v3_0_0_1]] + +} + +[outputtopology("triangle")] +[numthreads(1, 1, 1)] +void outie(out indices uint3 triangles[1], out vertices MeshOutput verts[3]) { + innie(triangles, verts); +} diff --git a/tools/clang/test/CodeGenSPIRV/op.struct.access.bitfield.sized.rvalue.hlsl b/tools/clang/test/CodeGenSPIRV/op.struct.access.bitfield.sized.rvalue.hlsl new file mode 100644 index 0000000000..414d8a638c --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/op.struct.access.bitfield.sized.rvalue.hlsl @@ -0,0 +1,22 @@ +// RUN: %dxc -T cs_6_2 -E main -spirv -fcgl -enable-16bit-types %s | FileCheck %s + +struct S1 +{ + uint16_t a : 8; +}; + +S1 foo() +{ + return (S1)0; +} + +[numthreads(1, 1, 1)] +void main() { + uint16_t test = foo().a; +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Function_ushort %temp_var_S1 %int_0 +// CHECK: [[raw:%[0-9]+]] = OpLoad %ushort [[ptr]] +// CHECK: [[tmp:%[0-9]+]] = OpShiftLeftLogical %ushort [[raw]] %uint_8 +// CHECK: [[out:%[0-9]+]] = OpShiftRightLogical %ushort [[tmp]] %uint_8 +// CHECK-NOT: OpLoad %ushort [[out]] +// CHECK: OpStore %test [[out]] +} diff --git a/tools/clang/test/CodeGenSPIRV/semantic.domain-location.ds.hlsl b/tools/clang/test/CodeGenSPIRV/semantic.domain-location.ds.hlsl index 5e4049f8c3..391e09a428 100644 --- a/tools/clang/test/CodeGenSPIRV/semantic.domain-location.ds.hlsl +++ 
b/tools/clang/test/CodeGenSPIRV/semantic.domain-location.ds.hlsl @@ -4,7 +4,6 @@ // CHECK-SAME: %gl_TessCoord // CHECK: OpDecorate %gl_TessCoord BuiltIn TessCoord -// CHECK: OpDecorate %gl_TessCoord Patch // CHECK: %gl_TessCoord = OpVariable %_ptr_Input_v3float Input diff --git a/tools/clang/test/CodeGenSPIRV/sm6.quad-any-all.hlsl b/tools/clang/test/CodeGenSPIRV/sm6.quad-any-all.hlsl new file mode 100644 index 0000000000..fb9f6e0d76 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/sm6.quad-any-all.hlsl @@ -0,0 +1,41 @@ +// RUN: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.1 -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.1 -fspv-extension=SPV_KHR_16bit_storage -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,NOQUAD +// RUN: not %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.0 -fcgl %s -spirv 2>&1 | FileCheck %s --check-prefixes=ERROR + +// CHECK: ; Version: 1.3 + +// QUAD: OpCapability QuadControlKHR +// QUAD: OpExtension "SPV_KHR_quad_control" + +RWStructuredBuffer values; + +[numthreads(32, 1, 1)] +void main(uint3 id: SV_DispatchThreadID) { + uint outIdx = (id.y * 8) + id.x; + +// CHECK: [[val1:%[0-9]+]] = OpIEqual %bool {{%[0-9]+}} +// QUAD-NEXT: {{%[0-9]+}} = OpGroupNonUniformQuadAnyKHR %bool [[val1]] + +// NOQUAD-NEXT: [[inv0:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val1]] %uint_0 +// NOQUAD-NEXT: [[or0:%[0-9]+]] = OpLogicalOr %bool [[val1]] [[inv0]] +// NOQUAD-NEXT: [[inv1:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val1]] %uint_1 +// NOQUAD-NEXT: [[or1:%[0-9]+]] = OpLogicalOr %bool [[or0]] [[inv1]] +// NOQUAD-NEXT: [[inv2:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val1]] %uint_2 +// NOQUAD-NEXT: [[or2:%[0-9]+]] = OpLogicalOr %bool [[or1]] [[inv2]] + +// ERROR: 27:24: error: Vulkan 1.1 is required for Wave Operation but not permitted to use + values[outIdx].x = QuadAny(outIdx % 4 == 0) ? 
1.0 : 2.0; + +// CHECK: [[val2:%[0-9]+]] = OpIEqual %bool {{%[0-9]+}} +// QUAD-NEXT: {{%[0-9]+}} = OpGroupNonUniformQuadAllKHR %bool [[val2]] + +// NOQUAD-NEXT: [[inv0:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val2]] %uint_0 +// NOQUAD-NEXT: [[or0:%[0-9]+]] = OpLogicalAnd %bool [[val2]] [[inv0]] +// NOQUAD-NEXT: [[inv1:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val2]] %uint_1 +// NOQUAD-NEXT: [[or1:%[0-9]+]] = OpLogicalAnd %bool [[or0]] [[inv1]] +// NOQUAD-NEXT: [[inv2:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val2]] %uint_2 +// NOQUAD-NEXT: [[or2:%[0-9]+]] = OpLogicalAnd %bool [[or1]] [[inv2]] + +// ERROR: 40:24: error: Vulkan 1.1 is required for Wave Operation but not permitted to use + values[outIdx].y = QuadAll(outIdx % 2 == 0) ? 3.0 : 4.0; +} diff --git a/tools/clang/test/CodeGenSPIRV/spirv.interface.ds.hlsl b/tools/clang/test/CodeGenSPIRV/spirv.interface.ds.hlsl index a8fe81e021..6f073aeb46 100644 --- a/tools/clang/test/CodeGenSPIRV/spirv.interface.ds.hlsl +++ b/tools/clang/test/CodeGenSPIRV/spirv.interface.ds.hlsl @@ -85,7 +85,6 @@ struct DsOut { // CHECK: OpDecorateString %gl_PointSize UserSemantic "PSIZE" // CHECK: OpDecorate %gl_TessCoord BuiltIn TessCoord // CHECK: OpDecorateString %gl_TessCoord UserSemantic "SV_DomainLocation" -// CHECK: OpDecorate %gl_TessCoord Patch // CHECK: OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter // CHECK: OpDecorateString %gl_TessLevelOuter UserSemantic "SV_TessFactor" // CHECK: OpDecorate %gl_TessLevelOuter Patch diff --git a/tools/clang/test/CodeGenSPIRV/texture.calculate.lod.compute.linear.khr.hlsl b/tools/clang/test/CodeGenSPIRV/texture.calculate.lod.compute.linear.khr.hlsl new file mode 100644 index 0000000000..23f52ad4b5 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/texture.calculate.lod.compute.linear.khr.hlsl @@ -0,0 +1,23 @@ +// RUN: %dxc -T cs_6_6 -E main -fspv-extension=SPV_KHR_compute_shader_derivatives -fcgl %s -spirv 2>&1 | FileCheck %s --check-prefix=CHECK +// RUN: 
%dxc -T cs_6_6 -E main -fspv-extension=SPV_KHR_compute_shader_derivatives %s -spirv 2>&1 | FileCheck %s --check-prefix=CHECK + +// CHECK: OpCapability ComputeDerivativeGroupLinearKHR +// CHECK: OpExtension "SPV_KHR_compute_shader_derivatives" +// CHECK: OpExecutionMode %main DerivativeGroupLinearKHR + +SamplerState ss : register(s2); +SamplerComparisonState scs; + +RWStructuredBuffer o; +Texture1D t1; + +[numthreads(16,1,1)] +void main(uint3 id : SV_GroupThreadID) +{ + //CHECK: [[t1:%[0-9]+]] = OpLoad %type_1d_image %t1 + //CHECK-NEXT: [[ss1:%[0-9]+]] = OpLoad %type_sampler %ss + //CHECK-NEXT: [[si1:%[0-9]+]] = OpSampledImage %type_sampled_image [[t1]] [[ss1]] + //CHECK-NEXT: [[query1:%[0-9]+]] = OpImageQueryLod %v2float [[si1]] %float_0_5 + //CHECK-NEXT: {{%[0-9]+}} = OpCompositeExtract %float [[query1]] 0 + o[0] = t1.CalculateLevelOfDetail(ss, 0.5); +} diff --git a/tools/clang/test/CodeGenSPIRV/type.buffer.half.hlsl b/tools/clang/test/CodeGenSPIRV/type.buffer.half.hlsl index e5954abae5..99d365b5e2 100644 --- a/tools/clang/test/CodeGenSPIRV/type.buffer.half.hlsl +++ b/tools/clang/test/CodeGenSPIRV/type.buffer.half.hlsl @@ -1,6 +1,14 @@ -// RUN: not %dxc -T ps_6_6 -E main -fcgl %s -spirv -enable-16bit-types 2>&1 | FileCheck %s +// RUN: not %dxc -T ps_6_6 -E main -fcgl %s -spirv -enable-16bit-types 2>&1 | FileCheck %s --check-prefix=VK +// RUN: %dxc -T ps_6_6 -E main -fcgl %s -spirv -fspv-target-env=universal1.5 -enable-16bit-types 2>&1 | FileCheck %s --check-prefix=UNIVERSAL -// CHECK: error: 16-bit texture types not yet supported with -spirv +// When targeting Vulkan, a 16-bit floating point buffer is not valid. +// VK: error: The sampled type for textures cannot be a floating point type smaller than 32-bits when targeting a Vulkan environment. + +// When not targeting Vulkan, we should generate the 16-bit floating point buffer. 
+// UNIVERSAL: %half = OpTypeFloat 16 +// UNIVERSAL: %type_buffer_image = OpTypeImage %half Buffer 2 0 0 1 Unknown +// UNIVERSAL: %_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image +// UNIVERSAL: %MyBuffer = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant Buffer MyBuffer; void main(): SV_Target { } diff --git a/tools/clang/test/CodeGenSPIRV/type.buffer.half4.hlsl b/tools/clang/test/CodeGenSPIRV/type.buffer.half4.hlsl new file mode 100644 index 0000000000..f29af69c1c --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/type.buffer.half4.hlsl @@ -0,0 +1,14 @@ +// RUN: not %dxc -T ps_6_6 -E main -fcgl %s -spirv -enable-16bit-types 2>&1 | FileCheck %s --check-prefix=VK +// RUN: %dxc -T ps_6_6 -E main -fcgl %s -spirv -fspv-target-env=universal1.5 -enable-16bit-types 2>&1 | FileCheck %s --check-prefix=UNIVERSAL + +// When targeting Vulkan, a 16-bit floating point buffer is not valid. +// VK: error: The sampled type for textures cannot be a floating point type smaller than 32-bits when targeting a Vulkan environment. + +// When not targeting Vulkan, we should generate the 16-bit floating point buffer. 
+// UNIVERSAL: %half = OpTypeFloat 16 +// UNIVERSAL: %type_buffer_image = OpTypeImage %half Buffer 2 0 0 1 Unknown +// UNIVERSAL: %_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image +// UNIVERSAL: %MyBuffer = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +Buffer MyBuffer; + +void main(): SV_Target { } diff --git a/tools/clang/test/CodeGenSPIRV/vk.binding.global-struct-of-resource.and.array.hlsl b/tools/clang/test/CodeGenSPIRV/vk.binding.global-struct-of-resource.and.array.hlsl index 9d226eb962..526bfc002c 100644 --- a/tools/clang/test/CodeGenSPIRV/vk.binding.global-struct-of-resource.and.array.hlsl +++ b/tools/clang/test/CodeGenSPIRV/vk.binding.global-struct-of-resource.and.array.hlsl @@ -27,6 +27,7 @@ float4 main() : SV_Target // CHECK: [[x:%[0-9]+]] = OpSampledImage %type_sampled_image [[tex]] [[smp]] return Textures[0].Sample(TheStruct.Sampler, float2(0, 0)) // CHECK: [[tex:%[0-9]+]] = OpLoad %type_2d_image %TheStruct_Texture +// CHECK: [[smp:%[0-9]+]] = OpLoad %type_sampler %TheStruct_Sampler // CHECK: [[x:%[0-9]+]] = OpSampledImage %type_sampled_image [[tex]] [[smp]] + TheStruct.Texture.Sample(TheStruct.Sampler, float2(0, 0)); } diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl new file mode 100644 index 0000000000..f0f5c54a16 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl @@ -0,0 +1,28 @@ +// RUN: %dxc -spirv -E main -T cs_6_7 %s | FileCheck %s + +// Bug was causing alignment miss + +struct Content { + int a; +}; + +typedef vk::BufferPointer BufferContent; +typedef vk::BufferPointer BufferBuffer; + +RWStructuredBuffer rwbuf; + +void foo(BufferContent bc) { + bc.Get().a = 1; +} + +[numthreads(1, 1, 1)] +void main() { + foo(rwbuf[0].Get()); +} + +// CHECK: [[L0:%[_0-9A-Za-z]*]] = OpLoad %{{[_0-9A-Za-z]*}} %{{[_0-9A-Za-z]*}} Aligned 8 +// CHECK: [[L1:%[_0-9A-Za-z]*]] = OpLoad 
%{{[_0-9A-Za-z]*}} [[L0]] Aligned 8 +// CHECK: [[L2:%[_0-9A-Za-z]*]] = OpAccessChain %{{[_0-9A-Za-z]*}} [[L1]] %int_0 +// CHECK: OpStore [[L2]] %int_1 Aligned 4 + + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl new file mode 100644 index 0000000000..fc5b9edad0 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl @@ -0,0 +1,72 @@ +// RUN: %dxc -spirv -Od -T ps_6_0 -E MainPs %s | FileCheck %s + +struct Globals_s +{ + float4 g_vSomeConstantA; + float4 g_vTestFloat4; + float4 g_vSomeConstantB; +}; + +typedef vk::BufferPointer Globals_p; + +struct TestPushConstant_t +{ + Globals_p m_nBufferDeviceAddress; +}; + +[[vk::push_constant]] TestPushConstant_t g_PushConstants; + +cbuffer cbuf { + [[vk::aliased_pointer]] Globals_p bp; +} + +// CHECK: OpDecorate [[BP0:%[_0-9A-Za-z]*]] AliasedPointer +// CHECK: OpDecorate [[BP1:%[_0-9A-Za-z]*]] AliasedPointer +// CHECK: OpDecorate [[BP:%[_0-9A-Za-z]*]] AliasedPointer +// CHECK: [[FLOAT:%[_0-9A-Za-z]*]] = OpTypeFloat 32 +// CHECK-DAG: [[F1:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 1 +// CHECK-DAG: [[F0:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 0 +// CHECK: [[V4FLOAT:%[_0-9A-Za-z]*]] = OpTypeVector [[FLOAT]] 4 +// CHECK: [[V4C:%[_0-9A-Za-z]*]] = OpConstantComposite [[V4FLOAT]] [[F1]] [[F0]] [[F0]] [[F0]] +// CHECK: [[INT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[I0:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 0 +// CHECK-DAG: [[I1:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 1 +// CHECK: [[GS:%[_0-9A-Za-z]*]] = OpTypeStruct [[V4FLOAT]] [[V4FLOAT]] [[V4FLOAT]] +// CHECK: [[PGS:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[GS]] +// CHECK: [[TT:%[_0-9A-Za-z]*]] = OpTypeStruct [[PGS]] +// CHECK: [[PTT:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[TT]] +// CHECK: [[PFV4FLOAT:%[_0-9A-Za-z]*]] = OpTypePointer Function [[V4FLOAT]] +// CHECK: [[PPGS:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PGS]] +// CHECK: 
[[PBV4FLOAT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[V4FLOAT]] + +void f([[vk::aliased_pointer]] Globals_p bp) { +} + +float4 MainPs(void) : SV_Target0 +{ + float4 vTest = float4(1.0,0.0,0.0,0.0); + [[vk::aliased_pointer]] Globals_p bp0 = Globals_p(g_PushConstants.m_nBufferDeviceAddress); + [[vk::aliased_pointer]] Globals_p bp1 = Globals_p(g_PushConstants.m_nBufferDeviceAddress); + bp0.Get().g_vTestFloat4 = vTest; + f(bp0); + return bp1.Get().g_vTestFloat4; // Returns float4(1.0,0.0,0.0,0.0) +} + +// CHECK: [[GP:%[_0-9A-Za-z]*]] = OpVariable [[PTT]] PushConstant +// CHECK: [[VTEST:%[0-9A-Za-z]*]] = OpVariable [[PFV4FLOAT]] Function +// CHECK: OpStore [[VTEST]] [[V4C]] +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPGS]] [[GP]] [[I0]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad %_ptr_PhysicalStorageBuffer_Globals_s [[X1]] +// CHECK: OpStore [[BP0]] [[X2]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpAccessChain [[PPGS]] [[GP]] [[I0]] +// CHECK: [[X4:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[X3]] +// CHECK: OpStore [[BP1]] [[X4]] +// CHECK: [[X5:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[VTEST]] +// CHECK: [[X6:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[BP0]] Aligned 16 +// CHECK: [[X7:%[_0-9A-Za-z]*]] = OpAccessChain [[PBV4FLOAT]] [[X6]] [[I1]] +// CHECK: OpStore [[X7]] [[X5]] Aligned 16 +// CHECK: [[X8:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[BP1]] Aligned 16 +// CHECK: [[X9:%[_0-9A-Za-z]*]] = OpAccessChain [[PBV4FLOAT]] [[X8]] [[I1]] +// CHECK: [[X10:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[X9]] Aligned 16 +// CHECK: OpReturnValue [[X10]] + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl new file mode 100644 index 0000000000..992d8b39fd --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl @@ -0,0 +1,39 @@ +// RUN: %dxc -spirv -fcgl -T ps_6_0 %s | FileCheck %s + +struct S { + uint u; +}; + +typedef vk::BufferPointer BP; + +struct PC { + BP bp; +}; + 
+[[vk::push_constant]] PC pc; + +// CHECK: [[UINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 0 +// CHECK: [[U0:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 0 +// CHECK: [[INT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK: [[I0:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 0 +// CHECK: [[S:%[_0-9A-Za-z]*]] = OpTypeStruct [[UINT]] +// CHECK: [[PS:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[S]] +// CHECK: [[PU:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[UINT]] +// CHECK: [[U1:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 1 +// CHECK: [[PC:%[_0-9A-Za-z]*]] = OpVariable %{{[_0-9A-Za-z]*}} PushConstant + +void main() +{ +// CHECK: [[IN:%[_0-9A-Za-z]*]] = OpVariable +// CHECK: [[OUT:%[_0-9A-Za-z]*]] = OpVariable + uint u0, u1; + +// CHECK: [[X1:%[_0-9]+]] = OpAccessChain %{{[_0-9A-Za-z]*}} [[PC]] [[I0]] +// CHECK: [[X2:%[_0-9]+]] = OpLoad [[PS]] [[X1]] Aligned 4 +// CHECK: [[X3:%[_0-9]+]] = OpAccessChain [[PU]] [[X2]] [[I0]] +// CHECK: [[X4:%[_0-9]+]] = OpLoad [[UINT]] [[IN]] +// CHECK: [[X5:%[_0-9]+]] = OpAtomicExchange [[UINT]] [[X3]] [[U1]] [[U0]] [[X4]] +// CHECK: OpStore [[OUT]] [[X5]] + InterlockedExchange(pc.bp.Get().u, u0, u1); +} + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error1.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error1.hlsl new file mode 100644 index 0000000000..86cf48c41e --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error1.hlsl @@ -0,0 +1,19 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + float a; +}; + +typedef vk::BufferPointer BufferContent; + +[[vk::push_constant]] +BufferContent buffer; + +[numthreads(1, 1, 1)] +void main() { + float tmp = buffer.Get().a; + buffer.Get().a = tmp; +} + +// CHECK: vk::push_constant attribute cannot be used on declarations with vk::BufferPointer type + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error2.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error2.hlsl new file mode 100644 index 
0000000000..09585a7664 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error2.hlsl @@ -0,0 +1,19 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Globals_s { + float4 a; +}; + +typedef vk::BufferPointer Globals_p; +typedef vk::BufferPointer Globals_pp; + +[[vk::push_constant]] +Globals_pp bda; + +[numthreads(1, 1, 1)] +void main() { + float4 r = bda.Get().Get().a; +} + +// CHECK: vk::push_constant attribute cannot be used on declarations with vk::BufferPointer type + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error3.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error3.hlsl new file mode 100644 index 0000000000..e803b5b754 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error3.hlsl @@ -0,0 +1,19 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + uint a; +}; + +typedef vk::BufferPointer BufferContent; + +[[vk::push_constant]] +BufferContent buffer; + +[numthreads(1, 1, 1)] +void main() { + uint data = buffer.Get(); + buffer.Get() = data; +} + +// CHECK: vk::push_constant attribute cannot be used on declarations with vk::BufferPointer type + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error4.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error4.hlsl new file mode 100644 index 0000000000..1029aa7f2e --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error4.hlsl @@ -0,0 +1,18 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + uint a; +}; + +typedef vk::BufferPointer BufferContent; + +[[vk::push_constant]] +BufferContent buffer; + +[numthreads(1, 1, 1)] +void main() { + buffer.Get() = 1; +} + +// CHECK: vk::push_constant attribute cannot be used on declarations with vk::BufferPointer type + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error5.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error5.hlsl new file mode 100644 index 
0000000000..62bdb7f3cb --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error5.hlsl @@ -0,0 +1,26 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + int a; +}; + +typedef vk::BufferPointer BufferContent; +typedef vk::BufferPointer BufferBuffer; + +//[[vk::push_constant]] +//BufferContent buffer; + +RWStructuredBuffer rwbuf; + +// Wrong type in the parameter. +void foo(BufferContent bc) { + bc.Get().a = 1; +} + +[numthreads(1, 1, 1)] +void main() { + foo(rwbuf[0]); +} + +// CHECK: no matching function for call to 'foo' + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error6.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error6.hlsl new file mode 100644 index 0000000000..a89b286edf --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error6.hlsl @@ -0,0 +1,23 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + int a; +}; + +typedef vk::BufferPointer BufferContent; +typedef vk::BufferPointer BufferBuffer; + +RWStructuredBuffer buf; + +void foo(const BufferContent bc) { + bc.Get().a = 1; +} + +[numthreads(1, 1, 1)] +void main() { + static BufferContent bcs = buf[0]; + static BufferBuffer bbs = (BufferContent)bcs; +} + +// CHECK: cannot initialize a variable of type 'BufferPointer' with an lvalue of type 'BufferPointer' + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.from-uint.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.from-uint.hlsl new file mode 100644 index 0000000000..b44e1eca09 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.from-uint.hlsl @@ -0,0 +1,46 @@ +// RUN: %dxc -spirv -Od -T cs_6_7 %s | FileCheck %s +// RUN: %dxc -spirv -Od -T cs_6_7 -DALIGN_16 %s | FileCheck %s +// RUN: %dxc -spirv -Od -T cs_6_7 -DNO_PC %s | FileCheck %s + +// Was getting bogus type errors with the defined changes + +#ifdef ALIGN_16 +typedef vk::BufferPointer BufferType; +#else +typedef vk::BufferPointer 
BufferType; +#endif +#ifndef NO_PC +struct PushConstantStruct { + BufferType push_buffer; +}; +[[vk::push_constant]] PushConstantStruct push_constant; +#endif + +RWStructuredBuffer output; + +// CHECK: [[INT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK: [[I0:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 0 +// CHECK: [[UINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 0 +// CHECK: [[U0:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 0 +// CHECK: [[PPUINT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[UINT]] +// CHECK: [[PFPPUINT:%[_0-9A-Za-z]*]] = OpTypePointer Function [[PPUINT]] +// CHECK: [[PUUINT:%[_0-9A-Za-z]*]] = OpTypePointer Uniform [[UINT]] +// CHECK: [[OUTPUT:%[_0-9A-Za-z]*]] = OpVariable %{{[_0-9A-Za-z]*}} Uniform + +[numthreads(1, 1, 1)] +void main() { + uint64_t addr = 123; + vk::BufferPointer test = vk::BufferPointer(addr); + output[0] = test.Get(); +} + +// CHECK: [[TEST:%[_0-9A-Za-z]*]] = OpVariable [[PFPPUINT]] Function +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpConvertUToPtr [[PPUINT]] +// CHECK: OpStore [[TEST]] [[X1]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PPUINT]] [[TEST]] Aligned 32 +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpLoad [[UINT]] [[X2]] Aligned 4 +// CHECK: [[X4:%[_0-9A-Za-z]*]] = OpAccessChain [[PUUINT]] [[OUTPUT]] [[I0]] [[U0]] +// CHECK: OpStore [[X4]] [[X3]] +// CHECK: OpReturn +// CHECK: OpFunctionEnd + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl new file mode 100644 index 0000000000..71fee1a795 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl @@ -0,0 +1,101 @@ +// RUN: %dxc -spirv -Od -T ps_6_0 -E MainPs %s | FileCheck %s + +// CHECK: OpCapability PhysicalStorageBufferAddresses +// CHECK: OpExtension "SPV_KHR_physical_storage_buffer" +// CHECK: OpMemoryModel PhysicalStorageBuffer64 GLSL450 +// CHECK: OpEntryPoint Fragment [[MAIN:%[_0-9A-Za-z]*]] "MainPs" [[OUT:%[_0-9A-Za-z]*]] + +// Forward declaration +typedef struct 
block_s block_t; +typedef vk::BufferPointer block_p; + +struct block_s +{ + float4 x; + block_p next; +}; + +struct TestPushConstant_t +{ + block_p root; +}; + +[[vk::push_constant]] TestPushConstant_t g_PushConstants; + +// CHECK: OpDecorate [[GP:%[_0-9A-Za-z]*]] AliasedPointer +// CHECK: OpDecorate [[COPY1:%[_0-9A-Za-z]*]] RestrictPointer +// CHECK: OpDecorate [[COPY2:%[_0-9A-Za-z]*]] RestrictPointer +// CHECK: OpMemberDecorate [[BLOCK:%[_0-9A-Za-z]*]] 1 Offset 16 +// CHECK: OpTypeForwardPointer [[PBLOCK:%[_0-9A-Za-z]*]] PhysicalStorageBuffer +// CHECK: [[SINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[S0:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 0 +// CHECK-DAG: [[S1:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 1 +// CHECK: [[ULONG:%[_0-9A-Za-z]*]] = OpTypeInt 64 0 +// CHECK: [[UL0:%[_0-9A-Za-z]*]] = OpConstant [[ULONG]] 0 +// CHECK: [[FLOAT:%[_0-9A-Za-z]*]] = OpTypeFloat 32 +// CHECK: [[F0:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 0 +// CHECK: [[V4FLOAT:%[_0-9A-Za-z]*]] = OpTypeVector [[FLOAT]] 4 +// CHECK: [[CV4FLOAT:%[_0-9A-Za-z]*]] = OpConstantComposite [[V4FLOAT]] [[F0]] [[F0]] [[F0]] [[F0]] +// CHECK: [[BLOCK]] = OpTypeStruct [[V4FLOAT]] [[PBLOCK]] +// CHECK: [[PBLOCK]] = OpTypePointer PhysicalStorageBuffer [[BLOCK]] +// CHECK: [[PC:%[_0-9A-Za-z]*]] = OpTypeStruct [[PBLOCK]] +// CHECK: [[PPC:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PC]] +// CHECK: [[PV4FLOAT1:%[_0-9A-Za-z]*]] = OpTypePointer Output [[V4FLOAT]] +// CHECK: [[PPBLOCK0:%[_0-9A-Za-z]*]] = OpTypePointer Function %_ptr_PhysicalStorageBuffer_block_s +// CHECK: [[PPBLOCK1:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PBLOCK]] +// CHECK: [[PPBLOCK2:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[PBLOCK]] +// CHECK: [[BOOL:%[_0-9A-Za-z]*]] = OpTypeBool +// CHECK: [[PV4FLOAT2:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[V4FLOAT]] +// CHECK: [[GPC:%[_0-9A-Za-z]*]] = OpVariable [[PPC]] PushConstant +// CHECK: [[OUT]] = OpVariable [[PV4FLOAT1]] Output + +[numthreads(1,1,1)] 
+float4 MainPs(void) : SV_Target0 +{ + if (__has_feature(hlsl_vk_buffer_pointer)) { + [[vk::aliased_pointer]] block_p g_p = + vk::static_pointer_cast(g_PushConstants.root); + g_p = g_p.Get().next; + uint64_t addr = (uint64_t)g_p; + block_p copy1 = block_p(addr); + block_p copy2 = block_p(copy1); + if (addr == 0) // Null pointer test + return float4(0.0,0.0,0.0,0.0); + return g_p.Get().x; + } + return float4(0.0,0.0,0.0,0.0); +} + +// CHECK: [[MAIN]] = OpFunction +// CHECK-NEXT: OpLabel +// CHECK-NEXT: [[RESULT:%[_0-9A-Za-z]*]] = OpFunctionCall [[V4FLOAT]] [[FUN:%[_0-9A-Za-z]*]] +// CHECK: OpStore [[OUT]] [[RESULT]] +// CHECK: OpFunctionEnd +// CHECK: [[FUN]] = OpFunction [[V4FLOAT]] +// CHECK: [[GP]] = OpVariable [[PPBLOCK0]] Function +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPBLOCK1]] [[GPC]] [[S0]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[X1]] +// CHECK: OpStore [[GP]] [[X2]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] Aligned 32 +// CHECK: [[X4:%[_0-9A-Za-z]*]] = OpAccessChain [[PPBLOCK2]] [[X3]] [[S1]] +// CHECK: [[X5:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[X4]] Aligned 8 +// CHECK: OpStore [[GP]] [[X5]] +// CHECK: [[X6:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] +// CHECK: [[X7:%[_0-9A-Za-z]*]] = OpConvertPtrToU [[ULONG]] [[X6]] +// CHECK: OpStore [[ADDR:%[_0-9A-Za-z]*]] [[X7]] +// CHECK: [[X8:%[_0-9A-Za-z]*]] = OpLoad [[ULONG]] [[ADDR]] +// CHECK: [[X9:%[_0-9A-Za-z]*]] = OpConvertUToPtr [[PBLOCK]] [[X8]] +// CHECK: OpStore [[COPY1]] [[X9]] +// CHECK: [[X10:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[COPY1]] +// CHECK: OpStore [[COPY2]] [[X10]] +// CHECK: [[X11:%[_0-9A-Za-z]*]] = OpLoad [[ULONG]] [[ADDR]] +// CHECK: [[X12:%[_0-9A-Za-z]*]] = OpIEqual %bool [[X11]] [[UL0]] +// CHECK: OpBranchConditional [[X12]] [[IF_TRUE:%[_0-9A-Za-z]*]] [[IF_MERGE:%[_0-9A-Za-z]*]] +// CHECK: [[IF_TRUE]] = OpLabel +// CHECK: OpReturnValue [[CV4FLOAT]] +// CHECK: [[IF_MERGE]] = OpLabel +// CHECK: [[X13:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] 
Aligned 32 +// CHECK: [[X14:%[_0-9A-Za-z]*]] = OpAccessChain [[PV4FLOAT2]] [[X13]] [[S0]] +// CHECK: [[X15:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[X14]] Aligned 16 +// CHECK: OpReturnValue [[X15]] +// CHECK: OpFunctionEnd diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl new file mode 100644 index 0000000000..c7d6f0ed2b --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl @@ -0,0 +1,48 @@ +// RUN: %dxc -spirv -T ps_6_0 -E MainPs %s | FileCheck %s + +// CHECK: OpEntryPoint Fragment [[FUN:%[_0-9A-Za-z]*]] "MainPs" [[OUT:%[_0-9A-Za-z]*]] + +struct Globals_s +{ + float4 g_vSomeConstantA; + float4 g_vTestFloat4; + float4 g_vSomeConstantB; +}; + +typedef vk::BufferPointer Globals_p; + +struct TestPushConstant_t +{ + Globals_p m_nBufferDeviceAddress; +}; + +[[vk::push_constant]] TestPushConstant_t g_PushConstants; + +// CHECK: [[SINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[S0:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 0 +// CHECK-DAG: [[S1:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 1 +// CHECK: [[FLOAT:%[_0-9A-Za-z]*]] = OpTypeFloat 32 +// CHECK: [[V4FLOAT:%[_0-9A-Za-z]*]] = OpTypeVector [[FLOAT]] 4 +// CHECK: [[GLOBALS:%[_0-9A-Za-z]*]] = OpTypeStruct [[V4FLOAT]] [[V4FLOAT]] [[V4FLOAT]] +// CHECK: [[PGLOBALS:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[GLOBALS]] +// CHECK: [[PC:%[_0-9A-Za-z]*]] = OpTypeStruct [[PGLOBALS]] +// CHECK: [[PPC:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PC]] +// CHECK: [[PV4FLOAT1:%[_0-9A-Za-z]*]] = OpTypePointer Output [[V4FLOAT]] +// CHECK: [[PPGLOBALS:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PGLOBALS]] +// CHECK: [[PV4FLOAT2:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[V4FLOAT]] +// CHECK: [[GPC:%[_0-9A-Za-z]*]] = OpVariable [[PPC]] PushConstant +// CHECK-DAG: [[OUT]] = OpVariable [[PV4FLOAT1]] Output + +float4 MainPs(void) : SV_Target0 +{ + float4 vTest = 
g_PushConstants.m_nBufferDeviceAddress.Get().g_vTestFloat4; + return vTest; +} + +// CHECK: [[FUN]] = OpFunction +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPGLOBALS]] [[GPC]] [[S0]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PGLOBALS]] [[X1]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpAccessChain [[PV4FLOAT2]] [[X2]] [[S1]] +// CHECK: [[X4:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[X3]] Aligned 16 +// CHECK: OpStore [[OUT]] [[X4]] +// CHECK: OpFunctionEnd diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.rvalue.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.rvalue.hlsl new file mode 100644 index 0000000000..930770cc16 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.rvalue.hlsl @@ -0,0 +1,35 @@ +// RUN: %dxc -spirv -HV 202x -Od -T cs_6_9 %s | FileCheck %s + +// Issue #7302: implicit object argument of Get() evaluates to rvalue + +template +[[vk::ext_instruction(/*spv::OpBitcast*/124)]] +T bitcast(U); + +struct Content +{ + int a; +}; + +// CHECK: [[INT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[I1:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 1 +// CHECK-DAG: [[IO:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 0 +// CHECK: [[UINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[UDEADBEEF:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 3735928559 +// CHECK-DAG: [[U0:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 0 +// CHECK: [[V2UINT:%[_0-9A-Za-z]*]] = OpTypeVector [[UINT]] 2 +// CHECK: [[VECTOR:%[_0-9A-Za-z]*]] = OpConstantComposite [[V2UINT]] [[UDEADBEEF]] [[U0]] +// CHECK: [[CONTENT:%[_0-9A-Za-z]*]] = OpTypeStruct [[INT]] +// CHECK: [[PPCONTENT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[CONTENT]] +// CHECK: [[PPINT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[INT]] + +[numthreads(1, 1, 1)] +void main() +{ + bitcast >(uint32_t2(0xdeadbeefu,0x0u)).Get().a = 1; +} + +// CHECK: [[BITCAST:%[0-9]*]] = OpBitcast [[PPCONTENT]] [[VECTOR]] +// CHECK: [[PTR:%[0-9]*]] = OpAccessChain [[PPINT]] [[BITCAST]] [[IO]] +// CHECK: OpStore 
[[PTR]] [[I1]] Aligned 4 + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl new file mode 100644 index 0000000000..b2efd02cbd --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl @@ -0,0 +1,52 @@ +// RUN: %dxc -spirv -T ps_6_0 -E MainPs %s | FileCheck %s + +// CHECK: OpEntryPoint Fragment [[FUN:%[_0-9A-Za-z]*]] "MainPs" [[OUT:%[_0-9A-Za-z]*]] + +struct Globals_s +{ + float4 g_vSomeConstantA; + float4 g_vTestFloat4; + float4 g_vSomeConstantB; +}; + +typedef vk::BufferPointer Globals_p; + +struct TestPushConstant_t +{ + Globals_p m_nBufferDeviceAddress; +}; + +[[vk::push_constant]] TestPushConstant_t g_PushConstants; + +// CHECK: [[FLOAT:%[_0-9A-Za-z]*]] = OpTypeFloat 32 +// CHECK-DAG: [[F0:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 0 +// CHECK-DAG: [[F1:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 1 +// CHECK: [[V4FLOAT:%[_0-9A-Za-z]*]] = OpTypeVector [[FLOAT]] 4 +// CHECK-DAG: [[CV4FLOAT:%[_0-9A-Za-z]*]] = OpConstantComposite [[V4FLOAT]] [[F1]] [[F0]] [[F0]] [[F0]] +// CHECK: [[SINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[S0:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 0 +// CHECK-DAG: [[S1:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 1 +// CHECK: [[GLOBALS:%[_0-9A-Za-z]*]] = OpTypeStruct [[V4FLOAT]] [[V4FLOAT]] [[V4FLOAT]] +// CHECK: [[PGLOBALS:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[GLOBALS]] +// CHECK: [[PC:%[_0-9A-Za-z]*]] = OpTypeStruct [[PGLOBALS]] +// CHECK: [[PPC:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PC]] +// CHECK: [[PV4FLOAT1:%[_0-9A-Za-z]*]] = OpTypePointer Output [[V4FLOAT]] +// CHECK: [[PPGLOBALS:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PGLOBALS]] +// CHECK: [[PV4FLOAT2:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[V4FLOAT]] +// CHECK: [[GPC:%[_0-9A-Za-z]*]] = OpVariable [[PPC]] PushConstant +// CHECK-DAG: [[OUT]] = OpVariable [[PV4FLOAT1]] Output + +float4 MainPs(void) : SV_Target0 +{ + float4 vTest = 
float4(1.0,0.0,0.0,0.0); + g_PushConstants.m_nBufferDeviceAddress.Get().g_vTestFloat4 = vTest; + return vTest; +} + +// CHECK: [[FUN]] = OpFunction +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPGLOBALS]] [[GPC]] [[S0]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PGLOBALS]] [[X1]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpAccessChain [[PV4FLOAT2]] [[X2]] [[S1]] +// CHECK: OpStore [[X3]] [[CV4FLOAT]] Aligned 16 +// CHECK: OpStore [[OUT]] [[CV4FLOAT]] +// CHECK: OpFunctionEnd diff --git a/tools/clang/test/DXC/FinishCodeGen/unreachable-discard.hlsl b/tools/clang/test/DXC/FinishCodeGen/unreachable-discard.hlsl new file mode 100644 index 0000000000..77c0f51911 --- /dev/null +++ b/tools/clang/test/DXC/FinishCodeGen/unreachable-discard.hlsl @@ -0,0 +1,21 @@ +// RUN: %dxc /T ps_6_5 -fcgl %s | FileCheck %s + +// Compiling this HLSL would trigger an assertion: +// While deleting: void (i32, float)* %dx.hl.op..void (i32, float) +// Use still stuck around after Def is destroyed: call void @"dx.hl.op..void (i32, float)"(i32 120, float -1.000000e+00), !dbg <0x503000001cc8> +// Error: assert(use_empty() && "Uses remain when a value is destroyed!") +// File: /src/external/DirectXShaderCompiler/lib/IR/Value.cpp(83) +// +// Bug was fixed in CodeGenFunction::EmitDiscardStmt by skipping the emission of +// an unreachable discard. 
+ +// CHECK: define void @main() +// CHECK: br label % +// CHECK-NOT: call void @"dx.hl.op..void (i32, float)" +// CHECK: ret void + +void main() { + while (true) { + } + discard; +} diff --git a/tools/clang/test/DXC/Passes/DxilGen/LowerAllocateRayQuery2.ll b/tools/clang/test/DXC/Passes/DxilGen/LowerAllocateRayQuery2.ll new file mode 100644 index 0000000000..ab86452b17 --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/LowerAllocateRayQuery2.ll @@ -0,0 +1,118 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s +; generated the IR with: +; ExtractIRForPassTest.py -p dxilgen -o LowerAllocateRayQuery2.ll tools\clang\test\CodeGenDXIL\hlsl\objects\RayQuery\allocateRayQuery2.hlsl -- -T vs_6_9 +; Importantly, extraction took place with spirv code-gen enabled + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%"class.RayQuery<1024, 1>" = type { i32 } +%"class.RayQuery<1, 0>" = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) 
#1 + +; Function Attrs: nounwind +declare i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32, i32, i32) #0 + +; Function Attrs: nounwind +define void @main(<3 x float>, float, <3 x float>, float) #0 { +entry: + ; CHECK: call i32 @dx.op.allocateRayQuery2(i32 258, i32 1024, i32 1) + %rayQuery12 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 1024, i32 1), !dbg !42 ; line:15 col:79 + %4 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !46 ; line:17 col:3 + %5 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %4), !dbg !46 ; line:17 col:3 + %6 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %5, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !46 ; line:17 col:3 + call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %rayQuery12, %dx.types.Handle %6, i32 1024, i32 2, <3 x float> %0, float %1, <3 x float> %2, float %3), !dbg !46 ; line:17 col:3 + + ; CHECK: call i32 @dx.op.allocateRayQuery(i32 178, i32 1) + %rayQuery23 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 1, i32 0), !dbg !47 ; line:21 col:35 + %7 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !48 ; line:22 col:3 + %8 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %7), !dbg !48 ; line:22 col:3 + %9 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, 
%struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %8, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !48 ; line:22 col:3 + call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %rayQuery23, %dx.types.Handle %9, i32 0, i32 2, <3 x float> %0, float %1, <3 x float> %2, float %3), !dbg !48 ; line:22 col:3 + ret void, !dbg !49 ; line:23 col:1 +} + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !21} +!dx.entryPoints = !{!34} +!dx.fnprops = !{!39} +!dx.options = !{!40, !41} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4853 (lowerOMM, ca5df957eb33-dirty)"} +!3 = !{i32 1, i32 9} +!4 = !{!"vs", i32 6, i32 9} +!5 = !{i32 0, %struct.RayDesc undef, !6, %"class.RayQuery<1024, 1>" undef, !11, %"class.RayQuery<1, 0>" undef, !17} +!6 = !{i32 32, !7, !8, !9, !10} +!7 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!8 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!9 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9, i32 13, i32 3} +!10 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!11 = !{i32 4, !12, !13} +!12 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5} +!13 = !{i32 0, !14} +!14 = !{!15, !16} +!15 = !{i32 1, i64 1024} +!16 = !{i32 1, i64 1} +!17 = !{i32 4, !12, !18} +!18 = !{i32 0, !19} +!19 = !{!16, !20} +!20 = !{i32 1, i64 0} +!21 = !{i32 1, void (<3 x float>, float, <3 x float>, float)* @main, !22} +!22 = !{!23, !25, !28, !30, !32} 
+!23 = !{i32 0, !24, !24} +!24 = !{} +!25 = !{i32 0, !26, !27} +!26 = !{i32 4, !"RAYDESC", i32 7, i32 9} +!27 = !{i32 0} +!28 = !{i32 0, !26, !29} +!29 = !{i32 1} +!30 = !{i32 0, !26, !31} +!31 = !{i32 2} +!32 = !{i32 0, !26, !33} +!33 = !{i32 3} +!34 = !{void (<3 x float>, float, <3 x float>, float)* @main, !"main", null, !35, null} +!35 = !{!36, null, null, null} +!36 = !{!37} +!37 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !38} +!38 = !{i32 0, i32 4} +!39 = !{void (<3 x float>, float, <3 x float>, float)* @main, i32 1} +!40 = !{i32 -2147483584} +!41 = !{i32 -1} +!42 = !DILocation(line: 15, column: 79, scope: !43) +!43 = !DISubprogram(name: "main", scope: !44, file: !44, line: 11, type: !45, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: false, function: void (<3 x float>, float, <3 x float>, float)* @main) +!44 = !DIFile(filename: "tools\5Cclang\5Ctest\5CCodeGenDXIL\5Chlsl\5Cobjects\5CRayQuery\5CallocateRayQuery2.hlsl", directory: "") +!45 = !DISubroutineType(types: !24) +!46 = !DILocation(line: 17, column: 3, scope: !43) +!47 = !DILocation(line: 21, column: 35, scope: !43) +!48 = !DILocation(line: 22, column: 3, scope: !43) +!49 = !DILocation(line: 23, column: 1, scope: !43) diff --git a/tools/clang/test/DXC/Passes/DxilGen/hitobject_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/hitobject_dxilgen.ll new file mode 100644 index 0000000000..17a968675f --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/hitobject_dxilgen.ll @@ -0,0 +1,100 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s +; REQUIRES: dxil-1-9 + +; +; Buffer Definitions: +; +; cbuffer $Globals +; { +; +; [0 x i8] (type annotation not present) +; +; } +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; $Globals 
cbuffer NA NA CB0 cb4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%ConstantBuffer = type opaque +%dx.types.HitObject = type { i8* } +%"class.dx::HitObject" = type { i32 } + +@"$Globals" = external constant %ConstantBuffer + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +entry: + %hit = alloca %dx.types.HitObject, align 4 + %tmp = alloca %dx.types.HitObject, align 4 + %0 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !19 ; line:9 col:3 + call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !19 ; line:9 col:3 +; CHECK: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) + %1 = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %hit), !dbg !23 ; line:9 col:17 + %2 = bitcast %dx.types.HitObject* %tmp to i8*, !dbg !24 ; line:10 col:3 + call void @llvm.lifetime.start(i64 4, i8* %2) #0, !dbg !24 ; line:10 col:3 +; CHECK: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) + call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %tmp), !dbg !24 ; line:10 col:3 + %3 = bitcast %dx.types.HitObject* %tmp to i8*, !dbg !24 ; line:10 col:3 + call void @llvm.lifetime.end(i64 4, i8* %3) #0, !dbg !24 ; line:10 col:3 + %4 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !25 ; line:11 col:1 + call void @llvm.lifetime.end(i64 4, i8* %4) #0, !dbg !25 ; line:11 col:1 + ret void, !dbg !25 ; line:11 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32, 
%dx.types.HitObject*) #0 + +attributes #0 = { nounwind } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !8} +!dx.entryPoints = !{!12} +!dx.fnprops = !{!16} +!dx.options = !{!17, !18} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4840 (ser_patch_1 9ffd030b1)"} +!3 = !{i32 1, i32 9} +!4 = !{!"lib", i32 6, i32 9} +!5 = !{i32 0, %"class.dx::HitObject" undef, !6} +!6 = !{i32 4, !7} +!7 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!8 = !{i32 1, void ()* @"\01?main@@YAXXZ", !9} +!9 = !{!10} +!10 = !{i32 1, !11, !11} +!11 = !{} +!12 = !{null, !"", null, !13, null} +!13 = !{null, null, !14, null} +!14 = !{!15} +!15 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!16 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!17 = !{i32 -2147483584} +!18 = !{i32 -1} +!19 = !DILocation(line: 9, column: 3, scope: !20) +!20 = !DISubprogram(name: "main", scope: !21, file: !21, line: 8, type: !22, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!21 = !DIFile(filename: "tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make.hlsl", directory: "") +!22 = !DISubroutineType(types: !11) +!23 = !DILocation(line: 9, column: 17, scope: !20) +!24 = !DILocation(line: 10, column: 3, scope: !20) +!25 = !DILocation(line: 11, column: 1, scope: !20) diff --git a/tools/clang/test/DXC/Passes/DxilGen/maybereorder_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/maybereorder_dxilgen.ll new file mode 100644 index 0000000000..ca25b1e115 --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/maybereorder_dxilgen.ll @@ -0,0 +1,105 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s +; REQUIRES: dxil-1-9 + + +; +; Buffer Definitions: +; +; cbuffer $Globals +; { +; +; [0 x 
i8] (type annotation not present) +; +; } +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; $Globals cbuffer NA NA CB0 cb4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%ConstantBuffer = type opaque +%dx.types.HitObject = type { i8* } +%"class.dx::HitObject" = type { i32 } + +@"$Globals" = external constant %ConstantBuffer + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +entry: + %hit = alloca %dx.types.HitObject, align 4 + %0 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !19 ; line:9 col:3 + call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !19 ; line:9 col:3 +; CHECK: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) + %1 = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %hit), !dbg !23 ; line:9 col:17 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 359, %dx.types.HitObject* %hit), !dbg !24 ; line:10 col:3 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32)"(i32 359, %dx.types.HitObject* %hit, i32 241, i32 3), !dbg !25 ; line:11 col:3 + call void @"dx.hl.op..void (i32, i32, i32)"(i32 359, i32 242, i32 7), !dbg !26 ; line:12 col:3 + %2 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !27 ; line:13 col:1 + call void @llvm.lifetime.end(i64 4, i8* %2) #0, !dbg !27 ; line:13 col:1 + ret void, !dbg !27 ; line:13 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void 
(i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32)"(i32, %dx.types.HitObject*, i32, i32) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, i32, i32)"(i32, i32, i32) #0 + +attributes #0 = { nounwind } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !8} +!dx.entryPoints = !{!12} +!dx.fnprops = !{!16} +!dx.options = !{!17, !18} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4840 ser_patch_1 9ffd030b1)"} +!3 = !{i32 1, i32 9} +!4 = !{!"lib", i32 6, i32 9} +!5 = !{i32 0, %"class.dx::HitObject" undef, !6} +!6 = !{i32 4, !7} +!7 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!8 = !{i32 1, void ()* @"\01?main@@YAXXZ", !9} +!9 = !{!10} +!10 = !{i32 1, !11, !11} +!11 = !{} +!12 = !{null, !"", null, !13, null} +!13 = !{null, null, !14, null} +!14 = !{!15} +!15 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!16 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!17 = !{i32 -2147483584} +!18 = !{i32 -1} +!19 = !DILocation(line: 9, column: 3, scope: !20) +!20 = !DISubprogram(name: "main", scope: !21, file: !21, line: 8, type: !22, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!21 = !DIFile(filename: "tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder.hlsl", directory: "") +!22 = !DISubroutineType(types: !11) +!23 = !DILocation(line: 9, column: 17, scope: !20) +!24 = !DILocation(line: 10, column: 3, scope: !20) +!25 = !DILocation(line: 11, column: 3, scope: !20) +!26 = !DILocation(line: 12, column: 3, scope: !20) +!27 = !DILocation(line: 13, column: 1, scope: !20) diff --git 
a/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll new file mode 100644 index 0000000000..89ee886c2e --- /dev/null +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll @@ -0,0 +1,142 @@ +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; +; Buffer Definitions: +; +; cbuffer $Globals +; { +; +; [0 x i8] (type annotation not present) +; +; } +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; $Globals cbuffer NA NA CB0 cb4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%ConstantBuffer = type opaque +%dx.types.HitObject = type { i8* } +%"class.dx::HitObject" = type { i32 } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } + +@"$Globals" = external constant %ConstantBuffer + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +entry: + %hit = alloca %dx.types.HitObject, align 4 + %tmp = alloca %dx.types.HitObject, align 4 + %ray = alloca %struct.RayDesc, align 4 +; CHECK-NOT: %{{[^ ]+}} = alloca %struct.RayDesc + %tmp2 = alloca %dx.types.HitObject, align 4 +; CHECK: %[[HIT0:[^ ]+]] = alloca %dx.types.HitObject, align 4 +; CHECK: %[[HIT1:[^ ]+]] = alloca %dx.types.HitObject, align 4 +; CHECK: %[[HIT2:[^ ]+]] = alloca %dx.types.HitObject, align 4 + %0 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !23 ; line:42 col:3 + call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !23 ; line:42 col:3 +; CHECK: %[[THIS0:[^ ]+]] = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %[[HIT0]]) +; CHECK-NOT: %[[THIS0]] + %1 = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, 
%dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %hit), !dbg !27 ; line:42 col:17 + %2 = bitcast %dx.types.HitObject* %tmp to i8*, !dbg !28 ; line:43 col:3 + call void @llvm.lifetime.start(i64 4, i8* %2) #0, !dbg !28 ; line:43 col:3 +; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %[[HIT1]]) + call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %tmp), !dbg !28 ; line:43 col:3 + %3 = bitcast %dx.types.HitObject* %tmp to i8*, !dbg !28 ; line:43 col:3 + call void @llvm.lifetime.end(i64 4, i8* %3) #0, !dbg !28 ; line:43 col:3 + %4 = bitcast %struct.RayDesc* %ray to i8*, !dbg !29 ; line:44 col:3 + call void @llvm.lifetime.start(i64 32, i8* %4) #0, !dbg !29 ; line:44 col:3 + %5 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %ray, i32 0, i32 0, !dbg !30 ; line:44 col:17 + store <3 x float> zeroinitializer, <3 x float>* %5, !dbg !30 ; line:44 col:17 + %6 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %ray, i32 0, i32 1, !dbg !30 ; line:44 col:17 + store float 0.000000e+00, float* %6, !dbg !30 ; line:44 col:17 + %7 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %ray, i32 0, i32 2, !dbg !30 ; line:44 col:17 + store <3 x float> , <3 x float>* %7, !dbg !30 ; line:44 col:17 + %8 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %ray, i32 0, i32 3, !dbg !30 ; line:44 col:17 + store float 1.000000e+03, float* %8, !dbg !30 ; line:44 col:17 + %9 = bitcast %dx.types.HitObject* %tmp2 to i8*, !dbg !31 ; line:45 col:3 + call void @llvm.lifetime.start(i64 4, i8* %9) #0, !dbg !31 ; line:45 col:3 +; CHECK: store <3 x float> zeroinitializer, <3 x float>* %[[pRDO:[^ ]+]], +; CHECK: store float 0.000000e+00, float* %[[pRDTMIN:[^ ]+]], +; CHECK: store <3 x float> , <3 x float>* %[[pRDD:[^ ]+]], +; CHECK: store float 1.000000e+03, float* %[[pRDTMAX:[^ ]+]], +; CHECK-DAG: %[[RDO:[^ ]+]] = load <3 x float>, <3 x float>* %[[pRDO]], +; CHECK-DAG: %[[RDTMIN:[^ ]+]] = 
load float, float* %[[pRDTMIN]], +; CHECK-DAG: %[[RDD:[^ ]+]] = load <3 x float>, <3 x float>* %[[pRDD]], +; CHECK-DAG: %[[RDTMAX:[^ ]+]] = load float, float* %[[pRDTMAX]], +; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 387, %dx.types.HitObject* %[[HIT2]], i32 0, i32 1, <3 x float> %[[RDO]], float %[[RDTMIN]], <3 x float> %[[RDD]], float %[[RDTMAX]]) + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.RayDesc*)"(i32 387, %dx.types.HitObject* %tmp2, i32 0, i32 1, %struct.RayDesc* %ray), !dbg !31 ; line:45 col:3 + %10 = bitcast %dx.types.HitObject* %tmp2 to i8*, !dbg !31 ; line:45 col:3 + call void @llvm.lifetime.end(i64 4, i8* %10) #0, !dbg !31 ; line:45 col:3 + %11 = bitcast %struct.RayDesc* %ray to i8*, !dbg !32 ; line:46 col:1 + call void @llvm.lifetime.end(i64 32, i8* %11) #0, !dbg !32 ; line:46 col:1 + %12 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !32 ; line:46 col:1 + call void @llvm.lifetime.end(i64 4, i8* %12) #0, !dbg !32 ; line:46 col:1 + ret void, !dbg !32 ; line:46 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.RayDesc*)"(i32, %dx.types.HitObject*, i32, i32, %struct.RayDesc*) #0 + +attributes #0 = { nounwind } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!dx.version = !{!2} +!dx.valver = !{!2} +!dx.shaderModel = !{!3} +!dx.typeAnnotations = !{!4, !12} +!dx.entryPoints = !{!16} +!dx.fnprops = !{!20} +!dx.options = !{!21, !22} + +!0 = !{i32 
2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{i32 1, i32 9} +!3 = !{!"lib", i32 6, i32 9} +!4 = !{i32 0, %"class.dx::HitObject" undef, !5, %struct.RayDesc undef, !7} +!5 = !{i32 4, !6} +!6 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!7 = !{i32 32, !8, !9, !10, !11} +!8 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!9 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!10 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9, i32 13, i32 3} +!11 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!12 = !{i32 1, void ()* @"\01?main@@YAXXZ", !13} +!13 = !{!14} +!14 = !{i32 1, !15, !15} +!15 = !{} +!16 = !{null, !"", null, !17, null} +!17 = !{null, null, !18, null} +!18 = !{!19} +!19 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!20 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!21 = !{i32 -2147483584} +!22 = !{i32 -1} +!23 = !DILocation(line: 42, column: 3, scope: !24) +!24 = !DISubprogram(name: "main", scope: !25, file: !25, line: 41, type: !26, isLocal: false, isDefinition: true, scopeLine: 41, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!25 = !DIFile(filename: "tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make_ast.hlsl", directory: "") +!26 = !DISubroutineType(types: !15) +!27 = !DILocation(line: 42, column: 17, scope: !24) +!28 = !DILocation(line: 43, column: 3, scope: !24) +!29 = !DILocation(line: 44, column: 3, scope: !24) +!30 = !DILocation(line: 44, column: 17, scope: !24) +!31 = !DILocation(line: 45, column: 3, scope: !24) +!32 = !DILocation(line: 46, column: 1, scope: !24) \ No newline at end of file diff --git a/tools/clang/test/DXC/metal.test b/tools/clang/test/DXC/metal.test new file mode 100644 index 0000000000..3d00850abc --- /dev/null +++ b/tools/clang/test/DXC/metal.test @@ -0,0 +1,7 @@ +// REQUIRES: metal + +// Metal libraries are LLVM bitcode. 
This check inspects the magic number from +// the metal library output. +// RUN: %dxc %S/Inputs/smoke.hlsl /T ps_6_0 -metal -Fo Tmp.metal +// RUN: head -c 4 Tmp.metal | FileCheck -check-prefix=MTL %s +// MTL: {{^MTLB}} diff --git a/tools/clang/test/DXC/no_metal.test b/tools/clang/test/DXC/no_metal.test new file mode 100644 index 0000000000..37af16cad5 --- /dev/null +++ b/tools/clang/test/DXC/no_metal.test @@ -0,0 +1,4 @@ +// UNSUPPORTED: metal + +// RUN:not %dxc %S/Inputs/smoke.hlsl /T ps_6_0 -metal 2>&1 | FileCheck %s +// CHECK:Metal CodeGen not available diff --git a/tools/clang/test/DXC/no_metal_disassembly.test b/tools/clang/test/DXC/no_metal_disassembly.test new file mode 100644 index 0000000000..44283a8fe8 --- /dev/null +++ b/tools/clang/test/DXC/no_metal_disassembly.test @@ -0,0 +1,7 @@ +// REQUIRES: metal + +// These cases both fail because the shader converter library cannot emit +// textual IR. +// RUN: not %dxc %S/Inputs/smoke.hlsl /T ps_6_0 -metal -Fo Tmp.metal -Fc Tmp.air 2>&1 | FileCheck %s +// RUN: not %dxc %S/Inputs/smoke.hlsl /T ps_6_0 -metal 2>&1 | FileCheck %s +// CHECK: Disassembly of Metal IR not supported (yet). diff --git a/tools/clang/test/DXILValidation/load-store-validation.hlsl b/tools/clang/test/DXILValidation/load-store-validation.hlsl new file mode 100644 index 0000000000..d4e5e29db8 --- /dev/null +++ b/tools/clang/test/DXILValidation/load-store-validation.hlsl @@ -0,0 +1,74 @@ +// This file is not used directly for testing. +// This is the HLSL source for validation of various invalid load/store parameters. +// It is used to generate LitDxilValidation/load-store-validation.ll using `dxc -T ps_6_9`. +// Output is modified to trigger various validation errors. 
+ +Texture1D Tex; +RWTexture1D RwTex; +SamplerState Samp; + +StructuredBuffer VecBuf; +StructuredBuffer ScalBuf; +ByteAddressBuffer BaBuf; + +RWStructuredBuffer OutVecBuf; +RWStructuredBuffer OutScalBuf; +RWByteAddressBuffer OutBaBuf; + +// Some simple ways to generate the vector ops in question. +float4 main(int i : IX) : SV_Target { + // Texture provides some invalid handles to plug in. + float4 TexVal = Tex.Sample(Samp, i); + RwTex[0] = TexVal; + + // For invalid RC on Load (and inevitably invalid RK). + float BadRCLd = ScalBuf[0]; + // For invalid RK on Load. + float BadRKLd = ScalBuf[1]; + // For non-constant alignment on Load. + float BadAlnLd = ScalBuf[2]; + // For undefined offset on Structured Buffer Load. + float BadStrOffLd = ScalBuf[3]; + // For defined (and therefore invalid) offset on Byte Address Buffer Load. + float BadBabOffLd = BaBuf.Load(0); + + // For invalid RC on Vector Load (and inevitably invalid RK). + float4 BadRCVcLd = VecBuf[0]; + // For invalid RK on Vector Load. + float4 BadRKVcLd = VecBuf[1]; + // For non-constant alignment on Vector Load. + float4 BadAlnVcLd = VecBuf[2]; + // For undefined offset on Structured Buffer Vector Load. + float4 BadStrOffVcLd = VecBuf[3]; + // For defined (and therefore invalid) offset on Byte Address Buffer Vector Load. + float4 BadBabOffVcLd = BaBuf.Load(4); + + // For Store to non-UAV. + OutScalBuf[0] = BadRCLd; + // For invalid RK on Store. + OutScalBuf[1] = BadRKLd; + // For non-constant alignment on Store. + OutScalBuf[2] = BadAlnLd; + // For undefined offset on Structured Buffer Store. + OutScalBuf[3] = BadStrOffLd; + // For undefined value Store. + OutScalBuf[4] = 77; + // For defined (and therefore invalid) offset on Byte Address Buffer Store. + OutBaBuf.Store(0, BadBabOffLd); + + // For Vector Store to non-UAV. + OutVecBuf[0] = BadRCVcLd; + // For invalid RK on Vector Store. + OutVecBuf[1] = BadRKVcLd; + // For non-constant alignment on Vector Store. 
+ OutVecBuf[2] = BadAlnVcLd; + // For undefined offset on Structured Buffer Vector Store. + OutVecBuf[3] = BadStrOffVcLd; + // For undefinded value Vector Store. + OutVecBuf[4] = 77; + // For defined (and therefore invalid) offset on Byte Address Buffer Vector Store. + OutBaBuf.Store(4, BadBabOffVcLd); + + return TexVal; +} + diff --git a/tools/clang/test/DXILValidation/ser_hitobject_make_passing.ll b/tools/clang/test/DXILValidation/ser_hitobject_make_passing.ll new file mode 100644 index 0000000000..88b71ff3e0 --- /dev/null +++ b/tools/clang/test/DXILValidation/ser_hitobject_make_passing.ll @@ -0,0 +1,46 @@ +; RUN: %dxv %s | FileCheck %s + +; CHECK: Validation succeeded. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.HitObject = type { i8* } + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + ; Test HitObject_MakeMiss (opcode 265) + %r265 = call %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32 265, i32 4, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 9.999000e+03) ; HitObject_MakeMiss(RayFlags,MissShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax) + + ; Test HitObject_MakeNop (opcode 266) + %r266 = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() + + ret void +} + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32, i32, i32, float, float, float, float, float, float, float, float) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeNop(i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.typeAnnotations = !{!2} +!dx.entryPoints = !{!9, !11} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} 
+!2 = !{i32 1, void ()* @"\01?main@@YAXXZ", !3} +!3 = !{!4} +!4 = !{i32 1, !5, !5} +!5 = !{} +!9 = !{null, !"", null, null, !10} +!10 = !{i32 0, i64 0} +!11 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !12} +!12 = !{i32 8, i32 7, i32 5, !13} +!13 = !{i32 0} diff --git a/tools/clang/test/DXILValidation/vector-validation.hlsl b/tools/clang/test/DXILValidation/vector-validation.hlsl new file mode 100644 index 0000000000..5d6a5cd4a2 --- /dev/null +++ b/tools/clang/test/DXILValidation/vector-validation.hlsl @@ -0,0 +1,14 @@ +// This file is not used directly for testing. +// This is the HLSL source for validation of disallowed 6.9 features in previous shader models. +// It is used to generate LitDxilValidation/vector-validation.ll using `dxc -T ps_6_9`. +// Output is modified to have shader model 6.8 instead. + +RWStructuredBuffer VecBuf; + +// some simple ways to generate the vector ops in question. +float4 main(float val : VAL) :SV_Position { + float4 vec = VecBuf[1]; + VecBuf[0] = val; + return vec[2]; +} + diff --git a/tools/clang/test/HLSLFileCheck/d3dreflect/raytracingpipelineconfig1.hlsl b/tools/clang/test/HLSLFileCheck/d3dreflect/raytracingpipelineconfig1.hlsl new file mode 100644 index 0000000000..44424f5d14 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/d3dreflect/raytracingpipelineconfig1.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxilver 1.9 | %dxc -T lib_6_9 %s | FileCheck %s +// RUN: %dxilver 1.9 | %dxc -T lib_6_9 -ast-dump %s | FileCheck -check-prefix=AST %s +// RUN: %dxilver 1.9 | %dxc -T lib_6_9 -ast-dump-implicit %s | FileCheck -check-prefix=ASTIMPL %s + + +// CHECK: ; RaytracingPipelineConfig1 rpc = { MaxTraceRecursionDepth = 32, Flags = RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS }; + +// AST: TranslationUnitDecl 0x{{.+}} <> +// AST-NEXT: VarDecl 0x{{.+}} rpc 'RaytracingPipelineConfig1' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'RaytracingPipelineConfig1' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: 
IntegerLiteral 0x{{.+}} 'literal int' 32 +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: DeclRefExpr 0x{{.+}} 'const unsigned int' lvalue Var 0x{{.+}} 'RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS' 'const unsigned int' +// ASTIMPL: VarDecl 0x{{.+}} <> implicit referenced RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS 'const unsigned int' static cinit +// ASTIMPL-NEXT: IntegerLiteral 0x{{.+}} <> 'const unsigned int' 1024 +// ASTIMPL-NEXT: AvailabilityAttr 0x{{.+}} <> Implicit 6.9 0 0 "" + +RaytracingPipelineConfig1 rpc = { 32, RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS }; diff --git a/tools/clang/test/HLSLFileCheck/d3dreflect/rdat_mintarget/sm69_barriers.hlsl b/tools/clang/test/HLSLFileCheck/d3dreflect/rdat_mintarget/sm69_barriers.hlsl new file mode 100644 index 0000000000..6cedf44e20 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/d3dreflect/rdat_mintarget/sm69_barriers.hlsl @@ -0,0 +1,53 @@ +// RUN: %dxilver 1.9 | %dxc -T lib_6_9 %s | %D3DReflect %s | %FileCheck %s -check-prefixes=RDAT + +// Check that stage flags are set correctly still for different barrier modes in SM 6.9. 
+ +// RDAT: FunctionTable[{{.*}}] = { + +RWByteAddressBuffer BAB : register(u1, space0); + +// RDAT-LABEL: UnmangledName: "fn_barrier_reorder" +// RDAT: FeatureInfo1: 0 +// RDAT: FeatureInfo2: 0 +// RDAT: ShaderStageFlag: (Library | RayGeneration) +// RDAT: MinShaderTarget: 0x60069 + +[noinline] export +void fn_barrier_reorder() { + Barrier(UAV_MEMORY, REORDER_SCOPE); +} + +// RDAT-LABEL: UnmangledName: "fn_barrier_reorder2" +// RDAT: FeatureInfo1: 0 +// RDAT: FeatureInfo2: 0 +// RDAT: ShaderStageFlag: (Library | RayGeneration) +// RDAT: MinShaderTarget: 0x60069 + +[noinline] export +void fn_barrier_reorder2() { + Barrier(BAB, REORDER_SCOPE); +} + +// RDAT-LABEL: UnmangledName: "rg_barrier_reorder_in_call" +// RDAT: FeatureInfo1: 0 +// RDAT: FeatureInfo2: 0 +// RDAT: ShaderStageFlag: (RayGeneration) +// RDAT: MinShaderTarget: 0x70069 + +[shader("raygeneration")] +void rg_barrier_reorder_in_call() { + fn_barrier_reorder(); + BAB.Store(0, 0); +} + +// RDAT-LABEL: UnmangledName: "rg_barrier_reorder_in_call2" +// RDAT: FeatureInfo1: 0 +// RDAT: FeatureInfo2: 0 +// RDAT: ShaderStageFlag: (RayGeneration) +// RDAT: MinShaderTarget: 0x70069 + +[shader("raygeneration")] +void rg_barrier_reorder_in_call2() { + fn_barrier_reorder2(); + BAB.Store(0, 0); +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix-ast.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix-ast.hlsl index 33086852ab..5443ada0c9 100644 --- a/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix-ast.hlsl +++ b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix-ast.hlsl @@ -15,6 +15,7 @@ // ext_vector array. 
// CHECK-NEXT: CXXRecordDecl {{0x[0-9a-fA-F]+}} <> implicit class matrix definition // CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +// CHECK-NEXT: HLSLMatrixAttr {{0x[0-9a-fA-F]+}} <> Implicit // CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'element [row_count] __attribute__((ext_vector_type(col_count)))' diff --git a/tools/clang/test/HLSLFileCheck/hlsl/types/vector/vector-ast.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/types/vector/vector-ast.hlsl index 0ad236a4b2..12859b7eda 100644 --- a/tools/clang/test/HLSLFileCheck/hlsl/types/vector/vector-ast.hlsl +++ b/tools/clang/test/HLSLFileCheck/hlsl/types/vector/vector-ast.hlsl @@ -12,6 +12,7 @@ // Verify the class, final attribute and ext_vector field decl. // CHECK-NEXT: CXXRecordDecl {{0x[0-9a-fA-F]+}} <> implicit class vector definition // CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +// CHECK-NEXT: HLSLVectorAttr {{0x[0-9a-fA-F]+}} <> Implicit // CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'element __attribute__((ext_vector_type(element_count)))' // Verify operator overloads for const vector subscript operators. 
diff --git a/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll b/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll index 35fd0d6b1d..d5b0bbb2a7 100644 --- a/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll +++ b/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll @@ -30,4 +30,3 @@ entry: declare float @"\01?foo@@YAMY02V?$vector@M$02@@@Z"([3 x <3 x float>]*) attributes #0 = { nounwind } - diff --git a/tools/clang/test/HLSLFileCheck/validation/ser_reorder_scope_sm69_passing.ll b/tools/clang/test/HLSLFileCheck/validation/ser_reorder_scope_sm69_passing.ll new file mode 100644 index 0000000000..cab9942b02 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/validation/ser_reorder_scope_sm69_passing.ll @@ -0,0 +1,68 @@ +; RUN: %dxilver 1.9 | %dxv %s + +; Buffer Definitions: +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; BAB UAV byte r/w U0 u1 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.RWByteAddressBuffer = type { i32 } + +@"\01?BAB@@3URWByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?BAB@@3URWByteAddressBuffer@@A", align 4 + call void @dx.op.barrierByMemoryType(i32 244, i32 1, i32 8) ; BarrierByMemoryType(MemoryTypeFlags,SemanticFlags) + %2 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %3 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: 
RWByteAddressBuffer + call void @dx.op.barrierByMemoryHandle(i32 245, %dx.types.Handle %3, i32 8) ; BarrierByMemoryHandle(object,SemanticFlags) + ret void +} + +; Function Attrs: noduplicate nounwind +declare void @dx.op.barrierByMemoryType(i32, i32, i32) #1 + +; Function Attrs: noduplicate nounwind +declare void @dx.op.barrierByMemoryHandle(i32, %dx.types.Handle, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #3 + +attributes #0 = { nounwind } +attributes #1 = { noduplicate nounwind } +attributes #2 = { nounwind readnone } +attributes #3 = { nounwind readonly } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.typeAnnotations = !{!5} +!dx.entryPoints = !{!9, !11} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{null, !3, null, null} +!3 = !{!4} +!4 = !{i32 0, %struct.RWByteAddressBuffer* bitcast (%dx.types.Handle* @"\01?BAB@@3URWByteAddressBuffer@@A" to %struct.RWByteAddressBuffer*), !"BAB", i32 0, i32 1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!5 = !{i32 1, void ()* @"\01?main@@YAXXZ", !6} +!6 = !{!7} +!7 = !{i32 1, !8, !8} +!8 = !{} +!9 = !{null, !"", null, !2, !10} +!10 = !{i32 0, i64 8589934608} +!11 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !12} +!12 = !{i32 8, i32 7, i32 5, !13} +!13 = !{i32 0} diff --git a/tools/clang/test/LitDXILValidation/load-store-validation.ll b/tools/clang/test/LitDXILValidation/load-store-validation.ll new file mode 100644 index 0000000000..34b2f6b602 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/load-store-validation.ll @@ -0,0 +1,229 @@ +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; Ensure proper validation errors are produced for invalid parameters to load and store operations. 
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResBind = type { i32, i32, i32, i8 } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.f32 = type { float, float, float, float, i32 } +%dx.types.ResRet.v4f32 = type { <4 x float>, i32 } +%"class.Texture1D >" = type { <4 x float>, %"class.Texture1D >::mips_type" } +%"class.Texture1D >::mips_type" = type { i32 } +%"class.StructuredBuffer >" = type { <4 x float> } +%"class.StructuredBuffer" = type { float } +%struct.ByteAddressBuffer = type { i32 } +%"class.RWStructuredBuffer >" = type { <4 x float> } +%"class.RWStructuredBuffer" = type { float } +%struct.RWByteAddressBuffer = type { i32 } +%struct.SamplerState = type { i32 } + +; Unfortunately, the validation errors come in weird orders. +; Inlining them isn't helpful, so we'll just dump them all here. +; Inline comments, variable names, and notes should help find the corresponding source. + +; CHECK: error: raw/typed buffer offset must be undef. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp44, i32 0, i32 0, float %badBabOff, float undef, float undef, float undef, i8 1, i32 4)' +; CHECK: error: Assignment of undefined values to UAV. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp42, i32 4, i32 0, float undef, float undef, float undef, float undef, i8 1, i32 4) +; CHECK: error: structured buffer requires defined index and offset coordinates. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp41, i32 3, i32 undef, float %badStrOff, float undef, float undef, float undef, i8 1, i32 4) +; CHECK: error: Raw Buffer alignment value must be a constant. 
+; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp40, i32 2, i32 0, float %badAln, float undef, float undef, float undef, i8 1, i32 %ix)' +; CHECK: error: buffer load/store only works on Raw/Typed/StructuredBuffer. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %rwTex, i32 1, i32 0, float %badRK, float undef, float undef, float undef, i8 1, i32 4)' +; CHECK: error: store should be on uav resource. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %scalBuf, i32 0, i32 0, float %badRC, float undef, float undef, float undef, i8 1, i32 4)' + +; CHECK: error: raw/typed buffer offset must be undef. +; CHECK-NEXT: note: at '%badBabOffLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %baBuf, i32 0, i32 0, i8 1, i32 4)' +; CHECK: error: structured buffer requires defined index and offset coordinates. +; CHECK-NEXT: note: at '%badStrOffLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %scalBuf, i32 3, i32 undef, i8 1, i32 4)' +; CHECK: error: Raw Buffer alignment value must be a constant. +; CHECK-NEXT: note: at '%badAlnLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %scalBuf, i32 2, i32 0, i8 1, i32 %ix)' +; CHECK: error: buffer load/store only works on Raw/Typed/StructuredBuffer +; CHECK-NEXT: note: at '%badRKLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tex, i32 1, i32 0, i8 1, i32 4)' +; CHECK: error: load can only run on UAV/SRV resource. +; CHECK-NEXT: note: at '%badRCLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %samp, i32 0, i32 0, i8 1, i32 4)' +; CHECK-NEXT: error: buffer load/store only works on Raw/Typed/StructuredBuffer. 
+; CHECK-NEXT: note: at '%badRCLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %samp, i32 0, i32 0, i8 1, i32 4)' + +; CHECK: error: raw/typed buffer offset must be undef. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp51, i32 4, i32 0, <4 x float> %badBabOffVc, i32 4)' +; CHECK: error: Assignment of undefined values to UAV. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp49, i32 4, i32 0, <4 x float> undef, i32 4)' +; CHECK: error: structured buffer requires defined index and offset coordinates. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp48, i32 3, i32 undef, <4 x float> %badStrOffVc, i32 4)' +; CHECK: error: Raw Buffer alignment value must be a constant. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp47, i32 2, i32 0, <4 x float> %badAlnVc, i32 %ix)' +; CHECK: error: buffer load/store only works on Raw/Typed/StructuredBuffer. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %rwTex, i32 1, i32 0, <4 x float> %badRKVc, i32 4)' +; CHECK: error: store should be on uav resource. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %vecBuf, i32 0, i32 0, <4 x float> %badRCVc, i32 4)' + +; CHECK: error: raw/typed buffer offset must be undef. +; CHECK-NEXT: note: at '%badBabOffVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %baBuf, i32 4, i32 0, i32 4)' +; CHECK: error: structured buffer requires defined index and offset coordinates. +; CHECK-NEXT: note: at '%badStrOffVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %vecBuf, i32 3, i32 undef, i32 4)' +; CHECK: error: Raw Buffer alignment value must be a constant. 
+; CHECK-NEXT: note: at '%badAlnVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %vecBuf, i32 2, i32 0, i32 %ix)' +; CHECK: error: buffer load/store only works on Raw/Typed/StructuredBuffer +; CHECK-NEXT: note: at '%badRKVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %tex, i32 1, i32 0, i32 4)' +; CHECK: error: load can only run on UAV/SRV resource. +; CHECK-NEXT: note: at '%badRCVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %samp, i32 0, i32 0, i32 4)' +; CHECK-NEXT: error: buffer load/store only works on Raw/Typed/StructuredBuffer. +; CHECK-NEXT: note: at '%badRCVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %samp, i32 0, i32 0, i32 4)' + +define void @main() { +bb: + %tmp = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 1 }, i32 2, i1 false) + %tmp1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) + %tmp2 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) + %tmp3 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 0 }, i32 3, i1 false) + %tmp4 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 0 }, i32 2, i1 false) + %tmp5 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) + %tmp6 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind zeroinitializer, i32 0, i1 false) + %tmp7 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 3 }, i32 0, i1 false) + %tmp8 = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 1 }, i32 0, i1 false) + %ix = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef) + %texIx = sitofp i32 %ix to float + %tex = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp6, %dx.types.ResourceProperties { i32 1, i32 1033 }) + %samp = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp7, %dx.types.ResourceProperties { i32 14, i32 0 }) + %tmp10 = call %dx.types.ResRet.f32 @dx.op.sample.f32(i32 60, %dx.types.Handle %tex, %dx.types.Handle %samp, float %texIx, float undef, float undef, float undef, i32 0, i32 undef, i32 undef, float undef) + %tmp11 = extractvalue %dx.types.ResRet.f32 %tmp10, 0 + %tmp12 = extractvalue %dx.types.ResRet.f32 %tmp10, 1 + %tmp13 = extractvalue %dx.types.ResRet.f32 %tmp10, 2 + %tmp14 = extractvalue %dx.types.ResRet.f32 %tmp10, 3 + %rwTex = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp8, %dx.types.ResourceProperties { i32 4097, i32 1033 }) + call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle %rwTex, i32 0, i32 undef, i32 undef, float %tmp11, float %tmp12, float %tmp13, float %tmp14, i8 15) + %scalBuf = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp4, %dx.types.ResourceProperties { i32 12, i32 4 }) + ; Invalid RC on Load (and inevitably invalid RK). + %badRCLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %samp, i32 0, i32 0, i8 1, i32 4) + %badRC = extractvalue %dx.types.ResRet.f32 %badRCLd, 0 + ; Invalid RK on Load. + %badRKLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tex, i32 1, i32 0, i8 1, i32 4) + %badRK = extractvalue %dx.types.ResRet.f32 %badRKLd, 0 + ; Non-constant alignment on Load. 
+ %badAlnLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %scalBuf, i32 2, i32 0, i8 1, i32 %ix) + %badAln = extractvalue %dx.types.ResRet.f32 %badAlnLd, 0 + ; Undefined offset on Structured Buffer Load. + %badStrOffLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %scalBuf, i32 3, i32 undef, i8 1, i32 4) + %badStrOff = extractvalue %dx.types.ResRet.f32 %badStrOffLd, 0 + %baBuf = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp3, %dx.types.ResourceProperties { i32 11, i32 0 }) + ; Defined (and therefore invalid) offset on Byte Address Buffer Load. + %badBabOffLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %baBuf, i32 0, i32 0, i8 1, i32 4) + %badBabOff = extractvalue %dx.types.ResRet.f32 %badBabOffLd, 0 + + %vecBuf = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp5, %dx.types.ResourceProperties { i32 12, i32 16 }) + ; Invalid RC on Vector Load (and inevitably invalid RK). + %badRCVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %samp, i32 0, i32 0, i32 4) + %badRCVc = extractvalue %dx.types.ResRet.v4f32 %badRCVcLd, 0 + ; Invalid RK on Vector Load. + %badRKVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %tex, i32 1, i32 0, i32 4) + %badRKVc = extractvalue %dx.types.ResRet.v4f32 %badRKVcLd, 0 + ; Non-constant alignment on Vector Load. + %badAlnVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %vecBuf, i32 2, i32 0, i32 %ix) + %badAlnVc = extractvalue %dx.types.ResRet.v4f32 %badAlnVcLd, 0 + ; Undefined offset on Structured Buffer Vector Load. 
+ %badStrOffVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %vecBuf, i32 3, i32 undef, i32 4) + %badStrOffVc = extractvalue %dx.types.ResRet.v4f32 %badStrOffVcLd, 0 + ; Defined (and therefore invalid) offset on Byte Address Buffer Vector Load. + %badBabOffVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %baBuf, i32 4, i32 0, i32 4) + %badBabOffVc = extractvalue %dx.types.ResRet.v4f32 %badBabOffVcLd, 0 + + ; Store to non-UAV. + %tmp38 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %scalBuf, i32 0, i32 0, float %badRC, float undef, float undef, float undef, i8 1, i32 4) + ; Invalid RK on Store. + %tmp39 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %rwTex, i32 1, i32 0, float %badRK, float undef, float undef, float undef, i8 1, i32 4) + ; Non-constant alignment on Store. + %tmp40 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp40, i32 2, i32 0, float %badAln, float undef, float undef, float undef, i8 1, i32 %ix) + ; Undefined offset on Structured Buffer Store. + %tmp41 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp41, i32 3, i32 undef, float %badStrOff, float undef, float undef, float undef, i8 1, i32 4) + ; Undefined value Store. 
+ %tmp42 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp42, i32 4, i32 0, float undef, float undef, float undef, float undef, i8 1, i32 4) + ; Defined (and therefore invalid) offset on Byte Address Buffer Store. + %tmp44 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp, %dx.types.ResourceProperties { i32 4107, i32 0 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp44, i32 0, i32 0, float %badBabOff, float undef, float undef, float undef, i8 1, i32 4) + + ; Vector Store to non-UAV. + %tmp45 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %rwTex, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %vecBuf, i32 0, i32 0, <4 x float> %badRCVc, i32 4) + ; Invalid RK on Vector Store. + %tmp46 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %rwTex, i32 1, i32 0, <4 x float> %badRKVc, i32 4) + ; Non-constant alignment on Vector Store. + %tmp47 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp47, i32 2, i32 0, <4 x float> %badAlnVc, i32 %ix) + ; Undefined offset on Structured Buffer Vector Store. + %tmp48 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp48, i32 3, i32 undef, <4 x float> %badStrOffVc, i32 4) + ; Undefined value Vector Store. 
+ %tmp49 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp49, i32 4, i32 0, <4 x float> undef, i32 4) + ; Defined (and therefore invalid) offset on Byte Address Buffer Vector Store. + %tmp51 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp, %dx.types.ResourceProperties { i32 4107, i32 0 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp51, i32 4, i32 0, <4 x float> %badBabOffVc, i32 4) + + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %tmp11) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %tmp12) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %tmp13) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %tmp14) + ret void +} + +declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #2 +declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0 +declare %dx.types.ResRet.f32 @dx.op.sample.f32(i32, %dx.types.Handle, %dx.types.Handle, float, float, float, float, i32, i32, i32, float) #1 +declare void @dx.op.textureStore.f32(i32, %dx.types.Handle, i32, i32, i32, float, float, float, float, i8) #0 +declare void @dx.op.rawBufferStore.f32(i32, %dx.types.Handle, i32, i32, float, float, float, float, i8, i32) #0 +declare %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32, %dx.types.Handle, i32, i32, i8, i32) #1 +declare void @dx.op.rawBufferVectorStore.v4f32(i32, %dx.types.Handle, i32, i32, <4 x float>, i32) #0 +declare %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32, %dx.types.Handle, i32, i32, i32) #1 +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 +declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } 
+attributes #2 = { nounwind readnone } + +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.resources = !{!3} +!dx.viewIdState = !{!18} +!dx.entryPoints = !{!19} + +!1 = !{i32 1, i32 9} +!2 = !{!"ps", i32 6, i32 9} +!3 = !{!4, !12, null, !16} +!4 = !{!5, !7, !9, !11} +!5 = !{i32 0, %"class.Texture1D >"* undef, !"", i32 0, i32 0, i32 1, i32 1, i32 0, !6} +!6 = !{i32 0, i32 9} +!7 = !{i32 1, %"class.StructuredBuffer >"* undef, !"", i32 0, i32 1, i32 1, i32 12, i32 0, !8} +!8 = !{i32 1, i32 16} +!9 = !{i32 2, %"class.StructuredBuffer"* undef, !"", i32 0, i32 2, i32 1, i32 12, i32 0, !10} +!10 = !{i32 1, i32 4} +!11 = !{i32 3, %struct.ByteAddressBuffer* undef, !"", i32 0, i32 3, i32 1, i32 11, i32 0, null} +!12 = !{!13, !14, !15} +!13 = !{i32 0, %"class.RWStructuredBuffer >"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !8} +!14 = !{i32 1, %"class.RWStructuredBuffer"* undef, !"", i32 0, i32 1, i32 1, i32 12, i1 false, i1 false, i1 false, !10} +!15 = !{i32 2, %struct.RWByteAddressBuffer* undef, !"", i32 0, i32 2, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!16 = !{!17} +!17 = !{i32 0, %struct.SamplerState* undef, !"", i32 0, i32 0, i32 1, i32 0, null} +!18 = !{[3 x i32] [i32 1, i32 4, i32 0]} +!19 = !{void ()* @main, !"main", !20, !3, !27} +!20 = !{!21, !24, null} +!21 = !{!22} +!22 = !{i32 0, !"IX", i8 4, i8 0, !23, i8 1, i32 1, i8 1, i32 0, i8 0, null} +!23 = !{i32 0} +!24 = !{!25} +!25 = !{i32 0, !"SV_Target", i8 9, i8 16, !23, i8 0, i32 1, i8 4, i32 0, i8 0, !26} +!26 = !{i32 3, i32 15} +!27 = !{i32 0, i64 8589934608} diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_passing.ll b/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_passing.ll new file mode 100644 index 0000000000..e527125009 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_passing.ll @@ -0,0 +1,110 @@ +; REQUIRES: dxil-1-9 +; RUN: %dxv %s 2>&1 | FileCheck %s + +; CHECK: Validation 
succeeded. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.AttribType = type { float, float } +%dx.types.HitObject = type { i8* } + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %attrs = alloca %struct.AttribType, align 4 + %nop = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() + + %r269 = call i1 @dx.op.hitObject_StateScalar.i1(i32 269, %dx.types.HitObject %nop) ; HitObject_IsMiss(hitObject) + + %r270 = call i1 @dx.op.hitObject_StateScalar.i1(i32 270, %dx.types.HitObject %nop) ; HitObject_IsHit(hitObject) + + %r271 = call i1 @dx.op.hitObject_StateScalar.i1(i32 271, %dx.types.HitObject %nop) ; HitObject_IsNop(hitObject) + + %r272 = call i32 @dx.op.hitObject_StateScalar.i32(i32 272, %dx.types.HitObject %nop) ; HitObject_RayFlags(hitObject) + + %r273 = call float @dx.op.hitObject_StateScalar.f32(i32 273, %dx.types.HitObject %nop) ; HitObject_RayTMin(hitObject) + + %r274 = call float @dx.op.hitObject_StateScalar.f32(i32 274, %dx.types.HitObject %nop) ; HitObject_RayTCurrent(hitObject) + + %r275 = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject %nop, i32 0) ; HitObject_WorldRayOrigin(hitObject,component) + + %r276 = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject %nop, i32 0) ; HitObject_WorldRayDirection(hitObject,component) + + %r277 = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %nop, i32 0) ; HitObject_ObjectRayOrigin(hitObject,component) + + %r278 = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject %nop, i32 0) ; HitObject_ObjectRayDirection(hitObject,component) + + %r279 = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %nop, i32 0, i32 0) ; HitObject_ObjectToWorld3x4(hitObject,row,col) + + %r280 = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %nop, i32 0, i32 
0) ; HitObject_WorldToObject3x4(hitObject,row,col) + + %r281 = call i32 @dx.op.hitObject_StateScalar.i32(i32 281, %dx.types.HitObject %nop) ; HitObject_GeometryIndex(hitObject) + + %r282 = call i32 @dx.op.hitObject_StateScalar.i32(i32 282, %dx.types.HitObject %nop) ; HitObject_InstanceIndex(hitObject) + + %r283 = call i32 @dx.op.hitObject_StateScalar.i32(i32 283, %dx.types.HitObject %nop) ; HitObject_InstanceID(hitObject) + + %r284 = call i32 @dx.op.hitObject_StateScalar.i32(i32 284, %dx.types.HitObject %nop) ; HitObject_PrimitiveIndex(hitObject) + + %r285 = call i32 @dx.op.hitObject_StateScalar.i32(i32 285, %dx.types.HitObject %nop) ; HitObject_HitKind(hitObject) + + %r286 = call i32 @dx.op.hitObject_StateScalar.i32(i32 286, %dx.types.HitObject %nop) ; HitObject_ShaderTableIndex(hitObject) + + %r287 = call %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32 287, %dx.types.HitObject %nop, i32 1) ; HitObject_SetShaderTableIndex(hitObject,shaderTableIndex) + + %r288 = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject %nop, i32 42) ; HitObject_LoadLocalRootTableConstant(hitObject,offset) + + call void @dx.op.hitObject_Attributes.struct.AttribType(i32 289, %dx.types.HitObject %nop, %struct.AttribType* nonnull %attrs) ; HitObject_Attributes(hitObject,attributes) + ret void +} + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeNop(i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32, %dx.types.HitObject, i32) #1 + +; Function Attrs: nounwind readnone +declare i1 @dx.op.hitObject_StateScalar.i1(i32, %dx.types.HitObject) #1 + +; Function Attrs: nounwind readnone +declare i32 @dx.op.hitObject_StateScalar.i32(i32, %dx.types.HitObject) #1 + +; Function Attrs: nounwind readonly +declare i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32, %dx.types.HitObject, i32) #2 + +; Function Attrs: nounwind readnone +declare float 
@dx.op.hitObject_StateVector.f32(i32, %dx.types.HitObject, i32) #1 + +; Function Attrs: nounwind argmemonly +declare void @dx.op.hitObject_Attributes.struct.AttribType(i32, %dx.types.HitObject, %struct.AttribType*) #3 + +; Function Attrs: nounwind readnone +declare float @dx.op.hitObject_StateScalar.f32(i32, %dx.types.HitObject) #1 + +; Function Attrs: nounwind readnone +declare float @dx.op.hitObject_StateMatrix.f32(i32, %dx.types.HitObject, i32, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } +attributes #3 = { nounwind argmemonly } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.typeAnnotations = !{!2} +!dx.entryPoints = !{!3, !4} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{i32 1, void ()* @"\01?main@@YAXXZ", !5} +!3 = !{null, !"", null, null, !6} +!4 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !7} +!5 = !{!8} +!6 = !{i32 0, i64 0} +!7 = !{i32 8, i32 7, i32 5, !9} +!8 = !{i32 1, !10, !10} +!9 = !{i32 0} +!10 = !{} + diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_fromrayquery_passing.ll b/tools/clang/test/LitDXILValidation/ser_hitobject_fromrayquery_passing.ll new file mode 100644 index 0000000000..5b0c65fd6b --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_fromrayquery_passing.ll @@ -0,0 +1,84 @@ +; REQUIRES: dxil-1-9 +; RUN: %dxv %s | FileCheck %s + +; CHECK: Validation succeeded. 
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%struct.Payload = type { <3 x float> } +%struct.CustomAttrs = type { float, float } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.HitObject = type { i8* } +%struct.RaytracingAccelerationStructure = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4 + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", align 4 + %2 = alloca %struct.CustomAttrs, align 4 + %3 = call i32 @dx.op.allocateRayQuery(i32 178, i32 5) ; AllocateRayQuery(constRayFlags) + %4 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %5 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %4, %dx.types.ResourceProperties { i32 16, i32 0 }) ; AnnotateHandle(res,props) resource: RTAccelerationStructure + call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %3, %dx.types.Handle %5, i32 0, i32 255, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 9.999000e+03) ; RayQuery_TraceRayInline(rayQueryHandle,accelerationStructure,rayFlags,instanceInclusionMask,origin_X,origin_Y,origin_Z,tMin,direction_X,direction_Y,direction_Z,tMax) + %6 = call %dx.types.HitObject @dx.op.hitObject_FromRayQuery(i32 263, i32 %3) ; HitObject_FromRayQuery(rayQueryHandle) + %7 = call %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32 264, i32 %3, i32 16, %struct.CustomAttrs* nonnull %2) ; 
HitObject_FromRayQueryWithAttrs(rayQueryHandle,HitKind,CommittedAttribs) + ret void +} + +; Function Attrs: nounwind +declare i32 @dx.op.allocateRayQuery(i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.rayQuery_TraceRayInline(i32, i32, %dx.types.Handle, i32, i32, float, float, float, float, float, float, float, float) #0 + +; Function Attrs: nounwind readonly +declare %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32, i32, i32, %struct.CustomAttrs*) #1 + +; Function Attrs: nounwind readonly +declare %dx.types.HitObject @dx.op.hitObject_FromRayQuery(i32, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind readnone } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.typeAnnotations = !{!6} +!dx.dxrPayloadAnnotations = !{!10} +!dx.entryPoints = !{!13, !15} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{!3, null, null, null} +!3 = !{!4} +!4 = !{i32 0, %struct.RaytracingAccelerationStructure* bitcast (%dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A" to %struct.RaytracingAccelerationStructure*), !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !5} +!5 = !{i32 0, i32 4} +!6 = !{i32 1, void ()* @"\01?main@@YAXXZ", !7} +!7 = !{!8} +!8 = !{i32 1, !9, !9} +!9 = !{} +!10 = !{i32 0, %struct.Payload undef, !11} +!11 = !{!12} +!12 = !{i32 0, i32 8210} +!13 = !{null, !"", null, !2, !14} +!14 = !{i32 0, i64 33554432} +!15 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !16} +!16 = !{i32 8, i32 7, i32 5, !17} +!17 = !{i32 0} diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_traceinvoke_passing.ll 
b/tools/clang/test/LitDXILValidation/ser_hitobject_traceinvoke_passing.ll new file mode 100644 index 0000000000..f3b99300be --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_traceinvoke_passing.ll @@ -0,0 +1,68 @@ +; REQUIRES: dxil-1-9 +; RUN: %dxv %s 2>&1 | FileCheck %s + +; CHECK: Validation succeeded. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%struct.Payload = type { <3 x float> } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.HitObject = type { i8* } +%struct.RaytracingAccelerationStructure = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4 + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", align 4 + %2 = alloca %struct.Payload, align 4 + %3 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %4 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 16, i32 0 }) ; AnnotateHandle(res,props) resource: RTAccelerationStructure + %5 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + call void @dx.op.hitObject_Invoke.struct.Payload(i32 267, %dx.types.HitObject %5, %struct.Payload* 
nonnull %2) ; HitObject_Invoke(hitObject,payload) + ret void +} + +; Function Attrs: nounwind +declare %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.Payload*) #0 + +; Function Attrs: nounwind +declare void @dx.op.hitObject_Invoke.struct.Payload(i32, %dx.types.HitObject, %struct.Payload*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.typeAnnotations = !{!3} +!dx.dxrPayloadAnnotations = !{!4} +!dx.entryPoints = !{!5, !6} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{!7, null, null, null} +!3 = !{i32 1, void ()* @"\01?main@@YAXXZ", !8} +!4 = !{i32 0, %struct.Payload undef, !9} +!5 = !{null, !"", null, !2, null} +!6 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !10} +!7 = !{!11} +!8 = !{!12} +!9 = !{!13} +!10 = !{i32 8, i32 7, i32 5, !14} +!11 = !{i32 0, %struct.RaytracingAccelerationStructure* bitcast (%dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A" to %struct.RaytracingAccelerationStructure*), !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !15} +!12 = !{i32 1, !16, !16} +!13 = !{i32 0, i32 8210} +!14 = !{i32 0} +!15 = !{i32 0, i32 4} +!16 = !{} diff --git a/tools/clang/test/LitDXILValidation/ser_maybereorder_failing.ll b/tools/clang/test/LitDXILValidation/ser_maybereorder_failing.ll new file mode 100644 index 0000000000..4502b9241d --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_maybereorder_failing.ll @@ -0,0 +1,60 @@ +; REQUIRES: dxil-1-9 +; 
RUN: not %dxv %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.HitObject = type { i8* } + +; CHECK: Function: ?main@@YAXXZ: error: Use of undef coherence hint or num coherence hint bits in MaybeReorderThread. +; CHECK-NEXT: note: at 'call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %nop, i32 1, i32 undef)' + +; CHECK: Function: ?main@@YAXXZ: error: Use of undef coherence hint or num coherence hint bits in MaybeReorderThread. +; CHECK-NEXT: note: at 'call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %nop, i32 undef, i32 1)' + +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK-NEXT: note: at 'call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject undef, i32 11, i32 0)' + +; CHECK: Validation failed. + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %nop = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() + + ; Validate that hit object is not undef. + call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject undef, i32 11, i32 0) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + + ; Validate that coherence hint is not undef while numCoherenceHintBitsFromLSB is not 0. + call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %nop, i32 undef, i32 1) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + + ; Validate that num coherence hint bits from LSB is not undef. 
+ call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %nop, i32 1, i32 undef) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + ret void +} + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeNop(i32) #1 + +; Function Attrs: nounwind +declare void @dx.op.maybeReorderThread(i32, %dx.types.HitObject, i32, i32) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.typeAnnotations = !{!2} +!dx.entryPoints = !{!6, !8} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{i32 1, void ()* @"\01?main@@YAXXZ", !3} +!3 = !{!4} +!4 = !{i32 1, !5, !5} +!5 = !{} +!6 = !{null, !"", null, null, !7} +!7 = !{i32 0, i64 0} +!8 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !9} +!9 = !{i32 8, i32 7, i32 5, !10} +!10 = !{i32 0} diff --git a/tools/clang/test/LitDXILValidation/ser_maybereorder_passing.ll b/tools/clang/test/LitDXILValidation/ser_maybereorder_passing.ll new file mode 100644 index 0000000000..8ee7677bd4 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_maybereorder_passing.ll @@ -0,0 +1,46 @@ +; REQUIRES: dxil-1-9 +; RUN: %dxv %s | FileCheck %s + +; CHECK: Validation succeeded. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.HitObject = type { i8* } + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %nop = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() + call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %nop, i32 241, i32 3) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + + ; Coherence hint disabled, accept 'undef' coherence hint bits. 
+ call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %nop, i32 undef, i32 0) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + ret void +} + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeNop(i32) #1 + +; Function Attrs: nounwind +declare void @dx.op.maybeReorderThread(i32, %dx.types.HitObject, i32, i32) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.typeAnnotations = !{!2} +!dx.entryPoints = !{!6, !8} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{i32 1, void ()* @"\01?main@@YAXXZ", !3} +!3 = !{!4} +!4 = !{i32 1, !5, !5} +!5 = !{} +!6 = !{null, !"", null, null, !7} +!7 = !{i32 0, i64 0} +!8 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !9} +!9 = !{i32 8, i32 7, i32 5, !10} +!10 = !{i32 0} diff --git a/tools/clang/test/LitDXILValidation/vector-validation.ll b/tools/clang/test/LitDXILValidation/vector-validation.ll new file mode 100644 index 0000000000..74e8116e88 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/vector-validation.ll @@ -0,0 +1,78 @@ +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; Confirm that 6.9 specific LLVM operations and DXIL intrinsics fail in 6.8 + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResBind = type { i32, i32, i32, i8 } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.v4f32 = type { <4 x float>, i32 } +%"class.RWStructuredBuffer >" = type { <4 x float> } + +; CHECK: Function: main: error: Instructions must be of an allowed type. +; CHECK: note: at '%6 = insertelement <4 x float> undef, float %2, i32 0 +; CHECK: Function: main: error: Instructions must be of an allowed type. 
+; CHECK: note: at '%7 = shufflevector <4 x float> %6, <4 x float> undef, <4 x i32> zeroinitializer +; CHECK: Function: main: error: Instructions must be of an allowed type. +; CHECK: note: at '%8 = extractelement <4 x float> %5, i32 2 +; CHECK: Function: main: error: Opcode RawBufferVectorLoad not valid in shader model vs_6_8. +; CHECK: note: at '%4 = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %3, i32 1, i32 0, i32 8)' +; CHECK: Function: main: error: Opcode RawBufferVectorStore not valid in shader model vs_6_8. +; CHECK: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %3, i32 0, i32 0, <4 x float> %7, i32 4)' +; CHECK: Function: main: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. +; CHECK: Function: main: error: Function uses features incompatible with the shader model. +define void @main() { + %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) + %2 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef) + %3 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4108, i32 16 }) + %4 = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %3, i32 1, i32 0, i32 8) + %5 = extractvalue %dx.types.ResRet.v4f32 %4, 0 + %6 = insertelement <4 x float> undef, float %2, i32 0 + %7 = shufflevector <4 x float> %6, <4 x float> undef, <4 x i32> zeroinitializer + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %3, i32 0, i32 0, <4 x float> %7, i32 4) + %8 = extractelement <4 x float> %5, i32 2 + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %8) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %8) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, 
float %8) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %8) + ret void +} + +declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #0 +declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #1 +declare %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32, %dx.types.Handle, i32, i32, i32) #2 +declare void @dx.op.rawBufferVectorStore.v4f32(i32, %dx.types.Handle, i32, i32, <4 x float>, i32) #1 +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #0 +declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #0 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } +attributes #2 = { nounwind readonly } + +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.resources = !{!3} +!dx.viewIdState = !{!7} +!dx.entryPoints = !{!8} + +!1 = !{i32 1, i32 8} +!2 = !{!"vs", i32 6, i32 8} +!3 = !{null, !4, null, null} +!4 = !{!5} +!5 = !{i32 0, %"class.RWStructuredBuffer >"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !6} +!6 = !{i32 1, i32 16} +!7 = !{[3 x i32] [i32 1, i32 4, i32 0]} +!8 = !{void ()* @main, !"main", !9, !3, !17} +!9 = !{!10, !14, null} +!10 = !{!11} +!11 = !{i32 0, !"VAL", i8 9, i8 0, !12, i8 0, i32 1, i8 1, i32 0, i8 0, !13} +!12 = !{i32 0} +!13 = !{i32 3, i32 1} +!14 = !{!15} +!15 = !{i32 0, !"SV_Position", i8 9, i8 3, !12, i8 4, i32 1, i8 4, i32 0, i8 0, !16} +!16 = !{i32 3, i32 15} +!17 = !{i32 0, i64 8590000144} + diff --git a/tools/clang/test/SemaHLSL/attributes/reordercoherent_ast.hlsl b/tools/clang/test/SemaHLSL/attributes/reordercoherent_ast.hlsl new file mode 100644 index 0000000000..53366de828 --- /dev/null +++ b/tools/clang/test/SemaHLSL/attributes/reordercoherent_ast.hlsl @@ -0,0 +1,17 @@ +// RUN: %dxc -T lib_6_9 -ast-dump %s | FileCheck %s +// REQUIRES: dxil-1-9 + +// CHECK: |-VarDecl {{.*}} used uav1 'reordercoherent RWTexture1D':'RWTexture1D >' +// CHECK-NEXT: | 
|-HLSLReorderCoherentAttr +reordercoherent RWTexture1D uav1 : register(u3); +RWBuffer uav2; + +[shader("raygeneration")] +void main() +{ + // CHECK: | `-VarDecl {{.*}} uav3 'reordercoherent RWTexture1D':'RWTexture1D >' cinit + // CHECK-NEXT: | | + // CHECK-NEXT: | | + // CHECK-NEXT: | `-HLSLReorderCoherentAttr + reordercoherent RWTexture1D uav3 = uav1; +} diff --git a/tools/clang/test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl b/tools/clang/test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl index 4fcce749d7..ece7e3f2f4 100644 --- a/tools/clang/test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl +++ b/tools/clang/test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl @@ -1,3 +1,4 @@ +// REQUIRES: spirv // RUN: %dxc -T ps_6_0 -E main -verify -spirv %s struct S diff --git a/tools/clang/test/SemaHLSL/const-default.hlsl b/tools/clang/test/SemaHLSL/const-default.hlsl index 2ebb6fe52e..6b5e43e0e9 100644 --- a/tools/clang/test/SemaHLSL/const-default.hlsl +++ b/tools/clang/test/SemaHLSL/const-default.hlsl @@ -33,7 +33,11 @@ class MyClass { ConstantBuffer g_const_buffer2; TextureBuffer g_texture_buffer2; +// expected-note@+2 {{forward declaration of 'FWDDeclStruct'}} +// expected-note@+1 {{forward declaration of 'FWDDeclStruct'}} struct FWDDeclStruct; +// expected-note@+2 {{forward declaration of 'FWDDeclClass'}} +// expected-note@+1 {{forward declaration of 'FWDDeclClass'}} class FWDDeclClass; // Ensure forward declared struct/class fails as expected diff --git a/tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm68_unavailable.hlsl b/tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm68_unavailable.hlsl new file mode 100644 index 0000000000..fc42f99a9a --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm68_unavailable.hlsl @@ -0,0 +1,8 @@ +// RUN: %dxc -Tlib_6_8 -verify %s + +[Shader("compute")] +[numthreads(1, 1, 1)] +void main() { + // expected-error@+1{{invalid SemanticFlags for 
Barrier operation; expected 0 or some combination of GROUP_SYNC, GROUP_SCOPE, DEVICE_SCOPE flags}} + Barrier(0, REORDER_SCOPE); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm69_passing.hlsl b/tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm69_passing.hlsl new file mode 100644 index 0000000000..18271a2b11 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm69_passing.hlsl @@ -0,0 +1,12 @@ +// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s + +RWByteAddressBuffer BAB : register(u1, space0); + +[shader("raygeneration")] +void main() { +// CHECK: call void @dx.op.barrierByMemoryType(i32 244, i32 1, i32 8) + Barrier(UAV_MEMORY, REORDER_SCOPE); + +// CHECK: call void @dx.op.barrierByMemoryHandle(i32 245, %dx.types.Handle %{{[^ ]+}}, i32 8) + Barrier(BAB, REORDER_SCOPE); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/hitobject_reorder.hlsl b/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/hitobject_reorder.hlsl new file mode 100644 index 0000000000..fa3ab68506 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/hitobject_reorder.hlsl @@ -0,0 +1,10 @@ +// RUN: %dxc -T lib_6_9 -E main %s -verify + +// expected-no-diagnostics + +[shader("raygeneration")] void main() { + dx::HitObject hit; + dx::MaybeReorderThread(hit); + dx::MaybeReorderThread(hit, 0xf1, 3); + dx::MaybeReorderThread(0xf2, 7); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-entry-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-entry-errors.hlsl new file mode 100644 index 0000000000..3c97ea0a77 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-entry-errors.hlsl @@ -0,0 +1,62 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +struct [raypayload] Payload +{ + float elem + : write(caller,closesthit,anyhit,miss) + : read(caller,closesthit,anyhit,miss); +}; + +struct Attribs { float2 barys; }; +void CallReorder() +{ +// 
expected-error@+6{{dx::MaybeReorderThread is unavailable in shader stage 'compute' (requires 'raygeneration')}} +// expected-error@+5{{dx::MaybeReorderThread is unavailable in shader stage 'callable' (requires 'raygeneration')}} +// expected-error@+4{{dx::MaybeReorderThread is unavailable in shader stage 'intersection' (requires 'raygeneration')}} +// expected-error@+3{{dx::MaybeReorderThread is unavailable in shader stage 'anyhit' (requires 'raygeneration')}} +// expected-error@+2{{dx::MaybeReorderThread is unavailable in shader stage 'closesthit' (requires 'raygeneration')}} +// expected-error@+1{{dx::MaybeReorderThread is unavailable in shader stage 'miss' (requires 'raygeneration')}} + dx::MaybeReorderThread(0,0); +} + +// expected-note@+3{{entry function defined here}} +[shader("compute")] +[numthreads(4,4,4)] +void mainReorderCS(uint ix : SV_GroupIndex, uint3 id : SV_GroupThreadID) { + CallReorder(); +} + +[shader("raygeneration")] +void mainReorderRG() { + CallReorder(); +} + +// expected-note@+2{{entry function defined here}} +[shader("callable")] +void mainReorderCALL(inout Attribs attrs) { + CallReorder(); +} + +// expected-note@+2{{entry function defined here}} +[shader("intersection")] +void mainReorderIS() { + CallReorder(); +} + +// expected-note@+2{{entry function defined here}} +[shader("anyhit")] +void mainReorderAH(inout Payload pld, in Attribs attrs) { + CallReorder(); +} + +// expected-note@+2{{entry function defined here}} +[shader("closesthit")] +void mainReorderCH(inout Payload pld, in Attribs attrs) { + CallReorder(); +} + +// expected-note@+2{{entry function defined here}} +[shader("miss")] +void mainReorderMS(inout Payload pld) { + CallReorder(); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-unavailable-pre-sm69.hlsl b/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-unavailable-pre-sm69.hlsl new file mode 100644 index 0000000000..db2d0fd2e3 --- /dev/null +++ 
b/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-unavailable-pre-sm69.hlsl @@ -0,0 +1,9 @@ +// RUN: %dxc -T lib_6_8 %s -verify + +// Check that dx::MaybeReorderThread is unavailable pre SM 6.9. + +[shader("raygeneration")] +void main() { + // expected-error@+1{{intrinsic dx::MaybeReorderThread potentially used by ''main'' requires shader model 6.9 or greater}} + dx::MaybeReorderThread(15u, 4u); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/namespace/dx-namespace-pre-sm69.hlsl b/tools/clang/test/SemaHLSL/hlsl/namespace/dx-namespace-pre-sm69.hlsl new file mode 100644 index 0000000000..e23f398538 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/namespace/dx-namespace-pre-sm69.hlsl @@ -0,0 +1,8 @@ +// RUN: %dxc -T lib_6_8 %s -verify + +// expected-no-diagnostics +using namespace dx; + +[shader("raygeneration")] +void main() { +} diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-entry-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-entry-errors.hlsl new file mode 100644 index 0000000000..44afcf47e7 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-entry-errors.hlsl @@ -0,0 +1,58 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +struct [raypayload] Payload +{ + float elem + : write(caller,anyhit,closesthit,miss) + : read(caller,anyhit,closesthit,miss); +}; + +struct Attribs { float2 barys; }; + +dx::HitObject UseHitObject() { + return dx::HitObject::MakeNop(); +} + +// expected-note@+3{{entry function defined here}} +[shader("compute")] +[numthreads(4,4,4)] +void mainHitCS(uint ix : SV_GroupIndex, uint3 id : SV_GroupThreadID) { +// expected-error@-7{{dx::HitObject is unavailable in shader stage 'compute' (requires 'raygeneration', 'closesthit' or 'miss')}} + UseHitObject(); +} + +// expected-note@+2{{entry function defined here}} +[shader("callable")] +void mainHitCALL(inout Attribs attrs) { +// expected-error@-14{{dx::HitObject is unavailable in shader stage 'callable' (requires
'raygeneration', 'closesthit' or 'miss')}} + UseHitObject(); +} + +// expected-note@+2{{entry function defined here}} +[shader("intersection")] +void mainHitIS() { +// expected-error@-21{{dx::HitObject is unavailable in shader stage 'intersection' (requires 'raygeneration', 'closesthit' or 'miss')}} + UseHitObject(); +} + +// expected-note@+2{{entry function defined here}} +[shader("anyhit")] +void mainHitAH(inout Payload pld, in Attribs attrs) { +// expected-error@-28{{dx::HitObject is unavailable in shader stage 'anyhit' (requires 'raygeneration', 'closesthit' or 'miss')}} + UseHitObject(); +} + +[shader("raygeneration")] +void mainHitRG() { + UseHitObject(); +} + +[shader("closesthit")] +void mainHitCH(inout Payload pld, in Attribs attrs) { + UseHitObject(); +} + +[shader("miss")] +void mainHitMS(inout Payload pld) { + UseHitObject(); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-in-buffer.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-in-buffer.hlsl new file mode 100644 index 0000000000..baa3a07a5b --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-in-buffer.hlsl @@ -0,0 +1,4 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +// expected-error@+1{{'dx::HitObject' is an object and cannot be used as a type parameter}} +RWStructuredBuffer InvalidBuffer; diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unavailable-pre-sm69.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unavailable-pre-sm69.hlsl new file mode 100644 index 0000000000..59c8dfbe2f --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unavailable-pre-sm69.hlsl @@ -0,0 +1,11 @@ +// RUN: %dxc -T lib_6_8 %s -verify + +// Check that the HitObject is unavailable pre SM 6.9. 
+ +[shader("raygeneration")] +void main() { + // expected-error@+3{{intrinsic dx::HitObject::MakeNop potentially used by ''main'' requires shader model 6.9 or greater}} + // expected-error@+2{{potential misuse of built-in type 'dx::HitObject' in shader model lib_6_8; introduced in shader model 6.9}} + // expected-error@+1{{potential misuse of built-in type 'dx::HitObject' in shader model lib_6_8; introduced in shader model 6.9}} + dx::HitObject hit = dx::HitObject::MakeNop(); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unsupported-vs.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unsupported-vs.hlsl new file mode 100644 index 0000000000..4b6c45806b --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unsupported-vs.hlsl @@ -0,0 +1,8 @@ +// RUN: %dxc -T vs_6_9 %s -verify + +// expected-note@+1{{entry function defined here}} +float main(RayDesc rayDesc: RAYDESC) : OUT { +// expected-error@+1{{dx::HitObject is unavailable in shader stage 'vertex' (requires 'raygeneration', 'closesthit' or 'miss')}} + dx::HitObject::MakeNop(); + return 0.f; +} diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-using-namespace.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-using-namespace.hlsl new file mode 100644 index 0000000000..c266d81ddb --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-using-namespace.hlsl @@ -0,0 +1,36 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +// This test checks that HitObject can be used with 'using namespace dx' instead of explicit namespace prefix +// expected-no-diagnostics + +struct [raypayload] Payload +{ + float elem + : write(caller,anyhit,closesthit,miss) + : read(caller,anyhit,closesthit,miss); +}; + +struct Attribs { float2 barys; }; + +using namespace dx; + +[shader("raygeneration")] +void main() +{ + HitObject hit; + MaybeReorderThread(hit); +} + +[shader("closesthit")] +void closestHit(inout Payload pld, 
in Attribs attrs) +{ + // Create a HitObject + HitObject hit; +} + +[shader("miss")] +void missShader(inout Payload pld) +{ + // Also test using a static method + HitObject hit = HitObject::MakeNop(); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-without-namespace.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-without-namespace.hlsl new file mode 100644 index 0000000000..cb7a24e1c7 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-without-namespace.hlsl @@ -0,0 +1,39 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +struct [raypayload] Payload +{ + float elem + : write(caller,anyhit,closesthit,miss) + : read(caller,anyhit,closesthit,miss); +}; + +struct Attribs { float2 barys; }; + +[shader("raygeneration")] +void main() +{ + // expected-error@+1{{unknown type name 'HitObject'}} + HitObject hit; +} + +[shader("closesthit")] +void closestHit(inout Payload pld, in Attribs attrs) +{ + // expected-error@+1{{unknown type name 'HitObject'}} + HitObject hit; +} + +[shader("miss")] +void missShader(inout Payload pld) +{ + // expected-error@+1{{unknown type name 'HitObject'}} + HitObject hit; +} + +// Also test API methods +[shader("raygeneration")] +void main2() +{ + // expected-error@+1{{use of undeclared identifier 'HitObject'}} + HitObject::MakeNop(); +} \ No newline at end of file diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/maybereorderthread-without-namespace.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/maybereorderthread-without-namespace.hlsl new file mode 100644 index 0000000000..edf7e4fa71 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/maybereorderthread-without-namespace.hlsl @@ -0,0 +1,31 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +struct [raypayload] Payload +{ + float elem + : write(caller,anyhit,closesthit,miss) + : read(caller,anyhit,closesthit,miss); +}; + +struct Attribs { float2 barys; }; + +[shader("raygeneration")] +void main() +{ + // 
expected-error@+1{{use of undeclared identifier 'MaybeReorderThread'}} + MaybeReorderThread(1); +} + +[shader("closesthit")] +void closestHit(inout Payload pld, in Attribs attrs) +{ + // expected-error@+1{{use of undeclared identifier 'MaybeReorderThread'}} + MaybeReorderThread(2); +} + +[shader("miss")] +void missShader(inout Payload pld) +{ + // expected-error@+1{{use of undeclared identifier 'MaybeReorderThread'}} + MaybeReorderThread(3); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls-hs.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls-hs.hlsl new file mode 100644 index 0000000000..1625454360 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls-hs.hlsl @@ -0,0 +1,24 @@ +// RUN: %dxc -DTYPE=float -DNUM=7 -T hs_6_9 -verify %s + +struct HsConstantData { + float Edges[3] : SV_TessFactor; + vector vec; +}; + +struct LongVec { + float4 f; + vector vec; +}; + +HsConstantData PatchConstantFunction( // expected-error{{vectors of over 4 elements in patch constant function return type are not supported}} + vector vec : V, // expected-error{{vectors of over 4 elements in patch constant function parameters are not supported}} + LongVec lv : L) { // expected-error{{vectors of over 4 elements in patch constant function parameters are not supported}} + return (HsConstantData)0; +} + +[domain("tri")] +[outputtopology("triangle_cw")] +[outputcontrolpoints(32)] +[patchconstantfunc("PatchConstantFunction")] +void main() { +} diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl new file mode 100644 index 0000000000..0604feeaec --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl @@ -0,0 +1,200 @@ +// RUN: %dxc -T ps_6_9 -DTYPE=LongVec -DNUM=5 -verify %s +// RUN: %dxc -T ps_6_9 -DTYPE=LongVecSub -DNUM=128 -verify %s +// RUN: %dxc -T ps_6_9 -DNUM=1024 -verify %s + +// Add tests for base types and 
instantiated template classes with longvecs +// Size of the vector shouldn't matter, but using a few different ones just in case. + +#define PASTE_(x,y) x##y +#define PASTE(x,y) PASTE_(x,y) + +#ifndef TYPE +#define TYPE LongVecTpl +#endif + +struct LongVec { + float4 f; + vector vec; +}; + +struct LongVecSub : LongVec { + int3 is; +}; + +template +struct LongVecTpl { + float4 f; + vector vec; +}; + +vector global_vec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} +vector global_vec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} +TYPE global_vec_rec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} +TYPE global_vec_rec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + +cbuffer BadBuffy { + vector cb_vec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + vector cb_vec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE cb_vec_rec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE cb_vec_rec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} +}; + +tbuffer BadTuffy { + vector tb_vec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + vector tb_vec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE tb_vec_rec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE tb_vec_rec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} +}; + +ConstantBuffer< TYPE > const_buf; // expected-error{{vectors of over 4 elements in ConstantBuffers or TextureBuffers are not supported}} +TextureBuffer< TYPE > tex_buf; // 
expected-error{{vectors of over 4 elements in ConstantBuffers or TextureBuffers are not supported}} + +[shader("pixel")] +vector main( // expected-error{{vectors of over 4 elements in entry function return type are not supported}} + vector vec : V) : SV_Target { // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + return vec; +} + +[shader("vertex")] +TYPE vs_main( // expected-error{{vectors of over 4 elements in entry function return type are not supported}} + TYPE parm : P) : SV_Target { // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + parm.f = 0; + return parm; +} + + +[shader("geometry")] +[maxvertexcount(3)] +void gs_point(line TYPE e, // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + inout PointStream OutputStream0) {} // expected-error{{vectors of over 4 elements in geometry streams are not supported}} + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line(line TYPE a, // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + inout LineStream OutputStream0) {} // expected-error{{vectors of over 4 elements in geometry streams are not supported}} + + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line(line TYPE a, // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + inout TriangleStream OutputStream0) {} // expected-error{{vectors of over 4 elements in geometry streams are not supported}} + +[shader("domain")] +[domain("tri")] +void ds_main(OutputPatch TrianglePatch) {} // expected-error{{vectors of over 4 elements in tessellation patches are not supported}} + +void patch_const(InputPatch inpatch, // expected-error{{vectors of over 4 elements in tessellation patches are not supported}} + OutputPatch outpatch) {} // expected-error{{vectors of over 4 elements in tessellation patches are not supported}} + +[shader("hull")] 
+[domain("tri")] +[outputtopology("triangle_cw")] +[outputcontrolpoints(32)] +[patchconstantfunc("patch_const")] +void hs_main(InputPatch TrianglePatch) {} // expected-error{{vectors of over 4 elements in tessellation patches are not supported}} + +RaytracingAccelerationStructure RTAS; + +struct [raypayload] DXRLongVec { + float4 f : write(closesthit) : read(caller); + vector vec : write(closesthit) : read(caller); +}; + +struct [raypayload] DXRLongVecSub : DXRLongVec { + int3 is : write(closesthit) : read(caller); +}; + +template +struct [raypayload] DXRLongVecTpl { + float4 f : write(closesthit) : read(caller); + vector vec : write(closesthit) : read(caller); +}; + +#define RTTYPE PASTE(DXR,TYPE) + +[shader("raygeneration")] +void raygen() { + RTTYPE p = (RTTYPE)0; + RayDesc ray = (RayDesc)0; + TraceRay(RTAS, RAY_FLAG_NONE, 0, 0, 1, 0, ray, p); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} + CallShader(0, p); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} +} + + +[shader("closesthit")] +void closesthit(inout RTTYPE payload, // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + in RTTYPE attribs ) { // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + RayDesc ray; + TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} + CallShader(0, payload); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +[shader("anyhit")] +void AnyHit( inout RTTYPE payload, // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + in RTTYPE attribs ) // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} +{ +} + +[shader("miss")] +void Miss(inout RTTYPE payload){ // 
expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + RayDesc ray; + TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} + CallShader(0, payload); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +[shader("intersection")] +void Intersection() { + float hitT = RayTCurrent(); + RTTYPE attr = (RTTYPE)0; + bool bReported = ReportHit(hitT, 0, attr); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +[shader("callable")] +void callable1(inout RTTYPE p) { // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + CallShader(0, p); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +groupshared LongVec as_pld; + +[shader("amplification")] +[numthreads(1,1,1)] +void Amp() { + DispatchMesh(1,1,1,as_pld); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +struct NodeLongVec { + uint3 grid : SV_DispatchGrid; + vector vec; +}; + +struct NodeLongVecSub : NodeLongVec { + int3 is; +}; + +template +struct NodeLongVecTpl { + uint3 grid : SV_DispatchGrid; + vector vec; +}; + +#define NTYPE PASTE(Node,TYPE) + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(8,1,1)] +[NodeMaxDispatchGrid(8,1,1)] +void broadcast(DispatchNodeInputRecord input, // expected-error{{vectors of over 4 elements in node records are not supported}} + NodeOutput output) // expected-error{{vectors of over 4 elements in node records are not supported}} +{ + ThreadNodeOutputRecords touts; // expected-error{{vectors of over 4 elements in node records are not supported}} + GroupNodeOutputRecords gouts; // expected-error{{vectors of over 4 elements in node records are not supported}} +} + +[Shader("node")] 
+[NodeLaunch("coalescing")] +[NumThreads(8,1,1)] +void coalesce(GroupNodeInputRecords input) {} // expected-error{{vectors of over 4 elements in node records are not supported}} + +[Shader("node")] +[NodeLaunch("thread")] +void threader(ThreadNodeInputRecord input) {} // expected-error{{vectors of over 4 elements in node records are not supported}} diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-swizzle.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-swizzle.hlsl new file mode 100644 index 0000000000..28b4a52158 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-swizzle.hlsl @@ -0,0 +1,27 @@ +// RUN: %dxc -Tlib_6_9 -verify %s -DTYPE=float +// RUN: %dxc -Tlib_6_9 -verify %s -DTYPE=bool +// RUN: %dxc -Tlib_6_9 -verify %s -DTYPE=uint64_t +// RUN: %dxc -Tlib_6_9 -verify %s -DTYPE=double +// RUN: %dxc -Tlib_6_9 -verify %s -enable-16bit-types -DTYPE=float16_t +// RUN: %dxc -Tlib_6_9 -verify %s -enable-16bit-types -DTYPE=int16_t + +export +vector doit(vector vec5) { + vec5.x = 1; // expected-error {{invalid swizzle 'x' on vector of over 4 elements.}} + return vec5.xyw; // expected-error {{invalid swizzle 'xyw' on vector of over 4 elements.}} +} + +export +TYPE arr_to_vec(TYPE arr[5]) { + + TYPE val = (vector(arr, 1)).x; // expected-error {{invalid swizzle 'x' on vector of over 4 elements.}} + + TYPE val2 = ((vector)arr).x; // expected-error {{invalid swizzle 'x' on vector of over 4 elements.}} + + return val; +} + +export TYPE lv_ctor(TYPE s) { + TYPE ret = (vector(1, 2, 3, 4, 5, s)).x; // expected-error {{invalid swizzle 'x' on vector of over 4 elements.}} + return ret; +} \ No newline at end of file diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl new file mode 100644 index 0000000000..54c85191da --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl @@ -0,0 +1,36 @@ +// RUN: %dxc -T ps_6_8 -verify 
%s + +#define TYPE float +#define NUM 5 + +StructuredBuffer > sbuf; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + +struct LongVec { + float4 f; + vector vec; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} +}; +groupshared vector gs_vec; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} +groupshared vector gs_vec_arr[10]; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + +static vector static_vec; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} +static vector static_vec_arr[10]; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + +export vector lv_param_passthru( // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vector vec1) { // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vector ret = vec1; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vector arr[10]; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + arr[1]= vec1; + return ret; +} + +export void lv_param_in_out(in vector vec1, // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + out vector vec2) { // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vec2 = vec1; +} + +export void lv_param_inout(inout vector vec1, // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + inout vector vec2) { // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vector tmp = vec1; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vec1 = vec2; + vec2 = tmp; +} diff --git a/tools/clang/test/SemaHLSL/hlsl/types/toolong-vectors.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/toolong-vectors.hlsl new file mode 100644 index 0000000000..c1da348695 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/toolong-vectors.hlsl @@ 
-0,0 +1,116 @@ +// RUN: %dxc -T lib_6_9 -DTYPE=float -DNUM=1025 -verify %s +// RUN: %dxc -T ps_6_9 -DTYPE=float -DNUM=1025 -verify %s + +// A test to verify that declarations of longvecs are permitted in all the accepted places. +// Only tests for acceptance, most codegen is ignored for now. + +struct LongVec { + float4 f; + vector vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +}; + +template +struct LongVecTpl { + float4 f; + vector vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +}; + +template +struct LongVecTpl2 { + float4 f; + vector vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +}; + +groupshared vector gs_vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +groupshared vector gs_vec_arr[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +groupshared LongVecTpl gs_vec_tpl; // expected-note{{in instantiation of template class 'LongVecTpl<1025>' requested here}} + +static vector static_vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +static vector static_vec_arr[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +static LongVecTpl2 static_vec_tpl; // expected-note{{in instantiation of template class 'LongVecTpl2<1025>' requested here}} + +export vector // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +lv_param_passthru(vector vec1) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = vec1; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + +export void lv_param_in_out(in vector vec1, // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + out vector vec2) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vec2 = vec1; +} + +export 
void lv_param_inout(inout vector vec1, // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + inout vector vec2) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector tmp = vec1; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vec1 = vec2; + vec2 = tmp; +} + +export void lv_global_assign(vector vec) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + static_vec = vec; +} + +export vector lv_global_ret() { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = static_vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + +export void lv_gs_assign(vector vec) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + gs_vec = vec; +} + +export vector lv_gs_ret() { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = gs_vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + +#define DIMS 10 + +export vector // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +lv_param_arr_passthru(vector vec)[10] { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + ret[i] = vec; + return ret; +} + +export void lv_global_arr_assign(vector vec[10]) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + static_vec_arr[i] = vec[i]; +} + +export vector lv_global_arr_ret()[10] { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + ret[i] = static_vec_arr[i]; 
+ return ret; +} + +export void lv_gs_arr_assign(vector vec[10]) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + gs_vec_arr[i] = vec[i]; +} + +export vector lv_gs_arr_ret()[10] { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + ret[i] = gs_vec_arr[i]; + return ret; +} + +export LongVec lv_param_rec_passthru(LongVec vec) { + LongVec ret = vec; + return ret; +} + +export vector lv_splat(TYPE scalar) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = scalar; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + +export vector lv_array_cast(TYPE arr[NUM]) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = (vector)arr; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + diff --git a/tools/clang/test/SemaHLSL/hlsl/workgraph/dependent_type_for_node_object_template_arg.hlsl b/tools/clang/test/SemaHLSL/hlsl/workgraph/dependent_type_for_node_object_template_arg.hlsl index 40e0452719..05ec268a0c 100644 --- a/tools/clang/test/SemaHLSL/hlsl/workgraph/dependent_type_for_node_object_template_arg.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/workgraph/dependent_type_for_node_object_template_arg.hlsl @@ -60,12 +60,9 @@ void woo() { } template -// expected-note@+1{{zero sized record defined here}} struct ForwardDecl; // expected-note{{template is declared here}} void woot() { - // Forward decl fails because forcing completion to check empty size for node object. 
- // expected-error@+1{{record used in GroupNodeInputRecords may not have zero size}} GroupNodeInputRecords > data; // expected-error{{implicit instantiation of undefined template 'ForwardDecl'}} foo(data); } diff --git a/tools/clang/test/SemaHLSL/incomplete-type.hlsl b/tools/clang/test/SemaHLSL/incomplete-type.hlsl index 8869b80400..b0d4f1da7f 100644 --- a/tools/clang/test/SemaHLSL/incomplete-type.hlsl +++ b/tools/clang/test/SemaHLSL/incomplete-type.hlsl @@ -1,17 +1,83 @@ -// RUN: %dxc -Tlib_6_3 -Wno-unused-value -verify %s +// RUN: %dxc -Tlib_6_8 -Wno-unused-value -verify %s // Tests that the compiler is well-behaved with regard to uses of incomplete types. // Regression test for GitHub #2058, which crashed in this case. -// expected-note@+4 {{forward declaration of 'S'}} -// expected-note@+3 {{forward declaration of 'S'}} -// expected-note@+2 {{forward declaration of 'S'}} -// expected-note@+1 {{forward declaration of 'S'}} -struct S; + +struct S; // expected-note 24 {{forward declaration of 'S'}} +template struct T; // expected-note 4 {{template is declared here}} + ConstantBuffer CB; // expected-error {{variable has incomplete type 'S'}} +ConstantBuffer > TB; // expected-error {{implicit instantiation of undefined template 'T<1>'}} + +S s; // expected-error {{variable has incomplete type 'S'}} +T<1> t; // expected-error {{implicit instantiation of undefined template 'T<1>'}} + +cbuffer BadBuffy { + S cb_s; // expected-error {{variable has incomplete type 'S'}} + T<1> cb_t; // expected-error {{implicit instantiation of undefined template 'T<1>'}} +}; + +tbuffer BadTuffy { + S tb_s; // expected-error {{variable has incomplete type 'S'}} + T<1> tb_t; // expected-error {{implicit instantiation of undefined template 'T<1>'}} +}; + S func( // expected-error {{incomplete result type 'S' in function definition}} S param) // expected-error {{variable has incomplete type 'S'}} { S local; // expected-error {{variable has incomplete type 'S'}} return (S)0; // 
expected-error {{'S' is an incomplete type}} } + +[shader("geometry")] +[maxvertexcount(3)] +void gs_point(line S e, // expected-error {{variable has incomplete type 'S'}} + inout PointStream OutputStream0) {} // expected-error {{variable has incomplete type 'S'}} + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line(line S a, // expected-error {{variable has incomplete type 'S'}} + inout LineStream OutputStream0) {} // expected-error {{variable has incomplete type 'S'}} + + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line(line S a, // expected-error {{variable has incomplete type 'S'}} + inout TriangleStream OutputStream0) {} // expected-error {{variable has incomplete type 'S'}} + + +[shader("domain")] +[domain("tri")] +void ds_main(OutputPatch TrianglePatch) {} // expected-error{{variable has incomplete type 'S'}} + +void patch_const(InputPatch inpatch, // expected-error{{variable has incomplete type 'S'}} + OutputPatch outpatch) {} // expected-error{{variable has incomplete type 'S'}} + +[shader("hull")] +[domain("tri")] +[outputtopology("triangle_cw")] +[outputcontrolpoints(32)] +[patchconstantfunc("patch_const")] +void hs_main(InputPatch TrianglePatch) {} // expected-error{{variable has incomplete type 'S'}} + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(8,1,1)] +[NodeMaxDispatchGrid(8,1,1)] +// expected-error@+1{{Broadcasting node shader 'broadcast' with NodeMaxDispatchGrid attribute must declare an input record containing a field with SV_DispatchGrid semantic}} +void broadcast(DispatchNodeInputRecord input, // expected-error{{variable has incomplete type 'S'}} + NodeOutput output) // expected-error{{variable has incomplete type 'S'}} +{ + ThreadNodeOutputRecords touts; // expected-error{{variable has incomplete type 'S'}} + GroupNodeOutputRecords gouts; // expected-error{{variable has incomplete type 'S'}} +} + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(8,1,1)] +void coalesce(GroupNodeInputRecords input) {} // 
expected-error{{variable has incomplete type 'S'}} + +[Shader("node")] +[NodeLaunch("thread")] +void threader(ThreadNodeInputRecord input) {} // expected-error{{variable has incomplete type 'S'}} diff --git a/tools/clang/test/SemaHLSL/rayquery-ast-dump-implicit.hlsl b/tools/clang/test/SemaHLSL/rayquery-ast-dump-implicit.hlsl new file mode 100644 index 0000000000..55b4623725 --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-ast-dump-implicit.hlsl @@ -0,0 +1,14 @@ +// RUN: %dxc -T vs_6_9 -E main -ast-dump-implicit %s | FileCheck %s + +float main(RayDesc rayDesc : RAYDESC) : OUT { + return 0; +} + +// CHECK: VarDecl 0x{{.+}} <> implicit RAY_FLAG_FORCE_OMM_2_STATE 'const unsigned int' static cinit +// CHECK: IntegerLiteral 0x{{.+}} <> 'const unsigned int' 1024 +// CHECK: AvailabilityAttr 0x{{.+}} <> Implicit 6.9 0 0 "" + +// CHECK: VarDecl 0x{{.+}} <> implicit RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS 'const unsigned int' static cinit +// CHECK: IntegerLiteral 0x{{.+}} <> 'const unsigned int' 1 +// CHECK: AvailabilityAttr 0x{{.+}} <> Implicit 6.9 0 0 "" + diff --git a/tools/clang/test/SemaHLSL/rayquery-ast-dump.hlsl b/tools/clang/test/SemaHLSL/rayquery-ast-dump.hlsl new file mode 100644 index 0000000000..2ec79a060f --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-ast-dump.hlsl @@ -0,0 +1,26 @@ +// RUN: %dxc -T vs_6_9 -E main -ast-dump %s | FileCheck %s + +RaytracingAccelerationStructure RTAS; + + +float main(RayDesc rayDesc : RAYDESC) : OUT { + RayQuery<0, RAYQUERY_FLAG_NONE> rayQuery1; + RayQuery rayQuery2; + rayQuery1.TraceRayInline(RTAS, 1, 2, rayDesc); + rayQuery2.TraceRayInline(RTAS, RAY_FLAG_FORCE_OPAQUE|RAY_FLAG_FORCE_OMM_2_STATE, 2, rayDesc); + return 0; +} + +// CHECK: -DeclStmt 0x{{.+}} +// CHECK-NEXT: `-VarDecl 0x{{.+}} used rayQuery1 'RayQuery<0, RAYQUERY_FLAG_NONE>':'RayQuery<0, 0>' callinit +// CHECK-NEXT: `-CXXConstructExpr 0x{{.+}} 'RayQuery<0, RAYQUERY_FLAG_NONE>':'RayQuery<0, 0>' 'void ()' +// CHECK-NEXT: -DeclStmt 0x{{.+}} +// CHECK-NEXT: 
`-VarDecl 0x{{.+}} used rayQuery2 'RayQuery':'RayQuery<1024, 1>' callinit +// CHECK-NEXT: `-CXXConstructExpr 0x{{.+}} 'RayQuery':'RayQuery<1024, 1>' 'void ()' +// CHECK-NEXT: -CXXMemberCallExpr 0x{{.+}} 'void' +// CHECK-NEXT: -MemberExpr 0x{{.+}} '' .TraceRayInline +// CHECK-NEXT: `-DeclRefExpr 0x{{.+}} 'RayQuery<0, RAYQUERY_FLAG_NONE>':'RayQuery<0, 0>' lvalue Var 0x{{.+}} 'rayQuery1' 'RayQuery<0, RAYQUERY_FLAG_NONE>':'RayQuery<0, 0>' + +// CHECK: -CXXMemberCallExpr 0x{{.+}} 'void' +// CHECK-NEXT: -MemberExpr 0x{{.+}} '' .TraceRayInline +// CHECK-NEXT: `-DeclRefExpr 0x{{.+}} 'RayQuery':'RayQuery<1024, 1>' lvalue Var 0x{{.+}} 'rayQuery2' 'RayQuery':'RayQuery<1024, 1>' diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-DXR-entry-point.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-DXR-entry-point.hlsl new file mode 100644 index 0000000000..722187cf43 --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-omm-DXR-entry-point.hlsl @@ -0,0 +1,17 @@ +// RUN: %dxc -T lib_6_3 -validator-version 1.8 -verify %s + +// expected-warning@+1{{potential misuse of built-in constant 'RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model lib_6_3; introduced in shader model 6.9}} +RaytracingPipelineConfig1 rpc = { 32, RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS }; + +RaytracingAccelerationStructure RTAS; +// DXR entry to test that restricted flags are diagnosed. 
+[shader("raygeneration")] +void main(void) { + RayDesc rayDesc; + + // expected-warning@+2{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model lib_6_3; introduced in shader model 6.9}} + // expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model lib_6_3; introduced in shader model 6.9}} + RayQuery rayQuery; + // expected-warning@+1{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model lib_6_3; introduced in shader model 6.9}} + rayQuery.TraceRayInline(RTAS, RAY_FLAG_FORCE_OMM_2_STATE, 2, rayDesc); +} diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-export-sm65.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-export-sm65.hlsl new file mode 100644 index 0000000000..3e2031e0a7 --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-export-sm65.hlsl @@ -0,0 +1,11 @@ +// RUN: %dxc -T lib_6_5 -verify %s + +// expect no diagnostics here, since global variables +// are not picked up through the recursive AST visitor's +// traversal of the exported function. 
+int x = RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS; + +export float4 MyExportedFunction(float4 color) { + // expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model lib_6_5; introduced in shader model 6.9}} + return color * RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS; +} diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65-warnings.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65-warnings.hlsl new file mode 100644 index 0000000000..476c1a503e --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65-warnings.hlsl @@ -0,0 +1,11 @@ +// RUN: %dxc -Wno-error-hlsl-rayquery-flags -Wno-error-hlsl-availability -T vs_6_5 -E main -verify %s + +RaytracingAccelerationStructure RTAS; +void main(uint i : IDX, RayDesc rayDesc : RAYDESC) { + + // expected-warning@+3{{A non-zero value for the RayQueryFlags template argument requires shader model 6.9 or above.}} + // expected-warning@+2{{When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.}} + // expected-warning@+1{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model vs_6_5; introduced in shader model 6.9}} + RayQuery rayQuery0a; + +} diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65.hlsl new file mode 100644 index 0000000000..6904f58c7d --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65.hlsl @@ -0,0 +1,46 @@ +// RUN: %dxc -T vs_6_5 -E main -verify %s + +// tests that RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS usage will emit +// one warning for each incompatible availability attribute decl, +// when the compilation target is less than shader model 6.9. 
+ +namespace MyNamespace { + // expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model vs_6_5; introduced in shader model 6.9}} + static const int badVar = RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS; +} + +// expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model vs_6_5; introduced in shader model 6.9}} +groupshared const int otherBadVar = RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS; + +int retNum(){ + // expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model vs_6_5; introduced in shader model 6.9}} + return RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS; +} + +int retNumUncalled(){ + // no diagnostic expected here + return RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS; +} + +RaytracingAccelerationStructure RTAS; +void main(uint i : IDX, RayDesc rayDesc : RAYDESC) { + + int x = MyNamespace::badVar + otherBadVar + retNum(); + RayQuery<0> rayQuery0a; + + if (x > 4){ + rayQuery0a.TraceRayInline(RTAS, 8, 2, rayDesc); + } + else{ + rayQuery0a.TraceRayInline(RTAS, 16, 2, rayDesc); + } + + // expected-error@+2{{A non-zero value for the RayQueryFlags template argument requires shader model 6.9 or above.}} + // expected-warning@+1{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model vs_6_5; introduced in shader model 6.9}} + RayQuery rayQuery0b; + + // expected-warning@+2{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model vs_6_5; introduced in shader model 6.9}} + // expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model vs_6_5; introduced in shader model 6.9}} + RayQuery rayQuery0d; + +} diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-diag-sm65.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-diag-sm65.hlsl new file mode 100644 index 0000000000..d31d9bf289 --- /dev/null +++ 
b/tools/clang/test/SemaHLSL/rayquery-omm-diag-sm65.hlsl @@ -0,0 +1,22 @@ +// RUN: %dxc -T vs_6_5 -E main -verify %s + +// Test that at the call site of any TraceRayInline call, a default error +// warning is emitted that indicates the ray query object has the +// RAY_FLAG_FORCE_OMM_2_STATE set, but doesn't have +// RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set + +RaytracingAccelerationStructure RTAS; +void main(RayDesc rayDesc : RAYDESC) : OUT { + // expected-note@+1 2 {{RayQueryFlags declared here}} + RayQuery<0> rayQuery; // implicitly, the second arg is 0. + + // expected-error@+2{{When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.}} + // expected-warning@+1{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model vs_6_5; introduced in shader model 6.9}} + rayQuery.TraceRayInline(RTAS, RAY_FLAG_FORCE_OMM_2_STATE, 2, rayDesc); + + // expected-error@+1{{When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.}} + rayQuery.TraceRayInline(RTAS, 1024, 2, rayDesc); + + // expected-error@+1{{A non-zero value for the RayQueryFlags template argument requires shader model 6.9 or above.}} + RayQuery<0, 1> rayQueryInvalid; +} diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl new file mode 100644 index 0000000000..5e484d193e --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl @@ -0,0 +1,8 @@ +// RUN: %dxc -T vs_6_9 -verify %s +// RUN: %dxc -T vs_6_5 -verify %s + +// validate 2nd template argument flags +// expected-error@+1{{When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.}} +typedef RayQuery BadRayQuery; +// expected-error@+1{{When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.}} +typedef 
RayQuery BadRayQuery2; diff --git a/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-no-errors.hlsl b/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-no-errors.hlsl new file mode 100644 index 0000000000..272a46a87e --- /dev/null +++ b/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-no-errors.hlsl @@ -0,0 +1,12 @@ +// RUN: %dxc -T ps_6_0 -verify %s + +// expected-no-diagnostics +// No diagnostic is expected because this is a non-library target, +// and SubObjects are ignored on non-library targets. + +RaytracingPipelineConfig1 rpc = { 32, RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS }; + +[shader("pixel")] +int main(int i : INDEX) : SV_Target { + return 1; +} diff --git a/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-warnings.hlsl b/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-warnings.hlsl new file mode 100644 index 0000000000..c220f5734d --- /dev/null +++ b/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-warnings.hlsl @@ -0,0 +1,6 @@ +// RUN: %dxc -T lib_6_8 -verify %s + +// expected-warning@+1{{potential misuse of built-in constant 'RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model lib_6_8; introduced in shader model 6.9}} +RaytracingPipelineConfig1 rpc = { 32, RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS }; + + diff --git a/tools/clang/test/SemaHLSL/reordercoherent-globallycoherent-mismatch.hlsl b/tools/clang/test/SemaHLSL/reordercoherent-globallycoherent-mismatch.hlsl new file mode 100644 index 0000000000..0192154b78 --- /dev/null +++ b/tools/clang/test/SemaHLSL/reordercoherent-globallycoherent-mismatch.hlsl @@ -0,0 +1,96 @@ +// RUN: %dxc -Tlib_6_9 -verify %s + +RWByteAddressBuffer NonCBuf; +globallycoherent RWByteAddressBuffer GCBuf; +reordercoherent RWByteAddressBuffer RCBuf; +// expected-warning@+2{{attribute 'globallycoherent' implies 'reordercoherent'}} +// expected-warning@+1{{attribute 'reordercoherent' implied by 'globallycoherent' in 'RCGCBuf'. 
'reordercoherent' ignored.}} +reordercoherent globallycoherent RWByteAddressBuffer RCGCBuf; + +globallycoherent RWByteAddressBuffer getPromoteRC() { + return RCBuf; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} +} + +reordercoherent RWByteAddressBuffer getDemoteGC() { + return GCBuf; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} +} + +globallycoherent RWByteAddressBuffer GCBufArr[2]; +reordercoherent RWByteAddressBuffer RCBufArr[2]; + +reordercoherent RWByteAddressBuffer RCBufMultiArr[2][2]; +globallycoherent RWByteAddressBuffer GCBufMultiArr[2][2]; + +globallycoherent RWByteAddressBuffer getPromoteRCArr() { + return RCBufArr[0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} +} + +reordercoherent RWByteAddressBuffer getDemoteGCArr() { + return GCBufArr[0]; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} +} + +globallycoherent RWByteAddressBuffer getPromoteRCMultiArr() { + return RCBufMultiArr[0][0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} +} + +reordercoherent RWByteAddressBuffer getDemoteGCMultiArr() { + return GCBufMultiArr[0][0]; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} +} + +void NonGCStore(RWByteAddressBuffer Buf) { + Buf.Store(0, 0); +} + 
+void RCStore(reordercoherent RWByteAddressBuffer Buf) { + Buf.Store(0, 0); +} + +void GCStore(globallycoherent RWByteAddressBuffer Buf) { + Buf.Store(0, 0); +} + +void getPromoteToGCParam(inout globallycoherent RWByteAddressBuffer PGCBuf) { + PGCBuf = RCBuf; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer __restrict' promotes reordercoherent to globallycoherent annotation}} +} +void getDemoteToRCParam(inout reordercoherent RWByteAddressBuffer PRCBuf) { + PRCBuf = GCBuf; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer __restrict' demotes globallycoherent to reordercoherent annotation}} +} + +static reordercoherent RWByteAddressBuffer SRCDemoteBufArr[2] = GCBufArr; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer [2]' to 'reordercoherent RWByteAddressBuffer [2]' demotes globallycoherent to reordercoherent annotation}} +static reordercoherent RWByteAddressBuffer SRCDemoteBufMultiArr0[2] = GCBufMultiArr[0]; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer [2]' to 'reordercoherent RWByteAddressBuffer [2]' demotes globallycoherent to reordercoherent annotation}} +static reordercoherent RWByteAddressBuffer SRCDemoteBufMultiArr1[2][2] = GCBufMultiArr; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer [2][2]' to 'reordercoherent RWByteAddressBuffer [2][2]' demotes globallycoherent to reordercoherent annotation}} + +static globallycoherent RWByteAddressBuffer SRCPromoteBufArr[2] = RCBufArr; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer [2]' to 'globallycoherent RWByteAddressBuffer [2]' promotes reordercoherent to globallycoherent annotation}} +static globallycoherent RWByteAddressBuffer SRCPromoteBufMultiArr0[2] = RCBufMultiArr[0]; // expected-warning{{implicit conversion from 
'reordercoherent RWByteAddressBuffer [2]' to 'globallycoherent RWByteAddressBuffer [2]' promotes reordercoherent to globallycoherent annotation}} +static globallycoherent RWByteAddressBuffer SRCPromoteBufMultiArr1[2][2] = RCBufMultiArr; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer [2][2]' to 'globallycoherent RWByteAddressBuffer [2][2]' promotes reordercoherent to globallycoherent annotation}} + +void getPromoteToGCParamArr(inout globallycoherent RWByteAddressBuffer PGCBufArr[2]) { + PGCBufArr = RCBufArr; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer [2]' to 'globallycoherent RWByteAddressBuffer __restrict[2]' promotes reordercoherent to globallycoherent annotation}} +} +void getDemoteToRCParamArr(inout reordercoherent RWByteAddressBuffer PRCBufArr[2]) { + PRCBufArr = GCBufArr; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer [2]' to 'reordercoherent RWByteAddressBuffer __restrict[2]' demotes globallycoherent to reordercoherent annotation}} +} + +globallycoherent RWByteAddressBuffer getGCBuf() { + return GCBuf; +} + +reordercoherent RWByteAddressBuffer getRCBuf() { + return RCBuf; +} + +[shader("raygeneration")] +void main() +{ + GCStore(RCBuf); // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} + RCStore(GCBuf); // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} + + reordercoherent RWByteAddressBuffer RCCopyGC = GCBuf; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} + globallycoherent RWByteAddressBuffer GCCopyRC = RCBuf; // expected-warning{{implicit conversion 
from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} + + reordercoherent RWByteAddressBuffer RCCopyGCReturn = getGCBuf(); // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} + globallycoherent RWByteAddressBuffer GCCopyRCReturn = getRCBuf(); // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} + + reordercoherent RWByteAddressBuffer RCCopyGC0 = GCBufArr[0]; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} + globallycoherent RWByteAddressBuffer GCCopyRC0 = RCBufArr[0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} +} diff --git a/tools/clang/test/SemaHLSL/reordercoherent-implied.hlsl b/tools/clang/test/SemaHLSL/reordercoherent-implied.hlsl new file mode 100644 index 0000000000..130b0efee7 --- /dev/null +++ b/tools/clang/test/SemaHLSL/reordercoherent-implied.hlsl @@ -0,0 +1,41 @@ +// RUN: %dxc -E main -T lib_6_9 -verify %s +// REQUIRES: dxil-1-9 + +using Ty = RWTexture1D; + +using GTy = globallycoherent Ty; +using RTy = reordercoherent Ty; + +// expected-warning@+1{{attribute 'globallycoherent' is already applied}} +using GGTy = globallycoherent GTy; +// expected-warning@+1{{attribute 'reordercoherent' is already applied}} +using RRTy = reordercoherent RTy; + +// expected-warning@+1{{attribute 'globallycoherent' implies 'reordercoherent'}} +using GRTy = globallycoherent RTy; +// expected-warning@+1{{attribute 'globallycoherent' implies 'reordercoherent'}} 
+using RGTy = reordercoherent GTy; + +// expected-warning@+1{{attribute 'globallycoherent' is already applied}} +using GGRTy = globallycoherent GRTy; +// expected-warning@+1{{attribute 'reordercoherent' is already applied}} +using RRGTy = reordercoherent RGTy; + +// expected-warning@+1{{attribute 'globallycoherent' implies 'reordercoherent'}} +using GRTy2 = globallycoherent reordercoherent Ty; +// expected-warning@+1{{attribute 'globallycoherent' implies 'reordercoherent'}} +using RGTy2 = reordercoherent globallycoherent Ty; + +// expected-warning@+2{{attribute 'globallycoherent' implies 'reordercoherent'}} +// expected-warning@+1{{attribute 'globallycoherent' is already applied}} +using GGRTy2 = globallycoherent globallycoherent reordercoherent Ty; +// expected-warning@+2{{attribute 'globallycoherent' implies 'reordercoherent'}} +// expected-warning@+1{{attribute 'globallycoherent' is already applied}} +using GRGTy2 = globallycoherent reordercoherent globallycoherent Ty; + +// expected-warning@+2{{attribute 'globallycoherent' implies 'reordercoherent'}} +// expected-warning@+1{{attribute 'reordercoherent' is already applied}} +using RGRTy2 = reordercoherent globallycoherent reordercoherent Ty; +// expected-warning@+2{{attribute 'globallycoherent' implies 'reordercoherent'}} +// expected-warning@+1{{attribute 'reordercoherent' is already applied}} +using RRGTy2 = reordercoherent reordercoherent globallycoherent Ty; diff --git a/tools/clang/test/SemaHLSL/reordercoherent-mismatch.hlsl b/tools/clang/test/SemaHLSL/reordercoherent-mismatch.hlsl new file mode 100644 index 0000000000..447e496c6e --- /dev/null +++ b/tools/clang/test/SemaHLSL/reordercoherent-mismatch.hlsl @@ -0,0 +1,101 @@ +// RUN: %dxc -Tlib_6_9 -verify %s + +RWByteAddressBuffer NonRCBuf; +reordercoherent RWByteAddressBuffer RCBuf; + +RWByteAddressBuffer NonRCBufArr[2]; +reordercoherent RWByteAddressBuffer RCBufArr[2]; + +RWByteAddressBuffer NonRCBufMultiArr[2][2]; +reordercoherent RWByteAddressBuffer 
RCBufMultiArr[2][2]; + +RWByteAddressBuffer getNonRCBuf() { + return NonRCBuf; +} + +reordercoherent RWByteAddressBuffer getRCBuf() { + return RCBuf; +} + +RWByteAddressBuffer getNonRCBufArr() { + return NonRCBufArr[0]; +} + +reordercoherent RWByteAddressBuffer getRCBufArr() { + return RCBufArr[0]; +} + +RWByteAddressBuffer getNonRCBufMultiArr() { + return NonRCBufMultiArr[0][0]; +} + +reordercoherent RWByteAddressBuffer getRCBufMultiArr() { + return RCBufMultiArr[0][0]; +} + +RWByteAddressBuffer getNonGCRCBuf() { + return RCBuf; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} +} + +reordercoherent RWByteAddressBuffer getGCNonRCBuf() { + return NonRCBuf; // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} +} + +RWByteAddressBuffer getNonGCRCBufArr() { + return RCBufArr[0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} +} + +reordercoherent RWByteAddressBuffer getGCNonRCBufArr() { + return NonRCBufArr[0]; // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} +} + +RWByteAddressBuffer getNonGCRCBufMultiArr() { + return RCBufMultiArr[0][0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} +} + +reordercoherent RWByteAddressBuffer getGCNonRCBufMultiArr() { + return NonRCBufMultiArr[0][0]; // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} +} + +void NonGCStore(RWByteAddressBuffer Buf) { + Buf.Store(0, 0); +} + +void GCStore(reordercoherent RWByteAddressBuffer Buf) { + Buf.Store(0, 0); +} + +void getNonRCBufPAram(inout 
reordercoherent RWByteAddressBuffer PRCBuf) { + PRCBuf = NonRCBuf; // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer __restrict' adds reordercoherent annotation}} +} + +static reordercoherent RWByteAddressBuffer SRCBufArr[2] = NonRCBufArr; // expected-warning{{implicit conversion from 'RWByteAddressBuffer [2]' to 'reordercoherent RWByteAddressBuffer [2]' adds reordercoherent annotation}} +static reordercoherent RWByteAddressBuffer SRCBufMultiArr0[2] = NonRCBufMultiArr[0]; // expected-warning{{implicit conversion from 'RWByteAddressBuffer [2]' to 'reordercoherent RWByteAddressBuffer [2]' adds reordercoherent annotation}} +static reordercoherent RWByteAddressBuffer SRCBufMultiArr1[2][2] = NonRCBufMultiArr; // expected-warning{{implicit conversion from 'RWByteAddressBuffer [2][2]' to 'reordercoherent RWByteAddressBuffer [2][2]' adds reordercoherent annotation}} + +void getNonRCBufArrParam(inout reordercoherent RWByteAddressBuffer PRCBufArr[2]) { + PRCBufArr = NonRCBufArr; // expected-warning{{implicit conversion from 'RWByteAddressBuffer [2]' to 'reordercoherent RWByteAddressBuffer __restrict[2]' adds reordercoherent annotation}} +} + +[shader("raygeneration")] void main() { + NonGCStore(NonRCBuf); // No diagnostic + GCStore(NonRCBuf); // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} + NonGCStore(RCBuf); // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} + GCStore(RCBuf); // No diagnostic + + RWByteAddressBuffer NonGCCopyNonGC = NonRCBuf; // No diagnostic + RWByteAddressBuffer NonGCCopyGC = RCBuf; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} + + reordercoherent RWByteAddressBuffer GCCopyNonGC = NonRCBuf; // 
expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} + reordercoherent RWByteAddressBuffer GCCopyGC = RCBuf; // No diagnostic + + reordercoherent RWByteAddressBuffer GCCopyNonGCReturn = getNonRCBuf(); // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} + + RWByteAddressBuffer NonGCCopyGCReturn = getRCBuf(); // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} + + RWByteAddressBuffer NonGCCopyNonGC0 = NonRCBufArr[0]; // No diagnostic + RWByteAddressBuffer NonGCCopyGC0 = RCBufArr[0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} + + reordercoherent RWByteAddressBuffer GCCopyNonGC0 = NonRCBufArr[0]; // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} + reordercoherent RWByteAddressBuffer GCCopyGC0 = RCBufArr[0]; // No diagnostic +} diff --git a/tools/clang/test/SemaHLSL/reordercoherent-type-errors.hlsl b/tools/clang/test/SemaHLSL/reordercoherent-type-errors.hlsl new file mode 100644 index 0000000000..57fd33fb13 --- /dev/null +++ b/tools/clang/test/SemaHLSL/reordercoherent-type-errors.hlsl @@ -0,0 +1,26 @@ +// RUN: %dxc -Tlib_6_9 -verify %s + +reordercoherent RWTexture1D uav1 : register(u3); + +// expected-error@+2 {{'reordercoherent' is not a valid modifier for a declaration of type 'Buffer >'}} +// expected-note@+1 {{'reordercoherent' can only be applied to UAV objects}} +reordercoherent Buffer srv; + +// expected-error@+2 {{'reordercoherent' is not a valid modifier for a declaration of type 'float'}} +// expected-note@+1 {{'reordercoherent' can only be applied to UAV objects}} +reordercoherent float m; + 
+reordercoherent RWTexture2D tex[12]; +reordercoherent RWTexture2D texMD[12][12]; + +// expected-error@+2 {{'reordercoherent' is not a valid modifier for a declaration of type 'float'}} +// expected-note@+1 {{'reordercoherent' can only be applied to UAV objects}} +reordercoherent float One() { + return 1.0; +} + +struct Record { uint index; }; + +// expected-error@+2 {{'reordercoherent' is not a valid modifier for a declaration of type 'RWDispatchNodeInputRecord'}} +// expected-note@+1 {{'reordercoherent' can only be applied to UAV objects}} +void func2(reordercoherent RWDispatchNodeInputRecord funcInputData) {} diff --git a/tools/clang/test/SemaHLSL/subobjects-ast-dump.hlsl b/tools/clang/test/SemaHLSL/subobjects-ast-dump.hlsl new file mode 100644 index 0000000000..6133847fb8 --- /dev/null +++ b/tools/clang/test/SemaHLSL/subobjects-ast-dump.hlsl @@ -0,0 +1,136 @@ +// RUN: %dxc -T lib_6_9 -ast-dump-implicit %s | FileCheck -check-prefix=ASTIMPL %s +// RUN: %dxc -T lib_6_9 -ast-dump %s | FileCheck -check-prefix=AST %s +// The HLSL source is just a copy of +// tools\clang\test\HLSLFileCheck\shader_targets\raytracing\subobjects_raytracingPipelineConfig1.hlsl + +// This test tests that the HLSLSubObjectAttr attribute is present on all +// HLSL subobjects, and tests the ast representation of subobjects + +// ASTIMPL: CXXRecordDecl 0x{{.+}} <> implicit referenced struct StateObjectConfig definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 0 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Flags 'unsigned int' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct GlobalRootSignature definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 1 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Data 'string' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct LocalRootSignature definition +// ASTIMPL-NEXT: 
HLSLSubObjectAttr 0x{{.+}} <> Implicit 2 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Data 'string' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct SubobjectToExportsAssociation definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 8 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Subobject 'string' +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Exports 'string' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct RaytracingShaderConfig definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 9 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit MaxPayloadSizeInBytes 'unsigned int' +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit MaxAttributeSizeInBytes 'unsigned int' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit struct RaytracingPipelineConfig definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 10 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit MaxTraceRecursionDepth 'unsigned int' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct TriangleHitGroup definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 11 0 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit AnyHit 'string' +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit ClosestHit 'string' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct ProceduralPrimitiveHitGroup definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 11 1 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit AnyHit 'string' +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit ClosestHit 'string' +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Intersection 'string' 
+// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct RaytracingPipelineConfig1 definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 12 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit MaxTraceRecursionDepth 'unsigned int' +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Flags 'unsigned int' + +// AST: VarDecl 0x{{.+}} grs 'GlobalRootSignature' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'GlobalRootSignature' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "CBV(b0)" +// AST-NEXT: VarDecl 0x{{.+}} soc 'StateObjectConfig' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'StateObjectConfig' +// AST-NEXT: BinaryOperator 0x{{.+}} 'unsigned int' '|' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: DeclRefExpr 0x{{.+}} 'const unsigned int' lvalue Var 0x{{.+}} 'STATE_OBJECT_FLAGS_ALLOW_LOCAL_DEPENDENCIES_ON_EXTERNAL_DEFINITONS' 'const unsigned int' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: DeclRefExpr 0x{{.+}} 'const unsigned int' lvalue Var 0x{{.+}} 'STATE_OBJECT_FLAG_ALLOW_STATE_OBJECT_ADDITIONS' 'const unsigned int' +// AST-NEXT: VarDecl 0x{{.+}} lrs 'LocalRootSignature' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'LocalRootSignature' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "UAV(u0, visibility = SHADER_VISIBILITY_GEOMETRY), RootFlags(LOCAL_ROOT_SIGNATURE)" +// AST-NEXT: VarDecl 0x{{.+}} sea 'SubobjectToExportsAssociation' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'SubobjectToExportsAssociation' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "grs" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "a;b;foo;c" +// AST-NEXT: VarDecl 0x{{.+}} sea2 
'SubobjectToExportsAssociation' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'SubobjectToExportsAssociation' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "grs" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue ";" +// AST-NEXT: VarDecl 0x{{.+}} sea3 'SubobjectToExportsAssociation' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'SubobjectToExportsAssociation' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "grs" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "" +// AST-NEXT: VarDecl 0x{{.+}} rsc 'RaytracingShaderConfig' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'RaytracingShaderConfig' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: IntegerLiteral 0x{{.+}} 'literal int' 128 +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: IntegerLiteral 0x{{.+}} 'literal int' 64 +// AST-NEXT: VarDecl 0x{{.+}} rpc 'RaytracingPipelineConfig1' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'RaytracingPipelineConfig1' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: IntegerLiteral 0x{{.+}} 'literal int' 32 +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: DeclRefExpr 0x{{.+}} 'const unsigned int' lvalue Var 0x{{.+}} 'RAYTRACING_PIPELINE_FLAG_SKIP_TRIANGLES' 'const unsigned int' +// AST-NEXT: VarDecl 0x{{.+}} sea4 'SubobjectToExportsAssociation' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'SubobjectToExportsAssociation' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "rpc" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue ";" +// AST-NEXT: VarDecl 0x{{.+}} rpc2 'RaytracingPipelineConfig1' 
static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'RaytracingPipelineConfig1' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: IntegerLiteral 0x{{.+}} 'literal int' 32 +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: DeclRefExpr 0x{{.+}} 'const unsigned int' lvalue Var 0x{{.+}} 'RAYTRACING_PIPELINE_FLAG_NONE' 'const unsigned int' +// AST-NEXT: VarDecl 0x{{.+}} trHitGt 'TriangleHitGroup' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'TriangleHitGroup' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "a" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "b" +// AST-NEXT: VarDecl 0x{{.+}} ppHitGt 'ProceduralPrimitiveHitGroup' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'ProceduralPrimitiveHitGroup' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "a" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "b" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "c" + +GlobalRootSignature grs = {"CBV(b0)"}; +StateObjectConfig soc = { STATE_OBJECT_FLAGS_ALLOW_LOCAL_DEPENDENCIES_ON_EXTERNAL_DEFINITONS | STATE_OBJECT_FLAG_ALLOW_STATE_OBJECT_ADDITIONS }; +LocalRootSignature lrs = {"UAV(u0, visibility = SHADER_VISIBILITY_GEOMETRY), RootFlags(LOCAL_ROOT_SIGNATURE)"}; +SubobjectToExportsAssociation sea = { "grs", "a;b;foo;c" }; +// Empty association is well-defined: it creates a default association +SubobjectToExportsAssociation sea2 = { "grs", ";" }; +SubobjectToExportsAssociation sea3 = { "grs", "" }; +RaytracingShaderConfig rsc = { 128, 64 }; +RaytracingPipelineConfig1 rpc = { 32, RAYTRACING_PIPELINE_FLAG_SKIP_TRIANGLES }; +SubobjectToExportsAssociation sea4 = {"rpc", ";"}; +RaytracingPipelineConfig1 rpc2 = 
{32, RAYTRACING_PIPELINE_FLAG_NONE }; +TriangleHitGroup trHitGt = {"a", "b"}; +ProceduralPrimitiveHitGroup ppHitGt = { "a", "b", "c"}; diff --git a/tools/clang/test/lit.cfg b/tools/clang/test/lit.cfg index 5fc5d4a27c..a3a352071c 100644 --- a/tools/clang/test/lit.cfg +++ b/tools/clang/test/lit.cfg @@ -504,6 +504,9 @@ if config.enable_backtrace == "1": if config.spirv: config.available_features.add("spirv") +if config.metal: + config.available_features.add("metal") + # Check supported dxil version def get_dxil_version(): result = subprocess.run([lit.util.which('dxc', llvm_tools_dir), "--version"], stdout=subprocess.PIPE) diff --git a/tools/clang/test/lit.site.cfg.in b/tools/clang/test/lit.site.cfg.in index 207450add5..80dcadf288 100644 --- a/tools/clang/test/lit.site.cfg.in +++ b/tools/clang/test/lit.site.cfg.in @@ -22,6 +22,7 @@ config.enable_backtrace = "@ENABLE_BACKTRACES@" config.host_arch = "@HOST_ARCH@" config.spirv = "@ENABLE_SPIRV_CODEGEN@" =="ON" config.hlsl_headers_dir = "@HLSL_HEADERS_DIR@" # HLSL change +config.metal = "@ENABLE_METAL_CODEGEN@".upper() == "ON" # HLSL change # Support substitution of the tools and libs dirs with user parameters. This is # used when we can't determine the tool dir at configuration time. 
diff --git a/tools/clang/tools/dxcompiler/CMakeLists.txt b/tools/clang/tools/dxcompiler/CMakeLists.txt index 004d2e5ad1..c69e276194 100644 --- a/tools/clang/tools/dxcompiler/CMakeLists.txt +++ b/tools/clang/tools/dxcompiler/CMakeLists.txt @@ -136,6 +136,14 @@ target_link_libraries(dxcompiler PRIVATE ${LIBRARIES}) if (ENABLE_SPIRV_CODEGEN) target_link_libraries(dxcompiler PRIVATE clangSPIRV) endif (ENABLE_SPIRV_CODEGEN) +if (ENABLE_METAL_CODEGEN) + target_link_libraries(dxcompiler PRIVATE ${METAL_IRCONVERTER_LIB}) + target_include_directories(dxcompiler PRIVATE ${METAL_IRCONVERTER_INCLUDE_DIR}) + + get_filename_component(METAL_IRCONVERTER_LIB_DIR ${METAL_IRCONVERTER_LIB} DIRECTORY CACHE) + set_property(TARGET dxcompiler APPEND_STRING + PROPERTY LINK_FLAGS " -Wl,-rpath,${METAL_IRCONVERTER_LIB_DIR}") +endif (ENABLE_METAL_CODEGEN) include_directories(AFTER ${LLVM_INCLUDE_DIR}/dxc/Tracing ${DIASDK_INCLUDE_DIRS} ${HLSL_VERSION_LOCATION}) include_directories(${LLVM_SOURCE_DIR}/tools/clang/tools/dxcvalidator) diff --git a/tools/clang/tools/dxcompiler/dxcdisassembler.cpp b/tools/clang/tools/dxcompiler/dxcdisassembler.cpp index 01f4973fbe..16d8b1dadd 100644 --- a/tools/clang/tools/dxcompiler/dxcdisassembler.cpp +++ b/tools/clang/tools/dxcompiler/dxcdisassembler.cpp @@ -671,6 +671,8 @@ static const char *FlagToString(DXIL::RaytracingPipelineFlags Flag) { return "RAYTRACING_PIPELINE_FLAG_SKIP_TRIANGLES"; case DXIL::RaytracingPipelineFlags::SkipProceduralPrimitives: return "RAYTRACING_PIPELINE_FLAG_SKIP_PROCEDURAL_PRIMITIVES"; + case DXIL::RaytracingPipelineFlags::AllowOpacityMicromaps: + return "RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS"; } return ""; } @@ -1218,6 +1220,7 @@ void PrintResourceProperties(DxilResourceProperties &RP, bool bUAV = RP.isUAV(); LPCSTR RW = bUAV ? (RP.Basic.IsROV ? "ROV" : "RW") : ""; LPCSTR GC = bUAV && RP.Basic.IsGloballyCoherent ? "globallycoherent " : ""; + LPCSTR RC = bUAV && RP.Basic.IsReorderCoherent ? 
"reordercoherent " : ""; LPCSTR COUNTER = bUAV && RP.Basic.SamplerCmpOrHasCounter ? ", counter" : ""; switch (RP.getResourceKind()) { @@ -1231,7 +1234,7 @@ void PrintResourceProperties(DxilResourceProperties &RP, case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::Texture2DMS: case DXIL::ResourceKind::Texture2DMSArray: - OS << GC << RW << ResourceKindToString(RP.getResourceKind()); + OS << GC << RC << RW << ResourceKindToString(RP.getResourceKind()); OS << "<"; if (RP.Typed.CompCount > 1) OS << std::to_string(RP.Typed.CompCount) << "x"; @@ -1239,11 +1242,11 @@ void PrintResourceProperties(DxilResourceProperties &RP, break; case DXIL::ResourceKind::RawBuffer: - OS << GC << RW << ResourceKindToString(RP.getResourceKind()); + OS << GC << RC << RW << ResourceKindToString(RP.getResourceKind()); break; case DXIL::ResourceKind::StructuredBuffer: - OS << GC << RW << ResourceKindToString(RP.getResourceKind()); + OS << GC << RC << RW << ResourceKindToString(RP.getResourceKind()); OS << ""; break; diff --git a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp index c1c844d4be..ebeee380ef 100644 --- a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp +++ b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp @@ -71,6 +71,10 @@ #include "clang/Basic/Version.h" #endif // SUPPORT_QUERY_GIT_COMMIT_INFO +#ifdef ENABLE_METAL_CODEGEN +#include "metal_irconverter.h" +#endif + #define CP_UTF16 1200 using namespace llvm; @@ -718,6 +722,7 @@ class DxcCompiler : public IDxcCompiler3, bool validateRootSigContainer = false; if (isPreprocessing) { + TimeTraceScope TimeScope("PreprocessAction", StringRef("")); // These settings are back-compatible with fxc. 
clang::PreprocessorOutputOptions &PPOutOpts = compiler.getPreprocessorOutputOpts(); @@ -817,6 +822,10 @@ class DxcCompiler : public IDxcCompiler3, } compiler.getLangOpts().IsHLSLLibrary = opts.IsLibraryProfile(); + if (compiler.getLangOpts().IsHLSLLibrary && opts.GenMetal) + return ErrorWithString("Shader libraries unsupported in Metal (yet)", + riid, ppResult); + // Clear entry function if library target if (compiler.getLangOpts().IsHLSLLibrary) compiler.getLangOpts().HLSLEntryFunction = @@ -859,6 +868,7 @@ class DxcCompiler : public IDxcCompiler3, compiler.getTarget().adjust(compiler.getLangOpts()); if (opts.AstDump) { + TimeTraceScope TimeScope("DumpAST", StringRef("")); clang::ASTDumpAction dumpAction; // Consider - ASTDumpFilter, ASTDumpLookups compiler.getFrontendOpts().ASTDumpDecls = true; @@ -868,6 +878,7 @@ class DxcCompiler : public IDxcCompiler3, dumpAction.EndSourceFile(); outStream.flush(); } else if (opts.DumpDependencies) { + TimeTraceScope TimeScope("DumpDependencies", StringRef("")); auto dependencyCollector = std::make_shared(); compiler.addDependencyCollector(dependencyCollector); compiler.createPreprocessor(clang::TranslationUnitKind::TU_Complete); @@ -970,6 +981,7 @@ class DxcCompiler : public IDxcCompiler3, EmitBCAction action(&llvmContext); FrontendInputFile file(pUtf8SourceName, IK_HLSL); bool compileOK; + TimeTraceScope TimeScope("Compile Action", StringRef("")); if (action.BeginSourceFile(compiler, file)) { action.Execute(); action.EndSourceFile(); @@ -1024,6 +1036,7 @@ class DxcCompiler : public IDxcCompiler3, // Do not create a container when there is only a a high-level // representation in the module. 
if (compileOK && !opts.CodeGenHighLevel) { + TimeTraceScope TimeScope("AssembleAndWriteContainer", StringRef("")); HRESULT valHR = S_OK; CComPtr pRootSigStream; IFT(CreateMemoryStream(DxcGetThreadMallocNoRef(), @@ -1107,7 +1120,86 @@ class DxcCompiler : public IDxcCompiler3, &pHashBlob)); IFT(pResult->SetOutputObject(DXC_OUT_SHADER_HASH, pHashBlob)); } // SUCCEEDED(valHR) - } // compileOK && !opts.CodeGenHighLevel +#ifdef ENABLE_METAL_CODEGEN + // This is a bit hacky because we don't currently have a good way to + // disassemble AIR. + if (opts.GenMetal && produceFullContainer && + !opts.OutputObject.empty()) { + IRCompiler *MetalCompiler = IRCompilerCreate(); + IRCompilerSetEntryPointName( + MetalCompiler, + compiler.getCodeGenOpts().HLSLEntryFunction.c_str()); + + IRObject *DXILObj = IRObjectCreateFromDXIL( + static_cast(pOutputBlob->GetBufferPointer()), + pOutputBlob->GetBufferSize(), IRBytecodeOwnershipNone); + + // Compile DXIL to Metal IR: + IRError *Error = nullptr; + IRObject *AIR = IRCompilerAllocCompileAndLink(MetalCompiler, NULL, + DXILObj, &Error); + + if (!AIR) { + IRObjectDestroy(DXILObj); + IRCompilerDestroy(MetalCompiler); + IRErrorDestroy(Error); + return ErrorWithString( + "Error occurred in Metal Shader Conversion", riid, ppResult); + } + + IRMetalLibBinary *MetalLib = IRMetalLibBinaryCreate(); + IRShaderStage Stage = IRShaderStageInvalid; + const ShaderModel *SM = hlsl::ShaderModel::GetByName( + compiler.getLangOpts().HLSLProfile); + switch (SM->GetKind()) { + case DXIL::ShaderKind::Vertex: + Stage = IRShaderStageVertex; + break; + case DXIL::ShaderKind::Pixel: + Stage = IRShaderStageFragment; + break; + case DXIL::ShaderKind::Hull: + Stage = IRShaderStageHull; + break; + case DXIL::ShaderKind::Domain: + Stage = IRShaderStageDomain; + break; + case DXIL::ShaderKind::Mesh: + Stage = IRShaderStageMesh; + break; + case DXIL::ShaderKind::Amplification: + Stage = IRShaderStageAmplification; + break; + case DXIL::ShaderKind::Geometry: + Stage = 
IRShaderStageGeometry; + break; + case DXIL::ShaderKind::Compute: + Stage = IRShaderStageCompute; + break; + } + assert(Stage != IRShaderStageInvalid && + "Library targets not supported for Metal (yet)."); + IRObjectGetMetalLibBinary(AIR, Stage, MetalLib); + size_t MetalLibSize = IRMetalLibGetBytecodeSize(MetalLib); + std::unique_ptr MetalLibBytes = + std::unique_ptr(new uint8_t[MetalLibSize]); + IRMetalLibGetBytecode(MetalLib, MetalLibBytes.get()); + + // Store the metallib to custom format or disk, or use to create a + // MTLLibrary. + + CComPtr MetalBlob; + IFT(hlsl::DxcCreateBlobOnHeapCopy( + MetalLibBytes.get(), (uint32_t)MetalLibSize, &MetalBlob)); + std::swap(pOutputBlob, MetalBlob); + + IRMetalLibBinaryDestroy(MetalLib); + IRObjectDestroy(DXILObj); + IRObjectDestroy(AIR); + IRCompilerDestroy(MetalCompiler); + } +#endif + } // compileOK && !opts.CodeGenHighLevel } std::string remarks; @@ -1440,6 +1532,13 @@ class DxcCompiler : public IDxcCompiler3, Opts.EnablePayloadQualifiers; compiler.getLangOpts().HLSLProfile = compiler.getCodeGenOpts().HLSLProfile = Opts.TargetProfile; + const ShaderModel *SM = hlsl::ShaderModel::GetByName( + compiler.getLangOpts().HLSLProfile.c_str()); + if (SM->IsSM69Plus()) + compiler.getLangOpts().MaxHLSLVectorLength = DXIL::kSM69MaxVectorLength; + else + compiler.getLangOpts().MaxHLSLVectorLength = + DXIL::kDefaultMaxVectorLength; // Enable dumping implicit top level decls either if it was specifically // requested or if we are not dumping the ast from the command line. That diff --git a/tools/clang/tools/libclang/CMakeLists.txt b/tools/clang/tools/libclang/CMakeLists.txt index 1ef0c8ecd9..ed49cbaf44 100644 --- a/tools/clang/tools/libclang/CMakeLists.txt +++ b/tools/clang/tools/libclang/CMakeLists.txt @@ -119,6 +119,7 @@ if(MSVC) # Each functions is exported as "dllexport" in include/clang-c. 
# KB835326 set(LLVM_EXPORTED_SYMBOL_FILE) + add_compile_options(/bigobj) endif() # HLSL Change Starts diff --git a/tools/clang/unittests/HLSL/DxilContainerTest.cpp b/tools/clang/unittests/HLSL/DxilContainerTest.cpp index a1533ae19f..339b33c655 100644 --- a/tools/clang/unittests/HLSL/DxilContainerTest.cpp +++ b/tools/clang/unittests/HLSL/DxilContainerTest.cpp @@ -1454,6 +1454,7 @@ TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDAT) { "ConsumeStructuredBuffer consume_buf;" "RasterizerOrderedByteAddressBuffer rov_buf;" "globallycoherent RWByteAddressBuffer gc_buf;" + "reordercoherent RWByteAddressBuffer rc_buf;" "float function_import(float x);" "export float function0(min16float x) { " " return x + 1 + tex[0].x; }" @@ -1465,6 +1466,7 @@ TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDAT) { " f.f2 += 0.5; append_buf.Append(f);" " rov_buf.Store(i, f.i2.x);" " gc_buf.Store(i, f.i2.y);" + " rc_buf.Store(i, f.i2.y);" " b_buf.Store(i, f.i2.x + f.i2.y); }"; CComPtr pCompiler; CComPtr pSource; @@ -1477,7 +1479,7 @@ TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDAT) { hlsl::DXIL::ResourceKind kind; hlsl::RDAT::DxilResourceFlag flag; }; - const unsigned numResFlagCheck = 5; + const unsigned numResFlagCheck = 6; CheckResFlagInfo resFlags[numResFlagCheck] = { {"b_buf", hlsl::DXIL::ResourceKind::RawBuffer, hlsl::RDAT::DxilResourceFlag::None}, @@ -1487,6 +1489,8 @@ TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDAT) { hlsl::RDAT::DxilResourceFlag::UAVCounter}, {"gc_buf", hlsl::DXIL::ResourceKind::RawBuffer, hlsl::RDAT::DxilResourceFlag::UAVGloballyCoherent}, + {"rc_buf", hlsl::DXIL::ResourceKind::RawBuffer, + hlsl::RDAT::DxilResourceFlag::UAVReorderCoherent}, {"rov_buf", hlsl::DXIL::ResourceKind::RawBuffer, hlsl::RDAT::DxilResourceFlag::UAVRasterizerOrderedView}}; @@ -1575,7 +1579,7 @@ TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDAT) { IFTBOOLMSG(false, E_FAIL, "unknown function name"); } } - VERIFY_ARE_EQUAL(resTable.Count(), 8U); + 
VERIFY_ARE_EQUAL(resTable.Count(), 9U); } } IFTBOOLMSG(blobFound, E_FAIL, "failed to find RDAT blob after compiling"); diff --git a/tools/clang/unittests/HLSL/ExtensionTest.cpp b/tools/clang/unittests/HLSL/ExtensionTest.cpp index 51dda5533c..65407291ca 100644 --- a/tools/clang/unittests/HLSL/ExtensionTest.cpp +++ b/tools/clang/unittests/HLSL/ExtensionTest.cpp @@ -204,79 +204,86 @@ Intrinsic Intrinsics[] = { {L"test_fn", DEFAULT_NAME, "r", - {1, false, true, false, -1, countof(TestFnArgs), TestFnArgs}}, + {1, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnArgs), TestFnArgs}}, {L"test_proc", DEFAULT_NAME, "r", - {2, false, false, false, -1, countof(TestProcArgs), TestProcArgs}}, + {2, 0, 0, -1, countof(TestProcArgs), TestProcArgs}}, {L"test_poly", "test_poly.$o", "r", - {3, false, true, false, -1, countof(TestFnCustomArgs), TestFnCustomArgs}}, + {3, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnCustomArgs), + TestFnCustomArgs}}, {L"test_int", "test_int", "r", - {4, false, true, false, -1, countof(TestFnIntArgs), TestFnIntArgs}}, + {4, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnIntArgs), TestFnIntArgs}}, {L"test_nolower", "test_nolower.$o", "n", - {5, false, true, false, -1, countof(TestFnNoLowerArgs), + {5, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnNoLowerArgs), TestFnNoLowerArgs}}, {L"test_pack_0", "test_pack_0.$o", "p", - {6, false, false, false, -1, countof(TestFnPack0), TestFnPack0}}, + {6, 0, 0, -1, countof(TestFnPack0), TestFnPack0}}, {L"test_pack_1", "test_pack_1.$o", "p", - {7, false, true, false, -1, countof(TestFnPack1), TestFnPack1}}, + {7, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnPack1), TestFnPack1}}, {L"test_pack_2", "test_pack_2.$o", "p", - {8, false, true, false, -1, countof(TestFnPack2), TestFnPack2}}, + {8, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnPack2), TestFnPack2}}, {L"test_pack_3", "test_pack_3.$o", "p", - {9, false, true, false, -1, countof(TestFnPack3), TestFnPack3}}, + {9, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnPack3), 
TestFnPack3}}, {L"test_pack_4", "test_pack_4.$o", "p", - {10, false, false, false, -1, countof(TestFnPack4), TestFnPack4}}, + {10, 0, 0, -1, countof(TestFnPack4), TestFnPack4}}, {L"test_rand", "test_rand", "r", - {11, false, false, false, -1, countof(TestRand), TestRand}}, + {11, 0, 0, -1, countof(TestRand), TestRand}}, {L"test_isinf", "test_isinf", "d", - {13, true, true, false, -1, countof(TestIsInf), TestIsInf}}, + {13, INTRIN_FLAG_READ_ONLY | INTRIN_FLAG_READ_NONE, 0, -1, + countof(TestIsInf), TestIsInf}}, {L"test_ibfe", "test_ibfe", "d", - {14, true, true, false, -1, countof(TestIBFE), TestIBFE}}, + {14, INTRIN_FLAG_READ_ONLY | INTRIN_FLAG_READ_NONE, 0, -1, + countof(TestIBFE), TestIBFE}}, // Make this intrinsic have the same opcode as an hlsl intrinsic with an // unsigned counterpart for testing purposes. {L"test_unsigned", "test_unsigned", "n", - {static_cast(hlsl::IntrinsicOp::IOP_min), false, true, false, -1, - countof(TestUnsigned), TestUnsigned}}, + {static_cast(hlsl::IntrinsicOp::IOP_min), INTRIN_FLAG_READ_NONE, + 0, -1, countof(TestUnsigned), TestUnsigned}}, {L"wave_proc", DEFAULT_NAME, "r", - {16, false, true, true, -1, countof(WaveProcArgs), WaveProcArgs}}, + {16, INTRIN_FLAG_READ_NONE | INTRIN_FLAG_IS_WAVE, 0, -1, + countof(WaveProcArgs), WaveProcArgs}}, {L"test_o_1", "test_o_1.$o:1", "r", - {18, false, true, true, -1, countof(TestOverloadArgs), TestOverloadArgs}}, + {18, INTRIN_FLAG_READ_NONE | INTRIN_FLAG_IS_WAVE, 0, -1, + countof(TestOverloadArgs), TestOverloadArgs}}, {L"test_o_2", "test_o_2.$o:2", "r", - {19, false, true, true, -1, countof(TestOverloadArgs), TestOverloadArgs}}, + {19, INTRIN_FLAG_READ_NONE | INTRIN_FLAG_IS_WAVE, 0, -1, + countof(TestOverloadArgs), TestOverloadArgs}}, {L"test_o_3", "test_o_3.$o:3", "r", - {20, false, true, true, -1, countof(TestOverloadArgs), TestOverloadArgs}}, + {20, INTRIN_FLAG_READ_NONE | INTRIN_FLAG_IS_WAVE, 0, -1, + countof(TestOverloadArgs), TestOverloadArgs}}, // custom lowering with both optional 
arguments and vector exploding. // Arg 0 = Opcode // Arg 1 = Pass as is @@ -286,16 +293,17 @@ Intrinsic Intrinsics[] = { {L"CustomLoadOp", "CustomLoadOp", "c:{\"default\" : \"0,1,2:?i1,3.0:?i32,3.1:?i32\"}", - {21, true, false, false, -1, countof(TestCustomLoadOp), TestCustomLoadOp}}, + {21, INTRIN_FLAG_READ_ONLY, 0, -1, countof(TestCustomLoadOp), + TestCustomLoadOp}}, {L"CustomLoadOp", "CustomLoadOp", "c:{\"default\" : \"0,1,2:?i1,3.0:?i32,3.1:?i32\"}", - {21, true, false, false, -1, countof(TestCustomLoadOpBool), + {21, INTRIN_FLAG_READ_ONLY, 0, -1, countof(TestCustomLoadOpBool), TestCustomLoadOpBool}}, {L"CustomLoadOp", "CustomLoadOp", "c:{\"default\" : \"0,1,2:?i1,3.0:?i32,3.1:?i32\"}", - {21, true, false, false, -1, countof(TestCustomLoadOpSubscript), + {21, INTRIN_FLAG_READ_ONLY, 0, -1, countof(TestCustomLoadOpSubscript), TestCustomLoadOpSubscript}}, }; @@ -303,7 +311,8 @@ Intrinsic BufferIntrinsics[] = { {L"MyBufferOp", "MyBufferOp", "m", - {12, false, true, false, -1, countof(TestMyBufferOp), TestMyBufferOp}}, + {12, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestMyBufferOp), + TestMyBufferOp}}, }; // Test adding a method to an object that normally has no methods (SamplerState @@ -312,7 +321,8 @@ Intrinsic SamplerIntrinsics[] = { {L"MySamplerOp", "MySamplerOp", "m", - {15, false, true, false, -1, countof(TestMySamplerOp), TestMySamplerOp}}, + {15, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestMySamplerOp), + TestMySamplerOp}}, }; // Define a lowering string to target a common dxil extension operation defined @@ -345,12 +355,12 @@ Intrinsic Texture1DIntrinsics[] = { {L"MyTextureOp", "MyTextureOp", MyTextureOp_LoweringInfo, - {17, false, true, false, -1, countof(TestMyTexture1DOp_0), + {17, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestMyTexture1DOp_0), TestMyTexture1DOp_0}}, {L"MyTextureOp", "MyTextureOp", MyTextureOp_LoweringInfo, - {17, false, true, false, -1, countof(TestMyTexture1DOp_1), + {17, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestMyTexture1DOp_1), 
TestMyTexture1DOp_1}}, }; @@ -358,7 +368,7 @@ Intrinsic Texture2DIntrinsics[] = { {L"MyTextureOp", "MyTextureOp", MyTextureOp_LoweringInfo, - {17, false, true, false, -1, countof(TestMyTexture2DOp), + {17, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestMyTexture2DOp), TestMyTexture2DOp}}, }; @@ -1497,8 +1507,8 @@ TEST_F(ExtensionTest, EvalAttributeCollision) { Intrinsic Intrinsic = {L"collide_proc", "collide_proc", "r", - {static_cast(op), true, false, false, -1, - countof(Args), Args}}; + {static_cast(op), INTRIN_FLAG_READ_ONLY, 0, + -1, countof(Args), Args}}; Compiler c(m_dllSupport); c.RegisterIntrinsicTable(new TestIntrinsicTable(&Intrinsic, 1)); c.Compile(R"( @@ -1532,10 +1542,8 @@ TEST_F(ExtensionTest, NoUnwind) { IA_C}, {"value", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_NUMERIC, 1, IA_C}}; - Intrinsic Intrinsic = {L"test_proc", - "test_proc", - "r", - {1, false, false, false, -1, countof(Args), Args}}; + Intrinsic Intrinsic = { + L"test_proc", "test_proc", "r", {1, 0, 0, -1, countof(Args), Args}}; Compiler c(m_dllSupport); c.RegisterIntrinsicTable(new TestIntrinsicTable(&Intrinsic, 1)); c.Compile(R"( @@ -1572,7 +1580,8 @@ TEST_F(ExtensionTest, DCE) { Intrinsic Intrinsic = {L"test_proc", "test_proc", "r", - {1, true, true, false, -1, countof(Args), Args}}; + {1, INTRIN_FLAG_READ_ONLY | INTRIN_FLAG_READ_NONE, 0, + -1, countof(Args), Args}}; Compiler c(m_dllSupport); c.RegisterIntrinsicTable(new TestIntrinsicTable(&Intrinsic, 1)); c.Compile(R"( diff --git a/tools/clang/unittests/HLSL/LinkerTest.cpp b/tools/clang/unittests/HLSL/LinkerTest.cpp index 7cafa0db06..df8bb644e1 100644 --- a/tools/clang/unittests/HLSL/LinkerTest.cpp +++ b/tools/clang/unittests/HLSL/LinkerTest.cpp @@ -526,6 +526,11 @@ TEST_F(LinkerTest, RunLinkMatArrayParam) { Link(L"main", L"ps_6_0", pLinker, {libName, libName2}, {"alloca [24 x float]", "getelementptr [12 x float], [12 x float]*"}, {}); + + Link(L"main", L"ps_6_9", pLinker, {libName, libName2}, + {"alloca [2 x <12 x float>]", + 
"getelementptr [12 x float], [12 x float]*"}, + {}); } TEST_F(LinkerTest, RunLinkMatParam) { diff --git a/tools/clang/unittests/HLSL/PixTest.cpp b/tools/clang/unittests/HLSL/PixTest.cpp index bb81c1c953..e337d2951c 100644 --- a/tools/clang/unittests/HLSL/PixTest.cpp +++ b/tools/clang/unittests/HLSL/PixTest.cpp @@ -146,12 +146,17 @@ class PixTest : public ::testing::Test { TEST_METHOD(RootSignatureUpgrade_Annotation) TEST_METHOD(DxilPIXDXRInvocationsLog_SanityTest) + TEST_METHOD(DxilPIXDXRInvocationsLog_EmbeddedRootSigs) TEST_METHOD(DebugInstrumentation_TextOutput) TEST_METHOD(DebugInstrumentation_BlockReport) TEST_METHOD(DebugInstrumentation_VectorAllocaWrite_Structs) + TEST_METHOD(NonUniformResourceIndex_Resource) + TEST_METHOD(NonUniformResourceIndex_DescriptorHeap) + TEST_METHOD(NonUniformResourceIndex_Raytracing) + dxc::DxcDllSupport m_dllSupport; VersionSupportInfo m_ver; @@ -443,6 +448,11 @@ class PixTest : public ::testing::Test { std::string RunDxilPIXAddTidToAmplificationShaderPayloadPass(IDxcBlob *blob); CComPtr RunDxilPIXMeshShaderOutputPass(IDxcBlob *blob); CComPtr RunDxilPIXDXRInvocationsLog(IDxcBlob *blob); + std::vector + RunDxilNonUniformResourceIndexInstrumentation(IDxcBlob *blob, + std::string &outputText); + void TestNuriCase(const char *source, const wchar_t *target, + uint32_t expectedResult); void TestPixUAVCase(char const *hlsl, wchar_t const *model, wchar_t const *entry); std::string Disassemble(IDxcBlob *pProgram); @@ -660,7 +670,7 @@ CComPtr PixTest::RunDxilPIXDXRInvocationsLog(IDxcBlob *blob) { CComPtr pOptimizedModule; CComPtr pText; VERIFY_SUCCEEDED(pOptimizer->RunOptimizer( - dxil, Options.data(), Options.size(), &pOptimizedModule, &pText)); + blob, Options.data(), Options.size(), &pOptimizedModule, &pText)); std::string outputText; if (pText->GetBufferSize() != 0) { @@ -670,6 +680,29 @@ CComPtr PixTest::RunDxilPIXDXRInvocationsLog(IDxcBlob *blob) { return pOptimizedModule; } +std::vector 
PixTest::RunDxilNonUniformResourceIndexInstrumentation( + IDxcBlob *blob, std::string &outputText) { + + CComPtr dxil = FindModule(DFCC_ShaderDebugInfoDXIL, blob); + CComPtr pOptimizer; + VERIFY_SUCCEEDED( + m_dllSupport.CreateInstance(CLSID_DxcOptimizer, &pOptimizer)); + std::array Options = { + L"-opt-mod-passes", L"-dxil-dbg-value-to-dbg-declare", + L"-dxil-annotate-with-virtual-regs", + L"-hlsl-dxil-non-uniform-resource-index-instrumentation"}; + + CComPtr pOptimizedModule; + CComPtr pText; + VERIFY_SUCCEEDED(pOptimizer->RunOptimizer( + dxil, Options.data(), Options.size(), &pOptimizedModule, &pText)); + + outputText = BlobToUtf8(pText); + + const std::string disassembly = Disassemble(pOptimizedModule); + return Tokenize(disassembly, "\n"); +} + std::string PixTest::RunDxilPIXAddTidToAmplificationShaderPayloadPass(IDxcBlob *blob) { CComPtr dxil = FindModule(DFCC_ShaderDebugInfoDXIL, blob); @@ -2945,6 +2978,230 @@ void MyMiss(inout MyPayload payload) RunDxilPIXDXRInvocationsLog(compiledLib); } +TEST_F(PixTest, DxilPIXDXRInvocationsLog_EmbeddedRootSigs) { + + const char *source = R"x( + +GlobalRootSignature grs = {"CBV(b0)"}; +struct MyPayload +{ + float4 color; +}; + +[shader("raygeneration")] +void MyRayGen() +{ +} + +[shader("closesthit")] +void MyClosestHit(inout MyPayload payload, in BuiltInTriangleIntersectionAttributes attr) +{ +} + +[shader("anyhit")] +void MyAnyHit(inout MyPayload payload, in BuiltInTriangleIntersectionAttributes attr) +{ +} + +[shader("miss")] +void MyMiss(inout MyPayload payload) +{ +} + +)x"; + + auto compiledLib = Compile(m_dllSupport, source, L"lib_6_3", + {L"-Qstrip_reflect"}, L"RootSig"); + RunDxilPIXDXRInvocationsLog(compiledLib); +} + +uint32_t NuriGetWaveInstructionCount(const std::vector &lines) { + // This is the instruction we'll insert into the shader if we detect dynamic + // resource indexing + const char *const waveActiveAllEqual = "call i1 @dx.op.waveActiveAllEqual"; + + uint32_t instCount = 0; + for (const std::string 
&line : lines) { + instCount += line.find(waveActiveAllEqual) != std::string::npos; + } + return instCount; +} + +void PixTest::TestNuriCase(const char *source, const wchar_t *target, + uint32_t expectedResult) { + + for (const OptimizationChoice &choice : OptimizationChoices) { + const std::vector compilationOptions = {choice.Flag}; + + CComPtr compiledLib = + Compile(m_dllSupport, source, target, compilationOptions); + + std::string outputText; + const std::vector dxilLines = + RunDxilNonUniformResourceIndexInstrumentation(compiledLib, outputText); + + VERIFY_ARE_EQUAL(NuriGetWaveInstructionCount(dxilLines), expectedResult); + + bool foundDynamicIndexingNoNuri = false; + const std::vector outputTextLines = Tokenize(outputText, "\n"); + for (const std::string &line : outputTextLines) { + if (line.find("FoundDynamicIndexingNoNuri") != std::string::npos) { + foundDynamicIndexingNoNuri = true; + break; + } + } + + VERIFY_ARE_EQUAL((expectedResult != 0), foundDynamicIndexingNoNuri); + } +} + +TEST_F(PixTest, NonUniformResourceIndex_Resource) { + + const char *source = R"x( +Texture2D tex[] : register(t0); +float4 main(float2 uv : TEXCOORD0) : SV_TARGET +{ + uint index = uv.x * uv.y; + return tex[index].Load(int3(0, 0, 0)); +})x"; + + const char *sourceWithNuri = R"x( +Texture2D tex[] : register(t0); +float4 main(float2 uv : TEXCOORD0) : SV_TARGET +{ + uint i = uv.x * uv.y; + return tex[NonUniformResourceIndex(i)].Load(int3(0, 0, 0)); +})x"; + + TestNuriCase(source, L"ps_6_0", 1); + TestNuriCase(sourceWithNuri, L"ps_6_0", 0); + + if (m_ver.SkipDxilVersion(1, 6)) { + return; + } + + TestNuriCase(source, L"ps_6_6", 1); + TestNuriCase(sourceWithNuri, L"ps_6_6", 0); +} + +TEST_F(PixTest, NonUniformResourceIndex_DescriptorHeap) { + + if (m_ver.SkipDxilVersion(1, 6)) { + return; + } + + const char *source = R"x( +Texture2D tex[] : register(t0); +float4 main(float2 uv : TEXCOORD0) : SV_TARGET +{ + uint i = uv.x + uv.y; + Texture2D dynResTex = + ResourceDescriptorHeap[i]; + 
SamplerState dynResSampler = + SamplerDescriptorHeap[i]; + return dynResTex.Sample(dynResSampler, uv); +})x"; + + const char *sourceWithNuri = R"x( +Texture2D tex[] : register(t0); +float4 main(float2 uv : TEXCOORD0) : SV_TARGET +{ + uint i = uv.x + uv.y; + Texture2D dynResTex = + ResourceDescriptorHeap[NonUniformResourceIndex(i)]; + SamplerState dynResSampler = + SamplerDescriptorHeap[NonUniformResourceIndex(i)]; + return dynResTex.Sample(dynResSampler, uv); +})x"; + + TestNuriCase(source, L"ps_6_6", 2); + TestNuriCase(sourceWithNuri, L"ps_6_6", 0); +} + +TEST_F(PixTest, NonUniformResourceIndex_Raytracing) { + + if (m_ver.SkipDxilVersion(1, 5)) { + return; + } + + const char *source = R"x( +RWTexture2D RT[] : register(u0); + +[noinline] +void FuncNoInline(uint index) +{ + float2 rayIndex = DispatchRaysIndex().xy; + uint i = index + rayIndex.x * rayIndex.y; + float4 c = float4(0.5, 0.5, 0.5, 0); + RT[i][rayIndex.xy] += c; +} + +void Func(uint index) +{ + float2 rayIndex = DispatchRaysIndex().xy; + uint i = index + rayIndex.y; + float4 c = float4(0, 1, 0, 0); + RT[i][rayIndex.xy] += c; +} + +[shader("raygeneration")] +void Main() +{ + float2 rayIndex = DispatchRaysIndex().xy; + + uint i1 = rayIndex.x; + float4 c1 = float4(1, 0, 1, 1); + RT[i1][rayIndex.xy] += c1; + + uint i2 = rayIndex.x * rayIndex.y * 0.25; + float4 c2 = float4(0.25, 0, 0.25, 0); + RT[i2][rayIndex.xy] += c2; + + Func(i1); + FuncNoInline(i2); +})x"; + + const char *sourceWithNuri = R"x( +RWTexture2D RT[] : register(u0); + +[noinline] +void FuncNoInline(uint index) +{ + float2 rayIndex = DispatchRaysIndex().xy; + uint i = index + rayIndex.x * rayIndex.y; + float4 c = float4(0.5, 0.5, 0.5, 0); + RT[NonUniformResourceIndex(i)][rayIndex.xy] += c; +} + +void Func(uint index) +{ + float2 rayIndex = DispatchRaysIndex().xy; + uint i = index + rayIndex.y; + float4 c = float4(0, 1, 0, 0); + RT[NonUniformResourceIndex(i)][rayIndex.xy] += c; +} + +[shader("raygeneration")] +void Main() +{ + float2 rayIndex = 
DispatchRaysIndex().xy; + + uint i1 = rayIndex.x; + float4 c1 = float4(1, 0, 1, 1); + RT[NonUniformResourceIndex(i1)][rayIndex.xy] += c1; + + uint i2 = rayIndex.x * rayIndex.y * 0.25; + float4 c2 = float4(0.25, 0, 0.25, 0); + RT[NonUniformResourceIndex(i2)][rayIndex.xy] += c2; + + Func(i1); + FuncNoInline(i2); +})x"; + + TestNuriCase(source, L"lib_6_5", 4); + TestNuriCase(sourceWithNuri, L"lib_6_5", 0); +} + TEST_F(PixTest, DebugInstrumentation_TextOutput) { const char *source = R"x( diff --git a/tools/clang/unittests/HLSL/PixTestUtils.cpp b/tools/clang/unittests/HLSL/PixTestUtils.cpp index 91b6c4479c..61647ff5fa 100644 --- a/tools/clang/unittests/HLSL/PixTestUtils.cpp +++ b/tools/clang/unittests/HLSL/PixTestUtils.cpp @@ -397,7 +397,7 @@ CComPtr Compile(dxc::DxcDllSupport &dllSupport, const char *hlsl, CheckOperationSucceeded(pResult, &pProgram); CComPtr pLib; - VERIFY_SUCCEEDED(m_dllSupport.CreateInstance(CLSID_DxcLibrary, &pLib)); + VERIFY_SUCCEEDED(dllSupport.CreateInstance(CLSID_DxcLibrary, &pLib)); const hlsl::DxilContainerHeader *pContainer = hlsl::IsDxilContainerLike( pProgram->GetBufferPointer(), pProgram->GetBufferSize()); VERIFY_IS_NOT_NULL(pContainer); diff --git a/tools/clang/unittests/HLSL/ValidationTest.cpp b/tools/clang/unittests/HLSL/ValidationTest.cpp index f69b0be204..01f24e0227 100644 --- a/tools/clang/unittests/HLSL/ValidationTest.cpp +++ b/tools/clang/unittests/HLSL/ValidationTest.cpp @@ -1506,21 +1506,23 @@ TEST_F(ValidationTest, StructBufStrideOutOfBound) { } TEST_F(ValidationTest, StructBufLoadCoordinates) { - RewriteAssemblyCheckMsg(L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", - "bufferLoad.f32(i32 68, %dx.types.Handle " - "%buf1_texture_structbuf, i32 1, i32 8)", - "bufferLoad.f32(i32 68, %dx.types.Handle " - "%buf1_texture_structbuf, i32 1, i32 undef)", - "structured buffer require 2 coordinates"); + RewriteAssemblyCheckMsg( + L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", + "bufferLoad.f32(i32 68, %dx.types.Handle " + 
"%buf1_texture_structbuf, i32 1, i32 8)", + "bufferLoad.f32(i32 68, %dx.types.Handle " + "%buf1_texture_structbuf, i32 1, i32 undef)", + "structured buffer requires defined index and offset coordinates"); } TEST_F(ValidationTest, StructBufStoreCoordinates) { - RewriteAssemblyCheckMsg(L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", - "bufferStore.f32(i32 69, %dx.types.Handle " - "%buf2_UAV_structbuf, i32 0, i32 0", - "bufferStore.f32(i32 69, %dx.types.Handle " - "%buf2_UAV_structbuf, i32 0, i32 undef", - "structured buffer require 2 coordinates"); + RewriteAssemblyCheckMsg( + L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", + "bufferStore.f32(i32 69, %dx.types.Handle " + "%buf2_UAV_structbuf, i32 0, i32 0", + "bufferStore.f32(i32 69, %dx.types.Handle " + "%buf2_UAV_structbuf, i32 0, i32 undef", + "structured buffer requires defined index and offset coordinates"); } TEST_F(ValidationTest, TypedBufRetType) { diff --git a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp index 7066247883..6db27d7a41 100644 --- a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp +++ b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp @@ -820,10 +820,10 @@ class ExecutionTest { return false; } - if (GetModuleHandle("d3d10warp.dll") != NULL) { - CHAR szFullModuleFilePath[MAX_PATH] = ""; - GetModuleFileName(GetModuleHandle("d3d10warp.dll"), - szFullModuleFilePath, sizeof(szFullModuleFilePath)); + if (GetModuleHandleW(L"d3d10warp.dll") != NULL) { + WCHAR szFullModuleFilePath[MAX_PATH] = L""; + GetModuleFileNameW(GetModuleHandleW(L"d3d10warp.dll"), + szFullModuleFilePath, sizeof(szFullModuleFilePath)); WEX::Logging::Log::Comment(WEX::Common::String().Format( L"WARP driver loaded from: %S", szFullModuleFilePath)); } @@ -5632,7 +5632,7 @@ void ExecutionTest::RunBasicShaderModelTest(CComPtr pDevice, std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", - // this callbacked is called when the test is creating the 
resource to run + // this callback is called when the test is creating the resource to run // the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { UNREFERENCED_PARAMETER(Name); @@ -6999,7 +6999,7 @@ TEST_F(ExecutionTest, UnaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryFPOp")); @@ -7067,7 +7067,7 @@ TEST_F(ExecutionTest, BinaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryFPOp")); @@ -7157,7 +7157,7 @@ TEST_F(ExecutionTest, TertiaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryFPOp")); @@ -7234,7 +7234,7 @@ TEST_F(ExecutionTest, UnaryHalfOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryFPOp")); @@ -7314,7 +7314,7 @@ TEST_F(ExecutionTest, BinaryHalfOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", - // this callbacked is called when the test + // this callback is called 
when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryFPOp")); @@ -7424,7 +7424,7 @@ TEST_F(ExecutionTest, TertiaryHalfOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryFPOp")); @@ -7494,7 +7494,7 @@ TEST_F(ExecutionTest, UnaryIntOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryIntOp")); @@ -7554,7 +7554,7 @@ TEST_F(ExecutionTest, UnaryUintOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryUintOp")); @@ -7619,7 +7619,7 @@ TEST_F(ExecutionTest, BinaryIntOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryIntOp")); @@ -7707,7 +7707,7 @@ TEST_F(ExecutionTest, TertiaryIntOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is 
creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryIntOp")); @@ -7777,7 +7777,7 @@ TEST_F(ExecutionTest, BinaryUintOpTest) { int numExpected = Validation_Expected2->size() == 0 ? 1 : 2; std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryUintOp")); @@ -7869,7 +7869,7 @@ TEST_F(ExecutionTest, TertiaryUintOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryUintOp")); @@ -7948,7 +7948,7 @@ TEST_F(ExecutionTest, UnaryInt16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryIntOp")); @@ -8016,7 +8016,7 @@ TEST_F(ExecutionTest, UnaryUint16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryUintOp")); @@ -8091,7 +8091,7 @@ TEST_F(ExecutionTest, BinaryInt16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryIntOp", - // this callbacked is called when the 
test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryIntOp")); @@ -8187,7 +8187,7 @@ TEST_F(ExecutionTest, TertiaryInt16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryIntOp")); @@ -8264,7 +8264,7 @@ TEST_F(ExecutionTest, BinaryUint16OpTest) { int numExpected = Validation_Expected2->size() == 0 ? 1 : 2; std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryUintOp")); @@ -8363,7 +8363,7 @@ TEST_F(ExecutionTest, TertiaryUint16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryUintOp")); @@ -8948,7 +8948,7 @@ TEST_F(ExecutionTest, DotTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "DotOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SDotOp")); @@ -9240,7 +9240,7 @@ TEST_F(ExecutionTest, Msad4Test) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "Msad4", - // this 
callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SMsad4")); @@ -9342,7 +9342,7 @@ TEST_F(ExecutionTest, DenormBinaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryFPOp")); @@ -9455,7 +9455,7 @@ TEST_F(ExecutionTest, DenormTertiaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryFPOp")); @@ -9883,7 +9883,7 @@ void ExecutionTest::WaveIntrinsicsActivePrefixTest( ++maskIndex) { std::shared_ptr test = RunShaderOpTestAfterParse( pDevice, m_support, "WaveIntrinsicsOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SWaveIntrinsicsOp")); @@ -12609,7 +12609,7 @@ TEST_F(ExecutionTest, HelperLaneTest) { std::shared_ptr test = RunShaderOpTestAfterParse( pDevice, m_support, "HelperLaneTestNoWave", - // this callbacked is called when the test is creating the resource to + // this callback is called when the test is creating the resource to // run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "UAVBuffer0")); diff --git a/tools/clang/utils/check_cfc/setup.py b/tools/clang/utils/check_cfc/setup.py index 
b5fc473639..7405513f0a 100644 --- a/tools/clang/utils/check_cfc/setup.py +++ b/tools/clang/utils/check_cfc/setup.py @@ -8,10 +8,10 @@ import platform import sys if platform.system() == 'Windows': - print "Could not find py2exe. Please install then run setup.py py2exe." + print("Could not find py2exe. Please install then run setup.py py2exe.") raise else: - print "setup.py only required on Windows." + print("setup.py only required on Windows.") sys.exit(1) setup( diff --git a/utils/git/requirements_formatting.txt b/utils/git/requirements_formatting.txt index 06db8176c9..6f3e07dcf2 100644 --- a/utils/git/requirements_formatting.txt +++ b/utils/git/requirements_formatting.txt @@ -18,7 +18,7 @@ charset-normalizer==3.2.0 # via requests click==8.1.7 # via black -cryptography==43.0.1 +cryptography==44.0.1 # via pyjwt darker==1.7.2 # via -r llvm/utils/git/requirements_formatting.txt.in diff --git a/utils/hct/CMakeLists.txt b/utils/hct/CMakeLists.txt new file mode 100644 index 0000000000..41e6b494e6 --- /dev/null +++ b/utils/hct/CMakeLists.txt @@ -0,0 +1,3 @@ +# generate hlsl_intrinsic_opcodes.json to preserve high level intrinsic opcodes +# This uses CODE_TAG because the file exists in the source tree. +add_hlsl_hctgen(HlslIntrinsicOpcodes OUTPUT hlsl_intrinsic_opcodes.json CODE_TAG) diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 7f7637b230..f1274fd308 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -1,6 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // This file is distributed under the University of Illinois Open Source License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// // See hctdb.py for the implementation of intrinsic file processing. 
// // Intrinsic declarations are grouped into namespaces that @@ -361,8 +364,8 @@ void [[]] DispatchMesh(in uint threadGroupCountX, in uint threadGroupCountY, in // Return true if the current lane is a helper lane bool [[ro]] IsHelperLane(); -// HL Op for allocating ray query object that default constructor uses -uint [[hidden]] AllocateRayQuery(in uint flags); +// HL Op for allocating ray query object +uint [[hidden]] AllocateRayQuery(in uint flags, in uint rayqueryflags); resource [[hidden]] CreateResourceFromHeap(in uint index); @@ -393,7 +396,13 @@ void [[]] RawBufferStore(in u64 addr, in $funcT value); void [[]] RawBufferStore(in u64 addr, in $funcT value, in uint alignment); void [[]] ext_execution_mode(in uint mode, ...); void [[]] ext_execution_mode_id(in uint mode, ...); +$funcT2 [[]] static_pointer_cast(in VkBufferPointer ptr); +$funcT2 [[]] reinterpret_pointer_cast(in VkBufferPointer ptr); + +} namespace +namespace BufferPointerMethods { +$classT [[ro]] GetBufferContents(); } namespace // SPIRV Change Ends @@ -1089,6 +1098,45 @@ uint [[ro]] CommittedInstanceContributionToHitGroupIndex(); } namespace +// Shader Execution Reordering +namespace DxHitObjectMethods { + DxHitObject [[static,class_prefix,min_sm=6.9]] MakeNop(); + DxHitObject [[static,class_prefix,min_sm=6.9]] MakeMiss(in uint RayFlags, in uint MissShaderIndex, in ray_desc Ray); + DxHitObject [[static,class_prefix,min_sm=6.9]] FromRayQuery(in RayQuery rq); + DxHitObject [[static,class_prefix,min_sm=6.9]] FromRayQuery(in RayQuery rq, in uint HitKind, in udt Attributes); + DxHitObject [[static,class_prefix,min_sm=6.9]] TraceRay(in acceleration_struct AccelerationStructure, in uint RayFlags, in uint InstanceInclusionMask, in uint RayContributionToHitGroupIndex, in uint MultiplierForGeometryContributionToHitGroupIndex, in uint MissShaderIndex, in ray_desc Ray, inout udt Payload); + void [[static,class_prefix,min_sm=6.9]] Invoke(in DxHitObject ho, inout udt Payload); + bool 
[[rn,class_prefix,min_sm=6.9]] IsMiss(); + bool [[rn,class_prefix,min_sm=6.9]] IsHit(); + bool [[rn,class_prefix,min_sm=6.9]] IsNop(); + uint [[rn,class_prefix,min_sm=6.9]] GetRayFlags(); + float [[rn,class_prefix,min_sm=6.9]] GetRayTMin(); + float [[rn,class_prefix,min_sm=6.9]] GetRayTCurrent(); + float<3> [[rn,class_prefix,min_sm=6.9]] GetWorldRayOrigin(); + float<3> [[rn,class_prefix,min_sm=6.9]] GetWorldRayDirection(); + float<3> [[rn,class_prefix,min_sm=6.9]] GetObjectRayOrigin(); + float<3> [[rn,class_prefix,min_sm=6.9]] GetObjectRayDirection(); + float<3,4> [[rn,class_prefix,min_sm=6.9]] GetObjectToWorld3x4(); + float<4,3> [[rn,class_prefix,min_sm=6.9]] GetObjectToWorld4x3(); + float<3,4> [[rn,class_prefix,min_sm=6.9]] GetWorldToObject3x4(); + float<4,3> [[rn,class_prefix,min_sm=6.9]] GetWorldToObject4x3(); + uint [[rn,class_prefix,min_sm=6.9]] GetGeometryIndex(); + uint [[rn,class_prefix,min_sm=6.9]] GetInstanceIndex(); + uint [[rn,class_prefix,min_sm=6.9]] GetInstanceID(); + uint [[rn,class_prefix,min_sm=6.9]] GetPrimitiveIndex(); + uint [[rn,class_prefix,min_sm=6.9]] GetHitKind(); + uint [[rn,class_prefix,min_sm=6.9]] GetShaderTableIndex(); + $funcT [[class_prefix,min_sm=6.9]] GetAttributes(); + void [[class_prefix,min_sm=6.9]] SetShaderTableIndex(in uint RecordIndex); + uint [[ro,class_prefix,min_sm=6.9]] LoadLocalRootTableConstant(in uint RootConstantOffsetInBytes); +} namespace + +namespace DxIntrinsics { +void [[min_sm=6.9]] MaybeReorderThread(in DxHitObject HitObject); +void [[min_sm=6.9]] MaybeReorderThread(in uint CoherenceHint, in uint NumCoherenceHintBitsFromLSB); +void [[min_sm=6.9]] MaybeReorderThread(in DxHitObject HitObject, in uint CoherenceHint, in uint NumCoherenceHintBitsFromLSB); +} namespace + // Work Graphs objects and methods // EmptyNodeInput @@ -1136,4 +1184,3 @@ $classT [[]] SubpassLoad(in int sample) : subpassinputms_load; } namespace // SPIRV Change Ends - diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 
66376c3b9b..6344fb5849 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -1,5 +1,7 @@ # Copyright (C) Microsoft Corporation. All rights reserved. # This file is distributed under the University of Illinois Open Source License. See LICENSE.TXT for details. +# Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +# All rights reserved. ############################################################################### # DXIL information. # ############################################################################### @@ -37,6 +39,30 @@ "array_local_ldst", ] +# These are the valid overload type characters for DXIL instructions. +# - "v" is for void, and can only be used alone. +# - "u" is for user defined type (UDT), and is mutually exclusive with the other +# types. +# - "o" is for an HLSL object type (e.g. Texture, Sampler, etc.), and is +# mutually exclusive with the other types. +# - "<" is for vector overloads, and may be followed by a set of supported +# component types. +# - If "<" is not followed by any component types, any preceding scalar types +# are used. +# - Vector component types are captured into a separate list during +# processing. +# - "," is used to separate multiple overload dimensions. +# - When used, only $x0, $x1, etc. are supported for overloaded parameter +# types. +# dxil_all_user_oload_chars must be kept in sync with the indices in +# hlsl::OP::TypeSlot in DxilOperations.h. +dxil_all_user_oload_chars = "hfd18wiluo<" +dxil_scalar_oload_chars = "hfd18wil" + +# Maximum number of overload dimensions supported through the extended overload +# in DXIL instructions. 
+dxil_max_overload_dims = 2 + class db_dxil_enum_value(object): "A representation for a value in an enumeration type" @@ -81,6 +107,7 @@ def __init__(self, name, **kwargs): self.ops = [] # the operands that this instruction takes self.is_allowed = True # whether this instruction is allowed in a DXIL program self.oload_types = "" # overload types if applicable + # Always call process_oload_types() after setting oload_types. self.fn_attr = "" # attribute shorthands: rn=does not access memory,ro=only reads from memory, self.is_deriv = False # whether this is some kind of derivative self.is_gradient = False # whether this requires a gradient calculation @@ -98,6 +125,9 @@ def __init__(self, name, **kwargs): self.is_reserved = self.dxil_class == "Reserved" self.shader_model_translated = () # minimum shader model required with translation by linker self.props = {} # extra properties + self.num_oloads = 0 # number of overloads for this instruction + if self.is_dxil_op: + self.process_oload_types() def __str__(self): return self.name @@ -105,6 +135,127 @@ def __str__(self): def fully_qualified_name(self): return "{}::{}".format(self.fully_qualified_name_prefix, self.name) + def process_oload_types(self): + if type(self.oload_types) is not str: + raise ValueError( + f"overload for '{self.name}' should be a string - use empty if n/a" + ) + # Early out for LLVM instructions + if not self.is_dxil_op: + return + + self.num_oloads = 0 + + # Early out for void overloads. + if self.oload_types == "v": + return + + if self.oload_types == "": + raise ValueError( + f"overload for '{self.name}' should not be empty - use void if n/a" + ) + if "v" in self.oload_types: + raise ValueError( + f"void overload should be exclusive to other types for '({self.name})'" + ) + + # Process oload_types for extended and vector overloads. 
+ # Contrived example: "hf<, dxil_max_overload_dims: + raise ValueError( + "Too many overload dimensions for DXIL op " + f"{self.name}: '{self.oload_types}'" + ) + + def check_duplicate_overloads(oloads): + if len(oloads) != len(set(oloads)): + raise ValueError( + "Duplicate overload types specified for DXIL op " + f"{self.name}: '{oloads}' in '{self.oload_types}'" + ) + + def check_overload_chars(oloads, valid_chars): + invalid_chars = set(oloads).difference(set(valid_chars)) + if invalid_chars: + raise ValueError( + "Invalid overload type character(s) used for DXIL op " + f"{self.name}: '{invalid_chars}' in '{oloads}' from " + f"'{self.oload_types}'" + ) + + for n, oloads in enumerate(oload_types): + if len(oloads) == 0: + raise ValueError( + f"Invalid empty overload type for DXIL op " + f"{self.name}: '{self.oload_types}'" + ) + check_overload_chars(oloads, dxil_all_user_oload_chars) + + # split at vector for component overloads, if vector specified + # without following components, use the scalar overloads that + # precede the vector character. + split = oloads.split("<") + if len(split) == 1: + # No vector overload. + continue + elif len(split) != 2: + raise ValueError( + f"Invalid vector overload for DXIL op {self.name}: " + f"{oloads} in '{self.oload_types}'" + ) + + # Split into scalar and vector component overloads. + scalars, vector_oloads = split + check_duplicate_overloads(scalars) + if not vector_oloads: + vector_oloads = scalars + else: + check_duplicate_overloads(vector_oloads) + if not vector_oloads: + raise ValueError( + "No scalar overload types provided with vector overload " + f"for DXIL op {self.name}: '{self.oload_types}'" + ) + check_overload_chars(vector_oloads, dxil_scalar_oload_chars) + oload_types[n] = scalars + "<" + vector_oloads + # Reconstruct overload string with default vector overloads. 
+ self.oload_types = ",".join(oload_types) + self.check_extended_oload_ops() + + def check_extended_oload_ops(self): + "Ensure ops has sequential extended overload references with $x0, $x1, etc." + if self.num_oloads < 2: + return + next_oload_idx = 0 + for i in self.ops: + if i.llvm_type.startswith("$x"): + if i.llvm_type != "$x" + str(next_oload_idx): + raise ValueError( + "Extended overloads are not sequentially referenced in " + f"DXIL op {self.name}: {i.llvm_type} != $x{next_oload_idx}" + ) + next_oload_idx += 1 + if next_oload_idx != self.num_oloads: + raise ValueError( + "Extended overloads are not referenced for all overload " + f"dimensions in DXIL op {self.name}: {next_oload_idx} != " + f"{self.num_oloads}" + ) + class db_dxil_metadata(object): "A representation for a metadata record" @@ -328,7 +479,7 @@ def populate_categories_and_models(self): self.name_idx[i].category = "Dot" for ( i - ) in "CreateHandle,CBufferLoad,CBufferLoadLegacy,TextureLoad,TextureStore,TextureStoreSample,BufferLoad,BufferStore,BufferUpdateCounter,CheckAccessFullyMapped,GetDimensions,RawBufferLoad,RawBufferStore".split( + ) in "CreateHandle,CBufferLoad,CBufferLoadLegacy,TextureLoad,TextureStore,TextureStoreSample,BufferLoad,BufferStore,BufferUpdateCounter,CheckAccessFullyMapped,GetDimensions,RawBufferLoad,RawBufferStore,RawBufferVectorLoad,RawBufferVectorStore".split( "," ): self.name_idx[i].category = "Resources" @@ -455,6 +606,8 @@ def populate_categories_and_models(self): for i in "RawBufferLoad,RawBufferStore".split(","): self.name_idx[i].shader_model = 6, 2 self.name_idx[i].shader_model_translated = 6, 0 + for i in "RawBufferVectorLoad,RawBufferVectorStore".split(","): + self.name_idx[i].shader_model = 6, 9 for i in "DispatchRaysIndex,DispatchRaysDimensions".split(","): self.name_idx[i].category = "Ray Dispatch Arguments" self.name_idx[i].shader_model = 6, 3 @@ -477,9 +630,7 @@ def populate_categories_and_models(self): "closesthit", ) for i in "GeometryIndex".split(","): - 
self.name_idx[ - i - ].category = ( + self.name_idx[i].category = ( "Raytracing object space uint System Values, raytracing tier 1.1" ) self.name_idx[i].shader_model = 6, 5 @@ -574,9 +725,7 @@ def populate_categories_and_models(self): self.name_idx[i].shader_model = 6, 3 self.name_idx[i].shader_stages = ("library", "intersection") for i in "CreateHandleForLib".split(","): - self.name_idx[ - i - ].category = ( + self.name_idx[i].category = ( "Library create handle from resource struct (like HL intrinsic)" ) self.name_idx[i].shader_model = 6, 3 @@ -699,6 +848,31 @@ def populate_categories_and_models(self): self.name_idx[i].category = "Extended Command Information" self.name_idx[i].shader_stages = ("vertex",) self.name_idx[i].shader_model = 6, 8 + for i in ( + "HitObject_MakeMiss,HitObject_MakeNop" + + ",HitObject_TraceRay,HitObject_Invoke" + + ",HitObject_FromRayQuery,HitObject_FromRayQueryWithAttrs" + + ",HitObject_IsMiss,HitObject_IsHit,HitObject_IsNop" + + ",HitObject_RayFlags,HitObject_RayTMin,HitObject_RayTCurrent,HitObject_GeometryIndex,HitObject_InstanceIndex,HitObject_InstanceID,HitObject_PrimitiveIndex,HitObject_HitKind,HitObject_ShaderTableIndex" + + ",HitObject_WorldRayOrigin,HitObject_WorldRayDirection,HitObject_ObjectRayOrigin,HitObject_ObjectRayDirection" + + ",HitObject_ObjectToWorld3x4,HitObject_WorldToObject3x4" + + ",HitObject_SetShaderTableIndex,HitObject_LoadLocalRootTableConstant,HitObject_Attributes" + ).split(","): + self.name_idx[i].category = "Shader Execution Reordering" + self.name_idx[i].shader_model = 6, 9 + self.name_idx[i].shader_stages = ( + "library", + "raygeneration", + "closesthit", + "miss", + ) + for i in ("MaybeReorderThread").split(","): + self.name_idx[i].category = "Shader Execution Reordering" + self.name_idx[i].shader_model = 6, 9 + self.name_idx[i].shader_stages = ( + "library", + "raygeneration", + ) def populate_llvm_instructions(self): # Add instructions that map to LLVM instructions. 
@@ -1175,6 +1349,37 @@ def populate_llvm_instructions(self): self.add_llvm_instr( "OTHER", 53, "VAArg", "VAArgInst", "vaarg instruction", "", [] ) + + self.add_llvm_instr( + "OTHER", + 54, + "ExtractElement", + "ExtractElementInst", + "extracts from vector", + "", + [], + ) + + self.add_llvm_instr( + "OTHER", + 55, + "InsertElement", + "InsertElementInst", + "inserts into vector", + "", + [], + ) + + self.add_llvm_instr( + "OTHER", + 56, + "ShuffleVector", + "ShuffleVectorInst", + "Shuffle two vectors", + "", + [], + ) + self.add_llvm_instr( "OTHER", 57, @@ -1314,7 +1519,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "returns the " + i, - "hfd", + "hfd<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1348,7 +1553,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "returns the " + i, - "hf", + "hf<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1365,7 +1570,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "returns the reverse bit pattern of the input value", - "wil", + "wil<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1412,7 +1617,7 @@ def UFI(name, **mappings): next_op_idx, "Binary", "returns the " + i + " of the input values", - "hfd", + "hfd<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1430,7 +1635,7 @@ def UFI(name, **mappings): next_op_idx, "Binary", "returns the " + i + " of the input values", - "wil", + "wil<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1485,7 +1690,7 @@ def UFI(name, **mappings): next_op_idx, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", - "hfd", + "hfd<", "rn", [ db_dxil_param( @@ -1502,7 +1707,7 @@ def UFI(name, **mappings): next_op_idx, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", - "d", + "d<", "rn", [ db_dxil_param( @@ -1526,7 +1731,7 @@ def UFI(name, **mappings): next_op_idx, "Tertiary", "performs an integral " + i, - "wil", + "wil<", "rn", [ db_dxil_param(0, "$o", "", "the operation 
result"), @@ -2419,7 +2624,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per stamp", - "hf", + "hf<", "rn", [ db_dxil_param( @@ -2437,7 +2642,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per stamp", - "hf", + "hf<", "rn", [ db_dxil_param( @@ -2455,7 +2660,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per pixel", - "hf", + "hf<", "rn", [ db_dxil_param( @@ -2473,7 +2678,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per pixel", - "hf", + "hf<", "rn", [ db_dxil_param( @@ -5550,79 +5755,655 @@ def UFI(name, **mappings): next_op_idx = self.reserve_dxil_op_range("ReservedA", next_op_idx, 3) # Shader Execution Reordering - next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 31) + self.add_dxil_op( + "HitObject_TraceRay", + next_op_idx, + "HitObject_TraceRay", + "Analogous to TraceRay but without invoking CH/MS and returns the intermediate state as a HitObject", + "u", + "", + [ + db_dxil_param(0, "hit_object", "", "Resulting HitObject"), + db_dxil_param( + 2, + "res", + "accelerationStructure", + "Top-level acceleration structure to use", + ), + db_dxil_param( + 3, + "i32", + "rayFlags", + "Valid combination of Ray_flags", + ), + db_dxil_param( + 4, + "i32", + "instanceInclusionMask", + "Bottom 8 bits of InstanceInclusionMask are used to include/reject geometry instances based on the InstanceMask in each instance: if(!((InstanceInclusionMask & InstanceMask) & 0xff)) { ignore intersection }", + ), + db_dxil_param( + 5, + "i32", + "rayContributionToHitGroupIndex", + "Offset to add into Addressing calculations within shader tables for hit group indexing. 
Only the bottom 4 bits of this value are used", + ), + db_dxil_param( + 6, + "i32", + "multiplierForGeometryContributionToHitGroupIndex", + "Stride to multiply by per-geometry GeometryContributionToHitGroupIndex in Addressing calculations within shader tables for hit group indexing. Only the bottom 4 bits of this value are used", + ), + db_dxil_param( + 7, + "i32", + "missShaderIndex", + "Miss shader index in Addressing calculations within shader tables. Only the bottom 16 bits of this value are used", + ), + db_dxil_param(8, "f", "Origin_X", "Origin x of the ray"), + db_dxil_param(9, "f", "Origin_Y", "Origin y of the ray"), + db_dxil_param(10, "f", "Origin_Z", "Origin z of the ray"), + db_dxil_param(11, "f", "TMin", "Tmin of the ray"), + db_dxil_param(12, "f", "Direction_X", "Direction x of the ray"), + db_dxil_param(13, "f", "Direction_Y", "Direction y of the ray"), + db_dxil_param(14, "f", "Direction_Z", "Direction z of the ray"), + db_dxil_param(15, "f", "TMax", "Tmax of the ray"), + db_dxil_param( + 16, + "udt", + "payload", + "User-defined payload structure", + ), + ], + ) + next_op_idx += 1 - # Reserved block C - next_op_idx = self.reserve_dxil_op_range("ReservedC", next_op_idx, 10) + self.add_dxil_op( + "HitObject_FromRayQuery", + next_op_idx, + "HitObject_FromRayQuery", + "Creates a new HitObject representing a committed hit from a RayQuery", + "v", + "ro", + [ + db_dxil_param( + 0, "hit_object", "", "HitObject created from RayQuery object" + ), + db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle"), + ], + ) + next_op_idx += 1 - # Set interesting properties. 
- self.build_indices() - for ( - i - ) in "CalculateLOD,DerivCoarseX,DerivCoarseY,DerivFineX,DerivFineY,Sample,SampleBias,SampleCmp,SampleCmpBias".split( - "," - ): - self.name_idx[i].is_gradient = True - for i in "DerivCoarseX,DerivCoarseY,DerivFineX,DerivFineY".split(","): - assert ( - self.name_idx[i].is_gradient == True - ), "all derivatives are marked as requiring gradients" - self.name_idx[i].is_deriv = True + self.add_dxil_op( + "HitObject_FromRayQueryWithAttrs", + next_op_idx, + "HitObject_FromRayQueryWithAttrs", + "Creates a new HitObject representing a committed hit from a RayQuery and committed attributes", + "u", + "ro", + [ + db_dxil_param( + 0, "hit_object", "", "HitObject created from RayQuery object" + ), + db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle"), + db_dxil_param( + 3, + "i32", + "HitKind", + "User-specified value in range of 0-127 to identify the type of hit", + ), + db_dxil_param(4, "udt", "CommittedAttribs", "Committed attributes"), + ], + ) + next_op_idx += 1 - # TODO - some arguments are required to be immediate constants in DXIL, eg resource kinds; add this information - # consider - report instructions that are overloaded on a single type, then turn them into non-overloaded version of that type - self.verify_dense( - self.get_dxil_insts(), lambda x: x.dxil_opid, lambda x: x.name + self.add_dxil_op( + "HitObject_MakeMiss", + next_op_idx, + "HitObject_MakeMiss", + "Creates a new HitObject representing a miss", + "v", + "rn", + [ + db_dxil_param(0, "hit_object", "", "HitObject with a committed miss"), + db_dxil_param(2, "i32", "RayFlags", "ray flags"), + db_dxil_param(3, "i32", "MissShaderIndex", "Miss shader index"), + db_dxil_param(4, "f", "Origin_X", "Origin x of the ray"), + db_dxil_param(5, "f", "Origin_Y", "Origin y of the ray"), + db_dxil_param(6, "f", "Origin_Z", "Origin z of the ray"), + db_dxil_param(7, "f", "TMin", "Tmin of the ray"), + db_dxil_param(8, "f", "Direction_X", "Direction x of the ray"), + 
db_dxil_param(9, "f", "Direction_Y", "Direction y of the ray"), + db_dxil_param(10, "f", "Direction_Z", "Direction z of the ray"), + db_dxil_param(11, "f", "TMax", "Tmax of the ray"), + ], ) - for i in self.instr: - self.verify_dense(i.ops, lambda x: x.pos, lambda x: i.name) - for i in self.instr: - if i.is_dxil_op: - assert i.oload_types != "", ( - "overload for DXIL operation %s should not be empty - use void if n/a" - % (i.name) - ) - assert i.oload_types == "v" or i.oload_types.find("v") < 0, ( - "void overload should be exclusive to other types (%s)" % i.name - ) - assert ( - type(i.oload_types) is str - ), "overload for %s should be a string - use empty if n/a" % (i.name) + next_op_idx += 1 - # Verify that all operations in each class have the same signature. - import itertools + self.add_dxil_op( + "HitObject_MakeNop", + next_op_idx, + "HitObject_MakeNop", + "Creates an empty nop HitObject", + "v", + "rn", + [db_dxil_param(0, "hit_object", "", "Empty nop HitObject")], + ) + next_op_idx += 1 - class_sort_func = lambda x, y: x < y - class_key_func = lambda x: x.dxil_class - instr_ordered_by_class = sorted( - [i for i in self.instr if i.is_dxil_op], key=class_key_func + self.add_dxil_op( + "HitObject_Invoke", + next_op_idx, + "HitObject_Invoke", + "Represents the invocation of the CH/MS shader represented by the HitObject", + "u", + "", + [ + retvoid_param, + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param( + 3, + "udt", + "payload", + "User-defined payload structure", + ), + ], ) - instr_grouped_by_class = itertools.groupby( - instr_ordered_by_class, key=class_key_func + next_op_idx += 1 + + self.add_dxil_op( + "MaybeReorderThread", + next_op_idx, + "MaybeReorderThread", + "Reorders the current thread", + "v", + "", + [ + retvoid_param, + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "coherenceHint", "Coherence hint"), + db_dxil_param( + 4, + "i32", + "numCoherenceHintBitsFromLSB", + "Num coherence hint bits 
from LSB", + ), + ], ) + next_op_idx += 1 - def calc_oload_sig(inst): - result = "" - for o in inst.ops: - result += o.llvm_type - return result + self.add_dxil_op( + "HitObject_IsMiss", + next_op_idx, + "HitObject_StateScalar", + "Returns `true` if the HitObject represents a miss", + "1", + "rn", + [ + db_dxil_param(0, "i1", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 - for k, g in instr_grouped_by_class: - group = list(g) - if len(group) > 1: - first = group[0] - first_group = calc_oload_sig(first) - for other in group[1:]: - other_group = calc_oload_sig(other) - # TODO: uncomment assert when opcodes are fixed - # assert first_group == other_group, "overload signature %s for instruction %s differs from %s in %s" % (first.name, first_group, other.name, other_group) + self.add_dxil_op( + "HitObject_IsHit", + next_op_idx, + "HitObject_StateScalar", + "Returns `true` if the HitObject is a NOP-HitObject", + "1", + "rn", + [ + db_dxil_param(0, "i1", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 - def populate_extended_docs(self): - "Update the documentation with text from external files." 
- inst_starter = "* Inst: " - block_starter = "* BLOCK-BEGIN" - block_end = "* BLOCK-END" - thisdir = os.path.dirname(os.path.realpath(__file__)) + self.add_dxil_op( + "HitObject_IsNop", + next_op_idx, + "HitObject_StateScalar", + "Returns `true` if the HitObject represents a nop", + "1", + "rn", + [ + db_dxil_param(0, "i1", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_RayFlags", + next_op_idx, + "HitObject_StateScalar", + "Returns the ray flags set in the HitObject", + "i", + "rn", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_RayTMin", + next_op_idx, + "HitObject_StateScalar", + "Returns the TMin value set in the HitObject", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_RayTCurrent", + next_op_idx, + "HitObject_StateScalar", + "Returns the current T value set in the HitObject", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_WorldRayOrigin", + next_op_idx, + "HitObject_StateVector", + "Returns the ray origin in world space", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "component", "component [0..2]", is_const=True), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_WorldRayDirection", + next_op_idx, + "HitObject_StateVector", + "Returns the ray direction in world space", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "component", "component [0..2]", 
is_const=True), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_ObjectRayOrigin", + next_op_idx, + "HitObject_StateVector", + "Returns the ray origin in object space", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "component", "component [0..2]", is_const=True), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_ObjectRayDirection", + next_op_idx, + "HitObject_StateVector", + "Returns the ray direction in object space", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "component", "component [0..2]", is_const=True), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_ObjectToWorld3x4", + next_op_idx, + "HitObject_StateMatrix", + "Returns the object to world space transformation matrix in 3x4 form", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param( + 3, + "i32", + "row", + "row [0..2], , relative to the element", + is_const=True, + ), + db_dxil_param( + 4, + "i32", + "col", + "column [0..3], relative to the element", + is_const=True, + ), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_WorldToObject3x4", + next_op_idx, + "HitObject_StateMatrix", + "Returns the world to object space transformation matrix in 3x4 form", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param( + 3, + "i32", + "row", + "row [0..2], relative to the element", + is_const=True, + ), + db_dxil_param( + 4, + "i32", + "col", + "column [0..3], relative to the element", + is_const=True, + ), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_GeometryIndex", + next_op_idx, + "HitObject_StateScalar", + "Returns the geometry index committed on hit", + "i", + "rn", + [ + 
db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_InstanceIndex", + next_op_idx, + "HitObject_StateScalar", + "Returns the instance index committed on hit", + "i", + "rn", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_InstanceID", + next_op_idx, + "HitObject_StateScalar", + "Returns the instance id committed on hit", + "i", + "rn", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_PrimitiveIndex", + next_op_idx, + "HitObject_StateScalar", + "Returns the primitive index committed on hit", + "i", + "rn", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_HitKind", + next_op_idx, + "HitObject_StateScalar", + "Returns the HitKind of the hit", + "i", + "rn", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_ShaderTableIndex", + next_op_idx, + "HitObject_StateScalar", + "Returns the shader table index set for this HitObject", + "i", + "rn", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_SetShaderTableIndex", + next_op_idx, + "HitObject_SetShaderTableIndex", + "Returns a HitObject with updated shader table index", + "v", + "rn", + [ + db_dxil_param( + 0, "hit_object", "hitObject", "hit with shader table index set" + ), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "shaderTableIndex", "shader table index"), + ], + ) + 
next_op_idx += 1 + + self.add_dxil_op( + "HitObject_LoadLocalRootTableConstant", + next_op_idx, + "HitObject_LoadLocalRootTableConstant", + "Returns the root table constant for this HitObject and offset", + "v", + "ro", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "offset", "offset"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_Attributes", + next_op_idx, + "HitObject_Attributes", + "Returns the attributes set for this HitObject", + "u", + "amo", + [ + retvoid_param, + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param( + 3, "udt", "attributes", "pointer to store the attributes to" + ), + ], + ) + next_op_idx += 1 + + next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 3, 28) + + # Reserved block C + next_op_idx = self.reserve_dxil_op_range("ReservedC", next_op_idx, 10) + + # Long Vectors + self.add_dxil_op( + "RawBufferVectorLoad", + next_op_idx, + "RawBufferVectorLoad", + "reads from a raw buffer and structured buffer", + "hfwidl<", + "ro", + [ + db_dxil_param(0, "$r", "", "the loaded value"), + db_dxil_param(2, "res", "buf", "handle of Raw Buffer to load from"), + db_dxil_param( + 3, + "i32", + "index", + "element index for StructuredBuffer, or byte offset for ByteAddressBuffer", + ), + db_dxil_param( + 4, + "i32", + "elementOffset", + "offset into element for StructuredBuffer, or undef for ByteAddressBuffer", + ), + db_dxil_param( + 5, + "i32", + "alignment", + "relative load access alignment", + is_const=True, + ), + ], + counters=("tex_load",), + ) + next_op_idx += 1 + + self.add_dxil_op( + "RawBufferVectorStore", + next_op_idx, + "RawBufferVectorStore", + "writes to a RWByteAddressBuffer or RWStructuredBuffer", + "hfwidl<", + "", + [ + db_dxil_param(0, "v", "", ""), + db_dxil_param(2, "res", "uav", "handle of UAV to store to"), + db_dxil_param( + 3, + "i32", + "index", + "element index for StructuredBuffer, or byte 
offset for ByteAddressBuffer", + ), + db_dxil_param( + 4, + "i32", + "elementOffset", + "offset into element for StructuredBuffer, or undef for ByteAddressBuffer", + ), + db_dxil_param(5, "$o", "value0", "value"), + db_dxil_param( + 6, + "i32", + "alignment", + "relative store access alignment", + is_const=True, + ), + ], + counters=("tex_store",), + ) + next_op_idx += 1 + + # End of DXIL 1.9 opcodes. + # NOTE!! Update and uncomment when DXIL 1.9 opcodes are finalized: + # self.set_op_count_for_version(1, 9, next_op_idx) + # assert next_op_idx == NNN, ( + # "NNN is expected next operation index but encountered %d and thus opcodes are broken" + # % next_op_idx + # ) + + # Set interesting properties. + self.build_indices() + for ( + i + ) in "CalculateLOD,DerivCoarseX,DerivCoarseY,DerivFineX,DerivFineY,Sample,SampleBias,SampleCmp,SampleCmpBias".split( + "," + ): + self.name_idx[i].is_gradient = True + for i in "DerivCoarseX,DerivCoarseY,DerivFineX,DerivFineY".split(","): + assert ( + self.name_idx[i].is_gradient == True + ), "all derivatives are marked as requiring gradients" + self.name_idx[i].is_deriv = True + + # TODO - some arguments are required to be immediate constants in DXIL, eg resource kinds; add this information + # consider - report instructions that are overloaded on a single type, then turn them into non-overloaded version of that type + self.verify_dense( + self.get_dxil_insts(), lambda x: x.dxil_opid, lambda x: x.name + ) + for i in self.instr: + self.verify_dense(i.ops, lambda x: x.pos, lambda x: i.name) + + # Verify that all operations in each class have the same signature. 
+ import itertools + + class_sort_func = lambda x, y: x < y + class_key_func = lambda x: x.dxil_class + instr_ordered_by_class = sorted( + [i for i in self.instr if i.is_dxil_op], key=class_key_func + ) + instr_grouped_by_class = itertools.groupby( + instr_ordered_by_class, key=class_key_func + ) + + def calc_oload_sig(inst): + result = "" + for o in inst.ops: + result += o.llvm_type + return result + + for k, g in instr_grouped_by_class: + group = list(g) + if len(group) > 1: + first = group[0] + first_group = calc_oload_sig(first) + for other in group[1:]: + other_group = calc_oload_sig(other) + # TODO: uncomment assert when opcodes are fixed + # assert first_group == other_group, "overload signature %s for instruction %s differs from %s in %s" % (first.name, first_group, other.name, other_group) + + def populate_extended_docs(self): + "Update the documentation with text from external files." + inst_starter = "* Inst: " + block_starter = "* BLOCK-BEGIN" + block_end = "* BLOCK-END" + thisdir = os.path.dirname(os.path.realpath(__file__)) with open(os.path.join(thisdir, "hctdb_inst_docs.txt")) as ops_file: inst_name = "" inst_doc = "" @@ -6049,6 +6830,12 @@ def add_pass(name, type_name, doc, opts): "HLSL DXIL Logs all non-RayGen DXR 1.0 invocations into a UAV", [{"n": "maxNumEntriesInLog", "t": "int", "c": 1}], ) + add_pass( + "hlsl-dxil-non-uniform-resource-index-instrumentation", + "DxilNonUniformResourceIndexInstrumentation", + "HLSL DXIL NonUniformResourceIndex instrumentation for PIX", + [], + ) category_lib = "dxil_gen" @@ -6174,6 +6961,12 @@ def add_pass(name, type_name, doc, opts): "DXIL Lower createHandleForLib", [], ) + add_pass( + "hlsl-dxil-scalarize-vector-load-stores", + "DxilScalarizeVectorLoadStores", + "DXIL scalarize vector load/stores", + [], + ) add_pass( "hlsl-dxil-cleanup-dynamic-resource-handle", "DxilCleanupDynamicResourceHandle", @@ -7396,11 +8189,15 @@ def build_valrules(self): ) self.add_valrule( "Instr.CoordinateCountForRawTypedBuf", - 
"raw/typed buffer don't need 2 coordinates.", + "raw/typed buffer offset must be undef.", + ) + self.add_valrule( + "Instr.ConstAlignForRawBuf", + "Raw Buffer alignment value must be a constant.", ) self.add_valrule( "Instr.CoordinateCountForStructBuf", - "structured buffer require 2 coordinates.", + "structured buffer requires defined index and offset coordinates.", ) self.add_valrule( "Instr.MipLevelForGetDimension", @@ -7496,6 +8293,16 @@ def build_valrules(self): "Invalid use of completed record handle.", ) + # Shader Execution Reordering + self.add_valrule( + "Instr.UndefHitObject", + "HitObject is undef.", + ) + self.add_valrule( + "Instr.MayReorderThreadUndefCoherenceHintParam", + "Use of undef coherence hint or num coherence hint bits in MaybeReorderThread.", + ) + # Some legacy rules: # - space is only supported for shader targets 5.1 and higher # - multiple rules regarding derivatives, which isn't a supported feature for DXIL @@ -8145,10 +8952,12 @@ def add_dxil_op_reserved(self, name, code_id): ) self.instr.append(i) - def reserve_dxil_op_range(self, group_name, start_id, count): + def reserve_dxil_op_range(self, group_name, start_id, count, start_reserved_id=0): "Reserve a range of dxil opcodes for future use; returns next id" for i in range(0, count): - self.add_dxil_op_reserved("{0}{1}".format(group_name, i), start_id + i) + self.add_dxil_op_reserved( + "{0}{1}".format(group_name, start_reserved_id + i), start_id + i + ) return start_id + count def get_instr_by_llvm_name(self, llvm_name): @@ -8208,6 +9017,9 @@ def __init__( unsigned_op, overload_idx, hidden, + min_shader_model, + static_member, + class_prefix, ): self.name = name # Function name self.idx = idx # Unique number within namespace @@ -8216,14 +9028,27 @@ def __init__( self.ns = ns # Function namespace self.ns_idx = ns_idx # Namespace index self.doc = doc # Documentation - id_prefix = "IOP" if ns == "Intrinsics" else "MOP" + id_prefix = "IOP" if ns.endswith("Intrinsics") else "MOP" + + 
class_name = None + if ns.endswith("Methods"): + class_name = ns[0 : -len("Methods")] + # SPIR-V Change Starts if ns == "VkIntrinsics": name = "Vk" + name self.name = "Vk" + self.name id_prefix = "IOP" # SPIR-V Change Ends - self.enum_name = "%s_%s" % (id_prefix, name) # enum name + if ns.startswith("Dx"): + if not class_prefix: + name = "Dx" + name + self.name = name + + if class_prefix: + self.enum_name = "%s_%s_%s" % (id_prefix, class_name, name) + else: + self.enum_name = "%s_%s" % (id_prefix, name) self.readonly = ro # Only read memory self.readnone = rn # Not read memory self.argmemonly = amo # Only accesses memory through argument pointers @@ -8235,6 +9060,13 @@ def __init__( overload_idx # Parameter determines the overload type, -1 means ret type ) self.hidden = hidden # Internal high-level op, not exposed to HLSL + # Encoded minimum shader model for this intrinsic + self.min_shader_model = 0 + if min_shader_model: + self.min_shader_model = (min_shader_model[0] << 4) | ( + min_shader_model[1] & 0x0F + ) + self.static_member = static_member # HLSL static member function self.key = ( ("%3d" % ns_idx) + "!" 
@@ -8247,6 +9079,8 @@ def __init__( self.vulkanSpecific = ns.startswith( "Vk" ) # Vulkan specific intrinsic - SPIRV change + self.opcode = None # high-level opcode assigned later + self.unsigned_opcode = None # unsigned high-level opcode if appicable class db_hlsl_namespace(object): @@ -8292,7 +9126,7 @@ def __init__( class db_hlsl(object): "A database of HLSL language data" - def __init__(self, intrinsic_defs): + def __init__(self, intrinsic_defs, opcode_data): self.base_types = { "bool": "LICOMPTYPE_BOOL", "int": "LICOMPTYPE_INT", @@ -8347,6 +9181,9 @@ def __init__(self, intrinsic_defs): "AnyNodeOutputRecord": "LICOMPTYPE_ANY_NODE_OUTPUT_RECORD", "GroupNodeOutputRecords": "LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS", "ThreadNodeOutputRecords": "LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS", + "DxHitObject": "LICOMPTYPE_HIT_OBJECT", + "VkBufferPointer": "LICOMPTYPE_VK_BUFFER_POINTER", + "RayQuery": "LICOMPTYPE_RAY_QUERY", } self.trans_rowcol = {"r": "IA_R", "c": "IA_C", "r2": "IA_R2", "c2": "IA_C2"} @@ -8365,6 +9202,13 @@ def __init__(self, intrinsic_defs): self.populate_attributes() self.opcode_namespace = "hlsl::IntrinsicOp" + # Populate opcode data for HLSL intrinsics. + self.opcode_data = opcode_data + # If opcode data is empty, create the default structure. 
+ if not self.opcode_data: + self.opcode_data["IntrinsicOpCodes"] = {"Num_Intrinsics": 0} + self.assign_opcodes() + def create_namespaces(self): last_ns = None self.namespaces = {} @@ -8399,9 +9243,10 @@ def load_intrinsics(self, intrinsic_defs): r"""( sampler\w* | string | (?:RW)?(?:Texture\w*|ByteAddressBuffer) | - acceleration_struct | ray_desc | + acceleration_struct | ray_desc | RayQuery | DxHitObject | Node\w* | RWNode\w* | EmptyNode\w* | - AnyNodeOutput\w* | NodeOutputRecord\w* | GroupShared\w* + AnyNodeOutput\w* | NodeOutputRecord\w* | GroupShared\w* | + VkBufferPointer $)""", flags=re.VERBOSE, ) @@ -8453,6 +9298,10 @@ def process_arg(desc, idx, done_args, intrinsic_name): template_id = "-3" component_id = "0" type_name = "void" + elif type_name == "$funcT2": + template_id = "-4" + component_id = "0" + type_name = "void" elif type_name == "...": assert idx != 0, "'...' can only be used in the parameter list" template_id = "-2" @@ -8581,6 +9430,8 @@ def do_object(m): template_id = "INTRIN_TEMPLATE_VARARGS" elif template_id == "-3": template_id = "INTRIN_TEMPLATE_FROM_FUNCTION" + elif template_id == "-4": + template_id = "INTRIN_TEMPLATE_FROM_FUNCTION_2" if component_id == "-1": component_id = "INTRIN_COMPTYPE_FROM_TYPE_ELT0" if component_id == "-2": @@ -8605,13 +9456,16 @@ def process_attr(attr): readonly = False # Only read memory readnone = False # Not read memory argmemonly = False # Only reads memory through pointer arguments + static_member = False # Static member function is_wave = False + class_prefix = False # Insert class name as enum_prefix # Is wave-sensitive unsigned_op = "" # Unsigned opcode if exist overload_param_index = ( -1 ) # Parameter determines the overload type, -1 means ret type. 
hidden = False + min_shader_model = (0, 0) for a in attrs: if a == "": continue @@ -8630,6 +9484,12 @@ def process_attr(attr): if a == "hidden": hidden = True continue + if a == "static": + static_member = True + continue + if a == "class_prefix": + class_prefix = True + continue assign = a.split("=") @@ -8644,6 +9504,24 @@ def process_attr(attr): if d == "overload": overload_param_index = int(v) continue + if d == "min_sm": + # min_sm is a string like "6.0" or "6.5" + # Convert to a tuple of integers (major, minor) + try: + major_minor = v.split(".") + if len(major_minor) != 2: + raise ValueError + major, minor = major_minor + major = int(major) + minor = int(minor) + # minor of 15 has special meaning, and larger values + # cannot be encoded in the version DWORD. + if major < 0 or minor < 0 or minor > 14: + raise ValueError + min_shader_model = (major, minor) + except ValueError: + assert False, "invalid min_sm: %s" % (v) + continue assert False, "invalid attr %s" % (a) return ( @@ -8654,6 +9532,9 @@ def process_attr(attr): unsigned_op, overload_param_index, hidden, + min_shader_model, + static_member, + class_prefix, ) current_namespace = None @@ -8701,6 +9582,9 @@ def process_attr(attr): unsigned_op, overload_param_index, hidden, + min_shader_model, + static_member, + class_prefix, ) = process_attr(attr) # Add an entry for this intrinsic. if bracket_cleanup_re.search(opts): @@ -8717,6 +9601,8 @@ def process_attr(attr): for in_arg in in_args: args.append(process_arg(in_arg, arg_idx, args, name)) arg_idx += 1 + if class_prefix: + assert current_namespace.endswith("Methods") # We have to process the return type description last # to match the compiler's handling of it and allow # the return type to match an input type. 
@@ -8739,6 +9625,9 @@ def process_attr(attr): unsigned_op, overload_param_index, hidden, + min_shader_model, + static_member, + class_prefix, ) ) num_entries += 1 @@ -8869,6 +9758,29 @@ def add_attr_arg(title_name, scope, args, doc): ) self.attributes = attributes + # Iterate through all intrinsics, assigning opcodes to each one. + # This uses the opcode_data to preserve already-assigned opcodes. + def assign_opcodes(self): + "Assign opcodes to the intrinsics." + IntrinsicOpDict = self.opcode_data["IntrinsicOpCodes"] + Num_Intrinsics = self.opcode_data["IntrinsicOpCodes"]["Num_Intrinsics"] + + def add_intrinsic(name): + nonlocal Num_Intrinsics + opcode = IntrinsicOpDict.setdefault(name, Num_Intrinsics) + if opcode == Num_Intrinsics: + Num_Intrinsics += 1 + return opcode + + sorted_intrinsics = sorted(self.intrinsics, key=lambda x: x.key) + for i in sorted_intrinsics: + i.opcode = add_intrinsic(i.enum_name) + for i in sorted_intrinsics: + if i.unsigned_op == "": + continue + i.unsigned_opcode = add_intrinsic(i.unsigned_op) + self.opcode_data["IntrinsicOpCodes"]["Num_Intrinsics"] = Num_Intrinsics + if __name__ == "__main__": db = db_dxil() diff --git a/utils/hct/hctdb_instrhelp.py b/utils/hct/hctdb_instrhelp.py index 17eefd4918..f0d8b0ebae 100644 --- a/utils/hct/hctdb_instrhelp.py +++ b/utils/hct/hctdb_instrhelp.py @@ -18,6 +18,29 @@ def get_db_dxil(): return g_db_dxil +# opcode data contains fixed opcode assignments for HLSL intrinsics. +g_hlsl_opcode_data = None + + +def get_hlsl_opcode_data(): + global g_hlsl_opcode_data + if g_hlsl_opcode_data is None: + # Load the intrinsic opcodes from the JSON file. 
+ json_filepath = os.path.join( + os.path.dirname(__file__), "hlsl_intrinsic_opcodes.json" + ) + try: + with open(json_filepath, "r") as file: + g_hlsl_opcode_data = json.load(file) + except FileNotFoundError: + print(f"File not found: {json_filepath}") + except json.JSONDecodeError as e: + print(f"Error decoding JSON from {json_filepath}: {e}") + if not g_hlsl_opcode_data: + g_hlsl_opcode_data = {} + return g_hlsl_opcode_data + + g_db_hlsl = None @@ -26,10 +49,14 @@ def get_db_hlsl(): if g_db_hlsl is None: thisdir = os.path.dirname(os.path.realpath(__file__)) with open(os.path.join(thisdir, "gen_intrin_main.txt"), "r") as f: - g_db_hlsl = db_hlsl(f) + g_db_hlsl = db_hlsl(f, get_hlsl_opcode_data()) return g_db_hlsl +def get_max_oload_dims(): + return f"const unsigned kDxilMaxOloadDims = {dxil_max_overload_dims};" + + def format_comment(prefix, val): "Formats a value with a line-comment prefix." result = "" @@ -486,26 +513,15 @@ def print_opfunc_props(self): OP=self.OP ) ) - print( - "// OpCode OpCode name, OpCodeClass OpCodeClass name, void, h, f, d, i1, i8, i16, i32, i64, udt, obj, function attribute" - ) - # Example formatted string: - # { OC::TempRegLoad, "TempRegLoad", OCC::TempRegLoad, "tempRegLoad", false, true, true, false, true, false, true, true, false, Attribute::ReadOnly, }, - # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 - # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 last_category = None - # overload types are a string of (v)oid, (h)alf, (f)loat, (d)ouble, (1)-bit, (8)-bit, (w)ord, (i)nt, (l)ong, u(dt) - f = lambda i, c: "true" if i.oload_types.find(c) >= 0 else "false" lower_exceptions = { "CBufferLoad": "cbufferLoad", "CBufferLoadLegacy": "cbufferLoadLegacy", "GSInstanceID": "gsInstanceID", } - lower_fn = ( - lambda t: lower_exceptions[t] - if t in lower_exceptions - else t[:1].lower() + 
t[1:] + lower_fn = lambda t: ( + lower_exceptions[t] if t in lower_exceptions else t[:1].lower() + t[1:] ) attr_dict = { "": "None", @@ -516,35 +532,47 @@ def print_opfunc_props(self): "nr": "NoReturn", "wv": "None", } - attr_fn = lambda i: "Attribute::" + attr_dict[i.fn_attr] + "," + attr_fn = lambda i: "Attribute::" + attr_dict[i.fn_attr] + oload_to_mask = lambda oload: sum( + [1 << dxil_all_user_oload_chars.find(c) for c in oload] + ) + oloads_fn = lambda oloads: ( + "{" + ",".join(["{0x%x}" % m for m in oloads]) + "}" + ) for i in self.instrs: if last_category != i.category: if last_category != None: print("") - print( - " // {category:118} void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute".format( - category=i.category - ) - ) + if not i.is_reserved: + print(f" // {i.category}") last_category = i.category + scalar_masks = [] + vector_masks = [] + if i.num_oloads > 0: + for n, o in enumerate(i.oload_types.split(",")): + if "<" in o: + v = o.split("<") + scalar_masks.append(oload_to_mask(v[0] + "<")) + vector_masks.append(oload_to_mask(v[1])) + else: + scalar_masks.append(oload_to_mask(o)) + vector_masks.append(0) print( - " {{ {OC}::{name:24} {quotName:27} {OCC}::{className:25} {classNameQuot:28} {{{v:>6},{h:>6},{f:>6},{d:>6},{b:>6},{e:>6},{w:>6},{i:>6},{l:>6},{u:>6},{o:>6}}}, {attr:20} }},".format( + ( + " {{ {OC}::{name:24} {quotName:27} {OCC}::{className:25} " + + "{classNameQuot:28} {attr:20}, {num_oloads}, " + + "{scalar_masks:16}, {vector_masks:16} }}, " + + "// Overloads: {oloads}" + ).format( name=i.name + ",", quotName='"' + i.name + '",', className=i.dxil_class + ",", classNameQuot='"' + lower_fn(i.dxil_class) + '",', - v=f(i, "v"), - h=f(i, "h"), - f=f(i, "f"), - d=f(i, "d"), - b=f(i, "1"), - e=f(i, "8"), - w=f(i, "w"), - i=f(i, "i"), - l=f(i, "l"), - u=f(i, "u"), - o=f(i, "o"), attr=attr_fn(i), + num_oloads=i.num_oloads, + scalar_masks=oloads_fn(scalar_masks), + vector_masks=oloads_fn(vector_masks), + oloads=i.oload_types, 
OC=self.OC, OCC=self.OCC, ) @@ -599,6 +627,10 @@ def print_opfunc_table(self): "noderecordhandle": "A(pNodeRecordHandle);", "nodeproperty": "A(nodeProperty);", "noderecordproperty": "A(nodeRecordProperty);", + "hit_object": "A(pHit);", + # Extended overload slots, extend as needed: + "$x0": "EXT(0);", + "$x1": "EXT(1);", } last_category = None for i in self.instrs: @@ -629,14 +661,24 @@ def print_opfunc_oload_type(self): obj_ty = "obj" vec_ty = "$vec" gsptr_ty = "$gsptr" + extended_ty = "$x" last_category = None index_dict = collections.OrderedDict() ptr_index_dict = collections.OrderedDict() single_dict = collections.OrderedDict() + # extended_dict collects overloads with multiple overload types + # grouped by the set of overload parameter indices. + extended_dict = collections.OrderedDict() struct_list = [] + extended_list = [] for instr in self.instrs: + if instr.num_oloads > 1: + # Process extended overloads separately. + extended_list.append(instr) + continue + ret_ty = instr.ops[0].llvm_type # Skip case return type is overload type if ret_ty == elt_ty: @@ -708,8 +750,7 @@ def print_opfunc_oload_type(self): "i": "IntegerType::get(Ctx, 32)", "l": "IntegerType::get(Ctx, 64)", "v": "Type::getVoidTy(Ctx)", - "u": "Type::getInt32PtrTy(Ctx)", - "o": "Type::getInt32PtrTy(Ctx)", + # No other types should be referenced here. } assert ty in type_code_texts, "llvm type %s is unknown" % (ty) ty_code = type_code_texts[ty] @@ -769,6 +810,61 @@ def print_opfunc_oload_type(self): line = line + "}" print(line) + for instr in extended_list: + # Collect indices for overloaded return and types, make a tuple of + # indices the key, and add the opcode to a list of opcodes for that + # key. Indices start with 0 for return type, and 1 for the first + # function parameter, which is the DXIL OpCode. + indices = [] + for index, op in enumerate(instr.ops): + # Skip dxil opcode. 
+ if op.pos == 1: + continue + + op_type = op.llvm_type + if op_type.startswith(extended_ty): + try: + extended_index = int(op_type[2:]) + except: + raise ValueError( + "Error parsing extended operand type " + + f"'{op_type}' for DXIL op '{instr.name}'" + ) + if extended_index != len(indices): + raise ValueError( + f"'$x{extended_index}' is not in sequential " + + f"order for DXIL op '{instr.name}'" + ) + indices.append(op.pos) + + if len(indices) != instr.num_oloads: + raise ValueError( + f"DXIL op {instr.name}: extended overload count " + + "mismatches the number of overload types" + ) + extended_dict.setdefault(tuple(indices), []).append(instr.name) + + def get_type_at_index(index): + if index == 0: + return "FT->getReturnType()" + return f"FT->getParamType({index - 1})" + + for index_tuple, opcodes in extended_dict.items(): + line = "" + for opcode in opcodes: + line = line + f"case OpCode::{opcode}:\n" + if index_tuple[-1] > 0: + line += ( + f" if (FT->getNumParams() < {index_tuple[-1]})\n" + + " return nullptr;\n" + ) + line += ( + " return llvm::StructType::get(Ctx, {" + + ", ".join([get_type_at_index(index) for index in index_tuple]) + + "});\n" + ) + print(line) + class db_valfns_gen: "A generator of validation functions." 
@@ -964,15 +1060,11 @@ def get_hlsl_intrinsics(): last_ns = "" ns_table = "" is_vk_table = False # SPIRV Change - id_prefix = "" arg_idx = 0 opcode_namespace = db.opcode_namespace for i in sorted(db.intrinsics, key=lambda x: x.key): if last_ns != i.ns: last_ns = i.ns - id_prefix = ( - "IOP" if last_ns == "Intrinsics" or last_ns == "VkIntrinsics" else "MOP" - ) # SPIRV Change if len(ns_table): result += ns_table + "};\n" # SPIRV Change Starts @@ -989,13 +1081,24 @@ def get_hlsl_intrinsics(): result += "#ifdef ENABLE_SPIRV_CODEGEN\n\n" # SPIRV Change Ends arg_idx = 0 - ns_table += " {(UINT)%s::%s_%s, %s, %s, %s, %d, %d, g_%s_Args%s},\n" % ( + flags = [] + if i.readonly: + flags.append("INTRIN_FLAG_READ_ONLY") + if i.readnone: + flags.append("INTRIN_FLAG_READ_NONE") + if i.wave: + flags.append("INTRIN_FLAG_IS_WAVE") + if i.static_member: + flags.append("INTRIN_FLAG_STATIC_MEMBER") + if flags: + flags = " | ".join(flags) + else: + flags = "0" + ns_table += " {(UINT)%s::%s, %s, 0x%x, %d, %d, g_%s_Args%s},\n" % ( opcode_namespace, - id_prefix, - i.name, - str(i.readonly).lower(), - str(i.readnone).lower(), - str(i.wave).lower(), + i.enum_name, + flags, + i.min_shader_model, i.overload_param_index, len(i.params), last_ns, @@ -1045,22 +1148,22 @@ def wrap_with_ifdef_if_vulkan_specific(intrinsic, text): def enum_hlsl_intrinsics(): db = get_db_hlsl() result = "" - enumed = [] + enumed = set() for i in sorted(db.intrinsics, key=lambda x: x.key): if i.enum_name not in enumed: - enumerant = " %s,\n" % (i.enum_name) - result += wrap_with_ifdef_if_vulkan_specific(i, enumerant) # SPIRV Change - enumed.append(i.enum_name) + result += " %s = %d,\n" % (i.enum_name, i.opcode) + enumed.add(i.enum_name) # unsigned result += " // unsigned\n" for i in sorted(db.intrinsics, key=lambda x: x.key): if i.unsigned_op != "": if i.unsigned_op not in enumed: - result += " %s,\n" % (i.unsigned_op) - enumed.append(i.unsigned_op) + result += " %s = %d,\n" % (i.unsigned_op, i.unsigned_opcode) + 
enumed.add(i.unsigned_op) - result += " Num_Intrinsics,\n" + Num_Intrinsics = get_hlsl_opcode_data()["IntrinsicOpCodes"]["Num_Intrinsics"] + result += " Num_Intrinsics = %d,\n" % (Num_Intrinsics) return result @@ -1570,6 +1673,7 @@ def get_highest_released_shader_model(): ) return result + def get_highest_shader_model(): result = """static const unsigned kHighestMajor = %d; static const unsigned kHighestMinor = %d;""" % ( @@ -1578,6 +1682,7 @@ def get_highest_shader_model(): ) return result + def get_dxil_version_minor(): return "const unsigned kDxilMinor = %d;" % highest_minor diff --git a/utils/hct/hctgen.py b/utils/hct/hctgen.py index dbb7e3a745..1421fbfad5 100755 --- a/utils/hct/hctgen.py +++ b/utils/hct/hctgen.py @@ -2,6 +2,7 @@ import argparse from hctdb_instrhelp import * from hctdb import * +import json import sys import os import CodeTags @@ -28,6 +29,7 @@ "DxilCounters", "DxilMetadata", "RDAT_LibraryTypes", + "HlslIntrinsicOpcodes", ], ) parser.add_argument("--output", required=True) @@ -232,6 +234,14 @@ def writeDxilPIXPasses(args): return 0 +def writeHlslIntrinsicOpcodes(args): + out = openOutput(args) + # get_db_hlsl() initializes the hlsl intrinsic database and opcode_data. 
+ get_db_hlsl() + json.dump(get_hlsl_opcode_data(), out, indent=2) + out.write("\n") + return 0 + args = parser.parse_args() if args.force_lf and args.force_crlf: eprint("--force-lf and --force-crlf are mutually exclusive, only pass one") diff --git a/utils/hct/hlsl_intrinsic_opcodes.json b/utils/hct/hlsl_intrinsic_opcodes.json new file mode 100644 index 0000000000..d99b84b745 --- /dev/null +++ b/utils/hct/hlsl_intrinsic_opcodes.json @@ -0,0 +1,395 @@ +{ + "IntrinsicOpCodes": { + "Num_Intrinsics": 390, + "IOP_AcceptHitAndEndSearch": 0, + "IOP_AddUint64": 1, + "IOP_AllMemoryBarrier": 2, + "IOP_AllMemoryBarrierWithGroupSync": 3, + "IOP_AllocateRayQuery": 4, + "IOP_Barrier": 5, + "IOP_CallShader": 6, + "IOP_CheckAccessFullyMapped": 7, + "IOP_CreateResourceFromHeap": 8, + "IOP_D3DCOLORtoUBYTE4": 9, + "IOP_DeviceMemoryBarrier": 10, + "IOP_DeviceMemoryBarrierWithGroupSync": 11, + "IOP_DispatchMesh": 12, + "IOP_DispatchRaysDimensions": 13, + "IOP_DispatchRaysIndex": 14, + "IOP_EvaluateAttributeAtSample": 15, + "IOP_EvaluateAttributeCentroid": 16, + "IOP_EvaluateAttributeSnapped": 17, + "IOP_GeometryIndex": 18, + "IOP_GetAttributeAtVertex": 19, + "IOP_GetRemainingRecursionLevels": 20, + "IOP_GetRenderTargetSampleCount": 21, + "IOP_GetRenderTargetSamplePosition": 22, + "IOP_GroupMemoryBarrier": 23, + "IOP_GroupMemoryBarrierWithGroupSync": 24, + "IOP_HitKind": 25, + "IOP_IgnoreHit": 26, + "IOP_InstanceID": 27, + "IOP_InstanceIndex": 28, + "IOP_InterlockedAdd": 29, + "IOP_InterlockedAnd": 30, + "IOP_InterlockedCompareExchange": 31, + "IOP_InterlockedCompareExchangeFloatBitwise": 32, + "IOP_InterlockedCompareStore": 33, + "IOP_InterlockedCompareStoreFloatBitwise": 34, + "IOP_InterlockedExchange": 35, + "IOP_InterlockedMax": 36, + "IOP_InterlockedMin": 37, + "IOP_InterlockedOr": 38, + "IOP_InterlockedXor": 39, + "IOP_IsHelperLane": 40, + "IOP_NonUniformResourceIndex": 41, + "IOP_ObjectRayDirection": 42, + "IOP_ObjectRayOrigin": 43, + "IOP_ObjectToWorld": 44, + 
"IOP_ObjectToWorld3x4": 45, + "IOP_ObjectToWorld4x3": 46, + "IOP_PrimitiveIndex": 47, + "IOP_Process2DQuadTessFactorsAvg": 48, + "IOP_Process2DQuadTessFactorsMax": 49, + "IOP_Process2DQuadTessFactorsMin": 50, + "IOP_ProcessIsolineTessFactors": 51, + "IOP_ProcessQuadTessFactorsAvg": 52, + "IOP_ProcessQuadTessFactorsMax": 53, + "IOP_ProcessQuadTessFactorsMin": 54, + "IOP_ProcessTriTessFactorsAvg": 55, + "IOP_ProcessTriTessFactorsMax": 56, + "IOP_ProcessTriTessFactorsMin": 57, + "IOP_QuadAll": 58, + "IOP_QuadAny": 59, + "IOP_QuadReadAcrossDiagonal": 60, + "IOP_QuadReadAcrossX": 61, + "IOP_QuadReadAcrossY": 62, + "IOP_QuadReadLaneAt": 63, + "IOP_RayFlags": 64, + "IOP_RayTCurrent": 65, + "IOP_RayTMin": 66, + "IOP_ReportHit": 67, + "IOP_SetMeshOutputCounts": 68, + "IOP_TraceRay": 69, + "IOP_WaveActiveAllEqual": 70, + "IOP_WaveActiveAllTrue": 71, + "IOP_WaveActiveAnyTrue": 72, + "IOP_WaveActiveBallot": 73, + "IOP_WaveActiveBitAnd": 74, + "IOP_WaveActiveBitOr": 75, + "IOP_WaveActiveBitXor": 76, + "IOP_WaveActiveCountBits": 77, + "IOP_WaveActiveMax": 78, + "IOP_WaveActiveMin": 79, + "IOP_WaveActiveProduct": 80, + "IOP_WaveActiveSum": 81, + "IOP_WaveGetLaneCount": 82, + "IOP_WaveGetLaneIndex": 83, + "IOP_WaveIsFirstLane": 84, + "IOP_WaveMatch": 85, + "IOP_WaveMultiPrefixBitAnd": 86, + "IOP_WaveMultiPrefixBitOr": 87, + "IOP_WaveMultiPrefixBitXor": 88, + "IOP_WaveMultiPrefixCountBits": 89, + "IOP_WaveMultiPrefixProduct": 90, + "IOP_WaveMultiPrefixSum": 91, + "IOP_WavePrefixCountBits": 92, + "IOP_WavePrefixProduct": 93, + "IOP_WavePrefixSum": 94, + "IOP_WaveReadLaneAt": 95, + "IOP_WaveReadLaneFirst": 96, + "IOP_WorldRayDirection": 97, + "IOP_WorldRayOrigin": 98, + "IOP_WorldToObject": 99, + "IOP_WorldToObject3x4": 100, + "IOP_WorldToObject4x3": 101, + "IOP_abort": 102, + "IOP_abs": 103, + "IOP_acos": 104, + "IOP_all": 105, + "IOP_and": 106, + "IOP_any": 107, + "IOP_asdouble": 108, + "IOP_asfloat": 109, + "IOP_asfloat16": 110, + "IOP_asin": 111, + "IOP_asint": 112, + 
"IOP_asint16": 113, + "IOP_asuint": 114, + "IOP_asuint16": 115, + "IOP_atan": 116, + "IOP_atan2": 117, + "IOP_ceil": 118, + "IOP_clamp": 119, + "IOP_clip": 120, + "IOP_cos": 121, + "IOP_cosh": 122, + "IOP_countbits": 123, + "IOP_cross": 124, + "IOP_ddx": 125, + "IOP_ddx_coarse": 126, + "IOP_ddx_fine": 127, + "IOP_ddy": 128, + "IOP_ddy_coarse": 129, + "IOP_ddy_fine": 130, + "IOP_degrees": 131, + "IOP_determinant": 132, + "IOP_distance": 133, + "IOP_dot": 134, + "IOP_dot2add": 135, + "IOP_dot4add_i8packed": 136, + "IOP_dot4add_u8packed": 137, + "IOP_dst": 138, + "IOP_exp": 139, + "IOP_exp2": 140, + "IOP_f16tof32": 141, + "IOP_f32tof16": 142, + "IOP_faceforward": 143, + "IOP_firstbithigh": 144, + "IOP_firstbitlow": 145, + "IOP_floor": 146, + "IOP_fma": 147, + "IOP_fmod": 148, + "IOP_frac": 149, + "IOP_frexp": 150, + "IOP_fwidth": 151, + "IOP_isfinite": 152, + "IOP_isinf": 153, + "IOP_isnan": 154, + "IOP_ldexp": 155, + "IOP_length": 156, + "IOP_lerp": 157, + "IOP_lit": 158, + "IOP_log": 159, + "IOP_log10": 160, + "IOP_log2": 161, + "IOP_mad": 162, + "IOP_max": 163, + "IOP_min": 164, + "IOP_modf": 165, + "IOP_msad4": 166, + "IOP_mul": 167, + "IOP_normalize": 168, + "IOP_or": 169, + "IOP_pack_clamp_s8": 170, + "IOP_pack_clamp_u8": 171, + "IOP_pack_s8": 172, + "IOP_pack_u8": 173, + "IOP_pow": 174, + "IOP_printf": 175, + "IOP_radians": 176, + "IOP_rcp": 177, + "IOP_reflect": 178, + "IOP_refract": 179, + "IOP_reversebits": 180, + "IOP_round": 181, + "IOP_rsqrt": 182, + "IOP_saturate": 183, + "IOP_select": 184, + "IOP_sign": 185, + "IOP_sin": 186, + "IOP_sincos": 187, + "IOP_sinh": 188, + "IOP_smoothstep": 189, + "IOP_source_mark": 190, + "IOP_sqrt": 191, + "IOP_step": 192, + "IOP_tan": 193, + "IOP_tanh": 194, + "IOP_tex1D": 195, + "IOP_tex1Dbias": 196, + "IOP_tex1Dgrad": 197, + "IOP_tex1Dlod": 198, + "IOP_tex1Dproj": 199, + "IOP_tex2D": 200, + "IOP_tex2Dbias": 201, + "IOP_tex2Dgrad": 202, + "IOP_tex2Dlod": 203, + "IOP_tex2Dproj": 204, + "IOP_tex3D": 205, + "IOP_tex3Dbias": 
206, + "IOP_tex3Dgrad": 207, + "IOP_tex3Dlod": 208, + "IOP_tex3Dproj": 209, + "IOP_texCUBE": 210, + "IOP_texCUBEbias": 211, + "IOP_texCUBEgrad": 212, + "IOP_texCUBElod": 213, + "IOP_texCUBEproj": 214, + "IOP_transpose": 215, + "IOP_trunc": 216, + "IOP_unpack_s8s16": 217, + "IOP_unpack_s8s32": 218, + "IOP_unpack_u8u16": 219, + "IOP_unpack_u8u32": 220, + "IOP_VkRawBufferLoad": 221, + "IOP_VkRawBufferStore": 222, + "IOP_VkReadClock": 223, + "IOP_Vkext_execution_mode": 224, + "IOP_Vkext_execution_mode_id": 225, + "MOP_Append": 226, + "MOP_RestartStrip": 227, + "MOP_CalculateLevelOfDetail": 228, + "MOP_CalculateLevelOfDetailUnclamped": 229, + "MOP_GetDimensions": 230, + "MOP_Load": 231, + "MOP_Sample": 232, + "MOP_SampleBias": 233, + "MOP_SampleCmp": 234, + "MOP_SampleCmpBias": 235, + "MOP_SampleCmpGrad": 236, + "MOP_SampleCmpLevel": 237, + "MOP_SampleCmpLevelZero": 238, + "MOP_SampleGrad": 239, + "MOP_SampleLevel": 240, + "MOP_Gather": 241, + "MOP_GatherAlpha": 242, + "MOP_GatherBlue": 243, + "MOP_GatherCmp": 244, + "MOP_GatherCmpAlpha": 245, + "MOP_GatherCmpBlue": 246, + "MOP_GatherCmpGreen": 247, + "MOP_GatherCmpRed": 248, + "MOP_GatherGreen": 249, + "MOP_GatherRaw": 250, + "MOP_GatherRed": 251, + "MOP_GetSamplePosition": 252, + "MOP_Load2": 253, + "MOP_Load3": 254, + "MOP_Load4": 255, + "MOP_InterlockedAdd": 256, + "MOP_InterlockedAdd64": 257, + "MOP_InterlockedAnd": 258, + "MOP_InterlockedAnd64": 259, + "MOP_InterlockedCompareExchange": 260, + "MOP_InterlockedCompareExchange64": 261, + "MOP_InterlockedCompareExchangeFloatBitwise": 262, + "MOP_InterlockedCompareStore": 263, + "MOP_InterlockedCompareStore64": 264, + "MOP_InterlockedCompareStoreFloatBitwise": 265, + "MOP_InterlockedExchange": 266, + "MOP_InterlockedExchange64": 267, + "MOP_InterlockedExchangeFloat": 268, + "MOP_InterlockedMax": 269, + "MOP_InterlockedMax64": 270, + "MOP_InterlockedMin": 271, + "MOP_InterlockedMin64": 272, + "MOP_InterlockedOr": 273, + "MOP_InterlockedOr64": 274, + 
"MOP_InterlockedXor": 275, + "MOP_InterlockedXor64": 276, + "MOP_Store": 277, + "MOP_Store2": 278, + "MOP_Store3": 279, + "MOP_Store4": 280, + "MOP_DecrementCounter": 281, + "MOP_IncrementCounter": 282, + "MOP_Consume": 283, + "MOP_WriteSamplerFeedback": 284, + "MOP_WriteSamplerFeedbackBias": 285, + "MOP_WriteSamplerFeedbackGrad": 286, + "MOP_WriteSamplerFeedbackLevel": 287, + "MOP_Abort": 288, + "MOP_CandidateGeometryIndex": 289, + "MOP_CandidateInstanceContributionToHitGroupIndex": 290, + "MOP_CandidateInstanceID": 291, + "MOP_CandidateInstanceIndex": 292, + "MOP_CandidateObjectRayDirection": 293, + "MOP_CandidateObjectRayOrigin": 294, + "MOP_CandidateObjectToWorld3x4": 295, + "MOP_CandidateObjectToWorld4x3": 296, + "MOP_CandidatePrimitiveIndex": 297, + "MOP_CandidateProceduralPrimitiveNonOpaque": 298, + "MOP_CandidateTriangleBarycentrics": 299, + "MOP_CandidateTriangleFrontFace": 300, + "MOP_CandidateTriangleRayT": 301, + "MOP_CandidateType": 302, + "MOP_CandidateWorldToObject3x4": 303, + "MOP_CandidateWorldToObject4x3": 304, + "MOP_CommitNonOpaqueTriangleHit": 305, + "MOP_CommitProceduralPrimitiveHit": 306, + "MOP_CommittedGeometryIndex": 307, + "MOP_CommittedInstanceContributionToHitGroupIndex": 308, + "MOP_CommittedInstanceID": 309, + "MOP_CommittedInstanceIndex": 310, + "MOP_CommittedObjectRayDirection": 311, + "MOP_CommittedObjectRayOrigin": 312, + "MOP_CommittedObjectToWorld3x4": 313, + "MOP_CommittedObjectToWorld4x3": 314, + "MOP_CommittedPrimitiveIndex": 315, + "MOP_CommittedRayT": 316, + "MOP_CommittedStatus": 317, + "MOP_CommittedTriangleBarycentrics": 318, + "MOP_CommittedTriangleFrontFace": 319, + "MOP_CommittedWorldToObject3x4": 320, + "MOP_CommittedWorldToObject4x3": 321, + "MOP_Proceed": 322, + "MOP_RayFlags": 323, + "MOP_RayTMin": 324, + "MOP_TraceRayInline": 325, + "MOP_WorldRayDirection": 326, + "MOP_WorldRayOrigin": 327, + "MOP_Count": 328, + "MOP_FinishedCrossGroupSharing": 329, + "MOP_GetGroupNodeOutputRecords": 330, + 
"MOP_GetThreadNodeOutputRecords": 331, + "MOP_IsValid": 332, + "MOP_GroupIncrementOutputCount": 333, + "MOP_ThreadIncrementOutputCount": 334, + "MOP_OutputComplete": 335, + "MOP_SubpassLoad": 336, + "IOP_InterlockedUMax": 337, + "IOP_InterlockedUMin": 338, + "IOP_WaveActiveUMax": 339, + "IOP_WaveActiveUMin": 340, + "IOP_WaveActiveUProduct": 341, + "IOP_WaveActiveUSum": 342, + "IOP_WaveMultiPrefixUProduct": 343, + "IOP_WaveMultiPrefixUSum": 344, + "IOP_WavePrefixUProduct": 345, + "IOP_WavePrefixUSum": 346, + "IOP_uabs": 347, + "IOP_uclamp": 348, + "IOP_udot": 349, + "IOP_ufirstbithigh": 350, + "IOP_umad": 351, + "IOP_umax": 352, + "IOP_umin": 353, + "IOP_umul": 354, + "IOP_usign": 355, + "MOP_InterlockedUMax": 356, + "MOP_InterlockedUMin": 357, + "MOP_DxHitObject_MakeNop": 358, + "IOP_DxMaybeReorderThread": 359, + "IOP_Vkreinterpret_pointer_cast": 360, + "IOP_Vkstatic_pointer_cast": 361, + "MOP_GetBufferContents": 362, + "MOP_DxHitObject_FromRayQuery": 363, + "MOP_DxHitObject_GetAttributes": 364, + "MOP_DxHitObject_GetGeometryIndex": 365, + "MOP_DxHitObject_GetHitKind": 366, + "MOP_DxHitObject_GetInstanceID": 367, + "MOP_DxHitObject_GetInstanceIndex": 368, + "MOP_DxHitObject_GetObjectRayDirection": 369, + "MOP_DxHitObject_GetObjectRayOrigin": 370, + "MOP_DxHitObject_GetObjectToWorld3x4": 371, + "MOP_DxHitObject_GetObjectToWorld4x3": 372, + "MOP_DxHitObject_GetPrimitiveIndex": 373, + "MOP_DxHitObject_GetRayFlags": 374, + "MOP_DxHitObject_GetRayTCurrent": 375, + "MOP_DxHitObject_GetRayTMin": 376, + "MOP_DxHitObject_GetShaderTableIndex": 377, + "MOP_DxHitObject_GetWorldRayDirection": 378, + "MOP_DxHitObject_GetWorldRayOrigin": 379, + "MOP_DxHitObject_GetWorldToObject3x4": 380, + "MOP_DxHitObject_GetWorldToObject4x3": 381, + "MOP_DxHitObject_Invoke": 382, + "MOP_DxHitObject_IsHit": 383, + "MOP_DxHitObject_IsMiss": 384, + "MOP_DxHitObject_IsNop": 385, + "MOP_DxHitObject_LoadLocalRootTableConstant": 386, + "MOP_DxHitObject_MakeMiss": 387, + 
"MOP_DxHitObject_SetShaderTableIndex": 388, + "MOP_DxHitObject_TraceRay": 389 + } +}