diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc index 6c7bc2cce84bc..cd8f878f56f7b 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc @@ -99,6 +99,7 @@ namespace sycl::ext::oneapi::experimental { enum class architecture : /* unspecified */ { x86_64, + intel_cpu_spr, intel_gpu_bdw, intel_gpu_skl, intel_gpu_kbl, diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp index 5eb0a301ce4b5..624ae83897e48 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp @@ -14,6 +14,7 @@ namespace ext::oneapi::experimental { enum class architecture { x86_64, + intel_cpu_spr, intel_gpu_bdw, intel_gpu_skl, intel_gpu_kbl, diff --git a/sycl/include/sycl/ext/oneapi/matrix/compile-time-query.hpp b/sycl/include/sycl/ext/oneapi/matrix/compile-time-query.hpp new file mode 100644 index 0000000000000..817a6a731fb96 --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/matrix/compile-time-query.hpp @@ -0,0 +1,579 @@ +//===---------- compile-time-query.hpp - SYCL matrix ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// ===--------------------------------------------------------------------=== // +// This file implements the compile-time query interface for the joint_matrix +// experimental extension. Intel(R) Advanced Matrix Extensions (Intel(R) AMX), +// and Intel(R) Xe Matrix Extensions (Intel(R) XMX) support different logical +// sizes and types. The query interface is used to validate user code and inform +// them about supported types, sizes, scopes, and layouts by the current +// implementation. Note that this query interface is a compile-time query, so +// there will be no runtime errors. The query interface provides two +// functionalities: 1- At compile time, inform the user whether a specific +// combination is valid or not. 2- Construct the matrices using a default shape +// if user does not provide a combination + +#pragma once + +#include // for half +#include +#include // for use, layout +#include // for joint_matrix + +#include // for size_t +#include // for uint32_t, int8_t +#include // for enable_if + +using namespace sycl::ext::oneapi::experimental; + +namespace sycl { +inline namespace _V1 { +namespace ext { +namespace oneapi { +namespace experimental::matrix { + +enum class matrix_type { + bf16, + fp16, + tf32, + fp32, + fp64, + sint8, + sint16, + sint32, + sint64, + uint8, + uint16, + uint32, + uint64 +}; + +struct combination { + size_t max_msize; + size_t max_nsize; + size_t max_ksize; + size_t msize; + size_t nsize; + size_t ksize; + matrix_type atype; + matrix_type btype; + matrix_type ctype; + matrix_type dtype; +}; + +template +struct matrix_params; + +template +constexpr bool is_combination_valid_amx(size_t sM, size_t sN, size_t sK) { + // is_same_v is a C++17 feature + if ((std::is_same_v && std::is_same_v && + std::is_same_v && sM <= 16 && sN <= 16 && sK <= 64) || + (std::is_same_v && std::is_same_v && + std::is_same_v && sM <= 16 && sN <= 16 && sK <= 64) || + (std::is_same_v && std::is_same_v && + std::is_same_v && sM <= 16 && sN <= 16 && sK <= 64) || + (std::is_same_v && std::is_same_v && + std::is_same_v && sM <= 16 && sN <= 16 && sK <= 64) || + // bf16 + (std::is_same_v && + std::is_same_v && std::is_same_v && + sM <= 16 && sN <= 16 && sK <= 32)) + return true; + else + return false; +} + +template +constexpr bool are_types_valid_amx() { + if ((std::is_same_v && std::is_same_v && + std::is_same_v) || + (std::is_same_v && std::is_same_v && + std::is_same_v) || + (std::is_same_v && std::is_same_v && + std::is_same_v) || + (std::is_same_v && std::is_same_v && + std::is_same_v) || + (std::is_same_v && + std::is_same_v && std::is_same_v)) + return true; + else + return false; +} + +// Default values query +// Specialization for when only types are given, need to query only sizes +template +struct matrix_params< + architecture::intel_cpu_spr, Ta, Tb, Tc, Td, 0, 0, 0, + typename std::enable_if<(!std::is_same_v && + !std::is_same_v && + !std::is_same_v)>::type> { + static_assert((are_types_valid_amx()), + "Invalid types for AMX, supported types are int8_t, uint8_t, " + "and bf16 (Note that unsigned short should be used in the" + "DPC++ code to implement bf16) "); + + // construct the matrices using the default sizes + static constexpr std::size_t M = 16; + static constexpr std::size_t N = 16; + static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 64 : 32); + + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; +}; + +// Validation query +// Specialization when both types and sizes are given +template +struct matrix_params< + architecture::intel_cpu_spr, Ta, Tb, Tc, Td, sM, sN, sK, + typename std::enable_if<( + !std::is_same_v && !std::is_same_v && + !std::is_same_v && sM != 0 && sN != 0 && sK != 0)>::type> { + // Validate that parameters are supported + static_assert( + (sM == 0 && sN == 0 && sK == 0) || + (is_combination_valid_amx(sM, sN, sK)), + "Invalid parameters for AMX, query valid types and maximum sizes " + "using: matrix_params myparams; and then " + "check out " + "myparams.combinations array"); + + // if combination is valid, construct the matrices + + static constexpr std::size_t M = sM; + static constexpr std::size_t N = sN; + static constexpr std::size_t K = sK; + + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; +}; + +// Intel XMX with SIMD8 capability +// The Intel XMX implementation supports the logical capability support of the +// HW So in this case, M, N, K sizes returned by the query represent the logical +// capabilities of the Intel XMX hardware. + +template +constexpr bool is_combination_valid_xmx8(size_t sM, size_t sN, size_t sK) { + if ((std::is_same_v && std::is_same_v && + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 8 && + sK == 32) || + (std::is_same_v && std::is_same_v && + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 8 && + sK == 32) || + (std::is_same_v && std::is_same_v && + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 8 && + sK == 32) || + (std::is_same_v && std::is_same_v && + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 8 && + sK == 32) || + (std::is_same_v && std::is_same_v && + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 8 && + sK == 16) || + (std::is_same_v && + std::is_same_v && std::is_same_v && + (sM >= 1 && sM <= 8) && sN == 8 && sK == 16)) + return true; + else + return false; +} + +template +constexpr bool are_types_valid_xmx8() { + if ((std::is_same_v && std::is_same_v && + std::is_same_v) || + (std::is_same_v && std::is_same_v && + std::is_same_v) || + (std::is_same_v && std::is_same_v && + std::is_same_v) || + (std::is_same_v && std::is_same_v && + std::is_same_v) || + (std::is_same_v && std::is_same_v && + std::is_same_v) || + (std::is_same_v && + std::is_same_v && std::is_same_v)) + return true; + else + return false; +} + +// Default-values query: +// Specialization for when only types are given, need to query only sizes + +template +struct matrix_params< + architecture::intel_gpu_dg1, Ta, Tb, Tc, Td, 0, 0, 0, + typename std::enable_if<(!std::is_same_v && + !std::is_same_v && + !std::is_same_v)>::type> { + static_assert((are_types_valid_xmx8()), + "Invalid types for architecture::intel_gpu_dg1, supported " + "types are int8_t, uint8_t, half, and bf16"); + + // construct the matrices using the default sizes + + static constexpr std::size_t M = 8; + static constexpr std::size_t N = 8; + static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 32 : 16); + + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; +}; + +// Validation query: +// Specialization when both types and sizes are given +template +struct matrix_params< + architecture::intel_gpu_dg1, Ta, Tb, Tc, Td, sM, sN, sK, + typename std::enable_if<( + !std::is_same_v && !std::is_same_v && + !std::is_same_v && sM != 0 && sN != 0 && sK != 0)>::type> { + // Validate that parameters are supported + static_assert( + (sM == 0 && sN == 0 && sK == 0) || + (is_combination_valid_xmx8(sM, sN, sK)), + "Invalid parameters for XMX8, query valid combinations " + "using: " + "q.get_device().get_info()"); + + // if combination is valid, construct the matrices + static constexpr std::size_t M = sM; + static constexpr std::size_t N = sN; + static constexpr std::size_t K = sK; + + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; +}; + +// Default-values query: +// Specialization for when only types are given, need to query only sizes + +template +struct matrix_params< + architecture::intel_gpu_dg2_g10, Ta, Tb, Tc, Td, 0, 0, 0, + typename std::enable_if<(!std::is_same_v && + !std::is_same_v && + !std::is_same_v)>::type> { + static_assert((are_types_valid_xmx8()), + "Invalid types for architecture::intel_gpu_dg1, supported " + "types are int8_t, uint8_t, half, and bf16"); + + // construct the matrices using the default sizes + + static constexpr std::size_t M = 8; + static constexpr std::size_t N = 8; + static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 32 : 16); + + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; +}; + +// Validation query: +// Specialization when both types and sizes are given +template +struct matrix_params< + architecture::intel_gpu_dg2_g10, Ta, Tb, Tc, Td, sM, sN, sK, + typename std::enable_if<( + !std::is_same_v && !std::is_same_v && + !std::is_same_v && sM != 0 && sN != 0 && sK != 0)>::type> { + // Validate that parameters are supported + static_assert( + (sM == 0 && sN == 0 && sK == 0) || + (is_combination_valid_xmx8(sM, sN, sK)), + "Invalid parameters for XMX8, query valid combinations " + "using: " + "q.get_device().get_info()"); + + // if combination is valid, construct the matrices + static constexpr std::size_t M = sM; + static constexpr std::size_t N = sN; + static constexpr std::size_t K = sK; + + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; +}; + +// Default-values query: +// Specialization for when only types are given, need to query only sizes + +template +struct matrix_params< + architecture::intel_gpu_dg2_g11, Ta, Tb, Tc, Td, 0, 0, 0, + typename std::enable_if<(!std::is_same_v && + !std::is_same_v && + !std::is_same_v)>::type> { + static_assert((are_types_valid_xmx8()), + "Invalid types for architecture::intel_gpu_dg1, supported " + "types are int8_t, uint8_t, half, and bf16"); + + // construct the matrices using the default sizes + + static constexpr std::size_t M = 8; + static constexpr std::size_t N = 8; + static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 32 : 16); + + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; +}; + +// Validation query: +// Specialization when both types and sizes are given +template +struct matrix_params< + architecture::intel_gpu_dg2_g11, Ta, Tb, Tc, Td, sM, sN, sK, + typename std::enable_if<( + !std::is_same_v && !std::is_same_v && + !std::is_same_v && sM != 0 && sN != 0 && sK != 0)>::type> { + // Validate that parameters are supported + static_assert( + (sM == 0 && sN == 0 && sK == 0) || + (is_combination_valid_xmx8(sM, sN, sK)), + "Invalid parameters for XMX8, query valid combinations " + "using: " + "q.get_device().get_info()"); + + // if combination is valid, construct the matrices + static constexpr std::size_t M = sM; + static constexpr std::size_t N = sN; + static constexpr std::size_t K = sK; + + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; +}; + +// Default-values query: +// Specialization for when only types are given, need to query only sizes + +template +struct matrix_params< + architecture::intel_gpu_dg2_g12, Ta, Tb, Tc, Td, 0, 0, 0, + typename std::enable_if<(!std::is_same_v && + !std::is_same_v && + !std::is_same_v)>::type> { + static_assert((are_types_valid_xmx8()), + "Invalid types for architecture::intel_gpu_dg1, supported " + "types are int8_t, uint8_t, half, and bf16"); + + // construct the matrices using the default sizes + + static constexpr std::size_t M = 8; + static constexpr std::size_t N = 8; + static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 32 : 16); + + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; +}; + +// Validation query: +// Specialization when both types and sizes are given +template +struct matrix_params< + architecture::intel_gpu_dg2_g12, Ta, Tb, Tc, Td, sM, sN, sK, + typename std::enable_if<( + !std::is_same_v && !std::is_same_v && + !std::is_same_v && sM != 0 && sN != 0 && sK != 0)>::type> { + // Validate that parameters are supported + static_assert( + (sM == 0 && sN == 0 && sK == 0) || + (is_combination_valid_xmx8(sM, sN, sK)), + "Invalid parameters for XMX8, query valid combinations " + "using: " + "q.get_device().get_info()"); + + // if combination is valid, construct the matrices + static constexpr std::size_t M = sM; + static constexpr std::size_t N = sN; + static constexpr std::size_t K = sK; + + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; +}; + +// Intel XMX with SIMD16 capability +// The Intel XMX implementation supports the logical capability support of the +// HW So in this case, M, N, K sizes returned by the query represent the logical +// capabilities of the Intel XMX hardware. + +template +constexpr bool is_combination_valid_xmx16(size_t sM, size_t sN, size_t sK) { + if ((std::is_same_v && std::is_same_v && + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 16 && + sK == 32) || + (std::is_same_v && std::is_same_v && + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 16 && + sK == 32) || + (std::is_same_v && std::is_same_v && + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 16 && + sK == 32) || + (std::is_same_v && std::is_same_v && + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 16 && + sK == 32) || + (std::is_same_v && std::is_same_v && + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 16 && + sK == 16) || + (std::is_same_v && + std::is_same_v && std::is_same_v && + (sM >= 1 && sM <= 8) && sN == 16 && sK == 16)) + return true; + else + return false; +} + +template +constexpr bool are_types_valid_xmx16() { + if ((std::is_same_v && std::is_same_v && + std::is_same_v) || + (std::is_same_v && std::is_same_v && + std::is_same_v) || + (std::is_same_v && std::is_same_v && + std::is_same_v) || + (std::is_same_v && std::is_same_v && + std::is_same_v) || + (std::is_same_v && std::is_same_v && + std::is_same_v) || + (std::is_same_v && + std::is_same_v && std::is_same_v)) + return true; + else + return false; +} + +// Default values query: +// Specialization for when only types are given, need to query only sizes + +template +struct matrix_params< + architecture::intel_gpu_pvc, Ta, Tb, Tc, Td, 0, 0, 0, + typename std::enable_if<(!std::is_same_v && + !std::is_same_v && + !std::is_same_v)>::type> { + static_assert((are_types_valid_xmx16()), + "Invalid types for architecture::intel_gpu_pvc, supported " + "types are int8_t, uint8_t, " + "half, and bf16"); + + // construct the matrices using the default sizes + + static constexpr std::size_t M = 8; + static constexpr std::size_t N = 16; + static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 32 : 16); + + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; +}; + +// Validation query: +// Specialization when both types and sizes are given +template +struct matrix_params< + architecture::intel_gpu_pvc, Ta, Tb, Tc, Td, sM, sN, sK, + typename std::enable_if<( + !std::is_same_v && !std::is_same_v && + !std::is_same_v && sM != 0 && sN != 0 && sK != 0)>::type> { + // Validate that parameters are supported + static_assert( + (sM == 0 && sN == 0 && sK == 0) || + (is_combination_valid_xmx16(sM, sN, sK)), + "Invalid parameters for architecture::intel_gpu_pvc, query valid " + "combinations " + "using: " + "q.get_device().get_info()"); + + // if combination is valid, construct the matrices + static constexpr std::size_t M = sM; + static constexpr std::size_t N = sN; + static constexpr std::size_t K = sK; + + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; +}; +} // namespace experimental::matrix +} // namespace oneapi +} // namespace ext +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/ext/oneapi/matrix/matrix.hpp b/sycl/include/sycl/ext/oneapi/matrix/matrix.hpp index 77037885fc28b..4311e6954e510 100644 --- a/sycl/include/sycl/ext/oneapi/matrix/matrix.hpp +++ b/sycl/include/sycl/ext/oneapi/matrix/matrix.hpp @@ -21,6 +21,6 @@ #include #endif // SYCL_EXT_ONEAPI_MATRIX_VERSION #if (SYCL_EXT_ONEAPI_MATRIX_VERSION == 4) +#include #include -#include #endif // SYCL_EXT_ONEAPI_MATRIX_VERSION diff --git a/sycl/include/sycl/ext/oneapi/matrix/static-query-use.hpp b/sycl/include/sycl/ext/oneapi/matrix/static-query-use.hpp deleted file mode 100644 index f30ff53bb8a55..0000000000000 --- a/sycl/include/sycl/ext/oneapi/matrix/static-query-use.hpp +++ /dev/null @@ -1,606 +0,0 @@ -//===---------- static-query-use.hpp - SYCL matrix ------------*- C++ -*---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// ===--------------------------------------------------------------------=== // -// This file implements the static query interface for the joint_matrix -// experimental extension. Intel(R) Advanced Matrix Extensions (Intel(R) AMX), -// and Intel(R) Xe Matrix Extensions (Intel(R) XMX) support different logical -// sizes and types. The query interface is used to validate user code and inform -// them about supported types, sizes, scopes, and layouts by the current -// implementation. Note that this query interface is a compile-time query, so -// there will be no runtime errors. The query interface provides three -// functionalities: 1- At compile time, inform the user whether a specific -// combination is valid or not. 2- Construct the matrices using a default shape -// if user does not provide a combination 3- General query interface for sizes, -// types, scopes. This is needed to void padding by the user, for tuning, and -// efficient code generation if used by a library. - -#pragma once - -#include // for half -#include -#include // for use, layout -#include // for joint_matrix - -#include // for size_t -#include // for uint32_t, int8_t -#include // for enable_if - -namespace sycl { -inline namespace _V1 { -namespace ext { -namespace oneapi { -namespace experimental::matrix { - -enum class tpu { - xmx8, - xmx16, - amx, -}; -enum class matrix_type { - bf8, - bf16, - fp16, - tf32, - fp32, - fp64, - sint2, - sint4, - sint8, - sint16, - sint32, - sint64, - uint2, - uint4, - uint8, - uint16, - uint32, - uint64 -}; - -enum class scope_t { sub_group, work_group }; - -template -struct tpu_params; - -template -constexpr bool is_combination_valid_amx(int sM, int sN, int sK) { - // is_same_v is a C++17 feature - if ((std::is_same_v && std::is_same_v && - std::is_same_v && sM <= 16 && sN <= 16 && sK <= 64) || - (std::is_same_v && std::is_same_v && - std::is_same_v && sM <= 16 && sN <= 16 && sK <= 64) || - (std::is_same_v && std::is_same_v && - std::is_same_v && sM <= 16 && sN <= 16 && sK <= 64) || - (std::is_same_v && std::is_same_v && - std::is_same_v && sM <= 16 && sN <= 16 && sK <= 64) || - // bf16 - (std::is_same_v && - std::is_same_v && std::is_same_v && - sM <= 16 && sN <= 16 && sK <= 32)) - return true; - else - return false; -} - -template -constexpr bool are_types_valid_amx() { - if ((std::is_same_v && std::is_same_v && - std::is_same_v) || - (std::is_same_v && std::is_same_v && - std::is_same_v) || - (std::is_same_v && std::is_same_v && - std::is_same_v) || - (std::is_same_v && std::is_same_v && - std::is_same_v) || - (std::is_same_v && - std::is_same_v && std::is_same_v)) - return true; - else - return false; -} - -// General query: -// types are not given, no default sizes and no implicit matrix construction -template -struct tpu_params { - static constexpr std::size_t M = -1; // depends on the type - static constexpr std::size_t N = -1; - static constexpr std::size_t K = -1; - - uint32_t numtiles = 8; - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); - struct combination { - uint32_t max_msize; - uint32_t max_nsize; - uint32_t max_ksize; - matrix_type atype; - matrix_type btype; - matrix_type accumulatortype; - uint32_t msize; - uint32_t nsize; - uint32_t ksize; - }; - using mt = matrix_type; - static constexpr combination combinations[] = { - {16, 16, 64, mt::sint8, mt::sint8, mt::sint32}, - {16, 16, 64, mt::sint8, mt::uint8, mt::sint32}, - {16, 16, 64, mt::uint8, mt::sint8, mt::sint32}, - {16, 16, 64, mt::uint8, mt::uint8, mt::sint32}, - {16, 16, 32, mt::bf16, mt::bf16, mt::fp32}}; - static constexpr int num_combinations = - sizeof(combinations) / sizeof(combination); -}; - -// Sizes-only query -// Specialization for when only types are given, need to query only sizes -template -struct tpu_params && - !std::is_same_v && - !std::is_same_v)>::type> { - static_assert((are_types_valid_amx()), - "Invalid types for AMX, supported types are int8_t, uint8_t, " - "and bf16 (Note that unsigned short should be used in the" - "DPC++ code to implement bf16) "); - - // construct the matrices using the default sizes - static constexpr std::size_t M = 16; - static constexpr std::size_t N = 16; - static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 64 : 32); - - template - using joint_matrix_a = joint_matrix; - template - using joint_matrix_b = joint_matrix; - template - using joint_matrix_accumulator = - joint_matrix; - - uint32_t numtiles = 8; - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); - struct combination { - uint32_t max_msize; - uint32_t max_nsize; - uint32_t max_ksize; - matrix_type atype; - matrix_type btype; - matrix_type accumulatortype; - uint32_t msize; - uint32_t nsize; - uint32_t ksize; - }; - static constexpr combination combinations[] = { - {16, 16, (sizeof(Ta) == 1) ? 64 : 32}}; - static constexpr int num_combinations = - sizeof(combinations) / sizeof(combination); -}; - -// Valid or not: -// Specialization when both types and sizes are given -template -struct tpu_params< - tpu::amx, Ta, Tb, Tc, sM, sN, sK, - typename std::enable_if<( - !std::is_same_v && !std::is_same_v && - !std::is_same_v && sM != 0 && sN != 0 && sK != 0)>::type> { - // Validate that parameters are supported - static_assert( - (sM == 0 && sN == 0 && sK == 0) || - (is_combination_valid_amx(sM, sN, sK)), - "Invalid parameters for AMX, query valid types and maximum sizes " - "using: tpu_params myparams; and then check out " - "myparams.combinations array"); - - // if combination is valid, construct the matrices - - static constexpr std::size_t M = (sM != 0) ? sM : 16; - static constexpr std::size_t N = (sN != 0) ? sN : 16; - static constexpr std::size_t K = - (sK != 0) ? sK : ((sizeof(Ta) == 1) ? 64 : 32); - - template - using joint_matrix_a = joint_matrix; - template - using joint_matrix_b = joint_matrix; - template - using joint_matrix_accumulator = - joint_matrix; - - uint32_t numtiles = 8; - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); -}; - -// Intel XMX with SIMD8 capability -// The Intel XMX implementation supports the logical capability support of the -// HW So in this case, M, N, K sizes returned by the query represent the logical -// capabilities of the Intel XMX hardware. - -template -constexpr bool is_combination_valid_xmx8(int sM, int sN, int sK) { - if ((std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 8 && sK == 32) || - (std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 8 && sK == 32) || - (std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 8 && sK == 32) || - (std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 8 && sK == 32) || - (std::is_same_v && std::is_same_v && - std::is_same_v && - (sM == 1 || sM == 2 || sM == 4 || sM == 8) && sN == 8 && sK == 16) || - (std::is_same_v && - std::is_same_v && std::is_same_v && - (sM == 1 || sM == 2 || sM == 4 || sM == 8) && sN == 8 && sK == 16)) - return true; - else - return false; -} - -template -constexpr bool are_types_valid_xmx8() { - if ((std::is_same_v && std::is_same_v && - std::is_same_v) || - (std::is_same_v && std::is_same_v && - std::is_same_v) || - (std::is_same_v && std::is_same_v && - std::is_same_v) || - (std::is_same_v && std::is_same_v && - std::is_same_v) || - (std::is_same_v && std::is_same_v && - std::is_same_v) || - (std::is_same_v && - std::is_same_v && std::is_same_v)) - return true; - else - return false; -} - -// General Query -// specialization for when types are not given --> no default values -template -struct tpu_params { - static constexpr std::size_t M = -1; // depends on the type - static constexpr std::size_t N = -1; - static constexpr std::size_t K = -1; - - uint32_t numtiles = -1; // does not apply for XMX8 - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); - - struct combination { - uint32_t max_msize; - uint32_t max_nsize; - uint32_t max_ksize; - matrix_type atype; - matrix_type btype; - matrix_type accumulatortype; - uint32_t msize; - uint32_t nsize; - uint32_t ksize; - }; - using mt = matrix_type; - static constexpr combination combinations[] = { - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 1, 8, 32}, - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 2, 8, 32}, - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 4, 8, 32}, - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 8, 8, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 1, 8, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 2, 8, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 4, 8, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 8, 8, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 1, 8, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 2, 8, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 4, 8, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 8, 8, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 1, 8, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 2, 8, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 4, 8, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 8, 8, 32}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 1, 8, 16}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 2, 8, 16}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 4, 8, 16}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 8, 8, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 1, 8, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 2, 8, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 4, 8, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 8, 8, 16}, - }; - static constexpr int num_combinations = - sizeof(combinations) / sizeof(combination); -}; - -// Sizes-only query: -// Specialization for when only types are given, need to query only sizes - -template -struct tpu_params && - !std::is_same_v && - !std::is_same_v)>::type> { - static_assert((are_types_valid_xmx8()), - "Invalid types for XMX8, supported types are int8_t, uint8_t, " - "half, and bf16 (Note that unsigned short should be used in the" - "DPC++ code to implement bf16)"); - - // construct the matrices using the default sizes - - static constexpr std::size_t M = 8; - static constexpr std::size_t N = 8; - static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 32 : 16); - - template - using joint_matrix_a = joint_matrix; - template - using joint_matrix_b = joint_matrix; - template - using joint_matrix_accumulator = - joint_matrix; - - uint32_t numtiles = -1; // does not apply for XMX8 - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); - struct combination { - uint32_t max_msize; - uint32_t max_nsize; - uint32_t max_ksize; - matrix_type atype; - matrix_type btype; - matrix_type accumulatortype; - uint32_t msize; - uint32_t nsize; - uint32_t ksize; - }; - using mt = matrix_type; - static constexpr combination combinations[] = { - // The types used in the initialization below are fake and not used. In - // this case, users already chose the types, they are only looking for - // the - // sizes - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 1, 8, (sizeof(Ta) == 1) ? 32 : 16}, - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 2, 8, (sizeof(Ta) == 1) ? 32 : 16}, - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 4, 8, (sizeof(Ta) == 1) ? 32 : 16}, - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 8, 8, (sizeof(Ta) == 1) ? 32 : 16}, - }; - static constexpr int num_combinations = - sizeof(combinations) / sizeof(combination); -}; - -// Valid or not: -// Specialization when both types and sizes are given -template -struct tpu_params< - tpu::xmx8, Ta, Tb, Tc, sM, sN, sK, - typename std::enable_if<((!std::is_same_v && sM != 0))>::type> { - // Validate that parameters are supported - static_assert((sM == 0 && sN == 0 && sK == 0) || - (is_combination_valid_xmx8(sM, sN, sK)), - "Invalid parameters for XMX8, query valid combinations " - "using: tpu_params myparams; and then check out " - "myparams.combinations array"); - - // if combination is valid, construct the matrices - static constexpr std::size_t M = (sM != 0) ? sM : 8; - static constexpr std::size_t N = (sN != 0) ? sN : 8; - static constexpr std::size_t K = - (sK != 0) ? sK : ((sizeof(Ta) == 1) ? 32 : 16); - - template - using joint_matrix_a = joint_matrix; - template - using joint_matrix_b = joint_matrix; - template - using joint_matrix_accumulator = - joint_matrix; - - uint32_t numtiles = -1; // does not apply for XMX8 - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); -}; - -// Intel XMX with SIMD16 capability -// The Intel XMX implementation supports the logical capability support of the -// HW So in this case, M, N, K sizes returned by the query represent the logical -// capabilities of the Intel XMX hardware. - -template -constexpr bool is_combination_valid_xmx16(int sM, int sN, int sK) { - if ((std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 16 && sK == 32) || - (std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 16 && sK == 32) || - (std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 16 && sK == 32) || - (std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 16 && sK == 32) || - (std::is_same_v && std::is_same_v && - std::is_same_v && - (sM == 1 || sM == 2 || sM == 4 || sM == 8) && sN == 16 && sK == 16) || - (std::is_same_v && - std::is_same_v && std::is_same_v && - (sM == 1 || sM == 2 || sM == 4 || sM == 8) && sN == 16 && sK == 16)) - return true; - else - return false; -} - -template -constexpr bool are_types_valid_xmx16() { - if ((std::is_same_v && std::is_same_v && - std::is_same_v) || - (std::is_same_v && std::is_same_v && - std::is_same_v) || - (std::is_same_v && std::is_same_v && - std::is_same_v) || - (std::is_same_v && std::is_same_v && - std::is_same_v) || - (std::is_same_v && std::is_same_v && - std::is_same_v) || - (std::is_same_v && - std::is_same_v && std::is_same_v)) - return true; - else - return false; -} - -// General Query -// specialization for when types are not given --> no default values -template -struct tpu_params { - static constexpr std::size_t M = -1; // depends on the type - static constexpr std::size_t N = -1; - static constexpr std::size_t K = -1; - - uint32_t numtiles = -1; // does not apply for XMX - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); - - struct combination { - uint32_t max_msize; - uint32_t max_nsize; - uint32_t max_ksize; - matrix_type atype; - matrix_type btype; - matrix_type accumulatortype; - uint32_t msize; - uint32_t nsize; - uint32_t ksize; - }; - using mt = matrix_type; - static constexpr combination combinations[] = { - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 1, 16, 32}, - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 2, 16, 32}, - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 4, 16, 32}, - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 8, 16, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 1, 16, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 2, 16, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 4, 16, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 8, 16, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 1, 16, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 2, 16, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 4, 16, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 8, 16, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 1, 16, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 2, 16, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 4, 16, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 8, 16, 32}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 1, 16, 16}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 2, 16, 16}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 4, 16, 16}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 8, 16, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 1, 16, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 2, 16, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 4, 16, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 8, 16, 16}, - }; - static constexpr int num_combinations = - sizeof(combinations) / sizeof(combination); -}; - -// Sizes-only query: -// Specialization for when only types are given, need to query only sizes - -template -struct tpu_params && - !std::is_same_v && - !std::is_same_v)>::type> { - static_assert((are_types_valid_xmx16()), - "Invalid types for XMX16, supported types are int8_t, uint8_t, " - "half, and bf16 (Note that unsigned short should be used in the" - "DPC++ code to implement bf16)"); - - // construct the matrices using the default sizes - - static constexpr std::size_t M = 8; - static constexpr std::size_t N = 16; - static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 32 : 16); - - template - using joint_matrix_a = joint_matrix; - template - using joint_matrix_b = joint_matrix; - template - using joint_matrix_accumulator = - joint_matrix; - - uint32_t numtiles = -1; // does not apply for XMX - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); - struct combination { - uint32_t max_msize; - uint32_t max_nsize; - uint32_t max_ksize; - matrix_type atype; - matrix_type btype; - matrix_type accumulatortype; - uint32_t msize; - uint32_t nsize; - uint32_t ksize; - }; - using mt = matrix_type; - static constexpr combination combinations[] = { - // The types used in the initialization below are fake and not used. In - // this case, users already chose the types, they are only looking for - // the - // sizes - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 1, 16, (sizeof(Ta) == 1) ? 32 : 16}, - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 2, 16, (sizeof(Ta) == 1) ? 32 : 16}, - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 4, 16, (sizeof(Ta) == 1) ? 32 : 16}, - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 8, 16, (sizeof(Ta) == 1) ? 32 : 16}, - }; - static constexpr int num_combinations = - sizeof(combinations) / sizeof(combination); -}; - -// Valid or not: -// Specialization when both types and sizes are given -template -struct tpu_params< - tpu::xmx16, Ta, Tb, Tc, sM, sN, sK, - typename std::enable_if<((!std::is_same_v && sM != 0))>::type> { - // Validate that parameters are supported - static_assert((sM == 0 && sN == 0 && sK == 0) || - (is_combination_valid_xmx16(sM, sN, sK)), - "Invalid parameters for XMX16, query valid combinations " - "using: tpu_params myparams; and then check out " - "myparams.combinations array"); - - // if combination is valid, construct the matrices - static constexpr std::size_t M = (sM != 0) ? sM : 8; - static constexpr std::size_t N = (sN != 0) ? sN : 8; - static constexpr std::size_t K = - (sK != 0) ? sK : ((sizeof(Ta) == 1) ? 32 : 16); - - template - using joint_matrix_a = joint_matrix; - template - using joint_matrix_b = joint_matrix; - template - using joint_matrix_accumulator = - joint_matrix; - - uint32_t numtiles = -1; // does not apply for XMX16 - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); -}; -} // namespace experimental::matrix -} // namespace oneapi -} // namespace ext -} // namespace _V1 -} // namespace sycl diff --git a/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp b/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp index 048aed6341f6c..ccf50a5a76da9 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp @@ -39,7 +39,8 @@ void matrix_multiply(big_matrix &C, size_t K = NUM_COLS_A; assert(NUM_ROWS_C == NUM_ROWS_A && NUM_COLS_A == NUM_ROWS_B * 4); - using myparams2 = tpu_params; + using myparams2 = + matrix_params; constexpr int TM = myparams2::M; constexpr int TN = myparams2::N; constexpr int TK = myparams2::K; @@ -80,7 +81,7 @@ void matrix_multiply(big_matrix &C, myparams2::joint_matrix_b< sub_group, ext::intel::experimental::matrix::layout::packed> sub_b; - myparams2::joint_matrix_accumulator sub_c; + myparams2::joint_matrix_c sub_c; joint_matrix_load( sg, sub_c, diff --git a/sycl/test/matrix/compile-time-query.cpp b/sycl/test/matrix/compile-time-query.cpp new file mode 100644 index 0000000000000..e39361bbeeadd --- /dev/null +++ b/sycl/test/matrix/compile-time-query.cpp @@ -0,0 +1,146 @@ +// RUN: %clangxx -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 -fsycl -o compile-time-query %s +#include +#include + +using namespace sycl; +using namespace sycl::ext::oneapi::experimental; +using namespace sycl::ext::oneapi::experimental::matrix; + +void query_amx_spr() { + + // generates combination assert + // using myparams = matrix_params; + + // generates types assert + // using myparams2 = matrix_params; + + // tells whether a combination is valid or not, if valid, those will be set as + // default + using myparams = matrix_params; + + size_t dmsize = myparams::M; + size_t dnsize = myparams::N; + size_t dksize = myparams::K; + std::cout << "sizes of AMX matrix_params chosen by the user are: M " << dmsize + << " N " << dnsize << " K " << dksize << std::endl; + + // Sizes-only query: types are given, generate default sizes + using myparams2 = + matrix_params; + myparams2 p; + dmsize = myparams2::M; + dnsize = myparams2::N; + dksize = myparams2::K; + std::cout << "default AMX sizes matrix_params are: M " << dmsize << " N " + << dnsize << " K " << dksize << std::endl; + return; +} + +void query_xmx_dg() { + + // generates combination assert + // using myparams = matrix_params; + + // generate combination of type assert + // using myparams = matrix_params; + + // tells whether a combination is valid or not, if valid, those will be set as + // default + using myparams = matrix_params; + + size_t dmsize = myparams::M; + size_t dnsize = myparams::N; + size_t dksize = myparams::K; + std::cout << "sizes of Intel XMX of architecture::intel_gpu_dg1 " + "matrix_params chosen by the user are: M " + << dmsize << " N " << dnsize << " K " << dksize << std::endl; + + // sizes-only query: types are given, generate default sizes + using myparams2 = + matrix_params; + dmsize = myparams2::M; + dnsize = myparams2::N; + dksize = myparams2::K; + std::cout << "Default Intel XMX of architecture::intel_gpu_dg1 sizes are: M " + << dmsize << " N " << dnsize << " K " << dksize << std::endl; + return; +} + +void query_xmx_ats() { + + // generates combination assert + // using myparams = matrix_params; + + // generate combination of type assert + // using myparams = matrix_params; + + // tells whether a combination is valid or not, if valid, those will be set as + // default + using myparams = matrix_params; + + size_t dmsize = myparams::M; + size_t dnsize = myparams::N; + size_t dksize = myparams::K; + std::cout << "sizes of Intel XMX of architecture::intel_gpu_dg2_g10 " + "matrix_params chosen by the user are: M " + << dmsize << " N " << dnsize << " K " << dksize << std::endl; + + // sizes-only query: types are given, generate default sizes + using myparams2 = + matrix_params; + dmsize = myparams2::M; + dnsize = myparams2::N; + dksize = myparams2::K; + std::cout + << "Default Intel XMX of architecture::intel_gpu_dg2_g10 sizes are: M " + << dmsize << " N " << dnsize << " K " << dksize << std::endl; + return; +} + +void query_xmx_pvc() { + + // generates combination assert + // using myparams = matrix_params; + + // generate combination of type assert + // using myparams = matrix_params; + + // tells whether a combination is valid or not, if valid, those will be set as + // default + using myparams = matrix_params; + + size_t dmsize = myparams::M; + size_t dnsize = myparams::N; + size_t dksize = myparams::K; + std::cout << "sizes of architecture::intel_gpu_pvc matrix_params chosen by " + "the user are: M " + << dmsize << " N " << dnsize << " K " << dksize << std::endl; + + // sizes-only query: types are given, generate default sizes + using myparams2 = + matrix_params; + dmsize = myparams2::M; + dnsize = myparams2::N; + dksize = myparams2::K; + std::cout << "Default Intel XMX of architecture::intel_gpu_pvc sizes are: M " + << dmsize << " N " << dnsize << " K " << dksize << std::endl; + return; +} + +int main() { + query_amx_spr(); + query_xmx_dg(); + query_xmx_ats(); + query_xmx_pvc(); + return 0; +} diff --git a/sycl/test/matrix/query-use.cpp b/sycl/test/matrix/query-use.cpp deleted file mode 100644 index 9afc8e1173043..0000000000000 --- a/sycl/test/matrix/query-use.cpp +++ /dev/null @@ -1,162 +0,0 @@ -// RUN: %clangxx -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 -fsycl -o query-use %s -#include -#include - -using namespace sycl; -using namespace sycl::ext::oneapi::experimental::matrix; - -void query_amx() { - - // generates combination assert - // using myparams = tpu_params; - - // generates types assert - // using myparams2 = tpu_params; - - // tells whether a combination is valid or not, if valid, those will be set as - // default - using myparams = tpu_params; - - size_t dmsize = myparams::M; - size_t dnsize = myparams::N; - size_t dksize = myparams::K; - std::cout << "sizes of AMX tpu_params chosen by the user are: M " << dmsize - << " N " << dnsize << " K " << dksize << std::endl; - - // Sizes-only query: types are given, generate default sizes - using myparams2 = tpu_params; - myparams2 p; - dmsize = myparams2::M; - dnsize = myparams2::N; - dksize = myparams2::K; - std::cout << "default AMX sizes tpu_params are: M " << dmsize << " N " - << dnsize << " K " << dksize << "\n AMX int8 num combinations is " - << p.num_combinations << std::endl; - - // general query: types are not given - tpu_params myparams3; - - if (myparams3.num_scopes > 0) - if (myparams3.scopes[0] == scope_t::sub_group) - std::cout << "There are " << myparams3.num_scopes - << " Scopes that are supported by AMX implementation and " - "subgroup is one of them " - << std::endl; - - std::cout << "AMX query num combinations: " << myparams3.num_combinations - << std::endl; - - if (myparams3.combinations[0].msize != 0) // this is a max params hardware - return; - constexpr int msize = myparams3.combinations[0].max_msize; - constexpr int nsize = myparams3.combinations[0].max_nsize; - constexpr int ksize = myparams3.combinations[0].max_ksize; - std::cout << "AMX query sizes are: M " << msize << " N " << nsize << " K " - << ksize << std::endl; - - size_t NDRangeM = 1024 / msize; - size_t NDRangeN = 1024 / nsize; - queue q; - q.submit([&](handler &cgh) { - cgh.parallel_for( - nd_range<2>({NDRangeM, NDRangeN}, {1, 1}), - [msize, ksize, nsize](nd_item<2> spmd_item) { - sub_group sg = spmd_item.get_sub_group(); - myparams2::joint_matrix_a sub_a1; - myparams2::joint_matrix_b< - sub_group, sycl::ext::intel::experimental::matrix::layout::packed> - sub_b1; - myparams2::joint_matrix_accumulator sub_c1; - - joint_matrix sub_a; - joint_matrix sub_b; - joint_matrix sub_c; - }); - }); -} - -void query_xmx8() { - - // generates combination assert - // using myparams = tpu_params; - - // generate combination of type assert - // using myparams = tpu_params; - - // tells whether a combination is valid or not, if valid, those will be set as - // default - using myparams = tpu_params; - - size_t dmsize = myparams::M; - size_t dnsize = myparams::N; - size_t dksize = myparams::K; - std::cout << "sizes of XMX8 tpu_params chosen by the user are: M " << dmsize - << " N " << dnsize << " K " << dksize << std::endl; - - // sizes-only query: types are given, generate default sizes - using myparams2 = tpu_params; - myparams2 p; - dmsize = myparams2::M; - dnsize = myparams2::N; - dksize = myparams2::K; - std::cout << "Default XMX8 sizes are: M " << dmsize << " N " << dnsize - << " K " << dksize << "\n XMX8 int8 num combinations is " - << p.num_combinations << std::endl; - - dmsize = myparams2::combinations[0].msize; - dnsize = myparams2::combinations[0].nsize; - dksize = myparams2::combinations[0].ksize; - std::cout << "one of XMX8 combination sizes is: M " << dmsize << " N " - << dnsize << " K " << dksize << std::endl; - - // general query: types are not given - tpu_params myparams3; - - if (myparams3.num_scopes > 0) - if (myparams3.scopes[0] == scope_t::sub_group) - std::cout << "There are " << myparams3.num_scopes - << " Scopes that are supported by XMX8 implementation and " - "subgroup is one of them " - << std::endl; - - std::cout << "XMX8 query num combinations: " << myparams3.num_combinations - << std::endl; - - if (myparams3.combinations[0].msize == 0) // this is not a max params hardware - return; - constexpr int msize = myparams3.combinations[0].msize; - constexpr int nsize = myparams3.combinations[0].nsize; - constexpr int ksize = myparams3.combinations[0].ksize; - std::cout << "XMX8 query sizes are: M " << msize << " N " << nsize << " K " - << ksize << std::endl; - std::cout << "XMX8 query max sizes are: M " - << myparams3.combinations[0].max_msize << " N " - << myparams3.combinations[0].max_nsize << " K " - << myparams3.combinations[0].max_ksize << std::endl; - - size_t NDRangeM = 1024 / msize; - size_t NDRangeN = 1024 / nsize; - queue q; - q.submit([&](handler &cgh) { - cgh.parallel_for( - nd_range<2>({NDRangeM, NDRangeN}, {1, 1}), - [msize, ksize, nsize](nd_item<2> spmd_item) { - sub_group sg = spmd_item.get_sub_group(); - myparams2::joint_matrix_a sub_a1; - myparams2::joint_matrix_b< - sub_group, sycl::ext::intel::experimental::matrix::layout::packed> - sub_b1; - myparams2::joint_matrix_accumulator sub_c1; - - joint_matrix sub_a; - joint_matrix sub_b; - joint_matrix sub_c; - }); - }); -} - -int main() { - query_amx(); - query_xmx8(); - return 0; -} diff --git a/sycl/test/matrix/runtime-query.cpp b/sycl/test/matrix/runtime-query.cpp new file mode 100644 index 0000000000000..64090faad4650 --- /dev/null +++ b/sycl/test/matrix/runtime-query.cpp @@ -0,0 +1,50 @@ +// RUN: %clangxx -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 -fsycl -o runtime-query %s +// XFAIL: * + +#include +#include + +using namespace sycl; +using namespace sycl::ext::oneapi::experimental; +using namespace sycl::ext::oneapi::experimental::matrix; + +template +void matrix_runtime_query(queue q) { + + std::vector combinations = + q.get_device().get_info(); + + std::cout << "The matrix hardware implementation in this device provides " + "this number of combinations: " + << combinations.size() << std::endl; + + bool max_sizes; + if (combinations[0].maxsize == 0) + max_sizes = true; // this is a max params hardware + else + max_sizes = false; + for (int i = 0; i < combinations.size(); i++) { + if (Ta == combinations[i].atype && Tb == combinations[i].btype && + Tc == combinations[i].ctype && Td == combinations[i].dtype) { + // joint matrix GEMM kernel can be called using these sizes + if (max_sizes) + std::cout << "The matrix hardware implementation in this device " + "provides the following max sizes are: M " + << combinations[i].max_msize << " N " + << combinations[i].max_nsize << " K " + << combinations[i].max_ksize << std::endl; + else + std::cout << "The matrix hardware implementation in this device " + "provides the following sizes are: M " + << combinations[i].msize << " N " << combinations[i].nsize + << " K " << combinations[i].ksize << std::endl; + } + } +} + +int main() { + queue q; + matrix_runtime_query(q); + return 0; +}