diff --git a/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc index 94c2bebe04906..d7f2ab8d985f2 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc @@ -741,12 +741,12 @@ descriptors that can be queried using `get_info` API. [frame="none",options="header"] |====================== | Device descriptors | Return type| Description -|`ext::oneapi::experimental::info::device::matrix::combinations` | +|`ext::oneapi::experimental::info::device::matrix_combinations` | `std::vector`| tells the set of supported matrix sizes and types on this device |====================== -The runtime query returns a vector of `combinations` of `combination` +The runtime query returns a vector of `matrix_combinations` of `combination` type. Each combination includes the sizes and the types for the matrices A, B, C, and D. Note that for each matrix hardware, the query returns `max_msize, max_nsize, max_ksize` or `msize, nsize, @@ -790,7 +790,7 @@ struct combination { } // namespace sycl::ext::oneapi::experimental::matrix ``` -Each combination of the `combinations` vector composes the types and +Each combination of the `matrix_combinations` vector composes the types and sizes of A, B, C, and D matrices supported by the device implementation. The table below provides a description of each member of the `combination` struct. @@ -832,7 +832,7 @@ the `T` template parameter as follows: + ```c++ // Ta, Tb, Tc, and Td are the types used in applications std::vector combinations = - device.get_info(); + device.get_info(); for (int i = 0; sizeof(combinations); i++) { if (Ta == combinations[i].atype && Tb == combinations[i].btype && @@ -849,7 +849,7 @@ for (int i = 0; sizeof(combinations); i++) { The table below provides a list of the combinations that `joint_matrix` implementations support on each of Intel AMX and Intel XMX hardware. Note that these can be returned using -`ext::oneapi::experimental::info::device::matrix::combinations`. +`ext::oneapi::experimental::info::device::matrix_combinations`. ==== Intel AMX Supported Combinations This is currently available in devices with the architecture @@ -863,11 +863,11 @@ table below. | A type | B type | C and D type | M | N | K | `matrix_type::uint8` | `matrix_type::uint8` | `matrix_type::sint32` | +<=+ 16 | +<=+ 16 | +<=+ 64 -| `matrix_type::uint8` | `matrix_type::int8` | +| `matrix_type::uint8` | `matrix_type::sint8` | `matrix_type::sint32` | +<=+ 16 | +<=+ 16 | +<=+ 64 -| `matrix_type::int8` | `matrix_type::uint8` | +| `matrix_type::sint8` | `matrix_type::uint8` | `matrix_type::sint32` | +<=+ 16 | +<=+ 16 | +<=+ 64 -| `matrix_type::int8` | `matrix_type::int8` | +| `matrix_type::sint8` | `matrix_type::sint8` | `matrix_type::sint32` | +<=+ 16 | +<=+ 16 | +<=+ 64 | `matrix_type::bf16` | `matrix_type::bf16` | `matrix_type::fp32` | +<=+ 16 | +<=+ 16 | +<=+ 32 @@ -875,32 +875,40 @@ table below. ==== Intel XMX Supported Combinations This is currently available in devices with the architecture -`architecture::intel_gpu_pvc` and `architecture::intel_gpu_dg2`. In -these architectures' implementation, the type of the C matrix must be -the same as the type of the D matrix. Therefore, that common type is -shown in a single column in the table below. 
+`architecture::intel_gpu_pvc`, `architecture::intel_gpu_dg2_g10`, +`architecture::intel_gpu_dg2_g11`, and +`architecture::intel_gpu_dg2_g12`. In these architectures' +implementation, the type of the C matrix must be the same as the type +of the D matrix. Therefore, that common type is shown in a single +column in the table below. [frame="none",options="header"] |====================== | A type | B type | C and D type | M | N | K | device -| `matrix_type::uint8` | `matrix_type::uint8` | -`matrix_type::int32` | +<=+ 8 | 16 | 32 | architecture::intel_gpu_pvc -| | | | |8||architecture::intel_gpu_dg2 -| `matrix_type::uint8` | `matrix_type::int8` | -`matrix_type::int32` | +<=+ 8 | 16 | 32 | architecture::intel_gpu_pvc -| | | | |8||architecture::intel_gpu_dg2 -| `matrix_type::int8` | `matrix_type::uint8` | -`matrix_type::int32` | +<=+ 8 | 16 | 32 | architecture::intel_gpu_pvc -| | | | |8||architecture::intel_gpu_dg2 -| `matrix_type::int8` | `matrix_type::int8` | -`matrix_type::int32` | +<=+ 8 | 16 | 32 | architecture::intel_gpu_pvc -| | | | |8||architecture::intel_gpu_dg2 -| `matrix_type::fp16` | `matrix_type::fp16` | -`matrix_type::fp32` | +<=+ 8 | 16 | 16 | architecture::intel_gpu_pvc -| | | | |8|| architecture::intel_gpu_dg2 -| `matrix_type::bf16` | `matrix_type::bf16` | -`matrix_type::fp32` | +<=+ 8 | 16 | 16 | architecture::intel_gpu_pvc -| | | | |8|| architecture::intel_gpu_dg2 +.2+| `matrix_type::uint8` .2+| `matrix_type::uint8` .2+| +`matrix_type::sint32` .2+| +<=+ 8 | 16 .2+| 32 +|`architecture::intel_gpu_pvc`|8|`architecture::intel_gpu_dg2_g10, +architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` +.2+| `matrix_type::uint8` .2+| `matrix_type::sint8` .2+| +`matrix_type::sint32` .2+| +<=+ 8 | 16 .2+| 32 | +`architecture::intel_gpu_pvc`|8|`architecture::intel_gpu_dg2_g10, +architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` +.2+| `matrix_type::sint8` .2+| `matrix_type::uint8` .2+| +`matrix_type::sint32` .2+| +<=+ 8 | 16 .2+| 32 | +`architecture::intel_gpu_pvc`|8|`architecture::intel_gpu_dg2_g10, +architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` +.2+| `matrix_type::sint8` .2+| `matrix_type::sint8` .2+| +`matrix_type::sint32` .2+| +<=+ 8 | 16 .2+| 32 | +`architecture::intel_gpu_pvc`|8|`architecture::intel_gpu_dg2_g10, +architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` +.2+|`matrix_type::fp16` .2+| `matrix_type::fp16` .2+| +`matrix_type::fp32` .2+| +<=+ 8 | 16 .2+| 16 | +`architecture::intel_gpu_pvc`|8| `architecture::intel_gpu_dg2_g10, +architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` +.2+| `matrix_type::bf16` .2+| `matrix_type::bf16` .2+| +`matrix_type::fp32` .2+| +<=+ 8 | 16 .2+| 16 | +`architecture::intel_gpu_pvc` |8| `architecture::intel_gpu_dg2_g10, +architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` |====================== ==== Nvidia Tensor Cores Supported Combinations @@ -932,11 +940,11 @@ supported parameter combination is specified in the following table. 
|16 |16 |16 |8 |32 |16 |32 |8 |16 -.3+| `matrix_type::int8` .3+| `matrix_type::int32` +.3+| `matrix_type::sint8` .3+| `matrix_type::sint32` |16 |16 |16 .6+| sm_72 |8 |32 |16 |32 |8 |16 -.3+|`matrix_type::uint8` .3+|`matrix_type::int32` +.3+|`matrix_type::uint8` .3+|`matrix_type::sint32` |16 |16 |16 |8 |32 |16 |32 |8 |16 diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc index 6c7bc2cce84bc..e6a51ed9d103c 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc @@ -99,6 +99,7 @@ namespace sycl::ext::oneapi::experimental { enum class architecture : /* unspecified */ { x86_64, + intel_cpu_spr, intel_gpu_bdw, intel_gpu_skl, intel_gpu_kbl, @@ -195,6 +196,12 @@ of these enumerators, and it provides a brief description of their meanings. |- |Any CPU device with the x86_64 instruction set. +|`intel_cpu_spr` +|- +|Intel Xeon processor codenamed Sapphire Rapids. The utility of this +enumeration is currently limited. See the section "Limitations with +the experimental version" for details. + |`intel_gpu_bdw` |- |Broadwell Intel graphics architecture. @@ -246,7 +253,7 @@ of these enumerators, and it provides a brief description of their meanings. |`intel_gpu_adl_s` + `intel_gpu_rpl_s` |- -|Alder Lake S Intel graphics architecture or Raptor Lake Intel graphics +|Alder Lake S Intel graphics architecture or Raptor Lake Intel graphics architecture. |`intel_gpu_adl_p` @@ -589,6 +596,15 @@ feature, the application must be compiled in ahead-of-time (AOT) mode using description of the `-fsycl-targets` option. These are the target names of the form "intel_gpu_*", "nvidia_gpu_*", or "amd_gpu_*". +The architecture enumeration `intel_cpu_spr` does not currently work +with any of the APIs described in this extension. It cannot be used +with the `if_architecture_is` function, the +`device::ext_oneapi_architecture_is` function, or the +`info::device::architecture` query descriptor. It currently exists +only for use with the +link:sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc[sycl_ext_oneapi_matrix] +extension. + == Future direction This experimental extension is still evolving. 
We expect that future versions diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp index 5eb0a301ce4b5..624ae83897e48 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp @@ -14,6 +14,7 @@ namespace ext::oneapi::experimental { enum class architecture { x86_64, + intel_cpu_spr, intel_gpu_bdw, intel_gpu_skl, intel_gpu_kbl, diff --git a/sycl/include/sycl/ext/oneapi/matrix/static-query-use.hpp b/sycl/include/sycl/ext/oneapi/matrix/static-query-use.hpp index f30ff53bb8a55..cfc699d86b38c 100644 --- a/sycl/include/sycl/ext/oneapi/matrix/static-query-use.hpp +++ b/sycl/include/sycl/ext/oneapi/matrix/static-query-use.hpp @@ -35,40 +35,42 @@ namespace ext { namespace oneapi { namespace experimental::matrix { -enum class tpu { - xmx8, - xmx16, - amx, -}; enum class matrix_type { - bf8, bf16, fp16, tf32, fp32, fp64, - sint2, - sint4, sint8, sint16, sint32, sint64, - uint2, - uint4, uint8, uint16, uint32, uint64 }; -enum class scope_t { sub_group, work_group }; +struct combination { + size_t max_msize; + size_t max_nsize; + size_t max_ksize; + size_t msize; + size_t nsize; + size_t ksize; + matrix_type atype; + matrix_type btype; + matrix_type ctype; + matrix_type dtype; +}; -template -struct tpu_params; +template +struct matrix_params; template -constexpr bool is_combination_valid_amx(int sM, int sN, int sK) { +constexpr bool is_combination_valid_amx(size_t sM, size_t sN, size_t sK) { // is_same_v is a C++17 feature if ((std::is_same_v && std::is_same_v && std::is_same_v && sM <= 16 && sN <= 16 && sK <= 64) || @@ -104,46 +106,14 @@ constexpr bool are_types_valid_amx() { return false; } -// General query: -// types are not given, no default sizes and no implicit matrix construction -template -struct tpu_params { - static constexpr std::size_t M = -1; // depends on the type - static constexpr std::size_t N = -1; - static constexpr std::size_t K = -1; - - uint32_t numtiles = 8; - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); - struct combination { - uint32_t max_msize; - uint32_t max_nsize; - uint32_t max_ksize; - matrix_type atype; - matrix_type btype; - matrix_type accumulatortype; - uint32_t msize; - uint32_t nsize; - uint32_t ksize; - }; - using mt = matrix_type; - static constexpr combination combinations[] = { - {16, 16, 64, mt::sint8, mt::sint8, mt::sint32}, - {16, 16, 64, mt::sint8, mt::uint8, mt::sint32}, - {16, 16, 64, mt::uint8, mt::sint8, mt::sint32}, - {16, 16, 64, mt::uint8, mt::uint8, mt::sint32}, - {16, 16, 32, mt::bf16, mt::bf16, mt::fp32}}; - static constexpr int num_combinations = - sizeof(combinations) / sizeof(combination); -}; - -// Sizes-only query +// Default values query // Specialization for when only types are given, need to query only sizes -template -struct tpu_params && - !std::is_same_v && - !std::is_same_v)>::type> { +template +struct matrix_params< + architecture::intel_cpu_spr, Ta, Tb, Tc, Td, 0, 0, 0, + typename std::enable_if<(!std::is_same_v && + !std::is_same_v && + !std::is_same_v)>::type> { static_assert((are_types_valid_amx()), "Invalid types for AMX, supported types are int8_t, uint8_t, " "and bf16 (Note that unsigned short should be used in the" @@ -159,34 +129,17 @@ struct tpu_params using joint_matrix_b = joint_matrix; template - using joint_matrix_accumulator = - joint_matrix; - - uint32_t 
numtiles = 8; - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); - struct combination { - uint32_t max_msize; - uint32_t max_nsize; - uint32_t max_ksize; - matrix_type atype; - matrix_type btype; - matrix_type accumulatortype; - uint32_t msize; - uint32_t nsize; - uint32_t ksize; - }; - static constexpr combination combinations[] = { - {16, 16, (sizeof(Ta) == 1) ? 64 : 32}}; - static constexpr int num_combinations = - sizeof(combinations) / sizeof(combination); + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; }; -// Valid or not: +// Validation query // Specialization when both types and sizes are given -template -struct tpu_params< - tpu::amx, Ta, Tb, Tc, sM, sN, sK, +template +struct matrix_params< + architecture::intel_cpu_spr, Ta, Tb, Tc, Td, sM, sN, sK, typename std::enable_if<( !std::is_same_v && !std::is_same_v && !std::is_same_v && sM != 0 && sN != 0 && sK != 0)>::type> { @@ -195,27 +148,24 @@ struct tpu_params< (sM == 0 && sN == 0 && sK == 0) || (is_combination_valid_amx(sM, sN, sK)), "Invalid parameters for AMX, query valid types and maximum sizes " - "using: tpu_params myparams; and then check out " + "using: matrix_params myparams; and then " + "check out " "myparams.combinations array"); // if combination is valid, construct the matrices - static constexpr std::size_t M = (sM != 0) ? sM : 16; - static constexpr std::size_t N = (sN != 0) ? sN : 16; - static constexpr std::size_t K = - (sK != 0) ? sK : ((sizeof(Ta) == 1) ? 64 : 32); + static constexpr std::size_t M = sM; + static constexpr std::size_t N = sN; + static constexpr std::size_t K = sK; template using joint_matrix_a = joint_matrix; template using joint_matrix_b = joint_matrix; template - using joint_matrix_accumulator = - joint_matrix; - - uint32_t numtiles = 8; - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; }; // Intel XMX with SIMD8 capability @@ -224,25 +174,25 @@ struct tpu_params< // capabilities of the Intel XMX hardware. 
template -constexpr bool is_combination_valid_xmx8(int sM, int sN, int sK) { +constexpr bool is_combination_valid_xmx8(size_t sM, size_t sN, size_t sK) { if ((std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 8 && sK == 32) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 8 && + sK == 32) || (std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 8 && sK == 32) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 8 && + sK == 32) || (std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 8 && sK == 32) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 8 && + sK == 32) || (std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 8 && sK == 32) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 8 && + sK == 32) || (std::is_same_v && std::is_same_v && - std::is_same_v && - (sM == 1 || sM == 2 || sM == 4 || sM == 8) && sN == 8 && sK == 16) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 8 && + sK == 16) || (std::is_same_v && std::is_same_v && std::is_same_v && - (sM == 1 || sM == 2 || sM == 4 || sM == 8) && sN == 8 && sK == 16)) + (sM >= 1 && sM <= 8) && sN == 8 && sK == 16)) return true; else return false; @@ -267,72 +217,79 @@ constexpr bool are_types_valid_xmx8() { return false; } -// General Query -// specialization for when types are not given --> no default values -template -struct tpu_params { - static constexpr std::size_t M = -1; // depends on the type - static constexpr std::size_t N = -1; - static constexpr std::size_t K = -1; - - uint32_t numtiles = -1; // does not apply for XMX8 - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); - - struct combination { - uint32_t max_msize; - uint32_t max_nsize; - uint32_t max_ksize; - matrix_type atype; - matrix_type btype; - matrix_type accumulatortype; - uint32_t msize; - uint32_t nsize; - uint32_t ksize; - }; - using mt = matrix_type; - static constexpr combination combinations[] = { - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 1, 8, 32}, - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 2, 8, 32}, - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 4, 8, 32}, - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 8, 8, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 1, 8, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 2, 8, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 4, 8, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 8, 8, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 1, 8, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 2, 8, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 4, 8, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 8, 8, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 1, 8, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 2, 8, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 4, 8, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 8, 8, 32}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 1, 8, 16}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 2, 8, 16}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 4, 8, 16}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 8, 8, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 1, 8, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 2, 8, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 4, 8, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 8, 8, 16}, - }; - static constexpr int 
num_combinations = - sizeof(combinations) / sizeof(combination); +// Default-values query: +// Specialization for when only types are given, need to query only sizes + +template +struct matrix_params< + architecture::intel_gpu_dg2_g10, Ta, Tb, Tc, Td, 0, 0, 0, + typename std::enable_if<(!std::is_same_v && + !std::is_same_v && + !std::is_same_v)>::type> { + static_assert((are_types_valid_xmx8()), + "Invalid types for architecture::intel_gpu_dg2_g10, supported " + "types are int8_t, uint8_t, half, and bf16"); + + // construct the matrices using the default sizes + + static constexpr std::size_t M = 8; + static constexpr std::size_t N = 8; + static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 32 : 16); + + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; +}; + +// Validation query: +// Specialization when both types and sizes are given +template +struct matrix_params< + architecture::intel_gpu_dg2_g10, Ta, Tb, Tc, Td, sM, sN, sK, + typename std::enable_if<( + !std::is_same_v && !std::is_same_v && + !std::is_same_v && sM != 0 && sN != 0 && sK != 0)>::type> { + // Validate that parameters are supported + static_assert( + (sM == 0 && sN == 0 && sK == 0) || + (is_combination_valid_xmx8(sM, sN, sK)), + "Invalid parameters for XMX8, query valid combinations " + "using: " + "q.get_device().get_info()"); + + // if combination is valid, construct the matrices + static constexpr std::size_t M = sM; + static constexpr std::size_t N = sN; + static constexpr std::size_t K = sK; + + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; }; -// Sizes-only query: +// Default-values query: // Specialization for when only types are given, need to query only sizes -template -struct tpu_params && - !std::is_same_v && - !std::is_same_v)>::type> { +template +struct matrix_params< + architecture::intel_gpu_dg2_g11, Ta, Tb, Tc, Td, 0, 0, 0, + typename std::enable_if<(!std::is_same_v && + !std::is_same_v && + !std::is_same_v)>::type> { static_assert((are_types_valid_xmx8()), - "Invalid types for XMX8, supported types are int8_t, uint8_t, " - "half, and bf16 (Note that unsigned short should be used in the" - "DPC++ code to implement bf16)"); + "Invalid types for architecture::intel_gpu_dg2_g11, supported" + "types are int8_t, uint8_t, half, and bf16"); // construct the matrices using the default sizes @@ -345,68 +302,102 @@ struct tpu_params using joint_matrix_b = joint_matrix; template - using joint_matrix_accumulator = - joint_matrix; - - uint32_t numtiles = -1; // does not apply for XMX8 - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); - struct combination { - uint32_t max_msize; - uint32_t max_nsize; - uint32_t max_ksize; - matrix_type atype; - matrix_type btype; - matrix_type accumulatortype; - uint32_t msize; - uint32_t nsize; - uint32_t ksize; - }; - using mt = matrix_type; - static constexpr combination combinations[] = { - // The types used in the initialization below are fake and not used. In - // this case, users already chose the types, they are only looking for - // the - // sizes - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 1, 8, (sizeof(Ta) == 1) ? 32 : 16}, - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 2, 8, (sizeof(Ta) == 1) ? 
32 : 16}, - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 4, 8, (sizeof(Ta) == 1) ? 32 : 16}, - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 8, 8, (sizeof(Ta) == 1) ? 32 : 16}, - }; - static constexpr int num_combinations = - sizeof(combinations) / sizeof(combination); + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; }; -// Valid or not: +// Validation query: // Specialization when both types and sizes are given -template -struct tpu_params< - tpu::xmx8, Ta, Tb, Tc, sM, sN, sK, - typename std::enable_if<((!std::is_same_v && sM != 0))>::type> { +template +struct matrix_params< + architecture::intel_gpu_dg2_g11, Ta, Tb, Tc, Td, sM, sN, sK, + typename std::enable_if<( + !std::is_same_v && !std::is_same_v && + !std::is_same_v && sM != 0 && sN != 0 && sK != 0)>::type> { // Validate that parameters are supported - static_assert((sM == 0 && sN == 0 && sK == 0) || - (is_combination_valid_xmx8(sM, sN, sK)), - "Invalid parameters for XMX8, query valid combinations " - "using: tpu_params myparams; and then check out " - "myparams.combinations array"); + static_assert( + (sM == 0 && sN == 0 && sK == 0) || + (is_combination_valid_xmx8(sM, sN, sK)), + "Invalid parameters for XMX8, query valid combinations " + "using: " + "q.get_device().get_info()"); // if combination is valid, construct the matrices - static constexpr std::size_t M = (sM != 0) ? sM : 8; - static constexpr std::size_t N = (sN != 0) ? sN : 8; - static constexpr std::size_t K = - (sK != 0) ? sK : ((sizeof(Ta) == 1) ? 32 : 16); + static constexpr std::size_t M = sM; + static constexpr std::size_t N = sN; + static constexpr std::size_t K = sK; + + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; +}; + +// Default-values query: +// Specialization for when only types are given, need to query only sizes + +template +struct matrix_params< + architecture::intel_gpu_dg2_g12, Ta, Tb, Tc, Td, 0, 0, 0, + typename std::enable_if<(!std::is_same_v && + !std::is_same_v && + !std::is_same_v)>::type> { + static_assert((are_types_valid_xmx8()), + "Invalid types for architecture::intel_gpu_dg2_g12, supported " + "types are int8_t, uint8_t, half, and bf16"); + + // construct the matrices using the default sizes + + static constexpr std::size_t M = 8; + static constexpr std::size_t N = 8; + static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 
32 : 16); template using joint_matrix_a = joint_matrix; template using joint_matrix_b = joint_matrix; template - using joint_matrix_accumulator = - joint_matrix; + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; +}; + +// Validation query: +// Specialization when both types and sizes are given +template +struct matrix_params< + architecture::intel_gpu_dg2_g12, Ta, Tb, Tc, Td, sM, sN, sK, + typename std::enable_if<( + !std::is_same_v && !std::is_same_v && + !std::is_same_v && sM != 0 && sN != 0 && sK != 0)>::type> { + // Validate that parameters are supported + static_assert( + (sM == 0 && sN == 0 && sK == 0) || + (is_combination_valid_xmx8(sM, sN, sK)), + "Invalid parameters for XMX8, query valid combinations " + "using: " + "q.get_device().get_info()"); + + // if combination is valid, construct the matrices + static constexpr std::size_t M = sM; + static constexpr std::size_t N = sN; + static constexpr std::size_t K = sK; - uint32_t numtiles = -1; // does not apply for XMX8 - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; }; // Intel XMX with SIMD16 capability @@ -415,25 +406,25 @@ struct tpu_params< // capabilities of the Intel XMX hardware. template -constexpr bool is_combination_valid_xmx16(int sM, int sN, int sK) { +constexpr bool is_combination_valid_xmx16(size_t sM, size_t sN, size_t sK) { if ((std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 16 && sK == 32) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 16 && + sK == 32) || (std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 16 && sK == 32) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 16 && + sK == 32) || (std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 16 && sK == 32) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 16 && + sK == 32) || (std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 16 && sK == 32) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 16 && + sK == 32) || (std::is_same_v && std::is_same_v && - std::is_same_v && - (sM == 1 || sM == 2 || sM == 4 || sM == 8) && sN == 16 && sK == 16) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 16 && + sK == 16) || (std::is_same_v && std::is_same_v && std::is_same_v && - (sM == 1 || sM == 2 || sM == 4 || sM == 8) && sN == 16 && sK == 16)) + (sM >= 1 && sM <= 8) && sN == 16 && sK == 16)) return true; else return false; @@ -458,72 +449,19 @@ constexpr bool are_types_valid_xmx16() { return false; } -// General Query -// specialization for when types are not given --> no default values -template -struct tpu_params { - static constexpr std::size_t M = -1; // depends on the type - static constexpr std::size_t N = -1; - static constexpr std::size_t K = -1; - - uint32_t numtiles = -1; // does not apply for XMX - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); - - struct combination { - uint32_t max_msize; - uint32_t max_nsize; - uint32_t max_ksize; - matrix_type atype; - matrix_type btype; - matrix_type accumulatortype; - uint32_t 
msize; - uint32_t nsize; - uint32_t ksize; - }; - using mt = matrix_type; - static constexpr combination combinations[] = { - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 1, 16, 32}, - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 2, 16, 32}, - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 4, 16, 32}, - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 8, 16, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 1, 16, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 2, 16, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 4, 16, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 8, 16, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 1, 16, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 2, 16, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 4, 16, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 8, 16, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 1, 16, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 2, 16, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 4, 16, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 8, 16, 32}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 1, 16, 16}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 2, 16, 16}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 4, 16, 16}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 8, 16, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 1, 16, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 2, 16, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 4, 16, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 8, 16, 16}, - }; - static constexpr int num_combinations = - sizeof(combinations) / sizeof(combination); -}; - -// Sizes-only query: +// Default values query: // Specialization for when only types are given, need to query only sizes -template -struct tpu_params && - !std::is_same_v && - !std::is_same_v)>::type> { +template +struct matrix_params< + architecture::intel_gpu_pvc, Ta, Tb, Tc, Td, 0, 0, 0, + typename std::enable_if<(!std::is_same_v && + !std::is_same_v && + !std::is_same_v)>::type> { static_assert((are_types_valid_xmx16()), - "Invalid types for XMX16, supported types are int8_t, uint8_t, " - "half, and bf16 (Note that unsigned short should be used in the" - "DPC++ code to implement bf16)"); + "Invalid types for architecture::intel_gpu_pvc, supported " + "types are int8_t, uint8_t, " + "half, and bf16"); // construct the matrices using the default sizes @@ -536,68 +474,42 @@ struct tpu_params using joint_matrix_b = joint_matrix; template - using joint_matrix_accumulator = - joint_matrix; - - uint32_t numtiles = -1; // does not apply for XMX - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); - struct combination { - uint32_t max_msize; - uint32_t max_nsize; - uint32_t max_ksize; - matrix_type atype; - matrix_type btype; - matrix_type accumulatortype; - uint32_t msize; - uint32_t nsize; - uint32_t ksize; - }; - using mt = matrix_type; - static constexpr combination combinations[] = { - // The types used in the initialization below are fake and not used. In - // this case, users already chose the types, they are only looking for - // the - // sizes - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 1, 16, (sizeof(Ta) == 1) ? 32 : 16}, - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 2, 16, (sizeof(Ta) == 1) ? 32 : 16}, - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 4, 16, (sizeof(Ta) == 1) ? 32 : 16}, - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 8, 16, (sizeof(Ta) == 1) ? 
32 : 16}, - }; - static constexpr int num_combinations = - sizeof(combinations) / sizeof(combination); + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; }; -// Valid or not: +// Validation query: // Specialization when both types and sizes are given -template -struct tpu_params< - tpu::xmx16, Ta, Tb, Tc, sM, sN, sK, - typename std::enable_if<((!std::is_same_v && sM != 0))>::type> { +template +struct matrix_params< + architecture::intel_gpu_pvc, Ta, Tb, Tc, Td, sM, sN, sK, + typename std::enable_if<( + !std::is_same_v && !std::is_same_v && + !std::is_same_v && sM != 0 && sN != 0 && sK != 0)>::type> { // Validate that parameters are supported - static_assert((sM == 0 && sN == 0 && sK == 0) || - (is_combination_valid_xmx16(sM, sN, sK)), - "Invalid parameters for XMX16, query valid combinations " - "using: tpu_params myparams; and then check out " - "myparams.combinations array"); + static_assert( + (sM == 0 && sN == 0 && sK == 0) || + (is_combination_valid_xmx16(sM, sN, sK)), + "Invalid parameters for architecture::intel_gpu_pvc, query valid " + "combinations " + "using: " + "q.get_device().get_info()"); // if combination is valid, construct the matrices - static constexpr std::size_t M = (sM != 0) ? sM : 8; - static constexpr std::size_t N = (sN != 0) ? sN : 8; - static constexpr std::size_t K = - (sK != 0) ? sK : ((sizeof(Ta) == 1) ? 32 : 16); + static constexpr std::size_t M = sM; + static constexpr std::size_t N = sN; + static constexpr std::size_t K = sK; template using joint_matrix_a = joint_matrix; template using joint_matrix_b = joint_matrix; template - using joint_matrix_accumulator = - joint_matrix; - - uint32_t numtiles = -1; // does not apply for XMX16 - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; }; } // namespace experimental::matrix } // namespace oneapi diff --git a/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp b/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp index 048aed6341f6c..ccf50a5a76da9 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp @@ -39,7 +39,8 @@ void matrix_multiply(big_matrix &C, size_t K = NUM_COLS_A; assert(NUM_ROWS_C == NUM_ROWS_A && NUM_COLS_A == NUM_ROWS_B * 4); - using myparams2 = tpu_params; + using myparams2 = + matrix_params; constexpr int TM = myparams2::M; constexpr int TN = myparams2::N; constexpr int TK = myparams2::K; @@ -80,7 +81,7 @@ void matrix_multiply(big_matrix &C, myparams2::joint_matrix_b< sub_group, ext::intel::experimental::matrix::layout::packed> sub_b; - myparams2::joint_matrix_accumulator sub_c; + myparams2::joint_matrix_c sub_c; joint_matrix_load( sg, sub_c, diff --git a/sycl/test/matrix/compile-query.cpp b/sycl/test/matrix/compile-query.cpp new file mode 100644 index 0000000000000..e110eef22a385 --- /dev/null +++ b/sycl/test/matrix/compile-query.cpp @@ -0,0 +1,113 @@ +// RUN: %clangxx -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 -fsycl -o compile-query %s +#include +#include + +using namespace sycl; +using namespace sycl::ext::oneapi::experimental; +using namespace sycl::ext::oneapi::experimental::matrix; + +void query_amx_spr() { + + // generates combination assert + // using myparams = matrix_params; + + // generates types assert + // using myparams2 = matrix_params; + + // tells whether a combination is valid or not, if valid, those will be set as + 
// default + using myparams = matrix_params; + + size_t dmsize = myparams::M; + size_t dnsize = myparams::N; + size_t dksize = myparams::K; + std::cout << "sizes of AMX matrix_params chosen by the user are: M " << dmsize + << " N " << dnsize << " K " << dksize << std::endl; + + // Sizes-only query: types are given, generate default sizes + using myparams2 = + matrix_params; + myparams2 p; + dmsize = myparams2::M; + dnsize = myparams2::N; + dksize = myparams2::K; + std::cout << "default AMX sizes matrix_params are: M " << dmsize << " N " + << dnsize << " K " << dksize << std::endl; + return; +} + +void query_xmx_dg2() { + + // generates combination assert + // using myparams = matrix_params; + + // generate combination of type assert + // using myparams = matrix_params; + + // tells whether a combination is valid or not, if valid, those will be set as + // default + using myparams = matrix_params; + + size_t dmsize = myparams::M; + size_t dnsize = myparams::N; + size_t dksize = myparams::K; + std::cout << "sizes of Intel XMX of architecture::intel_gpu_dg2_g10 " + "matrix_params chosen by the user are: M " + << dmsize << " N " << dnsize << " K " << dksize << std::endl; + + // sizes-only query: types are given, generate default sizes + using myparams2 = + matrix_params; + dmsize = myparams2::M; + dnsize = myparams2::N; + dksize = myparams2::K; + std::cout + << "Default Intel XMX of architecture::intel_gpu_dg2_g10 sizes are: M " + << dmsize << " N " << dnsize << " K " << dksize << std::endl; + return; +} + +void query_xmx_pvc() { + + // generates combination assert + // using myparams = matrix_params; + + // generate combination of type assert + // using myparams = matrix_params; + + // tells whether a combination is valid or not, if valid, those will be set as + // default + using myparams = matrix_params; + + size_t dmsize = myparams::M; + size_t dnsize = myparams::N; + size_t dksize = myparams::K; + std::cout << "sizes of architecture::intel_gpu_pvc matrix_params chosen by " + "the user are: M " + << dmsize << " N " << dnsize << " K " << dksize << std::endl; + + // sizes-only query: types are given, generate default sizes + using myparams2 = + matrix_params; + dmsize = myparams2::M; + dnsize = myparams2::N; + dksize = myparams2::K; + std::cout << "Default Intel XMX of architecture::intel_gpu_pvc sizes are: M " + << dmsize << " N " << dnsize << " K " << dksize << std::endl; + return; +} + +int main() { + query_amx_spr(); + query_xmx_dg2(); + query_xmx_pvc(); + return 0; +} diff --git a/sycl/test/matrix/query-use.cpp b/sycl/test/matrix/query-use.cpp deleted file mode 100644 index 9afc8e1173043..0000000000000 --- a/sycl/test/matrix/query-use.cpp +++ /dev/null @@ -1,162 +0,0 @@ -// RUN: %clangxx -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 -fsycl -o query-use %s -#include -#include - -using namespace sycl; -using namespace sycl::ext::oneapi::experimental::matrix; - -void query_amx() { - - // generates combination assert - // using myparams = tpu_params; - - // generates types assert - // using myparams2 = tpu_params; - - // tells whether a combination is valid or not, if valid, those will be set as - // default - using myparams = tpu_params; - - size_t dmsize = myparams::M; - size_t dnsize = myparams::N; - size_t dksize = myparams::K; - std::cout << "sizes of AMX tpu_params chosen by the user are: M " << dmsize - << " N " << dnsize << " K " << dksize << std::endl; - - // Sizes-only query: types are given, generate default sizes - using myparams2 = tpu_params; - myparams2 p; - dmsize = myparams2::M; - 
dnsize = myparams2::N; - dksize = myparams2::K; - std::cout << "default AMX sizes tpu_params are: M " << dmsize << " N " - << dnsize << " K " << dksize << "\n AMX int8 num combinations is " - << p.num_combinations << std::endl; - - // general query: types are not given - tpu_params myparams3; - - if (myparams3.num_scopes > 0) - if (myparams3.scopes[0] == scope_t::sub_group) - std::cout << "There are " << myparams3.num_scopes - << " Scopes that are supported by AMX implementation and " - "subgroup is one of them " - << std::endl; - - std::cout << "AMX query num combinations: " << myparams3.num_combinations - << std::endl; - - if (myparams3.combinations[0].msize != 0) // this is a max params hardware - return; - constexpr int msize = myparams3.combinations[0].max_msize; - constexpr int nsize = myparams3.combinations[0].max_nsize; - constexpr int ksize = myparams3.combinations[0].max_ksize; - std::cout << "AMX query sizes are: M " << msize << " N " << nsize << " K " - << ksize << std::endl; - - size_t NDRangeM = 1024 / msize; - size_t NDRangeN = 1024 / nsize; - queue q; - q.submit([&](handler &cgh) { - cgh.parallel_for( - nd_range<2>({NDRangeM, NDRangeN}, {1, 1}), - [msize, ksize, nsize](nd_item<2> spmd_item) { - sub_group sg = spmd_item.get_sub_group(); - myparams2::joint_matrix_a sub_a1; - myparams2::joint_matrix_b< - sub_group, sycl::ext::intel::experimental::matrix::layout::packed> - sub_b1; - myparams2::joint_matrix_accumulator sub_c1; - - joint_matrix sub_a; - joint_matrix sub_b; - joint_matrix sub_c; - }); - }); -} - -void query_xmx8() { - - // generates combination assert - // using myparams = tpu_params; - - // generate combination of type assert - // using myparams = tpu_params; - - // tells whether a combination is valid or not, if valid, those will be set as - // default - using myparams = tpu_params; - - size_t dmsize = myparams::M; - size_t dnsize = myparams::N; - size_t dksize = myparams::K; - std::cout << "sizes of XMX8 tpu_params chosen by the user are: M " << dmsize - << " N " << dnsize << " K " << dksize << std::endl; - - // sizes-only query: types are given, generate default sizes - using myparams2 = tpu_params; - myparams2 p; - dmsize = myparams2::M; - dnsize = myparams2::N; - dksize = myparams2::K; - std::cout << "Default XMX8 sizes are: M " << dmsize << " N " << dnsize - << " K " << dksize << "\n XMX8 int8 num combinations is " - << p.num_combinations << std::endl; - - dmsize = myparams2::combinations[0].msize; - dnsize = myparams2::combinations[0].nsize; - dksize = myparams2::combinations[0].ksize; - std::cout << "one of XMX8 combination sizes is: M " << dmsize << " N " - << dnsize << " K " << dksize << std::endl; - - // general query: types are not given - tpu_params myparams3; - - if (myparams3.num_scopes > 0) - if (myparams3.scopes[0] == scope_t::sub_group) - std::cout << "There are " << myparams3.num_scopes - << " Scopes that are supported by XMX8 implementation and " - "subgroup is one of them " - << std::endl; - - std::cout << "XMX8 query num combinations: " << myparams3.num_combinations - << std::endl; - - if (myparams3.combinations[0].msize == 0) // this is not a max params hardware - return; - constexpr int msize = myparams3.combinations[0].msize; - constexpr int nsize = myparams3.combinations[0].nsize; - constexpr int ksize = myparams3.combinations[0].ksize; - std::cout << "XMX8 query sizes are: M " << msize << " N " << nsize << " K " - << ksize << std::endl; - std::cout << "XMX8 query max sizes are: M " - << myparams3.combinations[0].max_msize << " N " - << 
myparams3.combinations[0].max_nsize << " K " - << myparams3.combinations[0].max_ksize << std::endl; - - size_t NDRangeM = 1024 / msize; - size_t NDRangeN = 1024 / nsize; - queue q; - q.submit([&](handler &cgh) { - cgh.parallel_for( - nd_range<2>({NDRangeM, NDRangeN}, {1, 1}), - [msize, ksize, nsize](nd_item<2> spmd_item) { - sub_group sg = spmd_item.get_sub_group(); - myparams2::joint_matrix_a sub_a1; - myparams2::joint_matrix_b< - sub_group, sycl::ext::intel::experimental::matrix::layout::packed> - sub_b1; - myparams2::joint_matrix_accumulator sub_c1; - - joint_matrix sub_a; - joint_matrix sub_b; - joint_matrix sub_c; - }); - }); -} - -int main() { - query_amx(); - query_xmx8(); - return 0; -} diff --git a/sycl/test/matrix/runtime-query.cpp b/sycl/test/matrix/runtime-query.cpp new file mode 100644 index 0000000000000..d6ccedf24b870 --- /dev/null +++ b/sycl/test/matrix/runtime-query.cpp @@ -0,0 +1,50 @@ +// RUN: %clangxx -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 -fsycl -o runtime-query %s +// XFAIL: * + +#include +#include + +using namespace sycl; +using namespace sycl::ext::oneapi::experimental; +using namespace sycl::ext::oneapi::experimental::matrix; + +template +void matrix_runtime_query(queue q) { + + std::vector combinations = + q.get_device().get_info(); + + std::cout << "The matrix hardware implementation in this device provides " + "this number of combinations: " + << combinations.size() << std::endl; + + bool max_sizes; + if (combinations[0].maxsize == 0) + max_sizes = true; // this is a max params hardware + else + max_sizes = false; + for (int i = 0; i < combinations.size(); i++) { + if (Ta == combinations[i].atype && Tb == combinations[i].btype && + Tc == combinations[i].ctype && Td == combinations[i].dtype) { + // joint matrix GEMM kernel can be called using these sizes + if (max_sizes) + std::cout << "The matrix hardware implementation in this device " + "provides the following max sizes are: M " + << combinations[i].max_msize << " N " + << combinations[i].max_nsize << " K " + << combinations[i].max_ksize << std::endl; + else + std::cout << "The matrix hardware implementation in this device " + "provides the following sizes are: M " + << combinations[i].msize << " N " << combinations[i].nsize + << " K " << combinations[i].ksize << std::endl; + } + } +} + +int main() { + queue q; + matrix_runtime_query(q); + return 0; +}
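
The runtime query documented above in `sycl_ext_oneapi_matrix.asciidoc` returns a `std::vector` of `combination` objects through the `ext::oneapi::experimental::info::device::matrix_combinations` descriptor. The sketch below is a minimal illustration of walking that vector; it assumes the `combination` members and descriptor name exactly as defined in this patch, that `<sycl/sycl.hpp>` is a sufficient include set (the include targets of the new tests are elided above), and that "maximum size" hardware such as Intel AMX leaves the discrete `msize`/`nsize`/`ksize` fields at zero, as the superseded `tpu_params` tables did.

```c++
#include <sycl/sycl.hpp>

#include <iostream>
#include <vector>

namespace syclex = sycl::ext::oneapi::experimental;
using syclex::matrix::combination;
using syclex::matrix::matrix_type;

int main() {
  sycl::queue q;
  std::vector<combination> combinations =
      q.get_device().get_info<syclex::info::device::matrix_combinations>();

  for (const combination &c : combinations) {
    // Example filter: keep only bf16 x bf16 -> fp32 combinations.
    if (c.atype != matrix_type::bf16 || c.btype != matrix_type::bf16 ||
        c.ctype != matrix_type::fp32 || c.dtype != matrix_type::fp32)
      continue;
    if (c.msize == 0)
      // Assumed convention: "maximum size" hardware (e.g. Intel AMX) reports
      // upper bounds and leaves the discrete sizes at zero.
      std::cout << "max M/N/K: " << c.max_msize << " / " << c.max_nsize
                << " / " << c.max_ksize << "\n";
    else
      // Discrete-size hardware (e.g. Intel XMX) reports exact shapes.
      std::cout << "M/N/K: " << c.msize << " / " << c.nsize << " / "
                << c.ksize << "\n";
  }
  return 0;
}
```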
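
The `matrix_params` specializations added to `static-query-use.hpp` support two compile-time uses: a validation query when both types and sizes are supplied, and a default-values query when only types are supplied. The sketch below exercises the Intel AMX (`architecture::intel_cpu_spr`) specializations; it assumes `int32_t` is an accepted accumulator type (per the AMX combination table above), passes explicit zeros to select the default-sizes specialization (the primary template may also default these parameters, in which case they could be omitted), and assumes `<sycl/sycl.hpp>` provides the needed declarations.

```c++
#include <sycl/sycl.hpp>

#include <cstdint>
#include <iostream>

namespace syclex = sycl::ext::oneapi::experimental;
using syclex::architecture;
using syclex::matrix::matrix_params;

int main() {
  // Validation query: types and sizes are given. The static_assert inside
  // matrix_params rejects the instantiation at compile time unless
  // 16 x 16 x 64 int8 -> int32 is a legal Intel AMX shape (it is, per the
  // combination table in the extension document).
  using valid_amx = matrix_params<architecture::intel_cpu_spr, int8_t, int8_t,
                                  int32_t, int32_t, 16, 16, 64>;
  std::cout << "validated AMX shape: M " << valid_amx::M << " N "
            << valid_amx::N << " K " << valid_amx::K << "\n";

  // Default-values query: only types are given. Explicit zeros select the
  // "default sizes" specialization.
  using default_amx = matrix_params<architecture::intel_cpu_spr, int8_t,
                                    int8_t, int32_t, int32_t, 0, 0, 0>;
  std::cout << "default AMX shape: M " << default_amx::M << " N "
            << default_amx::N << " K " << default_amx::K << "\n";
  return 0;
}
```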
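
Because the supported shapes differ between PVC (XMX16, N = 16) and the DG2 variants (XMX8, N = 8), an application may want to branch on the device architecture before choosing `joint_matrix` tile sizes. The sketch below combines the two extensions touched by this patch for that purpose; it assumes `device::ext_oneapi_architecture_is` takes an `architecture` enumerator and returns `bool`, as described in `sycl_ext_oneapi_device_architecture.asciidoc`. `intel_cpu_spr` is deliberately not checked this way, since the patch states it does not yet work with that API.

```c++
#include <sycl/sycl.hpp>

#include <iostream>

namespace syclex = sycl::ext::oneapi::experimental;

int main() {
  sycl::queue q;
  sycl::device dev = q.get_device();

  int n = 0;
  if (dev.ext_oneapi_architecture_is(syclex::architecture::intel_gpu_pvc)) {
    n = 16; // XMX16: the combination table above fixes N at 16 on PVC.
  } else if (dev.ext_oneapi_architecture_is(
                 syclex::architecture::intel_gpu_dg2_g10) ||
             dev.ext_oneapi_architecture_is(
                 syclex::architecture::intel_gpu_dg2_g11) ||
             dev.ext_oneapi_architecture_is(
                 syclex::architecture::intel_gpu_dg2_g12)) {
    n = 8; // XMX8: the table fixes N at 8 on the DG2 variants.
  }

  if (n == 0)
    std::cout << "no Intel XMX joint_matrix path for this device\n";
  else
    std::cout << "selected XMX N dimension: " << n << "\n";
  return 0;
}
```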