diff --git a/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc index 94c2bebe04906..d7f2ab8d985f2 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc @@ -741,12 +741,12 @@ descriptors that can be queried using `get_info` API. [frame="none",options="header"] |====================== | Device descriptors | Return type| Description -|`ext::oneapi::experimental::info::device::matrix::combinations` | +|`ext::oneapi::experimental::info::device::matrix_combinations` | `std::vector`| tells the set of supported matrix sizes and types on this device |====================== -The runtime query returns a vector of `combinations` of `combination` +The runtime query returns a vector of `matrix_combinations` of `combination` type. Each combination includes the sizes and the types for the matrices A, B, C, and D. Note that for each matrix hardware, the query returns `max_msize, max_nsize, max_ksize` or `msize, nsize, @@ -790,7 +790,7 @@ struct combination { } // namespace sycl::ext::oneapi::experimental::matrix ``` -Each combination of the `combinations` vector composes the types and +Each combination of the `matrix_combinations` vector composes the types and sizes of A, B, C, and D matrices supported by the device implementation. The table below provides a description of each member of the `combination` struct. @@ -832,7 +832,7 @@ the `T` template parameter as follows: + ```c++ // Ta, Tb, Tc, and Td are the types used in applications std::vector combinations = - device.get_info(); + device.get_info(); for (int i = 0; sizeof(combinations); i++) { if (Ta == combinations[i].atype && Tb == combinations[i].btype && @@ -849,7 +849,7 @@ for (int i = 0; sizeof(combinations); i++) { The table below provides a list of the combinations that `joint_matrix` implementations support on each of Intel AMX and Intel XMX hardware. Note that these can be returned using -`ext::oneapi::experimental::info::device::matrix::combinations`. +`ext::oneapi::experimental::info::device::matrix_combinations`. ==== Intel AMX Supported Combinations This is currently available in devices with the architecture @@ -863,11 +863,11 @@ table below. | A type | B type | C and D type | M | N | K | `matrix_type::uint8` | `matrix_type::uint8` | `matrix_type::sint32` | +<=+ 16 | +<=+ 16 | +<=+ 64 -| `matrix_type::uint8` | `matrix_type::int8` | +| `matrix_type::uint8` | `matrix_type::sint8` | `matrix_type::sint32` | +<=+ 16 | +<=+ 16 | +<=+ 64 -| `matrix_type::int8` | `matrix_type::uint8` | +| `matrix_type::sint8` | `matrix_type::uint8` | `matrix_type::sint32` | +<=+ 16 | +<=+ 16 | +<=+ 64 -| `matrix_type::int8` | `matrix_type::int8` | +| `matrix_type::sint8` | `matrix_type::sint8` | `matrix_type::sint32` | +<=+ 16 | +<=+ 16 | +<=+ 64 | `matrix_type::bf16` | `matrix_type::bf16` | `matrix_type::fp32` | +<=+ 16 | +<=+ 16 | +<=+ 32 @@ -875,32 +875,40 @@ table below. ==== Intel XMX Supported Combinations This is currently available in devices with the architecture -`architecture::intel_gpu_pvc` and `architecture::intel_gpu_dg2`. In -these architectures' implementation, the type of the C matrix must be -the same as the type of the D matrix. Therefore, that common type is -shown in a single column in the table below. 
+`architecture::intel_gpu_pvc`, `architecture::intel_gpu_dg2_g10`, +`architecture::intel_gpu_dg2_g11`, and +`architecture::intel_gpu_dg2_g12`. In these architectures' +implementation, the type of the C matrix must be the same as the type +of the D matrix. Therefore, that common type is shown in a single +column in the table below. [frame="none",options="header"] |====================== | A type | B type | C and D type | M | N | K | device -| `matrix_type::uint8` | `matrix_type::uint8` | -`matrix_type::int32` | +<=+ 8 | 16 | 32 | architecture::intel_gpu_pvc -| | | | |8||architecture::intel_gpu_dg2 -| `matrix_type::uint8` | `matrix_type::int8` | -`matrix_type::int32` | +<=+ 8 | 16 | 32 | architecture::intel_gpu_pvc -| | | | |8||architecture::intel_gpu_dg2 -| `matrix_type::int8` | `matrix_type::uint8` | -`matrix_type::int32` | +<=+ 8 | 16 | 32 | architecture::intel_gpu_pvc -| | | | |8||architecture::intel_gpu_dg2 -| `matrix_type::int8` | `matrix_type::int8` | -`matrix_type::int32` | +<=+ 8 | 16 | 32 | architecture::intel_gpu_pvc -| | | | |8||architecture::intel_gpu_dg2 -| `matrix_type::fp16` | `matrix_type::fp16` | -`matrix_type::fp32` | +<=+ 8 | 16 | 16 | architecture::intel_gpu_pvc -| | | | |8|| architecture::intel_gpu_dg2 -| `matrix_type::bf16` | `matrix_type::bf16` | -`matrix_type::fp32` | +<=+ 8 | 16 | 16 | architecture::intel_gpu_pvc -| | | | |8|| architecture::intel_gpu_dg2 +.2+| `matrix_type::uint8` .2+| `matrix_type::uint8` .2+| +`matrix_type::sint32` .2+| +<=+ 8 | 16 .2+| 32 +|`architecture::intel_gpu_pvc`|8|`architecture::intel_gpu_dg2_g10, +architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` +.2+| `matrix_type::uint8` .2+| `matrix_type::sint8` .2+| +`matrix_type::sint32` .2+| +<=+ 8 | 16 .2+| 32 | +`architecture::intel_gpu_pvc`|8|`architecture::intel_gpu_dg2_g10, +architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` +.2+| `matrix_type::sint8` .2+| `matrix_type::uint8` .2+| +`matrix_type::sint32` .2+| +<=+ 8 | 16 .2+| 32 | +`architecture::intel_gpu_pvc`|8|`architecture::intel_gpu_dg2_g10, +architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` +.2+| `matrix_type::sint8` .2+| `matrix_type::sint8` .2+| +`matrix_type::sint32` .2+| +<=+ 8 | 16 .2+| 32 | +`architecture::intel_gpu_pvc`|8|`architecture::intel_gpu_dg2_g10, +architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` +.2+|`matrix_type::fp16` .2+| `matrix_type::fp16` .2+| +`matrix_type::fp32` .2+| +<=+ 8 | 16 .2+| 16 | +`architecture::intel_gpu_pvc`|8| `architecture::intel_gpu_dg2_g10, +architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` +.2+| `matrix_type::bf16` .2+| `matrix_type::bf16` .2+| +`matrix_type::fp32` .2+| +<=+ 8 | 16 .2+| 16 | +`architecture::intel_gpu_pvc` |8| `architecture::intel_gpu_dg2_g10, +architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12` |====================== ==== Nvidia Tensor Cores Supported Combinations @@ -932,11 +940,11 @@ supported parameter combination is specified in the following table. 
|16 |16 |16 |8 |32 |16 |32 |8 |16 -.3+| `matrix_type::int8` .3+| `matrix_type::int32` +.3+| `matrix_type::sint8` .3+| `matrix_type::sint32` |16 |16 |16 .6+| sm_72 |8 |32 |16 |32 |8 |16 -.3+|`matrix_type::uint8` .3+|`matrix_type::int32` +.3+|`matrix_type::uint8` .3+|`matrix_type::sint32` |16 |16 |16 |8 |32 |16 |32 |8 |16 diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc index 6c7bc2cce84bc..e6a51ed9d103c 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc @@ -99,6 +99,7 @@ namespace sycl::ext::oneapi::experimental { enum class architecture : /* unspecified */ { x86_64, + intel_cpu_spr, intel_gpu_bdw, intel_gpu_skl, intel_gpu_kbl, @@ -195,6 +196,12 @@ of these enumerators, and it provides a brief description of their meanings. |- |Any CPU device with the x86_64 instruction set. +|`intel_cpu_spr` +|- +|Intel Xeon processor codenamed Sapphire Rapids. The utility of this +enumeration is currently limited. See the section "Limitations with +the experimental version" for details. + |`intel_gpu_bdw` |- |Broadwell Intel graphics architecture. @@ -246,7 +253,7 @@ of these enumerators, and it provides a brief description of their meanings. |`intel_gpu_adl_s` + `intel_gpu_rpl_s` |- -|Alder Lake S Intel graphics architecture or Raptor Lake Intel graphics +|Alder Lake S Intel graphics architecture or Raptor Lake Intel graphics architecture. |`intel_gpu_adl_p` @@ -589,6 +596,15 @@ feature, the application must be compiled in ahead-of-time (AOT) mode using description of the `-fsycl-targets` option. These are the target names of the form "intel_gpu_*", "nvidia_gpu_*", or "amd_gpu_*". +The architecture enumeration `intel_cpu_spr` does not currently work +with any of the APIs described in this extension. It cannot be used +with the `if_architecture_is` function, the +`device::ext_oneapi_architecture_is` function, or the +`info::device::architecture` query descriptor. It currently exists +only for use with the +link:sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc[sycl_ext_oneapi_matrix] +extension. + == Future direction This experimental extension is still evolving. 
We expect that future versions diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp index 5eb0a301ce4b5..624ae83897e48 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp @@ -14,6 +14,7 @@ namespace ext::oneapi::experimental { enum class architecture { x86_64, + intel_cpu_spr, intel_gpu_bdw, intel_gpu_skl, intel_gpu_kbl, diff --git a/sycl/include/sycl/ext/oneapi/matrix/static-query-use.hpp b/sycl/include/sycl/ext/oneapi/matrix/static-query-use.hpp index f30ff53bb8a55..cfc699d86b38c 100644 --- a/sycl/include/sycl/ext/oneapi/matrix/static-query-use.hpp +++ b/sycl/include/sycl/ext/oneapi/matrix/static-query-use.hpp @@ -35,40 +35,42 @@ namespace ext { namespace oneapi { namespace experimental::matrix { -enum class tpu { - xmx8, - xmx16, - amx, -}; enum class matrix_type { - bf8, bf16, fp16, tf32, fp32, fp64, - sint2, - sint4, sint8, sint16, sint32, sint64, - uint2, - uint4, uint8, uint16, uint32, uint64 }; -enum class scope_t { sub_group, work_group }; +struct combination { + size_t max_msize; + size_t max_nsize; + size_t max_ksize; + size_t msize; + size_t nsize; + size_t ksize; + matrix_type atype; + matrix_type btype; + matrix_type ctype; + matrix_type dtype; +}; -template -struct tpu_params; +template +struct matrix_params; template -constexpr bool is_combination_valid_amx(int sM, int sN, int sK) { +constexpr bool is_combination_valid_amx(size_t sM, size_t sN, size_t sK) { // is_same_v is a C++17 feature if ((std::is_same_v && std::is_same_v && std::is_same_v && sM <= 16 && sN <= 16 && sK <= 64) || @@ -104,46 +106,14 @@ constexpr bool are_types_valid_amx() { return false; } -// General query: -// types are not given, no default sizes and no implicit matrix construction -template -struct tpu_params { - static constexpr std::size_t M = -1; // depends on the type - static constexpr std::size_t N = -1; - static constexpr std::size_t K = -1; - - uint32_t numtiles = 8; - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); - struct combination { - uint32_t max_msize; - uint32_t max_nsize; - uint32_t max_ksize; - matrix_type atype; - matrix_type btype; - matrix_type accumulatortype; - uint32_t msize; - uint32_t nsize; - uint32_t ksize; - }; - using mt = matrix_type; - static constexpr combination combinations[] = { - {16, 16, 64, mt::sint8, mt::sint8, mt::sint32}, - {16, 16, 64, mt::sint8, mt::uint8, mt::sint32}, - {16, 16, 64, mt::uint8, mt::sint8, mt::sint32}, - {16, 16, 64, mt::uint8, mt::uint8, mt::sint32}, - {16, 16, 32, mt::bf16, mt::bf16, mt::fp32}}; - static constexpr int num_combinations = - sizeof(combinations) / sizeof(combination); -}; - -// Sizes-only query +// Default values query // Specialization for when only types are given, need to query only sizes -template -struct tpu_params && - !std::is_same_v && - !std::is_same_v)>::type> { +template +struct matrix_params< + architecture::intel_cpu_spr, Ta, Tb, Tc, Td, 0, 0, 0, + typename std::enable_if<(!std::is_same_v && + !std::is_same_v && + !std::is_same_v)>::type> { static_assert((are_types_valid_amx()), "Invalid types for AMX, supported types are int8_t, uint8_t, " "and bf16 (Note that unsigned short should be used in the" @@ -159,34 +129,17 @@ struct tpu_params using joint_matrix_b = joint_matrix; template - using joint_matrix_accumulator = - joint_matrix; - - uint32_t 
numtiles = 8; - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); - struct combination { - uint32_t max_msize; - uint32_t max_nsize; - uint32_t max_ksize; - matrix_type atype; - matrix_type btype; - matrix_type accumulatortype; - uint32_t msize; - uint32_t nsize; - uint32_t ksize; - }; - static constexpr combination combinations[] = { - {16, 16, (sizeof(Ta) == 1) ? 64 : 32}}; - static constexpr int num_combinations = - sizeof(combinations) / sizeof(combination); + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; }; -// Valid or not: +// Validation query // Specialization when both types and sizes are given -template -struct tpu_params< - tpu::amx, Ta, Tb, Tc, sM, sN, sK, +template +struct matrix_params< + architecture::intel_cpu_spr, Ta, Tb, Tc, Td, sM, sN, sK, typename std::enable_if<( !std::is_same_v && !std::is_same_v && !std::is_same_v && sM != 0 && sN != 0 && sK != 0)>::type> { @@ -195,27 +148,24 @@ struct tpu_params< (sM == 0 && sN == 0 && sK == 0) || (is_combination_valid_amx(sM, sN, sK)), "Invalid parameters for AMX, query valid types and maximum sizes " - "using: tpu_params myparams; and then check out " + "using: matrix_params myparams; and then " + "check out " "myparams.combinations array"); // if combination is valid, construct the matrices - static constexpr std::size_t M = (sM != 0) ? sM : 16; - static constexpr std::size_t N = (sN != 0) ? sN : 16; - static constexpr std::size_t K = - (sK != 0) ? sK : ((sizeof(Ta) == 1) ? 64 : 32); + static constexpr std::size_t M = sM; + static constexpr std::size_t N = sN; + static constexpr std::size_t K = sK; template using joint_matrix_a = joint_matrix; template using joint_matrix_b = joint_matrix; template - using joint_matrix_accumulator = - joint_matrix; - - uint32_t numtiles = 8; - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; }; // Intel XMX with SIMD8 capability @@ -224,25 +174,25 @@ struct tpu_params< // capabilities of the Intel XMX hardware. 
template -constexpr bool is_combination_valid_xmx8(int sM, int sN, int sK) { +constexpr bool is_combination_valid_xmx8(size_t sM, size_t sN, size_t sK) { if ((std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 8 && sK == 32) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 8 && + sK == 32) || (std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 8 && sK == 32) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 8 && + sK == 32) || (std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 8 && sK == 32) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 8 && + sK == 32) || (std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 8 && sK == 32) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 8 && + sK == 32) || (std::is_same_v && std::is_same_v && - std::is_same_v && - (sM == 1 || sM == 2 || sM == 4 || sM == 8) && sN == 8 && sK == 16) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 8 && + sK == 16) || (std::is_same_v && std::is_same_v && std::is_same_v && - (sM == 1 || sM == 2 || sM == 4 || sM == 8) && sN == 8 && sK == 16)) + (sM >= 1 && sM <= 8) && sN == 8 && sK == 16)) return true; else return false; @@ -267,72 +217,79 @@ constexpr bool are_types_valid_xmx8() { return false; } -// General Query -// specialization for when types are not given --> no default values -template -struct tpu_params { - static constexpr std::size_t M = -1; // depends on the type - static constexpr std::size_t N = -1; - static constexpr std::size_t K = -1; - - uint32_t numtiles = -1; // does not apply for XMX8 - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); - - struct combination { - uint32_t max_msize; - uint32_t max_nsize; - uint32_t max_ksize; - matrix_type atype; - matrix_type btype; - matrix_type accumulatortype; - uint32_t msize; - uint32_t nsize; - uint32_t ksize; - }; - using mt = matrix_type; - static constexpr combination combinations[] = { - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 1, 8, 32}, - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 2, 8, 32}, - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 4, 8, 32}, - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 8, 8, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 1, 8, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 2, 8, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 4, 8, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 8, 8, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 1, 8, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 2, 8, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 4, 8, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 8, 8, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 1, 8, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 2, 8, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 4, 8, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 8, 8, 32}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 1, 8, 16}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 2, 8, 16}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 4, 8, 16}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 8, 8, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 1, 8, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 2, 8, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 4, 8, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 8, 8, 16}, - }; - static constexpr int 
num_combinations = - sizeof(combinations) / sizeof(combination); +// Default-values query: +// Specialization for when only types are given, need to query only sizes + +template +struct matrix_params< + architecture::intel_gpu_dg2_g10, Ta, Tb, Tc, Td, 0, 0, 0, + typename std::enable_if<(!std::is_same_v && + !std::is_same_v && + !std::is_same_v)>::type> { + static_assert((are_types_valid_xmx8()), + "Invalid types for architecture::intel_gpu_dg2_g10, supported " + "types are int8_t, uint8_t, half, and bf16"); + + // construct the matrices using the default sizes + + static constexpr std::size_t M = 8; + static constexpr std::size_t N = 8; + static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 32 : 16); + + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; +}; + +// Validation query: +// Specialization when both types and sizes are given +template +struct matrix_params< + architecture::intel_gpu_dg2_g10, Ta, Tb, Tc, Td, sM, sN, sK, + typename std::enable_if<( + !std::is_same_v && !std::is_same_v && + !std::is_same_v && sM != 0 && sN != 0 && sK != 0)>::type> { + // Validate that parameters are supported + static_assert( + (sM == 0 && sN == 0 && sK == 0) || + (is_combination_valid_xmx8(sM, sN, sK)), + "Invalid parameters for XMX8, query valid combinations " + "using: " + "q.get_device().get_info()"); + + // if combination is valid, construct the matrices + static constexpr std::size_t M = sM; + static constexpr std::size_t N = sN; + static constexpr std::size_t K = sK; + + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; }; -// Sizes-only query: +// Default-values query: // Specialization for when only types are given, need to query only sizes -template -struct tpu_params && - !std::is_same_v && - !std::is_same_v)>::type> { +template +struct matrix_params< + architecture::intel_gpu_dg2_g11, Ta, Tb, Tc, Td, 0, 0, 0, + typename std::enable_if<(!std::is_same_v && + !std::is_same_v && + !std::is_same_v)>::type> { static_assert((are_types_valid_xmx8()), - "Invalid types for XMX8, supported types are int8_t, uint8_t, " - "half, and bf16 (Note that unsigned short should be used in the" - "DPC++ code to implement bf16)"); + "Invalid types for architecture::intel_gpu_dg2_g11, supported" + "types are int8_t, uint8_t, half, and bf16"); // construct the matrices using the default sizes @@ -345,68 +302,102 @@ struct tpu_params using joint_matrix_b = joint_matrix; template - using joint_matrix_accumulator = - joint_matrix; - - uint32_t numtiles = -1; // does not apply for XMX8 - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); - struct combination { - uint32_t max_msize; - uint32_t max_nsize; - uint32_t max_ksize; - matrix_type atype; - matrix_type btype; - matrix_type accumulatortype; - uint32_t msize; - uint32_t nsize; - uint32_t ksize; - }; - using mt = matrix_type; - static constexpr combination combinations[] = { - // The types used in the initialization below are fake and not used. In - // this case, users already chose the types, they are only looking for - // the - // sizes - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 1, 8, (sizeof(Ta) == 1) ? 32 : 16}, - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 2, 8, (sizeof(Ta) == 1) ? 
32 : 16}, - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 4, 8, (sizeof(Ta) == 1) ? 32 : 16}, - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 8, 8, (sizeof(Ta) == 1) ? 32 : 16}, - }; - static constexpr int num_combinations = - sizeof(combinations) / sizeof(combination); + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; }; -// Valid or not: +// Validation query: // Specialization when both types and sizes are given -template -struct tpu_params< - tpu::xmx8, Ta, Tb, Tc, sM, sN, sK, - typename std::enable_if<((!std::is_same_v && sM != 0))>::type> { +template +struct matrix_params< + architecture::intel_gpu_dg2_g11, Ta, Tb, Tc, Td, sM, sN, sK, + typename std::enable_if<( + !std::is_same_v && !std::is_same_v && + !std::is_same_v && sM != 0 && sN != 0 && sK != 0)>::type> { // Validate that parameters are supported - static_assert((sM == 0 && sN == 0 && sK == 0) || - (is_combination_valid_xmx8(sM, sN, sK)), - "Invalid parameters for XMX8, query valid combinations " - "using: tpu_params myparams; and then check out " - "myparams.combinations array"); + static_assert( + (sM == 0 && sN == 0 && sK == 0) || + (is_combination_valid_xmx8(sM, sN, sK)), + "Invalid parameters for XMX8, query valid combinations " + "using: " + "q.get_device().get_info()"); // if combination is valid, construct the matrices - static constexpr std::size_t M = (sM != 0) ? sM : 8; - static constexpr std::size_t N = (sN != 0) ? sN : 8; - static constexpr std::size_t K = - (sK != 0) ? sK : ((sizeof(Ta) == 1) ? 32 : 16); + static constexpr std::size_t M = sM; + static constexpr std::size_t N = sN; + static constexpr std::size_t K = sK; + + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; +}; + +// Default-values query: +// Specialization for when only types are given, need to query only sizes + +template +struct matrix_params< + architecture::intel_gpu_dg2_g12, Ta, Tb, Tc, Td, 0, 0, 0, + typename std::enable_if<(!std::is_same_v && + !std::is_same_v && + !std::is_same_v)>::type> { + static_assert((are_types_valid_xmx8()), + "Invalid types for architecture::intel_gpu_dg2_g12, supported " + "types are int8_t, uint8_t, half, and bf16"); + + // construct the matrices using the default sizes + + static constexpr std::size_t M = 8; + static constexpr std::size_t N = 8; + static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 
32 : 16); template using joint_matrix_a = joint_matrix; template using joint_matrix_b = joint_matrix; template - using joint_matrix_accumulator = - joint_matrix; + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; +}; + +// Validation query: +// Specialization when both types and sizes are given +template +struct matrix_params< + architecture::intel_gpu_dg2_g12, Ta, Tb, Tc, Td, sM, sN, sK, + typename std::enable_if<( + !std::is_same_v && !std::is_same_v && + !std::is_same_v && sM != 0 && sN != 0 && sK != 0)>::type> { + // Validate that parameters are supported + static_assert( + (sM == 0 && sN == 0 && sK == 0) || + (is_combination_valid_xmx8(sM, sN, sK)), + "Invalid parameters for XMX8, query valid combinations " + "using: " + "q.get_device().get_info()"); + + // if combination is valid, construct the matrices + static constexpr std::size_t M = sM; + static constexpr std::size_t N = sN; + static constexpr std::size_t K = sK; - uint32_t numtiles = -1; // does not apply for XMX8 - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); + template + using joint_matrix_a = joint_matrix; + template + using joint_matrix_b = joint_matrix; + template + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; }; // Intel XMX with SIMD16 capability @@ -415,25 +406,25 @@ struct tpu_params< // capabilities of the Intel XMX hardware. template -constexpr bool is_combination_valid_xmx16(int sM, int sN, int sK) { +constexpr bool is_combination_valid_xmx16(size_t sM, size_t sN, size_t sK) { if ((std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 16 && sK == 32) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 16 && + sK == 32) || (std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 16 && sK == 32) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 16 && + sK == 32) || (std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 16 && sK == 32) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 16 && + sK == 32) || (std::is_same_v && std::is_same_v && - std::is_same_v && (sM == 1 || sM == 2 || sM == 4 || sM == 8) && - sN == 16 && sK == 32) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 16 && + sK == 32) || (std::is_same_v && std::is_same_v && - std::is_same_v && - (sM == 1 || sM == 2 || sM == 4 || sM == 8) && sN == 16 && sK == 16) || + std::is_same_v && (sM >= 1 && sM <= 8) && sN == 16 && + sK == 16) || (std::is_same_v && std::is_same_v && std::is_same_v && - (sM == 1 || sM == 2 || sM == 4 || sM == 8) && sN == 16 && sK == 16)) + (sM >= 1 && sM <= 8) && sN == 16 && sK == 16)) return true; else return false; @@ -458,72 +449,19 @@ constexpr bool are_types_valid_xmx16() { return false; } -// General Query -// specialization for when types are not given --> no default values -template -struct tpu_params { - static constexpr std::size_t M = -1; // depends on the type - static constexpr std::size_t N = -1; - static constexpr std::size_t K = -1; - - uint32_t numtiles = -1; // does not apply for XMX - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); - - struct combination { - uint32_t max_msize; - uint32_t max_nsize; - uint32_t max_ksize; - matrix_type atype; - matrix_type btype; - matrix_type accumulatortype; - uint32_t 
msize; - uint32_t nsize; - uint32_t ksize; - }; - using mt = matrix_type; - static constexpr combination combinations[] = { - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 1, 16, 32}, - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 2, 16, 32}, - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 4, 16, 32}, - {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 8, 16, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 1, 16, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 2, 16, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 4, 16, 32}, - {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 8, 16, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 1, 16, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 2, 16, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 4, 16, 32}, - {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 8, 16, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 1, 16, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 2, 16, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 4, 16, 32}, - {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 8, 16, 32}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 1, 16, 16}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 2, 16, 16}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 4, 16, 16}, - {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 8, 16, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 1, 16, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 2, 16, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 4, 16, 16}, - {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 8, 16, 16}, - }; - static constexpr int num_combinations = - sizeof(combinations) / sizeof(combination); -}; - -// Sizes-only query: +// Default values query: // Specialization for when only types are given, need to query only sizes -template -struct tpu_params && - !std::is_same_v && - !std::is_same_v)>::type> { +template +struct matrix_params< + architecture::intel_gpu_pvc, Ta, Tb, Tc, Td, 0, 0, 0, + typename std::enable_if<(!std::is_same_v && + !std::is_same_v && + !std::is_same_v)>::type> { static_assert((are_types_valid_xmx16()), - "Invalid types for XMX16, supported types are int8_t, uint8_t, " - "half, and bf16 (Note that unsigned short should be used in the" - "DPC++ code to implement bf16)"); + "Invalid types for architecture::intel_gpu_pvc, supported " + "types are int8_t, uint8_t, " + "half, and bf16"); // construct the matrices using the default sizes @@ -536,68 +474,42 @@ struct tpu_params using joint_matrix_b = joint_matrix; template - using joint_matrix_accumulator = - joint_matrix; - - uint32_t numtiles = -1; // does not apply for XMX - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); - struct combination { - uint32_t max_msize; - uint32_t max_nsize; - uint32_t max_ksize; - matrix_type atype; - matrix_type btype; - matrix_type accumulatortype; - uint32_t msize; - uint32_t nsize; - uint32_t ksize; - }; - using mt = matrix_type; - static constexpr combination combinations[] = { - // The types used in the initialization below are fake and not used. In - // this case, users already chose the types, they are only looking for - // the - // sizes - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 1, 16, (sizeof(Ta) == 1) ? 32 : 16}, - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 2, 16, (sizeof(Ta) == 1) ? 32 : 16}, - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 4, 16, (sizeof(Ta) == 1) ? 32 : 16}, - {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 8, 16, (sizeof(Ta) == 1) ? 
32 : 16}, - }; - static constexpr int num_combinations = - sizeof(combinations) / sizeof(combination); + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; }; -// Valid or not: +// Validation query: // Specialization when both types and sizes are given -template -struct tpu_params< - tpu::xmx16, Ta, Tb, Tc, sM, sN, sK, - typename std::enable_if<((!std::is_same_v && sM != 0))>::type> { +template +struct matrix_params< + architecture::intel_gpu_pvc, Ta, Tb, Tc, Td, sM, sN, sK, + typename std::enable_if<( + !std::is_same_v && !std::is_same_v && + !std::is_same_v && sM != 0 && sN != 0 && sK != 0)>::type> { // Validate that parameters are supported - static_assert((sM == 0 && sN == 0 && sK == 0) || - (is_combination_valid_xmx16(sM, sN, sK)), - "Invalid parameters for XMX16, query valid combinations " - "using: tpu_params myparams; and then check out " - "myparams.combinations array"); + static_assert( + (sM == 0 && sN == 0 && sK == 0) || + (is_combination_valid_xmx16(sM, sN, sK)), + "Invalid parameters for architecture::intel_gpu_pvc, query valid " + "combinations " + "using: " + "q.get_device().get_info()"); // if combination is valid, construct the matrices - static constexpr std::size_t M = (sM != 0) ? sM : 8; - static constexpr std::size_t N = (sN != 0) ? sN : 8; - static constexpr std::size_t K = - (sK != 0) ? sK : ((sizeof(Ta) == 1) ? 32 : 16); + static constexpr std::size_t M = sM; + static constexpr std::size_t N = sN; + static constexpr std::size_t K = sK; template using joint_matrix_a = joint_matrix; template using joint_matrix_b = joint_matrix; template - using joint_matrix_accumulator = - joint_matrix; - - uint32_t numtiles = -1; // does not apply for XMX16 - static constexpr scope_t scopes[] = {scope_t::sub_group}; - static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t); + using joint_matrix_c = joint_matrix; + template + using joint_matrix_d = joint_matrix; }; } // namespace experimental::matrix } // namespace oneapi diff --git a/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp b/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp index 048aed6341f6c..ccf50a5a76da9 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp @@ -39,7 +39,8 @@ void matrix_multiply(big_matrix &C, size_t K = NUM_COLS_A; assert(NUM_ROWS_C == NUM_ROWS_A && NUM_COLS_A == NUM_ROWS_B * 4); - using myparams2 = tpu_params; + using myparams2 = + matrix_params; constexpr int TM = myparams2::M; constexpr int TN = myparams2::N; constexpr int TK = myparams2::K; @@ -80,7 +81,7 @@ void matrix_multiply(big_matrix &C, myparams2::joint_matrix_b< sub_group, ext::intel::experimental::matrix::layout::packed> sub_b; - myparams2::joint_matrix_accumulator sub_c; + myparams2::joint_matrix_c sub_c; joint_matrix_load( sg, sub_c, diff --git a/sycl/test/matrix/compile-query.cpp b/sycl/test/matrix/compile-query.cpp new file mode 100644 index 0000000000000..e110eef22a385 --- /dev/null +++ b/sycl/test/matrix/compile-query.cpp @@ -0,0 +1,113 @@ +// RUN: %clangxx -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 -fsycl -o compile-query %s +#include +#include + +using namespace sycl; +using namespace sycl::ext::oneapi::experimental; +using namespace sycl::ext::oneapi::experimental::matrix; + +void query_amx_spr() { + + // generates combination assert + // using myparams = matrix_params; + + // generates types assert + // using myparams2 = matrix_params; + + // tells whether a combination is valid or not, if valid, those will be set as + 
// default + using myparams = matrix_params; + + size_t dmsize = myparams::M; + size_t dnsize = myparams::N; + size_t dksize = myparams::K; + std::cout << "sizes of AMX matrix_params chosen by the user are: M " << dmsize + << " N " << dnsize << " K " << dksize << std::endl; + + // Sizes-only query: types are given, generate default sizes + using myparams2 = + matrix_params; + myparams2 p; + dmsize = myparams2::M; + dnsize = myparams2::N; + dksize = myparams2::K; + std::cout << "default AMX sizes matrix_params are: M " << dmsize << " N " + << dnsize << " K " << dksize << std::endl; + return; +} + +void query_xmx_dg2() { + + // generates combination assert + // using myparams = matrix_params; + + // generate combination of type assert + // using myparams = matrix_params; + + // tells whether a combination is valid or not, if valid, those will be set as + // default + using myparams = matrix_params; + + size_t dmsize = myparams::M; + size_t dnsize = myparams::N; + size_t dksize = myparams::K; + std::cout << "sizes of Intel XMX of architecture::intel_gpu_dg2_g10 " + "matrix_params chosen by the user are: M " + << dmsize << " N " << dnsize << " K " << dksize << std::endl; + + // sizes-only query: types are given, generate default sizes + using myparams2 = + matrix_params; + dmsize = myparams2::M; + dnsize = myparams2::N; + dksize = myparams2::K; + std::cout + << "Default Intel XMX of architecture::intel_gpu_dg2_g10 sizes are: M " + << dmsize << " N " << dnsize << " K " << dksize << std::endl; + return; +} + +void query_xmx_pvc() { + + // generates combination assert + // using myparams = matrix_params; + + // generate combination of type assert + // using myparams = matrix_params; + + // tells whether a combination is valid or not, if valid, those will be set as + // default + using myparams = matrix_params; + + size_t dmsize = myparams::M; + size_t dnsize = myparams::N; + size_t dksize = myparams::K; + std::cout << "sizes of architecture::intel_gpu_pvc matrix_params chosen by " + "the user are: M " + << dmsize << " N " << dnsize << " K " << dksize << std::endl; + + // sizes-only query: types are given, generate default sizes + using myparams2 = + matrix_params; + dmsize = myparams2::M; + dnsize = myparams2::N; + dksize = myparams2::K; + std::cout << "Default Intel XMX of architecture::intel_gpu_pvc sizes are: M " + << dmsize << " N " << dnsize << " K " << dksize << std::endl; + return; +} + +int main() { + query_amx_spr(); + query_xmx_dg2(); + query_xmx_pvc(); + return 0; +} diff --git a/sycl/test/matrix/query-use.cpp b/sycl/test/matrix/query-use.cpp deleted file mode 100644 index 9afc8e1173043..0000000000000 --- a/sycl/test/matrix/query-use.cpp +++ /dev/null @@ -1,162 +0,0 @@ -// RUN: %clangxx -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 -fsycl -o query-use %s -#include -#include - -using namespace sycl; -using namespace sycl::ext::oneapi::experimental::matrix; - -void query_amx() { - - // generates combination assert - // using myparams = tpu_params; - - // generates types assert - // using myparams2 = tpu_params; - - // tells whether a combination is valid or not, if valid, those will be set as - // default - using myparams = tpu_params; - - size_t dmsize = myparams::M; - size_t dnsize = myparams::N; - size_t dksize = myparams::K; - std::cout << "sizes of AMX tpu_params chosen by the user are: M " << dmsize - << " N " << dnsize << " K " << dksize << std::endl; - - // Sizes-only query: types are given, generate default sizes - using myparams2 = tpu_params; - myparams2 p; - dmsize = myparams2::M; - 
dnsize = myparams2::N; - dksize = myparams2::K; - std::cout << "default AMX sizes tpu_params are: M " << dmsize << " N " - << dnsize << " K " << dksize << "\n AMX int8 num combinations is " - << p.num_combinations << std::endl; - - // general query: types are not given - tpu_params myparams3; - - if (myparams3.num_scopes > 0) - if (myparams3.scopes[0] == scope_t::sub_group) - std::cout << "There are " << myparams3.num_scopes - << " Scopes that are supported by AMX implementation and " - "subgroup is one of them " - << std::endl; - - std::cout << "AMX query num combinations: " << myparams3.num_combinations - << std::endl; - - if (myparams3.combinations[0].msize != 0) // this is a max params hardware - return; - constexpr int msize = myparams3.combinations[0].max_msize; - constexpr int nsize = myparams3.combinations[0].max_nsize; - constexpr int ksize = myparams3.combinations[0].max_ksize; - std::cout << "AMX query sizes are: M " << msize << " N " << nsize << " K " - << ksize << std::endl; - - size_t NDRangeM = 1024 / msize; - size_t NDRangeN = 1024 / nsize; - queue q; - q.submit([&](handler &cgh) { - cgh.parallel_for( - nd_range<2>({NDRangeM, NDRangeN}, {1, 1}), - [msize, ksize, nsize](nd_item<2> spmd_item) { - sub_group sg = spmd_item.get_sub_group(); - myparams2::joint_matrix_a sub_a1; - myparams2::joint_matrix_b< - sub_group, sycl::ext::intel::experimental::matrix::layout::packed> - sub_b1; - myparams2::joint_matrix_accumulator sub_c1; - - joint_matrix sub_a; - joint_matrix sub_b; - joint_matrix sub_c; - }); - }); -} - -void query_xmx8() { - - // generates combination assert - // using myparams = tpu_params; - - // generate combination of type assert - // using myparams = tpu_params; - - // tells whether a combination is valid or not, if valid, those will be set as - // default - using myparams = tpu_params; - - size_t dmsize = myparams::M; - size_t dnsize = myparams::N; - size_t dksize = myparams::K; - std::cout << "sizes of XMX8 tpu_params chosen by the user are: M " << dmsize - << " N " << dnsize << " K " << dksize << std::endl; - - // sizes-only query: types are given, generate default sizes - using myparams2 = tpu_params; - myparams2 p; - dmsize = myparams2::M; - dnsize = myparams2::N; - dksize = myparams2::K; - std::cout << "Default XMX8 sizes are: M " << dmsize << " N " << dnsize - << " K " << dksize << "\n XMX8 int8 num combinations is " - << p.num_combinations << std::endl; - - dmsize = myparams2::combinations[0].msize; - dnsize = myparams2::combinations[0].nsize; - dksize = myparams2::combinations[0].ksize; - std::cout << "one of XMX8 combination sizes is: M " << dmsize << " N " - << dnsize << " K " << dksize << std::endl; - - // general query: types are not given - tpu_params myparams3; - - if (myparams3.num_scopes > 0) - if (myparams3.scopes[0] == scope_t::sub_group) - std::cout << "There are " << myparams3.num_scopes - << " Scopes that are supported by XMX8 implementation and " - "subgroup is one of them " - << std::endl; - - std::cout << "XMX8 query num combinations: " << myparams3.num_combinations - << std::endl; - - if (myparams3.combinations[0].msize == 0) // this is not a max params hardware - return; - constexpr int msize = myparams3.combinations[0].msize; - constexpr int nsize = myparams3.combinations[0].nsize; - constexpr int ksize = myparams3.combinations[0].ksize; - std::cout << "XMX8 query sizes are: M " << msize << " N " << nsize << " K " - << ksize << std::endl; - std::cout << "XMX8 query max sizes are: M " - << myparams3.combinations[0].max_msize << " N " - << 
myparams3.combinations[0].max_nsize << " K " - << myparams3.combinations[0].max_ksize << std::endl; - - size_t NDRangeM = 1024 / msize; - size_t NDRangeN = 1024 / nsize; - queue q; - q.submit([&](handler &cgh) { - cgh.parallel_for( - nd_range<2>({NDRangeM, NDRangeN}, {1, 1}), - [msize, ksize, nsize](nd_item<2> spmd_item) { - sub_group sg = spmd_item.get_sub_group(); - myparams2::joint_matrix_a sub_a1; - myparams2::joint_matrix_b< - sub_group, sycl::ext::intel::experimental::matrix::layout::packed> - sub_b1; - myparams2::joint_matrix_accumulator sub_c1; - - joint_matrix sub_a; - joint_matrix sub_b; - joint_matrix sub_c; - }); - }); -} - -int main() { - query_amx(); - query_xmx8(); - return 0; -} diff --git a/sycl/test/matrix/runtime-query.cpp b/sycl/test/matrix/runtime-query.cpp new file mode 100644 index 0000000000000..d6ccedf24b870 --- /dev/null +++ b/sycl/test/matrix/runtime-query.cpp @@ -0,0 +1,50 @@ +// RUN: %clangxx -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 -fsycl -o runtime-query %s +// XFAIL: * + +#include +#include + +using namespace sycl; +using namespace sycl::ext::oneapi::experimental; +using namespace sycl::ext::oneapi::experimental::matrix; + +template +void matrix_runtime_query(queue q) { + + std::vector combinations = + q.get_device().get_info(); + + std::cout << "The matrix hardware implementation in this device provides " + "this number of combinations: " + << combinations.size() << std::endl; + + bool max_sizes; + if (combinations[0].maxsize == 0) + max_sizes = true; // this is a max params hardware + else + max_sizes = false; + for (int i = 0; i < combinations.size(); i++) { + if (Ta == combinations[i].atype && Tb == combinations[i].btype && + Tc == combinations[i].ctype && Td == combinations[i].dtype) { + // joint matrix GEMM kernel can be called using these sizes + if (max_sizes) + std::cout << "The matrix hardware implementation in this device " + "provides the following max sizes are: M " + << combinations[i].max_msize << " N " + << combinations[i].max_nsize << " K " + << combinations[i].max_ksize << std::endl; + else + std::cout << "The matrix hardware implementation in this device " + "provides the following sizes are: M " + << combinations[i].msize << " N " << combinations[i].nsize + << " K " << combinations[i].ksize << std::endl; + } + } +} + +int main() { + queue q; + matrix_runtime_query(q); + return 0; +}
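
The runtime query documented above in `sycl_ext_oneapi_matrix.asciidoc` returns a `std::vector` of `combination` objects through the `ext::oneapi::experimental::info::device::matrix_combinations` descriptor. The sketch below is a minimal illustration of walking that vector; it assumes the `combination` members and descriptor name exactly as defined in this patch, that `<sycl/sycl.hpp>` is a sufficient include set (the include targets of the new tests are elided above), and that "maximum size" hardware such as Intel AMX leaves the discrete `msize`/`nsize`/`ksize` fields at zero, as the superseded `tpu_params` tables did.

```c++
#include <sycl/sycl.hpp>

#include <iostream>
#include <vector>

namespace syclex = sycl::ext::oneapi::experimental;
using syclex::matrix::combination;
using syclex::matrix::matrix_type;

int main() {
  sycl::queue q;
  std::vector<combination> combinations =
      q.get_device().get_info<syclex::info::device::matrix_combinations>();

  for (const combination &c : combinations) {
    // Example filter: keep only bf16 x bf16 -> fp32 combinations.
    if (c.atype != matrix_type::bf16 || c.btype != matrix_type::bf16 ||
        c.ctype != matrix_type::fp32 || c.dtype != matrix_type::fp32)
      continue;
    if (c.msize == 0)
      // Assumed convention: "maximum size" hardware (e.g. Intel AMX) reports
      // upper bounds and leaves the discrete sizes at zero.
      std::cout << "max M/N/K: " << c.max_msize << " / " << c.max_nsize
                << " / " << c.max_ksize << "\n";
    else
      // Discrete-size hardware (e.g. Intel XMX) reports exact shapes.
      std::cout << "M/N/K: " << c.msize << " / " << c.nsize << " / "
                << c.ksize << "\n";
  }
  return 0;
}
```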
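
The `matrix_params` specializations added to `static-query-use.hpp` support two compile-time uses: a validation query when both types and sizes are supplied, and a default-values query when only types are supplied. The sketch below exercises the Intel AMX (`architecture::intel_cpu_spr`) specializations; it assumes `int32_t` is an accepted accumulator type (per the AMX combination table above), passes explicit zeros to select the default-sizes specialization (the primary template may also default these parameters, in which case they could be omitted), and assumes `<sycl/sycl.hpp>` provides the needed declarations.

```c++
#include <sycl/sycl.hpp>

#include <cstdint>
#include <iostream>

namespace syclex = sycl::ext::oneapi::experimental;
using syclex::architecture;
using syclex::matrix::matrix_params;

int main() {
  // Validation query: types and sizes are given. The static_assert inside
  // matrix_params rejects the instantiation at compile time unless
  // 16 x 16 x 64 int8 -> int32 is a legal Intel AMX shape (it is, per the
  // combination table in the extension document).
  using valid_amx = matrix_params<architecture::intel_cpu_spr, int8_t, int8_t,
                                  int32_t, int32_t, 16, 16, 64>;
  std::cout << "validated AMX shape: M " << valid_amx::M << " N "
            << valid_amx::N << " K " << valid_amx::K << "\n";

  // Default-values query: only types are given. Explicit zeros select the
  // "default sizes" specialization.
  using default_amx = matrix_params<architecture::intel_cpu_spr, int8_t,
                                    int8_t, int32_t, int32_t, 0, 0, 0>;
  std::cout << "default AMX shape: M " << default_amx::M << " N "
            << default_amx::N << " K " << default_amx::K << "\n";
  return 0;
}
```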
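
Because the supported shapes differ between PVC (XMX16, N = 16) and the DG2 variants (XMX8, N = 8), an application may want to branch on the device architecture before choosing `joint_matrix` tile sizes. The sketch below combines the two extensions touched by this patch for that purpose; it assumes `device::ext_oneapi_architecture_is` takes an `architecture` enumerator and returns `bool`, as described in `sycl_ext_oneapi_device_architecture.asciidoc`. `intel_cpu_spr` is deliberately not checked this way, since the patch states it does not yet work with that API.

```c++
#include <sycl/sycl.hpp>

#include <iostream>

namespace syclex = sycl::ext::oneapi::experimental;

int main() {
  sycl::queue q;
  sycl::device dev = q.get_device();

  int n = 0;
  if (dev.ext_oneapi_architecture_is(syclex::architecture::intel_gpu_pvc)) {
    n = 16; // XMX16: the combination table above fixes N at 16 on PVC.
  } else if (dev.ext_oneapi_architecture_is(
                 syclex::architecture::intel_gpu_dg2_g10) ||
             dev.ext_oneapi_architecture_is(
                 syclex::architecture::intel_gpu_dg2_g11) ||
             dev.ext_oneapi_architecture_is(
                 syclex::architecture::intel_gpu_dg2_g12)) {
    n = 8; // XMX8: the table fixes N at 8 on the DG2 variants.
  }

  if (n == 0)
    std::cout << "no Intel XMX joint_matrix path for this device\n";
  else
    std::cout << "selected XMX N dimension: " << n << "\n";
  return 0;
}
```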