diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc
index 6c7bc2cce84bc..cd8f878f56f7b 100644
--- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc
+++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc
@@ -99,6 +99,7 @@ namespace sycl::ext::oneapi::experimental {
 
 enum class architecture : /* unspecified */ {
   x86_64,
+  intel_cpu_spr,
   intel_gpu_bdw,
   intel_gpu_skl,
   intel_gpu_kbl,
diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
index 5eb0a301ce4b5..624ae83897e48 100644
--- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
+++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp
@@ -14,6 +14,7 @@ namespace ext::oneapi::experimental {
 
 enum class architecture {
   x86_64,
+  intel_cpu_spr,
   intel_gpu_bdw,
   intel_gpu_skl,
   intel_gpu_kbl,
diff --git a/sycl/include/sycl/ext/oneapi/matrix/compile-time-query.hpp b/sycl/include/sycl/ext/oneapi/matrix/compile-time-query.hpp
new file mode 100644
index 0000000000000..817a6a731fb96
--- /dev/null
+++ b/sycl/include/sycl/ext/oneapi/matrix/compile-time-query.hpp
@@ -0,0 +1,579 @@
+//===---------- compile-time-query.hpp - SYCL matrix ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// ===--------------------------------------------------------------------=== //
+// This file implements the compile-time query interface for the joint_matrix
+// experimental extension. Intel(R) Advanced Matrix Extensions (Intel(R) AMX),
+// and Intel(R) Xe Matrix Extensions (Intel(R) XMX) support different logical
+// sizes and types. The query interface is used to validate user code and inform
+// them about supported types, sizes, scopes, and layouts by the current
+// implementation. Note that this query interface is a compile-time query, so
+// there will be no runtime errors. The query interface provides two
+// functionalities: 1- At compile time, inform the user whether a specific
+// combination is valid or not. 2- Construct the matrices using a default shape
+// if user does not provide a combination
+
+#pragma once
+
+#include <sycl/aliases.hpp> // for half
+#include <sycl/ext/oneapi/matrix/matrix-tensorcores.hpp>
+#include <sycl/ext/oneapi/matrix/matrix-unified-utils.hpp> // for use, layout
+#include <sycl/ext/oneapi/matrix/matrix-unified.hpp>       // for joint_matrix
+
+#include <cstddef>     // for size_t
+#include <stdint.h>    // for uint32_t, int8_t
+#include <type_traits> // for enable_if
+
+using namespace sycl::ext::oneapi::experimental;
+
+namespace sycl {
+inline namespace _V1 {
+namespace ext {
+namespace oneapi {
+namespace experimental::matrix {
+
+enum class matrix_type {
+  bf16,
+  fp16,
+  tf32,
+  fp32,
+  fp64,
+  sint8,
+  sint16,
+  sint32,
+  sint64,
+  uint8,
+  uint16,
+  uint32,
+  uint64
+};
+
+struct combination {
+  size_t max_msize;
+  size_t max_nsize;
+  size_t max_ksize;
+  size_t msize;
+  size_t nsize;
+  size_t ksize;
+  matrix_type atype;
+  matrix_type btype;
+  matrix_type ctype;
+  matrix_type dtype;
+};
+
+template <architecture u, typename Ta, typename Tb, typename Tc,
+          typename Td = Tc, size_t sM = 0, size_t sN = 0, size_t sK = 0,
+          typename Enabled = void>
+struct matrix_params;
+
+template <typename Ta, typename Tb, typename Tc>
+constexpr bool is_combination_valid_amx(size_t sM, size_t sN, size_t sK) {
+  // is_same_v is a C++17 feature
+  if ((std::is_same_v<Ta, int8_t> && std::is_same_v<Tb, int8_t> &&
+       std::is_same_v<Tc, int> && sM <= 16 && sN <= 16 && sK <= 64) ||
+      (std::is_same_v<Ta, uint8_t> && std::is_same_v<Tb, uint8_t> &&
+       std::is_same_v<Tc, int> && sM <= 16 && sN <= 16 && sK <= 64) ||
+      (std::is_same_v<Ta, int8_t> && std::is_same_v<Tb, uint8_t> &&
+       std::is_same_v<Tc, int> && sM <= 16 && sN <= 16 && sK <= 64) ||
+      (std::is_same_v<Ta, uint8_t> && std::is_same_v<Tb, int8_t> &&
+       std::is_same_v<Tc, int> && sM <= 16 && sN <= 16 && sK <= 64) ||
+      // bf16
+      (std::is_same_v<Ta, unsigned short> &&
+       std::is_same_v<Tb, unsigned short> && std::is_same_v<Tc, float> &&
+       sM <= 16 && sN <= 16 && sK <= 32))
+    return true;
+  else
+    return false;
+}
+
+template <typename Ta, typename Tb, typename Tc>
+constexpr bool are_types_valid_amx() {
+  if ((std::is_same_v<Ta, int8_t> && std::is_same_v<Tb, int8_t> &&
+       std::is_same_v<Tc, int>) ||
+      (std::is_same_v<Ta, uint8_t> && std::is_same_v<Tb, uint8_t> &&
+       std::is_same_v<Tc, int>) ||
+      (std::is_same_v<Ta, int8_t> && std::is_same_v<Tb, uint8_t> &&
+       std::is_same_v<Tc, int>) ||
+      (std::is_same_v<Ta, uint8_t> && std::is_same_v<Tb, int8_t> &&
+       std::is_same_v<Tc, int>) ||
+      (std::is_same_v<Ta, unsigned short> &&
+       std::is_same_v<Tb, unsigned short> && std::is_same_v<Tc, float>))
+    return true;
+  else
+    return false;
+}
+
+// Default values query
+// Specialization for when only types are given, need to query only sizes
+template <typename Ta, typename Tb, typename Tc, typename Td>
+struct matrix_params<
+    architecture::intel_cpu_spr, Ta, Tb, Tc, Td, 0, 0, 0,
+    typename std::enable_if<(!std::is_same_v<Ta, void> &&
+                             !std::is_same_v<Tb, void> &&
+                             !std::is_same_v<Tc, void>)>::type> {
+  static_assert((are_types_valid_amx<Ta, Tb, Tc>()),
+                "Invalid types for AMX, supported types are int8_t, uint8_t, "
+                "and bf16 (Note that unsigned short should be used in the"
+                "DPC++ code to implement bf16) ");
+
+  // construct the matrices using the default sizes
+  static constexpr std::size_t M = 16;
+  static constexpr std::size_t N = 16;
+  static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 64 : 32);
+
+  template <typename Group, layout Layout>
+  using joint_matrix_a = joint_matrix<Group, Ta, use::a, M, K, Layout>;
+  template <typename Group, layout Layout>
+  using joint_matrix_b = joint_matrix<Group, Tb, use::b, K, N, Layout>;
+  template <typename Group>
+  using joint_matrix_c = joint_matrix<Group, Tc, use::accumulator, M, N>;
+  template <typename Group>
+  using joint_matrix_d = joint_matrix<Group, Td, use::accumulator, M, N>;
+};
+
+// Validation query
+// Specialization when both types and sizes are given
+template <typename Ta, typename Tb, typename Tc, typename Td, size_t sM,
+          size_t sN, size_t sK>
+struct matrix_params<
+    architecture::intel_cpu_spr, Ta, Tb, Tc, Td, sM, sN, sK,
+    typename std::enable_if<(
+        !std::is_same_v<Ta, void> && !std::is_same_v<Tb, void> &&
+        !std::is_same_v<Tc, void> && sM != 0 && sN != 0 && sK != 0)>::type> {
+  // Validate that parameters are supported
+  static_assert(
+      (sM == 0 && sN == 0 && sK == 0) ||
+          (is_combination_valid_amx<Ta, Tb, Tc>(sM, sN, sK)),
+      "Invalid parameters for AMX, query valid types and maximum sizes "
+      "using: matrix_params<architecture::intel_cpu_spr> myparams; and then "
+      "check out "
+      "myparams.combinations array");
+
+  // if combination is valid, construct the matrices
+
+  static constexpr std::size_t M = sM;
+  static constexpr std::size_t N = sN;
+  static constexpr std::size_t K = sK;
+
+  template <typename Group, layout Layout>
+  using joint_matrix_a = joint_matrix<Group, Ta, use::a, M, K, Layout>;
+  template <typename Group, layout Layout>
+  using joint_matrix_b = joint_matrix<Group, Tb, use::b, K, N, Layout>;
+  template <typename Group>
+  using joint_matrix_c = joint_matrix<Group, Tc, use::accumulator, M, N>;
+  template <typename Group>
+  using joint_matrix_d = joint_matrix<Group, Td, use::accumulator, M, N>;
+};
+
+// Intel XMX with SIMD8 capability
+// The Intel XMX implementation supports the logical capability support of the
+// HW So in this case, M, N, K sizes returned by the query represent the logical
+// capabilities of the Intel XMX hardware.
+
+template <typename Ta, typename Tb, typename Tc>
+constexpr bool is_combination_valid_xmx8(size_t sM, size_t sN, size_t sK) {
+  if ((std::is_same_v<Ta, int8_t> && std::is_same_v<Tb, int8_t> &&
+       std::is_same_v<Tc, int> && (sM >= 1 && sM <= 8) && sN == 8 &&
+       sK == 32) ||
+      (std::is_same_v<Ta, int8_t> && std::is_same_v<Tb, uint8_t> &&
+       std::is_same_v<Tc, int> && (sM >= 1 && sM <= 8) && sN == 8 &&
+       sK == 32) ||
+      (std::is_same_v<Ta, uint8_t> && std::is_same_v<Tb, int8_t> &&
+       std::is_same_v<Tc, int> && (sM >= 1 && sM <= 8) && sN == 8 &&
+       sK == 32) ||
+      (std::is_same_v<Ta, uint8_t> && std::is_same_v<Tb, uint8_t> &&
+       std::is_same_v<Tc, int> && (sM >= 1 && sM <= 8) && sN == 8 &&
+       sK == 32) ||
+      (std::is_same_v<Ta, half> && std::is_same_v<Tb, half> &&
+       std::is_same_v<Tc, float> && (sM >= 1 && sM <= 8) && sN == 8 &&
+       sK == 16) ||
+      (std::is_same_v<Ta, unsigned short> &&
+       std::is_same_v<Tb, unsigned short> && std::is_same_v<Tc, float> &&
+       (sM >= 1 && sM <= 8) && sN == 8 && sK == 16))
+    return true;
+  else
+    return false;
+}
+
+template <typename Ta, typename Tb, typename Tc>
+constexpr bool are_types_valid_xmx8() {
+  if ((std::is_same_v<Ta, int8_t> && std::is_same_v<Tb, int8_t> &&
+       std::is_same_v<Tc, int>) ||
+      (std::is_same_v<Ta, uint8_t> && std::is_same_v<Tb, int8_t> &&
+       std::is_same_v<Tc, int>) ||
+      (std::is_same_v<Ta, int8_t> && std::is_same_v<Tb, uint8_t> &&
+       std::is_same_v<Tc, int>) ||
+      (std::is_same_v<Ta, uint8_t> && std::is_same_v<Tb, uint8_t> &&
+       std::is_same_v<Tc, int>) ||
+      (std::is_same_v<Ta, half> && std::is_same_v<Tb, half> &&
+       std::is_same_v<Tc, float>) ||
+      (std::is_same_v<Ta, unsigned short> &&
+       std::is_same_v<Tb, unsigned short> && std::is_same_v<Tc, float>))
+    return true;
+  else
+    return false;
+}
+
+// Default-values query:
+// Specialization for when only types are given, need to query only sizes
+
+template <typename Ta, typename Tb, typename Tc, typename Td>
+struct matrix_params<
+    architecture::intel_gpu_dg1, Ta, Tb, Tc, Td, 0, 0, 0,
+    typename std::enable_if<(!std::is_same_v<Ta, void> &&
+                             !std::is_same_v<Tb, void> &&
+                             !std::is_same_v<Tc, void>)>::type> {
+  static_assert((are_types_valid_xmx8<Ta, Tb, Tc>()),
+                "Invalid types for architecture::intel_gpu_dg1, supported "
+                "types are int8_t, uint8_t, half, and bf16");
+
+  // construct the matrices using the default sizes
+
+  static constexpr std::size_t M = 8;
+  static constexpr std::size_t N = 8;
+  static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 32 : 16);
+
+  template <typename Group, layout Layout>
+  using joint_matrix_a = joint_matrix<Group, Ta, use::a, M, K, Layout>;
+  template <typename Group, layout Layout>
+  using joint_matrix_b = joint_matrix<Group, Tb, use::b, K, N, Layout>;
+  template <typename Group>
+  using joint_matrix_c = joint_matrix<Group, Tc, use::accumulator, M, N>;
+  template <typename Group>
+  using joint_matrix_d = joint_matrix<Group, Td, use::accumulator, M, N>;
+};
+
+// Validation query:
+// Specialization when both types and sizes are given
+template <typename Ta, typename Tb, typename Tc, typename Td, size_t sM,
+          size_t sN, size_t sK>
+struct matrix_params<
+    architecture::intel_gpu_dg1, Ta, Tb, Tc, Td, sM, sN, sK,
+    typename std::enable_if<(
+        !std::is_same_v<Ta, void> && !std::is_same_v<Tb, void> &&
+        !std::is_same_v<Tc, void> && sM != 0 && sN != 0 && sK != 0)>::type> {
+  // Validate that parameters are supported
+  static_assert(
+      (sM == 0 && sN == 0 && sK == 0) ||
+          (is_combination_valid_xmx8<Ta, Tb, Tc>(sM, sN, sK)),
+      "Invalid parameters for XMX8, query valid combinations "
+      "using: "
+      "q.get_device().get_info<sycl::info::device::matrix::combinations>()");
+
+  // if combination is valid, construct the matrices
+  static constexpr std::size_t M = sM;
+  static constexpr std::size_t N = sN;
+  static constexpr std::size_t K = sK;
+
+  template <typename Group, layout Layout>
+  using joint_matrix_a = joint_matrix<Group, Ta, use::a, M, K, Layout>;
+  template <typename Group, layout Layout>
+  using joint_matrix_b = joint_matrix<Group, Tb, use::b, K, N, Layout>;
+  template <typename Group>
+  using joint_matrix_c = joint_matrix<Group, Tc, use::accumulator, M, N>;
+  template <typename Group>
+  using joint_matrix_d = joint_matrix<Group, Td, use::accumulator, M, N>;
+};
+
+// Default-values query:
+// Specialization for when only types are given, need to query only sizes
+
+template <typename Ta, typename Tb, typename Tc, typename Td>
+struct matrix_params<
+    architecture::intel_gpu_dg2_g10, Ta, Tb, Tc, Td, 0, 0, 0,
+    typename std::enable_if<(!std::is_same_v<Ta, void> &&
+                             !std::is_same_v<Tb, void> &&
+                             !std::is_same_v<Tc, void>)>::type> {
+  static_assert((are_types_valid_xmx8<Ta, Tb, Tc>()),
+                "Invalid types for architecture::intel_gpu_dg1, supported "
+                "types are int8_t, uint8_t, half, and bf16");
+
+  // construct the matrices using the default sizes
+
+  static constexpr std::size_t M = 8;
+  static constexpr std::size_t N = 8;
+  static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 32 : 16);
+
+  template <typename Group, layout Layout>
+  using joint_matrix_a = joint_matrix<Group, Ta, use::a, M, K, Layout>;
+  template <typename Group, layout Layout>
+  using joint_matrix_b = joint_matrix<Group, Tb, use::b, K, N, Layout>;
+  template <typename Group>
+  using joint_matrix_c = joint_matrix<Group, Tc, use::accumulator, M, N>;
+  template <typename Group>
+  using joint_matrix_d = joint_matrix<Group, Td, use::accumulator, M, N>;
+};
+
+// Validation query:
+// Specialization when both types and sizes are given
+template <typename Ta, typename Tb, typename Tc, typename Td, size_t sM,
+          size_t sN, size_t sK>
+struct matrix_params<
+    architecture::intel_gpu_dg2_g10, Ta, Tb, Tc, Td, sM, sN, sK,
+    typename std::enable_if<(
+        !std::is_same_v<Ta, void> && !std::is_same_v<Tb, void> &&
+        !std::is_same_v<Tc, void> && sM != 0 && sN != 0 && sK != 0)>::type> {
+  // Validate that parameters are supported
+  static_assert(
+      (sM == 0 && sN == 0 && sK == 0) ||
+          (is_combination_valid_xmx8<Ta, Tb, Tc>(sM, sN, sK)),
+      "Invalid parameters for XMX8, query valid combinations "
+      "using: "
+      "q.get_device().get_info<sycl::info::device::matrix::combinations>()");
+
+  // if combination is valid, construct the matrices
+  static constexpr std::size_t M = sM;
+  static constexpr std::size_t N = sN;
+  static constexpr std::size_t K = sK;
+
+  template <typename Group, layout Layout>
+  using joint_matrix_a = joint_matrix<Group, Ta, use::a, M, K, Layout>;
+  template <typename Group, layout Layout>
+  using joint_matrix_b = joint_matrix<Group, Tb, use::b, K, N, Layout>;
+  template <typename Group>
+  using joint_matrix_c = joint_matrix<Group, Tc, use::accumulator, M, N>;
+  template <typename Group>
+  using joint_matrix_d = joint_matrix<Group, Td, use::accumulator, M, N>;
+};
+
+// Default-values query:
+// Specialization for when only types are given, need to query only sizes
+
+template <typename Ta, typename Tb, typename Tc, typename Td>
+struct matrix_params<
+    architecture::intel_gpu_dg2_g11, Ta, Tb, Tc, Td, 0, 0, 0,
+    typename std::enable_if<(!std::is_same_v<Ta, void> &&
+                             !std::is_same_v<Tb, void> &&
+                             !std::is_same_v<Tc, void>)>::type> {
+  static_assert((are_types_valid_xmx8<Ta, Tb, Tc>()),
+                "Invalid types for architecture::intel_gpu_dg1, supported "
+                "types are int8_t, uint8_t, half, and bf16");
+
+  // construct the matrices using the default sizes
+
+  static constexpr std::size_t M = 8;
+  static constexpr std::size_t N = 8;
+  static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 32 : 16);
+
+  template <typename Group, layout Layout>
+  using joint_matrix_a = joint_matrix<Group, Ta, use::a, M, K, Layout>;
+  template <typename Group, layout Layout>
+  using joint_matrix_b = joint_matrix<Group, Tb, use::b, K, N, Layout>;
+  template <typename Group>
+  using joint_matrix_c = joint_matrix<Group, Tc, use::accumulator, M, N>;
+  template <typename Group>
+  using joint_matrix_d = joint_matrix<Group, Td, use::accumulator, M, N>;
+};
+
+// Validation query:
+// Specialization when both types and sizes are given
+template <typename Ta, typename Tb, typename Tc, typename Td, size_t sM,
+          size_t sN, size_t sK>
+struct matrix_params<
+    architecture::intel_gpu_dg2_g11, Ta, Tb, Tc, Td, sM, sN, sK,
+    typename std::enable_if<(
+        !std::is_same_v<Ta, void> && !std::is_same_v<Tb, void> &&
+        !std::is_same_v<Tc, void> && sM != 0 && sN != 0 && sK != 0)>::type> {
+  // Validate that parameters are supported
+  static_assert(
+      (sM == 0 && sN == 0 && sK == 0) ||
+          (is_combination_valid_xmx8<Ta, Tb, Tc>(sM, sN, sK)),
+      "Invalid parameters for XMX8, query valid combinations "
+      "using: "
+      "q.get_device().get_info<sycl::info::device::matrix::combinations>()");
+
+  // if combination is valid, construct the matrices
+  static constexpr std::size_t M = sM;
+  static constexpr std::size_t N = sN;
+  static constexpr std::size_t K = sK;
+
+  template <typename Group, layout Layout>
+  using joint_matrix_a = joint_matrix<Group, Ta, use::a, M, K, Layout>;
+  template <typename Group, layout Layout>
+  using joint_matrix_b = joint_matrix<Group, Tb, use::b, K, N, Layout>;
+  template <typename Group>
+  using joint_matrix_c = joint_matrix<Group, Tc, use::accumulator, M, N>;
+  template <typename Group>
+  using joint_matrix_d = joint_matrix<Group, Td, use::accumulator, M, N>;
+};
+
+// Default-values query:
+// Specialization for when only types are given, need to query only sizes
+
+template <typename Ta, typename Tb, typename Tc, typename Td>
+struct matrix_params<
+    architecture::intel_gpu_dg2_g12, Ta, Tb, Tc, Td, 0, 0, 0,
+    typename std::enable_if<(!std::is_same_v<Ta, void> &&
+                             !std::is_same_v<Tb, void> &&
+                             !std::is_same_v<Tc, void>)>::type> {
+  static_assert((are_types_valid_xmx8<Ta, Tb, Tc>()),
+                "Invalid types for architecture::intel_gpu_dg1, supported "
+                "types are int8_t, uint8_t, half, and bf16");
+
+  // construct the matrices using the default sizes
+
+  static constexpr std::size_t M = 8;
+  static constexpr std::size_t N = 8;
+  static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 32 : 16);
+
+  template <typename Group, layout Layout>
+  using joint_matrix_a = joint_matrix<Group, Ta, use::a, M, K, Layout>;
+  template <typename Group, layout Layout>
+  using joint_matrix_b = joint_matrix<Group, Tb, use::b, K, N, Layout>;
+  template <typename Group>
+  using joint_matrix_c = joint_matrix<Group, Tc, use::accumulator, M, N>;
+  template <typename Group>
+  using joint_matrix_d = joint_matrix<Group, Td, use::accumulator, M, N>;
+};
+
+// Validation query:
+// Specialization when both types and sizes are given
+template <typename Ta, typename Tb, typename Tc, typename Td, size_t sM,
+          size_t sN, size_t sK>
+struct matrix_params<
+    architecture::intel_gpu_dg2_g12, Ta, Tb, Tc, Td, sM, sN, sK,
+    typename std::enable_if<(
+        !std::is_same_v<Ta, void> && !std::is_same_v<Tb, void> &&
+        !std::is_same_v<Tc, void> && sM != 0 && sN != 0 && sK != 0)>::type> {
+  // Validate that parameters are supported
+  static_assert(
+      (sM == 0 && sN == 0 && sK == 0) ||
+          (is_combination_valid_xmx8<Ta, Tb, Tc>(sM, sN, sK)),
+      "Invalid parameters for XMX8, query valid combinations "
+      "using: "
+      "q.get_device().get_info<sycl::info::device::matrix::combinations>()");
+
+  // if combination is valid, construct the matrices
+  static constexpr std::size_t M = sM;
+  static constexpr std::size_t N = sN;
+  static constexpr std::size_t K = sK;
+
+  template <typename Group, layout Layout>
+  using joint_matrix_a = joint_matrix<Group, Ta, use::a, M, K, Layout>;
+  template <typename Group, layout Layout>
+  using joint_matrix_b = joint_matrix<Group, Tb, use::b, K, N, Layout>;
+  template <typename Group>
+  using joint_matrix_c = joint_matrix<Group, Tc, use::accumulator, M, N>;
+  template <typename Group>
+  using joint_matrix_d = joint_matrix<Group, Td, use::accumulator, M, N>;
+};
+
+// Intel XMX with SIMD16 capability
+// The Intel XMX implementation supports the logical capability support of the
+// HW So in this case, M, N, K sizes returned by the query represent the logical
+// capabilities of the Intel XMX hardware.
+
+template <typename Ta, typename Tb, typename Tc>
+constexpr bool is_combination_valid_xmx16(size_t sM, size_t sN, size_t sK) {
+  if ((std::is_same_v<Ta, int8_t> && std::is_same_v<Tb, int8_t> &&
+       std::is_same_v<Tc, int> && (sM >= 1 && sM <= 8) && sN == 16 &&
+       sK == 32) ||
+      (std::is_same_v<Ta, int8_t> && std::is_same_v<Tb, uint8_t> &&
+       std::is_same_v<Tc, int> && (sM >= 1 && sM <= 8) && sN == 16 &&
+       sK == 32) ||
+      (std::is_same_v<Ta, uint8_t> && std::is_same_v<Tb, int8_t> &&
+       std::is_same_v<Tc, int> && (sM >= 1 && sM <= 8) && sN == 16 &&
+       sK == 32) ||
+      (std::is_same_v<Ta, uint8_t> && std::is_same_v<Tb, uint8_t> &&
+       std::is_same_v<Tc, int> && (sM >= 1 && sM <= 8) && sN == 16 &&
+       sK == 32) ||
+      (std::is_same_v<Ta, half> && std::is_same_v<Tb, half> &&
+       std::is_same_v<Tc, float> && (sM >= 1 && sM <= 8) && sN == 16 &&
+       sK == 16) ||
+      (std::is_same_v<Ta, unsigned short> &&
+       std::is_same_v<Tb, unsigned short> && std::is_same_v<Tc, float> &&
+       (sM >= 1 && sM <= 8) && sN == 16 && sK == 16))
+    return true;
+  else
+    return false;
+}
+
+template <typename Ta, typename Tb, typename Tc>
+constexpr bool are_types_valid_xmx16() {
+  if ((std::is_same_v<Ta, int8_t> && std::is_same_v<Tb, int8_t> &&
+       std::is_same_v<Tc, int>) ||
+      (std::is_same_v<Ta, uint8_t> && std::is_same_v<Tb, int8_t> &&
+       std::is_same_v<Tc, int>) ||
+      (std::is_same_v<Ta, int8_t> && std::is_same_v<Tb, uint8_t> &&
+       std::is_same_v<Tc, int>) ||
+      (std::is_same_v<Ta, uint8_t> && std::is_same_v<Tb, uint8_t> &&
+       std::is_same_v<Tc, int>) ||
+      (std::is_same_v<Ta, half> && std::is_same_v<Tb, half> &&
+       std::is_same_v<Tc, float>) ||
+      (std::is_same_v<Ta, unsigned short> &&
+       std::is_same_v<Tb, unsigned short> && std::is_same_v<Tc, float>))
+    return true;
+  else
+    return false;
+}
+
+// Default values query:
+// Specialization for when only types are given, need to query only sizes
+
+template <typename Ta, typename Tb, typename Tc, typename Td>
+struct matrix_params<
+    architecture::intel_gpu_pvc, Ta, Tb, Tc, Td, 0, 0, 0,
+    typename std::enable_if<(!std::is_same_v<Ta, void> &&
+                             !std::is_same_v<Tb, void> &&
+                             !std::is_same_v<Tc, void>)>::type> {
+  static_assert((are_types_valid_xmx16<Ta, Tb, Tc>()),
+                "Invalid types for architecture::intel_gpu_pvc, supported "
+                "types are int8_t, uint8_t, "
+                "half, and bf16");
+
+  // construct the matrices using the default sizes
+
+  static constexpr std::size_t M = 8;
+  static constexpr std::size_t N = 16;
+  static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 32 : 16);
+
+  template <typename Group, layout Layout>
+  using joint_matrix_a = joint_matrix<Group, Ta, use::a, M, K, Layout>;
+  template <typename Group, layout Layout>
+  using joint_matrix_b = joint_matrix<Group, Tb, use::b, K, N, Layout>;
+  template <typename Group>
+  using joint_matrix_c = joint_matrix<Group, Tc, use::accumulator, M, N>;
+  template <typename Group>
+  using joint_matrix_d = joint_matrix<Group, Td, use::accumulator, M, N>;
+};
+
+// Validation query:
+// Specialization when both types and sizes are given
+template <typename Ta, typename Tb, typename Tc, typename Td, size_t sM,
+          size_t sN, size_t sK>
+struct matrix_params<
+    architecture::intel_gpu_pvc, Ta, Tb, Tc, Td, sM, sN, sK,
+    typename std::enable_if<(
+        !std::is_same_v<Ta, void> && !std::is_same_v<Tb, void> &&
+        !std::is_same_v<Tc, void> && sM != 0 && sN != 0 && sK != 0)>::type> {
+  // Validate that parameters are supported
+  static_assert(
+      (sM == 0 && sN == 0 && sK == 0) ||
+          (is_combination_valid_xmx16<Ta, Tb, Tc>(sM, sN, sK)),
+      "Invalid parameters for architecture::intel_gpu_pvc, query valid "
+      "combinations "
+      "using: "
+      "q.get_device().get_info<sycl::info::device::matrix::combinations>()");
+
+  // if combination is valid, construct the matrices
+  static constexpr std::size_t M = sM;
+  static constexpr std::size_t N = sN;
+  static constexpr std::size_t K = sK;
+
+  template <typename Group, layout Layout>
+  using joint_matrix_a = joint_matrix<Group, Ta, use::a, M, K, Layout>;
+  template <typename Group, layout Layout>
+  using joint_matrix_b = joint_matrix<Group, Tb, use::b, K, N, Layout>;
+  template <typename Group>
+  using joint_matrix_c = joint_matrix<Group, Tc, use::accumulator, M, N>;
+  template <typename Group>
+  using joint_matrix_d = joint_matrix<Group, Td, use::accumulator, M, N>;
+};
+} // namespace experimental::matrix
+} // namespace oneapi
+} // namespace ext
+} // namespace _V1
+} // namespace sycl
diff --git a/sycl/include/sycl/ext/oneapi/matrix/matrix.hpp b/sycl/include/sycl/ext/oneapi/matrix/matrix.hpp
index 77037885fc28b..4311e6954e510 100644
--- a/sycl/include/sycl/ext/oneapi/matrix/matrix.hpp
+++ b/sycl/include/sycl/ext/oneapi/matrix/matrix.hpp
@@ -21,6 +21,6 @@
 #include <sycl/ext/oneapi/matrix/static-query.hpp>
 #endif // SYCL_EXT_ONEAPI_MATRIX_VERSION
 #if (SYCL_EXT_ONEAPI_MATRIX_VERSION == 4)
+#include <sycl/ext/oneapi/matrix/compile-time-query.hpp>
 #include <sycl/ext/oneapi/matrix/matrix-unified.hpp>
-#include <sycl/ext/oneapi/matrix/static-query-use.hpp>
 #endif // SYCL_EXT_ONEAPI_MATRIX_VERSION
diff --git a/sycl/include/sycl/ext/oneapi/matrix/static-query-use.hpp b/sycl/include/sycl/ext/oneapi/matrix/static-query-use.hpp
deleted file mode 100644
index f30ff53bb8a55..0000000000000
--- a/sycl/include/sycl/ext/oneapi/matrix/static-query-use.hpp
+++ /dev/null
@@ -1,606 +0,0 @@
-//===---------- static-query-use.hpp - SYCL matrix ------------*- C++ -*---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-// ===--------------------------------------------------------------------=== //
-// This file implements the static query interface for the joint_matrix
-// experimental extension. Intel(R) Advanced Matrix Extensions (Intel(R) AMX),
-// and Intel(R) Xe Matrix Extensions (Intel(R) XMX) support different logical
-// sizes and types. The query interface is used to validate user code and inform
-// them about supported types, sizes, scopes, and layouts by the current
-// implementation. Note that this query interface is a compile-time query, so
-// there will be no runtime errors. The query interface provides three
-// functionalities: 1- At compile time, inform the user whether a specific
-// combination is valid or not. 2- Construct the matrices using a default shape
-// if user does not provide a combination 3- General query interface for sizes,
-// types, scopes. This is needed to void padding by the user, for tuning, and
-// efficient code generation if used by a library.
-
-#pragma once
-
-#include <sycl/aliases.hpp> // for half
-#include <sycl/ext/oneapi/matrix/matrix-tensorcores.hpp>
-#include <sycl/ext/oneapi/matrix/matrix-unified-utils.hpp> // for use, layout
-#include <sycl/ext/oneapi/matrix/matrix-unified.hpp>       // for joint_matrix
-
-#include <cstddef>     // for size_t
-#include <stdint.h>    // for uint32_t, int8_t
-#include <type_traits> // for enable_if
-
-namespace sycl {
-inline namespace _V1 {
-namespace ext {
-namespace oneapi {
-namespace experimental::matrix {
-
-enum class tpu {
-  xmx8,
-  xmx16,
-  amx,
-};
-enum class matrix_type {
-  bf8,
-  bf16,
-  fp16,
-  tf32,
-  fp32,
-  fp64,
-  sint2,
-  sint4,
-  sint8,
-  sint16,
-  sint32,
-  sint64,
-  uint2,
-  uint4,
-  uint8,
-  uint16,
-  uint32,
-  uint64
-};
-
-enum class scope_t { sub_group, work_group };
-
-template <tpu u, typename Ta = void, typename Tb = void, typename Tc = void,
-          int sM = 0, int sN = 0, int sK = 0, typename Enabled = void>
-struct tpu_params;
-
-template <typename Ta, typename Tb, typename Tc>
-constexpr bool is_combination_valid_amx(int sM, int sN, int sK) {
-  // is_same_v is a C++17 feature
-  if ((std::is_same_v<Ta, int8_t> && std::is_same_v<Tb, int8_t> &&
-       std::is_same_v<Tc, int> && sM <= 16 && sN <= 16 && sK <= 64) ||
-      (std::is_same_v<Ta, uint8_t> && std::is_same_v<Tb, uint8_t> &&
-       std::is_same_v<Tc, int> && sM <= 16 && sN <= 16 && sK <= 64) ||
-      (std::is_same_v<Ta, int8_t> && std::is_same_v<Tb, uint8_t> &&
-       std::is_same_v<Tc, int> && sM <= 16 && sN <= 16 && sK <= 64) ||
-      (std::is_same_v<Ta, uint8_t> && std::is_same_v<Tb, int8_t> &&
-       std::is_same_v<Tc, int> && sM <= 16 && sN <= 16 && sK <= 64) ||
-      // bf16
-      (std::is_same_v<Ta, unsigned short> &&
-       std::is_same_v<Tb, unsigned short> && std::is_same_v<Tc, float> &&
-       sM <= 16 && sN <= 16 && sK <= 32))
-    return true;
-  else
-    return false;
-}
-
-template <typename Ta, typename Tb, typename Tc>
-constexpr bool are_types_valid_amx() {
-  if ((std::is_same_v<Ta, int8_t> && std::is_same_v<Tb, int8_t> &&
-       std::is_same_v<Tc, int>) ||
-      (std::is_same_v<Ta, uint8_t> && std::is_same_v<Tb, uint8_t> &&
-       std::is_same_v<Tc, int>) ||
-      (std::is_same_v<Ta, int8_t> && std::is_same_v<Tb, uint8_t> &&
-       std::is_same_v<Tc, int>) ||
-      (std::is_same_v<Ta, uint8_t> && std::is_same_v<Tb, int8_t> &&
-       std::is_same_v<Tc, int>) ||
-      (std::is_same_v<Ta, unsigned short> &&
-       std::is_same_v<Tb, unsigned short> && std::is_same_v<Tc, float>))
-    return true;
-  else
-    return false;
-}
-
-// General query:
-// types are not given, no default sizes and no implicit matrix construction
-template <int sM, int sN, int sK>
-struct tpu_params<tpu::amx, void, void, void, sM, sN, sK> {
-  static constexpr std::size_t M = -1; // depends on the type
-  static constexpr std::size_t N = -1;
-  static constexpr std::size_t K = -1;
-
-  uint32_t numtiles = 8;
-  static constexpr scope_t scopes[] = {scope_t::sub_group};
-  static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t);
-  struct combination {
-    uint32_t max_msize;
-    uint32_t max_nsize;
-    uint32_t max_ksize;
-    matrix_type atype;
-    matrix_type btype;
-    matrix_type accumulatortype;
-    uint32_t msize;
-    uint32_t nsize;
-    uint32_t ksize;
-  };
-  using mt = matrix_type;
-  static constexpr combination combinations[] = {
-      {16, 16, 64, mt::sint8, mt::sint8, mt::sint32},
-      {16, 16, 64, mt::sint8, mt::uint8, mt::sint32},
-      {16, 16, 64, mt::uint8, mt::sint8, mt::sint32},
-      {16, 16, 64, mt::uint8, mt::uint8, mt::sint32},
-      {16, 16, 32, mt::bf16, mt::bf16, mt::fp32}};
-  static constexpr int num_combinations =
-      sizeof(combinations) / sizeof(combination);
-};
-
-// Sizes-only query
-// Specialization for when only types are given, need to query only sizes
-template <typename Ta, typename Tb, typename Tc>
-struct tpu_params<tpu::amx, Ta, Tb, Tc, 0, 0, 0,
-                  typename std::enable_if<(!std::is_same_v<Ta, void> &&
-                                           !std::is_same_v<Tb, void> &&
-                                           !std::is_same_v<Tc, void>)>::type> {
-  static_assert((are_types_valid_amx<Ta, Tb, Tc>()),
-                "Invalid types for AMX, supported types are int8_t, uint8_t, "
-                "and bf16 (Note that unsigned short should be used in the"
-                "DPC++ code to implement bf16) ");
-
-  // construct the matrices using the default sizes
-  static constexpr std::size_t M = 16;
-  static constexpr std::size_t N = 16;
-  static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 64 : 32);
-
-  template <typename Group, layout Layout>
-  using joint_matrix_a = joint_matrix<Group, Ta, use::a, M, K, Layout>;
-  template <typename Group, layout Layout>
-  using joint_matrix_b = joint_matrix<Group, Tb, use::b, K, N, Layout>;
-  template <typename Group>
-  using joint_matrix_accumulator =
-      joint_matrix<Group, Tc, use::accumulator, M, N>;
-
-  uint32_t numtiles = 8;
-  static constexpr scope_t scopes[] = {scope_t::sub_group};
-  static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t);
-  struct combination {
-    uint32_t max_msize;
-    uint32_t max_nsize;
-    uint32_t max_ksize;
-    matrix_type atype;
-    matrix_type btype;
-    matrix_type accumulatortype;
-    uint32_t msize;
-    uint32_t nsize;
-    uint32_t ksize;
-  };
-  static constexpr combination combinations[] = {
-      {16, 16, (sizeof(Ta) == 1) ? 64 : 32}};
-  static constexpr int num_combinations =
-      sizeof(combinations) / sizeof(combination);
-};
-
-// Valid or not:
-// Specialization when both types and sizes are given
-template <typename Ta, typename Tb, typename Tc, int sM, int sN, int sK>
-struct tpu_params<
-    tpu::amx, Ta, Tb, Tc, sM, sN, sK,
-    typename std::enable_if<(
-        !std::is_same_v<Ta, void> && !std::is_same_v<Tb, void> &&
-        !std::is_same_v<Tc, void> && sM != 0 && sN != 0 && sK != 0)>::type> {
-  // Validate that parameters are supported
-  static_assert(
-      (sM == 0 && sN == 0 && sK == 0) ||
-          (is_combination_valid_amx<Ta, Tb, Tc>(sM, sN, sK)),
-      "Invalid parameters for AMX, query valid types and maximum sizes "
-      "using: tpu_params<tpu::amx> myparams; and then check out "
-      "myparams.combinations array");
-
-  // if combination is valid, construct the matrices
-
-  static constexpr std::size_t M = (sM != 0) ? sM : 16;
-  static constexpr std::size_t N = (sN != 0) ? sN : 16;
-  static constexpr std::size_t K =
-      (sK != 0) ? sK : ((sizeof(Ta) == 1) ? 64 : 32);
-
-  template <typename Group, layout Layout>
-  using joint_matrix_a = joint_matrix<Group, Ta, use::a, M, K, Layout>;
-  template <typename Group, layout Layout>
-  using joint_matrix_b = joint_matrix<Group, Tb, use::b, K, N, Layout>;
-  template <typename Group>
-  using joint_matrix_accumulator =
-      joint_matrix<Group, Tc, use::accumulator, M, N>;
-
-  uint32_t numtiles = 8;
-  static constexpr scope_t scopes[] = {scope_t::sub_group};
-  static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t);
-};
-
-// Intel XMX with SIMD8 capability
-// The Intel XMX implementation supports the logical capability support of the
-// HW So in this case, M, N, K sizes returned by the query represent the logical
-// capabilities of the Intel XMX hardware.
-
-template <typename Ta, typename Tb, typename Tc>
-constexpr bool is_combination_valid_xmx8(int sM, int sN, int sK) {
-  if ((std::is_same_v<Ta, int8_t> && std::is_same_v<Tb, int8_t> &&
-       std::is_same_v<Tc, int> && (sM == 1 || sM == 2 || sM == 4 || sM == 8) &&
-       sN == 8 && sK == 32) ||
-      (std::is_same_v<Ta, int8_t> && std::is_same_v<Tb, uint8_t> &&
-       std::is_same_v<Tc, int> && (sM == 1 || sM == 2 || sM == 4 || sM == 8) &&
-       sN == 8 && sK == 32) ||
-      (std::is_same_v<Ta, uint8_t> && std::is_same_v<Tb, int8_t> &&
-       std::is_same_v<Tc, int> && (sM == 1 || sM == 2 || sM == 4 || sM == 8) &&
-       sN == 8 && sK == 32) ||
-      (std::is_same_v<Ta, uint8_t> && std::is_same_v<Tb, uint8_t> &&
-       std::is_same_v<Tc, int> && (sM == 1 || sM == 2 || sM == 4 || sM == 8) &&
-       sN == 8 && sK == 32) ||
-      (std::is_same_v<Ta, half> && std::is_same_v<Tb, half> &&
-       std::is_same_v<Tc, float> &&
-       (sM == 1 || sM == 2 || sM == 4 || sM == 8) && sN == 8 && sK == 16) ||
-      (std::is_same_v<Ta, unsigned short> &&
-       std::is_same_v<Tb, unsigned short> && std::is_same_v<Tc, float> &&
-       (sM == 1 || sM == 2 || sM == 4 || sM == 8) && sN == 8 && sK == 16))
-    return true;
-  else
-    return false;
-}
-
-template <typename Ta, typename Tb, typename Tc>
-constexpr bool are_types_valid_xmx8() {
-  if ((std::is_same_v<Ta, int8_t> && std::is_same_v<Tb, int8_t> &&
-       std::is_same_v<Tc, int>) ||
-      (std::is_same_v<Ta, uint8_t> && std::is_same_v<Tb, int8_t> &&
-       std::is_same_v<Tc, int>) ||
-      (std::is_same_v<Ta, int8_t> && std::is_same_v<Tb, uint8_t> &&
-       std::is_same_v<Tc, int>) ||
-      (std::is_same_v<Ta, uint8_t> && std::is_same_v<Tb, uint8_t> &&
-       std::is_same_v<Tc, int>) ||
-      (std::is_same_v<Ta, half> && std::is_same_v<Tb, half> &&
-       std::is_same_v<Tc, float>) ||
-      (std::is_same_v<Ta, unsigned short> &&
-       std::is_same_v<Tb, unsigned short> && std::is_same_v<Tc, float>))
-    return true;
-  else
-    return false;
-}
-
-// General Query
-// specialization for when types are not given --> no default values
-template <int sM, int sN, int sK>
-struct tpu_params<tpu::xmx8, void, void, void, sM, sN, sK> {
-  static constexpr std::size_t M = -1; // depends on the type
-  static constexpr std::size_t N = -1;
-  static constexpr std::size_t K = -1;
-
-  uint32_t numtiles = -1; // does not apply for XMX8
-  static constexpr scope_t scopes[] = {scope_t::sub_group};
-  static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t);
-
-  struct combination {
-    uint32_t max_msize;
-    uint32_t max_nsize;
-    uint32_t max_ksize;
-    matrix_type atype;
-    matrix_type btype;
-    matrix_type accumulatortype;
-    uint32_t msize;
-    uint32_t nsize;
-    uint32_t ksize;
-  };
-  using mt = matrix_type;
-  static constexpr combination combinations[] = {
-      {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 1, 8, 32},
-      {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 2, 8, 32},
-      {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 4, 8, 32},
-      {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 8, 8, 32},
-      {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 1, 8, 32},
-      {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 2, 8, 32},
-      {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 4, 8, 32},
-      {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 8, 8, 32},
-      {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 1, 8, 32},
-      {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 2, 8, 32},
-      {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 4, 8, 32},
-      {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 8, 8, 32},
-      {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 1, 8, 32},
-      {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 2, 8, 32},
-      {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 4, 8, 32},
-      {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 8, 8, 32},
-      {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 1, 8, 16},
-      {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 2, 8, 16},
-      {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 4, 8, 16},
-      {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 8, 8, 16},
-      {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 1, 8, 16},
-      {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 2, 8, 16},
-      {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 4, 8, 16},
-      {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 8, 8, 16},
-  };
-  static constexpr int num_combinations =
-      sizeof(combinations) / sizeof(combination);
-};
-
-// Sizes-only query:
-// Specialization for when only types are given, need to query only sizes
-
-template <typename Ta, typename Tb, typename Tc>
-struct tpu_params<tpu::xmx8, Ta, Tb, Tc, 0, 0, 0,
-                  typename std::enable_if<(!std::is_same_v<Ta, void> &&
-                                           !std::is_same_v<Tb, void> &&
-                                           !std::is_same_v<Tc, void>)>::type> {
-  static_assert((are_types_valid_xmx8<Ta, Tb, Tc>()),
-                "Invalid types for XMX8, supported types are int8_t, uint8_t, "
-                "half, and bf16 (Note that unsigned short should be used in the"
-                "DPC++ code to implement bf16)");
-
-  // construct the matrices using the default sizes
-
-  static constexpr std::size_t M = 8;
-  static constexpr std::size_t N = 8;
-  static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 32 : 16);
-
-  template <typename Group, layout Layout>
-  using joint_matrix_a = joint_matrix<Group, Ta, use::a, M, K, Layout>;
-  template <typename Group, layout Layout>
-  using joint_matrix_b = joint_matrix<Group, Tb, use::b, K, N, Layout>;
-  template <typename Group>
-  using joint_matrix_accumulator =
-      joint_matrix<Group, Tc, use::accumulator, M, N>;
-
-  uint32_t numtiles = -1; // does not apply for XMX8
-  static constexpr scope_t scopes[] = {scope_t::sub_group};
-  static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t);
-  struct combination {
-    uint32_t max_msize;
-    uint32_t max_nsize;
-    uint32_t max_ksize;
-    matrix_type atype;
-    matrix_type btype;
-    matrix_type accumulatortype;
-    uint32_t msize;
-    uint32_t nsize;
-    uint32_t ksize;
-  };
-  using mt = matrix_type;
-  static constexpr combination combinations[] = {
-      // The types used in the initialization below are fake and not used. In
-      // this case, users already chose the types, they are only looking for
-      // the
-      // sizes
-      {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 1, 8, (sizeof(Ta) == 1) ? 32 : 16},
-      {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 2, 8, (sizeof(Ta) == 1) ? 32 : 16},
-      {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 4, 8, (sizeof(Ta) == 1) ? 32 : 16},
-      {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 8, 8, (sizeof(Ta) == 1) ? 32 : 16},
-  };
-  static constexpr int num_combinations =
-      sizeof(combinations) / sizeof(combination);
-};
-
-// Valid or not:
-// Specialization when both types and sizes are given
-template <typename Ta, typename Tb, typename Tc, int sM, int sN, int sK>
-struct tpu_params<
-    tpu::xmx8, Ta, Tb, Tc, sM, sN, sK,
-    typename std::enable_if<((!std::is_same_v<Ta, void> && sM != 0))>::type> {
-  // Validate that parameters are supported
-  static_assert((sM == 0 && sN == 0 && sK == 0) ||
-                    (is_combination_valid_xmx8<Ta, Tb, Tc>(sM, sN, sK)),
-                "Invalid parameters for XMX8, query valid combinations "
-                "using: tpu_params<tpu::xmx8> myparams; and then check out "
-                "myparams.combinations array");
-
-  // if combination is valid, construct the matrices
-  static constexpr std::size_t M = (sM != 0) ? sM : 8;
-  static constexpr std::size_t N = (sN != 0) ? sN : 8;
-  static constexpr std::size_t K =
-      (sK != 0) ? sK : ((sizeof(Ta) == 1) ? 32 : 16);
-
-  template <typename Group, layout Layout>
-  using joint_matrix_a = joint_matrix<Group, Ta, use::a, M, K, Layout>;
-  template <typename Group, layout Layout>
-  using joint_matrix_b = joint_matrix<Group, Tb, use::b, K, N, Layout>;
-  template <typename Group>
-  using joint_matrix_accumulator =
-      joint_matrix<Group, Tc, use::accumulator, M, N>;
-
-  uint32_t numtiles = -1; // does not apply for XMX8
-  static constexpr scope_t scopes[] = {scope_t::sub_group};
-  static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t);
-};
-
-// Intel XMX with SIMD16 capability
-// The Intel XMX implementation supports the logical capability support of the
-// HW So in this case, M, N, K sizes returned by the query represent the logical
-// capabilities of the Intel XMX hardware.
-
-template <typename Ta, typename Tb, typename Tc>
-constexpr bool is_combination_valid_xmx16(int sM, int sN, int sK) {
-  if ((std::is_same_v<Ta, int8_t> && std::is_same_v<Tb, int8_t> &&
-       std::is_same_v<Tc, int> && (sM == 1 || sM == 2 || sM == 4 || sM == 8) &&
-       sN == 16 && sK == 32) ||
-      (std::is_same_v<Ta, int8_t> && std::is_same_v<Tb, uint8_t> &&
-       std::is_same_v<Tc, int> && (sM == 1 || sM == 2 || sM == 4 || sM == 8) &&
-       sN == 16 && sK == 32) ||
-      (std::is_same_v<Ta, uint8_t> && std::is_same_v<Tb, int8_t> &&
-       std::is_same_v<Tc, int> && (sM == 1 || sM == 2 || sM == 4 || sM == 8) &&
-       sN == 16 && sK == 32) ||
-      (std::is_same_v<Ta, uint8_t> && std::is_same_v<Tb, uint8_t> &&
-       std::is_same_v<Tc, int> && (sM == 1 || sM == 2 || sM == 4 || sM == 8) &&
-       sN == 16 && sK == 32) ||
-      (std::is_same_v<Ta, half> && std::is_same_v<Tb, half> &&
-       std::is_same_v<Tc, float> &&
-       (sM == 1 || sM == 2 || sM == 4 || sM == 8) && sN == 16 && sK == 16) ||
-      (std::is_same_v<Ta, unsigned short> &&
-       std::is_same_v<Tb, unsigned short> && std::is_same_v<Tc, float> &&
-       (sM == 1 || sM == 2 || sM == 4 || sM == 8) && sN == 16 && sK == 16))
-    return true;
-  else
-    return false;
-}
-
-template <typename Ta, typename Tb, typename Tc>
-constexpr bool are_types_valid_xmx16() {
-  if ((std::is_same_v<Ta, int8_t> && std::is_same_v<Tb, int8_t> &&
-       std::is_same_v<Tc, int>) ||
-      (std::is_same_v<Ta, uint8_t> && std::is_same_v<Tb, int8_t> &&
-       std::is_same_v<Tc, int>) ||
-      (std::is_same_v<Ta, int8_t> && std::is_same_v<Tb, uint8_t> &&
-       std::is_same_v<Tc, int>) ||
-      (std::is_same_v<Ta, uint8_t> && std::is_same_v<Tb, uint8_t> &&
-       std::is_same_v<Tc, int>) ||
-      (std::is_same_v<Ta, half> && std::is_same_v<Tb, half> &&
-       std::is_same_v<Tc, float>) ||
-      (std::is_same_v<Ta, unsigned short> &&
-       std::is_same_v<Tb, unsigned short> && std::is_same_v<Tc, float>))
-    return true;
-  else
-    return false;
-}
-
-// General Query
-// specialization for when types are not given --> no default values
-template <int sM, int sN, int sK>
-struct tpu_params<tpu::xmx16, void, void, void, sM, sN, sK> {
-  static constexpr std::size_t M = -1; // depends on the type
-  static constexpr std::size_t N = -1;
-  static constexpr std::size_t K = -1;
-
-  uint32_t numtiles = -1; // does not apply for XMX
-  static constexpr scope_t scopes[] = {scope_t::sub_group};
-  static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t);
-
-  struct combination {
-    uint32_t max_msize;
-    uint32_t max_nsize;
-    uint32_t max_ksize;
-    matrix_type atype;
-    matrix_type btype;
-    matrix_type accumulatortype;
-    uint32_t msize;
-    uint32_t nsize;
-    uint32_t ksize;
-  };
-  using mt = matrix_type;
-  static constexpr combination combinations[] = {
-      {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 1, 16, 32},
-      {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 2, 16, 32},
-      {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 4, 16, 32},
-      {0, 0, 0, mt::sint8, mt::sint8, mt::sint32, 8, 16, 32},
-      {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 1, 16, 32},
-      {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 2, 16, 32},
-      {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 4, 16, 32},
-      {0, 0, 0, mt::sint8, mt::uint8, mt::sint32, 8, 16, 32},
-      {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 1, 16, 32},
-      {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 2, 16, 32},
-      {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 4, 16, 32},
-      {0, 0, 0, mt::uint8, mt::sint8, mt::sint32, 8, 16, 32},
-      {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 1, 16, 32},
-      {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 2, 16, 32},
-      {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 4, 16, 32},
-      {0, 0, 0, mt::uint8, mt::uint8, mt::sint32, 8, 16, 32},
-      {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 1, 16, 16},
-      {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 2, 16, 16},
-      {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 4, 16, 16},
-      {0, 0, 0, mt::fp16, mt::fp16, mt::fp32, 8, 16, 16},
-      {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 1, 16, 16},
-      {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 2, 16, 16},
-      {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 4, 16, 16},
-      {0, 0, 0, mt::bf16, mt::bf16, mt::fp32, 8, 16, 16},
-  };
-  static constexpr int num_combinations =
-      sizeof(combinations) / sizeof(combination);
-};
-
-// Sizes-only query:
-// Specialization for when only types are given, need to query only sizes
-
-template <typename Ta, typename Tb, typename Tc>
-struct tpu_params<tpu::xmx16, Ta, Tb, Tc, 0, 0, 0,
-                  typename std::enable_if<(!std::is_same_v<Ta, void> &&
-                                           !std::is_same_v<Tb, void> &&
-                                           !std::is_same_v<Tc, void>)>::type> {
-  static_assert((are_types_valid_xmx16<Ta, Tb, Tc>()),
-                "Invalid types for XMX16, supported types are int8_t, uint8_t, "
-                "half, and bf16 (Note that unsigned short should be used in the"
-                "DPC++ code to implement bf16)");
-
-  // construct the matrices using the default sizes
-
-  static constexpr std::size_t M = 8;
-  static constexpr std::size_t N = 16;
-  static constexpr std::size_t K = ((sizeof(Ta) == 1) ? 32 : 16);
-
-  template <typename Group, layout Layout>
-  using joint_matrix_a = joint_matrix<Group, Ta, use::a, M, K, Layout>;
-  template <typename Group, layout Layout>
-  using joint_matrix_b = joint_matrix<Group, Tb, use::b, K, N, Layout>;
-  template <typename Group>
-  using joint_matrix_accumulator =
-      joint_matrix<Group, Tc, use::accumulator, M, N>;
-
-  uint32_t numtiles = -1; // does not apply for XMX
-  static constexpr scope_t scopes[] = {scope_t::sub_group};
-  static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t);
-  struct combination {
-    uint32_t max_msize;
-    uint32_t max_nsize;
-    uint32_t max_ksize;
-    matrix_type atype;
-    matrix_type btype;
-    matrix_type accumulatortype;
-    uint32_t msize;
-    uint32_t nsize;
-    uint32_t ksize;
-  };
-  using mt = matrix_type;
-  static constexpr combination combinations[] = {
-      // The types used in the initialization below are fake and not used. In
-      // this case, users already chose the types, they are only looking for
-      // the
-      // sizes
-      {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 1, 16, (sizeof(Ta) == 1) ? 32 : 16},
-      {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 2, 16, (sizeof(Ta) == 1) ? 32 : 16},
-      {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 4, 16, (sizeof(Ta) == 1) ? 32 : 16},
-      {0, 0, 0, mt::bf8, mt::bf8, mt::bf8, 8, 16, (sizeof(Ta) == 1) ? 32 : 16},
-  };
-  static constexpr int num_combinations =
-      sizeof(combinations) / sizeof(combination);
-};
-
-// Valid or not:
-// Specialization when both types and sizes are given
-template <typename Ta, typename Tb, typename Tc, int sM, int sN, int sK>
-struct tpu_params<
-    tpu::xmx16, Ta, Tb, Tc, sM, sN, sK,
-    typename std::enable_if<((!std::is_same_v<Ta, void> && sM != 0))>::type> {
-  // Validate that parameters are supported
-  static_assert((sM == 0 && sN == 0 && sK == 0) ||
-                    (is_combination_valid_xmx16<Ta, Tb, Tc>(sM, sN, sK)),
-                "Invalid parameters for XMX16, query valid combinations "
-                "using: tpu_params<tpu::xmx16> myparams; and then check out "
-                "myparams.combinations array");
-
-  // if combination is valid, construct the matrices
-  static constexpr std::size_t M = (sM != 0) ? sM : 8;
-  static constexpr std::size_t N = (sN != 0) ? sN : 8;
-  static constexpr std::size_t K =
-      (sK != 0) ? sK : ((sizeof(Ta) == 1) ? 32 : 16);
-
-  template <typename Group, layout Layout>
-  using joint_matrix_a = joint_matrix<Group, Ta, use::a, M, K, Layout>;
-  template <typename Group, layout Layout>
-  using joint_matrix_b = joint_matrix<Group, Tb, use::b, K, N, Layout>;
-  template <typename Group>
-  using joint_matrix_accumulator =
-      joint_matrix<Group, Tc, use::accumulator, M, N>;
-
-  uint32_t numtiles = -1; // does not apply for XMX16
-  static constexpr scope_t scopes[] = {scope_t::sub_group};
-  static constexpr int num_scopes = sizeof(scopes) / sizeof(scope_t);
-};
-} // namespace experimental::matrix
-} // namespace oneapi
-} // namespace ext
-} // namespace _V1
-} // namespace sycl
diff --git a/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp b/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp
index 048aed6341f6c..ccf50a5a76da9 100644
--- a/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp
+++ b/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp
@@ -39,7 +39,8 @@ void matrix_multiply(big_matrix<T1, NUM_ROWS_C, NUM_COLS_C> &C,
   size_t K = NUM_COLS_A;
   assert(NUM_ROWS_C == NUM_ROWS_A && NUM_COLS_A == NUM_ROWS_B * 4);
 
-  using myparams2 = tpu_params<tpu::amx, int8_t, int8_t, int>;
+  using myparams2 =
+      matrix_params<architecture::intel_cpu_spr, int8_t, int8_t, int>;
   constexpr int TM = myparams2::M;
   constexpr int TN = myparams2::N;
   constexpr int TK = myparams2::K;
@@ -80,7 +81,7 @@ void matrix_multiply(big_matrix<T1, NUM_ROWS_C, NUM_COLS_C> &C,
            myparams2::joint_matrix_b<
                sub_group, ext::intel::experimental::matrix::layout::packed>
                sub_b;
-           myparams2::joint_matrix_accumulator<sub_group> sub_c;
+           myparams2::joint_matrix_c<sub_group> sub_c;
 
            joint_matrix_load(
                sg, sub_c,
diff --git a/sycl/test/matrix/compile-time-query.cpp b/sycl/test/matrix/compile-time-query.cpp
new file mode 100644
index 0000000000000..e39361bbeeadd
--- /dev/null
+++ b/sycl/test/matrix/compile-time-query.cpp
@@ -0,0 +1,146 @@
+// RUN: %clangxx -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 -fsycl -o compile-time-query %s
+#include <iostream>
+#include <sycl/sycl.hpp>
+
+using namespace sycl;
+using namespace sycl::ext::oneapi::experimental;
+using namespace sycl::ext::oneapi::experimental::matrix;
+
+void query_amx_spr() {
+
+  // generates combination assert
+  // using myparams = matrix_params<architecture::intel_cpu_spr, int, int, int,
+  // 2, 8, 32>;
+
+  // generates types assert
+  // using myparams2 = matrix_params<architecture::intel_cpu_spr, int, int,
+  // int>;
+
+  // tells whether a combination is valid or not, if valid, those will be set as
+  // default
+  using myparams = matrix_params<architecture::intel_cpu_spr, int8_t, int8_t,
+                                 int, int, 2, 8, 32>;
+
+  size_t dmsize = myparams::M;
+  size_t dnsize = myparams::N;
+  size_t dksize = myparams::K;
+  std::cout << "sizes of AMX matrix_params chosen by the user are: M " << dmsize
+            << " N " << dnsize << " K " << dksize << std::endl;
+
+  // Sizes-only query: types are given, generate default sizes
+  using myparams2 =
+      matrix_params<architecture::intel_cpu_spr, int8_t, int8_t, int>;
+  myparams2 p;
+  dmsize = myparams2::M;
+  dnsize = myparams2::N;
+  dksize = myparams2::K;
+  std::cout << "default AMX sizes matrix_params  are: M " << dmsize << " N "
+            << dnsize << " K " << dksize << std::endl;
+  return;
+}
+
+void query_xmx_dg() {
+
+  // generates combination assert
+  // using myparams = matrix_params<architecture::intel_gpu_dg1, int, int, int,
+  // 2, 8, 32>;
+
+  // generate combination of type assert
+  // using myparams = matrix_params<architecture::intel_gpu_dg1, int, int, int>;
+
+  // tells whether a combination is valid or not, if valid, those will be set as
+  // default
+  using myparams = matrix_params<architecture::intel_gpu_dg1, int8_t, int8_t,
+                                 int, int, 2, 8, 32>;
+
+  size_t dmsize = myparams::M;
+  size_t dnsize = myparams::N;
+  size_t dksize = myparams::K;
+  std::cout << "sizes of Intel XMX of architecture::intel_gpu_dg1 "
+               "matrix_params chosen by the user are: M "
+            << dmsize << " N " << dnsize << " K " << dksize << std::endl;
+
+  // sizes-only query: types are given, generate default sizes
+  using myparams2 =
+      matrix_params<architecture::intel_gpu_dg1, int8_t, int8_t, int>;
+  dmsize = myparams2::M;
+  dnsize = myparams2::N;
+  dksize = myparams2::K;
+  std::cout << "Default Intel XMX of architecture::intel_gpu_dg1 sizes  are: M "
+            << dmsize << " N " << dnsize << " K " << dksize << std::endl;
+  return;
+}
+
+void query_xmx_ats() {
+
+  // generates combination assert
+  // using myparams = matrix_params<architecture::intel_gpu_dg2_g10, int, int,
+  // int, // 2, 8, 32>;
+
+  // generate combination of type assert
+  // using myparams = matrix_params<architecture::intel_gpu_dg2_g10, int, int,
+  // int>;
+
+  // tells whether a combination is valid or not, if valid, those will be set as
+  // default
+  using myparams = matrix_params<architecture::intel_gpu_dg2_g10, int8_t,
+                                 int8_t, int, int, 2, 8, 32>;
+
+  size_t dmsize = myparams::M;
+  size_t dnsize = myparams::N;
+  size_t dksize = myparams::K;
+  std::cout << "sizes of Intel XMX of architecture::intel_gpu_dg2_g10 "
+               "matrix_params chosen by the user are: M "
+            << dmsize << " N " << dnsize << " K " << dksize << std::endl;
+
+  // sizes-only query: types are given, generate default sizes
+  using myparams2 =
+      matrix_params<architecture::intel_gpu_dg2_g10, int8_t, int8_t, int>;
+  dmsize = myparams2::M;
+  dnsize = myparams2::N;
+  dksize = myparams2::K;
+  std::cout
+      << "Default Intel XMX of architecture::intel_gpu_dg2_g10 sizes  are: M "
+      << dmsize << " N " << dnsize << " K " << dksize << std::endl;
+  return;
+}
+
+void query_xmx_pvc() {
+
+  // generates combination assert
+  // using myparams = matrix_params<architecture::intel_gpu_pvc, int, int, int,
+  // 2, 8, 32>;
+
+  // generate combination of type assert
+  // using myparams = matrix_params<architecture::intel_gpu_pvc, int, int, int>;
+
+  // tells whether a combination is valid or not, if valid, those will be set as
+  // default
+  using myparams = matrix_params<architecture::intel_gpu_pvc, int8_t, int8_t,
+                                 int, int, 2, 16, 32>;
+
+  size_t dmsize = myparams::M;
+  size_t dnsize = myparams::N;
+  size_t dksize = myparams::K;
+  std::cout << "sizes of architecture::intel_gpu_pvc matrix_params chosen by "
+               "the user are: M "
+            << dmsize << " N " << dnsize << " K " << dksize << std::endl;
+
+  // sizes-only query: types are given, generate default sizes
+  using myparams2 =
+      matrix_params<architecture::intel_gpu_pvc, int8_t, int8_t, int>;
+  dmsize = myparams2::M;
+  dnsize = myparams2::N;
+  dksize = myparams2::K;
+  std::cout << "Default Intel XMX of architecture::intel_gpu_pvc sizes  are: M "
+            << dmsize << " N " << dnsize << " K " << dksize << std::endl;
+  return;
+}
+
+int main() {
+  query_amx_spr();
+  query_xmx_dg();
+  query_xmx_ats();
+  query_xmx_pvc();
+  return 0;
+}
diff --git a/sycl/test/matrix/query-use.cpp b/sycl/test/matrix/query-use.cpp
deleted file mode 100644
index 9afc8e1173043..0000000000000
--- a/sycl/test/matrix/query-use.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-// RUN: %clangxx -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 -fsycl -o query-use %s
-#include <iostream>
-#include <sycl/sycl.hpp>
-
-using namespace sycl;
-using namespace sycl::ext::oneapi::experimental::matrix;
-
-void query_amx() {
-
-  // generates combination assert
-  // using myparams = tpu_params<tpu::amx, int, int, int, 2, 8, 32>;
-
-  // generates types assert
-  // using myparams2 = tpu_params<tpu::amx, int, int, int>;
-
-  // tells whether a combination is valid or not, if valid, those will be set as
-  // default
-  using myparams = tpu_params<tpu::amx, int8_t, int8_t, int, 2, 8, 32>;
-
-  size_t dmsize = myparams::M;
-  size_t dnsize = myparams::N;
-  size_t dksize = myparams::K;
-  std::cout << "sizes of AMX tpu_params chosen by the user are: M " << dmsize
-            << " N " << dnsize << " K " << dksize << std::endl;
-
-  // Sizes-only query: types are given, generate default sizes
-  using myparams2 = tpu_params<tpu::amx, int8_t, int8_t, int>;
-  myparams2 p;
-  dmsize = myparams2::M;
-  dnsize = myparams2::N;
-  dksize = myparams2::K;
-  std::cout << "default AMX sizes tpu_params  are: M " << dmsize << " N "
-            << dnsize << " K " << dksize << "\n AMX int8 num combinations is "
-            << p.num_combinations << std::endl;
-
-  // general query: types are not given
-  tpu_params<tpu::amx> myparams3;
-
-  if (myparams3.num_scopes > 0)
-    if (myparams3.scopes[0] == scope_t::sub_group)
-      std::cout << "There are " << myparams3.num_scopes
-                << " Scopes that are supported by AMX implementation and "
-                   "subgroup is one of them "
-                << std::endl;
-
-  std::cout << "AMX query num combinations: " << myparams3.num_combinations
-            << std::endl;
-
-  if (myparams3.combinations[0].msize != 0) // this is a max params hardware
-    return;
-  constexpr int msize = myparams3.combinations[0].max_msize;
-  constexpr int nsize = myparams3.combinations[0].max_nsize;
-  constexpr int ksize = myparams3.combinations[0].max_ksize;
-  std::cout << "AMX query sizes are: M " << msize << " N " << nsize << " K "
-            << ksize << std::endl;
-
-  size_t NDRangeM = 1024 / msize;
-  size_t NDRangeN = 1024 / nsize;
-  queue q;
-  q.submit([&](handler &cgh) {
-    cgh.parallel_for<class imatrix>(
-        nd_range<2>({NDRangeM, NDRangeN}, {1, 1}),
-        [msize, ksize, nsize](nd_item<2> spmd_item) {
-          sub_group sg = spmd_item.get_sub_group();
-          myparams2::joint_matrix_a<sub_group, layout::row_major> sub_a1;
-          myparams2::joint_matrix_b<
-              sub_group, sycl::ext::intel::experimental::matrix::layout::packed>
-              sub_b1;
-          myparams2::joint_matrix_accumulator<sub_group> sub_c1;
-
-          joint_matrix<sub_group, unsigned short, use::a, msize, ksize> sub_a;
-          joint_matrix<sub_group, unsigned short, use::b, ksize, nsize> sub_b;
-          joint_matrix<sub_group, float, use::accumulator, msize, nsize> sub_c;
-        });
-  });
-}
-
-void query_xmx8() {
-
-  // generates combination assert
-  // using myparams = tpu_params<tpu::xmx8, int, int, int, 2, 8, 32>;
-
-  // generate combination of type assert
-  // using myparams = tpu_params<tpu::xmx8, int, int, int>;
-
-  // tells whether a combination is valid or not, if valid, those will be set as
-  // default
-  using myparams = tpu_params<tpu::xmx8, int8_t, int8_t, int, 2, 8, 32>;
-
-  size_t dmsize = myparams::M;
-  size_t dnsize = myparams::N;
-  size_t dksize = myparams::K;
-  std::cout << "sizes of XMX8 tpu_params chosen by the user are: M " << dmsize
-            << " N " << dnsize << " K " << dksize << std::endl;
-
-  // sizes-only query: types are given, generate default sizes
-  using myparams2 = tpu_params<tpu::xmx8, int8_t, int8_t, int>;
-  myparams2 p;
-  dmsize = myparams2::M;
-  dnsize = myparams2::N;
-  dksize = myparams2::K;
-  std::cout << "Default XMX8 sizes  are: M " << dmsize << " N " << dnsize
-            << " K " << dksize << "\n XMX8 int8 num combinations is "
-            << p.num_combinations << std::endl;
-
-  dmsize = myparams2::combinations[0].msize;
-  dnsize = myparams2::combinations[0].nsize;
-  dksize = myparams2::combinations[0].ksize;
-  std::cout << "one of XMX8 combination sizes  is: M " << dmsize << " N "
-            << dnsize << " K " << dksize << std::endl;
-
-  // general query: types are not given
-  tpu_params<tpu::xmx8> myparams3;
-
-  if (myparams3.num_scopes > 0)
-    if (myparams3.scopes[0] == scope_t::sub_group)
-      std::cout << "There are " << myparams3.num_scopes
-                << " Scopes that are supported by XMX8 implementation and "
-                   "subgroup is one of them "
-                << std::endl;
-
-  std::cout << "XMX8 query num combinations: " << myparams3.num_combinations
-            << std::endl;
-
-  if (myparams3.combinations[0].msize == 0) // this is not a max params hardware
-    return;
-  constexpr int msize = myparams3.combinations[0].msize;
-  constexpr int nsize = myparams3.combinations[0].nsize;
-  constexpr int ksize = myparams3.combinations[0].ksize;
-  std::cout << "XMX8 query sizes are: M " << msize << " N " << nsize << " K "
-            << ksize << std::endl;
-  std::cout << "XMX8 query max sizes are: M "
-            << myparams3.combinations[0].max_msize << " N "
-            << myparams3.combinations[0].max_nsize << " K "
-            << myparams3.combinations[0].max_ksize << std::endl;
-
-  size_t NDRangeM = 1024 / msize;
-  size_t NDRangeN = 1024 / nsize;
-  queue q;
-  q.submit([&](handler &cgh) {
-    cgh.parallel_for<class dmatrix>(
-        nd_range<2>({NDRangeM, NDRangeN}, {1, 1}),
-        [msize, ksize, nsize](nd_item<2> spmd_item) {
-          sub_group sg = spmd_item.get_sub_group();
-          myparams2::joint_matrix_a<sub_group, layout::row_major> sub_a1;
-          myparams2::joint_matrix_b<
-              sub_group, sycl::ext::intel::experimental::matrix::layout::packed>
-              sub_b1;
-          myparams2::joint_matrix_accumulator<sub_group> sub_c1;
-
-          joint_matrix<sub_group, unsigned short, use::a, msize, ksize> sub_a;
-          joint_matrix<sub_group, unsigned short, use::b, ksize, nsize> sub_b;
-          joint_matrix<sub_group, float, use::accumulator, msize, nsize> sub_c;
-        });
-  });
-}
-
-int main() {
-  query_amx();
-  query_xmx8();
-  return 0;
-}
diff --git a/sycl/test/matrix/runtime-query.cpp b/sycl/test/matrix/runtime-query.cpp
new file mode 100644
index 0000000000000..64090faad4650
--- /dev/null
+++ b/sycl/test/matrix/runtime-query.cpp
@@ -0,0 +1,50 @@
+// RUN: %clangxx -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 -fsycl -o runtime-query %s
+// XFAIL: *
+
+#include <iostream>
+#include <sycl/sycl.hpp>
+
+using namespace sycl;
+using namespace sycl::ext::oneapi::experimental;
+using namespace sycl::ext::oneapi::experimental::matrix;
+
+template <matrix_type Ta, matrix_type Tb, matrix_type Tc, matrix_type Td>
+void matrix_runtime_query(queue q) {
+
+  std::vector<combination> combinations =
+      q.get_device().get_info<sycl::info::device::matrix::combinations>();
+
+  std::cout << "The matrix hardware implementation in this device provides "
+               "this number of combinations: "
+            << combinations.size() << std::endl;
+
+  bool max_sizes;
+  if (combinations[0].maxsize == 0)
+    max_sizes = true; // this is a max params hardware
+  else
+    max_sizes = false;
+  for (int i = 0; i < combinations.size(); i++) {
+    if (Ta == combinations[i].atype && Tb == combinations[i].btype &&
+        Tc == combinations[i].ctype && Td == combinations[i].dtype) {
+      // joint matrix GEMM kernel can be called using these sizes
+      if (max_sizes)
+        std::cout << "The matrix hardware implementation in this device "
+                     "provides the following max sizes are: M "
+                  << combinations[i].max_msize << " N "
+                  << combinations[i].max_nsize << " K "
+                  << combinations[i].max_ksize << std::endl;
+      else
+        std::cout << "The matrix hardware implementation in this device "
+                     "provides the following sizes are: M "
+                  << combinations[i].msize << " N " << combinations[i].nsize
+                  << " K " << combinations[i].ksize << std::endl;
+    }
+  }
+}
+
+int main() {
+  queue q;
+  matrix_runtime_query<matrix_type::bf16, matrix_type::bf16, matrix_type::float,
+                       matrix_type::float>(q);
+  return 0;
+}