IntelPython
diff --git a/‎CHANGELOG.md
Lines changed: 1 addition & 0 deletions b/‎CHANGELOG.md
Lines changed: 1 addition & 0 deletions
diff --git a/‎dpnp/backend/extensions/blas/CMakeLists.txt
Lines changed: 2 additions & 0 deletions b/‎dpnp/backend/extensions/blas/CMakeLists.txt
Lines changed: 2 additions & 0 deletions
diff --git a/‎dpnp/backend/extensions/blas/blas_py.cpp
Lines changed: 16 additions & 6 deletions b/‎dpnp/backend/extensions/blas/blas_py.cpp
Lines changed: 16 additions & 6 deletions
diff --git a/‎dpnp/backend/extensions/blas/dot_common.hpp
Lines changed: 2 additions & 1 deletion b/‎dpnp/backend/extensions/blas/dot_common.hpp
Lines changed: 2 additions & 1 deletion
diff --git a/‎dpnp/backend/extensions/blas/gemm.cpp
Lines changed: 7 additions & 4 deletions b/‎dpnp/backend/extensions/blas/gemm.cpp
Lines changed: 7 additions & 4 deletions
diff --git a/‎dpnp/backend/extensions/blas/gemm_batch.cpp
Lines changed: 2 additions & 1 deletion b/‎dpnp/backend/extensions/blas/gemm_batch.cpp
Lines changed: 2 additions & 1 deletion
diff --git a/‎dpnp/backend/extensions/blas/gemv.cpp
Lines changed: 24 additions & 25 deletions b/‎dpnp/backend/extensions/blas/gemv.cpp
Lines changed: 24 additions & 25 deletions
diff --git a/‎dpnp/backend/extensions/blas/gemv.hpp
Lines changed: 0 additions & 1 deletion b/‎dpnp/backend/extensions/blas/gemv.hpp
Lines changed: 0 additions & 1 deletion
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 * Added `--target-cuda[=ARCH]` option to replace the deprecated `--target=cuda`, allowing users to build for CUDA devices with optional architecture selection using [CodePlay oneAPI plug-in](https://developer.codeplay.com/products/oneapi/nvidia/home/) [#2478](https://github.com/IntelPython/dpnp/pull/2478)
 * Added several new `pre-commit` rules, including protection against direct commits to master/maintenance branches [#2500](https://github.com/IntelPython/dpnp/pull/2500)
 * Added implementation of `dpnp.ndarray.view` method [#2520](https://github.com/IntelPython/dpnp/pull/2520)
+* Added a new backend routine `syrk` from oneMKL to perform a symmetric rank-k update, which is used for a specialized matrix multiplication where the result is a symmetric matrix [#2509](https://github.com/IntelPython/dpnp/pull/2509)
 
 ### Changed
 
 
@@ -30,6 +30,7 @@ set(_module_src
     ${CMAKE_CURRENT_SOURCE_DIR}/gemm.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/gemm_batch.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/gemv.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/syrk.cpp
 )
 
 pybind11_add_module(${python_module_name} MODULE ${_module_src})
@@ -61,6 +62,7 @@ set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDEN
 
 target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include)
 target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src)
+target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common)
 
 target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS})
 target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR})
 
@@ -36,6 +36,7 @@
 #include "dotu.hpp"
 #include "gemm.hpp"
 #include "gemv.hpp"
+#include "syrk.hpp"
 
 namespace blas_ns = dpnp::extensions::blas;
 namespace py = pybind11;
@@ -48,6 +49,7 @@ void init_dispatch_vectors_tables(void)
     blas_ns::init_gemm_batch_dispatch_table();
     blas_ns::init_gemm_dispatch_table();
     blas_ns::init_gemv_dispatch_vector();
+    blas_ns::init_syrk_dispatch_vector();
 }
 
 static dot_impl_fn_ptr_t dot_dispatch_vector[dpctl_td_ns::num_types];
@@ -73,7 +75,7 @@ PYBIND11_MODULE(_blas_impl, m)
         };
 
         m.def("_dot", dot_pyapi,
-              "Call `dot` from OneMKL BLAS library to compute "
+              "Call `dot` from oneMKL BLAS library to compute "
               "the dot product of two real-valued vectors.",
               py::arg("sycl_queue"), py::arg("vectorA"), py::arg("vectorB"),
               py::arg("result"), py::arg("depends") = py::list());
@@ -91,7 +93,7 @@ PYBIND11_MODULE(_blas_impl, m)
         };
 
         m.def("_dotc", dotc_pyapi,
-              "Call `dotc` from OneMKL BLAS library to compute "
+              "Call `dotc` from oneMKL BLAS library to compute "
               "the dot product of two complex vectors, "
               "conjugating the first vector.",
               py::arg("sycl_queue"), py::arg("vectorA"), py::arg("vectorB"),
@@ -110,37 +112,45 @@ PYBIND11_MODULE(_blas_impl, m)
         };
 
         m.def("_dotu", dotu_pyapi,
-              "Call `dotu` from OneMKL BLAS library to compute "
+              "Call `dotu` from oneMKL BLAS library to compute "
               "the dot product of two complex vectors.",
               py::arg("sycl_queue"), py::arg("vectorA"), py::arg("vectorB"),
               py::arg("result"), py::arg("depends") = py::list());
     }
 
     {
         m.def("_gemm", &blas_ns::gemm,
-              "Call `gemm` from OneMKL BLAS library to compute "
+              "Call `gemm` from oneMKL BLAS library to compute "
               "the matrix-matrix product with 2-D matrices.",
               py::arg("sycl_queue"), py::arg("matrixA"), py::arg("matrixB"),
               py::arg("resultC"), py::arg("depends") = py::list());
     }
 
     {
         m.def("_gemm_batch", &blas_ns::gemm_batch,
-              "Call `gemm_batch` from OneMKL BLAS library to compute "
+              "Call `gemm_batch` from oneMKL BLAS library to compute "
               "the matrix-matrix product for a batch of 2-D matrices.",
               py::arg("sycl_queue"), py::arg("matrixA"), py::arg("matrixB"),
               py::arg("resultC"), py::arg("depends") = py::list());
     }
 
     {
         m.def("_gemv", &blas_ns::gemv,
-              "Call `gemv` from OneMKL BLAS library to compute "
+              "Call `gemv` from oneMKL BLAS library to compute "
               "the matrix-vector product with a general matrix.",
               py::arg("sycl_queue"), py::arg("matrixA"), py::arg("vectorX"),
               py::arg("vectorY"), py::arg("transpose"),
               py::arg("depends") = py::list());
     }
 
+    {
+        // Python binding for `blas_ns::syrk` (symmetric rank-k update).
+        // Takes the queue, the input matrix A, the output matrix C and an
+        // optional list of events to depend on.
+        m.def("_syrk", &blas_ns::syrk,
+              "Call `syrk` from oneMKL BLAS library to compute "
+              "the symmetric rank-k update with a general matrix.",
+              py::arg("sycl_queue"), py::arg("matrixA"), py::arg("resultC"),
+              py::arg("depends") = py::list());
+    }
+
     {
         m.def(
             "_using_onemath",
 
@@ -128,7 +128,8 @@ std::pair<sycl::event, sycl::event>
     dot_impl_fn_ptr_t dot_fn = dot_dispatch_vector[type_id];
     if (dot_fn == nullptr) {
         throw py::value_error(
-            "Types of input vectors and result array are mismatched.");
+            "No dot implementation is available for the specified data type "
+            "of the input and output arrays.");
     }
 
     char *x_typeless_ptr = vectorX.get_data();
 
@@ -119,8 +119,7 @@ static sycl::event gemm_impl(sycl::queue &exec_q,
             Tab(1), // Scaling factor for the product of matrices A and B.
             a,      // Pointer to matrix A.
             lda,    // Leading dimension of matrix A, which is the
-                    // stride between successive rows (for row major
-                    // layout).
+                    // stride between successive rows (for row major layout).
             b,      // Pointer to matrix B.
             ldb,    // Leading dimension of matrix B, similar to lda.
             Tab(0), // Scaling factor for matrix C.
@@ -158,7 +157,8 @@ std::tuple<sycl::event, sycl::event, bool>
     const int resultC_nd = resultC.get_ndim();
 
     if ((matrixA_nd != 2) || (matrixB_nd != 2) || (resultC_nd != 2)) {
-        throw py::value_error("Input matrices must be two-dimensional.");
+        throw py::value_error(
+            "Input and output matrices must be two-dimensional.");
     }
 
     auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
@@ -276,6 +276,8 @@ std::tuple<sycl::event, sycl::event, bool>
         }
     }
     else {
+        // both A and B are f_contig so using column-major gemm and
+        // no transpose is needed
         transA = oneapi::mkl::transpose::N;
         transB = oneapi::mkl::transpose::N;
         lda = m;
@@ -303,7 +305,8 @@ std::tuple<sycl::event, sycl::event, bool>
         gemm_dispatch_table[matrixAB_type_id][resultC_type_id];
     if (gemm_fn == nullptr) {
         throw py::value_error(
-            "Types of input matrices and result matrix are mismatched.");
+            "No gemm implementation is available for the specified data type "
+            "of the input and output arrays.");
     }
 
     const char *a_typeless_ptr = matrixA.get_data();
 
@@ -379,7 +379,8 @@ std::tuple<sycl::event, sycl::event, bool>
         gemm_batch_dispatch_table[matrixAB_type_id][resultC_type_id];
     if (gemm_batch_fn == nullptr) {
         throw py::value_error(
-            "Types of input matrices and result matrix are mismatched.");
+            "No gemm_batch implementation is available for the specified data "
+            "type of the input and output arrays.");
     }
 
     const char *a_typeless_ptr = matrixA.get_data();
 
@@ -109,8 +109,7 @@ static sycl::event gemv_impl(sycl::queue &exec_q,
             T(1),   // Scaling factor for the matrix-vector product.
             a,      // Pointer to the input matrix A.
             lda,    // Leading dimension of matrix A, which is the
-                    // stride between successive rows (for row major
-                    // layout).
+                    // stride between successive rows (for row major layout).
             x,      // Pointer to the input vector x.
             incx,   // The stride of vector x.
             T(0),   // Scaling factor for vector y.
@@ -181,6 +180,26 @@ std::pair<sycl::event, sycl::event>
     const py::ssize_t *a_shape = matrixA.get_shape_raw();
     const py::ssize_t *x_shape = vectorX.get_shape_raw();
     const py::ssize_t *y_shape = vectorY.get_shape_raw();
+    if (transpose) {
+        if (a_shape[0] != x_shape[0]) {
+            throw py::value_error("The number of rows in A must be equal to "
+                                  "the number of elements in X.");
+        }
+        if (a_shape[1] != y_shape[0]) {
+            throw py::value_error("The number of columns in A must be equal to "
+                                  "the number of elements in Y.");
+        }
+    }
+    else {
+        if (a_shape[1] != x_shape[0]) {
+            throw py::value_error("The number of columns in A must be equal to "
+                                  "the number of elements in X.");
+        }
+        if (a_shape[0] != y_shape[0]) {
+            throw py::value_error("The number of rows in A must be equal to "
+                                  "the number of elements in Y.");
+        }
+    }
 
     oneapi::mkl::transpose transA;
     std::size_t src_nelems;
@@ -234,27 +253,6 @@ std::pair<sycl::event, sycl::event>
     }
 #endif // USE_ONEMATH_CUBLAS
 
-    if (transpose) {
-        if (a_shape[0] != x_shape[0]) {
-            throw py::value_error("The number of rows in A must be equal to "
-                                  "the number of elements in X.");
-        }
-        if (a_shape[1] != y_shape[0]) {
-            throw py::value_error("The number of columns in A must be equal to "
-                                  "the number of elements in Y.");
-        }
-    }
-    else {
-        if (a_shape[1] != x_shape[0]) {
-            throw py::value_error("The number of columns in A must be equal to "
-                                  "the number of elements in X.");
-        }
-        if (a_shape[0] != y_shape[0]) {
-            throw py::value_error("The number of rows in A must be equal to "
-                                  "the number of elements in Y.");
-        }
-    }
-
     const std::int64_t lda = is_row_major ? n : m;
     dpctl::tensor::validation::CheckWritable::throw_if_not_writable(vectorY);
     dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(vectorY,
@@ -275,10 +273,11 @@ std::pair<sycl::event, sycl::event>
     gemv_impl_fn_ptr_t gemv_fn = gemv_dispatch_vector[type_id];
     if (gemv_fn == nullptr) {
         throw py::value_error(
-            "Types of input arrays and result array are mismatched.");
+            "No gemv implementation is available for the specified data type "
+            "of the input and output arrays.");
     }
 
-    char *a_typeless_ptr = matrixA.get_data();
+    const char *a_typeless_ptr = matrixA.get_data();
     char *x_typeless_ptr = vectorX.get_data();
     char *y_typeless_ptr = vectorY.get_data();
 
 
@@ -41,5 +41,4 @@ extern std::pair<sycl::event, sycl::event>
          const std::vector<sycl::event> &depends);
 
 extern void init_gemv_dispatch_vector(void);
-extern void init_gemv_batch_dispatch_vector(void);
 } // namespace dpnp::extensions::blas
Original file line number	Diff line number	Diff line change
`@@ -128,7 +128,8 @@ std::pair<sycl::event, sycl::event>`
`128`	`128`	`dot_impl_fn_ptr_t dot_fn = dot_dispatch_vector[type_id];`
`129`	`129`	`if (dot_fn == nullptr) {`
`130`	`130`	`throw py::value_error(`
`131`		`- "Types of input vectors and result array are mismatched.");`
	`131`	`+ "No dot implementation is available for the specified data type "`
	`132`	`+ "of the input and output arrays.");`
`132`	`133`	`}`
`133`	`134`
`134`	`135`	`char *x_typeless_ptr = vectorX.get_data();`
Original file line number	Diff line number	Diff line change
`@@ -379,7 +379,8 @@ std::tuple<sycl::event, sycl::event, bool>`
`379`	`379`	`gemm_batch_dispatch_table[matrixAB_type_id][resultC_type_id];`
`380`	`380`	`if (gemm_batch_fn == nullptr) {`
`381`	`381`	`throw py::value_error(`
`382`		`- "Types of input matrices and result matrix are mismatched.");`
	`382`	`+ "No gemm_batch implementation is available for the specified data "`
	`383`	`+ "type of the input and output arrays.");`
`383`	`384`	`}`
`384`	`385`
`385`	`386`	`const char *a_typeless_ptr = matrixA.get_data();`