From 5a14aed5da2dcdaedb0c0f0c170a1de89c5f2c54 Mon Sep 17 00:00:00 2001
From: Dmitry Nikolaev <139769634+dnikolaev-amd@users.noreply.github.com>
Date: Fri, 25 Jul 2025 02:21:17 +0200
Subject: [PATCH] [rocm7.0_internal_testing] remove extra transposes in NHWC convolutions on MIOpen (#2405)

remove aten::contiguous for NHWC convolutions

Tests:
- nn/test_convolution.py::TestConvolutionNNDeviceTypeCUDA::test_conv_cudnn_nhwc_cuda_float32
- nn/test_convolution.py::TestConvolutionNNDeviceTypeCUDA::test_conv_cudnn_nhwc_cuda_float16

Before: image

After: image

Fixes SWDEV-526887
---
 aten/src/ATen/native/miopen/Conv_miopen.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/aten/src/ATen/native/miopen/Conv_miopen.cpp b/aten/src/ATen/native/miopen/Conv_miopen.cpp
index f4e67e4fc307a..f9ac375c205ec 100644
--- a/aten/src/ATen/native/miopen/Conv_miopen.cpp
+++ b/aten/src/ATen/native/miopen/Conv_miopen.cpp
@@ -1200,7 +1200,7 @@ std::tuple<at::Tensor,at::Tensor,at::Tensor> miopen_convolution_transpose_backwa
     IntArrayRef padding, IntArrayRef output_padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups,
     bool benchmark, bool deterministic, std::array<bool,3> output_mask) {
 
-  Tensor grad_output = grad_output_t.contiguous();
+  Tensor grad_output = grad_output_t.contiguous(input.suggest_memory_format());
 
   Tensor grad_input, grad_weight, grad_bias;
   if (output_mask[0]) {
@@ -1452,7 +1452,7 @@ std::tuple<at::Tensor,at::Tensor,at::Tensor> miopen_depthwise_convolution_backwa
     IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups,
     bool benchmark, bool deterministic, std::array<bool,3> output_mask) {
 
-  Tensor grad_output = grad_output_t.contiguous();
+  Tensor grad_output = grad_output_t.contiguous(input.suggest_memory_format());
 
   Tensor grad_input, grad_weight, grad_bias;
   if (output_mask[0]) {