Skip to content

[AUTOGENERATED] [release/2.7] [rocm6.4_internal_testing][SWDEV-535305] Fixed test_extra_cuda_context in test_c10d_nccl.py and refactored is_navi3_arch function #2382

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: release/2.7
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions test/distributed/test_c10d_nccl.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@
TEST_WITH_DEV_DBG_ASAN,
TEST_WITH_ROCM,
TestCase,
is_arch,
NAVI_ARCH,
)


Expand Down Expand Up @@ -625,6 +627,12 @@ def _helper_test_extra_cuda_context_by_memory(self):
# Rank 0 takes a snapshot before collective -- this snapshot should have
# included rank 0's own context.
if self.rank == 0:
# We need this extra sleep for NAVI_ARCH because rccl_init inside init_process_group
# is happening in a separate process and it is taking longer to finish on NAVI_ARCH.
# Sleeping here ensures that the init has completed successfully and mem_get_info can
# get stable numbers.
if is_arch(NAVI_ARCH):
time.sleep(5)
free, total = torch.cuda.mem_get_info(device)
used_before = float(total - free)

Expand Down
13 changes: 13 additions & 0 deletions test/inductor/test_decompose_mem_bound_mm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,13 @@
from torch.testing import FileCheck
from torch.testing._internal.common_utils import (
instantiate_parametrized_tests,
patch_test_members,
NAVI3_ARCH,
is_arch,
parametrize,
TEST_XPU,
)
Expand Down Expand Up @@ -49,6 +54,14 @@ def forward(self, input1, input2):
output = torch.mm(input1, input2)
return output

# We have to increase tolerance for navi3 because all fp16, bf16
# GEMM operations have an accuracy issue caused by hardware limitation
default_atol = 3e-3 if is_arch(NAVI3_ARCH) else 1e-3
default_rtol = 4e-3 if is_arch(NAVI3_ARCH) else 1e-3


@requires_gpu
@unittest.skipIf(
Expand Down
4 changes: 2 additions & 2 deletions torch/testing/_internal/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,11 @@
NAVI3_ARCH = ("gfx1100", "gfx1101")
NAVI4_ARCH = ("gfx1200", "gfx1201")

def is_navi3_arch():
def is_arch(arch_list):
if torch.cuda.is_available():
prop = torch.cuda.get_device_properties(0)
gfx_arch = prop.gcnArchName.split(":")[0]
if gfx_arch in NAVI3_ARCH:
if gfx_arch in arch_list:
return True
return False

Expand Down