From 06accedf80015b7534eefdedc9ad8b8daf96b18a Mon Sep 17 00:00:00 2001
From: yuchengliu1
Date: Fri, 13 Sep 2024 16:23:34 +0800
Subject: [PATCH 1/3] update decomp

---
 test/xpu/test_decomp_xpu.py | 41 ++++++++++++++++++++++++++++++++++++-
 test/xpu/xpu_test_utils.py  |  2 ++
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/test/xpu/test_decomp_xpu.py b/test/xpu/test_decomp_xpu.py
index d659197d9..69bfb366c 100644
--- a/test/xpu/test_decomp_xpu.py
+++ b/test/xpu/test_decomp_xpu.py
@@ -2,7 +2,7 @@
 import torch
 from torch.testing._internal.common_device_type import instantiate_device_type_tests
-from torch.testing._internal.common_utils import run_tests
+from torch.testing._internal.common_utils import run_tests, skipIfCrossRef
 
 try:
     from xpu_test_utils import XPUPatchForImport
@@ -77,6 +77,45 @@ def _op_assert_ref(test_case, op, test_dtype, i, orig, decomp, ref, args, kwargs
     )
 test_decomp.op_assert_ref=_op_assert_ref
 
+@skipIfCrossRef
+def _test_amp_batch_norm_backward(self):
+    device = "xpu"
+    grad_out = torch.randn((1, 2, 16, 16), dtype=torch.float16, device=device)
+    x = torch.randn((1, 2, 16, 16), dtype=torch.float16, device=device)
+    weight = torch.randn((2,), dtype=torch.float32, device=device)
+    rmean = torch.randn((2,), dtype=torch.float32, device=device)
+    rvar = torch.randn((2,), dtype=torch.float32, device=device)
+    mean = torch.randn((0,), dtype=torch.float32, device=device)
+
+    ref = torch.ops.aten.native_batch_norm_backward(
+        grad_out,
+        x,
+        weight,
+        rmean,
+        rvar,
+        mean,
+        mean,
+        False,
+        1e-05,
+        [True, True, True],
+    )
+    res = torch._decomp.decompositions.native_batch_norm_backward(
+        grad_out,
+        x,
+        weight,
+        rmean,
+        rvar,
+        mean,
+        mean,
+        False,
+        1e-05,
+        [True, True, True],
+    )
+    for a, b in zip(ref, res):
+        self.assertEqual(a.stride(), b.stride())
+        self.assertEqual(a.dtype, b.dtype)
+DecompOneOffTests.test_amp_batch_norm_backward=_test_amp_batch_norm_backward
+
 instantiate_device_type_tests(TestDecomp, globals(), only_for="xpu", allow_xpu=True)
 instantiate_device_type_tests(DecompOneOffTests, globals(), only_for="xpu", allow_xpu=True)

diff --git a/test/xpu/xpu_test_utils.py b/test/xpu/xpu_test_utils.py
index 7f15c44b1..7662d2ccd 100644
--- a/test/xpu/xpu_test_utils.py
+++ b/test/xpu/xpu_test_utils.py
@@ -265,6 +265,8 @@
     ("narrow_copy","test_meta_outplace"),
     ("narrow_copy","test_dispatch_meta_outplace"),
     ("narrow_copy","test_dispatch_symbolic_meta_outplace"),
+    ("logspace","test_quick"),
+    ("logspace","test_comprehensive"),
 ]
 
 # some case should adjust tolerance to pass.
From 634620c143ae4f2861446809d381978579423beb Mon Sep 17 00:00:00 2001
From: yuchengliu1
Date: Sat, 14 Sep 2024 11:58:59 +0800
Subject: [PATCH 2/3] adjust tolerance

---
 test/xpu/test_decomp_xpu.py | 65 +++++++++++++++++++++++++++++++++++--
 1 file changed, 63 insertions(+), 2 deletions(-)

diff --git a/test/xpu/test_decomp_xpu.py b/test/xpu/test_decomp_xpu.py
index 69bfb366c..ab7905b5b 100644
--- a/test/xpu/test_decomp_xpu.py
+++ b/test/xpu/test_decomp_xpu.py
@@ -2,7 +2,7 @@
 import torch
 from torch.testing._internal.common_device_type import instantiate_device_type_tests
-from torch.testing._internal.common_utils import run_tests, skipIfCrossRef
+from torch.testing._internal.common_utils import run_tests, skipIfCrossRef, _getDefaultRtolAndAtol
 
 try:
     from xpu_test_utils import XPUPatchForImport
@@ -49,7 +49,7 @@ def _op_assert_ref(test_case, op, test_dtype, i, orig, decomp, ref, args, kwargs
         (torch.float16, torch.ops.aten.reflection_pad1d_backward.default): 5e-3,
         (torch.bfloat16, torch.ops.aten.reflection_pad1d_backward.default): 5e-3,
         (torch.float16, torch.ops.aten.reflection_pad2d_backward.default): 5e-3,
-        (torch.bfloat16, torch.ops.aten.reflection_pad2d_backward.default): 5e-3,
+        (torch.bfloat16, torch.ops.aten.reflection_pad2d_backward.default): 7e-3, # adjust tolerance for xpu, so hook this func
         (torch.float16, torch.ops.aten.reflection_pad3d_backward.default): 5e-3,
         (torch.bfloat16, torch.ops.aten.reflection_pad3d_backward.default): 5e-2,
         # see https://github.com/pytorch/pytorch/pull/96264
@@ -77,6 +77,67 @@ def _op_assert_ref(test_case, op, test_dtype, i, orig, decomp, ref, args, kwargs
     )
 test_decomp.op_assert_ref=_op_assert_ref
 
+def _op_assert_equal(test_case, op, test_dtype, orig, decomp, args, kwargs):
+    test_case.assertEqual(
+        orig.dtype,
+        decomp.dtype,
+        f"Operation: {op}, orig.dtype: {orig.dtype}, decomp.dtype: {decomp.dtype}, {args}, {kwargs}",
+    )
+    # Before adding an entry to this table, make sure your decomposition is right :)
+    tol_table = {
+        # Due to strange epsilon behaviors, see https://github.com/pytorch/pytorch/issues/73161
+        (torch.float32, torch.ops.aten.native_layer_norm.default): (1e-3, 1e-3),
+        (torch.float32, torch.ops.aten.native_layer_norm_backward.default): (
+            1e-3,
+            1e-3,
+        ),
+        (torch.float64, torch.ops.aten.native_layer_norm.default): (1e-6, 1e-6),
+        # This exceeds default tolerances only on CPU, on CUDA it's fine
+        (torch.float32, torch.ops.aten.grid_sampler_2d.default): (7e-6, 3e-5),
+        # Exceeds tolerances on CUDA, likely due to fma
+        (torch.float32, torch.ops.aten.mv.default): (1e-5, 3e-5),
+        (torch.complex64, torch.ops.aten.mv.default): (5e-5, 5e-5),
+        (torch.float64, torch.ops.aten.upsample_bicubic2d.vec): (1e-5, 5e-4),
+        (torch.float64, torch.ops.aten.upsample_bicubic2d.default): (1e-5, 5e-4),
+        # The decomposition is TOO correct. It computes everything in int64, so sometimes
+        # there's an off-by-one error. See
+        # https://github.com/pytorch/pytorch/issues/81996
+        # https://github.com/pytorch/pytorch/issues/82230
+        (torch.int8, torch.ops.aten.linspace.default): (0, 1),
+        (torch.uint8, torch.ops.aten.linspace.default): (0, 1),
+        (torch.int16, torch.ops.aten.linspace.default): (0, 1),
+        (torch.int32, torch.ops.aten.linspace.default): (0, 1),
+        (torch.int64, torch.ops.aten.linspace.default): (0, 1),
+        (torch.int8, torch.ops.aten.linspace.Tensor_Tensor): (0, 1),
+        (torch.uint8, torch.ops.aten.linspace.Tensor_Tensor): (0, 1),
+        (torch.int16, torch.ops.aten.linspace.Tensor_Tensor): (0, 1),
+        (torch.int32, torch.ops.aten.linspace.Tensor_Tensor): (0, 1),
+        (torch.int64, torch.ops.aten.linspace.Tensor_Tensor): (0, 1),
+        (torch.int8, torch.ops.aten.linspace.Tensor_Scalar): (0, 1),
+        (torch.uint8, torch.ops.aten.linspace.Tensor_Scalar): (0, 1),
+        (torch.int16, torch.ops.aten.linspace.Tensor_Scalar): (0, 1),
+        (torch.int32, torch.ops.aten.linspace.Tensor_Scalar): (0, 1),
+        (torch.int64, torch.ops.aten.linspace.Tensor_Scalar): (0, 1),
+        (torch.int8, torch.ops.aten.linspace.Scalar_Tensor): (0, 1),
+        (torch.uint8, torch.ops.aten.linspace.Scalar_Tensor): (0, 1),
+        (torch.int16, torch.ops.aten.linspace.Scalar_Tensor): (0, 1),
+        (torch.int32, torch.ops.aten.linspace.Scalar_Tensor): (0, 1),
+        (torch.int64, torch.ops.aten.linspace.Scalar_Tensor): (0, 1),
+        (torch.float64,torch.ops.aten._native_batch_norm_legit.default):(3e-7,5e-7), # adjust tolerance for xpu, so hook this func
+    }
+    if (decomp.dtype, op) in tol_table:
+        rtol, atol = tol_table[(decomp.dtype, op)]
+    else:
+        rtol, atol = _getDefaultRtolAndAtol(orig.dtype, decomp.dtype)
+    test_case.assertEqual(
+        orig,
+        decomp,
+        rtol=rtol,
+        atol=atol,
+        msg=f"{op.__name__}\nargs = {args}\nkwargs = {kwargs}",
+    )
+test_decomp.op_assert_equal=_op_assert_equal
+
 @skipIfCrossRef
 def _test_amp_batch_norm_backward(self):
     device = "xpu"

From 3c367aaf173eda79491d9e965e8bdf72f6a19b59 Mon Sep 17 00:00:00 2001
From: Cheng Penghui
Date: Tue, 19 Nov 2024 08:18:25 +0000
Subject: [PATCH 3/3] fixed import error

Signed-off-by: Cheng Penghui
---
 test/xpu/skip_list_common.py | 1 +
 test/xpu/test_decomp_xpu.py  | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/test/xpu/skip_list_common.py b/test/xpu/skip_list_common.py
index 3b54bfc27..3085a53c5 100644
--- a/test/xpu/skip_list_common.py
+++ b/test/xpu/skip_list_common.py
@@ -3624,4 +3624,5 @@
         "test_sparse_mm_xpu_float64", # - NotImplementedError: Could not run 'aten::addmm' with arguments from the 'SparseXPU' backend. This could be because the operator doesn't exist for this backend, or wa...
         "test_sparse_sum_xpu_float64", # - NotImplementedError: Could not run 'aten::_sparse_sum_backward' with arguments from the 'SparseXPU' backend. This could be because the operator doesn't exist for this...
     ),
+    "test_decomp_xpu.py": None,
 }
diff --git a/test/xpu/test_decomp_xpu.py b/test/xpu/test_decomp_xpu.py
index ab7905b5b..512aa41ee 100644
--- a/test/xpu/test_decomp_xpu.py
+++ b/test/xpu/test_decomp_xpu.py
@@ -2,7 +2,7 @@
 import torch
 from torch.testing._internal.common_device_type import instantiate_device_type_tests
-from torch.testing._internal.common_utils import run_tests, skipIfCrossRef, _getDefaultRtolAndAtol
+from torch.testing._internal.common_utils import run_tests, skipIfCrossRef
 
 try:
     from xpu_test_utils import XPUPatchForImport
@@ -11,7 +11,7 @@
 
 with XPUPatchForImport(False):
     import test_decomp
-    from test_decomp import TestDecomp,DecompOneOffTests
+    from test_decomp import TestDecomp,DecompOneOffTests, _getDefaultRtolAndAtol
 
 def _op_assert_ref(test_case, op, test_dtype, i, orig, decomp, ref, args, kwargs):
     assert orig.dtype == decomp.dtype, f"{i} Operation: {op}"