diff --git a/patch/npu.patch b/patch/npu.patch
index b6dbaf17916558163079fafc06869e8841545d73..8bdfe9cd9ed0f583132fa4ffd29f6e9898a9f8d0 100644
--- a/patch/npu.patch
+++ b/patch/npu.patch
@@ -1,6 +1,6 @@
diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/CMakeLists.txt pytorch-develop/aten/CMakeLists.txt
--- pytorch-v1.5.0/aten/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800
-+++ pytorch-develop/aten/CMakeLists.txt 2021-07-23 18:20:43.601371780 +0800
++++ pytorch-develop/aten/CMakeLists.txt 2021-07-26 21:32:24.439091701 +0800
@@ -22,8 +22,10 @@
set(ATen_CPU_INCLUDE)
set(ATen_THIRD_PARTY_INCLUDE)
@@ -51,7 +51,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=
set(ATen_CPU_DEPENDENCY_LIBS ${ATen_CPU_DEPENDENCY_LIBS} PARENT_SCOPE)
diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/CMakeLists.txt pytorch-develop/aten/src/ATen/CMakeLists.txt
--- pytorch-v1.5.0/aten/src/ATen/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800
-+++ pytorch-develop/aten/src/ATen/CMakeLists.txt 2021-07-23 18:20:43.605371924 +0800
++++ pytorch-develop/aten/src/ATen/CMakeLists.txt 2021-07-26 21:32:24.439091701 +0800
@@ -67,6 +67,9 @@
FILE(GLOB native_quantized_h "native/quantized/*.h" "native/quantized/cpu/*.h")
FILE(GLOB native_cpu_h "native/cpu/*.h")
@@ -129,7 +129,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=
set(ATen_QUANTIZED_SRCS ${ATen_QUANTIZED_SRCS} PARENT_SCOPE)
diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/core/dispatch/DispatchTable.h pytorch-develop/aten/src/ATen/core/dispatch/DispatchTable.h
--- pytorch-v1.5.0/aten/src/ATen/core/dispatch/DispatchTable.h 2021-04-10 18:39:32.000000000 +0800
-+++ pytorch-develop/aten/src/ATen/core/dispatch/DispatchTable.h 2021-07-23 18:20:43.609372067 +0800
++++ pytorch-develop/aten/src/ATen/core/dispatch/DispatchTable.h 2021-07-26 21:32:24.447091987 +0800
@@ -1,3 +1,19 @@
+// Copyright (c) 2020 Huawei Technologies Co., Ltd
+// Copyright (c) 2019, Facebook CORPORATION.
@@ -170,7 +170,7 @@
}
diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/function_wrapper.py pytorch-develop/aten/src/ATen/function_wrapper.py
--- pytorch-v1.5.0/aten/src/ATen/function_wrapper.py 2021-04-10 18:39:32.000000000 +0800
-+++ pytorch-develop/aten/src/ATen/function_wrapper.py 2021-07-23 18:20:43.617372354 +0800
++++ pytorch-develop/aten/src/ATen/function_wrapper.py 2021-07-26 21:32:24.455092274 +0800
@@ -1,3 +1,19 @@
+# Copyright (c) 2020 Huawei Technologies Co., Ltd
+# Copyright (c) 2019, Facebook CORPORATION.
@@ -354,7 +354,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=
for option in declaration['options']:
diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/gen.py pytorch-develop/aten/src/ATen/gen.py
--- pytorch-v1.5.0/aten/src/ATen/gen.py 2021-04-10 18:39:32.000000000 +0800
-+++ pytorch-develop/aten/src/ATen/gen.py 2021-07-23 18:20:43.617372354 +0800
++++ pytorch-develop/aten/src/ATen/gen.py 2021-07-26 21:32:24.455092274 +0800
@@ -1,3 +1,18 @@
+# Copyright (c) 2020 Huawei Technologies Co., Ltd
+# Copyright (c) 2019, Facebook CORPORATION.
@@ -512,7 +512,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=
generate_outputs()
diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/cpu/Activation.cpp pytorch-develop/aten/src/ATen/native/cpu/Activation.cpp
--- pytorch-v1.5.0/aten/src/ATen/native/cpu/Activation.cpp 2021-04-10 18:39:32.000000000 +0800
-+++ pytorch-develop/aten/src/ATen/native/cpu/Activation.cpp 2021-07-23 18:20:43.629372785 +0800
++++ pytorch-develop/aten/src/ATen/native/cpu/Activation.cpp 2021-07-26 21:32:24.467092704 +0800
@@ -339,20 +339,20 @@
void hardsigmoid_backward_kernel(TensorIterator& iter) {
@@ -540,7 +540,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=
});
diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/Memory.cpp pytorch-develop/aten/src/ATen/native/Memory.cpp
--- pytorch-v1.5.0/aten/src/ATen/native/Memory.cpp 2021-04-10 18:39:32.000000000 +0800
-+++ pytorch-develop/aten/src/ATen/native/Memory.cpp 2021-07-23 18:20:43.625372640 +0800
++++ pytorch-develop/aten/src/ATen/native/Memory.cpp 2021-07-26 21:32:24.459092417 +0800
@@ -1,3 +1,19 @@
+// Copyright (c) 2020 Huawei Technologies Co., Ltd
+// Copyright (c) 2019, Facebook CORPORATION.
@@ -595,7 +595,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=
detail::computeStorageSize(self.sizes(), self.strides()),
diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/native_functions.yaml pytorch-develop/aten/src/ATen/native/native_functions.yaml
--- pytorch-v1.5.0/aten/src/ATen/native/native_functions.yaml 2021-04-10 18:39:32.000000000 +0800
-+++ pytorch-develop/aten/src/ATen/native/native_functions.yaml 2021-07-23 18:20:43.645373358 +0800
++++ pytorch-develop/aten/src/ATen/native/native_functions.yaml 2021-07-26 21:32:24.483093277 +0800
@@ -1,6 +1,5 @@
# See README.md in this directory for more guidance
@@ -1291,7 +1291,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=
- func: cudnn_is_acceptable(Tensor self) -> bool
use_c10_dispatcher: full
-@@ -751,38 +926,58 @@
+@@ -751,46 +926,70 @@
- func: constant_pad_nd(Tensor self, int[] pad, Scalar value=0) -> Tensor
variants: function
+ npu_dispatch:
+ NPU: constant_pad_nd_npu
- func: contiguous(Tensor(a) self, *, MemoryFormat memory_format=contiguous_format) -> Tensor(a)
variants: method
+ npu_dispatch:
+ NPU: contiguous_npu
- func: convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups) -> Tensor
+ npu_dispatch:
+ NPU: convolution_npu
- func: convolution_overrideable(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups) -> Tensor
use_c10_dispatcher: full
+ npu_dispatch:
+ NPU: convolution_overrideable_npu
- func: convolution_backward_overrideable(Tensor grad_output, Tensor input, Tensor weight, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
use_c10_dispatcher: full
+ npu_dispatch:
+ NPU: convolution_backward_overrideable_npu
- func: _convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled) -> Tensor
+ npu_dispatch:
+ NPU: _convolution_npu
- func: _convolution_nogroup(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding) -> Tensor
+ npu_dispatch:
+ NPU: _convolution_nogroup_npu
- func: conv1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, int[1] padding=0, int[1] dilation=1, int groups=1) -> Tensor
+ npu_dispatch:
+ NPU: conv1d_npu
- func: conv2d(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] dilation=1, int groups=1) -> Tensor
+ npu_dispatch:
+ NPU: conv2d_npu
- func: conv3d(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] dilation=1, int groups=1) -> Tensor
+ npu_dispatch:
+ NPU: conv3d_npu
- func: conv_tbc(Tensor self, Tensor weight, Tensor bias, int pad=0) -> Tensor
use_c10_dispatcher: full
+ npu_dispatch:
+ NPU: conv_tbc_npu
- func: conv_tbc_backward(Tensor self, Tensor input, Tensor weight, Tensor bias, int pad) -> (Tensor, Tensor, Tensor)
use_c10_dispatcher: full
+ npu_dispatch:
+ NPU: conv_tbc_backward_npu
- func: conv_transpose1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, int[1] padding=0, int[1] output_padding=0, int groups=1, int[1] dilation=1) -> Tensor
+ npu_dispatch:
+ NPU: conv_transpose1d_npu
- func: conv_transpose2d.input(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] output_padding=0, int groups=1, int[2] dilation=1) -> Tensor
+ npu_dispatch:
+ NPU: conv_transpose2d_npu_
- func: conv_transpose3d.input(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] output_padding=0, int groups=1, int[3] dilation=1) -> Tensor
++ npu_dispatch:
++ NPU: conv_transpose3d_npu_
- func: copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!)
+ manual_kernel_registration: True
variants: method
device_guard: False
supports_named_tensor: True
+ npu_dispatch:
+ NPU: copy_npu_
- func: _copy_from(Tensor self, Tensor dst, bool non_blocking=False) -> Tensor
use_c10_dispatcher: full
-@@ -800,6 +997,8 @@
+@@ -800,6 +999,8 @@
use_c10_dispatcher: full
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: cos_npu
- func: cos_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
-@@ -807,17 +1006,23 @@
+@@ -807,17 +1008,23 @@
dispatch:
CPU: _cos__cpu
CUDA: _cos__cuda
+ npu_dispatch:
+ NPU: cos_npu_
- func: cos.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
dispatch:
CPU: _cos_out_cpu
CUDA: _cos_out_cuda
+ npu_dispatch:
+ NPU: cos_out_npu
- func: cosh(Tensor self) -> Tensor
use_c10_dispatcher: full
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: cosh_npu
- func: cosh_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
-@@ -825,12 +1030,16 @@
+@@ -825,12 +1032,16 @@
dispatch:
CPU: _cosh__cpu
CUDA: _cosh__cuda
+ npu_dispatch:
+ NPU: cosh_npu_
- func: cosh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
dispatch:
CPU: _cosh_out_cpu
CUDA: _cosh_out_cuda
+ npu_dispatch:
+ NPU: cosh_out_npu
- func: cosine_embedding_loss(Tensor input1, Tensor input2, Tensor target, float margin=0.0, int reduction=Mean) -> Tensor
use_c10_dispatcher: full
-@@ -897,6 +1106,50 @@
+@@ -897,6 +1108,50 @@
dispatch:
CUDA: cudnn_convolution_transpose_backward_weight
+ npu_dispatch:
+ NPU: cudnn_convolution_transpose_backward_weight_npu
# NB: input is special cased in a way I don't quite understand
- func: cudnn_grid_sampler(Tensor self, Tensor grid) -> Tensor output
use_c10_dispatcher: full
-@@ -930,16 +1183,24 @@
+@@ -930,16 +1185,24 @@
- func: cummin(Tensor self, int dim) -> (Tensor values, Tensor indices)
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: cummin_npu
- func: cummin.out(Tensor self, int dim, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: cummin_out_npu
- func: _cummin_helper(Tensor self, Tensor(a!) values, Tensor(b!) indices, int dim) -> ()
variants: function
-@@ -950,16 +1211,24 @@
+@@ -950,16 +1213,24 @@
- func: cumprod(Tensor self, int dim, *, ScalarType? dtype=None) -> Tensor
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: cumprod_npu
- func: cumprod.out(Tensor self, int dim, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: cumprod_out_npu
- func: cumsum(Tensor self, int dim, *, ScalarType? dtype=None) -> Tensor
supports_named_tensor: True
-@@ -976,20 +1245,28 @@
+@@ -976,20 +1247,28 @@
supports_named_tensor: True
- func: ctc_loss.IntList(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank=0, int reduction=Mean, bool zero_infinity=False) -> Tensor
+ npu_dispatch:
+ NPU: ctc_loss_npu
- func: ctc_loss.Tensor(Tensor log_probs, Tensor targets, Tensor input_lengths, Tensor target_lengths, int blank=0, int reduction=Mean, bool zero_infinity=False) -> Tensor
+ npu_dispatch:
+ NPU: ctc_loss_npu
- func: _ctc_loss(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank=0, bool zero_infinity=False) -> (Tensor, Tensor)
dispatch:
CPU: ctc_loss_cpu
CUDA: ctc_loss_gpu
+ npu_dispatch:
+ NPU: ctc_loss_npu
- func: _ctc_loss_backward(Tensor grad, Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, Tensor neg_log_likelihood, Tensor log_alpha, int blank, bool zero_infinity=False) -> Tensor
dispatch:
CPU: ctc_loss_backward_cpu
CUDA: ctc_loss_backward_gpu
+ npu_dispatch:
+ NPU: ctc_loss_backward_npu
- func: det(Tensor self) -> Tensor
use_c10_dispatcher: full
-@@ -1013,6 +1290,8 @@
+@@ -1013,6 +1292,8 @@
- func: fill_diagonal_(Tensor(a!) self, Scalar fill_value, bool wrap=False) -> Tensor(a!)
variants: method
+ npu_dispatch:
+ NPU: fill_diagonal_npu_
- func: div.Tensor(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
-@@ -1022,6 +1301,8 @@
+@@ -1022,6 +1303,8 @@
CUDA: div
SparseCPU: div_sparse
SparseCUDA: div_sparse
+ npu_dispatch:
+ NPU: div_npu
supports_named_tensor: True
- func: div_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
-@@ -1031,6 +1312,8 @@
+@@ -1031,6 +1314,8 @@
CUDA: div_
SparseCPU: div_sparse_
SparseCUDA: div_sparse_
+ npu_dispatch:
+ NPU: div_npu_
supports_named_tensor: True
- func: div.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
-@@ -1039,6 +1322,8 @@
+@@ -1039,6 +1324,8 @@
CUDA: div_out
SparseCPU: div_out_sparse_zerodim
SparseCUDA: div_out_sparse_zerodim
+ npu_dispatch:
+ NPU: div_out_npu
supports_named_tensor: True
# For C++ only, until we have conversion from C++ numbers to Tensor
-@@ -1046,10 +1331,14 @@
+@@ -1046,10 +1333,14 @@
use_c10_dispatcher: full
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: div_npu
- func: div_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
variants: method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: div_npu_
- func: dot(Tensor self, Tensor tensor) -> Tensor
use_c10_dispatcher: full
-@@ -1057,29 +1346,41 @@
+@@ -1057,29 +1348,41 @@
dispatch:
CPU: legacy::cpu::_th_dot
CUDA: legacy::cuda::_th_dot
+ npu_dispatch:
+ NPU: dot_npu
supports_named_tensor: True
- func: dot.out(Tensor self, Tensor tensor, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: dot_out_npu
- func: einsum(str equation, Tensor[] tensors) -> Tensor
+ npu_dispatch:
+ NPU: einsum_npu
- func: embedding(Tensor weight, Tensor indices, int padding_idx=-1, bool scale_grad_by_freq=False, bool sparse=False) -> Tensor
use_c10_dispatcher: full
+ npu_dispatch:
+ NPU: embedding_npu
- func: embedding_backward(Tensor grad, Tensor indices, int num_weights, int padding_idx, bool scale_grad_by_freq, bool sparse) -> Tensor
use_c10_dispatcher: full
+ npu_dispatch:
+ NPU: embedding_backward_npu
- func: embedding_dense_backward(Tensor grad_output, Tensor indices, int num_weights, int padding_idx, bool scale_grad_by_freq) -> Tensor
use_c10_dispatcher: full
dispatch:
CPU: embedding_dense_backward_cpu
CUDA: embedding_dense_backward_cuda
+ npu_dispatch:
+ NPU: embedding_dense_backward_npu
- func: embedding_renorm_(Tensor(a!) self, Tensor indices, float max_norm, float norm_type) -> Tensor(a!)
dispatch:
CPU: embedding_renorm_cpu_
CUDA: embedding_renorm_cuda_
+ npu_dispatch:
+ NPU: embedding_renorm_npu_
- func: embedding_sparse_backward(Tensor grad, Tensor indices, int num_weights, int padding_idx, bool scale_grad_by_freq) -> Tensor
use_c10_dispatcher: full
-@@ -1099,6 +1400,8 @@
+@@ -1099,6 +1402,8 @@
dispatch:
CPU: _embedding_bag_cpu
CUDA: _embedding_bag_cuda
+ npu_dispatch:
+ NPU: _embedding_bag_npu
- func: _embedding_bag_backward(Tensor grad, Tensor indices, Tensor offsets, Tensor offset2bag, Tensor bag_size, Tensor maximum_indices, int num_weights, bool scale_grad_by_freq, int mode, bool sparse, Tensor? per_sample_weights) -> Tensor
-@@ -1125,6 +1428,8 @@
+@@ -1125,6 +1430,8 @@
MkldnnCPU: empty_mkldnn
SparseCPU: empty_sparse
SparseCUDA: empty_sparse
+ npu_dispatch:
+ NPU: empty_npu
- func: new_empty(Tensor self, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
variants: method
-@@ -1154,6 +1459,8 @@
+@@ -1154,6 +1461,8 @@
supports_named_tensor: True
variants: method
device_guard: False
+ npu_dispatch:
+ NPU: resize_npu_
- func: empty.out(int[] size, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)
device_guard: False
-@@ -1161,16 +1468,22 @@
+@@ -1161,16 +1470,22 @@
- func: empty_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
device_guard: False
supports_named_tensor: True
+ npu_dispatch:
+ NPU: empty_like_npu
- func: empty_strided(int[] size, int[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
dispatch:
CPU: empty_strided_cpu
CUDA: empty_strided_cuda
+ npu_dispatch:
+ NPU: empty_strided_npu
- func: erf(Tensor self) -> Tensor
use_c10_dispatcher: full
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: erf_npu
- func: erf_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
-@@ -1178,12 +1491,18 @@
+@@ -1178,17 +1493,25 @@
dispatch:
CPU: _erf__cpu
CUDA: _erf__cuda
+ npu_dispatch:
+ NPU: erf_npu_
- func: erf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
dispatch:
CPU: _erf_out_cpu
CUDA: _erf_out_cuda
+ npu_dispatch:
+ NPU: erf_out_npu
- func: erfc(Tensor self) -> Tensor
use_c10_dispatcher: full
supports_named_tensor: True
variants: function, method
++ npu_dispatch:
++ NPU: erfc_npu
- func: erfc_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
+@@ -1196,17 +1519,23 @@
dispatch:
CPU: _erfc__cpu
CUDA: _erfc__cuda
++ npu_dispatch:
++ NPU: erfc_npu_
- func: erfc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
dispatch:
CPU: _erfc_out_cpu
CUDA: _erfc_out_cuda
++ npu_dispatch:
++ NPU: erfc_out_npu
- func: exp(Tensor self) -> Tensor
use_c10_dispatcher: full
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: exp_npu
- func: exp_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
-@@ -1214,51 +1535,69 @@
+@@ -1214,51 +1543,69 @@
dispatch:
CPU: _exp__cpu
CUDA: _exp__cuda
+ npu_dispatch:
+ NPU: exp_npu_
- func: exp.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
dispatch:
CPU: _exp_out_cpu
CUDA: _exp_out_cuda
+ npu_dispatch:
+ NPU: exp_out_npu
- func: expm1(Tensor self) -> Tensor
use_c10_dispatcher: full
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: expm1_npu
- func: expm1_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: expm1_npu_
- func: expm1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
dispatch:
CPU: expm1_out
CUDA: expm1_out
+ npu_dispatch:
+ NPU: expm1_out_npu
- func: expand(Tensor(a) self, int[] size, *, bool implicit=False) -> Tensor(a)
variants: method
device_guard: False
supports_named_tensor: True
+ npu_dispatch:
+ NPU: expand_npu
- func: expand_as(Tensor self, Tensor other) -> Tensor
variants: method
supports_named_tensor: True
- func: eye(int n, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ npu_dispatch:
+ NPU: eye_npu
- func: eye.m(int n, int m, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ npu_dispatch:
+ NPU: eye_npu
- func: eye.out(int n, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: eye_out_cpu
CUDA: eye_out_cuda
+ npu_dispatch:
+ NPU: eye_out_npu
- func: eye.m_out(int n, int m, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: eye_out_cpu
CUDA: eye_out_cuda
+ npu_dispatch:
+ NPU: eye_out_npu
- func: flatten.using_ints(Tensor self, int start_dim=0, int end_dim=-1) -> Tensor
use_c10_dispatcher: full
-@@ -1280,25 +1619,35 @@
+@@ -1280,25 +1627,35 @@
- func: fill_.Scalar(Tensor(a!) self, Scalar value) -> Tensor(a!)
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: fill_npu_
- func: fill_.Tensor(Tensor(a!) self, Tensor value) -> Tensor(a!)
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: fill_npu_
- func: floor(Tensor self) -> Tensor
use_c10_dispatcher: full
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: floor_npu
- func: floor_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: floor_npu_
- func: floor.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: floor_out_npu
- func: floor_divide(Tensor self, Tensor other) -> Tensor
variants: function, method
-@@ -1308,6 +1657,8 @@
+@@ -1308,6 +1665,8 @@
SparseCPU: floor_divide_sparse
SparseCUDA: floor_divide_sparse
supports_named_tensor: True
+ npu_dispatch:
+ NPU: floor_divide_npu
- func: floor_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
variants: method
-@@ -1317,6 +1668,8 @@
+@@ -1317,6 +1676,8 @@
SparseCPU: floor_divide_sparse_
SparseCUDA: floor_divide_sparse_
supports_named_tensor: True
+ npu_dispatch:
+ NPU: floor_divide_npu_
- func: floor_divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
-@@ -1325,33 +1678,56 @@
+@@ -1325,33 +1686,56 @@
SparseCPU: floor_divide_out_sparse_zerodim
SparseCUDA: floor_divide_out_sparse_zerodim
supports_named_tensor: True
+ npu_dispatch:
+ NPU: floor_divide_out_npu
- func: floor_divide.Scalar(Tensor self, Scalar other) -> Tensor
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: floor_divide_npu
- func: floor_divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
variants: method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: floor_divide_npu_
- func: frac(Tensor self) -> Tensor
use_c10_dispatcher: full
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: frac_npu
- func: frac_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: frac_npu_
- func: frac.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: frac_out_npu
- func: full.names(int[] size, Scalar fill_value, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
device_guard: False
+ npu_dispatch:
+ NPU: full_npu
- func: full(int[] size, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ npu_dispatch:
+ NPU: full_npu
- func: full.out(int[] size, Scalar fill_value, *, Tensor(a!) out) -> Tensor(a!)
+ npu_dispatch:
+ NPU: full_out_npu
- func: full_like(Tensor self, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
supports_named_tensor: True
-@@ -1373,6 +1749,8 @@
+@@ -1373,6 +1757,8 @@
# `align_corners = True`.
- func: grid_sampler(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor
use_c10_dispatcher: full
+ npu_dispatch:
+ NPU: grid_sampler_npu
- func: grid_sampler_2d(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor
use_c10_dispatcher: full
-@@ -1390,23 +1768,39 @@
+@@ -1390,23 +1776,39 @@
dispatch:
CPU: grid_sampler_3d_cpu
CUDA: grid_sampler_3d_cuda
+ npu_dispatch:
+ NPU: grid_sampler_3d_npu
- func: grid_sampler_3d_backward(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> (Tensor, Tensor)
use_c10_dispatcher: full
dispatch:
CPU: grid_sampler_3d_backward_cpu
CUDA: grid_sampler_3d_backward_cuda
+ npu_dispatch:
+ NPU: grid_sampler_3d_backward_npu
- func: hann_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ npu_dispatch:
+ NPU: hann_window_npu
- func: hann_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ npu_dispatch:
+ NPU: hann_window_npu
- func: hamming_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ npu_dispatch:
+ NPU: hamming_window_npu
- func: hamming_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ npu_dispatch:
+ NPU: hamming_window_npu
- func: hamming_window.periodic_alpha(int window_length, bool periodic, float alpha, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ npu_dispatch:
+ NPU: hamming_window_npu
- func: hamming_window.periodic_alpha_beta(int window_length, bool periodic, float alpha, float beta, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ npu_dispatch:
+ NPU: hamming_window_npu
- func: hinge_embedding_loss(Tensor self, Tensor target, float margin=1.0, int reduction=Mean) -> Tensor
use_c10_dispatcher: full
-@@ -1414,8 +1808,13 @@
+@@ -1414,8 +1816,13 @@
- func: ger(Tensor self, Tensor vec2) -> Tensor
use_c10_dispatcher: full
variants: function, method
+ npu_dispatch:
+ NPU: ger_npu
- func: ger.out(Tensor self, Tensor vec2, *, Tensor(a!) out) -> Tensor(a!)
+ npu_dispatch:
+ NPU: ger_out_npu
- func: group_norm(Tensor input, int num_groups, Tensor? weight=None, Tensor? bias=None, float eps=1e-05, bool cudnn_enabled=True) -> Tensor
-@@ -1460,6 +1859,8 @@
+@@ -1460,6 +1867,8 @@
# NB: The following functions are declared in aten/src/ATen/templates/TensorBody.h and defined in aten/src/ATen/TensorIndexing.cpp:
# - Tensor Tensor::index(ArrayRef indices)
# - Tensor Tensor::index(std::initializer_list indices)
+ npu_dispatch:
+ NPU: index_npu
- func: index_copy_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!)
variants: method
-@@ -1476,17 +1877,23 @@
+@@ -1476,17 +1885,23 @@
- func: index_put_(Tensor(a!) self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor(a!)
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: index_put_npu_
- func: index_put(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor
use_c10_dispatcher: full
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: index_put_npu
- func: _index_put_impl_(Tensor(a!) self, Tensor?[] indices, Tensor values, bool accumulate=False, bool unsafe=False) -> Tensor(a!)
variants: function
+ npu_dispatch:
+ NPU: _index_put_impl_npu_
- func: instance_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool use_input_stats, float momentum, float eps, bool cudnn_enabled) -> Tensor
variants: function
-@@ -1494,8 +1901,12 @@
+@@ -1494,8 +1909,12 @@
- func: inverse(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
+ npu_dispatch:
+ NPU: inverse_npu
- func: inverse.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ npu_dispatch:
+ NPU: inverse_out_npu
- func: _inverse_helper(Tensor self) -> Tensor
use_c10_dispatcher: full
-@@ -1507,6 +1918,8 @@
+@@ -1507,6 +1926,8 @@
- func: isclose(Tensor self, Tensor other, float rtol=1e-05, float atol=1e-08, bool equal_nan=False) -> Tensor
use_c10_dispatcher: full
variants: function, method
+ npu_dispatch:
+ NPU: isclose_npu
- func: isnan(Tensor self) -> Tensor
use_c10_dispatcher: full
-@@ -1518,6 +1931,8 @@
+@@ -1518,6 +1939,8 @@
CUDA: isnan
SparseCPU: isnan_sparse
SparseCUDA: isnan_sparse
+ npu_dispatch:
+ NPU: isnan_npu
- func: is_distributed(Tensor self) -> bool
use_c10_dispatcher: full
-@@ -1541,6 +1956,8 @@
+@@ -1541,6 +1964,8 @@
variants: function, method
device_guard: False
supports_named_tensor: True
+ npu_dispatch:
+ NPU: is_nonzero_npu
- func: is_same_size(Tensor self, Tensor other) -> bool
use_c10_dispatcher: full
-@@ -1556,29 +1973,41 @@
+@@ -1556,29 +1981,41 @@
- func: kl_div(Tensor self, Tensor target, int reduction=Mean) -> Tensor
use_c10_dispatcher: full
+ npu_dispatch:
+ NPU: kl_div_npu
- func: kl_div_backward(Tensor grad_output, Tensor self, Tensor target, int reduction=Mean) -> Tensor
use_c10_dispatcher: full
dispatch:
CPU: kl_div_backward_cpu
CUDA: kl_div_backward_cuda
+ npu_dispatch:
+ NPU: kl_div_backward_npu
- func: kthvalue(Tensor self, int k, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices)
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: kthvalue_npu
- func: kthvalue.values(Tensor self, int k, int dim=-1, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
supports_named_tensor: True
dispatch:
CPU: kthvalue_out_cpu
CUDA: kthvalue_out_cuda
+ npu_dispatch:
+ NPU: kthvalue_out_npu
- func: kthvalue.dimname(Tensor self, int k, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: kthvalue_npu
- func: kthvalue.dimname_out(Tensor self, int k, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: kthvalue_out_npu
- func: layer_norm(Tensor input, int[] normalized_shape, Tensor? weight=None, Tensor? bias=None, float eps=1e-05, bool cudnn_enable=True) -> Tensor
-@@ -1586,11 +2015,15 @@
+@@ -1586,11 +2023,15 @@
dispatch:
CPU: layer_norm_cpu
CUDA: layer_norm_cuda
+ npu_dispatch:
+ NPU: layer_norm_npu
- func: native_layer_norm_backward(Tensor grad_out, Tensor input, Tensor mean, Tensor rstd, Tensor? weight, int M, int N, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
dispatch:
CPU: layer_norm_backward_cpu
CUDA: layer_norm_backward_cuda
+ npu_dispatch:
+ NPU: layer_norm_backward_npu
- func: linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor
python_module: nn
-@@ -1622,46 +2055,64 @@
+@@ -1622,46 +2063,64 @@
use_c10_dispatcher: full
- func: linspace(Scalar start, Scalar end, int steps=100, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ npu_dispatch:
+ NPU: linspace_npu
- func: linspace.out(Scalar start, Scalar end, int steps=100, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: linspace_cpu_out
CUDA: linspace_cuda_out
+ npu_dispatch:
+ NPU: linspace_out_npu
- func: log(Tensor self) -> Tensor
use_c10_dispatcher: full
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: log_npu
- func: log_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: log_npu_
- func: log.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
dispatch:
CPU: log_out
CUDA: log_out
+ npu_dispatch:
+ NPU: log_out_npu
- func: log10(Tensor self) -> Tensor
use_c10_dispatcher: full
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: log10_npu
- func: log10_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: log10_npu_
- func: log10.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
dispatch:
CPU: log10_out
CUDA: log10_out
+ npu_dispatch:
+ NPU: log10_out_npu
- func: log1p(Tensor self) -> Tensor
use_c10_dispatcher: full
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: log1p_npu
- func: log1p_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
-@@ -1671,6 +2122,8 @@
+@@ -1671,6 +2130,8 @@
CUDA: log1p_
SparseCPU: log1p_sparse_
SparseCUDA: log1p_sparse_
+ npu_dispatch:
+ NPU: log1p_npu_
- func: log1p.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
-@@ -1679,67 +2132,95 @@
+@@ -1679,67 +2140,95 @@
CUDA: log1p_out
SparseCPU: log1p_out_sparse
SparseCUDA: log1p_out_sparse
+ npu_dispatch:
+ NPU: log1p_out_npu
- func: log2(Tensor self) -> Tensor
use_c10_dispatcher: full
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: log2_npu
- func: log2_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: log2_npu_
- func: log2.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
dispatch:
CPU: log2_out
CUDA: log2_out
+ npu_dispatch:
+ NPU: log2_out_npu
- func: logdet(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
+ npu_dispatch:
+ NPU: logdet_npu
- func: logspace(Scalar start, Scalar end, int steps=100, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ npu_dispatch:
+ NPU: logspace_npu
- func: logspace.out(Scalar start, Scalar end, int steps=100, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: logspace_cpu_out
CUDA: logspace_cuda_out
+ npu_dispatch:
+ NPU: logspace_out_npu
# log_softmax allows positional dtype, unlike most operators, because kwonly is BC-breaking when loading jit models.
- func: log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: log_softmax_npu
- func: log_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: log_softmax_npu
- func: _log_softmax(Tensor self, int dim, bool half_to_float) -> Tensor
use_c10_dispatcher: full
dispatch:
CPU: log_softmax_cpu
CUDA: log_softmax_cuda
+ npu_dispatch:
+ NPU: _log_softmax_npu
- func: _log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor
use_c10_dispatcher: full
dispatch:
CPU: log_softmax_backward_cpu
CUDA: log_softmax_backward_cuda
+ npu_dispatch:
+ NPU: _log_softmax_backward_npu
- func: logsumexp(Tensor self, int[1] dim, bool keepdim=False) -> Tensor
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: logsumexp_npu
- func: logsumexp.out(Tensor self, int[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: logsumexp_out_npu
- func: logsumexp.names(Tensor self, Dimname[1] dim, bool keepdim=False) -> Tensor
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: logsumexp_npu
- func: logsumexp.names_out(Tensor self, Dimname[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: logsumexp_out_npu
- func: margin_ranking_loss(Tensor input1, Tensor input2, Tensor target, float margin=0.0, int reduction=Mean) -> Tensor
use_c10_dispatcher: full
-@@ -1748,9 +2229,13 @@
+@@ -1748,9 +2237,13 @@
use_c10_dispatcher: full
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: matmul_npu
- func: matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: matmul_out_npu
- func: matrix_rank.tol(Tensor self, float tol, bool symmetric=False) -> Tensor
use_c10_dispatcher: full
-@@ -1765,22 +2250,34 @@
+@@ -1765,22 +2258,34 @@
- func: max.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: max_npu
- func: max.dim_max(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) max, Tensor(b!) max_values) -> (Tensor(a!) values, Tensor(b!) indices)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: max_out_npu
- func: max_values(Tensor self, int[1] dim, bool keepdim=False) -> Tensor
variants: function, method
+ npu_dispatch:
+ NPU: max_values_npu
- func: max.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: max_npu
- func: max.names_dim_max(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) max, Tensor(b!) max_values) -> (Tensor(a!) values, Tensor(b!) indices)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: max_out_npu
- func: max_values.names(Tensor self, Dimname[1] dim, bool keepdim=False) -> Tensor
variants: function, method
+ npu_dispatch:
+ NPU: max_values_npu
# Return: (Tensor output, Tensor indices)
- func: max_pool1d_with_indices(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, int[1] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)
-@@ -1791,6 +2288,8 @@
+@@ -1791,6 +2296,8 @@
- func: max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
supports_named_tensor: True
+ npu_dispatch:
+ NPU: max_pool2d_npu
- func: mkldnn_max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
requires_tensor: True
-@@ -1814,6 +2313,8 @@
+@@ -1814,6 +2321,8 @@
CPU: mean_cpu_gpu
CUDA: mean_cpu_gpu
QuantizedCPU: quantized_mean_cpu
+ npu_dispatch:
+ NPU: mean_npu
- func: mean.dim(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
variants: function, method
-@@ -1822,6 +2323,8 @@
+@@ -1822,6 +2331,8 @@
CPU: mean_cpu_gpu
CUDA: mean_cpu_gpu
QuantizedCPU: quantized_mean_cpu
+ npu_dispatch:
+ NPU: mean_npu
- func: mean.out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
-@@ -1829,47 +2332,73 @@
+@@ -1829,47 +2340,73 @@
CPU: mean_out_cpu_gpu
CUDA: mean_out_cpu_gpu
QuantizedCPU: quantized_mean_out_cpu
+ npu_dispatch:
+ NPU: mean_out_npu
- func: mean.names_dim(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: mean_npu
- func: mean.names_out(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: mean_out_npu
- func: median.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: median_npu
- func: median.dim_values(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
supports_named_tensor: True
dispatch:
CPU: median_out_cpu
CUDA: median_out_cuda
+ npu_dispatch:
+ NPU: median_out_npu
- func: median.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: median_npu
- func: median.names_dim_values(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: median_out_npu
- func: min.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: min_npu
- func: min.dim_min(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) min, Tensor(b!) min_indices) -> (Tensor(a!) values, Tensor(b!) indices)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: min_out_npu
- func: min_values(Tensor self, int[1] dim, bool keepdim=False) -> Tensor
variants: function, method
+ npu_dispatch:
+ NPU: min_values_npu
- func: min.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: min_npu
- func: min.names_dim_min(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) min, Tensor(b!) min_indices) -> (Tensor(a!) values, Tensor(b!) indices)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: min_out_npu
- func: min_values.names(Tensor self, Dimname[1] dim, bool keepdim=False) -> Tensor
variants: function, method
+ npu_dispatch:
+ NPU: min_values_npu
- func: mkldnn_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups) -> Tensor
-@@ -1958,6 +2487,8 @@
+@@ -1958,6 +2495,8 @@
CUDA: legacy::cuda::_th_mm
SparseCPU: _sparse_mm
SparseCUDA: _sparse_mm
+ npu_dispatch:
+ NPU: mm_npu
supports_named_tensor: True
- func: mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
-@@ -1966,6 +2497,8 @@
+@@ -1966,6 +2505,8 @@
CUDA: legacy::cuda::_th_mm_out
SparseCPU: _sparse_mm_out
SparseCUDA: _sparse_mm_out
+ npu_dispatch:
+ NPU: mm_out_npu
supports_named_tensor: True
- func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
-@@ -1994,6 +2527,8 @@
+@@ -1994,6 +2535,8 @@
SparseCPU: mul_sparse
SparseCUDA: mul_sparse
MkldnnCPU: mkldnn_mul
+ npu_dispatch:
+ NPU: mul_npu
supports_named_tensor: True
- func: mul_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
-@@ -2004,6 +2539,8 @@
+@@ -2004,6 +2547,8 @@
SparseCPU: mul_sparse_
SparseCUDA: mul_sparse_
MkldnnCPU: mkldnn_mul_
+ npu_dispatch:
+ NPU: mul_npu_
supports_named_tensor: True
- func: mul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
-@@ -2013,15 +2550,21 @@
+@@ -2013,15 +2558,21 @@
SparseCPU: mul_out_sparse_cpu
SparseCUDA: mul_out_sparse_cuda
MkldnnCPU: mkldnn_mul_out
+ npu_dispatch:
+ NPU: mul_out_npu
# For C++ only, until we have conversion from C++ numbers to Tensor
- func: mul.Scalar(Tensor self, Scalar other) -> Tensor
use_c10_dispatcher: full
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: mul_npu
- func: mul_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
variants: method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: mul_npu_
- func: mv(Tensor self, Tensor vec) -> Tensor
use_c10_dispatcher: full
-@@ -2030,12 +2573,16 @@
+@@ -2030,12 +2581,16 @@
CPU: mv_cpu
CUDA: legacy::cuda::_th_mv
supports_named_tensor: True
+ npu_dispatch:
+ NPU: mv_npu
- func: mv.out(Tensor self, Tensor vec, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: mv_cpu_out
CUDA: legacy::cuda::_th_mv_out
supports_named_tensor: True
+ npu_dispatch:
+ NPU: mv_out_npu
- func: mvlgamma(Tensor self, int p) -> Tensor
use_c10_dispatcher: full
-@@ -2052,6 +2599,8 @@
+@@ -2052,6 +2607,8 @@
CUDA: narrow_copy_dense
SparseCPU: narrow_copy_sparse
SparseCUDA: narrow_copy_sparse
+ npu_dispatch:
+ NPU: narrow_copy_npu
- func: narrow(Tensor(a) self, int dim, int start, int length) -> Tensor(a)
variants: function, method
-@@ -2068,6 +2617,8 @@
+@@ -2068,6 +2625,8 @@
CPU: batch_norm_cpu
CUDA: batch_norm_cuda
MkldnnCPU: mkldnn_batch_norm
+ npu_dispatch:
+ NPU: batch_norm_npu
- func: native_batch_norm.out(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, *, Tensor(a!) out, Tensor(b!) save_mean, Tensor(c!) save_invstd) -> (Tensor(a!), Tensor(b!), Tensor(c!))
dispatch:
-@@ -2098,6 +2649,8 @@
+@@ -2098,6 +2657,8 @@
dispatch:
CPU: batch_norm_backward_cpu
CUDA: batch_norm_backward_cuda
+ npu_dispatch:
+ NPU: batch_norm_backward_npu
- func: batch_norm_backward_reduce(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, bool input_g, bool weight_g, bool bias_g) -> (Tensor, Tensor, Tensor, Tensor)
dispatch:
-@@ -2117,6 +2670,8 @@
+@@ -2117,6 +2678,8 @@
- func: _nnpack_spatial_convolution(Tensor input, Tensor weight, Tensor? bias, int[2] padding, int[2] stride=1) -> Tensor
variants: function
+ npu_dispatch:
+ NPU: _nnpack_spatial_convolution_npu
- func: _nnpack_spatial_convolution_backward(Tensor input, Tensor grad_output, Tensor weight, int[2] padding, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
variants: function
-@@ -2129,42 +2684,60 @@
+@@ -2129,42 +2692,60 @@
- func: ones.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
device_guard: False
+ npu_dispatch:
+ NPU: ones_npu
- func: ones(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ npu_dispatch:
+ NPU: ones_npu
- func: ones.out(int[] size, *, Tensor(a!) out) -> Tensor(a!)
+ npu_dispatch:
+ NPU: ones_out_npu
- func: ones_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
supports_named_tensor: True
+ npu_dispatch:
+ NPU: ones_like_npu
- func: pairwise_distance(Tensor x1, Tensor x2, float p=2, float eps=1e-06, bool keepdim=False) -> Tensor
use_c10_dispatcher: full
+ npu_dispatch:
+ NPU: pairwise_distance_npu
- func: cdist(Tensor x1, Tensor x2, float p=2, int? compute_mode=None) -> Tensor
use_c10_dispatcher: full
supports_named_tensor: True
+ npu_dispatch:
+ NPU: cdist_npu
- func: _cdist_forward(Tensor x1, Tensor x2, float p, int? compute_mode) -> Tensor
use_c10_dispatcher: full
supports_named_tensor: True
+ npu_dispatch:
+ NPU: _cdist_forward_npu
- func: _cdist_backward(Tensor grad, Tensor x1, Tensor x2, float p, Tensor cdist) -> Tensor
use_c10_dispatcher: full
supports_named_tensor: True
+ npu_dispatch:
+ NPU: _cdist_backward_npu
- func: pdist(Tensor self, float p=2) -> Tensor
use_c10_dispatcher: full
supports_named_tensor: True
+ npu_dispatch:
+ NPU: pdist_npu
- func: _pdist_forward(Tensor self, float p=2) -> Tensor
use_c10_dispatcher: full
supports_named_tensor: True
+ npu_dispatch:
+ NPU: _pdist_forward_npu
- func: _pdist_backward(Tensor grad, Tensor self, float p, Tensor pdist) -> Tensor
use_c10_dispatcher: full
# Only exposed from C++ -- in Python,
# we expose it as an attribute `T`, not a function.
-@@ -2253,54 +2826,82 @@
+@@ -2253,54 +2834,82 @@
supports_named_tensor: True
- func: randperm(int n, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ npu_dispatch:
+ NPU: randperm_npu
- func: randperm.generator(int n, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ npu_dispatch:
+ NPU: randperm_npu
- func: randperm.out(int n, *, Tensor(a!) out) -> Tensor(a!)
+ npu_dispatch:
+ NPU: randperm_out_npu
- func: randperm.generator_out(int n, *, Generator? generator, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: randperm_out_cpu
CUDA: randperm_out_cuda
+ npu_dispatch:
+ NPU: randperm_out_npu
- func: range.step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ npu_dispatch:
+ NPU: range_npu
- func: range(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ npu_dispatch:
+ NPU: range_npu
- func: range.out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: range_cpu_out
CUDA: range_cuda_out
+ npu_dispatch:
+ NPU: range_out_npu
- func: reciprocal(Tensor self) -> Tensor
use_c10_dispatcher: full
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: reciprocal_npu
- func: reciprocal_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: reciprocal_npu_
- func: reciprocal.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: reciprocal_out_npu
- func: neg(Tensor self) -> Tensor
use_c10_dispatcher: full
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: neg_npu
- func: neg_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: neg_npu_
- func: neg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
dispatch:
CPU: neg_out
CUDA: neg_out
+ npu_dispatch:
+ NPU: neg_out_npu
- func: repeat(Tensor self, int[] repeats) -> Tensor
variants: method # This is method-only to match the previous tensor API. In the future we could make this a function too.
device_guard: False
+ npu_dispatch:
+ NPU: repeat_npu
- func: repeat_interleave.Tensor(Tensor repeats) -> Tensor
use_c10_dispatcher: full
-@@ -2316,6 +2917,8 @@
+@@ -2316,6 +2925,8 @@
- func: repeat_interleave.self_int(Tensor self, int repeats, int? dim=None) -> Tensor
use_c10_dispatcher: full
variants: function, method
+ npu_dispatch:
+ NPU: repeat_interleave_npu
- func: reshape(Tensor self, int[] shape) -> Tensor
variants: function, method
-@@ -2337,16 +2940,22 @@
+@@ -2337,16 +2948,22 @@
use_c10_dispatcher: full
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: round_npu
- func: round_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: round_npu_
- func: round.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
dispatch:
CPU: round_out
CUDA: round_out
+ npu_dispatch:
+ NPU: round_out_npu
- func: rrelu(Tensor self, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor
-@@ -2360,6 +2969,8 @@
+@@ -2360,6 +2977,8 @@
CUDA: relu
MkldnnCPU: mkldnn_relu
QuantizedCPU: quantized_relu
+ npu_dispatch:
+ NPU: relu_npu
supports_named_tensor: True
- func: relu_(Tensor(a!) self) -> Tensor(a!)
-@@ -2370,6 +2981,8 @@
+@@ -2370,6 +2989,8 @@
CUDA: relu_
MkldnnCPU: mkldnn_relu_
QuantizedCPU: quantized_relu_
+ npu_dispatch:
+ NPU: relu_npu_
- func: prelu(Tensor self, Tensor weight) -> Tensor
use_c10_dispatcher: full
-@@ -2377,12 +2990,16 @@
+@@ -2377,12 +2998,16 @@
dispatch:
CPU: prelu_cpu
CUDA: prelu_cuda
+ npu_dispatch:
+ NPU: prelu_npu
- func: prelu_backward(Tensor grad_output, Tensor self, Tensor weight) -> (Tensor, Tensor)
use_c10_dispatcher: full
dispatch:
CPU: prelu_backward_cpu
CUDA: prelu_backward_cuda
+ npu_dispatch:
+ NPU: prelu_backward_npu
- func: gelu(Tensor self) -> Tensor
use_c10_dispatcher: full
-@@ -2390,6 +3007,8 @@
+@@ -2390,6 +3015,8 @@
dispatch:
CPU: gelu_cpu
CUDA: gelu_cuda
+ npu_dispatch:
+ NPU: gelu_npu
- func: gelu_backward(Tensor grad, Tensor self) -> Tensor
use_c10_dispatcher: full
-@@ -2397,29 +3016,41 @@
+@@ -2397,29 +3024,41 @@
dispatch:
CPU: gelu_backward_cpu
CUDA: gelu_backward_cuda
+ npu_dispatch:
+ NPU: gelu_backward_npu
- func: hardshrink(Tensor self, Scalar lambd=0.5) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
CPU: hardshrink_cpu
CUDA: hardshrink_cuda
+ npu_dispatch:
+ NPU: hardshrink_npu
- func: hardshrink_backward(Tensor grad_out, Tensor self, Scalar lambd) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
CPU: hardshrink_backward_cpu
CUDA: hardshrink_backward_cuda
+ npu_dispatch:
+ NPU: hardshrink_backward_npu
- func: rsqrt(Tensor self) -> Tensor
use_c10_dispatcher: full
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: rsqrt_npu
- func: rsqrt_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: rsqrt_npu_
- func: rsqrt.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: rsqrt_out_npu
- func: select.Dimname(Tensor(a) self, Dimname dim, int index) -> Tensor(a)
variants: function, method
-@@ -2433,14 +3064,21 @@
+@@ -2433,14 +3072,21 @@
- func: selu(Tensor self) -> Tensor
use_c10_dispatcher: full
+ npu_dispatch:
+ NPU: selu_npu
- func: selu_(Tensor(a!) self) -> Tensor(a!)
+ npu_dispatch:
+ NPU: selu_npu_
- func: celu(Tensor self, Scalar alpha=1.0) -> Tensor
use_c10_dispatcher: full
+ npu_dispatch:
+ NPU: celu_npu
- func: celu_(Tensor(a!) self, Scalar alpha=1.0) -> Tensor(a!)
+ npu_dispatch:
+ NPU: celu_npu_
- func: sigmoid(Tensor self) -> Tensor
use_c10_dispatcher: full
-@@ -2451,6 +3089,8 @@
+@@ -2451,6 +3097,8 @@
CUDA: sigmoid
QuantizedCPU: quantized_sigmoid
MkldnnCPU: mkldnn_sigmoid
+ npu_dispatch:
+ NPU: sigmoid_npu
- func: sigmoid_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
-@@ -2459,36 +3099,52 @@
+@@ -2459,36 +3107,52 @@
CPU: sigmoid_
CUDA: sigmoid_
MkldnnCPU: mkldnn_sigmoid_
+ npu_dispatch:
+ NPU: sigmoid_npu_
- func: sigmoid.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
dispatch:
CPU: sigmoid_out
CUDA: sigmoid_out
+ npu_dispatch:
+ NPU: sigmoid_out_npu
- func: sin(Tensor self) -> Tensor
use_c10_dispatcher: full
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: sin_npu
- func: sin_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: sin_npu_
- func: sin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
dispatch:
CPU: sin_out
CUDA: sin_out
+ npu_dispatch:
+ NPU: sin_out_npu
- func: sinh(Tensor self) -> Tensor
use_c10_dispatcher: full
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: sinh_npu
- func: sinh_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: sinh_npu_
- func: sinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: sinh_out_npu
# Returns a copy of this `Variable` that is detached from its autograd graph.
# This method is OK to call if the `Variable` is a view.
-@@ -2533,6 +3189,8 @@
+@@ -2533,6 +3197,8 @@
- func: slogdet(Tensor self) -> (Tensor sign, Tensor logabsdet)
variants: function, method
+ npu_dispatch:
+ NPU: slogdet_npu
- func: smm(Tensor self, Tensor mat2) -> Tensor
use_c10_dispatcher: full
-@@ -2542,10 +3200,14 @@
+@@ -2542,10 +3208,14 @@
- func: softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: softmax_npu
- func: softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: softmax_npu
- func: _softmax(Tensor self, int dim, bool half_to_float) -> Tensor
use_c10_dispatcher: full
-@@ -2553,12 +3215,16 @@
+@@ -2553,12 +3223,16 @@
CPU: softmax_cpu
CUDA: softmax_cuda
MkldnnCPU: mkldnn_softmax
+ npu_dispatch:
+ NPU: _softmax_npu
- func: _softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor
use_c10_dispatcher: full
dispatch:
CPU: softmax_backward_cpu
CUDA: softmax_backward_cuda
+ npu_dispatch:
+ NPU: _softmax_backward_npu
- func: split.Tensor(Tensor(a) self, int split_size, int dim=0) -> Tensor(a)[]
variants: function, method
-@@ -2609,8 +3275,12 @@
+@@ -2609,8 +3283,12 @@
SparseCUDA: _sspaddmm_out_cuda
- func: stack(Tensor[] tensors, int dim=0) -> Tensor
+ npu_dispatch:
+ NPU: stack_npu
- func: stack.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
+ npu_dispatch:
+ NPU: stack_out_npu
# The signature is designed to be consistent with librosa except that it is
# missing the `pad_mode` and `center` arguments, which are taken care of at
-@@ -2633,20 +3303,30 @@
+@@ -2633,20 +3311,30 @@
- func: sum(Tensor self, *, ScalarType? dtype=None) -> Tensor
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: sum_npu
- func: sum.dim_IntList(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: sum_npu
- func: sum.dim_DimnameList(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: sum_npu
- func: sum.IntList_out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: sum_out_npu
- func: sum.DimnameList_out(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: sum_out_npu
- func: sum_to_size(Tensor self, int[] size) -> Tensor
variants: method
-@@ -2656,13 +3336,19 @@
+@@ -2656,13 +3344,19 @@
use_c10_dispatcher: full
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: sqrt_npu
- func: sqrt_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: sqrt_npu_
- func: sqrt.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: sqrt_out_npu
- func: square(Tensor self) -> Tensor
use_c10_dispatcher: full
-@@ -2677,51 +3363,81 @@
+@@ -2677,51 +3371,81 @@
use_c10_dispatcher: full
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: std_npu
- func: std.dim(Tensor self, int[1] dim, bool unbiased=True, bool keepdim=False) -> Tensor
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: std_npu
- func: std_mean(Tensor self, bool unbiased=True) -> (Tensor, Tensor)
variants: function
supports_named_tensor: True
+ npu_dispatch:
+ NPU: std_mean_npu
- func: std_mean.dim(Tensor self, int[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
variants: function
supports_named_tensor: True
+ npu_dispatch:
+ NPU: std_mean_npu
- func: std_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
variants: function
supports_named_tensor: True
+ npu_dispatch:
+ NPU: std_mean_npu
- func: std.out(Tensor self, int[1] dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: std_out_npu
- func: std.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> Tensor
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: std_npu
- func: std.names_out(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: std_out_npu
- func: prod(Tensor self, *, ScalarType? dtype=None) -> Tensor
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: prod_npu
- func: prod.dim_int(Tensor self, int dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: prod_npu
- func: prod.int_out(Tensor self, int dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: prod_out_npu
- func: prod.dim_Dimname(Tensor self, Dimname dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: prod_npu
- func: prod.Dimname_out(Tensor self, Dimname dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: prod_out_npu
- func: t(Tensor(a) self) -> Tensor(a)
device_guard: False
-@@ -2736,6 +3452,8 @@
+@@ -2736,6 +3460,8 @@
use_c10_dispatcher: full
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: tan_npu
- func: tan_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
-@@ -2743,12 +3461,16 @@
+@@ -2743,12 +3469,16 @@
dispatch:
CPU: _tan__cpu
CUDA: _tan__cuda
+ npu_dispatch:
+ NPU: tan_npu_
- func: tan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
dispatch:
CPU: _tan_out_cpu
CUDA: _tan_out_cuda
+ npu_dispatch:
+ NPU: tan_out_npu
- func: tanh(Tensor self) -> Tensor
use_c10_dispatcher: full
-@@ -2758,6 +3480,8 @@
+@@ -2758,6 +3488,8 @@
CPU: tanh
CUDA: tanh
QuantizedCPU: quantized_tanh
+ npu_dispatch:
+ NPU: tanh_npu
- func: tanh_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
-@@ -2765,12 +3489,16 @@
+@@ -2765,12 +3497,16 @@
dispatch:
CPU: _tanh__cpu
CUDA: _tanh__cuda
+ npu_dispatch:
+ NPU: tanh_npu_
- func: tanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
dispatch:
CPU: _tanh_out_cpu
CUDA: _tanh_out_cuda
+ npu_dispatch:
+ NPU: tanh_out_npu
- func: tensordot(Tensor self, Tensor other, int[] dims_self, int[] dims_other) -> Tensor
variants: function
-@@ -2783,6 +3511,8 @@
+@@ -2783,6 +3519,8 @@
dispatch:
CPU: threshold
CUDA: threshold_cuda
+ npu_dispatch:
+ NPU: threshold_npu
- func: threshold_(Tensor(a!) self, Scalar threshold, Scalar value) -> Tensor(a!)
variants: function
-@@ -2790,12 +3520,16 @@
+@@ -2790,12 +3528,16 @@
dispatch:
CPU: threshold_
CUDA: threshold__cuda
+ npu_dispatch:
+ NPU: threshold_npu_
- func: threshold.out(Tensor self, Scalar threshold, Scalar value, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: threshold_out
CUDA: threshold_out_cuda
+ npu_dispatch:
+ NPU: threshold_out_npu
- func: threshold_backward(Tensor grad_output, Tensor self, Scalar threshold) -> Tensor
use_c10_dispatcher: full
-@@ -2803,6 +3537,8 @@
+@@ -2803,6 +3545,8 @@
dispatch:
CPU: threshold_backward
CUDA: threshold_backward_cuda
+ npu_dispatch:
+ NPU: threshold_backward_npu
- func: transpose.int(Tensor(a) self, int dim0, int dim1) -> Tensor(a)
variants: function, method
-@@ -2835,18 +3571,24 @@
+@@ -2835,18 +3579,24 @@
use_c10_dispatcher: full
python_module: nn
variants: function
+ npu_dispatch:
+ NPU: one_hot_npu1
- func: flip(Tensor self, int[] dims) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
CPU: flip_cpu
CUDA: flip_cuda
+ npu_dispatch:
+ NPU: flip_npu
- func: roll(Tensor self, int[1] shifts, int[1] dims=[]) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
CPU: roll_cpu
CUDA: roll_cuda
+ npu_dispatch:
+ NPU: roll_npu
# default int[] value [0,1] should not add space after comma, since native_parse.py uses ', ' to split args
-@@ -2872,6 +3614,8 @@
+@@ -2872,6 +3622,8 @@
CUDA: true_divide
SparseCPU: true_divide_sparse
SparseCUDA: true_divide_sparse
+ npu_dispatch:
+ NPU: true_divide_npu
supports_named_tensor: True
- func: true_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
-@@ -2881,6 +3625,8 @@
+@@ -2881,6 +3633,8 @@
CUDA: true_divide_
SparseCPU: true_divide_sparse_
SparseCUDA: true_divide_sparse_
+ npu_dispatch:
+ NPU: true_divide_npu_
supports_named_tensor: True
- func: true_divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
-@@ -2889,31 +3635,43 @@
+@@ -2889,31 +3643,43 @@
CUDA: true_divide_out
SparseCPU: true_divide_out_sparse_zerodim
SparseCUDA: true_divide_out_sparse_zerodim
+ npu_dispatch:
+ NPU: true_divide_out_npu
supports_named_tensor: True
- func: true_divide.Scalar(Tensor self, Scalar other) -> Tensor
use_c10_dispatcher: full
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: true_divide_npu
- func: true_divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
variants: method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: true_divide_npu_
- func: trunc(Tensor self) -> Tensor
use_c10_dispatcher: full
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: trunc_npu
- func: trunc_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: trunc_npu_
- func: trunc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
dispatch:
CPU: trunc_out
CUDA: trunc_out
+ npu_dispatch:
+ NPU: trunc_out_npu
- func: type_as(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
-@@ -2956,6 +3714,8 @@
+@@ -2956,6 +3722,8 @@
dispatch:
CPU: _unique2_cpu
CUDA: _unique2_cuda
+ npu_dispatch:
+ NPU: _unique2_npu
- func: _unsafe_view(Tensor self, int[] size) -> Tensor
-@@ -2971,32 +3731,48 @@
+@@ -2971,32 +3739,48 @@
use_c10_dispatcher: full
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: var_npu
- func: var.dim(Tensor self, int[1] dim, bool unbiased=True, bool keepdim=False) -> Tensor
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: var_npu
- func: var.out(Tensor self, int[1] dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: var_out_npu
- func: var.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> Tensor
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: var_npu
- func: var.names_out(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
+ npu_dispatch:
+ NPU: var_out_npu
- func: var_mean(Tensor self, bool unbiased=True) -> (Tensor, Tensor)
variants: function
supports_named_tensor: True
+ npu_dispatch:
+ NPU: var_mean_npu
- func: var_mean.dim(Tensor self, int[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
variants: function
supports_named_tensor: True
+ npu_dispatch:
+ NPU: var_mean_npu
- func: var_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
variants: function
supports_named_tensor: True
+ npu_dispatch:
+ NPU: var_mean_npu
- func: view_as(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
-@@ -3009,13 +3785,19 @@
+@@ -3009,13 +3793,19 @@
- func: where.self(Tensor condition, Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: function, method
+ npu_dispatch:
+ NPU: where_npu
- func: where(Tensor condition) -> Tensor[]
variants: function
+ npu_dispatch:
+ NPU: where_npu
- func: _s_where(Tensor condition, Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: function
+ npu_dispatch:
+ NPU: _s_where_npu
- func: norm_except_dim(Tensor v, int pow=2, int dim=0) -> Tensor
variants: function
-@@ -3041,13 +3823,21 @@
+@@ -3041,13 +3831,21 @@
- func: zeros.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
device_guard: False
+ npu_dispatch:
+ NPU: zeros_npu
- func: zeros(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ npu_dispatch:
+ NPU: zeros_npu
- func: zeros.out(int[] size, *, Tensor(a!) out) -> Tensor(a!)
+ npu_dispatch:
+ NPU: zeros_out_npu
- func: zeros_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
supports_named_tensor: True
+ npu_dispatch:
+ NPU: zeros_like_npu
- func: _standard_gamma_grad(Tensor self, Tensor output) -> Tensor
use_c10_dispatcher: full
-@@ -3100,25 +3890,37 @@
+@@ -3100,25 +3898,37 @@
- func: _sparse_sum_backward(Tensor grad, Tensor self, int[] dim) -> Tensor
dispatch:
SparseCPU: _sparse_sum_backward_cpu
SparseCUDA: _sparse_sum_backward_cuda
- func: norm.ScalarOpt_dtype(Tensor self, Scalar? p, *, ScalarType dtype) -> Tensor
variants: function, method
+ npu_dispatch:
+ NPU: norm_npu
- func: norm.Scalar(Tensor self, Scalar p=2) -> Tensor
use_c10_dispatcher: full
variants: function, method
+ npu_dispatch:
+ NPU: norm_npu
- func: norm.ScalarOpt_dim_dtype(Tensor self, Scalar? p, int[1] dim, bool keepdim, *, ScalarType dtype) -> Tensor
variants: function, method
+ npu_dispatch:
+ NPU: norm_npu
- func: norm.ScalarOpt_dim(Tensor self, Scalar? p, int[1] dim, bool keepdim=False) -> Tensor
variants: function, method
+ npu_dispatch:
+ NPU: norm_npu
- func: norm.dtype_out(Tensor self, Scalar? p, int[1] dim, bool keepdim, *, ScalarType dtype, Tensor(a!) out) -> Tensor(a!)
+ npu_dispatch:
+ NPU: norm_out_npu
- func: norm.out(Tensor self, Scalar? p, int[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+ npu_dispatch:
+ NPU: norm_out_npu
- func: norm.names_ScalarOpt_dim_dtype(Tensor self, Scalar? p, Dimname[1] dim, bool keepdim, *, ScalarType dtype) -> Tensor
variants: function, method
-@@ -3162,12 +3964,16 @@
+@@ -3162,12 +3972,16 @@
SparseCUDA: clone_sparse
MkldnnCPU: mkldnn_clone
QuantizedCPU: quantized_clone
+ npu_dispatch:
+ NPU: clone_npu
- func: resize_as_(Tensor(a!) self, Tensor the_template, *, MemoryFormat? memory_format=None) -> Tensor(a!)
supports_named_tensor: True
variants: function, method
+ npu_dispatch:
+ NPU: resize_as_npu_
- func: pow.Tensor_Scalar_out(Tensor self, Scalar exponent, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
-@@ -3176,6 +3982,8 @@
+@@ -3176,6 +3990,8 @@
CUDA: pow_out
SparseCPU: pow_out_sparse_scalar
SparseCUDA: pow_out_sparse_scalar
+ npu_dispatch:
+ NPU: pow_out_npu
- func: pow.Tensor_Scalar(Tensor self, Scalar exponent) -> Tensor
use_c10_dispatcher: full
-@@ -3186,6 +3994,8 @@
+@@ -3186,6 +4002,8 @@
CUDA: pow
SparseCPU: pow_sparse_scalar
SparseCUDA: pow_sparse_scalar
+ npu_dispatch:
+ NPU: pow_npu
- func: zero_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
-@@ -3196,6 +4006,14 @@
+@@ -3196,6 +4014,14 @@
SparseCPU: zero_sparse_
SparseCUDA: zero_sparse_
MkldnnCPU: mkldnn_zero_
+ npu_dispatch:
+ NPU: zero_npu_
+
+- func: npu_dtype_cast(Tensor self, ScalarType dtype) -> Tensor
+ variants: function, method
+ npu_dispatch_only:
+ NPU: dtype_cast_npu
- func: sub.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
dispatch:
-@@ -3204,6 +4022,8 @@
+@@ -3204,6 +4030,8 @@
SparseCPU: sub_out_sparse
SparseCUDA: sub_out_sparse
supports_named_tensor: True
+ npu_dispatch:
+ NPU: sub_out_npu
- func: sub.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
use_c10_dispatcher: full
-@@ -3213,6 +4033,8 @@
+@@ -3213,6 +4041,8 @@
CUDA: sub
SparseCPU: sub_sparse
SparseCUDA: sub_sparse
+ npu_dispatch:
+ NPU: sub_npu
supports_named_tensor: True
- func: sub_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
-@@ -3222,6 +4044,8 @@
+@@ -3222,6 +4052,8 @@
CUDA: sub_
SparseCPU: sub_sparse_
SparseCUDA: sub_sparse_
+ npu_dispatch:
+ NPU: sub_npu_
supports_named_tensor: True
# For C++ only, until we have conversion from C++ numbers to Tensor
-@@ -3229,21 +4053,29 @@
+@@ -3229,21 +4061,29 @@
use_c10_dispatcher: full
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: sub_npu
- func: sub_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!)
variants: method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: sub_npu_
- func: rsub.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
use_c10_dispatcher: full
variants: function
supports_named_tensor: True
+ npu_dispatch:
+ NPU: rsub_npu
# For C++ only, until we have conversion from C++ numbers to Tensor
- func: rsub.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor
use_c10_dispatcher: full
variants: function
supports_named_tensor: True
+ npu_dispatch:
+ NPU: rsub_npu
# Functionally the same as addmm, but we give it a different derivative formula
# that doesn't propagate gradients to non-present entries on sparse.
-@@ -3257,6 +4089,8 @@
+@@ -3257,6 +4097,8 @@
CUDA: legacy::cuda::_th_addmm_out
SparseCPU: addmm_out_sparse_dense_cpu
SparseCUDA: addmm_out_sparse_dense_cuda
+ npu_dispatch:
+ NPU: addmm_out_npu
supports_named_tensor: True
- func: addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
-@@ -3267,6 +4101,8 @@
+@@ -3267,6 +4109,8 @@
CUDA: legacy::cuda::_th_addmm
SparseCPU: addmm_sparse_dense_cpu
SparseCUDA: addmm_sparse_dense_cuda
+ npu_dispatch:
+ NPU: addmm_npu
supports_named_tensor: True
- func: addmm_(Tensor(a!) self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
-@@ -3278,9 +4114,10 @@
+@@ -3278,9 +4122,10 @@
# broadcasting
SparseCPU: s_addmm_sparse_dense_cpu_
SparseCUDA: s_addmm_sparse_dense_cuda_
+ npu_dispatch:
+ NPU: addmm_npu_
supports_named_tensor: True
-
# NOTE [ Sparse: autograd and API ]
#
#
-@@ -3396,7 +4233,6 @@
+@@ -3396,7 +4241,6 @@
# shared. In other words, their outputs are non-differentiable views of the
# sparse tensor.
-
# FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given
# the default would never make sense.
- func: sparse_coo_tensor.size(int[] size, *, ScalarType dtype, Layout layout, Device device, bool pin_memory=False) -> Tensor
-@@ -3433,7 +4269,6 @@
+@@ -3433,7 +4277,6 @@
SparseCUDA: sparse_resize_and_clear_
requires_tensor: True
-
- func: sparse_mask(Tensor self, Tensor mask) -> Tensor
use_c10_dispatcher: full
variants: method
-@@ -3442,7 +4277,6 @@
+@@ -3442,7 +4285,6 @@
SparseCUDA: sparse_mask_cuda
requires_tensor: True
-
- func: to_dense(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: method
-@@ -3474,7 +4308,6 @@
+@@ -3474,7 +4316,6 @@
requires_tensor: True
device_guard: False
-
- func: dense_dim(Tensor self) -> int
use_c10_dispatcher: full
variants: method
-@@ -3494,7 +4327,6 @@
+@@ -3494,7 +4335,6 @@
requires_tensor: True
device_guard: False
-
- func: _nnz(Tensor self) -> int
use_c10_dispatcher: full
variants: method
-@@ -3504,7 +4336,6 @@
+@@ -3504,7 +4344,6 @@
requires_tensor: True
device_guard: False
-
- func: coalesce(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: method
-@@ -3513,7 +4344,6 @@
+@@ -3513,7 +4352,6 @@
SparseCUDA: coalesce_sparse_cuda
requires_tensor: True
-
- func: is_coalesced(Tensor self) -> bool
use_c10_dispatcher: full
variants: method
-@@ -3524,7 +4354,6 @@
+@@ -3524,7 +4362,6 @@
device_guard: False
supports_named_tensor: True
-
- func: _indices(Tensor(a) self) -> Tensor(a)
variants: method
dispatch:
-@@ -3568,7 +4397,6 @@
+@@ -3568,7 +4405,6 @@
requires_tensor: True
device_guard: False
-
- func: hspmm.out(Tensor mat1, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
SparseCPU: hspmm_out_sparse_cpu
-@@ -3630,11 +4458,15 @@
+@@ -3630,11 +4466,15 @@
variants: function
dispatch:
CPU: quantize_per_tensor_cpu
+ npu_dispatch:
+ NPU: quantize_per_tensor_npu
- func: quantize_per_channel(Tensor self, Tensor scales, Tensor zero_points, int axis, ScalarType dtype) -> Tensor
variants: function
dispatch:
CPU: quantize_per_channel_cpu
+ npu_dispatch:
+ NPU: quantize_per_channel_npu
- func: dequantize(Tensor self) -> Tensor
use_c10_dispatcher: full
-@@ -3713,20 +4545,28 @@
+@@ -3713,20 +4553,28 @@
variants: method
device_guard: False
supports_named_tensor: True
+ npu_dispatch:
+ NPU: to_npu
- func: to.dtype_layout(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor
variants: method
device_guard: False
supports_named_tensor: True
+ npu_dispatch:
+ NPU: to_npu
- func: to.device(Tensor self, Device device, ScalarType dtype, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor
variants: method
device_guard: False
supports_named_tensor: True
+ npu_dispatch:
+ NPU: to_npu
- func: to.dtype(Tensor self, ScalarType dtype, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor
variants: method
device_guard: False
supports_named_tensor: True
+ npu_dispatch:
+ NPU: to_npu
- func: to.other(Tensor self, Tensor other, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor
variants: method
device_guard: False
supports_named_tensor: True
+ npu_dispatch:
+ NPU: to_npu
- func: meshgrid(Tensor[] tensors) -> Tensor[]
-@@ -3765,6 +4605,8 @@
+@@ -3765,6 +4613,8 @@
dispatch:
CPU: _local_scalar_dense_cpu
CUDA: _local_scalar_dense_cuda
+ npu_dispatch:
+ NPU: _local_scalar_dense_npu
variants: function
supports_named_tensor: True
-@@ -3791,10 +4633,16 @@
+@@ -3791,10 +4641,16 @@
# RNN cells and layers
- func: lstm.input(Tensor input, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor, Tensor)
+ npu_dispatch:
+ NPU: lstm_npu
- func: lstm.data(Tensor data, Tensor batch_sizes, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor, Tensor)
+ npu_dispatch:
+ NPU: lstm_npu
- func: gru.input(Tensor input, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor)
+ npu_dispatch:
+ NPU: gru_npu_
- func: gru.data(Tensor data, Tensor batch_sizes, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor)
-@@ -3839,10 +4687,14 @@
+@@ -3839,10 +4695,14 @@
# PackedSequence utilities
- func: _pack_padded_sequence(Tensor input, Tensor lengths, bool batch_first) -> (Tensor, Tensor)
+ npu_dispatch:
+ NPU: _pack_padded_sequence_npu
- func: _pack_padded_sequence_backward(Tensor grad, int[] input_size, Tensor batch_sizes, bool batch_first) -> Tensor
use_c10_dispatcher: full
+ npu_dispatch:
+ NPU: _pack_padded_sequence_backward_npu
# wrappers for legacy TH methods
-@@ -3852,6 +4704,8 @@
+@@ -3852,6 +4712,8 @@
dispatch:
CPU: set_
CUDA: set_
+ npu_dispatch:
+ NPU: set_npu_
- func: set_.source_Storage_storage_offset(Tensor(a!) self, Storage source, int storage_offset, int[] size, int[] stride=[]) -> Tensor(a!)
variants: method
-@@ -3860,6 +4714,8 @@
+@@ -3860,6 +4722,8 @@
CPU: legacy::cpu::_th_set_
CUDA: legacy::cuda::_th_set_
QuantizedCPU: set_storage
+ npu_dispatch:
+ NPU: set_npu_
- func: set_.source_Tensor(Tensor(a!) self, Tensor source) -> Tensor(a!)
variants: method
-@@ -3867,12 +4723,16 @@
+@@ -3867,12 +4731,16 @@
dispatch:
CPU: set_tensor_
CUDA: set_tensor_
+ npu_dispatch:
+ NPU: set_npu_
- func: set_(Tensor(a!) self) -> Tensor(a!)
variants: method
dispatch:
CPU: set_cpu_
CUDA: set_cuda_
+ npu_dispatch:
+ NPU: set_npu_
- func: set_quantizer_(Tensor(a!) self, ConstQuantizerPtr quantizer) -> Tensor(a!)
variants: method
-@@ -3892,6 +4752,8 @@
+@@ -3892,6 +4760,8 @@
dispatch:
CPU: masked_fill__cpu
CUDA: masked_fill__cuda
+ npu_dispatch:
+ NPU: masked_fill_npu_
supports_named_tensor: True
- func: masked_fill.Scalar(Tensor self, Tensor mask, Scalar value) -> Tensor
-@@ -3904,6 +4766,8 @@
+@@ -3904,6 +4774,8 @@
dispatch:
CPU: masked_fill__cpu
CUDA: masked_fill__cuda
+ npu_dispatch:
+ NPU: masked_fill_npu_
supports_named_tensor: True
- func: masked_fill.Tensor(Tensor self, Tensor mask, Tensor value) -> Tensor
-@@ -3916,6 +4780,8 @@
+@@ -3916,6 +4788,8 @@
dispatch:
CPU: masked_scatter__cpu
CUDA: masked_scatter__cuda
+ npu_dispatch:
+ NPU: masked_scatter_npu_
- func: masked_scatter(Tensor self, Tensor mask, Tensor source) -> Tensor
use_c10_dispatcher: full
-@@ -3929,25 +4795,35 @@
+@@ -3929,25 +4803,35 @@
CUDA: view
MkldnnCPU: mkldnn_view
QuantizedCPU: view
+ npu_dispatch:
+ NPU: view_npu
- func: put_(Tensor(a!) self, Tensor index, Tensor source, bool accumulate=False) -> Tensor(a!)
variants: method
dispatch:
CPU: legacy::cpu::_th_put_
CUDA: legacy::cuda::_th_put_
+ npu_dispatch:
+ NPU: put_npu_
- func: index_add_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!)
variants: method
dispatch:
CPU: legacy::cpu::_th_index_add_
CUDA: index_add_cuda_
+ npu_dispatch:
+ NPU: index_add_npu_
- func: index_add(Tensor self, int dim, Tensor index, Tensor source) -> Tensor
use_c10_dispatcher: full
variants: function, method
+ npu_dispatch:
+ NPU: index_add_npu
- func: index_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor source) -> Tensor
variants: function, method
+ npu_dispatch:
+ NPU: index_add_npu
- func: index_fill_.int_Scalar(Tensor(a!) self, int dim, Tensor index, Scalar value) -> Tensor(a!)
variants: method
-@@ -3955,11 +4831,15 @@
+@@ -3955,11 +4839,15 @@
dispatch:
CPU: legacy::cpu::_th_index_fill_
CUDA: legacy::cuda::_th_index_fill_
+ npu_dispatch:
+ NPU: index_fill_npu_
supports_named_tensor: True
- func: index_fill.int_Scalar(Tensor self, int dim, Tensor index, Scalar value) -> Tensor
use_c10_dispatcher: full
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: index_fill_npu
- func: index_fill_.int_Tensor(Tensor(a!) self, int dim, Tensor index, Tensor value) -> Tensor(a!)
variants: method
-@@ -3967,11 +4847,15 @@
+@@ -3967,11 +4855,15 @@
CPU: index_fill_
CUDA: index_fill_
supports_named_tensor: True
+ npu_dispatch:
+ NPU: index_fill_npu_
- func: index_fill.int_Tensor(Tensor self, int dim, Tensor index, Tensor value) -> Tensor
use_c10_dispatcher: full
variants: function, method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: index_fill_npu
- func: index_fill_.Dimname_Scalar(Tensor(a!) self, Dimname dim, Tensor index, Scalar value) -> Tensor(a!)
variants: method
-@@ -3994,6 +4878,8 @@
+@@ -3994,6 +4886,8 @@
dispatch:
CPU: scatter_cpu_
CUDA: legacy::cuda::_th_scatter_
+ npu_dispatch:
+ NPU: scatter_npu_
- func: scatter.src(Tensor self, int dim, Tensor index, Tensor src) -> Tensor
use_c10_dispatcher: full
-@@ -4004,6 +4890,8 @@
+@@ -4004,6 +4898,8 @@
dispatch:
CPU: scatter_fill_cpu_
CUDA: legacy::cuda::_th_scatter_
+ npu_dispatch:
+ NPU: scatter_npu_
- func: scatter.value(Tensor self, int dim, Tensor index, Scalar value) -> Tensor
use_c10_dispatcher: full
-@@ -4020,81 +4908,127 @@
+@@ -4020,81 +4916,127 @@
dispatch:
CPU: scatter_add_cpu_
CUDA: legacy::cuda::_th_scatter_add_
+ npu_dispatch:
+ NPU: scatter_add_npu_
- func: scatter_add(Tensor self, int dim, Tensor index, Tensor src) -> Tensor
use_c10_dispatcher: full
variants: function, method
+ npu_dispatch:
+ NPU: scatter_add_npu
- func: scatter_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor src) -> Tensor
variants: function, method
+ npu_dispatch:
+ NPU: scatter_add_npu
- func: lt_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
variants: method
dispatch:
CPU: lt_
CUDA: lt_
+ npu_dispatch:
+ NPU: lt_npu_
- func: lt_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
variants: method
dispatch:
CPU: lt_
CUDA: lt_
+ npu_dispatch:
+ NPU: lt_npu_
- func: gt_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
variants: method
dispatch:
CPU: gt_
CUDA: gt_
+ npu_dispatch:
+ NPU: gt_npu_
- func: gt_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
variants: method
dispatch:
CPU: gt_
CUDA: gt_
+ npu_dispatch:
+ NPU: gt_npu_
- func: le_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
variants: method
dispatch:
CPU: le_
CUDA: le_
+ npu_dispatch:
+ NPU: le_npu_
- func: le_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
variants: method
dispatch:
CPU: le_
CUDA: le_
+ npu_dispatch:
+ NPU: le_npu_
- func: ge_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
variants: method
dispatch:
CPU: ge_
CUDA: ge_
+ npu_dispatch:
+ NPU: ge_npu_
- func: ge_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
variants: method
dispatch:
CPU: ge_
CUDA: ge_
+ npu_dispatch:
+ NPU: ge_npu_
- func: eq_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
variants: method
dispatch:
CPU: eq_
CUDA: eq_
+ npu_dispatch:
+ NPU: eq_npu_
- func: eq_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
variants: method
dispatch:
CPU: eq_
CUDA: eq_
+ npu_dispatch:
+ NPU: eq_npu_
- func: ne_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
variants: method
dispatch:
CPU: ne_
CUDA: ne_
+ npu_dispatch:
+ NPU: ne_npu_
- func: ne_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
variants: method
dispatch:
CPU: ne_
CUDA: ne_
+ npu_dispatch:
+ NPU: ne_npu_
- func: bitwise_and.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
variants: function
dispatch:
CPU: bitwise_and_out
CUDA: bitwise_and_out
+ npu_dispatch:
+ NPU: bitwise_and_out_npu
- func: bitwise_and.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
variants: function
dispatch:
CPU: bitwise_and_out
CUDA: bitwise_and_out
+ npu_dispatch:
+ NPU: bitwise_and_out_npu
- func: bitwise_and.Scalar(Tensor self, Scalar other) -> Tensor
variants: method, function
+ npu_dispatch:
+ NPU: bitwise_and_npu
- func: bitwise_and.Tensor(Tensor self, Tensor other) -> Tensor
variants: method, function
+ npu_dispatch:
+ NPU: bitwise_and_npu
- func: bitwise_and_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
variants: method
+ npu_dispatch:
+ NPU: bitwise_and_npu_
- func: bitwise_and_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
variants: method
+ npu_dispatch:
+ NPU: bitwise_and_npu_
- func: __iand__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
variants: method
-@@ -4107,70 +5041,106 @@
+@@ -4107,70 +5049,106 @@
dispatch:
CPU: bitwise_or_out
CUDA: bitwise_or_out
+ npu_dispatch:
+ NPU: bitwise_or_out_npu
- func: bitwise_or.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
variants: function
dispatch:
CPU: bitwise_or_out
CUDA: bitwise_or_out
+ npu_dispatch:
+ NPU: bitwise_or_out_npu
- func: bitwise_or.Scalar(Tensor self, Scalar other) -> Tensor
variants: method, function
+ npu_dispatch:
+ NPU: bitwise_or_npu
- func: bitwise_or.Tensor(Tensor self, Tensor other) -> Tensor
variants: method, function
+ npu_dispatch:
+ NPU: bitwise_or_npu
- func: bitwise_or_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
variants: method
+ npu_dispatch:
+ NPU: bitwise_or_npu_
- func: bitwise_or_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
variants: method
+ npu_dispatch:
+ NPU: bitwise_or_npu_
- func: __ior__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
variants: method
+ npu_dispatch:
+ NPU: __ior___npu
- func: __ior__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
variants: method
+ npu_dispatch:
+ NPU: __ior___npu
- func: bitwise_xor.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
variants: function
dispatch:
CPU: bitwise_xor_out
CUDA: bitwise_xor_out
+ npu_dispatch:
+ NPU: bitwise_xor_out_npu
- func: bitwise_xor.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
variants: function
dispatch:
CPU: bitwise_xor_out
CUDA: bitwise_xor_out
+ npu_dispatch:
+ NPU: bitwise_xor_out_npu
- func: bitwise_xor.Scalar(Tensor self, Scalar other) -> Tensor
variants: method, function
+ npu_dispatch:
+ NPU: bitwise_xor_npu
- func: bitwise_xor.Tensor(Tensor self, Tensor other) -> Tensor
variants: method, function
+ npu_dispatch:
+ NPU: bitwise_xor_npu
- func: bitwise_xor_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
variants: method
+ npu_dispatch:
+ NPU: bitwise_xor_npu_
- func: bitwise_xor_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
variants: method
+ npu_dispatch:
+ NPU: bitwise_xor_npu_
- func: __ixor__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
variants: method
-@@ -4240,18 +5210,24 @@
+@@ -4240,18 +5218,24 @@
- func: atan2_(Tensor(a!) self, Tensor other) -> Tensor(a!)
supports_named_tensor: True
variants: method
+ npu_dispatch:
+ NPU: atan2_npu_
- func: tril_(Tensor(a!) self, int diagonal=0) -> Tensor(a!)
variants: method
dispatch:
CPU: tril_cpu_
CUDA: tril_cuda_
+ npu_dispatch:
+ NPU: tril_npu_
- func: triu_(Tensor(a!) self, int diagonal=0) -> Tensor(a!)
variants: method
dispatch:
CPU: triu_cpu_
CUDA: triu_cuda_
+ npu_dispatch:
+ NPU: triu_npu_
- func: digamma_(Tensor(a!) self) -> Tensor(a!)
supports_named_tensor: True
-@@ -4266,6 +5242,8 @@
+@@ -4266,6 +5250,8 @@
dispatch:
CPU: legacy::cpu::_th_renorm_
CUDA: legacy::cuda::_th_renorm_
+ npu_dispatch:
+ NPU: renorm_npu_
- func: pow_.Scalar(Tensor(a!) self, Scalar exponent) -> Tensor(a!)
supports_named_tensor: True
-@@ -4273,6 +5251,8 @@
+@@ -4273,6 +5259,8 @@
dispatch:
CPU: pow_
CUDA: pow_
+ npu_dispatch:
+ NPU: pow_npu_
- func: pow_.Tensor(Tensor(a!) self, Tensor exponent) -> Tensor(a!)
supports_named_tensor: True
-@@ -4280,53 +5260,71 @@
+@@ -4280,53 +5268,71 @@
dispatch:
CPU: pow_
CUDA: pow_
+ npu_dispatch:
+ NPU: pow_npu_
- func: lerp_.Scalar(Tensor(a!) self, Tensor end, Scalar weight) -> Tensor(a!)
variants: method
dispatch:
CPU: lerp_cpu_scalar_
CUDA: lerp_cuda_scalar_
+ npu_dispatch:
+ NPU: lerp_npu_
- func: lerp_.Tensor(Tensor(a!) self, Tensor end, Tensor weight) -> Tensor(a!)
variants: method
dispatch:
CPU: lerp_cpu_tensor_
CUDA: lerp_cuda_tensor_
+ npu_dispatch:
+ NPU: lerp_npu_
- func: fmod_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
variants: method
dispatch:
CPU: fmod_
CUDA: legacy::cuda::_th_fmod_
+ npu_dispatch:
+ NPU: fmod_npu_
- func: fmod_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
variants: method
dispatch:
CPU: fmod_
CUDA: legacy::cuda::_th_fmod_
+ npu_dispatch:
+ NPU: fmod_npu_
- func: remainder_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
variants: method
dispatch:
CPU: remainder_
CUDA: remainder_
+ npu_dispatch:
+ NPU: remainder_npu_
- func: remainder_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
variants: method
dispatch:
CPU: remainder_
CUDA: remainder_
+ npu_dispatch:
+ NPU: remainder_npu_
- func: addbmm_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
variants: method
dispatch:
CPU: legacy::cpu::_th_addbmm_
CUDA: legacy::cuda::_th_addbmm_
+ npu_dispatch:
+ NPU: addbmm_npu_
- func: addbmm.out(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: legacy::cpu::_th_addbmm_out
CUDA: legacy::cuda::_th_addbmm_out
+ npu_dispatch:
+ NPU: addbmm_out_npu
- func: addbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
use_c10_dispatcher: full
-@@ -4334,28 +5332,40 @@
+@@ -4334,28 +5340,40 @@
dispatch:
CPU: legacy::cpu::_th_addbmm
CUDA: legacy::cuda::_th_addbmm
+ npu_dispatch:
+ NPU: addbmm_npu
- func: addcdiv_(Tensor(a!) self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor(a!)
variants: method
supports_named_tensor: True
+ npu_dispatch:
+ NPU: addcdiv_npu_
- func: random_.from(Tensor(a!) self, int from, int? to, *, Generator? generator=None) -> Tensor(a!)
variants: method
dispatch:
CPU: legacy::cpu::_th_random_
CUDA: clamped_random_cuda_
+ npu_dispatch:
+ NPU: random_npu_
supports_named_tensor: True
- func: random_.to(Tensor(a!) self, int to, *, Generator? generator=None) -> Tensor(a!)
variants: method
dispatch:
CPU: legacy::cpu::_th_random_
CUDA: capped_random_cuda_
+ npu_dispatch:
+ NPU: random_npu_
supports_named_tensor: True
- func: random_(Tensor(a!) self, *, Generator? generator=None) -> Tensor(a!)
variants: method
dispatch:
CPU: legacy::cpu::_th_random_
CUDA: random_cuda_
+ npu_dispatch:
+ NPU: random_npu_
supports_named_tensor: True
- func: cauchy_(Tensor(a!) self, float median=0, float sigma=1, *, Generator? generator=None) -> Tensor(a!)
-@@ -4380,6 +5390,8 @@
+@@ -4380,6 +5398,8 @@
dispatch:
CPU: legacy::cpu::_th_diag_out
CUDA: legacy::cuda::_th_diag_out
+ npu_dispatch:
+ NPU: diag_out_npu
- func: diag(Tensor self, int diagonal=0) -> Tensor
use_c10_dispatcher: full
-@@ -4387,40 +5399,58 @@
+@@ -4387,40 +5407,58 @@
dispatch:
CPU: legacy::cpu::_th_diag
CUDA: legacy::cuda::_th_diag
+ npu_dispatch:
+ NPU: diag_npu
- func: cross.out(Tensor self, Tensor other, int? dim=None, *, Tensor(a!) out) -> Tensor(a!)
+ npu_dispatch:
+ NPU: cross_out_npu
- func: cross(Tensor self, Tensor other, int? dim=None) -> Tensor
use_c10_dispatcher: full
variants: method, function
+ npu_dispatch:
+ NPU: cross_npu
- func: triu.out(Tensor self, int diagonal=0, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: triu_cpu_out
CUDA: triu_cuda_out
+ npu_dispatch:
+ NPU: triu_out_npu
- func: triu(Tensor self, int diagonal=0) -> Tensor
use_c10_dispatcher: full
variants: method, function
+ npu_dispatch:
+ NPU: triu_npu
- func: tril.out(Tensor self, int diagonal=0, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: tril_cpu_out
CUDA: tril_cuda_out
+ npu_dispatch:
+ NPU: tril_out_npu
- func: tril(Tensor self, int diagonal=0) -> Tensor
use_c10_dispatcher: full
variants: method, function
+ npu_dispatch:
+ NPU: tril_npu
- func: tril_indices(int row, int col, int offset=0, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
dispatch:
CPU: tril_indices_cpu
CUDA: tril_indices_cuda
+ npu_dispatch:
+ NPU: tril_indices_npu
- func: triu_indices(int row, int col, int offset=0, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
dispatch:
CPU: triu_indices_cpu
CUDA: triu_indices_cuda
+ npu_dispatch:
+ NPU: triu_indices_npu
- func: trace(Tensor self) -> Tensor
use_c10_dispatcher: full
-@@ -4435,6 +5465,8 @@
+@@ -4435,6 +5473,8 @@
CPU: ne_out
CUDA: ne_out
QuantizedCPU: ne_out_quantized_cpu
+ npu_dispatch:
+ NPU: ne_out_npu
- func: ne.Scalar(Tensor self, Scalar other) -> Tensor
supports_named_tensor: True
-@@ -4444,6 +5476,8 @@
+@@ -4444,6 +5484,8 @@
CPU: ne
CUDA: ne
QuantizedCPU: ne_quantized_cpu
+ npu_dispatch:
+ NPU: ne_npu
- func: ne.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
-@@ -4451,6 +5485,8 @@
+@@ -4451,6 +5493,8 @@
CPU: ne_out
CUDA: ne_out
QuantizedCPU: ne_out_quantized_cpu
+ npu_dispatch:
+ NPU: ne_out_npu
- func: ne.Tensor(Tensor self, Tensor other) -> Tensor
supports_named_tensor: True
-@@ -4460,6 +5496,8 @@
+@@ -4460,6 +5504,8 @@
CPU: ne
CUDA: ne
QuantizedCPU: ne_quantized_cpu
+ npu_dispatch:
+ NPU: ne_npu
- func: eq.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
-@@ -4467,6 +5505,8 @@
+@@ -4467,6 +5513,8 @@
CPU: eq_out
CUDA: eq_out
QuantizedCPU: eq_out_quantized_cpu
+ npu_dispatch:
+ NPU: eq_out_npu
- func: eq.Scalar(Tensor self, Scalar other) -> Tensor
supports_named_tensor: True
-@@ -4476,6 +5516,8 @@
+@@ -4476,6 +5524,8 @@
CPU: eq
CUDA: eq
QuantizedCPU: eq_quantized_cpu
+ npu_dispatch:
+ NPU: eq_npu
- func: eq.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
-@@ -4483,6 +5525,8 @@
+@@ -4483,6 +5533,8 @@
CPU: eq_out
CUDA: eq_out
QuantizedCPU: eq_out_quantized_cpu
+ npu_dispatch:
+ NPU: eq_out_npu
- func: eq.Tensor(Tensor self, Tensor other) -> Tensor
supports_named_tensor: True
-@@ -4492,6 +5536,8 @@
+@@ -4492,6 +5544,8 @@
CPU: eq
CUDA: eq
QuantizedCPU: eq_quantized_cpu
+ npu_dispatch:
+ NPU: eq_npu
- func: ge.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
-@@ -4499,6 +5545,8 @@
+@@ -4499,6 +5553,8 @@
CPU: ge_out
CUDA: ge_out
QuantizedCPU: ge_out_quantized_cpu
+ npu_dispatch:
+ NPU: ge_out_npu
- func: ge.Scalar(Tensor self, Scalar other) -> Tensor
supports_named_tensor: True
-@@ -4508,6 +5556,8 @@
+@@ -4508,6 +5564,8 @@
CPU: ge
CUDA: ge
QuantizedCPU: ge_quantized_cpu
+ npu_dispatch:
+ NPU: ge_npu
- func: ge.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
-@@ -4515,6 +5565,8 @@
+@@ -4515,6 +5573,8 @@
CPU: ge_out
CUDA: ge_out
QuantizedCPU: ge_out_quantized_cpu
+ npu_dispatch:
+ NPU: ge_out_npu
- func: ge.Tensor(Tensor self, Tensor other) -> Tensor
supports_named_tensor: True
-@@ -4524,6 +5576,8 @@
+@@ -4524,6 +5584,8 @@
CPU: ge
CUDA: ge
QuantizedCPU: ge_quantized_cpu
+ npu_dispatch:
+ NPU: ge_npu
- func: le.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True
-@@ -4531,6 +5585,8 @@
+@@ -4531,6 +5593,8 @@
CPU: le_out
CUDA: le_out
QuantizedCPU: le_out_quantized_cpu
+ npu_dispatch:
+ NPU: le_out_npu
- func: le.Scalar(Tensor self, Scalar other) -> Tensor
supports_named_tensor: True
-@@ -4540,6 +5596,8 @@
+@@ -4540,6 +5604,8 @@
CPU: le
CUDA: le
QuantizedCPU: le_quantized_cpu
+ npu_dispatch:
+ NPU: le_npu
- func: le.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
supports_named_tensor: True -@@ -4547,6 +5605,8 @@ +@@ -4547,6 +5613,8 @@ CPU: le_out CUDA: le_out QuantizedCPU: le_out_quantized_cpu @@ -4409,7 +4434,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: le.Tensor(Tensor self, Tensor other) -> Tensor supports_named_tensor: True -@@ -4556,6 +5616,8 @@ +@@ -4556,6 +5624,8 @@ CPU: le CUDA: le QuantizedCPU: le_quantized_cpu @@ -4418,7 +4443,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: gt.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True -@@ -4563,6 +5625,8 @@ +@@ -4563,6 +5633,8 @@ CPU: gt_out CUDA: gt_out QuantizedCPU: gt_out_quantized_cpu @@ -4427,7 +4452,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: gt.Scalar(Tensor self, Scalar other) -> Tensor supports_named_tensor: True -@@ -4572,6 +5636,8 @@ +@@ -4572,6 +5644,8 @@ CPU: gt CUDA: gt QuantizedCPU: gt_quantized_cpu @@ -4436,7 +4461,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: gt.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True -@@ -4579,6 +5645,8 @@ +@@ -4579,6 +5653,8 @@ CPU: gt_out CUDA: gt_out QuantizedCPU: gt_out_quantized_cpu @@ -4445,7 +4470,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: gt.Tensor(Tensor self, Tensor other) -> Tensor supports_named_tensor: True -@@ -4588,6 +5656,8 @@ +@@ -4588,6 +5664,8 @@ CPU: gt CUDA: gt QuantizedCPU: gt_quantized_cpu @@ -4454,7 +4479,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: lt.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True -@@ -4595,6 +5665,8 @@ +@@ -4595,6 +5673,8 @@ CPU: lt_out CUDA: lt_out QuantizedCPU: lt_out_quantized_cpu @@ -4463,7 +4488,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: lt.Scalar(Tensor self, Scalar other) -> Tensor supports_named_tensor: True -@@ -4604,6 +5676,8 @@ +@@ -4604,6 +5684,8 @@ CPU: lt CUDA: lt QuantizedCPU: lt_quantized_cpu @@ -4472,7 +4497,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: lt.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) 
supports_named_tensor: True -@@ -4611,6 +5685,8 @@ +@@ -4611,6 +5693,8 @@ CPU: lt_out CUDA: lt_out QuantizedCPU: lt_out_quantized_cpu @@ -4481,7 +4506,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: lt.Tensor(Tensor self, Tensor other) -> Tensor supports_named_tensor: True -@@ -4620,11 +5696,16 @@ +@@ -4620,11 +5704,16 @@ CPU: lt CUDA: lt QuantizedCPU: lt_quantized_cpu @@ -4498,7 +4523,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: take(Tensor self, Tensor index) -> Tensor use_c10_dispatcher: full -@@ -4632,11 +5713,16 @@ +@@ -4632,11 +5721,16 @@ dispatch: CPU: legacy::cpu::_th_take CUDA: legacy::cuda::_th_take @@ -4515,7 +4540,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: index_select(Tensor self, int dim, Tensor index) -> Tensor use_c10_dispatcher: full -@@ -4646,17 +5732,25 @@ +@@ -4646,17 +5740,25 @@ CUDA: legacy::cuda::_th_index_select SparseCPU: index_select_sparse SparseCUDA: index_select_sparse @@ -4541,7 +4566,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: masked_select(Tensor self, Tensor mask) -> Tensor use_c10_dispatcher: full -@@ -4665,11 +5759,15 @@ +@@ -4665,11 +5767,15 @@ CPU: masked_select_cpu CUDA: masked_select_cuda supports_named_tensor: True @@ -4557,7 +4582,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: nonzero(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -4677,6 +5775,8 @@ +@@ -4677,6 +5783,8 @@ dispatch: CPU: legacy::cpu::_th_nonzero CUDA: legacy::cuda::_th_nonzero @@ -4566,7 +4591,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: nonzero_numpy(Tensor self) -> Tensor[] variants: method, function -@@ -4685,6 +5785,8 @@ +@@ -4685,6 +5793,8 @@ dispatch: CPU: gather_out_cpu CUDA: gather_out_cuda @@ -4575,7 +4600,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: gather(Tensor self, int dim, Tensor index, *, bool sparse_grad=False) -> Tensor use_c10_dispatcher: full -@@ -4692,34 +5794,50 @@ +@@ -4692,34 +5802,50 @@ dispatch: CPU: gather_cpu CUDA: gather_cuda @@ -4626,7 +4651,16 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: lstsq.X(Tensor self, Tensor A, *, Tensor(a!) X, Tensor(b!) qr) -> (Tensor(a!) solution, Tensor(b!) QR) dispatch: -@@ -4753,6 +5871,8 @@ +@@ -4742,6 +5868,8 @@ + dispatch: + CPU: _triangular_solve_helper_cpu + CUDA: _triangular_solve_helper_cuda ++ npu_dispatch: ++ NPU: _triangular_solve_helper_npu + + - func: symeig.e(Tensor self, bool eigenvectors=False, bool upper=True, *, Tensor(a!) e, Tensor(b!) V) -> (Tensor(a!) eigenvalues, Tensor(b!) eigenvectors) + +@@ -4753,6 +5881,8 @@ dispatch: CPU: _symeig_helper_cpu CUDA: _symeig_helper_cuda @@ -4635,7 +4669,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: eig.e(Tensor self, bool eigenvectors=False, *, Tensor(a!) e, Tensor(b!) v) -> (Tensor(a!) eigenvalues, Tensor(b!) eigenvectors) dispatch: -@@ -4826,9 +5946,13 @@ +@@ -4826,9 +5956,13 @@ CUDA: legacy::cuda::_th_potri - func: qr.Q(Tensor self, bool some=True, *, Tensor(a!) Q, Tensor(b!) R) -> (Tensor(a!) Q, Tensor(b!) 
R) @@ -4649,7 +4683,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _qr_helper(Tensor self, bool some) -> (Tensor, Tensor) variants: function -@@ -4891,12 +6015,16 @@ +@@ -4891,12 +6025,16 @@ dispatch: CPU: multinomial_out CUDA: multinomial_out @@ -4666,7 +4700,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _multinomial_alias_setup(Tensor probs) -> (Tensor, Tensor) variants: function -@@ -4947,6 +6075,8 @@ +@@ -4947,6 +6085,8 @@ dispatch: CPU: erfinv CUDA: erfinv @@ -4675,7 +4709,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: erfinv_(Tensor(a!) self) -> Tensor(a!) supports_named_tensor: True -@@ -4954,26 +6084,36 @@ +@@ -4954,26 +6094,36 @@ dispatch: CPU: _erfinv__cpu CUDA: _erfinv__cuda @@ -4712,7 +4746,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: dist(Tensor self, Tensor other, Scalar p=2) -> Tensor use_c10_dispatcher: full -@@ -4981,21 +6121,29 @@ +@@ -4981,21 +6131,29 @@ - func: atan2.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True @@ -4742,7 +4776,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: lerp.Scalar(Tensor self, Tensor end, Scalar weight) -> Tensor use_c10_dispatcher: full -@@ -5003,6 +6151,8 @@ +@@ -5003,6 +6161,8 @@ dispatch: CPU: lerp_cpu_scalar CUDA: lerp_cuda_scalar @@ -4751,7 +4785,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: lerp.Tensor(Tensor self, Tensor end, Tensor weight) -> Tensor use_c10_dispatcher: full -@@ -5010,6 +6160,8 @@ +@@ -5010,6 +6170,8 @@ dispatch: CPU: lerp_cpu_tensor CUDA: lerp_cuda_tensor @@ -4760,7 +4794,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: histc.out(Tensor self, int bins=100, Scalar min=0, Scalar max=0, *, Tensor(a!) out) -> Tensor(a!) 
dispatch: -@@ -5027,6 +6179,8 @@ +@@ -5027,6 +6189,8 @@ dispatch: CPU: fmod_out CUDA: legacy::cuda::_th_fmod_out @@ -4769,7 +4803,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: fmod.Scalar(Tensor self, Scalar other) -> Tensor use_c10_dispatcher: full -@@ -5034,11 +6188,15 @@ +@@ -5034,11 +6198,15 @@ dispatch: CPU: fmod CUDA: legacy::cuda::_th_fmod @@ -4785,7 +4819,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: fmod.Tensor(Tensor self, Tensor other) -> Tensor use_c10_dispatcher: full -@@ -5046,11 +6204,15 @@ +@@ -5046,11 +6214,15 @@ dispatch: CPU: fmod CUDA: legacy::cuda::_th_fmod @@ -4801,7 +4835,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: remainder.Scalar(Tensor self, Scalar other) -> Tensor use_c10_dispatcher: full -@@ -5058,11 +6220,15 @@ +@@ -5058,11 +6230,15 @@ dispatch: CPU: remainder CUDA: remainder @@ -4817,7 +4851,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: remainder.Tensor(Tensor self, Tensor other) -> Tensor use_c10_dispatcher: full -@@ -5070,12 +6236,18 @@ +@@ -5070,12 +6246,18 @@ dispatch: CPU: remainder CUDA: remainder @@ -4836,7 +4870,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: min(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -5084,13 +6256,19 @@ +@@ -5084,13 +6266,19 @@ CPU: min CUDA: legacy::cuda::_th_min QuantizedCPU: min_quant @@ -4856,7 +4890,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: max(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -5099,6 +6277,8 @@ +@@ -5099,6 +6287,8 @@ CPU: max CUDA: legacy::cuda::_th_max QuantizedCPU: max_quant @@ -4865,7 +4899,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: median(Tensor self) -> Tensor -@@ -5107,12 +6287,16 @@ +@@ -5107,12 +6297,16 @@ dispatch: CPU: median_cpu CUDA: median_cuda @@ -4882,7 +4916,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: sort(Tensor self, int dim=-1, bool descending=False) -> (Tensor values, Tensor indices) variants: method, function -@@ -5120,23 +6304,45 @@ +@@ -5120,23 +6314,45 @@ CPU: legacy::cpu::_th_sort CUDA: legacy::cuda::_th_sort QuantizedCPU: sort_quant @@ -4928,7 +4962,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: topk(Tensor self, int k, int dim=-1, bool largest=True, bool sorted=True) -> (Tensor values, Tensor indices) variants: method, function -@@ -5144,11 +6350,15 @@ +@@ -5144,11 +6360,15 @@ CPU: topk CUDA: topk QuantizedCPU: quantized_topk_cpu @@ -4944,7 +4978,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: any(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -5159,11 +6369,15 @@ +@@ -5159,11 +6379,15 @@ CUDA: any SparseCPU: any_sparse SparseCUDA: any_sparse @@ -4960,7 +4994,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: renorm(Tensor self, Scalar p, int dim, Scalar maxnorm) -> Tensor use_c10_dispatcher: full -@@ -5171,6 +6385,8 @@ +@@ -5171,6 +6395,8 @@ dispatch: CPU: legacy::cpu::_th_renorm CUDA: legacy::cuda::_th_renorm @@ -4969,7 +5003,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: unfold(Tensor(a) self, int dimension, int size, int step) -> Tensor(a) variants: method -@@ -5178,6 
+6394,8 @@ +@@ -5178,6 +6404,8 @@ dispatch: CPU: unfold CUDA: unfold @@ -4978,7 +5012,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: equal(Tensor self, Tensor other) -> bool use_c10_dispatcher: full -@@ -5186,6 +6404,8 @@ +@@ -5186,6 +6414,8 @@ CPU: legacy::cpu::_th_equal CUDA: legacy::cuda::_th_equal QuantizedCPU: quantized_equal @@ -4987,7 +5021,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: pow.Tensor_Tensor_out(Tensor self, Tensor exponent, *, Tensor(a!) out) -> Tensor(a!) -@@ -5193,6 +6413,8 @@ +@@ -5193,6 +6423,8 @@ dispatch: CPU: pow_out CUDA: pow_out @@ -4996,7 +5030,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: pow.Tensor_Tensor(Tensor self, Tensor exponent) -> Tensor use_c10_dispatcher: full -@@ -5201,12 +6423,16 @@ +@@ -5201,12 +6433,16 @@ dispatch: CPU: pow CUDA: pow @@ -5013,7 +5047,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: pow.Scalar(Scalar self, Tensor exponent) -> Tensor use_c10_dispatcher: full -@@ -5214,6 +6440,8 @@ +@@ -5214,6 +6450,8 @@ dispatch: CPU: pow CUDA: pow @@ -5022,7 +5056,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: normal_(Tensor(a!) self, float mean=0, float std=1, *, Generator? generator=None) -> Tensor(a!) variants: method -@@ -5221,40 +6449,58 @@ +@@ -5221,40 +6459,58 @@ CPU: normal_cpu_ CUDA: normal_cuda_ supports_named_tensor: True @@ -5081,7 +5115,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: alias(Tensor(a) self) -> Tensor(a) variants: method, function -@@ -5265,43 +6511,59 @@ +@@ -5265,43 +6521,59 @@ dispatch: CPU: legacy::cpu::_th_addr CUDA: legacy::cuda::_th_addr @@ -5142,7 +5176,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _var(Tensor self, bool unbiased=True) -> Tensor use_c10_dispatcher: full -@@ -5309,6 +6571,8 @@ +@@ -5309,6 +6581,8 @@ CPU: legacy::cpu::_th_var CUDA: legacy::cuda::_th_var supports_named_tensor: True @@ -5151,7 +5185,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _std(Tensor self, bool unbiased=True) -> Tensor use_c10_dispatcher: full -@@ -5321,6 +6585,8 @@ +@@ -5321,6 +6595,8 @@ variants: function dispatch: CUDA: _amp_non_finite_check_and_unscale_cuda_ @@ -5160,7 +5194,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _amp_update_scale(Tensor(a!) 
growth_tracker, Tensor current_scale, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor variants: function -@@ -5332,12 +6598,16 @@ +@@ -5332,12 +6608,16 @@ CPU: _cat_cpu CUDA: cat_cuda QuantizedCPU: quantized_cat @@ -5177,7 +5211,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _mode(Tensor self, int dim=-1, bool keepdim=False) -> (Tensor, Tensor) dispatch: -@@ -5353,36 +6623,50 @@ +@@ -5353,36 +6633,50 @@ dispatch: CPU: legacy::cpu::_th_max CUDA: legacy::cuda::_th_max @@ -5228,7 +5262,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: mse_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction) -> Tensor use_c10_dispatcher: full -@@ -5390,23 +6674,33 @@ +@@ -5390,23 +6684,33 @@ dispatch: CPU: mse_loss_backward CUDA: mse_loss_backward @@ -5262,7 +5296,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: multi_margin_loss.out(Tensor self, Tensor target, Scalar p=1, Scalar margin=1, Tensor? weight=None, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -5434,22 +6728,30 @@ +@@ -5434,22 +6738,30 @@ - func: multilabel_margin_loss.out(Tensor self, Tensor target, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!) python_module: nn @@ -5293,7 +5327,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: multilabel_margin_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, Tensor is_target, *, Tensor(a!) grad_input) -> Tensor(a!) python_module: nn -@@ -5466,97 +6768,137 @@ +@@ -5466,97 +6778,137 @@ - func: nll_loss.out(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, int ignore_index=-100, *, Tensor(a!) out) -> Tensor(a!) python_module: nn @@ -5431,7 +5465,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: elu.out(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -5564,6 +6906,8 @@ +@@ -5564,6 +6916,8 @@ CPU: elu_out CUDA: elu_out QuantizedCPU: quantized_elu_out @@ -5440,7 +5474,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: elu(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor use_c10_dispatcher: full -@@ -5572,16 +6916,22 @@ +@@ -5572,16 +6926,22 @@ CPU: elu CUDA: elu QuantizedCPU: quantized_elu @@ -5463,7 +5497,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: elu_(Tensor(a!) self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor(a!) 
python_module: nn -@@ -5589,12 +6939,16 @@ +@@ -5589,12 +6949,16 @@ CPU: elu_ CUDA: elu_ QuantizedCPU: quantized_elu_ @@ -5480,7 +5514,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: glu(Tensor self, int dim=-1) -> Tensor use_c10_dispatcher: full -@@ -5602,12 +6956,16 @@ +@@ -5602,12 +6966,16 @@ dispatch: CPU: glu CUDA: legacy::cuda::_thnn_glu_forward @@ -5497,7 +5531,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: glu_backward(Tensor grad_output, Tensor self, int dim) -> Tensor use_c10_dispatcher: full -@@ -5615,20 +6973,30 @@ +@@ -5615,20 +6983,30 @@ dispatch: CPU: glu_backward CUDA: legacy::cuda::_thnn_glu_backward @@ -5528,7 +5562,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: hardtanh.out(Tensor self, Scalar min_val=-1, Scalar max_val=1, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -5636,6 +7004,8 @@ +@@ -5636,6 +7014,8 @@ CPU: hardtanh_out CUDA: hardtanh_out QuantizedCPU: quantized_hardtanh_out @@ -5537,7 +5571,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: hardtanh(Tensor self, Scalar min_val=-1, Scalar max_val=1) -> Tensor use_c10_dispatcher: full -@@ -5644,16 +7014,22 @@ +@@ -5644,16 +7024,22 @@ CPU: hardtanh CUDA: hardtanh QuantizedCPU: quantized_hardtanh @@ -5560,7 +5594,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: hardtanh_(Tensor(a!) self, Scalar min_val=-1, Scalar max_val=1) -> Tensor(a!) python_module: nn -@@ -5661,6 +7037,8 @@ +@@ -5661,6 +7047,8 @@ CPU: hardtanh_ CUDA: hardtanh_ QuantizedCPU: quantized_hardtanh_ @@ -5569,7 +5603,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: leaky_relu.out(Tensor self, Scalar negative_slope=0.01, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -5668,6 +7046,8 @@ +@@ -5668,6 +7056,8 @@ CPU: leaky_relu_out CUDA: leaky_relu_out QuantizedCPU: quantized_leaky_relu_out @@ -5578,7 +5612,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: leaky_relu(Tensor self, Scalar negative_slope=0.01) -> Tensor use_c10_dispatcher: full -@@ -5676,10 +7056,14 @@ +@@ -5676,10 +7066,14 @@ CPU: leaky_relu CUDA: leaky_relu QuantizedCPU: quantized_leaky_relu @@ -5593,7 +5627,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: leaky_relu_(Tensor(a!) self, Scalar negative_slope=0.01) -> Tensor(a!) python_module: nn -@@ -5687,31 +7071,44 @@ +@@ -5687,31 +7081,44 @@ CPU: leaky_relu_ CUDA: leaky_relu_ QuantizedCPU: quantized_leaky_relu_ @@ -5638,7 +5672,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: log_sigmoid_backward(Tensor grad_output, Tensor self, Tensor buffer) -> Tensor use_c10_dispatcher: full -@@ -5719,62 +7116,88 @@ +@@ -5719,62 +7126,88 @@ dispatch: CPU: log_sigmoid_backward_cpu CUDA: legacy::cuda::_thnn_log_sigmoid_backward @@ -5727,7 +5761,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: adaptive_avg_pool2d.out(Tensor self, int[2] output_size, *, Tensor(a!) out) -> Tensor(a!) 
python_module: nn -@@ -5782,9 +7205,13 @@ +@@ -5782,9 +7215,13 @@ CPU: adaptive_avg_pool2d_out_cpu CUDA: adaptive_avg_pool2d_out_cuda MkldnnCPU: mkldnn_adaptive_avg_pool2d_out @@ -5741,7 +5775,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: mkldnn_adaptive_avg_pool2d(Tensor self, int[2] output_size) -> Tensor dispatch: -@@ -5796,6 +7223,8 @@ +@@ -5796,6 +7233,8 @@ CPU: adaptive_avg_pool2d_cpu CUDA: adaptive_avg_pool2d_cuda QuantizedCPU: quantized_adaptive_avg_pool2d @@ -5750,7 +5784,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _adaptive_avg_pool2d_backward(Tensor grad_output, Tensor self) -> Tensor use_c10_dispatcher: full -@@ -5803,24 +7232,32 @@ +@@ -5803,24 +7242,32 @@ dispatch: CPU: adaptive_avg_pool2d_backward_cpu CUDA: adaptive_avg_pool2d_backward_cuda @@ -5783,7 +5817,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: adaptive_avg_pool3d_backward(Tensor grad_output, Tensor self) -> Tensor use_c10_dispatcher: full -@@ -5828,6 +7265,8 @@ +@@ -5828,6 +7275,8 @@ dispatch: CPU: adaptive_avg_pool3d_backward_cpu CUDA: adaptive_avg_pool3d_backward_cuda @@ -5792,7 +5826,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Return: (Tensor output, Tensor indices) - func: adaptive_max_pool2d.out(Tensor self, int[2] output_size, *, Tensor(a!) out, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!)) -@@ -5835,6 +7274,8 @@ +@@ -5835,6 +7284,8 @@ dispatch: CPU: adaptive_max_pool2d_out_cpu CUDA: adaptive_max_pool2d_out_cuda @@ -5801,7 +5835,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Return: (Tensor output, Tensor indices) - func: adaptive_max_pool2d(Tensor self, int[2] output_size) -> (Tensor, Tensor) -@@ -5842,12 +7283,16 @@ +@@ -5842,12 +7293,16 @@ dispatch: CPU: adaptive_max_pool2d_cpu CUDA: adaptive_max_pool2d_cuda @@ -5818,7 +5852,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: adaptive_max_pool2d_backward(Tensor grad_output, Tensor self, Tensor indices) -> Tensor use_c10_dispatcher: full -@@ -5855,6 +7300,8 @@ +@@ -5855,6 +7310,8 @@ dispatch: CPU: adaptive_max_pool2d_backward_cpu CUDA: adaptive_max_pool2d_backward_cuda @@ -5827,7 +5861,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Return: (Tensor output, Tensor indices) - func: adaptive_max_pool3d.out(Tensor self, int[3] output_size, *, Tensor(a!) out, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!)) -@@ -5889,6 +7336,8 @@ +@@ -5889,6 +7346,8 @@ CPU: avg_pool2d_out_cpu CUDA: avg_pool2d_out_cuda MkldnnCPU: mkldnn_avg_pool2d_out @@ -5836,7 +5870,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: avg_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None) -> Tensor python_module: nn -@@ -5897,24 +7346,32 @@ +@@ -5897,24 +7356,32 @@ CUDA: avg_pool2d_cuda MkldnnCPU: mkldnn_avg_pool2d QuantizedCPU: quantized_avg_pool2d @@ -5869,7 +5903,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: avg_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? 
divisor_override=None) -> Tensor python_module: nn -@@ -5922,18 +7379,24 @@ +@@ -5922,18 +7389,24 @@ CPU: avg_pool3d_cpu CUDA: avg_pool3d_cuda QuantizedCPU: quantized_avg_pool3d @@ -5894,7 +5928,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Return: (Tensor output, Tensor indices) - func: fractional_max_pool2d.output(Tensor self, int[2] kernel_size, int[2] output_size, Tensor random_samples, *, Tensor(a!) output, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!)) -@@ -5993,6 +7456,8 @@ +@@ -5993,6 +7466,8 @@ dispatch: CPU: max_pool2d_with_indices_out_cpu CUDA: max_pool2d_with_indices_out_cuda @@ -5903,7 +5937,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Return: (Tensor output, Tensor indices) - func: max_pool2d_with_indices(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor) -@@ -6000,6 +7465,8 @@ +@@ -6000,6 +7475,8 @@ dispatch: CPU: max_pool2d_with_indices_cpu CUDA: max_pool2d_with_indices_cuda @@ -5912,7 +5946,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: max_pool2d_with_indices_backward.grad_input(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, bool ceil_mode, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!) -@@ -6007,12 +7474,16 @@ +@@ -6007,12 +7484,16 @@ dispatch: CPU: max_pool2d_with_indices_backward_out_cpu CUDA: max_pool2d_with_indices_backward_out_cuda @@ -5929,7 +5963,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Return: (Tensor output, Tensor indices) - func: max_pool3d_with_indices.out(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False, *, Tensor(a!) out, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!)) -@@ -6020,6 +7491,8 @@ +@@ -6020,6 +7501,8 @@ dispatch: CPU: max_pool3d_with_indices_out_cpu CUDA: max_pool3d_with_indices_out_cuda @@ -5938,7 +5972,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Return: (Tensor output, Tensor indices) - func: max_pool3d_with_indices(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor) -@@ -6027,6 +7500,8 @@ +@@ -6027,6 +7510,8 @@ dispatch: CPU: max_pool3d_with_indices_cpu CUDA: max_pool3d_with_indices_cuda @@ -5947,7 +5981,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: max_pool3d_with_indices_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool ceil_mode, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!) -@@ -6034,12 +7509,17 @@ +@@ -6034,12 +7519,17 @@ dispatch: CPU: max_pool3d_with_indices_backward_out_cpu CUDA: max_pool3d_with_indices_backward_out_cuda @@ -5965,7 +5999,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: max_unpool2d.out(Tensor self, Tensor indices, int[2] output_size, *, Tensor(a!) out) -> Tensor(a!) 
python_module: nn -@@ -6166,12 +7646,16 @@ +@@ -6166,12 +7656,16 @@ dispatch: CPU: replication_pad2d_out_cpu CUDA: replication_pad2d_out_cuda @@ -5982,7 +6016,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: replication_pad2d_backward.grad_input(Tensor grad_output, Tensor self, int[4] padding, *, Tensor(a!) grad_input) -> Tensor(a!) python_module: nn -@@ -6214,12 +7698,16 @@ +@@ -6214,12 +7708,16 @@ dispatch: CPU: upsample_linear1d_out_cpu CUDA: upsample_linear1d_out_cuda @@ -5999,7 +6033,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: upsample_linear1d_backward.grad_input(Tensor grad_output, int[1] output_size, int[3] input_size, bool align_corners, float? scales=None, *, Tensor(a!) grad_input) -> Tensor(a!) python_module: nn -@@ -6232,12 +7720,16 @@ +@@ -6232,12 +7730,16 @@ dispatch: CPU: upsample_linear1d_backward_cpu CUDA: upsample_linear1d_backward_cuda @@ -6016,7 +6050,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: upsample_bilinear2d(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor python_module: nn -@@ -6245,96 +7737,128 @@ +@@ -6245,96 +7747,128 @@ CPU: upsample_bilinear2d_cpu CUDA: upsample_bilinear2d_cuda QuantizedCPU: quantized_upsample_bilinear2d_cpu @@ -6145,7 +6179,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: upsample_nearest2d(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor python_module: nn -@@ -6342,24 +7866,32 @@ +@@ -6342,24 +7876,32 @@ CPU: upsample_nearest2d_cpu CUDA: upsample_nearest2d_cuda QuantizedCPU: quantized_upsample_nearest2d_cpu @@ -6178,7 +6212,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: upsample_nearest3d(Tensor self, int[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor python_module: nn -@@ -6367,38 +7899,52 @@ +@@ -6367,38 +7909,52 @@ CPU: upsample_nearest3d_cpu CUDA: upsample_nearest3d_cuda QuantizedCPU: quantized_upsample_nearest3d_cpu @@ -6231,7 +6265,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # What's a thnn_conv_ versus a slow_conv_? # -@@ -6423,24 +7969,32 @@ +@@ -6423,24 +7979,32 @@ dispatch: CPU: slow_conv_transpose2d_out_cpu CUDA: slow_conv_transpose2d_out_cuda @@ -6264,7 +6298,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: slow_conv_transpose3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] output_padding=0, int[3] dilation=1, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -6468,21 +8022,29 @@ +@@ -6468,21 +8032,29 @@ - func: thnn_conv2d.out(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0, *, Tensor(a!) out) -> Tensor(a!) python_module: nn @@ -6294,7 +6328,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: thnn_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, Tensor finput, Tensor fgrad_input, *, Tensor(a!)? grad_input, Tensor(b!)? grad_weight, Tensor(c!)? 
grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!)) python_module: nn -@@ -6495,32 +8057,46 @@ +@@ -6495,32 +8067,46 @@ dispatch: CPU: slow_conv2d_backward_cpu CUDA: legacy::cuda::_thnn_conv2d_backward @@ -6341,7 +6375,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: slow_conv3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -6553,12 +8129,16 @@ +@@ -6553,12 +8139,16 @@ dispatch: CPU: slow_conv_dilated2d_cpu CUDA: slow_conv_dilated2d_cuda @@ -6358,7 +6392,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: slow_conv_dilated3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] dilation=1) -> Tensor python_module: nn -@@ -6577,57 +8157,413 @@ +@@ -6577,57 +8167,413 @@ dispatch: CPU: col2im_out_cpu CUDA: col2im_out_cuda @@ -6775,7 +6809,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S pytorch-develop/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S --- pytorch-v1.5.0/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S 2021-07-23 18:20:43.681374649 +0800 ++++ pytorch-develop/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S 2021-07-26 21:32:24.519094569 +0800 @@ -659,14 +659,14 @@ SUB x1, x1, 4 @@ -6801,7 +6835,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= CMP x1, 2 diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/TensorCompare.cpp pytorch-develop/aten/src/ATen/native/TensorCompare.cpp --- pytorch-v1.5.0/aten/src/ATen/native/TensorCompare.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/TensorCompare.cpp 2021-07-23 18:20:43.629372785 +0800 ++++ pytorch-develop/aten/src/ATen/native/TensorCompare.cpp 2021-07-26 21:32:24.463092561 +0800 @@ -64,7 +64,7 @@ Tensor isinf(const Tensor &self) { @@ -6813,7 +6847,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return AT_DISPATCH_FLOATING_TYPES_AND_HALF(self.scalar_type(), "isinf", [&]() { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/TensorFactories.cpp pytorch-develop/aten/src/ATen/native/TensorFactories.cpp --- pytorch-v1.5.0/aten/src/ATen/native/TensorFactories.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/TensorFactories.cpp 2021-07-23 18:20:43.629372785 +0800 ++++ pytorch-develop/aten/src/ATen/native/TensorFactories.cpp 2021-07-26 21:32:24.463092561 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -6858,7 +6892,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/TensorProperties.cpp pytorch-develop/aten/src/ATen/native/TensorProperties.cpp --- pytorch-v1.5.0/aten/src/ATen/native/TensorProperties.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/TensorProperties.cpp 2021-07-23 18:20:43.629372785 +0800 ++++ pytorch-develop/aten/src/ATen/native/TensorProperties.cpp 2021-07-26 21:32:24.463092561 +0800 @@ -87,6 +87,7 @@ if (self.is_contiguous(memory_format)) { return self; @@ -6869,7 +6903,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= "preserve memory format is unsupported by the contiguous operator"); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/UpSampleBicubic2d.cpp pytorch-develop/aten/src/ATen/native/UpSampleBicubic2d.cpp --- pytorch-v1.5.0/aten/src/ATen/native/UpSampleBicubic2d.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/UpSampleBicubic2d.cpp 2021-07-23 18:20:43.629372785 +0800 ++++ pytorch-develop/aten/src/ATen/native/UpSampleBicubic2d.cpp 2021-07-26 21:32:24.467092704 +0800 @@ -26,7 +26,7 @@ const scalar_t* in = &idata[output_y * input_width + output_x]; scalar_t* out = &odata[output_y * output_width + output_x]; @@ -6881,7 +6915,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= out += output_width * output_height; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native_parse.py pytorch-develop/aten/src/ATen/native_parse.py --- pytorch-v1.5.0/aten/src/ATen/native_parse.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native_parse.py 2021-07-23 18:20:43.697375223 +0800 ++++ pytorch-develop/aten/src/ATen/native_parse.py 2021-07-26 21:32:24.535095142 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -6919,7 +6953,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= msg = '''Exception raised in processing function: diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/preprocess_declarations.py pytorch-develop/aten/src/ATen/preprocess_declarations.py --- pytorch-v1.5.0/aten/src/ATen/preprocess_declarations.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/preprocess_declarations.py 2021-07-23 18:20:43.697375223 +0800 ++++ pytorch-develop/aten/src/ATen/preprocess_declarations.py 2021-07-26 21:32:24.535095142 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -6951,7 +6985,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/templates/TensorBody.h pytorch-develop/aten/src/ATen/templates/TensorBody.h --- pytorch-v1.5.0/aten/src/ATen/templates/TensorBody.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/templates/TensorBody.h 2021-07-23 18:20:43.697375223 +0800 ++++ pytorch-develop/aten/src/ATen/templates/TensorBody.h 2021-07-26 21:32:24.535095142 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -6984,7 +7018,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/templates/TensorMethods.h pytorch-develop/aten/src/ATen/templates/TensorMethods.h --- pytorch-v1.5.0/aten/src/ATen/templates/TensorMethods.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/templates/TensorMethods.h 2021-07-23 18:20:43.697375223 +0800 ++++ pytorch-develop/aten/src/ATen/templates/TensorMethods.h 2021-07-26 21:32:24.535095142 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7018,7 +7052,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/TH/CMakeLists.txt pytorch-develop/aten/src/TH/CMakeLists.txt --- pytorch-v1.5.0/aten/src/TH/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/TH/CMakeLists.txt 2021-07-23 18:20:43.701375366 +0800 ++++ pytorch-develop/aten/src/TH/CMakeLists.txt 2021-07-26 21:32:24.539095286 +0800 @@ -48,6 +48,11 @@ ${CMAKE_CURRENT_SOURCE_DIR} PARENT_SCOPE) @@ -7033,7 +7067,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/TH/generic/THStorage.cpp pytorch-develop/aten/src/TH/generic/THStorage.cpp --- pytorch-v1.5.0/aten/src/TH/generic/THStorage.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/TH/generic/THStorage.cpp 2021-07-23 18:20:43.701375366 +0800 ++++ pytorch-develop/aten/src/TH/generic/THStorage.cpp 2021-07-26 21:32:24.543095429 +0800 @@ -1,9 +1,32 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -7142,7 +7176,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/TH/generic/THStorage.h pytorch-develop/aten/src/TH/generic/THStorage.h --- pytorch-v1.5.0/aten/src/TH/generic/THStorage.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/TH/generic/THStorage.h 2021-07-23 18:20:43.701375366 +0800 ++++ pytorch-develop/aten/src/TH/generic/THStorage.h 2021-07-26 21:32:24.543095429 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7181,7 +7215,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/CMakeLists.txt pytorch-develop/c10/CMakeLists.txt --- pytorch-v1.5.0/c10/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/CMakeLists.txt 2021-07-23 18:20:43.713375796 +0800 ++++ pytorch-develop/c10/CMakeLists.txt 2021-07-26 21:32:24.555095860 +0800 @@ -63,6 +63,14 @@ message(STATUS "don't use NUMA") endif() @@ -7210,7 +7244,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # not checked in diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/Backend.h pytorch-develop/c10/core/Backend.h --- pytorch-v1.5.0/c10/core/Backend.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/Backend.h 2021-07-23 18:20:43.713375796 +0800 ++++ pytorch-develop/c10/core/Backend.h 2021-07-26 21:32:24.555095860 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7305,7 +7339,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/Device.cpp pytorch-develop/c10/core/Device.cpp --- pytorch-v1.5.0/c10/core/Device.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/Device.cpp 2021-07-23 18:20:43.713375796 +0800 ++++ pytorch-develop/c10/core/Device.cpp 2021-07-26 21:32:24.555095860 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7345,7 +7379,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= types.begin(), diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/Device.h pytorch-develop/c10/core/Device.h --- pytorch-v1.5.0/c10/core/Device.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/Device.h 2021-07-23 18:20:43.717375940 +0800 ++++ pytorch-develop/c10/core/Device.h 2021-07-26 21:32:24.559096002 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -7380,7 +7414,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return type_ == DeviceType::CPU; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/DeviceType.cpp pytorch-develop/c10/core/DeviceType.cpp --- pytorch-v1.5.0/c10/core/DeviceType.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/DeviceType.cpp 2021-07-23 18:20:43.717375940 +0800 ++++ pytorch-develop/c10/core/DeviceType.cpp 2021-07-26 21:32:24.559096002 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7420,7 +7454,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return false; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/DeviceType.h pytorch-develop/c10/core/DeviceType.h --- pytorch-v1.5.0/c10/core/DeviceType.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/DeviceType.h 2021-07-23 18:20:43.717375940 +0800 ++++ pytorch-develop/c10/core/DeviceType.h 2021-07-26 21:32:24.559096002 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7463,7 +7497,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= constexpr DeviceType kXLA = DeviceType::XLA; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/DispatchKey.cpp pytorch-develop/c10/core/DispatchKey.cpp --- pytorch-v1.5.0/c10/core/DispatchKey.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/DispatchKey.cpp 2021-07-23 18:20:43.717375940 +0800 ++++ pytorch-develop/c10/core/DispatchKey.cpp 2021-07-26 21:32:24.559096002 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7495,7 +7529,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= case DispatchKey::TESTING_ONLY_GenericModeTensorId: diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/DispatchKey.h pytorch-develop/c10/core/DispatchKey.h --- pytorch-v1.5.0/c10/core/DispatchKey.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/DispatchKey.h 2021-07-23 18:20:43.717375940 +0800 ++++ pytorch-develop/c10/core/DispatchKey.h 2021-07-26 21:32:24.559096002 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7527,7 +7561,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/Storage.h pytorch-develop/c10/core/Storage.h --- pytorch-v1.5.0/c10/core/Storage.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/Storage.h 2021-07-23 18:20:43.717375940 +0800 ++++ pytorch-develop/c10/core/Storage.h 2021-07-26 21:32:24.559096002 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -7561,7 +7595,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= }; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/StorageImpl.h pytorch-develop/c10/core/StorageImpl.h --- pytorch-v1.5.0/c10/core/StorageImpl.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/StorageImpl.h 2021-07-23 18:20:43.717375940 +0800 ++++ pytorch-develop/c10/core/StorageImpl.h 2021-07-26 21:32:24.559096002 +0800 @@ -1,12 +1,39 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7618,7 +7652,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/TensorImpl.h pytorch-develop/c10/core/TensorImpl.h --- pytorch-v1.5.0/c10/core/TensorImpl.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/TensorImpl.h 2021-07-23 18:20:43.717375940 +0800 ++++ pytorch-develop/c10/core/TensorImpl.h 2021-07-26 21:32:24.559096002 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7688,7 +7722,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/TensorOptions.h pytorch-develop/c10/core/TensorOptions.h --- pytorch-v1.5.0/c10/core/TensorOptions.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/TensorOptions.h 2021-07-23 18:20:43.717375940 +0800 ++++ pytorch-develop/c10/core/TensorOptions.h 2021-07-26 21:32:24.559096002 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7729,7 +7763,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/macros/Export.h pytorch-develop/c10/macros/Export.h --- pytorch-v1.5.0/c10/macros/Export.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/macros/Export.h 2021-07-23 18:20:43.717375940 +0800 ++++ pytorch-develop/c10/macros/Export.h 2021-07-26 21:32:24.563096147 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7856,7 +7890,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/caffe2/CMakeLists.txt pytorch-develop/caffe2/CMakeLists.txt --- pytorch-v1.5.0/caffe2/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/caffe2/CMakeLists.txt 2021-07-23 18:20:43.725376226 +0800 ++++ pytorch-develop/caffe2/CMakeLists.txt 2021-07-26 21:32:24.571096433 +0800 @@ -32,6 +32,7 @@ # Add source, includes, and libs to lists list(APPEND Caffe2_CPU_SRCS ${ATen_CPU_SRCS}) @@ -8003,7 +8037,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Call again since Caffe2_HIP_INCLUDE is extended with ATen include dirs. 
diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/.clang-format pytorch-develop/.clang-format --- pytorch-v1.5.0/.clang-format 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/.clang-format 2021-07-23 18:20:43.597371637 +0800 ++++ pytorch-develop/.clang-format 2021-07-26 21:32:24.431091414 +0800 @@ -84,5 +84,4 @@ SpacesInSquareBrackets: false Standard: Cpp11 @@ -8014,7 +8048,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/cmake/BuildVariables.cmake pytorch-develop/cmake/BuildVariables.cmake --- pytorch-v1.5.0/cmake/BuildVariables.cmake 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/BuildVariables.cmake 2021-07-23 18:20:43.837380242 +0800 ++++ pytorch-develop/cmake/BuildVariables.cmake 2021-07-26 21:32:24.683100449 +0800 @@ -11,6 +11,7 @@ # CMakeLists.txt files under each folder respectively. set(Caffe2_CPU_SRCS) @@ -8041,7 +8075,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # symbols. However, if the lib is whole linked in caffe2 lib, we don't want diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/cmake/Codegen.cmake pytorch-develop/cmake/Codegen.cmake --- pytorch-v1.5.0/cmake/Codegen.cmake 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/Codegen.cmake 2021-07-23 18:20:43.837380242 +0800 ++++ pytorch-develop/cmake/Codegen.cmake 2021-07-26 21:32:24.683100449 +0800 @@ -191,13 +191,14 @@ file(READ ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_cpp.txt generated_cpp) file(READ ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_cpp.txt-cuda cuda_generated_cpp) @@ -8072,7 +8106,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= endif() diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/cmake/Dependencies.cmake pytorch-develop/cmake/Dependencies.cmake --- pytorch-v1.5.0/cmake/Dependencies.cmake 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/Dependencies.cmake 2021-07-23 18:20:43.837380242 +0800 ++++ pytorch-develop/cmake/Dependencies.cmake 2021-07-26 21:32:24.683100449 +0800 @@ -1509,6 +1509,13 @@ ENDIF(NOT C_HAS_THREAD) endif() @@ -8089,7 +8123,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/cmake/Summary.cmake pytorch-develop/cmake/Summary.cmake --- pytorch-v1.5.0/cmake/Summary.cmake 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/Summary.cmake 2021-07-23 18:20:43.837380242 +0800 ++++ pytorch-develop/cmake/Summary.cmake 2021-07-26 21:32:24.687100592 +0800 @@ -134,6 +134,7 @@ if(NOT "${SELECTED_OP_LIST}" STREQUAL "") message(STATUS " SELECTED_OP_LIST : ${SELECTED_OP_LIST}") @@ -8100,7 +8134,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= endfunction() diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur 
pytorch-v1.5.0/cmake/TorchConfig.cmake.in pytorch-develop/cmake/TorchConfig.cmake.in --- pytorch-v1.5.0/cmake/TorchConfig.cmake.in 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/TorchConfig.cmake.in 2021-07-23 18:20:43.837380242 +0800 ++++ pytorch-develop/cmake/TorchConfig.cmake.in 2021-07-26 21:32:24.687100592 +0800 @@ -112,6 +112,11 @@ list(APPEND TORCH_LIBRARIES ${TORCH_CUDA_LIBRARIES}) endif() @@ -8115,7 +8149,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= set(TORCH_CXX_FLAGS "-D_GLIBCXX_USE_CXX11_ABI=@GLIBCXX_USE_CXX11_ABI@") diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/CMakeLists.txt pytorch-develop/CMakeLists.txt --- pytorch-v1.5.0/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/CMakeLists.txt 2021-07-23 18:20:43.597371637 +0800 ++++ pytorch-develop/CMakeLists.txt 2021-07-26 21:32:24.435091556 +0800 @@ -205,6 +205,10 @@ option(USE_TBB "Use TBB" OFF) option(ONNX_ML "Enable traditional ONNX ML API." ON) @@ -8182,7 +8216,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-missing-braces") diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/.dockerignore pytorch-develop/.dockerignore --- pytorch-v1.5.0/.dockerignore 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/.dockerignore 2021-07-23 18:20:43.597371637 +0800 ++++ pytorch-develop/.dockerignore 2021-07-26 21:32:24.431091414 +0800 @@ -1,257 +1 @@ -# READ THIS BEFORE YOU REFACTOR ME -# @@ -8458,7 +8492,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/requirements.txt pytorch-develop/requirements.txt --- pytorch-v1.5.0/requirements.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/requirements.txt 2021-07-23 18:20:43.857380960 +0800 ++++ pytorch-develop/requirements.txt 2021-07-26 21:32:24.703101165 +0800 @@ -4,4 +4,12 @@ requests setuptools @@ -8477,7 +8511,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/setup.py pytorch-develop/setup.py --- pytorch-v1.5.0/setup.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/setup.py 2021-07-23 18:20:43.857380960 +0800 ++++ pytorch-develop/setup.py 2021-07-26 21:32:24.707101310 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -8576,7 +8610,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= 'python/serialized_test/data/operator_test/*.zip', diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/derivatives.yaml pytorch-develop/tools/autograd/derivatives.yaml --- pytorch-v1.5.0/tools/autograd/derivatives.yaml 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/derivatives.yaml 2021-07-23 18:20:44.997421834 +0800 ++++ pytorch-develop/tools/autograd/derivatives.yaml 2021-07-26 21:32:25.855142472 +0800 @@ -107,6 +107,10 @@ # # NB: The parameter names here MUST be consistent with the parameter names @@ -8692,7 +8726,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/dump_utils.py pytorch-develop/tools/autograd/dump_utils.py --- pytorch-v1.5.0/tools/autograd/dump_utils.py 1970-01-01 08:00:00.000000000 +0800 -+++ pytorch-develop/tools/autograd/dump_utils.py 2021-07-23 18:20:44.997421834 +0800 ++++ pytorch-develop/tools/autograd/dump_utils.py 2021-07-26 21:32:25.855142472 +0800 @@ -0,0 +1,115 @@ +# Copyright (c) 2021 Huawei Technologies Co., Ltd +# All rights reserved. @@ -8811,7 +8845,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= +] diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/gen_autograd_functions.py pytorch-develop/tools/autograd/gen_autograd_functions.py --- pytorch-v1.5.0/tools/autograd/gen_autograd_functions.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/gen_autograd_functions.py 2021-07-23 18:20:44.997421834 +0800 ++++ pytorch-develop/tools/autograd/gen_autograd_functions.py 2021-07-26 21:32:25.855142472 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2021 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -8997,7 +9031,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= + diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/gen_python_functions.py pytorch-develop/tools/autograd/gen_python_functions.py --- pytorch-v1.5.0/tools/autograd/gen_python_functions.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/gen_python_functions.py 2021-07-23 18:20:44.997421834 +0800 ++++ pytorch-develop/tools/autograd/gen_python_functions.py 2021-07-26 21:32:25.855142472 +0800 @@ -1,3 +1,20 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -9039,7 +9073,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= 'value': argname, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/gen_variable_type.py pytorch-develop/tools/autograd/gen_variable_type.py --- pytorch-v1.5.0/tools/autograd/gen_variable_type.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/gen_variable_type.py 2021-07-23 18:20:44.997421834 +0800 ++++ pytorch-develop/tools/autograd/gen_variable_type.py 2021-07-26 21:32:25.855142472 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2021 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -9212,7 +9246,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/Functions.cpp pytorch-develop/tools/autograd/templates/Functions.cpp --- pytorch-v1.5.0/tools/autograd/templates/Functions.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/Functions.cpp 2021-07-23 18:20:45.001421978 +0800 ++++ pytorch-develop/tools/autograd/templates/Functions.cpp 2021-07-26 21:32:25.855142472 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2021 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -9292,7 +9326,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= auto sparse = sparse_.coalesce(); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/python_torch_functions.cpp pytorch-develop/tools/autograd/templates/python_torch_functions.cpp --- pytorch-v1.5.0/tools/autograd/templates/python_torch_functions.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/python_torch_functions.cpp 2021-07-23 18:20:45.001421978 +0800 ++++ pytorch-develop/tools/autograd/templates/python_torch_functions.cpp 2021-07-26 21:32:25.855142472 +0800 @@ -22,7 +22,7 @@ #include "torch/csrc/autograd/generated/variable_factories.h" #include "torch/csrc/utils/structseq.h" @@ -9376,7 +9410,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/python_variable_methods.cpp pytorch-develop/tools/autograd/templates/python_variable_methods.cpp --- pytorch-v1.5.0/tools/autograd/templates/python_variable_methods.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/python_variable_methods.cpp 2021-07-23 18:20:45.001421978 +0800 ++++ pytorch-develop/tools/autograd/templates/python_variable_methods.cpp 2021-07-26 21:32:25.855142472 +0800 @@ -15,7 +15,13 @@ #include "torch/csrc/cuda/Stream.h" #include "torch/csrc/cuda/Event.h" @@ -9463,7 +9497,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= {"has_names", (PyCFunction)THPVariable_has_names, METH_NOARGS, NULL}, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/VariableType.cpp 
pytorch-develop/tools/autograd/templates/VariableType.cpp --- pytorch-v1.5.0/tools/autograd/templates/VariableType.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/VariableType.cpp 2021-07-23 18:20:45.001421978 +0800 ++++ pytorch-develop/tools/autograd/templates/VariableType.cpp 2021-07-26 21:32:25.855142472 +0800 @@ -1,7 +1,27 @@ +// Copyright (c) 2021 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -9494,7 +9528,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/VariableType.h pytorch-develop/tools/autograd/templates/VariableType.h --- pytorch-v1.5.0/tools/autograd/templates/VariableType.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/VariableType.h 2021-07-23 18:20:45.001421978 +0800 ++++ pytorch-develop/tools/autograd/templates/VariableType.h 2021-07-26 21:32:25.855142472 +0800 @@ -1,3 +1,20 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -9526,7 +9560,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= const at::Tensor & unpack(const Tensor & t, const char * name, int pos); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/build_variables.bzl pytorch-develop/tools/build_variables.bzl --- pytorch-v1.5.0/tools/build_variables.bzl 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/build_variables.bzl 2021-07-23 18:20:45.001421978 +0800 ++++ pytorch-develop/tools/build_variables.bzl 2021-07-26 21:32:25.859142615 +0800 @@ -46,6 +46,7 @@ "torch/csrc/autograd/functions/utils.cpp", "torch/csrc/autograd/input_buffer.cpp", @@ -9612,7 +9646,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -def grad(outputs: _TensorOrTensors, inputs: _TensorOrTensors, grad_outputs: Optional[_TensorOrTensors]=..., retain_graph: Optional[bool]=..., create_graph: bool=..., only_inputs: bool=..., allow_unused: bool=...) -> Tuple[Tensor, ...]: ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/autograd/profiler.py pytorch-develop/torch/autograd/profiler.py --- pytorch-v1.5.0/torch/autograd/profiler.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/autograd/profiler.py 2021-07-23 18:20:45.005422122 +0800 ++++ pytorch-develop/torch/autograd/profiler.py 2021-07-26 21:32:25.863142758 +0800 @@ -1,8 +1,25 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -10085,7 +10119,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return ''.join(result) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/CMakeLists.txt pytorch-develop/torch/CMakeLists.txt --- pytorch-v1.5.0/torch/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/CMakeLists.txt 2021-07-23 18:20:45.001421978 +0800 ++++ pytorch-develop/torch/CMakeLists.txt 2021-07-26 21:32:25.859142615 +0800 @@ -97,6 +97,7 @@ ${TORCH_SRC_DIR}/csrc/tensor/python_tensor.cpp ${TORCH_SRC_DIR}/csrc/utils.cpp @@ -10117,7 +10151,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= endif() diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/engine.cpp pytorch-develop/torch/csrc/autograd/engine.cpp --- pytorch-v1.5.0/torch/csrc/autograd/engine.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/engine.cpp 2021-07-23 18:20:45.017422551 +0800 ++++ pytorch-develop/torch/csrc/autograd/engine.cpp 2021-07-26 21:32:25.875143188 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10240,7 +10274,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= auto event = c10::Event{c10::DeviceType::CUDA}; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/functions/tensor.cpp pytorch-develop/torch/csrc/autograd/functions/tensor.cpp --- pytorch-v1.5.0/torch/csrc/autograd/functions/tensor.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/functions/tensor.cpp 2021-07-23 18:20:45.017422551 +0800 ++++ pytorch-develop/torch/csrc/autograd/functions/tensor.cpp 2021-07-26 21:32:25.875143188 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10272,7 +10306,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= /*non_blocking=*/false, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/init.cpp pytorch-develop/torch/csrc/autograd/init.cpp --- pytorch-v1.5.0/torch/csrc/autograd/init.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/init.cpp 2021-07-23 18:20:45.017422551 +0800 ++++ pytorch-develop/torch/csrc/autograd/init.cpp 2021-07-26 21:32:25.875143188 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -10315,7 +10349,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= m.def("_enable_profiler", enableProfiler); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/input_buffer.cpp pytorch-develop/torch/csrc/autograd/input_buffer.cpp --- pytorch-v1.5.0/torch/csrc/autograd/input_buffer.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/input_buffer.cpp 2021-07-23 18:20:45.017422551 +0800 ++++ pytorch-develop/torch/csrc/autograd/input_buffer.cpp 2021-07-26 21:32:25.875143188 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10367,7 +10401,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= auto& old_var = buffer[pos]; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/profiler.cpp pytorch-develop/torch/csrc/autograd/profiler.cpp --- pytorch-v1.5.0/torch/csrc/autograd/profiler.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/profiler.cpp 2021-07-23 18:20:45.017422551 +0800 ++++ pytorch-develop/torch/csrc/autograd/profiler.cpp 2021-07-26 21:32:25.875143188 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10563,7 +10597,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= CUDAStubs::~CUDAStubs() = default; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/profiler.h pytorch-develop/torch/csrc/autograd/profiler.h --- pytorch-v1.5.0/torch/csrc/autograd/profiler.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/profiler.h 2021-07-23 18:20:45.017422551 +0800 ++++ pytorch-develop/torch/csrc/autograd/profiler.h 2021-07-26 21:32:25.875143188 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10688,7 +10722,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/python_variable.cpp pytorch-develop/torch/csrc/autograd/python_variable.cpp --- pytorch-v1.5.0/torch/csrc/autograd/python_variable.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/python_variable.cpp 2021-07-23 18:20:45.017422551 +0800 ++++ pytorch-develop/torch/csrc/autograd/python_variable.cpp 2021-07-26 21:32:25.875143188 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -10742,7 +10776,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= {"is_complex", (getter)THPVariable_is_complex, nullptr, nullptr, nullptr}, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/python_variable_indexing.cpp pytorch-develop/torch/csrc/autograd/python_variable_indexing.cpp --- pytorch-v1.5.0/torch/csrc/autograd/python_variable_indexing.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/python_variable_indexing.cpp 2021-07-23 18:20:45.017422551 +0800 ++++ pytorch-develop/torch/csrc/autograd/python_variable_indexing.cpp 2021-07-26 21:32:25.875143188 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10783,7 +10817,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/utils/wrap_outputs.h pytorch-develop/torch/csrc/autograd/utils/wrap_outputs.h --- pytorch-v1.5.0/torch/csrc/autograd/utils/wrap_outputs.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/utils/wrap_outputs.h 2021-07-23 18:20:45.017422551 +0800 ++++ pytorch-develop/torch/csrc/autograd/utils/wrap_outputs.h 2021-07-26 21:32:25.875143188 +0800 @@ -168,6 +168,45 @@ return r.release(); } @@ -10832,7 +10866,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= if (!r) throw python_error(); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/VariableTypeManual.cpp pytorch-develop/torch/csrc/autograd/VariableTypeManual.cpp --- pytorch-v1.5.0/torch/csrc/autograd/VariableTypeManual.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/VariableTypeManual.cpp 2021-07-23 18:20:45.017422551 +0800 ++++ pytorch-develop/torch/csrc/autograd/VariableTypeManual.cpp 2021-07-26 21:32:25.871143045 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10866,7 +10900,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= if (!t.defined()) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/distributed/c10d/comm.cpp pytorch-develop/torch/csrc/distributed/c10d/comm.cpp --- pytorch-v1.5.0/torch/csrc/distributed/c10d/comm.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/distributed/c10d/comm.cpp 2021-07-23 18:20:45.021422695 +0800 ++++ pytorch-develop/torch/csrc/distributed/c10d/comm.cpp 2021-07-26 21:32:25.879143332 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -10972,7 +11006,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= while (!in_flight.empty()) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/distributed/c10d/init.cpp pytorch-develop/torch/csrc/distributed/c10d/init.cpp --- pytorch-v1.5.0/torch/csrc/distributed/c10d/init.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/distributed/c10d/init.cpp 2021-07-23 18:20:45.021422695 +0800 ++++ pytorch-develop/torch/csrc/distributed/c10d/init.cpp 2021-07-26 21:32:25.879143332 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11029,7 +11063,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= .def("is_success", &::c10d::ProcessGroup::Work::isSuccess) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/distributed/c10d/reducer.cpp pytorch-develop/torch/csrc/distributed/c10d/reducer.cpp --- pytorch-v1.5.0/torch/csrc/distributed/c10d/reducer.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/distributed/c10d/reducer.cpp 2021-07-23 18:20:45.021422695 +0800 ++++ pytorch-develop/torch/csrc/distributed/c10d/reducer.cpp 2021-07-26 21:32:25.879143332 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11154,7 +11188,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/DynamicTypes.cpp pytorch-develop/torch/csrc/DynamicTypes.cpp --- pytorch-v1.5.0/torch/csrc/DynamicTypes.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/DynamicTypes.cpp 2021-07-23 18:20:45.005422122 +0800 ++++ pytorch-develop/torch/csrc/DynamicTypes.cpp 2021-07-26 21:32:25.863142758 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11203,7 +11237,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return it->second; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/Generator.cpp pytorch-develop/torch/csrc/Generator.cpp --- pytorch-v1.5.0/torch/csrc/Generator.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/Generator.cpp 2021-07-23 18:20:45.009422265 +0800 ++++ pytorch-develop/torch/csrc/Generator.cpp 2021-07-26 21:32:25.863142758 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -11271,7 +11305,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= #endif diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/generic/serialization.cpp pytorch-develop/torch/csrc/generic/serialization.cpp --- pytorch-v1.5.0/torch/csrc/generic/serialization.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/generic/serialization.cpp 2021-07-23 18:20:45.025422838 +0800 ++++ pytorch-develop/torch/csrc/generic/serialization.cpp 2021-07-26 21:32:25.883143476 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11371,7 +11405,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/generic/Storage.cpp pytorch-develop/torch/csrc/generic/Storage.cpp --- pytorch-v1.5.0/torch/csrc/generic/Storage.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/generic/Storage.cpp 2021-07-23 18:20:45.025422838 +0800 ++++ pytorch-develop/torch/csrc/generic/Storage.cpp 2021-07-26 21:32:25.883143476 +0800 @@ -1,7 +1,25 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11450,7 +11484,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= for (Py_ssize_t i = 0; i < length; i++) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/generic/StorageMethods.cpp pytorch-develop/torch/csrc/generic/StorageMethods.cpp --- pytorch-v1.5.0/torch/csrc/generic/StorageMethods.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/generic/StorageMethods.cpp 2021-07-23 18:20:45.025422838 +0800 ++++ pytorch-develop/torch/csrc/generic/StorageMethods.cpp 2021-07-26 21:32:25.883143476 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11498,7 +11532,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= {"_write_file", (PyCFunction)THPStorage_(writeFile), METH_VARARGS, nullptr}, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/Module.cpp pytorch-develop/torch/csrc/Module.cpp --- pytorch-v1.5.0/torch/csrc/Module.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/Module.cpp 2021-07-23 18:20:45.009422265 +0800 ++++ pytorch-develop/torch/csrc/Module.cpp 2021-07-26 21:32:25.863142758 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -11642,7 +11676,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= auto set_module_attr = [&](const char* name, PyObject* v, bool incref = true) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/tensor/python_tensor.cpp pytorch-develop/torch/csrc/tensor/python_tensor.cpp --- pytorch-v1.5.0/torch/csrc/tensor/python_tensor.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/tensor/python_tensor.cpp 2021-07-23 18:20:45.045423556 +0800 ++++ pytorch-develop/torch/csrc/tensor/python_tensor.cpp 2021-07-26 21:32:25.903144193 +0800 @@ -1,18 +1,35 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -12019,7 +12053,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= +} // namespace torch diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/init.cpp pytorch-develop/torch/csrc/utils/init.cpp --- pytorch-v1.5.0/torch/csrc/utils/init.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/init.cpp 2021-07-23 18:20:45.045423556 +0800 ++++ pytorch-develop/torch/csrc/utils/init.cpp 2021-07-26 21:32:25.903144193 +0800 @@ -1,6 +1,10 @@ #include #include @@ -12107,7 +12141,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } // namespace torch diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/init.h pytorch-develop/torch/csrc/utils/init.h --- pytorch-v1.5.0/torch/csrc/utils/init.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/init.h 2021-07-23 18:20:45.045423556 +0800 ++++ pytorch-develop/torch/csrc/utils/init.h 2021-07-26 21:32:25.903144193 +0800 @@ -8,4 +8,7 @@ void initThroughputBenchmarkBindings(PyObject* module); @@ -12118,7 +12152,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } // namespace torch diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/python_arg_parser.h pytorch-develop/torch/csrc/utils/python_arg_parser.h --- pytorch-v1.5.0/torch/csrc/utils/python_arg_parser.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/python_arg_parser.h 2021-07-23 18:20:45.045423556 +0800 ++++ pytorch-develop/torch/csrc/utils/python_arg_parser.h 2021-07-26 21:32:25.903144193 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -12153,7 +12187,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return at::Device(device_str); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/tensor_layouts.cpp pytorch-develop/torch/csrc/utils/tensor_layouts.cpp --- pytorch-v1.5.0/torch/csrc/utils/tensor_layouts.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/tensor_layouts.cpp 2021-07-23 18:20:45.049423699 +0800 ++++ pytorch-develop/torch/csrc/utils/tensor_layouts.cpp 2021-07-26 21:32:25.903144193 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -12184,7 +12218,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= registerLayoutObject((THPLayout*)strided_layout, at::Backend::QuantizedCPU); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/tensor_new.cpp pytorch-develop/torch/csrc/utils/tensor_new.cpp --- pytorch-v1.5.0/torch/csrc/utils/tensor_new.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/tensor_new.cpp 2021-07-23 18:20:45.049423699 +0800 ++++ pytorch-develop/torch/csrc/utils/tensor_new.cpp 2021-07-26 21:32:25.903144193 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -12320,7 +12354,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } else if(expected_layout == c10::kSparse) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/tensor_types.cpp pytorch-develop/torch/csrc/utils/tensor_types.cpp --- pytorch-v1.5.0/torch/csrc/utils/tensor_types.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/tensor_types.cpp 2021-07-23 18:20:45.049423699 +0800 ++++ pytorch-develop/torch/csrc/utils/tensor_types.cpp 2021-07-26 21:32:25.903144193 +0800 @@ -1,58 +1,91 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -12533,7 +12567,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -def get_rng_state(): ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/distributed/distributed_c10d.py pytorch-develop/torch/distributed/distributed_c10d.py --- pytorch-v1.5.0/torch/distributed/distributed_c10d.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/distributed/distributed_c10d.py 2021-07-23 18:20:45.049423699 +0800 ++++ pytorch-develop/torch/distributed/distributed_c10d.py 2021-07-26 21:32:25.907144336 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -12614,7 +12648,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/__init__.py pytorch-develop/torch/__init__.py --- pytorch-v1.5.0/torch/__init__.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/__init__.py 2021-07-23 18:20:45.001421978 +0800 ++++ pytorch-develop/torch/__init__.py 2021-07-26 21:32:25.859142615 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -12657,7 +12691,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/lib/c10d/CMakeLists.txt pytorch-develop/torch/lib/c10d/CMakeLists.txt --- pytorch-v1.5.0/torch/lib/c10d/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/lib/c10d/CMakeLists.txt 2021-07-23 18:20:45.053423842 +0800 ++++ pytorch-develop/torch/lib/c10d/CMakeLists.txt 2021-07-26 21:32:25.911144479 +0800 @@ -28,6 +28,10 @@ option(USE_C10D_NCCL "USE C10D NCCL" ON) endif() @@ -12710,7 +12744,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= copy_header(ProcessGroupMPI.hpp) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/lib/libshm/CMakeLists.txt pytorch-develop/torch/lib/libshm/CMakeLists.txt --- pytorch-v1.5.0/torch/lib/libshm/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/lib/libshm/CMakeLists.txt 2021-07-23 18:20:45.057423986 +0800 ++++ pytorch-develop/torch/lib/libshm/CMakeLists.txt 2021-07-26 21:32:25.911144479 +0800 @@ -37,8 +37,11 @@ SET_TARGET_PROPERTIES(shm PROPERTIES PREFIX "lib" @@ -12767,7 +12801,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -_maybe_indices_t = _scalar_or_tuple_2_t[Tensor] diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/functional.py pytorch-develop/torch/nn/functional.py --- pytorch-v1.5.0/torch/nn/functional.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/functional.py 2021-07-23 18:20:45.057423986 +0800 ++++ pytorch-develop/torch/nn/functional.py 2021-07-26 21:32:25.915144623 +0800 @@ -1611,7 +1611,7 @@ else: output = input.matmul(weight.t()) @@ -12790,7 +12824,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -from . import parallel as parallel diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/modules/batchnorm.py pytorch-develop/torch/nn/modules/batchnorm.py --- pytorch-v1.5.0/torch/nn/modules/batchnorm.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/modules/batchnorm.py 2021-07-23 18:20:45.057423986 +0800 ++++ pytorch-develop/torch/nn/modules/batchnorm.py 2021-07-26 21:32:25.915144623 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -12822,7 +12856,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= self.register_parameter('running_var', None) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/modules/module.py pytorch-develop/torch/nn/modules/module.py --- pytorch-v1.5.0/torch/nn/modules/module.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/modules/module.py 2021-07-23 18:20:45.061424129 +0800 ++++ pytorch-develop/torch/nn/modules/module.py 2021-07-26 21:32:25.915144623 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -12965,7 +12999,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return t.to(device, dtype if t.is_floating_point() else None, non_blocking, memory_format=convert_to_format) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/modules/normalization.py pytorch-develop/torch/nn/modules/normalization.py --- pytorch-v1.5.0/torch/nn/modules/normalization.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/modules/normalization.py 2021-07-23 18:20:45.061424129 +0800 ++++ pytorch-develop/torch/nn/modules/normalization.py 2021-07-26 21:32:25.915144623 +0800 @@ -128,13 +128,14 @@ """ __constants__ = ['normalized_shape', 'eps', 'elementwise_affine'] @@ -13034,7 +13068,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - module_kwargs: Optional[Any] = ...) -> Tensor: ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/parallel/distributed.py pytorch-develop/torch/nn/parallel/distributed.py --- pytorch-v1.5.0/torch/nn/parallel/distributed.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/parallel/distributed.py 2021-07-23 18:20:45.061424129 +0800 ++++ pytorch-develop/torch/nn/parallel/distributed.py 2021-07-26 21:32:25.919144766 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -13385,7 +13419,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -def remove_weight_norm(module: T_module, name: str = ...) -> T_module: ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/onnx/symbolic_opset9.py pytorch-develop/torch/onnx/symbolic_opset9.py --- pytorch-v1.5.0/torch/onnx/symbolic_opset9.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/onnx/symbolic_opset9.py 2021-07-23 18:20:45.065424272 +0800 ++++ pytorch-develop/torch/onnx/symbolic_opset9.py 2021-07-26 21:32:25.919144766 +0800 @@ -1621,14 +1621,23 @@ slices = [sym_help._slice_helper(g, w, axes=[0], starts=[x * n], ends=[y * n]) for x, y in intervals] return g.op('Concat', *slices, axis_i=0) @@ -13463,7 +13497,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - def __init__(self, params: _params_t, lr: float=..., lr_decay: float=..., weight_decay: float=..., initial_accumulator_value: float=..., eps: float=...) -> None: ... 
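The module.py and distributed.py hunks above are what let an ordinary model and DistributedDataParallel run on the NPU device type this patch introduces. A minimal hedged sketch of the resulting workflow (it assumes the patched build exposes torch.npu.set_device and an HCCL-style process-group backend; neither binding is shown in this section, and the rendezvous environment variables are assumed to be set):

import torch
import torch.distributed as dist

dist.init_process_group(backend='hccl', rank=0, world_size=1)  # assumed NPU collective backend
torch.npu.set_device(0)                                        # assumed torch.npu helper
model = torch.nn.Linear(16, 16).to('npu')                      # routed through the patched Module.to
ddp_model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[0])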
diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/optim/adamax.py pytorch-develop/torch/optim/adamax.py --- pytorch-v1.5.0/torch/optim/adamax.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/optim/adamax.py 2021-07-23 18:20:45.065424272 +0800 ++++ pytorch-develop/torch/optim/adamax.py 2021-07-26 21:32:25.923144910 +0800 @@ -80,8 +80,8 @@ exp_inf.mul_(beta2).unsqueeze(0), grad.abs().add_(eps).unsqueeze_(0) @@ -13640,7 +13674,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - def __init__(self, params: _params_t, lr: float=..., betas: Tuple[float, float]=..., eps: float=...) -> None: ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/serialization.py pytorch-develop/torch/serialization.py --- pytorch-v1.5.0/torch/serialization.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/serialization.py 2021-07-23 18:20:45.065424272 +0800 ++++ pytorch-develop/torch/serialization.py 2021-07-26 21:32:25.923144910 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -13724,7 +13758,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= def location_tag(storage): diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/storage.py pytorch-develop/torch/storage.py --- pytorch-v1.5.0/torch/storage.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/storage.py 2021-07-23 18:20:45.065424272 +0800 ++++ pytorch-develop/torch/storage.py 2021-07-26 21:32:25.923144910 +0800 @@ -7,6 +7,7 @@ class _StorageBase(object): @@ -13744,7 +13778,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= else: diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/tensor.py pytorch-develop/torch/tensor.py --- pytorch-v1.5.0/torch/tensor.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/tensor.py 2021-07-23 18:20:45.065424272 +0800 ++++ pytorch-develop/torch/tensor.py 2021-07-26 21:32:25.923144910 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -13806,7 +13840,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= def __reversed__(self): diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/_tensor_str.py pytorch-develop/torch/_tensor_str.py --- pytorch-v1.5.0/torch/_tensor_str.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/_tensor_str.py 2021-07-23 18:20:45.005422122 +0800 ++++ pytorch-develop/torch/_tensor_str.py 2021-07-26 21:32:25.859142615 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -13860,7 +13894,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= has_default_dtype = self.dtype in (torch.get_default_dtype(), torch.int64, torch.bool) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/utils/data/dataloader.py pytorch-develop/torch/utils/data/dataloader.py --- pytorch-v1.5.0/torch/utils/data/dataloader.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/utils/data/dataloader.py 2021-07-23 18:20:45.069424416 +0800 ++++ pytorch-develop/torch/utils/data/dataloader.py 2021-07-26 21:32:25.927145052 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -14069,7 +14103,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - def __init__(self, sampler: Sampler[int], batch_size: int, drop_last: bool) -> None: ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/utils/data/_utils/pin_memory.py pytorch-develop/torch/utils/data/_utils/pin_memory.py --- pytorch-v1.5.0/torch/utils/data/_utils/pin_memory.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/utils/data/_utils/pin_memory.py 2021-07-23 18:20:45.069424416 +0800 ++++ pytorch-develop/torch/utils/data/_utils/pin_memory.py 2021-07-26 21:32:25.927145052 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -14130,7 +14164,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/utils/__init__.py pytorch-develop/torch/utils/__init__.py --- pytorch-v1.5.0/torch/utils/__init__.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/utils/__init__.py 2021-07-23 18:20:45.069424416 +0800 ++++ pytorch-develop/torch/utils/__init__.py 2021-07-26 21:32:25.927145052 +0800 @@ -1,6 +1,7 @@ from __future__ import absolute_import, division, print_function, unicode_literals @@ -14141,7 +14175,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= def set_module(obj, mod): diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/_utils.py pytorch-develop/torch/_utils.py --- pytorch-v1.5.0/torch/_utils.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/_utils.py 2021-07-23 18:20:45.005422122 +0800 ++++ pytorch-develop/torch/_utils.py 2021-07-26 21:32:25.863142758 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. diff --git a/src/aten/src/ATen/native/native_functions.yaml b/src/aten/src/ATen/native/native_functions.yaml index ebd9756a6eabb78d5d729c7b37f0cc7e12adc9c3..23e8d91a55ae48e9c09eb8c5d0804b2fcb297aac 100644 --- a/src/aten/src/ATen/native/native_functions.yaml +++ b/src/aten/src/ATen/native/native_functions.yaml @@ -980,6 +980,8 @@ NPU: conv_transpose2d_npu_ - func: conv_transpose3d.input(Tensor input, Tensor weight, Tensor? 
bias=None, int[3] stride=1, int[3] padding=0, int[3] output_padding=0, int groups=1, int[3] dilation=1) -> Tensor + npu_dispatch: + NPU: conv_transpose3d_npu_ - func: copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!) manual_kernel_registration: True @@ -1508,6 +1510,8 @@ use_c10_dispatcher: full supports_named_tensor: True variants: function, method + npu_dispatch: + NPU: erfc_npu - func: erfc_(Tensor(a!) self) -> Tensor(a!) supports_named_tensor: True @@ -1515,12 +1519,16 @@ dispatch: CPU: _erfc__cpu CUDA: _erfc__cuda + npu_dispatch: + NPU: erfc_npu_ - func: erfc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True dispatch: CPU: _erfc_out_cpu CUDA: _erfc_out_cuda + npu_dispatch: + NPU: erfc_out_npu - func: exp(Tensor self) -> Tensor use_c10_dispatcher: full @@ -5860,6 +5868,8 @@ dispatch: CPU: _triangular_solve_helper_cpu CUDA: _triangular_solve_helper_cuda + npu_dispatch: + NPU: _triangular_solve_helper_npu - func: symeig.e(Tensor self, bool eigenvectors=False, bool upper=True, *, Tensor(a!) e, Tensor(b!) V) -> (Tensor(a!) eigenvalues, Tensor(b!) eigenvectors) diff --git a/src/aten/src/ATen/native/npu/ErfcKernelNpu.cpp b/src/aten/src/ATen/native/npu/ErfcKernelNpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..77c1afffa82ff4ec162c2e3006a9cd08531fe989 --- /dev/null +++ b/src/aten/src/ATen/native/npu/ErfcKernelNpu.cpp @@ -0,0 +1,57 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +Tensor& erfc_out_npu_no_check(Tensor& out, const Tensor& self){ + OpCommand cmd; + cmd.Name("Erfc") + .Input(self) + .Output(out) + .Run(); + return out; +} + +Tensor& erfc_out_npu(Tensor& out, const Tensor& self) { + OpPreparation::CheckOut( + {self}, + out, + self, + self.sizes()); + + OpPipeWithDefinedOut pipe; + return pipe.CheckMemory({self}, {out}) + .Func([&self](Tensor& out){erfc_out_npu_no_check(out, self);}) + .Call(out); +} + +Tensor erfc_npu(const Tensor& self) { + Tensor result = OpPreparation::ApplyTensor(self); + erfc_out_npu_no_check(result, self); + return result; +} + +Tensor& erfc_npu_(Tensor& self) { + erfc_out_npu(self, self); + return self; +} + +} // native +} // at diff --git a/src/aten/src/ATen/native/npu/TriangularSolveHelperKernelNpu.cpp b/src/aten/src/ATen/native/npu/TriangularSolveHelperKernelNpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c7960c07eb1e453713bee43e6689359bf0c17d48 --- /dev/null +++ b/src/aten/src/ATen/native/npu/TriangularSolveHelperKernelNpu.cpp @@ -0,0 +1,50 @@ +// Copyright (c) 2021 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +std::tuple<Tensor, Tensor> _triangular_solve_helper_npu( + const Tensor& self, + const Tensor& A, + bool upper, + bool transpose, + bool unitriangular) { + TORCH_CHECK(self.dtype() == at::kFloat && A.dtype() == at::kFloat, + "_triangular_solve_helper_npu only supports Float, but got ", self.dtype(), ' ', A.dtype()); + auto self_working_copy = OpPreparation::ApplyTensor(self); + auto A_working_copy = A.clone(); + + Tensor A_tensor = A; + if (unitriangular) { + auto diagonal_tensor = at::eye(A_tensor.size(-2), A_tensor.size(-1), A_tensor.options()); + A_tensor = A_tensor * (1 - diagonal_tensor) + diagonal_tensor; + } + OpCommand cmd; + cmd.Name("MatrixTriangularSolve") + .Input(A_tensor) + .Input(self) + .Output(self_working_copy) + .Attr("lower", !upper) + .Attr("adjoint", transpose) + .Run(); + return std::tuple<Tensor, Tensor>(self_working_copy, A_working_copy); +} +} +}
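The helper above maps torch.triangular_solve onto the MatrixTriangularSolve NPU operator; per its TORCH_CHECK it accepts float32 only, and the unitriangular branch overwrites A's diagonal with ones before solving. A short usage sketch (requires an NPU-enabled build; the 'npu' device string follows the convention used elsewhere in this patch):

import torch
A = torch.triu(torch.rand(2, 2)).to('npu')            # float32 upper-triangular system matrix
b = torch.rand(2, 3).to('npu')
x, A_out = torch.triangular_solve(b, A, upper=True)   # dispatches to _triangular_solve_helper_npu
print(x.cpu(), A_out.cpu())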
"NCDHW"; + + SmallVector sizeVec = array_to_small_vector(result.sizes()); + OpCommand cmd; + cmd.Name("Conv3DTranspose") + .Input(sizeVec, at::kInt) + .Input(input) + .Input(weight); + if (bias.defined()){ + cmd.Input(bias); + } + cmd.Output(result) + .Attr("pads", paddings) + .Attr("output_padding", outputpadding) + .Attr("strides", stridesSize) + .Attr("dilations", dilations) + .Attr("groups", groups) + .Attr("data_format", dataFormat) + .Run(); + + return result; +} + +Tensor convolution_transpose3d_npu( + const Tensor& input, + const Tensor& weight, + const Tensor& bias, + IntArrayRef padding, + IntArrayRef output_padding, + IntArrayRef stride, + IntArrayRef dilation, + int64_t groups) { + // calculate the output size + auto outputSize = convolution_transpose3d_npu_output_size( + input, weight, bias, padding, output_padding, stride, dilation, groups); + + // construct the output tensor of the NPU + Tensor result = + OpPreparation::ApplyTensorWithFormat(input, outputSize, ACL_FORMAT_NDC1HWC0); + + // calculate the output result of the NPU + convolution_transpose3d_out_npu_nocheck( + result, input, weight, bias, padding, output_padding, stride, dilation, groups); + + return result; +} + void view1d_as_2d( SmallVector& stride, SmallVector& padding, @@ -163,6 +254,27 @@ Tensor conv_transpose2d_npu_( groups); } +Tensor conv_transpose3d_npu_( + const Tensor& input, + const Tensor& weight, + const Tensor& bias, + IntArrayRef stride, + IntArrayRef padding, + IntArrayRef output_padding, + int64_t groups, + IntArrayRef dilation) { + return at::convolution( + input, + weight, + bias, + stride, + padding, + dilation, + true, + output_padding, + groups); +} + Tensor convolution_npu( const Tensor& input, const Tensor& weight, @@ -305,6 +417,11 @@ Tensor npu_convolution_transpose( input, weight, bias, padding, output_padding, stride, dilation, groups); } + if (dim == 5) { + output = convolution_transpose3d_npu( + input, weight, bias, padding, output_padding, stride, dilation, groups); + } + return output; } diff --git a/src/aten/src/ATen/native/npu/frame/OpDynamicParamMaker.cpp b/src/aten/src/ATen/native/npu/frame/OpDynamicParamMaker.cpp index c63a7e74409f07ec6779fd80e9a07af78089c9f9..af554749f8a4528f3187b89198e31d0505550d26 100644 --- a/src/aten/src/ATen/native/npu/frame/OpDynamicParamMaker.cpp +++ b/src/aten/src/ATen/native/npu/frame/OpDynamicParamMaker.cpp @@ -16,8 +16,8 @@ #include "ATen/native/npu/utils/CalcuOpUtil.h" #include "ATen/native/npu/frame/OpDynamicParamMaker.h" #include -#include #include "c10/npu/NPUStream.h" +#include "ATen/native/npu/interface/AclOpCompileInterface.h" namespace at { namespace native { diff --git a/src/aten/src/ATen/native/npu/frame/OpDynamicParamMaker.h b/src/aten/src/ATen/native/npu/frame/OpDynamicParamMaker.h index 0fd4d6695499d73d53ac82a2c809dd461f7cf8df..d4da8922a5343196da28a2e6201baf9a730f9368 100644 --- a/src/aten/src/ATen/native/npu/frame/OpDynamicParamMaker.h +++ b/src/aten/src/ATen/native/npu/frame/OpDynamicParamMaker.h @@ -17,7 +17,7 @@ #define __NATIVE_NPU_UTILS_OP_DYNAMIC_PARAM_MAKER__ #include -#include +#include "ATen/native/npu/interface/AclOpCompileInterface.h" #include "ATen/native/npu/frame/OpParamMaker.h" #include "c10/npu/NPUStream.h" diff --git a/src/aten/src/ATen/native/npu/frame/OpParamMaker.cpp b/src/aten/src/ATen/native/npu/frame/OpParamMaker.cpp index 856903449f49cc5039d27cb643998ad2182845a4..3a1447ef3e45a708b532fea0d49678425f527a74 100644 --- a/src/aten/src/ATen/native/npu/frame/OpParamMaker.cpp +++ 
diff --git a/src/aten/src/ATen/native/npu/frame/OpDynamicParamMaker.cpp b/src/aten/src/ATen/native/npu/frame/OpDynamicParamMaker.cpp index c63a7e74409f07ec6779fd80e9a07af78089c9f9..af554749f8a4528f3187b89198e31d0505550d26 100644 --- a/src/aten/src/ATen/native/npu/frame/OpDynamicParamMaker.cpp +++ b/src/aten/src/ATen/native/npu/frame/OpDynamicParamMaker.cpp @@ -16,8 +16,8 @@ #include "ATen/native/npu/utils/CalcuOpUtil.h" #include "ATen/native/npu/frame/OpDynamicParamMaker.h" #include -#include <acl/acl_op_compiler.h> #include "c10/npu/NPUStream.h" +#include "ATen/native/npu/interface/AclOpCompileInterface.h" namespace at { namespace native { diff --git a/src/aten/src/ATen/native/npu/frame/OpDynamicParamMaker.h b/src/aten/src/ATen/native/npu/frame/OpDynamicParamMaker.h index 0fd4d6695499d73d53ac82a2c809dd461f7cf8df..d4da8922a5343196da28a2e6201baf9a730f9368 100644 --- a/src/aten/src/ATen/native/npu/frame/OpDynamicParamMaker.h +++ b/src/aten/src/ATen/native/npu/frame/OpDynamicParamMaker.h @@ -17,7 +17,7 @@ #define __NATIVE_NPU_UTILS_OP_DYNAMIC_PARAM_MAKER__ #include -#include <acl/acl_op_compiler.h> +#include "ATen/native/npu/interface/AclOpCompileInterface.h" #include "ATen/native/npu/frame/OpParamMaker.h" #include "c10/npu/NPUStream.h" diff --git a/src/aten/src/ATen/native/npu/frame/OpParamMaker.cpp b/src/aten/src/ATen/native/npu/frame/OpParamMaker.cpp index 856903449f49cc5039d27cb643998ad2182845a4..3a1447ef3e45a708b532fea0d49678425f527a74 100644 --- a/src/aten/src/ATen/native/npu/frame/OpParamMaker.cpp +++ b/src/aten/src/ATen/native/npu/frame/OpParamMaker.cpp @@ -160,7 +160,7 @@ aclError OpCommandImpl::InnerRun(string name, AclExecParam& params) { bool reset_flag = false; if (env::CheckFuzzyEnable() && FuzzyCompileBlacklist::GetInstance().IsInBlacklist(name)) { - aclopSetCompileFlag(aclOpCompileFlag::ACL_OP_COMPILE_DEFAULT); + AclopSetCompileFlag(aclOpCompileFlag::ACL_OP_COMPILE_DEFAULT); reset_flag = true; } aclError ret; @@ -182,7 +182,7 @@ ++index; } while(NpuUtils::IsOomError(ret, index) && (index < NPU_MAX_OP_EXEC_TRY_NUM)); if (reset_flag) { - aclopSetCompileFlag(aclOpCompileFlag::ACL_OP_COMPILE_FUZZ); + AclopSetCompileFlag(aclOpCompileFlag::ACL_OP_COMPILE_FUZZ); } return ret; } @@ -198,7 +198,7 @@ int ExecFunc(void* in, aclrtStream stream) { bool reset_flag = false; if (env::CheckFuzzyEnable() && FuzzyCompileBlacklist::GetInstance().IsInBlacklist(cur_paras->opType)) { - aclopSetCompileFlag(aclOpCompileFlag::ACL_OP_COMPILE_DEFAULT); + AclopSetCompileFlag(aclOpCompileFlag::ACL_OP_COMPILE_DEFAULT); reset_flag = true; } int index = 0; @@ -219,7 +219,7 @@ ++index; } while(NpuUtils::IsOomError(ret, index) && (index < NPU_MAX_OP_EXEC_TRY_NUM)); if (reset_flag) { - aclopSetCompileFlag(aclOpCompileFlag::ACL_OP_COMPILE_FUZZ); + AclopSetCompileFlag(aclOpCompileFlag::ACL_OP_COMPILE_FUZZ); } if (ret != ACL_ERROR_NONE) { C10_NPU_SHOW_ERR_MSG(); diff --git a/src/aten/src/ATen/native/npu/frame/OpParamMaker.h b/src/aten/src/ATen/native/npu/frame/OpParamMaker.h index bf0f28830e26bb9c444fce5e492dbbb10fb5cab2..a40fda2773f73e6012e7d1458bb6755c8656cf7a 100644 --- a/src/aten/src/ATen/native/npu/frame/OpParamMaker.h +++ b/src/aten/src/ATen/native/npu/frame/OpParamMaker.h @@ -17,7 +17,7 @@ #define __NATIVE_NPU_UTILS_OP_PARAM_MAKER__ #include -#include <acl/acl_op_compiler.h> +#include "ATen/native/npu/interface/AclOpCompileInterface.h" #include "ATen/native/npu/frame/NPUDefine.h" #include "ATen/native/npu/interface/Graph.h" #include "c10/npu/NPUStream.h"
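The hunks above replace every direct link-time call to aclopSetCompileFlag with the AclopSetCompileFlag wrapper, defined in the new AclOpCompileInterface.cpp that follows, which resolves the symbol from libacl_op_compiler at runtime. The same late-binding idea, sketched in Python with ctypes (library and symbol names are taken from the patch; the flag value passed is illustrative, since the enum's numeric values are not shown here):

import ctypes

lib = ctypes.CDLL('libacl_op_compiler.so')             # REGISTER_LIBRARY(libacl_op_compiler)
set_flag = getattr(lib, 'aclopSetCompileFlag', None)   # GET_FUNC: resolve lazily, symbol may be absent
if set_flag is not None:
    set_flag(1)                                        # e.g. ACL_OP_COMPILE_FUZZ (assumed value)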
diff --git a/src/aten/src/ATen/native/npu/interface/AclOpCompileInterface.cpp b/src/aten/src/ATen/native/npu/interface/AclOpCompileInterface.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5db59fcf856521d8214b052611cf83c554def260 --- /dev/null +++ b/src/aten/src/ATen/native/npu/interface/AclOpCompileInterface.cpp @@ -0,0 +1,31 @@ +#include "AclOpCompileInterface.h" +#include "c10/npu/register/FunctionLoader.h" +#include "c10/util/Exception.h" +namespace at { +namespace native { +namespace npu { + +#undef LOAD_FUNCTION +#define LOAD_FUNCTION(funcName) \ + REGISTER_FUNCTION(libacl_op_compiler, funcName) +#undef GET_FUNC +#define GET_FUNC(funcName) \ + GET_FUNCTION(libacl_op_compiler, funcName) + +REGISTER_LIBRARY(libacl_op_compiler) +LOAD_FUNCTION(aclopSetCompileFlag) + +aclError AclopSetCompileFlag(aclOpCompileFlag flag) { + typedef aclError(*aclopSetCompileFlagFunc)(aclOpCompileFlag); + static aclopSetCompileFlagFunc func = nullptr; + if (func == nullptr) { + func = (aclopSetCompileFlagFunc)GET_FUNC(aclopSetCompileFlag); + } + TORCH_CHECK(func, "Failed to find function ", "aclopSetCompileFlag"); + auto ret = func(flag); + return ret; +} + +} // namespace npu +} // namespace native +} // namespace at \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/interface/AclOpCompileInterface.h b/src/aten/src/ATen/native/npu/interface/AclOpCompileInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..1271a51795f07abfa256398ace9f204bd53fdf88 --- /dev/null +++ b/src/aten/src/ATen/native/npu/interface/AclOpCompileInterface.h @@ -0,0 +1,39 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef __NATIVE_NPU_INTERFACE_ACLOPCOMPILE__ +#define __NATIVE_NPU_INTERFACE_ACLOPCOMPILE__ + +#include <acl/acl_op_compiler.h> +namespace at { +namespace native { +namespace npu { + +/** + * @ingroup AscendCL + * @brief an interface to set the compile flag + * + * @param flag [IN] flag: ACL_OP_COMPILE_DEFAULT represents static compile while ACL_OP_COMPILE_FUZZ represents dynamic compile + * + * @retval ACL_ERROR_NONE The function is successfully executed. + * @retval OtherValues Failure + */ +aclError AclopSetCompileFlag(aclOpCompileFlag flag); + +} // namespace npu +} // namespace native +} // namespace at + +#endif //__NATIVE_NPU_INTERFACE_ACLOPCOMPILE__ \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/interface/EnvVariables.cpp b/src/aten/src/ATen/native/npu/interface/EnvVariables.cpp index 46abd15c008bb4f760ea7aea96c530489dea9c2e..5cbbb613527dacad36ec13a3023c97218701b72b 100644 --- a/src/aten/src/ATen/native/npu/interface/EnvVariables.cpp +++ b/src/aten/src/ATen/native/npu/interface/EnvVariables.cpp @@ -5,7 +5,7 @@ #include "ATen/native/npu/utils/NpuFuzzyBlacklist.h" #include "ATen/native/npu/utils/NpuProfilingDispatch.h" #include -#include <acl/acl_op_compiler.h> +#include "ATen/native/npu/interface/AclOpCompileInterface.h" namespace at { namespace native { namespace npu { @@ -24,8 +24,8 @@ REGISTER_OPTION_HOOK(mdldumpswitch, [](const std::string& val) { REGISTER_OPTION_HOOK(mdldumpconfigpath, [](const std::string& val) { aclmdlSetDump(val.c_str()); }) REGISTER_OPTION_HOOK(fuzzycompileswitch, [](const std::string& val) { - if (val == "enable") { aclopSetCompileFlag(aclOpCompileFlag::ACL_OP_COMPILE_FUZZ); } - else { aclopSetCompileFlag(aclOpCompileFlag::ACL_OP_COMPILE_DEFAULT); } + if (val == "enable") { AclopSetCompileFlag(aclOpCompileFlag::ACL_OP_COMPILE_FUZZ); } + else { AclopSetCompileFlag(aclOpCompileFlag::ACL_OP_COMPILE_DEFAULT); } }) REGISTER_OPTION_BOOL_FUNCTION(CheckFuzzyEnable, fuzzycompileswitch, "disable", "enable")
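The EnvVariables hook above means fuzzy compilation is driven by the string option "fuzzycompileswitch" rather than a build flag. Assuming the option registry is surfaced to Python as torch.npu.set_option (that binding is not part of this section), toggling it would look like:

import torch
torch.npu.set_option({'fuzzycompileswitch': 'enable'})    # AclopSetCompileFlag(ACL_OP_COMPILE_FUZZ)
torch.npu.set_option({'fuzzycompileswitch': 'disable'})   # back to ACL_OP_COMPILE_DEFAULT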
diff --git a/src/aten/src/ATen/native/npu/test__triangular_solve_helper.py b/src/aten/src/ATen/native/npu/test__triangular_solve_helper.py
new file mode 100644
index 0000000000000000000000000000000000000000..1167670fc7b9f9260f4b32ebe89697e9bbe8da69
--- /dev/null
+++ b/src/aten/src/ATen/native/npu/test__triangular_solve_helper.py
@@ -0,0 +1,55 @@
+# Copyright (c) 2021 Huawei Technologies Co., Ltd
+# Copyright (c) 2019, Facebook CORPORATION.
+# All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+import torch.nn.functional as F
+import numpy as np
+from common_utils import TestCase, run_tests
+from common_device_type import instantiate_device_type_tests
+from util_test import create_common_tensor
+
+class TestTriangularSolveHelper(TestCase):
+    def cpu_op_exec(self, input1, input2, upper, transpose, unitriangular):
+        output_s, output_a = input1.triangular_solve(input2, upper, transpose, unitriangular)
+        return output_s, output_a
+
+    def npu_op_exec(self, input1, input2, upper, transpose, unitriangular):
+        output_s, output_a = input1.triangular_solve(input2, upper, transpose, unitriangular)
+        output_s = output_s.cpu()
+        output_a = output_a.cpu()
+        return output_s, output_a
+
+    def test_triangular_solve_helper_fp32(self, device):
+        shape_format = [
+            [[np.float32, -1, [2, 3]], [np.float32, -1, [2, 2]]],
+            [[np.float32, -1, [3, 2, 3]], [np.float32, -1, [3, 2, 2]]],
+        ]
+        for item in shape_format:
+            cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100)
+            cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100)
+            for upper in [True, False]:
+                for transpose in [True, False]:
+                    for unitriangular in [True, False]:
+                        cpu_s, cpu_a = self.cpu_op_exec(cpu_input1, cpu_input2, upper, transpose, unitriangular)
+                        npu_s, npu_a = self.npu_op_exec(npu_input1, npu_input2, upper, transpose, unitriangular)
+                        self.assertRtolEqual(cpu_a, npu_a)
+                        self.assertRtolEqual(cpu_s, npu_s)
+
+
+instantiate_device_type_tests(TestTriangularSolveHelper, globals(), except_for='cpu')
+if __name__ == "__main__":
+    run_tests()
+
diff --git a/src/aten/src/ATen/native/npu/utils/CalcuOpUtil.cpp b/src/aten/src/ATen/native/npu/utils/CalcuOpUtil.cpp
index 412d1fc32b7bca4bb8f5d7bcac31eee8458a5bc8..0bfb9ae831c14d59c68416996a976487c3d31a32 100644
--- a/src/aten/src/ATen/native/npu/utils/CalcuOpUtil.cpp
+++ b/src/aten/src/ATen/native/npu/utils/CalcuOpUtil.cpp
@@ -17,7 +17,7 @@
 #include "CalcuOpUtil.h"
 #include
 #include
-#include <acl/acl_op_compiler.h>
+#include "ATen/native/npu/interface/AclOpCompileInterface.h"
 #include
 #include "ATen/native/npu/frame/InferFormat.h"
 #include "ATen/native/npu/mirror/NPUMemoryOverlap.h"
@@ -632,7 +632,7 @@ void CalcuOpUtil::execute_npu_operate(
   bool reset_flag = false;
   if (env::CheckFuzzyEnable() &&
       FuzzyCompileBlacklist::GetInstance().IsInBlacklist(opName)) {
-    aclopSetCompileFlag(aclOpCompileFlag::ACL_OP_COMPILE_DEFAULT);
+    AclopSetCompileFlag(aclOpCompileFlag::ACL_OP_COMPILE_DEFAULT);
     reset_flag = true;
   }
   NPU_LOGD("Op %s aclopCompileAndExecute Run.", opName.c_str());
@@ -680,7 +680,7 @@ void CalcuOpUtil::execute_npu_operate(
     ACL_REQUIRE_OK_OP(ret, opName.c_str());
   }
   if (reset_flag) {
-    aclopSetCompileFlag(aclOpCompileFlag::ACL_OP_COMPILE_FUZZ);
+    AclopSetCompileFlag(aclOpCompileFlag::ACL_OP_COMPILE_FUZZ);
   }
   aclopDestroyAttr(attr);
   NPUStatus ret = DestroyAclParams(params);
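
[Editor's note] The set-to-DEFAULT / execute / reset-to-FUZZ sequence guarded by reset_flag now appears at four call sites (OpCommandImpl::InnerRun twice, ExecFunc, and execute_npu_operate above). A hypothetical RAII guard, not part of this patch, would make the reset exception-safe and remove the repeated bookkeeping:

    // Illustrative scope guard around the fuzzy-compile flag.
    #include "ATen/native/npu/interface/AclOpCompileInterface.h"

    namespace at { namespace native { namespace npu {
    class FuzzyCompileFlagGuard {
     public:
      // When `blacklisted` is true, force static compilation for this scope.
      explicit FuzzyCompileFlagGuard(bool blacklisted) : active_(blacklisted) {
        if (active_) AclopSetCompileFlag(aclOpCompileFlag::ACL_OP_COMPILE_DEFAULT);
      }
      ~FuzzyCompileFlagGuard() {
        // Runs on scope exit, even if op execution threw an exception.
        if (active_) AclopSetCompileFlag(aclOpCompileFlag::ACL_OP_COMPILE_FUZZ);
      }
      FuzzyCompileFlagGuard(const FuzzyCompileFlagGuard&) = delete;
      FuzzyCompileFlagGuard& operator=(const FuzzyCompileFlagGuard&) = delete;
     private:
      bool active_;
    };
    }}} // namespace at::native::npu
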
diff --git a/src/aten/src/ATen/native/npu/utils/DynamicShapeUtil.h b/src/aten/src/ATen/native/npu/utils/DynamicShapeUtil.h
index d980f2696e08b57be9c1d72abfc75ebb43a6ee14..d19c51ecab700407e0c5fd20bdb09069e203f0d0 100644
--- a/src/aten/src/ATen/native/npu/utils/DynamicShapeUtil.h
+++ b/src/aten/src/ATen/native/npu/utils/DynamicShapeUtil.h
@@ -20,7 +20,7 @@
 #include
 #include
 #include
-#include <acl/acl_op_compiler.h>
+#include "ATen/native/npu/interface/AclOpCompileInterface.h"
 #include "ATen/native/npu/frame/InputInfoLib.h"
 #include "ATen/native/npu/frame/LogUtil.h"
 #include "ATen/native/npu/frame/DebugDynamic.h"
diff --git a/src/aten/src/ATen/native/npu/utils/NpuUtils.h b/src/aten/src/ATen/native/npu/utils/NpuUtils.h
index 34849d55cad36c5333a20af8bc900313b8c8c2a4..a32e64a1a3999b39c325215a9ad4394234c481dd 100644
--- a/src/aten/src/ATen/native/npu/utils/NpuUtils.h
+++ b/src/aten/src/ATen/native/npu/utils/NpuUtils.h
@@ -21,7 +21,7 @@
 #include "c10/npu/NPUCachingAllocator.h"
 #include
 #include
-#include <acl/acl_op_compiler.h>
+#include "ATen/native/npu/interface/AclOpCompileInterface.h"
 #include
 #include
 #include
diff --git a/src/c10/npu/NPUQueue.cpp b/src/c10/npu/NPUQueue.cpp
index e41b2aa7f5c619e7f6adbb27f5e092bf6fc18147..a31a8d87c0b92b686b17ec314fad6db3011199a0 100644
--- a/src/c10/npu/NPUQueue.cpp
+++ b/src/c10/npu/NPUQueue.cpp
@@ -21,8 +21,6 @@
 #include
 #include
-
-#include <acl/acl_op_compiler.h>
 #include
 
 
 //#define OPEN_QUEUE_DEBUG
diff --git a/src/c10/npu/sys_ctrl/npu_sys_ctrl.cpp b/src/c10/npu/sys_ctrl/npu_sys_ctrl.cpp
index be57d3ad835760f4baba959cd20b4265fb3b8813..e0d40fd52106b741c63cfec513bd7d5b5301fba9 100644
--- a/src/c10/npu/sys_ctrl/npu_sys_ctrl.cpp
+++ b/src/c10/npu/sys_ctrl/npu_sys_ctrl.cpp
@@ -18,7 +18,6 @@
 #include
 #include
 #include
-#include <acl/acl_op_compiler.h>
 
 namespace c10 {
 namespace npu {
diff --git a/src/third_party/acl/inc/acl/acl_op_compiler.h b/src/third_party/acl/inc/acl/acl_op_compiler.h
index 7bb14569ebc35c4def509c0bc4003d7cfcadf162..438de77a2a8d1760be5ee8ffd805ec6f7b09acf4 100644
--- a/src/third_party/acl/inc/acl/acl_op_compiler.h
+++ b/src/third_party/acl/inc/acl/acl_op_compiler.h
@@ -105,17 +105,6 @@ ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(const char *opType,
  */
 ACL_FUNC_VISIBILITY aclError aclSetCompileopt(aclCompileOpt opt, const char *value);
 
-/**
- * @ingroup AscendCL
- * @brief an interface set compile flag
- *
- * @param flag [IN] flag: ACL_OPCOMPILE_DEFAULT represent static compile while ACL_OPCOMPILE_FUZZ represent dynamic compile
- *
- * @retval ACL_ERROR_NONE The function is successfully executed.
- * @retval OtherValues Failure
- */
-ACL_FUNC_VISIBILITY aclError aclopSetCompileFlag(aclOpCompileFlag flag);
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/third_party/acl/libs/acl_op_compiler.cpp b/src/third_party/acl/libs/acl_op_compiler.cpp
index b7980a1ca87896dfce9c7a790aaff3f266d4f4bc..368512e8bb8a069e5b7a723f791796649a7cda2e 100644
--- a/src/third_party/acl/libs/acl_op_compiler.cpp
+++ b/src/third_party/acl/libs/acl_op_compiler.cpp
@@ -68,8 +68,3 @@ aclError aclSetCompileopt(
   return 0;
 }
 
-// Dynamic shape相关
-aclError aclopSetCompileFlag(
-    aclOpCompileFlag flag) {
-  return 0;
-};
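
[Editor's note] With the third_party stub deleted, aclopSetCompileFlag no longer exists at link time; callers go through AclopSetCompileFlag, which resolves the symbol at runtime and raises via TORCH_CHECK when it is missing. A brief usage sketch; the surrounding function is hypothetical:

    #include "ATen/native/npu/interface/AclOpCompileInterface.h"

    void ForceStaticCompileExample() {
      // Unlike the removed stub, which always returned 0, the real call can fail.
      aclError err = at::native::npu::AclopSetCompileFlag(
          aclOpCompileFlag::ACL_OP_COMPILE_DEFAULT);
      if (err != ACL_ERROR_NONE) {
        // handle or log the failure here
      }
    }
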
diff --git a/test/test_npu/test_network_ops/test_conv_transpose3d.py b/test/test_npu/test_network_ops/test_conv_transpose3d.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc13cd7c0e0319e5878698e58f4054bc6684c202
--- /dev/null
+++ b/test/test_npu/test_network_ops/test_conv_transpose3d.py
@@ -0,0 +1,91 @@
+# Copyright (c) 2020, Huawei Technologies. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+import numpy as np
+import sys
+import copy
+import torch.nn as nn
+from common_utils import TestCase, run_tests
+from common_device_type import dtypes, instantiate_device_type_tests
+from util_test import create_common_tensor
+
+
+class TestConvTranspose3d(TestCase):
+    def cpu_op_exec(self, input, weight, groups):
+        cpu_output = torch.nn.functional.conv_transpose3d(input, weight, bias=None,
+            stride=1, padding=0, output_padding=0, groups=groups, dilation=1)
+        cpu_output = cpu_output.numpy()
+        return cpu_output
+
+    def cpu_op_exec_fp16(self, input, weight, groups):
+        input = input.to(torch.float32)
+        weight = weight.to(torch.float32)
+        cpu_output = torch.nn.functional.conv_transpose3d(input, weight, bias=None,
+            stride=1, padding=0, output_padding=0, groups=groups, dilation=1)
+        cpu_output = cpu_output.numpy()
+        cpu_output = cpu_output.astype(np.float16)
+
+        return cpu_output
+
+    def npu_op_exec(self, input, weight, groups):
+        input = input.to("npu")
+        weight = weight.to("npu")
+        npu_output = torch.nn.functional.conv_transpose3d(input, weight, bias=None,
+            stride=1, padding=0, output_padding=0, groups=groups, dilation=1)
+        npu_output = npu_output.to("cpu").numpy()
+
+        return npu_output
+
+    def test_conv_transpose3d_fp32(self, device):
+        shape_format = [
+            [[np.float32, 30, [12, 12, 4, 14, 14]], [np.float32, 30, [12, 12, 3, 3, 3]], 1],
+            [[np.float32, 30, [12, 64, 4, 14, 14]], [np.float32, 30, [64, 64, 3, 3, 3]], 1],
+            [[np.float32, 30, [12, 25, 2, 7, 7]], [np.float32, 30, [25, 25, 3, 3, 3]], 1],
+            [[np.float32, 30, [12, 51, 1, 4, 4]], [np.float32, 30, [51, 51, 3, 3, 3]], 1],
+            [[np.float32, 30, [12, 25, 2, 7, 7]], [np.float32, 30, [25, 25, 1, 1, 1]], 1]
+        ]
+        for item in shape_format:
+            input_cpu, input_npu = create_common_tensor(item[0], 0, 10)
+            weight_cpu, weight_npu = create_common_tensor(item[1], 0, 10)
+            if input_cpu.dtype == torch.float16:
+                cpu_output = self.cpu_op_exec_fp16(input_cpu, weight_cpu, item[-1])
+            else:
+                cpu_output = self.cpu_op_exec(input_cpu, weight_cpu, item[-1])
+            npu_output = self.npu_op_exec(input_npu, weight_npu, item[-1])
+            # fp32 precision is insufficient here, so the tolerance is relaxed
+            self.assertRtolEqual(cpu_output, npu_output, prec=1.e-1)
+
+    def test_conv_transpose3d_fp16(self, device):
+        shape_format = [
+            [[np.float16, 30, [12, 12, 4, 14, 14]], [np.float16, 30, [12, 12, 3, 3, 3]], 1],
+            [[np.float16, 30, [12, 64, 4, 14, 14]], [np.float16, 30, [64, 64, 3, 3, 3]], 1],
+            [[np.float16, 30, [12, 25, 2, 7, 7]], [np.float16, 30, [25, 25, 3, 3, 3]], 1],
+            [[np.float16, 30, [12, 51, 1, 4, 4]], [np.float16, 30, [51, 51, 3, 3, 3]], 1],
+            [[np.float16, 30, [12, 25, 2, 7, 7]], [np.float16, 30, [25, 25, 1, 1, 1]], 1],
+        ]
+        for item in shape_format:
+            input_cpu, input_npu = create_common_tensor(item[0], 0, 10)
+            weight_cpu, weight_npu = create_common_tensor(item[1], 0, 10)
+            if input_cpu.dtype == torch.float16:
+                cpu_output = self.cpu_op_exec_fp16(input_cpu, weight_cpu, item[-1])
+            else:
+                cpu_output = self.cpu_op_exec(input_cpu, weight_cpu, item[-1])
+            npu_output = self.npu_op_exec(input_npu, weight_npu, item[-1])
+            self.assertRtolEqual(cpu_output, npu_output)
+
+
+instantiate_device_type_tests(TestConvTranspose3d, globals(), except_for='cpu')
+if __name__ == "__main__":
+    run_tests()
diff --git a/test/test_npu/test_network_ops/test_erfc.py b/test/test_npu/test_network_ops/test_erfc.py
new file mode 100644
index 0000000000000000000000000000000000000000..be565528649d7d96a79b5389b087bfc9746a848e
--- /dev/null
+++ b/test/test_npu/test_network_ops/test_erfc.py
@@ -0,0 +1,161 @@
+# Copyright (c) 2020, Huawei Technologies. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+import numpy as np
+import copy
+from common_utils import TestCase, run_tests
+from common_device_type import dtypes, instantiate_device_type_tests
+from util_test import create_common_tensor
+
+class TestErfc(TestCase):
+
+    def cpu_op_exec(self, input1):
+        output = torch.erfc(input1)
+        output = output.numpy()
+        return output
+
+    def npu_op_exec(self, input1):
+        output = torch.erfc(input1)
+        output = output.to("cpu")
+        output = output.numpy()
+        return output
+
+    def cpu_op_exec_(self, input1):
+        torch.erfc_(input1)
+        output = input1.numpy()
+        return output
+
+    def npu_op_exec_(self, input1):
+        torch.erfc_(input1)
+        output = input1.to("cpu")
+        output = output.numpy()
+        return output
+
+    def cpu_op_exec_out(self, input1, cpu_out):
+        torch.erfc(input1, out=cpu_out)
+        output = cpu_out.numpy()
+        return output
+
+    def npu_op_exec_out(self, input1, npu_out):
+        torch.erfc(input1, out=npu_out)
+        output = npu_out.to("cpu")
+        output = output.numpy()
+        return output
+
+    def test_erfc_float32_common_shape_format(self, device):
+        shape_format = [
+            [np.float32, 0, (4, 3, 10, 9)],
+            [np.float32, -1, (2, 4, 3)],
+            [np.float32, 3, (20, 13)],
+            [np.float32, 4, (20, 13)],
+            [np.float32, 2, (100, 50)],
+            [np.float32, 30, (20, 13, 10, 15, 20)]
+        ]
+        for item in shape_format:
+            cpu_input1, npu_input1 = create_common_tensor(item, 1, 100)
+            cpu_output = self.cpu_op_exec(cpu_input1)
+            npu_output = self.npu_op_exec(npu_input1)
+            self.assertRtolEqual(cpu_output, npu_output)
+
+    def test_erfc_float16_common_shape_format(self, device):
+        shape_format = [
+            [np.float16, 0, (4, 3, 10, 9)],
+            [np.float16, -1, (2, 4, 3)],
+            [np.float16, 3, (20, 13)],
+            [np.float16, 4, (20, 13)],
+            [np.float16, 2, (100, 50)],
+            [np.float16, 30, (20, 13, 10, 15, 20)]
+        ]
+        for item in shape_format:
+            cpu_input1, npu_input1 = create_common_tensor(item, 1, 100)
+            cpu_input1 = cpu_input1.to(torch.float32)
+            cpu_output = self.cpu_op_exec(cpu_input1)
+            npu_output = self.npu_op_exec(npu_input1)
+            cpu_output = cpu_output.astype(npu_output.dtype)
+            self.assertRtolEqual(cpu_output, npu_output)
+
+    def test_erfc_inplace_float32_common_shape_format(self, device):
+        shape_format = [
+            [np.float32, 0, (4, 3, 10, 9)],
+            [np.float32, -1, (2, 4, 3)],
+            [np.float32, 3, (20, 13)],
+            [np.float32, 4, (20, 13)],
+            [np.float32, 2, (100, 50)],
+            [np.float32, 30, (20, 13, 10, 15, 20)]
+        ]
+        for item in shape_format:
+            cpu_input1, npu_input1 = create_common_tensor(item, 1, 100)
+            cpu_output = self.cpu_op_exec_(cpu_input1)
+            npu_output = self.npu_op_exec_(npu_input1)
+            self.assertRtolEqual(cpu_output, npu_output)
+
+    def test_erfc_inplace_float16_common_shape_format(self, device):
+        shape_format = [
+            [np.float16, 0, (4, 3, 10, 9)],
+            [np.float16, -1, (2, 4, 3)],
+            [np.float16, 3, (20, 13)],
+            [np.float16, 4, (20, 13)],
+            [np.float16, 2, (100, 50)],
+            [np.float16, 30, (20, 13, 10, 15, 20)]
+        ]
+        for item in shape_format:
+            cpu_input1, npu_input1 = create_common_tensor(item, 1, 100)
+            cpu_input1 = cpu_input1.to(torch.float32)
+            cpu_output = self.cpu_op_exec_(cpu_input1)
+            npu_output = self.npu_op_exec_(npu_input1)
+            cpu_output = cpu_output.astype(npu_output.dtype)
+            self.assertRtolEqual(cpu_output, npu_output)
+
+    def test_erfc_out_float32_common_shape_format(self, device):
+        shape_format = [
+            [np.float32, 0, (4, 3, 10, 9)],
+            [np.float32, -1, (2, 4, 3)],
+            [np.float32, 3, (20, 13)],
+            [np.float32, 4, (20, 13)],
+            [np.float32, 2, (100, 50)],
+            [np.float32, 30, (20, 13, 10, 15, 20)]
+        ]
+        for item in shape_format:
+            cpu_input1, npu_input1 = create_common_tensor(item, 1, 100)
+            cpu_out, npu_out = create_common_tensor(item, 1, 100)
+            cpu_output = self.cpu_op_exec_out(cpu_input1, cpu_out)
+            npu_output = self.npu_op_exec_out(npu_input1, npu_out)
+            self.assertRtolEqual(cpu_output, npu_output)
+
+    def test_erfc_out_float16_common_shape_format(self, device):
+        shape_format = [
+            [np.float16, 0, (4, 3, 10, 9)],
+            [np.float16, -1, (2, 4, 3)],
+            [np.float16, 3, (20, 13)],
+            [np.float16, 4, (20, 13)],
+            [np.float16, 2, (100, 50)],
+            [np.float16, 30, (20, 13, 10, 15, 20)]
+        ]
+        for item in shape_format:
+            cpu_input1, npu_input1 = create_common_tensor(item, 1, 100)
+            cpu_input1 = cpu_input1.to(torch.float32)
+            cpu_out, npu_out = create_common_tensor(item, 1, 100)
+            cpu_out = cpu_out.to(torch.float32)
+            cpu_output = self.cpu_op_exec_out(cpu_input1, cpu_out)
+            npu_output = self.npu_op_exec_out(npu_input1, npu_out)
+            cpu_output = cpu_output.astype(npu_output.dtype)
+            self.assertRtolEqual(cpu_output, npu_output)
+
+
+instantiate_device_type_tests(TestErfc, globals(), except_for="cpu")
+if __name__ == "__main__":
+    run_tests()