diff --git a/patch/npu.patch b/patch/npu.patch index 2c369ae0ead72c86da0dc96cd41d8047b19a1da8..8f837804fe2ac2d2991fab836bdfbe39755c20bc 100644 --- a/patch/npu.patch +++ b/patch/npu.patch @@ -1,6 +1,6 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/CMakeLists.txt pytorch-develop/aten/CMakeLists.txt --- pytorch-v1.5.0/aten/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/CMakeLists.txt 2021-07-29 20:15:45.583572501 +0800 ++++ pytorch-develop/aten/CMakeLists.txt 2021-08-03 16:07:55.738412409 +0800 @@ -22,8 +22,10 @@ set(ATen_CPU_INCLUDE) set(ATen_THIRD_PARTY_INCLUDE) @@ -51,7 +51,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= set(ATen_CPU_DEPENDENCY_LIBS ${ATen_CPU_DEPENDENCY_LIBS} PARENT_SCOPE) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/CMakeLists.txt pytorch-develop/aten/src/ATen/CMakeLists.txt --- pytorch-v1.5.0/aten/src/ATen/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/CMakeLists.txt 2021-07-29 20:15:45.583572501 +0800 ++++ pytorch-develop/aten/src/ATen/CMakeLists.txt 2021-08-03 16:07:55.738412409 +0800 @@ -67,6 +67,9 @@ FILE(GLOB native_quantized_h "native/quantized/*.h" "native/quantized/cpu/*.h") FILE(GLOB native_cpu_h "native/cpu/*.h") @@ -129,7 +129,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= set(ATen_QUANTIZED_SRCS ${ATen_QUANTIZED_SRCS} PARENT_SCOPE) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/core/dispatch/DispatchTable.h pytorch-develop/aten/src/ATen/core/dispatch/DispatchTable.h --- pytorch-v1.5.0/aten/src/ATen/core/dispatch/DispatchTable.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/core/dispatch/DispatchTable.h 2021-07-29 20:15:45.587572643 +0800 ++++ pytorch-develop/aten/src/ATen/core/dispatch/DispatchTable.h 2021-08-03 16:07:55.746412476 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -170,7 +170,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/function_wrapper.py pytorch-develop/aten/src/ATen/function_wrapper.py --- pytorch-v1.5.0/aten/src/ATen/function_wrapper.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/function_wrapper.py 2021-07-29 20:15:45.595572931 +0800 ++++ pytorch-develop/aten/src/ATen/function_wrapper.py 2021-08-03 16:07:55.754412543 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -354,7 +354,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= for option in declaration['options']: diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/gen.py pytorch-develop/aten/src/ATen/gen.py --- pytorch-v1.5.0/aten/src/ATen/gen.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/gen.py 2021-07-29 20:15:45.595572931 +0800 ++++ pytorch-develop/aten/src/ATen/gen.py 2021-08-03 16:07:55.754412543 +0800 @@ -1,3 +1,18 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -512,7 +512,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= generate_outputs() diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/cpu/Activation.cpp pytorch-develop/aten/src/ATen/native/cpu/Activation.cpp --- pytorch-v1.5.0/aten/src/ATen/native/cpu/Activation.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/cpu/Activation.cpp 2021-07-29 20:15:45.603573218 +0800 ++++ pytorch-develop/aten/src/ATen/native/cpu/Activation.cpp 2021-08-03 16:07:55.766412643 +0800 @@ -339,20 +339,20 @@ void hardsigmoid_backward_kernel(TensorIterator& iter) { @@ -540,7 +540,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= }); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/Memory.cpp pytorch-develop/aten/src/ATen/native/Memory.cpp --- pytorch-v1.5.0/aten/src/ATen/native/Memory.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/Memory.cpp 2021-07-29 20:15:45.599573074 +0800 ++++ pytorch-develop/aten/src/ATen/native/Memory.cpp 2021-08-03 16:07:55.758412576 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -595,7 +595,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= detail::computeStorageSize(self.sizes(), self.strides()), diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/native_functions.yaml pytorch-develop/aten/src/ATen/native/native_functions.yaml --- pytorch-v1.5.0/aten/src/ATen/native/native_functions.yaml 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/native_functions.yaml 2021-07-29 20:15:45.615573647 +0800 ++++ pytorch-develop/aten/src/ATen/native/native_functions.yaml 2021-08-03 16:07:55.778412743 +0800 @@ -1,6 +1,5 @@ # See README.md in this directory for more guidance @@ -1412,7 +1412,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: cosine_embedding_loss(Tensor input1, Tensor input2, Tensor target, float margin=0.0, int reduction=Mean) -> Tensor use_c10_dispatcher: full -@@ -897,6 +1108,50 @@ +@@ -897,6 +1108,54 @@ dispatch: CUDA: cudnn_convolution_transpose_backward_weight @@ -1436,6 +1436,10 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= + npu_dispatch_only: + NPU: npu_convolution_backward + ++- func: npu_convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? 
ggb, Tensor input, Tensor gO, Tensor weight, int[] stride, int[] padding, int[] dilation, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor) ++ npu_dispatch_only: ++ NPU: npu_convolution_double_backward ++ +- func: npu_conv2d(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> Tensor + npu_dispatch_only: + NPU: conv2d_npu @@ -1463,7 +1467,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # NB: input is special cased in a way I don't quite understand - func: cudnn_grid_sampler(Tensor self, Tensor grid) -> Tensor output use_c10_dispatcher: full -@@ -930,16 +1185,24 @@ +@@ -930,16 +1189,24 @@ - func: cummin(Tensor self, int dim) -> (Tensor values, Tensor indices) supports_named_tensor: True variants: function, method @@ -1488,7 +1492,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _cummin_helper(Tensor self, Tensor(a!) values, Tensor(b!) indices, int dim) -> () variants: function -@@ -950,16 +1213,24 @@ +@@ -950,16 +1217,24 @@ - func: cumprod(Tensor self, int dim, *, ScalarType? dtype=None) -> Tensor supports_named_tensor: True variants: function, method @@ -1513,7 +1517,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: cumsum(Tensor self, int dim, *, ScalarType? dtype=None) -> Tensor supports_named_tensor: True -@@ -976,20 +1247,28 @@ +@@ -976,20 +1251,28 @@ supports_named_tensor: True - func: ctc_loss.IntList(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank=0, int reduction=Mean, bool zero_infinity=False) -> Tensor @@ -1543,7 +1547,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: det(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -1013,6 +1292,8 @@ +@@ -1013,6 +1296,8 @@ - func: fill_diagonal_(Tensor(a!) self, Scalar fill_value, bool wrap=False) -> Tensor(a!) variants: method @@ -1552,7 +1556,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: div.Tensor(Tensor self, Tensor other) -> Tensor use_c10_dispatcher: full -@@ -1022,6 +1303,8 @@ +@@ -1022,6 +1307,8 @@ CUDA: div SparseCPU: div_sparse SparseCUDA: div_sparse @@ -1561,7 +1565,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: div_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) -@@ -1031,6 +1314,8 @@ +@@ -1031,6 +1318,8 @@ CUDA: div_ SparseCPU: div_sparse_ SparseCUDA: div_sparse_ @@ -1570,7 +1574,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: div.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) 
-@@ -1039,6 +1324,8 @@ +@@ -1039,6 +1328,8 @@ CUDA: div_out SparseCPU: div_out_sparse_zerodim SparseCUDA: div_out_sparse_zerodim @@ -1579,7 +1583,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True # For C++ only, until we have conversion from C++ numbers to Tensor -@@ -1046,10 +1333,14 @@ +@@ -1046,10 +1337,14 @@ use_c10_dispatcher: full variants: function, method supports_named_tensor: True @@ -1594,7 +1598,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: dot(Tensor self, Tensor tensor) -> Tensor use_c10_dispatcher: full -@@ -1057,29 +1348,41 @@ +@@ -1057,29 +1352,41 @@ dispatch: CPU: legacy::cpu::_th_dot CUDA: legacy::cuda::_th_dot @@ -1636,7 +1640,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: embedding_sparse_backward(Tensor grad, Tensor indices, int num_weights, int padding_idx, bool scale_grad_by_freq) -> Tensor use_c10_dispatcher: full -@@ -1099,6 +1402,8 @@ +@@ -1099,6 +1406,8 @@ dispatch: CPU: _embedding_bag_cpu CUDA: _embedding_bag_cuda @@ -1645,7 +1649,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _embedding_bag_backward(Tensor grad, Tensor indices, Tensor offsets, Tensor offset2bag, Tensor bag_size, Tensor maximum_indices, int num_weights, bool scale_grad_by_freq, int mode, bool sparse, Tensor? per_sample_weights) -> Tensor -@@ -1125,6 +1430,8 @@ +@@ -1125,6 +1434,8 @@ MkldnnCPU: empty_mkldnn SparseCPU: empty_sparse SparseCUDA: empty_sparse @@ -1654,7 +1658,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: new_empty(Tensor self, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor variants: method -@@ -1154,6 +1461,8 @@ +@@ -1154,6 +1465,8 @@ supports_named_tensor: True variants: method device_guard: False @@ -1663,7 +1667,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: empty.out(int[] size, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!) device_guard: False -@@ -1161,16 +1470,22 @@ +@@ -1161,16 +1474,22 @@ - func: empty_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor device_guard: False supports_named_tensor: True @@ -1686,7 +1690,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: erf_(Tensor(a!) self) -> Tensor(a!) supports_named_tensor: True -@@ -1178,17 +1493,25 @@ +@@ -1178,17 +1497,25 @@ dispatch: CPU: _erf__cpu CUDA: _erf__cuda @@ -1712,7 +1716,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: erfc_(Tensor(a!) self) -> Tensor(a!) supports_named_tensor: True -@@ -1196,17 +1519,23 @@ +@@ -1196,17 +1523,23 @@ dispatch: CPU: _erfc__cpu CUDA: _erfc__cuda @@ -1736,7 +1740,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: exp_(Tensor(a!) self) -> Tensor(a!) supports_named_tensor: True -@@ -1214,51 +1543,69 @@ +@@ -1214,51 +1547,69 @@ dispatch: CPU: _exp__cpu CUDA: _exp__cuda @@ -1808,7 +1812,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: flatten.using_ints(Tensor self, int start_dim=0, int end_dim=-1) -> Tensor use_c10_dispatcher: full -@@ -1280,25 +1627,35 @@ +@@ -1280,25 +1631,35 @@ - func: fill_.Scalar(Tensor(a!) 
self, Scalar value) -> Tensor(a!) supports_named_tensor: True variants: function, method @@ -1844,7 +1848,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: floor_divide(Tensor self, Tensor other) -> Tensor variants: function, method -@@ -1308,6 +1665,8 @@ +@@ -1308,6 +1669,8 @@ SparseCPU: floor_divide_sparse SparseCUDA: floor_divide_sparse supports_named_tensor: True @@ -1853,7 +1857,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: floor_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) variants: method -@@ -1317,6 +1676,8 @@ +@@ -1317,6 +1680,8 @@ SparseCPU: floor_divide_sparse_ SparseCUDA: floor_divide_sparse_ supports_named_tensor: True @@ -1862,7 +1866,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: floor_divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) dispatch: -@@ -1325,33 +1686,56 @@ +@@ -1325,33 +1690,56 @@ SparseCPU: floor_divide_out_sparse_zerodim SparseCUDA: floor_divide_out_sparse_zerodim supports_named_tensor: True @@ -1919,7 +1923,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: full_like(Tensor self, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor supports_named_tensor: True -@@ -1373,6 +1757,8 @@ +@@ -1373,6 +1761,8 @@ # `align_corners = True`. - func: grid_sampler(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor use_c10_dispatcher: full @@ -1928,7 +1932,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: grid_sampler_2d(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor use_c10_dispatcher: full -@@ -1390,23 +1776,39 @@ +@@ -1390,23 +1780,39 @@ dispatch: CPU: grid_sampler_3d_cpu CUDA: grid_sampler_3d_cuda @@ -1968,7 +1972,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: hinge_embedding_loss(Tensor self, Tensor target, float margin=1.0, int reduction=Mean) -> Tensor use_c10_dispatcher: full -@@ -1414,8 +1816,13 @@ +@@ -1414,8 +1820,13 @@ - func: ger(Tensor self, Tensor vec2) -> Tensor use_c10_dispatcher: full variants: function, method @@ -1982,7 +1986,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: group_norm(Tensor input, int num_groups, Tensor? weight=None, Tensor? bias=None, float eps=1e-05, bool cudnn_enabled=True) -> Tensor -@@ -1460,6 +1867,8 @@ +@@ -1460,6 +1871,8 @@ # NB: The following functions are declared in aten/src/ATen/templates/TensorBody.h and defined in aten/src/ATen/TensorIndexing.cpp: # - Tensor Tensor::index(ArrayRef indices) # - Tensor Tensor::index(std::initializer_list indices) @@ -1991,7 +1995,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: index_copy_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!) variants: method -@@ -1476,17 +1885,23 @@ +@@ -1476,17 +1889,23 @@ - func: index_put_(Tensor(a!) self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor(a!) variants: function, method @@ -2016,7 +2020,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: instance_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? 
running_var, bool use_input_stats, float momentum, float eps, bool cudnn_enabled) -> Tensor variants: function -@@ -1494,8 +1909,12 @@ +@@ -1494,8 +1913,12 @@ - func: inverse(Tensor self) -> Tensor use_c10_dispatcher: full variants: function, method @@ -2029,7 +2033,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _inverse_helper(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -1507,6 +1926,8 @@ +@@ -1507,6 +1930,8 @@ - func: isclose(Tensor self, Tensor other, float rtol=1e-05, float atol=1e-08, bool equal_nan=False) -> Tensor use_c10_dispatcher: full variants: function, method @@ -2038,7 +2042,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: isnan(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -1518,6 +1939,8 @@ +@@ -1518,6 +1943,8 @@ CUDA: isnan SparseCPU: isnan_sparse SparseCUDA: isnan_sparse @@ -2047,7 +2051,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: is_distributed(Tensor self) -> bool use_c10_dispatcher: full -@@ -1541,6 +1964,8 @@ +@@ -1541,6 +1968,8 @@ variants: function, method device_guard: False supports_named_tensor: True @@ -2056,7 +2060,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: is_same_size(Tensor self, Tensor other) -> bool use_c10_dispatcher: full -@@ -1556,29 +1981,41 @@ +@@ -1556,29 +1985,41 @@ - func: kl_div(Tensor self, Tensor target, int reduction=Mean) -> Tensor use_c10_dispatcher: full @@ -2098,7 +2102,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: layer_norm(Tensor input, int[] normalized_shape, Tensor? weight=None, Tensor? bias=None, float eps=1e-05, bool cudnn_enable=True) -> Tensor -@@ -1586,11 +2023,15 @@ +@@ -1586,11 +2027,15 @@ dispatch: CPU: layer_norm_cpu CUDA: layer_norm_cuda @@ -2114,7 +2118,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor python_module: nn -@@ -1622,46 +2063,64 @@ +@@ -1622,46 +2067,64 @@ use_c10_dispatcher: full - func: linspace(Scalar start, Scalar end, int steps=100, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor @@ -2179,7 +2183,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: log1p_(Tensor(a!) self) -> Tensor(a!) supports_named_tensor: True -@@ -1671,6 +2130,8 @@ +@@ -1671,6 +2134,8 @@ CUDA: log1p_ SparseCPU: log1p_sparse_ SparseCUDA: log1p_sparse_ @@ -2188,7 +2192,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: log1p.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) 
supports_named_tensor: True -@@ -1679,67 +2140,95 @@ +@@ -1679,67 +2144,95 @@ CUDA: log1p_out SparseCPU: log1p_out_sparse SparseCUDA: log1p_out_sparse @@ -2284,7 +2288,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: margin_ranking_loss(Tensor input1, Tensor input2, Tensor target, float margin=0.0, int reduction=Mean) -> Tensor use_c10_dispatcher: full -@@ -1748,9 +2237,13 @@ +@@ -1748,9 +2241,13 @@ use_c10_dispatcher: full variants: function, method supports_named_tensor: True @@ -2298,7 +2302,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: matrix_rank.tol(Tensor self, float tol, bool symmetric=False) -> Tensor use_c10_dispatcher: full -@@ -1765,22 +2258,34 @@ +@@ -1765,22 +2262,34 @@ - func: max.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices) variants: function, method supports_named_tensor: True @@ -2333,7 +2337,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Return: (Tensor output, Tensor indices) - func: max_pool1d_with_indices(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, int[1] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor) -@@ -1791,6 +2296,8 @@ +@@ -1791,6 +2300,8 @@ - func: max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor supports_named_tensor: True @@ -2342,7 +2346,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: mkldnn_max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor requires_tensor: True -@@ -1814,6 +2321,8 @@ +@@ -1814,6 +2325,8 @@ CPU: mean_cpu_gpu CUDA: mean_cpu_gpu QuantizedCPU: quantized_mean_cpu @@ -2351,7 +2355,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: mean.dim(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor variants: function, method -@@ -1822,6 +2331,8 @@ +@@ -1822,6 +2335,8 @@ CPU: mean_cpu_gpu CUDA: mean_cpu_gpu QuantizedCPU: quantized_mean_cpu @@ -2360,7 +2364,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: mean.out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True -@@ -1829,47 +2340,73 @@ +@@ -1829,47 +2344,73 @@ CPU: mean_out_cpu_gpu CUDA: mean_out_cpu_gpu QuantizedCPU: quantized_mean_out_cpu @@ -2434,7 +2438,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: mkldnn_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups) -> Tensor -@@ -1958,6 +2495,8 @@ +@@ -1958,6 +2499,8 @@ CUDA: legacy::cuda::_th_mm SparseCPU: _sparse_mm SparseCUDA: _sparse_mm @@ -2443,7 +2447,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!) 
-@@ -1966,6 +2505,8 @@ +@@ -1966,6 +2509,8 @@ CUDA: legacy::cuda::_th_mm_out SparseCPU: _sparse_mm_out SparseCUDA: _sparse_mm_out @@ -2452,7 +2456,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor -@@ -1994,6 +2535,8 @@ +@@ -1994,6 +2539,8 @@ SparseCPU: mul_sparse SparseCUDA: mul_sparse MkldnnCPU: mkldnn_mul @@ -2461,7 +2465,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: mul_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) -@@ -2004,6 +2547,8 @@ +@@ -2004,6 +2551,8 @@ SparseCPU: mul_sparse_ SparseCUDA: mul_sparse_ MkldnnCPU: mkldnn_mul_ @@ -2470,7 +2474,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: mul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) -@@ -2013,15 +2558,21 @@ +@@ -2013,15 +2562,21 @@ SparseCPU: mul_out_sparse_cpu SparseCUDA: mul_out_sparse_cuda MkldnnCPU: mkldnn_mul_out @@ -2492,7 +2496,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: mv(Tensor self, Tensor vec) -> Tensor use_c10_dispatcher: full -@@ -2030,12 +2581,16 @@ +@@ -2030,12 +2585,16 @@ CPU: mv_cpu CUDA: legacy::cuda::_th_mv supports_named_tensor: True @@ -2509,7 +2513,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: mvlgamma(Tensor self, int p) -> Tensor use_c10_dispatcher: full -@@ -2052,6 +2607,8 @@ +@@ -2052,6 +2611,8 @@ CUDA: narrow_copy_dense SparseCPU: narrow_copy_sparse SparseCUDA: narrow_copy_sparse @@ -2518,7 +2522,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: narrow(Tensor(a) self, int dim, int start, int length) -> Tensor(a) variants: function, method -@@ -2068,6 +2625,8 @@ +@@ -2068,6 +2629,8 @@ CPU: batch_norm_cpu CUDA: batch_norm_cuda MkldnnCPU: mkldnn_batch_norm @@ -2527,7 +2531,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: native_batch_norm.out(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, *, Tensor(a!) out, Tensor(b!) save_mean, Tensor(c!) save_invstd) -> (Tensor(a!), Tensor(b!), Tensor(c!)) dispatch: -@@ -2098,6 +2657,8 @@ +@@ -2098,6 +2661,8 @@ dispatch: CPU: batch_norm_backward_cpu CUDA: batch_norm_backward_cuda @@ -2536,7 +2540,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: batch_norm_backward_reduce(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, bool input_g, bool weight_g, bool bias_g) -> (Tensor, Tensor, Tensor, Tensor) dispatch: -@@ -2117,6 +2678,8 @@ +@@ -2117,6 +2682,8 @@ - func: _nnpack_spatial_convolution(Tensor input, Tensor weight, Tensor? bias, int[2] padding, int[2] stride=1) -> Tensor variants: function @@ -2545,7 +2549,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _nnpack_spatial_convolution_backward(Tensor input, Tensor grad_output, Tensor weight, int[2] padding, bool[3] output_mask) -> (Tensor, Tensor, Tensor) variants: function -@@ -2129,42 +2692,60 @@ +@@ -2129,42 +2696,60 @@ - func: ones.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=None) -> Tensor device_guard: False @@ -2608,7 +2612,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Only exposed from C++ -- in Python, # we expose it as an attribute `T`, not a function. -@@ -2253,54 +2834,82 @@ +@@ -2253,54 +2838,82 @@ supports_named_tensor: True - func: randperm(int n, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor @@ -2692,7 +2696,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: repeat_interleave.Tensor(Tensor repeats) -> Tensor use_c10_dispatcher: full -@@ -2316,6 +2925,8 @@ +@@ -2316,6 +2929,8 @@ - func: repeat_interleave.self_int(Tensor self, int repeats, int? dim=None) -> Tensor use_c10_dispatcher: full variants: function, method @@ -2701,7 +2705,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: reshape(Tensor self, int[] shape) -> Tensor variants: function, method -@@ -2337,16 +2948,22 @@ +@@ -2337,16 +2952,22 @@ use_c10_dispatcher: full supports_named_tensor: True variants: function, method @@ -2724,7 +2728,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: rrelu(Tensor self, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor -@@ -2360,6 +2977,8 @@ +@@ -2360,6 +2981,8 @@ CUDA: relu MkldnnCPU: mkldnn_relu QuantizedCPU: quantized_relu @@ -2733,7 +2737,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: relu_(Tensor(a!) self) -> Tensor(a!) -@@ -2370,6 +2989,8 @@ +@@ -2370,6 +2993,8 @@ CUDA: relu_ MkldnnCPU: mkldnn_relu_ QuantizedCPU: quantized_relu_ @@ -2742,7 +2746,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: prelu(Tensor self, Tensor weight) -> Tensor use_c10_dispatcher: full -@@ -2377,12 +2998,16 @@ +@@ -2377,12 +3002,16 @@ dispatch: CPU: prelu_cpu CUDA: prelu_cuda @@ -2759,7 +2763,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: gelu(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -2390,6 +3015,8 @@ +@@ -2390,6 +3019,8 @@ dispatch: CPU: gelu_cpu CUDA: gelu_cuda @@ -2768,7 +2772,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: gelu_backward(Tensor grad, Tensor self) -> Tensor use_c10_dispatcher: full -@@ -2397,29 +3024,41 @@ +@@ -2397,29 +3028,41 @@ dispatch: CPU: gelu_backward_cpu CUDA: gelu_backward_cuda @@ -2810,7 +2814,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: select.Dimname(Tensor(a) self, Dimname dim, int index) -> Tensor(a) variants: function, method -@@ -2433,14 +3072,21 @@ +@@ -2433,14 +3076,21 @@ - func: selu(Tensor self) -> Tensor use_c10_dispatcher: full @@ -2833,7 +2837,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: sigmoid(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -2451,6 +3097,8 @@ +@@ -2451,6 +3101,8 @@ CUDA: sigmoid QuantizedCPU: quantized_sigmoid MkldnnCPU: mkldnn_sigmoid @@ -2842,7 +2846,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: sigmoid_(Tensor(a!) self) -> Tensor(a!) 
supports_named_tensor: True -@@ -2459,36 +3107,52 @@ +@@ -2459,36 +3111,52 @@ CPU: sigmoid_ CUDA: sigmoid_ MkldnnCPU: mkldnn_sigmoid_ @@ -2895,7 +2899,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Returns a copy of this `Variable` that is detached from its autograd graph. # This method is OK to call if the `Variable` is a view. -@@ -2533,6 +3197,8 @@ +@@ -2533,6 +3201,8 @@ - func: slogdet(Tensor self) -> (Tensor sign, Tensor logabsdet) variants: function, method @@ -2904,7 +2908,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: smm(Tensor self, Tensor mat2) -> Tensor use_c10_dispatcher: full -@@ -2542,10 +3208,14 @@ +@@ -2542,10 +3212,14 @@ - func: softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor variants: function, method supports_named_tensor: True @@ -2919,7 +2923,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _softmax(Tensor self, int dim, bool half_to_float) -> Tensor use_c10_dispatcher: full -@@ -2553,12 +3223,16 @@ +@@ -2553,12 +3227,16 @@ CPU: softmax_cpu CUDA: softmax_cuda MkldnnCPU: mkldnn_softmax @@ -2936,7 +2940,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: split.Tensor(Tensor(a) self, int split_size, int dim=0) -> Tensor(a)[] variants: function, method -@@ -2609,8 +3283,12 @@ +@@ -2609,8 +3287,12 @@ SparseCUDA: _sspaddmm_out_cuda - func: stack(Tensor[] tensors, int dim=0) -> Tensor @@ -2949,7 +2953,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # The signature is designed to be consistent with librosa except that it is # missing the `pad_mode` and `center` arguments, which are taken care of at -@@ -2633,20 +3311,30 @@ +@@ -2633,20 +3315,30 @@ - func: sum(Tensor self, *, ScalarType? dtype=None) -> Tensor variants: function, method supports_named_tensor: True @@ -2980,7 +2984,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: sum_to_size(Tensor self, int[] size) -> Tensor variants: method -@@ -2656,13 +3344,19 @@ +@@ -2656,13 +3348,19 @@ use_c10_dispatcher: full supports_named_tensor: True variants: function, method @@ -3000,7 +3004,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: square(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -2677,51 +3371,81 @@ +@@ -2677,51 +3375,81 @@ use_c10_dispatcher: full variants: function, method supports_named_tensor: True @@ -3083,7 +3087,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: t(Tensor(a) self) -> Tensor(a) device_guard: False -@@ -2736,6 +3460,8 @@ +@@ -2736,6 +3464,8 @@ use_c10_dispatcher: full supports_named_tensor: True variants: function, method @@ -3092,7 +3096,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: tan_(Tensor(a!) self) -> Tensor(a!) supports_named_tensor: True -@@ -2743,12 +3469,16 @@ +@@ -2743,12 +3473,16 @@ dispatch: CPU: _tan__cpu CUDA: _tan__cuda @@ -3109,7 +3113,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: tanh(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -2758,6 +3488,8 @@ +@@ -2758,6 +3492,8 @@ CPU: tanh CUDA: tanh QuantizedCPU: quantized_tanh @@ -3118,7 +3122,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: tanh_(Tensor(a!) self) -> Tensor(a!) 
supports_named_tensor: True -@@ -2765,12 +3497,16 @@ +@@ -2765,12 +3501,16 @@ dispatch: CPU: _tanh__cpu CUDA: _tanh__cuda @@ -3135,7 +3139,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: tensordot(Tensor self, Tensor other, int[] dims_self, int[] dims_other) -> Tensor variants: function -@@ -2783,6 +3519,8 @@ +@@ -2783,6 +3523,8 @@ dispatch: CPU: threshold CUDA: threshold_cuda @@ -3144,7 +3148,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: threshold_(Tensor(a!) self, Scalar threshold, Scalar value) -> Tensor(a!) variants: function -@@ -2790,12 +3528,16 @@ +@@ -2790,12 +3532,16 @@ dispatch: CPU: threshold_ CUDA: threshold__cuda @@ -3161,7 +3165,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: threshold_backward(Tensor grad_output, Tensor self, Scalar threshold) -> Tensor use_c10_dispatcher: full -@@ -2803,6 +3545,8 @@ +@@ -2803,6 +3549,8 @@ dispatch: CPU: threshold_backward CUDA: threshold_backward_cuda @@ -3170,7 +3174,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: transpose.int(Tensor(a) self, int dim0, int dim1) -> Tensor(a) variants: function, method -@@ -2835,18 +3579,24 @@ +@@ -2835,18 +3583,24 @@ use_c10_dispatcher: full python_module: nn variants: function @@ -3195,7 +3199,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # default int[] value [0,1] should not add space after comma, since native_parse.py uses ', ' to split args -@@ -2872,6 +3622,8 @@ +@@ -2872,6 +3626,8 @@ CUDA: true_divide SparseCPU: true_divide_sparse SparseCUDA: true_divide_sparse @@ -3204,7 +3208,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: true_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) -@@ -2881,6 +3633,8 @@ +@@ -2881,6 +3637,8 @@ CUDA: true_divide_ SparseCPU: true_divide_sparse_ SparseCUDA: true_divide_sparse_ @@ -3213,7 +3217,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: true_divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) 
-@@ -2889,31 +3643,43 @@ +@@ -2889,31 +3647,43 @@ CUDA: true_divide_out SparseCPU: true_divide_out_sparse_zerodim SparseCUDA: true_divide_out_sparse_zerodim @@ -3257,7 +3261,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: type_as(Tensor self, Tensor other) -> Tensor use_c10_dispatcher: full -@@ -2956,6 +3722,8 @@ +@@ -2956,6 +3726,8 @@ dispatch: CPU: _unique2_cpu CUDA: _unique2_cuda @@ -3266,7 +3270,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _unsafe_view(Tensor self, int[] size) -> Tensor -@@ -2971,32 +3739,48 @@ +@@ -2971,32 +3743,48 @@ use_c10_dispatcher: full variants: function, method supports_named_tensor: True @@ -3315,7 +3319,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: view_as(Tensor self, Tensor other) -> Tensor use_c10_dispatcher: full -@@ -3009,13 +3793,19 @@ +@@ -3009,13 +3797,19 @@ - func: where.self(Tensor condition, Tensor self, Tensor other) -> Tensor use_c10_dispatcher: full variants: function, method @@ -3335,7 +3339,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: norm_except_dim(Tensor v, int pow=2, int dim=0) -> Tensor variants: function -@@ -3041,13 +3831,21 @@ +@@ -3041,13 +3835,21 @@ - func: zeros.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor device_guard: False @@ -3357,7 +3361,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _standard_gamma_grad(Tensor self, Tensor output) -> Tensor use_c10_dispatcher: full -@@ -3100,25 +3898,37 @@ +@@ -3100,25 +3902,37 @@ - func: _sparse_sum_backward(Tensor grad, Tensor self, int[] dim) -> Tensor dispatch: @@ -3397,7 +3401,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: norm.names_ScalarOpt_dim_dtype(Tensor self, Scalar? p, Dimname[1] dim, bool keepdim, *, ScalarType dtype) -> Tensor variants: function, method -@@ -3162,12 +3972,16 @@ +@@ -3162,12 +3976,16 @@ SparseCUDA: clone_sparse MkldnnCPU: mkldnn_clone QuantizedCPU: quantized_clone @@ -3414,7 +3418,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: pow.Tensor_Scalar_out(Tensor self, Scalar exponent, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True -@@ -3176,6 +3990,8 @@ +@@ -3176,6 +3994,8 @@ CUDA: pow_out SparseCPU: pow_out_sparse_scalar SparseCUDA: pow_out_sparse_scalar @@ -3423,7 +3427,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: pow.Tensor_Scalar(Tensor self, Scalar exponent) -> Tensor use_c10_dispatcher: full -@@ -3186,6 +4002,8 @@ +@@ -3186,6 +4006,8 @@ CUDA: pow SparseCPU: pow_sparse_scalar SparseCUDA: pow_sparse_scalar @@ -3432,7 +3436,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: zero_(Tensor(a!) self) -> Tensor(a!) supports_named_tensor: True -@@ -3196,6 +4014,14 @@ +@@ -3196,6 +4018,14 @@ SparseCPU: zero_sparse_ SparseCUDA: zero_sparse_ MkldnnCPU: mkldnn_zero_ @@ -3447,7 +3451,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: sub.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!) 
dispatch: -@@ -3204,6 +4030,8 @@ +@@ -3204,6 +4034,8 @@ SparseCPU: sub_out_sparse SparseCUDA: sub_out_sparse supports_named_tensor: True @@ -3456,7 +3460,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: sub.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor use_c10_dispatcher: full -@@ -3213,6 +4041,8 @@ +@@ -3213,6 +4045,8 @@ CUDA: sub SparseCPU: sub_sparse SparseCUDA: sub_sparse @@ -3465,7 +3469,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: sub_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!) -@@ -3222,6 +4052,8 @@ +@@ -3222,6 +4056,8 @@ CUDA: sub_ SparseCPU: sub_sparse_ SparseCUDA: sub_sparse_ @@ -3474,7 +3478,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True # For C++ only, until we have conversion from C++ numbers to Tensor -@@ -3229,21 +4061,29 @@ +@@ -3229,21 +4065,29 @@ use_c10_dispatcher: full variants: function, method supports_named_tensor: True @@ -3504,7 +3508,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Functionally the same as addmm, but we give it a different derivative formula # that doesn't propagate gradients to non-present entries on sparse. -@@ -3257,6 +4097,8 @@ +@@ -3257,6 +4101,8 @@ CUDA: legacy::cuda::_th_addmm_out SparseCPU: addmm_out_sparse_dense_cpu SparseCUDA: addmm_out_sparse_dense_cuda @@ -3513,7 +3517,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor -@@ -3267,6 +4109,8 @@ +@@ -3267,6 +4113,8 @@ CUDA: legacy::cuda::_th_addmm SparseCPU: addmm_sparse_dense_cpu SparseCUDA: addmm_sparse_dense_cuda @@ -3522,7 +3526,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: addmm_(Tensor(a!) self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!) -@@ -3278,9 +4122,10 @@ +@@ -3278,9 +4126,10 @@ # broadcasting SparseCPU: s_addmm_sparse_dense_cpu_ SparseCUDA: s_addmm_sparse_dense_cuda_ @@ -3534,7 +3538,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # NOTE [ Sparse: autograd and API ] # # -@@ -3396,7 +4241,6 @@ +@@ -3396,7 +4245,6 @@ # shared. In other words, their outputs are non-differentiable views of the # sparse tensor. @@ -3542,7 +3546,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given # the default would never make sense. 
- func: sparse_coo_tensor.size(int[] size, *, ScalarType dtype, Layout layout, Device device, bool pin_memory=False) -> Tensor -@@ -3433,7 +4277,6 @@ +@@ -3433,7 +4281,6 @@ SparseCUDA: sparse_resize_and_clear_ requires_tensor: True @@ -3550,7 +3554,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: sparse_mask(Tensor self, Tensor mask) -> Tensor use_c10_dispatcher: full variants: method -@@ -3442,7 +4285,6 @@ +@@ -3442,7 +4289,6 @@ SparseCUDA: sparse_mask_cuda requires_tensor: True @@ -3558,7 +3562,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: to_dense(Tensor self) -> Tensor use_c10_dispatcher: full variants: method -@@ -3474,7 +4316,6 @@ +@@ -3474,7 +4320,6 @@ requires_tensor: True device_guard: False @@ -3566,7 +3570,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: dense_dim(Tensor self) -> int use_c10_dispatcher: full variants: method -@@ -3494,7 +4335,6 @@ +@@ -3494,7 +4339,6 @@ requires_tensor: True device_guard: False @@ -3574,7 +3578,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _nnz(Tensor self) -> int use_c10_dispatcher: full variants: method -@@ -3504,7 +4344,6 @@ +@@ -3504,7 +4348,6 @@ requires_tensor: True device_guard: False @@ -3582,7 +3586,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: coalesce(Tensor self) -> Tensor use_c10_dispatcher: full variants: method -@@ -3513,7 +4352,6 @@ +@@ -3513,7 +4356,6 @@ SparseCUDA: coalesce_sparse_cuda requires_tensor: True @@ -3590,7 +3594,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: is_coalesced(Tensor self) -> bool use_c10_dispatcher: full variants: method -@@ -3524,7 +4362,6 @@ +@@ -3524,7 +4366,6 @@ device_guard: False supports_named_tensor: True @@ -3598,7 +3602,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _indices(Tensor(a) self) -> Tensor(a) variants: method dispatch: -@@ -3568,7 +4405,6 @@ +@@ -3568,7 +4409,6 @@ requires_tensor: True device_guard: False @@ -3606,7 +3610,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: hspmm.out(Tensor mat1, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!) 
dispatch: SparseCPU: hspmm_out_sparse_cpu -@@ -3630,11 +4466,15 @@ +@@ -3630,11 +4470,15 @@ variants: function dispatch: CPU: quantize_per_tensor_cpu @@ -3622,7 +3626,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: dequantize(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -3713,20 +4553,28 @@ +@@ -3713,20 +4557,28 @@ variants: method device_guard: False supports_named_tensor: True @@ -3651,7 +3655,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: meshgrid(Tensor[] tensors) -> Tensor[] -@@ -3765,6 +4613,8 @@ +@@ -3765,6 +4617,8 @@ dispatch: CPU: _local_scalar_dense_cpu CUDA: _local_scalar_dense_cuda @@ -3660,7 +3664,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= variants: function supports_named_tensor: True -@@ -3791,10 +4641,16 @@ +@@ -3791,10 +4645,16 @@ # RNN cells and layers - func: lstm.input(Tensor input, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor, Tensor) @@ -3677,7 +3681,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: gru.data(Tensor data, Tensor batch_sizes, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor) -@@ -3839,10 +4695,14 @@ +@@ -3839,10 +4699,14 @@ # PackedSequence utilities - func: _pack_padded_sequence(Tensor input, Tensor lengths, bool batch_first) -> (Tensor, Tensor) @@ -3692,7 +3696,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # wrappers for legacy TH methods -@@ -3852,6 +4712,8 @@ +@@ -3852,6 +4716,8 @@ dispatch: CPU: set_ CUDA: set_ @@ -3701,7 +3705,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: set_.source_Storage_storage_offset(Tensor(a!) self, Storage source, int storage_offset, int[] size, int[] stride=[]) -> Tensor(a!) variants: method -@@ -3860,6 +4722,8 @@ +@@ -3860,6 +4726,8 @@ CPU: legacy::cpu::_th_set_ CUDA: legacy::cuda::_th_set_ QuantizedCPU: set_storage @@ -3710,7 +3714,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: set_.source_Tensor(Tensor(a!) self, Tensor source) -> Tensor(a!) variants: method -@@ -3867,12 +4731,16 @@ +@@ -3867,12 +4735,16 @@ dispatch: CPU: set_tensor_ CUDA: set_tensor_ @@ -3727,7 +3731,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: set_quantizer_(Tensor(a!) self, ConstQuantizerPtr quantizer) -> Tensor(a!) 
variants: method -@@ -3892,6 +4760,8 @@ +@@ -3892,6 +4764,8 @@ dispatch: CPU: masked_fill__cpu CUDA: masked_fill__cuda @@ -3736,7 +3740,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: masked_fill.Scalar(Tensor self, Tensor mask, Scalar value) -> Tensor -@@ -3904,6 +4774,8 @@ +@@ -3904,6 +4778,8 @@ dispatch: CPU: masked_fill__cpu CUDA: masked_fill__cuda @@ -3745,7 +3749,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: masked_fill.Tensor(Tensor self, Tensor mask, Tensor value) -> Tensor -@@ -3916,6 +4788,8 @@ +@@ -3916,6 +4792,8 @@ dispatch: CPU: masked_scatter__cpu CUDA: masked_scatter__cuda @@ -3754,7 +3758,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: masked_scatter(Tensor self, Tensor mask, Tensor source) -> Tensor use_c10_dispatcher: full -@@ -3929,25 +4803,35 @@ +@@ -3929,25 +4807,35 @@ CUDA: view MkldnnCPU: mkldnn_view QuantizedCPU: view @@ -3790,7 +3794,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: index_fill_.int_Scalar(Tensor(a!) self, int dim, Tensor index, Scalar value) -> Tensor(a!) variants: method -@@ -3955,11 +4839,15 @@ +@@ -3955,11 +4843,15 @@ dispatch: CPU: legacy::cpu::_th_index_fill_ CUDA: legacy::cuda::_th_index_fill_ @@ -3806,7 +3810,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: index_fill_.int_Tensor(Tensor(a!) self, int dim, Tensor index, Tensor value) -> Tensor(a!) variants: method -@@ -3967,11 +4855,15 @@ +@@ -3967,11 +4859,15 @@ CPU: index_fill_ CUDA: index_fill_ supports_named_tensor: True @@ -3822,7 +3826,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: index_fill_.Dimname_Scalar(Tensor(a!) self, Dimname dim, Tensor index, Scalar value) -> Tensor(a!) variants: method -@@ -3994,6 +4886,8 @@ +@@ -3994,6 +4890,8 @@ dispatch: CPU: scatter_cpu_ CUDA: legacy::cuda::_th_scatter_ @@ -3831,7 +3835,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: scatter.src(Tensor self, int dim, Tensor index, Tensor src) -> Tensor use_c10_dispatcher: full -@@ -4004,6 +4898,8 @@ +@@ -4004,6 +4902,8 @@ dispatch: CPU: scatter_fill_cpu_ CUDA: legacy::cuda::_th_scatter_ @@ -3840,7 +3844,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: scatter.value(Tensor self, int dim, Tensor index, Scalar value) -> Tensor use_c10_dispatcher: full -@@ -4020,81 +4916,127 @@ +@@ -4020,81 +4920,127 @@ dispatch: CPU: scatter_add_cpu_ CUDA: legacy::cuda::_th_scatter_add_ @@ -3968,7 +3972,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: __iand__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) variants: method -@@ -4107,70 +5049,106 @@ +@@ -4107,70 +5053,106 @@ dispatch: CPU: bitwise_or_out CUDA: bitwise_or_out @@ -4075,7 +4079,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: __ixor__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) variants: method -@@ -4240,18 +5218,24 @@ +@@ -4240,18 +5222,24 @@ - func: atan2_(Tensor(a!) self, Tensor other) -> Tensor(a!) supports_named_tensor: True variants: method @@ -4100,7 +4104,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: digamma_(Tensor(a!) self) -> Tensor(a!) 
supports_named_tensor: True -@@ -4266,6 +5250,8 @@ +@@ -4266,6 +5254,8 @@ dispatch: CPU: legacy::cpu::_th_renorm_ CUDA: legacy::cuda::_th_renorm_ @@ -4109,7 +4113,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: pow_.Scalar(Tensor(a!) self, Scalar exponent) -> Tensor(a!) supports_named_tensor: True -@@ -4273,6 +5259,8 @@ +@@ -4273,6 +5263,8 @@ dispatch: CPU: pow_ CUDA: pow_ @@ -4118,7 +4122,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: pow_.Tensor(Tensor(a!) self, Tensor exponent) -> Tensor(a!) supports_named_tensor: True -@@ -4280,53 +5268,71 @@ +@@ -4280,53 +5272,71 @@ dispatch: CPU: pow_ CUDA: pow_ @@ -4190,7 +4194,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: addbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor use_c10_dispatcher: full -@@ -4334,28 +5340,40 @@ +@@ -4334,28 +5344,40 @@ dispatch: CPU: legacy::cpu::_th_addbmm CUDA: legacy::cuda::_th_addbmm @@ -4231,7 +4235,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: cauchy_(Tensor(a!) self, float median=0, float sigma=1, *, Generator? generator=None) -> Tensor(a!) -@@ -4380,6 +5398,8 @@ +@@ -4380,6 +5402,8 @@ dispatch: CPU: legacy::cpu::_th_diag_out CUDA: legacy::cuda::_th_diag_out @@ -4240,7 +4244,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: diag(Tensor self, int diagonal=0) -> Tensor use_c10_dispatcher: full -@@ -4387,40 +5407,58 @@ +@@ -4387,40 +5411,58 @@ dispatch: CPU: legacy::cpu::_th_diag CUDA: legacy::cuda::_th_diag @@ -4299,7 +4303,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: trace(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -4435,6 +5473,8 @@ +@@ -4435,6 +5477,8 @@ CPU: ne_out CUDA: ne_out QuantizedCPU: ne_out_quantized_cpu @@ -4308,7 +4312,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: ne.Scalar(Tensor self, Scalar other) -> Tensor supports_named_tensor: True -@@ -4444,6 +5484,8 @@ +@@ -4444,6 +5488,8 @@ CPU: ne CUDA: ne QuantizedCPU: ne_quantized_cpu @@ -4317,7 +4321,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: ne.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True -@@ -4451,6 +5493,8 @@ +@@ -4451,6 +5497,8 @@ CPU: ne_out CUDA: ne_out QuantizedCPU: ne_out_quantized_cpu @@ -4326,7 +4330,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: ne.Tensor(Tensor self, Tensor other) -> Tensor supports_named_tensor: True -@@ -4460,6 +5504,8 @@ +@@ -4460,6 +5508,8 @@ CPU: ne CUDA: ne QuantizedCPU: ne_quantized_cpu @@ -4335,7 +4339,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: eq.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!) 
supports_named_tensor: True -@@ -4467,6 +5513,8 @@ +@@ -4467,6 +5517,8 @@ CPU: eq_out CUDA: eq_out QuantizedCPU: eq_out_quantized_cpu @@ -4344,7 +4348,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: eq.Scalar(Tensor self, Scalar other) -> Tensor supports_named_tensor: True -@@ -4476,6 +5524,8 @@ +@@ -4476,6 +5528,8 @@ CPU: eq CUDA: eq QuantizedCPU: eq_quantized_cpu @@ -4353,7 +4357,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: eq.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True -@@ -4483,6 +5533,8 @@ +@@ -4483,6 +5537,8 @@ CPU: eq_out CUDA: eq_out QuantizedCPU: eq_out_quantized_cpu @@ -4362,7 +4366,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: eq.Tensor(Tensor self, Tensor other) -> Tensor supports_named_tensor: True -@@ -4492,6 +5544,8 @@ +@@ -4492,6 +5548,8 @@ CPU: eq CUDA: eq QuantizedCPU: eq_quantized_cpu @@ -4371,7 +4375,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: ge.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True -@@ -4499,6 +5553,8 @@ +@@ -4499,6 +5557,8 @@ CPU: ge_out CUDA: ge_out QuantizedCPU: ge_out_quantized_cpu @@ -4380,7 +4384,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: ge.Scalar(Tensor self, Scalar other) -> Tensor supports_named_tensor: True -@@ -4508,6 +5564,8 @@ +@@ -4508,6 +5568,8 @@ CPU: ge CUDA: ge QuantizedCPU: ge_quantized_cpu @@ -4389,7 +4393,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: ge.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True -@@ -4515,6 +5573,8 @@ +@@ -4515,6 +5577,8 @@ CPU: ge_out CUDA: ge_out QuantizedCPU: ge_out_quantized_cpu @@ -4398,7 +4402,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: ge.Tensor(Tensor self, Tensor other) -> Tensor supports_named_tensor: True -@@ -4524,6 +5584,8 @@ +@@ -4524,6 +5588,8 @@ CPU: ge CUDA: ge QuantizedCPU: ge_quantized_cpu @@ -4407,7 +4411,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: le.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True -@@ -4531,6 +5593,8 @@ +@@ -4531,6 +5597,8 @@ CPU: le_out CUDA: le_out QuantizedCPU: le_out_quantized_cpu @@ -4416,7 +4420,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: le.Scalar(Tensor self, Scalar other) -> Tensor supports_named_tensor: True -@@ -4540,6 +5604,8 @@ +@@ -4540,6 +5608,8 @@ CPU: le CUDA: le QuantizedCPU: le_quantized_cpu @@ -4425,7 +4429,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: le.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) 
supports_named_tensor: True -@@ -4547,6 +5613,8 @@ +@@ -4547,6 +5617,8 @@ CPU: le_out CUDA: le_out QuantizedCPU: le_out_quantized_cpu @@ -4434,7 +4438,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: le.Tensor(Tensor self, Tensor other) -> Tensor supports_named_tensor: True -@@ -4556,6 +5624,8 @@ +@@ -4556,6 +5628,8 @@ CPU: le CUDA: le QuantizedCPU: le_quantized_cpu @@ -4443,7 +4447,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: gt.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True -@@ -4563,6 +5633,8 @@ +@@ -4563,6 +5637,8 @@ CPU: gt_out CUDA: gt_out QuantizedCPU: gt_out_quantized_cpu @@ -4452,7 +4456,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: gt.Scalar(Tensor self, Scalar other) -> Tensor supports_named_tensor: True -@@ -4572,6 +5644,8 @@ +@@ -4572,6 +5648,8 @@ CPU: gt CUDA: gt QuantizedCPU: gt_quantized_cpu @@ -4461,7 +4465,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: gt.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True -@@ -4579,6 +5653,8 @@ +@@ -4579,6 +5657,8 @@ CPU: gt_out CUDA: gt_out QuantizedCPU: gt_out_quantized_cpu @@ -4470,7 +4474,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: gt.Tensor(Tensor self, Tensor other) -> Tensor supports_named_tensor: True -@@ -4588,6 +5664,8 @@ +@@ -4588,6 +5668,8 @@ CPU: gt CUDA: gt QuantizedCPU: gt_quantized_cpu @@ -4479,7 +4483,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: lt.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True -@@ -4595,6 +5673,8 @@ +@@ -4595,6 +5677,8 @@ CPU: lt_out CUDA: lt_out QuantizedCPU: lt_out_quantized_cpu @@ -4488,7 +4492,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: lt.Scalar(Tensor self, Scalar other) -> Tensor supports_named_tensor: True -@@ -4604,6 +5684,8 @@ +@@ -4604,6 +5688,8 @@ CPU: lt CUDA: lt QuantizedCPU: lt_quantized_cpu @@ -4497,7 +4501,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: lt.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) 
supports_named_tensor: True -@@ -4611,6 +5693,8 @@ +@@ -4611,6 +5697,8 @@ CPU: lt_out CUDA: lt_out QuantizedCPU: lt_out_quantized_cpu @@ -4506,7 +4510,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: lt.Tensor(Tensor self, Tensor other) -> Tensor supports_named_tensor: True -@@ -4620,11 +5704,16 @@ +@@ -4620,11 +5708,16 @@ CPU: lt CUDA: lt QuantizedCPU: lt_quantized_cpu @@ -4523,7 +4527,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: take(Tensor self, Tensor index) -> Tensor use_c10_dispatcher: full -@@ -4632,11 +5721,16 @@ +@@ -4632,11 +5725,16 @@ dispatch: CPU: legacy::cpu::_th_take CUDA: legacy::cuda::_th_take @@ -4540,7 +4544,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: index_select(Tensor self, int dim, Tensor index) -> Tensor use_c10_dispatcher: full -@@ -4646,17 +5740,25 @@ +@@ -4646,17 +5744,25 @@ CUDA: legacy::cuda::_th_index_select SparseCPU: index_select_sparse SparseCUDA: index_select_sparse @@ -4566,7 +4570,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: masked_select(Tensor self, Tensor mask) -> Tensor use_c10_dispatcher: full -@@ -4665,11 +5767,15 @@ +@@ -4665,11 +5771,15 @@ CPU: masked_select_cpu CUDA: masked_select_cuda supports_named_tensor: True @@ -4582,7 +4586,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: nonzero(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -4677,6 +5783,8 @@ +@@ -4677,6 +5787,8 @@ dispatch: CPU: legacy::cpu::_th_nonzero CUDA: legacy::cuda::_th_nonzero @@ -4591,7 +4595,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: nonzero_numpy(Tensor self) -> Tensor[] variants: method, function -@@ -4685,6 +5793,8 @@ +@@ -4685,6 +5797,8 @@ dispatch: CPU: gather_out_cpu CUDA: gather_out_cuda @@ -4600,7 +4604,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: gather(Tensor self, int dim, Tensor index, *, bool sparse_grad=False) -> Tensor use_c10_dispatcher: full -@@ -4692,34 +5802,50 @@ +@@ -4692,34 +5806,50 @@ dispatch: CPU: gather_cpu CUDA: gather_cuda @@ -4651,7 +4655,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: lstsq.X(Tensor self, Tensor A, *, Tensor(a!) X, Tensor(b!) qr) -> (Tensor(a!) solution, Tensor(b!) QR) dispatch: -@@ -4742,6 +5868,8 @@ +@@ -4742,6 +5872,8 @@ dispatch: CPU: _triangular_solve_helper_cpu CUDA: _triangular_solve_helper_cuda @@ -4660,7 +4664,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: symeig.e(Tensor self, bool eigenvectors=False, bool upper=True, *, Tensor(a!) e, Tensor(b!) V) -> (Tensor(a!) eigenvalues, Tensor(b!) eigenvectors) -@@ -4753,6 +5881,8 @@ +@@ -4753,6 +5885,8 @@ dispatch: CPU: _symeig_helper_cpu CUDA: _symeig_helper_cuda @@ -4669,7 +4673,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: eig.e(Tensor self, bool eigenvectors=False, *, Tensor(a!) e, Tensor(b!) v) -> (Tensor(a!) eigenvalues, Tensor(b!) eigenvectors) dispatch: -@@ -4775,6 +5905,8 @@ +@@ -4775,6 +5909,8 @@ dispatch: CPU: _svd_helper_cpu CUDA: _svd_helper_cuda @@ -4678,7 +4682,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: cholesky.out(Tensor self, bool upper=False, *, Tensor(a!) out) -> Tensor(a!) 
-@@ -4826,9 +5958,13 @@ +@@ -4826,9 +5962,13 @@ CUDA: legacy::cuda::_th_potri - func: qr.Q(Tensor self, bool some=True, *, Tensor(a!) Q, Tensor(b!) R) -> (Tensor(a!) Q, Tensor(b!) R) @@ -4692,7 +4696,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _qr_helper(Tensor self, bool some) -> (Tensor, Tensor) variants: function -@@ -4891,12 +6027,16 @@ +@@ -4891,12 +6031,16 @@ dispatch: CPU: multinomial_out CUDA: multinomial_out @@ -4709,7 +4713,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _multinomial_alias_setup(Tensor probs) -> (Tensor, Tensor) variants: function -@@ -4947,6 +6087,8 @@ +@@ -4947,6 +6091,8 @@ dispatch: CPU: erfinv CUDA: erfinv @@ -4718,7 +4722,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: erfinv_(Tensor(a!) self) -> Tensor(a!) supports_named_tensor: True -@@ -4954,26 +6096,36 @@ +@@ -4954,26 +6100,36 @@ dispatch: CPU: _erfinv__cpu CUDA: _erfinv__cuda @@ -4755,7 +4759,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: dist(Tensor self, Tensor other, Scalar p=2) -> Tensor use_c10_dispatcher: full -@@ -4981,21 +6133,29 @@ +@@ -4981,21 +6137,29 @@ - func: atan2.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True @@ -4785,7 +4789,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: lerp.Scalar(Tensor self, Tensor end, Scalar weight) -> Tensor use_c10_dispatcher: full -@@ -5003,6 +6163,8 @@ +@@ -5003,6 +6167,8 @@ dispatch: CPU: lerp_cpu_scalar CUDA: lerp_cuda_scalar @@ -4794,7 +4798,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: lerp.Tensor(Tensor self, Tensor end, Tensor weight) -> Tensor use_c10_dispatcher: full -@@ -5010,6 +6172,8 @@ +@@ -5010,6 +6176,8 @@ dispatch: CPU: lerp_cpu_tensor CUDA: lerp_cuda_tensor @@ -4803,7 +4807,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: histc.out(Tensor self, int bins=100, Scalar min=0, Scalar max=0, *, Tensor(a!) out) -> Tensor(a!) 
dispatch: -@@ -5027,6 +6191,8 @@ +@@ -5027,6 +6195,8 @@ dispatch: CPU: fmod_out CUDA: legacy::cuda::_th_fmod_out @@ -4812,7 +4816,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: fmod.Scalar(Tensor self, Scalar other) -> Tensor use_c10_dispatcher: full -@@ -5034,11 +6200,15 @@ +@@ -5034,11 +6204,15 @@ dispatch: CPU: fmod CUDA: legacy::cuda::_th_fmod @@ -4828,7 +4832,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: fmod.Tensor(Tensor self, Tensor other) -> Tensor use_c10_dispatcher: full -@@ -5046,11 +6216,15 @@ +@@ -5046,11 +6220,15 @@ dispatch: CPU: fmod CUDA: legacy::cuda::_th_fmod @@ -4844,7 +4848,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: remainder.Scalar(Tensor self, Scalar other) -> Tensor use_c10_dispatcher: full -@@ -5058,11 +6232,15 @@ +@@ -5058,11 +6236,15 @@ dispatch: CPU: remainder CUDA: remainder @@ -4860,7 +4864,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: remainder.Tensor(Tensor self, Tensor other) -> Tensor use_c10_dispatcher: full -@@ -5070,12 +6248,18 @@ +@@ -5070,12 +6252,18 @@ dispatch: CPU: remainder CUDA: remainder @@ -4879,7 +4883,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: min(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -5084,13 +6268,19 @@ +@@ -5084,13 +6272,19 @@ CPU: min CUDA: legacy::cuda::_th_min QuantizedCPU: min_quant @@ -4899,7 +4903,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: max(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -5099,6 +6289,8 @@ +@@ -5099,6 +6293,8 @@ CPU: max CUDA: legacy::cuda::_th_max QuantizedCPU: max_quant @@ -4908,7 +4912,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: median(Tensor self) -> Tensor -@@ -5107,12 +6299,16 @@ +@@ -5107,12 +6303,16 @@ dispatch: CPU: median_cpu CUDA: median_cuda @@ -4925,7 +4929,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: sort(Tensor self, int dim=-1, bool descending=False) -> (Tensor values, Tensor indices) variants: method, function -@@ -5120,23 +6316,45 @@ +@@ -5120,23 +6320,45 @@ CPU: legacy::cpu::_th_sort CUDA: legacy::cuda::_th_sort QuantizedCPU: sort_quant @@ -4971,7 +4975,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: topk(Tensor self, int k, int dim=-1, bool largest=True, bool sorted=True) -> (Tensor values, Tensor indices) variants: method, function -@@ -5144,11 +6362,15 @@ +@@ -5144,11 +6366,15 @@ CPU: topk CUDA: topk QuantizedCPU: quantized_topk_cpu @@ -4987,7 +4991,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: any(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -5159,11 +6381,15 @@ +@@ -5159,11 +6385,15 @@ CUDA: any SparseCPU: any_sparse SparseCUDA: any_sparse @@ -5003,7 +5007,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: renorm(Tensor self, Scalar p, int dim, Scalar maxnorm) -> Tensor use_c10_dispatcher: full -@@ -5171,6 +6397,8 @@ +@@ -5171,6 +6401,8 @@ dispatch: CPU: legacy::cpu::_th_renorm CUDA: legacy::cuda::_th_renorm @@ -5012,7 +5016,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: unfold(Tensor(a) self, int dimension, int size, int step) -> Tensor(a) variants: method -@@ -5178,6 
+6406,8 @@ +@@ -5178,6 +6410,8 @@ dispatch: CPU: unfold CUDA: unfold @@ -5021,7 +5025,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: equal(Tensor self, Tensor other) -> bool use_c10_dispatcher: full -@@ -5186,6 +6416,8 @@ +@@ -5186,6 +6420,8 @@ CPU: legacy::cpu::_th_equal CUDA: legacy::cuda::_th_equal QuantizedCPU: quantized_equal @@ -5030,7 +5034,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: pow.Tensor_Tensor_out(Tensor self, Tensor exponent, *, Tensor(a!) out) -> Tensor(a!) -@@ -5193,6 +6425,8 @@ +@@ -5193,6 +6429,8 @@ dispatch: CPU: pow_out CUDA: pow_out @@ -5039,7 +5043,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: pow.Tensor_Tensor(Tensor self, Tensor exponent) -> Tensor use_c10_dispatcher: full -@@ -5201,12 +6435,16 @@ +@@ -5201,12 +6439,16 @@ dispatch: CPU: pow CUDA: pow @@ -5056,7 +5060,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: pow.Scalar(Scalar self, Tensor exponent) -> Tensor use_c10_dispatcher: full -@@ -5214,6 +6452,8 @@ +@@ -5214,6 +6456,8 @@ dispatch: CPU: pow CUDA: pow @@ -5065,7 +5069,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: normal_(Tensor(a!) self, float mean=0, float std=1, *, Generator? generator=None) -> Tensor(a!) variants: method -@@ -5221,40 +6461,58 @@ +@@ -5221,40 +6465,58 @@ CPU: normal_cpu_ CUDA: normal_cuda_ supports_named_tensor: True @@ -5124,7 +5128,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: alias(Tensor(a) self) -> Tensor(a) variants: method, function -@@ -5265,43 +6523,59 @@ +@@ -5265,43 +6527,59 @@ dispatch: CPU: legacy::cpu::_th_addr CUDA: legacy::cuda::_th_addr @@ -5185,7 +5189,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _var(Tensor self, bool unbiased=True) -> Tensor use_c10_dispatcher: full -@@ -5309,6 +6583,8 @@ +@@ -5309,6 +6587,8 @@ CPU: legacy::cpu::_th_var CUDA: legacy::cuda::_th_var supports_named_tensor: True @@ -5194,7 +5198,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _std(Tensor self, bool unbiased=True) -> Tensor use_c10_dispatcher: full -@@ -5321,6 +6597,8 @@ +@@ -5321,6 +6601,8 @@ variants: function dispatch: CUDA: _amp_non_finite_check_and_unscale_cuda_ @@ -5203,7 +5207,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _amp_update_scale(Tensor(a!) 
growth_tracker, Tensor current_scale, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor variants: function -@@ -5332,12 +6610,16 @@ +@@ -5332,12 +6614,16 @@ CPU: _cat_cpu CUDA: cat_cuda QuantizedCPU: quantized_cat @@ -5220,7 +5224,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _mode(Tensor self, int dim=-1, bool keepdim=False) -> (Tensor, Tensor) dispatch: -@@ -5353,36 +6635,50 @@ +@@ -5353,36 +6639,50 @@ dispatch: CPU: legacy::cpu::_th_max CUDA: legacy::cuda::_th_max @@ -5271,7 +5275,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: mse_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction) -> Tensor use_c10_dispatcher: full -@@ -5390,23 +6686,33 @@ +@@ -5390,23 +6690,33 @@ dispatch: CPU: mse_loss_backward CUDA: mse_loss_backward @@ -5305,7 +5309,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: multi_margin_loss.out(Tensor self, Tensor target, Scalar p=1, Scalar margin=1, Tensor? weight=None, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -5434,22 +6740,30 @@ +@@ -5434,22 +6744,30 @@ - func: multilabel_margin_loss.out(Tensor self, Tensor target, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!) python_module: nn @@ -5336,7 +5340,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: multilabel_margin_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, Tensor is_target, *, Tensor(a!) grad_input) -> Tensor(a!) python_module: nn -@@ -5466,97 +6780,137 @@ +@@ -5466,97 +6784,137 @@ - func: nll_loss.out(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, int ignore_index=-100, *, Tensor(a!) out) -> Tensor(a!) python_module: nn @@ -5474,7 +5478,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: elu.out(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -5564,6 +6918,8 @@ +@@ -5564,6 +6922,8 @@ CPU: elu_out CUDA: elu_out QuantizedCPU: quantized_elu_out @@ -5483,7 +5487,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: elu(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor use_c10_dispatcher: full -@@ -5572,16 +6928,22 @@ +@@ -5572,16 +6932,22 @@ CPU: elu CUDA: elu QuantizedCPU: quantized_elu @@ -5506,7 +5510,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: elu_(Tensor(a!) self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor(a!) 
python_module: nn -@@ -5589,12 +6951,16 @@ +@@ -5589,12 +6955,16 @@ CPU: elu_ CUDA: elu_ QuantizedCPU: quantized_elu_ @@ -5523,7 +5527,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: glu(Tensor self, int dim=-1) -> Tensor use_c10_dispatcher: full -@@ -5602,12 +6968,16 @@ +@@ -5602,12 +6972,16 @@ dispatch: CPU: glu CUDA: legacy::cuda::_thnn_glu_forward @@ -5540,7 +5544,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: glu_backward(Tensor grad_output, Tensor self, int dim) -> Tensor use_c10_dispatcher: full -@@ -5615,20 +6985,30 @@ +@@ -5615,20 +6989,30 @@ dispatch: CPU: glu_backward CUDA: legacy::cuda::_thnn_glu_backward @@ -5571,7 +5575,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: hardtanh.out(Tensor self, Scalar min_val=-1, Scalar max_val=1, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -5636,6 +7016,8 @@ +@@ -5636,6 +7020,8 @@ CPU: hardtanh_out CUDA: hardtanh_out QuantizedCPU: quantized_hardtanh_out @@ -5580,7 +5584,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: hardtanh(Tensor self, Scalar min_val=-1, Scalar max_val=1) -> Tensor use_c10_dispatcher: full -@@ -5644,16 +7026,22 @@ +@@ -5644,16 +7030,22 @@ CPU: hardtanh CUDA: hardtanh QuantizedCPU: quantized_hardtanh @@ -5603,7 +5607,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: hardtanh_(Tensor(a!) self, Scalar min_val=-1, Scalar max_val=1) -> Tensor(a!) python_module: nn -@@ -5661,6 +7049,8 @@ +@@ -5661,6 +7053,8 @@ CPU: hardtanh_ CUDA: hardtanh_ QuantizedCPU: quantized_hardtanh_ @@ -5612,7 +5616,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: leaky_relu.out(Tensor self, Scalar negative_slope=0.01, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -5668,6 +7058,8 @@ +@@ -5668,6 +7062,8 @@ CPU: leaky_relu_out CUDA: leaky_relu_out QuantizedCPU: quantized_leaky_relu_out @@ -5621,7 +5625,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: leaky_relu(Tensor self, Scalar negative_slope=0.01) -> Tensor use_c10_dispatcher: full -@@ -5676,10 +7068,14 @@ +@@ -5676,10 +7072,14 @@ CPU: leaky_relu CUDA: leaky_relu QuantizedCPU: quantized_leaky_relu @@ -5636,7 +5640,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: leaky_relu_(Tensor(a!) self, Scalar negative_slope=0.01) -> Tensor(a!) python_module: nn -@@ -5687,31 +7083,44 @@ +@@ -5687,31 +7087,44 @@ CPU: leaky_relu_ CUDA: leaky_relu_ QuantizedCPU: quantized_leaky_relu_ @@ -5681,7 +5685,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: log_sigmoid_backward(Tensor grad_output, Tensor self, Tensor buffer) -> Tensor use_c10_dispatcher: full -@@ -5719,62 +7128,88 @@ +@@ -5719,62 +7132,88 @@ dispatch: CPU: log_sigmoid_backward_cpu CUDA: legacy::cuda::_thnn_log_sigmoid_backward @@ -5770,7 +5774,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: adaptive_avg_pool2d.out(Tensor self, int[2] output_size, *, Tensor(a!) out) -> Tensor(a!) 
python_module: nn -@@ -5782,9 +7217,13 @@ +@@ -5782,9 +7221,13 @@ CPU: adaptive_avg_pool2d_out_cpu CUDA: adaptive_avg_pool2d_out_cuda MkldnnCPU: mkldnn_adaptive_avg_pool2d_out @@ -5784,7 +5788,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: mkldnn_adaptive_avg_pool2d(Tensor self, int[2] output_size) -> Tensor dispatch: -@@ -5796,6 +7235,8 @@ +@@ -5796,6 +7239,8 @@ CPU: adaptive_avg_pool2d_cpu CUDA: adaptive_avg_pool2d_cuda QuantizedCPU: quantized_adaptive_avg_pool2d @@ -5793,7 +5797,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _adaptive_avg_pool2d_backward(Tensor grad_output, Tensor self) -> Tensor use_c10_dispatcher: full -@@ -5803,24 +7244,32 @@ +@@ -5803,24 +7248,32 @@ dispatch: CPU: adaptive_avg_pool2d_backward_cpu CUDA: adaptive_avg_pool2d_backward_cuda @@ -5826,7 +5830,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: adaptive_avg_pool3d_backward(Tensor grad_output, Tensor self) -> Tensor use_c10_dispatcher: full -@@ -5828,6 +7277,8 @@ +@@ -5828,6 +7281,8 @@ dispatch: CPU: adaptive_avg_pool3d_backward_cpu CUDA: adaptive_avg_pool3d_backward_cuda @@ -5835,7 +5839,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Return: (Tensor output, Tensor indices) - func: adaptive_max_pool2d.out(Tensor self, int[2] output_size, *, Tensor(a!) out, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!)) -@@ -5835,6 +7286,8 @@ +@@ -5835,6 +7290,8 @@ dispatch: CPU: adaptive_max_pool2d_out_cpu CUDA: adaptive_max_pool2d_out_cuda @@ -5844,7 +5848,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Return: (Tensor output, Tensor indices) - func: adaptive_max_pool2d(Tensor self, int[2] output_size) -> (Tensor, Tensor) -@@ -5842,12 +7295,16 @@ +@@ -5842,12 +7299,16 @@ dispatch: CPU: adaptive_max_pool2d_cpu CUDA: adaptive_max_pool2d_cuda @@ -5861,7 +5865,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: adaptive_max_pool2d_backward(Tensor grad_output, Tensor self, Tensor indices) -> Tensor use_c10_dispatcher: full -@@ -5855,6 +7312,8 @@ +@@ -5855,6 +7316,8 @@ dispatch: CPU: adaptive_max_pool2d_backward_cpu CUDA: adaptive_max_pool2d_backward_cuda @@ -5870,7 +5874,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Return: (Tensor output, Tensor indices) - func: adaptive_max_pool3d.out(Tensor self, int[3] output_size, *, Tensor(a!) out, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!)) -@@ -5889,6 +7348,8 @@ +@@ -5889,6 +7352,8 @@ CPU: avg_pool2d_out_cpu CUDA: avg_pool2d_out_cuda MkldnnCPU: mkldnn_avg_pool2d_out @@ -5879,7 +5883,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: avg_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None) -> Tensor python_module: nn -@@ -5897,24 +7358,32 @@ +@@ -5897,24 +7362,32 @@ CUDA: avg_pool2d_cuda MkldnnCPU: mkldnn_avg_pool2d QuantizedCPU: quantized_avg_pool2d @@ -5912,7 +5916,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: avg_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? 
divisor_override=None) -> Tensor python_module: nn -@@ -5922,18 +7391,24 @@ +@@ -5922,18 +7395,24 @@ CPU: avg_pool3d_cpu CUDA: avg_pool3d_cuda QuantizedCPU: quantized_avg_pool3d @@ -5937,7 +5941,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Return: (Tensor output, Tensor indices) - func: fractional_max_pool2d.output(Tensor self, int[2] kernel_size, int[2] output_size, Tensor random_samples, *, Tensor(a!) output, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!)) -@@ -5993,6 +7468,8 @@ +@@ -5993,6 +7472,8 @@ dispatch: CPU: max_pool2d_with_indices_out_cpu CUDA: max_pool2d_with_indices_out_cuda @@ -5946,7 +5950,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Return: (Tensor output, Tensor indices) - func: max_pool2d_with_indices(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor) -@@ -6000,6 +7477,8 @@ +@@ -6000,6 +7481,8 @@ dispatch: CPU: max_pool2d_with_indices_cpu CUDA: max_pool2d_with_indices_cuda @@ -5955,7 +5959,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: max_pool2d_with_indices_backward.grad_input(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, bool ceil_mode, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!) -@@ -6007,12 +7486,16 @@ +@@ -6007,12 +7490,16 @@ dispatch: CPU: max_pool2d_with_indices_backward_out_cpu CUDA: max_pool2d_with_indices_backward_out_cuda @@ -5972,7 +5976,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Return: (Tensor output, Tensor indices) - func: max_pool3d_with_indices.out(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False, *, Tensor(a!) out, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!)) -@@ -6020,6 +7503,8 @@ +@@ -6020,6 +7507,8 @@ dispatch: CPU: max_pool3d_with_indices_out_cpu CUDA: max_pool3d_with_indices_out_cuda @@ -5981,7 +5985,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Return: (Tensor output, Tensor indices) - func: max_pool3d_with_indices(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor) -@@ -6027,6 +7512,8 @@ +@@ -6027,6 +7516,8 @@ dispatch: CPU: max_pool3d_with_indices_cpu CUDA: max_pool3d_with_indices_cuda @@ -5990,7 +5994,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: max_pool3d_with_indices_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool ceil_mode, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!) -@@ -6034,12 +7521,17 @@ +@@ -6034,12 +7525,17 @@ dispatch: CPU: max_pool3d_with_indices_backward_out_cpu CUDA: max_pool3d_with_indices_backward_out_cuda @@ -6008,7 +6012,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: max_unpool2d.out(Tensor self, Tensor indices, int[2] output_size, *, Tensor(a!) out) -> Tensor(a!) 
python_module: nn -@@ -6166,12 +7658,16 @@ +@@ -6166,12 +7662,16 @@ dispatch: CPU: replication_pad2d_out_cpu CUDA: replication_pad2d_out_cuda @@ -6025,7 +6029,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: replication_pad2d_backward.grad_input(Tensor grad_output, Tensor self, int[4] padding, *, Tensor(a!) grad_input) -> Tensor(a!) python_module: nn -@@ -6214,12 +7710,16 @@ +@@ -6214,12 +7714,16 @@ dispatch: CPU: upsample_linear1d_out_cpu CUDA: upsample_linear1d_out_cuda @@ -6042,7 +6046,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: upsample_linear1d_backward.grad_input(Tensor grad_output, int[1] output_size, int[3] input_size, bool align_corners, float? scales=None, *, Tensor(a!) grad_input) -> Tensor(a!) python_module: nn -@@ -6232,12 +7732,16 @@ +@@ -6232,12 +7736,16 @@ dispatch: CPU: upsample_linear1d_backward_cpu CUDA: upsample_linear1d_backward_cuda @@ -6059,7 +6063,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: upsample_bilinear2d(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor python_module: nn -@@ -6245,96 +7749,128 @@ +@@ -6245,96 +7753,128 @@ CPU: upsample_bilinear2d_cpu CUDA: upsample_bilinear2d_cuda QuantizedCPU: quantized_upsample_bilinear2d_cpu @@ -6188,7 +6192,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: upsample_nearest2d(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor python_module: nn -@@ -6342,24 +7878,32 @@ +@@ -6342,24 +7882,32 @@ CPU: upsample_nearest2d_cpu CUDA: upsample_nearest2d_cuda QuantizedCPU: quantized_upsample_nearest2d_cpu @@ -6221,7 +6225,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: upsample_nearest3d(Tensor self, int[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor python_module: nn -@@ -6367,38 +7911,52 @@ +@@ -6367,38 +7915,52 @@ CPU: upsample_nearest3d_cpu CUDA: upsample_nearest3d_cuda QuantizedCPU: quantized_upsample_nearest3d_cpu @@ -6274,7 +6278,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # What's a thnn_conv_ versus a slow_conv_? # -@@ -6423,24 +7981,32 @@ +@@ -6423,24 +7985,32 @@ dispatch: CPU: slow_conv_transpose2d_out_cpu CUDA: slow_conv_transpose2d_out_cuda @@ -6307,7 +6311,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: slow_conv_transpose3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] output_padding=0, int[3] dilation=1, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -6468,21 +8034,29 @@ +@@ -6468,21 +8038,29 @@ - func: thnn_conv2d.out(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0, *, Tensor(a!) out) -> Tensor(a!) python_module: nn @@ -6337,7 +6341,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: thnn_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, Tensor finput, Tensor fgrad_input, *, Tensor(a!)? grad_input, Tensor(b!)? grad_weight, Tensor(c!)? 
grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!)) python_module: nn -@@ -6495,32 +8069,46 @@ +@@ -6495,32 +8073,46 @@ dispatch: CPU: slow_conv2d_backward_cpu CUDA: legacy::cuda::_thnn_conv2d_backward @@ -6384,7 +6388,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: slow_conv3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -6553,12 +8141,16 @@ +@@ -6553,12 +8145,16 @@ dispatch: CPU: slow_conv_dilated2d_cpu CUDA: slow_conv_dilated2d_cuda @@ -6401,7 +6405,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: slow_conv_dilated3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] dilation=1) -> Tensor python_module: nn -@@ -6577,57 +8169,413 @@ +@@ -6577,57 +8173,413 @@ dispatch: CPU: col2im_out_cpu CUDA: col2im_out_cuda @@ -6818,7 +6822,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S pytorch-develop/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S --- pytorch-v1.5.0/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S 2021-07-29 20:15:45.647574795 +0800 ++++ pytorch-develop/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S 2021-08-03 16:07:55.818413078 +0800 @@ -659,14 +659,14 @@ SUB x1, x1, 4 @@ -6844,7 +6848,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= CMP x1, 2 diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/TensorCompare.cpp pytorch-develop/aten/src/ATen/native/TensorCompare.cpp --- pytorch-v1.5.0/aten/src/ATen/native/TensorCompare.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/TensorCompare.cpp 2021-07-29 20:15:45.599573074 +0800 ++++ pytorch-develop/aten/src/ATen/native/TensorCompare.cpp 2021-08-03 16:07:55.762412610 +0800 @@ -64,7 +64,7 @@ Tensor isinf(const Tensor &self) { @@ -6856,7 +6860,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return AT_DISPATCH_FLOATING_TYPES_AND_HALF(self.scalar_type(), "isinf", [&]() { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/TensorFactories.cpp pytorch-develop/aten/src/ATen/native/TensorFactories.cpp --- pytorch-v1.5.0/aten/src/ATen/native/TensorFactories.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/TensorFactories.cpp 2021-07-29 20:15:45.603573218 +0800 ++++ pytorch-develop/aten/src/ATen/native/TensorFactories.cpp 2021-08-03 16:07:55.762412610 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -6901,7 +6905,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/TensorProperties.cpp pytorch-develop/aten/src/ATen/native/TensorProperties.cpp --- pytorch-v1.5.0/aten/src/ATen/native/TensorProperties.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/TensorProperties.cpp 2021-07-29 20:15:45.603573218 +0800 ++++ pytorch-develop/aten/src/ATen/native/TensorProperties.cpp 2021-08-03 16:07:55.762412610 +0800 @@ -87,6 +87,7 @@ if (self.is_contiguous(memory_format)) { return self; @@ -6912,7 +6916,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= "preserve memory format is unsupported by the contiguous operator"); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/UpSampleBicubic2d.cpp pytorch-develop/aten/src/ATen/native/UpSampleBicubic2d.cpp --- pytorch-v1.5.0/aten/src/ATen/native/UpSampleBicubic2d.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/UpSampleBicubic2d.cpp 2021-07-29 20:15:45.603573218 +0800 ++++ pytorch-develop/aten/src/ATen/native/UpSampleBicubic2d.cpp 2021-08-03 16:07:55.766412643 +0800 @@ -26,7 +26,7 @@ const scalar_t* in = &idata[output_y * input_width + output_x]; scalar_t* out = &odata[output_y * output_width + output_x]; @@ -6924,7 +6928,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= out += output_width * output_height; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native_parse.py pytorch-develop/aten/src/ATen/native_parse.py --- pytorch-v1.5.0/aten/src/ATen/native_parse.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native_parse.py 2021-07-29 20:15:45.659575226 +0800 ++++ pytorch-develop/aten/src/ATen/native_parse.py 2021-08-03 16:07:55.834413211 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -6962,7 +6966,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= msg = '''Exception raised in processing function: diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/preprocess_declarations.py pytorch-develop/aten/src/ATen/preprocess_declarations.py --- pytorch-v1.5.0/aten/src/ATen/preprocess_declarations.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/preprocess_declarations.py 2021-07-29 20:15:45.659575226 +0800 ++++ pytorch-develop/aten/src/ATen/preprocess_declarations.py 2021-08-03 16:07:55.834413211 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -6994,7 +6998,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/templates/TensorBody.h pytorch-develop/aten/src/ATen/templates/TensorBody.h --- pytorch-v1.5.0/aten/src/ATen/templates/TensorBody.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/templates/TensorBody.h 2021-07-29 20:15:45.659575226 +0800 ++++ pytorch-develop/aten/src/ATen/templates/TensorBody.h 2021-08-03 16:07:55.834413211 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7027,7 +7031,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/templates/TensorMethods.h pytorch-develop/aten/src/ATen/templates/TensorMethods.h --- pytorch-v1.5.0/aten/src/ATen/templates/TensorMethods.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/templates/TensorMethods.h 2021-07-29 20:15:45.659575226 +0800 ++++ pytorch-develop/aten/src/ATen/templates/TensorMethods.h 2021-08-03 16:07:55.834413211 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7061,7 +7065,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/TH/CMakeLists.txt pytorch-develop/aten/src/TH/CMakeLists.txt --- pytorch-v1.5.0/aten/src/TH/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/TH/CMakeLists.txt 2021-07-29 20:15:45.659575226 +0800 ++++ pytorch-develop/aten/src/TH/CMakeLists.txt 2021-08-03 16:07:55.838413245 +0800 @@ -48,6 +48,11 @@ ${CMAKE_CURRENT_SOURCE_DIR} PARENT_SCOPE) @@ -7076,7 +7080,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/TH/generic/THStorage.cpp pytorch-develop/aten/src/TH/generic/THStorage.cpp --- pytorch-v1.5.0/aten/src/TH/generic/THStorage.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/TH/generic/THStorage.cpp 2021-07-29 20:15:45.663575368 +0800 ++++ pytorch-develop/aten/src/TH/generic/THStorage.cpp 2021-08-03 16:07:55.838413245 +0800 @@ -1,9 +1,32 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -7185,7 +7189,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/TH/generic/THStorage.h pytorch-develop/aten/src/TH/generic/THStorage.h --- pytorch-v1.5.0/aten/src/TH/generic/THStorage.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/TH/generic/THStorage.h 2021-07-29 20:15:45.663575368 +0800 ++++ pytorch-develop/aten/src/TH/generic/THStorage.h 2021-08-03 16:07:55.838413245 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7224,7 +7228,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/CMakeLists.txt pytorch-develop/c10/CMakeLists.txt --- pytorch-v1.5.0/c10/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/CMakeLists.txt 2021-07-29 20:15:45.675575799 +0800 ++++ pytorch-develop/c10/CMakeLists.txt 2021-08-03 16:07:55.850413345 +0800 @@ -63,6 +63,14 @@ message(STATUS "don't use NUMA") endif() @@ -7253,7 +7257,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # not checked in diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/Backend.h pytorch-develop/c10/core/Backend.h --- pytorch-v1.5.0/c10/core/Backend.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/Backend.h 2021-07-29 20:15:45.675575799 +0800 ++++ pytorch-develop/c10/core/Backend.h 2021-08-03 16:07:55.850413345 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7348,7 +7352,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/Device.cpp pytorch-develop/c10/core/Device.cpp --- pytorch-v1.5.0/c10/core/Device.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/Device.cpp 2021-07-29 20:15:45.675575799 +0800 ++++ pytorch-develop/c10/core/Device.cpp 2021-08-03 16:07:55.854413378 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7388,7 +7392,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= types.begin(), diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/Device.h pytorch-develop/c10/core/Device.h --- pytorch-v1.5.0/c10/core/Device.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/Device.h 2021-07-29 20:15:45.675575799 +0800 ++++ pytorch-develop/c10/core/Device.h 2021-08-03 16:07:55.854413378 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -7423,7 +7427,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return type_ == DeviceType::CPU; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/DeviceType.cpp pytorch-develop/c10/core/DeviceType.cpp --- pytorch-v1.5.0/c10/core/DeviceType.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/DeviceType.cpp 2021-07-29 20:15:45.675575799 +0800 ++++ pytorch-develop/c10/core/DeviceType.cpp 2021-08-03 16:07:55.854413378 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7463,7 +7467,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return false; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/DeviceType.h pytorch-develop/c10/core/DeviceType.h --- pytorch-v1.5.0/c10/core/DeviceType.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/DeviceType.h 2021-07-29 20:15:45.675575799 +0800 ++++ pytorch-develop/c10/core/DeviceType.h 2021-08-03 16:07:55.854413378 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7506,7 +7510,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= constexpr DeviceType kXLA = DeviceType::XLA; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/DispatchKey.cpp pytorch-develop/c10/core/DispatchKey.cpp --- pytorch-v1.5.0/c10/core/DispatchKey.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/DispatchKey.cpp 2021-07-29 20:15:45.675575799 +0800 ++++ pytorch-develop/c10/core/DispatchKey.cpp 2021-08-03 16:07:55.854413378 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7538,7 +7542,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= case DispatchKey::TESTING_ONLY_GenericModeTensorId: diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/DispatchKey.h pytorch-develop/c10/core/DispatchKey.h --- pytorch-v1.5.0/c10/core/DispatchKey.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/DispatchKey.h 2021-07-29 20:15:45.675575799 +0800 ++++ pytorch-develop/c10/core/DispatchKey.h 2021-08-03 16:07:55.854413378 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7570,7 +7574,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/Storage.h pytorch-develop/c10/core/Storage.h --- pytorch-v1.5.0/c10/core/Storage.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/Storage.h 2021-07-29 20:15:45.675575799 +0800 ++++ pytorch-develop/c10/core/Storage.h 2021-08-03 16:07:55.854413378 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -7604,7 +7608,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= }; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/StorageImpl.h pytorch-develop/c10/core/StorageImpl.h --- pytorch-v1.5.0/c10/core/StorageImpl.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/StorageImpl.h 2021-07-29 20:15:45.675575799 +0800 ++++ pytorch-develop/c10/core/StorageImpl.h 2021-08-03 16:07:55.854413378 +0800 @@ -1,12 +1,39 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7661,7 +7665,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/TensorImpl.h pytorch-develop/c10/core/TensorImpl.h --- pytorch-v1.5.0/c10/core/TensorImpl.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/TensorImpl.h 2021-07-29 20:15:45.675575799 +0800 ++++ pytorch-develop/c10/core/TensorImpl.h 2021-08-03 16:07:55.854413378 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7731,7 +7735,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/TensorOptions.h pytorch-develop/c10/core/TensorOptions.h --- pytorch-v1.5.0/c10/core/TensorOptions.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/TensorOptions.h 2021-07-29 20:15:45.675575799 +0800 ++++ pytorch-develop/c10/core/TensorOptions.h 2021-08-03 16:07:55.854413378 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7772,7 +7776,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/macros/Export.h pytorch-develop/c10/macros/Export.h --- pytorch-v1.5.0/c10/macros/Export.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/macros/Export.h 2021-07-29 20:15:45.679575942 +0800 ++++ pytorch-develop/c10/macros/Export.h 2021-08-03 16:07:55.854413378 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7899,7 +7903,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/caffe2/CMakeLists.txt pytorch-develop/caffe2/CMakeLists.txt --- pytorch-v1.5.0/caffe2/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/caffe2/CMakeLists.txt 2021-07-29 20:15:45.687576229 +0800 ++++ pytorch-develop/caffe2/CMakeLists.txt 2021-08-03 16:07:55.862413445 +0800 @@ -32,6 +32,7 @@ # Add source, includes, and libs to lists list(APPEND Caffe2_CPU_SRCS ${ATen_CPU_SRCS}) @@ -8046,7 +8050,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Call again since Caffe2_HIP_INCLUDE is extended with ATen include dirs. 
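For readers tracing the c10 and caffe2 hunks above: their combined effect is to register the NPU as a first-class device type next to CPU and CUDA (new DeviceType, Backend, and DispatchKey values, plus the Caffe2_NPU_SRCS build wiring), so the usual device-selection idioms carry over. A minimal usage sketch, assuming the patched build exposes the device under the string "npu" (an assumption about this fork; stock PyTorch 1.5.0 rejects that device string):

    import torch

    # Assumption: only meaningful in the NPU-patched build with the Ascend
    # toolchain installed; stock PyTorch 1.5.0 raises on the "npu" string.
    dev = torch.device("npu", 0)       # device type added by this patch
    x = torch.ones(2, 3, device=dev)   # allocation goes through the NPU backend
    y = (x + x).cpu()                  # the add resolves via the NPU dispatch key
    print(y)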
diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/.clang-format pytorch-develop/.clang-format --- pytorch-v1.5.0/.clang-format 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/.clang-format 2021-07-29 20:15:45.575572213 +0800 ++++ pytorch-develop/.clang-format 2021-08-03 16:07:55.730412343 +0800 @@ -84,5 +84,4 @@ SpacesInSquareBrackets: false Standard: Cpp11 @@ -8057,7 +8061,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/cmake/BuildVariables.cmake pytorch-develop/cmake/BuildVariables.cmake --- pytorch-v1.5.0/cmake/BuildVariables.cmake 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/BuildVariables.cmake 2021-07-29 20:15:45.795580102 +0800 ++++ pytorch-develop/cmake/BuildVariables.cmake 2021-08-03 16:07:55.974414381 +0800 @@ -11,6 +11,7 @@ # CMakeLists.txt files under each folder respectively. set(Caffe2_CPU_SRCS) @@ -8084,7 +8088,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # symbols. However, if the lib is whole linked in caffe2 lib, we don't want diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/cmake/Codegen.cmake pytorch-develop/cmake/Codegen.cmake --- pytorch-v1.5.0/cmake/Codegen.cmake 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/Codegen.cmake 2021-07-29 20:15:45.795580102 +0800 ++++ pytorch-develop/cmake/Codegen.cmake 2021-08-03 16:07:55.974414381 +0800 @@ -191,13 +191,14 @@ file(READ ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_cpp.txt generated_cpp) file(READ ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_cpp.txt-cuda cuda_generated_cpp) @@ -8115,7 +8119,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= endif() diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/cmake/Dependencies.cmake pytorch-develop/cmake/Dependencies.cmake --- pytorch-v1.5.0/cmake/Dependencies.cmake 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/Dependencies.cmake 2021-07-29 20:15:45.795580102 +0800 ++++ pytorch-develop/cmake/Dependencies.cmake 2021-08-03 16:07:55.974414381 +0800 @@ -1509,6 +1509,13 @@ ENDIF(NOT C_HAS_THREAD) endif() @@ -8132,7 +8136,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/cmake/Summary.cmake pytorch-develop/cmake/Summary.cmake --- pytorch-v1.5.0/cmake/Summary.cmake 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/Summary.cmake 2021-07-29 20:15:45.799580245 +0800 ++++ pytorch-develop/cmake/Summary.cmake 2021-08-03 16:07:55.978414414 +0800 @@ -134,6 +134,7 @@ if(NOT "${SELECTED_OP_LIST}" STREQUAL "") message(STATUS " SELECTED_OP_LIST : ${SELECTED_OP_LIST}") @@ -8143,7 +8147,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= endfunction() diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur 
pytorch-v1.5.0/cmake/TorchConfig.cmake.in pytorch-develop/cmake/TorchConfig.cmake.in --- pytorch-v1.5.0/cmake/TorchConfig.cmake.in 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/TorchConfig.cmake.in 2021-07-29 20:15:45.799580245 +0800 ++++ pytorch-develop/cmake/TorchConfig.cmake.in 2021-08-03 16:07:55.978414414 +0800 @@ -112,6 +112,11 @@ list(APPEND TORCH_LIBRARIES ${TORCH_CUDA_LIBRARIES}) endif() @@ -8158,7 +8162,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= set(TORCH_CXX_FLAGS "-D_GLIBCXX_USE_CXX11_ABI=@GLIBCXX_USE_CXX11_ABI@") diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/CMakeLists.txt pytorch-develop/CMakeLists.txt --- pytorch-v1.5.0/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/CMakeLists.txt 2021-07-29 20:15:45.579572357 +0800 ++++ pytorch-develop/CMakeLists.txt 2021-08-03 16:07:55.734412376 +0800 @@ -205,6 +205,10 @@ option(USE_TBB "Use TBB" OFF) option(ONNX_ML "Enable traditional ONNX ML API." ON) @@ -8225,7 +8229,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-missing-braces") diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/.dockerignore pytorch-develop/.dockerignore --- pytorch-v1.5.0/.dockerignore 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/.dockerignore 2021-07-29 20:15:45.575572213 +0800 ++++ pytorch-develop/.dockerignore 2021-08-03 16:07:55.730412343 +0800 @@ -1,257 +1 @@ -# READ THIS BEFORE YOU REFACTOR ME -# @@ -8501,7 +8505,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/requirements.txt pytorch-develop/requirements.txt --- pytorch-v1.5.0/requirements.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/requirements.txt 2021-07-29 20:15:45.819580962 +0800 ++++ pytorch-develop/requirements.txt 2021-08-03 16:07:55.994414547 +0800 @@ -4,4 +4,12 @@ requests setuptools @@ -8520,7 +8524,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/setup.py pytorch-develop/setup.py --- pytorch-v1.5.0/setup.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/setup.py 2021-07-29 20:15:45.819580962 +0800 ++++ pytorch-develop/setup.py 2021-08-03 16:07:55.998414581 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -8619,7 +8623,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= 'python/serialized_test/data/operator_test/*.zip', diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/derivatives.yaml pytorch-develop/tools/autograd/derivatives.yaml --- pytorch-v1.5.0/tools/autograd/derivatives.yaml 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/derivatives.yaml 2021-07-29 20:15:46.963621981 +0800 ++++ pytorch-develop/tools/autograd/derivatives.yaml 2021-08-03 16:07:57.142424136 +0800 @@ -107,6 +107,10 @@ # # NB: The parameter names here MUST be consistent with the parameter names @@ -8663,20 +8667,23 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - name: triangular_solve(Tensor self, Tensor A, bool upper=True, bool transpose=False, bool unitriangular=False) -> (Tensor solution, Tensor cloned_coefficient) self, A: triangular_solve_backward(grads[0], grads[1], self, A, solution, upper, transpose, unitriangular, grad_input_mask) -@@ -1453,6 +1460,12 @@ +@@ -1453,6 +1460,15 @@ - name: cudnn_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool[2] output_mask) -> (Tensor, Tensor) grad_output, self, weight: _convolution_double_backward(grads[0], grads[1], Tensor(), grad_output, weight, self, stride, padding, dilation, false, std::vector(padding.size(), 0), groups, benchmark, deterministic, true, grad_input_mask) +- name: npu_convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> Tensor + input, weight, bias: npu_convolution_backward(input, grad, weight, stride, padding, dilation, groups, grad_input_mask) + ++- name: npu_convolution_backward(Tensor input, Tensor grad_output, Tensor weight, int[] stride, int[] padding, int[] dilation, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor) ++ grad_output, input, weight: npu_convolution_double_backward(grads[0], grads[1], grads[2], input, grad_output, weight, stride, padding, dilation, groups, grad_input_mask) ++ +- name: npu_convolution_transpose(Tensor input, Tensor weight, Tensor? bias, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups) -> Tensor + input, weight, bias: npu_convolution_transpose_backward(input, grad, weight, padding, output_padding, stride, dilation, groups, grad_input_mask) + # The above backward definitions are equivalent to the definitions below. Why do we bundle # everything up? It's because it's more convenient to define double backwards # when there is a single function that manages everything. 
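The three npu_convolution entries above follow the bundling rationale spelled out in the neighboring comment: the backward is exposed as one differentiable function (npu_convolution_backward) that has its own derivative entry (npu_convolution_double_backward), which is exactly what lets gradients of gradients flow through the op. A device-independent sketch of that mechanism on stock PyTorch, with x ** 3 standing in for the convolution:

    import torch

    # Double backward re-enters autograd through the backward function,
    # so the backward itself needs a derivative entry of its own.
    x = torch.randn(4, requires_grad=True)
    y = (x ** 3).sum()
    (g,) = torch.autograd.grad(y, x, create_graph=True)  # first backward: 3 * x**2
    (gg,) = torch.autograd.grad(g.sum(), x)              # second backward: 6 * x
    print(torch.allclose(gg, 6 * x))                     # True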
-@@ -1630,3 +1643,55 @@ +@@ -1630,3 +1646,55 @@ - name: nonzero(Tensor self) -> Tensor output_differentiability: [False] @@ -8735,7 +8742,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/dump_utils.py pytorch-develop/tools/autograd/dump_utils.py --- pytorch-v1.5.0/tools/autograd/dump_utils.py 1970-01-01 08:00:00.000000000 +0800 -+++ pytorch-develop/tools/autograd/dump_utils.py 2021-07-29 20:15:46.963621981 +0800 ++++ pytorch-develop/tools/autograd/dump_utils.py 2021-08-03 16:07:57.142424136 +0800 @@ -0,0 +1,115 @@ +# Copyright (c) 2021 Huawei Technologies Co., Ltd +# All rights reserved. @@ -8854,7 +8861,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= +] diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/gen_autograd_functions.py pytorch-develop/tools/autograd/gen_autograd_functions.py --- pytorch-v1.5.0/tools/autograd/gen_autograd_functions.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/gen_autograd_functions.py 2021-07-29 20:15:46.963621981 +0800 ++++ pytorch-develop/tools/autograd/gen_autograd_functions.py 2021-08-03 16:07:57.142424136 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2021 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -9040,7 +9047,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= + diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/gen_python_functions.py pytorch-develop/tools/autograd/gen_python_functions.py --- pytorch-v1.5.0/tools/autograd/gen_python_functions.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/gen_python_functions.py 2021-07-29 20:15:46.963621981 +0800 ++++ pytorch-develop/tools/autograd/gen_python_functions.py 2021-08-03 16:07:57.142424136 +0800 @@ -1,3 +1,20 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -9082,7 +9089,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= 'value': argname, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/gen_variable_type.py pytorch-develop/tools/autograd/gen_variable_type.py --- pytorch-v1.5.0/tools/autograd/gen_variable_type.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/gen_variable_type.py 2021-07-29 20:15:46.963621981 +0800 ++++ pytorch-develop/tools/autograd/gen_variable_type.py 2021-08-03 16:07:57.142424136 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2021 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -9255,7 +9262,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/Functions.cpp pytorch-develop/tools/autograd/templates/Functions.cpp --- pytorch-v1.5.0/tools/autograd/templates/Functions.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/Functions.cpp 2021-07-29 20:15:46.963621981 +0800 ++++ pytorch-develop/tools/autograd/templates/Functions.cpp 2021-08-03 16:07:57.142424136 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2021 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -9335,7 +9342,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= auto sparse = sparse_.coalesce(); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/python_torch_functions.cpp pytorch-develop/tools/autograd/templates/python_torch_functions.cpp --- pytorch-v1.5.0/tools/autograd/templates/python_torch_functions.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/python_torch_functions.cpp 2021-07-29 20:15:46.963621981 +0800 ++++ pytorch-develop/tools/autograd/templates/python_torch_functions.cpp 2021-08-03 16:07:57.142424136 +0800 @@ -22,7 +22,7 @@ #include "torch/csrc/autograd/generated/variable_factories.h" #include "torch/csrc/utils/structseq.h" @@ -9419,7 +9426,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/python_variable_methods.cpp pytorch-develop/tools/autograd/templates/python_variable_methods.cpp --- pytorch-v1.5.0/tools/autograd/templates/python_variable_methods.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/python_variable_methods.cpp 2021-07-29 20:15:46.963621981 +0800 ++++ pytorch-develop/tools/autograd/templates/python_variable_methods.cpp 2021-08-03 16:07:57.142424136 +0800 @@ -15,7 +15,13 @@ #include "torch/csrc/cuda/Stream.h" #include "torch/csrc/cuda/Event.h" @@ -9506,7 +9513,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= {"has_names", (PyCFunction)THPVariable_has_names, METH_NOARGS, NULL}, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/VariableType.cpp pytorch-develop/tools/autograd/templates/VariableType.cpp --- pytorch-v1.5.0/tools/autograd/templates/VariableType.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/VariableType.cpp 2021-07-29 20:15:46.963621981 +0800 ++++ pytorch-develop/tools/autograd/templates/VariableType.cpp 2021-08-03 16:07:57.142424136 +0800 @@ -1,7 +1,27 @@ +// Copyright (c) 2021 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -9537,7 +9544,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/VariableType.h pytorch-develop/tools/autograd/templates/VariableType.h --- pytorch-v1.5.0/tools/autograd/templates/VariableType.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/VariableType.h 2021-07-29 20:15:46.963621981 +0800 ++++ pytorch-develop/tools/autograd/templates/VariableType.h 2021-08-03 16:07:57.142424136 +0800 @@ -1,3 +1,20 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -9569,7 +9576,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= const at::Tensor & unpack(const Tensor & t, const char * name, int pos); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/build_variables.bzl pytorch-develop/tools/build_variables.bzl --- pytorch-v1.5.0/tools/build_variables.bzl 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/build_variables.bzl 2021-07-29 20:15:46.963621981 +0800 ++++ pytorch-develop/tools/build_variables.bzl 2021-08-03 16:07:57.142424136 +0800 @@ -46,6 +46,7 @@ "torch/csrc/autograd/functions/utils.cpp", "torch/csrc/autograd/input_buffer.cpp", @@ -9655,7 +9662,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -def grad(outputs: _TensorOrTensors, inputs: _TensorOrTensors, grad_outputs: Optional[_TensorOrTensors]=..., retain_graph: Optional[bool]=..., create_graph: bool=..., only_inputs: bool=..., allow_unused: bool=...) -> Tuple[Tensor, ...]: ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/autograd/profiler.py pytorch-develop/torch/autograd/profiler.py --- pytorch-v1.5.0/torch/autograd/profiler.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/autograd/profiler.py 2021-07-29 20:15:46.971622268 +0800 ++++ pytorch-develop/torch/autograd/profiler.py 2021-08-03 16:07:57.150424202 +0800 @@ -1,8 +1,25 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -10128,7 +10135,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return ''.join(result) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/CMakeLists.txt pytorch-develop/torch/CMakeLists.txt --- pytorch-v1.5.0/torch/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/CMakeLists.txt 2021-07-29 20:15:46.967622124 +0800 ++++ pytorch-develop/torch/CMakeLists.txt 2021-08-03 16:07:57.146424169 +0800 @@ -97,6 +97,7 @@ ${TORCH_SRC_DIR}/csrc/tensor/python_tensor.cpp ${TORCH_SRC_DIR}/csrc/utils.cpp @@ -10160,7 +10167,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= endif() diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/engine.cpp pytorch-develop/torch/csrc/autograd/engine.cpp --- pytorch-v1.5.0/torch/csrc/autograd/engine.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/engine.cpp 2021-07-29 20:15:46.979622554 +0800 ++++ pytorch-develop/torch/csrc/autograd/engine.cpp 2021-08-03 16:07:57.158424269 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10283,7 +10290,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= auto event = c10::Event{c10::DeviceType::CUDA}; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/functions/tensor.cpp pytorch-develop/torch/csrc/autograd/functions/tensor.cpp --- pytorch-v1.5.0/torch/csrc/autograd/functions/tensor.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/functions/tensor.cpp 2021-07-29 20:15:46.983622698 +0800 ++++ pytorch-develop/torch/csrc/autograd/functions/tensor.cpp 2021-08-03 16:07:57.162424303 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10315,7 +10322,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= /*non_blocking=*/false, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/init.cpp pytorch-develop/torch/csrc/autograd/init.cpp --- pytorch-v1.5.0/torch/csrc/autograd/init.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/init.cpp 2021-07-29 20:15:46.983622698 +0800 ++++ pytorch-develop/torch/csrc/autograd/init.cpp 2021-08-03 16:07:57.162424303 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -10358,7 +10365,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= m.def("_enable_profiler", enableProfiler); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/input_buffer.cpp pytorch-develop/torch/csrc/autograd/input_buffer.cpp --- pytorch-v1.5.0/torch/csrc/autograd/input_buffer.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/input_buffer.cpp 2021-07-29 20:15:46.983622698 +0800 ++++ pytorch-develop/torch/csrc/autograd/input_buffer.cpp 2021-08-03 16:07:57.162424303 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10410,7 +10417,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= auto& old_var = buffer[pos]; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/profiler.cpp pytorch-develop/torch/csrc/autograd/profiler.cpp --- pytorch-v1.5.0/torch/csrc/autograd/profiler.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/profiler.cpp 2021-07-29 20:15:46.983622698 +0800 ++++ pytorch-develop/torch/csrc/autograd/profiler.cpp 2021-08-03 16:07:57.162424303 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10606,7 +10613,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= CUDAStubs::~CUDAStubs() = default; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/profiler.h pytorch-develop/torch/csrc/autograd/profiler.h --- pytorch-v1.5.0/torch/csrc/autograd/profiler.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/profiler.h 2021-07-29 20:15:46.983622698 +0800 ++++ pytorch-develop/torch/csrc/autograd/profiler.h 2021-08-03 16:07:57.162424303 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10731,7 +10738,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/python_variable.cpp pytorch-develop/torch/csrc/autograd/python_variable.cpp --- pytorch-v1.5.0/torch/csrc/autograd/python_variable.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/python_variable.cpp 2021-07-29 20:15:46.983622698 +0800 ++++ pytorch-develop/torch/csrc/autograd/python_variable.cpp 2021-08-03 16:07:57.162424303 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -10785,7 +10792,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= {"is_complex", (getter)THPVariable_is_complex, nullptr, nullptr, nullptr}, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/python_variable_indexing.cpp pytorch-develop/torch/csrc/autograd/python_variable_indexing.cpp --- pytorch-v1.5.0/torch/csrc/autograd/python_variable_indexing.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/python_variable_indexing.cpp 2021-07-29 20:15:46.983622698 +0800 ++++ pytorch-develop/torch/csrc/autograd/python_variable_indexing.cpp 2021-08-03 16:07:57.162424303 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10826,7 +10833,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/utils/wrap_outputs.h pytorch-develop/torch/csrc/autograd/utils/wrap_outputs.h --- pytorch-v1.5.0/torch/csrc/autograd/utils/wrap_outputs.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/utils/wrap_outputs.h 2021-07-29 20:15:46.983622698 +0800 ++++ pytorch-develop/torch/csrc/autograd/utils/wrap_outputs.h 2021-08-03 16:07:57.162424303 +0800 @@ -168,6 +168,45 @@ return r.release(); } @@ -10875,7 +10882,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= if (!r) throw python_error(); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/VariableTypeManual.cpp pytorch-develop/torch/csrc/autograd/VariableTypeManual.cpp --- pytorch-v1.5.0/torch/csrc/autograd/VariableTypeManual.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/VariableTypeManual.cpp 2021-07-29 20:15:46.979622554 +0800 ++++ pytorch-develop/torch/csrc/autograd/VariableTypeManual.cpp 2021-08-03 16:07:57.158424269 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10909,7 +10916,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= if (!t.defined()) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/distributed/c10d/comm.cpp pytorch-develop/torch/csrc/distributed/c10d/comm.cpp --- pytorch-v1.5.0/torch/csrc/distributed/c10d/comm.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/distributed/c10d/comm.cpp 2021-07-29 20:15:46.987622841 +0800 ++++ pytorch-develop/torch/csrc/distributed/c10d/comm.cpp 2021-08-03 16:07:57.166424336 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -11015,7 +11022,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= while (!in_flight.empty()) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/distributed/c10d/init.cpp pytorch-develop/torch/csrc/distributed/c10d/init.cpp --- pytorch-v1.5.0/torch/csrc/distributed/c10d/init.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/distributed/c10d/init.cpp 2021-07-29 20:15:46.987622841 +0800 ++++ pytorch-develop/torch/csrc/distributed/c10d/init.cpp 2021-08-03 16:07:57.166424336 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11072,7 +11079,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= .def("is_success", &::c10d::ProcessGroup::Work::isSuccess) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/distributed/c10d/reducer.cpp pytorch-develop/torch/csrc/distributed/c10d/reducer.cpp --- pytorch-v1.5.0/torch/csrc/distributed/c10d/reducer.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/distributed/c10d/reducer.cpp 2021-07-29 20:15:46.987622841 +0800 ++++ pytorch-develop/torch/csrc/distributed/c10d/reducer.cpp 2021-08-03 16:07:57.166424336 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11197,7 +11204,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/DynamicTypes.cpp pytorch-develop/torch/csrc/DynamicTypes.cpp --- pytorch-v1.5.0/torch/csrc/DynamicTypes.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/DynamicTypes.cpp 2021-07-29 20:15:46.971622268 +0800 ++++ pytorch-develop/torch/csrc/DynamicTypes.cpp 2021-08-03 16:07:57.150424202 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11246,7 +11253,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return it->second; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/Generator.cpp pytorch-develop/torch/csrc/Generator.cpp --- pytorch-v1.5.0/torch/csrc/Generator.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/Generator.cpp 2021-07-29 20:15:46.971622268 +0800 ++++ pytorch-develop/torch/csrc/Generator.cpp 2021-08-03 16:07:57.150424202 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -11314,7 +11321,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= #endif diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/generic/serialization.cpp pytorch-develop/torch/csrc/generic/serialization.cpp --- pytorch-v1.5.0/torch/csrc/generic/serialization.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/generic/serialization.cpp 2021-07-29 20:15:46.987622841 +0800 ++++ pytorch-develop/torch/csrc/generic/serialization.cpp 2021-08-03 16:07:57.166424336 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11414,7 +11421,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/generic/Storage.cpp pytorch-develop/torch/csrc/generic/Storage.cpp --- pytorch-v1.5.0/torch/csrc/generic/Storage.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/generic/Storage.cpp 2021-07-29 20:15:46.987622841 +0800 ++++ pytorch-develop/torch/csrc/generic/Storage.cpp 2021-08-03 16:07:57.166424336 +0800 @@ -1,7 +1,25 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11493,7 +11500,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= for (Py_ssize_t i = 0; i < length; i++) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/generic/StorageMethods.cpp pytorch-develop/torch/csrc/generic/StorageMethods.cpp --- pytorch-v1.5.0/torch/csrc/generic/StorageMethods.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/generic/StorageMethods.cpp 2021-07-29 20:15:46.987622841 +0800 ++++ pytorch-develop/torch/csrc/generic/StorageMethods.cpp 2021-08-03 16:07:57.166424336 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11541,7 +11548,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= {"_write_file", (PyCFunction)THPStorage_(writeFile), METH_VARARGS, nullptr}, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/Module.cpp pytorch-develop/torch/csrc/Module.cpp --- pytorch-v1.5.0/torch/csrc/Module.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/Module.cpp 2021-07-29 20:15:46.971622268 +0800 ++++ pytorch-develop/torch/csrc/Module.cpp 2021-08-03 16:07:57.150424202 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -11685,7 +11692,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= auto set_module_attr = [&](const char* name, PyObject* v, bool incref = true) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/tensor/python_tensor.cpp pytorch-develop/torch/csrc/tensor/python_tensor.cpp --- pytorch-v1.5.0/torch/csrc/tensor/python_tensor.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/tensor/python_tensor.cpp 2021-07-29 20:15:47.011623702 +0800 ++++ pytorch-develop/torch/csrc/tensor/python_tensor.cpp 2021-08-03 16:07:57.190424536 +0800 @@ -1,18 +1,35 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -12062,7 +12069,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= +} // namespace torch diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/init.cpp pytorch-develop/torch/csrc/utils/init.cpp --- pytorch-v1.5.0/torch/csrc/utils/init.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/init.cpp 2021-07-29 20:15:47.011623702 +0800 ++++ pytorch-develop/torch/csrc/utils/init.cpp 2021-08-03 16:07:57.190424536 +0800 @@ -1,6 +1,10 @@ #include #include @@ -12150,7 +12157,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } // namespace torch diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/init.h pytorch-develop/torch/csrc/utils/init.h --- pytorch-v1.5.0/torch/csrc/utils/init.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/init.h 2021-07-29 20:15:47.011623702 +0800 ++++ pytorch-develop/torch/csrc/utils/init.h 2021-08-03 16:07:57.190424536 +0800 @@ -8,4 +8,7 @@ void initThroughputBenchmarkBindings(PyObject* module); @@ -12161,7 +12168,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } // namespace torch diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/python_arg_parser.h pytorch-develop/torch/csrc/utils/python_arg_parser.h --- pytorch-v1.5.0/torch/csrc/utils/python_arg_parser.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/python_arg_parser.h 2021-07-29 20:15:47.011623702 +0800 ++++ pytorch-develop/torch/csrc/utils/python_arg_parser.h 2021-08-03 16:07:57.190424536 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -12196,7 +12203,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return at::Device(device_str); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/tensor_layouts.cpp pytorch-develop/torch/csrc/utils/tensor_layouts.cpp --- pytorch-v1.5.0/torch/csrc/utils/tensor_layouts.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/tensor_layouts.cpp 2021-07-29 20:15:47.011623702 +0800 ++++ pytorch-develop/torch/csrc/utils/tensor_layouts.cpp 2021-08-03 16:07:57.190424536 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -12227,7 +12234,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= registerLayoutObject((THPLayout*)strided_layout, at::Backend::QuantizedCPU); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/tensor_new.cpp pytorch-develop/torch/csrc/utils/tensor_new.cpp --- pytorch-v1.5.0/torch/csrc/utils/tensor_new.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/tensor_new.cpp 2021-07-29 20:15:47.011623702 +0800 ++++ pytorch-develop/torch/csrc/utils/tensor_new.cpp 2021-08-03 16:07:57.190424536 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -12363,7 +12370,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } else if(expected_layout == c10::kSparse) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/tensor_types.cpp pytorch-develop/torch/csrc/utils/tensor_types.cpp --- pytorch-v1.5.0/torch/csrc/utils/tensor_types.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/tensor_types.cpp 2021-07-29 20:15:47.011623702 +0800 ++++ pytorch-develop/torch/csrc/utils/tensor_types.cpp 2021-08-03 16:07:57.190424536 +0800 @@ -1,58 +1,91 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -12576,7 +12583,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -def get_rng_state(): ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/distributed/distributed_c10d.py pytorch-develop/torch/distributed/distributed_c10d.py --- pytorch-v1.5.0/torch/distributed/distributed_c10d.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/distributed/distributed_c10d.py 2021-07-29 20:15:47.015623845 +0800 ++++ pytorch-develop/torch/distributed/distributed_c10d.py 2021-08-03 16:07:57.194424570 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -12657,7 +12664,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/__init__.py pytorch-develop/torch/__init__.py --- pytorch-v1.5.0/torch/__init__.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/__init__.py 2021-07-29 20:15:46.967622124 +0800 ++++ pytorch-develop/torch/__init__.py 2021-08-03 16:07:57.146424169 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -12700,7 +12707,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/lib/c10d/CMakeLists.txt pytorch-develop/torch/lib/c10d/CMakeLists.txt --- pytorch-v1.5.0/torch/lib/c10d/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/lib/c10d/CMakeLists.txt 2021-07-29 20:15:47.019623989 +0800 ++++ pytorch-develop/torch/lib/c10d/CMakeLists.txt 2021-08-03 16:07:57.198424603 +0800 @@ -28,6 +28,10 @@ option(USE_C10D_NCCL "USE C10D NCCL" ON) endif() @@ -12753,7 +12760,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= copy_header(ProcessGroupMPI.hpp) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/lib/libshm/CMakeLists.txt pytorch-develop/torch/lib/libshm/CMakeLists.txt --- pytorch-v1.5.0/torch/lib/libshm/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/lib/libshm/CMakeLists.txt 2021-07-29 20:15:47.019623989 +0800 ++++ pytorch-develop/torch/lib/libshm/CMakeLists.txt 2021-08-03 16:07:57.198424603 +0800 @@ -37,8 +37,11 @@ SET_TARGET_PROPERTIES(shm PROPERTIES PREFIX "lib" @@ -12810,7 +12817,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -_maybe_indices_t = _scalar_or_tuple_2_t[Tensor] diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/functional.py pytorch-develop/torch/nn/functional.py --- pytorch-v1.5.0/torch/nn/functional.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/functional.py 2021-07-29 20:15:47.023624132 +0800 ++++ pytorch-develop/torch/nn/functional.py 2021-08-03 16:07:57.202424637 +0800 @@ -1611,7 +1611,7 @@ else: output = input.matmul(weight.t()) @@ -12833,7 +12840,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -from . import parallel as parallel diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/modules/batchnorm.py pytorch-develop/torch/nn/modules/batchnorm.py --- pytorch-v1.5.0/torch/nn/modules/batchnorm.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/modules/batchnorm.py 2021-07-29 20:15:47.023624132 +0800 ++++ pytorch-develop/torch/nn/modules/batchnorm.py 2021-08-03 16:07:57.202424637 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -12865,7 +12872,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= self.register_parameter('running_var', None) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/modules/module.py pytorch-develop/torch/nn/modules/module.py --- pytorch-v1.5.0/torch/nn/modules/module.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/modules/module.py 2021-07-29 20:15:47.023624132 +0800 ++++ pytorch-develop/torch/nn/modules/module.py 2021-08-03 16:07:57.202424637 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -13008,7 +13015,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return t.to(device, dtype if t.is_floating_point() else None, non_blocking, memory_format=convert_to_format) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/modules/normalization.py pytorch-develop/torch/nn/modules/normalization.py --- pytorch-v1.5.0/torch/nn/modules/normalization.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/modules/normalization.py 2021-07-29 20:15:47.023624132 +0800 ++++ pytorch-develop/torch/nn/modules/normalization.py 2021-08-03 16:07:57.202424637 +0800 @@ -128,13 +128,14 @@ """ __constants__ = ['normalized_shape', 'eps', 'elementwise_affine'] @@ -13077,7 +13084,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - module_kwargs: Optional[Any] = ...) -> Tensor: ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/parallel/distributed.py pytorch-develop/torch/nn/parallel/distributed.py --- pytorch-v1.5.0/torch/nn/parallel/distributed.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/parallel/distributed.py 2021-07-29 20:15:47.023624132 +0800 ++++ pytorch-develop/torch/nn/parallel/distributed.py 2021-08-03 16:07:57.206424670 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -13428,7 +13435,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -def remove_weight_norm(module: T_module, name: str = ...) -> T_module: ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/onnx/symbolic_opset9.py pytorch-develop/torch/onnx/symbolic_opset9.py --- pytorch-v1.5.0/torch/onnx/symbolic_opset9.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/onnx/symbolic_opset9.py 2021-07-29 20:15:47.027624276 +0800 ++++ pytorch-develop/torch/onnx/symbolic_opset9.py 2021-08-03 16:07:57.206424670 +0800 @@ -1621,14 +1621,23 @@ slices = [sym_help._slice_helper(g, w, axes=[0], starts=[x * n], ends=[y * n]) for x, y in intervals] return g.op('Concat', *slices, axis_i=0) @@ -13506,7 +13513,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - def __init__(self, params: _params_t, lr: float=..., lr_decay: float=..., weight_decay: float=..., initial_accumulator_value: float=..., eps: float=...) -> None: ... 
diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/optim/adamax.py pytorch-develop/torch/optim/adamax.py --- pytorch-v1.5.0/torch/optim/adamax.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/optim/adamax.py 2021-07-29 20:15:47.027624276 +0800 ++++ pytorch-develop/torch/optim/adamax.py 2021-08-03 16:07:57.206424670 +0800 @@ -80,8 +80,8 @@ exp_inf.mul_(beta2).unsqueeze(0), grad.abs().add_(eps).unsqueeze_(0) @@ -13683,7 +13690,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - def __init__(self, params: _params_t, lr: float=..., betas: Tuple[float, float]=..., eps: float=...) -> None: ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/serialization.py pytorch-develop/torch/serialization.py --- pytorch-v1.5.0/torch/serialization.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/serialization.py 2021-07-29 20:15:47.031624418 +0800 ++++ pytorch-develop/torch/serialization.py 2021-08-03 16:07:57.210424704 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -13767,7 +13774,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= def location_tag(storage): diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/storage.py pytorch-develop/torch/storage.py --- pytorch-v1.5.0/torch/storage.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/storage.py 2021-07-29 20:15:47.031624418 +0800 ++++ pytorch-develop/torch/storage.py 2021-08-03 16:07:57.210424704 +0800 @@ -7,6 +7,7 @@ class _StorageBase(object): @@ -13787,7 +13794,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= else: diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/tensor.py pytorch-develop/torch/tensor.py --- pytorch-v1.5.0/torch/tensor.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/tensor.py 2021-07-29 20:15:47.031624418 +0800 ++++ pytorch-develop/torch/tensor.py 2021-08-03 16:07:57.210424704 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -13849,7 +13856,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= def __reversed__(self): diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/_tensor_str.py pytorch-develop/torch/_tensor_str.py --- pytorch-v1.5.0/torch/_tensor_str.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/_tensor_str.py 2021-07-29 20:15:46.967622124 +0800 ++++ pytorch-develop/torch/_tensor_str.py 2021-08-03 16:07:57.146424169 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -13903,7 +13910,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= has_default_dtype = self.dtype in (torch.get_default_dtype(), torch.int64, torch.bool) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/utils/data/dataloader.py pytorch-develop/torch/utils/data/dataloader.py --- pytorch-v1.5.0/torch/utils/data/dataloader.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/utils/data/dataloader.py 2021-07-29 20:15:47.035624563 +0800 ++++ pytorch-develop/torch/utils/data/dataloader.py 2021-08-03 16:07:57.214424737 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -14112,7 +14119,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - def __init__(self, sampler: Sampler[int], batch_size: int, drop_last: bool) -> None: ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/utils/data/_utils/pin_memory.py pytorch-develop/torch/utils/data/_utils/pin_memory.py --- pytorch-v1.5.0/torch/utils/data/_utils/pin_memory.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/utils/data/_utils/pin_memory.py 2021-07-29 20:15:47.035624563 +0800 ++++ pytorch-develop/torch/utils/data/_utils/pin_memory.py 2021-08-03 16:07:57.214424737 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -14173,7 +14180,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/utils/__init__.py pytorch-develop/torch/utils/__init__.py --- pytorch-v1.5.0/torch/utils/__init__.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/utils/__init__.py 2021-07-29 20:15:47.031624418 +0800 ++++ pytorch-develop/torch/utils/__init__.py 2021-08-03 16:07:57.214424737 +0800 @@ -1,6 +1,7 @@ from __future__ import absolute_import, division, print_function, unicode_literals @@ -14184,7 +14191,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= def set_module(obj, mod): diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/_utils.py pytorch-develop/torch/_utils.py --- pytorch-v1.5.0/torch/_utils.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/_utils.py 2021-07-29 20:15:46.967622124 +0800 ++++ pytorch-develop/torch/_utils.py 2021-08-03 16:07:57.146424169 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. diff --git a/src/aten/src/ATen/native/native_functions.yaml b/src/aten/src/ATen/native/native_functions.yaml index dca0dd5388c322ea093ef763dd8ead2522d48c10..bc3d4e9c24a1a112a2fdb1f4d5f2f7a215a410b4 100644 --- a/src/aten/src/ATen/native/native_functions.yaml +++ b/src/aten/src/ATen/native/native_functions.yaml @@ -1128,6 +1128,10 @@ npu_dispatch_only: NPU: npu_convolution_backward +- func: npu_convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? 
ggb, Tensor input, Tensor gO, Tensor weight, int[] stride, int[] padding, int[] dilation, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor) + npu_dispatch_only: + NPU: npu_convolution_double_backward + - func: npu_conv2d(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> Tensor npu_dispatch_only: NPU: conv2d_npu diff --git a/src/aten/src/ATen/native/npu/IndexPutKernelNpu.cpp b/src/aten/src/ATen/native/npu/IndexPutKernelNpu.cpp index 6814d60261599c033c0b6f16b62965e660799d96..517692c54ad305791b52512ac901e89022bcd108 100644 --- a/src/aten/src/ATen/native/npu/IndexPutKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/IndexPutKernelNpu.cpp @@ -44,16 +44,27 @@ Tensor& index_put_nocheck( auto masksTensor = CalcuOpUtil::copy_tensor_host_to_device( from_blob(masks.data(), {masks.size()}, dtype(ScalarType::Long))); + Tensor tempSelf = self; + Tensor tempValue = value; + if (self.scalar_type() == ScalarType::Half) { + tempSelf = self.npu_dtype_cast(ScalarType::Float); + tempValue = value.npu_dtype_cast(ScalarType::Float); + result = result.npu_dtype_cast(ScalarType::Float); + } + OpCommand cmd; cmd.Name("IndexPut") - .Input(self) - .Input(value) + .Input(tempSelf) + .Input(tempValue) .Input(masksTensor) .Inputs(allDefinedIndices) .Output(result) .Attr("accumulate", accumulate) .Run(); + if (self.scalar_type() == ScalarType::Half) { + result = result.npu_dtype_cast(ScalarType::Half); + } return result; } diff --git a/src/aten/src/ATen/native/npu/NormalKernelNpu.cpp b/src/aten/src/ATen/native/npu/NormalKernelNpu.cpp index 158b7c74a1de4e0ecc9cac144b37ec951bf5a9a7..22a1b3b5228d02a80503db39f30762b1f3174a2d 100644 --- a/src/aten/src/ATen/native/npu/NormalKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/NormalKernelNpu.cpp @@ -86,7 +86,6 @@ Tensor& normal_out_npu( if (dtypeCastOfStd.scalar_type() == ScalarType::Half) { dtypeCastOfStd = dtypeCastOfStd.to(ScalarType::Float); } - OpCommand cmd; cmd.Name("Normal") .Input(dtypeCastOfMean) @@ -112,8 +111,8 @@ Tensor& normal_out_npu( if (formatCastOfResult.scalar_type() == ScalarType::Half) { formatCastOfResult = formatCastOfResult.to(ScalarType::Float); } - - Tensor meanTensor = OpPreparation::ApplyTensor(size, result.options(), result); + + Tensor meanTensor = OpPreparation::ApplyTensor(size, formatCastOfResult.options(), result); meanTensor.fill_(mean); OpCommand cmd; cmd.Name("Normal") diff --git a/src/aten/src/ATen/native/npu/ScatterAddKernelNpu.cpp b/src/aten/src/ATen/native/npu/ScatterAddKernelNpu.cpp index e013a92a9af934d25a2eb91ae450c616b741f7ac..86706651710f7fc91b6af77058b7380a675c8d0a 100644 --- a/src/aten/src/ATen/native/npu/ScatterAddKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/ScatterAddKernelNpu.cpp @@ -25,47 +25,14 @@ Tensor scatter_add_out_npu( int64_t dim, const Tensor& index, const Tensor& src) { - int64_t index_dim = index.dim(); - auto index_sizes = index.sizes(); - auto self_dim = self.dim(); - TORCH_CHECK(index.scalar_type() == ScalarType::Long, "index.scalar_type() != ScalarType::Long"); - TORCH_CHECK(dim < index_dim, "dim must smaller than index.dim()"); - TORCH_CHECK(index_dim == self_dim, "index.dim() must eq to self.dim()"); - TORCH_CHECK(src.dim() == self_dim, "src.dim() must eq to self.dim()"); - - Tensor src_flatten = src.reshape(-1); - Tensor index_flatten = index.cpu().reshape(-1); - std::vector<int64_t> index_sizes_new(index_sizes.begin(), index_sizes.end()); - index_sizes_new.push_back(index_dim); - Tensor new_index = at::empty(index_sizes_new, index_flatten.options()); -
new_index = new_index.reshape({-1, index_dim}).fill_(0); - int64_t numel_num = index.numel(); - int64_t stride = 1; - int64_t data_stride = index_dim; - int64_t* org_data_ptr = index_flatten.data_ptr<int64_t>(); - int64_t* data_ptr = new_index.data_ptr<int64_t>(); - - for (--index_dim; index_dim >= 0; index_dim--) { - int64_t dim_size = index.size(index_dim); - for (int64_t i = 0; i < numel_num; i++) { - if (dim != index_dim) { - if (i >= stride) { - data_ptr[i * data_stride + index_dim] = (i / stride) % dim_size; - } - } else { - data_ptr[i * data_stride + index_dim] = org_data_ptr[i]; - } - } - stride = stride * dim_size; - } - OpCommand cmd; - cmd.Name("ScatterNdAdd") + cmd.Name("PTScatterAdd") .Input(self) - .Input(new_index.to("npu")) - .Input(src_flatten) + .Input(index) + .Input(src) .Output(result) - .Attr("use_locking", false) + .Attr("dim", dim) + .Attr("kernel_name", "PTScatterAdd") .Run(); return result; diff --git a/src/aten/src/ATen/native/npu/convolution/ConvolutionKernelNpu.cpp b/src/aten/src/ATen/native/npu/convolution/ConvolutionKernelNpu.cpp index 33962dc935a2d611e99743c8d867874a90612513..19954a6c9464dedc80f3df1a728675d62eb0d547 100644 --- a/src/aten/src/ATen/native/npu/convolution/ConvolutionKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/convolution/ConvolutionKernelNpu.cpp @@ -460,10 +460,33 @@ tuple<Tensor, Tensor, Tensor> npu_convolution_backward( groups, grad_input_mask); } - + // Note: weight.grad's dtype should be equal to weight's dtype + if (std::get<1>(output).defined()) { + std::get<1>(output) = std::get<1>(output).npu_dtype_cast(weight.scalar_type()); + } return output; } +tuple<Tensor, Tensor, Tensor> npu_convolution_double_backward( + const Tensor& ggI, const Tensor& ggW, const Tensor& ggb, + const Tensor& input, const Tensor& gO_r, const Tensor& weight_r, + IntArrayRef stride_, IntArrayRef padding_, IntArrayRef dilation_, + int64_t groups_, std::array<bool, 3> grad_input_mask){ + int64_t dim = input.ndimension(); + Tensor ggO; + Tensor gI; + Tensor gW; + if (dim == 4) { + std::tie(ggO, gI, gW) = at::_convolution_double_backward(ggI, ggW, ggb, gO_r, weight_r, input, stride_, padding_, + {{1, 1}}, false, {{0, 0}}, 1, false, false, false, grad_input_mask); + } + if (dim == 5) { + std::tie(ggO, gI, gW) = at::_convolution_double_backward(ggI, ggW, ggb, gO_r, weight_r, input, stride_, padding_, + {{1, 1, 1}}, false, {{0, 0, 0}}, 1, false, false, false, grad_input_mask); + } + return std::tie(ggO, gI, gW); +} + Tensor _convolution_nogroup_npu( const Tensor& input, const Tensor& weight, diff --git a/src/aten/src/ATen/utils/DumpUtils.cpp b/src/aten/src/ATen/utils/DumpUtils.cpp index 198affc63d8cb5d6891008b16c675a000e951788..72a45565c6c29d98a544a731f29894738a416ffe 100644 --- a/src/aten/src/ATen/utils/DumpUtils.cpp +++ b/src/aten/src/ATen/utils/DumpUtils.cpp @@ -235,34 +235,37 @@ namespace at { int typeData = static_cast<int>(SaveType::TENSOR); PrepareSimpleHdf5Attr(dataset, ATTR_TYPE_NAME, PredType::STD_I32LE, &typeData); + // create contiguous cpu tensor + auto tensor_cpu = tensor.detach().cpu().clone(); + // prepare stride attribute rank = 1; - hsize_t dims[1] = {tensor.strides().size()}; + hsize_t dims[1] = {tensor_cpu.strides().size()}; DataSpace strideDataspace = DataSpace(rank, dims); Attribute attribute = dataset->createAttribute(ATTR_STRIDE_NAME, PredType::STD_I64LE, strideDataspace); - attribute.write(PredType::STD_I64LE, tensor.strides().data()); + attribute.write(PredType::STD_I64LE, tensor_cpu.strides().data()); // write to dataset if (tensor.device().type() == DeviceType::CPU) { - dataset->write(tensor.storage().data_ptr().get(),
ScalarTypeToPredType(tensor.scalar_type())); + dataset->write(tensor_cpu.storage().data_ptr().get(), ScalarTypeToPredType(tensor_cpu.scalar_type())); } else if (tensor.device().type() == DeviceType::CUDA) { if (tensor.scalar_type() != ScalarType::Half) { dataset->write( - tensor.detach().cpu().storage().data_ptr().get(), + tensor_cpu.storage().data_ptr().get(), ScalarTypeToPredType(tensor.scalar_type())); } else { dataset->write( - tensor.detach().to(c10::kFloat).cpu().storage().data_ptr().get(), + tensor_cpu.to(c10::kFloat).storage().data_ptr().get(), PredType::IEEE_F32LE); } } else if (tensor.device().type() == DeviceType::NPU) { if (tensor.scalar_type() != ScalarType::Half) { dataset->write( - tensor.detach().cpu().storage().data_ptr().get(), + tensor_cpu.storage().data_ptr().get(), ScalarTypeToPredType(tensor.scalar_type())); } else { dataset->write( - tensor.detach().npu_dtype_cast(ScalarType::Float).cpu().storage().data_ptr().get(), + tensor_cpu.to(c10::kFloat).storage().data_ptr().get(), PredType::IEEE_F32LE); } } diff --git a/src/aten/src/ATen/utils/LoadUtils.cpp b/src/aten/src/ATen/utils/LoadUtils.cpp index f526fb5a22d48bee0765b22ca9d429d937a68581..d0d677bdd9713240c306dbcedc7cb24e08add5d1 100644 --- a/src/aten/src/ATen/utils/LoadUtils.cpp +++ b/src/aten/src/ATen/utils/LoadUtils.cpp @@ -383,26 +383,6 @@ namespace at { is_matched = false; break; } - - //2.stride - attr = dataset.openAttribute("Stride"); - int h5StrideSize = static_cast<int>(attr.getSpace().getSimpleExtentNpoints()); - if (h5StrideSize == (*it).tensor.strides().size()) { - int64_t* stride = new int64_t[h5StrideSize]; - attr.read(attr.getDataType(), stride); - IntArrayRef tensorStride = (*it).tensor.strides(); - for (int k = 0; k < h5StrideSize; k++) { - if (tensorStride[k] != stride[k]) { - is_matched = false; - break; - } - } - delete stride; - } else { - is_matched = false; - break; - } - } } return is_matched; @@ -770,11 +750,7 @@ namespace at { Tensor thArray; if ((*it).tensor.scalar_type() != ScalarType::Half) { auto options = at::TensorOptions().dtype((*it).tensor.scalar_type()); - if (deviceTypeValue[0] == 10) { - thArray = at::from_blob(data, (*it).tensor.sizes(), options); - } else { - thArray = at::from_blob(data, (*it).tensor.sizes(), (*it).tensor.strides(), options); - } + thArray = at::from_blob(data, (*it).tensor.sizes(), options); auto verCountBefore = (*it).tensor.unsafeGetTensorImpl()->version_counter().current_version(); CopyMaybeWithZeroStride((*it).tensor.detach(), thArray.to((*it).tensor.device()).to((*it).tensor.dtype())); auto verCountAfter = (*it).tensor.unsafeGetTensorImpl()->version_counter().current_version(); @@ -783,11 +759,7 @@ namespace at { } } else { auto options = at::TensorOptions().dtype(at::kFloat); - if (deviceTypeValue[0] == 10) { - thArray = at::from_blob(data, (*it).tensor.sizes(), options); - } else { - thArray = at::from_blob(data, (*it).tensor.sizes(), (*it).tensor.strides(), options); - } + thArray = at::from_blob(data, (*it).tensor.sizes(), options); auto verCountBefore = (*it).tensor.unsafeGetTensorImpl()->version_counter().current_version(); CopyMaybeWithZeroStride((*it).tensor.detach(), thArray.to(at::kHalf).to((*it).tensor.device())); auto verCountAfter = (*it).tensor.unsafeGetTensorImpl()->version_counter().current_version(); diff --git a/src/build.sh b/src/build.sh index ff4ffe2c7fff7d0ddad46e87486305f1ae6546af..509444620d8219e22f29818e21a791bf2651269f 100644 --- a/src/build.sh +++ b/src/build.sh @@ -31,7 +31,7 @@ function main() # make clean export
TORCH_PACKAGE_NAME=torch export PYTORCH_BUILD_VERSION='1.5.0+ascend' - export PYTORCH_BUILD_NUMBER=2 + export PYTORCH_BUILD_NUMBER=3 #for build GPU torch:DEBUG=0 USE_DISTRIBUTED=0 USE_HCCL=0 USE_NCCL=0 USE_MKLDNN=0 USE_CUDA=1 USE_NPU=0 BUILD_TEST=0 USE_NNPACK=0 python3.7 setup.py build bdist_wheel DEBUG=0 USE_DISTRIBUTED=1 USE_HCCL=1 USE_MKLDNN=0 USE_CUDA=0 USE_NPU=1 BUILD_TEST=0 USE_NNPACK=0 python3.7 setup.py build bdist_wheel if [ $? != 0 ]; then diff --git a/src/tools/autograd/derivatives.yaml b/src/tools/autograd/derivatives.yaml index ee68e09e8dccd12c5bd3023a5cc16d06814822e5..8a6a3b57771be73ac5191a7b21db0d7193fbfb97 100644 --- a/src/tools/autograd/derivatives.yaml +++ b/src/tools/autograd/derivatives.yaml @@ -1463,6 +1463,9 @@ - name: npu_convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> Tensor input, weight, bias: npu_convolution_backward(input, grad, weight, stride, padding, dilation, groups, grad_input_mask) +- name: npu_convolution_backward(Tensor input, Tensor grad_output, Tensor weight, int[] stride, int[] padding, int[] dilation, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor) + grad_output, input, weight: npu_convolution_double_backward(grads[0], grads[1], grads[2], input, grad_output, weight, stride, padding, dilation, groups, grad_input_mask) + - name: npu_convolution_transpose(Tensor input, Tensor weight, Tensor? bias, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups) -> Tensor input, weight, bias: npu_convolution_transpose_backward(input, grad, weight, padding, output_padding, stride, dilation, groups, grad_input_mask) diff --git a/src/torch/contrib/npu/optimized_lib/__init__.py b/src/torch/contrib/npu/optimized_lib/__init__.py index 66ad1baa9fbfc14a733bf37ac8ffd3c2a4b3c97c..f3f9db424a4cd27d07fed4220dcfcdb95094b6e0 100644 --- a/src/torch/contrib/npu/optimized_lib/__init__.py +++ b/src/torch/contrib/npu/optimized_lib/__init__.py @@ -12,14 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -from function import pairwise_iou, fast_rcnn_inference_single_image, npu_multiclass_nms, npu_batched_multiclass_nms -from module import ChannelShuffle, InvertedResidual, PreLoader +from .function import iou, ptiou, npu_multiclass_nms, npu_batched_multiclass_nms +from .module import ChannelShuffle, Prefetcher, DropoutV2, LabelSmoothingCrossEntropy, ROIAlign, DCNv2, \ + ModulatedDeformConv __all__ = [ - "pairwise_iou", - "fast_rcnn_inference_single_image", - "ChannelShuffle", - "PreLoader", + "iou", + "ptiou", "npu_multiclass_nms", "npu_batched_multiclass_nms", -] \ No newline at end of file + "ChannelShuffle", + "Prefetcher", + "DropoutV2", + "LabelSmoothingCrossEntropy", + "ROIAlign", + "DCNv2", + "ModulatedDeformConv", +] diff --git a/src/torch/contrib/npu/optimized_lib/module/__init__.py b/src/torch/contrib/npu/optimized_lib/module/__init__.py index 905c8cafd8bf7af67285668c2c4b7dd724607fcc..6ff5fd3b0064d9b1b7866fe1fb72f8be4189932a 100644 --- a/src/torch/contrib/npu/optimized_lib/module/__init__.py +++ b/src/torch/contrib/npu/optimized_lib/module/__init__.py @@ -13,15 +13,18 @@ # limitations under the License. 
from .channel_shuffle import ChannelShuffle -from .preloader import PreLoader +from .prefetcher import Prefetcher from .dropout import DropoutV2 from .crossentropy import LabelSmoothingCrossEntropy from .roi_align import ROIAlign +from .deform_conv import ModulatedDeformConv, DCNv2 __all__ = [ "ChannelShuffle", - "PreLoader", + "Prefetcher", "DropoutV2", "LabelSmoothingCrossEntropy", "ROIAlign", -] \ No newline at end of file + "DCNv2", + "ModulatedDeformConv", +] diff --git a/src/torch/contrib/npu/optimized_lib/module/deform_conv.py b/src/torch/contrib/npu/optimized_lib/module/deform_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..4c1f102339d84f22e54aacbb983f859c785bd5d8 --- /dev/null +++ b/src/torch/contrib/npu/optimized_lib/module/deform_conv.py @@ -0,0 +1,235 @@ +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.nn.modules.utils import _pair, _single +import math + + +class ModulatedDeformConv2dFunction(Function): + + @staticmethod + def forward(ctx, + input_tensor, + offset_ori, + mask, + weight, + bias=None, + with_bias=False, + stride=1, + padding=0, + dilation=1, + groups=1, + deformable_groups=1, + sort_index_for_npu_fp=None, + sort_index_for_npu_bp=None, + ): + + input_tensor = input_tensor.float() + offset_ori = offset_ori.float() + mask = mask.float() + + ctx.stride = stride + ctx.padding = padding + ctx.dilation = dilation + ctx.groups = groups + ctx.deformable_groups = deformable_groups + ctx.sort_index_for_npu_bp = sort_index_for_npu_bp + ctx.with_bias = with_bias + + offset = offset_ori.index_select(1, sort_index_for_npu_fp) + offset_all = torch.cat([offset, mask], dim=1) + output, offset_out = torch.npu_deformable_conv2d( + input_tensor, weight, offset_all, bias, + kernel_size=[weight.shape[3], weight.shape[2]], + stride=[1, 1, ctx.stride, ctx.stride], + padding=[ctx.padding, ctx.padding, ctx.padding, ctx.padding], + dilation=[1, 1, ctx.dilation, ctx.dilation], + groups=ctx.groups, deformable_groups=ctx.deformable_groups, + modulated=True) + if weight.requires_grad or mask.requires_grad or offset.requires_grad \ + or input_tensor.requires_grad: + ctx.save_for_backward(input_tensor, weight, offset_out, offset_all) + return output + + @staticmethod + def backward(ctx, grad_output): + input_tensor, weight, offset_out, offset_all = ctx.saved_tensors + grad_input, grad_weight, grad_offset_all, grad_bias = torch.npu_deformable_conv2dbk( + input_tensor, grad_output, offset_out, weight, offset_all, + kernel_size=[weight.shape[3], weight.shape[2]], + stride=[1, 1, ctx.stride, ctx.stride], + padding=[ctx.padding, ctx.padding, ctx.padding, ctx.padding], + dilation=[1, 1, ctx.dilation, ctx.dilation], + groups=ctx.groups, deformable_groups=ctx.deformable_groups, modulated=True) + grad_offset = grad_offset_all.index_select(1, ctx.sort_index_for_npu_bp) + grad_mask = grad_offset_all[:, grad_offset.shape[1]:, :, :] + if 
diff --git a/src/torch/contrib/npu/optimized_lib/module/deform_conv.py b/src/torch/contrib/npu/optimized_lib/module/deform_conv.py
new file mode 100644
index 0000000000000000000000000000000000000000..4c1f102339d84f22e54aacbb983f859c785bd5d8
--- /dev/null
+++ b/src/torch/contrib/npu/optimized_lib/module/deform_conv.py
@@ -0,0 +1,235 @@
+# Copyright (c) 2020, Huawei Technologies. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+import torch.nn as nn
+from torch.autograd import Function
+from torch.nn.modules.utils import _pair, _single
+import math
+
+
+class ModulatedDeformConv2dFunction(Function):
+
+    @staticmethod
+    def forward(ctx,
+                input_tensor,
+                offset_ori,
+                mask,
+                weight,
+                bias=None,
+                with_bias=False,
+                stride=1,
+                padding=0,
+                dilation=1,
+                groups=1,
+                deformable_groups=1,
+                sort_index_for_npu_fp=None,
+                sort_index_for_npu_bp=None,
+                ):
+
+        input_tensor = input_tensor.float()
+        offset_ori = offset_ori.float()
+        mask = mask.float()
+
+        ctx.stride = stride
+        ctx.padding = padding
+        ctx.dilation = dilation
+        ctx.groups = groups
+        ctx.deformable_groups = deformable_groups
+        ctx.sort_index_for_npu_bp = sort_index_for_npu_bp
+        ctx.with_bias = with_bias
+
+        offset = offset_ori.index_select(1, sort_index_for_npu_fp)
+        offset_all = torch.cat([offset, mask], dim=1)
+        output, offset_out = torch.npu_deformable_conv2d(
+            input_tensor, weight, offset_all, bias,
+            kernel_size=[weight.shape[3], weight.shape[2]],
+            stride=[1, 1, ctx.stride, ctx.stride],
+            padding=[ctx.padding, ctx.padding, ctx.padding, ctx.padding],
+            dilation=[1, 1, ctx.dilation, ctx.dilation],
+            groups=ctx.groups, deformable_groups=ctx.deformable_groups,
+            modulated=True)
+        if weight.requires_grad or mask.requires_grad or offset.requires_grad \
+                or input_tensor.requires_grad:
+            ctx.save_for_backward(input_tensor, weight, offset_out, offset_all)
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        input_tensor, weight, offset_out, offset_all = ctx.saved_tensors
+        grad_input, grad_weight, grad_offset_all, grad_bias = torch.npu_deformable_conv2dbk(
+            input_tensor, grad_output, offset_out, weight, offset_all,
+            kernel_size=[weight.shape[3], weight.shape[2]],
+            stride=[1, 1, ctx.stride, ctx.stride],
+            padding=[ctx.padding, ctx.padding, ctx.padding, ctx.padding],
+            dilation=[1, 1, ctx.dilation, ctx.dilation],
+            groups=ctx.groups, deformable_groups=ctx.deformable_groups, modulated=True)
+        grad_offset = grad_offset_all.index_select(1, ctx.sort_index_for_npu_bp)
+        grad_mask = grad_offset_all[:, grad_offset.shape[1]:, :, :]
+        if not ctx.with_bias:
+            grad_bias = None
+
+        return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias,
+                None, None, None, None, None, None, None, None)
+
+
+class ModulatedDeformConv(nn.Module):
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 deformable_groups=1,
+                 bias=True,
+                 pack=True,
+                 ):
+
+        r"""Applies an NPU-based Modulated Deformable 2D convolution operation.
+
+        Paper link:
+        [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/abs/1811.11168)
+
+        Reference implementation link:
+        https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/modulated_deform_conv.py
+
+        The design of this ModulatedDeformConv largely follows the mmcv
+        implementation referenced above. The custom ModulatedDeformConv2dFunction
+        implements the forward and backward passes, rearranging the inputs to
+        match the layout expected by the underlying NPU operator IR.
+
+        Note that plain deformable convolution (DCNv1) corresponds to setting
+        modulated = False. Because its inputs and initialization differ, it is
+        not implemented separately here.
+
+        .. note::
+            ModulatedDeformConv only supports the fp32 data type.
+            The weight and bias of conv_offset must be initialized to 0.
+
+        Args:
+            in_channels (int): Number of channels in the input image.
+            out_channels (int): Number of channels produced by the convolution.
+            kernel_size (int or tuple): Size of the convolving kernel.
+            stride (int or tuple): Stride of the convolution. Default: 1.
+            padding (int or tuple): Zero-padding added to both sides of the input.
+                Default: 0.
+            dilation (int or tuple): Spacing between kernel elements. Default: 1.
+            groups (int): Number of blocked connections from input
+                channels to output channels. Default: 1.
+            deformable_groups (int): Number of deformable group partitions. Default: 1.
+            bias (bool): If True, adds a learnable bias to the output. Default: True.
+            pack (bool): If True, conv_offset and mask will be included in this module. Default: True.
+
+        Examples::
+            >>> m = ModulatedDeformConv(32, 32, 1)
+            >>> input_tensor = torch.randn(2, 32, 5, 5)
+            >>> output = m(input_tensor)
+        """
+
+        super(ModulatedDeformConv, self).__init__()
+
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.kernel_size = _pair(kernel_size)
+        self.stride = stride
+        self.padding = padding
+        self.dilation = dilation
+        self.groups = groups
+        self.deformable_groups = deformable_groups
+        self.with_bias = bias
+        self.pack = pack
+
+        self.weight = nn.Parameter(
+            torch.Tensor(out_channels, in_channels // groups, *self.kernel_size))
+        if bias:
+            self.bias = nn.Parameter(torch.Tensor(out_channels))
+        else:
+            self.bias = torch.zeros(self.weight.shape[0])
+
+        if self.pack:
+            self.conv_offset = nn.Conv2d(
+                self.in_channels,
+                self.deformable_groups * 3 * self.kernel_size[0] *
+                self.kernel_size[1],
+                kernel_size=self.kernel_size,
+                stride=_pair(self.stride),
+                padding=_pair(self.padding),
+                bias=True)
+
+        self.split_num = self.deformable_groups * 2 * self.kernel_size[0] * self.kernel_size[1]
+        sort_index_for_npu = list(range(self.split_num))
+        sort_index_for_npu_fp = sort_index_for_npu[1::2] + sort_index_for_npu[::2]
+        sort_index_for_npu_bp_dict = {i: idx for idx, i in enumerate(sort_index_for_npu_fp)}
+        sort_index_for_npu_bp = [sort_index_for_npu_bp_dict[i] for i in sort_index_for_npu]
+        self.sort_index_for_npu_fp = torch.IntTensor(sort_index_for_npu_fp)
+        self.sort_index_for_npu_bp = torch.IntTensor(sort_index_for_npu_bp)
+        self.sort_index_for_npu_todevice = False
+
+        self.init_param()
+
+    def init_param(self):
+        n = self.in_channels
+        for k in self.kernel_size:
+            n *= k
+        stdv = 1. / math.sqrt(n)
+        self.weight.data.uniform_(-stdv, stdv)
+        if self.bias is not None:
+            self.bias.data.zero_()
+
+        if self.pack:
+            self.conv_offset.weight.data.zero_()
+            self.conv_offset.bias.data.zero_()
+
+    def forward(self, x):
+        if self.pack:
+            out = self.conv_offset(x)
+            offset = out[:, :self.split_num, ...]
+            mask = torch.sigmoid(out[:, self.split_num:, ...])
+        else:
+            x, offset, mask = x
+
+        if not self.sort_index_for_npu_todevice:
+            self.sort_index_for_npu_fp = self.sort_index_for_npu_fp.to(x.device)
+            self.sort_index_for_npu_bp = self.sort_index_for_npu_bp.to(x.device)
+            self.bias = self.bias.to(x.device)
+            self.sort_index_for_npu_todevice = True
+
+        return ModulatedDeformConv2dFunction.apply(
+            x, offset, mask, self.weight, self.bias, self.with_bias,
+            self.stride, self.padding, self.dilation,
+            self.groups, self.deformable_groups,
+            self.sort_index_for_npu_fp,
+            self.sort_index_for_npu_bp,
+        )
+
+
+DCNv2 = ModulatedDeformConv
+
+if __name__ == "__main__":
+    x = torch.randn(2, 32, 7, 7)
+    model = DCNv2(32, 32, 3, 2, 1)
+
+    torch.npu.set_device(0)
+    x = x.npu()
+    model = model.npu()
+
+    o = model(x)
+    l = o.sum()
+    l.backward()
+    print(l)
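The sort_index_for_npu_* buffers built in __init__ are the core of the layout adaptation: the conv_offset output apparently interleaves the two offset coordinates per kernel position, while torch.npu_deformable_conv2d expects them de-interleaved, so the forward pass applies a fixed channel permutation and the backward pass its inverse. A standalone sketch using the same construction as the module (3x3 kernel, deformable_groups=1; which coordinate the odd channels carry depends on the conv_offset layout, so treat the comments as illustrative):

    # 1 deformable group * 2 coordinates * 3 * 3 kernel positions.
    split_num = 1 * 2 * 3 * 3
    sort_index = list(range(split_num))
    # Forward permutation: odd channels first, then even channels.
    sort_index_fp = sort_index[1::2] + sort_index[::2]
    # Backward permutation: the inverse mapping, so gradients return
    # to the original interleaved order.
    inverse = {v: i for i, v in enumerate(sort_index_fp)}
    sort_index_bp = [inverse[i] for i in sort_index]
    # Applying fp then bp restores the original channel order.
    assert [sort_index_fp[i] for i in sort_index_bp] == sort_index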
diff --git a/src/torch/contrib/npu/optimized_lib/module/dropout.py b/src/torch/contrib/npu/optimized_lib/module/dropout.py
index 8ad82cf02ded023e6a25dcdc7fe2f9b69045996e..daf68d75a432c2edeed3221d4c74b73dc0f8823e 100644
--- a/src/torch/contrib/npu/optimized_lib/module/dropout.py
+++ b/src/torch/contrib/npu/optimized_lib/module/dropout.py
@@ -20,14 +20,14 @@ import numpy as np
 class DropoutV2(nn.Module):
     r"""Applies an NPU compatible dropout operation.
 
-    This dropout method generates pseudo-random seed based on LCG(linear congruential generator) method.
-    Since Ascend910 does not have a hardware unit that can generate real random numbers,
-    we used the LCG method to generate pseudo-random seeds
-
-    .. note::
-        max_seed is a hyper-parameter strongly related to the underlying operator.
-        Please check the MAX(2 ** 31 - 1 / 2 ** 10 - 1) in dropout_v2.py in the opp package for matching settings.
-        By default, it is matched by the Pytorch and OPP packages.
+    This dropout method generates its pseudo-random seeds with the LCG (linear congruential generator)
+    method, since the Ascend 910 does not have a hardware unit that can generate
+    true random numbers.
+
+    .. note::
+        max_seed is a hyper-parameter strongly related to the underlying operator.
+        Please check the MAX (2 ** 31 - 1 / 2 ** 10 - 1) in dropout_v2.py in the opp package for matching settings.
+        By default, it is matched by the PyTorch and OPP packages.
 
     Args:
         p: probability of an element to be zeroed. Default: 0.5
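The hunk above only reflows the DropoutV2 docstring, but for context the class is meant as a drop-in replacement for nn.Dropout on NPU devices. A hedged sketch of that substitution (the import path follows this patch; the surrounding module is illustrative):

    import torch.nn as nn
    from torch.contrib.npu.optimized_lib import DropoutV2

    class Head(nn.Module):
        def __init__(self, dim, num_classes):
            super(Head, self).__init__()
            self.drop = DropoutV2(p=0.5)   # instead of nn.Dropout(p=0.5)
            self.fc = nn.Linear(dim, num_classes)

        def forward(self, x):
            return self.fc(self.drop(x))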
diff --git a/src/torch/contrib/npu/optimized_lib/module/preloader.py b/src/torch/contrib/npu/optimized_lib/module/prefetcher.py
similarity index 31%
rename from src/torch/contrib/npu/optimized_lib/module/preloader.py
rename to src/torch/contrib/npu/optimized_lib/module/prefetcher.py
index 8e71d7f7b7d34c79b7e33922281097843ef849ca..1d302ccc70493ef58375235a05606ac777f8187d 100644
--- a/src/torch/contrib/npu/optimized_lib/module/preloader.py
+++ b/src/torch/contrib/npu/optimized_lib/module/prefetcher.py
@@ -1,10 +1,10 @@
-# Copyright (c) 2020, Huawei Technologies.All rights reserved.
+# Copyright 2021 Huawei Technologies Co., Ltd
 #
-# Licensed under the BSD 3-Clause License (the "License");
+# Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-# https://opensource.org/licenses/BSD-3-Clause
+# http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,31 +14,48 @@
 
 import torch
 
-class PreLoader(object):
-    def __init__(self, loader, device):
-        self.device = device
+
+class Prefetcher(object):
+    """Prefetcher for use on NPU devices.
+
+    Original code URL:
+    https://github.com/implus/PytorchInsight/blob/master/classification/imagenet_fast.py#L280
+
+    Args:
+        loader (torch.utils.data.DataLoader or DataLoader-like iterator):
+            Used to generate the preprocessed inputs.
+        stream (torch.npu.Stream): Default: None.
+            Because of the limitations of the NPU memory mechanism,
+            if the prefetcher is initialized repeatedly during training,
+            a caller-defined stream should be passed in to prevent memory leaks;
+            if the prefetcher is initialized only once during training,
+            a dedicated stream is not necessary.
+
+    Returns:
+        tuple: the next (input, target) pair from the loader,
+            or (None, None) once the loader is exhausted.
+    """
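The rewritten Prefetcher yields (input, target) pairs rather than detectron-style dicts, matching the referenced imagenet_fast.py pattern. A typical consuming loop (train_step is a stand-in for the actual forward/backward/optimizer logic):

    prefetcher = Prefetcher(train_loader)
    images, target = prefetcher.next()
    while images is not None:
        train_step(images, target)         # stand-in for the training step
        images, target = prefetcher.next() # returns (None, None) at the end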
+ """ + + def __init__(self, loader, stream=None): self.loader = iter(loader) - self.stream = torch.npu.Stream() + self.stream = stream if stream is not None else torch.npu.Stream() self.preload() - def __len__(self): - return len(self.loader) - def preload(self): try: - self.next_data = next(self.loader) + self.next_input, self.next_target = next(self.loader) except StopIteration: - self.next_data = None + self.next_input = None + self.next_target = None return with torch.npu.stream(self.stream): - for d in self.next_data: - d['image_preprocess'] = d['image_preprocess'].to(self.device, non_blocking=True) - if "instances" in d: - d['instances'] = d['instances'].to(self.device, non_blocking=True) + self.next_input = self.next_input.npu(non_blocking=True) + self.next_target = self.next_target.npu(non_blocking=True) def next(self): torch.npu.current_stream().wait_stream(self.stream) - data = self.next_data - self.preload() - return data + next_input = self.next_input + next_target = self.next_target + if next_target is not None: + self.preload() + return next_input, next_target diff --git a/test/test_npu/test_network_ops/test_convolution_double_backward.py b/test/test_npu/test_network_ops/test_convolution_double_backward.py new file mode 100644 index 0000000000000000000000000000000000000000..8f584568ababd23417fb760e76aae8470e8ed3dd --- /dev/null +++ b/test/test_npu/test_network_ops/test_convolution_double_backward.py @@ -0,0 +1,91 @@ +# Copyright (c) 2020, Huawei Technologies.All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import sys
+import torch
+import numpy as np
+import torch.nn as nn
+from common_utils import TestCase, run_tests
+from common_device_type import dtypes, instantiate_device_type_tests
+from util_test import create_common_tensor
+
+
+class TestConvolutionDoubleBackward(TestCase):
+    def op_exec(self, npu_flag, input, weight, in_channels, out_channels, kernel_size,
+                padding=0, stride=1, dilation=1, bias=True, groups=1):
+        input1 = input
+        weight1 = weight
+        input1.requires_grad = True
+
+        m1 = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias, groups=groups)
+        m1.weight.data = weight1
+        if npu_flag:
+            m1 = m1.to("npu")
+        output = m1(input1)
+        grads = torch.autograd.grad(outputs=output, inputs=(input1, m1.weight), grad_outputs=torch.ones_like(output),
+                                    retain_graph=True, create_graph=True, only_inputs=True)
+        input_grads, weight_grads = grads
+        input_grads.retain_grad()
+        weight_grads.retain_grad()
+        loss = torch.sum(input_grads ** 2) + torch.sum(weight_grads ** 2)
+        loss.backward(torch.ones_like(loss))
+        input_grads_grad = input_grads.grad
+        weight_grads_grad = weight_grads.grad
+        if npu_flag:
+            output = output.to("cpu")
+            input_grads_grad = input_grads_grad.to("cpu")
+            weight_grads_grad = weight_grads_grad.to("cpu")
+        return output, input_grads_grad, weight_grads_grad
+
+    def test_convolution_double_backward_shape_format_fp16(self, device):
+        shape_format = [
+            [[np.float16, 3, [1024, 232, 7, 7]], [np.float16, 4, [232, 232, 1, 1]], 0, 1, 1, None, 1],
+            [[np.float16, 0, [1024, 116, 14, 14]], [np.float16, 4, [116, 116, 1, 1]], 0, 1, 1, None, 1],
+            [[np.float16, 0, [4, 384, 1, 75]], [np.float16, 0, [192, 384, 1, 3]], 0, 1, 1, None, 1],
+            [[np.float16, 3, [4, 256, 75, 5]], [np.float16, 4, [128, 256, 3, 3]], [2, 1], 1, 1, None, 1],
+            [[np.float16, 3, [4, 384, 75, 1]], [np.float16, 4, [192, 384, 3, 1]], 0, 1, 1, None, 1],
+            [[np.float16, 0, [4, 384, 75, 1]], [np.float16, 4, [192, 384, 3, 1]], 0, 1, 1, None, 1],
+            [[np.float16, 0, [4, 384, 1, 75]], [np.float16, 4, [192, 384, 1, 3]], 0, 1, 1, None, 1]
+        ]
+        for item in shape_format:
+            input_cpu, input_npu = create_common_tensor(item[0], -1, 1)
+            if input_cpu.dtype == torch.float16:
+                input_cpu = input_cpu.to(torch.float32)
+            weight_cpu, weight_npu = create_common_tensor(item[1], -1, 1)
+            if weight_cpu.dtype == torch.float16:
+                weight_cpu = weight_cpu.to(torch.float32)
+            kernel_size = (item[1][2][2], item[1][2][3])
+            assert item[0][2][1] / item[6] == item[1][2][1]
+            cpu_output, cpu_input_grads_grad, cpu_weight_grads_grad = self.op_exec(0, input_cpu, weight_cpu,
+                item[0][2][1], item[1][2][0], kernel_size=kernel_size, padding=item[2],
+                stride=item[3], dilation=item[4], bias=item[5], groups=item[6])
+            weight_npu = weight_npu.to("cpu")
+            npu_output, npu_input_grads_grad, npu_weight_grads_grad = self.op_exec(1, input_npu, weight_npu,
+                item[0][2][1], item[1][2][0], kernel_size=kernel_size, padding=item[2],
+                stride=item[3], dilation=item[4], bias=item[5], groups=item[6])
+
+            npu_output = npu_output.to(torch.float16)
+            npu_input_grads_grad = npu_input_grads_grad.to(torch.float16)
+            npu_weight_grads_grad = npu_weight_grads_grad.to(torch.float16)
+            cpu_output = cpu_output.to(torch.float16)
+            cpu_input_grads_grad = cpu_input_grads_grad.to(torch.float16)
+            cpu_weight_grads_grad = cpu_weight_grads_grad.to(torch.float16)
+
+            self.assertRtolEqual(cpu_output.detach().numpy(), npu_output.detach().numpy())
+            self.assertRtolEqual(cpu_input_grads_grad.numpy(), npu_input_grads_grad.numpy())
+            self.assertRtolEqual(cpu_weight_grads_grad.numpy(), npu_weight_grads_grad.numpy())
+
+
+instantiate_device_type_tests(TestConvolutionDoubleBackward, globals(), except_for='cpu')
+if __name__ == "__main__":
+    run_tests()
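For a quick CPU-side sanity check of the same second-order path this test exercises, upstream PyTorch's torch.autograd.gradgradcheck can be used against the reference convolution (double precision, since it compares analytic gradients with finite differences). A minimal sketch:

    import torch
    import torch.nn.functional as F

    # Double precision keeps the finite-difference comparison stable.
    x = torch.randn(1, 2, 5, 5, dtype=torch.double, requires_grad=True)
    w = torch.randn(3, 2, 3, 3, dtype=torch.double, requires_grad=True)

    def conv(x, w):
        return F.conv2d(x, w, padding=1)

    # Numerically verifies gradients-of-gradients on the CPU reference path.
    assert torch.autograd.gradgradcheck(conv, (x, w))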