diff --git a/CVE-2020-15265.patch b/CVE-2020-15265.patch new file mode 100644 index 0000000000000000000000000000000000000000..b82175f7028ecc275215e4e58c38910b7286ee0f --- /dev/null +++ b/CVE-2020-15265.patch @@ -0,0 +1,53 @@ +From eccb7ec454e6617738554a255d77f08e60ee0808 Mon Sep 17 00:00:00 2001 +From: Mihai Maruseac +Date: Mon, 19 Oct 2020 17:56:36 -0700 +Subject: [PATCH] Prevent segfault in `quantize_and_dequantize` + +--- + .../core/kernels/quantize_and_dequantize_op.cc | 4 ++++ + tensorflow/python/kernel_tests/array_ops_test.py | 14 ++++++++++++++ + 2 files changed, 18 insertions(+) + +diff --git a/tensorflow/core/kernels/quantize_and_dequantize_op.cc b/tensorflow/core/kernels/quantize_and_dequantize_op.cc +index 8f71d09c..fda54208 100644 +--- a/tensorflow/core/kernels/quantize_and_dequantize_op.cc ++++ b/tensorflow/core/kernels/quantize_and_dequantize_op.cc +@@ -71,6 +71,10 @@ class QuantizeAndDequantizeV2Op : public OpKernel { + + void Compute(OpKernelContext* ctx) override { + const Tensor& input = ctx->input(0); ++ OP_REQUIRES( ++ ctx, (axis_ == -1 || axis_ < input.shape().dims()), ++ errors::InvalidArgument("Shape must be at least rank ", axis_ + 1, ++ " but is rank ", input.shape().dims())); + const int depth = (axis_ == -1) ? 
1 : input.dim_size(axis_); + Tensor input_min_tensor; + Tensor input_max_tensor; +diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py +index dbff3a1b..c498ff62 100644 +--- a/tensorflow/python/kernel_tests/array_ops_test.py ++++ b/tensorflow/python/kernel_tests/array_ops_test.py +@@ -1541,6 +1541,20 @@ class QuantizeAndDequantizeTest(test_util.TensorFlowTestCase): + axis=(axis - 4))) + self.assertAllClose(fake_quantized, expected) + ++ def testBadAxis(self): ++ input_tensor = [2.5, 2.5] ++ input_min = [0, 0] ++ input_max = [1, 1] ++ error_message_pattern = "Shape must be at least rank 11 but is rank 1" ++ # TODO(b/171260356): Eager mode and graph mode throw different error types ++ error = errors.InvalidArgumentError if context.executing_eagerly( ++ ) else ValueError ++ with self.assertRaisesRegex(error, error_message_pattern): self.evaluate( ++ array_ops.quantize_and_dequantize_v2( ++ input=input_tensor, ++ input_min=input_min, ++ input_max=input_max, ++ axis=10)) + + @test_util.run_all_in_graph_and_eager_modes + class SortedSearchTest(test_util.TensorFlowTestCase): +-- +2.23.0 + diff --git a/CVE-2020-15266.patch b/CVE-2020-15266.patch new file mode 100644 index 0000000000000000000000000000000000000000..25c42ffb3098dabdbf6f3ce4ffcdb253829b34da --- /dev/null +++ b/CVE-2020-15266.patch @@ -0,0 +1,67 @@ +From 3ade2efec2e90c6237de32a19680caaa3ebc2845 Mon Sep 17 00:00:00 2001 +From: Yong Tang +Date: Sat, 8 Aug 2020 00:47:35 +0000 +Subject: [PATCH] Fix segmentation fault in tf.image.crop_and_resize when boxes + +--- + tensorflow/core/kernels/crop_and_resize_op.cc | 13 +++++++++++++ + tensorflow/python/ops/image_ops_test.py | 12 ++++++++++++ + 2 files changed, 25 insertions(+) + +diff --git a/tensorflow/core/kernels/crop_and_resize_op.cc b/tensorflow/core/kernels/crop_and_resize_op.cc +index 4ecd3bc0..e14f4e43 100644 +--- a/tensorflow/core/kernels/crop_and_resize_op.cc ++++ 
b/tensorflow/core/kernels/crop_and_resize_op.cc +@@ -71,6 +71,18 @@ static inline Status ParseAndCheckBoxSizes(const Tensor& boxes, + if (boxes.dim_size(1) != 4) { + return errors::InvalidArgument("boxes must have 4 columns"); + } ++ for (int64 i = 0; i < *num_boxes; i++) { ++ for (int64 j = 0; j < 4; j++) { ++ if (!isfinite(boxes.tensor()(i, j))) { ++ return errors::InvalidArgument( ++ "boxes values must be finite, received boxes[", i, "]: ", ++ boxes.tensor()(i, 0), ", ", ++ boxes.tensor()(i, 1), ", ", ++ boxes.tensor()(i, 2), ", ", ++ boxes.tensor()(i, 3)); ++ } ++ } ++ } + // The shape of 'box_index' is [num_boxes]. + if (box_index.dims() != 1) { + return errors::InvalidArgument("box_index must be 1-D", +@@ -256,6 +268,7 @@ struct CropAndResize { + continue; + } + if (method_name == "bilinear") { ++ + const int top_y_index = floorf(in_y); + const int bottom_y_index = ceilf(in_y); + const float y_lerp = in_y - top_y_index; +diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py +index 0206ccf9..0630b6fc 100644 +--- a/tensorflow/python/ops/image_ops_test.py ++++ b/tensorflow/python/ops/image_ops_test.py +@@ -5275,6 +5275,18 @@ class DecodeImageTest(test_util.TensorFlowTestCase): + self.assertAllEqual(list(image0.shape), [40, 20, 3]) + self.assertAllEqual(image0, image1) + ++ def testImageCropAndResize(self): ++ # Test case for GitHub issue 42129 ++ message = "boxes values must be finite" ++ with self.assertRaisesRegex( ++ (errors.InvalidArgumentError, ValueError), message): ++ v = image_ops_impl.crop_and_resize_v2( ++ image=array_ops.zeros((2, 1, 1, 1)), ++ boxes=[[1.0e+40, 0, 0, 0]], ++ box_indices=[1], ++ crop_size=[1, 1]) ++ self.evaluate(v) ++ + + if __name__ == "__main__": + googletest.main() +-- +2.23.0 + diff --git a/CVE-2021-29513.patch b/CVE-2021-29513.patch new file mode 100644 index 0000000000000000000000000000000000000000..d32d3bb8f4c9df3914a6e9644e26428f26bd77a2 --- /dev/null +++ b/CVE-2021-29513.patch @@ -0,0 
+1,37 @@ +From 030af767d357d1b4088c4a25c72cb3906abac489 Mon Sep 17 00:00:00 2001 +From: Amit Patankar +Date: Tue, 13 Apr 2021 14:25:01 -0700 +Subject: [PATCH] Fix `tf.raw_ops.ResourceCountUpTo` null pointer dereference. + +PiperOrigin-RevId: 368294347 +Change-Id: I2c16fbfc9b4966c402c3d8e311f0d665a9c852d8 +--- + tensorflow/python/lib/core/ndarray_tensor.cc | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/tensorflow/python/lib/core/ndarray_tensor.cc b/tensorflow/python/lib/core/ndarray_tensor.cc +index 03fbea397485e..6cf51ceebbdaa 100644 +--- a/tensorflow/python/lib/core/ndarray_tensor.cc ++++ b/tensorflow/python/lib/core/ndarray_tensor.cc +@@ -16,6 +16,7 @@ limitations under the License. + #include "tensorflow/python/lib/core/ndarray_tensor.h" + + #include ++#include + + #include "tensorflow/c/eager/tfe_context_internal.h" + #include "tensorflow/c/tf_tensor_internal.h" +@@ -74,6 +75,13 @@ Status PyArrayDescr_to_TF_DataType(PyArray_Descr* descr, + PyObject* key; + PyObject* value; + Py_ssize_t pos = 0; ++ ++ // Return an error if the fields attribute is null. ++ // Occurs with an improper conversion attempt to resource. ++ if (descr->fields == nullptr) { ++ return errors::Internal("Unexpected numpy data type"); ++ } ++ + if (PyDict_Next(descr->fields, &pos, &key, &value)) { + // In Python 3, the keys of numpy custom struct types are unicode, unlike + // Python 2, where the keys are bytes. 
diff --git a/CVE-2021-29517-1.patch b/CVE-2021-29517-1.patch new file mode 100644 index 0000000000000000000000000000000000000000..a54e77b50aba0ed48c721ab0c3c9c512eeeba914 --- /dev/null +++ b/CVE-2021-29517-1.patch @@ -0,0 +1,611 @@ +From cc5ea8469641b6680971eb76020407f81ab3f573 Mon Sep 17 00:00:00 2001 +From: Anna R +Date: Wed, 9 Dec 2020 16:13:53 -0800 +Subject: [PATCH] Remove changes made to support TFRT-based OpKernel classes in + +--- + tensorflow/core/framework/BUILD | 3 - + tensorflow/core/framework/numeric_op.h | 21 ++- + tensorflow/core/framework/numeric_op_base.h | 49 ----- + tensorflow/core/kernels/BUILD | 47 +---- + tensorflow/core/kernels/conv_ops_3d.cc | 153 ++++++++++++++-- + tensorflow/core/kernels/conv_ops_3d.h | 187 -------------------- + 6 files changed, 161 insertions(+), 299 deletions(-) + +diff --git a/tensorflow/core/framework/BUILD b/tensorflow/core/framework/BUILD +index d47c74a6..9b6ddb2a 100644 +--- a/tensorflow/core/framework/BUILD ++++ b/tensorflow/core/framework/BUILD +@@ -51,7 +51,6 @@ exports_files( + "model.h", + "node_def_builder.h", + "numeric_op.h", +- "numeric_op_base.h", + "op_kernel.h", + "op_requires.h", + "op_segment.h", +@@ -183,7 +182,6 @@ filegroup( + "node_def_util.h", + "node_properties.h", + "numeric_op.h", +- "numeric_op_base.h", + "numeric_types.h", + "op.h", + "op_def_builder.h", +@@ -280,7 +278,6 @@ filegroup( + "kernel_shape_util.h", + "log_memory.cc", + "log_memory.h", +- "numeric_op_base.h", + "numeric_types.h", + "op_requires.h", + "ops_util.cc", +diff --git a/tensorflow/core/framework/numeric_op.h b/tensorflow/core/framework/numeric_op.h +index 9f8ceed2..ad452bcd 100644 +--- a/tensorflow/core/framework/numeric_op.h ++++ b/tensorflow/core/framework/numeric_op.h +@@ -15,19 +15,34 @@ limitations under the License. 
+ #ifndef TENSORFLOW_CORE_FRAMEWORK_NUMERIC_OP_H_ + #define TENSORFLOW_CORE_FRAMEWORK_NUMERIC_OP_H_ + +-#include "tensorflow/core/framework/numeric_op_base.h" + #include "tensorflow/core/framework/op_kernel.h" + #include "tensorflow/core/framework/tensor.h" ++#include "tensorflow/core/framework/types.h" ++#include "tensorflow/core/framework/types.pb.h" + #include "tensorflow/core/lib/core/errors.h" + #include "tensorflow/core/lib/core/status.h" + + namespace tensorflow { + ++// One input and one output, both the same type. + template +-using UnaryOp = UnaryOpBase; ++class UnaryOp : public OpKernel { ++ public: ++ explicit UnaryOp(OpKernelConstruction* context) : OpKernel(context) { ++ const DataType dt = DataTypeToEnum::v(); ++ OP_REQUIRES_OK(context, context->MatchSignature({dt}, {dt})); ++ } ++}; + ++// Two inputs and one output, all the same type. + template +-using BinaryOp = BinaryOpBase; ++class BinaryOp : public OpKernel { ++ public: ++ explicit BinaryOp(OpKernelConstruction* context) : OpKernel(context) { ++ const DataType dt = DataTypeToEnum::v(); ++ OP_REQUIRES_OK(context, context->MatchSignature({dt, dt}, {dt})); ++ } ++}; + + // For operations where the input and output are the same shape. + // +diff --git a/tensorflow/core/framework/numeric_op_base.h b/tensorflow/core/framework/numeric_op_base.h +index be7d3bf8..e69de29b 100644 +--- a/tensorflow/core/framework/numeric_op_base.h ++++ b/tensorflow/core/framework/numeric_op_base.h +@@ -1,49 +0,0 @@ +-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. +- +-Licensed under the Apache License, Version 2.0 (the "License"); +-you may not use this file except in compliance with the License. +-You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +-Unless required by applicable law or agreed to in writing, software +-distributed under the License is distributed on an "AS IS" BASIS, +-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+-See the License for the specific language governing permissions and +-limitations under the License. +-==============================================================================*/ +- +-#ifndef TENSORFLOW_CORE_FRAMEWORK_NUMERIC_OP_BASE_H_ +-#define TENSORFLOW_CORE_FRAMEWORK_NUMERIC_OP_BASE_H_ +- +-#include "tensorflow/core/framework/op_requires.h" +-#include "tensorflow/core/framework/types.h" +-#include "tensorflow/core/framework/types.pb.h" +-#include "tensorflow/core/lib/core/status.h" +- +-namespace tensorflow { +- +-// One input and one output, both the same type. +-template +-class UnaryOpBase : public OpKernelT { +- public: +- explicit UnaryOpBase(OpKernelConstructionT* construction) : +- OpKernelT(construction) { +- const DataType dt = DataTypeToEnum::v(); +- OP_REQUIRES_OK(construction, construction->MatchSignature({dt}, {dt})); +- } +-}; +- +-// Two inputs and one output, all the same type. +-template +-class BinaryOpBase : public OpKernelT { +- public: +- explicit BinaryOpBase(OpKernelConstructionT* construction) : +- OpKernelT(construction) { +- const DataType dt = DataTypeToEnum::v(); +- OP_REQUIRES_OK(construction, construction->MatchSignature({dt, dt}, {dt})); +- } +-}; +-} // namespace tensorflow +- +-#endif // TENSORFLOW_CORE_FRAMEWORK_NUMERIC_OP_BASE_H_ +diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD +index 14f7d99b..5f8fa80b 100644 +--- a/tensorflow/core/kernels/BUILD ++++ b/tensorflow/core/kernels/BUILD +@@ -4048,48 +4048,6 @@ cc_library( + }), + ) + +-# TODO(annarev): conv_ops_3d_headers currently depends on android target build +-# from selected sources. We should switch to use granular dependencies instead. +-# Then, we can just depend on "conv3d". 
+-cc_library( +- name = "conv_3d_mobile", +- hdrs = [ +- "conv_3d.h", +- "eigen_backward_cuboid_convolutions.h", +- "eigen_convolution_helpers.h", +- "eigen_cuboid_convolution.h", +- "eigen_volume_patch.h", +- ], +- deps = [ +- ":eigen_spatial_convolutions-inl", +- ] + select({ +- "//tensorflow:android": [ +- "//tensorflow/core:portable_tensorflow_lib_lite", # TODO(annarev): exclude runtime srcs +- ], +- "//conditions:default": [ +- "//tensorflow/core:framework", +- ], +- }), +-) +- +-cc_library( +- name = "conv_ops_3d_headers", +- hdrs = [ +- "conv_ops_3d.h", +- ], +- deps = select({ +- "//tensorflow:android": [ +- ":conv_3d_mobile", +- "//tensorflow/core:portable_tensorflow_lib_lite", # TODO(annarev): exclude runtime srcs +- ], +- "//conditions:default": [ +- ":conv_3d", +- "//third_party/eigen3", +- "//tensorflow/core:framework", +- ], +- }), +-) +- + tf_kernel_library( + name = "argmax_op", + prefix = "argmax_op", +@@ -4673,6 +4631,7 @@ tf_kernel_library( + "deep_conv2d.h", + "gemm_functors.h", + "winograd_transform.h", ++ "conv_ops_fused_impl.h", + ] + select({ + ":xsmm_convolutions": ["xsmm_conv2d.h"], + "//conditions:default": [], +@@ -4687,8 +4646,6 @@ tf_kernel_library( + prefix = "conv_ops", + deps = [ + ":conv_grad_shape_utils", +- ":conv_ops_3d_headers", +- ":bounds_check", + ":conv_2d", + ":conv_3d", + ":eigen_contraction_kernel", +@@ -6710,7 +6667,6 @@ filegroup( + "conv_2d.h", + "conv_3d.h", + "conv_ops.h", +- "conv_ops_3d.h", + "conv_ops_gpu.h", + "data_format_ops.h", + "depthtospace_op.h", +@@ -7160,7 +7116,6 @@ filegroup( + "stateful_random_ops_cpu_gpu.h", + # Allows conv_3d ops for android but excluded from *_3d* rule above. 
+ "conv_3d.h", +- "conv_ops_3d.h", + "conv_ops_3d.cc", + "conv_ops_gpu.h", + ], +diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc +index 289a083a..52356443 100644 +--- a/tensorflow/core/kernels/conv_ops_3d.cc ++++ b/tensorflow/core/kernels/conv_ops_3d.cc +@@ -16,8 +16,7 @@ limitations under the License. + #define USE_EIGEN_TENSOR + #define EIGEN_USE_THREADS + +-#include "tensorflow/core/kernels/conv_ops_3d.h" +- ++#include "tensorflow/core/framework/kernel_shape_util.h" + #include "tensorflow/core/framework/numeric_op.h" + #include "tensorflow/core/framework/op_kernel.h" + #include "tensorflow/core/framework/register_types.h" +@@ -51,11 +50,146 @@ namespace tensorflow { + typedef Eigen::ThreadPoolDevice CPUDevice; + typedef Eigen::GpuDevice GPUDevice; + ++template ++ struct LaunchConvOp; ++template ++struct LaunchConvOp { ++ static void launch(OpKernelContext* context, bool cudnn_use_autotune, ++ const Tensor& input, const Tensor& filter, ++ const std::array& dilations, ++ const std::array& strides, const Padding padding, ++ TensorFormat data_format, Tensor* output) { ++ OP_REQUIRES(context, data_format == FORMAT_NHWC, ++ errors::InvalidArgument("CPU implementation of Conv3D " ++ "currently only supports the NHWC " ++ "tensor format.")); ++ OP_REQUIRES(context, ++ dilations[0] == 1 && dilations[1] == 1 && dilations[2] == 1, ++ errors::InvalidArgument("CPU implementation of Conv3D " ++ "currently only supports dilated rates " ++ "of 1.")); ++ functor::CuboidConvolution()( ++ context->eigen_device(), output->tensor(), ++ input.tensor(), filter.tensor(), strides[2], strides[1], ++ strides[0], BrainPadding2EigenPadding(padding)); ++ } ++}; ++ ++template ++class Conv3DOp : public BinaryOp { ++ public: ++ explicit Conv3DOp(OpKernelConstruction* context) : BinaryOp(context) { ++ string data_format; ++ OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); ++ OP_REQUIRES(context, FormatFromString(data_format, 
&data_format_), ++ errors::InvalidArgument("Invalid data format")); ++ OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_)); ++ OP_REQUIRES(context, stride_.size() == 5, ++ errors::InvalidArgument("Sliding window strides field must " ++ "specify 5 dimensions")); ++ OP_REQUIRES( ++ context, ++ (GetTensorDim(stride_, data_format_, 'N') == 1 && ++ GetTensorDim(stride_, data_format_, 'C') == 1), ++ errors::InvalidArgument("Current implementation does not yet support " ++ "strides in the batch and depth dimensions.")); ++ OP_REQUIRES( ++ context, ++ (GetTensorDim(stride_, data_format_, '0') > 0 && ++ GetTensorDim(stride_, data_format_, '1') > 0 && ++ GetTensorDim(stride_, data_format_, '2') > 0), ++ errors::InvalidArgument("Spatial strides should be larger than 0.")); ++ OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilation_)); ++ OP_REQUIRES(context, dilation_.size() == 5, ++ errors::InvalidArgument("Dilation rates field must " ++ "specify 5 dimensions")); ++ OP_REQUIRES(context, ++ (GetTensorDim(dilation_, data_format_, 'N') == 1 && ++ GetTensorDim(dilation_, data_format_, 'C') == 1), ++ errors::InvalidArgument( ++ "Current implementation does not yet support " ++ "dilation rates in the batch and depth dimensions.")); ++ OP_REQUIRES( ++ context, ++ (GetTensorDim(dilation_, data_format_, '0') > 0 && ++ GetTensorDim(dilation_, data_format_, '1') > 0 && ++ GetTensorDim(dilation_, data_format_, '2') > 0), ++ errors::InvalidArgument("Dilated rates should be larger than 0.")); ++ OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); ++ cudnn_use_autotune_ = CudnnUseAutotune(); ++ } ++ ++ void Compute(OpKernelContext* context) override { ++ // Input tensor is of the following dimensions: ++ // [ batch, in_z, in_y, in_x, in_channels ] ++ const Tensor& input = context->input(0); ++ ++ // Input filter is of the following dimensions: ++ // [ filter_z, filter_y, filter_x, in_channels, out_channels] ++ const Tensor& filter = context->input(1); ++ ++ 
// NOTE: The ordering of the spatial dimensions is arbitrary, but has to be ++ // kept consistent between input/filter/output. ++ OP_REQUIRES(context, input.dims() == 5, ++ errors::InvalidArgument("input must be 5-dimensional")); ++ OP_REQUIRES(context, filter.dims() == 5, ++ errors::InvalidArgument("filter must be 5-dimensional")); ++ ++ const int64 in_depth = GetTensorDim(input, data_format_, 'C'); ++ const int64 in_batch = GetTensorDim(input, data_format_, 'N'); ++ ++ const int64 filter_depth = filter.dim_size(3); ++ const int64 out_depth = filter.dim_size(4); ++ ++ OP_REQUIRES(context, in_depth % filter_depth == 0, ++ errors::InvalidArgument( ++ "Input depth must be evenly divisible by filter depth: ", ++ in_depth, " vs ", filter_depth)); ++ ++ // Dimension order for these arrays is: z, y, x. ++ std::array input_size = { ++ {GetTensorDim(input, data_format_, '0'), ++ GetTensorDim(input, data_format_, '1'), ++ GetTensorDim(input, data_format_, '2')}}; ++ std::array filter_size = { ++ {filter.dim_size(0), filter.dim_size(1), filter.dim_size(2)}}; ++ std::array dilations = { ++ {GetTensorDim(dilation_, data_format_, '0'), ++ GetTensorDim(dilation_, data_format_, '1'), ++ GetTensorDim(dilation_, data_format_, '2')}}; ++ std::array strides = {{GetTensorDim(stride_, data_format_, '0'), ++ GetTensorDim(stride_, data_format_, '1'), ++ GetTensorDim(stride_, data_format_, '2')}}; ++ std::array out, padding; ++ ++ OP_REQUIRES_OK( ++ context, Get3dOutputSizeV2(input_size, filter_size, dilations, strides, ++ padding_, &out, &padding)); ++ TensorShape out_shape = ShapeFromFormat( ++ data_format_, in_batch, {{out[0], out[1], out[2]}}, out_depth); ++ Tensor* output; ++ OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output)); ++ ++ // Return early if nothing to do. 
++ if (out_shape.num_elements() == 0) return; ++ ++ LaunchConvOp::launch(context, cudnn_use_autotune_, input, filter, ++ dilations, strides, padding_, data_format_, ++ output); ++ } ++ ++ private: ++ std::vector dilation_; ++ std::vector stride_; ++ Padding padding_; ++ TensorFormat data_format_; ++ bool cudnn_use_autotune_; ++}; ++ + #define REGISTER_CPU_KERNEL(T) \ + REGISTER_KERNEL_BUILDER( \ + Name("Conv3D").Device(DEVICE_CPU).TypeConstraint("T"), \ +- Conv3DOp); ++ Conv3DOp); + TF_CALL_half(REGISTER_CPU_KERNEL); + TF_CALL_float(REGISTER_CPU_KERNEL); + TF_CALL_double(REGISTER_CPU_KERNEL); +@@ -73,7 +207,7 @@ typedef AutoTuneSingleton +-struct LaunchConvOp { ++struct LaunchConvOp { + static void launch(OpKernelContext* ctx, bool cudnn_use_autotune, + const Tensor& input_param, const Tensor& filter, + const std::array& dilations, +@@ -559,16 +693,13 @@ DECLARE_GPU_SPEC(double); + // Registration of the GPU implementations. + REGISTER_KERNEL_BUILDER( + Name("Conv3D").Device(DEVICE_GPU).TypeConstraint("T"), +- Conv3DOp); ++ Conv3DOp); + REGISTER_KERNEL_BUILDER( + Name("Conv3D").Device(DEVICE_GPU).TypeConstraint("T"), +- Conv3DOp); ++ Conv3DOp); + REGISTER_KERNEL_BUILDER( + Name("Conv3D").Device(DEVICE_GPU).TypeConstraint("T"), +- Conv3DOp); ++ Conv3DOp); + #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM + + } // namespace tensorflow +diff --git a/tensorflow/core/kernels/conv_ops_3d.h b/tensorflow/core/kernels/conv_ops_3d.h +index 9dcdea5b..e69de29b 100644 +--- a/tensorflow/core/kernels/conv_ops_3d.h ++++ b/tensorflow/core/kernels/conv_ops_3d.h +@@ -1,187 +0,0 @@ +-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +- +-Licensed under the Apache License, Version 2.0 (the "License"); +-you may not use this file except in compliance with the License. 
+-You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +-Unless required by applicable law or agreed to in writing, software +-distributed under the License is distributed on an "AS IS" BASIS, +-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-See the License for the specific language governing permissions and +-limitations under the License. +-==============================================================================*/ +-#ifndef TENSORFLOW_CORE_KERNELS_CONV_OPS_3D_H_ +-#define TENSORFLOW_CORE_KERNELS_CONV_OPS_3D_H_ +- +-#include +- +-#define USE_EIGEN_TENSOR +-#define EIGEN_USE_THREADS +- +-#include "tensorflow/core/framework/numeric_op_base.h" +-#include "tensorflow/core/framework/kernel_shape_util.h" +-#include "tensorflow/core/framework/op_requires.h" +-#include "tensorflow/core/framework/ops_util.h" +-#include "tensorflow/core/framework/tensor.h" +-#include "tensorflow/core/framework/tensor_shape.h" +-#include "tensorflow/core/kernels/conv_3d.h" +-#include "tensorflow/core/platform/errors.h" +-#include "tensorflow/core/util/padding.h" +-#include "tensorflow/core/util/tensor_format.h" +-#if GOOGLE_CUDA +-#include "tensorflow/core/util/use_cudnn.h" +-#endif +- +-namespace tensorflow { +-typedef Eigen::ThreadPoolDevice CPUDevice; +- +-template +-struct LaunchConvOp; +- +-template +-struct LaunchConvOp { +- static void launch(OpKernelContextT* context, bool cudnn_use_autotune, +- const Tensor& input, const Tensor& filter, +- const std::array& dilations, +- const std::array& strides, const Padding padding, +- TensorFormat data_format, Tensor* output) { +- OP_REQUIRES(context, data_format == FORMAT_NHWC, +- errors::InvalidArgument("CPU implementation of Conv3D " +- "currently only supports the NHWC " +- "tensor format.")); +- OP_REQUIRES(context, +- dilations[0] == 1 && dilations[1] == 1 && dilations[2] == 1, +- errors::InvalidArgument("CPU implementation of Conv3D " +- "currently only supports 
dilated rates " +- "of 1.")); +- functor::CuboidConvolution()( +- context->template eigen_device(), output->tensor(), +- input.tensor(), filter.tensor(), strides[2], strides[1], +- strides[0], BrainPadding2EigenPadding(padding)); +- } +-}; +- +-template +-class Conv3DOp : public BinaryOpBase { +- public: +- explicit Conv3DOp(OpKernelConstructionT* context) : +- BinaryOpBase(context) { +- string data_format; +- OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); +- OP_REQUIRES(context, FormatFromString(data_format, &data_format_), +- errors::InvalidArgument("Invalid data format")); +- OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_)); +- OP_REQUIRES(context, stride_.size() == 5, +- errors::InvalidArgument("Sliding window strides field must " +- "specify 5 dimensions")); +- OP_REQUIRES( +- context, +- (GetTensorDim(stride_, data_format_, 'N') == 1 && +- GetTensorDim(stride_, data_format_, 'C') == 1), +- errors::InvalidArgument("Current implementation does not yet support " +- "strides in the batch and depth dimensions.")); +- OP_REQUIRES( +- context, +- (GetTensorDim(stride_, data_format_, '0') > 0 && +- GetTensorDim(stride_, data_format_, '1') > 0 && +- GetTensorDim(stride_, data_format_, '2') > 0), +- errors::InvalidArgument("Spatial strides should be larger than 0.")); +- OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilation_)); +- OP_REQUIRES(context, dilation_.size() == 5, +- errors::InvalidArgument("Dilation rates field must " +- "specify 5 dimensions")); +- OP_REQUIRES(context, +- (GetTensorDim(dilation_, data_format_, 'N') == 1 && +- GetTensorDim(dilation_, data_format_, 'C') == 1), +- errors::InvalidArgument( +- "Current implementation does not yet support " +- "dilation rates in the batch and depth dimensions.")); +- OP_REQUIRES( +- context, +- (GetTensorDim(dilation_, data_format_, '0') > 0 && +- GetTensorDim(dilation_, data_format_, '1') > 0 && +- GetTensorDim(dilation_, data_format_, '2') > 0), +- 
errors::InvalidArgument("Dilated rates should be larger than 0.")); +- OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); +-#if GOOGLE_CUDA +- cudnn_use_autotune_ = CudnnUseAutotune(); +-#else +- cudnn_use_autotune_ = false; +-#endif +- } +- +- void Compute(OpKernelContextT* context) override { +- // Input tensor is of the following dimensions: +- // [ batch, in_z, in_y, in_x, in_channels ] +- const Tensor& input = context->input(0); +- +- // Input filter is of the following dimensions: +- // [ filter_z, filter_y, filter_x, in_channels, out_channels] +- const Tensor& filter = context->input(1); +- +- // NOTE: The ordering of the spatial dimensions is arbitrary, but has to be +- // kept consistent between input/filter/output. +- OP_REQUIRES(context, input.dims() == 5, +- errors::InvalidArgument("input must be 5-dimensional")); +- OP_REQUIRES(context, filter.dims() == 5, +- errors::InvalidArgument("filter must be 5-dimensional")); +- +- const int64 in_depth = GetTensorDim(input, data_format_, 'C'); +- const int64 in_batch = GetTensorDim(input, data_format_, 'N'); +- +- const int64 filter_depth = filter.dim_size(3); +- const int64 out_depth = filter.dim_size(4); +- +- OP_REQUIRES(context, in_depth % filter_depth == 0, +- errors::InvalidArgument( +- "Input depth must be evenly divisible by filter depth: ", +- in_depth, " vs ", filter_depth)); +- +- // Dimension order for these arrays is: z, y, x. 
+- std::array input_size = { +- {GetTensorDim(input, data_format_, '0'), +- GetTensorDim(input, data_format_, '1'), +- GetTensorDim(input, data_format_, '2')}}; +- std::array filter_size = { +- {filter.dim_size(0), filter.dim_size(1), filter.dim_size(2)}}; +- std::array dilations = { +- {GetTensorDim(dilation_, data_format_, '0'), +- GetTensorDim(dilation_, data_format_, '1'), +- GetTensorDim(dilation_, data_format_, '2')}}; +- std::array strides = {{GetTensorDim(stride_, data_format_, '0'), +- GetTensorDim(stride_, data_format_, '1'), +- GetTensorDim(stride_, data_format_, '2')}}; +- std::array out, padding; +- +- OP_REQUIRES_OK( +- context, Get3dOutputSizeV2(input_size, filter_size, dilations, strides, +- padding_, &out, &padding)); +- TensorShape out_shape = ShapeFromFormat( +- data_format_, in_batch, {{out[0], out[1], out[2]}}, out_depth); +- Tensor* output; +- OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output)); +- +- // Return early if nothing to do. +- if (out_shape.num_elements() == 0) return; +- +- LaunchConvOp::launch( +- context, cudnn_use_autotune_, input, filter, +- dilations, strides, padding_, data_format_, +- output); +- } +- +- private: +- std::vector dilation_; +- std::vector stride_; +- Padding padding_; +- TensorFormat data_format_; +- bool cudnn_use_autotune_; +-}; +- +-} // namespace tensorflow +- +- +-#endif // TENSORFLOW_CORE_KERNELS_CONV_OPS_3D_H_ +-- +2.23.0 + diff --git a/CVE-2021-29517-2.patch b/CVE-2021-29517-2.patch new file mode 100644 index 0000000000000000000000000000000000000000..542a3c8b23bc1172ac45afffd6036833cd9fa0a4 --- /dev/null +++ b/CVE-2021-29517-2.patch @@ -0,0 +1,37 @@ +From 799f835a3dfa00a4d852defa29b15841eea9d64f Mon Sep 17 00:00:00 2001 +From: Mihai Maruseac +Date: Mon, 19 Apr 2021 09:56:46 -0700 +Subject: [PATCH] Fix 2 issues with `Conv3D`. 
+ +--- + tensorflow/core/kernels/conv_ops_3d.cc | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc +index 52356443..75a0a043 100644 +--- a/tensorflow/core/kernels/conv_ops_3d.cc ++++ b/tensorflow/core/kernels/conv_ops_3d.cc +@@ -68,6 +68,11 @@ struct LaunchConvOp { + errors::InvalidArgument("CPU implementation of Conv3D " + "currently only supports dilated rates " + "of 1.")); ++ OP_REQUIRES(context, filter.dim_size(3) == input.dim_size(input.dims() - 1), ++ errors::InvalidArgument( ++ "Number of channels in filter (", filter.dim_size(3), ++ ") must match last dimension of input (", ++ input.dim_size(input.dims() - 1), ")")); + functor::CuboidConvolution()( + context->eigen_device(), output->tensor(), + input.tensor(), filter.tensor(), strides[2], strides[1], +@@ -141,6 +146,8 @@ class Conv3DOp : public BinaryOp { + const int64 filter_depth = filter.dim_size(3); + const int64 out_depth = filter.dim_size(4); + ++ OP_REQUIRES(context, filter_depth != 0, ++ errors::InvalidArgument("filter_depth must be non-zero")); + OP_REQUIRES(context, in_depth % filter_depth == 0, + errors::InvalidArgument( + "Input depth must be evenly divisible by filter depth: ", +-- +2.23.0 + diff --git a/CVE-2021-29518.patch b/CVE-2021-29518.patch new file mode 100644 index 0000000000000000000000000000000000000000..e026c6a47a0c6fc397d29244b3b02688a1b8469b --- /dev/null +++ b/CVE-2021-29518.patch @@ -0,0 +1,42 @@ +From ff70c47a396ef1e3cb73c90513da4f5cb71bebba Mon Sep 17 00:00:00 2001 +From: Amit Patankar +Date: Tue, 13 Apr 2021 14:24:00 -0700 +Subject: [PATCH] Fix `tf.raw_ops.GetSessionTensor` and + `tf.raw_ops.DeleteSessionTensor` null pointer dereferences. 
+ +--- + tensorflow/core/kernels/session_ops.cc | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/tensorflow/core/kernels/session_ops.cc b/tensorflow/core/kernels/session_ops.cc +index e7e73549..dab59e70 100644 +--- a/tensorflow/core/kernels/session_ops.cc ++++ b/tensorflow/core/kernels/session_ops.cc +@@ -119,6 +119,10 @@ class GetSessionTensorOp : public OpKernel { + const string& name = handle.scalar()(); + Tensor val; +- OP_REQUIRES_OK(ctx, ctx->session_state()->GetTensor(name, &val)); ++ auto session_state = ctx->session_state(); ++ OP_REQUIRES(ctx, session_state != nullptr, ++ errors::FailedPrecondition( ++ "GetSessionTensor called on null session state")); ++ OP_REQUIRES_OK(ctx, session_state->GetTensor(name, &val)); + ctx->set_output(0, val); + } + +@@ -160,7 +164,11 @@ class DeleteSessionTensorOp : public OpKernel { + void Compute(OpKernelContext* ctx) override { + const Tensor& handle = ctx->input(0); + const string& name = handle.scalar()(); +- OP_REQUIRES_OK(ctx, ctx->session_state()->DeleteTensor(name)); ++ auto session_state = ctx->session_state(); ++ OP_REQUIRES(ctx, session_state != nullptr, ++ errors::FailedPrecondition( ++ "DeleteSessionTensor called on null session state")); ++ OP_REQUIRES_OK(ctx, session_state->DeleteTensor(name)); + } + + TF_DISALLOW_COPY_AND_ASSIGN(DeleteSessionTensorOp); +-- +2.23.0 + diff --git a/CVE-2021-29521.patch b/CVE-2021-29521.patch new file mode 100644 index 0000000000000000000000000000000000000000..16a6ea723856ef6a29cd589a50da2a050ab7be2a --- /dev/null +++ b/CVE-2021-29521.patch @@ -0,0 +1,35 @@ +From c57c0b9f3a4f8684f3489dd9a9ec627ad8b599f5 Mon Sep 17 00:00:00 2001 +From: Amit Patankar +Date: Mon, 19 Apr 2021 11:33:50 -0700 +Subject: [PATCH] Fix the segfault in `tf.raw_ops.SparseCountSparseOutput`. 
+ +--- + tensorflow/core/kernels/count_ops.cc | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/tensorflow/core/kernels/count_ops.cc b/tensorflow/core/kernels/count_ops.cc +index b7bb3ed9..67aafebe 100644 +--- a/tensorflow/core/kernels/count_ops.cc ++++ b/tensorflow/core/kernels/count_ops.cc +@@ -200,9 +200,17 @@ class SparseCount : public OpKernel { + "The shape argument requires at least one element.")); + + bool is_1d = shape.NumElements() == 1; +- int num_batches = is_1d ? 1 : shape.flat()(0); ++ auto shape_vector = shape.flat(); ++ int num_batches = is_1d ? 1 : shape_vector(0); + int num_values = values.NumElements(); + ++ for (int b = 0; b < shape_vector.size(); b++) { ++ OP_REQUIRES(context, shape_vector(b) >= 0, ++ errors::InvalidArgument( ++ "Elements in dense_shape must be >= 0. Instead got:", ++ shape.DebugString())); ++ } ++ + OP_REQUIRES(context, num_values == indices.shape().dim_size(0), + errors::InvalidArgument( + "Number of values must match first dimension of indices.", +-- +2.23.0 + diff --git a/CVE-2021-29526-1.patch b/CVE-2021-29526-1.patch new file mode 100644 index 0000000000000000000000000000000000000000..87eb9fd5772d6fea3dd181a9798fee98255d83b0 --- /dev/null +++ b/CVE-2021-29526-1.patch @@ -0,0 +1,322 @@ +From 7b8db6083b34520688dbc71f341f7aeaf156bf17 Mon Sep 17 00:00:00 2001 +From: Eugene Zhulenev +Date: Fri, 19 Mar 2021 16:16:41 -0700 +Subject: [PATCH] Implement grouped convolution on CPU + +To get better compute resources utilization group-compute loop has to be parallelized, but it involves a lot of changes in Conv2D primitives. Will address that later if it will be critical for some of the users. 
+ +Fix for: https://github.com/tensorflow/tensorflow/issues/29005 + +PiperOrigin-RevId: 363991782 +Change-Id: I97f375b1133833c4de5181199316be7cbf4ebee0 +--- + tensorflow/core/kernels/BUILD | 1 + + tensorflow/core/kernels/conv_2d.h | 54 +++++++ + tensorflow/core/kernels/conv_ops.cc | 133 ++++++++++++++++-- + .../python/kernel_tests/conv_ops_test.py | 20 +-- + 4 files changed, 189 insertions(+), 19 deletions(-) + +diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD +index 8e49f1e0a5caf..bc455626f4322 100644 +--- a/tensorflow/core/kernels/BUILD ++++ b/tensorflow/core/kernels/BUILD +@@ -3818,6 +3818,7 @@ tf_kernel_library( + ":ops_util", + "@com_google_absl//absl/base:dynamic_annotations", + "@com_google_absl//absl/strings", ++ "@com_google_absl//absl/synchronization", + "//third_party/eigen3", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", +diff --git a/tensorflow/core/kernels/conv_2d.h b/tensorflow/core/kernels/conv_2d.h +index b9a8c977e11ee..87df4a848dd56 100644 +--- a/tensorflow/core/kernels/conv_2d.h ++++ b/tensorflow/core/kernels/conv_2d.h +@@ -43,6 +43,9 @@ void SpatialConvolutionFunc(const Device& d, Output output, Input input, + padding_bottom); + } + ++// TODO(ezhulenev): Non-templated `operator()` are required by explicit template ++// instantiations for the GPU device. However they are almost certainly not used ++// in any of the kernel implementation. Check if they can be removed. 
+ template + struct SpatialConvolution { +@@ -55,6 +58,16 @@ struct SpatialConvolution { + SpatialConvolutionFunc(d, output, input, filter, row_stride, col_stride, + row_dilation, col_dilation, padding, output_kernel); + } ++ ++ template ++ void operator()(const Device& d, Output output, Input input, Filter filter, ++ int row_stride, int col_stride, int row_dilation, ++ int col_dilation, const Eigen::PaddingType& padding, ++ const OutputKernel& output_kernel = OutputKernel()) { ++ SpatialConvolutionFunc(d, output, input, filter, row_stride, col_stride, ++ row_dilation, col_dilation, padding, output_kernel); ++ } ++ + void operator()(const Device& d, typename TTypes::Tensor output, + typename TTypes::ConstTensor input, + typename TTypes::ConstTensor filter, int row_stride, +@@ -67,6 +80,18 @@ struct SpatialConvolution { + col_dilation, Eigen::PaddingType::PADDING_VALID, output_kernel, + padding_top, padding_bottom, padding_left, padding_right); + } ++ ++ template ++ void operator()(const Device& d, Output output, Input input, Filter filter, ++ int row_stride, int col_stride, int row_dilation, ++ int col_dilation, int padding_top, int padding_bottom, ++ int padding_left, int padding_right, ++ const OutputKernel& output_kernel = OutputKernel()) { ++ SpatialConvolutionFunc( ++ d, output, input, filter, row_stride, col_stride, row_dilation, ++ col_dilation, Eigen::PaddingType::PADDING_VALID, output_kernel, ++ padding_top, padding_bottom, padding_left, padding_right); ++ } + }; + + template +@@ -84,6 +109,20 @@ struct SpatialConvolution { + row_dilation, output_kernel) + .template cast(); + } ++ ++ template ++ void operator()(const Device& d, Output output, Input input, Filter filter, ++ int row_stride, int col_stride, int row_dilation, ++ int col_dilation, const Eigen::PaddingType& padding, ++ const OutputKernel& output_kernel = OutputKernel()) { ++ output.device(d) = ++ Eigen::SpatialConvolution(input.template cast(), ++ filter.template cast(), col_stride, ++ 
row_stride, padding, col_dilation, ++ row_dilation, output_kernel) ++ .template cast(); ++ } ++ + void operator()(const Device& d, + typename TTypes::Tensor output, + typename TTypes::ConstTensor input, +@@ -100,6 +139,21 @@ struct SpatialConvolution { + padding_bottom) + .template cast(); + } ++ ++ template ++ void operator()(const Device& d, Output output, Input input, Filter filter, ++ int row_stride, int col_stride, int row_dilation, ++ int col_dilation, int padding_top, int padding_bottom, ++ int padding_left, int padding_right, ++ const OutputKernel& output_kernel = OutputKernel()) { ++ output.device(d) = ++ Eigen::SpatialConvolution( ++ input.template cast(), filter.template cast(), ++ col_stride, row_stride, Eigen::PaddingType::PADDING_VALID, ++ col_dilation, row_dilation, output_kernel, padding_left, ++ padding_right, padding_top, padding_bottom) ++ .template cast(); ++ } + }; + + template +diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc +index 025a8e37a94e9..8fdfe04bd1c67 100644 +--- a/tensorflow/core/kernels/conv_ops.cc ++++ b/tensorflow/core/kernels/conv_ops.cc +@@ -30,6 +30,7 @@ limitations under the License. + #include + #include + ++#include "absl/synchronization/blocking_counter.h" + #include "tensorflow/core/framework/allocator.h" + #include "tensorflow/core/framework/bounds_check.h" + #include "tensorflow/core/framework/kernel_shape_util.h" +@@ -138,6 +139,98 @@ struct LaunchGeneric { + } + } + }; ++ ++// Compute grouped 2D convolutions on CPU. Unlike grouped convolution ++// implementation in cuDNN this is faaaaaar from optimal and needs more work ++// to deliver competitive performance. Currently it exists to close the feature ++// parity gap between convolution operations on different devices. 
++template ++struct LaunchGrouped { ++ void operator()(OpKernelContext* ctx, const Tensor& input, ++ const Tensor& filter, int row_stride, int col_stride, ++ int row_dilation, int col_dilation, const Padding& padding, ++ const std::vector& explicit_paddings, Tensor* output, ++ TensorFormat data_format) { ++ DCHECK(data_format == FORMAT_NHWC) ++ << "Grouped conv implementation only " ++ "supports NHWC tensor format for now."; ++ ++ const int64 in_depth = input.dim_size(3); ++ const int64 patch_depth = filter.dim_size(2); ++ const int64 num_groups = in_depth / patch_depth; ++ ++ // Shuffle input/filter tensors to have group as a leading dimension. ++ std::array shuffle({3, 0, 1, 2, 4}); ++ ++ // Compute pre shuffle dimemnsions. ++ auto pre_shuffle = [&](const Tensor& tensor) -> std::array { ++ return {tensor.dim_size(0), tensor.dim_size(1), tensor.dim_size(2), ++ num_groups, tensor.dim_size(3) / num_groups}; ++ }; ++ ++ // Compute post shuffle dimemnsions. ++ auto post_shuffle = [&](const Tensor& tensor) -> std::array { ++ return {num_groups, tensor.dim_size(0), tensor.dim_size(1), ++ tensor.dim_size(2), tensor.dim_size(3) / num_groups}; ++ }; ++ ++ auto& device = ctx->eigen_device(); ++ ++ absl::BlockingCounter shuffles_completed(2); ++ auto on_shuffled = [&]() { shuffles_completed.DecrementCount(); }; ++ ++ // Shuffle input into temporary tensor. ++ Tensor input_shuffled(input.dtype(), TensorShape(post_shuffle(input))); ++ input_shuffled.tensor().device(device, on_shuffled) = ++ input.shaped(pre_shuffle(input)).shuffle(shuffle); ++ ++ // Shuffle filter into temporary tensor. ++ Tensor filter_shuffled(filter.dtype(), TensorShape(post_shuffle(filter))); ++ filter_shuffled.tensor().device(device, on_shuffled) = ++ filter.shaped(pre_shuffle(filter)).shuffle(shuffle); ++ ++ // Wait for the completion of input/filter shuffles. ++ shuffles_completed.Wait(); ++ ++ // Write group convolution results into temporary output tensor. 
++ Tensor output_shuffled(output->dtype(), TensorShape(post_shuffle(*output))); ++ ++ for (int64 i = 0; i < num_groups; ++i) { ++ // TODO(ezhulenev): Run this loop using `parallelFor` (regular parallelFor ++ // will lead to deadlock, SpatialConvolution has to use async Eigen ++ // assignment). This requires small changes to Eigen to support async ++ // exeuction for tensor chipping operation. ++ ++ // TODO(ezhulenev): Grouped convolution should also support 1x1 filter ++ // optimization. ++ ++ auto input_slice = input_shuffled.tensor().template chip<0>(i); ++ auto filter_slice = filter_shuffled.tensor().template chip<0>(i); ++ auto output_slice = output_shuffled.tensor().template chip<0>(i); ++ ++ if (padding == EXPLICIT) { ++ functor::SpatialConvolution()( ++ ctx->eigen_device(), output_slice, input_slice, ++ filter_slice, row_stride, col_stride, row_dilation, col_dilation, ++ static_cast(explicit_paddings[2]), ++ static_cast(explicit_paddings[3]), ++ static_cast(explicit_paddings[4]), ++ static_cast(explicit_paddings[5])); ++ } else { ++ functor::SpatialConvolution()( ++ ctx->eigen_device(), output_slice, input_slice, ++ filter_slice, row_stride, col_stride, row_dilation, col_dilation, ++ BrainPadding2EigenPadding(padding)); ++ } ++ } ++ ++ // Shuffle temporary output back into pre-shuffled shape. ++ std::array rev_shuffle({1, 2, 3, 0, 4}); ++ output->shaped(pre_shuffle(*output)).device(device) = ++ output_shuffled.tensor().shuffle(rev_shuffle); ++ } ++}; ++ + } // namespace + + template +@@ -155,14 +248,6 @@ struct LaunchConv2DOp { + ToString(data_format))); + return; + } +- const int64 in_depth = GetTensorDim(input, data_format, 'C'); +- OP_REQUIRES(ctx, in_depth == filter.dim_size(2), +- errors::Unimplemented( +- "The Conv2D op currently does not support grouped " +- "convolutions on the CPU. 
A grouped convolution was " +- "attempted to be run because the input depth of ", +- in_depth, " does not match the filter input depth of ", +- filter.dim_size(2))); + + for (int64 explicit_padding : explicit_paddings) { + if (!FastBoundsCheck(explicit_padding, std::numeric_limits::max())) { +@@ -170,9 +255,35 @@ struct LaunchConv2DOp { + return; + } + } +- LaunchGeneric()(ctx, input, filter, row_stride, col_stride, +- row_dilation, col_dilation, padding, +- explicit_paddings, output, data_format); ++ ++ const int64 in_depth = input.dim_size(3); ++ const int64 out_depth = output->dim_size(3); ++ const int64 patch_depth = filter.dim_size(2); ++ ++ if (in_depth % patch_depth != 0) { ++ ctx->SetStatus(errors::InvalidArgument( ++ "input depth must be evenly divisible by filter depth: ", in_depth, ++ " vs ", patch_depth)); ++ return; ++ } ++ ++ const int64 num_groups = in_depth / patch_depth; ++ if (out_depth % num_groups != 0 || out_depth < num_groups) { ++ ctx->SetStatus(errors::InvalidArgument( ++ "output depth must be evenly divisible by number of groups: ", ++ out_depth, " vs ", num_groups)); ++ return; ++ } ++ ++ if (in_depth != patch_depth) { ++ LaunchGrouped()(ctx, input, filter, row_stride, col_stride, ++ row_dilation, col_dilation, padding, explicit_paddings, ++ output, data_format); ++ } else { ++ LaunchGeneric()(ctx, input, filter, row_stride, col_stride, ++ row_dilation, col_dilation, padding, ++ explicit_paddings, output, data_format); ++ } + } + }; + +diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py +index 44a67ccc55f0a..92af04359caa9 100644 +--- a/tensorflow/python/kernel_tests/conv_ops_test.py ++++ b/tensorflow/python/kernel_tests/conv_ops_test.py +@@ -834,17 +834,21 @@ def MakeConv2d(inputs, filters): + results[0], results[1], atol=tol_to_use, rtol=tol_to_use) + + @test_util.run_in_graph_and_eager_modes +- @test_util.run_cuda_only + def testConv2DGroupConvFwd(self): +- for data_format in 
["NHWC", "NCHW"]: ++ if test.is_gpu_available(cuda_only=True): ++ data_formats = ["NHWC", "NCHW"] ++ else: ++ data_formats = ["NHWC"] ++ for data_format in data_formats: + for dilation in [1, 2]: + for stride in [1, 2]: +- self._VerifyGroupConvFwd([10, 32, 32, 16], [3, 3, 4, 8], +- dilations=[dilation, dilation], +- strides=[stride, stride], +- padding="SAME", +- data_format=data_format, +- dtype=dtypes.float32) ++ for filter_dims in [[3, 3, 4, 8], [1, 1, 2, 16]]: ++ self._VerifyGroupConvFwd([10, 32, 32, 16], filter_dims, ++ dilations=[dilation, dilation], ++ strides=[stride, stride], ++ padding="SAME", ++ data_format=data_format, ++ dtype=dtypes.float32) + + @test_util.deprecated_graph_mode_only + @test_util.run_cuda_only diff --git a/CVE-2021-29526-2.patch b/CVE-2021-29526-2.patch new file mode 100644 index 0000000000000000000000000000000000000000..5a1bb66149131a84feec7dfe818ab8546784eab9 --- /dev/null +++ b/CVE-2021-29526-2.patch @@ -0,0 +1,50 @@ +From b12aa1d44352de21d1a6faaf04172d8c2508b42b Mon Sep 17 00:00:00 2001 +From: Mihai Maruseac +Date: Mon, 19 Apr 2021 18:32:56 -0700 +Subject: [PATCH] Fix one more FPE. 
+ +--- + tensorflow/core/kernels/conv_ops.cc | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc +index ef13eb3f..2d357710 100644 +--- a/tensorflow/core/kernels/conv_ops.cc ++++ b/tensorflow/core/kernels/conv_ops.cc +@@ -260,6 +260,11 @@ struct LaunchConv2DOp { + const int64 out_depth = output->dim_size(3); + const int64 patch_depth = filter.dim_size(2); + ++ if (patch_depth <= 0) { ++ ctx->SetStatus(errors::InvalidArgument( ++ "filter depth must be stricly positive, got ", patch_depth)); ++ return; ++ } + if (in_depth % patch_depth != 0) { + ctx->SetStatus(errors::InvalidArgument( + "input depth must be evenly divisible by filter depth: ", in_depth, +@@ -268,6 +273,11 @@ struct LaunchConv2DOp { + } + + const int64 num_groups = in_depth / patch_depth; ++ if (num_groups <= 0) { ++ ctx->SetStatus(errors::InvalidArgument( ++ "number of groups must be stricly positive, got ", num_groups)); ++ return; ++ } + if (out_depth % num_groups != 0 || out_depth < num_groups) { + ctx->SetStatus(errors::InvalidArgument( + "output depth must be evenly divisible by number of groups: ", +@@ -536,6 +546,9 @@ Status ComputeConv2DDimension(const Conv2DParameters& params, + errors::InvalidArgument("Patch depth too large")); + const int in_depth = static_cast(in_depth_raw); + const int patch_depth = static_cast(patch_depth_raw); ++ TF_REQUIRES(patch_depth > 0, ++ errors::InvalidArgument( ++ "filter depth must be stricly positive, got", patch_depth)); + TF_REQUIRES(in_depth % patch_depth == 0, + errors::InvalidArgument( + "input depth must be evenly divisible by filter depth: ", +-- +2.23.0 + diff --git a/CVE-2021-29533.patch b/CVE-2021-29533.patch new file mode 100644 index 0000000000000000000000000000000000000000..11aef222c3c571f5c18d9f5b7f4257544425a623 --- /dev/null +++ b/CVE-2021-29533.patch @@ -0,0 +1,76 @@ +From b432a38fe0e1b4b904a6c222cbce794c39703e87 Mon Sep 17 00:00:00 2001 +From: Amit 
Patankar +Date: Wed, 21 Apr 2021 15:57:36 -0700 +Subject: [PATCH] Fix overflow CHECK issue with `tf.raw_ops.DrawBoundingBoxes`. + +--- + .../core/kernels/draw_bounding_box_op.cc | 49 ++++++++++++++----- + 1 file changed, 37 insertions(+), 12 deletions(-) + +diff --git a/tensorflow/core/kernels/draw_bounding_box_op.cc b/tensorflow/core/kernels/draw_bounding_box_op.cc +index 30de99b7..39519523 100644 +--- a/tensorflow/core/kernels/draw_bounding_box_op.cc ++++ b/tensorflow/core/kernels/draw_bounding_box_op.cc +@@ -147,22 +147,47 @@ class DrawBoundingBoxesOp : public OpKernel { + + // At this point, {min,max}_box_{row,col}_clamp are inside the + // image. +- CHECK_GE(min_box_row_clamp, 0); +- CHECK_GE(max_box_row_clamp, 0); +- CHECK_LT(min_box_row_clamp, height); +- CHECK_LT(max_box_row_clamp, height); +- CHECK_GE(min_box_col_clamp, 0); +- CHECK_GE(max_box_col_clamp, 0); +- CHECK_LT(min_box_col_clamp, width); +- CHECK_LT(max_box_col_clamp, width); ++ ++ OP_REQUIRES( ++ context, min_box_row_clamp >= 0, ++ errors::InvalidArgument("Min box row clamp is less than 0.")); ++ OP_REQUIRES( ++ context, max_box_row_clamp >= 0, ++ errors::InvalidArgument("Max box row clamp is less than 0.")); ++ OP_REQUIRES(context, min_box_row_clamp <= height, ++ errors::InvalidArgument( ++ "Min box row clamp is greater than height.")); ++ OP_REQUIRES(context, max_box_row_clamp <= height, ++ errors::InvalidArgument( ++ "Max box row clamp is greater than height.")); ++ ++ OP_REQUIRES( ++ context, min_box_col_clamp >= 0, ++ errors::InvalidArgument("Min box col clamp is less than 0.")); ++ OP_REQUIRES( ++ context, max_box_col_clamp >= 0, ++ errors::InvalidArgument("Max box col clamp is less than 0.")); ++ OP_REQUIRES(context, min_box_col_clamp <= width, ++ errors::InvalidArgument( ++ "Min box col clamp is greater than width.")); ++ OP_REQUIRES(context, max_box_col_clamp <= width, ++ errors::InvalidArgument( ++ "Max box col clamp is greater than width.")); + + // At this point, the min_box_row and 
min_box_col are either + // in the image or above/left of it, and max_box_row and + // max_box_col are either in the image or below/right or it. +- CHECK_LT(min_box_row, height); +- CHECK_GE(max_box_row, 0); +- CHECK_LT(min_box_col, width); +- CHECK_GE(max_box_col, 0); ++ ++ OP_REQUIRES( ++ context, min_box_row <= height, ++ errors::InvalidArgument("Min box row is greater than height.")); ++ OP_REQUIRES(context, max_box_row >= 0, ++ errors::InvalidArgument("Max box row is less than 0.")); ++ OP_REQUIRES( ++ context, min_box_col <= width, ++ errors::InvalidArgument("Min box col is greater than width.")); ++ OP_REQUIRES(context, max_box_col >= 0, ++ errors::InvalidArgument("Max box col is less than 0.")); + + // Draw top line. + if (min_box_row >= 0) { +-- +2.23.0 + diff --git a/CVE-2021-29534.patch b/CVE-2021-29534.patch new file mode 100644 index 0000000000000000000000000000000000000000..8c26d769e7779ff20822d0eb9bb71575d3c784c0 --- /dev/null +++ b/CVE-2021-29534.patch @@ -0,0 +1,65 @@ +diff --git a/tensorflow/core/kernels/sparse_tensors_map_ops.cc b/tensorflow/core/kernels/sparse_tensors_map_ops.cc +index c2c0e43c..12c68406 100644 +--- a/tensorflow/core/kernels/sparse_tensors_map_ops.cc ++++ b/tensorflow/core/kernels/sparse_tensors_map_ops.cc +@@ -21,16 +21,12 @@ limitations under the License. 
+ #include + #include + +-#include "tensorflow/core/framework/op_kernel.h" +-#include "tensorflow/core/framework/register_types.h" +- +-#include "tensorflow/core/framework/op_kernel.h" +-#include "tensorflow/core/framework/register_types.h" + #include "tensorflow/core/framework/resource_mgr.h" + #include "tensorflow/core/framework/tensor.h" + #include "tensorflow/core/framework/tensor_util.h" + #include "tensorflow/core/framework/types.h" + #include "tensorflow/core/lib/gtl/inlined_vector.h" ++#include "tensorflow/core/util/overflow.h" + #include "tensorflow/core/util/sparse/sparse_tensor.h" + + namespace tensorflow { +@@ -255,6 +251,21 @@ class AddManySparseToTensorsMapOp : public SparseTensorAccessingOp { + "Rank of input SparseTensor should be > 1, but saw rank: ", rank)); + + TensorShape tensor_input_shape(input_shape->vec()); ++ auto input_shape_vec = input_shape->vec(); ++ int new_num_elements = 1; ++ bool overflow_ocurred = false; ++ for (int i = 0; i < input_shape_vec.size(); i++) { ++ new_num_elements = ++ MultiplyWithoutOverflow(new_num_elements, input_shape_vec(i)); ++ if (new_num_elements < 0) { ++ overflow_ocurred = true; ++ } ++ } ++ ++ OP_REQUIRES( ++ context, !overflow_ocurred, ++ errors::Internal("Encountered overflow from large input shape.")); ++ TensorShape tensor_input_shape(input_shape_vec); + gtl::InlinedVector std_order(rank); + std::iota(std_order.begin(), std_order.end(), 0); + SparseTensor input_st; +@@ -263,7 +274,7 @@ class AddManySparseToTensorsMapOp : public SparseTensorAccessingOp { + &input_st)); + + auto input_shape_t = input_shape->vec(); +- const int64 N = input_shape_t(0); ++ const int64 N = input_shape_vec(0); + + Tensor sparse_handles(DT_INT64, TensorShape({N})); + auto sparse_handles_t = sparse_handles.vec(); +@@ -274,7 +285,7 @@ class AddManySparseToTensorsMapOp : public SparseTensorAccessingOp { + // minibatch entries. 
+ TensorShape output_shape; + OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape( +- input_shape_t.data() + 1, ++ input_shape_vec.data() + 1, + input_shape->NumElements() - 1, &output_shape)); + + // Get groups by minibatch dimension +-- +2.23.0 + diff --git a/CVE-2021-29535.patch b/CVE-2021-29535.patch new file mode 100644 index 0000000000000000000000000000000000000000..a708b4d2d97926829e5dc234d4b3ec39b0f3f53b --- /dev/null +++ b/CVE-2021-29535.patch @@ -0,0 +1,42 @@ +From efea03b38fb8d3b81762237dc85e579cc5fc6e87 Mon Sep 17 00:00:00 2001 +From: Mihai Maruseac +Date: Wed, 21 Apr 2021 16:15:46 -0700 +Subject: [PATCH] Validate inputs to `QuantizedMul` + +PiperOrigin-RevId: 369756982 +Change-Id: I00d960cc3b9316fd7a86bd37a44e341c96e17624 +--- + tensorflow/core/kernels/quantized_mul_op.cc | 20 ++++++++++++++++---- + 1 file changed, 16 insertions(+), 4 deletions(-) + +diff --git a/tensorflow/core/kernels/quantized_mul_op.cc b/tensorflow/core/kernels/quantized_mul_op.cc +index fb56f68bf14db..22cff8939449a 100644 +--- a/tensorflow/core/kernels/quantized_mul_op.cc ++++ b/tensorflow/core/kernels/quantized_mul_op.cc +@@ -284,10 +284,22 @@ class QuantizedMulOp : public OpKernel { + void Compute(OpKernelContext* context) override { + const Tensor& x = context->input(0); + const Tensor& y = context->input(1); +- const float min_x = context->input(2).flat()(0); +- const float max_x = context->input(3).flat()(0); +- const float min_y = context->input(4).flat()(0); +- const float max_y = context->input(5).flat()(0); ++ auto& min_x_tensor = context->input(2); ++ OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_x_tensor.shape()), ++ errors::InvalidArgument("min_x must be a scalar")); ++ const float min_x = min_x_tensor.flat()(0); ++ auto& max_x_tensor = context->input(3); ++ OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_x_tensor.shape()), ++ errors::InvalidArgument("max_x must be a scalar")); ++ const float max_x = max_x_tensor.flat()(0); ++ auto& min_y_tensor = 
context->input(4); ++ OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_y_tensor.shape()), ++ errors::InvalidArgument("min_y must be a scalar")); ++ const float min_y = min_y_tensor.flat()(0); ++ auto& max_y_tensor = context->input(5); ++ OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_y_tensor.shape()), ++ errors::InvalidArgument("max_y must be a scalar")); ++ const float max_y = max_y_tensor.flat()(0); + + BCast bcast(BCast::FromShape(x.shape()), BCast::FromShape(y.shape())); + if (!bcast.IsValid()) { diff --git a/CVE-2021-29537.patch b/CVE-2021-29537.patch new file mode 100644 index 0000000000000000000000000000000000000000..fd8e12e01be6869cbc2f65e5bd48511ac285d3c2 --- /dev/null +++ b/CVE-2021-29537.patch @@ -0,0 +1,33 @@ +From f6c40f0c6cbf00d46c7717a26419f2062f2f8694 Mon Sep 17 00:00:00 2001 +From: Mihai Maruseac +Date: Wed, 21 Apr 2021 17:00:39 -0700 +Subject: [PATCH] Validate min and max arguments to `QuantizedResizeBilinear`. + +--- + .../core/kernels/quantized_resize_bilinear_op.cc | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/tensorflow/core/kernels/quantized_resize_bilinear_op.cc b/tensorflow/core/kernels/quantized_resize_bilinear_op.cc +index 8270fc11..a94f56a5 100644 +--- a/tensorflow/core/kernels/quantized_resize_bilinear_op.cc ++++ b/tensorflow/core/kernels/quantized_resize_bilinear_op.cc +@@ -703,8 +703,14 @@ class QuantizedResizeBilinearOp : public OpKernel { + + void Compute(OpKernelContext* context) override { + const Tensor& input = context->input(0); +- const float in_min = context->input(2).flat()(0); +- const float in_max = context->input(3).flat()(0); ++ const auto& in_min_tensor = context->input(2); ++ OP_REQUIRES(context, TensorShapeUtils::IsScalar(in_min_tensor.shape()), ++ errors::InvalidArgument("min must be a scalar")); ++ const float in_min = in_min_tensor.flat()(0); ++ const auto& in_max_tensor = context->input(3); ++ OP_REQUIRES(context, TensorShapeUtils::IsScalar(in_max_tensor.shape()), ++ 
errors::InvalidArgument("max must be a scalar")); ++ const float in_max = in_max_tensor.flat()(0); + + ImageResizerState st(align_corners_, false); + st.ValidateAndCreateOutput(context, input); +-- +2.23.0 + diff --git a/CVE-2021-29538.patch b/CVE-2021-29538.patch new file mode 100644 index 0000000000000000000000000000000000000000..ad661e111a801043ce4ee95099f5864ca4ea5399 --- /dev/null +++ b/CVE-2021-29538.patch @@ -0,0 +1,42 @@ +From c570e2ecfc822941335ad48f6e10df4e21f11c96 Mon Sep 17 00:00:00 2001 +From: Mihai Maruseac +Date: Wed, 21 Apr 2021 17:50:10 -0700 +Subject: [PATCH] Fix issues in Conv2DBackpropFilter. + +PiperOrigin-RevId: 369772454 +Change-Id: I49b465f2ae2ce91def61b56cea8000197d5177d8 +--- + tensorflow/core/kernels/conv_grad_filter_ops.cc | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/tensorflow/core/kernels/conv_grad_filter_ops.cc b/tensorflow/core/kernels/conv_grad_filter_ops.cc +index fb48e3e285a27..2645d850ab7cf 100644 +--- a/tensorflow/core/kernels/conv_grad_filter_ops.cc ++++ b/tensorflow/core/kernels/conv_grad_filter_ops.cc +@@ -495,6 +495,14 @@ class Conv2DCustomBackpropFilterOp : public OpKernel { + const int filter_total_size = dims.spatial_dims[0].filter_size * + dims.spatial_dims[1].filter_size * + dims.in_depth; ++ OP_REQUIRES( ++ context, ++ filter_total_size * dims.out_depth == filter_backprop->NumElements(), ++ errors::InvalidArgument( ++ "filter_size does not have enough elements, requested ", ++ filter_total_size * dims.out_depth, ", got ", ++ filter_backprop->NumElements())); ++ + // The output image size is the spatial size of the output. 
+ const int output_image_size = + dims.spatial_dims[0].output_size * dims.spatial_dims[1].output_size; +@@ -518,6 +526,11 @@ class Conv2DCustomBackpropFilterOp : public OpKernel { + + const size_t work_unit_size = size_A + size_B + size_C; + ++ OP_REQUIRES( ++ context, work_unit_size != 0, ++ errors::InvalidArgument( ++ "Work size for convolution would be 0, which is not acceptable")); ++ + const size_t shard_size = + (target_working_set_size + work_unit_size - 1) / work_unit_size; + diff --git a/CVE-2021-29544-1.patch b/CVE-2021-29544-1.patch new file mode 100644 index 0000000000000000000000000000000000000000..f2ca044153460b3b2b47631f01f464d6205ece68 --- /dev/null +++ b/CVE-2021-29544-1.patch @@ -0,0 +1,919 @@ +From 52df91c5634e6c666843849a1c6ff29b3d2676be Mon Sep 17 00:00:00 2001 +From: Pankaj Kanwar +Date: Mon, 12 Oct 2020 10:30:20 -0700 +Subject: [PATCH] Create a V2 Op to stop the gradient when the input is out of + range. + +PiperOrigin-RevId: 336692325 +Change-Id: I36fd3fcfc58a30d5218beca512fbfc7c24b8b5cb +--- + tensorflow/cc/gradients/array_grad.cc | 29 ++-- + tensorflow/compiler/tests/unary_ops_test.py | 6 +- + .../api_def_QuantizeAndDequantizeV4.pbtxt | 8 ++ + .../api_def_QuantizeAndDequantizeV4Grad.pbtxt | 8 ++ + .../api_def_QuantizeAndDequantizeV4.pbtxt | 3 + + .../api_def_QuantizeAndDequantizeV4Grad.pbtxt | 3 + + .../api_def_QuantizeAndDequantizeV4.pbtxt | 4 + + .../api_def_QuantizeAndDequantizeV4Grad.pbtxt | 4 + + .../kernels/quantize_and_dequantize_op.cc | 126 ++++++++++++++++++ + .../core/kernels/quantize_and_dequantize_op.h | 71 ++++++++++ + .../quantize_and_dequantize_op_gpu.cu.cc | 40 ++++++ + .../quantize_and_dequantize_op_test.cc | 48 +++++++ + tensorflow/core/ops/array_ops.cc | 64 +++++++++ + .../python/kernel_tests/array_ops_test.py | 21 ++- + tensorflow/python/ops/array_ops.py | 113 +++++++++++++++- + .../tools/api/golden/v1/tensorflow.pbtxt | 4 + + .../golden/v1/tensorflow.quantization.pbtxt | 4 + + .../api/golden/v1/tensorflow.raw_ops.pbtxt 
| 8 ++ + .../tools/api/golden/v2/tensorflow.pbtxt | 4 + + .../golden/v2/tensorflow.quantization.pbtxt | 4 + + .../api/golden/v2/tensorflow.raw_ops.pbtxt | 8 ++ + 21 files changed, 564 insertions(+), 16 deletions(-) + create mode 100644 tensorflow/core/api_def/base_api/api_def_QuantizeAndDequantizeV4.pbtxt + create mode 100644 tensorflow/core/api_def/base_api/api_def_QuantizeAndDequantizeV4Grad.pbtxt + create mode 100644 tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV4.pbtxt + create mode 100644 tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV4Grad.pbtxt + create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV4.pbtxt + create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV4Grad.pbtxt + +diff --git a/tensorflow/cc/gradients/array_grad.cc b/tensorflow/cc/gradients/array_grad.cc +index e9173227..480243a2 100644 +--- a/tensorflow/cc/gradients/array_grad.cc ++++ b/tensorflow/cc/gradients/array_grad.cc +@@ -15,13 +15,12 @@ limitations under the License. 
+ + #include + ++#include "tensorflow/cc/framework/grad_op_registry.h" ++#include "tensorflow/cc/framework/gradients.h" + #include "tensorflow/cc/ops/array_ops_internal.h" + #include "tensorflow/cc/ops/standard_ops.h" + #include "tensorflow/core/lib/strings/strcat.h" + +-#include "tensorflow/cc/framework/grad_op_registry.h" +-#include "tensorflow/cc/framework/gradients.h" +- + namespace tensorflow { + namespace ops { + namespace { +@@ -90,15 +89,25 @@ Status QuantizeAndDequantizeGrad(const Scope& scope, const Operation& op, + } + REGISTER_GRADIENT_OP("QuantizeAndDequantize", QuantizeAndDequantizeGrad); + +-Status QuantizeAndDequantizeV2Grad(const Scope& scope, const Operation& op, +- const std::vector& grad_inputs, +- std::vector* grad_outputs) { +- grad_outputs->push_back(Identity(scope, grad_inputs[0])); +- grad_outputs->push_back(NoGradient()); +- grad_outputs->push_back(NoGradient()); ++Status QuantizeAndDequantizeV4GradHelper(const Scope& scope, ++ const Operation& op, ++ const std::vector& grad_inputs, ++ std::vector* grad_outputs) { ++ Input input = Shape(scope, op.input(0)); ++ Input input_min = op.input(1); ++ Input input_max = op.input(2); ++ int64 axis; ++ TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "axis", &axis)); ++ auto qdq_v4_grad = QuantizeAndDequantizeV4Grad( ++ scope, grad_inputs[0], input, input_min, input_max, ++ QuantizeAndDequantizeV4Grad::Axis(axis)); ++ grad_outputs->push_back(qdq_v4_grad.input_backprop); ++ grad_outputs->push_back(qdq_v4_grad.input_min_backprop); ++ grad_outputs->push_back(qdq_v4_grad.input_max_backprop); + return scope.status(); + } +-REGISTER_GRADIENT_OP("QuantizeAndDequantizeV2", QuantizeAndDequantizeV2Grad); ++REGISTER_GRADIENT_OP("QuantizeAndDequantizeV4", ++ QuantizeAndDequantizeV4GradHelper); + + Status QuantizeAndDequantizeV3Grad(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, +diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py 
+index 162693a9..dacd7232 100644 +--- a/tensorflow/compiler/tests/unary_ops_test.py ++++ b/tensorflow/compiler/tests/unary_ops_test.py +@@ -535,7 +535,7 @@ class UnaryOpsTest(xla_test.XLATestCase): + for dtype in self.float_types: + + def quantize_and_dequantize_v2(x): +- return array_ops.quantize_and_dequantize_v2( ++ return array_ops.quantize_and_dequantize( + x, -127, 127, signed_input=True, num_bits=8) + + self._assertOpOutputMatchesExpected( +@@ -544,7 +544,7 @@ class UnaryOpsTest(xla_test.XLATestCase): + expected=np.array([-1., -0.5, 0., 0.296875], dtype=dtype)) + + def quantize_and_dequantize_v2_round_half_up(x): +- return array_ops.quantize_and_dequantize_v2( ++ return array_ops.quantize_and_dequantize( + x, + -1, + 1.0, +@@ -568,7 +568,7 @@ class UnaryOpsTest(xla_test.XLATestCase): + dtype=dtype)) + + def quantize_and_dequantize_v2_round_half_to_even(x): +- return array_ops.quantize_and_dequantize_v2( ++ return array_ops.quantize_and_dequantize( + x, + -1.0, + 1.0, +diff --git a/tensorflow/core/api_def/base_api/api_def_QuantizeAndDequantizeV4.pbtxt b/tensorflow/core/api_def/base_api/api_def_QuantizeAndDequantizeV4.pbtxt +new file mode 100644 +index 00000000..a84ccb78 +--- /dev/null ++++ b/tensorflow/core/api_def/base_api/api_def_QuantizeAndDequantizeV4.pbtxt +@@ -0,0 +1,8 @@ ++op { ++ graph_op_name: "QuantizeAndDequantizeV4" ++ summary: "Returns the gradient of `QuantizeAndDequantizeV4`." ++ description: <