From 2365283f701ff0422b08ab6c99374fa139f1e2da Mon Sep 17 00:00:00 2001 From: Hui Li Date: Tue, 29 Aug 2023 13:04:49 +0800 Subject: [PATCH] bcc: Add support for loongarch64 Signed-off-by: Hui Li --- ...0-Add-libbpf-with-commit-0667206913b.patch | 63499 ++++++++++++++++ bcc.spec | 31 +- 2 files changed, 63527 insertions(+), 3 deletions(-) create mode 100644 bcc-0.25.0-Add-libbpf-with-commit-0667206913b.patch diff --git a/bcc-0.25.0-Add-libbpf-with-commit-0667206913b.patch b/bcc-0.25.0-Add-libbpf-with-commit-0667206913b.patch new file mode 100644 index 0000000..f72cf9a --- /dev/null +++ b/bcc-0.25.0-Add-libbpf-with-commit-0667206913b.patch @@ -0,0 +1,63499 @@ +From 7dd7ae2cf9a5ef951bafdf9fc59b2f96ab5a048b Mon Sep 17 00:00:00 2001 +From: Hui Li +Date: Thu, 3 Aug 2023 19:28:12 +0800 +Subject: [PATCH] Add corresponding submodule libbpf with commit 0667206913b32 + +https://github.com/libbpf/libbpf.git + +Signed-off-by: Hui Li +--- + src/cc/libbpf/.lgtm.yml | 14 + + src/cc/libbpf/.readthedocs.yaml | 22 + + src/cc/libbpf/BPF-CHECKPOINT-COMMIT | 1 + + src/cc/libbpf/CHECKPOINT-COMMIT | 1 + + src/cc/libbpf/LICENSE | 1 + + src/cc/libbpf/LICENSE.BSD-2-Clause | 32 + + src/cc/libbpf/LICENSE.LGPL-2.1 | 503 + + src/cc/libbpf/README.md | 165 + + src/cc/libbpf/docs/api.rst | 93 + + src/cc/libbpf/docs/conf.py | 40 + + src/cc/libbpf/docs/index.rst | 21 + + src/cc/libbpf/docs/libbpf_build.rst | 37 + + .../libbpf/docs/libbpf_naming_convention.rst | 193 + + src/cc/libbpf/docs/sphinx/Makefile | 9 + + src/cc/libbpf/docs/sphinx/doxygen/Doxyfile | 277 + + src/cc/libbpf/docs/sphinx/requirements.txt | 1 + + src/cc/libbpf/fuzz/bpf-object-fuzzer.c | 23 + + src/cc/libbpf/include/asm/barrier.h | 7 + + src/cc/libbpf/include/linux/compiler.h | 70 + + src/cc/libbpf/include/linux/err.h | 38 + + src/cc/libbpf/include/linux/filter.h | 134 + + src/cc/libbpf/include/linux/kernel.h | 44 + + src/cc/libbpf/include/linux/list.h | 91 + + src/cc/libbpf/include/linux/overflow.h | 90 + + src/cc/libbpf/include/linux/ring_buffer.h | 18 + + src/cc/libbpf/include/linux/types.h | 33 + + src/cc/libbpf/include/uapi/linux/bpf.h | 6851 +++++++++ + src/cc/libbpf/include/uapi/linux/bpf_common.h | 57 + + src/cc/libbpf/include/uapi/linux/btf.h | 200 + + src/cc/libbpf/include/uapi/linux/if_link.h | 1282 ++ + src/cc/libbpf/include/uapi/linux/if_xdp.h | 111 + + src/cc/libbpf/include/uapi/linux/netlink.h | 252 + + src/cc/libbpf/include/uapi/linux/perf_event.h | 1395 ++ + src/cc/libbpf/include/uapi/linux/pkt_cls.h | 612 + + src/cc/libbpf/include/uapi/linux/pkt_sched.h | 1164 ++ + src/cc/libbpf/scripts/build-fuzzers.sh | 81 + + src/cc/libbpf/scripts/coverity.sh | 105 + + src/cc/libbpf/scripts/sync-kernel.sh | 350 + + src/cc/libbpf/src/Makefile | 182 + + src/cc/libbpf/src/bpf.c | 1101 ++ + src/cc/libbpf/src/bpf.h | 438 + + src/cc/libbpf/src/bpf_core_read.h | 484 + + src/cc/libbpf/src/bpf_endian.h | 99 + + src/cc/libbpf/src/bpf_gen_internal.h | 72 + + src/cc/libbpf/src/bpf_helper_defs.h | 4582 ++++++ + src/cc/libbpf/src/bpf_helpers.h | 301 + + src/cc/libbpf/src/bpf_prog_linfo.c | 246 + + src/cc/libbpf/src/bpf_tracing.h | 563 + + src/cc/libbpf/src/btf.c | 4909 ++++++ + src/cc/libbpf/src/btf.h | 553 + + src/cc/libbpf/src/btf_dump.c | 2403 +++ + src/cc/libbpf/src/gen_loader.c | 1121 ++ + src/cc/libbpf/src/hashmap.c | 240 + + src/cc/libbpf/src/hashmap.h | 195 + + src/cc/libbpf/src/libbpf.c | 12388 ++++++++++++++++ + src/cc/libbpf/src/libbpf.h | 1418 ++ + src/cc/libbpf/src/libbpf.map | 368 + + src/cc/libbpf/src/libbpf.pc.template | 12 + + 
src/cc/libbpf/src/libbpf_common.h | 73 + + src/cc/libbpf/src/libbpf_errno.c | 67 + + src/cc/libbpf/src/libbpf_internal.h | 576 + + src/cc/libbpf/src/libbpf_legacy.h | 138 + + src/cc/libbpf/src/libbpf_probes.c | 362 + + src/cc/libbpf/src/libbpf_version.h | 9 + + src/cc/libbpf/src/linker.c | 2900 ++++ + src/cc/libbpf/src/netlink.c | 812 + + src/cc/libbpf/src/nlattr.c | 195 + + src/cc/libbpf/src/nlattr.h | 164 + + src/cc/libbpf/src/relo_core.c | 1690 +++ + src/cc/libbpf/src/relo_core.h | 99 + + src/cc/libbpf/src/ringbuf.c | 302 + + src/cc/libbpf/src/skel_internal.h | 349 + + src/cc/libbpf/src/str_error.c | 21 + + src/cc/libbpf/src/str_error.h | 6 + + src/cc/libbpf/src/strset.c | 177 + + src/cc/libbpf/src/strset.h | 21 + + src/cc/libbpf/src/usdt.bpf.h | 247 + + src/cc/libbpf/src/usdt.c | 1519 ++ + .../diffs/.do_not_use_dot_patch_here | 0 + .../001-fix-oob-write-in-test_verifier.diff | 35 + + src/cc/libbpf/travis-ci/managers/debian.sh | 90 + + .../libbpf/travis-ci/managers/test_compile.sh | 15 + + .../travis-ci/managers/travis_wait.bash | 61 + + src/cc/libbpf/travis-ci/managers/ubuntu.sh | 24 + + .../libbpf/travis-ci/rootfs/mkrootfs_arch.sh | 107 + + .../travis-ci/rootfs/mkrootfs_debian.sh | 52 + + .../libbpf/travis-ci/rootfs/mkrootfs_tweak.sh | 61 + + .../s390x-self-hosted-builder/README.md | 107 + + .../actions-runner-libbpf.Dockerfile | 50 + + .../actions-runner-libbpf.service | 24 + + .../fs/usr/bin/actions-runner | 40 + + .../fs/usr/bin/entrypoint | 35 + + .../qemu-user-static.service | 11 + + .../vmtest/configs/blacklist/BLACKLIST-5.5.0 | 118 + + .../vmtest/configs/blacklist/BLACKLIST-latest | 6 + + .../configs/blacklist/BLACKLIST-latest.s390x | 67 + + .../vmtest/configs/config-latest.s390x | 2711 ++++ + .../vmtest/configs/config-latest.x86_64 | 3073 ++++ + .../vmtest/configs/whitelist/WHITELIST-4.9.0 | 8 + + .../vmtest/configs/whitelist/WHITELIST-5.5.0 | 55 + + src/cc/libbpf/travis-ci/vmtest/helpers.sh | 36 + + .../libbpf/travis-ci/vmtest/run_selftests.sh | 63 + + 102 files changed, 62669 insertions(+) + create mode 100644 src/cc/libbpf/.lgtm.yml + create mode 100644 src/cc/libbpf/.readthedocs.yaml + create mode 100644 src/cc/libbpf/BPF-CHECKPOINT-COMMIT + create mode 100644 src/cc/libbpf/CHECKPOINT-COMMIT + create mode 100644 src/cc/libbpf/LICENSE + create mode 100644 src/cc/libbpf/LICENSE.BSD-2-Clause + create mode 100644 src/cc/libbpf/LICENSE.LGPL-2.1 + create mode 100644 src/cc/libbpf/README.md + create mode 100644 src/cc/libbpf/docs/api.rst + create mode 100644 src/cc/libbpf/docs/conf.py + create mode 100644 src/cc/libbpf/docs/index.rst + create mode 100644 src/cc/libbpf/docs/libbpf_build.rst + create mode 100644 src/cc/libbpf/docs/libbpf_naming_convention.rst + create mode 100644 src/cc/libbpf/docs/sphinx/Makefile + create mode 100644 src/cc/libbpf/docs/sphinx/doxygen/Doxyfile + create mode 100644 src/cc/libbpf/docs/sphinx/requirements.txt + create mode 100644 src/cc/libbpf/fuzz/bpf-object-fuzzer.c + create mode 100644 src/cc/libbpf/include/asm/barrier.h + create mode 100644 src/cc/libbpf/include/linux/compiler.h + create mode 100644 src/cc/libbpf/include/linux/err.h + create mode 100644 src/cc/libbpf/include/linux/filter.h + create mode 100644 src/cc/libbpf/include/linux/kernel.h + create mode 100644 src/cc/libbpf/include/linux/list.h + create mode 100644 src/cc/libbpf/include/linux/overflow.h + create mode 100644 src/cc/libbpf/include/linux/ring_buffer.h + create mode 100644 src/cc/libbpf/include/linux/types.h + create mode 100644 src/cc/libbpf/include/uapi/linux/bpf.h + create 
mode 100644 src/cc/libbpf/include/uapi/linux/bpf_common.h + create mode 100644 src/cc/libbpf/include/uapi/linux/btf.h + create mode 100644 src/cc/libbpf/include/uapi/linux/if_link.h + create mode 100644 src/cc/libbpf/include/uapi/linux/if_xdp.h + create mode 100644 src/cc/libbpf/include/uapi/linux/netlink.h + create mode 100644 src/cc/libbpf/include/uapi/linux/perf_event.h + create mode 100644 src/cc/libbpf/include/uapi/linux/pkt_cls.h + create mode 100644 src/cc/libbpf/include/uapi/linux/pkt_sched.h + create mode 100755 src/cc/libbpf/scripts/build-fuzzers.sh + create mode 100755 src/cc/libbpf/scripts/coverity.sh + create mode 100755 src/cc/libbpf/scripts/sync-kernel.sh + create mode 100644 src/cc/libbpf/src/Makefile + create mode 100644 src/cc/libbpf/src/bpf.c + create mode 100644 src/cc/libbpf/src/bpf.h + create mode 100644 src/cc/libbpf/src/bpf_core_read.h + create mode 100644 src/cc/libbpf/src/bpf_endian.h + create mode 100644 src/cc/libbpf/src/bpf_gen_internal.h + create mode 100644 src/cc/libbpf/src/bpf_helper_defs.h + create mode 100644 src/cc/libbpf/src/bpf_helpers.h + create mode 100644 src/cc/libbpf/src/bpf_prog_linfo.c + create mode 100644 src/cc/libbpf/src/bpf_tracing.h + create mode 100644 src/cc/libbpf/src/btf.c + create mode 100644 src/cc/libbpf/src/btf.h + create mode 100644 src/cc/libbpf/src/btf_dump.c + create mode 100644 src/cc/libbpf/src/gen_loader.c + create mode 100644 src/cc/libbpf/src/hashmap.c + create mode 100644 src/cc/libbpf/src/hashmap.h + create mode 100644 src/cc/libbpf/src/libbpf.c + create mode 100644 src/cc/libbpf/src/libbpf.h + create mode 100644 src/cc/libbpf/src/libbpf.map + create mode 100644 src/cc/libbpf/src/libbpf.pc.template + create mode 100644 src/cc/libbpf/src/libbpf_common.h + create mode 100644 src/cc/libbpf/src/libbpf_errno.c + create mode 100644 src/cc/libbpf/src/libbpf_internal.h + create mode 100644 src/cc/libbpf/src/libbpf_legacy.h + create mode 100644 src/cc/libbpf/src/libbpf_probes.c + create mode 100644 src/cc/libbpf/src/libbpf_version.h + create mode 100644 src/cc/libbpf/src/linker.c + create mode 100644 src/cc/libbpf/src/netlink.c + create mode 100644 src/cc/libbpf/src/nlattr.c + create mode 100644 src/cc/libbpf/src/nlattr.h + create mode 100644 src/cc/libbpf/src/relo_core.c + create mode 100644 src/cc/libbpf/src/relo_core.h + create mode 100644 src/cc/libbpf/src/ringbuf.c + create mode 100644 src/cc/libbpf/src/skel_internal.h + create mode 100644 src/cc/libbpf/src/str_error.c + create mode 100644 src/cc/libbpf/src/str_error.h + create mode 100644 src/cc/libbpf/src/strset.c + create mode 100644 src/cc/libbpf/src/strset.h + create mode 100644 src/cc/libbpf/src/usdt.bpf.h + create mode 100644 src/cc/libbpf/src/usdt.c + create mode 100644 src/cc/libbpf/travis-ci/diffs/.do_not_use_dot_patch_here + create mode 100644 src/cc/libbpf/travis-ci/diffs/001-fix-oob-write-in-test_verifier.diff + create mode 100755 src/cc/libbpf/travis-ci/managers/debian.sh + create mode 100755 src/cc/libbpf/travis-ci/managers/test_compile.sh + create mode 100644 src/cc/libbpf/travis-ci/managers/travis_wait.bash + create mode 100755 src/cc/libbpf/travis-ci/managers/ubuntu.sh + create mode 100755 src/cc/libbpf/travis-ci/rootfs/mkrootfs_arch.sh + create mode 100755 src/cc/libbpf/travis-ci/rootfs/mkrootfs_debian.sh + create mode 100755 src/cc/libbpf/travis-ci/rootfs/mkrootfs_tweak.sh + create mode 100644 src/cc/libbpf/travis-ci/rootfs/s390x-self-hosted-builder/README.md + create mode 100644 
src/cc/libbpf/travis-ci/rootfs/s390x-self-hosted-builder/actions-runner-libbpf.Dockerfile + create mode 100644 src/cc/libbpf/travis-ci/rootfs/s390x-self-hosted-builder/actions-runner-libbpf.service + create mode 100755 src/cc/libbpf/travis-ci/rootfs/s390x-self-hosted-builder/fs/usr/bin/actions-runner + create mode 100755 src/cc/libbpf/travis-ci/rootfs/s390x-self-hosted-builder/fs/usr/bin/entrypoint + create mode 100644 src/cc/libbpf/travis-ci/rootfs/s390x-self-hosted-builder/qemu-user-static.service + create mode 100644 src/cc/libbpf/travis-ci/vmtest/configs/blacklist/BLACKLIST-5.5.0 + create mode 100644 src/cc/libbpf/travis-ci/vmtest/configs/blacklist/BLACKLIST-latest + create mode 100644 src/cc/libbpf/travis-ci/vmtest/configs/blacklist/BLACKLIST-latest.s390x + create mode 100644 src/cc/libbpf/travis-ci/vmtest/configs/config-latest.s390x + create mode 100644 src/cc/libbpf/travis-ci/vmtest/configs/config-latest.x86_64 + create mode 100644 src/cc/libbpf/travis-ci/vmtest/configs/whitelist/WHITELIST-4.9.0 + create mode 100644 src/cc/libbpf/travis-ci/vmtest/configs/whitelist/WHITELIST-5.5.0 + create mode 100755 src/cc/libbpf/travis-ci/vmtest/helpers.sh + create mode 100755 src/cc/libbpf/travis-ci/vmtest/run_selftests.sh + +diff --git a/src/cc/libbpf/.lgtm.yml b/src/cc/libbpf/.lgtm.yml +new file mode 100644 +index 0000000..f27137b +--- /dev/null ++++ b/src/cc/libbpf/.lgtm.yml +@@ -0,0 +1,14 @@ ++# vi: set ts=2 sw=2: ++extraction: ++ cpp: ++ prepare: ++ packages: ++ - libelf-dev ++ - pkg-config ++ after_prepare: ++ # As the buildsystem detection by LGTM is performed _only_ during the ++ # 'configure' phase, we need to trick LGTM we use a supported build ++ # system (configure, meson, cmake, etc.). This way LGTM correctly detects ++ # that our sources are in the src/ subfolder. 
++ - touch src/configure ++ - chmod +x src/configure +diff --git a/src/cc/libbpf/.readthedocs.yaml b/src/cc/libbpf/.readthedocs.yaml +new file mode 100644 +index 0000000..803dfa2 +--- /dev/null ++++ b/src/cc/libbpf/.readthedocs.yaml +@@ -0,0 +1,22 @@ ++# .readthedocs.yaml ++# Read the Docs configuration file ++# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details ++ ++# Required ++version: 2 ++ ++# Build documentation in the docs/ directory with Sphinx ++sphinx: ++ builder: html ++ configuration: docs/conf.py ++ ++formats: ++ - htmlzip ++ - pdf ++ - epub ++ ++# Optionally set the version of Python and requirements required to build your docs ++python: ++ version: 3.7 ++ install: ++ - requirements: docs/sphinx/requirements.txt +\ No newline at end of file +diff --git a/src/cc/libbpf/BPF-CHECKPOINT-COMMIT b/src/cc/libbpf/BPF-CHECKPOINT-COMMIT +new file mode 100644 +index 0000000..2af218b +--- /dev/null ++++ b/src/cc/libbpf/BPF-CHECKPOINT-COMMIT +@@ -0,0 +1 @@ ++f946964a9f79f8dcb5a6329265281eebfc23aee5 +diff --git a/src/cc/libbpf/CHECKPOINT-COMMIT b/src/cc/libbpf/CHECKPOINT-COMMIT +new file mode 100644 +index 0000000..0494e42 +--- /dev/null ++++ b/src/cc/libbpf/CHECKPOINT-COMMIT +@@ -0,0 +1 @@ ++71930846b36f8e4e68267f8a3d47e33435c3657a +diff --git a/src/cc/libbpf/LICENSE b/src/cc/libbpf/LICENSE +new file mode 100644 +index 0000000..d38fed3 +--- /dev/null ++++ b/src/cc/libbpf/LICENSE +@@ -0,0 +1 @@ ++LGPL-2.1 OR BSD-2-Clause +diff --git a/src/cc/libbpf/LICENSE.BSD-2-Clause b/src/cc/libbpf/LICENSE.BSD-2-Clause +new file mode 100644 +index 0000000..bce40aa +--- /dev/null ++++ b/src/cc/libbpf/LICENSE.BSD-2-Clause +@@ -0,0 +1,32 @@ ++Valid-License-Identifier: BSD-2-Clause ++SPDX-URL: https://spdx.org/licenses/BSD-2-Clause.html ++Usage-Guide: ++ To use the BSD 2-clause "Simplified" License put the following SPDX ++ tag/value pair into a comment according to the placement guidelines in ++ the licensing rules documentation: ++ SPDX-License-Identifier: BSD-2-Clause ++License-Text: ++ ++Copyright (c) 2015 The Libbpf Authors. All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without ++modification, are permitted provided that the following conditions are met: ++ ++1. Redistributions of source code must retain the above copyright notice, ++ this list of conditions and the following disclaimer. ++ ++2. Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" ++AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE ++LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ++CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF ++SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ++INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ++CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ++POSSIBILITY OF SUCH DAMAGE. 
+diff --git a/src/cc/libbpf/LICENSE.LGPL-2.1 b/src/cc/libbpf/LICENSE.LGPL-2.1 +new file mode 100644 +index 0000000..27bb434 +--- /dev/null ++++ b/src/cc/libbpf/LICENSE.LGPL-2.1 +@@ -0,0 +1,503 @@ ++Valid-License-Identifier: LGPL-2.1 ++Valid-License-Identifier: LGPL-2.1+ ++SPDX-URL: https://spdx.org/licenses/LGPL-2.1.html ++Usage-Guide: ++ To use this license in source code, put one of the following SPDX ++ tag/value pairs into a comment according to the placement ++ guidelines in the licensing rules documentation. ++ For 'GNU Lesser General Public License (LGPL) version 2.1 only' use: ++ SPDX-License-Identifier: LGPL-2.1 ++ For 'GNU Lesser General Public License (LGPL) version 2.1 or any later ++ version' use: ++ SPDX-License-Identifier: LGPL-2.1+ ++License-Text: ++ ++GNU LESSER GENERAL PUBLIC LICENSE ++Version 2.1, February 1999 ++ ++Copyright (C) 1991, 1999 Free Software Foundation, Inc. ++51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ ++Everyone is permitted to copy and distribute verbatim copies of this ++license document, but changing it is not allowed. ++ ++[This is the first released version of the Lesser GPL. It also counts as ++the successor of the GNU Library Public License, version 2, hence the ++version number 2.1.] ++ ++Preamble ++ ++The licenses for most software are designed to take away your freedom to ++share and change it. By contrast, the GNU General Public Licenses are ++intended to guarantee your freedom to share and change free software--to ++make sure the software is free for all its users. ++ ++This license, the Lesser General Public License, applies to some specially ++designated software packages--typically libraries--of the Free Software ++Foundation and other authors who decide to use it. You can use it too, but ++we suggest you first think carefully about whether this license or the ++ordinary General Public License is the better strategy to use in any ++particular case, based on the explanations below. ++ ++When we speak of free software, we are referring to freedom of use, not ++price. Our General Public Licenses are designed to make sure that you have ++the freedom to distribute copies of free software (and charge for this ++service if you wish); that you receive source code or can get it if you ++want it; that you can change the software and use pieces of it in new free ++programs; and that you are informed that you can do these things. ++ ++To protect your rights, we need to make restrictions that forbid ++distributors to deny you these rights or to ask you to surrender these ++rights. These restrictions translate to certain responsibilities for you if ++you distribute copies of the library or if you modify it. ++ ++For example, if you distribute copies of the library, whether gratis or for ++a fee, you must give the recipients all the rights that we gave you. You ++must make sure that they, too, receive or can get the source code. If you ++link other code with the library, you must provide complete object files to ++the recipients, so that they can relink them with the library after making ++changes to the library and recompiling it. And you must show them these ++terms so they know their rights. ++ ++We protect your rights with a two-step method: (1) we copyright the ++library, and (2) we offer you this license, which gives you legal ++permission to copy, distribute and/or modify the library. ++ ++To protect each distributor, we want to make it very clear that there is no ++warranty for the free library. 
Also, if the library is modified by someone ++else and passed on, the recipients should know that what they have is not ++the original version, so that the original author's reputation will not be ++affected by problems that might be introduced by others. ++ ++Finally, software patents pose a constant threat to the existence of any ++free program. We wish to make sure that a company cannot effectively ++restrict the users of a free program by obtaining a restrictive license ++from a patent holder. Therefore, we insist that any patent license obtained ++for a version of the library must be consistent with the full freedom of ++use specified in this license. ++ ++Most GNU software, including some libraries, is covered by the ordinary GNU ++General Public License. This license, the GNU Lesser General Public ++License, applies to certain designated libraries, and is quite different ++from the ordinary General Public License. We use this license for certain ++libraries in order to permit linking those libraries into non-free ++programs. ++ ++When a program is linked with a library, whether statically or using a ++shared library, the combination of the two is legally speaking a combined ++work, a derivative of the original library. The ordinary General Public ++License therefore permits such linking only if the entire combination fits ++its criteria of freedom. The Lesser General Public License permits more lax ++criteria for linking other code with the library. ++ ++We call this license the "Lesser" General Public License because it does ++Less to protect the user's freedom than the ordinary General Public ++License. It also provides other free software developers Less of an ++advantage over competing non-free programs. These disadvantages are the ++reason we use the ordinary General Public License for many ++libraries. However, the Lesser license provides advantages in certain ++special circumstances. ++ ++For example, on rare occasions, there may be a special need to encourage ++the widest possible use of a certain library, so that it becomes a de-facto ++standard. To achieve this, non-free programs must be allowed to use the ++library. A more frequent case is that a free library does the same job as ++widely used non-free libraries. In this case, there is little to gain by ++limiting the free library to free software only, so we use the Lesser ++General Public License. ++ ++In other cases, permission to use a particular library in non-free programs ++enables a greater number of people to use a large body of free ++software. For example, permission to use the GNU C Library in non-free ++programs enables many more people to use the whole GNU operating system, as ++well as its variant, the GNU/Linux operating system. ++ ++Although the Lesser General Public License is Less protective of the users' ++freedom, it does ensure that the user of a program that is linked with the ++Library has the freedom and the wherewithal to run that program using a ++modified version of the Library. ++ ++The precise terms and conditions for copying, distribution and modification ++follow. Pay close attention to the difference between a "work based on the ++library" and a "work that uses the library". The former contains code ++derived from the library, whereas the latter must be combined with the ++library in order to run. ++ ++TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION ++ ++0. 
This License Agreement applies to any software library or other program ++ which contains a notice placed by the copyright holder or other ++ authorized party saying it may be distributed under the terms of this ++ Lesser General Public License (also called "this License"). Each ++ licensee is addressed as "you". ++ ++ A "library" means a collection of software functions and/or data ++ prepared so as to be conveniently linked with application programs ++ (which use some of those functions and data) to form executables. ++ ++ The "Library", below, refers to any such software library or work which ++ has been distributed under these terms. A "work based on the Library" ++ means either the Library or any derivative work under copyright law: ++ that is to say, a work containing the Library or a portion of it, either ++ verbatim or with modifications and/or translated straightforwardly into ++ another language. (Hereinafter, translation is included without ++ limitation in the term "modification".) ++ ++ "Source code" for a work means the preferred form of the work for making ++ modifications to it. For a library, complete source code means all the ++ source code for all modules it contains, plus any associated interface ++ definition files, plus the scripts used to control compilation and ++ installation of the library. ++ ++ Activities other than copying, distribution and modification are not ++ covered by this License; they are outside its scope. The act of running ++ a program using the Library is not restricted, and output from such a ++ program is covered only if its contents constitute a work based on the ++ Library (independent of the use of the Library in a tool for writing ++ it). Whether that is true depends on what the Library does and what the ++ program that uses the Library does. ++ ++1. You may copy and distribute verbatim copies of the Library's complete ++ source code as you receive it, in any medium, provided that you ++ conspicuously and appropriately publish on each copy an appropriate ++ copyright notice and disclaimer of warranty; keep intact all the notices ++ that refer to this License and to the absence of any warranty; and ++ distribute a copy of this License along with the Library. ++ ++ You may charge a fee for the physical act of transferring a copy, and ++ you may at your option offer warranty protection in exchange for a fee. ++ ++2. You may modify your copy or copies of the Library or any portion of it, ++ thus forming a work based on the Library, and copy and distribute such ++ modifications or work under the terms of Section 1 above, provided that ++ you also meet all of these conditions: ++ ++ a) The modified work must itself be a software library. ++ ++ b) You must cause the files modified to carry prominent notices stating ++ that you changed the files and the date of any change. ++ ++ c) You must cause the whole of the work to be licensed at no charge to ++ all third parties under the terms of this License. ++ ++ d) If a facility in the modified Library refers to a function or a table ++ of data to be supplied by an application program that uses the ++ facility, other than as an argument passed when the facility is ++ invoked, then you must make a good faith effort to ensure that, in ++ the event an application does not supply such function or table, the ++ facility still operates, and performs whatever part of its purpose ++ remains meaningful. 
++ ++ (For example, a function in a library to compute square roots has a ++ purpose that is entirely well-defined independent of the ++ application. Therefore, Subsection 2d requires that any ++ application-supplied function or table used by this function must be ++ optional: if the application does not supply it, the square root ++ function must still compute square roots.) ++ ++ These requirements apply to the modified work as a whole. If ++ identifiable sections of that work are not derived from the Library, and ++ can be reasonably considered independent and separate works in ++ themselves, then this License, and its terms, do not apply to those ++ sections when you distribute them as separate works. But when you ++ distribute the same sections as part of a whole which is a work based on ++ the Library, the distribution of the whole must be on the terms of this ++ License, whose permissions for other licensees extend to the entire ++ whole, and thus to each and every part regardless of who wrote it. ++ ++ Thus, it is not the intent of this section to claim rights or contest ++ your rights to work written entirely by you; rather, the intent is to ++ exercise the right to control the distribution of derivative or ++ collective works based on the Library. ++ ++ In addition, mere aggregation of another work not based on the Library ++ with the Library (or with a work based on the Library) on a volume of a ++ storage or distribution medium does not bring the other work under the ++ scope of this License. ++ ++3. You may opt to apply the terms of the ordinary GNU General Public ++ License instead of this License to a given copy of the Library. To do ++ this, you must alter all the notices that refer to this License, so that ++ they refer to the ordinary GNU General Public License, version 2, ++ instead of to this License. (If a newer version than version 2 of the ++ ordinary GNU General Public License has appeared, then you can specify ++ that version instead if you wish.) Do not make any other change in these ++ notices. ++ ++ Once this change is made in a given copy, it is irreversible for that ++ copy, so the ordinary GNU General Public License applies to all ++ subsequent copies and derivative works made from that copy. ++ ++ This option is useful when you wish to copy part of the code of the ++ Library into a program that is not a library. ++ ++4. You may copy and distribute the Library (or a portion or derivative of ++ it, under Section 2) in object code or executable form under the terms ++ of Sections 1 and 2 above provided that you accompany it with the ++ complete corresponding machine-readable source code, which must be ++ distributed under the terms of Sections 1 and 2 above on a medium ++ customarily used for software interchange. ++ ++ If distribution of object code is made by offering access to copy from a ++ designated place, then offering equivalent access to copy the source ++ code from the same place satisfies the requirement to distribute the ++ source code, even though third parties are not compelled to copy the ++ source along with the object code. ++ ++5. A program that contains no derivative of any portion of the Library, but ++ is designed to work with the Library by being compiled or linked with ++ it, is called a "work that uses the Library". Such a work, in isolation, ++ is not a derivative work of the Library, and therefore falls outside the ++ scope of this License. 
++ ++ However, linking a "work that uses the Library" with the Library creates ++ an executable that is a derivative of the Library (because it contains ++ portions of the Library), rather than a "work that uses the ++ library". The executable is therefore covered by this License. Section 6 ++ states terms for distribution of such executables. ++ ++ When a "work that uses the Library" uses material from a header file ++ that is part of the Library, the object code for the work may be a ++ derivative work of the Library even though the source code is ++ not. Whether this is true is especially significant if the work can be ++ linked without the Library, or if the work is itself a library. The ++ threshold for this to be true is not precisely defined by law. ++ ++ If such an object file uses only numerical parameters, data structure ++ layouts and accessors, and small macros and small inline functions (ten ++ lines or less in length), then the use of the object file is ++ unrestricted, regardless of whether it is legally a derivative ++ work. (Executables containing this object code plus portions of the ++ Library will still fall under Section 6.) ++ ++ Otherwise, if the work is a derivative of the Library, you may ++ distribute the object code for the work under the terms of Section ++ 6. Any executables containing that work also fall under Section 6, ++ whether or not they are linked directly with the Library itself. ++ ++6. As an exception to the Sections above, you may also combine or link a ++ "work that uses the Library" with the Library to produce a work ++ containing portions of the Library, and distribute that work under terms ++ of your choice, provided that the terms permit modification of the work ++ for the customer's own use and reverse engineering for debugging such ++ modifications. ++ ++ You must give prominent notice with each copy of the work that the ++ Library is used in it and that the Library and its use are covered by ++ this License. You must supply a copy of this License. If the work during ++ execution displays copyright notices, you must include the copyright ++ notice for the Library among them, as well as a reference directing the ++ user to the copy of this License. Also, you must do one of these things: ++ ++ a) Accompany the work with the complete corresponding machine-readable ++ source code for the Library including whatever changes were used in ++ the work (which must be distributed under Sections 1 and 2 above); ++ and, if the work is an executable linked with the Library, with the ++ complete machine-readable "work that uses the Library", as object ++ code and/or source code, so that the user can modify the Library and ++ then relink to produce a modified executable containing the modified ++ Library. (It is understood that the user who changes the contents of ++ definitions files in the Library will not necessarily be able to ++ recompile the application to use the modified definitions.) ++ ++ b) Use a suitable shared library mechanism for linking with the ++ Library. A suitable mechanism is one that (1) uses at run time a copy ++ of the library already present on the user's computer system, rather ++ than copying library functions into the executable, and (2) will ++ operate properly with a modified version of the library, if the user ++ installs one, as long as the modified version is interface-compatible ++ with the version that the work was made with. 
++ ++ c) Accompany the work with a written offer, valid for at least three ++ years, to give the same user the materials specified in Subsection ++ 6a, above, for a charge no more than the cost of performing this ++ distribution. ++ ++ d) If distribution of the work is made by offering access to copy from a ++ designated place, offer equivalent access to copy the above specified ++ materials from the same place. ++ ++ e) Verify that the user has already received a copy of these materials ++ or that you have already sent this user a copy. ++ ++ For an executable, the required form of the "work that uses the Library" ++ must include any data and utility programs needed for reproducing the ++ executable from it. However, as a special exception, the materials to be ++ distributed need not include anything that is normally distributed (in ++ either source or binary form) with the major components (compiler, ++ kernel, and so on) of the operating system on which the executable runs, ++ unless that component itself accompanies the executable. ++ ++ It may happen that this requirement contradicts the license restrictions ++ of other proprietary libraries that do not normally accompany the ++ operating system. Such a contradiction means you cannot use both them ++ and the Library together in an executable that you distribute. ++ ++7. You may place library facilities that are a work based on the Library ++ side-by-side in a single library together with other library facilities ++ not covered by this License, and distribute such a combined library, ++ provided that the separate distribution of the work based on the Library ++ and of the other library facilities is otherwise permitted, and provided ++ that you do these two things: ++ ++ a) Accompany the combined library with a copy of the same work based on ++ the Library, uncombined with any other library facilities. This must ++ be distributed under the terms of the Sections above. ++ ++ b) Give prominent notice with the combined library of the fact that part ++ of it is a work based on the Library, and explaining where to find ++ the accompanying uncombined form of the same work. ++ ++8. You may not copy, modify, sublicense, link with, or distribute the ++ Library except as expressly provided under this License. Any attempt ++ otherwise to copy, modify, sublicense, link with, or distribute the ++ Library is void, and will automatically terminate your rights under this ++ License. However, parties who have received copies, or rights, from you ++ under this License will not have their licenses terminated so long as ++ such parties remain in full compliance. ++ ++9. You are not required to accept this License, since you have not signed ++ it. However, nothing else grants you permission to modify or distribute ++ the Library or its derivative works. These actions are prohibited by law ++ if you do not accept this License. Therefore, by modifying or ++ distributing the Library (or any work based on the Library), you ++ indicate your acceptance of this License to do so, and all its terms and ++ conditions for copying, distributing or modifying the Library or works ++ based on it. ++ ++10. Each time you redistribute the Library (or any work based on the ++ Library), the recipient automatically receives a license from the ++ original licensor to copy, distribute, link with or modify the Library ++ subject to these terms and conditions. You may not impose any further ++ restrictions on the recipients' exercise of the rights granted ++ herein. 
You are not responsible for enforcing compliance by third ++ parties with this License. ++ ++11. If, as a consequence of a court judgment or allegation of patent ++ infringement or for any other reason (not limited to patent issues), ++ conditions are imposed on you (whether by court order, agreement or ++ otherwise) that contradict the conditions of this License, they do not ++ excuse you from the conditions of this License. If you cannot ++ distribute so as to satisfy simultaneously your obligations under this ++ License and any other pertinent obligations, then as a consequence you ++ may not distribute the Library at all. For example, if a patent license ++ would not permit royalty-free redistribution of the Library by all ++ those who receive copies directly or indirectly through you, then the ++ only way you could satisfy both it and this License would be to refrain ++ entirely from distribution of the Library. ++ ++ If any portion of this section is held invalid or unenforceable under ++ any particular circumstance, the balance of the section is intended to ++ apply, and the section as a whole is intended to apply in other ++ circumstances. ++ ++ It is not the purpose of this section to induce you to infringe any ++ patents or other property right claims or to contest validity of any ++ such claims; this section has the sole purpose of protecting the ++ integrity of the free software distribution system which is implemented ++ by public license practices. Many people have made generous ++ contributions to the wide range of software distributed through that ++ system in reliance on consistent application of that system; it is up ++ to the author/donor to decide if he or she is willing to distribute ++ software through any other system and a licensee cannot impose that ++ choice. ++ ++ This section is intended to make thoroughly clear what is believed to ++ be a consequence of the rest of this License. ++ ++12. If the distribution and/or use of the Library is restricted in certain ++ countries either by patents or by copyrighted interfaces, the original ++ copyright holder who places the Library under this License may add an ++ explicit geographical distribution limitation excluding those ++ countries, so that distribution is permitted only in or among countries ++ not thus excluded. In such case, this License incorporates the ++ limitation as if written in the body of this License. ++ ++13. The Free Software Foundation may publish revised and/or new versions of ++ the Lesser General Public License from time to time. Such new versions ++ will be similar in spirit to the present version, but may differ in ++ detail to address new problems or concerns. ++ ++ Each version is given a distinguishing version number. If the Library ++ specifies a version number of this License which applies to it and "any ++ later version", you have the option of following the terms and ++ conditions either of that version or of any later version published by ++ the Free Software Foundation. If the Library does not specify a license ++ version number, you may choose any version ever published by the Free ++ Software Foundation. ++ ++14. If you wish to incorporate parts of the Library into other free ++ programs whose distribution conditions are incompatible with these, ++ write to the author to ask for permission. For software which is ++ copyrighted by the Free Software Foundation, write to the Free Software ++ Foundation; we sometimes make exceptions for this. 
Our decision will be ++ guided by the two goals of preserving the free status of all ++ derivatives of our free software and of promoting the sharing and reuse ++ of software generally. ++ ++NO WARRANTY ++ ++15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY ++ FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN ++ OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES ++ PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER ++ EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ++ ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH ++ YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL ++ NECESSARY SERVICING, REPAIR OR CORRECTION. ++ ++16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING ++ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR ++ REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR ++ DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL ++ DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY ++ (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED ++ INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF ++ THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR ++ OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. ++ ++END OF TERMS AND CONDITIONS ++ ++How to Apply These Terms to Your New Libraries ++ ++If you develop a new library, and you want it to be of the greatest ++possible use to the public, we recommend making it free software that ++everyone can redistribute and change. You can do so by permitting ++redistribution under these terms (or, alternatively, under the terms of the ++ordinary General Public License). ++ ++To apply these terms, attach the following notices to the library. It is ++safest to attach them to the start of each source file to most effectively ++convey the exclusion of warranty; and each file should have at least the ++"copyright" line and a pointer to where the full notice is found. ++ ++one line to give the library's name and an idea of what it does. ++Copyright (C) year name of author ++ ++This library is free software; you can redistribute it and/or modify it ++under the terms of the GNU Lesser General Public License as published by ++the Free Software Foundation; either version 2.1 of the License, or (at ++your option) any later version. ++ ++This library is distributed in the hope that it will be useful, but WITHOUT ++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License ++for more details. ++ ++You should have received a copy of the GNU Lesser General Public License ++along with this library; if not, write to the Free Software Foundation, ++Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Also add ++information on how to contact you by electronic and paper mail. ++ ++You should also get your employer (if you work as a programmer) or your ++school, if any, to sign a "copyright disclaimer" for the library, if ++necessary. Here is a sample; alter the names: ++ ++Yoyodyne, Inc., hereby disclaims all copyright interest in ++the library `Frob' (a library for tweaking knobs) written ++by James Random Hacker. ++ ++signature of Ty Coon, 1 April 1990 ++Ty Coon, President of Vice ++That's all there is to it! 
+diff --git a/src/cc/libbpf/README.md b/src/cc/libbpf/README.md +new file mode 100644 +index 0000000..7202bbc +--- /dev/null ++++ b/src/cc/libbpf/README.md +@@ -0,0 +1,165 @@ ++This is a mirror of [bpf-next Linux source ++tree](https://kernel.googlesource.com/pub/scm/linux/kernel/git/bpf/bpf-next)'s ++`tools/lib/bpf` directory plus its supporting header files. ++ ++All the gory details of syncing can be found in `scripts/sync-kernel.sh` ++script. ++ ++Some header files in this repo (`include/linux/*.h`) are reduced versions of ++their counterpart files at ++[bpf-next](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/)'s ++`tools/include/linux/*.h` to make compilation successful. ++ ++BPF/libbpf usage and questions ++============================== ++ ++Please check out [libbpf-bootstrap](https://github.com/libbpf/libbpf-bootstrap) ++and [the companion blog post](https://nakryiko.com/posts/libbpf-bootstrap/) for ++the examples of building BPF applications with libbpf. ++[libbpf-tools](https://github.com/iovisor/bcc/tree/master/libbpf-tools) are also ++a good source of the real-world libbpf-based tracing tools. ++ ++See also ["BPF CO-RE reference guide"](https://nakryiko.com/posts/bpf-core-reference-guide/) ++for the coverage of practical aspects of building BPF CO-RE applications and ++["BPF CO-RE"](https://nakryiko.com/posts/bpf-portability-and-co-re/) for ++general introduction into BPF portability issues and BPF CO-RE origins. ++ ++All general BPF questions, including kernel functionality, libbpf APIs and ++their application, should be sent to bpf@vger.kernel.org mailing list. You can ++subscribe to it [here](http://vger.kernel.org/vger-lists.html#bpf) and search ++its archive [here](https://lore.kernel.org/bpf/). Please search the archive ++before asking new questions. It very well might be that this was already ++addressed or answered before. ++ ++bpf@vger.kernel.org is monitored by many more people and they will happily try ++to help you with whatever issue you have. This repository's PRs and issues ++should be opened only for dealing with issues pertaining to specific way this ++libbpf mirror repo is set up and organized. ++ ++Build ++[![Github Actions Builds & Tests](https://github.com/libbpf/libbpf/actions/workflows/test.yml/badge.svg)](https://github.com/libbpf/libbpf/actions/workflows/test.yml) ++[![Total alerts](https://img.shields.io/lgtm/alerts/g/libbpf/libbpf.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/libbpf/libbpf/alerts/) ++[![Coverity](https://img.shields.io/coverity/scan/18195.svg)](https://scan.coverity.com/projects/libbpf) ++[![OSS-Fuzz Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/libbpf.svg)](https://oss-fuzz-build-logs.storage.googleapis.com/index.html#libbpf) ++===== ++libelf is an internal dependency of libbpf and thus it is required to link ++against and must be installed on the system for applications to work. ++pkg-config is used by default to find libelf, and the program called can be ++overridden with `PKG_CONFIG`. ++ ++If using `pkg-config` at build time is not desired, it can be disabled by ++setting `NO_PKG_CONFIG=1` when calling make. 
++ ++To build both static libbpf.a and shared libbpf.so: ++```bash ++$ cd src ++$ make ++``` ++ ++To build only static libbpf.a library in directory ++build/ and install them together with libbpf headers in a staging directory ++root/: ++```bash ++$ cd src ++$ mkdir build root ++$ BUILD_STATIC_ONLY=y OBJDIR=build DESTDIR=root make install ++``` ++ ++To build both static libbpf.a and shared libbpf.so against a custom libelf ++dependency installed in /build/root/ and install them together with libbpf ++headers in a build directory /build/root/: ++```bash ++$ cd src ++$ PKG_CONFIG_PATH=/build/root/lib64/pkgconfig DESTDIR=/build/root make install ++``` ++ ++BPF CO-RE (Compile Once – Run Everywhere) ++========================================= ++ ++Libbpf supports building BPF CO-RE-enabled applications, which, in contrast to ++[BCC](https://github.com/iovisor/bcc/), do not require Clang/LLVM runtime ++being deployed to target servers and doesn't rely on kernel-devel headers ++being available. ++ ++It does rely on kernel to be built with [BTF type ++information](https://www.kernel.org/doc/html/latest/bpf/btf.html), though. ++Some major Linux distributions come with kernel BTF already built in: ++ - Fedora 31+ ++ - RHEL 8.2+ ++ - OpenSUSE Tumbleweed (in the next release, as of 2020-06-04) ++ - Arch Linux (from kernel 5.7.1.arch1-1) ++ - Manjaro (from kernel 5.4 if compiled after 2021-06-18) ++ - Ubuntu 20.10 ++ - Debian 11 (amd64/arm64) ++ ++If your kernel doesn't come with BTF built-in, you'll need to build custom ++kernel. You'll need: ++ - `pahole` 1.16+ tool (part of `dwarves` package), which performs DWARF to ++ BTF conversion; ++ - kernel built with `CONFIG_DEBUG_INFO_BTF=y` option; ++ - you can check if your kernel has BTF built-in by looking for ++ `/sys/kernel/btf/vmlinux` file: ++ ++```shell ++$ ls -la /sys/kernel/btf/vmlinux ++-r--r--r--. 1 root root 3541561 Jun 2 18:16 /sys/kernel/btf/vmlinux ++``` ++ ++To develop and build BPF programs, you'll need Clang/LLVM 10+. The following ++distributions have Clang/LLVM 10+ packaged by default: ++ - Fedora 32+ ++ - Ubuntu 20.04+ ++ - Arch Linux ++ - Ubuntu 20.10 (LLVM 11) ++ - Debian 11 (LLVM 11) ++ - Alpine 3.13+ ++ ++Otherwise, please make sure to update it on your system. ++ ++The following resources are useful to understand what BPF CO-RE is and how to ++use it: ++- [BPF CO-RE reference guide](https://nakryiko.com/posts/bpf-core-reference-guide/) ++- [BPF Portability and CO-RE](https://nakryiko.com/posts/bpf-portability-and-co-re/) ++- [HOWTO: BCC to libbpf conversion](https://nakryiko.com/posts/bcc-to-libbpf-howto-guide/) ++- [libbpf-tools in BCC repo](https://github.com/iovisor/bcc/tree/master/libbpf-tools) ++ contain lots of real-world tools converted from BCC to BPF CO-RE. Consider ++ converting some more to both contribute to the BPF community and gain some ++ more experience with it. ++ ++Distributions ++============= ++ ++Distributions packaging libbpf from this mirror: ++ - [Fedora](https://src.fedoraproject.org/rpms/libbpf) ++ - [Gentoo](https://packages.gentoo.org/packages/dev-libs/libbpf) ++ - [Debian](https://packages.debian.org/source/sid/libbpf) ++ - [Arch](https://www.archlinux.org/packages/extra/x86_64/libbpf/) ++ - [Ubuntu](https://packages.ubuntu.com/source/impish/libbpf) ++ - [Alpine](https://pkgs.alpinelinux.org/packages?name=libbpf) ++ ++Benefits of packaging from the mirror over packaging from kernel sources: ++ - Consistent versioning across distributions. 
++ - No ties to any specific kernel, transparent handling of older kernels. ++ Libbpf is designed to be kernel-agnostic and work across multitude of ++ kernel versions. It has built-in mechanisms to gracefully handle older ++ kernels, that are missing some of the features, by working around or ++ gracefully degrading functionality. Thus libbpf is not tied to a specific ++ kernel version and can/should be packaged and versioned independently. ++ - Continuous integration testing via ++ [GitHub Actions](https://github.com/libbpf/libbpf/actions). ++ - Static code analysis via [LGTM](https://lgtm.com/projects/g/libbpf/libbpf) ++ and [Coverity](https://scan.coverity.com/projects/libbpf). ++ ++Package dependencies of libbpf, package names may vary across distros: ++ - zlib ++ - libelf ++ ++[![libbpf distro packaging status](https://repology.org/badge/vertical-allrepos/libbpf.svg)](https://repology.org/project/libbpf/versions) ++ ++License ++======= ++ ++This work is dual-licensed under BSD 2-clause license and GNU LGPL v2.1 license. ++You can choose between one of them if you use this work. ++ ++`SPDX-License-Identifier: BSD-2-Clause OR LGPL-2.1` +diff --git a/src/cc/libbpf/docs/api.rst b/src/cc/libbpf/docs/api.rst +new file mode 100644 +index 0000000..7a8e709 +--- /dev/null ++++ b/src/cc/libbpf/docs/api.rst +@@ -0,0 +1,93 @@ ++.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) ++ ++.. _api: ++ ++.. toctree:: Table of Contents ++ ++ ++LIBBPF API ++========== ++ ++Error Handling ++-------------- ++ ++When libbpf is used in "libbpf 1.0 mode", API functions can return errors in one of two ways. ++ ++You can set "libbpf 1.0" mode with the following line: ++ ++.. code-block:: ++ ++ libbpf_set_strict_mode(LIBBPF_STRICT_DIRECT_ERRS | LIBBPF_STRICT_CLEAN_PTRS); ++ ++If the function returns an error code directly, it uses 0 to indicate success ++and a negative error code to indicate what caused the error. In this case the ++error code should be checked directly from the return, you do not need to check ++errno. ++ ++For example: ++ ++.. code-block:: ++ ++ err = some_libbpf_api_with_error_return(...); ++ if (err < 0) { ++ /* Handle error accordingly */ ++ } ++ ++If the function returns a pointer, it will return NULL to indicate there was ++an error. In this case errno should be checked for the error code. ++ ++For example: ++ ++.. code-block:: ++ ++ ptr = some_libbpf_api_returning_ptr(); ++ if (!ptr) { ++ /* note no minus sign for EINVAL and E2BIG below */ ++ if (errno == EINVAL) { ++ /* handle EINVAL error */ ++ } else if (errno == E2BIG) { ++ /* handle E2BIG error */ ++ } ++ } ++ ++libbpf.h ++-------- ++.. doxygenfile:: libbpf.h ++ :project: libbpf ++ :sections: func define public-type enum ++ ++bpf.h ++----- ++.. doxygenfile:: bpf.h ++ :project: libbpf ++ :sections: func define public-type enum ++ ++btf.h ++----- ++.. doxygenfile:: btf.h ++ :project: libbpf ++ :sections: func define public-type enum ++ ++xsk.h ++----- ++.. doxygenfile:: xsk.h ++ :project: libbpf ++ :sections: func define public-type enum ++ ++bpf_tracing.h ++------------- ++.. doxygenfile:: bpf_tracing.h ++ :project: libbpf ++ :sections: func define public-type enum ++ ++bpf_core_read.h ++--------------- ++.. doxygenfile:: bpf_core_read.h ++ :project: libbpf ++ :sections: func define public-type enum ++ ++bpf_endian.h ++------------ ++.. 
doxygenfile:: bpf_endian.h ++ :project: libbpf ++ :sections: func define public-type enum +diff --git a/src/cc/libbpf/docs/conf.py b/src/cc/libbpf/docs/conf.py +new file mode 100644 +index 0000000..1d8714e +--- /dev/null ++++ b/src/cc/libbpf/docs/conf.py +@@ -0,0 +1,40 @@ ++#!/usr/bin/env python3 ++# SPDX-License-Identifier: GPL-2.0 ++# Configuration file for the Sphinx documentation builder. ++# ++# This file only contains a selection of the most common options. For a full ++# list see the documentation: ++# https://www.sphinx-doc.org/en/master/usage/configuration.html ++ ++import os ++import subprocess ++ ++project = "libbpf" ++ ++extensions = [ ++ 'sphinx.ext.autodoc', ++ 'sphinx.ext.doctest', ++ 'sphinx.ext.mathjax', ++ 'sphinx.ext.viewcode', ++ 'sphinx.ext.imgmath', ++ 'sphinx.ext.todo', ++ 'breathe', ++] ++ ++# List of patterns, relative to source directory, that match files and ++# directories to ignore when looking for source files. ++# This pattern also affects html_static_path and html_extra_path. ++exclude_patterns = [] ++ ++read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True' ++ ++if read_the_docs_build: ++ subprocess.call('cd sphinx ; make clean', shell=True) ++ subprocess.call('cd sphinx/doxygen ; doxygen', shell=True) ++ ++html_theme = 'sphinx_rtd_theme' ++ ++breathe_projects = { "libbpf": "./sphinx/doxygen/build/xml/" } ++breathe_default_project = "libbpf" ++breathe_show_define_initializer = True ++breathe_show_enumvalue_initializer = True +diff --git a/src/cc/libbpf/docs/index.rst b/src/cc/libbpf/docs/index.rst +new file mode 100644 +index 0000000..3722537 +--- /dev/null ++++ b/src/cc/libbpf/docs/index.rst +@@ -0,0 +1,21 @@ ++.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) ++ ++libbpf ++====== ++ ++.. toctree:: ++ :maxdepth: 1 ++ ++ API Documentation ++ libbpf_naming_convention ++ libbpf_build ++ ++This is documentation for libbpf, a userspace library for loading and ++interacting with bpf programs. ++ ++All general BPF questions, including kernel functionality, libbpf APIs and ++their application, should be sent to bpf@vger.kernel.org mailing list. ++You can `subscribe `_ to the ++mailing list search its `archive `_. ++Please search the archive before asking new questions. It very well might ++be that this was already addressed or answered before. +diff --git a/src/cc/libbpf/docs/libbpf_build.rst b/src/cc/libbpf/docs/libbpf_build.rst +new file mode 100644 +index 0000000..8e8c23e +--- /dev/null ++++ b/src/cc/libbpf/docs/libbpf_build.rst +@@ -0,0 +1,37 @@ ++.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) ++ ++Building libbpf ++=============== ++ ++libelf and zlib are internal dependencies of libbpf and thus are required to link ++against and must be installed on the system for applications to work. ++pkg-config is used by default to find libelf, and the program called ++can be overridden with PKG_CONFIG. ++ ++If using pkg-config at build time is not desired, it can be disabled by ++setting NO_PKG_CONFIG=1 when calling make. ++ ++To build both static libbpf.a and shared libbpf.so: ++ ++.. code-block:: bash ++ ++ $ cd src ++ $ make ++ ++To build only static libbpf.a library in directory build/ and install them ++together with libbpf headers in a staging directory root/: ++ ++.. 
code-block:: bash ++ ++ $ cd src ++ $ mkdir build root ++ $ BUILD_STATIC_ONLY=y OBJDIR=build DESTDIR=root make install ++ ++To build both static libbpf.a and shared libbpf.so against a custom libelf ++dependency installed in /build/root/ and install them together with libbpf ++headers in a build directory /build/root/: ++ ++.. code-block:: bash ++ ++ $ cd src ++ $ PKG_CONFIG_PATH=/build/root/lib64/pkgconfig DESTDIR=/build/root make +\ No newline at end of file +diff --git a/src/cc/libbpf/docs/libbpf_naming_convention.rst b/src/cc/libbpf/docs/libbpf_naming_convention.rst +new file mode 100644 +index 0000000..c5ac97f +--- /dev/null ++++ b/src/cc/libbpf/docs/libbpf_naming_convention.rst +@@ -0,0 +1,193 @@ ++.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) ++ ++API naming convention ++===================== ++ ++libbpf API provides access to a few logically separated groups of ++functions and types. Every group has its own naming convention ++described here. It's recommended to follow these conventions whenever a ++new function or type is added to keep libbpf API clean and consistent. ++ ++All types and functions provided by libbpf API should have one of the ++following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``btf_dump_``, ++``ring_buffer_``, ``perf_buffer_``. ++ ++System call wrappers ++-------------------- ++ ++System call wrappers are simple wrappers for commands supported by ++sys_bpf system call. These wrappers should go to ``bpf.h`` header file ++and map one to one to corresponding commands. ++ ++For example ``bpf_map_lookup_elem`` wraps ``BPF_MAP_LOOKUP_ELEM`` ++command of sys_bpf, ``bpf_prog_attach`` wraps ``BPF_PROG_ATTACH``, etc. ++ ++Objects ++------- ++ ++Another class of types and functions provided by libbpf API is "objects" ++and functions to work with them. Objects are high-level abstractions ++such as BPF program or BPF map. They're represented by corresponding ++structures such as ``struct bpf_object``, ``struct bpf_program``, ++``struct bpf_map``, etc. ++ ++Structures are forward declared and access to their fields should be ++provided via corresponding getters and setters rather than directly. ++ ++These objects are associated with corresponding parts of ELF object that ++contains compiled BPF programs. ++ ++For example ``struct bpf_object`` represents ELF object itself created ++from an ELF file or from a buffer, ``struct bpf_program`` represents a ++program in ELF object and ``struct bpf_map`` is a map. ++ ++Functions that work with an object have names built from object name, ++double underscore and part that describes function purpose. ++ ++For example ``bpf_object__open`` consists of the name of corresponding ++object, ``bpf_object``, double underscore and ``open`` that defines the ++purpose of the function to open ELF file and create ``bpf_object`` from ++it. ++ ++All objects and corresponding functions other than BTF related should go ++to ``libbpf.h``. BTF types and functions should go to ``btf.h``. ++ ++Auxiliary functions ++------------------- ++ ++Auxiliary functions and types that don't fit well in any of categories ++described above should have ``libbpf_`` prefix, e.g. ++``libbpf_get_error`` or ``libbpf_prog_type_by_name``. ++ ++ABI ++--- ++ ++libbpf can be both linked statically or used as DSO. To avoid possible ++conflicts with other libraries an application is linked with, all ++non-static libbpf symbols should have one of the prefixes mentioned in ++API documentation above. See API naming convention to choose the right ++name for a new symbol. 
++ ++Symbol visibility ++----------------- ++ ++libbpf follow the model when all global symbols have visibility "hidden" ++by default and to make a symbol visible it has to be explicitly ++attributed with ``LIBBPF_API`` macro. For example: ++ ++.. code-block:: c ++ ++ LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); ++ ++This prevents from accidentally exporting a symbol, that is not supposed ++to be a part of ABI what, in turn, improves both libbpf developer- and ++user-experiences. ++ ++ABI versionning ++--------------- ++ ++To make future ABI extensions possible libbpf ABI is versioned. ++Versioning is implemented by ``libbpf.map`` version script that is ++passed to linker. ++ ++Version name is ``LIBBPF_`` prefix + three-component numeric version, ++starting from ``0.0.1``. ++ ++Every time ABI is being changed, e.g. because a new symbol is added or ++semantic of existing symbol is changed, ABI version should be bumped. ++This bump in ABI version is at most once per kernel development cycle. ++ ++For example, if current state of ``libbpf.map`` is: ++ ++.. code-block:: none ++ ++ LIBBPF_0.0.1 { ++ global: ++ bpf_func_a; ++ bpf_func_b; ++ local: ++ \*; ++ }; ++ ++, and a new symbol ``bpf_func_c`` is being introduced, then ++``libbpf.map`` should be changed like this: ++ ++.. code-block:: none ++ ++ LIBBPF_0.0.1 { ++ global: ++ bpf_func_a; ++ bpf_func_b; ++ local: ++ \*; ++ }; ++ LIBBPF_0.0.2 { ++ global: ++ bpf_func_c; ++ } LIBBPF_0.0.1; ++ ++, where new version ``LIBBPF_0.0.2`` depends on the previous ++``LIBBPF_0.0.1``. ++ ++Format of version script and ways to handle ABI changes, including ++incompatible ones, described in details in [1]. ++ ++Stand-alone build ++------------------- ++ ++Under https://github.com/libbpf/libbpf there is a (semi-)automated ++mirror of the mainline's version of libbpf for a stand-alone build. ++ ++However, all changes to libbpf's code base must be upstreamed through ++the mainline kernel tree. ++ ++ ++API documentation convention ++============================ ++ ++The libbpf API is documented via comments above definitions in ++header files. These comments can be rendered by doxygen and sphinx ++for well organized html output. This section describes the ++convention in which these comments should be formated. ++ ++Here is an example from btf.h: ++ ++.. code-block:: c ++ ++ /** ++ * @brief **btf__new()** creates a new instance of a BTF object from the raw ++ * bytes of an ELF's BTF section ++ * @param data raw bytes ++ * @param size number of bytes passed in `data` ++ * @return new BTF object instance which has to be eventually freed with ++ * **btf__free()** ++ * ++ * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract ++ * error code from such a pointer `libbpf_get_error()` should be used. If ++ * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is ++ * returned on error instead. In both cases thread-local `errno` variable is ++ * always set to error code as well. ++ */ ++ ++The comment must start with a block comment of the form '/\*\*'. ++ ++The documentation always starts with a @brief directive. This line is a short ++description about this API. It starts with the name of the API, denoted in bold ++like so: **api_name**. Please include an open and close parenthesis if this is a ++function. Follow with the short description of the API. A longer form description ++can be added below the last directive, at the bottom of the comment. 
++ ++Parameters are denoted with the @param directive, there should be one for each ++parameter. If this is a function with a non-void return, use the @return directive ++to document it. ++ ++License ++------------------- ++ ++libbpf is dual-licensed under LGPL 2.1 and BSD 2-Clause. ++ ++Links ++------------------- ++ ++[1] https://www.akkadia.org/drepper/dsohowto.pdf ++ (Chapter 3. Maintaining APIs and ABIs). +diff --git a/src/cc/libbpf/docs/sphinx/Makefile b/src/cc/libbpf/docs/sphinx/Makefile +new file mode 100644 +index 0000000..5dc39c5 +--- /dev/null ++++ b/src/cc/libbpf/docs/sphinx/Makefile +@@ -0,0 +1,9 @@ ++SPHINXBUILD ?= sphinx-build ++SOURCEDIR = ../src ++BUILDDIR = build ++ ++help: ++ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" ++ ++%: ++ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" +diff --git a/src/cc/libbpf/docs/sphinx/doxygen/Doxyfile b/src/cc/libbpf/docs/sphinx/doxygen/Doxyfile +new file mode 100644 +index 0000000..b04c115 +--- /dev/null ++++ b/src/cc/libbpf/docs/sphinx/doxygen/Doxyfile +@@ -0,0 +1,277 @@ ++DOXYFILE_ENCODING = UTF-8 ++PROJECT_NAME = "libbpf" ++PROJECT_NUMBER = ++PROJECT_BRIEF = ++PROJECT_LOGO = ++OUTPUT_DIRECTORY = ./build ++CREATE_SUBDIRS = NO ++ALLOW_UNICODE_NAMES = NO ++OUTPUT_LANGUAGE = English ++OUTPUT_TEXT_DIRECTION = None ++BRIEF_MEMBER_DESC = YES ++REPEAT_BRIEF = YES ++ALWAYS_DETAILED_SEC = NO ++INLINE_INHERITED_MEMB = NO ++FULL_PATH_NAMES = YES ++STRIP_FROM_PATH = ++STRIP_FROM_INC_PATH = ++SHORT_NAMES = NO ++JAVADOC_AUTOBRIEF = NO ++JAVADOC_BANNER = NO ++QT_AUTOBRIEF = NO ++MULTILINE_CPP_IS_BRIEF = NO ++PYTHON_DOCSTRING = NO ++INHERIT_DOCS = YES ++SEPARATE_MEMBER_PAGES = NO ++TAB_SIZE = 4 ++ALIASES = ++OPTIMIZE_OUTPUT_FOR_C = YES ++OPTIMIZE_OUTPUT_JAVA = NO ++OPTIMIZE_FOR_FORTRAN = NO ++OPTIMIZE_OUTPUT_VHDL = NO ++OPTIMIZE_OUTPUT_SLICE = NO ++EXTENSION_MAPPING = ++MARKDOWN_SUPPORT = YES ++TOC_INCLUDE_HEADINGS = 5 ++AUTOLINK_SUPPORT = YES ++BUILTIN_STL_SUPPORT = NO ++CPP_CLI_SUPPORT = NO ++SIP_SUPPORT = NO ++IDL_PROPERTY_SUPPORT = YES ++DISTRIBUTE_GROUP_DOC = NO ++GROUP_NESTED_COMPOUNDS = NO ++SUBGROUPING = YES ++INLINE_GROUPED_CLASSES = NO ++INLINE_SIMPLE_STRUCTS = NO ++TYPEDEF_HIDES_STRUCT = NO ++LOOKUP_CACHE_SIZE = 0 ++NUM_PROC_THREADS = 1 ++EXTRACT_ALL = NO ++EXTRACT_PRIVATE = NO ++EXTRACT_PRIV_VIRTUAL = NO ++EXTRACT_PACKAGE = NO ++EXTRACT_STATIC = NO ++EXTRACT_LOCAL_CLASSES = YES ++EXTRACT_LOCAL_METHODS = NO ++EXTRACT_ANON_NSPACES = NO ++RESOLVE_UNNAMED_PARAMS = YES ++HIDE_UNDOC_MEMBERS = NO ++HIDE_UNDOC_CLASSES = NO ++HIDE_FRIEND_COMPOUNDS = NO ++HIDE_IN_BODY_DOCS = NO ++INTERNAL_DOCS = NO ++CASE_SENSE_NAMES = YES ++HIDE_SCOPE_NAMES = NO ++HIDE_COMPOUND_REFERENCE= NO ++SHOW_INCLUDE_FILES = YES ++SHOW_GROUPED_MEMB_INC = NO ++FORCE_LOCAL_INCLUDES = NO ++INLINE_INFO = YES ++SORT_MEMBER_DOCS = YES ++SORT_BRIEF_DOCS = NO ++SORT_MEMBERS_CTORS_1ST = NO ++SORT_GROUP_NAMES = NO ++SORT_BY_SCOPE_NAME = NO ++STRICT_PROTO_MATCHING = NO ++GENERATE_TODOLIST = YES ++GENERATE_TESTLIST = YES ++GENERATE_BUGLIST = YES ++GENERATE_DEPRECATEDLIST= YES ++ENABLED_SECTIONS = ++MAX_INITIALIZER_LINES = 30 ++SHOW_USED_FILES = YES ++SHOW_FILES = YES ++SHOW_NAMESPACES = YES ++FILE_VERSION_FILTER = ++LAYOUT_FILE = ++CITE_BIB_FILES = ++QUIET = NO ++WARNINGS = YES ++WARN_IF_UNDOCUMENTED = YES ++WARN_IF_DOC_ERROR = YES ++WARN_NO_PARAMDOC = NO ++WARN_AS_ERROR = NO ++WARN_FORMAT = "$file:$line: $text" ++WARN_LOGFILE = ++INPUT = ../../../src ++INPUT_ENCODING = UTF-8 ++FILE_PATTERNS = *.c \ ++ *.h ++RECURSIVE = NO ++EXCLUDE = ++EXCLUDE_SYMLINKS = NO ++EXCLUDE_PATTERNS 
= ++EXCLUDE_SYMBOLS = ___* ++EXAMPLE_PATH = ++EXAMPLE_PATTERNS = * ++EXAMPLE_RECURSIVE = NO ++IMAGE_PATH = ++INPUT_FILTER = ++FILTER_PATTERNS = ++FILTER_SOURCE_FILES = NO ++FILTER_SOURCE_PATTERNS = ++USE_MDFILE_AS_MAINPAGE = YES ++SOURCE_BROWSER = NO ++INLINE_SOURCES = NO ++STRIP_CODE_COMMENTS = YES ++REFERENCED_BY_RELATION = NO ++REFERENCES_RELATION = NO ++REFERENCES_LINK_SOURCE = YES ++SOURCE_TOOLTIPS = YES ++USE_HTAGS = NO ++VERBATIM_HEADERS = YES ++ALPHABETICAL_INDEX = YES ++IGNORE_PREFIX = ++GENERATE_HTML = NO ++HTML_OUTPUT = html ++HTML_FILE_EXTENSION = .html ++HTML_HEADER = ++HTML_FOOTER = ++HTML_STYLESHEET = ++HTML_EXTRA_STYLESHEET = ++HTML_EXTRA_FILES = ++HTML_COLORSTYLE_HUE = 220 ++HTML_COLORSTYLE_SAT = 100 ++HTML_COLORSTYLE_GAMMA = 80 ++HTML_TIMESTAMP = NO ++HTML_DYNAMIC_MENUS = YES ++HTML_DYNAMIC_SECTIONS = NO ++HTML_INDEX_NUM_ENTRIES = 100 ++GENERATE_DOCSET = NO ++DOCSET_FEEDNAME = "Doxygen generated docs" ++DOCSET_BUNDLE_ID = org.doxygen.Project ++DOCSET_PUBLISHER_ID = org.doxygen.Publisher ++DOCSET_PUBLISHER_NAME = Publisher ++GENERATE_HTMLHELP = NO ++CHM_FILE = ++HHC_LOCATION = ++GENERATE_CHI = NO ++CHM_INDEX_ENCODING = ++BINARY_TOC = NO ++TOC_EXPAND = NO ++GENERATE_QHP = NO ++QCH_FILE = ++QHP_NAMESPACE = org.doxygen.Project ++QHP_VIRTUAL_FOLDER = doc ++QHP_CUST_FILTER_NAME = ++QHP_CUST_FILTER_ATTRS = ++QHP_SECT_FILTER_ATTRS = ++QHG_LOCATION = ++GENERATE_ECLIPSEHELP = NO ++ECLIPSE_DOC_ID = org.doxygen.Project ++DISABLE_INDEX = NO ++GENERATE_TREEVIEW = NO ++ENUM_VALUES_PER_LINE = 4 ++TREEVIEW_WIDTH = 250 ++EXT_LINKS_IN_WINDOW = NO ++HTML_FORMULA_FORMAT = png ++FORMULA_FONTSIZE = 10 ++FORMULA_TRANSPARENT = YES ++FORMULA_MACROFILE = ++USE_MATHJAX = NO ++MATHJAX_FORMAT = HTML-CSS ++MATHJAX_RELPATH = https://cdn.jsdelivr.net/npm/mathjax@2 ++MATHJAX_EXTENSIONS = ++MATHJAX_CODEFILE = ++SEARCHENGINE = YES ++SERVER_BASED_SEARCH = NO ++EXTERNAL_SEARCH = NO ++SEARCHENGINE_URL = ++SEARCHDATA_FILE = searchdata.xml ++EXTERNAL_SEARCH_ID = ++EXTRA_SEARCH_MAPPINGS = ++GENERATE_LATEX = NO ++LATEX_OUTPUT = latex ++LATEX_CMD_NAME = ++MAKEINDEX_CMD_NAME = makeindex ++LATEX_MAKEINDEX_CMD = makeindex ++COMPACT_LATEX = NO ++PAPER_TYPE = a4 ++EXTRA_PACKAGES = ++LATEX_HEADER = ++LATEX_FOOTER = ++LATEX_EXTRA_STYLESHEET = ++LATEX_EXTRA_FILES = ++PDF_HYPERLINKS = YES ++USE_PDFLATEX = YES ++LATEX_BATCHMODE = NO ++LATEX_HIDE_INDICES = NO ++LATEX_SOURCE_CODE = NO ++LATEX_BIB_STYLE = plain ++LATEX_TIMESTAMP = NO ++LATEX_EMOJI_DIRECTORY = ++GENERATE_RTF = NO ++RTF_OUTPUT = rtf ++COMPACT_RTF = NO ++RTF_HYPERLINKS = NO ++RTF_STYLESHEET_FILE = ++RTF_EXTENSIONS_FILE = ++RTF_SOURCE_CODE = NO ++GENERATE_MAN = NO ++MAN_OUTPUT = man ++MAN_EXTENSION = .3 ++MAN_SUBDIR = ++MAN_LINKS = NO ++GENERATE_XML = YES ++XML_OUTPUT = xml ++XML_PROGRAMLISTING = YES ++XML_NS_MEMB_FILE_SCOPE = NO ++GENERATE_DOCBOOK = NO ++DOCBOOK_OUTPUT = docbook ++DOCBOOK_PROGRAMLISTING = NO ++GENERATE_AUTOGEN_DEF = NO ++GENERATE_PERLMOD = NO ++PERLMOD_LATEX = NO ++PERLMOD_PRETTY = YES ++PERLMOD_MAKEVAR_PREFIX = ++ENABLE_PREPROCESSING = YES ++MACRO_EXPANSION = NO ++EXPAND_ONLY_PREDEF = YES ++SEARCH_INCLUDES = YES ++INCLUDE_PATH = ++INCLUDE_FILE_PATTERNS = ++PREDEFINED = ++EXPAND_AS_DEFINED = ++SKIP_FUNCTION_MACROS = NO ++TAGFILES = ++GENERATE_TAGFILE = ++ALLEXTERNALS = NO ++EXTERNAL_GROUPS = YES ++EXTERNAL_PAGES = YES ++CLASS_DIAGRAMS = YES ++DIA_PATH = ++HIDE_UNDOC_RELATIONS = YES ++HAVE_DOT = NO ++DOT_NUM_THREADS = 0 ++DOT_FONTNAME = Helvetica ++DOT_FONTSIZE = 10 ++DOT_FONTPATH = ++CLASS_GRAPH = YES ++COLLABORATION_GRAPH = YES ++GROUP_GRAPHS = 
YES ++UML_LOOK = NO ++UML_LIMIT_NUM_FIELDS = 10 ++DOT_UML_DETAILS = NO ++DOT_WRAP_THRESHOLD = 17 ++TEMPLATE_RELATIONS = NO ++INCLUDE_GRAPH = YES ++INCLUDED_BY_GRAPH = YES ++CALL_GRAPH = NO ++CALLER_GRAPH = NO ++GRAPHICAL_HIERARCHY = YES ++DIRECTORY_GRAPH = YES ++DOT_IMAGE_FORMAT = png ++INTERACTIVE_SVG = NO ++DOT_PATH = ++DOTFILE_DIRS = ++MSCFILE_DIRS = ++DIAFILE_DIRS = ++PLANTUML_JAR_PATH = ++PLANTUML_CFG_FILE = ++PLANTUML_INCLUDE_PATH = ++DOT_GRAPH_MAX_NODES = 50 ++MAX_DOT_GRAPH_DEPTH = 0 ++DOT_TRANSPARENT = NO ++DOT_MULTI_TARGETS = NO ++GENERATE_LEGEND = YES ++DOT_CLEANUP = YES +diff --git a/src/cc/libbpf/docs/sphinx/requirements.txt b/src/cc/libbpf/docs/sphinx/requirements.txt +new file mode 100644 +index 0000000..188f51e +--- /dev/null ++++ b/src/cc/libbpf/docs/sphinx/requirements.txt +@@ -0,0 +1 @@ ++breathe +\ No newline at end of file +diff --git a/src/cc/libbpf/fuzz/bpf-object-fuzzer.c b/src/cc/libbpf/fuzz/bpf-object-fuzzer.c +new file mode 100644 +index 0000000..89286e2 +--- /dev/null ++++ b/src/cc/libbpf/fuzz/bpf-object-fuzzer.c +@@ -0,0 +1,23 @@ ++#include "libbpf.h" ++ ++static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) ++{ ++ return 0; ++} ++ ++int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { ++ struct bpf_object *obj = NULL; ++ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); ++ int err; ++ ++ libbpf_set_print(libbpf_print_fn); ++ ++ opts.object_name = "fuzz-object"; ++ obj = bpf_object__open_mem(data, size, &opts); ++ err = libbpf_get_error(obj); ++ if (err) ++ return 0; ++ ++ bpf_object__close(obj); ++ return 0; ++} +diff --git a/src/cc/libbpf/include/asm/barrier.h b/src/cc/libbpf/include/asm/barrier.h +new file mode 100644 +index 0000000..1fc6aee +--- /dev/null ++++ b/src/cc/libbpf/include/asm/barrier.h +@@ -0,0 +1,7 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++#ifndef __ASM_BARRIER_H ++#define __ASM_BARRIER_H ++ ++#include ++ ++#endif +diff --git a/src/cc/libbpf/include/linux/compiler.h b/src/cc/libbpf/include/linux/compiler.h +new file mode 100644 +index 0000000..26336dc +--- /dev/null ++++ b/src/cc/libbpf/include/linux/compiler.h +@@ -0,0 +1,70 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++ ++#ifndef __LINUX_COMPILER_H ++#define __LINUX_COMPILER_H ++ ++#define likely(x) __builtin_expect(!!(x), 1) ++#define unlikely(x) __builtin_expect(!!(x), 0) ++ ++#define READ_ONCE(x) (*(volatile typeof(x) *)&x) ++#define WRITE_ONCE(x, v) (*(volatile typeof(x) *)&x) = (v) ++ ++#define barrier() asm volatile("" ::: "memory") ++ ++#if defined(__x86_64__) ++ ++# define smp_rmb() barrier() ++# define smp_wmb() barrier() ++# define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc") ++ ++# define smp_store_release(p, v) \ ++do { \ ++ barrier(); \ ++ WRITE_ONCE(*p, v); \ ++} while (0) ++ ++# define smp_load_acquire(p) \ ++({ \ ++ typeof(*p) ___p = READ_ONCE(*p); \ ++ barrier(); \ ++ ___p; \ ++}) ++ ++#elif defined(__aarch64__) ++ ++# define smp_rmb() asm volatile("dmb ishld" ::: "memory") ++# define smp_wmb() asm volatile("dmb ishst" ::: "memory") ++# define smp_mb() asm volatile("dmb ish" ::: "memory") ++ ++#endif ++ ++#ifndef smp_mb ++# define smp_mb() __sync_synchronize() ++#endif ++ ++#ifndef smp_rmb ++# define smp_rmb() smp_mb() ++#endif ++ ++#ifndef smp_wmb ++# define smp_wmb() smp_mb() ++#endif ++ ++#ifndef smp_store_release ++# define smp_store_release(p, v) \ ++do { \ ++ smp_mb(); \ ++ WRITE_ONCE(*p, v); \ ++} while (0) ++#endif ++ ++#ifndef smp_load_acquire ++# define 
smp_load_acquire(p) \ ++({ \ ++ typeof(*p) ___p = READ_ONCE(*p); \ ++ smp_mb(); \ ++ ___p; \ ++}) ++#endif ++ ++#endif /* __LINUX_COMPILER_H */ +diff --git a/src/cc/libbpf/include/linux/err.h b/src/cc/libbpf/include/linux/err.h +new file mode 100644 +index 0000000..1b1dafb +--- /dev/null ++++ b/src/cc/libbpf/include/linux/err.h +@@ -0,0 +1,38 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++ ++#ifndef __LINUX_ERR_H ++#define __LINUX_ERR_H ++ ++#include ++#include ++ ++#define MAX_ERRNO 4095 ++ ++#define IS_ERR_VALUE(x) ((x) >= (unsigned long)-MAX_ERRNO) ++ ++static inline void * ERR_PTR(long error_) ++{ ++ return (void *) error_; ++} ++ ++static inline long PTR_ERR(const void *ptr) ++{ ++ return (long) ptr; ++} ++ ++static inline bool IS_ERR(const void *ptr) ++{ ++ return IS_ERR_VALUE((unsigned long)ptr); ++} ++ ++static inline bool IS_ERR_OR_NULL(const void *ptr) ++{ ++ return (!ptr) || IS_ERR_VALUE((unsigned long)ptr); ++} ++ ++static inline long PTR_ERR_OR_ZERO(const void *ptr) ++{ ++ return IS_ERR(ptr) ? PTR_ERR(ptr) : 0; ++} ++ ++#endif +diff --git a/src/cc/libbpf/include/linux/filter.h b/src/cc/libbpf/include/linux/filter.h +new file mode 100644 +index 0000000..e7e3373 +--- /dev/null ++++ b/src/cc/libbpf/include/linux/filter.h +@@ -0,0 +1,134 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++ ++#ifndef __LINUX_FILTER_H ++#define __LINUX_FILTER_H ++ ++#include ++ ++#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ ++ ((struct bpf_insn) { \ ++ .code = CODE, \ ++ .dst_reg = DST, \ ++ .src_reg = SRC, \ ++ .off = OFF, \ ++ .imm = IMM }) ++ ++#define BPF_ALU32_IMM(OP, DST, IMM) \ ++ ((struct bpf_insn) { \ ++ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ ++ .dst_reg = DST, \ ++ .src_reg = 0, \ ++ .off = 0, \ ++ .imm = IMM }) ++ ++#define BPF_ALU64_IMM(OP, DST, IMM) \ ++ ((struct bpf_insn) { \ ++ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ ++ .dst_reg = DST, \ ++ .src_reg = 0, \ ++ .off = 0, \ ++ .imm = IMM }) ++ ++#define BPF_MOV64_IMM(DST, IMM) \ ++ ((struct bpf_insn) { \ ++ .code = BPF_ALU64 | BPF_MOV | BPF_K, \ ++ .dst_reg = DST, \ ++ .src_reg = 0, \ ++ .off = 0, \ ++ .imm = IMM }) ++ ++#define BPF_EXIT_INSN() \ ++ ((struct bpf_insn) { \ ++ .code = BPF_JMP | BPF_EXIT, \ ++ .dst_reg = 0, \ ++ .src_reg = 0, \ ++ .off = 0, \ ++ .imm = 0 }) ++ ++#define BPF_EMIT_CALL(FUNC) \ ++ ((struct bpf_insn) { \ ++ .code = BPF_JMP | BPF_CALL, \ ++ .dst_reg = 0, \ ++ .src_reg = 0, \ ++ .off = 0, \ ++ .imm = ((FUNC) - BPF_FUNC_unspec) }) ++ ++#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ ++ ((struct bpf_insn) { \ ++ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ ++ .dst_reg = DST, \ ++ .src_reg = SRC, \ ++ .off = OFF, \ ++ .imm = 0 }) ++ ++#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ ++ ((struct bpf_insn) { \ ++ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ ++ .dst_reg = DST, \ ++ .src_reg = SRC, \ ++ .off = OFF, \ ++ .imm = 0 }) ++ ++#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ ++ ((struct bpf_insn) { \ ++ .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \ ++ .dst_reg = DST, \ ++ .src_reg = 0, \ ++ .off = OFF, \ ++ .imm = IMM }) ++ ++#define BPF_MOV64_REG(DST, SRC) \ ++ ((struct bpf_insn) { \ ++ .code = BPF_ALU64 | BPF_MOV | BPF_X, \ ++ .dst_reg = DST, \ ++ .src_reg = SRC, \ ++ .off = 0, \ ++ .imm = 0 }) ++ ++#define BPF_MOV32_IMM(DST, IMM) \ ++ ((struct bpf_insn) { \ ++ .code = BPF_ALU | BPF_MOV | BPF_K, \ ++ .dst_reg = DST, \ ++ .src_reg = 0, \ ++ .off = 0, \ ++ .imm = IMM }) ++ ++#define BPF_LD_IMM64_RAW_FULL(DST, SRC, OFF1, OFF2, IMM1, IMM2) \ ++ ((struct bpf_insn) { \ ++ .code = BPF_LD | 
BPF_DW | BPF_IMM, \ ++ .dst_reg = DST, \ ++ .src_reg = SRC, \ ++ .off = OFF1, \ ++ .imm = IMM1 }), \ ++ ((struct bpf_insn) { \ ++ .code = 0, \ ++ .dst_reg = 0, \ ++ .src_reg = 0, \ ++ .off = OFF2, \ ++ .imm = IMM2 }) ++ ++#define BPF_LD_MAP_FD(DST, MAP_FD) \ ++ BPF_LD_IMM64_RAW_FULL(DST, BPF_PSEUDO_MAP_FD, 0, 0, \ ++ MAP_FD, 0) ++ ++#define BPF_LD_MAP_VALUE(DST, MAP_FD, VALUE_OFF) \ ++ BPF_LD_IMM64_RAW_FULL(DST, BPF_PSEUDO_MAP_VALUE, 0, 0, \ ++ MAP_FD, VALUE_OFF) ++ ++#define BPF_JMP_IMM(OP, DST, IMM, OFF) \ ++ ((struct bpf_insn) { \ ++ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ ++ .dst_reg = DST, \ ++ .src_reg = 0, \ ++ .off = OFF, \ ++ .imm = IMM }) ++ ++#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ ++ ((struct bpf_insn) { \ ++ .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ ++ .dst_reg = DST, \ ++ .src_reg = 0, \ ++ .off = OFF, \ ++ .imm = IMM }) ++ ++#endif +diff --git a/src/cc/libbpf/include/linux/kernel.h b/src/cc/libbpf/include/linux/kernel.h +new file mode 100644 +index 0000000..a4a7a9d +--- /dev/null ++++ b/src/cc/libbpf/include/linux/kernel.h +@@ -0,0 +1,44 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++ ++#ifndef __LINUX_KERNEL_H ++#define __LINUX_KERNEL_H ++ ++#ifndef offsetof ++#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) ++#endif ++ ++#ifndef container_of ++#define container_of(ptr, type, member) ({ \ ++ const typeof(((type *)0)->member) * __mptr = (ptr); \ ++ (type *)((char *)__mptr - offsetof(type, member)); }) ++#endif ++ ++#ifndef max ++#define max(x, y) ({ \ ++ typeof(x) _max1 = (x); \ ++ typeof(y) _max2 = (y); \ ++ (void) (&_max1 == &_max2); \ ++ _max1 > _max2 ? _max1 : _max2; }) ++#endif ++ ++#ifndef min ++#define min(x, y) ({ \ ++ typeof(x) _min1 = (x); \ ++ typeof(y) _min2 = (y); \ ++ (void) (&_min1 == &_min2); \ ++ _min1 < _min2 ? _min1 : _min2; }) ++#endif ++ ++#ifndef roundup ++#define roundup(x, y) ( \ ++{ \ ++ const typeof(y) __y = y; \ ++ (((x) + (__y - 1)) / __y) * __y; \ ++} \ ++) ++#endif ++ ++#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) ++#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) ++ ++#endif +diff --git a/src/cc/libbpf/include/linux/list.h b/src/cc/libbpf/include/linux/list.h +new file mode 100644 +index 0000000..fc91c34 +--- /dev/null ++++ b/src/cc/libbpf/include/linux/list.h +@@ -0,0 +1,91 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++ ++#ifndef __LINUX_LIST_H ++#define __LINUX_LIST_H ++ ++#define LIST_HEAD_INIT(name) { &(name), &(name) } ++#define LIST_HEAD(name) \ ++ struct list_head name = LIST_HEAD_INIT(name) ++ ++#define POISON_POINTER_DELTA 0 ++#define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA) ++#define LIST_POISON2 ((void *) 0x200 + POISON_POINTER_DELTA) ++ ++ ++static inline void INIT_LIST_HEAD(struct list_head *list) ++{ ++ list->next = list; ++ list->prev = list; ++} ++ ++static inline void __list_add(struct list_head *new, ++ struct list_head *prev, ++ struct list_head *next) ++{ ++ next->prev = new; ++ new->next = next; ++ new->prev = prev; ++ prev->next = new; ++} ++ ++/** ++ * list_add - add a new entry ++ * @new: new entry to be added ++ * @head: list head to add it after ++ * ++ * Insert a new entry after the specified head. ++ * This is good for implementing stacks. ++ */ ++static inline void list_add(struct list_head *new, struct list_head *head) ++{ ++ __list_add(new, head, head->next); ++} ++ ++/* ++ * Delete a list entry by making the prev/next entries ++ * point to each other. 
++ * ++ * This is only for internal list manipulation where we know ++ * the prev/next entries already! ++ */ ++static inline void __list_del(struct list_head * prev, struct list_head * next) ++{ ++ next->prev = prev; ++ prev->next = next; ++} ++ ++/** ++ * list_del - deletes entry from list. ++ * @entry: the element to delete from the list. ++ * Note: list_empty() on entry does not return true after this, the entry is ++ * in an undefined state. ++ */ ++static inline void __list_del_entry(struct list_head *entry) ++{ ++ __list_del(entry->prev, entry->next); ++} ++ ++static inline void list_del(struct list_head *entry) ++{ ++ __list_del(entry->prev, entry->next); ++ entry->next = LIST_POISON1; ++ entry->prev = LIST_POISON2; ++} ++ ++static inline int list_empty(const struct list_head *head) ++{ ++ return head->next == head; ++} ++ ++#define list_entry(ptr, type, member) \ ++ container_of(ptr, type, member) ++#define list_first_entry(ptr, type, member) \ ++ list_entry((ptr)->next, type, member) ++#define list_next_entry(pos, member) \ ++ list_entry((pos)->member.next, typeof(*(pos)), member) ++#define list_for_each_entry(pos, head, member) \ ++ for (pos = list_first_entry(head, typeof(*pos), member); \ ++ &pos->member != (head); \ ++ pos = list_next_entry(pos, member)) ++ ++#endif +diff --git a/src/cc/libbpf/include/linux/overflow.h b/src/cc/libbpf/include/linux/overflow.h +new file mode 100644 +index 0000000..53d7580 +--- /dev/null ++++ b/src/cc/libbpf/include/linux/overflow.h +@@ -0,0 +1,90 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++ ++#ifndef __LINUX_OVERFLOW_H ++#define __LINUX_OVERFLOW_H ++ ++#define is_signed_type(type) (((type)(-1)) < (type)1) ++#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type))) ++#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) ++#define type_min(T) ((T)((T)-type_max(T)-(T)1)) ++ ++#ifndef unlikely ++#define unlikely(x) __builtin_expect(!!(x), 0) ++#endif ++ ++#ifdef __GNUC__ ++#define GCC_VERSION (__GNUC__ * 10000 \ ++ + __GNUC_MINOR__ * 100 \ ++ + __GNUC_PATCHLEVEL__) ++#if GCC_VERSION >= 50100 ++#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 ++#endif ++#endif ++ ++#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW ++ ++#define check_mul_overflow(a, b, d) ({ \ ++ typeof(a) __a = (a); \ ++ typeof(b) __b = (b); \ ++ typeof(d) __d = (d); \ ++ (void) (&__a == &__b); \ ++ (void) (&__a == __d); \ ++ __builtin_mul_overflow(__a, __b, __d); \ ++}) ++ ++#else ++ ++/* ++ * If one of a or b is a compile-time constant, this avoids a division. ++ */ ++#define __unsigned_mul_overflow(a, b, d) ({ \ ++ typeof(a) __a = (a); \ ++ typeof(b) __b = (b); \ ++ typeof(d) __d = (d); \ ++ (void) (&__a == &__b); \ ++ (void) (&__a == __d); \ ++ *__d = __a * __b; \ ++ __builtin_constant_p(__b) ? \ ++ __b > 0 && __a > type_max(typeof(__a)) / __b : \ ++ __a > 0 && __b > type_max(typeof(__b)) / __a; \ ++}) ++ ++/* ++ * Signed multiplication is rather hard. gcc always follows C99, so ++ * division is truncated towards 0. This means that we can write the ++ * overflow check like this: ++ * ++ * (a > 0 && (b > MAX/a || b < MIN/a)) || ++ * (a < -1 && (b > MIN/a || b < MAX/a) || ++ * (a == -1 && b == MIN) ++ * ++ * The redundant casts of -1 are to silence an annoying -Wtype-limits ++ * (included in -Wextra) warning: When the type is u8 or u16, the ++ * __b_c_e in check_mul_overflow obviously selects ++ * __unsigned_mul_overflow, but unfortunately gcc still parses this ++ * code and warns about the limited range of __b. 
++ */ ++ ++#define __signed_mul_overflow(a, b, d) ({ \ ++ typeof(a) __a = (a); \ ++ typeof(b) __b = (b); \ ++ typeof(d) __d = (d); \ ++ typeof(a) __tmax = type_max(typeof(a)); \ ++ typeof(a) __tmin = type_min(typeof(a)); \ ++ (void) (&__a == &__b); \ ++ (void) (&__a == __d); \ ++ *__d = (__u64)__a * (__u64)__b; \ ++ (__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \ ++ (__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \ ++ (__b == (typeof(__b))-1 && __a == __tmin); \ ++}) ++ ++#define check_mul_overflow(a, b, d) \ ++ __builtin_choose_expr(is_signed_type(typeof(a)), \ ++ __signed_mul_overflow(a, b, d), \ ++ __unsigned_mul_overflow(a, b, d)) ++ ++ ++#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */ ++ ++#endif +diff --git a/src/cc/libbpf/include/linux/ring_buffer.h b/src/cc/libbpf/include/linux/ring_buffer.h +new file mode 100644 +index 0000000..fc4677b +--- /dev/null ++++ b/src/cc/libbpf/include/linux/ring_buffer.h +@@ -0,0 +1,18 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++#ifndef _TOOLS_LINUX_RING_BUFFER_H_ ++#define _TOOLS_LINUX_RING_BUFFER_H_ ++ ++#include ++ ++static inline __u64 ring_buffer_read_head(struct perf_event_mmap_page *base) ++{ ++ return smp_load_acquire(&base->data_head); ++} ++ ++static inline void ring_buffer_write_tail(struct perf_event_mmap_page *base, ++ __u64 tail) ++{ ++ smp_store_release(&base->data_tail, tail); ++} ++ ++#endif /* _TOOLS_LINUX_RING_BUFFER_H_ */ +diff --git a/src/cc/libbpf/include/linux/types.h b/src/cc/libbpf/include/linux/types.h +new file mode 100644 +index 0000000..b15252a +--- /dev/null ++++ b/src/cc/libbpf/include/linux/types.h +@@ -0,0 +1,33 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++ ++#ifndef __LINUX_TYPES_H ++#define __LINUX_TYPES_H ++ ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++ ++#define __bitwise__ ++#define __bitwise __bitwise__ ++ ++typedef __u16 __bitwise __le16; ++typedef __u16 __bitwise __be16; ++typedef __u32 __bitwise __le32; ++typedef __u32 __bitwise __be32; ++typedef __u64 __bitwise __le64; ++typedef __u64 __bitwise __be64; ++ ++#ifndef __aligned_u64 ++# define __aligned_u64 __u64 __attribute__((aligned(8))) ++#endif ++ ++struct list_head { ++ struct list_head *next, *prev; ++}; ++ ++#endif +diff --git a/src/cc/libbpf/include/uapi/linux/bpf.h b/src/cc/libbpf/include/uapi/linux/bpf.h +new file mode 100644 +index 0000000..59a217c +--- /dev/null ++++ b/src/cc/libbpf/include/uapi/linux/bpf.h +@@ -0,0 +1,6851 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of version 2 of the GNU General Public ++ * License as published by the Free Software Foundation. 
++ */ ++#ifndef _UAPI__LINUX_BPF_H__ ++#define _UAPI__LINUX_BPF_H__ ++ ++#include ++#include ++ ++/* Extended instruction set based on top of classic BPF */ ++ ++/* instruction classes */ ++#define BPF_JMP32 0x06 /* jmp mode in word width */ ++#define BPF_ALU64 0x07 /* alu mode in double word width */ ++ ++/* ld/ldx fields */ ++#define BPF_DW 0x18 /* double word (64-bit) */ ++#define BPF_ATOMIC 0xc0 /* atomic memory ops - op type in immediate */ ++#define BPF_XADD 0xc0 /* exclusive add - legacy name */ ++ ++/* alu/jmp fields */ ++#define BPF_MOV 0xb0 /* mov reg to reg */ ++#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */ ++ ++/* change endianness of a register */ ++#define BPF_END 0xd0 /* flags for endianness conversion: */ ++#define BPF_TO_LE 0x00 /* convert to little-endian */ ++#define BPF_TO_BE 0x08 /* convert to big-endian */ ++#define BPF_FROM_LE BPF_TO_LE ++#define BPF_FROM_BE BPF_TO_BE ++ ++/* jmp encodings */ ++#define BPF_JNE 0x50 /* jump != */ ++#define BPF_JLT 0xa0 /* LT is unsigned, '<' */ ++#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */ ++#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ ++#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ ++#define BPF_JSLT 0xc0 /* SLT is signed, '<' */ ++#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */ ++#define BPF_CALL 0x80 /* function call */ ++#define BPF_EXIT 0x90 /* function return */ ++ ++/* atomic op type fields (stored in immediate) */ ++#define BPF_FETCH 0x01 /* not an opcode on its own, used to build others */ ++#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */ ++#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */ ++ ++/* Register numbers */ ++enum { ++ BPF_REG_0 = 0, ++ BPF_REG_1, ++ BPF_REG_2, ++ BPF_REG_3, ++ BPF_REG_4, ++ BPF_REG_5, ++ BPF_REG_6, ++ BPF_REG_7, ++ BPF_REG_8, ++ BPF_REG_9, ++ BPF_REG_10, ++ __MAX_BPF_REG, ++}; ++ ++/* BPF has 10 general purpose 64-bit registers and stack frame. */ ++#define MAX_BPF_REG __MAX_BPF_REG ++ ++struct bpf_insn { ++ __u8 code; /* opcode */ ++ __u8 dst_reg:4; /* dest register */ ++ __u8 src_reg:4; /* source register */ ++ __s16 off; /* signed offset */ ++ __s32 imm; /* signed immediate constant */ ++}; ++ ++/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ ++struct bpf_lpm_trie_key { ++ __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ ++ __u8 data[0]; /* Arbitrary size */ ++}; ++ ++struct bpf_cgroup_storage_key { ++ __u64 cgroup_inode_id; /* cgroup inode id */ ++ __u32 attach_type; /* program attach type (enum bpf_attach_type) */ ++}; ++ ++union bpf_iter_link_info { ++ struct { ++ __u32 map_fd; ++ } map; ++}; ++ ++/* BPF syscall commands, see bpf(2) man-page for more details. */ ++/** ++ * DOC: eBPF Syscall Preamble ++ * ++ * The operation to be performed by the **bpf**\ () system call is determined ++ * by the *cmd* argument. Each operation takes an accompanying argument, ++ * provided via *attr*, which is a pointer to a union of type *bpf_attr* (see ++ * below). The size argument is the size of the union pointed to by *attr*. ++ */ ++/** ++ * DOC: eBPF Syscall Commands ++ * ++ * BPF_MAP_CREATE ++ * Description ++ * Create a map and return a file descriptor that refers to the ++ * map. The close-on-exec file descriptor flag (see **fcntl**\ (2)) ++ * is automatically enabled for the new file descriptor. ++ * ++ * Applying **close**\ (2) to the file descriptor returned by ++ * **BPF_MAP_CREATE** will delete the map (but see NOTES). 
++ * ++ * Return ++ * A new file descriptor (a nonnegative integer), or -1 if an ++ * error occurred (in which case, *errno* is set appropriately). ++ * ++ * BPF_MAP_LOOKUP_ELEM ++ * Description ++ * Look up an element with a given *key* in the map referred to ++ * by the file descriptor *map_fd*. ++ * ++ * The *flags* argument may be specified as one of the ++ * following: ++ * ++ * **BPF_F_LOCK** ++ * Look up the value of a spin-locked map without ++ * returning the lock. This must be specified if the ++ * elements contain a spinlock. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_MAP_UPDATE_ELEM ++ * Description ++ * Create or update an element (key/value pair) in a specified map. ++ * ++ * The *flags* argument should be specified as one of the ++ * following: ++ * ++ * **BPF_ANY** ++ * Create a new element or update an existing element. ++ * **BPF_NOEXIST** ++ * Create a new element only if it did not exist. ++ * **BPF_EXIST** ++ * Update an existing element. ++ * **BPF_F_LOCK** ++ * Update a spin_lock-ed map element. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, ++ * **E2BIG**, **EEXIST**, or **ENOENT**. ++ * ++ * **E2BIG** ++ * The number of elements in the map reached the ++ * *max_entries* limit specified at map creation time. ++ * **EEXIST** ++ * If *flags* specifies **BPF_NOEXIST** and the element ++ * with *key* already exists in the map. ++ * **ENOENT** ++ * If *flags* specifies **BPF_EXIST** and the element with ++ * *key* does not exist in the map. ++ * ++ * BPF_MAP_DELETE_ELEM ++ * Description ++ * Look up and delete an element by key in a specified map. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_MAP_GET_NEXT_KEY ++ * Description ++ * Look up an element by key in a specified map and return the key ++ * of the next element. Can be used to iterate over all elements ++ * in the map. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * The following cases can be used to iterate over all elements of ++ * the map: ++ * ++ * * If *key* is not found, the operation returns zero and sets ++ * the *next_key* pointer to the key of the first element. ++ * * If *key* is found, the operation returns zero and sets the ++ * *next_key* pointer to the key of the next element. ++ * * If *key* is the last element, returns -1 and *errno* is set ++ * to **ENOENT**. ++ * ++ * May set *errno* to **ENOMEM**, **EFAULT**, **EPERM**, or ++ * **EINVAL** on error. ++ * ++ * BPF_PROG_LOAD ++ * Description ++ * Verify and load an eBPF program, returning a new file ++ * descriptor associated with the program. ++ * ++ * Applying **close**\ (2) to the file descriptor returned by ++ * **BPF_PROG_LOAD** will unload the eBPF program (but see NOTES). ++ * ++ * The close-on-exec file descriptor flag (see **fcntl**\ (2)) is ++ * automatically enabled for the new file descriptor. ++ * ++ * Return ++ * A new file descriptor (a nonnegative integer), or -1 if an ++ * error occurred (in which case, *errno* is set appropriately). ++ * ++ * BPF_OBJ_PIN ++ * Description ++ * Pin an eBPF program or map referred by the specified *bpf_fd* ++ * to the provided *pathname* on the filesystem. ++ * ++ * The *pathname* argument must not contain a dot ("."). 
++ * ++ * On success, *pathname* retains a reference to the eBPF object, ++ * preventing deallocation of the object when the original ++ * *bpf_fd* is closed. This allow the eBPF object to live beyond ++ * **close**\ (\ *bpf_fd*\ ), and hence the lifetime of the parent ++ * process. ++ * ++ * Applying **unlink**\ (2) or similar calls to the *pathname* ++ * unpins the object from the filesystem, removing the reference. ++ * If no other file descriptors or filesystem nodes refer to the ++ * same object, it will be deallocated (see NOTES). ++ * ++ * The filesystem type for the parent directory of *pathname* must ++ * be **BPF_FS_MAGIC**. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_OBJ_GET ++ * Description ++ * Open a file descriptor for the eBPF object pinned to the ++ * specified *pathname*. ++ * ++ * Return ++ * A new file descriptor (a nonnegative integer), or -1 if an ++ * error occurred (in which case, *errno* is set appropriately). ++ * ++ * BPF_PROG_ATTACH ++ * Description ++ * Attach an eBPF program to a *target_fd* at the specified ++ * *attach_type* hook. ++ * ++ * The *attach_type* specifies the eBPF attachment point to ++ * attach the program to, and must be one of *bpf_attach_type* ++ * (see below). ++ * ++ * The *attach_bpf_fd* must be a valid file descriptor for a ++ * loaded eBPF program of a cgroup, flow dissector, LIRC, sockmap ++ * or sock_ops type corresponding to the specified *attach_type*. ++ * ++ * The *target_fd* must be a valid file descriptor for a kernel ++ * object which depends on the attach type of *attach_bpf_fd*: ++ * ++ * **BPF_PROG_TYPE_CGROUP_DEVICE**, ++ * **BPF_PROG_TYPE_CGROUP_SKB**, ++ * **BPF_PROG_TYPE_CGROUP_SOCK**, ++ * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, ++ * **BPF_PROG_TYPE_CGROUP_SOCKOPT**, ++ * **BPF_PROG_TYPE_CGROUP_SYSCTL**, ++ * **BPF_PROG_TYPE_SOCK_OPS** ++ * ++ * Control Group v2 hierarchy with the eBPF controller ++ * enabled. Requires the kernel to be compiled with ++ * **CONFIG_CGROUP_BPF**. ++ * ++ * **BPF_PROG_TYPE_FLOW_DISSECTOR** ++ * ++ * Network namespace (eg /proc/self/ns/net). ++ * ++ * **BPF_PROG_TYPE_LIRC_MODE2** ++ * ++ * LIRC device path (eg /dev/lircN). Requires the kernel ++ * to be compiled with **CONFIG_BPF_LIRC_MODE2**. ++ * ++ * **BPF_PROG_TYPE_SK_SKB**, ++ * **BPF_PROG_TYPE_SK_MSG** ++ * ++ * eBPF map of socket type (eg **BPF_MAP_TYPE_SOCKHASH**). ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_PROG_DETACH ++ * Description ++ * Detach the eBPF program associated with the *target_fd* at the ++ * hook specified by *attach_type*. The program must have been ++ * previously attached using **BPF_PROG_ATTACH**. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_PROG_TEST_RUN ++ * Description ++ * Run the eBPF program associated with the *prog_fd* a *repeat* ++ * number of times against a provided program context *ctx_in* and ++ * data *data_in*, and return the modified program context ++ * *ctx_out*, *data_out* (for example, packet data), result of the ++ * execution *retval*, and *duration* of the test run. ++ * ++ * The sizes of the buffers provided as input and output ++ * parameters *ctx_in*, *ctx_out*, *data_in*, and *data_out* must ++ * be provided in the corresponding variables *ctx_size_in*, ++ * *ctx_size_out*, *data_size_in*, and/or *data_size_out*. 
If any ++ * of these parameters are not provided (ie set to NULL), the ++ * corresponding size field must be zero. ++ * ++ * Some program types have particular requirements: ++ * ++ * **BPF_PROG_TYPE_SK_LOOKUP** ++ * *data_in* and *data_out* must be NULL. ++ * ++ * **BPF_PROG_TYPE_RAW_TRACEPOINT**, ++ * **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE** ++ * ++ * *ctx_out*, *data_in* and *data_out* must be NULL. ++ * *repeat* must be zero. ++ * ++ * BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * **ENOSPC** ++ * Either *data_size_out* or *ctx_size_out* is too small. ++ * **ENOTSUPP** ++ * This command is not supported by the program type of ++ * the program referred to by *prog_fd*. ++ * ++ * BPF_PROG_GET_NEXT_ID ++ * Description ++ * Fetch the next eBPF program currently loaded into the kernel. ++ * ++ * Looks for the eBPF program with an id greater than *start_id* ++ * and updates *next_id* on success. If no other eBPF programs ++ * remain with ids higher than *start_id*, returns -1 and sets ++ * *errno* to **ENOENT**. ++ * ++ * Return ++ * Returns zero on success. On error, or when no id remains, -1 ++ * is returned and *errno* is set appropriately. ++ * ++ * BPF_MAP_GET_NEXT_ID ++ * Description ++ * Fetch the next eBPF map currently loaded into the kernel. ++ * ++ * Looks for the eBPF map with an id greater than *start_id* ++ * and updates *next_id* on success. If no other eBPF maps ++ * remain with ids higher than *start_id*, returns -1 and sets ++ * *errno* to **ENOENT**. ++ * ++ * Return ++ * Returns zero on success. On error, or when no id remains, -1 ++ * is returned and *errno* is set appropriately. ++ * ++ * BPF_PROG_GET_FD_BY_ID ++ * Description ++ * Open a file descriptor for the eBPF program corresponding to ++ * *prog_id*. ++ * ++ * Return ++ * A new file descriptor (a nonnegative integer), or -1 if an ++ * error occurred (in which case, *errno* is set appropriately). ++ * ++ * BPF_MAP_GET_FD_BY_ID ++ * Description ++ * Open a file descriptor for the eBPF map corresponding to ++ * *map_id*. ++ * ++ * Return ++ * A new file descriptor (a nonnegative integer), or -1 if an ++ * error occurred (in which case, *errno* is set appropriately). ++ * ++ * BPF_OBJ_GET_INFO_BY_FD ++ * Description ++ * Obtain information about the eBPF object corresponding to ++ * *bpf_fd*. ++ * ++ * Populates up to *info_len* bytes of *info*, which will be in ++ * one of the following formats depending on the eBPF object type ++ * of *bpf_fd*: ++ * ++ * * **struct bpf_prog_info** ++ * * **struct bpf_map_info** ++ * * **struct bpf_btf_info** ++ * * **struct bpf_link_info** ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_PROG_QUERY ++ * Description ++ * Obtain information about eBPF programs associated with the ++ * specified *attach_type* hook. ++ * ++ * The *target_fd* must be a valid file descriptor for a kernel ++ * object which depends on the attach type of *attach_bpf_fd*: ++ * ++ * **BPF_PROG_TYPE_CGROUP_DEVICE**, ++ * **BPF_PROG_TYPE_CGROUP_SKB**, ++ * **BPF_PROG_TYPE_CGROUP_SOCK**, ++ * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, ++ * **BPF_PROG_TYPE_CGROUP_SOCKOPT**, ++ * **BPF_PROG_TYPE_CGROUP_SYSCTL**, ++ * **BPF_PROG_TYPE_SOCK_OPS** ++ * ++ * Control Group v2 hierarchy with the eBPF controller ++ * enabled. Requires the kernel to be compiled with ++ * **CONFIG_CGROUP_BPF**. 
++ * ++ * **BPF_PROG_TYPE_FLOW_DISSECTOR** ++ * ++ * Network namespace (eg /proc/self/ns/net). ++ * ++ * **BPF_PROG_TYPE_LIRC_MODE2** ++ * ++ * LIRC device path (eg /dev/lircN). Requires the kernel ++ * to be compiled with **CONFIG_BPF_LIRC_MODE2**. ++ * ++ * **BPF_PROG_QUERY** always fetches the number of programs ++ * attached and the *attach_flags* which were used to attach those ++ * programs. Additionally, if *prog_ids* is nonzero and the number ++ * of attached programs is less than *prog_cnt*, populates ++ * *prog_ids* with the eBPF program ids of the programs attached ++ * at *target_fd*. ++ * ++ * The following flags may alter the result: ++ * ++ * **BPF_F_QUERY_EFFECTIVE** ++ * Only return information regarding programs which are ++ * currently effective at the specified *target_fd*. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_RAW_TRACEPOINT_OPEN ++ * Description ++ * Attach an eBPF program to a tracepoint *name* to access kernel ++ * internal arguments of the tracepoint in their raw form. ++ * ++ * The *prog_fd* must be a valid file descriptor associated with ++ * a loaded eBPF program of type **BPF_PROG_TYPE_RAW_TRACEPOINT**. ++ * ++ * No ABI guarantees are made about the content of tracepoint ++ * arguments exposed to the corresponding eBPF program. ++ * ++ * Applying **close**\ (2) to the file descriptor returned by ++ * **BPF_RAW_TRACEPOINT_OPEN** will delete the map (but see NOTES). ++ * ++ * Return ++ * A new file descriptor (a nonnegative integer), or -1 if an ++ * error occurred (in which case, *errno* is set appropriately). ++ * ++ * BPF_BTF_LOAD ++ * Description ++ * Verify and load BPF Type Format (BTF) metadata into the kernel, ++ * returning a new file descriptor associated with the metadata. ++ * BTF is described in more detail at ++ * https://www.kernel.org/doc/html/latest/bpf/btf.html. ++ * ++ * The *btf* parameter must point to valid memory providing ++ * *btf_size* bytes of BTF binary metadata. ++ * ++ * The returned file descriptor can be passed to other **bpf**\ () ++ * subcommands such as **BPF_PROG_LOAD** or **BPF_MAP_CREATE** to ++ * associate the BTF with those objects. ++ * ++ * Similar to **BPF_PROG_LOAD**, **BPF_BTF_LOAD** has optional ++ * parameters to specify a *btf_log_buf*, *btf_log_size* and ++ * *btf_log_level* which allow the kernel to return freeform log ++ * output regarding the BTF verification process. ++ * ++ * Return ++ * A new file descriptor (a nonnegative integer), or -1 if an ++ * error occurred (in which case, *errno* is set appropriately). ++ * ++ * BPF_BTF_GET_FD_BY_ID ++ * Description ++ * Open a file descriptor for the BPF Type Format (BTF) ++ * corresponding to *btf_id*. ++ * ++ * Return ++ * A new file descriptor (a nonnegative integer), or -1 if an ++ * error occurred (in which case, *errno* is set appropriately). ++ * ++ * BPF_TASK_FD_QUERY ++ * Description ++ * Obtain information about eBPF programs associated with the ++ * target process identified by *pid* and *fd*. ++ * ++ * If the *pid* and *fd* are associated with a tracepoint, kprobe ++ * or uprobe perf event, then the *prog_id* and *fd_type* will ++ * be populated with the eBPF program id and file descriptor type ++ * of type **bpf_task_fd_type**. If associated with a kprobe or ++ * uprobe, the *probe_offset* and *probe_addr* will also be ++ * populated. 
Optionally, if *buf* is provided, then up to ++ * *buf_len* bytes of *buf* will be populated with the name of ++ * the tracepoint, kprobe or uprobe. ++ * ++ * The resulting *prog_id* may be introspected in deeper detail ++ * using **BPF_PROG_GET_FD_BY_ID** and **BPF_OBJ_GET_INFO_BY_FD**. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_MAP_LOOKUP_AND_DELETE_ELEM ++ * Description ++ * Look up an element with the given *key* in the map referred to ++ * by the file descriptor *fd*, and if found, delete the element. ++ * ++ * For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map ++ * types, the *flags* argument needs to be set to 0, but for other ++ * map types, it may be specified as: ++ * ++ * **BPF_F_LOCK** ++ * Look up and delete the value of a spin-locked map ++ * without returning the lock. This must be specified if ++ * the elements contain a spinlock. ++ * ++ * The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types ++ * implement this command as a "pop" operation, deleting the top ++ * element rather than one corresponding to *key*. ++ * The *key* and *key_len* parameters should be zeroed when ++ * issuing this operation for these map types. ++ * ++ * This command is only valid for the following map types: ++ * * **BPF_MAP_TYPE_QUEUE** ++ * * **BPF_MAP_TYPE_STACK** ++ * * **BPF_MAP_TYPE_HASH** ++ * * **BPF_MAP_TYPE_PERCPU_HASH** ++ * * **BPF_MAP_TYPE_LRU_HASH** ++ * * **BPF_MAP_TYPE_LRU_PERCPU_HASH** ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_MAP_FREEZE ++ * Description ++ * Freeze the permissions of the specified map. ++ * ++ * Write permissions may be frozen by passing zero *flags*. ++ * Upon success, no future syscall invocations may alter the ++ * map state of *map_fd*. Write operations from eBPF programs ++ * are still possible for a frozen map. ++ * ++ * Not supported for maps of type **BPF_MAP_TYPE_STRUCT_OPS**. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_BTF_GET_NEXT_ID ++ * Description ++ * Fetch the next BPF Type Format (BTF) object currently loaded ++ * into the kernel. ++ * ++ * Looks for the BTF object with an id greater than *start_id* ++ * and updates *next_id* on success. If no other BTF objects ++ * remain with ids higher than *start_id*, returns -1 and sets ++ * *errno* to **ENOENT**. ++ * ++ * Return ++ * Returns zero on success. On error, or when no id remains, -1 ++ * is returned and *errno* is set appropriately. ++ * ++ * BPF_MAP_LOOKUP_BATCH ++ * Description ++ * Iterate and fetch multiple elements in a map. ++ * ++ * Two opaque values are used to manage batch operations, ++ * *in_batch* and *out_batch*. Initially, *in_batch* must be set ++ * to NULL to begin the batched operation. After each subsequent ++ * **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant ++ * *out_batch* as the *in_batch* for the next operation to ++ * continue iteration from the current point. ++ * ++ * The *keys* and *values* are output parameters which must point ++ * to memory large enough to hold *count* items based on the key ++ * and value size of the map *map_fd*. The *keys* buffer must be ++ * of *key_size* * *count*. The *values* buffer must be of ++ * *value_size* * *count*. 
++ * ++ * The *elem_flags* argument may be specified as one of the ++ * following: ++ * ++ * **BPF_F_LOCK** ++ * Look up the value of a spin-locked map without ++ * returning the lock. This must be specified if the ++ * elements contain a spinlock. ++ * ++ * On success, *count* elements from the map are copied into the ++ * user buffer, with the keys copied into *keys* and the values ++ * copied into the corresponding indices in *values*. ++ * ++ * If an error is returned and *errno* is not **EFAULT**, *count* ++ * is set to the number of successfully processed elements. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * May set *errno* to **ENOSPC** to indicate that *keys* or ++ * *values* is too small to dump an entire bucket during ++ * iteration of a hash-based map type. ++ * ++ * BPF_MAP_LOOKUP_AND_DELETE_BATCH ++ * Description ++ * Iterate and delete all elements in a map. ++ * ++ * This operation has the same behavior as ++ * **BPF_MAP_LOOKUP_BATCH** with two exceptions: ++ * ++ * * Every element that is successfully returned is also deleted ++ * from the map. This is at least *count* elements. Note that ++ * *count* is both an input and an output parameter. ++ * * Upon returning with *errno* set to **EFAULT**, up to ++ * *count* elements may be deleted without returning the keys ++ * and values of the deleted elements. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_MAP_UPDATE_BATCH ++ * Description ++ * Update multiple elements in a map by *key*. ++ * ++ * The *keys* and *values* are input parameters which must point ++ * to memory large enough to hold *count* items based on the key ++ * and value size of the map *map_fd*. The *keys* buffer must be ++ * of *key_size* * *count*. The *values* buffer must be of ++ * *value_size* * *count*. ++ * ++ * Each element specified in *keys* is sequentially updated to the ++ * value in the corresponding index in *values*. The *in_batch* ++ * and *out_batch* parameters are ignored and should be zeroed. ++ * ++ * The *elem_flags* argument should be specified as one of the ++ * following: ++ * ++ * **BPF_ANY** ++ * Create new elements or update a existing elements. ++ * **BPF_NOEXIST** ++ * Create new elements only if they do not exist. ++ * **BPF_EXIST** ++ * Update existing elements. ++ * **BPF_F_LOCK** ++ * Update spin_lock-ed map elements. This must be ++ * specified if the map value contains a spinlock. ++ * ++ * On success, *count* elements from the map are updated. ++ * ++ * If an error is returned and *errno* is not **EFAULT**, *count* ++ * is set to the number of successfully processed elements. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, or ++ * **E2BIG**. **E2BIG** indicates that the number of elements in ++ * the map reached the *max_entries* limit specified at map ++ * creation time. ++ * ++ * May set *errno* to one of the following error codes under ++ * specific circumstances: ++ * ++ * **EEXIST** ++ * If *flags* specifies **BPF_NOEXIST** and the element ++ * with *key* already exists in the map. ++ * **ENOENT** ++ * If *flags* specifies **BPF_EXIST** and the element with ++ * *key* does not exist in the map. ++ * ++ * BPF_MAP_DELETE_BATCH ++ * Description ++ * Delete multiple elements in a map by *key*. 
++ * ++ * The *keys* parameter is an input parameter which must point ++ * to memory large enough to hold *count* items based on the key ++ * size of the map *map_fd*, that is, *key_size* * *count*. ++ * ++ * Each element specified in *keys* is sequentially deleted. The ++ * *in_batch*, *out_batch*, and *values* parameters are ignored ++ * and should be zeroed. ++ * ++ * The *elem_flags* argument may be specified as one of the ++ * following: ++ * ++ * **BPF_F_LOCK** ++ * Look up the value of a spin-locked map without ++ * returning the lock. This must be specified if the ++ * elements contain a spinlock. ++ * ++ * On success, *count* elements from the map are updated. ++ * ++ * If an error is returned and *errno* is not **EFAULT**, *count* ++ * is set to the number of successfully processed elements. If ++ * *errno* is **EFAULT**, up to *count* elements may be been ++ * deleted. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_LINK_CREATE ++ * Description ++ * Attach an eBPF program to a *target_fd* at the specified ++ * *attach_type* hook and return a file descriptor handle for ++ * managing the link. ++ * ++ * Return ++ * A new file descriptor (a nonnegative integer), or -1 if an ++ * error occurred (in which case, *errno* is set appropriately). ++ * ++ * BPF_LINK_UPDATE ++ * Description ++ * Update the eBPF program in the specified *link_fd* to ++ * *new_prog_fd*. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_LINK_GET_FD_BY_ID ++ * Description ++ * Open a file descriptor for the eBPF Link corresponding to ++ * *link_id*. ++ * ++ * Return ++ * A new file descriptor (a nonnegative integer), or -1 if an ++ * error occurred (in which case, *errno* is set appropriately). ++ * ++ * BPF_LINK_GET_NEXT_ID ++ * Description ++ * Fetch the next eBPF link currently loaded into the kernel. ++ * ++ * Looks for the eBPF link with an id greater than *start_id* ++ * and updates *next_id* on success. If no other eBPF links ++ * remain with ids higher than *start_id*, returns -1 and sets ++ * *errno* to **ENOENT**. ++ * ++ * Return ++ * Returns zero on success. On error, or when no id remains, -1 ++ * is returned and *errno* is set appropriately. ++ * ++ * BPF_ENABLE_STATS ++ * Description ++ * Enable eBPF runtime statistics gathering. ++ * ++ * Runtime statistics gathering for the eBPF runtime is disabled ++ * by default to minimize the corresponding performance overhead. ++ * This command enables statistics globally. ++ * ++ * Multiple programs may independently enable statistics. ++ * After gathering the desired statistics, eBPF runtime statistics ++ * may be disabled again by calling **close**\ (2) for the file ++ * descriptor returned by this function. Statistics will only be ++ * disabled system-wide when all outstanding file descriptors ++ * returned by prior calls for this subcommand are closed. ++ * ++ * Return ++ * A new file descriptor (a nonnegative integer), or -1 if an ++ * error occurred (in which case, *errno* is set appropriately). ++ * ++ * BPF_ITER_CREATE ++ * Description ++ * Create an iterator on top of the specified *link_fd* (as ++ * previously created using **BPF_LINK_CREATE**) and return a ++ * file descriptor that can be used to trigger the iteration. 
++ * ++ * If the resulting file descriptor is pinned to the filesystem ++ * using **BPF_OBJ_PIN**, then subsequent **read**\ (2) syscalls ++ * for that path will trigger the iterator to read kernel state ++ * using the eBPF program attached to *link_fd*. ++ * ++ * Return ++ * A new file descriptor (a nonnegative integer), or -1 if an ++ * error occurred (in which case, *errno* is set appropriately). ++ * ++ * BPF_LINK_DETACH ++ * Description ++ * Forcefully detach the specified *link_fd* from its ++ * corresponding attachment point. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_PROG_BIND_MAP ++ * Description ++ * Bind a map to the lifetime of an eBPF program. ++ * ++ * The map identified by *map_fd* is bound to the program ++ * identified by *prog_fd* and only released when *prog_fd* is ++ * released. This may be used in cases where metadata should be ++ * associated with a program which otherwise does not contain any ++ * references to the map (for example, embedded in the eBPF ++ * program instructions). ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * NOTES ++ * eBPF objects (maps and programs) can be shared between processes. ++ * ++ * * After **fork**\ (2), the child inherits file descriptors ++ * referring to the same eBPF objects. ++ * * File descriptors referring to eBPF objects can be transferred over ++ * **unix**\ (7) domain sockets. ++ * * File descriptors referring to eBPF objects can be duplicated in the ++ * usual way, using **dup**\ (2) and similar calls. ++ * * File descriptors referring to eBPF objects can be pinned to the ++ * filesystem using the **BPF_OBJ_PIN** command of **bpf**\ (2). ++ * ++ * An eBPF object is deallocated only after all file descriptors referring ++ * to the object have been closed and no references remain pinned to the ++ * filesystem or attached (for example, bound to a program or device). 
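++ *
++ *	As an illustration, a hash map can be created, populated and queried
++ *	with nothing but raw **bpf**\ (2) calls (a minimal sketch, error
++ *	handling omitted; it assumes <linux/bpf.h>, <string.h>,
++ *	<sys/syscall.h> and <unistd.h> are included, and most applications
++ *	would use the libbpf wrappers from *src/bpf.h* instead):
++ *
++ *	::
++ *
++ *		union bpf_attr attr;
++ *		__u32 key = 1;
++ *		__u64 value = 42, out;
++ *		int map_fd;
++ *
++ *		memset(&attr, 0, sizeof(attr));
++ *		attr.map_type    = BPF_MAP_TYPE_HASH;
++ *		attr.key_size    = sizeof(key);
++ *		attr.value_size  = sizeof(value);
++ *		attr.max_entries = 256;
++ *		map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
++ *
++ *		memset(&attr, 0, sizeof(attr));
++ *		attr.map_fd = map_fd;
++ *		attr.key    = (__u64)(unsigned long)&key;
++ *		attr.value  = (__u64)(unsigned long)&value;
++ *		attr.flags  = BPF_ANY;
++ *		syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
++ *
++ *		attr.value = (__u64)(unsigned long)&out;
++ *		syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
++ *
++ *	The file descriptor obtained this way follows the sharing and
++ *	pinning rules above like any other eBPF object.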
++ */ ++enum bpf_cmd { ++ BPF_MAP_CREATE, ++ BPF_MAP_LOOKUP_ELEM, ++ BPF_MAP_UPDATE_ELEM, ++ BPF_MAP_DELETE_ELEM, ++ BPF_MAP_GET_NEXT_KEY, ++ BPF_PROG_LOAD, ++ BPF_OBJ_PIN, ++ BPF_OBJ_GET, ++ BPF_PROG_ATTACH, ++ BPF_PROG_DETACH, ++ BPF_PROG_TEST_RUN, ++ BPF_PROG_RUN = BPF_PROG_TEST_RUN, ++ BPF_PROG_GET_NEXT_ID, ++ BPF_MAP_GET_NEXT_ID, ++ BPF_PROG_GET_FD_BY_ID, ++ BPF_MAP_GET_FD_BY_ID, ++ BPF_OBJ_GET_INFO_BY_FD, ++ BPF_PROG_QUERY, ++ BPF_RAW_TRACEPOINT_OPEN, ++ BPF_BTF_LOAD, ++ BPF_BTF_GET_FD_BY_ID, ++ BPF_TASK_FD_QUERY, ++ BPF_MAP_LOOKUP_AND_DELETE_ELEM, ++ BPF_MAP_FREEZE, ++ BPF_BTF_GET_NEXT_ID, ++ BPF_MAP_LOOKUP_BATCH, ++ BPF_MAP_LOOKUP_AND_DELETE_BATCH, ++ BPF_MAP_UPDATE_BATCH, ++ BPF_MAP_DELETE_BATCH, ++ BPF_LINK_CREATE, ++ BPF_LINK_UPDATE, ++ BPF_LINK_GET_FD_BY_ID, ++ BPF_LINK_GET_NEXT_ID, ++ BPF_ENABLE_STATS, ++ BPF_ITER_CREATE, ++ BPF_LINK_DETACH, ++ BPF_PROG_BIND_MAP, ++}; ++ ++enum bpf_map_type { ++ BPF_MAP_TYPE_UNSPEC, ++ BPF_MAP_TYPE_HASH, ++ BPF_MAP_TYPE_ARRAY, ++ BPF_MAP_TYPE_PROG_ARRAY, ++ BPF_MAP_TYPE_PERF_EVENT_ARRAY, ++ BPF_MAP_TYPE_PERCPU_HASH, ++ BPF_MAP_TYPE_PERCPU_ARRAY, ++ BPF_MAP_TYPE_STACK_TRACE, ++ BPF_MAP_TYPE_CGROUP_ARRAY, ++ BPF_MAP_TYPE_LRU_HASH, ++ BPF_MAP_TYPE_LRU_PERCPU_HASH, ++ BPF_MAP_TYPE_LPM_TRIE, ++ BPF_MAP_TYPE_ARRAY_OF_MAPS, ++ BPF_MAP_TYPE_HASH_OF_MAPS, ++ BPF_MAP_TYPE_DEVMAP, ++ BPF_MAP_TYPE_SOCKMAP, ++ BPF_MAP_TYPE_CPUMAP, ++ BPF_MAP_TYPE_XSKMAP, ++ BPF_MAP_TYPE_SOCKHASH, ++ BPF_MAP_TYPE_CGROUP_STORAGE, ++ BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, ++ BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, ++ BPF_MAP_TYPE_QUEUE, ++ BPF_MAP_TYPE_STACK, ++ BPF_MAP_TYPE_SK_STORAGE, ++ BPF_MAP_TYPE_DEVMAP_HASH, ++ BPF_MAP_TYPE_STRUCT_OPS, ++ BPF_MAP_TYPE_RINGBUF, ++ BPF_MAP_TYPE_INODE_STORAGE, ++ BPF_MAP_TYPE_TASK_STORAGE, ++ BPF_MAP_TYPE_BLOOM_FILTER, ++}; ++ ++/* Note that tracing related programs such as ++ * BPF_PROG_TYPE_{KPROBE,TRACEPOINT,PERF_EVENT,RAW_TRACEPOINT} ++ * are not subject to a stable API since kernel internal data ++ * structures can change from release to release and may ++ * therefore break existing tracing BPF programs. Tracing BPF ++ * programs correspond to /a/ specific kernel which is to be ++ * analyzed, and not /a/ specific kernel /and/ all future ones. 
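++ *
++ * For example, a kprobe program written with the SEC() and BPF_KPROBE()
++ * macros from libbpf's bpf_helpers.h and bpf_tracing.h attaches to one
++ * concrete kernel symbol and argument layout (a minimal sketch;
++ * do_sys_openat2() is only an example attach point, present on newer
++ * (v5.6+) kernels):
++ *
++ *	char LICENSE[] SEC("license") = "GPL";
++ *
++ *	SEC("kprobe/do_sys_openat2")
++ *	int BPF_KPROBE(trace_openat2, int dfd)
++ *	{
++ *		char comm[16];
++ *
++ *		bpf_get_current_comm(&comm, sizeof(comm));
++ *		bpf_printk("openat2 by %s (dfd %d)", comm, dfd);
++ *		return 0;
++ *	}
++ *
++ * If the traced kernel renames do_sys_openat2() or changes its argument
++ * layout, such a program has to be adapted and re-verified for that kernel.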
++ */ ++enum bpf_prog_type { ++ BPF_PROG_TYPE_UNSPEC, ++ BPF_PROG_TYPE_SOCKET_FILTER, ++ BPF_PROG_TYPE_KPROBE, ++ BPF_PROG_TYPE_SCHED_CLS, ++ BPF_PROG_TYPE_SCHED_ACT, ++ BPF_PROG_TYPE_TRACEPOINT, ++ BPF_PROG_TYPE_XDP, ++ BPF_PROG_TYPE_PERF_EVENT, ++ BPF_PROG_TYPE_CGROUP_SKB, ++ BPF_PROG_TYPE_CGROUP_SOCK, ++ BPF_PROG_TYPE_LWT_IN, ++ BPF_PROG_TYPE_LWT_OUT, ++ BPF_PROG_TYPE_LWT_XMIT, ++ BPF_PROG_TYPE_SOCK_OPS, ++ BPF_PROG_TYPE_SK_SKB, ++ BPF_PROG_TYPE_CGROUP_DEVICE, ++ BPF_PROG_TYPE_SK_MSG, ++ BPF_PROG_TYPE_RAW_TRACEPOINT, ++ BPF_PROG_TYPE_CGROUP_SOCK_ADDR, ++ BPF_PROG_TYPE_LWT_SEG6LOCAL, ++ BPF_PROG_TYPE_LIRC_MODE2, ++ BPF_PROG_TYPE_SK_REUSEPORT, ++ BPF_PROG_TYPE_FLOW_DISSECTOR, ++ BPF_PROG_TYPE_CGROUP_SYSCTL, ++ BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, ++ BPF_PROG_TYPE_CGROUP_SOCKOPT, ++ BPF_PROG_TYPE_TRACING, ++ BPF_PROG_TYPE_STRUCT_OPS, ++ BPF_PROG_TYPE_EXT, ++ BPF_PROG_TYPE_LSM, ++ BPF_PROG_TYPE_SK_LOOKUP, ++ BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ ++}; ++ ++enum bpf_attach_type { ++ BPF_CGROUP_INET_INGRESS, ++ BPF_CGROUP_INET_EGRESS, ++ BPF_CGROUP_INET_SOCK_CREATE, ++ BPF_CGROUP_SOCK_OPS, ++ BPF_SK_SKB_STREAM_PARSER, ++ BPF_SK_SKB_STREAM_VERDICT, ++ BPF_CGROUP_DEVICE, ++ BPF_SK_MSG_VERDICT, ++ BPF_CGROUP_INET4_BIND, ++ BPF_CGROUP_INET6_BIND, ++ BPF_CGROUP_INET4_CONNECT, ++ BPF_CGROUP_INET6_CONNECT, ++ BPF_CGROUP_INET4_POST_BIND, ++ BPF_CGROUP_INET6_POST_BIND, ++ BPF_CGROUP_UDP4_SENDMSG, ++ BPF_CGROUP_UDP6_SENDMSG, ++ BPF_LIRC_MODE2, ++ BPF_FLOW_DISSECTOR, ++ BPF_CGROUP_SYSCTL, ++ BPF_CGROUP_UDP4_RECVMSG, ++ BPF_CGROUP_UDP6_RECVMSG, ++ BPF_CGROUP_GETSOCKOPT, ++ BPF_CGROUP_SETSOCKOPT, ++ BPF_TRACE_RAW_TP, ++ BPF_TRACE_FENTRY, ++ BPF_TRACE_FEXIT, ++ BPF_MODIFY_RETURN, ++ BPF_LSM_MAC, ++ BPF_TRACE_ITER, ++ BPF_CGROUP_INET4_GETPEERNAME, ++ BPF_CGROUP_INET6_GETPEERNAME, ++ BPF_CGROUP_INET4_GETSOCKNAME, ++ BPF_CGROUP_INET6_GETSOCKNAME, ++ BPF_XDP_DEVMAP, ++ BPF_CGROUP_INET_SOCK_RELEASE, ++ BPF_XDP_CPUMAP, ++ BPF_SK_LOOKUP, ++ BPF_XDP, ++ BPF_SK_SKB_VERDICT, ++ BPF_SK_REUSEPORT_SELECT, ++ BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, ++ BPF_PERF_EVENT, ++ BPF_TRACE_KPROBE_MULTI, ++ BPF_LSM_CGROUP, ++ __MAX_BPF_ATTACH_TYPE ++}; ++ ++#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE ++ ++enum bpf_link_type { ++ BPF_LINK_TYPE_UNSPEC = 0, ++ BPF_LINK_TYPE_RAW_TRACEPOINT = 1, ++ BPF_LINK_TYPE_TRACING = 2, ++ BPF_LINK_TYPE_CGROUP = 3, ++ BPF_LINK_TYPE_ITER = 4, ++ BPF_LINK_TYPE_NETNS = 5, ++ BPF_LINK_TYPE_XDP = 6, ++ BPF_LINK_TYPE_PERF_EVENT = 7, ++ BPF_LINK_TYPE_KPROBE_MULTI = 8, ++ BPF_LINK_TYPE_STRUCT_OPS = 9, ++ ++ MAX_BPF_LINK_TYPE, ++}; ++ ++/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command ++ * ++ * NONE(default): No further bpf programs allowed in the subtree. ++ * ++ * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, ++ * the program in this cgroup yields to sub-cgroup program. ++ * ++ * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, ++ * that cgroup program gets run in addition to the program in this cgroup. ++ * ++ * Only one program is allowed to be attached to a cgroup with ++ * NONE or BPF_F_ALLOW_OVERRIDE flag. ++ * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will ++ * release old program and attach the new one. Attach flags has to match. ++ * ++ * Multiple programs are allowed to be attached to a cgroup with ++ * BPF_F_ALLOW_MULTI flag. 
They are executed in FIFO order
++ * (those that were attached first, run first).
++ * The programs of the sub-cgroup are executed first, then the programs of
++ * this cgroup, and then the programs of the parent cgroup.
++ * When a child program makes a decision (like picking a TCP CA or a sock
++ * bind), the parent program has a chance to override it.
++ *
++ * With BPF_F_ALLOW_MULTI a new program is added to the end of the list of
++ * programs for a cgroup. It is, however, possible to replace an old program
++ * at any position by also specifying the BPF_F_REPLACE flag and the program
++ * to be replaced in the replace_bpf_fd attribute. The old program at this
++ * position will be released.
++ *
++ * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
++ * A cgroup with NONE doesn't allow any programs in sub-cgroups.
++ * Ex1:
++ * cgrp1 (MULTI progs A, B) ->
++ *    cgrp2 (OVERRIDE prog C) ->
++ *      cgrp3 (MULTI prog D) ->
++ *        cgrp4 (OVERRIDE prog E) ->
++ *          cgrp5 (NONE prog F)
++ * the event in cgrp5 triggers execution of F,D,A,B in that order.
++ * if prog F is detached, the execution is E,D,A,B
++ * if prog F and D are detached, the execution is E,A,B
++ * if prog F, E and D are detached, the execution is C,A,B
++ *
++ * All eligible programs are executed regardless of return code from
++ * earlier programs.
++ */
++#define BPF_F_ALLOW_OVERRIDE	(1U << 0)
++#define BPF_F_ALLOW_MULTI	(1U << 1)
++#define BPF_F_REPLACE		(1U << 2)
++
++/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
++ * verifier will perform strict alignment checking as if the kernel
++ * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set,
++ * and NET_IP_ALIGN defined to 2.
++ */
++#define BPF_F_STRICT_ALIGNMENT	(1U << 0)
++
++/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROG_LOAD command, the
++ * verifier will allow any alignment whatsoever. On platforms
++ * with strict alignment requirements for loads and stores (such
++ * as sparc and mips) the verifier validates that all loads and
++ * stores provably follow this requirement. This flag turns that
++ * checking and enforcement off.
++ *
++ * It is mostly used for testing when we want to validate the
++ * context and memory access aspects of the verifier, but because
++ * of an unaligned access the alignment check would trigger before
++ * the one we are interested in.
++ */
++#define BPF_F_ANY_ALIGNMENT	(1U << 1)
++
++/* BPF_F_TEST_RND_HI32 is used in BPF_PROG_LOAD command for testing purposes.
++ * The verifier does sub-register def/use analysis and identifies instructions
++ * whose def only matters for the low 32 bits: the high 32 bits, produced by
++ * implicit zero extension, are never referenced later. The verifier therefore
++ * notifies JIT back-ends that it is safe to skip clearing the high 32 bits
++ * for these instructions, which saves some back-ends a lot of code-gen.
++ * However, such an optimization is not necessary on some arches, for example
++ * x86_64 and arm64, whose JIT back-ends therefore do not use the verifier's
++ * analysis result. We still want a way to verify the correctness of the
++ * described optimization on x86_64, on which the test suites are frequently
++ * exercised.
++ *
++ * So this flag is introduced. Once it is set, the verifier will randomize the
++ * high 32 bits of those instructions that have been identified as safe to
++ * skip clearing. Then, if the verifier's analysis is not correct, such
++ * randomization will make tests regress and expose the bug.
++ */
++#define BPF_F_TEST_RND_HI32	(1U << 2)
++
++/* The verifier internal test flag.
Behavior is undefined */ ++#define BPF_F_TEST_STATE_FREQ (1U << 3) ++ ++/* If BPF_F_SLEEPABLE is used in BPF_PROG_LOAD command, the verifier will ++ * restrict map and helper usage for such programs. Sleepable BPF programs can ++ * only be attached to hooks where kernel execution context allows sleeping. ++ * Such programs are allowed to use helpers that may sleep like ++ * bpf_copy_from_user(). ++ */ ++#define BPF_F_SLEEPABLE (1U << 4) ++ ++/* If BPF_F_XDP_HAS_FRAGS is used in BPF_PROG_LOAD command, the loaded program ++ * fully support xdp frags. ++ */ ++#define BPF_F_XDP_HAS_FRAGS (1U << 5) ++ ++/* link_create.kprobe_multi.flags used in LINK_CREATE command for ++ * BPF_TRACE_KPROBE_MULTI attach type to create return probe. ++ */ ++#define BPF_F_KPROBE_MULTI_RETURN (1U << 0) ++ ++/* When BPF ldimm64's insn[0].src_reg != 0 then this can have ++ * the following extensions: ++ * ++ * insn[0].src_reg: BPF_PSEUDO_MAP_[FD|IDX] ++ * insn[0].imm: map fd or fd_idx ++ * insn[1].imm: 0 ++ * insn[0].off: 0 ++ * insn[1].off: 0 ++ * ldimm64 rewrite: address of map ++ * verifier type: CONST_PTR_TO_MAP ++ */ ++#define BPF_PSEUDO_MAP_FD 1 ++#define BPF_PSEUDO_MAP_IDX 5 ++ ++/* insn[0].src_reg: BPF_PSEUDO_MAP_[IDX_]VALUE ++ * insn[0].imm: map fd or fd_idx ++ * insn[1].imm: offset into value ++ * insn[0].off: 0 ++ * insn[1].off: 0 ++ * ldimm64 rewrite: address of map[0]+offset ++ * verifier type: PTR_TO_MAP_VALUE ++ */ ++#define BPF_PSEUDO_MAP_VALUE 2 ++#define BPF_PSEUDO_MAP_IDX_VALUE 6 ++ ++/* insn[0].src_reg: BPF_PSEUDO_BTF_ID ++ * insn[0].imm: kernel btd id of VAR ++ * insn[1].imm: 0 ++ * insn[0].off: 0 ++ * insn[1].off: 0 ++ * ldimm64 rewrite: address of the kernel variable ++ * verifier type: PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var ++ * is struct/union. ++ */ ++#define BPF_PSEUDO_BTF_ID 3 ++/* insn[0].src_reg: BPF_PSEUDO_FUNC ++ * insn[0].imm: insn offset to the func ++ * insn[1].imm: 0 ++ * insn[0].off: 0 ++ * insn[1].off: 0 ++ * ldimm64 rewrite: address of the function ++ * verifier type: PTR_TO_FUNC. ++ */ ++#define BPF_PSEUDO_FUNC 4 ++ ++/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative ++ * offset to another bpf function ++ */ ++#define BPF_PSEUDO_CALL 1 ++/* when bpf_call->src_reg == BPF_PSEUDO_KFUNC_CALL, ++ * bpf_call->imm == btf_id of a BTF_KIND_FUNC in the running kernel ++ */ ++#define BPF_PSEUDO_KFUNC_CALL 2 ++ ++/* flags for BPF_MAP_UPDATE_ELEM command */ ++enum { ++ BPF_ANY = 0, /* create new element or update existing */ ++ BPF_NOEXIST = 1, /* create new element if it didn't exist */ ++ BPF_EXIST = 2, /* update existing element */ ++ BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */ ++}; ++ ++/* flags for BPF_MAP_CREATE command */ ++enum { ++ BPF_F_NO_PREALLOC = (1U << 0), ++/* Instead of having one common LRU list in the ++ * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list ++ * which can scale and perform better. ++ * Note, the LRU nodes (including free nodes) cannot be moved ++ * across different LRU lists. ++ */ ++ BPF_F_NO_COMMON_LRU = (1U << 1), ++/* Specify numa node during map creation */ ++ BPF_F_NUMA_NODE = (1U << 2), ++ ++/* Flags for accessing BPF object from syscall side. */ ++ BPF_F_RDONLY = (1U << 3), ++ BPF_F_WRONLY = (1U << 4), ++ ++/* Flag for stack_map, store build_id+offset instead of pointer */ ++ BPF_F_STACK_BUILD_ID = (1U << 5), ++ ++/* Zero-initialize hash function seed. This should only be used for testing. */ ++ BPF_F_ZERO_SEED = (1U << 6), ++ ++/* Flags for accessing BPF object from program side. 
*/ ++ BPF_F_RDONLY_PROG = (1U << 7), ++ BPF_F_WRONLY_PROG = (1U << 8), ++ ++/* Clone map from listener for newly accepted socket */ ++ BPF_F_CLONE = (1U << 9), ++ ++/* Enable memory-mapping BPF map */ ++ BPF_F_MMAPABLE = (1U << 10), ++ ++/* Share perf_event among processes */ ++ BPF_F_PRESERVE_ELEMS = (1U << 11), ++ ++/* Create a map that is suitable to be an inner map with dynamic max entries */ ++ BPF_F_INNER_MAP = (1U << 12), ++}; ++ ++/* Flags for BPF_PROG_QUERY. */ ++ ++/* Query effective (directly attached + inherited from ancestor cgroups) ++ * programs that will be executed for events within a cgroup. ++ * attach_flags with this flag are returned only for directly attached programs. ++ */ ++#define BPF_F_QUERY_EFFECTIVE (1U << 0) ++ ++/* Flags for BPF_PROG_TEST_RUN */ ++ ++/* If set, run the test on the cpu specified by bpf_attr.test.cpu */ ++#define BPF_F_TEST_RUN_ON_CPU (1U << 0) ++/* If set, XDP frames will be transmitted after processing */ ++#define BPF_F_TEST_XDP_LIVE_FRAMES (1U << 1) ++ ++/* type for BPF_ENABLE_STATS */ ++enum bpf_stats_type { ++ /* enabled run_time_ns and run_cnt */ ++ BPF_STATS_RUN_TIME = 0, ++}; ++ ++enum bpf_stack_build_id_status { ++ /* user space need an empty entry to identify end of a trace */ ++ BPF_STACK_BUILD_ID_EMPTY = 0, ++ /* with valid build_id and offset */ ++ BPF_STACK_BUILD_ID_VALID = 1, ++ /* couldn't get build_id, fallback to ip */ ++ BPF_STACK_BUILD_ID_IP = 2, ++}; ++ ++#define BPF_BUILD_ID_SIZE 20 ++struct bpf_stack_build_id { ++ __s32 status; ++ unsigned char build_id[BPF_BUILD_ID_SIZE]; ++ union { ++ __u64 offset; ++ __u64 ip; ++ }; ++}; ++ ++#define BPF_OBJ_NAME_LEN 16U ++ ++union bpf_attr { ++ struct { /* anonymous struct used by BPF_MAP_CREATE command */ ++ __u32 map_type; /* one of enum bpf_map_type */ ++ __u32 key_size; /* size of key in bytes */ ++ __u32 value_size; /* size of value in bytes */ ++ __u32 max_entries; /* max number of entries in a map */ ++ __u32 map_flags; /* BPF_MAP_CREATE related ++ * flags defined above. ++ */ ++ __u32 inner_map_fd; /* fd pointing to the inner map */ ++ __u32 numa_node; /* numa node (effective only if ++ * BPF_F_NUMA_NODE is set). ++ */ ++ char map_name[BPF_OBJ_NAME_LEN]; ++ __u32 map_ifindex; /* ifindex of netdev to create on */ ++ __u32 btf_fd; /* fd pointing to a BTF type data */ ++ __u32 btf_key_type_id; /* BTF type_id of the key */ ++ __u32 btf_value_type_id; /* BTF type_id of the value */ ++ __u32 btf_vmlinux_value_type_id;/* BTF type_id of a kernel- ++ * struct stored as the ++ * map value ++ */ ++ /* Any per-map-type extra fields ++ * ++ * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the ++ * number of hash functions (if 0, the bloom filter will default ++ * to using 5 hash functions). 
++ */ ++ __u64 map_extra; ++ }; ++ ++ struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ ++ __u32 map_fd; ++ __aligned_u64 key; ++ union { ++ __aligned_u64 value; ++ __aligned_u64 next_key; ++ }; ++ __u64 flags; ++ }; ++ ++ struct { /* struct used by BPF_MAP_*_BATCH commands */ ++ __aligned_u64 in_batch; /* start batch, ++ * NULL to start from beginning ++ */ ++ __aligned_u64 out_batch; /* output: next start batch */ ++ __aligned_u64 keys; ++ __aligned_u64 values; ++ __u32 count; /* input/output: ++ * input: # of key/value ++ * elements ++ * output: # of filled elements ++ */ ++ __u32 map_fd; ++ __u64 elem_flags; ++ __u64 flags; ++ } batch; ++ ++ struct { /* anonymous struct used by BPF_PROG_LOAD command */ ++ __u32 prog_type; /* one of enum bpf_prog_type */ ++ __u32 insn_cnt; ++ __aligned_u64 insns; ++ __aligned_u64 license; ++ __u32 log_level; /* verbosity level of verifier */ ++ __u32 log_size; /* size of user buffer */ ++ __aligned_u64 log_buf; /* user supplied buffer */ ++ __u32 kern_version; /* not used */ ++ __u32 prog_flags; ++ char prog_name[BPF_OBJ_NAME_LEN]; ++ __u32 prog_ifindex; /* ifindex of netdev to prep for */ ++ /* For some prog types expected attach type must be known at ++ * load time to verify attach type specific parts of prog ++ * (context accesses, allowed helpers, etc). ++ */ ++ __u32 expected_attach_type; ++ __u32 prog_btf_fd; /* fd pointing to BTF type data */ ++ __u32 func_info_rec_size; /* userspace bpf_func_info size */ ++ __aligned_u64 func_info; /* func info */ ++ __u32 func_info_cnt; /* number of bpf_func_info records */ ++ __u32 line_info_rec_size; /* userspace bpf_line_info size */ ++ __aligned_u64 line_info; /* line info */ ++ __u32 line_info_cnt; /* number of bpf_line_info records */ ++ __u32 attach_btf_id; /* in-kernel BTF type id to attach to */ ++ union { ++ /* valid prog_fd to attach to bpf prog */ ++ __u32 attach_prog_fd; ++ /* or valid module BTF object fd or 0 to attach to vmlinux */ ++ __u32 attach_btf_obj_fd; ++ }; ++ __u32 core_relo_cnt; /* number of bpf_core_relo */ ++ __aligned_u64 fd_array; /* array of FDs */ ++ __aligned_u64 core_relos; ++ __u32 core_relo_rec_size; /* sizeof(struct bpf_core_relo) */ ++ }; ++ ++ struct { /* anonymous struct used by BPF_OBJ_* commands */ ++ __aligned_u64 pathname; ++ __u32 bpf_fd; ++ __u32 file_flags; ++ }; ++ ++ struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ ++ __u32 target_fd; /* container object to attach to */ ++ __u32 attach_bpf_fd; /* eBPF program to attach */ ++ __u32 attach_type; ++ __u32 attach_flags; ++ __u32 replace_bpf_fd; /* previously attached eBPF ++ * program to replace if ++ * BPF_F_REPLACE is used ++ */ ++ }; ++ ++ struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ ++ __u32 prog_fd; ++ __u32 retval; ++ __u32 data_size_in; /* input: len of data_in */ ++ __u32 data_size_out; /* input/output: len of data_out ++ * returns ENOSPC if data_out ++ * is too small. ++ */ ++ __aligned_u64 data_in; ++ __aligned_u64 data_out; ++ __u32 repeat; ++ __u32 duration; ++ __u32 ctx_size_in; /* input: len of ctx_in */ ++ __u32 ctx_size_out; /* input/output: len of ctx_out ++ * returns ENOSPC if ctx_out ++ * is too small. 
++ */ ++ __aligned_u64 ctx_in; ++ __aligned_u64 ctx_out; ++ __u32 flags; ++ __u32 cpu; ++ __u32 batch_size; ++ } test; ++ ++ struct { /* anonymous struct used by BPF_*_GET_*_ID */ ++ union { ++ __u32 start_id; ++ __u32 prog_id; ++ __u32 map_id; ++ __u32 btf_id; ++ __u32 link_id; ++ }; ++ __u32 next_id; ++ __u32 open_flags; ++ }; ++ ++ struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */ ++ __u32 bpf_fd; ++ __u32 info_len; ++ __aligned_u64 info; ++ } info; ++ ++ struct { /* anonymous struct used by BPF_PROG_QUERY command */ ++ __u32 target_fd; /* container object to query */ ++ __u32 attach_type; ++ __u32 query_flags; ++ __u32 attach_flags; ++ __aligned_u64 prog_ids; ++ __u32 prog_cnt; ++ __aligned_u64 prog_attach_flags; /* output: per-program attach_flags */ ++ } query; ++ ++ struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */ ++ __u64 name; ++ __u32 prog_fd; ++ } raw_tracepoint; ++ ++ struct { /* anonymous struct for BPF_BTF_LOAD */ ++ __aligned_u64 btf; ++ __aligned_u64 btf_log_buf; ++ __u32 btf_size; ++ __u32 btf_log_size; ++ __u32 btf_log_level; ++ }; ++ ++ struct { ++ __u32 pid; /* input: pid */ ++ __u32 fd; /* input: fd */ ++ __u32 flags; /* input: flags */ ++ __u32 buf_len; /* input/output: buf len */ ++ __aligned_u64 buf; /* input/output: ++ * tp_name for tracepoint ++ * symbol for kprobe ++ * filename for uprobe ++ */ ++ __u32 prog_id; /* output: prod_id */ ++ __u32 fd_type; /* output: BPF_FD_TYPE_* */ ++ __u64 probe_offset; /* output: probe_offset */ ++ __u64 probe_addr; /* output: probe_addr */ ++ } task_fd_query; ++ ++ struct { /* struct used by BPF_LINK_CREATE command */ ++ __u32 prog_fd; /* eBPF program to attach */ ++ union { ++ __u32 target_fd; /* object to attach to */ ++ __u32 target_ifindex; /* target ifindex */ ++ }; ++ __u32 attach_type; /* attach type */ ++ __u32 flags; /* extra flags */ ++ union { ++ __u32 target_btf_id; /* btf_id of target to attach to */ ++ struct { ++ __aligned_u64 iter_info; /* extra bpf_iter_link_info */ ++ __u32 iter_info_len; /* iter_info length */ ++ }; ++ struct { ++ /* black box user-provided value passed through ++ * to BPF program at the execution time and ++ * accessible through bpf_get_attach_cookie() BPF helper ++ */ ++ __u64 bpf_cookie; ++ } perf_event; ++ struct { ++ __u32 flags; ++ __u32 cnt; ++ __aligned_u64 syms; ++ __aligned_u64 addrs; ++ __aligned_u64 cookies; ++ } kprobe_multi; ++ struct { ++ /* this is overlaid with the target_btf_id above. 
*/ ++ __u32 target_btf_id; ++ /* black box user-provided value passed through ++ * to BPF program at the execution time and ++ * accessible through bpf_get_attach_cookie() BPF helper ++ */ ++ __u64 cookie; ++ } tracing; ++ }; ++ } link_create; ++ ++ struct { /* struct used by BPF_LINK_UPDATE command */ ++ __u32 link_fd; /* link fd */ ++ /* new program fd to update link with */ ++ __u32 new_prog_fd; ++ __u32 flags; /* extra flags */ ++ /* expected link's program fd; is specified only if ++ * BPF_F_REPLACE flag is set in flags */ ++ __u32 old_prog_fd; ++ } link_update; ++ ++ struct { ++ __u32 link_fd; ++ } link_detach; ++ ++ struct { /* struct used by BPF_ENABLE_STATS command */ ++ __u32 type; ++ } enable_stats; ++ ++ struct { /* struct used by BPF_ITER_CREATE command */ ++ __u32 link_fd; ++ __u32 flags; ++ } iter_create; ++ ++ struct { /* struct used by BPF_PROG_BIND_MAP command */ ++ __u32 prog_fd; ++ __u32 map_fd; ++ __u32 flags; /* extra flags */ ++ } prog_bind_map; ++ ++} __attribute__((aligned(8))); ++ ++/* The description below is an attempt at providing documentation to eBPF ++ * developers about the multiple available eBPF helper functions. It can be ++ * parsed and used to produce a manual page. The workflow is the following, ++ * and requires the rst2man utility: ++ * ++ * $ ./scripts/bpf_doc.py \ ++ * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst ++ * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7 ++ * $ man /tmp/bpf-helpers.7 ++ * ++ * Note that in order to produce this external documentation, some RST ++ * formatting is used in the descriptions to get "bold" and "italics" in ++ * manual pages. Also note that the few trailing white spaces are ++ * intentional, removing them would break paragraphs for rst2man. ++ * ++ * Start of BPF helper function descriptions: ++ * ++ * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key) ++ * Description ++ * Perform a lookup in *map* for an entry associated to *key*. ++ * Return ++ * Map value associated to *key*, or **NULL** if no entry was ++ * found. ++ * ++ * long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) ++ * Description ++ * Add or update the value of the entry associated to *key* in ++ * *map* with *value*. *flags* is one of: ++ * ++ * **BPF_NOEXIST** ++ * The entry for *key* must not exist in the map. ++ * **BPF_EXIST** ++ * The entry for *key* must already exist in the map. ++ * **BPF_ANY** ++ * No condition on the existence of the entry for *key*. ++ * ++ * Flag value **BPF_NOEXIST** cannot be used for maps of types ++ * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all ++ * elements always exist), the helper would return an error. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_map_delete_elem(struct bpf_map *map, const void *key) ++ * Description ++ * Delete entry with *key* from *map*. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr) ++ * Description ++ * For tracing programs, safely attempt to read *size* bytes from ++ * kernel space address *unsafe_ptr* and store the data in *dst*. ++ * ++ * Generally, use **bpf_probe_read_user**\ () or ++ * **bpf_probe_read_kernel**\ () instead. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * u64 bpf_ktime_get_ns(void) ++ * Description ++ * Return the time elapsed since system boot, in nanoseconds. 
++ * Does not include time the system was suspended. ++ * See: **clock_gettime**\ (**CLOCK_MONOTONIC**) ++ * Return ++ * Current *ktime*. ++ * ++ * long bpf_trace_printk(const char *fmt, u32 fmt_size, ...) ++ * Description ++ * This helper is a "printk()-like" facility for debugging. It ++ * prints a message defined by format *fmt* (of size *fmt_size*) ++ * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if ++ * available. It can take up to three additional **u64** ++ * arguments (as an eBPF helpers, the total number of arguments is ++ * limited to five). ++ * ++ * Each time the helper is called, it appends a line to the trace. ++ * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is ++ * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this. ++ * The format of the trace is customizable, and the exact output ++ * one will get depends on the options set in ++ * *\/sys/kernel/debug/tracing/trace_options* (see also the ++ * *README* file under the same directory). However, it usually ++ * defaults to something like: ++ * ++ * :: ++ * ++ * telnet-470 [001] .N.. 419421.045894: 0x00000001: ++ * ++ * In the above: ++ * ++ * * ``telnet`` is the name of the current task. ++ * * ``470`` is the PID of the current task. ++ * * ``001`` is the CPU number on which the task is ++ * running. ++ * * In ``.N..``, each character refers to a set of ++ * options (whether irqs are enabled, scheduling ++ * options, whether hard/softirqs are running, level of ++ * preempt_disabled respectively). **N** means that ++ * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED** ++ * are set. ++ * * ``419421.045894`` is a timestamp. ++ * * ``0x00000001`` is a fake value used by BPF for the ++ * instruction pointer register. ++ * * ```` is the message formatted with ++ * *fmt*. ++ * ++ * The conversion specifiers supported by *fmt* are similar, but ++ * more limited than for printk(). They are **%d**, **%i**, ++ * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**, ++ * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size ++ * of field, padding with zeroes, etc.) is available, and the ++ * helper will return **-EINVAL** (but print nothing) if it ++ * encounters an unknown specifier. ++ * ++ * Also, note that **bpf_trace_printk**\ () is slow, and should ++ * only be used for debugging purposes. For this reason, a notice ++ * block (spanning several lines) is printed to kernel logs and ++ * states that the helper should not be used "for production use" ++ * the first time this helper is used (or more precisely, when ++ * **trace_printk**\ () buffers are allocated). For passing values ++ * to user space, perf events should be preferred. ++ * Return ++ * The number of bytes written to the buffer, or a negative error ++ * in case of failure. ++ * ++ * u32 bpf_get_prandom_u32(void) ++ * Description ++ * Get a pseudo-random number. ++ * ++ * From a security point of view, this helper uses its own ++ * pseudo-random internal state, and cannot be used to infer the ++ * seed of other random functions in the kernel. However, it is ++ * essential to note that the generator used by the helper is not ++ * cryptographically secure. ++ * Return ++ * A random 32-bit unsigned value. ++ * ++ * u32 bpf_get_smp_processor_id(void) ++ * Description ++ * Get the SMP (symmetric multiprocessing) processor id. Note that ++ * all programs run with migration disabled, which means that the ++ * SMP processor id is stable during all the execution of the ++ * program. 
++ * Return ++ * The SMP id of the processor running the program. ++ * ++ * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) ++ * Description ++ * Store *len* bytes from address *from* into the packet ++ * associated to *skb*, at *offset*. *flags* are a combination of ++ * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the ++ * checksum for the packet after storing the bytes) and ++ * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ ++ * **->swhash** and *skb*\ **->l4hash** to 0). ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) ++ * Description ++ * Recompute the layer 3 (e.g. IP) checksum for the packet ++ * associated to *skb*. Computation is incremental, so the helper ++ * must know the former value of the header field that was ++ * modified (*from*), the new value of this field (*to*), and the ++ * number of bytes (2 or 4) for this field, stored in *size*. ++ * Alternatively, it is possible to store the difference between ++ * the previous and the new values of the header field in *to*, by ++ * setting *from* and *size* to 0. For both methods, *offset* ++ * indicates the location of the IP checksum within the packet. ++ * ++ * This helper works in combination with **bpf_csum_diff**\ (), ++ * which does not update the checksum in-place, but offers more ++ * flexibility and can handle sizes larger than 2 or 4 for the ++ * checksum to update. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) ++ * Description ++ * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the ++ * packet associated to *skb*. Computation is incremental, so the ++ * helper must know the former value of the header field that was ++ * modified (*from*), the new value of this field (*to*), and the ++ * number of bytes (2 or 4) for this field, stored on the lowest ++ * four bits of *flags*. Alternatively, it is possible to store ++ * the difference between the previous and the new values of the ++ * header field in *to*, by setting *from* and the four lowest ++ * bits of *flags* to 0. For both methods, *offset* indicates the ++ * location of the IP checksum within the packet. In addition to ++ * the size of the field, *flags* can be added (bitwise OR) actual ++ * flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left ++ * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and ++ * for updates resulting in a null checksum the value is set to ++ * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates ++ * the checksum is to be computed against a pseudo-header. 
++ * ++ * This helper works in combination with **bpf_csum_diff**\ (), ++ * which does not update the checksum in-place, but offers more ++ * flexibility and can handle sizes larger than 2 or 4 for the ++ * checksum to update. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) ++ * Description ++ * This special helper is used to trigger a "tail call", or in ++ * other words, to jump into another eBPF program. The same stack ++ * frame is used (but values on stack and in registers for the ++ * caller are not accessible to the callee). This mechanism allows ++ * for program chaining, either for raising the maximum number of ++ * available eBPF instructions, or to execute given programs in ++ * conditional blocks. For security reasons, there is an upper ++ * limit to the number of successive tail calls that can be ++ * performed. ++ * ++ * Upon call of this helper, the program attempts to jump into a ++ * program referenced at index *index* in *prog_array_map*, a ++ * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes ++ * *ctx*, a pointer to the context. ++ * ++ * If the call succeeds, the kernel immediately runs the first ++ * instruction of the new program. This is not a function call, ++ * and it never returns to the previous program. If the call ++ * fails, then the helper has no effect, and the caller continues ++ * to run its subsequent instructions. A call can fail if the ++ * destination program for the jump does not exist (i.e. *index* ++ * is superior to the number of entries in *prog_array_map*), or ++ * if the maximum number of tail calls has been reached for this ++ * chain of programs. This limit is defined in the kernel by the ++ * macro **MAX_TAIL_CALL_CNT** (not accessible to user space), ++ * which is currently set to 33. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) ++ * Description ++ * Clone and redirect the packet associated to *skb* to another ++ * net device of index *ifindex*. Both ingress and egress ++ * interfaces can be used for redirection. The **BPF_F_INGRESS** ++ * value in *flags* is used to make the distinction (ingress path ++ * is selected if the flag is present, egress path otherwise). ++ * This is the only flag supported for now. ++ * ++ * In comparison with **bpf_redirect**\ () helper, ++ * **bpf_clone_redirect**\ () has the associated cost of ++ * duplicating the packet buffer, but this can be executed out of ++ * the eBPF program. Conversely, **bpf_redirect**\ () is more ++ * efficient, but it is handled through an action code where the ++ * redirection happens only after the eBPF program has returned. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. 
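++ *
++ *		For instance, a TC classifier can use this helper to mirror
++ *		traffic to a second device while letting the original packet
++ *		continue (a minimal sketch; **SEC**\ () comes from libbpf's
++ *		*bpf_helpers.h*, **TC_ACT_OK** from *linux/pkt_cls.h*, and
++ *		the ifindex 4 is only a placeholder for the target device):
++ *
++ *		::
++ *
++ *			SEC("tc")
++ *			int mirror(struct __sk_buff *skb)
++ *			{
++ *				// clone to ifindex 4 on its egress path
++ *				bpf_clone_redirect(skb, 4, 0);
++ *				return TC_ACT_OK;
++ *			}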
++ * ++ * u64 bpf_get_current_pid_tgid(void) ++ * Description ++ * Get the current pid and tgid. ++ * Return ++ * A 64-bit integer containing the current tgid and pid, and ++ * created as such: ++ * *current_task*\ **->tgid << 32 \|** ++ * *current_task*\ **->pid**. ++ * ++ * u64 bpf_get_current_uid_gid(void) ++ * Description ++ * Get the current uid and gid. ++ * Return ++ * A 64-bit integer containing the current GID and UID, and ++ * created as such: *current_gid* **<< 32 \|** *current_uid*. ++ * ++ * long bpf_get_current_comm(void *buf, u32 size_of_buf) ++ * Description ++ * Copy the **comm** attribute of the current task into *buf* of ++ * *size_of_buf*. The **comm** attribute contains the name of ++ * the executable (excluding the path) for the current task. The ++ * *size_of_buf* must be strictly positive. On success, the ++ * helper makes sure that the *buf* is NUL-terminated. On failure, ++ * it is filled with zeroes. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * u32 bpf_get_cgroup_classid(struct sk_buff *skb) ++ * Description ++ * Retrieve the classid for the current task, i.e. for the net_cls ++ * cgroup to which *skb* belongs. ++ * ++ * This helper can be used on TC egress path, but not on ingress. ++ * ++ * The net_cls cgroup provides an interface to tag network packets ++ * based on a user-provided identifier for all traffic coming from ++ * the tasks belonging to the related cgroup. See also the related ++ * kernel documentation, available from the Linux sources in file ++ * *Documentation/admin-guide/cgroup-v1/net_cls.rst*. ++ * ++ * The Linux kernel has two versions for cgroups: there are ++ * cgroups v1 and cgroups v2. Both are available to users, who can ++ * use a mixture of them, but note that the net_cls cgroup is for ++ * cgroup v1 only. This makes it incompatible with BPF programs ++ * run on cgroups, which is a cgroup-v2-only feature (a socket can ++ * only hold data for one version of cgroups at a time). ++ * ++ * This helper is only available is the kernel was compiled with ++ * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to ++ * "**y**" or to "**m**". ++ * Return ++ * The classid, or 0 for the default unconfigured classid. ++ * ++ * long bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) ++ * Description ++ * Push a *vlan_tci* (VLAN tag control information) of protocol ++ * *vlan_proto* to the packet associated to *skb*, then update ++ * the checksum. Note that if *vlan_proto* is different from ++ * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to ++ * be **ETH_P_8021Q**. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_skb_vlan_pop(struct sk_buff *skb) ++ * Description ++ * Pop a VLAN header from the packet associated to *skb*. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. 
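++ *
++ *		The packing used by **bpf_get_current_pid_tgid**\ () and
++ *		**bpf_get_current_uid_gid**\ () above unpacks as follows
++ *		inside any program type that may call them (a minimal
++ *		sketch; __u32 and __u64 come from *linux/types.h*):
++ *
++ *		::
++ *
++ *			__u64 id = bpf_get_current_pid_tgid();
++ *			__u64 ug = bpf_get_current_uid_gid();
++ *			__u32 tgid = id >> 32;   // process id seen by userspace
++ *			__u32 tid  = (__u32)id;  // kernel task (thread) id
++ *			__u32 uid  = (__u32)ug;  // gid is ug >> 32
++ *			char comm[16];
++ *
++ *			bpf_get_current_comm(comm, sizeof(comm));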
++ * ++ * long bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) ++ * Description ++ * Get tunnel metadata. This helper takes a pointer *key* to an ++ * empty **struct bpf_tunnel_key** of **size**, that will be ++ * filled with tunnel metadata for the packet associated to *skb*. ++ * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which ++ * indicates that the tunnel is based on IPv6 protocol instead of ++ * IPv4. ++ * ++ * The **struct bpf_tunnel_key** is an object that generalizes the ++ * principal parameters used by various tunneling protocols into a ++ * single struct. This way, it can be used to easily make a ++ * decision based on the contents of the encapsulation header, ++ * "summarized" in this struct. In particular, it holds the IP ++ * address of the remote end (IPv4 or IPv6, depending on the case) ++ * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also, ++ * this struct exposes the *key*\ **->tunnel_id**, which is ++ * generally mapped to a VNI (Virtual Network Identifier), making ++ * it programmable together with the **bpf_skb_set_tunnel_key**\ ++ * () helper. ++ * ++ * Let's imagine that the following code is part of a program ++ * attached to the TC ingress interface, on one end of a GRE ++ * tunnel, and is supposed to filter out all messages coming from ++ * remote ends with IPv4 address other than 10.0.0.1: ++ * ++ * :: ++ * ++ * int ret; ++ * struct bpf_tunnel_key key = {}; ++ * ++ * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); ++ * if (ret < 0) ++ * return TC_ACT_SHOT; // drop packet ++ * ++ * if (key.remote_ipv4 != 0x0a000001) ++ * return TC_ACT_SHOT; // drop packet ++ * ++ * return TC_ACT_OK; // accept packet ++ * ++ * This interface can also be used with all encapsulation devices ++ * that can operate in "collect metadata" mode: instead of having ++ * one network device per specific configuration, the "collect ++ * metadata" mode only requires a single device where the ++ * configuration can be extracted from this helper. ++ * ++ * This can be used together with various tunnels such as VXLan, ++ * Geneve, GRE or IP in IP (IPIP). ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) ++ * Description ++ * Populate tunnel metadata for packet associated to *skb.* The ++ * tunnel metadata is set to the contents of *key*, of *size*. The ++ * *flags* can be set to a combination of the following values: ++ * ++ * **BPF_F_TUNINFO_IPV6** ++ * Indicate that the tunnel is based on IPv6 protocol ++ * instead of IPv4. ++ * **BPF_F_ZERO_CSUM_TX** ++ * For IPv4 packets, add a flag to tunnel metadata ++ * indicating that checksum computation should be skipped ++ * and checksum set to zeroes. ++ * **BPF_F_DONT_FRAGMENT** ++ * Add a flag to tunnel metadata indicating that the ++ * packet should not be fragmented. ++ * **BPF_F_SEQ_NUMBER** ++ * Add a flag to tunnel metadata indicating that a ++ * sequence number should be added to tunnel header before ++ * sending the packet. This flag was added for GRE ++ * encapsulation, but might be used with other protocols ++ * as well in the future. ++ * ++ * Here is a typical usage on the transmit path: ++ * ++ * :: ++ * ++ * struct bpf_tunnel_key key; ++ * populate key ... 
++ * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); ++ * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0); ++ * ++ * See also the description of the **bpf_skb_get_tunnel_key**\ () ++ * helper for additional information. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags) ++ * Description ++ * Read the value of a perf event counter. This helper relies on a ++ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of ++ * the perf event counter is selected when *map* is updated with ++ * perf event file descriptors. The *map* is an array whose size ++ * is the number of available CPUs, and each cell contains a value ++ * relative to one CPU. The value to retrieve is indicated by ++ * *flags*, that contains the index of the CPU to look up, masked ++ * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to ++ * **BPF_F_CURRENT_CPU** to indicate that the value for the ++ * current CPU should be retrieved. ++ * ++ * Note that before Linux 4.13, only hardware perf event can be ++ * retrieved. ++ * ++ * Also, be aware that the newer helper ++ * **bpf_perf_event_read_value**\ () is recommended over ++ * **bpf_perf_event_read**\ () in general. The latter has some ABI ++ * quirks where error and counter value are used as a return code ++ * (which is wrong to do since ranges may overlap). This issue is ++ * fixed with **bpf_perf_event_read_value**\ (), which at the same ++ * time provides more features over the **bpf_perf_event_read**\ ++ * () interface. Please refer to the description of ++ * **bpf_perf_event_read_value**\ () for details. ++ * Return ++ * The value of the perf event counter read from the map, or a ++ * negative error code in case of failure. ++ * ++ * long bpf_redirect(u32 ifindex, u64 flags) ++ * Description ++ * Redirect the packet to another net device of index *ifindex*. ++ * This helper is somewhat similar to **bpf_clone_redirect**\ ++ * (), except that the packet is not cloned, which provides ++ * increased performance. ++ * ++ * Except for XDP, both ingress and egress interfaces can be used ++ * for redirection. The **BPF_F_INGRESS** value in *flags* is used ++ * to make the distinction (ingress path is selected if the flag ++ * is present, egress path otherwise). Currently, XDP only ++ * supports redirection to the egress interface, and accepts no ++ * flag at all. ++ * ++ * The same effect can also be attained with the more generic ++ * **bpf_redirect_map**\ (), which uses a BPF map to store the ++ * redirect target instead of providing it directly to the helper. ++ * Return ++ * For XDP, the helper returns **XDP_REDIRECT** on success or ++ * **XDP_ABORTED** on error. For other program types, the values ++ * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on ++ * error. ++ * ++ * u32 bpf_get_route_realm(struct sk_buff *skb) ++ * Description ++ * Retrieve the realm or the route, that is to say the ++ * **tclassid** field of the destination for the *skb*. The ++ * identifier retrieved is a user-provided tag, similar to the ++ * one used with the net_cls cgroup (see description for ++ * **bpf_get_cgroup_classid**\ () helper), but here this tag is ++ * held by a route (a destination entry), not by a task. ++ * ++ * Retrieving this identifier works with the clsact TC egress hook ++ * (see also **tc-bpf(8)**), or alternatively on conventional ++ * classful egress qdiscs, but not on TC ingress path. 
In case of ++ * clsact TC egress hook, this has the advantage that, internally, ++ * the destination entry has not been dropped yet in the transmit ++ * path. Therefore, the destination entry does not need to be ++ * artificially held via **netif_keep_dst**\ () for a classful ++ * qdisc until the *skb* is freed. ++ * ++ * This helper is available only if the kernel was compiled with ++ * **CONFIG_IP_ROUTE_CLASSID** configuration option. ++ * Return ++ * The realm of the route for the packet associated to *skb*, or 0 ++ * if none was found. ++ * ++ * long bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) ++ * Description ++ * Write raw *data* blob into a special BPF perf event held by ++ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf ++ * event must have the following attributes: **PERF_SAMPLE_RAW** ++ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and ++ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. ++ * ++ * The *flags* are used to indicate the index in *map* for which ++ * the value must be put, masked with **BPF_F_INDEX_MASK**. ++ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** ++ * to indicate that the index of the current CPU core should be ++ * used. ++ * ++ * The value to write, of *size*, is passed through eBPF stack and ++ * pointed by *data*. ++ * ++ * The context of the program *ctx* needs also be passed to the ++ * helper. ++ * ++ * On user space, a program willing to read the values needs to ++ * call **perf_event_open**\ () on the perf event (either for ++ * one or for all CPUs) and to store the file descriptor into the ++ * *map*. This must be done before the eBPF program can send data ++ * into it. An example is available in file ++ * *samples/bpf/trace_output_user.c* in the Linux kernel source ++ * tree (the eBPF program counterpart is in ++ * *samples/bpf/trace_output_kern.c*). ++ * ++ * **bpf_perf_event_output**\ () achieves better performance ++ * than **bpf_trace_printk**\ () for sharing data with user ++ * space, and is much better suitable for streaming data from eBPF ++ * programs. ++ * ++ * Note that this helper is not restricted to tracing use cases ++ * and can be used with programs attached to TC or XDP as well, ++ * where it allows for passing data to user space listeners. Data ++ * can be: ++ * ++ * * Only custom structs, ++ * * Only the packet payload, or ++ * * A combination of both. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len) ++ * Description ++ * This helper was provided as an easy way to load data from a ++ * packet. It can be used to load *len* bytes from *offset* from ++ * the packet associated to *skb*, into the buffer pointed by ++ * *to*. ++ * ++ * Since Linux 4.7, usage of this helper has mostly been replaced ++ * by "direct packet access", enabling packet data to be ++ * manipulated with *skb*\ **->data** and *skb*\ **->data_end** ++ * pointing respectively to the first byte of packet data and to ++ * the byte after the last byte of packet data. However, it ++ * remains useful if one wishes to read large quantities of data ++ * at once from a packet into the eBPF stack. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags) ++ * Description ++ * Walk a user or a kernel stack and return its id. 
To achieve ++ * this, the helper needs *ctx*, which is a pointer to the context ++ * on which the tracing program is executed, and a pointer to a ++ * *map* of type **BPF_MAP_TYPE_STACK_TRACE**. ++ * ++ * The last argument, *flags*, holds the number of stack frames to ++ * skip (from 0 to 255), masked with ++ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set ++ * a combination of the following flags: ++ * ++ * **BPF_F_USER_STACK** ++ * Collect a user space stack instead of a kernel stack. ++ * **BPF_F_FAST_STACK_CMP** ++ * Compare stacks by hash only. ++ * **BPF_F_REUSE_STACKID** ++ * If two different stacks hash into the same *stackid*, ++ * discard the old one. ++ * ++ * The stack id retrieved is a 32 bit long integer handle which ++ * can be further combined with other data (including other stack ++ * ids) and used as a key into maps. This can be useful for ++ * generating a variety of graphs (such as flame graphs or off-cpu ++ * graphs). ++ * ++ * For walking a stack, this helper is an improvement over ++ * **bpf_probe_read**\ (), which can be used with unrolled loops ++ * but is not efficient and consumes a lot of eBPF instructions. ++ * Instead, **bpf_get_stackid**\ () can collect up to ++ * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that ++ * this limit can be controlled with the **sysctl** program, and ++ * that it should be manually increased in order to profile long ++ * user stacks (such as stacks for Java programs). To do so, use: ++ * ++ * :: ++ * ++ * # sysctl kernel.perf_event_max_stack= ++ * Return ++ * The positive or null stack id on success, or a negative error ++ * in case of failure. ++ * ++ * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed) ++ * Description ++ * Compute a checksum difference, from the raw buffer pointed by ++ * *from*, of length *from_size* (that must be a multiple of 4), ++ * towards the raw buffer pointed by *to*, of size *to_size* ++ * (same remark). An optional *seed* can be added to the value ++ * (this can be cascaded, the seed may come from a previous call ++ * to the helper). ++ * ++ * This is flexible enough to be used in several ways: ++ * ++ * * With *from_size* == 0, *to_size* > 0 and *seed* set to ++ * checksum, it can be used when pushing new data. ++ * * With *from_size* > 0, *to_size* == 0 and *seed* set to ++ * checksum, it can be used when removing data from a packet. ++ * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it ++ * can be used to compute a diff. Note that *from_size* and ++ * *to_size* do not need to be equal. ++ * ++ * This helper can be used in combination with ++ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to ++ * which one can feed in the difference computed with ++ * **bpf_csum_diff**\ (). ++ * Return ++ * The checksum result, or a negative error code in case of ++ * failure. ++ * ++ * long bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) ++ * Description ++ * Retrieve tunnel options metadata for the packet associated to ++ * *skb*, and store the raw tunnel option data to the buffer *opt* ++ * of *size*. ++ * ++ * This helper can be used with encapsulation devices that can ++ * operate in "collect metadata" mode (please refer to the related ++ * note in the description of **bpf_skb_get_tunnel_key**\ () for ++ * more details). 
A particular example where this can be used is ++ * in combination with the Geneve encapsulation protocol, where it ++ * allows for pushing (with **bpf_skb_get_tunnel_opt**\ () helper) ++ * and retrieving arbitrary TLVs (Type-Length-Value headers) from ++ * the eBPF program. This allows for full customization of these ++ * headers. ++ * Return ++ * The size of the option data retrieved. ++ * ++ * long bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) ++ * Description ++ * Set tunnel options metadata for the packet associated to *skb* ++ * to the option data contained in the raw buffer *opt* of *size*. ++ * ++ * See also the description of the **bpf_skb_get_tunnel_opt**\ () ++ * helper for additional information. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) ++ * Description ++ * Change the protocol of the *skb* to *proto*. Currently ++ * supported are transition from IPv4 to IPv6, and from IPv6 to ++ * IPv4. The helper takes care of the groundwork for the ++ * transition, including resizing the socket buffer. The eBPF ++ * program is expected to fill the new headers, if any, via ++ * **skb_store_bytes**\ () and to recompute the checksums with ++ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ ++ * (). The main case for this helper is to perform NAT64 ++ * operations out of an eBPF program. ++ * ++ * Internally, the GSO type is marked as dodgy so that headers are ++ * checked and segments are recalculated by the GSO/GRO engine. ++ * The size for GSO target is adapted as well. ++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_skb_change_type(struct sk_buff *skb, u32 type) ++ * Description ++ * Change the packet type for the packet associated to *skb*. This ++ * comes down to setting *skb*\ **->pkt_type** to *type*, except ++ * the eBPF program does not have a write access to *skb*\ ++ * **->pkt_type** beside this helper. Using a helper here allows ++ * for graceful handling of errors. ++ * ++ * The major use case is to change incoming *skb*s to ++ * **PACKET_HOST** in a programmatic way instead of having to ++ * recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for ++ * example. ++ * ++ * Note that *type* only allows certain values. At this time, they ++ * are: ++ * ++ * **PACKET_HOST** ++ * Packet is for us. ++ * **PACKET_BROADCAST** ++ * Send packet to all. ++ * **PACKET_MULTICAST** ++ * Send packet to group. ++ * **PACKET_OTHERHOST** ++ * Send packet to someone else. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) ++ * Description ++ * Check whether *skb* is a descendant of the cgroup2 held by ++ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. ++ * Return ++ * The return value depends on the result of the test, and can be: ++ * ++ * * 0, if the *skb* failed the cgroup2 descendant test. ++ * * 1, if the *skb* succeeded the cgroup2 descendant test. ++ * * A negative error code, if an error occurred. 
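++ *
++ *		A common event-reporting pattern ties
++ *		**bpf_perf_event_output**\ () described above to the
++ *		current-task helpers (a minimal sketch; the map declaration
++ *		uses the BTF-style macros from libbpf's *bpf_helpers.h*,
++ *		*struct pt_regs* needs the architecture headers or
++ *		*vmlinux.h*, and the kprobe attach point is only an example):
++ *
++ *		::
++ *
++ *			char LICENSE[] SEC("license") = "GPL";
++ *
++ *			struct {
++ *				__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
++ *				__uint(key_size, sizeof(__u32));
++ *				__uint(value_size, sizeof(__u32));
++ *			} events SEC(".maps");
++ *
++ *			struct sample { __u32 tgid; char comm[16]; };
++ *
++ *			SEC("kprobe/do_nanosleep")
++ *			int report(struct pt_regs *ctx)
++ *			{
++ *				struct sample s = {};
++ *
++ *				s.tgid = bpf_get_current_pid_tgid() >> 32;
++ *				bpf_get_current_comm(s.comm, sizeof(s.comm));
++ *				bpf_perf_event_output(ctx, &events,
++ *						      BPF_F_CURRENT_CPU,
++ *						      &s, sizeof(s));
++ *				return 0;
++ *			}
++ *
++ *		User space then opens one perf event per CPU, stores the
++ *		file descriptors into *events* and reads the samples, as
++ *		described for **bpf_perf_event_output**\ () above.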
++ * ++ * u32 bpf_get_hash_recalc(struct sk_buff *skb) ++ * Description ++ * Retrieve the hash of the packet, *skb*\ **->hash**. If it is ++ * not set, in particular if the hash was cleared due to mangling, ++ * recompute this hash. Later accesses to the hash can be done ++ * directly with *skb*\ **->hash**. ++ * ++ * Calling **bpf_set_hash_invalid**\ (), changing a packet ++ * prototype with **bpf_skb_change_proto**\ (), or calling ++ * **bpf_skb_store_bytes**\ () with the ++ * **BPF_F_INVALIDATE_HASH** are actions susceptible to clear ++ * the hash and to trigger a new computation for the next call to ++ * **bpf_get_hash_recalc**\ (). ++ * Return ++ * The 32-bit hash. ++ * ++ * u64 bpf_get_current_task(void) ++ * Description ++ * Get the current task. ++ * Return ++ * A pointer to the current task struct. ++ * ++ * long bpf_probe_write_user(void *dst, const void *src, u32 len) ++ * Description ++ * Attempt in a safe way to write *len* bytes from the buffer ++ * *src* to *dst* in memory. It only works for threads that are in ++ * user context, and *dst* must be a valid user space address. ++ * ++ * This helper should not be used to implement any kind of ++ * security mechanism because of TOC-TOU attacks, but rather to ++ * debug, divert, and manipulate execution of semi-cooperative ++ * processes. ++ * ++ * Keep in mind that this feature is meant for experiments, and it ++ * has a risk of crashing the system and running programs. ++ * Therefore, when an eBPF program using this helper is attached, ++ * a warning including PID and process name is printed to kernel ++ * logs. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) ++ * Description ++ * Check whether the probe is being run is the context of a given ++ * subset of the cgroup2 hierarchy. The cgroup2 to test is held by ++ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. ++ * Return ++ * The return value depends on the result of the test, and can be: ++ * ++ * * 1, if current task belongs to the cgroup2. ++ * * 0, if current task does not belong to the cgroup2. ++ * * A negative error code, if an error occurred. ++ * ++ * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) ++ * Description ++ * Resize (trim or grow) the packet associated to *skb* to the ++ * new *len*. The *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * The basic idea is that the helper performs the needed work to ++ * change the size of the packet, then the eBPF program rewrites ++ * the rest via helpers like **bpf_skb_store_bytes**\ (), ++ * **bpf_l3_csum_replace**\ (), **bpf_l3_csum_replace**\ () ++ * and others. This helper is a slow path utility intended for ++ * replies with control messages. And because it is targeted for ++ * slow path, the helper itself can afford to be slow: it ++ * implicitly linearizes, unclones and drops offloads from the ++ * *skb*. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_skb_pull_data(struct sk_buff *skb, u32 len) ++ * Description ++ * Pull in non-linear data in case the *skb* is non-linear and not ++ * all of *len* are part of the linear section. 
Make *len* bytes ++ * from *skb* readable and writable. If a zero value is passed for ++ * *len*, then all bytes in the linear part of *skb* will be made ++ * readable and writable. ++ * ++ * This helper is only needed for reading and writing with direct ++ * packet access. ++ * ++ * For direct packet access, testing that offsets to access ++ * are within packet boundaries (test on *skb*\ **->data_end**) is ++ * susceptible to fail if offsets are invalid, or if the requested ++ * data is in non-linear parts of the *skb*. On failure the ++ * program can just bail out, or in the case of a non-linear ++ * buffer, use a helper to make the data available. The ++ * **bpf_skb_load_bytes**\ () helper is a first solution to access ++ * the data. Another one consists in using **bpf_skb_pull_data** ++ * to pull in once the non-linear parts, then retesting and ++ * eventually access the data. ++ * ++ * At the same time, this also makes sure the *skb* is uncloned, ++ * which is a necessary condition for direct write. As this needs ++ * to be an invariant for the write part only, the verifier ++ * detects writes and adds a prologue that is calling ++ * **bpf_skb_pull_data()** to effectively unclone the *skb* from ++ * the very beginning in case it is indeed cloned. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum) ++ * Description ++ * Add the checksum *csum* into *skb*\ **->csum** in case the ++ * driver has supplied a checksum for the entire packet into that ++ * field. Return an error otherwise. This helper is intended to be ++ * used in combination with **bpf_csum_diff**\ (), in particular ++ * when the checksum needs to be updated after data has been ++ * written into the packet through direct packet access. ++ * Return ++ * The checksum on success, or a negative error code in case of ++ * failure. ++ * ++ * void bpf_set_hash_invalid(struct sk_buff *skb) ++ * Description ++ * Invalidate the current *skb*\ **->hash**. It can be used after ++ * mangling on headers through direct packet access, in order to ++ * indicate that the hash is outdated and to trigger a ++ * recalculation the next time the kernel tries to access this ++ * hash or when the **bpf_get_hash_recalc**\ () helper is called. ++ * Return ++ * void. ++ * ++ * long bpf_get_numa_node_id(void) ++ * Description ++ * Return the id of the current NUMA node. The primary use case ++ * for this helper is the selection of sockets for the local NUMA ++ * node, when the program is attached to sockets using the ++ * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**), ++ * but the helper is also available to other eBPF program types, ++ * similarly to **bpf_get_smp_processor_id**\ (). ++ * Return ++ * The id of current NUMA node. ++ * ++ * long bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) ++ * Description ++ * Grows headroom of packet associated to *skb* and adjusts the ++ * offset of the MAC header accordingly, adding *len* bytes of ++ * space. It automatically extends and reallocates memory as ++ * required. ++ * ++ * This helper can be used on a layer 3 *skb* to push a MAC header ++ * for redirection into a layer 2 device. 
++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) ++ * Description ++ * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that ++ * it is possible to use a negative value for *delta*. This helper ++ * can be used to prepare the packet for pushing or popping ++ * headers. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr) ++ * Description ++ * Copy a NUL terminated string from an unsafe kernel address ++ * *unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for ++ * more details. ++ * ++ * Generally, use **bpf_probe_read_user_str**\ () or ++ * **bpf_probe_read_kernel_str**\ () instead. ++ * Return ++ * On success, the strictly positive length of the string, ++ * including the trailing NUL character. On error, a negative ++ * value. ++ * ++ * u64 bpf_get_socket_cookie(struct sk_buff *skb) ++ * Description ++ * If the **struct sk_buff** pointed by *skb* has a known socket, ++ * retrieve the cookie (generated by the kernel) of this socket. ++ * If no cookie has been set yet, generate a new cookie. Once ++ * generated, the socket cookie remains stable for the life of the ++ * socket. This helper can be useful for monitoring per socket ++ * networking traffic statistics as it provides a global socket ++ * identifier that can be assumed unique. ++ * Return ++ * An 8-byte long unique number on success, or 0 if the socket ++ * field is missing inside *skb*. ++ * ++ * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) ++ * Description ++ * Equivalent to bpf_get_socket_cookie() helper that accepts ++ * *skb*, but gets socket from **struct bpf_sock_addr** context. ++ * Return ++ * An 8-byte long unique number. ++ * ++ * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) ++ * Description ++ * Equivalent to **bpf_get_socket_cookie**\ () helper that accepts ++ * *skb*, but gets socket from **struct bpf_sock_ops** context. ++ * Return ++ * An 8-byte long unique number. ++ * ++ * u64 bpf_get_socket_cookie(struct sock *sk) ++ * Description ++ * Equivalent to **bpf_get_socket_cookie**\ () helper that accepts ++ * *sk*, but gets socket from a BTF **struct sock**. This helper ++ * also works for sleepable programs. ++ * Return ++ * An 8-byte long unique number or 0 if *sk* is NULL. ++ * ++ * u32 bpf_get_socket_uid(struct sk_buff *skb) ++ * Description ++ * Get the owner UID of the socket associated to *skb*. ++ * Return ++ * The owner UID of the socket associated to *skb*. If the socket ++ * is **NULL**, or if it is not a full socket (i.e. if it is a ++ * time-wait or a request socket instead), **overflowuid** value ++ * is returned (note that **overflowuid** might also be the actual ++ * UID value for the socket).
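A short sketch of how **bpf_get_socket_cookie**\ () is typically used from a cgroup skb program, here to account egress bytes per socket (map and program names are hypothetical, error handling is minimal):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(max_entries, 10240);
        __type(key, __u64);   /* socket cookie */
        __type(value, __u64); /* bytes seen so far */
    } bytes_per_sock SEC(".maps");

    SEC("cgroup_skb/egress")
    int count_egress(struct __sk_buff *skb)
    {
        __u64 cookie = bpf_get_socket_cookie(skb);
        __u64 len = skb->len, *val;

        if (!cookie)
            return 1; /* no socket attached, let the packet through */

        val = bpf_map_lookup_elem(&bytes_per_sock, &cookie);
        if (val)
            __sync_fetch_and_add(val, len);
        else
            bpf_map_update_elem(&bytes_per_sock, &cookie, &len, BPF_ANY);

        return 1; /* 1 == allow for cgroup skb programs */
    }

    char LICENSE[] SEC("license") = "GPL";

The cookie works well as a map key precisely because, as described above, it is stable for the lifetime of the socket and can be assumed unique.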
++ * ++ * long bpf_set_hash(struct sk_buff *skb, u32 hash) ++ * Description ++ * Set the full hash for *skb* (set the field *skb*\ **->hash**) ++ * to value *hash*. ++ * Return ++ * 0 ++ * ++ * long bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) ++ * Description ++ * Emulate a call to **setsockopt()** on the socket associated to ++ * *bpf_socket*, which must be a full socket. The *level* at ++ * which the option resides and the name *optname* of the option ++ * must be specified, see **setsockopt(2)** for more information. ++ * The option value of length *optlen* is pointed by *optval*. ++ * ++ * *bpf_socket* should be one of the following: ++ * ++ * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. ++ * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** ++ * and **BPF_CGROUP_INET6_CONNECT**. ++ * ++ * This helper actually implements a subset of **setsockopt()**. ++ * It supports the following *level*\ s: ++ * ++ * * **SOL_SOCKET**, which supports the following *optname*\ s: ++ * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, ++ * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**, ++ * **SO_BINDTODEVICE**, **SO_KEEPALIVE**. ++ * * **IPPROTO_TCP**, which supports the following *optname*\ s: ++ * **TCP_CONGESTION**, **TCP_BPF_IW**, ++ * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**, ++ * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**, ++ * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**. ++ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. ++ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) ++ * Description ++ * Grow or shrink the room for data in the packet associated to ++ * *skb* by *len_diff*, and according to the selected *mode*. ++ * ++ * By default, the helper will reset any offloaded checksum ++ * indicator of the skb to CHECKSUM_NONE. This can be avoided ++ * by the following flag: ++ * ++ * * **BPF_F_ADJ_ROOM_NO_CSUM_RESET**: Do not reset offloaded ++ * checksum data of the skb to CHECKSUM_NONE. ++ * ++ * There are two supported modes at this time: ++ * ++ * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer ++ * (room space is added or removed below the layer 2 header). ++ * ++ * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer ++ * (room space is added or removed below the layer 3 header). ++ * ++ * The following flags are supported at this time: ++ * ++ * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size. ++ * Adjusting mss in this way is not allowed for datagrams. ++ * ++ * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4**, ++ * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6**: ++ * Any new space is reserved to hold a tunnel header. ++ * Configure skb offsets and other fields accordingly. ++ * ++ * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE**, ++ * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP**: ++ * Use with ENCAP_L3 flags to further specify the tunnel type. ++ * ++ * * **BPF_F_ADJ_ROOM_ENCAP_L2**\ (*len*): ++ * Use with ENCAP_L3/L4 flags to further specify the tunnel ++ * type; *len* is the length of the inner MAC header. ++ * ++ * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**: ++ * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the ++ * L2 type as Ethernet. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. 
Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) ++ * Description ++ * Redirect the packet to the endpoint referenced by *map* at ++ * index *key*. Depending on its type, this *map* can contain ++ * references to net devices (for forwarding packets through other ++ * ports), or to CPUs (for redirecting XDP frames to another CPU; ++ * but this is only implemented for native XDP (with driver ++ * support) as of this writing). ++ * ++ * The lower two bits of *flags* are used as the return code if ++ * the map lookup fails. This is so that the return value can be ++ * one of the XDP program return codes up to **XDP_TX**, as chosen ++ * by the caller. The higher bits of *flags* can be set to ++ * BPF_F_BROADCAST or BPF_F_EXCLUDE_INGRESS as defined below. ++ * ++ * With BPF_F_BROADCAST the packet will be broadcasted to all the ++ * interfaces in the map, with BPF_F_EXCLUDE_INGRESS the ingress ++ * interface will be excluded when do broadcasting. ++ * ++ * See also **bpf_redirect**\ (), which only supports redirecting ++ * to an ifindex, but doesn't require a map to do so. ++ * Return ++ * **XDP_REDIRECT** on success, or the value of the two lower bits ++ * of the *flags* argument on error. ++ * ++ * long bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) ++ * Description ++ * Redirect the packet to the socket referenced by *map* (of type ++ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and ++ * egress interfaces can be used for redirection. The ++ * **BPF_F_INGRESS** value in *flags* is used to make the ++ * distinction (ingress path is selected if the flag is present, ++ * egress path otherwise). This is the only flag supported for now. ++ * Return ++ * **SK_PASS** on success, or **SK_DROP** on error. ++ * ++ * long bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) ++ * Description ++ * Add an entry to, or update a *map* referencing sockets. The ++ * *skops* is used as a new value for the entry associated to ++ * *key*. *flags* is one of: ++ * ++ * **BPF_NOEXIST** ++ * The entry for *key* must not exist in the map. ++ * **BPF_EXIST** ++ * The entry for *key* must already exist in the map. ++ * **BPF_ANY** ++ * No condition on the existence of the entry for *key*. ++ * ++ * If the *map* has eBPF programs (parser and verdict), those will ++ * be inherited by the socket being added. If the socket is ++ * already attached to eBPF programs, this results in an error. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) ++ * Description ++ * Adjust the address pointed by *xdp_md*\ **->data_meta** by ++ * *delta* (which can be positive or negative). Note that this ++ * operation modifies the address stored in *xdp_md*\ **->data**, ++ * so the latter must be loaded only after the helper has been ++ * called. ++ * ++ * The use of *xdp_md*\ **->data_meta** is optional and programs ++ * are not required to use it. The rationale is that when the ++ * packet is processed with XDP (e.g. 
as DoS filter), it is ++ * possible to push further meta data along with it before passing ++ * to the stack, and to give the guarantee that an ingress eBPF ++ * program attached as a TC classifier on the same device can pick ++ * this up for further post-processing. Since TC works with socket ++ * buffers, it remains possible to set from XDP the **mark** or ++ * **priority** pointers, or other pointers for the socket buffer. ++ * Having this scratch space generic and programmable allows for ++ * more flexibility as the user is free to store whatever meta ++ * data they need. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) ++ * Description ++ * Read the value of a perf event counter, and store it into *buf* ++ * of size *buf_size*. This helper relies on a *map* of type ++ * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event ++ * counter is selected when *map* is updated with perf event file ++ * descriptors. The *map* is an array whose size is the number of ++ * available CPUs, and each cell contains a value relative to one ++ * CPU. The value to retrieve is indicated by *flags*, that ++ * contains the index of the CPU to look up, masked with ++ * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to ++ * **BPF_F_CURRENT_CPU** to indicate that the value for the ++ * current CPU should be retrieved. ++ * ++ * This helper behaves in a way close to ++ * **bpf_perf_event_read**\ () helper, save that instead of ++ * just returning the value observed, it fills the *buf* ++ * structure. This allows for additional data to be retrieved: in ++ * particular, the enabled and running times (in *buf*\ ++ * **->enabled** and *buf*\ **->running**, respectively) are ++ * copied. In general, **bpf_perf_event_read_value**\ () is ++ * recommended over **bpf_perf_event_read**\ (), which has some ++ * ABI issues and provides fewer functionalities. ++ * ++ * These values are interesting, because hardware PMU (Performance ++ * Monitoring Unit) counters are limited resources. When there are ++ * more PMU based perf events opened than available counters, ++ * kernel will multiplex these events so each event gets certain ++ * percentage (but not all) of the PMU time. In case that ++ * multiplexing happens, the number of samples or counter value ++ * will not reflect the case compared to when no multiplexing ++ * occurs. This makes comparison between different runs difficult. ++ * Typically, the counter value should be normalized before ++ * comparing to other experiments. The usual normalization is done ++ * as follows. ++ * ++ * :: ++ * ++ * normalized_counter = counter * t_enabled / t_running ++ * ++ * Where t_enabled is the time enabled for event and t_running is ++ * the time running for event since last normalization. The ++ * enabled and running times are accumulated since the perf event ++ * open. To achieve scaling factor between two invocations of an ++ * eBPF program, users can use CPU id as the key (which is ++ * typical for perf array usage model) to remember the previous ++ * value and do the calculation inside the eBPF program. 
++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) ++ * Description ++ * For en eBPF program attached to a perf event, retrieve the ++ * value of the event counter associated to *ctx* and store it in ++ * the structure pointed by *buf* and of size *buf_size*. Enabled ++ * and running times are also stored in the structure (see ++ * description of helper **bpf_perf_event_read_value**\ () for ++ * more details). ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) ++ * Description ++ * Emulate a call to **getsockopt()** on the socket associated to ++ * *bpf_socket*, which must be a full socket. The *level* at ++ * which the option resides and the name *optname* of the option ++ * must be specified, see **getsockopt(2)** for more information. ++ * The retrieved value is stored in the structure pointed by ++ * *opval* and of length *optlen*. ++ * ++ * *bpf_socket* should be one of the following: ++ * ++ * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. ++ * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** ++ * and **BPF_CGROUP_INET6_CONNECT**. ++ * ++ * This helper actually implements a subset of **getsockopt()**. ++ * It supports the following *level*\ s: ++ * ++ * * **IPPROTO_TCP**, which supports *optname* ++ * **TCP_CONGESTION**. ++ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. ++ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_override_return(struct pt_regs *regs, u64 rc) ++ * Description ++ * Used for error injection, this helper uses kprobes to override ++ * the return value of the probed function, and to set it to *rc*. ++ * The first argument is the context *regs* on which the kprobe ++ * works. ++ * ++ * This helper works by setting the PC (program counter) ++ * to an override function which is run in place of the original ++ * probed function. This means the probed function is not run at ++ * all. The replacement function just returns with the required ++ * value. ++ * ++ * This helper has security implications, and thus is subject to ++ * restrictions. It is only available if the kernel was compiled ++ * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration ++ * option, and in this case it only works on functions tagged with ++ * **ALLOW_ERROR_INJECTION** in the kernel code. ++ * ++ * Also, the helper is only available for the architectures having ++ * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing, ++ * x86 architecture is the only one to support this feature. ++ * Return ++ * 0 ++ * ++ * long bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) ++ * Description ++ * Attempt to set the value of the **bpf_sock_ops_cb_flags** field ++ * for the full TCP socket associated to *bpf_sock_ops* to ++ * *argval*. ++ * ++ * The primary use of this field is to determine if there should ++ * be calls to eBPF programs of type ++ * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP ++ * code. A program of the same type can change its value, per ++ * connection and as necessary, when the connection is ++ * established. 
This field is directly accessible for reading, but ++ * this helper must be used for updates in order to return an ++ * error if an eBPF program tries to set a callback that is not ++ * supported in the current kernel. ++ * ++ * *argval* is a flag array which can combine these flags: ++ * ++ * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) ++ * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) ++ * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) ++ * * **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT) ++ * ++ * Therefore, this function can be used to clear a callback flag by ++ * setting the appropriate bit to zero. e.g. to disable the RTO ++ * callback: ++ * ++ * **bpf_sock_ops_cb_flags_set(bpf_sock,** ++ * **bpf_sock->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)** ++ * ++ * Here are some examples of where one could call such eBPF ++ * program: ++ * ++ * * When RTO fires. ++ * * When a packet is retransmitted. ++ * * When the connection terminates. ++ * * When a packet is sent. ++ * * When a packet is received. ++ * Return ++ * Code **-EINVAL** if the socket is not a full TCP socket; ++ * otherwise, a positive number containing the bits that could not ++ * be set is returned (which comes down to 0 if all bits were set ++ * as required). ++ * ++ * long bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) ++ * Description ++ * This helper is used in programs implementing policies at the ++ * socket level. If the message *msg* is allowed to pass (i.e. if ++ * the verdict eBPF program returns **SK_PASS**), redirect it to ++ * the socket referenced by *map* (of type ++ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and ++ * egress interfaces can be used for redirection. The ++ * **BPF_F_INGRESS** value in *flags* is used to make the ++ * distinction (ingress path is selected if the flag is present, ++ * egress path otherwise). This is the only flag supported for now. ++ * Return ++ * **SK_PASS** on success, or **SK_DROP** on error. ++ * ++ * long bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) ++ * Description ++ * For socket policies, apply the verdict of the eBPF program to ++ * the next *bytes* (number of bytes) of message *msg*. ++ * ++ * For example, this helper can be used in the following cases: ++ * ++ * * A single **sendmsg**\ () or **sendfile**\ () system call ++ * contains multiple logical messages that the eBPF program is ++ * supposed to read and for which it should apply a verdict. ++ * * An eBPF program only cares to read the first *bytes* of a ++ * *msg*. If the message has a large payload, then setting up ++ * and calling the eBPF program repeatedly for all bytes, even ++ * though the verdict is already known, would create unnecessary ++ * overhead. ++ * ++ * When called from within an eBPF program, the helper sets a ++ * counter internal to the BPF infrastructure, that is used to ++ * apply the last verdict to the next *bytes*. If *bytes* is ++ * smaller than the current data being processed from a ++ * **sendmsg**\ () or **sendfile**\ () system call, the first ++ * *bytes* will be sent and the eBPF program will be re-run with ++ * the pointer for start of data pointing to byte number *bytes* ++ * **+ 1**. If *bytes* is larger than the current data being ++ * processed, then the eBPF verdict will be applied to multiple ++ * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are ++ * consumed. 
++ * ++ * Note that if a socket closes with the internal counter holding ++ * a non-zero value, this is not a problem because data is not ++ * being buffered for *bytes* and is sent as it is received. ++ * Return ++ * 0 ++ * ++ * long bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) ++ * Description ++ * For socket policies, prevent the execution of the verdict eBPF ++ * program for message *msg* until *bytes* (byte number) have been ++ * accumulated. ++ * ++ * This can be used when one needs a specific number of bytes ++ * before a verdict can be assigned, even if the data spans ++ * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme ++ * case would be a user calling **sendmsg**\ () repeatedly with ++ * 1-byte long message segments. Obviously, this is bad for ++ * performance, but it is still valid. If the eBPF program needs ++ * *bytes* bytes to validate a header, this helper can be used to ++ * prevent the eBPF program to be called again until *bytes* have ++ * been accumulated. ++ * Return ++ * 0 ++ * ++ * long bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) ++ * Description ++ * For socket policies, pull in non-linear data from user space ++ * for *msg* and set pointers *msg*\ **->data** and *msg*\ ++ * **->data_end** to *start* and *end* bytes offsets into *msg*, ++ * respectively. ++ * ++ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a ++ * *msg* it can only parse data that the (**data**, **data_end**) ++ * pointers have already consumed. For **sendmsg**\ () hooks this ++ * is likely the first scatterlist element. But for calls relying ++ * on the **sendpage** handler (e.g. **sendfile**\ ()) this will ++ * be the range (**0**, **0**) because the data is shared with ++ * user space and by default the objective is to avoid allowing ++ * user space to modify data while (or after) eBPF verdict is ++ * being decided. This helper can be used to pull in data and to ++ * set the start and end pointer to given values. Data will be ++ * copied if necessary (i.e. if data was not linear and if start ++ * and end pointers do not point to the same chunk). ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) ++ * Description ++ * Bind the socket associated to *ctx* to the address pointed by ++ * *addr*, of length *addr_len*. This allows for making outgoing ++ * connection from the desired IP address, which can be useful for ++ * example when all processes inside a cgroup should use one ++ * single IP address on a host that has multiple IP configured. ++ * ++ * This helper works for IPv4 and IPv6, TCP and UDP sockets. The ++ * domain (*addr*\ **->sa_family**) must be **AF_INET** (or ++ * **AF_INET6**). It's advised to pass zero port (**sin_port** ++ * or **sin6_port**) which triggers IP_BIND_ADDRESS_NO_PORT-like ++ * behavior and lets the kernel efficiently pick up an unused ++ * port as long as 4-tuple is unique. Passing non-zero port might ++ * lead to degraded performance. ++ * Return ++ * 0 on success, or a negative error in case of failure. 
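To show how the **bpf_msg_\***\ () helpers above interact, here is a minimal SK_MSG verdict sketch assuming a hypothetical fixed-size application record whose first byte is an opcode (the record size, opcode check and names are illustrative only):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    #define REC_LEN 64 /* assumed fixed application record size */

    SEC("sk_msg")
    int msg_verdict(struct sk_msg_md *msg)
    {
        char *rec;

        /* Hold the verdict until one full record has accumulated, even if
         * it arrives through many small sendmsg() calls. */
        bpf_msg_cork_bytes(msg, REC_LEN);

        /* Make the leading opcode byte directly readable via data/data_end. */
        if (bpf_msg_pull_data(msg, 0, 1, 0))
            return SK_DROP;
        rec = msg->data;
        if (rec + 1 > (char *)msg->data_end)
            return SK_DROP;

        /* Illustrative policy: the opcode decides the verdict, and that
         * verdict is applied to the whole record. */
        bpf_msg_apply_bytes(msg, REC_LEN);
        return rec[0] == 'D' ? SK_DROP : SK_PASS;
    }

    char LICENSE[] SEC("license") = "GPL";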
++ * ++ * long bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) ++ * Description ++ * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is ++ * possible to both shrink and grow the packet tail. ++ * Shrink done via *delta* being a negative integer. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) ++ * Description ++ * Retrieve the XFRM state (IP transform framework, see also ++ * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*. ++ * ++ * The retrieved value is stored in the **struct bpf_xfrm_state** ++ * pointed by *xfrm_state* and of length *size*. ++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * This helper is available only if the kernel was compiled with ++ * **CONFIG_XFRM** configuration option. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags) ++ * Description ++ * Return a user or a kernel stack in bpf program provided buffer. ++ * To achieve this, the helper needs *ctx*, which is a pointer ++ * to the context on which the tracing program is executed. ++ * To store the stacktrace, the bpf program provides *buf* with ++ * a nonnegative *size*. ++ * ++ * The last argument, *flags*, holds the number of stack frames to ++ * skip (from 0 to 255), masked with ++ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set ++ * the following flags: ++ * ++ * **BPF_F_USER_STACK** ++ * Collect a user space stack instead of a kernel stack. ++ * **BPF_F_USER_BUILD_ID** ++ * Collect buildid+offset instead of ips for user stack, ++ * only valid if **BPF_F_USER_STACK** is also specified. ++ * ++ * **bpf_get_stack**\ () can collect up to ++ * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject ++ * to sufficient large buffer size. Note that ++ * this limit can be controlled with the **sysctl** program, and ++ * that it should be manually increased in order to profile long ++ * user stacks (such as stacks for Java programs). To do so, use: ++ * ++ * :: ++ * ++ * # sysctl kernel.perf_event_max_stack= ++ * Return ++ * The non-negative copied *buf* length equal to or less than ++ * *size* on success, or a negative error in case of failure. ++ * ++ * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) ++ * Description ++ * This helper is similar to **bpf_skb_load_bytes**\ () in that ++ * it provides an easy way to load *len* bytes from *offset* ++ * from the packet associated to *skb*, into the buffer pointed ++ * by *to*. The difference to **bpf_skb_load_bytes**\ () is that ++ * a fifth argument *start_header* exists in order to select a ++ * base offset to start from. *start_header* can be one of: ++ * ++ * **BPF_HDR_START_MAC** ++ * Base offset to load data from is *skb*'s mac header. ++ * **BPF_HDR_START_NET** ++ * Base offset to load data from is *skb*'s network header. 
++ * ++ * In general, "direct packet access" is the preferred method to ++ * access packet data, however, this helper is in particular useful ++ * in socket filters where *skb*\ **->data** does not always point ++ * to the start of the mac header and where "direct packet access" ++ * is not available. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) ++ * Description ++ * Do FIB lookup in kernel tables using parameters in *params*. ++ * If lookup is successful and result shows packet is to be ++ * forwarded, the neighbor tables are searched for the nexthop. ++ * If successful (ie., FIB lookup shows forwarding and nexthop ++ * is resolved), the nexthop address is returned in ipv4_dst ++ * or ipv6_dst based on family, smac is set to mac address of ++ * egress device, dmac is set to nexthop mac address, rt_metric ++ * is set to metric from route (IPv4/IPv6 only), and ifindex ++ * is set to the device index of the nexthop from the FIB lookup. ++ * ++ * *plen* argument is the size of the passed in struct. ++ * *flags* argument can be a combination of one or more of the ++ * following values: ++ * ++ * **BPF_FIB_LOOKUP_DIRECT** ++ * Do a direct table lookup vs full lookup using FIB ++ * rules. ++ * **BPF_FIB_LOOKUP_OUTPUT** ++ * Perform lookup from an egress perspective (default is ++ * ingress). ++ * ++ * *ctx* is either **struct xdp_md** for XDP programs or ++ * **struct sk_buff** tc cls_act programs. ++ * Return ++ * * < 0 if any input argument is invalid ++ * * 0 on success (packet is forwarded, nexthop neighbor exists) ++ * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the ++ * packet is not forwarded or needs assist from full stack ++ * ++ * If lookup fails with BPF_FIB_LKUP_RET_FRAG_NEEDED, then the MTU ++ * was exceeded and output params->mtu_result contains the MTU. ++ * ++ * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) ++ * Description ++ * Add an entry to, or update a sockhash *map* referencing sockets. ++ * The *skops* is used as a new value for the entry associated to ++ * *key*. *flags* is one of: ++ * ++ * **BPF_NOEXIST** ++ * The entry for *key* must not exist in the map. ++ * **BPF_EXIST** ++ * The entry for *key* must already exist in the map. ++ * **BPF_ANY** ++ * No condition on the existence of the entry for *key*. ++ * ++ * If the *map* has eBPF programs (parser and verdict), those will ++ * be inherited by the socket being added. If the socket is ++ * already attached to eBPF programs, this results in an error. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) ++ * Description ++ * This helper is used in programs implementing policies at the ++ * socket level. If the message *msg* is allowed to pass (i.e. if ++ * the verdict eBPF program returns **SK_PASS**), redirect it to ++ * the socket referenced by *map* (of type ++ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and ++ * egress interfaces can be used for redirection. The ++ * **BPF_F_INGRESS** value in *flags* is used to make the ++ * distinction (ingress path is selected if the flag is present, ++ * egress path otherwise). This is the only flag supported for now. ++ * Return ++ * **SK_PASS** on success, or **SK_DROP** on error. 
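A minimal XDP forwarding sketch built around **bpf_fib_lookup**\ (), assuming plain IPv4 without options (a real forwarder would also decrement the TTL, fix the checksum and handle the other **BPF_FIB_LKUP_RET_\*** codes; names are illustrative):

    #include <linux/bpf.h>
    #include <linux/if_ether.h>
    #include <linux/ip.h>
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_endian.h>

    #ifndef AF_INET
    #define AF_INET 2
    #endif

    SEC("xdp")
    int xdp_fwd(struct xdp_md *ctx)
    {
        void *data = (void *)(long)ctx->data;
        void *data_end = (void *)(long)ctx->data_end;
        struct ethhdr *eth = data;
        struct bpf_fib_lookup fib = {};
        struct iphdr *iph;

        if ((void *)(eth + 1) > data_end || eth->h_proto != bpf_htons(ETH_P_IP))
            return XDP_PASS;
        iph = (void *)(eth + 1);
        if ((void *)(iph + 1) > data_end)
            return XDP_PASS;

        fib.family   = AF_INET;
        fib.tot_len  = bpf_ntohs(iph->tot_len);
        fib.ipv4_src = iph->saddr;
        fib.ipv4_dst = iph->daddr;
        fib.ifindex  = ctx->ingress_ifindex;

        if (bpf_fib_lookup(ctx, &fib, sizeof(fib), 0) != BPF_FIB_LKUP_RET_SUCCESS)
            return XDP_PASS; /* let the kernel stack handle it */

        /* Nexthop resolved: rewrite L2 addresses and bounce the frame out
         * of the egress device reported by the lookup. */
        __builtin_memcpy(eth->h_dest, fib.dmac, ETH_ALEN);
        __builtin_memcpy(eth->h_source, fib.smac, ETH_ALEN);
        return bpf_redirect(fib.ifindex, 0);
    }

    char LICENSE[] SEC("license") = "GPL";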
++ * ++ * long bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) ++ * Description ++ * This helper is used in programs implementing policies at the ++ * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. ++ * if the verdict eBPF program returns **SK_PASS**), redirect it ++ * to the socket referenced by *map* (of type ++ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and ++ * egress interfaces can be used for redirection. The ++ * **BPF_F_INGRESS** value in *flags* is used to make the ++ * distinction (ingress path is selected if the flag is present, ++ * egress otherwise). This is the only flag supported for now. ++ * Return ++ * **SK_PASS** on success, or **SK_DROP** on error. ++ * ++ * long bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) ++ * Description ++ * Encapsulate the packet associated to *skb* within a Layer 3 ++ * protocol header. This header is provided in the buffer at ++ * address *hdr*, with *len* its size in bytes. *type* indicates ++ * the protocol of the header and can be one of: ++ * ++ * **BPF_LWT_ENCAP_SEG6** ++ * IPv6 encapsulation with Segment Routing Header ++ * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH, ++ * the IPv6 header is computed by the kernel. ++ * **BPF_LWT_ENCAP_SEG6_INLINE** ++ * Only works if *skb* contains an IPv6 packet. Insert a ++ * Segment Routing Header (**struct ipv6_sr_hdr**) inside ++ * the IPv6 header. ++ * **BPF_LWT_ENCAP_IP** ++ * IP encapsulation (GRE/GUE/IPIP/etc). The outer header ++ * must be IPv4 or IPv6, followed by zero or more ++ * additional headers, up to **LWT_BPF_MAX_HEADROOM** ++ * total bytes in all prepended headers. Please note that ++ * if **skb_is_gso**\ (*skb*) is true, no more than two ++ * headers can be prepended, and the inner header, if ++ * present, should be either GRE or UDP/GUE. ++ * ++ * **BPF_LWT_ENCAP_SEG6**\ \* types can be called by BPF programs ++ * of type **BPF_PROG_TYPE_LWT_IN**; **BPF_LWT_ENCAP_IP** type can ++ * be called by bpf programs of types **BPF_PROG_TYPE_LWT_IN** and ++ * **BPF_PROG_TYPE_LWT_XMIT**. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) ++ * Description ++ * Store *len* bytes from address *from* into the packet ++ * associated to *skb*, at *offset*. Only the flags, tag and TLVs ++ * inside the outermost IPv6 Segment Routing Header can be ++ * modified through this helper. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) ++ * Description ++ * Adjust the size allocated to TLVs in the outermost IPv6 ++ * Segment Routing Header contained in the packet associated to ++ * *skb*, at position *offset* by *delta* bytes. Only offsets ++ * after the segments are accepted. 
*delta* can be ++ * positive (growing) as well as negative (shrinking). ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) ++ * Description ++ * Apply an IPv6 Segment Routing action of type *action* to the ++ * packet associated to *skb*. Each action takes a parameter ++ * contained at address *param*, and of length *param_len* bytes. ++ * *action* can be one of: ++ * ++ * **SEG6_LOCAL_ACTION_END_X** ++ * End.X action: Endpoint with Layer-3 cross-connect. ++ * Type of *param*: **struct in6_addr**. ++ * **SEG6_LOCAL_ACTION_END_T** ++ * End.T action: Endpoint with specific IPv6 table lookup. ++ * Type of *param*: **int**. ++ * **SEG6_LOCAL_ACTION_END_B6** ++ * End.B6 action: Endpoint bound to an SRv6 policy. ++ * Type of *param*: **struct ipv6_sr_hdr**. ++ * **SEG6_LOCAL_ACTION_END_B6_ENCAP** ++ * End.B6.Encap action: Endpoint bound to an SRv6 ++ * encapsulation policy. ++ * Type of *param*: **struct ipv6_sr_hdr**. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_rc_repeat(void *ctx) ++ * Description ++ * This helper is used in programs implementing IR decoding, to ++ * report a successfully decoded repeat key message. This delays ++ * the generation of a key up event for the previously generated ++ * key down event. ++ * ++ * Some IR protocols like NEC have a special IR message for ++ * repeating the last button, for when a button is held down. ++ * ++ * The *ctx* should point to the lirc sample as passed into ++ * the program. ++ * ++ * This helper is only available if the kernel was compiled with ++ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to ++ * "**y**". ++ * Return ++ * 0 ++ * ++ * long bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) ++ * Description ++ * This helper is used in programs implementing IR decoding, to ++ * report a successfully decoded key press with *scancode*, ++ * *toggle* value in the given *protocol*. The scancode will be ++ * translated to a keycode using the rc keymap, and reported as ++ * an input key down event. After a period a key up event is ++ * generated. This period can be extended by calling either ++ * **bpf_rc_keydown**\ () again with the same values, or calling ++ * **bpf_rc_repeat**\ (). ++ * ++ * Some protocols include a toggle bit, in case the button was ++ * released and pressed again between consecutive scancodes. ++ * ++ * The *ctx* should point to the lirc sample as passed into ++ * the program. ++ * ++ * The *protocol* is the decoded protocol number (see ++ * **enum rc_proto** for some predefined values). ++ * ++ * This helper is only available if the kernel was compiled with ++ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to ++ * "**y**".
++ * Return ++ * 0 ++ * ++ * u64 bpf_skb_cgroup_id(struct sk_buff *skb) ++ * Description ++ * Return the cgroup v2 id of the socket associated with the *skb*. ++ * This is roughly similar to the **bpf_get_cgroup_classid**\ () ++ * helper for cgroup v1 by providing a tag resp. identifier that ++ * can be matched on or used for map lookups e.g. to implement ++ * policy. The cgroup v2 id of a given path in the hierarchy is ++ * exposed in user space through the f_handle API in order to get ++ * to the same 64-bit id. ++ * ++ * This helper can be used on TC egress path, but not on ingress, ++ * and is available only if the kernel was compiled with the ++ * **CONFIG_SOCK_CGROUP_DATA** configuration option. ++ * Return ++ * The id is returned or 0 in case the id could not be retrieved. ++ * ++ * u64 bpf_get_current_cgroup_id(void) ++ * Description ++ * Get the current cgroup id based on the cgroup within which ++ * the current task is running. ++ * Return ++ * A 64-bit integer containing the current cgroup id based ++ * on the cgroup within which the current task is running. ++ * ++ * void *bpf_get_local_storage(void *map, u64 flags) ++ * Description ++ * Get the pointer to the local storage area. ++ * The type and the size of the local storage is defined ++ * by the *map* argument. ++ * The *flags* meaning is specific for each map type, ++ * and has to be 0 for cgroup local storage. ++ * ++ * Depending on the BPF program type, a local storage area ++ * can be shared between multiple instances of the BPF program, ++ * running simultaneously. ++ * ++ * A user should care about the synchronization by himself. ++ * For example, by using the **BPF_ATOMIC** instructions to alter ++ * the shared data. ++ * Return ++ * A pointer to the local storage area. ++ * ++ * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) ++ * Description ++ * Select a **SO_REUSEPORT** socket from a ++ * **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*. ++ * It checks the selected socket is matching the incoming ++ * request in the socket buffer. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level) ++ * Description ++ * Return id of cgroup v2 that is ancestor of cgroup associated ++ * with the *skb* at the *ancestor_level*. The root cgroup is at ++ * *ancestor_level* zero and each step down the hierarchy ++ * increments the level. If *ancestor_level* == level of cgroup ++ * associated with *skb*, then return value will be same as that ++ * of **bpf_skb_cgroup_id**\ (). ++ * ++ * The helper is useful to implement policies based on cgroups ++ * that are upper in hierarchy than immediate cgroup associated ++ * with *skb*. ++ * ++ * The format of returned id and helper limitations are same as in ++ * **bpf_skb_cgroup_id**\ (). ++ * Return ++ * The id is returned or 0 in case the id could not be retrieved. ++ * ++ * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) ++ * Description ++ * Look for TCP socket matching *tuple*, optionally in a child ++ * network namespace *netns*. The return value must be checked, ++ * and if non-**NULL**, released via **bpf_sk_release**\ (). ++ * ++ * The *ctx* should point to the context of the program, such as ++ * the skb or socket (depending on the hook in use). This is used ++ * to determine the base network namespace for the lookup. 
++ * ++ * *tuple_size* must be one of: ++ * ++ * **sizeof**\ (*tuple*\ **->ipv4**) ++ * Look for an IPv4 socket. ++ * **sizeof**\ (*tuple*\ **->ipv6**) ++ * Look for an IPv6 socket. ++ * ++ * If the *netns* is a negative signed 32-bit integer, then the ++ * socket lookup table in the netns associated with the *ctx* ++ * will be used. For the TC hooks, this is the netns of the device ++ * in the skb. For socket hooks, this is the netns of the socket. ++ * If *netns* is any other signed 32-bit value greater than or ++ * equal to zero then it specifies the ID of the netns relative to ++ * the netns associated with the *ctx*. *netns* values beyond the ++ * range of 32-bit integers are reserved for future use. ++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * This helper is available only if the kernel was compiled with ++ * **CONFIG_NET** configuration option. ++ * Return ++ * Pointer to **struct bpf_sock**, or **NULL** in case of failure. ++ * For sockets with reuseport option, the **struct bpf_sock** ++ * result is from *reuse*\ **->socks**\ [] using the hash of the ++ * tuple. ++ * ++ * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) ++ * Description ++ * Look for UDP socket matching *tuple*, optionally in a child ++ * network namespace *netns*. The return value must be checked, ++ * and if non-**NULL**, released via **bpf_sk_release**\ (). ++ * ++ * The *ctx* should point to the context of the program, such as ++ * the skb or socket (depending on the hook in use). This is used ++ * to determine the base network namespace for the lookup. ++ * ++ * *tuple_size* must be one of: ++ * ++ * **sizeof**\ (*tuple*\ **->ipv4**) ++ * Look for an IPv4 socket. ++ * **sizeof**\ (*tuple*\ **->ipv6**) ++ * Look for an IPv6 socket. ++ * ++ * If the *netns* is a negative signed 32-bit integer, then the ++ * socket lookup table in the netns associated with the *ctx* ++ * will be used. For the TC hooks, this is the netns of the device ++ * in the skb. For socket hooks, this is the netns of the socket. ++ * If *netns* is any other signed 32-bit value greater than or ++ * equal to zero then it specifies the ID of the netns relative to ++ * the netns associated with the *ctx*. *netns* values beyond the ++ * range of 32-bit integers are reserved for future use. ++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * This helper is available only if the kernel was compiled with ++ * **CONFIG_NET** configuration option. ++ * Return ++ * Pointer to **struct bpf_sock**, or **NULL** in case of failure. ++ * For sockets with reuseport option, the **struct bpf_sock** ++ * result is from *reuse*\ **->socks**\ [] using the hash of the ++ * tuple. ++ * ++ * long bpf_sk_release(void *sock) ++ * Description ++ * Release the reference held by *sock*. *sock* must be a ++ * non-**NULL** pointer that was returned from ++ * **bpf_sk_lookup_xxx**\ (). ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) ++ * Description ++ * Push an element *value* in *map*. *flags* is one of: ++ * ++ * **BPF_EXIST** ++ * If the queue/stack is full, the oldest element is ++ * removed to make room for this. ++ * Return ++ * 0 on success, or a negative error in case of failure. 
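To make the lookup/release contract above concrete, here is a small tc sketch that drops TCP segments for which no local socket exists (assumes IPv4 without IP options; names are illustrative). The reference returned by **bpf_sk_lookup_tcp**\ () must be released on every path on which it is non-**NULL**, otherwise the verifier rejects the program:

    #include <linux/bpf.h>
    #include <linux/if_ether.h>
    #include <linux/in.h>
    #include <linux/ip.h>
    #include <linux/tcp.h>
    #include <linux/pkt_cls.h>
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_endian.h>

    SEC("tc")
    int drop_unknown_tcp(struct __sk_buff *skb)
    {
        void *data = (void *)(long)skb->data;
        void *data_end = (void *)(long)skb->data_end;
        struct bpf_sock_tuple tuple = {};
        struct ethhdr *eth = data;
        struct bpf_sock *sk;
        struct tcphdr *tcp;
        struct iphdr *iph;

        if ((void *)(eth + 1) > data_end || eth->h_proto != bpf_htons(ETH_P_IP))
            return TC_ACT_OK;
        iph = (void *)(eth + 1);
        if ((void *)(iph + 1) > data_end || iph->protocol != IPPROTO_TCP)
            return TC_ACT_OK;
        tcp = (void *)(iph + 1); /* assumes no IP options */
        if ((void *)(tcp + 1) > data_end)
            return TC_ACT_OK;

        tuple.ipv4.saddr = iph->saddr;
        tuple.ipv4.daddr = iph->daddr;
        tuple.ipv4.sport = tcp->source;
        tuple.ipv4.dport = tcp->dest;

        /* BPF_F_CURRENT_NETNS looks the tuple up in the skb's own netns. */
        sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4),
                               BPF_F_CURRENT_NETNS, 0);
        if (!sk)
            return TC_ACT_SHOT; /* no local socket for this 4-tuple */

        bpf_sk_release(sk); /* drop the reference on every non-NULL path */
        return TC_ACT_OK;
    }

    char LICENSE[] SEC("license") = "GPL";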
++ * ++ * long bpf_map_pop_elem(struct bpf_map *map, void *value) ++ * Description ++ * Pop an element from *map*. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_map_peek_elem(struct bpf_map *map, void *value) ++ * Description ++ * Get an element from *map* without removing it. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) ++ * Description ++ * For socket policies, insert *len* bytes into *msg* at offset ++ * *start*. ++ * ++ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a ++ * *msg* it may want to insert metadata or options into the *msg*. ++ * This can later be read and used by any of the lower layer BPF ++ * hooks. ++ * ++ * This helper may fail if under memory pressure (a malloc ++ * fails) in these cases BPF programs will get an appropriate ++ * error and BPF programs will need to handle them. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) ++ * Description ++ * Will remove *len* bytes from a *msg* starting at byte *start*. ++ * This may result in **ENOMEM** errors under certain situations if ++ * an allocation and copy are required due to a full ring buffer. ++ * However, the helper will try to avoid doing the allocation ++ * if possible. Other errors can occur if input parameters are ++ * invalid either due to *start* byte not being valid part of *msg* ++ * payload and/or *pop* value being to large. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) ++ * Description ++ * This helper is used in programs implementing IR decoding, to ++ * report a successfully decoded pointer movement. ++ * ++ * The *ctx* should point to the lirc sample as passed into ++ * the program. ++ * ++ * This helper is only available is the kernel was compiled with ++ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to ++ * "**y**". ++ * Return ++ * 0 ++ * ++ * long bpf_spin_lock(struct bpf_spin_lock *lock) ++ * Description ++ * Acquire a spinlock represented by the pointer *lock*, which is ++ * stored as part of a value of a map. Taking the lock allows to ++ * safely update the rest of the fields in that value. The ++ * spinlock can (and must) later be released with a call to ++ * **bpf_spin_unlock**\ (\ *lock*\ ). ++ * ++ * Spinlocks in BPF programs come with a number of restrictions ++ * and constraints: ++ * ++ * * **bpf_spin_lock** objects are only allowed inside maps of ++ * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this ++ * list could be extended in the future). ++ * * BTF description of the map is mandatory. ++ * * The BPF program can take ONE lock at a time, since taking two ++ * or more could cause dead locks. ++ * * Only one **struct bpf_spin_lock** is allowed per map element. ++ * * When the lock is taken, calls (either BPF to BPF or helpers) ++ * are not allowed. ++ * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not ++ * allowed inside a spinlock-ed region. ++ * * The BPF program MUST call **bpf_spin_unlock**\ () to release ++ * the lock, on all execution paths, before it returns. ++ * * The BPF program can access **struct bpf_spin_lock** only via ++ * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ () ++ * helpers. 
Loading or storing data into the **struct ++ * bpf_spin_lock** *lock*\ **;** field of a map is not allowed. ++ * * To use the **bpf_spin_lock**\ () helper, the BTF description ++ * of the map value must be a struct and have **struct ++ * bpf_spin_lock** *anyname*\ **;** field at the top level. ++ * Nested lock inside another struct is not allowed. ++ * * The **struct bpf_spin_lock** *lock* field in a map value must ++ * be aligned on a multiple of 4 bytes in that value. ++ * * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy ++ * the **bpf_spin_lock** field to user space. ++ * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from ++ * a BPF program, do not update the **bpf_spin_lock** field. ++ * * **bpf_spin_lock** cannot be on the stack or inside a ++ * networking packet (it can only be inside of a map values). ++ * * **bpf_spin_lock** is available to root only. ++ * * Tracing programs and socket filter programs cannot use ++ * **bpf_spin_lock**\ () due to insufficient preemption checks ++ * (but this may change in the future). ++ * * **bpf_spin_lock** is not allowed in inner maps of map-in-map. ++ * Return ++ * 0 ++ * ++ * long bpf_spin_unlock(struct bpf_spin_lock *lock) ++ * Description ++ * Release the *lock* previously locked by a call to ++ * **bpf_spin_lock**\ (\ *lock*\ ). ++ * Return ++ * 0 ++ * ++ * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) ++ * Description ++ * This helper gets a **struct bpf_sock** pointer such ++ * that all the fields in this **bpf_sock** can be accessed. ++ * Return ++ * A **struct bpf_sock** pointer on success, or **NULL** in ++ * case of failure. ++ * ++ * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk) ++ * Description ++ * This helper gets a **struct bpf_tcp_sock** pointer from a ++ * **struct bpf_sock** pointer. ++ * Return ++ * A **struct bpf_tcp_sock** pointer on success, or **NULL** in ++ * case of failure. ++ * ++ * long bpf_skb_ecn_set_ce(struct sk_buff *skb) ++ * Description ++ * Set ECN (Explicit Congestion Notification) field of IP header ++ * to **CE** (Congestion Encountered) if current value is **ECT** ++ * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6 ++ * and IPv4. ++ * Return ++ * 1 if the **CE** flag is set (either by the current helper call ++ * or because it was already present), 0 if it is not set. ++ * ++ * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk) ++ * Description ++ * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state. ++ * **bpf_sk_release**\ () is unnecessary and not allowed. ++ * Return ++ * A **struct bpf_sock** pointer on success, or **NULL** in ++ * case of failure. ++ * ++ * struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) ++ * Description ++ * Look for TCP socket matching *tuple*, optionally in a child ++ * network namespace *netns*. The return value must be checked, ++ * and if non-**NULL**, released via **bpf_sk_release**\ (). ++ * ++ * This function is identical to **bpf_sk_lookup_tcp**\ (), except ++ * that it also returns timewait or request sockets. Use ++ * **bpf_sk_fullsock**\ () or **bpf_tcp_sock**\ () to access the ++ * full structure. ++ * ++ * This helper is available only if the kernel was compiled with ++ * **CONFIG_NET** configuration option. ++ * Return ++ * Pointer to **struct bpf_sock**, or **NULL** in case of failure. ++ * For sockets with reuseport option, the **struct bpf_sock** ++ * result is from *reuse*\ **->socks**\ [] using the hash of the ++ * tuple. 
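A minimal sketch of the locking rules above: the **struct bpf_spin_lock** lives at the top level of the map value, the critical section contains no helper calls, and **bpf_spin_unlock**\ () is reached on every path (struct, map and program names are illustrative):

    #include <linux/bpf.h>
    #include <linux/pkt_cls.h>
    #include <bpf/bpf_helpers.h>

    /* The BTF of this value type is what lets the verifier locate the lock. */
    struct counter_val {
        struct bpf_spin_lock lock;
        __u64 packets;
        __u64 bytes;
    };

    struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(max_entries, 1);
        __type(key, __u32);
        __type(value, struct counter_val);
    } stats SEC(".maps");

    SEC("tc")
    int count_locked(struct __sk_buff *skb)
    {
        struct counter_val *val;
        __u32 key = 0;

        val = bpf_map_lookup_elem(&stats, &key);
        if (!val)
            return TC_ACT_OK;

        bpf_spin_lock(&val->lock);  /* no calls allowed until the unlock */
        val->packets++;
        val->bytes += skb->len;
        bpf_spin_unlock(&val->lock);

        return TC_ACT_OK;
    }

    char LICENSE[] SEC("license") = "GPL";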
++ * ++ * long bpf_tcp_check_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) ++ * Description ++ * Check whether *iph* and *th* contain a valid SYN cookie ACK for ++ * the listening socket in *sk*. ++ * ++ * *iph* points to the start of the IPv4 or IPv6 header, while ++ * *iph_len* contains **sizeof**\ (**struct iphdr**) or ++ * **sizeof**\ (**struct ipv6hdr**). ++ * ++ * *th* points to the start of the TCP header, while *th_len* ++ * contains the length of the TCP header (at least ++ * **sizeof**\ (**struct tcphdr**)). ++ * Return ++ * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative ++ * error otherwise. ++ * ++ * long bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) ++ * Description ++ * Get name of sysctl in /proc/sys/ and copy it into provided by ++ * program buffer *buf* of size *buf_len*. ++ * ++ * The buffer is always NUL terminated, unless it's zero-sized. ++ * ++ * If *flags* is zero, full name (e.g. "net/ipv4/tcp_mem") is ++ * copied. Use **BPF_F_SYSCTL_BASE_NAME** flag to copy base name ++ * only (e.g. "tcp_mem"). ++ * Return ++ * Number of character copied (not including the trailing NUL). ++ * ++ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain ++ * truncated name in this case). ++ * ++ * long bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) ++ * Description ++ * Get current value of sysctl as it is presented in /proc/sys ++ * (incl. newline, etc), and copy it as a string into provided ++ * by program buffer *buf* of size *buf_len*. ++ * ++ * The whole value is copied, no matter what file position user ++ * space issued e.g. sys_read at. ++ * ++ * The buffer is always NUL terminated, unless it's zero-sized. ++ * Return ++ * Number of character copied (not including the trailing NUL). ++ * ++ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain ++ * truncated name in this case). ++ * ++ * **-EINVAL** if current value was unavailable, e.g. because ++ * sysctl is uninitialized and read returns -EIO for it. ++ * ++ * long bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) ++ * Description ++ * Get new value being written by user space to sysctl (before ++ * the actual write happens) and copy it as a string into ++ * provided by program buffer *buf* of size *buf_len*. ++ * ++ * User space may write new value at file position > 0. ++ * ++ * The buffer is always NUL terminated, unless it's zero-sized. ++ * Return ++ * Number of character copied (not including the trailing NUL). ++ * ++ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain ++ * truncated name in this case). ++ * ++ * **-EINVAL** if sysctl is being read. ++ * ++ * long bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len) ++ * Description ++ * Override new value being written by user space to sysctl with ++ * value provided by program in buffer *buf* of size *buf_len*. ++ * ++ * *buf* should contain a string in same form as provided by user ++ * space on sysctl write. ++ * ++ * User space may write new value at file position > 0. To override ++ * the whole sysctl value file position should be set to zero. ++ * Return ++ * 0 on success. ++ * ++ * **-E2BIG** if the *buf_len* is too big. ++ * ++ * **-EINVAL** if sysctl is being read. 
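As a quick illustration of the sysctl helpers above, the sketch below reads the base name of the sysctl being accessed from a BPF_PROG_TYPE_CGROUP_SYSCTL program; the program name, buffer size and allow/deny policy are made-up assumptions::

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("cgroup/sysctl")
    int sysctl_guard(struct bpf_sysctl *ctx)
    {
            char name[32] = {};
            long ret;

            /* Copy only the base name, e.g. "tcp_mem" rather than "net/ipv4/tcp_mem". */
            ret = bpf_sysctl_get_name(ctx, name, sizeof(name), BPF_F_SYSCTL_BASE_NAME);
            if (ret < 0)                /* -E2BIG: 'name' holds a truncated copy */
                    return 1;           /* allow the access anyway */

            /* ... match 'name' here; return 0 to reject the read/write ... */
            return 1;
    }

    char LICENSE[] SEC("license") = "GPL";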
++ * ++ * long bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res) ++ * Description ++ * Convert the initial part of the string from buffer *buf* of ++ * size *buf_len* to a long integer according to the given base ++ * and save the result in *res*. ++ * ++ * The string may begin with an arbitrary amount of white space ++ * (as determined by **isspace**\ (3)) followed by a single ++ * optional '**-**' sign. ++ * ++ * Five least significant bits of *flags* encode base, other bits ++ * are currently unused. ++ * ++ * Base must be either 8, 10, 16 or 0 to detect it automatically ++ * similar to user space **strtol**\ (3). ++ * Return ++ * Number of characters consumed on success. Must be positive but ++ * no more than *buf_len*. ++ * ++ * **-EINVAL** if no valid digits were found or unsupported base ++ * was provided. ++ * ++ * **-ERANGE** if resulting value was out of range. ++ * ++ * long bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res) ++ * Description ++ * Convert the initial part of the string from buffer *buf* of ++ * size *buf_len* to an unsigned long integer according to the ++ * given base and save the result in *res*. ++ * ++ * The string may begin with an arbitrary amount of white space ++ * (as determined by **isspace**\ (3)). ++ * ++ * Five least significant bits of *flags* encode base, other bits ++ * are currently unused. ++ * ++ * Base must be either 8, 10, 16 or 0 to detect it automatically ++ * similar to user space **strtoul**\ (3). ++ * Return ++ * Number of characters consumed on success. Must be positive but ++ * no more than *buf_len*. ++ * ++ * **-EINVAL** if no valid digits were found or unsupported base ++ * was provided. ++ * ++ * **-ERANGE** if resulting value was out of range. ++ * ++ * void *bpf_sk_storage_get(struct bpf_map *map, void *sk, void *value, u64 flags) ++ * Description ++ * Get a bpf-local-storage from a *sk*. ++ * ++ * Logically, it could be thought of getting the value from ++ * a *map* with *sk* as the **key**. From this ++ * perspective, the usage is not much different from ++ * **bpf_map_lookup_elem**\ (*map*, **&**\ *sk*) except this ++ * helper enforces the key must be a full socket and the map must ++ * be a **BPF_MAP_TYPE_SK_STORAGE** also. ++ * ++ * Underneath, the value is stored locally at *sk* instead of ++ * the *map*. The *map* is used as the bpf-local-storage ++ * "type". The bpf-local-storage "type" (i.e. the *map*) is ++ * searched against all bpf-local-storages residing at *sk*. ++ * ++ * *sk* is a kernel **struct sock** pointer for LSM program. ++ * *sk* is a **struct bpf_sock** pointer for other program types. ++ * ++ * An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be ++ * used such that a new bpf-local-storage will be ++ * created if one does not exist. *value* can be used ++ * together with **BPF_SK_STORAGE_GET_F_CREATE** to specify ++ * the initial value of a bpf-local-storage. If *value* is ++ * **NULL**, the new bpf-local-storage will be zero initialized. ++ * Return ++ * A bpf-local-storage pointer is returned on success. ++ * ++ * **NULL** if not found or there was an error in adding ++ * a new bpf-local-storage. ++ * ++ * long bpf_sk_storage_delete(struct bpf_map *map, void *sk) ++ * Description ++ * Delete a bpf-local-storage from a *sk*. ++ * Return ++ * 0 on success. ++ * ++ * **-ENOENT** if the bpf-local-storage cannot be found. ++ * **-EINVAL** if sk is not a fullsock (e.g. a request_sock). 
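A minimal sketch of the socket-local storage pattern described above, counting events per socket from a sockops program; the map name, value layout and the assumption that skops->sk is a valid full socket at the chosen callbacks are illustrative only::

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct sk_stats {
            __u64 events;
    };

    struct {
            __uint(type, BPF_MAP_TYPE_SK_STORAGE);
            __uint(map_flags, BPF_F_NO_PREALLOC);  /* required for socket storage */
            __type(key, int);
            __type(value, struct sk_stats);
    } sk_stg SEC(".maps");                         /* hypothetical map name */

    SEC("sockops")
    int count_sock_events(struct bpf_sock_ops *skops)
    {
            struct sk_stats *st;

            if (!skops->sk)             /* storage needs a full socket */
                    return 1;

            st = bpf_sk_storage_get(&sk_stg, skops->sk, NULL,
                                    BPF_SK_STORAGE_GET_F_CREATE);
            if (st)
                    st->events++;       /* the value lives on the socket itself */
            return 1;
    }

    char LICENSE[] SEC("license") = "GPL";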
++ * ++ * long bpf_send_signal(u32 sig) ++ * Description ++ * Send signal *sig* to the process of the current task. ++ * The signal may be delivered to any of this process's threads. ++ * Return ++ * 0 on success or successfully queued. ++ * ++ * **-EBUSY** if work queue under nmi is full. ++ * ++ * **-EINVAL** if *sig* is invalid. ++ * ++ * **-EPERM** if no permission to send the *sig*. ++ * ++ * **-EAGAIN** if bpf program can try again. ++ * ++ * s64 bpf_tcp_gen_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) ++ * Description ++ * Try to issue a SYN cookie for the packet with corresponding ++ * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*. ++ * ++ * *iph* points to the start of the IPv4 or IPv6 header, while ++ * *iph_len* contains **sizeof**\ (**struct iphdr**) or ++ * **sizeof**\ (**struct ipv6hdr**). ++ * ++ * *th* points to the start of the TCP header, while *th_len* ++ * contains the length of the TCP header with options (at least ++ * **sizeof**\ (**struct tcphdr**)). ++ * Return ++ * On success, lower 32 bits hold the generated SYN cookie in ++ * followed by 16 bits which hold the MSS value for that cookie, ++ * and the top 16 bits are unused. ++ * ++ * On failure, the returned value is one of the following: ++ * ++ * **-EINVAL** SYN cookie cannot be issued due to error ++ * ++ * **-ENOENT** SYN cookie should not be issued (no SYN flood) ++ * ++ * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies ++ * ++ * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 ++ * ++ * long bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) ++ * Description ++ * Write raw *data* blob into a special BPF perf event held by ++ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf ++ * event must have the following attributes: **PERF_SAMPLE_RAW** ++ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and ++ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. ++ * ++ * The *flags* are used to indicate the index in *map* for which ++ * the value must be put, masked with **BPF_F_INDEX_MASK**. ++ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** ++ * to indicate that the index of the current CPU core should be ++ * used. ++ * ++ * The value to write, of *size*, is passed through eBPF stack and ++ * pointed by *data*. ++ * ++ * *ctx* is a pointer to in-kernel struct sk_buff. ++ * ++ * This helper is similar to **bpf_perf_event_output**\ () but ++ * restricted to raw_tracepoint bpf programs. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr) ++ * Description ++ * Safely attempt to read *size* bytes from user space address ++ * *unsafe_ptr* and store the data in *dst*. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) ++ * Description ++ * Safely attempt to read *size* bytes from kernel space address ++ * *unsafe_ptr* and store the data in *dst*. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr) ++ * Description ++ * Copy a NUL terminated string from an unsafe user address ++ * *unsafe_ptr* to *dst*. The *size* should include the ++ * terminating NUL byte. In case the string length is smaller than ++ * *size*, the target is not padded with further NUL bytes. 
If the ++ * string length is larger than *size*, just *size*-1 bytes are ++ * copied and the last byte is set to NUL. ++ * ++ * On success, returns the number of bytes that were written, ++ * including the terminal NUL. This makes this helper useful in ++ * tracing programs for reading strings, and more importantly to ++ * get its length at runtime. See the following snippet: ++ * ++ * :: ++ * ++ * SEC("kprobe/sys_open") ++ * void bpf_sys_open(struct pt_regs *ctx) ++ * { ++ * char buf[PATHLEN]; // PATHLEN is defined to 256 ++ * int res = bpf_probe_read_user_str(buf, sizeof(buf), ++ * ctx->di); ++ * ++ * // Consume buf, for example push it to ++ * // userspace via bpf_perf_event_output(); we ++ * // can use res (the string length) as event ++ * // size, after checking its boundaries. ++ * } ++ * ++ * In comparison, using **bpf_probe_read_user**\ () helper here ++ * instead to read the string would require to estimate the length ++ * at compile time, and would often result in copying more memory ++ * than necessary. ++ * ++ * Another useful use case is when parsing individual process ++ * arguments or individual environment variables navigating ++ * *current*\ **->mm->arg_start** and *current*\ ++ * **->mm->env_start**: using this helper and the return value, ++ * one can quickly iterate at the right offset of the memory area. ++ * Return ++ * On success, the strictly positive length of the output string, ++ * including the trailing NUL character. On error, a negative ++ * value. ++ * ++ * long bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr) ++ * Description ++ * Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr* ++ * to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply. ++ * Return ++ * On success, the strictly positive length of the string, including ++ * the trailing NUL character. On error, a negative value. ++ * ++ * long bpf_tcp_send_ack(void *tp, u32 rcv_nxt) ++ * Description ++ * Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**. ++ * *rcv_nxt* is the ack_seq to be sent out. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_send_signal_thread(u32 sig) ++ * Description ++ * Send signal *sig* to the thread corresponding to the current task. ++ * Return ++ * 0 on success or successfully queued. ++ * ++ * **-EBUSY** if work queue under nmi is full. ++ * ++ * **-EINVAL** if *sig* is invalid. ++ * ++ * **-EPERM** if no permission to send the *sig*. ++ * ++ * **-EAGAIN** if bpf program can try again. ++ * ++ * u64 bpf_jiffies64(void) ++ * Description ++ * Obtain the 64bit jiffies ++ * Return ++ * The 64 bit jiffies ++ * ++ * long bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags) ++ * Description ++ * For an eBPF program attached to a perf event, retrieve the ++ * branch records (**struct perf_branch_entry**) associated to *ctx* ++ * and store it in the buffer pointed by *buf* up to size ++ * *size* bytes. ++ * Return ++ * On success, number of bytes written to *buf*. On error, a ++ * negative value. ++ * ++ * The *flags* can be set to **BPF_F_GET_BRANCH_RECORDS_SIZE** to ++ * instead return the number of bytes required to store all the ++ * branch entries. If this flag is set, *buf* may be NULL. ++ * ++ * **-EINVAL** if arguments invalid or **size** not a multiple ++ * of **sizeof**\ (**struct perf_branch_entry**\ ). ++ * ++ * **-ENOENT** if architecture does not support branch records. 
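To show how the BPF_F_GET_BRANCH_RECORDS_SIZE flag above can be combined with a real read, here is a sketch of a perf_event program; it assumes the underlying perf event was opened with branch sampling enabled (e.g. PERF_SAMPLE_BRANCH_STACK) and the 16-entry buffer is an arbitrary choice::

    #include <linux/bpf.h>
    #include <linux/bpf_perf_event.h>
    #include <linux/perf_event.h>
    #include <bpf/bpf_helpers.h>

    SEC("perf_event")
    int lbr_sample(struct bpf_perf_event_data *ctx)
    {
            struct perf_branch_entry entries[16] = {};
            long written;

            /* With this flag, *buf* may be NULL: only the required size is returned. */
            written = bpf_read_branch_records(ctx, NULL, 0,
                                              BPF_F_GET_BRANCH_RECORDS_SIZE);
            if (written <= 0)
                    return 0;           /* e.g. -ENOENT: no branch records on this arch */

            /* Copy as many branch entries as fit into the fixed-size buffer. */
            written = bpf_read_branch_records(ctx, entries, sizeof(entries), 0);
            /* written / sizeof(struct perf_branch_entry) entries are now valid. */
            return 0;
    }

    char LICENSE[] SEC("license") = "GPL";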
++ * ++ * long bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) ++ * Description ++ * Returns 0 on success, values for *pid* and *tgid* as seen from the current ++ * *namespace* will be returned in *nsdata*. ++ * Return ++ * 0 on success, or one of the following in case of failure: ++ * ++ * **-EINVAL** if dev and inum supplied don't match dev_t and inode number ++ * with nsfs of current task, or if dev conversion to dev_t lost high bits. ++ * ++ * **-ENOENT** if pidns does not exists for the current task. ++ * ++ * long bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) ++ * Description ++ * Write raw *data* blob into a special BPF perf event held by ++ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf ++ * event must have the following attributes: **PERF_SAMPLE_RAW** ++ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and ++ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. ++ * ++ * The *flags* are used to indicate the index in *map* for which ++ * the value must be put, masked with **BPF_F_INDEX_MASK**. ++ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** ++ * to indicate that the index of the current CPU core should be ++ * used. ++ * ++ * The value to write, of *size*, is passed through eBPF stack and ++ * pointed by *data*. ++ * ++ * *ctx* is a pointer to in-kernel struct xdp_buff. ++ * ++ * This helper is similar to **bpf_perf_eventoutput**\ () but ++ * restricted to raw_tracepoint bpf programs. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * u64 bpf_get_netns_cookie(void *ctx) ++ * Description ++ * Retrieve the cookie (generated by the kernel) of the network ++ * namespace the input *ctx* is associated with. The network ++ * namespace cookie remains stable for its lifetime and provides ++ * a global identifier that can be assumed unique. If *ctx* is ++ * NULL, then the helper returns the cookie for the initial ++ * network namespace. The cookie itself is very similar to that ++ * of **bpf_get_socket_cookie**\ () helper, but for network ++ * namespaces instead of sockets. ++ * Return ++ * A 8-byte long opaque number. ++ * ++ * u64 bpf_get_current_ancestor_cgroup_id(int ancestor_level) ++ * Description ++ * Return id of cgroup v2 that is ancestor of the cgroup associated ++ * with the current task at the *ancestor_level*. The root cgroup ++ * is at *ancestor_level* zero and each step down the hierarchy ++ * increments the level. If *ancestor_level* == level of cgroup ++ * associated with the current task, then return value will be the ++ * same as that of **bpf_get_current_cgroup_id**\ (). ++ * ++ * The helper is useful to implement policies based on cgroups ++ * that are upper in hierarchy than immediate cgroup associated ++ * with the current task. ++ * ++ * The format of returned id and helper limitations are same as in ++ * **bpf_get_current_cgroup_id**\ (). ++ * Return ++ * The id is returned or 0 in case the id could not be retrieved. ++ * ++ * long bpf_sk_assign(struct sk_buff *skb, void *sk, u64 flags) ++ * Description ++ * Helper is overloaded depending on BPF program type. This ++ * description applies to **BPF_PROG_TYPE_SCHED_CLS** and ++ * **BPF_PROG_TYPE_SCHED_ACT** programs. ++ * ++ * Assign the *sk* to the *skb*. When combined with appropriate ++ * routing configuration to receive the packet towards the socket, ++ * will cause *skb* to be delivered to the specified socket. 
++ * Subsequent redirection of *skb* via **bpf_redirect**\ (), ++ * **bpf_clone_redirect**\ () or other methods outside of BPF may ++ * interfere with successful delivery to the socket. ++ * ++ * This operation is only valid from TC ingress path. ++ * ++ * The *flags* argument must be zero. ++ * Return ++ * 0 on success, or a negative error in case of failure: ++ * ++ * **-EINVAL** if specified *flags* are not supported. ++ * ++ * **-ENOENT** if the socket is unavailable for assignment. ++ * ++ * **-ENETUNREACH** if the socket is unreachable (wrong netns). ++ * ++ * **-EOPNOTSUPP** if the operation is not supported, for example ++ * a call from outside of TC ingress. ++ * ++ * **-ESOCKTNOSUPPORT** if the socket type is not supported ++ * (reuseport). ++ * ++ * long bpf_sk_assign(struct bpf_sk_lookup *ctx, struct bpf_sock *sk, u64 flags) ++ * Description ++ * Helper is overloaded depending on BPF program type. This ++ * description applies to **BPF_PROG_TYPE_SK_LOOKUP** programs. ++ * ++ * Select the *sk* as a result of a socket lookup. ++ * ++ * For the operation to succeed passed socket must be compatible ++ * with the packet description provided by the *ctx* object. ++ * ++ * L4 protocol (**IPPROTO_TCP** or **IPPROTO_UDP**) must ++ * be an exact match. While IP family (**AF_INET** or ++ * **AF_INET6**) must be compatible, that is IPv6 sockets ++ * that are not v6-only can be selected for IPv4 packets. ++ * ++ * Only TCP listeners and UDP unconnected sockets can be ++ * selected. *sk* can also be NULL to reset any previous ++ * selection. ++ * ++ * *flags* argument can combination of following values: ++ * ++ * * **BPF_SK_LOOKUP_F_REPLACE** to override the previous ++ * socket selection, potentially done by a BPF program ++ * that ran before us. ++ * ++ * * **BPF_SK_LOOKUP_F_NO_REUSEPORT** to skip ++ * load-balancing within reuseport group for the socket ++ * being selected. ++ * ++ * On success *ctx->sk* will point to the selected socket. ++ * ++ * Return ++ * 0 on success, or a negative errno in case of failure. ++ * ++ * * **-EAFNOSUPPORT** if socket family (*sk->family*) is ++ * not compatible with packet family (*ctx->family*). ++ * ++ * * **-EEXIST** if socket has been already selected, ++ * potentially by another program, and ++ * **BPF_SK_LOOKUP_F_REPLACE** flag was not specified. ++ * ++ * * **-EINVAL** if unsupported flags were specified. ++ * ++ * * **-EPROTOTYPE** if socket L4 protocol ++ * (*sk->protocol*) doesn't match packet protocol ++ * (*ctx->protocol*). ++ * ++ * * **-ESOCKTNOSUPPORT** if socket is not in allowed ++ * state (TCP listening or UDP unconnected). ++ * ++ * u64 bpf_ktime_get_boot_ns(void) ++ * Description ++ * Return the time elapsed since system boot, in nanoseconds. ++ * Does include the time the system was suspended. ++ * See: **clock_gettime**\ (**CLOCK_BOOTTIME**) ++ * Return ++ * Current *ktime*. ++ * ++ * long bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len) ++ * Description ++ * **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print ++ * out the format string. ++ * The *m* represents the seq_file. The *fmt* and *fmt_size* are for ++ * the format string itself. The *data* and *data_len* are format string ++ * arguments. The *data* are a **u64** array and corresponding format string ++ * values are stored in the array. For strings and pointers where pointees ++ * are accessed, only the pointer values are stored in the *data* array. 
++ * The *data_len* is the size of *data* in bytes - must be a multiple of 8. ++ * ++ * Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory. ++ * Reading kernel memory may fail due to either invalid address or ++ * valid address but requiring a major memory fault. If reading kernel memory ++ * fails, the string for **%s** will be an empty string, and the ip ++ * address for **%p{i,I}{4,6}** will be 0. Not returning error to ++ * bpf program is consistent with what **bpf_trace_printk**\ () does for now. ++ * Return ++ * 0 on success, or a negative error in case of failure: ++ * ++ * **-EBUSY** if per-CPU memory copy buffer is busy, can try again ++ * by returning 1 from bpf program. ++ * ++ * **-EINVAL** if arguments are invalid, or if *fmt* is invalid/unsupported. ++ * ++ * **-E2BIG** if *fmt* contains too many format specifiers. ++ * ++ * **-EOVERFLOW** if an overflow happened: The same object will be tried again. ++ * ++ * long bpf_seq_write(struct seq_file *m, const void *data, u32 len) ++ * Description ++ * **bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data. ++ * The *m* represents the seq_file. The *data* and *len* represent the ++ * data to write in bytes. ++ * Return ++ * 0 on success, or a negative error in case of failure: ++ * ++ * **-EOVERFLOW** if an overflow happened: The same object will be tried again. ++ * ++ * u64 bpf_sk_cgroup_id(void *sk) ++ * Description ++ * Return the cgroup v2 id of the socket *sk*. ++ * ++ * *sk* must be a non-**NULL** pointer to a socket, e.g. one ++ * returned from **bpf_sk_lookup_xxx**\ (), ++ * **bpf_sk_fullsock**\ (), etc. The format of returned id is ++ * same as in **bpf_skb_cgroup_id**\ (). ++ * ++ * This helper is available only if the kernel was compiled with ++ * the **CONFIG_SOCK_CGROUP_DATA** configuration option. ++ * Return ++ * The id is returned or 0 in case the id could not be retrieved. ++ * ++ * u64 bpf_sk_ancestor_cgroup_id(void *sk, int ancestor_level) ++ * Description ++ * Return id of cgroup v2 that is ancestor of cgroup associated ++ * with the *sk* at the *ancestor_level*. The root cgroup is at ++ * *ancestor_level* zero and each step down the hierarchy ++ * increments the level. If *ancestor_level* == level of cgroup ++ * associated with *sk*, then return value will be same as that ++ * of **bpf_sk_cgroup_id**\ (). ++ * ++ * The helper is useful to implement policies based on cgroups ++ * that are upper in hierarchy than immediate cgroup associated ++ * with *sk*. ++ * ++ * The format of returned id and helper limitations are same as in ++ * **bpf_sk_cgroup_id**\ (). ++ * Return ++ * The id is returned or 0 in case the id could not be retrieved. ++ * ++ * long bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) ++ * Description ++ * Copy *size* bytes from *data* into a ring buffer *ringbuf*. ++ * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification ++ * of new data availability is sent. ++ * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification ++ * of new data availability is sent unconditionally. ++ * If **0** is specified in *flags*, an adaptive notification ++ * of new data availability is sent. ++ * ++ * An adaptive notification is a notification sent whenever the user-space ++ * process has caught up and consumed all available payloads. In case the user-space ++ * process is still processing a previous payload, then no notification is needed ++ * as it will process the newly added payload automatically. 
++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags) ++ * Description ++ * Reserve *size* bytes of payload in a ring buffer *ringbuf*. ++ * *flags* must be 0. ++ * Return ++ * Valid pointer with *size* bytes of memory available; NULL, ++ * otherwise. ++ * ++ * void bpf_ringbuf_submit(void *data, u64 flags) ++ * Description ++ * Submit reserved ring buffer sample, pointed to by *data*. ++ * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification ++ * of new data availability is sent. ++ * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification ++ * of new data availability is sent unconditionally. ++ * If **0** is specified in *flags*, an adaptive notification ++ * of new data availability is sent. ++ * ++ * See 'bpf_ringbuf_output()' for the definition of adaptive notification. ++ * Return ++ * Nothing. Always succeeds. ++ * ++ * void bpf_ringbuf_discard(void *data, u64 flags) ++ * Description ++ * Discard reserved ring buffer sample, pointed to by *data*. ++ * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification ++ * of new data availability is sent. ++ * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification ++ * of new data availability is sent unconditionally. ++ * If **0** is specified in *flags*, an adaptive notification ++ * of new data availability is sent. ++ * ++ * See 'bpf_ringbuf_output()' for the definition of adaptive notification. ++ * Return ++ * Nothing. Always succeeds. ++ * ++ * u64 bpf_ringbuf_query(void *ringbuf, u64 flags) ++ * Description ++ * Query various characteristics of provided ring buffer. What ++ * exactly is queries is determined by *flags*: ++ * ++ * * **BPF_RB_AVAIL_DATA**: Amount of data not yet consumed. ++ * * **BPF_RB_RING_SIZE**: The size of ring buffer. ++ * * **BPF_RB_CONS_POS**: Consumer position (can wrap around). ++ * * **BPF_RB_PROD_POS**: Producer(s) position (can wrap around). ++ * ++ * Data returned is just a momentary snapshot of actual values ++ * and could be inaccurate, so this facility should be used to ++ * power heuristics and for reporting, not to make 100% correct ++ * calculation. ++ * Return ++ * Requested value, or 0, if *flags* are not recognized. ++ * ++ * long bpf_csum_level(struct sk_buff *skb, u64 level) ++ * Description ++ * Change the skbs checksum level by one layer up or down, or ++ * reset it entirely to none in order to have the stack perform ++ * checksum validation. The level is applicable to the following ++ * protocols: TCP, UDP, GRE, SCTP, FCOE. For example, a decap of ++ * | ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP | ++ * through **bpf_skb_adjust_room**\ () helper with passing in ++ * **BPF_F_ADJ_ROOM_NO_CSUM_RESET** flag would require one call ++ * to **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_DEC** since ++ * the UDP header is removed. Similarly, an encap of the latter ++ * into the former could be accompanied by a helper call to ++ * **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_INC** if the ++ * skb is still intended to be processed in higher layers of the ++ * stack instead of just egressing at tc. ++ * ++ * There are three supported level settings at this time: ++ * ++ * * **BPF_CSUM_LEVEL_INC**: Increases skb->csum_level for skbs ++ * with CHECKSUM_UNNECESSARY. ++ * * **BPF_CSUM_LEVEL_DEC**: Decreases skb->csum_level for skbs ++ * with CHECKSUM_UNNECESSARY. 
++ * * **BPF_CSUM_LEVEL_RESET**: Resets skb->csum_level to 0 and ++ * sets CHECKSUM_NONE to force checksum validation by the stack. ++ * * **BPF_CSUM_LEVEL_QUERY**: No-op, returns the current ++ * skb->csum_level. ++ * Return ++ * 0 on success, or a negative error in case of failure. In the ++ * case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level ++ * is returned or the error code -EACCES in case the skb is not ++ * subject to CHECKSUM_UNNECESSARY. ++ * ++ * struct tcp6_sock *bpf_skc_to_tcp6_sock(void *sk) ++ * Description ++ * Dynamically cast a *sk* pointer to a *tcp6_sock* pointer. ++ * Return ++ * *sk* if casting is valid, or **NULL** otherwise. ++ * ++ * struct tcp_sock *bpf_skc_to_tcp_sock(void *sk) ++ * Description ++ * Dynamically cast a *sk* pointer to a *tcp_sock* pointer. ++ * Return ++ * *sk* if casting is valid, or **NULL** otherwise. ++ * ++ * struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk) ++ * Description ++ * Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer. ++ * Return ++ * *sk* if casting is valid, or **NULL** otherwise. ++ * ++ * struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk) ++ * Description ++ * Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer. ++ * Return ++ * *sk* if casting is valid, or **NULL** otherwise. ++ * ++ * struct udp6_sock *bpf_skc_to_udp6_sock(void *sk) ++ * Description ++ * Dynamically cast a *sk* pointer to a *udp6_sock* pointer. ++ * Return ++ * *sk* if casting is valid, or **NULL** otherwise. ++ * ++ * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags) ++ * Description ++ * Return a user or a kernel stack in bpf program provided buffer. ++ * To achieve this, the helper needs *task*, which is a valid ++ * pointer to **struct task_struct**. To store the stacktrace, the ++ * bpf program provides *buf* with a nonnegative *size*. ++ * ++ * The last argument, *flags*, holds the number of stack frames to ++ * skip (from 0 to 255), masked with ++ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set ++ * the following flags: ++ * ++ * **BPF_F_USER_STACK** ++ * Collect a user space stack instead of a kernel stack. ++ * **BPF_F_USER_BUILD_ID** ++ * Collect buildid+offset instead of ips for user stack, ++ * only valid if **BPF_F_USER_STACK** is also specified. ++ * ++ * **bpf_get_task_stack**\ () can collect up to ++ * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject ++ * to sufficient large buffer size. Note that ++ * this limit can be controlled with the **sysctl** program, and ++ * that it should be manually increased in order to profile long ++ * user stacks (such as stacks for Java programs). To do so, use: ++ * ++ * :: ++ * ++ * # sysctl kernel.perf_event_max_stack= ++ * Return ++ * The non-negative copied *buf* length equal to or less than ++ * *size* on success, or a negative error in case of failure. ++ * ++ * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags) ++ * Description ++ * Load header option. Support reading a particular TCP header ++ * option for bpf program (**BPF_PROG_TYPE_SOCK_OPS**). ++ * ++ * If *flags* is 0, it will search the option from the ++ * *skops*\ **->skb_data**. The comment in **struct bpf_sock_ops** ++ * has details on what skb_data contains under different ++ * *skops*\ **->op**. ++ * ++ * The first byte of the *searchby_res* specifies the ++ * kind that it wants to search. ++ * ++ * If the searching kind is an experimental kind ++ * (i.e. 
253 or 254 according to RFC6994). It also ++ * needs to specify the "magic" which is either ++ * 2 bytes or 4 bytes. It then also needs to ++ * specify the size of the magic by using ++ * the 2nd byte which is "kind-length" of a TCP ++ * header option and the "kind-length" also ++ * includes the first 2 bytes "kind" and "kind-length" ++ * itself as a normal TCP header option also does. ++ * ++ * For example, to search experimental kind 254 with ++ * 2 byte magic 0xeB9F, the searchby_res should be ++ * [ 254, 4, 0xeB, 0x9F, 0, 0, .... 0 ]. ++ * ++ * To search for the standard window scale option (3), ++ * the *searchby_res* should be [ 3, 0, 0, .... 0 ]. ++ * Note, kind-length must be 0 for regular option. ++ * ++ * Searching for No-Op (0) and End-of-Option-List (1) are ++ * not supported. ++ * ++ * *len* must be at least 2 bytes which is the minimal size ++ * of a header option. ++ * ++ * Supported flags: ++ * ++ * * **BPF_LOAD_HDR_OPT_TCP_SYN** to search from the ++ * saved_syn packet or the just-received syn packet. ++ * ++ * Return ++ * > 0 when found, the header option is copied to *searchby_res*. ++ * The return value is the total length copied. On failure, a ++ * negative error code is returned: ++ * ++ * **-EINVAL** if a parameter is invalid. ++ * ++ * **-ENOMSG** if the option is not found. ++ * ++ * **-ENOENT** if no syn packet is available when ++ * **BPF_LOAD_HDR_OPT_TCP_SYN** is used. ++ * ++ * **-ENOSPC** if there is not enough space. Only *len* number of ++ * bytes are copied. ++ * ++ * **-EFAULT** on failure to parse the header options in the ++ * packet. ++ * ++ * **-EPERM** if the helper cannot be used under the current ++ * *skops*\ **->op**. ++ * ++ * long bpf_store_hdr_opt(struct bpf_sock_ops *skops, const void *from, u32 len, u64 flags) ++ * Description ++ * Store header option. The data will be copied ++ * from buffer *from* with length *len* to the TCP header. ++ * ++ * The buffer *from* should have the whole option that ++ * includes the kind, kind-length, and the actual ++ * option data. The *len* must be at least kind-length ++ * long. The kind-length does not have to be 4 byte ++ * aligned. The kernel will take care of the padding ++ * and setting the 4 bytes aligned value to th->doff. ++ * ++ * This helper will check for duplicated option ++ * by searching the same option in the outgoing skb. ++ * ++ * This helper can only be called during ++ * **BPF_SOCK_OPS_WRITE_HDR_OPT_CB**. ++ * ++ * Return ++ * 0 on success, or negative error in case of failure: ++ * ++ * **-EINVAL** If param is invalid. ++ * ++ * **-ENOSPC** if there is not enough space in the header. ++ * Nothing has been written ++ * ++ * **-EEXIST** if the option already exists. ++ * ++ * **-EFAULT** on failrue to parse the existing header options. ++ * ++ * **-EPERM** if the helper cannot be used under the current ++ * *skops*\ **->op**. ++ * ++ * long bpf_reserve_hdr_opt(struct bpf_sock_ops *skops, u32 len, u64 flags) ++ * Description ++ * Reserve *len* bytes for the bpf header option. The ++ * space will be used by **bpf_store_hdr_opt**\ () later in ++ * **BPF_SOCK_OPS_WRITE_HDR_OPT_CB**. ++ * ++ * If **bpf_reserve_hdr_opt**\ () is called multiple times, ++ * the total number of bytes will be reserved. ++ * ++ * This helper can only be called during ++ * **BPF_SOCK_OPS_HDR_OPT_LEN_CB**. ++ * ++ * Return ++ * 0 on success, or negative error in case of failure: ++ * ++ * **-EINVAL** if a parameter is invalid. ++ * ++ * **-ENOSPC** if there is not enough space in the header. 
++ * ++ * **-EPERM** if the helper cannot be used under the current ++ * *skops*\ **->op**. ++ * ++ * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags) ++ * Description ++ * Get a bpf_local_storage from an *inode*. ++ * ++ * Logically, it could be thought of as getting the value from ++ * a *map* with *inode* as the **key**. From this ++ * perspective, the usage is not much different from ++ * **bpf_map_lookup_elem**\ (*map*, **&**\ *inode*) except this ++ * helper enforces the key must be an inode and the map must also ++ * be a **BPF_MAP_TYPE_INODE_STORAGE**. ++ * ++ * Underneath, the value is stored locally at *inode* instead of ++ * the *map*. The *map* is used as the bpf-local-storage ++ * "type". The bpf-local-storage "type" (i.e. the *map*) is ++ * searched against all bpf_local_storage residing at *inode*. ++ * ++ * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be ++ * used such that a new bpf_local_storage will be ++ * created if one does not exist. *value* can be used ++ * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify ++ * the initial value of a bpf_local_storage. If *value* is ++ * **NULL**, the new bpf_local_storage will be zero initialized. ++ * Return ++ * A bpf_local_storage pointer is returned on success. ++ * ++ * **NULL** if not found or there was an error in adding ++ * a new bpf_local_storage. ++ * ++ * int bpf_inode_storage_delete(struct bpf_map *map, void *inode) ++ * Description ++ * Delete a bpf_local_storage from an *inode*. ++ * Return ++ * 0 on success. ++ * ++ * **-ENOENT** if the bpf_local_storage cannot be found. ++ * ++ * long bpf_d_path(struct path *path, char *buf, u32 sz) ++ * Description ++ * Return full path for given **struct path** object, which ++ * needs to be the kernel BTF *path* object. The path is ++ * returned in the provided buffer *buf* of size *sz* and ++ * is zero terminated. ++ * ++ * Return ++ * On success, the strictly positive length of the string, ++ * including the trailing NUL character. On error, a negative ++ * value. ++ * ++ * long bpf_copy_from_user(void *dst, u32 size, const void *user_ptr) ++ * Description ++ * Read *size* bytes from user space address *user_ptr* and store ++ * the data in *dst*. This is a wrapper of **copy_from_user**\ (). ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_snprintf_btf(char *str, u32 str_size, struct btf_ptr *ptr, u32 btf_ptr_size, u64 flags) ++ * Description ++ * Use BTF to store a string representation of *ptr*->ptr in *str*, ++ * using *ptr*->type_id. This value should specify the type ++ * that *ptr*->ptr points to. LLVM __builtin_btf_type_id(type, 1) ++ * can be used to look up vmlinux BTF type ids. Traversing the ++ * data structure using BTF, the type information and values are ++ * stored in the first *str_size* - 1 bytes of *str*. Safe copy of ++ * the pointer data is carried out to avoid kernel crashes during ++ * operation. Smaller types can use string space on the stack; ++ * larger programs can use map data to store the string ++ * representation. ++ * ++ * The string can be subsequently shared with userspace via ++ * bpf_perf_event_output() or ring buffer interfaces. ++ * bpf_trace_printk() is to be avoided as it places too small ++ * a limit on string size to be useful. 
++ * ++ * *flags* is a combination of ++ * ++ * **BTF_F_COMPACT** ++ * no formatting around type information ++ * **BTF_F_NONAME** ++ * no struct/union member names/types ++ * **BTF_F_PTR_RAW** ++ * show raw (unobfuscated) pointer values; ++ * equivalent to printk specifier %px. ++ * **BTF_F_ZERO** ++ * show zero-valued struct/union members; they ++ * are not displayed by default ++ * ++ * Return ++ * The number of bytes that were written (or would have been ++ * written if output had to be truncated due to string size), ++ * or a negative error in cases of failure. ++ * ++ * long bpf_seq_printf_btf(struct seq_file *m, struct btf_ptr *ptr, u32 ptr_size, u64 flags) ++ * Description ++ * Use BTF to write to seq_write a string representation of ++ * *ptr*->ptr, using *ptr*->type_id as per bpf_snprintf_btf(). ++ * *flags* are identical to those used for bpf_snprintf_btf. ++ * Return ++ * 0 on success or a negative error in case of failure. ++ * ++ * u64 bpf_skb_cgroup_classid(struct sk_buff *skb) ++ * Description ++ * See **bpf_get_cgroup_classid**\ () for the main description. ++ * This helper differs from **bpf_get_cgroup_classid**\ () in that ++ * the cgroup v1 net_cls class is retrieved only from the *skb*'s ++ * associated socket instead of the current process. ++ * Return ++ * The id is returned or 0 in case the id could not be retrieved. ++ * ++ * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags) ++ * Description ++ * Redirect the packet to another net device of index *ifindex* ++ * and fill in L2 addresses from neighboring subsystem. This helper ++ * is somewhat similar to **bpf_redirect**\ (), except that it ++ * populates L2 addresses as well, meaning, internally, the helper ++ * relies on the neighbor lookup for the L2 address of the nexthop. ++ * ++ * The helper will perform a FIB lookup based on the skb's ++ * networking header to get the address of the next hop, unless ++ * this is supplied by the caller in the *params* argument. The ++ * *plen* argument indicates the len of *params* and should be set ++ * to 0 if *params* is NULL. ++ * ++ * The *flags* argument is reserved and must be 0. The helper is ++ * currently only supported for tc BPF program types, and enabled ++ * for IPv4 and IPv6 protocols. ++ * Return ++ * The helper returns **TC_ACT_REDIRECT** on success or ++ * **TC_ACT_SHOT** on error. ++ * ++ * void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu) ++ * Description ++ * Take a pointer to a percpu ksym, *percpu_ptr*, and return a ++ * pointer to the percpu kernel variable on *cpu*. A ksym is an ++ * extern variable decorated with '__ksym'. For ksym, there is a ++ * global var (either static or global) defined of the same name ++ * in the kernel. The ksym is percpu if the global var is percpu. ++ * The returned pointer points to the global percpu var on *cpu*. ++ * ++ * bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the ++ * kernel, except that bpf_per_cpu_ptr() may return NULL. This ++ * happens if *cpu* is larger than nr_cpu_ids. The caller of ++ * bpf_per_cpu_ptr() must check the returned value. ++ * Return ++ * A pointer pointing to the kernel percpu variable on *cpu*, or ++ * NULL, if *cpu* is invalid. ++ * ++ * void *bpf_this_cpu_ptr(const void *percpu_ptr) ++ * Description ++ * Take a pointer to a percpu ksym, *percpu_ptr*, and return a ++ * pointer to the percpu kernel variable on this cpu. See the ++ * description of 'ksym' in **bpf_per_cpu_ptr**\ (). 
++ * ++ * bpf_this_cpu_ptr() has the same semantic as this_cpu_ptr() in ++ * the kernel. Different from **bpf_per_cpu_ptr**\ (), it would ++ * never return NULL. ++ * Return ++ * A pointer pointing to the kernel percpu variable on this cpu. ++ * ++ * long bpf_redirect_peer(u32 ifindex, u64 flags) ++ * Description ++ * Redirect the packet to another net device of index *ifindex*. ++ * This helper is somewhat similar to **bpf_redirect**\ (), except ++ * that the redirection happens to the *ifindex*' peer device and ++ * the netns switch takes place from ingress to ingress without ++ * going through the CPU's backlog queue. ++ * ++ * The *flags* argument is reserved and must be 0. The helper is ++ * currently only supported for tc BPF program types at the ingress ++ * hook and for veth device types. The peer device must reside in a ++ * different network namespace. ++ * Return ++ * The helper returns **TC_ACT_REDIRECT** on success or ++ * **TC_ACT_SHOT** on error. ++ * ++ * void *bpf_task_storage_get(struct bpf_map *map, struct task_struct *task, void *value, u64 flags) ++ * Description ++ * Get a bpf_local_storage from the *task*. ++ * ++ * Logically, it could be thought of as getting the value from ++ * a *map* with *task* as the **key**. From this ++ * perspective, the usage is not much different from ++ * **bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this ++ * helper enforces the key must be an task_struct and the map must also ++ * be a **BPF_MAP_TYPE_TASK_STORAGE**. ++ * ++ * Underneath, the value is stored locally at *task* instead of ++ * the *map*. The *map* is used as the bpf-local-storage ++ * "type". The bpf-local-storage "type" (i.e. the *map*) is ++ * searched against all bpf_local_storage residing at *task*. ++ * ++ * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be ++ * used such that a new bpf_local_storage will be ++ * created if one does not exist. *value* can be used ++ * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify ++ * the initial value of a bpf_local_storage. If *value* is ++ * **NULL**, the new bpf_local_storage will be zero initialized. ++ * Return ++ * A bpf_local_storage pointer is returned on success. ++ * ++ * **NULL** if not found or there was an error in adding ++ * a new bpf_local_storage. ++ * ++ * long bpf_task_storage_delete(struct bpf_map *map, struct task_struct *task) ++ * Description ++ * Delete a bpf_local_storage from a *task*. ++ * Return ++ * 0 on success. ++ * ++ * **-ENOENT** if the bpf_local_storage cannot be found. ++ * ++ * struct task_struct *bpf_get_current_task_btf(void) ++ * Description ++ * Return a BTF pointer to the "current" task. ++ * This pointer can also be used in helpers that accept an ++ * *ARG_PTR_TO_BTF_ID* of type *task_struct*. ++ * Return ++ * Pointer to the current task. ++ * ++ * long bpf_bprm_opts_set(struct linux_binprm *bprm, u64 flags) ++ * Description ++ * Set or clear certain options on *bprm*: ++ * ++ * **BPF_F_BPRM_SECUREEXEC** Set the secureexec bit ++ * which sets the **AT_SECURE** auxv for glibc. The bit ++ * is cleared if the flag is not specified. ++ * Return ++ * **-EINVAL** if invalid *flags* are passed, zero otherwise. ++ * ++ * u64 bpf_ktime_get_coarse_ns(void) ++ * Description ++ * Return a coarse-grained version of the time elapsed since ++ * system boot, in nanoseconds. Does not include time the system ++ * was suspended. ++ * ++ * See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**) ++ * Return ++ * Current *ktime*. 
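As a rough sketch of task-local storage together with bpf_get_current_task_btf() described above: an LSM hook that counts exec events per task. It assumes a kernel built with CONFIG_BPF_LSM, a bpftool-generated vmlinux.h, and the map and program names are made up::

    #include "vmlinux.h"                /* assumed: BTF-derived kernel types */
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    struct {
            __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
            __uint(map_flags, BPF_F_NO_PREALLOC);
            __type(key, int);
            __type(value, __u64);
    } exec_count SEC(".maps");          /* hypothetical map name */

    SEC("lsm/bprm_committed_creds")
    int BPF_PROG(on_exec, struct linux_binprm *bprm)
    {
            struct task_struct *task = bpf_get_current_task_btf();
            __u64 *cnt;

            cnt = bpf_task_storage_get(&exec_count, task, NULL,
                                       BPF_LOCAL_STORAGE_GET_F_CREATE);
            if (cnt)
                    (*cnt)++;           /* the value is stored on the task itself */
            return 0;
    }

    char LICENSE[] SEC("license") = "GPL";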
++ * ++ * long bpf_ima_inode_hash(struct inode *inode, void *dst, u32 size) ++ * Description ++ * Returns the stored IMA hash of the *inode* (if it's avaialable). ++ * If the hash is larger than *size*, then only *size* ++ * bytes will be copied to *dst* ++ * Return ++ * The **hash_algo** is returned on success, ++ * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if ++ * invalid arguments are passed. ++ * ++ * struct socket *bpf_sock_from_file(struct file *file) ++ * Description ++ * If the given file represents a socket, returns the associated ++ * socket. ++ * Return ++ * A pointer to a struct socket on success or NULL if the file is ++ * not a socket. ++ * ++ * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags) ++ * Description ++ * Check packet size against exceeding MTU of net device (based ++ * on *ifindex*). This helper will likely be used in combination ++ * with helpers that adjust/change the packet size. ++ * ++ * The argument *len_diff* can be used for querying with a planned ++ * size change. This allows to check MTU prior to changing packet ++ * ctx. Providing an *len_diff* adjustment that is larger than the ++ * actual packet size (resulting in negative packet size) will in ++ * principle not exceed the MTU, why it is not considered a ++ * failure. Other BPF-helpers are needed for performing the ++ * planned size change, why the responsability for catch a negative ++ * packet size belong in those helpers. ++ * ++ * Specifying *ifindex* zero means the MTU check is performed ++ * against the current net device. This is practical if this isn't ++ * used prior to redirect. ++ * ++ * On input *mtu_len* must be a valid pointer, else verifier will ++ * reject BPF program. If the value *mtu_len* is initialized to ++ * zero then the ctx packet size is use. When value *mtu_len* is ++ * provided as input this specify the L3 length that the MTU check ++ * is done against. Remember XDP and TC length operate at L2, but ++ * this value is L3 as this correlate to MTU and IP-header tot_len ++ * values which are L3 (similar behavior as bpf_fib_lookup). ++ * ++ * The Linux kernel route table can configure MTUs on a more ++ * specific per route level, which is not provided by this helper. ++ * For route level MTU checks use the **bpf_fib_lookup**\ () ++ * helper. ++ * ++ * *ctx* is either **struct xdp_md** for XDP programs or ++ * **struct sk_buff** for tc cls_act programs. ++ * ++ * The *flags* argument can be a combination of one or more of the ++ * following values: ++ * ++ * **BPF_MTU_CHK_SEGS** ++ * This flag will only works for *ctx* **struct sk_buff**. ++ * If packet context contains extra packet segment buffers ++ * (often knows as GSO skb), then MTU check is harder to ++ * check at this point, because in transmit path it is ++ * possible for the skb packet to get re-segmented ++ * (depending on net device features). This could still be ++ * a MTU violation, so this flag enables performing MTU ++ * check against segments, with a different violation ++ * return code to tell it apart. Check cannot use len_diff. ++ * ++ * On return *mtu_len* pointer contains the MTU value of the net ++ * device. Remember the net device configured MTU is the L3 size, ++ * which is returned here and XDP and TC length operate at L2. ++ * Helper take this into account for you, but remember when using ++ * MTU value in your BPF-code. ++ * ++ * Return ++ * * 0 on success, and populate MTU value in *mtu_len* pointer. 
++ * ++ * * < 0 if any input argument is invalid (*mtu_len* not updated) ++ * ++ * MTU violations return positive values, but also populate MTU ++ * value in *mtu_len* pointer, as this can be needed for ++ * implementing PMTU handing: ++ * ++ * * **BPF_MTU_CHK_RET_FRAG_NEEDED** ++ * * **BPF_MTU_CHK_RET_SEGS_TOOBIG** ++ * ++ * long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags) ++ * Description ++ * For each element in **map**, call **callback_fn** function with ++ * **map**, **callback_ctx** and other map-specific parameters. ++ * The **callback_fn** should be a static function and ++ * the **callback_ctx** should be a pointer to the stack. ++ * The **flags** is used to control certain aspects of the helper. ++ * Currently, the **flags** must be 0. ++ * ++ * The following are a list of supported map types and their ++ * respective expected callback signatures: ++ * ++ * BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH, ++ * BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH, ++ * BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY ++ * ++ * long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx); ++ * ++ * For per_cpu maps, the map_value is the value on the cpu where the ++ * bpf_prog is running. ++ * ++ * If **callback_fn** return 0, the helper will continue to the next ++ * element. If return value is 1, the helper will skip the rest of ++ * elements and return. Other return values are not used now. ++ * ++ * Return ++ * The number of traversed map elements for success, **-EINVAL** for ++ * invalid **flags**. ++ * ++ * long bpf_snprintf(char *str, u32 str_size, const char *fmt, u64 *data, u32 data_len) ++ * Description ++ * Outputs a string into the **str** buffer of size **str_size** ++ * based on a format string stored in a read-only map pointed by ++ * **fmt**. ++ * ++ * Each format specifier in **fmt** corresponds to one u64 element ++ * in the **data** array. For strings and pointers where pointees ++ * are accessed, only the pointer values are stored in the *data* ++ * array. The *data_len* is the size of *data* in bytes - must be ++ * a multiple of 8. ++ * ++ * Formats **%s** and **%p{i,I}{4,6}** require to read kernel ++ * memory. Reading kernel memory may fail due to either invalid ++ * address or valid address but requiring a major memory fault. If ++ * reading kernel memory fails, the string for **%s** will be an ++ * empty string, and the ip address for **%p{i,I}{4,6}** will be 0. ++ * Not returning error to bpf program is consistent with what ++ * **bpf_trace_printk**\ () does for now. ++ * ++ * Return ++ * The strictly positive length of the formatted string, including ++ * the trailing zero character. If the return value is greater than ++ * **str_size**, **str** contains a truncated string, guaranteed to ++ * be zero-terminated except when **str_size** is 0. ++ * ++ * Or **-EBUSY** if the per-CPU memory copy buffer is busy. ++ * ++ * long bpf_sys_bpf(u32 cmd, void *attr, u32 attr_size) ++ * Description ++ * Execute bpf syscall with given arguments. ++ * Return ++ * A syscall result. ++ * ++ * long bpf_btf_find_by_name_kind(char *name, int name_sz, u32 kind, int flags) ++ * Description ++ * Find BTF type with given name and kind in vmlinux BTF or in module's BTFs. ++ * Return ++ * Returns btf_id and btf_obj_fd in lower and upper 32 bits. ++ * ++ * long bpf_sys_close(u32 fd) ++ * Description ++ * Execute close syscall for given FD. ++ * Return ++ * A syscall result. 
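A small sketch of the bpf_for_each_map_elem() callback contract listed above, summing the values of an array map; the map, the trigger tracepoint and the context struct are illustrative assumptions::

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_ARRAY);
            __uint(max_entries, 64);
            __type(key, __u32);
            __type(value, __u64);
    } samples SEC(".maps");             /* hypothetical map name */

    struct sum_ctx {
            __u64 total;
    };

    /* Matches the documented callback signature for array/hash maps. */
    static long add_one(struct bpf_map *map, const void *key,
                        void *value, void *ctx)
    {
            struct sum_ctx *c = ctx;

            c->total += *(__u64 *)value;
            return 0;                   /* 0 = continue, 1 = stop early */
    }

    SEC("tracepoint/syscalls/sys_enter_getpid")
    int sum_samples(void *ctx)
    {
            struct sum_ctx c = {};

            bpf_for_each_map_elem(&samples, add_one, &c, 0);
            /* c.total now holds the sum over all visited elements. */
            return 0;
    }

    char LICENSE[] SEC("license") = "GPL";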
++ * ++ * long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, u64 flags) ++ * Description ++ * Initialize the timer. ++ * First 4 bits of *flags* specify clockid. ++ * Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed. ++ * All other bits of *flags* are reserved. ++ * The verifier will reject the program if *timer* is not from ++ * the same *map*. ++ * Return ++ * 0 on success. ++ * **-EBUSY** if *timer* is already initialized. ++ * **-EINVAL** if invalid *flags* are passed. ++ * **-EPERM** if *timer* is in a map that doesn't have any user references. ++ * The user space should either hold a file descriptor to a map with timers ++ * or pin such map in bpffs. When map is unpinned or file descriptor is ++ * closed all timers in the map will be cancelled and freed. ++ * ++ * long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn) ++ * Description ++ * Configure the timer to call *callback_fn* static function. ++ * Return ++ * 0 on success. ++ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. ++ * **-EPERM** if *timer* is in a map that doesn't have any user references. ++ * The user space should either hold a file descriptor to a map with timers ++ * or pin such map in bpffs. When map is unpinned or file descriptor is ++ * closed all timers in the map will be cancelled and freed. ++ * ++ * long bpf_timer_start(struct bpf_timer *timer, u64 nsecs, u64 flags) ++ * Description ++ * Set timer expiration N nanoseconds from the current time. The ++ * configured callback will be invoked in soft irq context on some cpu ++ * and will not repeat unless another bpf_timer_start() is made. ++ * In such case the next invocation can migrate to a different cpu. ++ * Since struct bpf_timer is a field inside map element the map ++ * owns the timer. The bpf_timer_set_callback() will increment refcnt ++ * of BPF program to make sure that callback_fn code stays valid. ++ * When user space reference to a map reaches zero all timers ++ * in a map are cancelled and corresponding program's refcnts are ++ * decremented. This is done to make sure that Ctrl-C of a user ++ * process doesn't leave any timers running. If map is pinned in ++ * bpffs the callback_fn can re-arm itself indefinitely. ++ * bpf_map_update/delete_elem() helpers and user space sys_bpf commands ++ * cancel and free the timer in the given map element. ++ * The map can contain timers that invoke callback_fn-s from different ++ * programs. The same callback_fn can serve different timers from ++ * different maps if key/value layout matches across maps. ++ * Every bpf_timer_set_callback() can have different callback_fn. ++ * ++ * Return ++ * 0 on success. ++ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier ++ * or invalid *flags* are passed. ++ * ++ * long bpf_timer_cancel(struct bpf_timer *timer) ++ * Description ++ * Cancel the timer and wait for callback_fn to finish if it was running. ++ * Return ++ * 0 if the timer was not active. ++ * 1 if the timer was active. ++ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. ++ * **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its ++ * own timer which would have led to a deadlock otherwise. ++ * ++ * u64 bpf_get_func_ip(void *ctx) ++ * Description ++ * Get address of the traced function (for tracing and kprobe programs). ++ * Return ++ * Address of the traced function. 
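To tie the four bpf_timer_*() helpers above together, here is a rough sketch of a self-rearming timer kept in an array map value; the trigger tracepoint, the 100 ms period and the CLOCK_MONOTONIC define are assumptions, and the callback layout follows the common (map, key, value) convention rather than anything spelled out in this header::

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    #define CLOCK_MONOTONIC 1           /* assumed clockid value, as in uapi time.h */

    struct elem {
            struct bpf_timer timer;
    };

    struct {
            __uint(type, BPF_MAP_TYPE_ARRAY);
            __uint(max_entries, 1);
            __type(key, int);
            __type(value, struct elem);
    } timer_map SEC(".maps");           /* hypothetical map name */

    /* Re-arms itself every 100 ms for as long as the map stays referenced. */
    static int timer_cb(void *map, int *key, struct elem *val)
    {
            bpf_timer_start(&val->timer, 100 * 1000 * 1000 /* ns */, 0);
            return 0;
    }

    SEC("tp/syscalls/sys_enter_nanosleep")  /* hypothetical trigger */
    int arm_timer(void *ctx)
    {
            int key = 0;
            struct elem *val = bpf_map_lookup_elem(&timer_map, &key);

            if (!val)
                    return 0;
            bpf_timer_init(&val->timer, &timer_map, CLOCK_MONOTONIC);
            bpf_timer_set_callback(&val->timer, timer_cb);
            bpf_timer_start(&val->timer, 0, 0); /* fire as soon as possible */
            return 0;
    }

    char LICENSE[] SEC("license") = "GPL";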
++ * ++ * u64 bpf_get_attach_cookie(void *ctx) ++ * Description ++ * Get bpf_cookie value provided (optionally) during the program ++ * attachment. It might be different for each individual ++ * attachment, even if BPF program itself is the same. ++ * Expects BPF program context *ctx* as a first argument. ++ * ++ * Supported for the following program types: ++ * - kprobe/uprobe; ++ * - tracepoint; ++ * - perf_event. ++ * Return ++ * Value specified by user at BPF link creation/attachment time ++ * or 0, if it was not specified. ++ * ++ * long bpf_task_pt_regs(struct task_struct *task) ++ * Description ++ * Get the struct pt_regs associated with **task**. ++ * Return ++ * A pointer to struct pt_regs. ++ * ++ * long bpf_get_branch_snapshot(void *entries, u32 size, u64 flags) ++ * Description ++ * Get branch trace from hardware engines like Intel LBR. The ++ * hardware engine is stopped shortly after the helper is ++ * called. Therefore, the user need to filter branch entries ++ * based on the actual use case. To capture branch trace ++ * before the trigger point of the BPF program, the helper ++ * should be called at the beginning of the BPF program. ++ * ++ * The data is stored as struct perf_branch_entry into output ++ * buffer *entries*. *size* is the size of *entries* in bytes. ++ * *flags* is reserved for now and must be zero. ++ * ++ * Return ++ * On success, number of bytes written to *buf*. On error, a ++ * negative value. ++ * ++ * **-EINVAL** if *flags* is not zero. ++ * ++ * **-ENOENT** if architecture does not support branch records. ++ * ++ * long bpf_trace_vprintk(const char *fmt, u32 fmt_size, const void *data, u32 data_len) ++ * Description ++ * Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64 ++ * to format and can handle more format args as a result. ++ * ++ * Arguments are to be used as in **bpf_seq_printf**\ () helper. ++ * Return ++ * The number of bytes written to the buffer, or a negative error ++ * in case of failure. ++ * ++ * struct unix_sock *bpf_skc_to_unix_sock(void *sk) ++ * Description ++ * Dynamically cast a *sk* pointer to a *unix_sock* pointer. ++ * Return ++ * *sk* if casting is valid, or **NULL** otherwise. ++ * ++ * long bpf_kallsyms_lookup_name(const char *name, int name_sz, int flags, u64 *res) ++ * Description ++ * Get the address of a kernel symbol, returned in *res*. *res* is ++ * set to 0 if the symbol is not found. ++ * Return ++ * On success, zero. On error, a negative value. ++ * ++ * **-EINVAL** if *flags* is not zero. ++ * ++ * **-EINVAL** if string *name* is not the same size as *name_sz*. ++ * ++ * **-ENOENT** if symbol is not found. ++ * ++ * **-EPERM** if caller does not have permission to obtain kernel address. ++ * ++ * long bpf_find_vma(struct task_struct *task, u64 addr, void *callback_fn, void *callback_ctx, u64 flags) ++ * Description ++ * Find vma of *task* that contains *addr*, call *callback_fn* ++ * function with *task*, *vma*, and *callback_ctx*. ++ * The *callback_fn* should be a static function and ++ * the *callback_ctx* should be a pointer to the stack. ++ * The *flags* is used to control certain aspects of the helper. ++ * Currently, the *flags* must be 0. ++ * ++ * The expected callback signature is ++ * ++ * long (\*callback_fn)(struct task_struct \*task, struct vm_area_struct \*vma, void \*callback_ctx); ++ * ++ * Return ++ * 0 on success. ++ * **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*. ++ * **-EBUSY** if failed to try lock mmap_lock. ++ * **-EINVAL** for invalid **flags**. 
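As an illustration of the bpf_find_vma() callback flow above: look up the vma containing a user-supplied address in the current task. The perf_event attach point, the const-volatile target_addr set from user space, and direct vm_start/vm_end access via a BTF-generated vmlinux.h are all assumptions::

    #include "vmlinux.h"                /* assumed: BTF-derived kernel types */
    #include <bpf/bpf_helpers.h>

    const volatile __u64 target_addr = 0;   /* hypothetical: filled in by user space */

    /* Matches the documented callback signature. */
    static long vma_cb(struct task_struct *task, struct vm_area_struct *vma,
                       void *data)
    {
            __u64 *size = data;

            *size = vma->vm_end - vma->vm_start;
            return 0;
    }

    SEC("perf_event")
    int on_sample(void *ctx)
    {
            struct task_struct *task = bpf_get_current_task_btf();
            __u64 vma_size = 0;

            /* Returns 0 on success, -ENOENT if no vma contains target_addr, ... */
            bpf_find_vma(task, target_addr, vma_cb, &vma_size, 0);
            return 0;
    }

    char LICENSE[] SEC("license") = "GPL";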
++ * ++ * long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags) ++ * Description ++ * For **nr_loops**, call **callback_fn** function ++ * with **callback_ctx** as the context parameter. ++ * The **callback_fn** should be a static function and ++ * the **callback_ctx** should be a pointer to the stack. ++ * The **flags** is used to control certain aspects of the helper. ++ * Currently, the **flags** must be 0. Currently, nr_loops is ++ * limited to 1 << 23 (~8 million) loops. ++ * ++ * long (\*callback_fn)(u32 index, void \*ctx); ++ * ++ * where **index** is the current index in the loop. The index ++ * is zero-indexed. ++ * ++ * If **callback_fn** returns 0, the helper will continue to the next ++ * loop. If return value is 1, the helper will skip the rest of ++ * the loops and return. Other return values are not used now, ++ * and will be rejected by the verifier. ++ * ++ * Return ++ * The number of loops performed, **-EINVAL** for invalid **flags**, ++ * **-E2BIG** if **nr_loops** exceeds the maximum number of loops. ++ * ++ * long bpf_strncmp(const char *s1, u32 s1_sz, const char *s2) ++ * Description ++ * Do strncmp() between **s1** and **s2**. **s1** doesn't need ++ * to be null-terminated and **s1_sz** is the maximum storage ++ * size of **s1**. **s2** must be a read-only string. ++ * Return ++ * An integer less than, equal to, or greater than zero ++ * if the first **s1_sz** bytes of **s1** is found to be ++ * less than, to match, or be greater than **s2**. ++ * ++ * long bpf_get_func_arg(void *ctx, u32 n, u64 *value) ++ * Description ++ * Get **n**-th argument (zero based) of the traced function (for tracing programs) ++ * returned in **value**. ++ * ++ * Return ++ * 0 on success. ++ * **-EINVAL** if n >= arguments count of traced function. ++ * ++ * long bpf_get_func_ret(void *ctx, u64 *value) ++ * Description ++ * Get return value of the traced function (for tracing programs) ++ * in **value**. ++ * ++ * Return ++ * 0 on success. ++ * **-EOPNOTSUPP** for tracing programs other than BPF_TRACE_FEXIT or BPF_MODIFY_RETURN. ++ * ++ * long bpf_get_func_arg_cnt(void *ctx) ++ * Description ++ * Get number of arguments of the traced function (for tracing programs). ++ * ++ * Return ++ * The number of arguments of the traced function. ++ * ++ * int bpf_get_retval(void) ++ * Description ++ * Get the syscall's return value that will be returned to userspace. ++ * ++ * This helper is currently supported by cgroup programs only. ++ * Return ++ * The syscall's return value. ++ * ++ * int bpf_set_retval(int retval) ++ * Description ++ * Set the syscall's return value that will be returned to userspace. ++ * ++ * This helper is currently supported by cgroup programs only. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md) ++ * Description ++ * Get the total size of a given xdp buff (linear and paged area) ++ * Return ++ * The total size of a given xdp buffer. ++ * ++ * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len) ++ * Description ++ * This helper is provided as an easy way to load data from a ++ * xdp buffer. It can be used to load *len* bytes from *offset* from ++ * the frame associated to *xdp_md*, into the buffer pointed by ++ * *buf*. ++ * Return ++ * 0 on success, or a negative error in case of failure. 
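Because bpf_loop() drives a callback rather than a source-level loop, a short sketch may help. It is illustrative only and assumes vmlinux.h and bpf_helpers.h; the tracepoint section name and iteration count are arbitrary. Returning 0 from the callback continues the loop, returning 1 stops it early.

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    struct loop_ctx {
        __u64 sum;
    };

    static long add_index(__u32 index, void *data)
    {
        struct loop_ctx *lc = data;

        lc->sum += index;
        return 0;   /* keep iterating */
    }

    SEC("tp/syscalls/sys_enter_getpid")  /* hypothetical attach point */
    int sum_indices(void *ctx)
    {
        struct loop_ctx lc = {};
        long n;

        /* run the callback 100 times; flags must currently be 0 */
        n = bpf_loop(100, add_index, &lc, 0);
        bpf_printk("ran %ld iterations, sum=%llu", n, lc.sum);
        return 0;
    }

    char LICENSE[] SEC("license") = "GPL";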
++ * ++ * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len) ++ * Description ++ * Store *len* bytes from buffer *buf* into the frame ++ * associated to *xdp_md*, at *offset*. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags) ++ * Description ++ * Read *size* bytes from user space address *user_ptr* in *tsk*'s ++ * address space, and stores the data in *dst*. *flags* is not ++ * used yet and is provided for future extensibility. This helper ++ * can only be used by sleepable programs. ++ * Return ++ * 0 on success, or a negative error in case of failure. On error ++ * *dst* buffer is zeroed out. ++ * ++ * long bpf_skb_set_tstamp(struct sk_buff *skb, u64 tstamp, u32 tstamp_type) ++ * Description ++ * Change the __sk_buff->tstamp_type to *tstamp_type* ++ * and set *tstamp* to the __sk_buff->tstamp together. ++ * ++ * If there is no need to change the __sk_buff->tstamp_type, ++ * the tstamp value can be directly written to __sk_buff->tstamp ++ * instead. ++ * ++ * BPF_SKB_TSTAMP_DELIVERY_MONO is the only tstamp that ++ * will be kept during bpf_redirect_*(). A non zero ++ * *tstamp* must be used with the BPF_SKB_TSTAMP_DELIVERY_MONO ++ * *tstamp_type*. ++ * ++ * A BPF_SKB_TSTAMP_UNSPEC *tstamp_type* can only be used ++ * with a zero *tstamp*. ++ * ++ * Only IPv4 and IPv6 skb->protocol are supported. ++ * ++ * This function is most useful when it needs to set a ++ * mono delivery time to __sk_buff->tstamp and then ++ * bpf_redirect_*() to the egress of an iface. For example, ++ * changing the (rcv) timestamp in __sk_buff->tstamp at ++ * ingress to a mono delivery time and then bpf_redirect_*() ++ * to sch_fq@phy-dev. ++ * Return ++ * 0 on success. ++ * **-EINVAL** for invalid input ++ * **-EOPNOTSUPP** for unsupported protocol ++ * ++ * long bpf_ima_file_hash(struct file *file, void *dst, u32 size) ++ * Description ++ * Returns a calculated IMA hash of the *file*. ++ * If the hash is larger than *size*, then only *size* ++ * bytes will be copied to *dst* ++ * Return ++ * The **hash_algo** is returned on success, ++ * **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if ++ * invalid arguments are passed. ++ * ++ * void *bpf_kptr_xchg(void *map_value, void *ptr) ++ * Description ++ * Exchange kptr at pointer *map_value* with *ptr*, and return the ++ * old value. *ptr* can be NULL, otherwise it must be a referenced ++ * pointer which will be released when this helper is called. ++ * Return ++ * The old value of kptr (which can be NULL). The returned pointer ++ * if not NULL, is a reference which must be released using its ++ * corresponding release function, or moved into a BPF map before ++ * program exit. ++ * ++ * void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu) ++ * Description ++ * Perform a lookup in *percpu map* for an entry associated to ++ * *key* on *cpu*. ++ * Return ++ * Map value associated to *key* on *cpu*, or **NULL** if no entry ++ * was found or *cpu* is invalid. ++ * ++ * struct mptcp_sock *bpf_skc_to_mptcp_sock(void *sk) ++ * Description ++ * Dynamically cast a *sk* pointer to a *mptcp_sock* pointer. ++ * Return ++ * *sk* if casting is valid, or **NULL** otherwise. ++ * ++ * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr) ++ * Description ++ * Get a dynptr to local memory *data*. ++ * ++ * *data* must be a ptr to a map value. 
++ * The maximum *size* supported is DYNPTR_MAX_SIZE. ++ * *flags* is currently unused. ++ * Return ++ * 0 on success, -E2BIG if the size exceeds DYNPTR_MAX_SIZE, ++ * -EINVAL if flags is not 0. ++ * ++ * long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr) ++ * Description ++ * Reserve *size* bytes of payload in a ring buffer *ringbuf* ++ * through the dynptr interface. *flags* must be 0. ++ * ++ * Please note that a corresponding bpf_ringbuf_submit_dynptr or ++ * bpf_ringbuf_discard_dynptr must be called on *ptr*, even if the ++ * reservation fails. This is enforced by the verifier. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags) ++ * Description ++ * Submit reserved ring buffer sample, pointed to by *data*, ++ * through the dynptr interface. This is a no-op if the dynptr is ++ * invalid/null. ++ * ++ * For more information on *flags*, please see ++ * 'bpf_ringbuf_submit'. ++ * Return ++ * Nothing. Always succeeds. ++ * ++ * void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags) ++ * Description ++ * Discard reserved ring buffer sample through the dynptr ++ * interface. This is a no-op if the dynptr is invalid/null. ++ * ++ * For more information on *flags*, please see ++ * 'bpf_ringbuf_discard'. ++ * Return ++ * Nothing. Always succeeds. ++ * ++ * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags) ++ * Description ++ * Read *len* bytes from *src* into *dst*, starting from *offset* ++ * into *src*. ++ * *flags* is currently unused. ++ * Return ++ * 0 on success, -E2BIG if *offset* + *len* exceeds the length ++ * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if ++ * *flags* is not 0. ++ * ++ * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) ++ * Description ++ * Write *len* bytes from *src* into *dst*, starting from *offset* ++ * into *dst*. ++ * *flags* is currently unused. ++ * Return ++ * 0 on success, -E2BIG if *offset* + *len* exceeds the length ++ * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* ++ * is a read-only dynptr or if *flags* is not 0. ++ * ++ * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len) ++ * Description ++ * Get a pointer to the underlying dynptr data. ++ * ++ * *len* must be a statically known value. The returned data slice ++ * is invalidated whenever the dynptr is invalidated. ++ * Return ++ * Pointer to the underlying dynptr data, NULL if the dynptr is ++ * read-only, if the dynptr is invalid, or if the offset and length ++ * is out of bounds. ++ * ++ * s64 bpf_tcp_raw_gen_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th, u32 th_len) ++ * Description ++ * Try to issue a SYN cookie for the packet with corresponding ++ * IPv4/TCP headers, *iph* and *th*, without depending on a ++ * listening socket. ++ * ++ * *iph* points to the IPv4 header. ++ * ++ * *th* points to the start of the TCP header, while *th_len* ++ * contains the length of the TCP header (at least ++ * **sizeof**\ (**struct tcphdr**)). ++ * Return ++ * On success, lower 32 bits hold the generated SYN cookie in ++ * followed by 16 bits which hold the MSS value for that cookie, ++ * and the top 16 bits are unused. ++ * ++ * On failure, the returned value is one of the following: ++ * ++ * **-EINVAL** if *th_len* is invalid. 
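The ring-buffer dynptr helpers above pair up as reserve, write, then submit or discard. A minimal sketch, assuming vmlinux.h, bpf_helpers.h and a hypothetical BPF_MAP_TYPE_RINGBUF map named events, could look like the following; note that discard is still issued on the failure path, since the verifier enforces a matching submit/discard for every reservation.

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    struct {
        __uint(type, BPF_MAP_TYPE_RINGBUF);
        __uint(max_entries, 4096);
    } events SEC(".maps");

    SEC("tp/syscalls/sys_enter_write")  /* hypothetical attach point */
    int emit_pid(void *ctx)
    {
        struct bpf_dynptr ptr;
        __u32 pid = bpf_get_current_pid_tgid() >> 32;

        /* reserve sizeof(pid) bytes through the dynptr interface; flags must be 0 */
        if (bpf_ringbuf_reserve_dynptr(&events, sizeof(pid), 0, &ptr)) {
            /* submit or discard is required even when the reservation failed */
            bpf_ringbuf_discard_dynptr(&ptr, 0);
            return 0;
        }
        bpf_dynptr_write(&ptr, 0, &pid, sizeof(pid), 0);
        bpf_ringbuf_submit_dynptr(&ptr, 0);
        return 0;
    }

    char LICENSE[] SEC("license") = "GPL";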
++ * ++ * s64 bpf_tcp_raw_gen_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th, u32 th_len) ++ * Description ++ * Try to issue a SYN cookie for the packet with corresponding ++ * IPv6/TCP headers, *iph* and *th*, without depending on a ++ * listening socket. ++ * ++ * *iph* points to the IPv6 header. ++ * ++ * *th* points to the start of the TCP header, while *th_len* ++ * contains the length of the TCP header (at least ++ * **sizeof**\ (**struct tcphdr**)). ++ * Return ++ * On success, lower 32 bits hold the generated SYN cookie in ++ * followed by 16 bits which hold the MSS value for that cookie, ++ * and the top 16 bits are unused. ++ * ++ * On failure, the returned value is one of the following: ++ * ++ * **-EINVAL** if *th_len* is invalid. ++ * ++ * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. ++ * ++ * long bpf_tcp_raw_check_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th) ++ * Description ++ * Check whether *iph* and *th* contain a valid SYN cookie ACK ++ * without depending on a listening socket. ++ * ++ * *iph* points to the IPv4 header. ++ * ++ * *th* points to the TCP header. ++ * Return ++ * 0 if *iph* and *th* are a valid SYN cookie ACK. ++ * ++ * On failure, the returned value is one of the following: ++ * ++ * **-EACCES** if the SYN cookie is not valid. ++ * ++ * long bpf_tcp_raw_check_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th) ++ * Description ++ * Check whether *iph* and *th* contain a valid SYN cookie ACK ++ * without depending on a listening socket. ++ * ++ * *iph* points to the IPv6 header. ++ * ++ * *th* points to the TCP header. ++ * Return ++ * 0 if *iph* and *th* are a valid SYN cookie ACK. ++ * ++ * On failure, the returned value is one of the following: ++ * ++ * **-EACCES** if the SYN cookie is not valid. ++ * ++ * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. 
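To ground the raw SYN-cookie helpers, here is a rough XDP-side sketch of bpf_tcp_raw_check_syncookie_ipv4(). It is illustrative only: it assumes vmlinux.h, bpf_helpers.h and bpf_endian.h, ignores IPv4 options for simplicity, and defines ETH_P_IP locally since that constant normally comes from linux/if_ether.h.

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_endian.h>

    #define ETH_P_IP 0x0800  /* assumed; normally from linux/if_ether.h */

    SEC("xdp")
    int syncookie_ack_check(struct xdp_md *ctx)
    {
        void *data = (void *)(long)ctx->data;
        void *data_end = (void *)(long)ctx->data_end;
        struct ethhdr *eth = data;
        struct iphdr *iph = (void *)(eth + 1);
        struct tcphdr *th = (void *)(iph + 1);  /* assumes no IP options */

        /* one bounds check at the farthest offset covers all accesses below */
        if ((void *)(th + 1) > data_end)
            return XDP_PASS;
        if (eth->h_proto != bpf_htons(ETH_P_IP) || iph->protocol != IPPROTO_TCP)
            return XDP_PASS;

        /* 0 means iph/th carry a valid SYN cookie ACK, no listener needed */
        if (th->ack && !th->syn &&
            bpf_tcp_raw_check_syncookie_ipv4(iph, th) == 0)
            bpf_printk("valid SYN cookie ACK");

        return XDP_PASS;
    }

    char LICENSE[] SEC("license") = "GPL";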
++ */ ++#define __BPF_FUNC_MAPPER(FN) \ ++ FN(unspec), \ ++ FN(map_lookup_elem), \ ++ FN(map_update_elem), \ ++ FN(map_delete_elem), \ ++ FN(probe_read), \ ++ FN(ktime_get_ns), \ ++ FN(trace_printk), \ ++ FN(get_prandom_u32), \ ++ FN(get_smp_processor_id), \ ++ FN(skb_store_bytes), \ ++ FN(l3_csum_replace), \ ++ FN(l4_csum_replace), \ ++ FN(tail_call), \ ++ FN(clone_redirect), \ ++ FN(get_current_pid_tgid), \ ++ FN(get_current_uid_gid), \ ++ FN(get_current_comm), \ ++ FN(get_cgroup_classid), \ ++ FN(skb_vlan_push), \ ++ FN(skb_vlan_pop), \ ++ FN(skb_get_tunnel_key), \ ++ FN(skb_set_tunnel_key), \ ++ FN(perf_event_read), \ ++ FN(redirect), \ ++ FN(get_route_realm), \ ++ FN(perf_event_output), \ ++ FN(skb_load_bytes), \ ++ FN(get_stackid), \ ++ FN(csum_diff), \ ++ FN(skb_get_tunnel_opt), \ ++ FN(skb_set_tunnel_opt), \ ++ FN(skb_change_proto), \ ++ FN(skb_change_type), \ ++ FN(skb_under_cgroup), \ ++ FN(get_hash_recalc), \ ++ FN(get_current_task), \ ++ FN(probe_write_user), \ ++ FN(current_task_under_cgroup), \ ++ FN(skb_change_tail), \ ++ FN(skb_pull_data), \ ++ FN(csum_update), \ ++ FN(set_hash_invalid), \ ++ FN(get_numa_node_id), \ ++ FN(skb_change_head), \ ++ FN(xdp_adjust_head), \ ++ FN(probe_read_str), \ ++ FN(get_socket_cookie), \ ++ FN(get_socket_uid), \ ++ FN(set_hash), \ ++ FN(setsockopt), \ ++ FN(skb_adjust_room), \ ++ FN(redirect_map), \ ++ FN(sk_redirect_map), \ ++ FN(sock_map_update), \ ++ FN(xdp_adjust_meta), \ ++ FN(perf_event_read_value), \ ++ FN(perf_prog_read_value), \ ++ FN(getsockopt), \ ++ FN(override_return), \ ++ FN(sock_ops_cb_flags_set), \ ++ FN(msg_redirect_map), \ ++ FN(msg_apply_bytes), \ ++ FN(msg_cork_bytes), \ ++ FN(msg_pull_data), \ ++ FN(bind), \ ++ FN(xdp_adjust_tail), \ ++ FN(skb_get_xfrm_state), \ ++ FN(get_stack), \ ++ FN(skb_load_bytes_relative), \ ++ FN(fib_lookup), \ ++ FN(sock_hash_update), \ ++ FN(msg_redirect_hash), \ ++ FN(sk_redirect_hash), \ ++ FN(lwt_push_encap), \ ++ FN(lwt_seg6_store_bytes), \ ++ FN(lwt_seg6_adjust_srh), \ ++ FN(lwt_seg6_action), \ ++ FN(rc_repeat), \ ++ FN(rc_keydown), \ ++ FN(skb_cgroup_id), \ ++ FN(get_current_cgroup_id), \ ++ FN(get_local_storage), \ ++ FN(sk_select_reuseport), \ ++ FN(skb_ancestor_cgroup_id), \ ++ FN(sk_lookup_tcp), \ ++ FN(sk_lookup_udp), \ ++ FN(sk_release), \ ++ FN(map_push_elem), \ ++ FN(map_pop_elem), \ ++ FN(map_peek_elem), \ ++ FN(msg_push_data), \ ++ FN(msg_pop_data), \ ++ FN(rc_pointer_rel), \ ++ FN(spin_lock), \ ++ FN(spin_unlock), \ ++ FN(sk_fullsock), \ ++ FN(tcp_sock), \ ++ FN(skb_ecn_set_ce), \ ++ FN(get_listener_sock), \ ++ FN(skc_lookup_tcp), \ ++ FN(tcp_check_syncookie), \ ++ FN(sysctl_get_name), \ ++ FN(sysctl_get_current_value), \ ++ FN(sysctl_get_new_value), \ ++ FN(sysctl_set_new_value), \ ++ FN(strtol), \ ++ FN(strtoul), \ ++ FN(sk_storage_get), \ ++ FN(sk_storage_delete), \ ++ FN(send_signal), \ ++ FN(tcp_gen_syncookie), \ ++ FN(skb_output), \ ++ FN(probe_read_user), \ ++ FN(probe_read_kernel), \ ++ FN(probe_read_user_str), \ ++ FN(probe_read_kernel_str), \ ++ FN(tcp_send_ack), \ ++ FN(send_signal_thread), \ ++ FN(jiffies64), \ ++ FN(read_branch_records), \ ++ FN(get_ns_current_pid_tgid), \ ++ FN(xdp_output), \ ++ FN(get_netns_cookie), \ ++ FN(get_current_ancestor_cgroup_id), \ ++ FN(sk_assign), \ ++ FN(ktime_get_boot_ns), \ ++ FN(seq_printf), \ ++ FN(seq_write), \ ++ FN(sk_cgroup_id), \ ++ FN(sk_ancestor_cgroup_id), \ ++ FN(ringbuf_output), \ ++ FN(ringbuf_reserve), \ ++ FN(ringbuf_submit), \ ++ FN(ringbuf_discard), \ ++ FN(ringbuf_query), \ ++ FN(csum_level), \ ++ 
FN(skc_to_tcp6_sock), \ ++ FN(skc_to_tcp_sock), \ ++ FN(skc_to_tcp_timewait_sock), \ ++ FN(skc_to_tcp_request_sock), \ ++ FN(skc_to_udp6_sock), \ ++ FN(get_task_stack), \ ++ FN(load_hdr_opt), \ ++ FN(store_hdr_opt), \ ++ FN(reserve_hdr_opt), \ ++ FN(inode_storage_get), \ ++ FN(inode_storage_delete), \ ++ FN(d_path), \ ++ FN(copy_from_user), \ ++ FN(snprintf_btf), \ ++ FN(seq_printf_btf), \ ++ FN(skb_cgroup_classid), \ ++ FN(redirect_neigh), \ ++ FN(per_cpu_ptr), \ ++ FN(this_cpu_ptr), \ ++ FN(redirect_peer), \ ++ FN(task_storage_get), \ ++ FN(task_storage_delete), \ ++ FN(get_current_task_btf), \ ++ FN(bprm_opts_set), \ ++ FN(ktime_get_coarse_ns), \ ++ FN(ima_inode_hash), \ ++ FN(sock_from_file), \ ++ FN(check_mtu), \ ++ FN(for_each_map_elem), \ ++ FN(snprintf), \ ++ FN(sys_bpf), \ ++ FN(btf_find_by_name_kind), \ ++ FN(sys_close), \ ++ FN(timer_init), \ ++ FN(timer_set_callback), \ ++ FN(timer_start), \ ++ FN(timer_cancel), \ ++ FN(get_func_ip), \ ++ FN(get_attach_cookie), \ ++ FN(task_pt_regs), \ ++ FN(get_branch_snapshot), \ ++ FN(trace_vprintk), \ ++ FN(skc_to_unix_sock), \ ++ FN(kallsyms_lookup_name), \ ++ FN(find_vma), \ ++ FN(loop), \ ++ FN(strncmp), \ ++ FN(get_func_arg), \ ++ FN(get_func_ret), \ ++ FN(get_func_arg_cnt), \ ++ FN(get_retval), \ ++ FN(set_retval), \ ++ FN(xdp_get_buff_len), \ ++ FN(xdp_load_bytes), \ ++ FN(xdp_store_bytes), \ ++ FN(copy_from_user_task), \ ++ FN(skb_set_tstamp), \ ++ FN(ima_file_hash), \ ++ FN(kptr_xchg), \ ++ FN(map_lookup_percpu_elem), \ ++ FN(skc_to_mptcp_sock), \ ++ FN(dynptr_from_mem), \ ++ FN(ringbuf_reserve_dynptr), \ ++ FN(ringbuf_submit_dynptr), \ ++ FN(ringbuf_discard_dynptr), \ ++ FN(dynptr_read), \ ++ FN(dynptr_write), \ ++ FN(dynptr_data), \ ++ FN(tcp_raw_gen_syncookie_ipv4), \ ++ FN(tcp_raw_gen_syncookie_ipv6), \ ++ FN(tcp_raw_check_syncookie_ipv4), \ ++ FN(tcp_raw_check_syncookie_ipv6), \ ++ /* */ ++ ++/* integer value in 'imm' field of BPF_CALL instruction selects which helper ++ * function eBPF program intends to call ++ */ ++#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x ++enum bpf_func_id { ++ __BPF_FUNC_MAPPER(__BPF_ENUM_FN) ++ __BPF_FUNC_MAX_ID, ++}; ++#undef __BPF_ENUM_FN ++ ++/* All flags used by eBPF helper functions, placed here. */ ++ ++/* BPF_FUNC_skb_store_bytes flags. */ ++enum { ++ BPF_F_RECOMPUTE_CSUM = (1ULL << 0), ++ BPF_F_INVALIDATE_HASH = (1ULL << 1), ++}; ++ ++/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags. ++ * First 4 bits are for passing the header field size. ++ */ ++enum { ++ BPF_F_HDR_FIELD_MASK = 0xfULL, ++}; ++ ++/* BPF_FUNC_l4_csum_replace flags. */ ++enum { ++ BPF_F_PSEUDO_HDR = (1ULL << 4), ++ BPF_F_MARK_MANGLED_0 = (1ULL << 5), ++ BPF_F_MARK_ENFORCE = (1ULL << 6), ++}; ++ ++/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ ++enum { ++ BPF_F_INGRESS = (1ULL << 0), ++}; ++ ++/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ ++enum { ++ BPF_F_TUNINFO_IPV6 = (1ULL << 0), ++}; ++ ++/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */ ++enum { ++ BPF_F_SKIP_FIELD_MASK = 0xffULL, ++ BPF_F_USER_STACK = (1ULL << 8), ++/* flags used by BPF_FUNC_get_stackid only. */ ++ BPF_F_FAST_STACK_CMP = (1ULL << 9), ++ BPF_F_REUSE_STACKID = (1ULL << 10), ++/* flags used by BPF_FUNC_get_stack only. */ ++ BPF_F_USER_BUILD_ID = (1ULL << 11), ++}; ++ ++/* BPF_FUNC_skb_set_tunnel_key flags. 
*/ ++enum { ++ BPF_F_ZERO_CSUM_TX = (1ULL << 1), ++ BPF_F_DONT_FRAGMENT = (1ULL << 2), ++ BPF_F_SEQ_NUMBER = (1ULL << 3), ++}; ++ ++/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and ++ * BPF_FUNC_perf_event_read_value flags. ++ */ ++enum { ++ BPF_F_INDEX_MASK = 0xffffffffULL, ++ BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK, ++/* BPF_FUNC_perf_event_output for sk_buff input context. */ ++ BPF_F_CTXLEN_MASK = (0xfffffULL << 32), ++}; ++ ++/* Current network namespace */ ++enum { ++ BPF_F_CURRENT_NETNS = (-1L), ++}; ++ ++/* BPF_FUNC_csum_level level values. */ ++enum { ++ BPF_CSUM_LEVEL_QUERY, ++ BPF_CSUM_LEVEL_INC, ++ BPF_CSUM_LEVEL_DEC, ++ BPF_CSUM_LEVEL_RESET, ++}; ++ ++/* BPF_FUNC_skb_adjust_room flags. */ ++enum { ++ BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0), ++ BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 = (1ULL << 1), ++ BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2), ++ BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), ++ BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), ++ BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5), ++ BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6), ++}; ++ ++enum { ++ BPF_ADJ_ROOM_ENCAP_L2_MASK = 0xff, ++ BPF_ADJ_ROOM_ENCAP_L2_SHIFT = 56, ++}; ++ ++#define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \ ++ BPF_ADJ_ROOM_ENCAP_L2_MASK) \ ++ << BPF_ADJ_ROOM_ENCAP_L2_SHIFT) ++ ++/* BPF_FUNC_sysctl_get_name flags. */ ++enum { ++ BPF_F_SYSCTL_BASE_NAME = (1ULL << 0), ++}; ++ ++/* BPF_FUNC__storage_get flags */ ++enum { ++ BPF_LOCAL_STORAGE_GET_F_CREATE = (1ULL << 0), ++ /* BPF_SK_STORAGE_GET_F_CREATE is only kept for backward compatibility ++ * and BPF_LOCAL_STORAGE_GET_F_CREATE must be used instead. ++ */ ++ BPF_SK_STORAGE_GET_F_CREATE = BPF_LOCAL_STORAGE_GET_F_CREATE, ++}; ++ ++/* BPF_FUNC_read_branch_records flags. */ ++enum { ++ BPF_F_GET_BRANCH_RECORDS_SIZE = (1ULL << 0), ++}; ++ ++/* BPF_FUNC_bpf_ringbuf_commit, BPF_FUNC_bpf_ringbuf_discard, and ++ * BPF_FUNC_bpf_ringbuf_output flags. ++ */ ++enum { ++ BPF_RB_NO_WAKEUP = (1ULL << 0), ++ BPF_RB_FORCE_WAKEUP = (1ULL << 1), ++}; ++ ++/* BPF_FUNC_bpf_ringbuf_query flags */ ++enum { ++ BPF_RB_AVAIL_DATA = 0, ++ BPF_RB_RING_SIZE = 1, ++ BPF_RB_CONS_POS = 2, ++ BPF_RB_PROD_POS = 3, ++}; ++ ++/* BPF ring buffer constants */ ++enum { ++ BPF_RINGBUF_BUSY_BIT = (1U << 31), ++ BPF_RINGBUF_DISCARD_BIT = (1U << 30), ++ BPF_RINGBUF_HDR_SZ = 8, ++}; ++ ++/* BPF_FUNC_sk_assign flags in bpf_sk_lookup context. */ ++enum { ++ BPF_SK_LOOKUP_F_REPLACE = (1ULL << 0), ++ BPF_SK_LOOKUP_F_NO_REUSEPORT = (1ULL << 1), ++}; ++ ++/* Mode for BPF_FUNC_skb_adjust_room helper. */ ++enum bpf_adj_room_mode { ++ BPF_ADJ_ROOM_NET, ++ BPF_ADJ_ROOM_MAC, ++}; ++ ++/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */ ++enum bpf_hdr_start_off { ++ BPF_HDR_START_MAC, ++ BPF_HDR_START_NET, ++}; ++ ++/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. 
*/ ++enum bpf_lwt_encap_mode { ++ BPF_LWT_ENCAP_SEG6, ++ BPF_LWT_ENCAP_SEG6_INLINE, ++ BPF_LWT_ENCAP_IP, ++}; ++ ++/* Flags for bpf_bprm_opts_set helper */ ++enum { ++ BPF_F_BPRM_SECUREEXEC = (1ULL << 0), ++}; ++ ++/* Flags for bpf_redirect_map helper */ ++enum { ++ BPF_F_BROADCAST = (1ULL << 3), ++ BPF_F_EXCLUDE_INGRESS = (1ULL << 4), ++}; ++ ++#define __bpf_md_ptr(type, name) \ ++union { \ ++ type name; \ ++ __u64 :64; \ ++} __attribute__((aligned(8))) ++ ++enum { ++ BPF_SKB_TSTAMP_UNSPEC, ++ BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */ ++ /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle, ++ * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC ++ * and try to deduce it by ingress, egress or skb->sk->sk_clockid. ++ */ ++}; ++ ++/* user accessible mirror of in-kernel sk_buff. ++ * new fields can only be added to the end of this structure ++ */ ++struct __sk_buff { ++ __u32 len; ++ __u32 pkt_type; ++ __u32 mark; ++ __u32 queue_mapping; ++ __u32 protocol; ++ __u32 vlan_present; ++ __u32 vlan_tci; ++ __u32 vlan_proto; ++ __u32 priority; ++ __u32 ingress_ifindex; ++ __u32 ifindex; ++ __u32 tc_index; ++ __u32 cb[5]; ++ __u32 hash; ++ __u32 tc_classid; ++ __u32 data; ++ __u32 data_end; ++ __u32 napi_id; ++ ++ /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */ ++ __u32 family; ++ __u32 remote_ip4; /* Stored in network byte order */ ++ __u32 local_ip4; /* Stored in network byte order */ ++ __u32 remote_ip6[4]; /* Stored in network byte order */ ++ __u32 local_ip6[4]; /* Stored in network byte order */ ++ __u32 remote_port; /* Stored in network byte order */ ++ __u32 local_port; /* stored in host byte order */ ++ /* ... here. */ ++ ++ __u32 data_meta; ++ __bpf_md_ptr(struct bpf_flow_keys *, flow_keys); ++ __u64 tstamp; ++ __u32 wire_len; ++ __u32 gso_segs; ++ __bpf_md_ptr(struct bpf_sock *, sk); ++ __u32 gso_size; ++ __u8 tstamp_type; ++ __u32 :24; /* Padding, future use. */ ++ __u64 hwtstamp; ++}; ++ ++struct bpf_tunnel_key { ++ __u32 tunnel_id; ++ union { ++ __u32 remote_ipv4; ++ __u32 remote_ipv6[4]; ++ }; ++ __u8 tunnel_tos; ++ __u8 tunnel_ttl; ++ __u16 tunnel_ext; /* Padding, future use. */ ++ __u32 tunnel_label; ++ union { ++ __u32 local_ipv4; ++ __u32 local_ipv6[4]; ++ }; ++}; ++ ++/* user accessible mirror of in-kernel xfrm_state. ++ * new fields can only be added to the end of this structure ++ */ ++struct bpf_xfrm_state { ++ __u32 reqid; ++ __u32 spi; /* Stored in network byte order */ ++ __u16 family; ++ __u16 ext; /* Padding, future use. */ ++ union { ++ __u32 remote_ipv4; /* Stored in network byte order */ ++ __u32 remote_ipv6[4]; /* Stored in network byte order */ ++ }; ++}; ++ ++/* Generic BPF return codes which all BPF program types may support. ++ * The values are binary compatible with their TC_ACT_* counter-part to ++ * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT ++ * programs. ++ * ++ * XDP is handled seprately, see XDP_*. ++ */ ++enum bpf_ret_code { ++ BPF_OK = 0, ++ /* 1 reserved */ ++ BPF_DROP = 2, ++ /* 3-6 reserved */ ++ BPF_REDIRECT = 7, ++ /* >127 are reserved for prog type specific return codes. ++ * ++ * BPF_LWT_REROUTE: used by BPF_PROG_TYPE_LWT_IN and ++ * BPF_PROG_TYPE_LWT_XMIT to indicate that skb had been ++ * changed and should be routed based on its new L3 header. ++ * (This is an L3 redirect, as opposed to L2 redirect ++ * represented by BPF_REDIRECT above). 
++ */ ++ BPF_LWT_REROUTE = 128, ++}; ++ ++struct bpf_sock { ++ __u32 bound_dev_if; ++ __u32 family; ++ __u32 type; ++ __u32 protocol; ++ __u32 mark; ++ __u32 priority; ++ /* IP address also allows 1 and 2 bytes access */ ++ __u32 src_ip4; ++ __u32 src_ip6[4]; ++ __u32 src_port; /* host byte order */ ++ __be16 dst_port; /* network byte order */ ++ __u16 :16; /* zero padding */ ++ __u32 dst_ip4; ++ __u32 dst_ip6[4]; ++ __u32 state; ++ __s32 rx_queue_mapping; ++}; ++ ++struct bpf_tcp_sock { ++ __u32 snd_cwnd; /* Sending congestion window */ ++ __u32 srtt_us; /* smoothed round trip time << 3 in usecs */ ++ __u32 rtt_min; ++ __u32 snd_ssthresh; /* Slow start size threshold */ ++ __u32 rcv_nxt; /* What we want to receive next */ ++ __u32 snd_nxt; /* Next sequence we send */ ++ __u32 snd_una; /* First byte we want an ack for */ ++ __u32 mss_cache; /* Cached effective mss, not including SACKS */ ++ __u32 ecn_flags; /* ECN status bits. */ ++ __u32 rate_delivered; /* saved rate sample: packets delivered */ ++ __u32 rate_interval_us; /* saved rate sample: time elapsed */ ++ __u32 packets_out; /* Packets which are "in flight" */ ++ __u32 retrans_out; /* Retransmitted packets out */ ++ __u32 total_retrans; /* Total retransmits for entire connection */ ++ __u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn ++ * total number of segments in. ++ */ ++ __u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn ++ * total number of data segments in. ++ */ ++ __u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut ++ * The total number of segments sent. ++ */ ++ __u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut ++ * total number of data segments sent. ++ */ ++ __u32 lost_out; /* Lost packets */ ++ __u32 sacked_out; /* SACK'd packets */ ++ __u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived ++ * sum(delta(rcv_nxt)), or how many bytes ++ * were acked. ++ */ ++ __u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked ++ * sum(delta(snd_una)), or how many bytes ++ * were acked. ++ */ ++ __u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups ++ * total number of DSACK blocks received ++ */ ++ __u32 delivered; /* Total data packets delivered incl. rexmits */ ++ __u32 delivered_ce; /* Like the above but only ECE marked packets */ ++ __u32 icsk_retransmits; /* Number of unrecovered [RTO] timeouts */ ++}; ++ ++struct bpf_sock_tuple { ++ union { ++ struct { ++ __be32 saddr; ++ __be32 daddr; ++ __be16 sport; ++ __be16 dport; ++ } ipv4; ++ struct { ++ __be32 saddr[4]; ++ __be32 daddr[4]; ++ __be16 sport; ++ __be16 dport; ++ } ipv6; ++ }; ++}; ++ ++struct bpf_xdp_sock { ++ __u32 queue_id; ++}; ++ ++#define XDP_PACKET_HEADROOM 256 ++ ++/* User return codes for XDP prog type. ++ * A valid XDP program must return one of these defined values. All other ++ * return codes are reserved for future use. Unknown return codes will ++ * result in packet drops and a warning via bpf_warn_invalid_xdp_action(). ++ */ ++enum xdp_action { ++ XDP_ABORTED = 0, ++ XDP_DROP, ++ XDP_PASS, ++ XDP_TX, ++ XDP_REDIRECT, ++}; ++ ++/* user accessible metadata for XDP packet hook ++ * new fields must be added to the end of this structure ++ */ ++struct xdp_md { ++ __u32 data; ++ __u32 data_end; ++ __u32 data_meta; ++ /* Below access go through struct xdp_rxq_info */ ++ __u32 ingress_ifindex; /* rxq->dev->ifindex */ ++ __u32 rx_queue_index; /* rxq->queue_index */ ++ ++ __u32 egress_ifindex; /* txq->dev->ifindex */ ++}; ++ ++/* DEVMAP map-value layout ++ * ++ * The struct data-layout of map-value is a configuration interface. 
++ * New members can only be added to the end of this structure. ++ */ ++struct bpf_devmap_val { ++ __u32 ifindex; /* device index */ ++ union { ++ int fd; /* prog fd on map write */ ++ __u32 id; /* prog id on map read */ ++ } bpf_prog; ++}; ++ ++/* CPUMAP map-value layout ++ * ++ * The struct data-layout of map-value is a configuration interface. ++ * New members can only be added to the end of this structure. ++ */ ++struct bpf_cpumap_val { ++ __u32 qsize; /* queue size to remote target CPU */ ++ union { ++ int fd; /* prog fd on map write */ ++ __u32 id; /* prog id on map read */ ++ } bpf_prog; ++}; ++ ++enum sk_action { ++ SK_DROP = 0, ++ SK_PASS, ++}; ++ ++/* user accessible metadata for SK_MSG packet hook, new fields must ++ * be added to the end of this structure ++ */ ++struct sk_msg_md { ++ __bpf_md_ptr(void *, data); ++ __bpf_md_ptr(void *, data_end); ++ ++ __u32 family; ++ __u32 remote_ip4; /* Stored in network byte order */ ++ __u32 local_ip4; /* Stored in network byte order */ ++ __u32 remote_ip6[4]; /* Stored in network byte order */ ++ __u32 local_ip6[4]; /* Stored in network byte order */ ++ __u32 remote_port; /* Stored in network byte order */ ++ __u32 local_port; /* stored in host byte order */ ++ __u32 size; /* Total size of sk_msg */ ++ ++ __bpf_md_ptr(struct bpf_sock *, sk); /* current socket */ ++}; ++ ++struct sk_reuseport_md { ++ /* ++ * Start of directly accessible data. It begins from ++ * the tcp/udp header. ++ */ ++ __bpf_md_ptr(void *, data); ++ /* End of directly accessible data */ ++ __bpf_md_ptr(void *, data_end); ++ /* ++ * Total length of packet (starting from the tcp/udp header). ++ * Note that the directly accessible bytes (data_end - data) ++ * could be less than this "len". Those bytes could be ++ * indirectly read by a helper "bpf_skb_load_bytes()". ++ */ ++ __u32 len; ++ /* ++ * Eth protocol in the mac header (network byte order). e.g. ++ * ETH_P_IP(0x0800) and ETH_P_IPV6(0x86DD) ++ */ ++ __u32 eth_protocol; ++ __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */ ++ __u32 bind_inany; /* Is sock bound to an INANY address? */ ++ __u32 hash; /* A hash of the packet 4 tuples */ ++ /* When reuse->migrating_sk is NULL, it is selecting a sk for the ++ * new incoming connection request (e.g. selecting a listen sk for ++ * the received SYN in the TCP case). reuse->sk is one of the sk ++ * in the reuseport group. The bpf prog can use reuse->sk to learn ++ * the local listening ip/port without looking into the skb. ++ * ++ * When reuse->migrating_sk is not NULL, reuse->sk is closed and ++ * reuse->migrating_sk is the socket that needs to be migrated ++ * to another listening socket. migrating_sk could be a fullsock ++ * sk that is fully established or a reqsk that is in-the-middle ++ * of 3-way handshake. 
++ */ ++ __bpf_md_ptr(struct bpf_sock *, sk); ++ __bpf_md_ptr(struct bpf_sock *, migrating_sk); ++}; ++ ++#define BPF_TAG_SIZE 8 ++ ++struct bpf_prog_info { ++ __u32 type; ++ __u32 id; ++ __u8 tag[BPF_TAG_SIZE]; ++ __u32 jited_prog_len; ++ __u32 xlated_prog_len; ++ __aligned_u64 jited_prog_insns; ++ __aligned_u64 xlated_prog_insns; ++ __u64 load_time; /* ns since boottime */ ++ __u32 created_by_uid; ++ __u32 nr_map_ids; ++ __aligned_u64 map_ids; ++ char name[BPF_OBJ_NAME_LEN]; ++ __u32 ifindex; ++ __u32 gpl_compatible:1; ++ __u32 :31; /* alignment pad */ ++ __u64 netns_dev; ++ __u64 netns_ino; ++ __u32 nr_jited_ksyms; ++ __u32 nr_jited_func_lens; ++ __aligned_u64 jited_ksyms; ++ __aligned_u64 jited_func_lens; ++ __u32 btf_id; ++ __u32 func_info_rec_size; ++ __aligned_u64 func_info; ++ __u32 nr_func_info; ++ __u32 nr_line_info; ++ __aligned_u64 line_info; ++ __aligned_u64 jited_line_info; ++ __u32 nr_jited_line_info; ++ __u32 line_info_rec_size; ++ __u32 jited_line_info_rec_size; ++ __u32 nr_prog_tags; ++ __aligned_u64 prog_tags; ++ __u64 run_time_ns; ++ __u64 run_cnt; ++ __u64 recursion_misses; ++ __u32 verified_insns; ++ __u32 attach_btf_obj_id; ++ __u32 attach_btf_id; ++} __attribute__((aligned(8))); ++ ++struct bpf_map_info { ++ __u32 type; ++ __u32 id; ++ __u32 key_size; ++ __u32 value_size; ++ __u32 max_entries; ++ __u32 map_flags; ++ char name[BPF_OBJ_NAME_LEN]; ++ __u32 ifindex; ++ __u32 btf_vmlinux_value_type_id; ++ __u64 netns_dev; ++ __u64 netns_ino; ++ __u32 btf_id; ++ __u32 btf_key_type_id; ++ __u32 btf_value_type_id; ++ __u32 :32; /* alignment pad */ ++ __u64 map_extra; ++} __attribute__((aligned(8))); ++ ++struct bpf_btf_info { ++ __aligned_u64 btf; ++ __u32 btf_size; ++ __u32 id; ++ __aligned_u64 name; ++ __u32 name_len; ++ __u32 kernel_btf; ++} __attribute__((aligned(8))); ++ ++struct bpf_link_info { ++ __u32 type; ++ __u32 id; ++ __u32 prog_id; ++ union { ++ struct { ++ __aligned_u64 tp_name; /* in/out: tp_name buffer ptr */ ++ __u32 tp_name_len; /* in/out: tp_name buffer len */ ++ } raw_tracepoint; ++ struct { ++ __u32 attach_type; ++ __u32 target_obj_id; /* prog_id for PROG_EXT, otherwise btf object id */ ++ __u32 target_btf_id; /* BTF type id inside the object */ ++ } tracing; ++ struct { ++ __u64 cgroup_id; ++ __u32 attach_type; ++ } cgroup; ++ struct { ++ __aligned_u64 target_name; /* in/out: target_name buffer ptr */ ++ __u32 target_name_len; /* in/out: target_name buffer len */ ++ union { ++ struct { ++ __u32 map_id; ++ } map; ++ }; ++ } iter; ++ struct { ++ __u32 netns_ino; ++ __u32 attach_type; ++ } netns; ++ struct { ++ __u32 ifindex; ++ } xdp; ++ }; ++} __attribute__((aligned(8))); ++ ++/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed ++ * by user and intended to be used by socket (e.g. to bind to, depends on ++ * attach type). ++ */ ++struct bpf_sock_addr { ++ __u32 user_family; /* Allows 4-byte read, but no write. */ ++ __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write. ++ * Stored in network byte order. ++ */ ++ __u32 user_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write. ++ * Stored in network byte order. ++ */ ++ __u32 user_port; /* Allows 1,2,4-byte read and 4-byte write. ++ * Stored in network byte order ++ */ ++ __u32 family; /* Allows 4-byte read, but no write */ ++ __u32 type; /* Allows 4-byte read, but no write */ ++ __u32 protocol; /* Allows 4-byte read, but no write */ ++ __u32 msg_src_ip4; /* Allows 1,2,4-byte read and 4-byte write. ++ * Stored in network byte order. 
++ */ ++ __u32 msg_src_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write. ++ * Stored in network byte order. ++ */ ++ __bpf_md_ptr(struct bpf_sock *, sk); ++}; ++ ++/* User bpf_sock_ops struct to access socket values and specify request ops ++ * and their replies. ++ * Some of this fields are in network (bigendian) byte order and may need ++ * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h). ++ * New fields can only be added at the end of this structure ++ */ ++struct bpf_sock_ops { ++ __u32 op; ++ union { ++ __u32 args[4]; /* Optionally passed to bpf program */ ++ __u32 reply; /* Returned by bpf program */ ++ __u32 replylong[4]; /* Optionally returned by bpf prog */ ++ }; ++ __u32 family; ++ __u32 remote_ip4; /* Stored in network byte order */ ++ __u32 local_ip4; /* Stored in network byte order */ ++ __u32 remote_ip6[4]; /* Stored in network byte order */ ++ __u32 local_ip6[4]; /* Stored in network byte order */ ++ __u32 remote_port; /* Stored in network byte order */ ++ __u32 local_port; /* stored in host byte order */ ++ __u32 is_fullsock; /* Some TCP fields are only valid if ++ * there is a full socket. If not, the ++ * fields read as zero. ++ */ ++ __u32 snd_cwnd; ++ __u32 srtt_us; /* Averaged RTT << 3 in usecs */ ++ __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */ ++ __u32 state; ++ __u32 rtt_min; ++ __u32 snd_ssthresh; ++ __u32 rcv_nxt; ++ __u32 snd_nxt; ++ __u32 snd_una; ++ __u32 mss_cache; ++ __u32 ecn_flags; ++ __u32 rate_delivered; ++ __u32 rate_interval_us; ++ __u32 packets_out; ++ __u32 retrans_out; ++ __u32 total_retrans; ++ __u32 segs_in; ++ __u32 data_segs_in; ++ __u32 segs_out; ++ __u32 data_segs_out; ++ __u32 lost_out; ++ __u32 sacked_out; ++ __u32 sk_txhash; ++ __u64 bytes_received; ++ __u64 bytes_acked; ++ __bpf_md_ptr(struct bpf_sock *, sk); ++ /* [skb_data, skb_data_end) covers the whole TCP header. ++ * ++ * BPF_SOCK_OPS_PARSE_HDR_OPT_CB: The packet received ++ * BPF_SOCK_OPS_HDR_OPT_LEN_CB: Not useful because the ++ * header has not been written. ++ * BPF_SOCK_OPS_WRITE_HDR_OPT_CB: The header and options have ++ * been written so far. ++ * BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: The SYNACK that concludes ++ * the 3WHS. ++ * BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: The ACK that concludes ++ * the 3WHS. ++ * ++ * bpf_load_hdr_opt() can also be used to read a particular option. ++ */ ++ __bpf_md_ptr(void *, skb_data); ++ __bpf_md_ptr(void *, skb_data_end); ++ __u32 skb_len; /* The total length of a packet. ++ * It includes the header, options, ++ * and payload. ++ */ ++ __u32 skb_tcp_flags; /* tcp_flags of the header. It provides ++ * an easy way to check for tcp_flags ++ * without parsing skb_data. ++ * ++ * In particular, the skb_tcp_flags ++ * will still be available in ++ * BPF_SOCK_OPS_HDR_OPT_LEN even though ++ * the outgoing header has not ++ * been written yet. ++ */ ++}; ++ ++/* Definitions for bpf_sock_ops_cb_flags */ ++enum { ++ BPF_SOCK_OPS_RTO_CB_FLAG = (1<<0), ++ BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1), ++ BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2), ++ BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3), ++ /* Call bpf for all received TCP headers. The bpf prog will be ++ * called under sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB ++ * ++ * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB ++ * for the header option related helpers that will be useful ++ * to the bpf programs. ++ * ++ * It could be used at the client/active side (i.e. 
connect() side) ++ * when the server told it that the server was in syncookie ++ * mode and required the active side to resend the bpf-written ++ * options. The active side can keep writing the bpf-options until ++ * it received a valid packet from the server side to confirm ++ * the earlier packet (and options) has been received. The later ++ * example patch is using it like this at the active side when the ++ * server is in syncookie mode. ++ * ++ * The bpf prog will usually turn this off in the common cases. ++ */ ++ BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = (1<<4), ++ /* Call bpf when kernel has received a header option that ++ * the kernel cannot handle. The bpf prog will be called under ++ * sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB. ++ * ++ * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB ++ * for the header option related helpers that will be useful ++ * to the bpf programs. ++ */ ++ BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5), ++ /* Call bpf when the kernel is writing header options for the ++ * outgoing packet. The bpf prog will first be called ++ * to reserve space in a skb under ++ * sock_ops->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB. Then ++ * the bpf prog will be called to write the header option(s) ++ * under sock_ops->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB. ++ * ++ * Please refer to the comment in BPF_SOCK_OPS_HDR_OPT_LEN_CB ++ * and BPF_SOCK_OPS_WRITE_HDR_OPT_CB for the header option ++ * related helpers that will be useful to the bpf programs. ++ * ++ * The kernel gets its chance to reserve space and write ++ * options first before the BPF program does. ++ */ ++ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6), ++/* Mask of all currently supported cb flags */ ++ BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F, ++}; ++ ++/* List of known BPF sock_ops operators. ++ * New entries can only be added at the end ++ */ ++enum { ++ BPF_SOCK_OPS_VOID, ++ BPF_SOCK_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or ++ * -1 if default value should be used ++ */ ++ BPF_SOCK_OPS_RWND_INIT, /* Should return initial advertized ++ * window (in packets) or -1 if default ++ * value should be used ++ */ ++ BPF_SOCK_OPS_TCP_CONNECT_CB, /* Calls BPF program right before an ++ * active connection is initialized ++ */ ++ BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, /* Calls BPF program when an ++ * active connection is ++ * established ++ */ ++ BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, /* Calls BPF program when a ++ * passive connection is ++ * established ++ */ ++ BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control ++ * needs ECN ++ */ ++ BPF_SOCK_OPS_BASE_RTT, /* Get base RTT. The correct value is ++ * based on the path and may be ++ * dependent on the congestion control ++ * algorithm. In general it indicates ++ * a congestion threshold. RTTs above ++ * this indicate congestion ++ */ ++ BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered. ++ * Arg1: value of icsk_retransmits ++ * Arg2: value of icsk_rto ++ * Arg3: whether RTO has expired ++ */ ++ BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted. ++ * Arg1: sequence number of 1st byte ++ * Arg2: # segments ++ * Arg3: return value of ++ * tcp_transmit_skb (0 => success) ++ */ ++ BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state. ++ * Arg1: old_state ++ * Arg2: new_state ++ */ ++ BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after ++ * socket transition to LISTEN state. ++ */ ++ BPF_SOCK_OPS_RTT_CB, /* Called on every RTT. ++ */ ++ BPF_SOCK_OPS_PARSE_HDR_OPT_CB, /* Parse the header option. 
++ * It will be called to handle ++ * the packets received at ++ * an already established ++ * connection. ++ * ++ * sock_ops->skb_data: ++ * Referring to the received skb. ++ * It covers the TCP header only. ++ * ++ * bpf_load_hdr_opt() can also ++ * be used to search for a ++ * particular option. ++ */ ++ BPF_SOCK_OPS_HDR_OPT_LEN_CB, /* Reserve space for writing the ++ * header option later in ++ * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. ++ * Arg1: bool want_cookie. (in ++ * writing SYNACK only) ++ * ++ * sock_ops->skb_data: ++ * Not available because no header has ++ * been written yet. ++ * ++ * sock_ops->skb_tcp_flags: ++ * The tcp_flags of the ++ * outgoing skb. (e.g. SYN, ACK, FIN). ++ * ++ * bpf_reserve_hdr_opt() should ++ * be used to reserve space. ++ */ ++ BPF_SOCK_OPS_WRITE_HDR_OPT_CB, /* Write the header options ++ * Arg1: bool want_cookie. (in ++ * writing SYNACK only) ++ * ++ * sock_ops->skb_data: ++ * Referring to the outgoing skb. ++ * It covers the TCP header ++ * that has already been written ++ * by the kernel and the ++ * earlier bpf-progs. ++ * ++ * sock_ops->skb_tcp_flags: ++ * The tcp_flags of the outgoing ++ * skb. (e.g. SYN, ACK, FIN). ++ * ++ * bpf_store_hdr_opt() should ++ * be used to write the ++ * option. ++ * ++ * bpf_load_hdr_opt() can also ++ * be used to search for a ++ * particular option that ++ * has already been written ++ * by the kernel or the ++ * earlier bpf-progs. ++ */ ++}; ++ ++/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect ++ * changes between the TCP and BPF versions. Ideally this should never happen. ++ * If it does, we need to add code to convert them before calling ++ * the BPF sock_ops function. ++ */ ++enum { ++ BPF_TCP_ESTABLISHED = 1, ++ BPF_TCP_SYN_SENT, ++ BPF_TCP_SYN_RECV, ++ BPF_TCP_FIN_WAIT1, ++ BPF_TCP_FIN_WAIT2, ++ BPF_TCP_TIME_WAIT, ++ BPF_TCP_CLOSE, ++ BPF_TCP_CLOSE_WAIT, ++ BPF_TCP_LAST_ACK, ++ BPF_TCP_LISTEN, ++ BPF_TCP_CLOSING, /* Now a valid state */ ++ BPF_TCP_NEW_SYN_RECV, ++ ++ BPF_TCP_MAX_STATES /* Leave at the end! */ ++}; ++ ++enum { ++ TCP_BPF_IW = 1001, /* Set TCP initial congestion window */ ++ TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */ ++ TCP_BPF_DELACK_MAX = 1003, /* Max delay ack in usecs */ ++ TCP_BPF_RTO_MIN = 1004, /* Min delay ack in usecs */ ++ /* Copy the SYN pkt to optval ++ * ++ * BPF_PROG_TYPE_SOCK_OPS only. It is similar to the ++ * bpf_getsockopt(TCP_SAVED_SYN) but it does not limit ++ * to only getting from the saved_syn. It can either get the ++ * syn packet from: ++ * ++ * 1. the just-received SYN packet (only available when writing the ++ * SYNACK). It will be useful when it is not necessary to ++ * save the SYN packet for latter use. It is also the only way ++ * to get the SYN during syncookie mode because the syn ++ * packet cannot be saved during syncookie. ++ * ++ * OR ++ * ++ * 2. the earlier saved syn which was done by ++ * bpf_setsockopt(TCP_SAVE_SYN). ++ * ++ * The bpf_getsockopt(TCP_BPF_SYN*) option will hide where the ++ * SYN packet is obtained. ++ * ++ * If the bpf-prog does not need the IP[46] header, the ++ * bpf-prog can avoid parsing the IP header by using ++ * TCP_BPF_SYN. Otherwise, the bpf-prog can get both ++ * IP[46] and TCP header by using TCP_BPF_SYN_IP. ++ * ++ * >0: Total number of bytes copied ++ * -ENOSPC: Not enough space in optval. Only optlen number of ++ * bytes is copied. ++ * -ENOENT: The SYN skb is not available now and the earlier SYN pkt ++ * is not saved by setsockopt(TCP_SAVE_SYN). 
++ */ ++ TCP_BPF_SYN = 1005, /* Copy the TCP header */ ++ TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */ ++ TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */ ++}; ++ ++enum { ++ BPF_LOAD_HDR_OPT_TCP_SYN = (1ULL << 0), ++}; ++ ++/* args[0] value during BPF_SOCK_OPS_HDR_OPT_LEN_CB and ++ * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. ++ */ ++enum { ++ BPF_WRITE_HDR_TCP_CURRENT_MSS = 1, /* Kernel is finding the ++ * total option spaces ++ * required for an established ++ * sk in order to calculate the ++ * MSS. No skb is actually ++ * sent. ++ */ ++ BPF_WRITE_HDR_TCP_SYNACK_COOKIE = 2, /* Kernel is in syncookie mode ++ * when sending a SYN. ++ */ ++}; ++ ++struct bpf_perf_event_value { ++ __u64 counter; ++ __u64 enabled; ++ __u64 running; ++}; ++ ++enum { ++ BPF_DEVCG_ACC_MKNOD = (1ULL << 0), ++ BPF_DEVCG_ACC_READ = (1ULL << 1), ++ BPF_DEVCG_ACC_WRITE = (1ULL << 2), ++}; ++ ++enum { ++ BPF_DEVCG_DEV_BLOCK = (1ULL << 0), ++ BPF_DEVCG_DEV_CHAR = (1ULL << 1), ++}; ++ ++struct bpf_cgroup_dev_ctx { ++ /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ ++ __u32 access_type; ++ __u32 major; ++ __u32 minor; ++}; ++ ++struct bpf_raw_tracepoint_args { ++ __u64 args[0]; ++}; ++ ++/* DIRECT: Skip the FIB rules and go to FIB table associated with device ++ * OUTPUT: Do lookup from egress perspective; default is ingress ++ */ ++enum { ++ BPF_FIB_LOOKUP_DIRECT = (1U << 0), ++ BPF_FIB_LOOKUP_OUTPUT = (1U << 1), ++}; ++ ++enum { ++ BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ ++ BPF_FIB_LKUP_RET_BLACKHOLE, /* dest is blackholed; can be dropped */ ++ BPF_FIB_LKUP_RET_UNREACHABLE, /* dest is unreachable; can be dropped */ ++ BPF_FIB_LKUP_RET_PROHIBIT, /* dest not allowed; can be dropped */ ++ BPF_FIB_LKUP_RET_NOT_FWDED, /* packet is not forwarded */ ++ BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */ ++ BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ ++ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ ++ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ ++}; ++ ++struct bpf_fib_lookup { ++ /* input: network family for lookup (AF_INET, AF_INET6) ++ * output: network family of egress nexthop ++ */ ++ __u8 family; ++ ++ /* set if lookup is to consider L4 data - e.g., FIB rules */ ++ __u8 l4_protocol; ++ __be16 sport; ++ __be16 dport; ++ ++ union { /* used for MTU check */ ++ /* input to lookup */ ++ __u16 tot_len; /* L3 length from network hdr (iph->tot_len) */ ++ ++ /* output: MTU value */ ++ __u16 mtu_result; ++ }; ++ /* input: L3 device index for lookup ++ * output: device index from FIB lookup ++ */ ++ __u32 ifindex; ++ ++ union { ++ /* inputs to lookup */ ++ __u8 tos; /* AF_INET */ ++ __be32 flowinfo; /* AF_INET6, flow_label + priority */ ++ ++ /* output: metric of fib result (IPv4/IPv6 only) */ ++ __u32 rt_metric; ++ }; ++ ++ union { ++ __be32 ipv4_src; ++ __u32 ipv6_src[4]; /* in6_addr; network order */ ++ }; ++ ++ /* input to bpf_fib_lookup, ipv{4,6}_dst is destination address in ++ * network header. 
output: bpf_fib_lookup sets to gateway address ++ * if FIB lookup returns gateway route ++ */ ++ union { ++ __be32 ipv4_dst; ++ __u32 ipv6_dst[4]; /* in6_addr; network order */ ++ }; ++ ++ /* output */ ++ __be16 h_vlan_proto; ++ __be16 h_vlan_TCI; ++ __u8 smac[6]; /* ETH_ALEN */ ++ __u8 dmac[6]; /* ETH_ALEN */ ++}; ++ ++struct bpf_redir_neigh { ++ /* network family for lookup (AF_INET, AF_INET6) */ ++ __u32 nh_family; ++ /* network address of nexthop; skips fib lookup to find gateway */ ++ union { ++ __be32 ipv4_nh; ++ __u32 ipv6_nh[4]; /* in6_addr; network order */ ++ }; ++}; ++ ++/* bpf_check_mtu flags*/ ++enum bpf_check_mtu_flags { ++ BPF_MTU_CHK_SEGS = (1U << 0), ++}; ++ ++enum bpf_check_mtu_ret { ++ BPF_MTU_CHK_RET_SUCCESS, /* check and lookup successful */ ++ BPF_MTU_CHK_RET_FRAG_NEEDED, /* fragmentation required to fwd */ ++ BPF_MTU_CHK_RET_SEGS_TOOBIG, /* GSO re-segmentation needed to fwd */ ++}; ++ ++enum bpf_task_fd_type { ++ BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ ++ BPF_FD_TYPE_TRACEPOINT, /* tp name */ ++ BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */ ++ BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */ ++ BPF_FD_TYPE_UPROBE, /* filename + offset */ ++ BPF_FD_TYPE_URETPROBE, /* filename + offset */ ++}; ++ ++enum { ++ BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG = (1U << 0), ++ BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL = (1U << 1), ++ BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP = (1U << 2), ++}; ++ ++struct bpf_flow_keys { ++ __u16 nhoff; ++ __u16 thoff; ++ __u16 addr_proto; /* ETH_P_* of valid addrs */ ++ __u8 is_frag; ++ __u8 is_first_frag; ++ __u8 is_encap; ++ __u8 ip_proto; ++ __be16 n_proto; ++ __be16 sport; ++ __be16 dport; ++ union { ++ struct { ++ __be32 ipv4_src; ++ __be32 ipv4_dst; ++ }; ++ struct { ++ __u32 ipv6_src[4]; /* in6_addr; network order */ ++ __u32 ipv6_dst[4]; /* in6_addr; network order */ ++ }; ++ }; ++ __u32 flags; ++ __be32 flow_label; ++}; ++ ++struct bpf_func_info { ++ __u32 insn_off; ++ __u32 type_id; ++}; ++ ++#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10) ++#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff) ++ ++struct bpf_line_info { ++ __u32 insn_off; ++ __u32 file_name_off; ++ __u32 line_off; ++ __u32 line_col; ++}; ++ ++struct bpf_spin_lock { ++ __u32 val; ++}; ++ ++struct bpf_timer { ++ __u64 :64; ++ __u64 :64; ++} __attribute__((aligned(8))); ++ ++struct bpf_dynptr { ++ __u64 :64; ++ __u64 :64; ++} __attribute__((aligned(8))); ++ ++struct bpf_sysctl { ++ __u32 write; /* Sysctl is being read (= 0) or written (= 1). ++ * Allows 1,2,4-byte read, but no write. ++ */ ++ __u32 file_pos; /* Sysctl file position to read from, write to. ++ * Allows 1,2,4-byte read an 4-byte write. ++ */ ++}; ++ ++struct bpf_sockopt { ++ __bpf_md_ptr(struct bpf_sock *, sk); ++ __bpf_md_ptr(void *, optval); ++ __bpf_md_ptr(void *, optval_end); ++ ++ __s32 level; ++ __s32 optname; ++ __s32 optlen; ++ __s32 retval; ++}; ++ ++struct bpf_pidns_info { ++ __u32 pid; ++ __u32 tgid; ++}; ++ ++/* User accessible data for SK_LOOKUP programs. Add new fields at the end. 
*/ ++struct bpf_sk_lookup { ++ union { ++ __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ ++ __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */ ++ }; ++ ++ __u32 family; /* Protocol family (AF_INET, AF_INET6) */ ++ __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ ++ __u32 remote_ip4; /* Network byte order */ ++ __u32 remote_ip6[4]; /* Network byte order */ ++ __be16 remote_port; /* Network byte order */ ++ __u16 :16; /* Zero padding */ ++ __u32 local_ip4; /* Network byte order */ ++ __u32 local_ip6[4]; /* Network byte order */ ++ __u32 local_port; /* Host byte order */ ++ __u32 ingress_ifindex; /* The arriving interface. Determined by inet_iif. */ ++}; ++ ++/* ++ * struct btf_ptr is used for typed pointer representation; the ++ * type id is used to render the pointer data as the appropriate type ++ * via the bpf_snprintf_btf() helper described above. A flags field - ++ * potentially to specify additional details about the BTF pointer ++ * (rather than its mode of display) - is included for future use. ++ * Display flags - BTF_F_* - are passed to bpf_snprintf_btf separately. ++ */ ++struct btf_ptr { ++ void *ptr; ++ __u32 type_id; ++ __u32 flags; /* BTF ptr flags; unused at present. */ ++}; ++ ++/* ++ * Flags to control bpf_snprintf_btf() behaviour. ++ * - BTF_F_COMPACT: no formatting around type information ++ * - BTF_F_NONAME: no struct/union member names/types ++ * - BTF_F_PTR_RAW: show raw (unobfuscated) pointer values; ++ * equivalent to %px. ++ * - BTF_F_ZERO: show zero-valued struct/union members; they ++ * are not displayed by default ++ */ ++enum { ++ BTF_F_COMPACT = (1ULL << 0), ++ BTF_F_NONAME = (1ULL << 1), ++ BTF_F_PTR_RAW = (1ULL << 2), ++ BTF_F_ZERO = (1ULL << 3), ++}; ++ ++/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value ++ * has to be adjusted by relocations. It is emitted by llvm and passed to ++ * libbpf and later to the kernel. ++ */ ++enum bpf_core_relo_kind { ++ BPF_CORE_FIELD_BYTE_OFFSET = 0, /* field byte offset */ ++ BPF_CORE_FIELD_BYTE_SIZE = 1, /* field size in bytes */ ++ BPF_CORE_FIELD_EXISTS = 2, /* field existence in target kernel */ ++ BPF_CORE_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ ++ BPF_CORE_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ ++ BPF_CORE_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ ++ BPF_CORE_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */ ++ BPF_CORE_TYPE_ID_TARGET = 7, /* type ID in target kernel */ ++ BPF_CORE_TYPE_EXISTS = 8, /* type existence in target kernel */ ++ BPF_CORE_TYPE_SIZE = 9, /* type size in bytes */ ++ BPF_CORE_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ ++ BPF_CORE_ENUMVAL_VALUE = 11, /* enum value integer value */ ++ BPF_CORE_TYPE_MATCHES = 12, /* type match in target kernel */ ++}; ++ ++/* ++ * "struct bpf_core_relo" is used to pass relocation data form LLVM to libbpf ++ * and from libbpf to the kernel. ++ * ++ * CO-RE relocation captures the following data: ++ * - insn_off - instruction offset (in bytes) within a BPF program that needs ++ * its insn->imm field to be relocated with actual field info; ++ * - type_id - BTF type ID of the "root" (containing) entity of a relocatable ++ * type or field; ++ * - access_str_off - offset into corresponding .BTF string section. String ++ * interpretation depends on specific relocation kind: ++ * - for field-based relocations, string encodes an accessed field using ++ * a sequence of field and array indices, separated by colon (:). 
It's ++ * conceptually very close to LLVM's getelementptr ([0]) instruction's ++ * arguments for identifying offset to a field. ++ * - for type-based relocations, strings is expected to be just "0"; ++ * - for enum value-based relocations, string contains an index of enum ++ * value within its enum type; ++ * - kind - one of enum bpf_core_relo_kind; ++ * ++ * Example: ++ * struct sample { ++ * int a; ++ * struct { ++ * int b[10]; ++ * }; ++ * }; ++ * ++ * struct sample *s = ...; ++ * int *x = &s->a; // encoded as "0:0" (a is field #0) ++ * int *y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, ++ * // b is field #0 inside anon struct, accessing elem #5) ++ * int *z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) ++ * ++ * type_id for all relocs in this example will capture BTF type id of ++ * `struct sample`. ++ * ++ * Such relocation is emitted when using __builtin_preserve_access_index() ++ * Clang built-in, passing expression that captures field address, e.g.: ++ * ++ * bpf_probe_read(&dst, sizeof(dst), ++ * __builtin_preserve_access_index(&src->a.b.c)); ++ * ++ * In this case Clang will emit field relocation recording necessary data to ++ * be able to find offset of embedded `a.b.c` field within `src` struct. ++ * ++ * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction ++ */ ++struct bpf_core_relo { ++ __u32 insn_off; ++ __u32 type_id; ++ __u32 access_str_off; ++ enum bpf_core_relo_kind kind; ++}; ++ ++#endif /* _UAPI__LINUX_BPF_H__ */ +diff --git a/src/cc/libbpf/include/uapi/linux/bpf_common.h b/src/cc/libbpf/include/uapi/linux/bpf_common.h +new file mode 100644 +index 0000000..ee97668 +--- /dev/null ++++ b/src/cc/libbpf/include/uapi/linux/bpf_common.h +@@ -0,0 +1,57 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef _UAPI__LINUX_BPF_COMMON_H__ ++#define _UAPI__LINUX_BPF_COMMON_H__ ++ ++/* Instruction classes */ ++#define BPF_CLASS(code) ((code) & 0x07) ++#define BPF_LD 0x00 ++#define BPF_LDX 0x01 ++#define BPF_ST 0x02 ++#define BPF_STX 0x03 ++#define BPF_ALU 0x04 ++#define BPF_JMP 0x05 ++#define BPF_RET 0x06 ++#define BPF_MISC 0x07 ++ ++/* ld/ldx fields */ ++#define BPF_SIZE(code) ((code) & 0x18) ++#define BPF_W 0x00 /* 32-bit */ ++#define BPF_H 0x08 /* 16-bit */ ++#define BPF_B 0x10 /* 8-bit */ ++/* eBPF BPF_DW 0x18 64-bit */ ++#define BPF_MODE(code) ((code) & 0xe0) ++#define BPF_IMM 0x00 ++#define BPF_ABS 0x20 ++#define BPF_IND 0x40 ++#define BPF_MEM 0x60 ++#define BPF_LEN 0x80 ++#define BPF_MSH 0xa0 ++ ++/* alu/jmp fields */ ++#define BPF_OP(code) ((code) & 0xf0) ++#define BPF_ADD 0x00 ++#define BPF_SUB 0x10 ++#define BPF_MUL 0x20 ++#define BPF_DIV 0x30 ++#define BPF_OR 0x40 ++#define BPF_AND 0x50 ++#define BPF_LSH 0x60 ++#define BPF_RSH 0x70 ++#define BPF_NEG 0x80 ++#define BPF_MOD 0x90 ++#define BPF_XOR 0xa0 ++ ++#define BPF_JA 0x00 ++#define BPF_JEQ 0x10 ++#define BPF_JGT 0x20 ++#define BPF_JGE 0x30 ++#define BPF_JSET 0x40 ++#define BPF_SRC(code) ((code) & 0x08) ++#define BPF_K 0x00 ++#define BPF_X 0x08 ++ ++#ifndef BPF_MAXINSNS ++#define BPF_MAXINSNS 4096 ++#endif ++ ++#endif /* _UAPI__LINUX_BPF_COMMON_H__ */ +diff --git a/src/cc/libbpf/include/uapi/linux/btf.h b/src/cc/libbpf/include/uapi/linux/btf.h +new file mode 100644 +index 0000000..ec1798b +--- /dev/null ++++ b/src/cc/libbpf/include/uapi/linux/btf.h +@@ -0,0 +1,200 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* Copyright (c) 2018 Facebook */ ++#ifndef _UAPI__LINUX_BTF_H__ ++#define _UAPI__LINUX_BTF_H__ ++ ++#include ++ ++#define 
BTF_MAGIC 0xeB9F ++#define BTF_VERSION 1 ++ ++struct btf_header { ++ __u16 magic; ++ __u8 version; ++ __u8 flags; ++ __u32 hdr_len; ++ ++ /* All offsets are in bytes relative to the end of this header */ ++ __u32 type_off; /* offset of type section */ ++ __u32 type_len; /* length of type section */ ++ __u32 str_off; /* offset of string section */ ++ __u32 str_len; /* length of string section */ ++}; ++ ++/* Max # of type identifier */ ++#define BTF_MAX_TYPE 0x000fffff ++/* Max offset into the string section */ ++#define BTF_MAX_NAME_OFFSET 0x00ffffff ++/* Max # of struct/union/enum members or func args */ ++#define BTF_MAX_VLEN 0xffff ++ ++struct btf_type { ++ __u32 name_off; ++ /* "info" bits arrangement ++ * bits 0-15: vlen (e.g. # of struct's members) ++ * bits 16-23: unused ++ * bits 24-28: kind (e.g. int, ptr, array...etc) ++ * bits 29-30: unused ++ * bit 31: kind_flag, currently used by ++ * struct, union, enum, fwd and enum64 ++ */ ++ __u32 info; ++ /* "size" is used by INT, ENUM, STRUCT, UNION, DATASEC and ENUM64. ++ * "size" tells the size of the type it is describing. ++ * ++ * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, ++ * FUNC, FUNC_PROTO, VAR, DECL_TAG and TYPE_TAG. ++ * "type" is a type_id referring to another type. ++ */ ++ union { ++ __u32 size; ++ __u32 type; ++ }; ++}; ++ ++#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f) ++#define BTF_INFO_VLEN(info) ((info) & 0xffff) ++#define BTF_INFO_KFLAG(info) ((info) >> 31) ++ ++enum { ++ BTF_KIND_UNKN = 0, /* Unknown */ ++ BTF_KIND_INT = 1, /* Integer */ ++ BTF_KIND_PTR = 2, /* Pointer */ ++ BTF_KIND_ARRAY = 3, /* Array */ ++ BTF_KIND_STRUCT = 4, /* Struct */ ++ BTF_KIND_UNION = 5, /* Union */ ++ BTF_KIND_ENUM = 6, /* Enumeration up to 32-bit values */ ++ BTF_KIND_FWD = 7, /* Forward */ ++ BTF_KIND_TYPEDEF = 8, /* Typedef */ ++ BTF_KIND_VOLATILE = 9, /* Volatile */ ++ BTF_KIND_CONST = 10, /* Const */ ++ BTF_KIND_RESTRICT = 11, /* Restrict */ ++ BTF_KIND_FUNC = 12, /* Function */ ++ BTF_KIND_FUNC_PROTO = 13, /* Function Proto */ ++ BTF_KIND_VAR = 14, /* Variable */ ++ BTF_KIND_DATASEC = 15, /* Section */ ++ BTF_KIND_FLOAT = 16, /* Floating point */ ++ BTF_KIND_DECL_TAG = 17, /* Decl Tag */ ++ BTF_KIND_TYPE_TAG = 18, /* Type Tag */ ++ BTF_KIND_ENUM64 = 19, /* Enumeration up to 64-bit values */ ++ ++ NR_BTF_KINDS, ++ BTF_KIND_MAX = NR_BTF_KINDS - 1, ++}; ++ ++/* For some specific BTF_KIND, "struct btf_type" is immediately ++ * followed by extra data. ++ */ ++ ++/* BTF_KIND_INT is followed by a u32 and the following ++ * is the 32 bits arrangement: ++ */ ++#define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24) ++#define BTF_INT_OFFSET(VAL) (((VAL) & 0x00ff0000) >> 16) ++#define BTF_INT_BITS(VAL) ((VAL) & 0x000000ff) ++ ++/* Attributes stored in the BTF_INT_ENCODING */ ++#define BTF_INT_SIGNED (1 << 0) ++#define BTF_INT_CHAR (1 << 1) ++#define BTF_INT_BOOL (1 << 2) ++ ++/* BTF_KIND_ENUM is followed by multiple "struct btf_enum". ++ * The exact number of btf_enum is stored in the vlen (of the ++ * info in "struct btf_type"). ++ */ ++struct btf_enum { ++ __u32 name_off; ++ __s32 val; ++}; ++ ++/* BTF_KIND_ARRAY is followed by one "struct btf_array" */ ++struct btf_array { ++ __u32 type; ++ __u32 index_type; ++ __u32 nelems; ++}; ++ ++/* BTF_KIND_STRUCT and BTF_KIND_UNION are followed ++ * by multiple "struct btf_member". The exact number ++ * of btf_member is stored in the vlen (of the info in ++ * "struct btf_type"). 
++ */ ++struct btf_member { ++ __u32 name_off; ++ __u32 type; ++ /* If the type info kind_flag is set, the btf_member offset ++ * contains both member bitfield size and bit offset. The ++ * bitfield size is set for bitfield members. If the type ++ * info kind_flag is not set, the offset contains only bit ++ * offset. ++ */ ++ __u32 offset; ++}; ++ ++/* If the struct/union type info kind_flag is set, the ++ * following two macros are used to access bitfield_size ++ * and bit_offset from btf_member.offset. ++ */ ++#define BTF_MEMBER_BITFIELD_SIZE(val) ((val) >> 24) ++#define BTF_MEMBER_BIT_OFFSET(val) ((val) & 0xffffff) ++ ++/* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param". ++ * The exact number of btf_param is stored in the vlen (of the ++ * info in "struct btf_type"). ++ */ ++struct btf_param { ++ __u32 name_off; ++ __u32 type; ++}; ++ ++enum { ++ BTF_VAR_STATIC = 0, ++ BTF_VAR_GLOBAL_ALLOCATED = 1, ++ BTF_VAR_GLOBAL_EXTERN = 2, ++}; ++ ++enum btf_func_linkage { ++ BTF_FUNC_STATIC = 0, ++ BTF_FUNC_GLOBAL = 1, ++ BTF_FUNC_EXTERN = 2, ++}; ++ ++/* BTF_KIND_VAR is followed by a single "struct btf_var" to describe ++ * additional information related to the variable such as its linkage. ++ */ ++struct btf_var { ++ __u32 linkage; ++}; ++ ++/* BTF_KIND_DATASEC is followed by multiple "struct btf_var_secinfo" ++ * to describe all BTF_KIND_VAR types it contains along with it's ++ * in-section offset as well as size. ++ */ ++struct btf_var_secinfo { ++ __u32 type; ++ __u32 offset; ++ __u32 size; ++}; ++ ++/* BTF_KIND_DECL_TAG is followed by a single "struct btf_decl_tag" to describe ++ * additional information related to the tag applied location. ++ * If component_idx == -1, the tag is applied to a struct, union, ++ * variable or function. Otherwise, it is applied to a struct/union ++ * member or a func argument, and component_idx indicates which member ++ * or argument (0 ... vlen-1). ++ */ ++struct btf_decl_tag { ++ __s32 component_idx; ++}; ++ ++/* BTF_KIND_ENUM64 is followed by multiple "struct btf_enum64". ++ * The exact number of btf_enum64 is stored in the vlen (of the ++ * info in "struct btf_type"). ++ */ ++struct btf_enum64 { ++ __u32 name_off; ++ __u32 val_lo32; ++ __u32 val_hi32; ++}; ++ ++#endif /* _UAPI__LINUX_BTF_H__ */ +diff --git a/src/cc/libbpf/include/uapi/linux/if_link.h b/src/cc/libbpf/include/uapi/linux/if_link.h +new file mode 100644 +index 0000000..0242f31 +--- /dev/null ++++ b/src/cc/libbpf/include/uapi/linux/if_link.h +@@ -0,0 +1,1282 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef _UAPI_LINUX_IF_LINK_H ++#define _UAPI_LINUX_IF_LINK_H ++ ++#include ++#include ++ ++/* This struct should be in sync with struct rtnl_link_stats64 */ ++struct rtnl_link_stats { ++ __u32 rx_packets; ++ __u32 tx_packets; ++ __u32 rx_bytes; ++ __u32 tx_bytes; ++ __u32 rx_errors; ++ __u32 tx_errors; ++ __u32 rx_dropped; ++ __u32 tx_dropped; ++ __u32 multicast; ++ __u32 collisions; ++ /* detailed rx_errors: */ ++ __u32 rx_length_errors; ++ __u32 rx_over_errors; ++ __u32 rx_crc_errors; ++ __u32 rx_frame_errors; ++ __u32 rx_fifo_errors; ++ __u32 rx_missed_errors; ++ ++ /* detailed tx_errors */ ++ __u32 tx_aborted_errors; ++ __u32 tx_carrier_errors; ++ __u32 tx_fifo_errors; ++ __u32 tx_heartbeat_errors; ++ __u32 tx_window_errors; ++ ++ /* for cslip etc */ ++ __u32 rx_compressed; ++ __u32 tx_compressed; ++ ++ __u32 rx_nohandler; ++}; ++ ++/** ++ * struct rtnl_link_stats64 - The main device statistics structure. 
++ * ++ * @rx_packets: Number of good packets received by the interface. ++ * For hardware interfaces counts all good packets received from the device ++ * by the host, including packets which host had to drop at various stages ++ * of processing (even in the driver). ++ * ++ * @tx_packets: Number of packets successfully transmitted. ++ * For hardware interfaces counts packets which host was able to successfully ++ * hand over to the device, which does not necessarily mean that packets ++ * had been successfully transmitted out of the device, only that device ++ * acknowledged it copied them out of host memory. ++ * ++ * @rx_bytes: Number of good received bytes, corresponding to @rx_packets. ++ * ++ * For IEEE 802.3 devices should count the length of Ethernet Frames ++ * excluding the FCS. ++ * ++ * @tx_bytes: Number of good transmitted bytes, corresponding to @tx_packets. ++ * ++ * For IEEE 802.3 devices should count the length of Ethernet Frames ++ * excluding the FCS. ++ * ++ * @rx_errors: Total number of bad packets received on this network device. ++ * This counter must include events counted by @rx_length_errors, ++ * @rx_crc_errors, @rx_frame_errors and other errors not otherwise ++ * counted. ++ * ++ * @tx_errors: Total number of transmit problems. ++ * This counter must include events counter by @tx_aborted_errors, ++ * @tx_carrier_errors, @tx_fifo_errors, @tx_heartbeat_errors, ++ * @tx_window_errors and other errors not otherwise counted. ++ * ++ * @rx_dropped: Number of packets received but not processed, ++ * e.g. due to lack of resources or unsupported protocol. ++ * For hardware interfaces this counter may include packets discarded ++ * due to L2 address filtering but should not include packets dropped ++ * by the device due to buffer exhaustion which are counted separately in ++ * @rx_missed_errors (since procfs folds those two counters together). ++ * ++ * @tx_dropped: Number of packets dropped on their way to transmission, ++ * e.g. due to lack of resources. ++ * ++ * @multicast: Multicast packets received. ++ * For hardware interfaces this statistic is commonly calculated ++ * at the device level (unlike @rx_packets) and therefore may include ++ * packets which did not reach the host. ++ * ++ * For IEEE 802.3 devices this counter may be equivalent to: ++ * ++ * - 30.3.1.1.21 aMulticastFramesReceivedOK ++ * ++ * @collisions: Number of collisions during packet transmissions. ++ * ++ * @rx_length_errors: Number of packets dropped due to invalid length. ++ * Part of aggregate "frame" errors in `/proc/net/dev`. ++ * ++ * For IEEE 802.3 devices this counter should be equivalent to a sum ++ * of the following attributes: ++ * ++ * - 30.3.1.1.23 aInRangeLengthErrors ++ * - 30.3.1.1.24 aOutOfRangeLengthField ++ * - 30.3.1.1.25 aFrameTooLongErrors ++ * ++ * @rx_over_errors: Receiver FIFO overflow event counter. ++ * ++ * Historically the count of overflow events. Such events may be ++ * reported in the receive descriptors or via interrupts, and may ++ * not correspond one-to-one with dropped packets. ++ * ++ * The recommended interpretation for high speed interfaces is - ++ * number of packets dropped because they did not fit into buffers ++ * provided by the host, e.g. packets larger than MTU or next buffer ++ * in the ring was not available for a scatter transfer. ++ * ++ * Part of aggregate "frame" errors in `/proc/net/dev`. ++ * ++ * This statistics was historically used interchangeably with ++ * @rx_fifo_errors. 
++ * ++ * This statistic corresponds to hardware events and is not commonly used ++ * on software devices. ++ * ++ * @rx_crc_errors: Number of packets received with a CRC error. ++ * Part of aggregate "frame" errors in `/proc/net/dev`. ++ * ++ * For IEEE 802.3 devices this counter must be equivalent to: ++ * ++ * - 30.3.1.1.6 aFrameCheckSequenceErrors ++ * ++ * @rx_frame_errors: Receiver frame alignment errors. ++ * Part of aggregate "frame" errors in `/proc/net/dev`. ++ * ++ * For IEEE 802.3 devices this counter should be equivalent to: ++ * ++ * - 30.3.1.1.7 aAlignmentErrors ++ * ++ * @rx_fifo_errors: Receiver FIFO error counter. ++ * ++ * Historically the count of overflow events. Those events may be ++ * reported in the receive descriptors or via interrupts, and may ++ * not correspond one-to-one with dropped packets. ++ * ++ * This statistics was used interchangeably with @rx_over_errors. ++ * Not recommended for use in drivers for high speed interfaces. ++ * ++ * This statistic is used on software devices, e.g. to count software ++ * packet queue overflow (can) or sequencing errors (GRE). ++ * ++ * @rx_missed_errors: Count of packets missed by the host. ++ * Folded into the "drop" counter in `/proc/net/dev`. ++ * ++ * Counts number of packets dropped by the device due to lack ++ * of buffer space. This usually indicates that the host interface ++ * is slower than the network interface, or host is not keeping up ++ * with the receive packet rate. ++ * ++ * This statistic corresponds to hardware events and is not used ++ * on software devices. ++ * ++ * @tx_aborted_errors: ++ * Part of aggregate "carrier" errors in `/proc/net/dev`. ++ * For IEEE 802.3 devices capable of half-duplex operation this counter ++ * must be equivalent to: ++ * ++ * - 30.3.1.1.11 aFramesAbortedDueToXSColls ++ * ++ * High speed interfaces may use this counter as a general device ++ * discard counter. ++ * ++ * @tx_carrier_errors: Number of frame transmission errors due to loss ++ * of carrier during transmission. ++ * Part of aggregate "carrier" errors in `/proc/net/dev`. ++ * ++ * For IEEE 802.3 devices this counter must be equivalent to: ++ * ++ * - 30.3.1.1.13 aCarrierSenseErrors ++ * ++ * @tx_fifo_errors: Number of frame transmission errors due to device ++ * FIFO underrun / underflow. This condition occurs when the device ++ * begins transmission of a frame but is unable to deliver the ++ * entire frame to the transmitter in time for transmission. ++ * Part of aggregate "carrier" errors in `/proc/net/dev`. ++ * ++ * @tx_heartbeat_errors: Number of Heartbeat / SQE Test errors for ++ * old half-duplex Ethernet. ++ * Part of aggregate "carrier" errors in `/proc/net/dev`. ++ * ++ * For IEEE 802.3 devices possibly equivalent to: ++ * ++ * - 30.3.2.1.4 aSQETestErrors ++ * ++ * @tx_window_errors: Number of frame transmission errors due ++ * to late collisions (for Ethernet - after the first 64B of transmission). ++ * Part of aggregate "carrier" errors in `/proc/net/dev`. ++ * ++ * For IEEE 802.3 devices this counter must be equivalent to: ++ * ++ * - 30.3.1.1.10 aLateCollisions ++ * ++ * @rx_compressed: Number of correctly received compressed packets. ++ * This counters is only meaningful for interfaces which support ++ * packet compression (e.g. CSLIP, PPP). ++ * ++ * @tx_compressed: Number of transmitted compressed packets. ++ * This counters is only meaningful for interfaces which support ++ * packet compression (e.g. CSLIP, PPP). 
++ * ++ * @rx_nohandler: Number of packets received on the interface ++ * but dropped by the networking stack because the device is ++ * not designated to receive packets (e.g. backup link in a bond). ++ */ ++struct rtnl_link_stats64 { ++ __u64 rx_packets; ++ __u64 tx_packets; ++ __u64 rx_bytes; ++ __u64 tx_bytes; ++ __u64 rx_errors; ++ __u64 tx_errors; ++ __u64 rx_dropped; ++ __u64 tx_dropped; ++ __u64 multicast; ++ __u64 collisions; ++ ++ /* detailed rx_errors: */ ++ __u64 rx_length_errors; ++ __u64 rx_over_errors; ++ __u64 rx_crc_errors; ++ __u64 rx_frame_errors; ++ __u64 rx_fifo_errors; ++ __u64 rx_missed_errors; ++ ++ /* detailed tx_errors */ ++ __u64 tx_aborted_errors; ++ __u64 tx_carrier_errors; ++ __u64 tx_fifo_errors; ++ __u64 tx_heartbeat_errors; ++ __u64 tx_window_errors; ++ ++ /* for cslip etc */ ++ __u64 rx_compressed; ++ __u64 tx_compressed; ++ __u64 rx_nohandler; ++}; ++ ++/* The struct should be in sync with struct ifmap */ ++struct rtnl_link_ifmap { ++ __u64 mem_start; ++ __u64 mem_end; ++ __u64 base_addr; ++ __u16 irq; ++ __u8 dma; ++ __u8 port; ++}; ++ ++/* ++ * IFLA_AF_SPEC ++ * Contains nested attributes for address family specific attributes. ++ * Each address family may create a attribute with the address family ++ * number as type and create its own attribute structure in it. ++ * ++ * Example: ++ * [IFLA_AF_SPEC] = { ++ * [AF_INET] = { ++ * [IFLA_INET_CONF] = ..., ++ * }, ++ * [AF_INET6] = { ++ * [IFLA_INET6_FLAGS] = ..., ++ * [IFLA_INET6_CONF] = ..., ++ * } ++ * } ++ */ ++ ++enum { ++ IFLA_UNSPEC, ++ IFLA_ADDRESS, ++ IFLA_BROADCAST, ++ IFLA_IFNAME, ++ IFLA_MTU, ++ IFLA_LINK, ++ IFLA_QDISC, ++ IFLA_STATS, ++ IFLA_COST, ++#define IFLA_COST IFLA_COST ++ IFLA_PRIORITY, ++#define IFLA_PRIORITY IFLA_PRIORITY ++ IFLA_MASTER, ++#define IFLA_MASTER IFLA_MASTER ++ IFLA_WIRELESS, /* Wireless Extension event - see wireless.h */ ++#define IFLA_WIRELESS IFLA_WIRELESS ++ IFLA_PROTINFO, /* Protocol specific information for a link */ ++#define IFLA_PROTINFO IFLA_PROTINFO ++ IFLA_TXQLEN, ++#define IFLA_TXQLEN IFLA_TXQLEN ++ IFLA_MAP, ++#define IFLA_MAP IFLA_MAP ++ IFLA_WEIGHT, ++#define IFLA_WEIGHT IFLA_WEIGHT ++ IFLA_OPERSTATE, ++ IFLA_LINKMODE, ++ IFLA_LINKINFO, ++#define IFLA_LINKINFO IFLA_LINKINFO ++ IFLA_NET_NS_PID, ++ IFLA_IFALIAS, ++ IFLA_NUM_VF, /* Number of VFs if device is SR-IOV PF */ ++ IFLA_VFINFO_LIST, ++ IFLA_STATS64, ++ IFLA_VF_PORTS, ++ IFLA_PORT_SELF, ++ IFLA_AF_SPEC, ++ IFLA_GROUP, /* Group the device belongs to */ ++ IFLA_NET_NS_FD, ++ IFLA_EXT_MASK, /* Extended info mask, VFs, etc */ ++ IFLA_PROMISCUITY, /* Promiscuity count: > 0 means acts PROMISC */ ++#define IFLA_PROMISCUITY IFLA_PROMISCUITY ++ IFLA_NUM_TX_QUEUES, ++ IFLA_NUM_RX_QUEUES, ++ IFLA_CARRIER, ++ IFLA_PHYS_PORT_ID, ++ IFLA_CARRIER_CHANGES, ++ IFLA_PHYS_SWITCH_ID, ++ IFLA_LINK_NETNSID, ++ IFLA_PHYS_PORT_NAME, ++ IFLA_PROTO_DOWN, ++ IFLA_GSO_MAX_SEGS, ++ IFLA_GSO_MAX_SIZE, ++ IFLA_PAD, ++ IFLA_XDP, ++ IFLA_EVENT, ++ IFLA_NEW_NETNSID, ++ IFLA_IF_NETNSID, ++ IFLA_TARGET_NETNSID = IFLA_IF_NETNSID, /* new alias */ ++ IFLA_CARRIER_UP_COUNT, ++ IFLA_CARRIER_DOWN_COUNT, ++ IFLA_NEW_IFINDEX, ++ IFLA_MIN_MTU, ++ IFLA_MAX_MTU, ++ IFLA_PROP_LIST, ++ IFLA_ALT_IFNAME, /* Alternative ifname */ ++ IFLA_PERM_ADDRESS, ++ IFLA_PROTO_DOWN_REASON, ++ ++ /* device (sysfs) name as parent, used instead ++ * of IFLA_LINK where there's no parent netdev ++ */ ++ IFLA_PARENT_DEV_NAME, ++ IFLA_PARENT_DEV_BUS_NAME, ++ IFLA_GRO_MAX_SIZE, ++ IFLA_TSO_MAX_SIZE, ++ IFLA_TSO_MAX_SEGS, ++ ++ __IFLA_MAX ++}; ++ ++ ++#define IFLA_MAX 
(__IFLA_MAX - 1) ++ ++enum { ++ IFLA_PROTO_DOWN_REASON_UNSPEC, ++ IFLA_PROTO_DOWN_REASON_MASK, /* u32, mask for reason bits */ ++ IFLA_PROTO_DOWN_REASON_VALUE, /* u32, reason bit value */ ++ ++ __IFLA_PROTO_DOWN_REASON_CNT, ++ IFLA_PROTO_DOWN_REASON_MAX = __IFLA_PROTO_DOWN_REASON_CNT - 1 ++}; ++ ++/* backwards compatibility for userspace */ ++#ifndef __KERNEL__ ++#define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg)))) ++#define IFLA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg)) ++#endif ++ ++enum { ++ IFLA_INET_UNSPEC, ++ IFLA_INET_CONF, ++ __IFLA_INET_MAX, ++}; ++ ++#define IFLA_INET_MAX (__IFLA_INET_MAX - 1) ++ ++/* ifi_flags. ++ ++ IFF_* flags. ++ ++ The only change is: ++ IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are ++ more not changeable by user. They describe link media ++ characteristics and set by device driver. ++ ++ Comments: ++ - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid ++ - If neither of these three flags are set; ++ the interface is NBMA. ++ ++ - IFF_MULTICAST does not mean anything special: ++ multicasts can be used on all not-NBMA links. ++ IFF_MULTICAST means that this media uses special encapsulation ++ for multicast frames. Apparently, all IFF_POINTOPOINT and ++ IFF_BROADCAST devices are able to use multicasts too. ++ */ ++ ++/* IFLA_LINK. ++ For usual devices it is equal ifi_index. ++ If it is a "virtual interface" (f.e. tunnel), ifi_link ++ can point to real physical interface (f.e. for bandwidth calculations), ++ or maybe 0, what means, that real media is unknown (usual ++ for IPIP tunnels, when route to endpoint is allowed to change) ++ */ ++ ++/* Subtype attributes for IFLA_PROTINFO */ ++enum { ++ IFLA_INET6_UNSPEC, ++ IFLA_INET6_FLAGS, /* link flags */ ++ IFLA_INET6_CONF, /* sysctl parameters */ ++ IFLA_INET6_STATS, /* statistics */ ++ IFLA_INET6_MCAST, /* MC things. What of them? 
*/ ++ IFLA_INET6_CACHEINFO, /* time values and max reasm size */ ++ IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */ ++ IFLA_INET6_TOKEN, /* device token */ ++ IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */ ++ IFLA_INET6_RA_MTU, /* mtu carried in the RA message */ ++ __IFLA_INET6_MAX ++}; ++ ++#define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1) ++ ++enum in6_addr_gen_mode { ++ IN6_ADDR_GEN_MODE_EUI64, ++ IN6_ADDR_GEN_MODE_NONE, ++ IN6_ADDR_GEN_MODE_STABLE_PRIVACY, ++ IN6_ADDR_GEN_MODE_RANDOM, ++}; ++ ++/* Bridge section */ ++ ++enum { ++ IFLA_BR_UNSPEC, ++ IFLA_BR_FORWARD_DELAY, ++ IFLA_BR_HELLO_TIME, ++ IFLA_BR_MAX_AGE, ++ IFLA_BR_AGEING_TIME, ++ IFLA_BR_STP_STATE, ++ IFLA_BR_PRIORITY, ++ IFLA_BR_VLAN_FILTERING, ++ IFLA_BR_VLAN_PROTOCOL, ++ IFLA_BR_GROUP_FWD_MASK, ++ IFLA_BR_ROOT_ID, ++ IFLA_BR_BRIDGE_ID, ++ IFLA_BR_ROOT_PORT, ++ IFLA_BR_ROOT_PATH_COST, ++ IFLA_BR_TOPOLOGY_CHANGE, ++ IFLA_BR_TOPOLOGY_CHANGE_DETECTED, ++ IFLA_BR_HELLO_TIMER, ++ IFLA_BR_TCN_TIMER, ++ IFLA_BR_TOPOLOGY_CHANGE_TIMER, ++ IFLA_BR_GC_TIMER, ++ IFLA_BR_GROUP_ADDR, ++ IFLA_BR_FDB_FLUSH, ++ IFLA_BR_MCAST_ROUTER, ++ IFLA_BR_MCAST_SNOOPING, ++ IFLA_BR_MCAST_QUERY_USE_IFADDR, ++ IFLA_BR_MCAST_QUERIER, ++ IFLA_BR_MCAST_HASH_ELASTICITY, ++ IFLA_BR_MCAST_HASH_MAX, ++ IFLA_BR_MCAST_LAST_MEMBER_CNT, ++ IFLA_BR_MCAST_STARTUP_QUERY_CNT, ++ IFLA_BR_MCAST_LAST_MEMBER_INTVL, ++ IFLA_BR_MCAST_MEMBERSHIP_INTVL, ++ IFLA_BR_MCAST_QUERIER_INTVL, ++ IFLA_BR_MCAST_QUERY_INTVL, ++ IFLA_BR_MCAST_QUERY_RESPONSE_INTVL, ++ IFLA_BR_MCAST_STARTUP_QUERY_INTVL, ++ IFLA_BR_NF_CALL_IPTABLES, ++ IFLA_BR_NF_CALL_IP6TABLES, ++ IFLA_BR_NF_CALL_ARPTABLES, ++ IFLA_BR_VLAN_DEFAULT_PVID, ++ IFLA_BR_PAD, ++ IFLA_BR_VLAN_STATS_ENABLED, ++ IFLA_BR_MCAST_STATS_ENABLED, ++ IFLA_BR_MCAST_IGMP_VERSION, ++ IFLA_BR_MCAST_MLD_VERSION, ++ IFLA_BR_VLAN_STATS_PER_PORT, ++ IFLA_BR_MULTI_BOOLOPT, ++ IFLA_BR_MCAST_QUERIER_STATE, ++ __IFLA_BR_MAX, ++}; ++ ++#define IFLA_BR_MAX (__IFLA_BR_MAX - 1) ++ ++struct ifla_bridge_id { ++ __u8 prio[2]; ++ __u8 addr[6]; /* ETH_ALEN */ ++}; ++ ++enum { ++ BRIDGE_MODE_UNSPEC, ++ BRIDGE_MODE_HAIRPIN, ++}; ++ ++enum { ++ IFLA_BRPORT_UNSPEC, ++ IFLA_BRPORT_STATE, /* Spanning tree state */ ++ IFLA_BRPORT_PRIORITY, /* " priority */ ++ IFLA_BRPORT_COST, /* " cost */ ++ IFLA_BRPORT_MODE, /* mode (hairpin) */ ++ IFLA_BRPORT_GUARD, /* bpdu guard */ ++ IFLA_BRPORT_PROTECT, /* root port protection */ ++ IFLA_BRPORT_FAST_LEAVE, /* multicast fast leave */ ++ IFLA_BRPORT_LEARNING, /* mac learning */ ++ IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */ ++ IFLA_BRPORT_PROXYARP, /* proxy ARP */ ++ IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */ ++ IFLA_BRPORT_PROXYARP_WIFI, /* proxy ARP for Wi-Fi */ ++ IFLA_BRPORT_ROOT_ID, /* designated root */ ++ IFLA_BRPORT_BRIDGE_ID, /* designated bridge */ ++ IFLA_BRPORT_DESIGNATED_PORT, ++ IFLA_BRPORT_DESIGNATED_COST, ++ IFLA_BRPORT_ID, ++ IFLA_BRPORT_NO, ++ IFLA_BRPORT_TOPOLOGY_CHANGE_ACK, ++ IFLA_BRPORT_CONFIG_PENDING, ++ IFLA_BRPORT_MESSAGE_AGE_TIMER, ++ IFLA_BRPORT_FORWARD_DELAY_TIMER, ++ IFLA_BRPORT_HOLD_TIMER, ++ IFLA_BRPORT_FLUSH, ++ IFLA_BRPORT_MULTICAST_ROUTER, ++ IFLA_BRPORT_PAD, ++ IFLA_BRPORT_MCAST_FLOOD, ++ IFLA_BRPORT_MCAST_TO_UCAST, ++ IFLA_BRPORT_VLAN_TUNNEL, ++ IFLA_BRPORT_BCAST_FLOOD, ++ IFLA_BRPORT_GROUP_FWD_MASK, ++ IFLA_BRPORT_NEIGH_SUPPRESS, ++ IFLA_BRPORT_ISOLATED, ++ IFLA_BRPORT_BACKUP_PORT, ++ IFLA_BRPORT_MRP_RING_OPEN, ++ IFLA_BRPORT_MRP_IN_OPEN, ++ IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, ++ IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, ++ __IFLA_BRPORT_MAX ++}; ++#define 
IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) ++ ++struct ifla_cacheinfo { ++ __u32 max_reasm_len; ++ __u32 tstamp; /* ipv6InterfaceTable updated timestamp */ ++ __u32 reachable_time; ++ __u32 retrans_time; ++}; ++ ++enum { ++ IFLA_INFO_UNSPEC, ++ IFLA_INFO_KIND, ++ IFLA_INFO_DATA, ++ IFLA_INFO_XSTATS, ++ IFLA_INFO_SLAVE_KIND, ++ IFLA_INFO_SLAVE_DATA, ++ __IFLA_INFO_MAX, ++}; ++ ++#define IFLA_INFO_MAX (__IFLA_INFO_MAX - 1) ++ ++/* VLAN section */ ++ ++enum { ++ IFLA_VLAN_UNSPEC, ++ IFLA_VLAN_ID, ++ IFLA_VLAN_FLAGS, ++ IFLA_VLAN_EGRESS_QOS, ++ IFLA_VLAN_INGRESS_QOS, ++ IFLA_VLAN_PROTOCOL, ++ __IFLA_VLAN_MAX, ++}; ++ ++#define IFLA_VLAN_MAX (__IFLA_VLAN_MAX - 1) ++ ++struct ifla_vlan_flags { ++ __u32 flags; ++ __u32 mask; ++}; ++ ++enum { ++ IFLA_VLAN_QOS_UNSPEC, ++ IFLA_VLAN_QOS_MAPPING, ++ __IFLA_VLAN_QOS_MAX ++}; ++ ++#define IFLA_VLAN_QOS_MAX (__IFLA_VLAN_QOS_MAX - 1) ++ ++struct ifla_vlan_qos_mapping { ++ __u32 from; ++ __u32 to; ++}; ++ ++/* MACVLAN section */ ++enum { ++ IFLA_MACVLAN_UNSPEC, ++ IFLA_MACVLAN_MODE, ++ IFLA_MACVLAN_FLAGS, ++ IFLA_MACVLAN_MACADDR_MODE, ++ IFLA_MACVLAN_MACADDR, ++ IFLA_MACVLAN_MACADDR_DATA, ++ IFLA_MACVLAN_MACADDR_COUNT, ++ IFLA_MACVLAN_BC_QUEUE_LEN, ++ IFLA_MACVLAN_BC_QUEUE_LEN_USED, ++ __IFLA_MACVLAN_MAX, ++}; ++ ++#define IFLA_MACVLAN_MAX (__IFLA_MACVLAN_MAX - 1) ++ ++enum macvlan_mode { ++ MACVLAN_MODE_PRIVATE = 1, /* don't talk to other macvlans */ ++ MACVLAN_MODE_VEPA = 2, /* talk to other ports through ext bridge */ ++ MACVLAN_MODE_BRIDGE = 4, /* talk to bridge ports directly */ ++ MACVLAN_MODE_PASSTHRU = 8,/* take over the underlying device */ ++ MACVLAN_MODE_SOURCE = 16,/* use source MAC address list to assign */ ++}; ++ ++enum macvlan_macaddr_mode { ++ MACVLAN_MACADDR_ADD, ++ MACVLAN_MACADDR_DEL, ++ MACVLAN_MACADDR_FLUSH, ++ MACVLAN_MACADDR_SET, ++}; ++ ++#define MACVLAN_FLAG_NOPROMISC 1 ++#define MACVLAN_FLAG_NODST 2 /* skip dst macvlan if matching src macvlan */ ++ ++/* VRF section */ ++enum { ++ IFLA_VRF_UNSPEC, ++ IFLA_VRF_TABLE, ++ __IFLA_VRF_MAX ++}; ++ ++#define IFLA_VRF_MAX (__IFLA_VRF_MAX - 1) ++ ++enum { ++ IFLA_VRF_PORT_UNSPEC, ++ IFLA_VRF_PORT_TABLE, ++ __IFLA_VRF_PORT_MAX ++}; ++ ++#define IFLA_VRF_PORT_MAX (__IFLA_VRF_PORT_MAX - 1) ++ ++/* MACSEC section */ ++enum { ++ IFLA_MACSEC_UNSPEC, ++ IFLA_MACSEC_SCI, ++ IFLA_MACSEC_PORT, ++ IFLA_MACSEC_ICV_LEN, ++ IFLA_MACSEC_CIPHER_SUITE, ++ IFLA_MACSEC_WINDOW, ++ IFLA_MACSEC_ENCODING_SA, ++ IFLA_MACSEC_ENCRYPT, ++ IFLA_MACSEC_PROTECT, ++ IFLA_MACSEC_INC_SCI, ++ IFLA_MACSEC_ES, ++ IFLA_MACSEC_SCB, ++ IFLA_MACSEC_REPLAY_PROTECT, ++ IFLA_MACSEC_VALIDATION, ++ IFLA_MACSEC_PAD, ++ IFLA_MACSEC_OFFLOAD, ++ __IFLA_MACSEC_MAX, ++}; ++ ++#define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1) ++ ++/* XFRM section */ ++enum { ++ IFLA_XFRM_UNSPEC, ++ IFLA_XFRM_LINK, ++ IFLA_XFRM_IF_ID, ++ __IFLA_XFRM_MAX ++}; ++ ++#define IFLA_XFRM_MAX (__IFLA_XFRM_MAX - 1) ++ ++enum macsec_validation_type { ++ MACSEC_VALIDATE_DISABLED = 0, ++ MACSEC_VALIDATE_CHECK = 1, ++ MACSEC_VALIDATE_STRICT = 2, ++ __MACSEC_VALIDATE_END, ++ MACSEC_VALIDATE_MAX = __MACSEC_VALIDATE_END - 1, ++}; ++ ++enum macsec_offload { ++ MACSEC_OFFLOAD_OFF = 0, ++ MACSEC_OFFLOAD_PHY = 1, ++ MACSEC_OFFLOAD_MAC = 2, ++ __MACSEC_OFFLOAD_END, ++ MACSEC_OFFLOAD_MAX = __MACSEC_OFFLOAD_END - 1, ++}; ++ ++/* IPVLAN section */ ++enum { ++ IFLA_IPVLAN_UNSPEC, ++ IFLA_IPVLAN_MODE, ++ IFLA_IPVLAN_FLAGS, ++ __IFLA_IPVLAN_MAX ++}; ++ ++#define IFLA_IPVLAN_MAX (__IFLA_IPVLAN_MAX - 1) ++ ++enum ipvlan_mode { ++ IPVLAN_MODE_L2 = 0, ++ IPVLAN_MODE_L3, ++ IPVLAN_MODE_L3S, ++ 
IPVLAN_MODE_MAX ++}; ++ ++#define IPVLAN_F_PRIVATE 0x01 ++#define IPVLAN_F_VEPA 0x02 ++ ++/* VXLAN section */ ++enum { ++ IFLA_VXLAN_UNSPEC, ++ IFLA_VXLAN_ID, ++ IFLA_VXLAN_GROUP, /* group or remote address */ ++ IFLA_VXLAN_LINK, ++ IFLA_VXLAN_LOCAL, ++ IFLA_VXLAN_TTL, ++ IFLA_VXLAN_TOS, ++ IFLA_VXLAN_LEARNING, ++ IFLA_VXLAN_AGEING, ++ IFLA_VXLAN_LIMIT, ++ IFLA_VXLAN_PORT_RANGE, /* source port */ ++ IFLA_VXLAN_PROXY, ++ IFLA_VXLAN_RSC, ++ IFLA_VXLAN_L2MISS, ++ IFLA_VXLAN_L3MISS, ++ IFLA_VXLAN_PORT, /* destination port */ ++ IFLA_VXLAN_GROUP6, ++ IFLA_VXLAN_LOCAL6, ++ IFLA_VXLAN_UDP_CSUM, ++ IFLA_VXLAN_UDP_ZERO_CSUM6_TX, ++ IFLA_VXLAN_UDP_ZERO_CSUM6_RX, ++ IFLA_VXLAN_REMCSUM_TX, ++ IFLA_VXLAN_REMCSUM_RX, ++ IFLA_VXLAN_GBP, ++ IFLA_VXLAN_REMCSUM_NOPARTIAL, ++ IFLA_VXLAN_COLLECT_METADATA, ++ IFLA_VXLAN_LABEL, ++ IFLA_VXLAN_GPE, ++ IFLA_VXLAN_TTL_INHERIT, ++ IFLA_VXLAN_DF, ++ __IFLA_VXLAN_MAX ++}; ++#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) ++ ++struct ifla_vxlan_port_range { ++ __be16 low; ++ __be16 high; ++}; ++ ++enum ifla_vxlan_df { ++ VXLAN_DF_UNSET = 0, ++ VXLAN_DF_SET, ++ VXLAN_DF_INHERIT, ++ __VXLAN_DF_END, ++ VXLAN_DF_MAX = __VXLAN_DF_END - 1, ++}; ++ ++/* GENEVE section */ ++enum { ++ IFLA_GENEVE_UNSPEC, ++ IFLA_GENEVE_ID, ++ IFLA_GENEVE_REMOTE, ++ IFLA_GENEVE_TTL, ++ IFLA_GENEVE_TOS, ++ IFLA_GENEVE_PORT, /* destination port */ ++ IFLA_GENEVE_COLLECT_METADATA, ++ IFLA_GENEVE_REMOTE6, ++ IFLA_GENEVE_UDP_CSUM, ++ IFLA_GENEVE_UDP_ZERO_CSUM6_TX, ++ IFLA_GENEVE_UDP_ZERO_CSUM6_RX, ++ IFLA_GENEVE_LABEL, ++ IFLA_GENEVE_TTL_INHERIT, ++ IFLA_GENEVE_DF, ++ __IFLA_GENEVE_MAX ++}; ++#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) ++ ++enum ifla_geneve_df { ++ GENEVE_DF_UNSET = 0, ++ GENEVE_DF_SET, ++ GENEVE_DF_INHERIT, ++ __GENEVE_DF_END, ++ GENEVE_DF_MAX = __GENEVE_DF_END - 1, ++}; ++ ++/* Bareudp section */ ++enum { ++ IFLA_BAREUDP_UNSPEC, ++ IFLA_BAREUDP_PORT, ++ IFLA_BAREUDP_ETHERTYPE, ++ IFLA_BAREUDP_SRCPORT_MIN, ++ IFLA_BAREUDP_MULTIPROTO_MODE, ++ __IFLA_BAREUDP_MAX ++}; ++ ++#define IFLA_BAREUDP_MAX (__IFLA_BAREUDP_MAX - 1) ++ ++/* PPP section */ ++enum { ++ IFLA_PPP_UNSPEC, ++ IFLA_PPP_DEV_FD, ++ __IFLA_PPP_MAX ++}; ++#define IFLA_PPP_MAX (__IFLA_PPP_MAX - 1) ++ ++/* GTP section */ ++ ++enum ifla_gtp_role { ++ GTP_ROLE_GGSN = 0, ++ GTP_ROLE_SGSN, ++}; ++ ++enum { ++ IFLA_GTP_UNSPEC, ++ IFLA_GTP_FD0, ++ IFLA_GTP_FD1, ++ IFLA_GTP_PDP_HASHSIZE, ++ IFLA_GTP_ROLE, ++ __IFLA_GTP_MAX, ++}; ++#define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) ++ ++/* Bonding section */ ++ ++enum { ++ IFLA_BOND_UNSPEC, ++ IFLA_BOND_MODE, ++ IFLA_BOND_ACTIVE_SLAVE, ++ IFLA_BOND_MIIMON, ++ IFLA_BOND_UPDELAY, ++ IFLA_BOND_DOWNDELAY, ++ IFLA_BOND_USE_CARRIER, ++ IFLA_BOND_ARP_INTERVAL, ++ IFLA_BOND_ARP_IP_TARGET, ++ IFLA_BOND_ARP_VALIDATE, ++ IFLA_BOND_ARP_ALL_TARGETS, ++ IFLA_BOND_PRIMARY, ++ IFLA_BOND_PRIMARY_RESELECT, ++ IFLA_BOND_FAIL_OVER_MAC, ++ IFLA_BOND_XMIT_HASH_POLICY, ++ IFLA_BOND_RESEND_IGMP, ++ IFLA_BOND_NUM_PEER_NOTIF, ++ IFLA_BOND_ALL_SLAVES_ACTIVE, ++ IFLA_BOND_MIN_LINKS, ++ IFLA_BOND_LP_INTERVAL, ++ IFLA_BOND_PACKETS_PER_SLAVE, ++ IFLA_BOND_AD_LACP_RATE, ++ IFLA_BOND_AD_SELECT, ++ IFLA_BOND_AD_INFO, ++ IFLA_BOND_AD_ACTOR_SYS_PRIO, ++ IFLA_BOND_AD_USER_PORT_KEY, ++ IFLA_BOND_AD_ACTOR_SYSTEM, ++ IFLA_BOND_TLB_DYNAMIC_LB, ++ IFLA_BOND_PEER_NOTIF_DELAY, ++ IFLA_BOND_AD_LACP_ACTIVE, ++ IFLA_BOND_MISSED_MAX, ++ IFLA_BOND_NS_IP6_TARGET, ++ __IFLA_BOND_MAX, ++}; ++ ++#define IFLA_BOND_MAX (__IFLA_BOND_MAX - 1) ++ ++enum { ++ IFLA_BOND_AD_INFO_UNSPEC, ++ IFLA_BOND_AD_INFO_AGGREGATOR, ++ IFLA_BOND_AD_INFO_NUM_PORTS, 
++ IFLA_BOND_AD_INFO_ACTOR_KEY, ++ IFLA_BOND_AD_INFO_PARTNER_KEY, ++ IFLA_BOND_AD_INFO_PARTNER_MAC, ++ __IFLA_BOND_AD_INFO_MAX, ++}; ++ ++#define IFLA_BOND_AD_INFO_MAX (__IFLA_BOND_AD_INFO_MAX - 1) ++ ++enum { ++ IFLA_BOND_SLAVE_UNSPEC, ++ IFLA_BOND_SLAVE_STATE, ++ IFLA_BOND_SLAVE_MII_STATUS, ++ IFLA_BOND_SLAVE_LINK_FAILURE_COUNT, ++ IFLA_BOND_SLAVE_PERM_HWADDR, ++ IFLA_BOND_SLAVE_QUEUE_ID, ++ IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, ++ IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, ++ IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, ++ IFLA_BOND_SLAVE_PRIO, ++ __IFLA_BOND_SLAVE_MAX, ++}; ++ ++#define IFLA_BOND_SLAVE_MAX (__IFLA_BOND_SLAVE_MAX - 1) ++ ++/* SR-IOV virtual function management section */ ++ ++enum { ++ IFLA_VF_INFO_UNSPEC, ++ IFLA_VF_INFO, ++ __IFLA_VF_INFO_MAX, ++}; ++ ++#define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1) ++ ++enum { ++ IFLA_VF_UNSPEC, ++ IFLA_VF_MAC, /* Hardware queue specific attributes */ ++ IFLA_VF_VLAN, /* VLAN ID and QoS */ ++ IFLA_VF_TX_RATE, /* Max TX Bandwidth Allocation */ ++ IFLA_VF_SPOOFCHK, /* Spoof Checking on/off switch */ ++ IFLA_VF_LINK_STATE, /* link state enable/disable/auto switch */ ++ IFLA_VF_RATE, /* Min and Max TX Bandwidth Allocation */ ++ IFLA_VF_RSS_QUERY_EN, /* RSS Redirection Table and Hash Key query ++ * on/off switch ++ */ ++ IFLA_VF_STATS, /* network device statistics */ ++ IFLA_VF_TRUST, /* Trust VF */ ++ IFLA_VF_IB_NODE_GUID, /* VF Infiniband node GUID */ ++ IFLA_VF_IB_PORT_GUID, /* VF Infiniband port GUID */ ++ IFLA_VF_VLAN_LIST, /* nested list of vlans, option for QinQ */ ++ IFLA_VF_BROADCAST, /* VF broadcast */ ++ __IFLA_VF_MAX, ++}; ++ ++#define IFLA_VF_MAX (__IFLA_VF_MAX - 1) ++ ++struct ifla_vf_mac { ++ __u32 vf; ++ __u8 mac[32]; /* MAX_ADDR_LEN */ ++}; ++ ++struct ifla_vf_broadcast { ++ __u8 broadcast[32]; ++}; ++ ++struct ifla_vf_vlan { ++ __u32 vf; ++ __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */ ++ __u32 qos; ++}; ++ ++enum { ++ IFLA_VF_VLAN_INFO_UNSPEC, ++ IFLA_VF_VLAN_INFO, /* VLAN ID, QoS and VLAN protocol */ ++ __IFLA_VF_VLAN_INFO_MAX, ++}; ++ ++#define IFLA_VF_VLAN_INFO_MAX (__IFLA_VF_VLAN_INFO_MAX - 1) ++#define MAX_VLAN_LIST_LEN 1 ++ ++struct ifla_vf_vlan_info { ++ __u32 vf; ++ __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */ ++ __u32 qos; ++ __be16 vlan_proto; /* VLAN protocol either 802.1Q or 802.1ad */ ++}; ++ ++struct ifla_vf_tx_rate { ++ __u32 vf; ++ __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */ ++}; ++ ++struct ifla_vf_rate { ++ __u32 vf; ++ __u32 min_tx_rate; /* Min Bandwidth in Mbps */ ++ __u32 max_tx_rate; /* Max Bandwidth in Mbps */ ++}; ++ ++struct ifla_vf_spoofchk { ++ __u32 vf; ++ __u32 setting; ++}; ++ ++struct ifla_vf_guid { ++ __u32 vf; ++ __u64 guid; ++}; ++ ++enum { ++ IFLA_VF_LINK_STATE_AUTO, /* link state of the uplink */ ++ IFLA_VF_LINK_STATE_ENABLE, /* link always up */ ++ IFLA_VF_LINK_STATE_DISABLE, /* link always down */ ++ __IFLA_VF_LINK_STATE_MAX, ++}; ++ ++struct ifla_vf_link_state { ++ __u32 vf; ++ __u32 link_state; ++}; ++ ++struct ifla_vf_rss_query_en { ++ __u32 vf; ++ __u32 setting; ++}; ++ ++enum { ++ IFLA_VF_STATS_RX_PACKETS, ++ IFLA_VF_STATS_TX_PACKETS, ++ IFLA_VF_STATS_RX_BYTES, ++ IFLA_VF_STATS_TX_BYTES, ++ IFLA_VF_STATS_BROADCAST, ++ IFLA_VF_STATS_MULTICAST, ++ IFLA_VF_STATS_PAD, ++ IFLA_VF_STATS_RX_DROPPED, ++ IFLA_VF_STATS_TX_DROPPED, ++ __IFLA_VF_STATS_MAX, ++}; ++ ++#define IFLA_VF_STATS_MAX (__IFLA_VF_STATS_MAX - 1) ++ ++struct ifla_vf_trust { ++ __u32 vf; ++ __u32 setting; ++}; ++ ++/* VF ports management section ++ * ++ * Nested layout of set/get msg is: ++ 
* ++ * [IFLA_NUM_VF] ++ * [IFLA_VF_PORTS] ++ * [IFLA_VF_PORT] ++ * [IFLA_PORT_*], ... ++ * [IFLA_VF_PORT] ++ * [IFLA_PORT_*], ... ++ * ... ++ * [IFLA_PORT_SELF] ++ * [IFLA_PORT_*], ... ++ */ ++ ++enum { ++ IFLA_VF_PORT_UNSPEC, ++ IFLA_VF_PORT, /* nest */ ++ __IFLA_VF_PORT_MAX, ++}; ++ ++#define IFLA_VF_PORT_MAX (__IFLA_VF_PORT_MAX - 1) ++ ++enum { ++ IFLA_PORT_UNSPEC, ++ IFLA_PORT_VF, /* __u32 */ ++ IFLA_PORT_PROFILE, /* string */ ++ IFLA_PORT_VSI_TYPE, /* 802.1Qbg (pre-)standard VDP */ ++ IFLA_PORT_INSTANCE_UUID, /* binary UUID */ ++ IFLA_PORT_HOST_UUID, /* binary UUID */ ++ IFLA_PORT_REQUEST, /* __u8 */ ++ IFLA_PORT_RESPONSE, /* __u16, output only */ ++ __IFLA_PORT_MAX, ++}; ++ ++#define IFLA_PORT_MAX (__IFLA_PORT_MAX - 1) ++ ++#define PORT_PROFILE_MAX 40 ++#define PORT_UUID_MAX 16 ++#define PORT_SELF_VF -1 ++ ++enum { ++ PORT_REQUEST_PREASSOCIATE = 0, ++ PORT_REQUEST_PREASSOCIATE_RR, ++ PORT_REQUEST_ASSOCIATE, ++ PORT_REQUEST_DISASSOCIATE, ++}; ++ ++enum { ++ PORT_VDP_RESPONSE_SUCCESS = 0, ++ PORT_VDP_RESPONSE_INVALID_FORMAT, ++ PORT_VDP_RESPONSE_INSUFFICIENT_RESOURCES, ++ PORT_VDP_RESPONSE_UNUSED_VTID, ++ PORT_VDP_RESPONSE_VTID_VIOLATION, ++ PORT_VDP_RESPONSE_VTID_VERSION_VIOALTION, ++ PORT_VDP_RESPONSE_OUT_OF_SYNC, ++ /* 0x08-0xFF reserved for future VDP use */ ++ PORT_PROFILE_RESPONSE_SUCCESS = 0x100, ++ PORT_PROFILE_RESPONSE_INPROGRESS, ++ PORT_PROFILE_RESPONSE_INVALID, ++ PORT_PROFILE_RESPONSE_BADSTATE, ++ PORT_PROFILE_RESPONSE_INSUFFICIENT_RESOURCES, ++ PORT_PROFILE_RESPONSE_ERROR, ++}; ++ ++struct ifla_port_vsi { ++ __u8 vsi_mgr_id; ++ __u8 vsi_type_id[3]; ++ __u8 vsi_type_version; ++ __u8 pad[3]; ++}; ++ ++ ++/* IPoIB section */ ++ ++enum { ++ IFLA_IPOIB_UNSPEC, ++ IFLA_IPOIB_PKEY, ++ IFLA_IPOIB_MODE, ++ IFLA_IPOIB_UMCAST, ++ __IFLA_IPOIB_MAX ++}; ++ ++enum { ++ IPOIB_MODE_DATAGRAM = 0, /* using unreliable datagram QPs */ ++ IPOIB_MODE_CONNECTED = 1, /* using connected QPs */ ++}; ++ ++#define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1) ++ ++ ++/* HSR/PRP section, both uses same interface */ ++ ++/* Different redundancy protocols for hsr device */ ++enum { ++ HSR_PROTOCOL_HSR, ++ HSR_PROTOCOL_PRP, ++ HSR_PROTOCOL_MAX, ++}; ++ ++enum { ++ IFLA_HSR_UNSPEC, ++ IFLA_HSR_SLAVE1, ++ IFLA_HSR_SLAVE2, ++ IFLA_HSR_MULTICAST_SPEC, /* Last byte of supervision addr */ ++ IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */ ++ IFLA_HSR_SEQ_NR, ++ IFLA_HSR_VERSION, /* HSR version */ ++ IFLA_HSR_PROTOCOL, /* Indicate different protocol than ++ * HSR. For example PRP. ++ */ ++ __IFLA_HSR_MAX, ++}; ++ ++#define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1) ++ ++/* STATS section */ ++ ++struct if_stats_msg { ++ __u8 family; ++ __u8 pad1; ++ __u16 pad2; ++ __u32 ifindex; ++ __u32 filter_mask; ++}; ++ ++/* A stats attribute can be netdev specific or a global stat. 
++ * For netdev stats, lets use the prefix IFLA_STATS_LINK_* ++ */ ++enum { ++ IFLA_STATS_UNSPEC, /* also used as 64bit pad attribute */ ++ IFLA_STATS_LINK_64, ++ IFLA_STATS_LINK_XSTATS, ++ IFLA_STATS_LINK_XSTATS_SLAVE, ++ IFLA_STATS_LINK_OFFLOAD_XSTATS, ++ IFLA_STATS_AF_SPEC, ++ __IFLA_STATS_MAX, ++}; ++ ++#define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1) ++ ++#define IFLA_STATS_FILTER_BIT(ATTR) (1 << (ATTR - 1)) ++ ++/* These are embedded into IFLA_STATS_LINK_XSTATS: ++ * [IFLA_STATS_LINK_XSTATS] ++ * -> [LINK_XSTATS_TYPE_xxx] ++ * -> [rtnl link type specific attributes] ++ */ ++enum { ++ LINK_XSTATS_TYPE_UNSPEC, ++ LINK_XSTATS_TYPE_BRIDGE, ++ LINK_XSTATS_TYPE_BOND, ++ __LINK_XSTATS_TYPE_MAX ++}; ++#define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1) ++ ++/* These are stats embedded into IFLA_STATS_LINK_OFFLOAD_XSTATS */ ++enum { ++ IFLA_OFFLOAD_XSTATS_UNSPEC, ++ IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */ ++ __IFLA_OFFLOAD_XSTATS_MAX ++}; ++#define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1) ++ ++/* XDP section */ ++ ++#define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0) ++#define XDP_FLAGS_SKB_MODE (1U << 1) ++#define XDP_FLAGS_DRV_MODE (1U << 2) ++#define XDP_FLAGS_HW_MODE (1U << 3) ++#define XDP_FLAGS_REPLACE (1U << 4) ++#define XDP_FLAGS_MODES (XDP_FLAGS_SKB_MODE | \ ++ XDP_FLAGS_DRV_MODE | \ ++ XDP_FLAGS_HW_MODE) ++#define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \ ++ XDP_FLAGS_MODES | XDP_FLAGS_REPLACE) ++ ++/* These are stored into IFLA_XDP_ATTACHED on dump. */ ++enum { ++ XDP_ATTACHED_NONE = 0, ++ XDP_ATTACHED_DRV, ++ XDP_ATTACHED_SKB, ++ XDP_ATTACHED_HW, ++ XDP_ATTACHED_MULTI, ++}; ++ ++enum { ++ IFLA_XDP_UNSPEC, ++ IFLA_XDP_FD, ++ IFLA_XDP_ATTACHED, ++ IFLA_XDP_FLAGS, ++ IFLA_XDP_PROG_ID, ++ IFLA_XDP_DRV_PROG_ID, ++ IFLA_XDP_SKB_PROG_ID, ++ IFLA_XDP_HW_PROG_ID, ++ IFLA_XDP_EXPECTED_FD, ++ __IFLA_XDP_MAX, ++}; ++ ++#define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1) ++ ++enum { ++ IFLA_EVENT_NONE, ++ IFLA_EVENT_REBOOT, /* internal reset / reboot */ ++ IFLA_EVENT_FEATURES, /* change in offload features */ ++ IFLA_EVENT_BONDING_FAILOVER, /* change in active slave */ ++ IFLA_EVENT_NOTIFY_PEERS, /* re-sent grat. 
arp/ndisc */ ++ IFLA_EVENT_IGMP_RESEND, /* re-sent IGMP JOIN */ ++ IFLA_EVENT_BONDING_OPTIONS, /* change in bonding options */ ++}; ++ ++/* tun section */ ++ ++enum { ++ IFLA_TUN_UNSPEC, ++ IFLA_TUN_OWNER, ++ IFLA_TUN_GROUP, ++ IFLA_TUN_TYPE, ++ IFLA_TUN_PI, ++ IFLA_TUN_VNET_HDR, ++ IFLA_TUN_PERSIST, ++ IFLA_TUN_MULTI_QUEUE, ++ IFLA_TUN_NUM_QUEUES, ++ IFLA_TUN_NUM_DISABLED_QUEUES, ++ __IFLA_TUN_MAX, ++}; ++ ++#define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1) ++ ++/* rmnet section */ ++ ++#define RMNET_FLAGS_INGRESS_DEAGGREGATION (1U << 0) ++#define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1) ++#define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2) ++#define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3) ++#define RMNET_FLAGS_INGRESS_MAP_CKSUMV5 (1U << 4) ++#define RMNET_FLAGS_EGRESS_MAP_CKSUMV5 (1U << 5) ++ ++enum { ++ IFLA_RMNET_UNSPEC, ++ IFLA_RMNET_MUX_ID, ++ IFLA_RMNET_FLAGS, ++ __IFLA_RMNET_MAX, ++}; ++ ++#define IFLA_RMNET_MAX (__IFLA_RMNET_MAX - 1) ++ ++struct ifla_rmnet_flags { ++ __u32 flags; ++ __u32 mask; ++}; ++ ++/* MCTP section */ ++ ++enum { ++ IFLA_MCTP_UNSPEC, ++ IFLA_MCTP_NET, ++ __IFLA_MCTP_MAX, ++}; ++ ++#define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1) ++ ++#endif /* _UAPI_LINUX_IF_LINK_H */ +diff --git a/src/cc/libbpf/include/uapi/linux/if_xdp.h b/src/cc/libbpf/include/uapi/linux/if_xdp.h +new file mode 100644 +index 0000000..a78a809 +--- /dev/null ++++ b/src/cc/libbpf/include/uapi/linux/if_xdp.h +@@ -0,0 +1,111 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * if_xdp: XDP socket user-space interface ++ * Copyright(c) 2018 Intel Corporation. ++ * ++ * Author(s): Björn Töpel ++ * Magnus Karlsson ++ */ ++ ++#ifndef _LINUX_IF_XDP_H ++#define _LINUX_IF_XDP_H ++ ++#include ++ ++/* Options for the sxdp_flags field */ ++#define XDP_SHARED_UMEM (1 << 0) ++#define XDP_COPY (1 << 1) /* Force copy-mode */ ++#define XDP_ZEROCOPY (1 << 2) /* Force zero-copy mode */ ++/* If this option is set, the driver might go sleep and in that case ++ * the XDP_RING_NEED_WAKEUP flag in the fill and/or Tx rings will be ++ * set. If it is set, the application need to explicitly wake up the ++ * driver with a poll() (Rx and Tx) or sendto() (Tx only). If you are ++ * running the driver and the application on the same core, you should ++ * use this option so that the kernel will yield to the user space ++ * application. 
++ */ ++#define XDP_USE_NEED_WAKEUP (1 << 3) ++ ++/* Flags for xsk_umem_config flags */ ++#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0) ++ ++struct sockaddr_xdp { ++ __u16 sxdp_family; ++ __u16 sxdp_flags; ++ __u32 sxdp_ifindex; ++ __u32 sxdp_queue_id; ++ __u32 sxdp_shared_umem_fd; ++}; ++ ++/* XDP_RING flags */ ++#define XDP_RING_NEED_WAKEUP (1 << 0) ++ ++struct xdp_ring_offset { ++ __u64 producer; ++ __u64 consumer; ++ __u64 desc; ++ __u64 flags; ++}; ++ ++struct xdp_mmap_offsets { ++ struct xdp_ring_offset rx; ++ struct xdp_ring_offset tx; ++ struct xdp_ring_offset fr; /* Fill */ ++ struct xdp_ring_offset cr; /* Completion */ ++}; ++ ++/* XDP socket options */ ++#define XDP_MMAP_OFFSETS 1 ++#define XDP_RX_RING 2 ++#define XDP_TX_RING 3 ++#define XDP_UMEM_REG 4 ++#define XDP_UMEM_FILL_RING 5 ++#define XDP_UMEM_COMPLETION_RING 6 ++#define XDP_STATISTICS 7 ++#define XDP_OPTIONS 8 ++ ++struct xdp_umem_reg { ++ __u64 addr; /* Start of packet data area */ ++ __u64 len; /* Length of packet data area */ ++ __u32 chunk_size; ++ __u32 headroom; ++ __u32 flags; ++}; ++ ++struct xdp_statistics { ++ __u64 rx_dropped; /* Dropped for other reasons */ ++ __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */ ++ __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */ ++ __u64 rx_ring_full; /* Dropped due to rx ring being full */ ++ __u64 rx_fill_ring_empty_descs; /* Failed to retrieve item from fill ring */ ++ __u64 tx_ring_empty_descs; /* Failed to retrieve item from tx ring */ ++}; ++ ++struct xdp_options { ++ __u32 flags; ++}; ++ ++/* Flags for the flags field of struct xdp_options */ ++#define XDP_OPTIONS_ZEROCOPY (1 << 0) ++ ++/* Pgoff for mmaping the rings */ ++#define XDP_PGOFF_RX_RING 0 ++#define XDP_PGOFF_TX_RING 0x80000000 ++#define XDP_UMEM_PGOFF_FILL_RING 0x100000000ULL ++#define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000ULL ++ ++/* Masks for unaligned chunks mode */ ++#define XSK_UNALIGNED_BUF_OFFSET_SHIFT 48 ++#define XSK_UNALIGNED_BUF_ADDR_MASK \ ++ ((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1) ++ ++/* Rx/Tx descriptor */ ++struct xdp_desc { ++ __u64 addr; ++ __u32 len; ++ __u32 options; ++}; ++ ++/* UMEM descriptor is __u64 */ ++ ++#endif /* _LINUX_IF_XDP_H */ +diff --git a/src/cc/libbpf/include/uapi/linux/netlink.h b/src/cc/libbpf/include/uapi/linux/netlink.h +new file mode 100644 +index 0000000..0a4d733 +--- /dev/null ++++ b/src/cc/libbpf/include/uapi/linux/netlink.h +@@ -0,0 +1,252 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef _UAPI__LINUX_NETLINK_H ++#define _UAPI__LINUX_NETLINK_H ++ ++#include ++#include /* for __kernel_sa_family_t */ ++#include ++ ++#define NETLINK_ROUTE 0 /* Routing/device hook */ ++#define NETLINK_UNUSED 1 /* Unused number */ ++#define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */ ++#define NETLINK_FIREWALL 3 /* Unused number, formerly ip_queue */ ++#define NETLINK_SOCK_DIAG 4 /* socket monitoring */ ++#define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */ ++#define NETLINK_XFRM 6 /* ipsec */ ++#define NETLINK_SELINUX 7 /* SELinux event notifications */ ++#define NETLINK_ISCSI 8 /* Open-iSCSI */ ++#define NETLINK_AUDIT 9 /* auditing */ ++#define NETLINK_FIB_LOOKUP 10 ++#define NETLINK_CONNECTOR 11 ++#define NETLINK_NETFILTER 12 /* netfilter subsystem */ ++#define NETLINK_IP6_FW 13 ++#define NETLINK_DNRTMSG 14 /* DECnet routing messages */ ++#define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */ ++#define NETLINK_GENERIC 16 ++/* leave room for NETLINK_DM (DM Events) */ ++#define 
NETLINK_SCSITRANSPORT 18 /* SCSI Transports */ ++#define NETLINK_ECRYPTFS 19 ++#define NETLINK_RDMA 20 ++#define NETLINK_CRYPTO 21 /* Crypto layer */ ++#define NETLINK_SMC 22 /* SMC monitoring */ ++ ++#define NETLINK_INET_DIAG NETLINK_SOCK_DIAG ++ ++#define MAX_LINKS 32 ++ ++struct sockaddr_nl { ++ __kernel_sa_family_t nl_family; /* AF_NETLINK */ ++ unsigned short nl_pad; /* zero */ ++ __u32 nl_pid; /* port ID */ ++ __u32 nl_groups; /* multicast groups mask */ ++}; ++ ++struct nlmsghdr { ++ __u32 nlmsg_len; /* Length of message including header */ ++ __u16 nlmsg_type; /* Message content */ ++ __u16 nlmsg_flags; /* Additional flags */ ++ __u32 nlmsg_seq; /* Sequence number */ ++ __u32 nlmsg_pid; /* Sending process port ID */ ++}; ++ ++/* Flags values */ ++ ++#define NLM_F_REQUEST 0x01 /* It is request message. */ ++#define NLM_F_MULTI 0x02 /* Multipart message, terminated by NLMSG_DONE */ ++#define NLM_F_ACK 0x04 /* Reply with ack, with zero or error code */ ++#define NLM_F_ECHO 0x08 /* Echo this request */ ++#define NLM_F_DUMP_INTR 0x10 /* Dump was inconsistent due to sequence change */ ++#define NLM_F_DUMP_FILTERED 0x20 /* Dump was filtered as requested */ ++ ++/* Modifiers to GET request */ ++#define NLM_F_ROOT 0x100 /* specify tree root */ ++#define NLM_F_MATCH 0x200 /* return all matching */ ++#define NLM_F_ATOMIC 0x400 /* atomic GET */ ++#define NLM_F_DUMP (NLM_F_ROOT|NLM_F_MATCH) ++ ++/* Modifiers to NEW request */ ++#define NLM_F_REPLACE 0x100 /* Override existing */ ++#define NLM_F_EXCL 0x200 /* Do not touch, if it exists */ ++#define NLM_F_CREATE 0x400 /* Create, if it does not exist */ ++#define NLM_F_APPEND 0x800 /* Add to end of list */ ++ ++/* Modifiers to DELETE request */ ++#define NLM_F_NONREC 0x100 /* Do not delete recursively */ ++ ++/* Flags for ACK message */ ++#define NLM_F_CAPPED 0x100 /* request was capped */ ++#define NLM_F_ACK_TLVS 0x200 /* extended ACK TVLs were included */ ++ ++/* ++ 4.4BSD ADD NLM_F_CREATE|NLM_F_EXCL ++ 4.4BSD CHANGE NLM_F_REPLACE ++ ++ True CHANGE NLM_F_CREATE|NLM_F_REPLACE ++ Append NLM_F_CREATE ++ Check NLM_F_EXCL ++ */ ++ ++#define NLMSG_ALIGNTO 4U ++#define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) ) ++#define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr))) ++#define NLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN) ++#define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len)) ++#define NLMSG_DATA(nlh) ((void*)(((char*)nlh) + NLMSG_LENGTH(0))) ++#define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \ ++ (struct nlmsghdr*)(((char*)(nlh)) + NLMSG_ALIGN((nlh)->nlmsg_len))) ++#define NLMSG_OK(nlh,len) ((len) >= (int)sizeof(struct nlmsghdr) && \ ++ (nlh)->nlmsg_len >= sizeof(struct nlmsghdr) && \ ++ (nlh)->nlmsg_len <= (len)) ++#define NLMSG_PAYLOAD(nlh,len) ((nlh)->nlmsg_len - NLMSG_SPACE((len))) ++ ++#define NLMSG_NOOP 0x1 /* Nothing. 
*/ ++#define NLMSG_ERROR 0x2 /* Error */ ++#define NLMSG_DONE 0x3 /* End of a dump */ ++#define NLMSG_OVERRUN 0x4 /* Data lost */ ++ ++#define NLMSG_MIN_TYPE 0x10 /* < 0x10: reserved control messages */ ++ ++struct nlmsgerr { ++ int error; ++ struct nlmsghdr msg; ++ /* ++ * followed by the message contents unless NETLINK_CAP_ACK was set ++ * or the ACK indicates success (error == 0) ++ * message length is aligned with NLMSG_ALIGN() ++ */ ++ /* ++ * followed by TLVs defined in enum nlmsgerr_attrs ++ * if NETLINK_EXT_ACK was set ++ */ ++}; ++ ++/** ++ * enum nlmsgerr_attrs - nlmsgerr attributes ++ * @NLMSGERR_ATTR_UNUSED: unused ++ * @NLMSGERR_ATTR_MSG: error message string (string) ++ * @NLMSGERR_ATTR_OFFS: offset of the invalid attribute in the original ++ * message, counting from the beginning of the header (u32) ++ * @NLMSGERR_ATTR_COOKIE: arbitrary subsystem specific cookie to ++ * be used - in the success case - to identify a created ++ * object or operation or similar (binary) ++ * @__NLMSGERR_ATTR_MAX: number of attributes ++ * @NLMSGERR_ATTR_MAX: highest attribute number ++ */ ++enum nlmsgerr_attrs { ++ NLMSGERR_ATTR_UNUSED, ++ NLMSGERR_ATTR_MSG, ++ NLMSGERR_ATTR_OFFS, ++ NLMSGERR_ATTR_COOKIE, ++ ++ __NLMSGERR_ATTR_MAX, ++ NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1 ++}; ++ ++#define NETLINK_ADD_MEMBERSHIP 1 ++#define NETLINK_DROP_MEMBERSHIP 2 ++#define NETLINK_PKTINFO 3 ++#define NETLINK_BROADCAST_ERROR 4 ++#define NETLINK_NO_ENOBUFS 5 ++#ifndef __KERNEL__ ++#define NETLINK_RX_RING 6 ++#define NETLINK_TX_RING 7 ++#endif ++#define NETLINK_LISTEN_ALL_NSID 8 ++#define NETLINK_LIST_MEMBERSHIPS 9 ++#define NETLINK_CAP_ACK 10 ++#define NETLINK_EXT_ACK 11 ++#define NETLINK_GET_STRICT_CHK 12 ++ ++struct nl_pktinfo { ++ __u32 group; ++}; ++ ++struct nl_mmap_req { ++ unsigned int nm_block_size; ++ unsigned int nm_block_nr; ++ unsigned int nm_frame_size; ++ unsigned int nm_frame_nr; ++}; ++ ++struct nl_mmap_hdr { ++ unsigned int nm_status; ++ unsigned int nm_len; ++ __u32 nm_group; ++ /* credentials */ ++ __u32 nm_pid; ++ __u32 nm_uid; ++ __u32 nm_gid; ++}; ++ ++#ifndef __KERNEL__ ++enum nl_mmap_status { ++ NL_MMAP_STATUS_UNUSED, ++ NL_MMAP_STATUS_RESERVED, ++ NL_MMAP_STATUS_VALID, ++ NL_MMAP_STATUS_COPY, ++ NL_MMAP_STATUS_SKIP, ++}; ++ ++#define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO ++#define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT) ++#define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr)) ++#endif ++ ++#define NET_MAJOR 36 /* Major 36 is reserved for networking */ ++ ++enum { ++ NETLINK_UNCONNECTED = 0, ++ NETLINK_CONNECTED, ++}; ++ ++/* ++ * <------- NLA_HDRLEN ------> <-- NLA_ALIGN(payload)--> ++ * +---------------------+- - -+- - - - - - - - - -+- - -+ ++ * | Header | Pad | Payload | Pad | ++ * | (struct nlattr) | ing | | ing | ++ * +---------------------+- - -+- - - - - - - - - -+- - -+ ++ * <-------------- nlattr->nla_len --------------> ++ */ ++ ++struct nlattr { ++ __u16 nla_len; ++ __u16 nla_type; ++}; ++ ++/* ++ * nla_type (16 bits) ++ * +---+---+-------------------------------+ ++ * | N | O | Attribute Type | ++ * +---+---+-------------------------------+ ++ * N := Carries nested attributes ++ * O := Payload stored in network byte order ++ * ++ * Note: The N and O flag are mutually exclusive. 
++ */ ++#define NLA_F_NESTED (1 << 15) ++#define NLA_F_NET_BYTEORDER (1 << 14) ++#define NLA_TYPE_MASK ~(NLA_F_NESTED | NLA_F_NET_BYTEORDER) ++ ++#define NLA_ALIGNTO 4 ++#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1)) ++#define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr))) ++ ++/* Generic 32 bitflags attribute content sent to the kernel. ++ * ++ * The value is a bitmap that defines the values being set ++ * The selector is a bitmask that defines which value is legit ++ * ++ * Examples: ++ * value = 0x0, and selector = 0x1 ++ * implies we are selecting bit 1 and we want to set its value to 0. ++ * ++ * value = 0x2, and selector = 0x2 ++ * implies we are selecting bit 2 and we want to set its value to 1. ++ * ++ */ ++struct nla_bitfield32 { ++ __u32 value; ++ __u32 selector; ++}; ++ ++#endif /* _UAPI__LINUX_NETLINK_H */ +diff --git a/src/cc/libbpf/include/uapi/linux/perf_event.h b/src/cc/libbpf/include/uapi/linux/perf_event.h +new file mode 100644 +index 0000000..d37629d +--- /dev/null ++++ b/src/cc/libbpf/include/uapi/linux/perf_event.h +@@ -0,0 +1,1395 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * Performance events: ++ * ++ * Copyright (C) 2008-2009, Thomas Gleixner ++ * Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar ++ * Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra ++ * ++ * Data type definitions, declarations, prototypes. ++ * ++ * Started by: Thomas Gleixner and Ingo Molnar ++ * ++ * For licencing details see kernel-base/COPYING ++ */ ++#ifndef _UAPI_LINUX_PERF_EVENT_H ++#define _UAPI_LINUX_PERF_EVENT_H ++ ++#include ++#include ++#include ++ ++/* ++ * User-space ABI bits: ++ */ ++ ++/* ++ * attr.type ++ */ ++enum perf_type_id { ++ PERF_TYPE_HARDWARE = 0, ++ PERF_TYPE_SOFTWARE = 1, ++ PERF_TYPE_TRACEPOINT = 2, ++ PERF_TYPE_HW_CACHE = 3, ++ PERF_TYPE_RAW = 4, ++ PERF_TYPE_BREAKPOINT = 5, ++ ++ PERF_TYPE_MAX, /* non-ABI */ ++}; ++ ++/* ++ * attr.config layout for type PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE ++ * PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA ++ * AA: hardware event ID ++ * EEEEEEEE: PMU type ID ++ * PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB ++ * BB: hardware cache ID ++ * CC: hardware cache op ID ++ * DD: hardware cache op result ID ++ * EEEEEEEE: PMU type ID ++ * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied. 
++ */ ++#define PERF_PMU_TYPE_SHIFT 32 ++#define PERF_HW_EVENT_MASK 0xffffffff ++ ++/* ++ * Generalized performance event event_id types, used by the ++ * attr.event_id parameter of the sys_perf_event_open() ++ * syscall: ++ */ ++enum perf_hw_id { ++ /* ++ * Common hardware events, generalized by the kernel: ++ */ ++ PERF_COUNT_HW_CPU_CYCLES = 0, ++ PERF_COUNT_HW_INSTRUCTIONS = 1, ++ PERF_COUNT_HW_CACHE_REFERENCES = 2, ++ PERF_COUNT_HW_CACHE_MISSES = 3, ++ PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, ++ PERF_COUNT_HW_BRANCH_MISSES = 5, ++ PERF_COUNT_HW_BUS_CYCLES = 6, ++ PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7, ++ PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8, ++ PERF_COUNT_HW_REF_CPU_CYCLES = 9, ++ ++ PERF_COUNT_HW_MAX, /* non-ABI */ ++}; ++ ++/* ++ * Generalized hardware cache events: ++ * ++ * { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x ++ * { read, write, prefetch } x ++ * { accesses, misses } ++ */ ++enum perf_hw_cache_id { ++ PERF_COUNT_HW_CACHE_L1D = 0, ++ PERF_COUNT_HW_CACHE_L1I = 1, ++ PERF_COUNT_HW_CACHE_LL = 2, ++ PERF_COUNT_HW_CACHE_DTLB = 3, ++ PERF_COUNT_HW_CACHE_ITLB = 4, ++ PERF_COUNT_HW_CACHE_BPU = 5, ++ PERF_COUNT_HW_CACHE_NODE = 6, ++ ++ PERF_COUNT_HW_CACHE_MAX, /* non-ABI */ ++}; ++ ++enum perf_hw_cache_op_id { ++ PERF_COUNT_HW_CACHE_OP_READ = 0, ++ PERF_COUNT_HW_CACHE_OP_WRITE = 1, ++ PERF_COUNT_HW_CACHE_OP_PREFETCH = 2, ++ ++ PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */ ++}; ++ ++enum perf_hw_cache_op_result_id { ++ PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0, ++ PERF_COUNT_HW_CACHE_RESULT_MISS = 1, ++ ++ PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */ ++}; ++ ++/* ++ * Special "software" events provided by the kernel, even if the hardware ++ * does not support performance events. These events measure various ++ * physical and sw events of the kernel (and allow the profiling of them as ++ * well): ++ */ ++enum perf_sw_ids { ++ PERF_COUNT_SW_CPU_CLOCK = 0, ++ PERF_COUNT_SW_TASK_CLOCK = 1, ++ PERF_COUNT_SW_PAGE_FAULTS = 2, ++ PERF_COUNT_SW_CONTEXT_SWITCHES = 3, ++ PERF_COUNT_SW_CPU_MIGRATIONS = 4, ++ PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, ++ PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, ++ PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, ++ PERF_COUNT_SW_EMULATION_FAULTS = 8, ++ PERF_COUNT_SW_DUMMY = 9, ++ PERF_COUNT_SW_BPF_OUTPUT = 10, ++ PERF_COUNT_SW_CGROUP_SWITCHES = 11, ++ ++ PERF_COUNT_SW_MAX, /* non-ABI */ ++}; ++ ++/* ++ * Bits that can be set in attr.sample_type to request information ++ * in the overflow packets. 
++ */ ++enum perf_event_sample_format { ++ PERF_SAMPLE_IP = 1U << 0, ++ PERF_SAMPLE_TID = 1U << 1, ++ PERF_SAMPLE_TIME = 1U << 2, ++ PERF_SAMPLE_ADDR = 1U << 3, ++ PERF_SAMPLE_READ = 1U << 4, ++ PERF_SAMPLE_CALLCHAIN = 1U << 5, ++ PERF_SAMPLE_ID = 1U << 6, ++ PERF_SAMPLE_CPU = 1U << 7, ++ PERF_SAMPLE_PERIOD = 1U << 8, ++ PERF_SAMPLE_STREAM_ID = 1U << 9, ++ PERF_SAMPLE_RAW = 1U << 10, ++ PERF_SAMPLE_BRANCH_STACK = 1U << 11, ++ PERF_SAMPLE_REGS_USER = 1U << 12, ++ PERF_SAMPLE_STACK_USER = 1U << 13, ++ PERF_SAMPLE_WEIGHT = 1U << 14, ++ PERF_SAMPLE_DATA_SRC = 1U << 15, ++ PERF_SAMPLE_IDENTIFIER = 1U << 16, ++ PERF_SAMPLE_TRANSACTION = 1U << 17, ++ PERF_SAMPLE_REGS_INTR = 1U << 18, ++ PERF_SAMPLE_PHYS_ADDR = 1U << 19, ++ PERF_SAMPLE_AUX = 1U << 20, ++ PERF_SAMPLE_CGROUP = 1U << 21, ++ PERF_SAMPLE_DATA_PAGE_SIZE = 1U << 22, ++ PERF_SAMPLE_CODE_PAGE_SIZE = 1U << 23, ++ PERF_SAMPLE_WEIGHT_STRUCT = 1U << 24, ++ ++ PERF_SAMPLE_MAX = 1U << 25, /* non-ABI */ ++ ++ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ ++}; ++ ++#define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT) ++/* ++ * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set ++ * ++ * If the user does not pass priv level information via branch_sample_type, ++ * the kernel uses the event's priv level. Branch and event priv levels do ++ * not have to match. Branch priv level is checked for permissions. ++ * ++ * The branch types can be combined, however BRANCH_ANY covers all types ++ * of branches and therefore it supersedes all the other types. ++ */ ++enum perf_branch_sample_type_shift { ++ PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ ++ PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ ++ PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ ++ ++ PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ ++ PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ ++ PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ ++ PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ ++ PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ ++ PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ ++ PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ ++ PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */ ++ ++ PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */ ++ PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */ ++ PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */ ++ ++ PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT = 14, /* no flags */ ++ PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT = 15, /* no cycles */ ++ ++ PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */ ++ ++ PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT = 17, /* save low level index of raw branch records */ ++ ++ PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ ++}; ++ ++enum perf_branch_sample_type { ++ PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, ++ PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, ++ PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, ++ ++ PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, ++ PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, ++ PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, ++ PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, ++ PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, ++ PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, ++ 
PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, ++ PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, ++ ++ PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, ++ PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, ++ PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, ++ ++ PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, ++ PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, ++ ++ PERF_SAMPLE_BRANCH_TYPE_SAVE = ++ 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, ++ ++ PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, ++ ++ PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, ++}; ++ ++/* ++ * Common flow change classification ++ */ ++enum { ++ PERF_BR_UNKNOWN = 0, /* unknown */ ++ PERF_BR_COND = 1, /* conditional */ ++ PERF_BR_UNCOND = 2, /* unconditional */ ++ PERF_BR_IND = 3, /* indirect */ ++ PERF_BR_CALL = 4, /* function call */ ++ PERF_BR_IND_CALL = 5, /* indirect function call */ ++ PERF_BR_RET = 6, /* function return */ ++ PERF_BR_SYSCALL = 7, /* syscall */ ++ PERF_BR_SYSRET = 8, /* syscall return */ ++ PERF_BR_COND_CALL = 9, /* conditional function call */ ++ PERF_BR_COND_RET = 10, /* conditional function return */ ++ PERF_BR_ERET = 11, /* exception return */ ++ PERF_BR_IRQ = 12, /* irq */ ++ PERF_BR_MAX, ++}; ++ ++#define PERF_SAMPLE_BRANCH_PLM_ALL \ ++ (PERF_SAMPLE_BRANCH_USER|\ ++ PERF_SAMPLE_BRANCH_KERNEL|\ ++ PERF_SAMPLE_BRANCH_HV) ++ ++/* ++ * Values to determine ABI of the registers dump. ++ */ ++enum perf_sample_regs_abi { ++ PERF_SAMPLE_REGS_ABI_NONE = 0, ++ PERF_SAMPLE_REGS_ABI_32 = 1, ++ PERF_SAMPLE_REGS_ABI_64 = 2, ++}; ++ ++/* ++ * Values for the memory transaction event qualifier, mostly for ++ * abort events. Multiple bits can be set. 
++ */ ++enum { ++ PERF_TXN_ELISION = (1 << 0), /* From elision */ ++ PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ ++ PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ ++ PERF_TXN_ASYNC = (1 << 3), /* Instruction not related */ ++ PERF_TXN_RETRY = (1 << 4), /* Retry possible */ ++ PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ ++ PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ ++ PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ ++ ++ PERF_TXN_MAX = (1 << 8), /* non-ABI */ ++ ++ /* bits 32..63 are reserved for the abort code */ ++ ++ PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), ++ PERF_TXN_ABORT_SHIFT = 32, ++}; ++ ++/* ++ * The format of the data returned by read() on a perf event fd, ++ * as specified by attr.read_format: ++ * ++ * struct read_format { ++ * { u64 value; ++ * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED ++ * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING ++ * { u64 id; } && PERF_FORMAT_ID ++ * } && !PERF_FORMAT_GROUP ++ * ++ * { u64 nr; ++ * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED ++ * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING ++ * { u64 value; ++ * { u64 id; } && PERF_FORMAT_ID ++ * } cntr[nr]; ++ * } && PERF_FORMAT_GROUP ++ * }; ++ */ ++enum perf_event_read_format { ++ PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, ++ PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, ++ PERF_FORMAT_ID = 1U << 2, ++ PERF_FORMAT_GROUP = 1U << 3, ++ ++ PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ ++}; ++ ++#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ ++#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ ++#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ ++#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */ ++ /* add: sample_stack_user */ ++#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ ++#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ ++#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */ ++#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */ ++ ++/* ++ * Hardware event_id to monitor via a performance monitoring event: ++ * ++ * @sample_max_stack: Max number of frame pointers in a callchain, ++ * should be < /proc/sys/kernel/perf_event_max_stack ++ */ ++struct perf_event_attr { ++ ++ /* ++ * Major type: hardware/software/tracepoint/etc. ++ */ ++ __u32 type; ++ ++ /* ++ * Size of the attr structure, for fwd/bwd compat. ++ */ ++ __u32 size; ++ ++ /* ++ * Type specific configuration information. 
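/*
 * Editor's illustrative sketch, not part of this patch: decoding a read() of
 * a single counter opened with read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
 * PERF_FORMAT_TOTAL_TIME_RUNNING, i.e. the !PERF_FORMAT_GROUP layout shown in
 * the comment above. "fd" is assumed to be an already-open perf event fd; the
 * struct and function names are made up for illustration.
 */
#include <unistd.h>
#include <linux/perf_event.h>

struct single_read {
	__u64 value;
	__u64 time_enabled;	/* PERF_FORMAT_TOTAL_TIME_ENABLED */
	__u64 time_running;	/* PERF_FORMAT_TOTAL_TIME_RUNNING */
};

static int read_counter_scaled(int fd, double *out)
{
	struct single_read r;

	if (read(fd, &r, sizeof(r)) != (ssize_t)sizeof(r))
		return -1;
	/* Compensate for multiplexing: scale value by enabled/running time. */
	*out = r.time_running ?
		(double)r.value * r.time_enabled / r.time_running : 0.0;
	return 0;
}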
++ */ ++ __u64 config; ++ ++ union { ++ __u64 sample_period; ++ __u64 sample_freq; ++ }; ++ ++ __u64 sample_type; ++ __u64 read_format; ++ ++ __u64 disabled : 1, /* off by default */ ++ inherit : 1, /* children inherit it */ ++ pinned : 1, /* must always be on PMU */ ++ exclusive : 1, /* only group on PMU */ ++ exclude_user : 1, /* don't count user */ ++ exclude_kernel : 1, /* ditto kernel */ ++ exclude_hv : 1, /* ditto hypervisor */ ++ exclude_idle : 1, /* don't count when idle */ ++ mmap : 1, /* include mmap data */ ++ comm : 1, /* include comm data */ ++ freq : 1, /* use freq, not period */ ++ inherit_stat : 1, /* per task counts */ ++ enable_on_exec : 1, /* next exec enables */ ++ task : 1, /* trace fork/exit */ ++ watermark : 1, /* wakeup_watermark */ ++ /* ++ * precise_ip: ++ * ++ * 0 - SAMPLE_IP can have arbitrary skid ++ * 1 - SAMPLE_IP must have constant skid ++ * 2 - SAMPLE_IP requested to have 0 skid ++ * 3 - SAMPLE_IP must have 0 skid ++ * ++ * See also PERF_RECORD_MISC_EXACT_IP ++ */ ++ precise_ip : 2, /* skid constraint */ ++ mmap_data : 1, /* non-exec mmap data */ ++ sample_id_all : 1, /* sample_type all events */ ++ ++ exclude_host : 1, /* don't count in host */ ++ exclude_guest : 1, /* don't count in guest */ ++ ++ exclude_callchain_kernel : 1, /* exclude kernel callchains */ ++ exclude_callchain_user : 1, /* exclude user callchains */ ++ mmap2 : 1, /* include mmap with inode data */ ++ comm_exec : 1, /* flag comm events that are due to an exec */ ++ use_clockid : 1, /* use @clockid for time fields */ ++ context_switch : 1, /* context switch data */ ++ write_backward : 1, /* Write ring buffer from end to beginning */ ++ namespaces : 1, /* include namespaces data */ ++ ksymbol : 1, /* include ksymbol events */ ++ bpf_event : 1, /* include bpf events */ ++ aux_output : 1, /* generate AUX records instead of events */ ++ cgroup : 1, /* include cgroup events */ ++ text_poke : 1, /* include text poke events */ ++ build_id : 1, /* use build id in mmap2 events */ ++ inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */ ++ remove_on_exec : 1, /* event is removed from task on exec */ ++ sigtrap : 1, /* send synchronous SIGTRAP on event */ ++ __reserved_1 : 26; ++ ++ union { ++ __u32 wakeup_events; /* wakeup every n events */ ++ __u32 wakeup_watermark; /* bytes before wakeup */ ++ }; ++ ++ __u32 bp_type; ++ union { ++ __u64 bp_addr; ++ __u64 kprobe_func; /* for perf_kprobe */ ++ __u64 uprobe_path; /* for perf_uprobe */ ++ __u64 config1; /* extension of config */ ++ }; ++ union { ++ __u64 bp_len; ++ __u64 kprobe_addr; /* when kprobe_func == NULL */ ++ __u64 probe_offset; /* for perf_[k,u]probe */ ++ __u64 config2; /* extension of config1 */ ++ }; ++ __u64 branch_sample_type; /* enum perf_branch_sample_type */ ++ ++ /* ++ * Defines set of user regs to dump on samples. ++ * See asm/perf_regs.h for details. ++ */ ++ __u64 sample_regs_user; ++ ++ /* ++ * Defines size of the user stack to dump on samples. ++ */ ++ __u32 sample_stack_user; ++ ++ __s32 clockid; ++ /* ++ * Defines set of regs to dump for each sample ++ * state captured on: ++ * - precise = 0: PMU interrupt ++ * - precise > 0: sampled instruction ++ * ++ * See asm/perf_regs.h for details. ++ */ ++ __u64 sample_regs_intr; ++ ++ /* ++ * Wakeup watermark for AUX area ++ */ ++ __u32 aux_watermark; ++ __u16 sample_max_stack; ++ __u16 __reserved_2; ++ __u32 aux_sample_size; ++ __u32 __reserved_3; ++ ++ /* ++ * User provided data if sigtrap=1, passed back to user via ++ * siginfo_t::si_perf_data, e.g. 
to permit user to identify the event. ++ * Note, siginfo_t::si_perf_data is long-sized, and sig_data will be ++ * truncated accordingly on 32 bit architectures. ++ */ ++ __u64 sig_data; ++}; ++ ++/* ++ * Structure used by below PERF_EVENT_IOC_QUERY_BPF command ++ * to query bpf programs attached to the same perf tracepoint ++ * as the given perf event. ++ */ ++struct perf_event_query_bpf { ++ /* ++ * The below ids array length ++ */ ++ __u32 ids_len; ++ /* ++ * Set by the kernel to indicate the number of ++ * available programs ++ */ ++ __u32 prog_cnt; ++ /* ++ * User provided buffer to store program ids ++ */ ++ __u32 ids[0]; ++}; ++ ++/* ++ * Ioctls that can be done on a perf event fd: ++ */ ++#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) ++#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) ++#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) ++#define PERF_EVENT_IOC_RESET _IO ('$', 3) ++#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64) ++#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) ++#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) ++#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) ++#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) ++#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32) ++#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *) ++#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW('$', 11, struct perf_event_attr *) ++ ++enum perf_event_ioc_flags { ++ PERF_IOC_FLAG_GROUP = 1U << 0, ++}; ++ ++/* ++ * Structure of the page that can be mapped via mmap ++ */ ++struct perf_event_mmap_page { ++ __u32 version; /* version number of this structure */ ++ __u32 compat_version; /* lowest version this is compat with */ ++ ++ /* ++ * Bits needed to read the hw events in user-space. ++ * ++ * u32 seq, time_mult, time_shift, index, width; ++ * u64 count, enabled, running; ++ * u64 cyc, time_offset; ++ * s64 pmc = 0; ++ * ++ * do { ++ * seq = pc->lock; ++ * barrier() ++ * ++ * enabled = pc->time_enabled; ++ * running = pc->time_running; ++ * ++ * if (pc->cap_usr_time && enabled != running) { ++ * cyc = rdtsc(); ++ * time_offset = pc->time_offset; ++ * time_mult = pc->time_mult; ++ * time_shift = pc->time_shift; ++ * } ++ * ++ * index = pc->index; ++ * count = pc->offset; ++ * if (pc->cap_user_rdpmc && index) { ++ * width = pc->pmc_width; ++ * pmc = rdpmc(index - 1); ++ * } ++ * ++ * barrier(); ++ * } while (pc->lock != seq); ++ * ++ * NOTE: for obvious reason this only works on self-monitoring ++ * processes. ++ */ ++ __u32 lock; /* seqlock for synchronization */ ++ __u32 index; /* hardware event identifier */ ++ __s64 offset; /* add to hardware event value */ ++ __u64 time_enabled; /* time event active */ ++ __u64 time_running; /* time event on cpu */ ++ union { ++ __u64 capabilities; ++ struct { ++ __u64 cap_bit0 : 1, /* Always 0, deprecated, see commit 860f085b74e9 */ ++ cap_bit0_is_deprecated : 1, /* Always 1, signals that bit 0 is zero */ ++ ++ cap_user_rdpmc : 1, /* The RDPMC instruction can be used to read counts */ ++ cap_user_time : 1, /* The time_{shift,mult,offset} fields are used */ ++ cap_user_time_zero : 1, /* The time_zero field is used */ ++ cap_user_time_short : 1, /* the time_{cycle,mask} fields are used */ ++ cap_____res : 58; ++ }; ++ }; ++ ++ /* ++ * If cap_user_rdpmc this field provides the bit-width of the value ++ * read using the rdpmc() or equivalent instruction. 
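/*
 * Editor's illustrative sketch, not part of this patch: opening a CPU-cycles
 * counter for the calling thread and driving it with the ioctls defined
 * above. There is no glibc wrapper for perf_event_open, so the raw syscall is
 * used; the function name is made up for illustration.
 */
#include <unistd.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int open_cycles_counter(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_HARDWARE;
	attr.size = sizeof(attr);		/* fwd/bwd compat, see "size" above */
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.disabled = 1;			/* start stopped ... */
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;

	fd = syscall(__NR_perf_event_open, &attr, 0 /* this thread */,
		     -1 /* any cpu */, -1 /* no group */, PERF_FLAG_FD_CLOEXEC);
	if (fd < 0)
		return -1;

	ioctl(fd, PERF_EVENT_IOC_RESET, 0);	/* ... then reset and enable */
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	return fd;
}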
This can be used ++ * to sign extend the result like: ++ * ++ * pmc <<= 64 - width; ++ * pmc >>= 64 - width; // signed shift right ++ * count += pmc; ++ */ ++ __u16 pmc_width; ++ ++ /* ++ * If cap_usr_time the below fields can be used to compute the time ++ * delta since time_enabled (in ns) using rdtsc or similar. ++ * ++ * u64 quot, rem; ++ * u64 delta; ++ * ++ * quot = (cyc >> time_shift); ++ * rem = cyc & (((u64)1 << time_shift) - 1); ++ * delta = time_offset + quot * time_mult + ++ * ((rem * time_mult) >> time_shift); ++ * ++ * Where time_offset,time_mult,time_shift and cyc are read in the ++ * seqcount loop described above. This delta can then be added to ++ * enabled and possible running (if index), improving the scaling: ++ * ++ * enabled += delta; ++ * if (index) ++ * running += delta; ++ * ++ * quot = count / running; ++ * rem = count % running; ++ * count = quot * enabled + (rem * enabled) / running; ++ */ ++ __u16 time_shift; ++ __u32 time_mult; ++ __u64 time_offset; ++ /* ++ * If cap_usr_time_zero, the hardware clock (e.g. TSC) can be calculated ++ * from sample timestamps. ++ * ++ * time = timestamp - time_zero; ++ * quot = time / time_mult; ++ * rem = time % time_mult; ++ * cyc = (quot << time_shift) + (rem << time_shift) / time_mult; ++ * ++ * And vice versa: ++ * ++ * quot = cyc >> time_shift; ++ * rem = cyc & (((u64)1 << time_shift) - 1); ++ * timestamp = time_zero + quot * time_mult + ++ * ((rem * time_mult) >> time_shift); ++ */ ++ __u64 time_zero; ++ ++ __u32 size; /* Header size up to __reserved[] fields. */ ++ __u32 __reserved_1; ++ ++ /* ++ * If cap_usr_time_short, the hardware clock is less than 64bit wide ++ * and we must compute the 'cyc' value, as used by cap_usr_time, as: ++ * ++ * cyc = time_cycles + ((cyc - time_cycles) & time_mask) ++ * ++ * NOTE: this form is explicitly chosen such that cap_usr_time_short ++ * is a correction on top of cap_usr_time, and code that doesn't ++ * know about cap_usr_time_short still works under the assumption ++ * the counter doesn't wrap. ++ */ ++ __u64 time_cycles; ++ __u64 time_mask; ++ ++ /* ++ * Hole for extension of the self monitor capabilities ++ */ ++ ++ __u8 __reserved[116*8]; /* align to 1k. */ ++ ++ /* ++ * Control data for the mmap() data buffer. ++ * ++ * User-space reading the @data_head value should issue an smp_rmb(), ++ * after reading this value. ++ * ++ * When the mapping is PROT_WRITE the @data_tail value should be ++ * written by userspace to reflect the last read data, after issueing ++ * an smp_mb() to separate the data read from the ->data_tail store. ++ * In this case the kernel will not over-write unread data. ++ * ++ * See perf_output_put_handle() for the data ordering. ++ * ++ * data_{offset,size} indicate the location and size of the perf record ++ * buffer within the mmapped area. ++ */ ++ __u64 data_head; /* head in the data section */ ++ __u64 data_tail; /* user-space written tail */ ++ __u64 data_offset; /* where the buffer starts */ ++ __u64 data_size; /* data buffer size */ ++ ++ /* ++ * AUX area is defined by aux_{offset,size} fields that should be set ++ * by the userspace, so that ++ * ++ * aux_offset >= data_offset + data_size ++ * ++ * prior to mmap()ing it. Size of the mmap()ed area should be aux_size. ++ * ++ * Ring buffer pointers aux_{head,tail} have the same semantics as ++ * data_{head,tail} and same ordering rules apply. 
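/*
 * Editor's illustrative sketch, not part of this patch: draining the
 * mmap()ed data area while honouring the data_head/data_tail ordering rules
 * described above. Acquire/release atomics stand in for the smp_rmb()/smp_mb()
 * pairing; "page" is assumed to point at the mapped perf_event_mmap_page and
 * handle_record() is a made-up callback. For simplicity this sketch assumes
 * no record wraps past the end of the buffer; a real consumer must handle
 * that case.
 */
#include <linux/perf_event.h>

static void drain_ring(struct perf_event_mmap_page *page,
		       void (*handle_record)(struct perf_event_header *))
{
	unsigned char *data = (unsigned char *)page + page->data_offset;
	__u64 head, tail;

	/* Acquire pairs with the kernel's store to data_head. */
	head = __atomic_load_n(&page->data_head, __ATOMIC_ACQUIRE);
	tail = page->data_tail;

	while (tail < head) {
		struct perf_event_header *hdr =
			(struct perf_event_header *)(data + (tail % page->data_size));

		handle_record(hdr);
		tail += hdr->size;
	}
	/* Release publishes the reads before telling the kernel we are done. */
	__atomic_store_n(&page->data_tail, tail, __ATOMIC_RELEASE);
}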
++ */ ++ __u64 aux_head; ++ __u64 aux_tail; ++ __u64 aux_offset; ++ __u64 aux_size; ++}; ++ ++/* ++ * The current state of perf_event_header::misc bits usage: ++ * ('|' used bit, '-' unused bit) ++ * ++ * 012 CDEF ++ * |||---------|||| ++ * ++ * Where: ++ * 0-2 CPUMODE_MASK ++ * ++ * C PROC_MAP_PARSE_TIMEOUT ++ * D MMAP_DATA / COMM_EXEC / FORK_EXEC / SWITCH_OUT ++ * E MMAP_BUILD_ID / EXACT_IP / SCHED_OUT_PREEMPT ++ * F (reserved) ++ */ ++ ++#define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0) ++#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) ++#define PERF_RECORD_MISC_KERNEL (1 << 0) ++#define PERF_RECORD_MISC_USER (2 << 0) ++#define PERF_RECORD_MISC_HYPERVISOR (3 << 0) ++#define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0) ++#define PERF_RECORD_MISC_GUEST_USER (5 << 0) ++ ++/* ++ * Indicates that /proc/PID/maps parsing are truncated by time out. ++ */ ++#define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT (1 << 12) ++/* ++ * Following PERF_RECORD_MISC_* are used on different ++ * events, so can reuse the same bit position: ++ * ++ * PERF_RECORD_MISC_MMAP_DATA - PERF_RECORD_MMAP* events ++ * PERF_RECORD_MISC_COMM_EXEC - PERF_RECORD_COMM event ++ * PERF_RECORD_MISC_FORK_EXEC - PERF_RECORD_FORK event (perf internal) ++ * PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events ++ */ ++#define PERF_RECORD_MISC_MMAP_DATA (1 << 13) ++#define PERF_RECORD_MISC_COMM_EXEC (1 << 13) ++#define PERF_RECORD_MISC_FORK_EXEC (1 << 13) ++#define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) ++/* ++ * These PERF_RECORD_MISC_* flags below are safely reused ++ * for the following events: ++ * ++ * PERF_RECORD_MISC_EXACT_IP - PERF_RECORD_SAMPLE of precise events ++ * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT - PERF_RECORD_SWITCH* events ++ * PERF_RECORD_MISC_MMAP_BUILD_ID - PERF_RECORD_MMAP2 event ++ * ++ * ++ * PERF_RECORD_MISC_EXACT_IP: ++ * Indicates that the content of PERF_SAMPLE_IP points to ++ * the actual instruction that triggered the event. See also ++ * perf_event_attr::precise_ip. ++ * ++ * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT: ++ * Indicates that thread was preempted in TASK_RUNNING state. ++ * ++ * PERF_RECORD_MISC_MMAP_BUILD_ID: ++ * Indicates that mmap2 event carries build id data. ++ */ ++#define PERF_RECORD_MISC_EXACT_IP (1 << 14) ++#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14) ++#define PERF_RECORD_MISC_MMAP_BUILD_ID (1 << 14) ++/* ++ * Reserve the last bit to indicate some extended misc field ++ */ ++#define PERF_RECORD_MISC_EXT_RESERVED (1 << 15) ++ ++struct perf_event_header { ++ __u32 type; ++ __u16 misc; ++ __u16 size; ++}; ++ ++struct perf_ns_link_info { ++ __u64 dev; ++ __u64 ino; ++}; ++ ++enum { ++ NET_NS_INDEX = 0, ++ UTS_NS_INDEX = 1, ++ IPC_NS_INDEX = 2, ++ PID_NS_INDEX = 3, ++ USER_NS_INDEX = 4, ++ MNT_NS_INDEX = 5, ++ CGROUP_NS_INDEX = 6, ++ ++ NR_NAMESPACES, /* number of available namespaces */ ++}; ++ ++enum perf_event_type { ++ ++ /* ++ * If perf_event_attr.sample_id_all is set then all event types will ++ * have the sample_type selected fields related to where/when ++ * (identity) an event took place (TID, TIME, ID, STREAM_ID, CPU, ++ * IDENTIFIER) described in PERF_RECORD_SAMPLE below, it will be stashed ++ * just after the perf_event_header and the fields already present for ++ * the existing fields, i.e. at the end of the payload. That way a newer ++ * perf.data file will be supported by older perf tools, with these new ++ * optional fields being ignored. 
++ * ++ * struct sample_id { ++ * { u32 pid, tid; } && PERF_SAMPLE_TID ++ * { u64 time; } && PERF_SAMPLE_TIME ++ * { u64 id; } && PERF_SAMPLE_ID ++ * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID ++ * { u32 cpu, res; } && PERF_SAMPLE_CPU ++ * { u64 id; } && PERF_SAMPLE_IDENTIFIER ++ * } && perf_event_attr::sample_id_all ++ * ++ * Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID. The ++ * advantage of PERF_SAMPLE_IDENTIFIER is that its position is fixed ++ * relative to header.size. ++ */ ++ ++ /* ++ * The MMAP events record the PROT_EXEC mappings so that we can ++ * correlate userspace IPs to code. They have the following structure: ++ * ++ * struct { ++ * struct perf_event_header header; ++ * ++ * u32 pid, tid; ++ * u64 addr; ++ * u64 len; ++ * u64 pgoff; ++ * char filename[]; ++ * struct sample_id sample_id; ++ * }; ++ */ ++ PERF_RECORD_MMAP = 1, ++ ++ /* ++ * struct { ++ * struct perf_event_header header; ++ * u64 id; ++ * u64 lost; ++ * struct sample_id sample_id; ++ * }; ++ */ ++ PERF_RECORD_LOST = 2, ++ ++ /* ++ * struct { ++ * struct perf_event_header header; ++ * ++ * u32 pid, tid; ++ * char comm[]; ++ * struct sample_id sample_id; ++ * }; ++ */ ++ PERF_RECORD_COMM = 3, ++ ++ /* ++ * struct { ++ * struct perf_event_header header; ++ * u32 pid, ppid; ++ * u32 tid, ptid; ++ * u64 time; ++ * struct sample_id sample_id; ++ * }; ++ */ ++ PERF_RECORD_EXIT = 4, ++ ++ /* ++ * struct { ++ * struct perf_event_header header; ++ * u64 time; ++ * u64 id; ++ * u64 stream_id; ++ * struct sample_id sample_id; ++ * }; ++ */ ++ PERF_RECORD_THROTTLE = 5, ++ PERF_RECORD_UNTHROTTLE = 6, ++ ++ /* ++ * struct { ++ * struct perf_event_header header; ++ * u32 pid, ppid; ++ * u32 tid, ptid; ++ * u64 time; ++ * struct sample_id sample_id; ++ * }; ++ */ ++ PERF_RECORD_FORK = 7, ++ ++ /* ++ * struct { ++ * struct perf_event_header header; ++ * u32 pid, tid; ++ * ++ * struct read_format values; ++ * struct sample_id sample_id; ++ * }; ++ */ ++ PERF_RECORD_READ = 8, ++ ++ /* ++ * struct { ++ * struct perf_event_header header; ++ * ++ * # ++ * # Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID. ++ * # The advantage of PERF_SAMPLE_IDENTIFIER is that its position ++ * # is fixed relative to header. ++ * # ++ * ++ * { u64 id; } && PERF_SAMPLE_IDENTIFIER ++ * { u64 ip; } && PERF_SAMPLE_IP ++ * { u32 pid, tid; } && PERF_SAMPLE_TID ++ * { u64 time; } && PERF_SAMPLE_TIME ++ * { u64 addr; } && PERF_SAMPLE_ADDR ++ * { u64 id; } && PERF_SAMPLE_ID ++ * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID ++ * { u32 cpu, res; } && PERF_SAMPLE_CPU ++ * { u64 period; } && PERF_SAMPLE_PERIOD ++ * ++ * { struct read_format values; } && PERF_SAMPLE_READ ++ * ++ * { u64 nr, ++ * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN ++ * ++ * # ++ * # The RAW record below is opaque data wrt the ABI ++ * # ++ * # That is, the ABI doesn't make any promises wrt to ++ * # the stability of its content, it may vary depending ++ * # on event, hardware, kernel version and phase of ++ * # the moon. ++ * # ++ * # In other words, PERF_SAMPLE_RAW contents are not an ABI. 
++ * # ++ * ++ * { u32 size; ++ * char data[size];}&& PERF_SAMPLE_RAW ++ * ++ * { u64 nr; ++ * { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX ++ * { u64 from, to, flags } lbr[nr]; ++ * } && PERF_SAMPLE_BRANCH_STACK ++ * ++ * { u64 abi; # enum perf_sample_regs_abi ++ * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER ++ * ++ * { u64 size; ++ * char data[size]; ++ * u64 dyn_size; } && PERF_SAMPLE_STACK_USER ++ * ++ * { union perf_sample_weight ++ * { ++ * u64 full; && PERF_SAMPLE_WEIGHT ++ * #if defined(__LITTLE_ENDIAN_BITFIELD) ++ * struct { ++ * u32 var1_dw; ++ * u16 var2_w; ++ * u16 var3_w; ++ * } && PERF_SAMPLE_WEIGHT_STRUCT ++ * #elif defined(__BIG_ENDIAN_BITFIELD) ++ * struct { ++ * u16 var3_w; ++ * u16 var2_w; ++ * u32 var1_dw; ++ * } && PERF_SAMPLE_WEIGHT_STRUCT ++ * #endif ++ * } ++ * } ++ * { u64 data_src; } && PERF_SAMPLE_DATA_SRC ++ * { u64 transaction; } && PERF_SAMPLE_TRANSACTION ++ * { u64 abi; # enum perf_sample_regs_abi ++ * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR ++ * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR ++ * { u64 size; ++ * char data[size]; } && PERF_SAMPLE_AUX ++ * { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE ++ * { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE ++ * }; ++ */ ++ PERF_RECORD_SAMPLE = 9, ++ ++ /* ++ * The MMAP2 records are an augmented version of MMAP, they add ++ * maj, min, ino numbers to be used to uniquely identify each mapping ++ * ++ * struct { ++ * struct perf_event_header header; ++ * ++ * u32 pid, tid; ++ * u64 addr; ++ * u64 len; ++ * u64 pgoff; ++ * union { ++ * struct { ++ * u32 maj; ++ * u32 min; ++ * u64 ino; ++ * u64 ino_generation; ++ * }; ++ * struct { ++ * u8 build_id_size; ++ * u8 __reserved_1; ++ * u16 __reserved_2; ++ * u8 build_id[20]; ++ * }; ++ * }; ++ * u32 prot, flags; ++ * char filename[]; ++ * struct sample_id sample_id; ++ * }; ++ */ ++ PERF_RECORD_MMAP2 = 10, ++ ++ /* ++ * Records that new data landed in the AUX buffer part. ++ * ++ * struct { ++ * struct perf_event_header header; ++ * ++ * u64 aux_offset; ++ * u64 aux_size; ++ * u64 flags; ++ * struct sample_id sample_id; ++ * }; ++ */ ++ PERF_RECORD_AUX = 11, ++ ++ /* ++ * Indicates that instruction trace has started ++ * ++ * struct { ++ * struct perf_event_header header; ++ * u32 pid; ++ * u32 tid; ++ * struct sample_id sample_id; ++ * }; ++ */ ++ PERF_RECORD_ITRACE_START = 12, ++ ++ /* ++ * Records the dropped/lost sample number. ++ * ++ * struct { ++ * struct perf_event_header header; ++ * ++ * u64 lost; ++ * struct sample_id sample_id; ++ * }; ++ */ ++ PERF_RECORD_LOST_SAMPLES = 13, ++ ++ /* ++ * Records a context switch in or out (flagged by ++ * PERF_RECORD_MISC_SWITCH_OUT). See also ++ * PERF_RECORD_SWITCH_CPU_WIDE. ++ * ++ * struct { ++ * struct perf_event_header header; ++ * struct sample_id sample_id; ++ * }; ++ */ ++ PERF_RECORD_SWITCH = 14, ++ ++ /* ++ * CPU-wide version of PERF_RECORD_SWITCH with next_prev_pid and ++ * next_prev_tid that are the next (switching out) or previous ++ * (switching in) pid/tid. 
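/*
 * Editor's illustrative sketch, not part of this patch: decoding a
 * PERF_RECORD_SAMPLE whose event was opened with sample_type =
 * PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME, following the field
 * order in the layout comment above. The struct below matches only that
 * exact sample_type combination; handle_record() is the same made-up
 * callback used in the ring-buffer sketch earlier.
 */
#include <stdio.h>
#include <linux/perf_event.h>

struct ip_tid_time_sample {
	struct perf_event_header header;
	__u64 ip;		/* PERF_SAMPLE_IP */
	__u32 pid, tid;		/* PERF_SAMPLE_TID */
	__u64 time;		/* PERF_SAMPLE_TIME */
};

static void handle_record(struct perf_event_header *hdr)
{
	if (hdr->type == PERF_RECORD_SAMPLE) {
		struct ip_tid_time_sample *s = (struct ip_tid_time_sample *)hdr;

		printf("ip=%llx pid=%u tid=%u time=%llu\n",
		       (unsigned long long)s->ip, s->pid, s->tid,
		       (unsigned long long)s->time);
	} else if (hdr->type == PERF_RECORD_LOST) {
		/* header is followed by u64 id, u64 lost (see layout above) */
	}
}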
++ * ++ * struct { ++ * struct perf_event_header header; ++ * u32 next_prev_pid; ++ * u32 next_prev_tid; ++ * struct sample_id sample_id; ++ * }; ++ */ ++ PERF_RECORD_SWITCH_CPU_WIDE = 15, ++ ++ /* ++ * struct { ++ * struct perf_event_header header; ++ * u32 pid; ++ * u32 tid; ++ * u64 nr_namespaces; ++ * { u64 dev, inode; } [nr_namespaces]; ++ * struct sample_id sample_id; ++ * }; ++ */ ++ PERF_RECORD_NAMESPACES = 16, ++ ++ /* ++ * Record ksymbol register/unregister events: ++ * ++ * struct { ++ * struct perf_event_header header; ++ * u64 addr; ++ * u32 len; ++ * u16 ksym_type; ++ * u16 flags; ++ * char name[]; ++ * struct sample_id sample_id; ++ * }; ++ */ ++ PERF_RECORD_KSYMBOL = 17, ++ ++ /* ++ * Record bpf events: ++ * enum perf_bpf_event_type { ++ * PERF_BPF_EVENT_UNKNOWN = 0, ++ * PERF_BPF_EVENT_PROG_LOAD = 1, ++ * PERF_BPF_EVENT_PROG_UNLOAD = 2, ++ * }; ++ * ++ * struct { ++ * struct perf_event_header header; ++ * u16 type; ++ * u16 flags; ++ * u32 id; ++ * u8 tag[BPF_TAG_SIZE]; ++ * struct sample_id sample_id; ++ * }; ++ */ ++ PERF_RECORD_BPF_EVENT = 18, ++ ++ /* ++ * struct { ++ * struct perf_event_header header; ++ * u64 id; ++ * char path[]; ++ * struct sample_id sample_id; ++ * }; ++ */ ++ PERF_RECORD_CGROUP = 19, ++ ++ /* ++ * Records changes to kernel text i.e. self-modified code. 'old_len' is ++ * the number of old bytes, 'new_len' is the number of new bytes. Either ++ * 'old_len' or 'new_len' may be zero to indicate, for example, the ++ * addition or removal of a trampoline. 'bytes' contains the old bytes ++ * followed immediately by the new bytes. ++ * ++ * struct { ++ * struct perf_event_header header; ++ * u64 addr; ++ * u16 old_len; ++ * u16 new_len; ++ * u8 bytes[]; ++ * struct sample_id sample_id; ++ * }; ++ */ ++ PERF_RECORD_TEXT_POKE = 20, ++ ++ /* ++ * Data written to the AUX area by hardware due to aux_output, may need ++ * to be matched to the event by an architecture-specific hardware ID. ++ * This records the hardware ID, but requires sample_id to provide the ++ * event ID. e.g. Intel PT uses this record to disambiguate PEBS-via-PT ++ * records from multiple events. ++ * ++ * struct { ++ * struct perf_event_header header; ++ * u64 hw_id; ++ * struct sample_id sample_id; ++ * }; ++ */ ++ PERF_RECORD_AUX_OUTPUT_HW_ID = 21, ++ ++ PERF_RECORD_MAX, /* non-ABI */ ++}; ++ ++enum perf_record_ksymbol_type { ++ PERF_RECORD_KSYMBOL_TYPE_UNKNOWN = 0, ++ PERF_RECORD_KSYMBOL_TYPE_BPF = 1, ++ /* ++ * Out of line code such as kprobe-replaced instructions or optimized ++ * kprobes or ftrace trampolines. 
++ */ ++ PERF_RECORD_KSYMBOL_TYPE_OOL = 2, ++ PERF_RECORD_KSYMBOL_TYPE_MAX /* non-ABI */ ++}; ++ ++#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0) ++ ++enum perf_bpf_event_type { ++ PERF_BPF_EVENT_UNKNOWN = 0, ++ PERF_BPF_EVENT_PROG_LOAD = 1, ++ PERF_BPF_EVENT_PROG_UNLOAD = 2, ++ PERF_BPF_EVENT_MAX, /* non-ABI */ ++}; ++ ++#define PERF_MAX_STACK_DEPTH 127 ++#define PERF_MAX_CONTEXTS_PER_STACK 8 ++ ++enum perf_callchain_context { ++ PERF_CONTEXT_HV = (__u64)-32, ++ PERF_CONTEXT_KERNEL = (__u64)-128, ++ PERF_CONTEXT_USER = (__u64)-512, ++ ++ PERF_CONTEXT_GUEST = (__u64)-2048, ++ PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, ++ PERF_CONTEXT_GUEST_USER = (__u64)-2560, ++ ++ PERF_CONTEXT_MAX = (__u64)-4095, ++}; ++ ++/** ++ * PERF_RECORD_AUX::flags bits ++ */ ++#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ ++#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ ++#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ ++#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ ++#define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */ ++ ++/* CoreSight PMU AUX buffer formats */ ++#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ ++#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ ++ ++#define PERF_FLAG_FD_NO_GROUP (1UL << 0) ++#define PERF_FLAG_FD_OUTPUT (1UL << 1) ++#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ ++#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ ++ ++#if defined(__LITTLE_ENDIAN_BITFIELD) ++union perf_mem_data_src { ++ __u64 val; ++ struct { ++ __u64 mem_op:5, /* type of opcode */ ++ mem_lvl:14, /* memory hierarchy level */ ++ mem_snoop:5, /* snoop mode */ ++ mem_lock:2, /* lock instr */ ++ mem_dtlb:7, /* tlb access */ ++ mem_lvl_num:4, /* memory hierarchy level number */ ++ mem_remote:1, /* remote */ ++ mem_snoopx:2, /* snoop mode, ext */ ++ mem_blk:3, /* access blocked */ ++ mem_hops:3, /* hop level */ ++ mem_rsvd:18; ++ }; ++}; ++#elif defined(__BIG_ENDIAN_BITFIELD) ++union perf_mem_data_src { ++ __u64 val; ++ struct { ++ __u64 mem_rsvd:18, ++ mem_hops:3, /* hop level */ ++ mem_blk:3, /* access blocked */ ++ mem_snoopx:2, /* snoop mode, ext */ ++ mem_remote:1, /* remote */ ++ mem_lvl_num:4, /* memory hierarchy level number */ ++ mem_dtlb:7, /* tlb access */ ++ mem_lock:2, /* lock instr */ ++ mem_snoop:5, /* snoop mode */ ++ mem_lvl:14, /* memory hierarchy level */ ++ mem_op:5; /* type of opcode */ ++ }; ++}; ++#else ++#error "Unknown endianness" ++#endif ++ ++/* type of opcode (load/store/prefetch,code) */ ++#define PERF_MEM_OP_NA 0x01 /* not available */ ++#define PERF_MEM_OP_LOAD 0x02 /* load instruction */ ++#define PERF_MEM_OP_STORE 0x04 /* store instruction */ ++#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */ ++#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */ ++#define PERF_MEM_OP_SHIFT 0 ++ ++/* ++ * PERF_MEM_LVL_* namespace being depricated to some extent in the ++ * favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields. ++ * Supporting this namespace inorder to not break defined ABIs. 
++ * ++ * memory hierarchy (memory level, hit or miss) ++ */ ++#define PERF_MEM_LVL_NA 0x01 /* not available */ ++#define PERF_MEM_LVL_HIT 0x02 /* hit level */ ++#define PERF_MEM_LVL_MISS 0x04 /* miss level */ ++#define PERF_MEM_LVL_L1 0x08 /* L1 */ ++#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */ ++#define PERF_MEM_LVL_L2 0x20 /* L2 */ ++#define PERF_MEM_LVL_L3 0x40 /* L3 */ ++#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */ ++#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */ ++#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */ ++#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */ ++#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */ ++#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ ++#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ ++#define PERF_MEM_LVL_SHIFT 5 ++ ++#define PERF_MEM_REMOTE_REMOTE 0x01 /* Remote */ ++#define PERF_MEM_REMOTE_SHIFT 37 ++ ++#define PERF_MEM_LVLNUM_L1 0x01 /* L1 */ ++#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ ++#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ ++#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ ++/* 5-0xa available */ ++#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ ++#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ ++#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ ++#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ ++#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ ++ ++#define PERF_MEM_LVLNUM_SHIFT 33 ++ ++/* snoop mode */ ++#define PERF_MEM_SNOOP_NA 0x01 /* not available */ ++#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */ ++#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */ ++#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */ ++#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */ ++#define PERF_MEM_SNOOP_SHIFT 19 ++ ++#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ ++/* 1 free */ ++#define PERF_MEM_SNOOPX_SHIFT 38 ++ ++/* locked instruction */ ++#define PERF_MEM_LOCK_NA 0x01 /* not available */ ++#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */ ++#define PERF_MEM_LOCK_SHIFT 24 ++ ++/* TLB access */ ++#define PERF_MEM_TLB_NA 0x01 /* not available */ ++#define PERF_MEM_TLB_HIT 0x02 /* hit level */ ++#define PERF_MEM_TLB_MISS 0x04 /* miss level */ ++#define PERF_MEM_TLB_L1 0x08 /* L1 */ ++#define PERF_MEM_TLB_L2 0x10 /* L2 */ ++#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/ ++#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */ ++#define PERF_MEM_TLB_SHIFT 26 ++ ++/* Access blocked */ ++#define PERF_MEM_BLK_NA 0x01 /* not available */ ++#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */ ++#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ ++#define PERF_MEM_BLK_SHIFT 40 ++ ++/* hop level */ ++#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ ++#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */ ++#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */ ++#define PERF_MEM_HOPS_3 0x04 /* remote board */ ++/* 5-7 available */ ++#define PERF_MEM_HOPS_SHIFT 43 ++ ++#define PERF_MEM_S(a, s) \ ++ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) ++ ++/* ++ * single taken branch record layout: ++ * ++ * from: source instruction (may not always be a branch insn) ++ * to: branch target ++ * mispred: branch target was mispredicted ++ * predicted: branch target was predicted ++ * ++ * support for mispred, predicted is optional. In case it ++ * is not supported mispred = predicted = 0. 
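/*
 * Editor's illustrative sketch, not part of this patch: composing and
 * inspecting a perf_mem_data_src value with the PERF_MEM_S() helper and the
 * bitfields defined above. The function names are made up for illustration.
 */
#include <linux/perf_event.h>

static int is_l3_load_miss(__u64 data_src_val)
{
	union perf_mem_data_src d = { .val = data_src_val };

	return (d.mem_op & PERF_MEM_OP_LOAD) &&
	       (d.mem_lvl & PERF_MEM_LVL_MISS) &&
	       (d.mem_lvl & PERF_MEM_LVL_L3);
}

static __u64 example_l3_load_miss(void)
{
	/* The same encoding, built up field by field with PERF_MEM_S(). */
	return PERF_MEM_S(OP, LOAD) |
	       PERF_MEM_S(LVL, MISS) |
	       PERF_MEM_S(LVL, L3);
}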
++ * ++ * in_tx: running in a hardware transaction ++ * abort: aborting a hardware transaction ++ * cycles: cycles from last branch (or 0 if not supported) ++ * type: branch type ++ */ ++struct perf_branch_entry { ++ __u64 from; ++ __u64 to; ++ __u64 mispred:1, /* target mispredicted */ ++ predicted:1,/* target predicted */ ++ in_tx:1, /* in transaction */ ++ abort:1, /* transaction abort */ ++ cycles:16, /* cycle count to last branch */ ++ type:4, /* branch type */ ++ reserved:40; ++}; ++ ++union perf_sample_weight { ++ __u64 full; ++#if defined(__LITTLE_ENDIAN_BITFIELD) ++ struct { ++ __u32 var1_dw; ++ __u16 var2_w; ++ __u16 var3_w; ++ }; ++#elif defined(__BIG_ENDIAN_BITFIELD) ++ struct { ++ __u16 var3_w; ++ __u16 var2_w; ++ __u32 var1_dw; ++ }; ++#else ++#error "Unknown endianness" ++#endif ++}; ++ ++#endif /* _UAPI_LINUX_PERF_EVENT_H */ +diff --git a/src/cc/libbpf/include/uapi/linux/pkt_cls.h b/src/cc/libbpf/include/uapi/linux/pkt_cls.h +new file mode 100644 +index 0000000..1215377 +--- /dev/null ++++ b/src/cc/libbpf/include/uapi/linux/pkt_cls.h +@@ -0,0 +1,612 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef __LINUX_PKT_CLS_H ++#define __LINUX_PKT_CLS_H ++ ++#include <linux/types.h> ++#include <linux/pkt_sched.h> ++ ++#define TC_COOKIE_MAX_SIZE 16 ++ ++/* Action attributes */ ++enum { ++ TCA_ACT_UNSPEC, ++ TCA_ACT_KIND, ++ TCA_ACT_OPTIONS, ++ TCA_ACT_INDEX, ++ TCA_ACT_STATS, ++ TCA_ACT_PAD, ++ TCA_ACT_COOKIE, ++ __TCA_ACT_MAX ++}; ++ ++#define TCA_ACT_MAX __TCA_ACT_MAX ++#define TCA_OLD_COMPAT (TCA_ACT_MAX+1) ++#define TCA_ACT_MAX_PRIO 32 ++#define TCA_ACT_BIND 1 ++#define TCA_ACT_NOBIND 0 ++#define TCA_ACT_UNBIND 1 ++#define TCA_ACT_NOUNBIND 0 ++#define TCA_ACT_REPLACE 1 ++#define TCA_ACT_NOREPLACE 0 ++ ++#define TC_ACT_UNSPEC (-1) ++#define TC_ACT_OK 0 ++#define TC_ACT_RECLASSIFY 1 ++#define TC_ACT_SHOT 2 ++#define TC_ACT_PIPE 3 ++#define TC_ACT_STOLEN 4 ++#define TC_ACT_QUEUED 5 ++#define TC_ACT_REPEAT 6 ++#define TC_ACT_REDIRECT 7 ++#define TC_ACT_TRAP 8 /* For hw path, this means "trap to cpu" ++ * and don't further process the frame ++ * in hardware. For sw path, this is ++ * equivalent of TC_ACT_STOLEN - drop ++ * the skb and act like everything ++ * is alright. ++ */ ++#define TC_ACT_VALUE_MAX TC_ACT_TRAP ++ ++/* There is a special kind of actions called "extended actions", ++ * which need a value parameter. These have a local opcode located in ++ * the highest nibble, starting from 1. The rest of the bits ++ * are used to carry the value. These two parts together make ++ * a combined opcode.
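/*
 * Editor's illustrative sketch, not part of this patch: building and testing
 * the "extended action" verdicts described above, e.g. a goto-chain verdict
 * that carries the target chain index in the low bits covered by
 * TC_ACT_EXT_VAL_MASK. The helper names are made up for illustration.
 */
#include <linux/pkt_cls.h>

static inline int make_goto_chain(__u32 chain_index)
{
	/* The value must fit in the non-opcode bits. */
	if (chain_index & ~TC_ACT_EXT_VAL_MASK)
		return TC_ACT_UNSPEC;
	return TC_ACT_GOTO_CHAIN | chain_index;
}

static inline int is_goto_chain(int verdict)
{
	return TC_ACT_EXT_CMP(verdict, TC_ACT_GOTO_CHAIN);
}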
++ */ ++#define __TC_ACT_EXT_SHIFT 28 ++#define __TC_ACT_EXT(local) ((local) << __TC_ACT_EXT_SHIFT) ++#define TC_ACT_EXT_VAL_MASK ((1 << __TC_ACT_EXT_SHIFT) - 1) ++#define TC_ACT_EXT_OPCODE(combined) ((combined) & (~TC_ACT_EXT_VAL_MASK)) ++#define TC_ACT_EXT_CMP(combined, opcode) (TC_ACT_EXT_OPCODE(combined) == opcode) ++ ++#define TC_ACT_JUMP __TC_ACT_EXT(1) ++#define TC_ACT_GOTO_CHAIN __TC_ACT_EXT(2) ++#define TC_ACT_EXT_OPCODE_MAX TC_ACT_GOTO_CHAIN ++ ++/* Action type identifiers*/ ++enum { ++ TCA_ID_UNSPEC=0, ++ TCA_ID_POLICE=1, ++ /* other actions go here */ ++ __TCA_ID_MAX=255 ++}; ++ ++#define TCA_ID_MAX __TCA_ID_MAX ++ ++struct tc_police { ++ __u32 index; ++ int action; ++#define TC_POLICE_UNSPEC TC_ACT_UNSPEC ++#define TC_POLICE_OK TC_ACT_OK ++#define TC_POLICE_RECLASSIFY TC_ACT_RECLASSIFY ++#define TC_POLICE_SHOT TC_ACT_SHOT ++#define TC_POLICE_PIPE TC_ACT_PIPE ++ ++ __u32 limit; ++ __u32 burst; ++ __u32 mtu; ++ struct tc_ratespec rate; ++ struct tc_ratespec peakrate; ++ int refcnt; ++ int bindcnt; ++ __u32 capab; ++}; ++ ++struct tcf_t { ++ __u64 install; ++ __u64 lastuse; ++ __u64 expires; ++ __u64 firstuse; ++}; ++ ++struct tc_cnt { ++ int refcnt; ++ int bindcnt; ++}; ++ ++#define tc_gen \ ++ __u32 index; \ ++ __u32 capab; \ ++ int action; \ ++ int refcnt; \ ++ int bindcnt ++ ++enum { ++ TCA_POLICE_UNSPEC, ++ TCA_POLICE_TBF, ++ TCA_POLICE_RATE, ++ TCA_POLICE_PEAKRATE, ++ TCA_POLICE_AVRATE, ++ TCA_POLICE_RESULT, ++ TCA_POLICE_TM, ++ TCA_POLICE_PAD, ++ __TCA_POLICE_MAX ++#define TCA_POLICE_RESULT TCA_POLICE_RESULT ++}; ++ ++#define TCA_POLICE_MAX (__TCA_POLICE_MAX - 1) ++ ++/* tca flags definitions */ ++#define TCA_CLS_FLAGS_SKIP_HW (1 << 0) /* don't offload filter to HW */ ++#define TCA_CLS_FLAGS_SKIP_SW (1 << 1) /* don't use filter in SW */ ++#define TCA_CLS_FLAGS_IN_HW (1 << 2) /* filter is offloaded to HW */ ++#define TCA_CLS_FLAGS_NOT_IN_HW (1 << 3) /* filter isn't offloaded to HW */ ++#define TCA_CLS_FLAGS_VERBOSE (1 << 4) /* verbose logging */ ++ ++/* U32 filters */ ++ ++#define TC_U32_HTID(h) ((h)&0xFFF00000) ++#define TC_U32_USERHTID(h) (TC_U32_HTID(h)>>20) ++#define TC_U32_HASH(h) (((h)>>12)&0xFF) ++#define TC_U32_NODE(h) ((h)&0xFFF) ++#define TC_U32_KEY(h) ((h)&0xFFFFF) ++#define TC_U32_UNSPEC 0 ++#define TC_U32_ROOT (0xFFF00000) ++ ++enum { ++ TCA_U32_UNSPEC, ++ TCA_U32_CLASSID, ++ TCA_U32_HASH, ++ TCA_U32_LINK, ++ TCA_U32_DIVISOR, ++ TCA_U32_SEL, ++ TCA_U32_POLICE, ++ TCA_U32_ACT, ++ TCA_U32_INDEV, ++ TCA_U32_PCNT, ++ TCA_U32_MARK, ++ TCA_U32_FLAGS, ++ TCA_U32_PAD, ++ __TCA_U32_MAX ++}; ++ ++#define TCA_U32_MAX (__TCA_U32_MAX - 1) ++ ++struct tc_u32_key { ++ __be32 mask; ++ __be32 val; ++ int off; ++ int offmask; ++}; ++ ++struct tc_u32_sel { ++ unsigned char flags; ++ unsigned char offshift; ++ unsigned char nkeys; ++ ++ __be16 offmask; ++ __u16 off; ++ short offoff; ++ ++ short hoff; ++ __be32 hmask; ++ struct tc_u32_key keys[0]; ++}; ++ ++struct tc_u32_mark { ++ __u32 val; ++ __u32 mask; ++ __u32 success; ++}; ++ ++struct tc_u32_pcnt { ++ __u64 rcnt; ++ __u64 rhit; ++ __u64 kcnts[0]; ++}; ++ ++/* Flags */ ++ ++#define TC_U32_TERMINAL 1 ++#define TC_U32_OFFSET 2 ++#define TC_U32_VAROFFSET 4 ++#define TC_U32_EAT 8 ++ ++#define TC_U32_MAXDEPTH 8 ++ ++ ++/* RSVP filter */ ++ ++enum { ++ TCA_RSVP_UNSPEC, ++ TCA_RSVP_CLASSID, ++ TCA_RSVP_DST, ++ TCA_RSVP_SRC, ++ TCA_RSVP_PINFO, ++ TCA_RSVP_POLICE, ++ TCA_RSVP_ACT, ++ __TCA_RSVP_MAX ++}; ++ ++#define TCA_RSVP_MAX (__TCA_RSVP_MAX - 1 ) ++ ++struct tc_rsvp_gpi { ++ __u32 key; ++ __u32 mask; ++ int offset; ++}; ++ ++struct 
tc_rsvp_pinfo { ++ struct tc_rsvp_gpi dpi; ++ struct tc_rsvp_gpi spi; ++ __u8 protocol; ++ __u8 tunnelid; ++ __u8 tunnelhdr; ++ __u8 pad; ++}; ++ ++/* ROUTE filter */ ++ ++enum { ++ TCA_ROUTE4_UNSPEC, ++ TCA_ROUTE4_CLASSID, ++ TCA_ROUTE4_TO, ++ TCA_ROUTE4_FROM, ++ TCA_ROUTE4_IIF, ++ TCA_ROUTE4_POLICE, ++ TCA_ROUTE4_ACT, ++ __TCA_ROUTE4_MAX ++}; ++ ++#define TCA_ROUTE4_MAX (__TCA_ROUTE4_MAX - 1) ++ ++ ++/* FW filter */ ++ ++enum { ++ TCA_FW_UNSPEC, ++ TCA_FW_CLASSID, ++ TCA_FW_POLICE, ++ TCA_FW_INDEV, ++ TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */ ++ TCA_FW_MASK, ++ __TCA_FW_MAX ++}; ++ ++#define TCA_FW_MAX (__TCA_FW_MAX - 1) ++ ++/* TC index filter */ ++ ++enum { ++ TCA_TCINDEX_UNSPEC, ++ TCA_TCINDEX_HASH, ++ TCA_TCINDEX_MASK, ++ TCA_TCINDEX_SHIFT, ++ TCA_TCINDEX_FALL_THROUGH, ++ TCA_TCINDEX_CLASSID, ++ TCA_TCINDEX_POLICE, ++ TCA_TCINDEX_ACT, ++ __TCA_TCINDEX_MAX ++}; ++ ++#define TCA_TCINDEX_MAX (__TCA_TCINDEX_MAX - 1) ++ ++/* Flow filter */ ++ ++enum { ++ FLOW_KEY_SRC, ++ FLOW_KEY_DST, ++ FLOW_KEY_PROTO, ++ FLOW_KEY_PROTO_SRC, ++ FLOW_KEY_PROTO_DST, ++ FLOW_KEY_IIF, ++ FLOW_KEY_PRIORITY, ++ FLOW_KEY_MARK, ++ FLOW_KEY_NFCT, ++ FLOW_KEY_NFCT_SRC, ++ FLOW_KEY_NFCT_DST, ++ FLOW_KEY_NFCT_PROTO_SRC, ++ FLOW_KEY_NFCT_PROTO_DST, ++ FLOW_KEY_RTCLASSID, ++ FLOW_KEY_SKUID, ++ FLOW_KEY_SKGID, ++ FLOW_KEY_VLAN_TAG, ++ FLOW_KEY_RXHASH, ++ __FLOW_KEY_MAX, ++}; ++ ++#define FLOW_KEY_MAX (__FLOW_KEY_MAX - 1) ++ ++enum { ++ FLOW_MODE_MAP, ++ FLOW_MODE_HASH, ++}; ++ ++enum { ++ TCA_FLOW_UNSPEC, ++ TCA_FLOW_KEYS, ++ TCA_FLOW_MODE, ++ TCA_FLOW_BASECLASS, ++ TCA_FLOW_RSHIFT, ++ TCA_FLOW_ADDEND, ++ TCA_FLOW_MASK, ++ TCA_FLOW_XOR, ++ TCA_FLOW_DIVISOR, ++ TCA_FLOW_ACT, ++ TCA_FLOW_POLICE, ++ TCA_FLOW_EMATCHES, ++ TCA_FLOW_PERTURB, ++ __TCA_FLOW_MAX ++}; ++ ++#define TCA_FLOW_MAX (__TCA_FLOW_MAX - 1) ++ ++/* Basic filter */ ++ ++enum { ++ TCA_BASIC_UNSPEC, ++ TCA_BASIC_CLASSID, ++ TCA_BASIC_EMATCHES, ++ TCA_BASIC_ACT, ++ TCA_BASIC_POLICE, ++ __TCA_BASIC_MAX ++}; ++ ++#define TCA_BASIC_MAX (__TCA_BASIC_MAX - 1) ++ ++ ++/* Cgroup classifier */ ++ ++enum { ++ TCA_CGROUP_UNSPEC, ++ TCA_CGROUP_ACT, ++ TCA_CGROUP_POLICE, ++ TCA_CGROUP_EMATCHES, ++ __TCA_CGROUP_MAX, ++}; ++ ++#define TCA_CGROUP_MAX (__TCA_CGROUP_MAX - 1) ++ ++/* BPF classifier */ ++ ++#define TCA_BPF_FLAG_ACT_DIRECT (1 << 0) ++ ++enum { ++ TCA_BPF_UNSPEC, ++ TCA_BPF_ACT, ++ TCA_BPF_POLICE, ++ TCA_BPF_CLASSID, ++ TCA_BPF_OPS_LEN, ++ TCA_BPF_OPS, ++ TCA_BPF_FD, ++ TCA_BPF_NAME, ++ TCA_BPF_FLAGS, ++ TCA_BPF_FLAGS_GEN, ++ TCA_BPF_TAG, ++ TCA_BPF_ID, ++ __TCA_BPF_MAX, ++}; ++ ++#define TCA_BPF_MAX (__TCA_BPF_MAX - 1) ++ ++/* Flower classifier */ ++ ++enum { ++ TCA_FLOWER_UNSPEC, ++ TCA_FLOWER_CLASSID, ++ TCA_FLOWER_INDEV, ++ TCA_FLOWER_ACT, ++ TCA_FLOWER_KEY_ETH_DST, /* ETH_ALEN */ ++ TCA_FLOWER_KEY_ETH_DST_MASK, /* ETH_ALEN */ ++ TCA_FLOWER_KEY_ETH_SRC, /* ETH_ALEN */ ++ TCA_FLOWER_KEY_ETH_SRC_MASK, /* ETH_ALEN */ ++ TCA_FLOWER_KEY_ETH_TYPE, /* be16 */ ++ TCA_FLOWER_KEY_IP_PROTO, /* u8 */ ++ TCA_FLOWER_KEY_IPV4_SRC, /* be32 */ ++ TCA_FLOWER_KEY_IPV4_SRC_MASK, /* be32 */ ++ TCA_FLOWER_KEY_IPV4_DST, /* be32 */ ++ TCA_FLOWER_KEY_IPV4_DST_MASK, /* be32 */ ++ TCA_FLOWER_KEY_IPV6_SRC, /* struct in6_addr */ ++ TCA_FLOWER_KEY_IPV6_SRC_MASK, /* struct in6_addr */ ++ TCA_FLOWER_KEY_IPV6_DST, /* struct in6_addr */ ++ TCA_FLOWER_KEY_IPV6_DST_MASK, /* struct in6_addr */ ++ TCA_FLOWER_KEY_TCP_SRC, /* be16 */ ++ TCA_FLOWER_KEY_TCP_DST, /* be16 */ ++ TCA_FLOWER_KEY_UDP_SRC, /* be16 */ ++ TCA_FLOWER_KEY_UDP_DST, /* be16 */ ++ ++ TCA_FLOWER_FLAGS, ++ TCA_FLOWER_KEY_VLAN_ID, 
/* be16 */ ++ TCA_FLOWER_KEY_VLAN_PRIO, /* u8 */ ++ TCA_FLOWER_KEY_VLAN_ETH_TYPE, /* be16 */ ++ ++ TCA_FLOWER_KEY_ENC_KEY_ID, /* be32 */ ++ TCA_FLOWER_KEY_ENC_IPV4_SRC, /* be32 */ ++ TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,/* be32 */ ++ TCA_FLOWER_KEY_ENC_IPV4_DST, /* be32 */ ++ TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,/* be32 */ ++ TCA_FLOWER_KEY_ENC_IPV6_SRC, /* struct in6_addr */ ++ TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,/* struct in6_addr */ ++ TCA_FLOWER_KEY_ENC_IPV6_DST, /* struct in6_addr */ ++ TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,/* struct in6_addr */ ++ ++ TCA_FLOWER_KEY_TCP_SRC_MASK, /* be16 */ ++ TCA_FLOWER_KEY_TCP_DST_MASK, /* be16 */ ++ TCA_FLOWER_KEY_UDP_SRC_MASK, /* be16 */ ++ TCA_FLOWER_KEY_UDP_DST_MASK, /* be16 */ ++ TCA_FLOWER_KEY_SCTP_SRC_MASK, /* be16 */ ++ TCA_FLOWER_KEY_SCTP_DST_MASK, /* be16 */ ++ ++ TCA_FLOWER_KEY_SCTP_SRC, /* be16 */ ++ TCA_FLOWER_KEY_SCTP_DST, /* be16 */ ++ ++ TCA_FLOWER_KEY_ENC_UDP_SRC_PORT, /* be16 */ ++ TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK, /* be16 */ ++ TCA_FLOWER_KEY_ENC_UDP_DST_PORT, /* be16 */ ++ TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, /* be16 */ ++ ++ TCA_FLOWER_KEY_FLAGS, /* be32 */ ++ TCA_FLOWER_KEY_FLAGS_MASK, /* be32 */ ++ ++ TCA_FLOWER_KEY_ICMPV4_CODE, /* u8 */ ++ TCA_FLOWER_KEY_ICMPV4_CODE_MASK,/* u8 */ ++ TCA_FLOWER_KEY_ICMPV4_TYPE, /* u8 */ ++ TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,/* u8 */ ++ TCA_FLOWER_KEY_ICMPV6_CODE, /* u8 */ ++ TCA_FLOWER_KEY_ICMPV6_CODE_MASK,/* u8 */ ++ TCA_FLOWER_KEY_ICMPV6_TYPE, /* u8 */ ++ TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,/* u8 */ ++ ++ TCA_FLOWER_KEY_ARP_SIP, /* be32 */ ++ TCA_FLOWER_KEY_ARP_SIP_MASK, /* be32 */ ++ TCA_FLOWER_KEY_ARP_TIP, /* be32 */ ++ TCA_FLOWER_KEY_ARP_TIP_MASK, /* be32 */ ++ TCA_FLOWER_KEY_ARP_OP, /* u8 */ ++ TCA_FLOWER_KEY_ARP_OP_MASK, /* u8 */ ++ TCA_FLOWER_KEY_ARP_SHA, /* ETH_ALEN */ ++ TCA_FLOWER_KEY_ARP_SHA_MASK, /* ETH_ALEN */ ++ TCA_FLOWER_KEY_ARP_THA, /* ETH_ALEN */ ++ TCA_FLOWER_KEY_ARP_THA_MASK, /* ETH_ALEN */ ++ ++ TCA_FLOWER_KEY_MPLS_TTL, /* u8 - 8 bits */ ++ TCA_FLOWER_KEY_MPLS_BOS, /* u8 - 1 bit */ ++ TCA_FLOWER_KEY_MPLS_TC, /* u8 - 3 bits */ ++ TCA_FLOWER_KEY_MPLS_LABEL, /* be32 - 20 bits */ ++ ++ TCA_FLOWER_KEY_TCP_FLAGS, /* be16 */ ++ TCA_FLOWER_KEY_TCP_FLAGS_MASK, /* be16 */ ++ ++ TCA_FLOWER_KEY_IP_TOS, /* u8 */ ++ TCA_FLOWER_KEY_IP_TOS_MASK, /* u8 */ ++ TCA_FLOWER_KEY_IP_TTL, /* u8 */ ++ TCA_FLOWER_KEY_IP_TTL_MASK, /* u8 */ ++ ++ TCA_FLOWER_KEY_CVLAN_ID, /* be16 */ ++ TCA_FLOWER_KEY_CVLAN_PRIO, /* u8 */ ++ TCA_FLOWER_KEY_CVLAN_ETH_TYPE, /* be16 */ ++ ++ TCA_FLOWER_KEY_ENC_IP_TOS, /* u8 */ ++ TCA_FLOWER_KEY_ENC_IP_TOS_MASK, /* u8 */ ++ TCA_FLOWER_KEY_ENC_IP_TTL, /* u8 */ ++ TCA_FLOWER_KEY_ENC_IP_TTL_MASK, /* u8 */ ++ ++ TCA_FLOWER_KEY_ENC_OPTS, ++ TCA_FLOWER_KEY_ENC_OPTS_MASK, ++ ++ TCA_FLOWER_IN_HW_COUNT, ++ ++ __TCA_FLOWER_MAX, ++}; ++ ++#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1) ++ ++enum { ++ TCA_FLOWER_KEY_ENC_OPTS_UNSPEC, ++ TCA_FLOWER_KEY_ENC_OPTS_GENEVE, /* Nested ++ * TCA_FLOWER_KEY_ENC_OPT_GENEVE_ ++ * attributes ++ */ ++ __TCA_FLOWER_KEY_ENC_OPTS_MAX, ++}; ++ ++#define TCA_FLOWER_KEY_ENC_OPTS_MAX (__TCA_FLOWER_KEY_ENC_OPTS_MAX - 1) ++ ++enum { ++ TCA_FLOWER_KEY_ENC_OPT_GENEVE_UNSPEC, ++ TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS, /* u16 */ ++ TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE, /* u8 */ ++ TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA, /* 4 to 128 bytes */ ++ ++ __TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX, ++}; ++ ++#define TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX \ ++ (__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX - 1) ++ ++enum { ++ TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0), ++ TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1), 
++}; ++ ++/* Match-all classifier */ ++ ++enum { ++ TCA_MATCHALL_UNSPEC, ++ TCA_MATCHALL_CLASSID, ++ TCA_MATCHALL_ACT, ++ TCA_MATCHALL_FLAGS, ++ __TCA_MATCHALL_MAX, ++}; ++ ++#define TCA_MATCHALL_MAX (__TCA_MATCHALL_MAX - 1) ++ ++/* Extended Matches */ ++ ++struct tcf_ematch_tree_hdr { ++ __u16 nmatches; ++ __u16 progid; ++}; ++ ++enum { ++ TCA_EMATCH_TREE_UNSPEC, ++ TCA_EMATCH_TREE_HDR, ++ TCA_EMATCH_TREE_LIST, ++ __TCA_EMATCH_TREE_MAX ++}; ++#define TCA_EMATCH_TREE_MAX (__TCA_EMATCH_TREE_MAX - 1) ++ ++struct tcf_ematch_hdr { ++ __u16 matchid; ++ __u16 kind; ++ __u16 flags; ++ __u16 pad; /* currently unused */ ++}; ++ ++/* 0 1 ++ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 ++ * +-----------------------+-+-+---+ ++ * | Unused |S|I| R | ++ * +-----------------------+-+-+---+ ++ * ++ * R(2) ::= relation to next ematch ++ * where: 0 0 END (last ematch) ++ * 0 1 AND ++ * 1 0 OR ++ * 1 1 Unused (invalid) ++ * I(1) ::= invert result ++ * S(1) ::= simple payload ++ */ ++#define TCF_EM_REL_END 0 ++#define TCF_EM_REL_AND (1<<0) ++#define TCF_EM_REL_OR (1<<1) ++#define TCF_EM_INVERT (1<<2) ++#define TCF_EM_SIMPLE (1<<3) ++ ++#define TCF_EM_REL_MASK 3 ++#define TCF_EM_REL_VALID(v) (((v) & TCF_EM_REL_MASK) != TCF_EM_REL_MASK) ++ ++enum { ++ TCF_LAYER_LINK, ++ TCF_LAYER_NETWORK, ++ TCF_LAYER_TRANSPORT, ++ __TCF_LAYER_MAX ++}; ++#define TCF_LAYER_MAX (__TCF_LAYER_MAX - 1) ++ ++/* Ematch type assignments ++ * 1..32767 Reserved for ematches inside kernel tree ++ * 32768..65535 Free to use, not reliable ++ */ ++#define TCF_EM_CONTAINER 0 ++#define TCF_EM_CMP 1 ++#define TCF_EM_NBYTE 2 ++#define TCF_EM_U32 3 ++#define TCF_EM_META 4 ++#define TCF_EM_TEXT 5 ++#define TCF_EM_VLAN 6 ++#define TCF_EM_CANID 7 ++#define TCF_EM_IPSET 8 ++#define TCF_EM_IPT 9 ++#define TCF_EM_MAX 9 ++ ++enum { ++ TCF_EM_PROG_TC ++}; ++ ++enum { ++ TCF_EM_OPND_EQ, ++ TCF_EM_OPND_GT, ++ TCF_EM_OPND_LT ++}; ++ ++#endif +diff --git a/src/cc/libbpf/include/uapi/linux/pkt_sched.h b/src/cc/libbpf/include/uapi/linux/pkt_sched.h +new file mode 100644 +index 0000000..5c903ab +--- /dev/null ++++ b/src/cc/libbpf/include/uapi/linux/pkt_sched.h +@@ -0,0 +1,1164 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef __LINUX_PKT_SCHED_H ++#define __LINUX_PKT_SCHED_H ++ ++#include <linux/types.h> ++ ++/* Logical priority bands not depending on specific packet scheduler. ++ Every scheduler will map them to real traffic classes, if it has ++ no more precise mechanism to classify packets. ++ ++ These numbers have no special meaning, though their coincidence ++ with obsolete IPv6 values is not occasional :-). New IPv6 drafts ++ preferred full anarchy inspired by diffserv group. ++ ++ Note: TC_PRIO_BESTEFFORT does not mean that it is the most unhappy ++ class, actually, as rule it will be handled with more care than ++ filler or even bulk. ++ */ ++ ++#define TC_PRIO_BESTEFFORT 0 ++#define TC_PRIO_FILLER 1 ++#define TC_PRIO_BULK 2 ++#define TC_PRIO_INTERACTIVE_BULK 4 ++#define TC_PRIO_INTERACTIVE 6 ++#define TC_PRIO_CONTROL 7 ++ ++#define TC_PRIO_MAX 15 ++ ++/* Generic queue statistics, available for all the elements. ++ Particular schedulers may have also their private records.
++ */ ++ ++struct tc_stats { ++ __u64 bytes; /* Number of enqueued bytes */ ++ __u32 packets; /* Number of enqueued packets */ ++ __u32 drops; /* Packets dropped because of lack of resources */ ++ __u32 overlimits; /* Number of throttle events when this ++ * flow goes out of allocated bandwidth */ ++ __u32 bps; /* Current flow byte rate */ ++ __u32 pps; /* Current flow packet rate */ ++ __u32 qlen; ++ __u32 backlog; ++}; ++ ++struct tc_estimator { ++ signed char interval; ++ unsigned char ewma_log; ++}; ++ ++/* "Handles" ++ --------- ++ ++ All the traffic control objects have 32bit identifiers, or "handles". ++ ++ They can be considered as opaque numbers from user API viewpoint, ++ but actually they always consist of two fields: major and ++ minor numbers, which are interpreted by kernel specially, ++ that may be used by applications, though not recommended. ++ ++ F.e. qdisc handles always have minor number equal to zero, ++ classes (or flows) have major equal to parent qdisc major, and ++ minor uniquely identifying class inside qdisc. ++ ++ Macros to manipulate handles: ++ */ ++ ++#define TC_H_MAJ_MASK (0xFFFF0000U) ++#define TC_H_MIN_MASK (0x0000FFFFU) ++#define TC_H_MAJ(h) ((h)&TC_H_MAJ_MASK) ++#define TC_H_MIN(h) ((h)&TC_H_MIN_MASK) ++#define TC_H_MAKE(maj,min) (((maj)&TC_H_MAJ_MASK)|((min)&TC_H_MIN_MASK)) ++ ++#define TC_H_UNSPEC (0U) ++#define TC_H_ROOT (0xFFFFFFFFU) ++#define TC_H_INGRESS (0xFFFFFFF1U) ++#define TC_H_CLSACT TC_H_INGRESS ++ ++#define TC_H_MIN_PRIORITY 0xFFE0U ++#define TC_H_MIN_INGRESS 0xFFF2U ++#define TC_H_MIN_EGRESS 0xFFF3U ++ ++/* Need to corrospond to iproute2 tc/tc_core.h "enum link_layer" */ ++enum tc_link_layer { ++ TC_LINKLAYER_UNAWARE, /* Indicate unaware old iproute2 util */ ++ TC_LINKLAYER_ETHERNET, ++ TC_LINKLAYER_ATM, ++}; ++#define TC_LINKLAYER_MASK 0x0F /* limit use to lower 4 bits */ ++ ++struct tc_ratespec { ++ unsigned char cell_log; ++ __u8 linklayer; /* lower 4 bits */ ++ unsigned short overhead; ++ short cell_align; ++ unsigned short mpu; ++ __u32 rate; ++}; ++ ++#define TC_RTAB_SIZE 1024 ++ ++struct tc_sizespec { ++ unsigned char cell_log; ++ unsigned char size_log; ++ short cell_align; ++ int overhead; ++ unsigned int linklayer; ++ unsigned int mpu; ++ unsigned int mtu; ++ unsigned int tsize; ++}; ++ ++enum { ++ TCA_STAB_UNSPEC, ++ TCA_STAB_BASE, ++ TCA_STAB_DATA, ++ __TCA_STAB_MAX ++}; ++ ++#define TCA_STAB_MAX (__TCA_STAB_MAX - 1) ++ ++/* FIFO section */ ++ ++struct tc_fifo_qopt { ++ __u32 limit; /* Queue length: bytes for bfifo, packets for pfifo */ ++}; ++ ++/* SKBPRIO section */ ++ ++/* ++ * Priorities go from zero to (SKBPRIO_MAX_PRIORITY - 1). ++ * SKBPRIO_MAX_PRIORITY should be at least 64 in order for skbprio to be able ++ * to map one to one the DS field of IPV4 and IPV6 headers. ++ * Memory allocation grows linearly with SKBPRIO_MAX_PRIORITY. ++ */ ++ ++#define SKBPRIO_MAX_PRIORITY 64 ++ ++struct tc_skbprio_qopt { ++ __u32 limit; /* Queue length in packets. 
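/*
 * Editor's illustrative sketch, not part of this patch: composing and
 * splitting the 32-bit qdisc/class handles described above with
 * TC_H_MAKE()/TC_H_MAJ()/TC_H_MIN(). The values correspond to what tc prints
 * as "1:" and "1:10" (tc handle notation is hexadecimal); helper names are
 * made up for illustration.
 */
#include <linux/pkt_sched.h>

static inline __u32 qdisc_handle_1_0(void)
{
	return TC_H_MAKE(1U << 16, 0);		/* qdisc "1:" */
}

static inline __u32 classid_1_10(void)
{
	return TC_H_MAKE(1U << 16, 0x10);	/* class "1:10" */
}

static inline void split_handle(__u32 h, __u32 *major, __u32 *minor)
{
	*major = TC_H_MAJ(h) >> 16;
	*minor = TC_H_MIN(h);
}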
*/ ++}; ++ ++/* PRIO section */ ++ ++#define TCQ_PRIO_BANDS 16 ++#define TCQ_MIN_PRIO_BANDS 2 ++ ++struct tc_prio_qopt { ++ int bands; /* Number of bands */ ++ __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ ++}; ++ ++/* MULTIQ section */ ++ ++struct tc_multiq_qopt { ++ __u16 bands; /* Number of bands */ ++ __u16 max_bands; /* Maximum number of queues */ ++}; ++ ++/* PLUG section */ ++ ++#define TCQ_PLUG_BUFFER 0 ++#define TCQ_PLUG_RELEASE_ONE 1 ++#define TCQ_PLUG_RELEASE_INDEFINITE 2 ++#define TCQ_PLUG_LIMIT 3 ++ ++struct tc_plug_qopt { ++ /* TCQ_PLUG_BUFFER: Inset a plug into the queue and ++ * buffer any incoming packets ++ * TCQ_PLUG_RELEASE_ONE: Dequeue packets from queue head ++ * to beginning of the next plug. ++ * TCQ_PLUG_RELEASE_INDEFINITE: Dequeue all packets from queue. ++ * Stop buffering packets until the next TCQ_PLUG_BUFFER ++ * command is received (just act as a pass-thru queue). ++ * TCQ_PLUG_LIMIT: Increase/decrease queue size ++ */ ++ int action; ++ __u32 limit; ++}; ++ ++/* TBF section */ ++ ++struct tc_tbf_qopt { ++ struct tc_ratespec rate; ++ struct tc_ratespec peakrate; ++ __u32 limit; ++ __u32 buffer; ++ __u32 mtu; ++}; ++ ++enum { ++ TCA_TBF_UNSPEC, ++ TCA_TBF_PARMS, ++ TCA_TBF_RTAB, ++ TCA_TBF_PTAB, ++ TCA_TBF_RATE64, ++ TCA_TBF_PRATE64, ++ TCA_TBF_BURST, ++ TCA_TBF_PBURST, ++ TCA_TBF_PAD, ++ __TCA_TBF_MAX, ++}; ++ ++#define TCA_TBF_MAX (__TCA_TBF_MAX - 1) ++ ++ ++/* TEQL section */ ++ ++/* TEQL does not require any parameters */ ++ ++/* SFQ section */ ++ ++struct tc_sfq_qopt { ++ unsigned quantum; /* Bytes per round allocated to flow */ ++ int perturb_period; /* Period of hash perturbation */ ++ __u32 limit; /* Maximal packets in queue */ ++ unsigned divisor; /* Hash divisor */ ++ unsigned flows; /* Maximal number of flows */ ++}; ++ ++struct tc_sfqred_stats { ++ __u32 prob_drop; /* Early drops, below max threshold */ ++ __u32 forced_drop; /* Early drops, after max threshold */ ++ __u32 prob_mark; /* Marked packets, below max threshold */ ++ __u32 forced_mark; /* Marked packets, after max threshold */ ++ __u32 prob_mark_head; /* Marked packets, below max threshold */ ++ __u32 forced_mark_head;/* Marked packets, after max threshold */ ++}; ++ ++struct tc_sfq_qopt_v1 { ++ struct tc_sfq_qopt v0; ++ unsigned int depth; /* max number of packets per flow */ ++ unsigned int headdrop; ++/* SFQRED parameters */ ++ __u32 limit; /* HARD maximal flow queue length (bytes) */ ++ __u32 qth_min; /* Min average length threshold (bytes) */ ++ __u32 qth_max; /* Max average length threshold (bytes) */ ++ unsigned char Wlog; /* log(W) */ ++ unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ ++ unsigned char Scell_log; /* cell size for idle damping */ ++ unsigned char flags; ++ __u32 max_P; /* probability, high resolution */ ++/* SFQRED stats */ ++ struct tc_sfqred_stats stats; ++}; ++ ++ ++struct tc_sfq_xstats { ++ __s32 allot; ++}; ++ ++/* RED section */ ++ ++enum { ++ TCA_RED_UNSPEC, ++ TCA_RED_PARMS, ++ TCA_RED_STAB, ++ TCA_RED_MAX_P, ++ __TCA_RED_MAX, ++}; ++ ++#define TCA_RED_MAX (__TCA_RED_MAX - 1) ++ ++struct tc_red_qopt { ++ __u32 limit; /* HARD maximal queue length (bytes) */ ++ __u32 qth_min; /* Min average length threshold (bytes) */ ++ __u32 qth_max; /* Max average length threshold (bytes) */ ++ unsigned char Wlog; /* log(W) */ ++ unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ ++ unsigned char Scell_log; /* cell size for idle damping */ ++ unsigned char flags; ++#define TC_RED_ECN 1 ++#define TC_RED_HARDDROP 2 ++#define TC_RED_ADAPTATIVE 
4 ++}; ++ ++struct tc_red_xstats { ++ __u32 early; /* Early drops */ ++ __u32 pdrop; /* Drops due to queue limits */ ++ __u32 other; /* Drops due to drop() calls */ ++ __u32 marked; /* Marked packets */ ++}; ++ ++/* GRED section */ ++ ++#define MAX_DPs 16 ++ ++enum { ++ TCA_GRED_UNSPEC, ++ TCA_GRED_PARMS, ++ TCA_GRED_STAB, ++ TCA_GRED_DPS, ++ TCA_GRED_MAX_P, ++ TCA_GRED_LIMIT, ++ TCA_GRED_VQ_LIST, /* nested TCA_GRED_VQ_ENTRY */ ++ __TCA_GRED_MAX, ++}; ++ ++#define TCA_GRED_MAX (__TCA_GRED_MAX - 1) ++ ++enum { ++ TCA_GRED_VQ_ENTRY_UNSPEC, ++ TCA_GRED_VQ_ENTRY, /* nested TCA_GRED_VQ_* */ ++ __TCA_GRED_VQ_ENTRY_MAX, ++}; ++#define TCA_GRED_VQ_ENTRY_MAX (__TCA_GRED_VQ_ENTRY_MAX - 1) ++ ++enum { ++ TCA_GRED_VQ_UNSPEC, ++ TCA_GRED_VQ_PAD, ++ TCA_GRED_VQ_DP, /* u32 */ ++ TCA_GRED_VQ_STAT_BYTES, /* u64 */ ++ TCA_GRED_VQ_STAT_PACKETS, /* u32 */ ++ TCA_GRED_VQ_STAT_BACKLOG, /* u32 */ ++ TCA_GRED_VQ_STAT_PROB_DROP, /* u32 */ ++ TCA_GRED_VQ_STAT_PROB_MARK, /* u32 */ ++ TCA_GRED_VQ_STAT_FORCED_DROP, /* u32 */ ++ TCA_GRED_VQ_STAT_FORCED_MARK, /* u32 */ ++ TCA_GRED_VQ_STAT_PDROP, /* u32 */ ++ TCA_GRED_VQ_STAT_OTHER, /* u32 */ ++ TCA_GRED_VQ_FLAGS, /* u32 */ ++ __TCA_GRED_VQ_MAX ++}; ++ ++#define TCA_GRED_VQ_MAX (__TCA_GRED_VQ_MAX - 1) ++ ++struct tc_gred_qopt { ++ __u32 limit; /* HARD maximal queue length (bytes) */ ++ __u32 qth_min; /* Min average length threshold (bytes) */ ++ __u32 qth_max; /* Max average length threshold (bytes) */ ++ __u32 DP; /* up to 2^32 DPs */ ++ __u32 backlog; ++ __u32 qave; ++ __u32 forced; ++ __u32 early; ++ __u32 other; ++ __u32 pdrop; ++ __u8 Wlog; /* log(W) */ ++ __u8 Plog; /* log(P_max/(qth_max-qth_min)) */ ++ __u8 Scell_log; /* cell size for idle damping */ ++ __u8 prio; /* prio of this VQ */ ++ __u32 packets; ++ __u32 bytesin; ++}; ++ ++/* gred setup */ ++struct tc_gred_sopt { ++ __u32 DPs; ++ __u32 def_DP; ++ __u8 grio; ++ __u8 flags; ++ __u16 pad1; ++}; ++ ++/* CHOKe section */ ++ ++enum { ++ TCA_CHOKE_UNSPEC, ++ TCA_CHOKE_PARMS, ++ TCA_CHOKE_STAB, ++ TCA_CHOKE_MAX_P, ++ __TCA_CHOKE_MAX, ++}; ++ ++#define TCA_CHOKE_MAX (__TCA_CHOKE_MAX - 1) ++ ++struct tc_choke_qopt { ++ __u32 limit; /* Hard queue length (packets) */ ++ __u32 qth_min; /* Min average threshold (packets) */ ++ __u32 qth_max; /* Max average threshold (packets) */ ++ unsigned char Wlog; /* log(W) */ ++ unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ ++ unsigned char Scell_log; /* cell size for idle damping */ ++ unsigned char flags; /* see RED flags */ ++}; ++ ++struct tc_choke_xstats { ++ __u32 early; /* Early drops */ ++ __u32 pdrop; /* Drops due to queue limits */ ++ __u32 other; /* Drops due to drop() calls */ ++ __u32 marked; /* Marked packets */ ++ __u32 matched; /* Drops due to flow match */ ++}; ++ ++/* HTB section */ ++#define TC_HTB_NUMPRIO 8 ++#define TC_HTB_MAXDEPTH 8 ++#define TC_HTB_PROTOVER 3 /* the same as HTB and TC's major */ ++ ++struct tc_htb_opt { ++ struct tc_ratespec rate; ++ struct tc_ratespec ceil; ++ __u32 buffer; ++ __u32 cbuffer; ++ __u32 quantum; ++ __u32 level; /* out only */ ++ __u32 prio; ++}; ++struct tc_htb_glob { ++ __u32 version; /* to match HTB/TC */ ++ __u32 rate2quantum; /* bps->quantum divisor */ ++ __u32 defcls; /* default class number */ ++ __u32 debug; /* debug flags */ ++ ++ /* stats */ ++ __u32 direct_pkts; /* count of non shaped packets */ ++}; ++enum { ++ TCA_HTB_UNSPEC, ++ TCA_HTB_PARMS, ++ TCA_HTB_INIT, ++ TCA_HTB_CTAB, ++ TCA_HTB_RTAB, ++ TCA_HTB_DIRECT_QLEN, ++ TCA_HTB_RATE64, ++ TCA_HTB_CEIL64, ++ TCA_HTB_PAD, ++ TCA_HTB_OFFLOAD, ++ __TCA_HTB_MAX, 
++}; ++ ++#define TCA_HTB_MAX (__TCA_HTB_MAX - 1) ++ ++struct tc_htb_xstats { ++ __u32 lends; ++ __u32 borrows; ++ __u32 giants; /* unused since 'Make HTB scheduler work with TSO.' */ ++ __s32 tokens; ++ __s32 ctokens; ++}; ++ ++/* HFSC section */ ++ ++struct tc_hfsc_qopt { ++ __u16 defcls; /* default class */ ++}; ++ ++struct tc_service_curve { ++ __u32 m1; /* slope of the first segment in bps */ ++ __u32 d; /* x-projection of the first segment in us */ ++ __u32 m2; /* slope of the second segment in bps */ ++}; ++ ++struct tc_hfsc_stats { ++ __u64 work; /* total work done */ ++ __u64 rtwork; /* work done by real-time criteria */ ++ __u32 period; /* current period */ ++ __u32 level; /* class level in hierarchy */ ++}; ++ ++enum { ++ TCA_HFSC_UNSPEC, ++ TCA_HFSC_RSC, ++ TCA_HFSC_FSC, ++ TCA_HFSC_USC, ++ __TCA_HFSC_MAX, ++}; ++ ++#define TCA_HFSC_MAX (__TCA_HFSC_MAX - 1) ++ ++ ++/* CBQ section */ ++ ++#define TC_CBQ_MAXPRIO 8 ++#define TC_CBQ_MAXLEVEL 8 ++#define TC_CBQ_DEF_EWMA 5 ++ ++struct tc_cbq_lssopt { ++ unsigned char change; ++ unsigned char flags; ++#define TCF_CBQ_LSS_BOUNDED 1 ++#define TCF_CBQ_LSS_ISOLATED 2 ++ unsigned char ewma_log; ++ unsigned char level; ++#define TCF_CBQ_LSS_FLAGS 1 ++#define TCF_CBQ_LSS_EWMA 2 ++#define TCF_CBQ_LSS_MAXIDLE 4 ++#define TCF_CBQ_LSS_MINIDLE 8 ++#define TCF_CBQ_LSS_OFFTIME 0x10 ++#define TCF_CBQ_LSS_AVPKT 0x20 ++ __u32 maxidle; ++ __u32 minidle; ++ __u32 offtime; ++ __u32 avpkt; ++}; ++ ++struct tc_cbq_wrropt { ++ unsigned char flags; ++ unsigned char priority; ++ unsigned char cpriority; ++ unsigned char __reserved; ++ __u32 allot; ++ __u32 weight; ++}; ++ ++struct tc_cbq_ovl { ++ unsigned char strategy; ++#define TC_CBQ_OVL_CLASSIC 0 ++#define TC_CBQ_OVL_DELAY 1 ++#define TC_CBQ_OVL_LOWPRIO 2 ++#define TC_CBQ_OVL_DROP 3 ++#define TC_CBQ_OVL_RCLASSIC 4 ++ unsigned char priority2; ++ __u16 pad; ++ __u32 penalty; ++}; ++ ++struct tc_cbq_police { ++ unsigned char police; ++ unsigned char __res1; ++ unsigned short __res2; ++}; ++ ++struct tc_cbq_fopt { ++ __u32 split; ++ __u32 defmap; ++ __u32 defchange; ++}; ++ ++struct tc_cbq_xstats { ++ __u32 borrows; ++ __u32 overactions; ++ __s32 avgidle; ++ __s32 undertime; ++}; ++ ++enum { ++ TCA_CBQ_UNSPEC, ++ TCA_CBQ_LSSOPT, ++ TCA_CBQ_WRROPT, ++ TCA_CBQ_FOPT, ++ TCA_CBQ_OVL_STRATEGY, ++ TCA_CBQ_RATE, ++ TCA_CBQ_RTAB, ++ TCA_CBQ_POLICE, ++ __TCA_CBQ_MAX, ++}; ++ ++#define TCA_CBQ_MAX (__TCA_CBQ_MAX - 1) ++ ++/* dsmark section */ ++ ++enum { ++ TCA_DSMARK_UNSPEC, ++ TCA_DSMARK_INDICES, ++ TCA_DSMARK_DEFAULT_INDEX, ++ TCA_DSMARK_SET_TC_INDEX, ++ TCA_DSMARK_MASK, ++ TCA_DSMARK_VALUE, ++ __TCA_DSMARK_MAX, ++}; ++ ++#define TCA_DSMARK_MAX (__TCA_DSMARK_MAX - 1) ++ ++/* ATM section */ ++ ++enum { ++ TCA_ATM_UNSPEC, ++ TCA_ATM_FD, /* file/socket descriptor */ ++ TCA_ATM_PTR, /* pointer to descriptor - later */ ++ TCA_ATM_HDR, /* LL header */ ++ TCA_ATM_EXCESS, /* excess traffic class (0 for CLP) */ ++ TCA_ATM_ADDR, /* PVC address (for output only) */ ++ TCA_ATM_STATE, /* VC state (ATM_VS_*; for output only) */ ++ __TCA_ATM_MAX, ++}; ++ ++#define TCA_ATM_MAX (__TCA_ATM_MAX - 1) ++ ++/* Network emulator */ ++ ++enum { ++ TCA_NETEM_UNSPEC, ++ TCA_NETEM_CORR, ++ TCA_NETEM_DELAY_DIST, ++ TCA_NETEM_REORDER, ++ TCA_NETEM_CORRUPT, ++ TCA_NETEM_LOSS, ++ TCA_NETEM_RATE, ++ TCA_NETEM_ECN, ++ TCA_NETEM_RATE64, ++ TCA_NETEM_PAD, ++ TCA_NETEM_LATENCY64, ++ TCA_NETEM_JITTER64, ++ TCA_NETEM_SLOT, ++ TCA_NETEM_SLOT_DIST, ++ __TCA_NETEM_MAX, ++}; ++ ++#define TCA_NETEM_MAX (__TCA_NETEM_MAX - 1) ++ ++struct tc_netem_qopt { ++ 
__u32 latency; /* added delay (us) */ ++ __u32 limit; /* fifo limit (packets) */ ++ __u32 loss; /* random packet loss (0=none ~0=100%) */ ++ __u32 gap; /* re-ordering gap (0 for none) */ ++ __u32 duplicate; /* random packet dup (0=none ~0=100%) */ ++ __u32 jitter; /* random jitter in latency (us) */ ++}; ++ ++struct tc_netem_corr { ++ __u32 delay_corr; /* delay correlation */ ++ __u32 loss_corr; /* packet loss correlation */ ++ __u32 dup_corr; /* duplicate correlation */ ++}; ++ ++struct tc_netem_reorder { ++ __u32 probability; ++ __u32 correlation; ++}; ++ ++struct tc_netem_corrupt { ++ __u32 probability; ++ __u32 correlation; ++}; ++ ++struct tc_netem_rate { ++ __u32 rate; /* byte/s */ ++ __s32 packet_overhead; ++ __u32 cell_size; ++ __s32 cell_overhead; ++}; ++ ++struct tc_netem_slot { ++ __s64 min_delay; /* nsec */ ++ __s64 max_delay; ++ __s32 max_packets; ++ __s32 max_bytes; ++ __s64 dist_delay; /* nsec */ ++ __s64 dist_jitter; /* nsec */ ++}; ++ ++enum { ++ NETEM_LOSS_UNSPEC, ++ NETEM_LOSS_GI, /* General Intuitive - 4 state model */ ++ NETEM_LOSS_GE, /* Gilbert Elliot models */ ++ __NETEM_LOSS_MAX ++}; ++#define NETEM_LOSS_MAX (__NETEM_LOSS_MAX - 1) ++ ++/* State transition probabilities for 4 state model */ ++struct tc_netem_gimodel { ++ __u32 p13; ++ __u32 p31; ++ __u32 p32; ++ __u32 p14; ++ __u32 p23; ++}; ++ ++/* Gilbert-Elliot models */ ++struct tc_netem_gemodel { ++ __u32 p; ++ __u32 r; ++ __u32 h; ++ __u32 k1; ++}; ++ ++#define NETEM_DIST_SCALE 8192 ++#define NETEM_DIST_MAX 16384 ++ ++/* DRR */ ++ ++enum { ++ TCA_DRR_UNSPEC, ++ TCA_DRR_QUANTUM, ++ __TCA_DRR_MAX ++}; ++ ++#define TCA_DRR_MAX (__TCA_DRR_MAX - 1) ++ ++struct tc_drr_stats { ++ __u32 deficit; ++}; ++ ++/* MQPRIO */ ++#define TC_QOPT_BITMASK 15 ++#define TC_QOPT_MAX_QUEUE 16 ++ ++enum { ++ TC_MQPRIO_HW_OFFLOAD_NONE, /* no offload requested */ ++ TC_MQPRIO_HW_OFFLOAD_TCS, /* offload TCs, no queue counts */ ++ __TC_MQPRIO_HW_OFFLOAD_MAX ++}; ++ ++#define TC_MQPRIO_HW_OFFLOAD_MAX (__TC_MQPRIO_HW_OFFLOAD_MAX - 1) ++ ++enum { ++ TC_MQPRIO_MODE_DCB, ++ TC_MQPRIO_MODE_CHANNEL, ++ __TC_MQPRIO_MODE_MAX ++}; ++ ++#define __TC_MQPRIO_MODE_MAX (__TC_MQPRIO_MODE_MAX - 1) ++ ++enum { ++ TC_MQPRIO_SHAPER_DCB, ++ TC_MQPRIO_SHAPER_BW_RATE, /* Add new shapers below */ ++ __TC_MQPRIO_SHAPER_MAX ++}; ++ ++#define __TC_MQPRIO_SHAPER_MAX (__TC_MQPRIO_SHAPER_MAX - 1) ++ ++struct tc_mqprio_qopt { ++ __u8 num_tc; ++ __u8 prio_tc_map[TC_QOPT_BITMASK + 1]; ++ __u8 hw; ++ __u16 count[TC_QOPT_MAX_QUEUE]; ++ __u16 offset[TC_QOPT_MAX_QUEUE]; ++}; ++ ++#define TC_MQPRIO_F_MODE 0x1 ++#define TC_MQPRIO_F_SHAPER 0x2 ++#define TC_MQPRIO_F_MIN_RATE 0x4 ++#define TC_MQPRIO_F_MAX_RATE 0x8 ++ ++enum { ++ TCA_MQPRIO_UNSPEC, ++ TCA_MQPRIO_MODE, ++ TCA_MQPRIO_SHAPER, ++ TCA_MQPRIO_MIN_RATE64, ++ TCA_MQPRIO_MAX_RATE64, ++ __TCA_MQPRIO_MAX, ++}; ++ ++#define TCA_MQPRIO_MAX (__TCA_MQPRIO_MAX - 1) ++ ++/* SFB */ ++ ++enum { ++ TCA_SFB_UNSPEC, ++ TCA_SFB_PARMS, ++ __TCA_SFB_MAX, ++}; ++ ++#define TCA_SFB_MAX (__TCA_SFB_MAX - 1) ++ ++/* ++ * Note: increment, decrement are Q0.16 fixed-point values. 
++ */ ++struct tc_sfb_qopt { ++ __u32 rehash_interval; /* delay between hash move, in ms */ ++ __u32 warmup_time; /* double buffering warmup time in ms (warmup_time < rehash_interval) */ ++ __u32 max; /* max len of qlen_min */ ++ __u32 bin_size; /* maximum queue length per bin */ ++ __u32 increment; /* probability increment, (d1 in Blue) */ ++ __u32 decrement; /* probability decrement, (d2 in Blue) */ ++ __u32 limit; /* max SFB queue length */ ++ __u32 penalty_rate; /* inelastic flows are rate limited to 'rate' pps */ ++ __u32 penalty_burst; ++}; ++ ++struct tc_sfb_xstats { ++ __u32 earlydrop; ++ __u32 penaltydrop; ++ __u32 bucketdrop; ++ __u32 queuedrop; ++ __u32 childdrop; /* drops in child qdisc */ ++ __u32 marked; ++ __u32 maxqlen; ++ __u32 maxprob; ++ __u32 avgprob; ++}; ++ ++#define SFB_MAX_PROB 0xFFFF ++ ++/* QFQ */ ++enum { ++ TCA_QFQ_UNSPEC, ++ TCA_QFQ_WEIGHT, ++ TCA_QFQ_LMAX, ++ __TCA_QFQ_MAX ++}; ++ ++#define TCA_QFQ_MAX (__TCA_QFQ_MAX - 1) ++ ++struct tc_qfq_stats { ++ __u32 weight; ++ __u32 lmax; ++}; ++ ++/* CODEL */ ++ ++enum { ++ TCA_CODEL_UNSPEC, ++ TCA_CODEL_TARGET, ++ TCA_CODEL_LIMIT, ++ TCA_CODEL_INTERVAL, ++ TCA_CODEL_ECN, ++ TCA_CODEL_CE_THRESHOLD, ++ __TCA_CODEL_MAX ++}; ++ ++#define TCA_CODEL_MAX (__TCA_CODEL_MAX - 1) ++ ++struct tc_codel_xstats { ++ __u32 maxpacket; /* largest packet we've seen so far */ ++ __u32 count; /* how many drops we've done since the last time we ++ * entered dropping state ++ */ ++ __u32 lastcount; /* count at entry to dropping state */ ++ __u32 ldelay; /* in-queue delay seen by most recently dequeued packet */ ++ __s32 drop_next; /* time to drop next packet */ ++ __u32 drop_overlimit; /* number of time max qdisc packet limit was hit */ ++ __u32 ecn_mark; /* number of packets we ECN marked instead of dropped */ ++ __u32 dropping; /* are we in dropping state ? 
*/ ++ __u32 ce_mark; /* number of CE marked packets because of ce_threshold */ ++}; ++ ++/* FQ_CODEL */ ++ ++enum { ++ TCA_FQ_CODEL_UNSPEC, ++ TCA_FQ_CODEL_TARGET, ++ TCA_FQ_CODEL_LIMIT, ++ TCA_FQ_CODEL_INTERVAL, ++ TCA_FQ_CODEL_ECN, ++ TCA_FQ_CODEL_FLOWS, ++ TCA_FQ_CODEL_QUANTUM, ++ TCA_FQ_CODEL_CE_THRESHOLD, ++ TCA_FQ_CODEL_DROP_BATCH_SIZE, ++ TCA_FQ_CODEL_MEMORY_LIMIT, ++ __TCA_FQ_CODEL_MAX ++}; ++ ++#define TCA_FQ_CODEL_MAX (__TCA_FQ_CODEL_MAX - 1) ++ ++enum { ++ TCA_FQ_CODEL_XSTATS_QDISC, ++ TCA_FQ_CODEL_XSTATS_CLASS, ++}; ++ ++struct tc_fq_codel_qd_stats { ++ __u32 maxpacket; /* largest packet we've seen so far */ ++ __u32 drop_overlimit; /* number of time max qdisc ++ * packet limit was hit ++ */ ++ __u32 ecn_mark; /* number of packets we ECN marked ++ * instead of being dropped ++ */ ++ __u32 new_flow_count; /* number of time packets ++ * created a 'new flow' ++ */ ++ __u32 new_flows_len; /* count of flows in new list */ ++ __u32 old_flows_len; /* count of flows in old list */ ++ __u32 ce_mark; /* packets above ce_threshold */ ++ __u32 memory_usage; /* in bytes */ ++ __u32 drop_overmemory; ++}; ++ ++struct tc_fq_codel_cl_stats { ++ __s32 deficit; ++ __u32 ldelay; /* in-queue delay seen by most recently ++ * dequeued packet ++ */ ++ __u32 count; ++ __u32 lastcount; ++ __u32 dropping; ++ __s32 drop_next; ++}; ++ ++struct tc_fq_codel_xstats { ++ __u32 type; ++ union { ++ struct tc_fq_codel_qd_stats qdisc_stats; ++ struct tc_fq_codel_cl_stats class_stats; ++ }; ++}; ++ ++/* FQ */ ++ ++enum { ++ TCA_FQ_UNSPEC, ++ ++ TCA_FQ_PLIMIT, /* limit of total number of packets in queue */ ++ ++ TCA_FQ_FLOW_PLIMIT, /* limit of packets per flow */ ++ ++ TCA_FQ_QUANTUM, /* RR quantum */ ++ ++ TCA_FQ_INITIAL_QUANTUM, /* RR quantum for new flow */ ++ ++ TCA_FQ_RATE_ENABLE, /* enable/disable rate limiting */ ++ ++ TCA_FQ_FLOW_DEFAULT_RATE,/* obsolete, do not use */ ++ ++ TCA_FQ_FLOW_MAX_RATE, /* per flow max rate */ ++ ++ TCA_FQ_BUCKETS_LOG, /* log2(number of buckets) */ ++ ++ TCA_FQ_FLOW_REFILL_DELAY, /* flow credit refill delay in usec */ ++ ++ TCA_FQ_ORPHAN_MASK, /* mask applied to orphaned skb hashes */ ++ ++ TCA_FQ_LOW_RATE_THRESHOLD, /* per packet delay under this rate */ ++ ++ TCA_FQ_CE_THRESHOLD, /* DCTCP-like CE-marking threshold */ ++ ++ __TCA_FQ_MAX ++}; ++ ++#define TCA_FQ_MAX (__TCA_FQ_MAX - 1) ++ ++struct tc_fq_qd_stats { ++ __u64 gc_flows; ++ __u64 highprio_packets; ++ __u64 tcp_retrans; ++ __u64 throttled; ++ __u64 flows_plimit; ++ __u64 pkts_too_long; ++ __u64 allocation_errors; ++ __s64 time_next_delayed_flow; ++ __u32 flows; ++ __u32 inactive_flows; ++ __u32 throttled_flows; ++ __u32 unthrottle_latency_ns; ++ __u64 ce_mark; /* packets above ce_threshold */ ++}; ++ ++/* Heavy-Hitter Filter */ ++ ++enum { ++ TCA_HHF_UNSPEC, ++ TCA_HHF_BACKLOG_LIMIT, ++ TCA_HHF_QUANTUM, ++ TCA_HHF_HH_FLOWS_LIMIT, ++ TCA_HHF_RESET_TIMEOUT, ++ TCA_HHF_ADMIT_BYTES, ++ TCA_HHF_EVICT_TIMEOUT, ++ TCA_HHF_NON_HH_WEIGHT, ++ __TCA_HHF_MAX ++}; ++ ++#define TCA_HHF_MAX (__TCA_HHF_MAX - 1) ++ ++struct tc_hhf_xstats { ++ __u32 drop_overlimit; /* number of times max qdisc packet limit ++ * was hit ++ */ ++ __u32 hh_overlimit; /* number of times max heavy-hitters was hit */ ++ __u32 hh_tot_count; /* number of captured heavy-hitters so far */ ++ __u32 hh_cur_count; /* number of current heavy-hitters */ ++}; ++ ++/* PIE */ ++enum { ++ TCA_PIE_UNSPEC, ++ TCA_PIE_TARGET, ++ TCA_PIE_LIMIT, ++ TCA_PIE_TUPDATE, ++ TCA_PIE_ALPHA, ++ TCA_PIE_BETA, ++ TCA_PIE_ECN, ++ TCA_PIE_BYTEMODE, ++ __TCA_PIE_MAX ++}; ++#define TCA_PIE_MAX 
(__TCA_PIE_MAX - 1) ++ ++struct tc_pie_xstats { ++ __u32 prob; /* current probability */ ++ __u32 delay; /* current delay in ms */ ++ __u32 avg_dq_rate; /* current average dq_rate in bits/pie_time */ ++ __u32 packets_in; /* total number of packets enqueued */ ++ __u32 dropped; /* packets dropped due to pie_action */ ++ __u32 overlimit; /* dropped due to lack of space in queue */ ++ __u32 maxq; /* maximum queue size */ ++ __u32 ecn_mark; /* packets marked with ecn*/ ++}; ++ ++/* CBS */ ++struct tc_cbs_qopt { ++ __u8 offload; ++ __u8 _pad[3]; ++ __s32 hicredit; ++ __s32 locredit; ++ __s32 idleslope; ++ __s32 sendslope; ++}; ++ ++enum { ++ TCA_CBS_UNSPEC, ++ TCA_CBS_PARMS, ++ __TCA_CBS_MAX, ++}; ++ ++#define TCA_CBS_MAX (__TCA_CBS_MAX - 1) ++ ++ ++/* ETF */ ++struct tc_etf_qopt { ++ __s32 delta; ++ __s32 clockid; ++ __u32 flags; ++#define TC_ETF_DEADLINE_MODE_ON BIT(0) ++#define TC_ETF_OFFLOAD_ON BIT(1) ++}; ++ ++enum { ++ TCA_ETF_UNSPEC, ++ TCA_ETF_PARMS, ++ __TCA_ETF_MAX, ++}; ++ ++#define TCA_ETF_MAX (__TCA_ETF_MAX - 1) ++ ++ ++/* CAKE */ ++enum { ++ TCA_CAKE_UNSPEC, ++ TCA_CAKE_PAD, ++ TCA_CAKE_BASE_RATE64, ++ TCA_CAKE_DIFFSERV_MODE, ++ TCA_CAKE_ATM, ++ TCA_CAKE_FLOW_MODE, ++ TCA_CAKE_OVERHEAD, ++ TCA_CAKE_RTT, ++ TCA_CAKE_TARGET, ++ TCA_CAKE_AUTORATE, ++ TCA_CAKE_MEMORY, ++ TCA_CAKE_NAT, ++ TCA_CAKE_RAW, ++ TCA_CAKE_WASH, ++ TCA_CAKE_MPU, ++ TCA_CAKE_INGRESS, ++ TCA_CAKE_ACK_FILTER, ++ TCA_CAKE_SPLIT_GSO, ++ __TCA_CAKE_MAX ++}; ++#define TCA_CAKE_MAX (__TCA_CAKE_MAX - 1) ++ ++enum { ++ __TCA_CAKE_STATS_INVALID, ++ TCA_CAKE_STATS_PAD, ++ TCA_CAKE_STATS_CAPACITY_ESTIMATE64, ++ TCA_CAKE_STATS_MEMORY_LIMIT, ++ TCA_CAKE_STATS_MEMORY_USED, ++ TCA_CAKE_STATS_AVG_NETOFF, ++ TCA_CAKE_STATS_MIN_NETLEN, ++ TCA_CAKE_STATS_MAX_NETLEN, ++ TCA_CAKE_STATS_MIN_ADJLEN, ++ TCA_CAKE_STATS_MAX_ADJLEN, ++ TCA_CAKE_STATS_TIN_STATS, ++ TCA_CAKE_STATS_DEFICIT, ++ TCA_CAKE_STATS_COBALT_COUNT, ++ TCA_CAKE_STATS_DROPPING, ++ TCA_CAKE_STATS_DROP_NEXT_US, ++ TCA_CAKE_STATS_P_DROP, ++ TCA_CAKE_STATS_BLUE_TIMER_US, ++ __TCA_CAKE_STATS_MAX ++}; ++#define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1) ++ ++enum { ++ __TCA_CAKE_TIN_STATS_INVALID, ++ TCA_CAKE_TIN_STATS_PAD, ++ TCA_CAKE_TIN_STATS_SENT_PACKETS, ++ TCA_CAKE_TIN_STATS_SENT_BYTES64, ++ TCA_CAKE_TIN_STATS_DROPPED_PACKETS, ++ TCA_CAKE_TIN_STATS_DROPPED_BYTES64, ++ TCA_CAKE_TIN_STATS_ACKS_DROPPED_PACKETS, ++ TCA_CAKE_TIN_STATS_ACKS_DROPPED_BYTES64, ++ TCA_CAKE_TIN_STATS_ECN_MARKED_PACKETS, ++ TCA_CAKE_TIN_STATS_ECN_MARKED_BYTES64, ++ TCA_CAKE_TIN_STATS_BACKLOG_PACKETS, ++ TCA_CAKE_TIN_STATS_BACKLOG_BYTES, ++ TCA_CAKE_TIN_STATS_THRESHOLD_RATE64, ++ TCA_CAKE_TIN_STATS_TARGET_US, ++ TCA_CAKE_TIN_STATS_INTERVAL_US, ++ TCA_CAKE_TIN_STATS_WAY_INDIRECT_HITS, ++ TCA_CAKE_TIN_STATS_WAY_MISSES, ++ TCA_CAKE_TIN_STATS_WAY_COLLISIONS, ++ TCA_CAKE_TIN_STATS_PEAK_DELAY_US, ++ TCA_CAKE_TIN_STATS_AVG_DELAY_US, ++ TCA_CAKE_TIN_STATS_BASE_DELAY_US, ++ TCA_CAKE_TIN_STATS_SPARSE_FLOWS, ++ TCA_CAKE_TIN_STATS_BULK_FLOWS, ++ TCA_CAKE_TIN_STATS_UNRESPONSIVE_FLOWS, ++ TCA_CAKE_TIN_STATS_MAX_SKBLEN, ++ TCA_CAKE_TIN_STATS_FLOW_QUANTUM, ++ __TCA_CAKE_TIN_STATS_MAX ++}; ++#define TCA_CAKE_TIN_STATS_MAX (__TCA_CAKE_TIN_STATS_MAX - 1) ++#define TC_CAKE_MAX_TINS (8) ++ ++enum { ++ CAKE_FLOW_NONE = 0, ++ CAKE_FLOW_SRC_IP, ++ CAKE_FLOW_DST_IP, ++ CAKE_FLOW_HOSTS, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_DST_IP */ ++ CAKE_FLOW_FLOWS, ++ CAKE_FLOW_DUAL_SRC, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_FLOWS */ ++ CAKE_FLOW_DUAL_DST, /* = CAKE_FLOW_DST_IP | CAKE_FLOW_FLOWS */ ++ CAKE_FLOW_TRIPLE, /* = CAKE_FLOW_HOSTS | 
CAKE_FLOW_FLOWS */ ++ CAKE_FLOW_MAX, ++}; ++ ++enum { ++ CAKE_DIFFSERV_DIFFSERV3 = 0, ++ CAKE_DIFFSERV_DIFFSERV4, ++ CAKE_DIFFSERV_DIFFSERV8, ++ CAKE_DIFFSERV_BESTEFFORT, ++ CAKE_DIFFSERV_PRECEDENCE, ++ CAKE_DIFFSERV_MAX ++}; ++ ++enum { ++ CAKE_ACK_NONE = 0, ++ CAKE_ACK_FILTER, ++ CAKE_ACK_AGGRESSIVE, ++ CAKE_ACK_MAX ++}; ++ ++enum { ++ CAKE_ATM_NONE = 0, ++ CAKE_ATM_ATM, ++ CAKE_ATM_PTM, ++ CAKE_ATM_MAX ++}; ++ ++ ++/* TAPRIO */ ++enum { ++ TC_TAPRIO_CMD_SET_GATES = 0x00, ++ TC_TAPRIO_CMD_SET_AND_HOLD = 0x01, ++ TC_TAPRIO_CMD_SET_AND_RELEASE = 0x02, ++}; ++ ++enum { ++ TCA_TAPRIO_SCHED_ENTRY_UNSPEC, ++ TCA_TAPRIO_SCHED_ENTRY_INDEX, /* u32 */ ++ TCA_TAPRIO_SCHED_ENTRY_CMD, /* u8 */ ++ TCA_TAPRIO_SCHED_ENTRY_GATE_MASK, /* u32 */ ++ TCA_TAPRIO_SCHED_ENTRY_INTERVAL, /* u32 */ ++ __TCA_TAPRIO_SCHED_ENTRY_MAX, ++}; ++#define TCA_TAPRIO_SCHED_ENTRY_MAX (__TCA_TAPRIO_SCHED_ENTRY_MAX - 1) ++ ++/* The format for schedule entry list is: ++ * [TCA_TAPRIO_SCHED_ENTRY_LIST] ++ * [TCA_TAPRIO_SCHED_ENTRY] ++ * [TCA_TAPRIO_SCHED_ENTRY_CMD] ++ * [TCA_TAPRIO_SCHED_ENTRY_GATES] ++ * [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] ++ */ ++enum { ++ TCA_TAPRIO_SCHED_UNSPEC, ++ TCA_TAPRIO_SCHED_ENTRY, ++ __TCA_TAPRIO_SCHED_MAX, ++}; ++ ++#define TCA_TAPRIO_SCHED_MAX (__TCA_TAPRIO_SCHED_MAX - 1) ++ ++enum { ++ TCA_TAPRIO_ATTR_UNSPEC, ++ TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */ ++ TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST, /* nested of entry */ ++ TCA_TAPRIO_ATTR_SCHED_BASE_TIME, /* s64 */ ++ TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY, /* single entry */ ++ TCA_TAPRIO_ATTR_SCHED_CLOCKID, /* s32 */ ++ TCA_TAPRIO_PAD, ++ __TCA_TAPRIO_ATTR_MAX, ++}; ++ ++#define TCA_TAPRIO_ATTR_MAX (__TCA_TAPRIO_ATTR_MAX - 1) ++ ++#endif +diff --git a/src/cc/libbpf/scripts/build-fuzzers.sh b/src/cc/libbpf/scripts/build-fuzzers.sh +new file mode 100755 +index 0000000..75b3833 +--- /dev/null ++++ b/src/cc/libbpf/scripts/build-fuzzers.sh +@@ -0,0 +1,81 @@ ++#!/bin/bash ++set -eux ++ ++SANITIZER=${SANITIZER:-address} ++flags="-O1 -fno-omit-frame-pointer -g -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -fsanitize=$SANITIZER -fsanitize=fuzzer-no-link" ++ ++export CC=${CC:-clang} ++export CFLAGS=${CFLAGS:-$flags} ++ ++export CXX=${CXX:-clang++} ++export CXXFLAGS=${CXXFLAGS:-$flags} ++ ++cd "$(dirname -- "$0")/.." ++ ++export OUT=${OUT:-"$(pwd)/out"} ++mkdir -p "$OUT" ++ ++export LIB_FUZZING_ENGINE=${LIB_FUZZING_ENGINE:--fsanitize=fuzzer} ++ ++# libelf is compiled with _FORTIFY_SOURCE by default and it ++# isn't compatible with MSan. It was borrowed ++# from https://github.com/google/oss-fuzz/pull/7422 ++if [[ "$SANITIZER" == memory ]]; then ++ CFLAGS+=" -U_FORTIFY_SOURCE" ++ CXXFLAGS+=" -U_FORTIFY_SOURCE" ++fi ++ ++# The alignment check is turned off by default on OSS-Fuzz/CFLite so it should be ++# turned on explicitly there. It was borrowed from ++# https://github.com/google/oss-fuzz/pull/7092 ++if [[ "$SANITIZER" == undefined ]]; then ++ additional_ubsan_checks=alignment ++ UBSAN_FLAGS="-fsanitize=$additional_ubsan_checks -fno-sanitize-recover=$additional_ubsan_checks" ++ CFLAGS+=" $UBSAN_FLAGS" ++ CXXFLAGS+=" $UBSAN_FLAGS" ++fi ++ ++# Ideally libbelf should be built using release tarballs available ++# at https://sourceware.org/elfutils/ftp/. 
Unfortunately sometimes they ++# fail to compile (for example, elfutils-0.185 fails to compile with LDFLAGS enabled ++# due to https://bugs.gentoo.org/794601) so let's just point the script to ++# commits referring to versions of libelf that actually can be built ++rm -rf elfutils ++git clone git://sourceware.org/git/elfutils.git ++( ++cd elfutils ++git checkout 83251d4091241acddbdcf16f814e3bc6ef3df49a ++git log --oneline -1 ++ ++# ASan isn't compatible with -Wl,--no-undefined: https://github.com/google/sanitizers/issues/380 ++find -name Makefile.am | xargs sed -i 's/,--no-undefined//' ++ ++# ASan isn't compatible with -Wl,-z,defs either: ++# https://clang.llvm.org/docs/AddressSanitizer.html#usage ++sed -i 's/^\(ZDEFS_LDFLAGS=\).*/\1/' configure.ac ++ ++if [[ "$SANITIZER" == undefined ]]; then ++ # That's basicaly what --enable-sanitize-undefined does to turn off unaligned access ++ # elfutils heavily relies on on i386/x86_64 but without changing compiler flags along the way ++ sed -i 's/\(check_undefined_val\)=[0-9]/\1=1/' configure.ac ++fi ++ ++autoreconf -i -f ++if ! ./configure --enable-maintainer-mode --disable-debuginfod --disable-libdebuginfod \ ++ CC="$CC" CFLAGS="-Wno-error $CFLAGS" CXX="$CXX" CXXFLAGS="-Wno-error $CXXFLAGS" LDFLAGS="$CFLAGS"; then ++ cat config.log ++ exit 1 ++fi ++ ++make -C config -j$(nproc) V=1 ++make -C lib -j$(nproc) V=1 ++make -C libelf -j$(nproc) V=1 ++) ++ ++make -C src BUILD_STATIC_ONLY=y V=1 clean ++make -C src -j$(nproc) CFLAGS="-I$(pwd)/elfutils/libelf $CFLAGS" BUILD_STATIC_ONLY=y V=1 ++ ++$CC $CFLAGS -Isrc -Iinclude -Iinclude/uapi -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -c fuzz/bpf-object-fuzzer.c -o bpf-object-fuzzer.o ++$CXX $CXXFLAGS $LIB_FUZZING_ENGINE bpf-object-fuzzer.o src/libbpf.a "$(pwd)/elfutils/libelf/libelf.a" -l:libz.a -o "$OUT/bpf-object-fuzzer" ++ ++cp fuzz/bpf-object-fuzzer_seed_corpus.zip "$OUT" +diff --git a/src/cc/libbpf/scripts/coverity.sh b/src/cc/libbpf/scripts/coverity.sh +new file mode 100755 +index 0000000..99e4809 +--- /dev/null ++++ b/src/cc/libbpf/scripts/coverity.sh +@@ -0,0 +1,105 @@ ++#!/bin/bash ++# Taken from: https://scan.coverity.com/scripts/travisci_build_coverity_scan.sh ++# Local changes are annotated with "#[local]" ++ ++set -e ++ ++# Environment check ++echo -e "\033[33;1mNote: COVERITY_SCAN_PROJECT_NAME and COVERITY_SCAN_TOKEN are available on Project Settings page on scan.coverity.com\033[0m" ++[ -z "$COVERITY_SCAN_PROJECT_NAME" ] && echo "ERROR: COVERITY_SCAN_PROJECT_NAME must be set" && exit 1 ++[ -z "$COVERITY_SCAN_NOTIFICATION_EMAIL" ] && echo "ERROR: COVERITY_SCAN_NOTIFICATION_EMAIL must be set" && exit 1 ++[ -z "$COVERITY_SCAN_BRANCH_PATTERN" ] && echo "ERROR: COVERITY_SCAN_BRANCH_PATTERN must be set" && exit 1 ++[ -z "$COVERITY_SCAN_BUILD_COMMAND" ] && echo "ERROR: COVERITY_SCAN_BUILD_COMMAND must be set" && exit 1 ++[ -z "$COVERITY_SCAN_TOKEN" ] && echo "ERROR: COVERITY_SCAN_TOKEN must be set" && exit 1 ++ ++PLATFORM=`uname` ++#[local] Use /var/tmp for TOOL_ARCHIVE and TOOL_BASE, as on certain systems ++# /tmp is tmpfs and is sometimes too small to handle all necessary tooling ++TOOL_ARCHIVE=/var//tmp/cov-analysis-${PLATFORM}.tgz ++TOOL_URL=https://scan.coverity.com/download/${PLATFORM} ++TOOL_BASE=/var/tmp/coverity-scan-analysis ++UPLOAD_URL="https://scan.coverity.com/builds" ++SCAN_URL="https://scan.coverity.com" ++ ++# Do not run on pull requests ++if [ "${TRAVIS_PULL_REQUEST}" = "true" ]; then ++ echo -e "\033[33;1mINFO: Skipping Coverity Analysis: branch is a pull request.\033[0m" ++ exit 0 
++fi ++ ++# Verify this branch should run ++IS_COVERITY_SCAN_BRANCH=`ruby -e "puts '${TRAVIS_BRANCH}' =~ /\\A$COVERITY_SCAN_BRANCH_PATTERN\\z/ ? 1 : 0"` ++if [ "$IS_COVERITY_SCAN_BRANCH" = "1" ]; then ++ echo -e "\033[33;1mCoverity Scan configured to run on branch ${TRAVIS_BRANCH}\033[0m" ++else ++ echo -e "\033[33;1mCoverity Scan NOT configured to run on branch ${TRAVIS_BRANCH}\033[0m" ++ exit 1 ++fi ++ ++# Verify upload is permitted ++AUTH_RES=`curl -s --form project="$COVERITY_SCAN_PROJECT_NAME" --form token="$COVERITY_SCAN_TOKEN" $SCAN_URL/api/upload_permitted` ++if [ "$AUTH_RES" = "Access denied" ]; then ++ echo -e "\033[33;1mCoverity Scan API access denied. Check COVERITY_SCAN_PROJECT_NAME and COVERITY_SCAN_TOKEN.\033[0m" ++ exit 1 ++else ++ AUTH=`echo $AUTH_RES | ruby -e "require 'rubygems'; require 'json'; puts JSON[STDIN.read]['upload_permitted']"` ++ if [ "$AUTH" = "true" ]; then ++ echo -e "\033[33;1mCoverity Scan analysis authorized per quota.\033[0m" ++ else ++ WHEN=`echo $AUTH_RES | ruby -e "require 'rubygems'; require 'json'; puts JSON[STDIN.read]['next_upload_permitted_at']"` ++ echo -e "\033[33;1mCoverity Scan analysis NOT authorized until $WHEN.\033[0m" ++ exit 0 ++ fi ++fi ++ ++if [ ! -d $TOOL_BASE ]; then ++ # Download Coverity Scan Analysis Tool ++ if [ ! -e $TOOL_ARCHIVE ]; then ++ echo -e "\033[33;1mDownloading Coverity Scan Analysis Tool...\033[0m" ++ wget -nv -O $TOOL_ARCHIVE $TOOL_URL --post-data "project=$COVERITY_SCAN_PROJECT_NAME&token=$COVERITY_SCAN_TOKEN" ++ fi ++ ++ # Extract Coverity Scan Analysis Tool ++ echo -e "\033[33;1mExtracting Coverity Scan Analysis Tool...\033[0m" ++ mkdir -p $TOOL_BASE ++ pushd $TOOL_BASE ++ tar xzf $TOOL_ARCHIVE ++ popd ++fi ++ ++TOOL_DIR=`find $TOOL_BASE -type d -name 'cov-analysis*'` ++export PATH=$TOOL_DIR/bin:$PATH ++ ++# Build ++echo -e "\033[33;1mRunning Coverity Scan Analysis Tool...\033[0m" ++COV_BUILD_OPTIONS="" ++#COV_BUILD_OPTIONS="--return-emit-failures 8 --parse-error-threshold 85" ++RESULTS_DIR="cov-int" ++eval "${COVERITY_SCAN_BUILD_COMMAND_PREPEND}" ++COVERITY_UNSUPPORTED=1 cov-build --dir $RESULTS_DIR $COV_BUILD_OPTIONS $COVERITY_SCAN_BUILD_COMMAND ++cov-import-scm --dir $RESULTS_DIR --scm git --log $RESULTS_DIR/scm_log.txt 2>&1 ++ ++# Upload results ++echo -e "\033[33;1mTarring Coverity Scan Analysis results...\033[0m" ++RESULTS_ARCHIVE=analysis-results.tgz ++tar czf $RESULTS_ARCHIVE $RESULTS_DIR ++SHA=`git rev-parse --short HEAD` ++ ++echo -e "\033[33;1mUploading Coverity Scan Analysis results...\033[0m" ++response=$(curl \ ++ --silent --write-out "\n%{http_code}\n" \ ++ --form project=$COVERITY_SCAN_PROJECT_NAME \ ++ --form token=$COVERITY_SCAN_TOKEN \ ++ --form email=$COVERITY_SCAN_NOTIFICATION_EMAIL \ ++ --form file=@$RESULTS_ARCHIVE \ ++ --form version=$SHA \ ++ --form description="Travis CI build" \ ++ $UPLOAD_URL) ++status_code=$(echo "$response" | sed -n '$p') ++#[local] Coverity used to return 201 on success, but it's 200 now ++# See https://github.com/systemd/systemd/blob/master/tools/coverity.sh#L145 ++if [ "$status_code" != "200" ]; then ++ TEXT=$(echo "$response" | sed '$d') ++ echo -e "\033[33;1mCoverity Scan upload failed: $TEXT.\033[0m" ++ exit 1 ++fi +diff --git a/src/cc/libbpf/scripts/sync-kernel.sh b/src/cc/libbpf/scripts/sync-kernel.sh +new file mode 100755 +index 0000000..1653c2e +--- /dev/null ++++ b/src/cc/libbpf/scripts/sync-kernel.sh +@@ -0,0 +1,350 @@ ++#!/bin/bash ++ ++usage () { ++ echo "USAGE: ./sync-kernel.sh " ++ echo "" ++ echo "Set BPF_NEXT_BASELINE to override bpf-next tree 
commit, otherwise read from /CHECKPOINT-COMMIT." ++ echo "Set BPF_BASELINE to override bpf tree commit, otherwise read from /BPF-CHECKPOINT-COMMIT." ++ echo "Set MANUAL_MODE to 1 to manually control every cherry-picked commits." ++ exit 1 ++} ++ ++set -eu ++ ++LIBBPF_REPO=${1-""} ++LINUX_REPO=${2-""} ++BPF_BRANCH=${3-""} ++BASELINE_COMMIT=${BPF_NEXT_BASELINE:-$(cat ${LIBBPF_REPO}/CHECKPOINT-COMMIT)} ++BPF_BASELINE_COMMIT=${BPF_BASELINE:-$(cat ${LIBBPF_REPO}/BPF-CHECKPOINT-COMMIT)} ++ ++if [ -z "${LIBBPF_REPO}" ] || [ -z "${LINUX_REPO}" ]; then ++ echo "Error: libbpf or linux repos are not specified" ++ usage ++fi ++if [ -z "${BPF_BRANCH}" ]; then ++ echo "Error: linux's bpf tree branch is not specified" ++ usage ++fi ++if [ -z "${BASELINE_COMMIT}" ] || [ -z "${BPF_BASELINE_COMMIT}" ]; then ++ echo "Error: bpf or bpf-next baseline commits are not provided" ++ usage ++fi ++ ++SUFFIX=$(date --utc +%Y-%m-%dT%H-%M-%S.%3NZ) ++WORKDIR=$(pwd) ++TMP_DIR=$(mktemp -d) ++ ++trap "cd ${WORKDIR}; exit" INT TERM EXIT ++ ++declare -A PATH_MAP ++PATH_MAP=( \ ++ [tools/lib/bpf]=src \ ++ [tools/include/uapi/linux/bpf_common.h]=include/uapi/linux/bpf_common.h \ ++ [tools/include/uapi/linux/bpf.h]=include/uapi/linux/bpf.h \ ++ [tools/include/uapi/linux/btf.h]=include/uapi/linux/btf.h \ ++ [tools/include/uapi/linux/if_link.h]=include/uapi/linux/if_link.h \ ++ [tools/include/uapi/linux/if_xdp.h]=include/uapi/linux/if_xdp.h \ ++ [tools/include/uapi/linux/netlink.h]=include/uapi/linux/netlink.h \ ++ [tools/include/uapi/linux/pkt_cls.h]=include/uapi/linux/pkt_cls.h \ ++ [tools/include/uapi/linux/pkt_sched.h]=include/uapi/linux/pkt_sched.h \ ++ [include/uapi/linux/perf_event.h]=include/uapi/linux/perf_event.h \ ++ [Documentation/bpf/libbpf]=docs \ ++) ++ ++LIBBPF_PATHS="${!PATH_MAP[@]} :^tools/lib/bpf/Makefile :^tools/lib/bpf/Build :^tools/lib/bpf/.gitignore :^tools/include/tools/libc_compat.h" ++LIBBPF_VIEW_PATHS="${PATH_MAP[@]}" ++LIBBPF_VIEW_EXCLUDE_REGEX='^src/(Makefile|Build|test_libbpf\.c|bpf_helper_defs\.h|\.gitignore)$|^docs/(\.gitignore|api\.rst|conf\.py)$|^docs/sphinx/.*' ++LINUX_VIEW_EXCLUDE_REGEX='^include/tools/libc_compat.h$' ++ ++LIBBPF_TREE_FILTER="mkdir -p __libbpf/include/uapi/linux __libbpf/include/tools && "$'\\\n' ++for p in "${!PATH_MAP[@]}"; do ++ LIBBPF_TREE_FILTER+="git mv -kf ${p} __libbpf/${PATH_MAP[${p}]} && "$'\\\n' ++done ++LIBBPF_TREE_FILTER+="git rm --ignore-unmatch -f __libbpf/src/{Makefile,Build,test_libbpf.c,.gitignore} >/dev/null" ++ ++cd_to() ++{ ++ cd ${WORKDIR} && cd "$1" ++} ++ ++# Output brief single-line commit description ++# $1 - commit ref ++commit_desc() ++{ ++ git log -n1 --pretty='%h ("%s")' $1 ++} ++ ++# Create commit single-line signature, which consists of: ++# - full commit subject ++# - author date in ISO8601 format ++# - full commit body with newlines replaced with vertical bars (|) ++# - shortstat appended at the end ++# The idea is that this single-line signature is good enough to make final ++# decision about whether two commits are the same, across different repos. 
++# $1 - commit ref ++# $2 - paths filter ++commit_signature() ++{ ++ git show --pretty='("%s")|%aI|%b' --shortstat $1 -- ${2-.} | tr '\n' '|' ++} ++ ++# Cherry-pick commits touching libbpf-related files ++# $1 - baseline_tag ++# $2 - tip_tag ++cherry_pick_commits() ++{ ++ local manual_mode=${MANUAL_MODE:-0} ++ local baseline_tag=$1 ++ local tip_tag=$2 ++ local new_commits ++ local signature ++ local should_skip ++ local synced_cnt ++ local manual_check ++ local libbpf_conflict_cnt ++ local desc ++ ++ new_commits=$(git rev-list --no-merges --topo-order --reverse ${baseline_tag}..${tip_tag} ${LIBBPF_PATHS[@]}) ++ for new_commit in ${new_commits}; do ++ desc="$(commit_desc ${new_commit})" ++ signature="$(commit_signature ${new_commit} "${LIBBPF_PATHS[@]}")" ++ synced_cnt=$(grep -F "${signature}" ${TMP_DIR}/libbpf_commits.txt | wc -l) ++ manual_check=0 ++ if ((${synced_cnt} > 0)); then ++ # commit with the same subject is already in libbpf, but it's ++ # not 100% the same commit, so check with user ++ echo "Commit '${desc}' is synced into libbpf as:" ++ grep -F "${signature}" ${TMP_DIR}/libbpf_commits.txt | \ ++ cut -d'|' -f1 | sed -e 's/^/- /' ++ if ((${manual_mode} != 1 && ${synced_cnt} == 1)); then ++ echo "Skipping '${desc}' due to unique match..." ++ continue ++ fi ++ if ((${synced_cnt} > 1)); then ++ echo "'${desc} matches multiple commits, please, double-check!" ++ manual_check=1 ++ fi ++ fi ++ if ((${manual_mode} == 1 || ${manual_check} == 1)); then ++ read -p "Do you want to skip '${desc}'? [y/N]: " should_skip ++ case "${should_skip}" in ++ "y" | "Y") ++ echo "Skipping '${desc}'..." ++ continue ++ ;; ++ esac ++ fi ++ # commit hasn't been synced into libbpf yet ++ echo "Picking '${desc}'..." ++ if ! git cherry-pick ${new_commit} &>/dev/null; then ++ echo "Warning! Cherry-picking '${desc} failed, checking if it's non-libbpf files causing problems..." ++ libbpf_conflict_cnt=$(git diff --name-only --diff-filter=U -- ${LIBBPF_PATHS[@]} | wc -l) ++ conflict_cnt=$(git diff --name-only | wc -l) ++ prompt_resolution=1 ++ ++ if ((${libbpf_conflict_cnt} == 0)); then ++ echo "Looks like only non-libbpf files have conflicts, ignoring..." ++ if ((${conflict_cnt} == 0)); then ++ echo "Empty cherry-pick, skipping it..." ++ git cherry-pick --abort ++ continue ++ fi ++ ++ git add . ++ # GIT_EDITOR=true to avoid editor popping up to edit commit message ++ if ! GIT_EDITOR=true git cherry-pick --continue &>/dev/null; then ++ echo "Error! That still failed! Please resolve manually." ++ else ++ echo "Success! All cherry-pick conflicts were resolved for '${desc}'!" ++ prompt_resolution=0 ++ fi ++ fi ++ ++ if ((${prompt_resolution} == 1)); then ++ read -p "Error! Cherry-picking '${desc}' failed, please fix manually and press to proceed..." ++ fi ++ fi ++ # Append signature of just cherry-picked commit to avoid ++ # potentially cherry-picking the same commit twice later when ++ # processing bpf tree commits. At this point we don't know yet ++ # the final commit sha in libbpf repo, so we record Linux SHA ++ # instead as LINUX_. ++ echo LINUX_$(git log --pretty='%h' -n1) "${signature}" >> ${TMP_DIR}/libbpf_commits.txt ++ done ++} ++ ++cleanup() ++{ ++ echo "Cleaning up..." ++ rm -r ${TMP_DIR} ++ cd_to ${LINUX_REPO} ++ git checkout ${TIP_SYM_REF} ++ git branch -D ${BASELINE_TAG} ${TIP_TAG} ${BPF_BASELINE_TAG} ${BPF_TIP_TAG} \ ++ ${SQUASH_BASE_TAG} ${SQUASH_TIP_TAG} ${VIEW_TAG} || true ++ ++ cd_to . ++ echo "DONE." 
++} ++ ++ ++cd_to ${LIBBPF_REPO} ++GITHUB_ABS_DIR=$(pwd) ++echo "Dumping existing libbpf commit signatures..." ++for h in $(git log --pretty='%h' -n500); do ++ echo $h "$(commit_signature $h)" >> ${TMP_DIR}/libbpf_commits.txt ++done ++ ++# Use current kernel repo HEAD as a source of patches ++cd_to ${LINUX_REPO} ++LINUX_ABS_DIR=$(pwd) ++TIP_SYM_REF=$(git symbolic-ref -q --short HEAD || git rev-parse HEAD) ++TIP_COMMIT=$(git rev-parse HEAD) ++BPF_TIP_COMMIT=$(git rev-parse ${BPF_BRANCH}) ++BASELINE_TAG=libbpf-baseline-${SUFFIX} ++TIP_TAG=libbpf-tip-${SUFFIX} ++BPF_BASELINE_TAG=libbpf-bpf-baseline-${SUFFIX} ++BPF_TIP_TAG=libbpf-bpf-tip-${SUFFIX} ++VIEW_TAG=libbpf-view-${SUFFIX} ++LIBBPF_SYNC_TAG=libbpf-sync-${SUFFIX} ++ ++# Squash state of kernel repo at baseline into single commit ++SQUASH_BASE_TAG=libbpf-squash-base-${SUFFIX} ++SQUASH_TIP_TAG=libbpf-squash-tip-${SUFFIX} ++SQUASH_COMMIT=$(git commit-tree ${BASELINE_COMMIT}^{tree} -m "BASELINE SQUASH ${BASELINE_COMMIT}") ++ ++echo "WORKDIR: ${WORKDIR}" ++echo "LINUX REPO: ${LINUX_REPO}" ++echo "LIBBPF REPO: ${LIBBPF_REPO}" ++echo "TEMP DIR: ${TMP_DIR}" ++echo "SUFFIX: ${SUFFIX}" ++echo "BASE COMMIT: '$(commit_desc ${BASELINE_COMMIT})'" ++echo "TIP COMMIT: '$(commit_desc ${TIP_COMMIT})'" ++echo "BPF BASE COMMIT: '$(commit_desc ${BPF_BASELINE_COMMIT})'" ++echo "BPF TIP COMMIT: '$(commit_desc ${BPF_TIP_COMMIT})'" ++echo "SQUASH COMMIT: ${SQUASH_COMMIT}" ++echo "BASELINE TAG: ${BASELINE_TAG}" ++echo "TIP TAG: ${TIP_TAG}" ++echo "BPF BASELINE TAG: ${BPF_BASELINE_TAG}" ++echo "BPF TIP TAG: ${BPF_TIP_TAG}" ++echo "SQUASH BASE TAG: ${SQUASH_BASE_TAG}" ++echo "SQUASH TIP TAG: ${SQUASH_TIP_TAG}" ++echo "VIEW TAG: ${VIEW_TAG}" ++echo "LIBBPF SYNC TAG: ${LIBBPF_SYNC_TAG}" ++echo "PATCHES: ${TMP_DIR}/patches" ++ ++git branch ${BASELINE_TAG} ${BASELINE_COMMIT} ++git branch ${TIP_TAG} ${TIP_COMMIT} ++git branch ${BPF_BASELINE_TAG} ${BPF_BASELINE_COMMIT} ++git branch ${BPF_TIP_TAG} ${BPF_TIP_COMMIT} ++git branch ${SQUASH_BASE_TAG} ${SQUASH_COMMIT} ++git checkout -b ${SQUASH_TIP_TAG} ${SQUASH_COMMIT} ++ ++# Cherry-pick new commits onto squashed baseline commit ++cherry_pick_commits ${BASELINE_TAG} ${TIP_TAG} ++cherry_pick_commits ${BPF_BASELINE_TAG} ${BPF_TIP_TAG} ++ ++# Move all libbpf files into __libbpf directory. ++FILTER_BRANCH_SQUELCH_WARNING=1 git filter-branch --prune-empty -f --tree-filter "${LIBBPF_TREE_FILTER}" ${SQUASH_TIP_TAG} ${SQUASH_BASE_TAG} ++# Make __libbpf a new root directory ++FILTER_BRANCH_SQUELCH_WARNING=1 git filter-branch --prune-empty -f --subdirectory-filter __libbpf ${SQUASH_TIP_TAG} ${SQUASH_BASE_TAG} ++ ++# If there are no new commits with libbpf-related changes, bail out ++COMMIT_CNT=$(git rev-list --count ${SQUASH_BASE_TAG}..${SQUASH_TIP_TAG}) ++if ((${COMMIT_CNT} <= 0)); then ++ echo "No new changes to apply, we are done!" ++ cleanup ++ exit 2 ++fi ++ ++# Exclude baseline commit and generate nice cover letter with summary ++git format-patch ${SQUASH_BASE_TAG}..${SQUASH_TIP_TAG} --cover-letter -o ${TMP_DIR}/patches ++ ++# Now is time to re-apply libbpf-related linux patches to libbpf repo ++cd_to ${LIBBPF_REPO} ++git checkout -b ${LIBBPF_SYNC_TAG} ++ ++for patch in $(ls -1 ${TMP_DIR}/patches | tail -n +2); do ++ if ! git am -3 --committer-date-is-author-date "${TMP_DIR}/patches/${patch}"; then ++ if ! patch -p1 --merge < "${TMP_DIR}/patches/${patch}"; then ++ read -p "Applying ${TMP_DIR}/patches/${patch} failed, please resolve manually and press to proceed..." 
++ fi ++ git am --continue ++ fi ++done ++ ++# Generate bpf_helper_defs.h and commit, if anything changed ++# restore Linux tip to use bpf_doc.py ++cd_to ${LINUX_REPO} ++git checkout ${TIP_TAG} ++# re-generate bpf_helper_defs.h ++cd_to ${LIBBPF_REPO} ++"${LINUX_ABS_DIR}/scripts/bpf_doc.py" --header \ ++ --file include/uapi/linux/bpf.h > src/bpf_helper_defs.h ++# if anything changed, commit it ++helpers_changes=$(git status --porcelain src/bpf_helper_defs.h | wc -l) ++if ((${helpers_changes} == 1)); then ++ git add src/bpf_helper_defs.h ++ git commit -m "sync: auto-generate latest BPF helpers ++ ++Latest changes to BPF helper definitions. ++" -- src/bpf_helper_defs.h ++fi ++ ++# Use generated cover-letter as a template for "sync commit" with ++# baseline and checkpoint commits from kernel repo (and leave summary ++# from cover letter intact, of course) ++echo ${TIP_COMMIT} > CHECKPOINT-COMMIT && \ ++echo ${BPF_TIP_COMMIT} > BPF-CHECKPOINT-COMMIT && \ ++git add CHECKPOINT-COMMIT && \ ++git add BPF-CHECKPOINT-COMMIT && \ ++awk '/\*\*\* BLURB HERE \*\*\*/ {p=1} p' ${TMP_DIR}/patches/0000-cover-letter.patch | \ ++sed "s/\*\*\* BLURB HERE \*\*\*/\ ++sync: latest libbpf changes from kernel\n\ ++\n\ ++Syncing latest libbpf commits from kernel repository.\n\ ++Baseline bpf-next commit: ${BASELINE_COMMIT}\n\ ++Checkpoint bpf-next commit: ${TIP_COMMIT}\n\ ++Baseline bpf commit: ${BPF_BASELINE_COMMIT}\n\ ++Checkpoint bpf commit: ${BPF_TIP_COMMIT}/" | \ ++git commit --file=- ++ ++echo "SUCCESS! ${COMMIT_CNT} commits synced." ++ ++echo "Verifying Linux's and Github's libbpf state" ++ ++cd_to ${LINUX_REPO} ++git checkout -b ${VIEW_TAG} ${TIP_COMMIT} ++FILTER_BRANCH_SQUELCH_WARNING=1 git filter-branch -f --tree-filter "${LIBBPF_TREE_FILTER}" ${VIEW_TAG}^..${VIEW_TAG} ++FILTER_BRANCH_SQUELCH_WARNING=1 git filter-branch -f --subdirectory-filter __libbpf ${VIEW_TAG}^..${VIEW_TAG} ++git ls-files -- ${LIBBPF_VIEW_PATHS[@]} | grep -v -E "${LINUX_VIEW_EXCLUDE_REGEX}" > ${TMP_DIR}/linux-view.ls ++ ++cd_to ${LIBBPF_REPO} ++git ls-files -- ${LIBBPF_VIEW_PATHS[@]} | grep -v -E "${LIBBPF_VIEW_EXCLUDE_REGEX}" > ${TMP_DIR}/github-view.ls ++ ++echo "Comparing list of files..." ++diff -u ${TMP_DIR}/linux-view.ls ${TMP_DIR}/github-view.ls ++echo "Comparing file contents..." ++CONSISTENT=1 ++for F in $(cat ${TMP_DIR}/linux-view.ls); do ++ if ! diff -u "${LINUX_ABS_DIR}/${F}" "${GITHUB_ABS_DIR}/${F}"; then ++ echo "${LINUX_ABS_DIR}/${F} and ${GITHUB_ABS_DIR}/${F} are different!" ++ CONSISTENT=0 ++ fi ++done ++if ((${CONSISTENT} == 1)); then ++ echo "Great! Content is identical!" ++else ++ ignore_inconsistency=n ++ echo "Unfortunately, there are some inconsistencies, please double check." ++ read -p "Does everything look good? [y/N]: " ignore_inconsistency ++ case "${ignore_inconsistency}" in ++ "y" | "Y") ++ echo "Ok, proceeding..." ++ ;; ++ *) ++ echo "Oops, exiting with error..." 
++ exit 4 ++ esac ++fi ++ ++cleanup +diff --git a/src/cc/libbpf/src/Makefile b/src/cc/libbpf/src/Makefile +new file mode 100644 +index 0000000..6be5361 +--- /dev/null ++++ b/src/cc/libbpf/src/Makefile +@@ -0,0 +1,182 @@ ++# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) ++ ++ifeq ($(V),1) ++ Q = ++ msg = ++else ++ Q = @ ++ msg = @printf ' %-8s %s%s\n' "$(1)" "$(2)" "$(if $(3), $(3))"; ++endif ++ ++LIBBPF_MAJOR_VERSION := 1 ++LIBBPF_MINOR_VERSION := 0 ++LIBBPF_PATCH_VERSION := 0 ++LIBBPF_VERSION := $(LIBBPF_MAJOR_VERSION).$(LIBBPF_MINOR_VERSION).$(LIBBPF_PATCH_VERSION) ++LIBBPF_MAJMIN_VERSION := $(LIBBPF_MAJOR_VERSION).$(LIBBPF_MINOR_VERSION).0 ++LIBBPF_MAP_VERSION := $(shell grep -oE '^LIBBPF_([0-9.]+)' libbpf.map | sort -rV | head -n1 | cut -d'_' -f2) ++ifneq ($(LIBBPF_MAJMIN_VERSION), $(LIBBPF_MAP_VERSION)) ++$(error Libbpf release ($(LIBBPF_VERSION)) and map ($(LIBBPF_MAP_VERSION)) versions are out of sync!) ++endif ++ ++define allow-override ++ $(if $(or $(findstring environment,$(origin $(1))),\ ++ $(findstring command line,$(origin $(1)))),,\ ++ $(eval $(1) = $(2))) ++endef ++ ++$(call allow-override,CC,$(CROSS_COMPILE)cc) ++$(call allow-override,LD,$(CROSS_COMPILE)ld) ++ ++TOPDIR = .. ++ ++INCLUDES := -I. -I$(TOPDIR)/include -I$(TOPDIR)/include/uapi ++ALL_CFLAGS := $(INCLUDES) ++ ++SHARED_CFLAGS += -fPIC -fvisibility=hidden -DSHARED ++ ++CFLAGS ?= -g -O2 -Werror -Wall -std=gnu89 ++ALL_CFLAGS += $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 $(EXTRA_CFLAGS) ++ALL_LDFLAGS += $(LDFLAGS) $(EXTRA_LDFLAGS) ++ ++ifdef NO_PKG_CONFIG ++ ALL_LDFLAGS += -lelf -lz ++else ++ PKG_CONFIG ?= pkg-config ++ ALL_CFLAGS += $(shell $(PKG_CONFIG) --cflags libelf zlib) ++ ALL_LDFLAGS += $(shell $(PKG_CONFIG) --libs libelf zlib) ++endif ++ ++OBJDIR ?= . 
++SHARED_OBJDIR := $(OBJDIR)/sharedobjs ++STATIC_OBJDIR := $(OBJDIR)/staticobjs ++OBJS := bpf.o btf.o libbpf.o libbpf_errno.o netlink.o \ ++ nlattr.o str_error.o libbpf_probes.o bpf_prog_linfo.o \ ++ btf_dump.o hashmap.o ringbuf.o strset.o linker.o gen_loader.o \ ++ relo_core.o usdt.o ++SHARED_OBJS := $(addprefix $(SHARED_OBJDIR)/,$(OBJS)) ++STATIC_OBJS := $(addprefix $(STATIC_OBJDIR)/,$(OBJS)) ++ ++STATIC_LIBS := $(OBJDIR)/libbpf.a ++ifndef BUILD_STATIC_ONLY ++ SHARED_LIBS := $(OBJDIR)/libbpf.so \ ++ $(OBJDIR)/libbpf.so.$(LIBBPF_MAJOR_VERSION) \ ++ $(OBJDIR)/libbpf.so.$(LIBBPF_VERSION) ++ VERSION_SCRIPT := libbpf.map ++endif ++ ++HEADERS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h \ ++ bpf_helpers.h bpf_helper_defs.h bpf_tracing.h \ ++ bpf_endian.h bpf_core_read.h skel_internal.h libbpf_version.h \ ++ usdt.bpf.h ++UAPI_HEADERS := $(addprefix $(TOPDIR)/include/uapi/linux/,\ ++ bpf.h bpf_common.h btf.h) ++ ++PC_FILE := $(OBJDIR)/libbpf.pc ++ ++INSTALL = install ++ ++DESTDIR ?= ++ ++ifeq ($(filter-out %64 %64be %64eb %64le %64el s390x, $(shell uname -m)),) ++ LIBSUBDIR := lib64 ++else ++ LIBSUBDIR := lib ++endif ++ ++# By default let the pc file itself use ${prefix} in includedir/libdir so that ++# the prefix can be overridden at runtime (eg: --define-prefix) ++ifndef LIBDIR ++ LIBDIR_PC := $$\{prefix\}/$(LIBSUBDIR) ++else ++ LIBDIR_PC := $(LIBDIR) ++endif ++PREFIX ?= /usr ++LIBDIR ?= $(PREFIX)/$(LIBSUBDIR) ++INCLUDEDIR ?= $(PREFIX)/include ++UAPIDIR ?= $(PREFIX)/include ++ ++TAGS_PROG := $(if $(shell which etags 2>/dev/null),etags,ctags) ++ ++all: $(STATIC_LIBS) $(SHARED_LIBS) $(PC_FILE) ++ ++$(OBJDIR)/libbpf.a: $(STATIC_OBJS) ++ $(call msg,AR,$@) ++ $(Q)$(AR) rcs $@ $^ ++ ++$(OBJDIR)/libbpf.so: $(OBJDIR)/libbpf.so.$(LIBBPF_MAJOR_VERSION) ++ $(Q)ln -sf $(^F) $@ ++ ++$(OBJDIR)/libbpf.so.$(LIBBPF_MAJOR_VERSION): $(OBJDIR)/libbpf.so.$(LIBBPF_VERSION) ++ $(Q)ln -sf $(^F) $@ ++ ++$(OBJDIR)/libbpf.so.$(LIBBPF_VERSION): $(SHARED_OBJS) ++ $(call msg,CC,$@) ++ $(Q)$(CC) -shared -Wl,--version-script=$(VERSION_SCRIPT) \ ++ -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \ ++ $^ $(ALL_LDFLAGS) -o $@ ++ ++$(OBJDIR)/libbpf.pc: force ++ $(Q)sed -e "s|@PREFIX@|$(PREFIX)|" \ ++ -e "s|@LIBDIR@|$(LIBDIR_PC)|" \ ++ -e "s|@VERSION@|$(LIBBPF_VERSION)|" \ ++ < libbpf.pc.template > $@ ++ ++$(STATIC_OBJDIR) $(SHARED_OBJDIR): ++ $(call msg,MKDIR,$@) ++ $(Q)mkdir -p $@ ++ ++$(STATIC_OBJDIR)/%.o: %.c | $(STATIC_OBJDIR) ++ $(call msg,CC,$@) ++ $(Q)$(CC) $(ALL_CFLAGS) $(CPPFLAGS) -c $< -o $@ ++ ++$(SHARED_OBJDIR)/%.o: %.c | $(SHARED_OBJDIR) ++ $(call msg,CC,$@) ++ $(Q)$(CC) $(ALL_CFLAGS) $(SHARED_CFLAGS) $(CPPFLAGS) -c $< -o $@ ++ ++define do_install ++ $(call msg,INSTALL,$1) ++ $(Q)if [ ! -d '$(DESTDIR)$2' ]; then \ ++ $(INSTALL) -d -m 755 '$(DESTDIR)$2'; \ ++ fi; ++ $(Q)$(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR)$2' ++endef ++ ++# Preserve symlinks at installation. ++define do_s_install ++ $(call msg,INSTALL,$1) ++ $(Q)if [ ! -d '$(DESTDIR)$2' ]; then \ ++ $(INSTALL) -d -m 755 '$(DESTDIR)$2'; \ ++ fi; ++ $(Q)cp -fR $1 '$(DESTDIR)$2' ++endef ++ ++install: all install_headers install_pkgconfig ++ $(call do_s_install,$(STATIC_LIBS) $(SHARED_LIBS),$(LIBDIR)) ++ ++install_headers: ++ $(call do_install,$(HEADERS),$(INCLUDEDIR)/bpf,644) ++ ++# UAPI headers can be installed by a different package so they're not installed ++# in by install rule. 
++install_uapi_headers: ++ $(call do_install,$(UAPI_HEADERS),$(UAPIDIR)/linux,644) ++ ++install_pkgconfig: $(PC_FILE) ++ $(call do_install,$(PC_FILE),$(LIBDIR)/pkgconfig,644) ++ ++clean: ++ $(call msg,CLEAN) ++ $(Q)rm -rf *.o *.a *.so *.so.* *.pc $(SHARED_OBJDIR) $(STATIC_OBJDIR) ++ ++.PHONY: cscope tags force ++cscope: ++ $(call msg,CSCOPE) ++ $(Q)ls *.c *.h > cscope.files ++ $(Q)cscope -b -q -f cscope.out ++ ++tags: ++ $(call msg,CTAGS) ++ $(Q)rm -f TAGS tags ++ $(Q)ls *.c *.h | xargs $(TAGS_PROG) -a ++ ++force: +diff --git a/src/cc/libbpf/src/bpf.c b/src/cc/libbpf/src/bpf.c +new file mode 100644 +index 0000000..efcc06d +--- /dev/null ++++ b/src/cc/libbpf/src/bpf.c +@@ -0,0 +1,1101 @@ ++// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) ++ ++/* ++ * common eBPF ELF operations. ++ * ++ * Copyright (C) 2013-2015 Alexei Starovoitov ++ * Copyright (C) 2015 Wang Nan ++ * Copyright (C) 2015 Huawei Inc. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; ++ * version 2.1 of the License (not later!) ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this program; if not, see ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "bpf.h" ++#include "libbpf.h" ++#include "libbpf_internal.h" ++ ++/* ++ * When building perf, unistd.h is overridden. __NR_bpf is ++ * required to be defined explicitly. ++ */ ++#ifndef __NR_bpf ++# if defined(__i386__) ++# define __NR_bpf 357 ++# elif defined(__x86_64__) ++# define __NR_bpf 321 ++# elif defined(__aarch64__) ++# define __NR_bpf 280 ++# elif defined(__sparc__) ++# define __NR_bpf 349 ++# elif defined(__s390__) ++# define __NR_bpf 351 ++# elif defined(__arc__) ++# define __NR_bpf 280 ++# elif defined(__mips__) && defined(_ABIO32) ++# define __NR_bpf 4355 ++# elif defined(__mips__) && defined(_ABIN32) ++# define __NR_bpf 6319 ++# elif defined(__mips__) && defined(_ABI64) ++# define __NR_bpf 5315 ++# else ++# error __NR_bpf not defined. libbpf does not support your arch. ++# endif ++#endif ++ ++static inline __u64 ptr_to_u64(const void *ptr) ++{ ++ return (__u64) (unsigned long) ptr; ++} ++ ++static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, ++ unsigned int size) ++{ ++ return syscall(__NR_bpf, cmd, attr, size); ++} ++ ++static inline int sys_bpf_fd(enum bpf_cmd cmd, union bpf_attr *attr, ++ unsigned int size) ++{ ++ int fd; ++ ++ fd = sys_bpf(cmd, attr, size); ++ return ensure_good_fd(fd); ++} ++ ++#define PROG_LOAD_ATTEMPTS 5 ++ ++static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts) ++{ ++ int fd; ++ ++ do { ++ fd = sys_bpf_fd(BPF_PROG_LOAD, attr, size); ++ } while (fd < 0 && errno == EAGAIN && --attempts > 0); ++ ++ return fd; ++} ++ ++/* Probe whether kernel switched from memlock-based (RLIMIT_MEMLOCK) to ++ * memcg-based memory accounting for BPF maps and progs. This was done in [0]. ++ * We use the support for bpf_ktime_get_coarse_ns() helper, which was added in ++ * the same 5.11 Linux release ([1]), to detect memcg-based accounting for BPF. 
++ * ++ * [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/ ++ * [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper") ++ */ ++int probe_memcg_account(void) ++{ ++ const size_t prog_load_attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd); ++ struct bpf_insn insns[] = { ++ BPF_EMIT_CALL(BPF_FUNC_ktime_get_coarse_ns), ++ BPF_EXIT_INSN(), ++ }; ++ size_t insn_cnt = ARRAY_SIZE(insns); ++ union bpf_attr attr; ++ int prog_fd; ++ ++ /* attempt loading freplace trying to use custom BTF */ ++ memset(&attr, 0, prog_load_attr_sz); ++ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; ++ attr.insns = ptr_to_u64(insns); ++ attr.insn_cnt = insn_cnt; ++ attr.license = ptr_to_u64("GPL"); ++ ++ prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, prog_load_attr_sz); ++ if (prog_fd >= 0) { ++ close(prog_fd); ++ return 1; ++ } ++ return 0; ++} ++ ++static bool memlock_bumped; ++static rlim_t memlock_rlim = RLIM_INFINITY; ++ ++int libbpf_set_memlock_rlim(size_t memlock_bytes) ++{ ++ if (memlock_bumped) ++ return libbpf_err(-EBUSY); ++ ++ memlock_rlim = memlock_bytes; ++ return 0; ++} ++ ++int bump_rlimit_memlock(void) ++{ ++ struct rlimit rlim; ++ ++ /* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */ ++ if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT)) ++ return 0; ++ ++ memlock_bumped = true; ++ ++ /* zero memlock_rlim_max disables auto-bumping RLIMIT_MEMLOCK */ ++ if (memlock_rlim == 0) ++ return 0; ++ ++ rlim.rlim_cur = rlim.rlim_max = memlock_rlim; ++ if (setrlimit(RLIMIT_MEMLOCK, &rlim)) ++ return -errno; ++ ++ return 0; ++} ++ ++int bpf_map_create(enum bpf_map_type map_type, ++ const char *map_name, ++ __u32 key_size, ++ __u32 value_size, ++ __u32 max_entries, ++ const struct bpf_map_create_opts *opts) ++{ ++ const size_t attr_sz = offsetofend(union bpf_attr, map_extra); ++ union bpf_attr attr; ++ int fd; ++ ++ bump_rlimit_memlock(); ++ ++ memset(&attr, 0, attr_sz); ++ ++ if (!OPTS_VALID(opts, bpf_map_create_opts)) ++ return libbpf_err(-EINVAL); ++ ++ attr.map_type = map_type; ++ if (map_name) ++ libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); ++ attr.key_size = key_size; ++ attr.value_size = value_size; ++ attr.max_entries = max_entries; ++ ++ attr.btf_fd = OPTS_GET(opts, btf_fd, 0); ++ attr.btf_key_type_id = OPTS_GET(opts, btf_key_type_id, 0); ++ attr.btf_value_type_id = OPTS_GET(opts, btf_value_type_id, 0); ++ attr.btf_vmlinux_value_type_id = OPTS_GET(opts, btf_vmlinux_value_type_id, 0); ++ ++ attr.inner_map_fd = OPTS_GET(opts, inner_map_fd, 0); ++ attr.map_flags = OPTS_GET(opts, map_flags, 0); ++ attr.map_extra = OPTS_GET(opts, map_extra, 0); ++ attr.numa_node = OPTS_GET(opts, numa_node, 0); ++ attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0); ++ ++ fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz); ++ return libbpf_err_errno(fd); ++} ++ ++static void * ++alloc_zero_tailing_info(const void *orecord, __u32 cnt, ++ __u32 actual_rec_size, __u32 expected_rec_size) ++{ ++ __u64 info_len = (__u64)actual_rec_size * cnt; ++ void *info, *nrecord; ++ int i; ++ ++ info = malloc(info_len); ++ if (!info) ++ return NULL; ++ ++ /* zero out bytes kernel does not understand */ ++ nrecord = info; ++ for (i = 0; i < cnt; i++) { ++ memcpy(nrecord, orecord, expected_rec_size); ++ memset(nrecord + expected_rec_size, 0, ++ actual_rec_size - expected_rec_size); ++ orecord += actual_rec_size; ++ nrecord += actual_rec_size; ++ } ++ ++ return info; ++} ++ ++int bpf_prog_load(enum bpf_prog_type prog_type, ++ const char *prog_name, const char *license, ++ 
const struct bpf_insn *insns, size_t insn_cnt, ++ const struct bpf_prog_load_opts *opts) ++{ ++ void *finfo = NULL, *linfo = NULL; ++ const char *func_info, *line_info; ++ __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd; ++ __u32 func_info_rec_size, line_info_rec_size; ++ int fd, attempts; ++ union bpf_attr attr; ++ char *log_buf; ++ ++ bump_rlimit_memlock(); ++ ++ if (!OPTS_VALID(opts, bpf_prog_load_opts)) ++ return libbpf_err(-EINVAL); ++ ++ attempts = OPTS_GET(opts, attempts, 0); ++ if (attempts < 0) ++ return libbpf_err(-EINVAL); ++ if (attempts == 0) ++ attempts = PROG_LOAD_ATTEMPTS; ++ ++ memset(&attr, 0, sizeof(attr)); ++ ++ attr.prog_type = prog_type; ++ attr.expected_attach_type = OPTS_GET(opts, expected_attach_type, 0); ++ ++ attr.prog_btf_fd = OPTS_GET(opts, prog_btf_fd, 0); ++ attr.prog_flags = OPTS_GET(opts, prog_flags, 0); ++ attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0); ++ attr.kern_version = OPTS_GET(opts, kern_version, 0); ++ ++ if (prog_name) ++ libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); ++ attr.license = ptr_to_u64(license); ++ ++ if (insn_cnt > UINT_MAX) ++ return libbpf_err(-E2BIG); ++ ++ attr.insns = ptr_to_u64(insns); ++ attr.insn_cnt = (__u32)insn_cnt; ++ ++ attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); ++ attach_btf_obj_fd = OPTS_GET(opts, attach_btf_obj_fd, 0); ++ ++ if (attach_prog_fd && attach_btf_obj_fd) ++ return libbpf_err(-EINVAL); ++ ++ attr.attach_btf_id = OPTS_GET(opts, attach_btf_id, 0); ++ if (attach_prog_fd) ++ attr.attach_prog_fd = attach_prog_fd; ++ else ++ attr.attach_btf_obj_fd = attach_btf_obj_fd; ++ ++ log_buf = OPTS_GET(opts, log_buf, NULL); ++ log_size = OPTS_GET(opts, log_size, 0); ++ log_level = OPTS_GET(opts, log_level, 0); ++ ++ if (!!log_buf != !!log_size) ++ return libbpf_err(-EINVAL); ++ if (log_level > (4 | 2 | 1)) ++ return libbpf_err(-EINVAL); ++ if (log_level && !log_buf) ++ return libbpf_err(-EINVAL); ++ ++ func_info_rec_size = OPTS_GET(opts, func_info_rec_size, 0); ++ func_info = OPTS_GET(opts, func_info, NULL); ++ attr.func_info_rec_size = func_info_rec_size; ++ attr.func_info = ptr_to_u64(func_info); ++ attr.func_info_cnt = OPTS_GET(opts, func_info_cnt, 0); ++ ++ line_info_rec_size = OPTS_GET(opts, line_info_rec_size, 0); ++ line_info = OPTS_GET(opts, line_info, NULL); ++ attr.line_info_rec_size = line_info_rec_size; ++ attr.line_info = ptr_to_u64(line_info); ++ attr.line_info_cnt = OPTS_GET(opts, line_info_cnt, 0); ++ ++ attr.fd_array = ptr_to_u64(OPTS_GET(opts, fd_array, NULL)); ++ ++ if (log_level) { ++ attr.log_buf = ptr_to_u64(log_buf); ++ attr.log_size = log_size; ++ attr.log_level = log_level; ++ } ++ ++ fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); ++ if (fd >= 0) ++ return fd; ++ ++ /* After bpf_prog_load, the kernel may modify certain attributes ++ * to give user space a hint how to deal with loading failure. ++ * Check to see whether we can make some changes and load again. 
++ */ ++ while (errno == E2BIG && (!finfo || !linfo)) { ++ if (!finfo && attr.func_info_cnt && ++ attr.func_info_rec_size < func_info_rec_size) { ++ /* try with corrected func info records */ ++ finfo = alloc_zero_tailing_info(func_info, ++ attr.func_info_cnt, ++ func_info_rec_size, ++ attr.func_info_rec_size); ++ if (!finfo) { ++ errno = E2BIG; ++ goto done; ++ } ++ ++ attr.func_info = ptr_to_u64(finfo); ++ attr.func_info_rec_size = func_info_rec_size; ++ } else if (!linfo && attr.line_info_cnt && ++ attr.line_info_rec_size < line_info_rec_size) { ++ linfo = alloc_zero_tailing_info(line_info, ++ attr.line_info_cnt, ++ line_info_rec_size, ++ attr.line_info_rec_size); ++ if (!linfo) { ++ errno = E2BIG; ++ goto done; ++ } ++ ++ attr.line_info = ptr_to_u64(linfo); ++ attr.line_info_rec_size = line_info_rec_size; ++ } else { ++ break; ++ } ++ ++ fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); ++ if (fd >= 0) ++ goto done; ++ } ++ ++ if (log_level == 0 && log_buf) { ++ /* log_level == 0 with non-NULL log_buf requires retrying on error ++ * with log_level == 1 and log_buf/log_buf_size set, to get details of ++ * failure ++ */ ++ attr.log_buf = ptr_to_u64(log_buf); ++ attr.log_size = log_size; ++ attr.log_level = 1; ++ ++ fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); ++ } ++done: ++ /* free() doesn't affect errno, so we don't need to restore it */ ++ free(finfo); ++ free(linfo); ++ return libbpf_err_errno(fd); ++} ++ ++int bpf_map_update_elem(int fd, const void *key, const void *value, ++ __u64 flags) ++{ ++ union bpf_attr attr; ++ int ret; ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.map_fd = fd; ++ attr.key = ptr_to_u64(key); ++ attr.value = ptr_to_u64(value); ++ attr.flags = flags; ++ ++ ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); ++ return libbpf_err_errno(ret); ++} ++ ++int bpf_map_lookup_elem(int fd, const void *key, void *value) ++{ ++ union bpf_attr attr; ++ int ret; ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.map_fd = fd; ++ attr.key = ptr_to_u64(key); ++ attr.value = ptr_to_u64(value); ++ ++ ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); ++ return libbpf_err_errno(ret); ++} ++ ++int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags) ++{ ++ union bpf_attr attr; ++ int ret; ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.map_fd = fd; ++ attr.key = ptr_to_u64(key); ++ attr.value = ptr_to_u64(value); ++ attr.flags = flags; ++ ++ ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); ++ return libbpf_err_errno(ret); ++} ++ ++int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value) ++{ ++ union bpf_attr attr; ++ int ret; ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.map_fd = fd; ++ attr.key = ptr_to_u64(key); ++ attr.value = ptr_to_u64(value); ++ ++ ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr)); ++ return libbpf_err_errno(ret); ++} ++ ++int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, __u64 flags) ++{ ++ union bpf_attr attr; ++ int ret; ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.map_fd = fd; ++ attr.key = ptr_to_u64(key); ++ attr.value = ptr_to_u64(value); ++ attr.flags = flags; ++ ++ ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr)); ++ return libbpf_err_errno(ret); ++} ++ ++int bpf_map_delete_elem(int fd, const void *key) ++{ ++ union bpf_attr attr; ++ int ret; ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.map_fd = fd; ++ attr.key = ptr_to_u64(key); ++ ++ ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); ++ return 
libbpf_err_errno(ret); ++} ++ ++int bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags) ++{ ++ union bpf_attr attr; ++ int ret; ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.map_fd = fd; ++ attr.key = ptr_to_u64(key); ++ attr.flags = flags; ++ ++ ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); ++ return libbpf_err_errno(ret); ++} ++ ++int bpf_map_get_next_key(int fd, const void *key, void *next_key) ++{ ++ union bpf_attr attr; ++ int ret; ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.map_fd = fd; ++ attr.key = ptr_to_u64(key); ++ attr.next_key = ptr_to_u64(next_key); ++ ++ ret = sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)); ++ return libbpf_err_errno(ret); ++} ++ ++int bpf_map_freeze(int fd) ++{ ++ union bpf_attr attr; ++ int ret; ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.map_fd = fd; ++ ++ ret = sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr)); ++ return libbpf_err_errno(ret); ++} ++ ++static int bpf_map_batch_common(int cmd, int fd, void *in_batch, ++ void *out_batch, void *keys, void *values, ++ __u32 *count, ++ const struct bpf_map_batch_opts *opts) ++{ ++ union bpf_attr attr; ++ int ret; ++ ++ if (!OPTS_VALID(opts, bpf_map_batch_opts)) ++ return libbpf_err(-EINVAL); ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.batch.map_fd = fd; ++ attr.batch.in_batch = ptr_to_u64(in_batch); ++ attr.batch.out_batch = ptr_to_u64(out_batch); ++ attr.batch.keys = ptr_to_u64(keys); ++ attr.batch.values = ptr_to_u64(values); ++ attr.batch.count = *count; ++ attr.batch.elem_flags = OPTS_GET(opts, elem_flags, 0); ++ attr.batch.flags = OPTS_GET(opts, flags, 0); ++ ++ ret = sys_bpf(cmd, &attr, sizeof(attr)); ++ *count = attr.batch.count; ++ ++ return libbpf_err_errno(ret); ++} ++ ++int bpf_map_delete_batch(int fd, const void *keys, __u32 *count, ++ const struct bpf_map_batch_opts *opts) ++{ ++ return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL, ++ NULL, (void *)keys, NULL, count, opts); ++} ++ ++int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys, ++ void *values, __u32 *count, ++ const struct bpf_map_batch_opts *opts) ++{ ++ return bpf_map_batch_common(BPF_MAP_LOOKUP_BATCH, fd, in_batch, ++ out_batch, keys, values, count, opts); ++} ++ ++int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch, ++ void *keys, void *values, __u32 *count, ++ const struct bpf_map_batch_opts *opts) ++{ ++ return bpf_map_batch_common(BPF_MAP_LOOKUP_AND_DELETE_BATCH, ++ fd, in_batch, out_batch, keys, values, ++ count, opts); ++} ++ ++int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *count, ++ const struct bpf_map_batch_opts *opts) ++{ ++ return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL, ++ (void *)keys, (void *)values, count, opts); ++} ++ ++int bpf_obj_pin(int fd, const char *pathname) ++{ ++ union bpf_attr attr; ++ int ret; ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.pathname = ptr_to_u64((void *)pathname); ++ attr.bpf_fd = fd; ++ ++ ret = sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr)); ++ return libbpf_err_errno(ret); ++} ++ ++int bpf_obj_get(const char *pathname) ++{ ++ return bpf_obj_get_opts(pathname, NULL); ++} ++ ++int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts) ++{ ++ union bpf_attr attr; ++ int fd; ++ ++ if (!OPTS_VALID(opts, bpf_obj_get_opts)) ++ return libbpf_err(-EINVAL); ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.pathname = ptr_to_u64((void *)pathname); ++ attr.file_flags = OPTS_GET(opts, file_flags, 0); ++ ++ fd = sys_bpf_fd(BPF_OBJ_GET, &attr, sizeof(attr)); ++ 
return libbpf_err_errno(fd); ++} ++ ++int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, ++ unsigned int flags) ++{ ++ DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, opts, ++ .flags = flags, ++ ); ++ ++ return bpf_prog_attach_opts(prog_fd, target_fd, type, &opts); ++} ++ ++int bpf_prog_attach_opts(int prog_fd, int target_fd, ++ enum bpf_attach_type type, ++ const struct bpf_prog_attach_opts *opts) ++{ ++ union bpf_attr attr; ++ int ret; ++ ++ if (!OPTS_VALID(opts, bpf_prog_attach_opts)) ++ return libbpf_err(-EINVAL); ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.target_fd = target_fd; ++ attr.attach_bpf_fd = prog_fd; ++ attr.attach_type = type; ++ attr.attach_flags = OPTS_GET(opts, flags, 0); ++ attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0); ++ ++ ret = sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); ++ return libbpf_err_errno(ret); ++} ++ ++__attribute__((alias("bpf_prog_attach_opts"))) ++int bpf_prog_attach_xattr(int prog_fd, int target_fd, ++ enum bpf_attach_type type, ++ const struct bpf_prog_attach_opts *opts); ++ ++int bpf_prog_detach(int target_fd, enum bpf_attach_type type) ++{ ++ union bpf_attr attr; ++ int ret; ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.target_fd = target_fd; ++ attr.attach_type = type; ++ ++ ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); ++ return libbpf_err_errno(ret); ++} ++ ++int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type) ++{ ++ union bpf_attr attr; ++ int ret; ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.target_fd = target_fd; ++ attr.attach_bpf_fd = prog_fd; ++ attr.attach_type = type; ++ ++ ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); ++ return libbpf_err_errno(ret); ++} ++ ++int bpf_link_create(int prog_fd, int target_fd, ++ enum bpf_attach_type attach_type, ++ const struct bpf_link_create_opts *opts) ++{ ++ __u32 target_btf_id, iter_info_len; ++ union bpf_attr attr; ++ int fd, err; ++ ++ if (!OPTS_VALID(opts, bpf_link_create_opts)) ++ return libbpf_err(-EINVAL); ++ ++ iter_info_len = OPTS_GET(opts, iter_info_len, 0); ++ target_btf_id = OPTS_GET(opts, target_btf_id, 0); ++ ++ /* validate we don't have unexpected combinations of non-zero fields */ ++ if (iter_info_len || target_btf_id) { ++ if (iter_info_len && target_btf_id) ++ return libbpf_err(-EINVAL); ++ if (!OPTS_ZEROED(opts, target_btf_id)) ++ return libbpf_err(-EINVAL); ++ } ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.link_create.prog_fd = prog_fd; ++ attr.link_create.target_fd = target_fd; ++ attr.link_create.attach_type = attach_type; ++ attr.link_create.flags = OPTS_GET(opts, flags, 0); ++ ++ if (target_btf_id) { ++ attr.link_create.target_btf_id = target_btf_id; ++ goto proceed; ++ } ++ ++ switch (attach_type) { ++ case BPF_TRACE_ITER: ++ attr.link_create.iter_info = ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0)); ++ attr.link_create.iter_info_len = iter_info_len; ++ break; ++ case BPF_PERF_EVENT: ++ attr.link_create.perf_event.bpf_cookie = OPTS_GET(opts, perf_event.bpf_cookie, 0); ++ if (!OPTS_ZEROED(opts, perf_event)) ++ return libbpf_err(-EINVAL); ++ break; ++ case BPF_TRACE_KPROBE_MULTI: ++ attr.link_create.kprobe_multi.flags = OPTS_GET(opts, kprobe_multi.flags, 0); ++ attr.link_create.kprobe_multi.cnt = OPTS_GET(opts, kprobe_multi.cnt, 0); ++ attr.link_create.kprobe_multi.syms = ptr_to_u64(OPTS_GET(opts, kprobe_multi.syms, 0)); ++ attr.link_create.kprobe_multi.addrs = ptr_to_u64(OPTS_GET(opts, kprobe_multi.addrs, 0)); ++ attr.link_create.kprobe_multi.cookies = ptr_to_u64(OPTS_GET(opts, kprobe_multi.cookies, 
0)); ++ if (!OPTS_ZEROED(opts, kprobe_multi)) ++ return libbpf_err(-EINVAL); ++ break; ++ case BPF_TRACE_FENTRY: ++ case BPF_TRACE_FEXIT: ++ case BPF_MODIFY_RETURN: ++ case BPF_LSM_MAC: ++ attr.link_create.tracing.cookie = OPTS_GET(opts, tracing.cookie, 0); ++ if (!OPTS_ZEROED(opts, tracing)) ++ return libbpf_err(-EINVAL); ++ break; ++ default: ++ if (!OPTS_ZEROED(opts, flags)) ++ return libbpf_err(-EINVAL); ++ break; ++ } ++proceed: ++ fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, sizeof(attr)); ++ if (fd >= 0) ++ return fd; ++ /* we'll get EINVAL if LINK_CREATE doesn't support attaching fentry ++ * and other similar programs ++ */ ++ err = -errno; ++ if (err != -EINVAL) ++ return libbpf_err(err); ++ ++ /* if user used features not supported by ++ * BPF_RAW_TRACEPOINT_OPEN command, then just give up immediately ++ */ ++ if (attr.link_create.target_fd || attr.link_create.target_btf_id) ++ return libbpf_err(err); ++ if (!OPTS_ZEROED(opts, sz)) ++ return libbpf_err(err); ++ ++ /* otherwise, for few select kinds of programs that can be ++ * attached using BPF_RAW_TRACEPOINT_OPEN command, try that as ++ * a fallback for older kernels ++ */ ++ switch (attach_type) { ++ case BPF_TRACE_RAW_TP: ++ case BPF_LSM_MAC: ++ case BPF_TRACE_FENTRY: ++ case BPF_TRACE_FEXIT: ++ case BPF_MODIFY_RETURN: ++ return bpf_raw_tracepoint_open(NULL, prog_fd); ++ default: ++ return libbpf_err(err); ++ } ++} ++ ++int bpf_link_detach(int link_fd) ++{ ++ union bpf_attr attr; ++ int ret; ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.link_detach.link_fd = link_fd; ++ ++ ret = sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr)); ++ return libbpf_err_errno(ret); ++} ++ ++int bpf_link_update(int link_fd, int new_prog_fd, ++ const struct bpf_link_update_opts *opts) ++{ ++ union bpf_attr attr; ++ int ret; ++ ++ if (!OPTS_VALID(opts, bpf_link_update_opts)) ++ return libbpf_err(-EINVAL); ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.link_update.link_fd = link_fd; ++ attr.link_update.new_prog_fd = new_prog_fd; ++ attr.link_update.flags = OPTS_GET(opts, flags, 0); ++ attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0); ++ ++ ret = sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr)); ++ return libbpf_err_errno(ret); ++} ++ ++int bpf_iter_create(int link_fd) ++{ ++ union bpf_attr attr; ++ int fd; ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.iter_create.link_fd = link_fd; ++ ++ fd = sys_bpf_fd(BPF_ITER_CREATE, &attr, sizeof(attr)); ++ return libbpf_err_errno(fd); ++} ++ ++int bpf_prog_query_opts(int target_fd, ++ enum bpf_attach_type type, ++ struct bpf_prog_query_opts *opts) ++{ ++ union bpf_attr attr; ++ int ret; ++ ++ if (!OPTS_VALID(opts, bpf_prog_query_opts)) ++ return libbpf_err(-EINVAL); ++ ++ memset(&attr, 0, sizeof(attr)); ++ ++ attr.query.target_fd = target_fd; ++ attr.query.attach_type = type; ++ attr.query.query_flags = OPTS_GET(opts, query_flags, 0); ++ attr.query.prog_cnt = OPTS_GET(opts, prog_cnt, 0); ++ attr.query.prog_ids = ptr_to_u64(OPTS_GET(opts, prog_ids, NULL)); ++ attr.query.prog_attach_flags = ptr_to_u64(OPTS_GET(opts, prog_attach_flags, NULL)); ++ ++ ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr)); ++ ++ OPTS_SET(opts, attach_flags, attr.query.attach_flags); ++ OPTS_SET(opts, prog_cnt, attr.query.prog_cnt); ++ ++ return libbpf_err_errno(ret); ++} ++ ++int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, ++ __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt) ++{ ++ LIBBPF_OPTS(bpf_prog_query_opts, opts); ++ int ret; ++ ++ opts.query_flags = query_flags; ++ opts.prog_ids = prog_ids; ++ 
opts.prog_cnt = *prog_cnt; ++ ++ ret = bpf_prog_query_opts(target_fd, type, &opts); ++ ++ if (attach_flags) ++ *attach_flags = opts.attach_flags; ++ *prog_cnt = opts.prog_cnt; ++ ++ return libbpf_err_errno(ret); ++} ++ ++int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts) ++{ ++ union bpf_attr attr; ++ int ret; ++ ++ if (!OPTS_VALID(opts, bpf_test_run_opts)) ++ return libbpf_err(-EINVAL); ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.test.prog_fd = prog_fd; ++ attr.test.batch_size = OPTS_GET(opts, batch_size, 0); ++ attr.test.cpu = OPTS_GET(opts, cpu, 0); ++ attr.test.flags = OPTS_GET(opts, flags, 0); ++ attr.test.repeat = OPTS_GET(opts, repeat, 0); ++ attr.test.duration = OPTS_GET(opts, duration, 0); ++ attr.test.ctx_size_in = OPTS_GET(opts, ctx_size_in, 0); ++ attr.test.ctx_size_out = OPTS_GET(opts, ctx_size_out, 0); ++ attr.test.data_size_in = OPTS_GET(opts, data_size_in, 0); ++ attr.test.data_size_out = OPTS_GET(opts, data_size_out, 0); ++ attr.test.ctx_in = ptr_to_u64(OPTS_GET(opts, ctx_in, NULL)); ++ attr.test.ctx_out = ptr_to_u64(OPTS_GET(opts, ctx_out, NULL)); ++ attr.test.data_in = ptr_to_u64(OPTS_GET(opts, data_in, NULL)); ++ attr.test.data_out = ptr_to_u64(OPTS_GET(opts, data_out, NULL)); ++ ++ ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); ++ ++ OPTS_SET(opts, data_size_out, attr.test.data_size_out); ++ OPTS_SET(opts, ctx_size_out, attr.test.ctx_size_out); ++ OPTS_SET(opts, duration, attr.test.duration); ++ OPTS_SET(opts, retval, attr.test.retval); ++ ++ return libbpf_err_errno(ret); ++} ++ ++static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd) ++{ ++ union bpf_attr attr; ++ int err; ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.start_id = start_id; ++ ++ err = sys_bpf(cmd, &attr, sizeof(attr)); ++ if (!err) ++ *next_id = attr.next_id; ++ ++ return libbpf_err_errno(err); ++} ++ ++int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id) ++{ ++ return bpf_obj_get_next_id(start_id, next_id, BPF_PROG_GET_NEXT_ID); ++} ++ ++int bpf_map_get_next_id(__u32 start_id, __u32 *next_id) ++{ ++ return bpf_obj_get_next_id(start_id, next_id, BPF_MAP_GET_NEXT_ID); ++} ++ ++int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id) ++{ ++ return bpf_obj_get_next_id(start_id, next_id, BPF_BTF_GET_NEXT_ID); ++} ++ ++int bpf_link_get_next_id(__u32 start_id, __u32 *next_id) ++{ ++ return bpf_obj_get_next_id(start_id, next_id, BPF_LINK_GET_NEXT_ID); ++} ++ ++int bpf_prog_get_fd_by_id(__u32 id) ++{ ++ union bpf_attr attr; ++ int fd; ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.prog_id = id; ++ ++ fd = sys_bpf_fd(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); ++ return libbpf_err_errno(fd); ++} ++ ++int bpf_map_get_fd_by_id(__u32 id) ++{ ++ union bpf_attr attr; ++ int fd; ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.map_id = id; ++ ++ fd = sys_bpf_fd(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); ++ return libbpf_err_errno(fd); ++} ++ ++int bpf_btf_get_fd_by_id(__u32 id) ++{ ++ union bpf_attr attr; ++ int fd; ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.btf_id = id; ++ ++ fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr)); ++ return libbpf_err_errno(fd); ++} ++ ++int bpf_link_get_fd_by_id(__u32 id) ++{ ++ union bpf_attr attr; ++ int fd; ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.link_id = id; ++ ++ fd = sys_bpf_fd(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr)); ++ return libbpf_err_errno(fd); ++} ++ ++int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len) ++{ ++ union bpf_attr attr; ++ int err; ++ ++ memset(&attr, 0, sizeof(attr)); ++ 
attr.info.bpf_fd = bpf_fd; ++ attr.info.info_len = *info_len; ++ attr.info.info = ptr_to_u64(info); ++ ++ err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)); ++ ++ if (!err) ++ *info_len = attr.info.info_len; ++ ++ return libbpf_err_errno(err); ++} ++ ++int bpf_raw_tracepoint_open(const char *name, int prog_fd) ++{ ++ union bpf_attr attr; ++ int fd; ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.raw_tracepoint.name = ptr_to_u64(name); ++ attr.raw_tracepoint.prog_fd = prog_fd; ++ ++ fd = sys_bpf_fd(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr)); ++ return libbpf_err_errno(fd); ++} ++ ++int bpf_btf_load(const void *btf_data, size_t btf_size, const struct bpf_btf_load_opts *opts) ++{ ++ const size_t attr_sz = offsetofend(union bpf_attr, btf_log_level); ++ union bpf_attr attr; ++ char *log_buf; ++ size_t log_size; ++ __u32 log_level; ++ int fd; ++ ++ bump_rlimit_memlock(); ++ ++ memset(&attr, 0, attr_sz); ++ ++ if (!OPTS_VALID(opts, bpf_btf_load_opts)) ++ return libbpf_err(-EINVAL); ++ ++ log_buf = OPTS_GET(opts, log_buf, NULL); ++ log_size = OPTS_GET(opts, log_size, 0); ++ log_level = OPTS_GET(opts, log_level, 0); ++ ++ if (log_size > UINT_MAX) ++ return libbpf_err(-EINVAL); ++ if (log_size && !log_buf) ++ return libbpf_err(-EINVAL); ++ ++ attr.btf = ptr_to_u64(btf_data); ++ attr.btf_size = btf_size; ++ /* log_level == 0 and log_buf != NULL means "try loading without ++ * log_buf, but retry with log_buf and log_level=1 on error", which is ++ * consistent across low-level and high-level BTF and program loading ++ * APIs within libbpf and provides a sensible behavior in practice ++ */ ++ if (log_level) { ++ attr.btf_log_buf = ptr_to_u64(log_buf); ++ attr.btf_log_size = (__u32)log_size; ++ attr.btf_log_level = log_level; ++ } ++ ++ fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz); ++ if (fd < 0 && log_buf && log_level == 0) { ++ attr.btf_log_buf = ptr_to_u64(log_buf); ++ attr.btf_log_size = (__u32)log_size; ++ attr.btf_log_level = 1; ++ fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz); ++ } ++ return libbpf_err_errno(fd); ++} ++ ++int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, ++ __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, ++ __u64 *probe_addr) ++{ ++ union bpf_attr attr = {}; ++ int err; ++ ++ attr.task_fd_query.pid = pid; ++ attr.task_fd_query.fd = fd; ++ attr.task_fd_query.flags = flags; ++ attr.task_fd_query.buf = ptr_to_u64(buf); ++ attr.task_fd_query.buf_len = *buf_len; ++ ++ err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr)); ++ ++ *buf_len = attr.task_fd_query.buf_len; ++ *prog_id = attr.task_fd_query.prog_id; ++ *fd_type = attr.task_fd_query.fd_type; ++ *probe_offset = attr.task_fd_query.probe_offset; ++ *probe_addr = attr.task_fd_query.probe_addr; ++ ++ return libbpf_err_errno(err); ++} ++ ++int bpf_enable_stats(enum bpf_stats_type type) ++{ ++ union bpf_attr attr; ++ int fd; ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.enable_stats.type = type; ++ ++ fd = sys_bpf_fd(BPF_ENABLE_STATS, &attr, sizeof(attr)); ++ return libbpf_err_errno(fd); ++} ++ ++int bpf_prog_bind_map(int prog_fd, int map_fd, ++ const struct bpf_prog_bind_opts *opts) ++{ ++ union bpf_attr attr; ++ int ret; ++ ++ if (!OPTS_VALID(opts, bpf_prog_bind_opts)) ++ return libbpf_err(-EINVAL); ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.prog_bind_map.prog_fd = prog_fd; ++ attr.prog_bind_map.map_fd = map_fd; ++ attr.prog_bind_map.flags = OPTS_GET(opts, flags, 0); ++ ++ ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr)); ++ return libbpf_err_errno(ret); ++} +diff --git 
a/src/cc/libbpf/src/bpf.h b/src/cc/libbpf/src/bpf.h +new file mode 100644 +index 0000000..9c50bea +--- /dev/null ++++ b/src/cc/libbpf/src/bpf.h +@@ -0,0 +1,438 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++ ++/* ++ * common eBPF ELF operations. ++ * ++ * Copyright (C) 2013-2015 Alexei Starovoitov ++ * Copyright (C) 2015 Wang Nan ++ * Copyright (C) 2015 Huawei Inc. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; ++ * version 2.1 of the License (not later!) ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this program; if not, see ++ */ ++#ifndef __LIBBPF_BPF_H ++#define __LIBBPF_BPF_H ++ ++#include ++#include ++#include ++#include ++ ++#include "libbpf_common.h" ++#include "libbpf_legacy.h" ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++int libbpf_set_memlock_rlim(size_t memlock_bytes); ++ ++struct bpf_map_create_opts { ++ size_t sz; /* size of this struct for forward/backward compatibility */ ++ ++ __u32 btf_fd; ++ __u32 btf_key_type_id; ++ __u32 btf_value_type_id; ++ __u32 btf_vmlinux_value_type_id; ++ ++ __u32 inner_map_fd; ++ __u32 map_flags; ++ __u64 map_extra; ++ ++ __u32 numa_node; ++ __u32 map_ifindex; ++}; ++#define bpf_map_create_opts__last_field map_ifindex ++ ++LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, ++ const char *map_name, ++ __u32 key_size, ++ __u32 value_size, ++ __u32 max_entries, ++ const struct bpf_map_create_opts *opts); ++ ++struct bpf_prog_load_opts { ++ size_t sz; /* size of this struct for forward/backward compatibility */ ++ ++ /* libbpf can retry BPF_PROG_LOAD command if bpf() syscall returns ++ * -EAGAIN. This field determines how many attempts libbpf has to ++ * make. If not specified, libbpf will use default value of 5. 
++ */ ++ int attempts; ++ ++ enum bpf_attach_type expected_attach_type; ++ __u32 prog_btf_fd; ++ __u32 prog_flags; ++ __u32 prog_ifindex; ++ __u32 kern_version; ++ ++ __u32 attach_btf_id; ++ __u32 attach_prog_fd; ++ __u32 attach_btf_obj_fd; ++ ++ const int *fd_array; ++ ++ /* .BTF.ext func info data */ ++ const void *func_info; ++ __u32 func_info_cnt; ++ __u32 func_info_rec_size; ++ ++ /* .BTF.ext line info data */ ++ const void *line_info; ++ __u32 line_info_cnt; ++ __u32 line_info_rec_size; ++ ++ /* verifier log options */ ++ __u32 log_level; ++ __u32 log_size; ++ char *log_buf; ++}; ++#define bpf_prog_load_opts__last_field log_buf ++ ++LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type, ++ const char *prog_name, const char *license, ++ const struct bpf_insn *insns, size_t insn_cnt, ++ const struct bpf_prog_load_opts *opts); ++ ++/* Flags to direct loading requirements */ ++#define MAPS_RELAX_COMPAT 0x01 ++ ++/* Recommended log buffer size */ ++#define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */ ++ ++struct bpf_btf_load_opts { ++ size_t sz; /* size of this struct for forward/backward compatibility */ ++ ++ /* kernel log options */ ++ char *log_buf; ++ __u32 log_level; ++ __u32 log_size; ++}; ++#define bpf_btf_load_opts__last_field log_size ++ ++LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size, ++ const struct bpf_btf_load_opts *opts); ++ ++LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value, ++ __u64 flags); ++ ++LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value); ++LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, ++ __u64 flags); ++LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key, ++ void *value); ++LIBBPF_API int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, ++ void *value, __u64 flags); ++LIBBPF_API int bpf_map_delete_elem(int fd, const void *key); ++LIBBPF_API int bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags); ++LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key); ++LIBBPF_API int bpf_map_freeze(int fd); ++ ++struct bpf_map_batch_opts { ++ size_t sz; /* size of this struct for forward/backward compatibility */ ++ __u64 elem_flags; ++ __u64 flags; ++}; ++#define bpf_map_batch_opts__last_field flags ++ ++ ++/** ++ * @brief **bpf_map_delete_batch()** allows for batch deletion of multiple ++ * elements in a BPF map. ++ * ++ * @param fd BPF map file descriptor ++ * @param keys pointer to an array of *count* keys ++ * @param count input and output parameter; on input **count** represents the ++ * number of elements in the map to delete in batch; ++ * on output if a non-EFAULT error is returned, **count** represents the number of deleted ++ * elements if the output **count** value is not equal to the input **count** value ++ * If EFAULT is returned, **count** should not be trusted to be correct. ++ * @param opts options for configuring the way the batch deletion works ++ * @return 0, on success; negative error code, otherwise (errno is also set to ++ * the error code) ++ */ ++LIBBPF_API int bpf_map_delete_batch(int fd, const void *keys, ++ __u32 *count, ++ const struct bpf_map_batch_opts *opts); ++ ++/** ++ * @brief **bpf_map_lookup_batch()** allows for batch lookup of BPF map elements. ++ * ++ * The parameter *in_batch* is the address of the first element in the batch to read. 
++ * *out_batch* is an output parameter that should be passed as *in_batch* to subsequent ++ * calls to **bpf_map_lookup_batch()**. NULL can be passed for *in_batch* to indicate ++ * that the batched lookup starts from the beginning of the map. ++ * ++ * The *keys* and *values* are output parameters which must point to memory large enough to ++ * hold *count* items based on the key and value size of the map *map_fd*. The *keys* ++ * buffer must be of *key_size* * *count*. The *values* buffer must be of ++ * *value_size* * *count*. ++ * ++ * @param fd BPF map file descriptor ++ * @param in_batch address of the first element in batch to read, can pass NULL to ++ * indicate that the batched lookup starts from the beginning of the map. ++ * @param out_batch output parameter that should be passed to next call as *in_batch* ++ * @param keys pointer to an array large enough for *count* keys ++ * @param values pointer to an array large enough for *count* values ++ * @param count input and output parameter; on input it's the number of elements ++ * in the map to read in batch; on output it's the number of elements that were ++ * successfully read. ++ * If a non-EFAULT error is returned, count will be set as the number of elements ++ * that were read before the error occurred. ++ * If EFAULT is returned, **count** should not be trusted to be correct. ++ * @param opts options for configuring the way the batch lookup works ++ * @return 0, on success; negative error code, otherwise (errno is also set to ++ * the error code) ++ */ ++LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, ++ void *keys, void *values, __u32 *count, ++ const struct bpf_map_batch_opts *opts); ++ ++/** ++ * @brief **bpf_map_lookup_and_delete_batch()** allows for batch lookup and deletion ++ * of BPF map elements where each element is deleted after being retrieved. ++ * ++ * @param fd BPF map file descriptor ++ * @param in_batch address of the first element in batch to read, can pass NULL to ++ * get address of the first element in *out_batch* ++ * @param out_batch output parameter that should be passed to next call as *in_batch* ++ * @param keys pointer to an array of *count* keys ++ * @param values pointer to an array large enough for *count* values ++ * @param count input and output parameter; on input it's the number of elements ++ * in the map to read and delete in batch; on output it represents the number of ++ * elements that were successfully read and deleted ++ * If a non-**EFAULT** error code is returned and if the output **count** value ++ * is not equal to the input **count** value, up to **count** elements may ++ * have been deleted. ++ * if **EFAULT** is returned up to *count* elements may have been deleted without ++ * being returned via the *keys* and *values* output parameters. ++ * @param opts options for configuring the way the batch lookup and delete works ++ * @return 0, on success; negative error code, otherwise (errno is also set to ++ * the error code) ++ */ ++LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, ++ void *out_batch, void *keys, ++ void *values, __u32 *count, ++ const struct bpf_map_batch_opts *opts); ++ ++/** ++ * @brief **bpf_map_update_batch()** updates multiple elements in a map ++ * by specifying keys and their corresponding values. ++ * ++ * The *keys* and *values* parameters must point to memory large enough ++ * to hold *count* items based on the key and value size of the map. 
++ * ++ * The *opts* parameter can be used to control how *bpf_map_update_batch()* ++ * should handle keys that either do or do not already exist in the map. ++ * In particular the *flags* parameter of *bpf_map_batch_opts* can be ++ * one of the following: ++ * ++ * Note that *count* is an input and output parameter, where on output it ++ * represents how many elements were successfully updated. Also note that if ++ * **EFAULT** then *count* should not be trusted to be correct. ++ * ++ * **BPF_ANY** ++ * Create new elements or update existing. ++ * ++ * **BPF_NOEXIST** ++ * Create new elements only if they do not exist. ++ * ++ * **BPF_EXIST** ++ * Update existing elements. ++ * ++ * **BPF_F_LOCK** ++ * Update spin_lock-ed map elements. This must be ++ * specified if the map value contains a spinlock. ++ * ++ * @param fd BPF map file descriptor ++ * @param keys pointer to an array of *count* keys ++ * @param values pointer to an array of *count* values ++ * @param count input and output parameter; on input it's the number of elements ++ * in the map to update in batch; on output if a non-EFAULT error is returned, ++ * **count** represents the number of updated elements if the output **count** ++ * value is not equal to the input **count** value. ++ * If EFAULT is returned, **count** should not be trusted to be correct. ++ * @param opts options for configuring the way the batch update works ++ * @return 0, on success; negative error code, otherwise (errno is also set to ++ * the error code) ++ */ ++LIBBPF_API int bpf_map_update_batch(int fd, const void *keys, const void *values, ++ __u32 *count, ++ const struct bpf_map_batch_opts *opts); ++ ++struct bpf_obj_get_opts { ++ size_t sz; /* size of this struct for forward/backward compatibility */ ++ ++ __u32 file_flags; ++ ++ size_t :0; ++}; ++#define bpf_obj_get_opts__last_field file_flags ++ ++LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); ++LIBBPF_API int bpf_obj_get(const char *pathname); ++LIBBPF_API int bpf_obj_get_opts(const char *pathname, ++ const struct bpf_obj_get_opts *opts); ++ ++struct bpf_prog_attach_opts { ++ size_t sz; /* size of this struct for forward/backward compatibility */ ++ unsigned int flags; ++ int replace_prog_fd; ++}; ++#define bpf_prog_attach_opts__last_field replace_prog_fd ++ ++LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd, ++ enum bpf_attach_type type, unsigned int flags); ++LIBBPF_API int bpf_prog_attach_opts(int prog_fd, int attachable_fd, ++ enum bpf_attach_type type, ++ const struct bpf_prog_attach_opts *opts); ++LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); ++LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, ++ enum bpf_attach_type type); ++ ++union bpf_iter_link_info; /* defined in up-to-date linux/bpf.h */ ++struct bpf_link_create_opts { ++ size_t sz; /* size of this struct for forward/backward compatibility */ ++ __u32 flags; ++ union bpf_iter_link_info *iter_info; ++ __u32 iter_info_len; ++ __u32 target_btf_id; ++ union { ++ struct { ++ __u64 bpf_cookie; ++ } perf_event; ++ struct { ++ __u32 flags; ++ __u32 cnt; ++ const char **syms; ++ const unsigned long *addrs; ++ const __u64 *cookies; ++ } kprobe_multi; ++ struct { ++ __u64 cookie; ++ } tracing; ++ }; ++ size_t :0; ++}; ++#define bpf_link_create_opts__last_field kprobe_multi.cookies ++ ++LIBBPF_API int bpf_link_create(int prog_fd, int target_fd, ++ enum bpf_attach_type attach_type, ++ const struct bpf_link_create_opts *opts); ++ ++LIBBPF_API int bpf_link_detach(int link_fd); ++ 
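For readers skimming this vendored header, the following sketch is illustrative only; it is not part of this patch or of upstream libbpf. It assumes the headers are installed at their usual locations (<bpf/bpf.h>, <linux/bpf.h>) and uses arbitrary names ("demo_map", "demo_prog") and sizes. It exercises the APIs declared above: create an array map, update and read back one element, and load a trivial socket-filter program with a caller-supplied verifier log buffer, relying on the log_level == 0 auto-retry behaviour implemented in bpf.c earlier in this patch.

    #include <errno.h>
    #include <stdio.h>
    #include <linux/bpf.h>
    #include <bpf/bpf.h>            /* assumed install path of the header above */

    static int demo(void)
    {
            char log[4096];
            __u32 key = 1, val = 42, out = 0;
            struct bpf_insn insns[] = {
                    /* r0 = 0; exit -- smallest program the verifier accepts */
                    { .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0, .imm = 0 },
                    { .code = BPF_JMP | BPF_EXIT },
            };
            LIBBPF_OPTS(bpf_prog_load_opts, opts,
                    .log_buf = log,
                    .log_size = sizeof(log));   /* log_level 0: buffer is filled only on failure */
            int map_fd, prog_fd;

            /* 4-entry array map with 4-byte keys and 4-byte values */
            map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "demo_map", 4, 4, 4, NULL);
            if (map_fd < 0)
                    return -errno;

            if (bpf_map_update_elem(map_fd, &key, &val, BPF_ANY) ||
                bpf_map_lookup_elem(map_fd, &key, &out))
                    return -errno;
            printf("key %u -> %u\n", key, out);

            prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "demo_prog", "GPL",
                                    insns, 2, &opts);
            if (prog_fd < 0)
                    fprintf(stderr, "load failed: %d\n%s\n", -errno, log);
            return prog_fd < 0 ? -errno : 0;
    }

On failure of the final load, the wrapper retries with log_level = 1 as described in the bpf.c comments above, so the printed log contains the verifier's explanation without the caller having to request logging up front.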
++struct bpf_link_update_opts { ++ size_t sz; /* size of this struct for forward/backward compatibility */ ++ __u32 flags; /* extra flags */ ++ __u32 old_prog_fd; /* expected old program FD */ ++}; ++#define bpf_link_update_opts__last_field old_prog_fd ++ ++LIBBPF_API int bpf_link_update(int link_fd, int new_prog_fd, ++ const struct bpf_link_update_opts *opts); ++ ++LIBBPF_API int bpf_iter_create(int link_fd); ++ ++struct bpf_prog_test_run_attr { ++ int prog_fd; ++ int repeat; ++ const void *data_in; ++ __u32 data_size_in; ++ void *data_out; /* optional */ ++ __u32 data_size_out; /* in: max length of data_out ++ * out: length of data_out */ ++ __u32 retval; /* out: return code of the BPF program */ ++ __u32 duration; /* out: average per repetition in ns */ ++ const void *ctx_in; /* optional */ ++ __u32 ctx_size_in; ++ void *ctx_out; /* optional */ ++ __u32 ctx_size_out; /* in: max length of ctx_out ++ * out: length of cxt_out */ ++}; ++ ++LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id); ++LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); ++LIBBPF_API int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id); ++LIBBPF_API int bpf_link_get_next_id(__u32 start_id, __u32 *next_id); ++LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); ++LIBBPF_API int bpf_map_get_fd_by_id(__u32 id); ++LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id); ++LIBBPF_API int bpf_link_get_fd_by_id(__u32 id); ++LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len); ++ ++struct bpf_prog_query_opts { ++ size_t sz; /* size of this struct for forward/backward compatibility */ ++ __u32 query_flags; ++ __u32 attach_flags; /* output argument */ ++ __u32 *prog_ids; ++ __u32 prog_cnt; /* input+output argument */ ++ __u32 *prog_attach_flags; ++}; ++#define bpf_prog_query_opts__last_field prog_attach_flags ++ ++LIBBPF_API int bpf_prog_query_opts(int target_fd, ++ enum bpf_attach_type type, ++ struct bpf_prog_query_opts *opts); ++LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type, ++ __u32 query_flags, __u32 *attach_flags, ++ __u32 *prog_ids, __u32 *prog_cnt); ++ ++LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd); ++LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, ++ __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, ++ __u64 *probe_offset, __u64 *probe_addr); ++ ++enum bpf_stats_type; /* defined in up-to-date linux/bpf.h */ ++LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type); ++ ++struct bpf_prog_bind_opts { ++ size_t sz; /* size of this struct for forward/backward compatibility */ ++ __u32 flags; ++}; ++#define bpf_prog_bind_opts__last_field flags ++ ++LIBBPF_API int bpf_prog_bind_map(int prog_fd, int map_fd, ++ const struct bpf_prog_bind_opts *opts); ++ ++struct bpf_test_run_opts { ++ size_t sz; /* size of this struct for forward/backward compatibility */ ++ const void *data_in; /* optional */ ++ void *data_out; /* optional */ ++ __u32 data_size_in; ++ __u32 data_size_out; /* in: max length of data_out ++ * out: length of data_out ++ */ ++ const void *ctx_in; /* optional */ ++ void *ctx_out; /* optional */ ++ __u32 ctx_size_in; ++ __u32 ctx_size_out; /* in: max length of ctx_out ++ * out: length of cxt_out ++ */ ++ __u32 retval; /* out: return code of the BPF program */ ++ int repeat; ++ __u32 duration; /* out: average per repetition in ns */ ++ __u32 flags; ++ __u32 cpu; ++ __u32 batch_size; ++}; ++#define bpf_test_run_opts__last_field batch_size ++ ++LIBBPF_API int bpf_prog_test_run_opts(int prog_fd, ++ 
struct bpf_test_run_opts *opts); ++ ++#ifdef __cplusplus ++} /* extern "C" */ ++#endif ++ ++#endif /* __LIBBPF_BPF_H */ +diff --git a/src/cc/libbpf/src/bpf_core_read.h b/src/cc/libbpf/src/bpf_core_read.h +new file mode 100644 +index 0000000..496e6a8 +--- /dev/null ++++ b/src/cc/libbpf/src/bpf_core_read.h +@@ -0,0 +1,484 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++#ifndef __BPF_CORE_READ_H__ ++#define __BPF_CORE_READ_H__ ++ ++/* ++ * enum bpf_field_info_kind is passed as a second argument into ++ * __builtin_preserve_field_info() built-in to get a specific aspect of ++ * a field, captured as a first argument. __builtin_preserve_field_info(field, ++ * info_kind) returns __u32 integer and produces BTF field relocation, which ++ * is understood and processed by libbpf during BPF object loading. See ++ * selftests/bpf for examples. ++ */ ++enum bpf_field_info_kind { ++ BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */ ++ BPF_FIELD_BYTE_SIZE = 1, ++ BPF_FIELD_EXISTS = 2, /* field existence in target kernel */ ++ BPF_FIELD_SIGNED = 3, ++ BPF_FIELD_LSHIFT_U64 = 4, ++ BPF_FIELD_RSHIFT_U64 = 5, ++}; ++ ++/* second argument to __builtin_btf_type_id() built-in */ ++enum bpf_type_id_kind { ++ BPF_TYPE_ID_LOCAL = 0, /* BTF type ID in local program */ ++ BPF_TYPE_ID_TARGET = 1, /* BTF type ID in target kernel */ ++}; ++ ++/* second argument to __builtin_preserve_type_info() built-in */ ++enum bpf_type_info_kind { ++ BPF_TYPE_EXISTS = 0, /* type existence in target kernel */ ++ BPF_TYPE_SIZE = 1, /* type size in target kernel */ ++ BPF_TYPE_MATCHES = 2, /* type match in target kernel */ ++}; ++ ++/* second argument to __builtin_preserve_enum_value() built-in */ ++enum bpf_enum_value_kind { ++ BPF_ENUMVAL_EXISTS = 0, /* enum value existence in kernel */ ++ BPF_ENUMVAL_VALUE = 1, /* enum value value relocation */ ++}; ++ ++#define __CORE_RELO(src, field, info) \ ++ __builtin_preserve_field_info((src)->field, BPF_FIELD_##info) ++ ++#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ++#define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ ++ bpf_probe_read_kernel( \ ++ (void *)dst, \ ++ __CORE_RELO(src, fld, BYTE_SIZE), \ ++ (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) ++#else ++/* semantics of LSHIFT_64 assumes loading values into low-ordered bytes, so ++ * for big-endian we need to adjust destination pointer accordingly, based on ++ * field byte size ++ */ ++#define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ ++ bpf_probe_read_kernel( \ ++ (void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \ ++ __CORE_RELO(src, fld, BYTE_SIZE), \ ++ (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) ++#endif ++ ++/* ++ * Extract bitfield, identified by s->field, and return its value as u64. ++ * All this is done in relocatable manner, so bitfield changes such as ++ * signedness, bit size, offset changes, this will be handled automatically. ++ * This version of macro is using bpf_probe_read_kernel() to read underlying ++ * integer storage. Macro functions as an expression and its return type is ++ * bpf_probe_read_kernel()'s return value: 0, on success, <0 on error. 
++ */ ++#define BPF_CORE_READ_BITFIELD_PROBED(s, field) ({ \ ++ unsigned long long val = 0; \ ++ \ ++ __CORE_BITFIELD_PROBE_READ(&val, s, field); \ ++ val <<= __CORE_RELO(s, field, LSHIFT_U64); \ ++ if (__CORE_RELO(s, field, SIGNED)) \ ++ val = ((long long)val) >> __CORE_RELO(s, field, RSHIFT_U64); \ ++ else \ ++ val = val >> __CORE_RELO(s, field, RSHIFT_U64); \ ++ val; \ ++}) ++ ++/* ++ * Extract bitfield, identified by s->field, and return its value as u64. ++ * This version of macro is using direct memory reads and should be used from ++ * BPF program types that support such functionality (e.g., typed raw ++ * tracepoints). ++ */ ++#define BPF_CORE_READ_BITFIELD(s, field) ({ \ ++ const void *p = (const void *)s + __CORE_RELO(s, field, BYTE_OFFSET); \ ++ unsigned long long val; \ ++ \ ++ /* This is a so-called barrier_var() operation that makes specified \ ++ * variable "a black box" for optimizing compiler. \ ++ * It forces compiler to perform BYTE_OFFSET relocation on p and use \ ++ * its calculated value in the switch below, instead of applying \ ++ * the same relocation 4 times for each individual memory load. \ ++ */ \ ++ asm volatile("" : "=r"(p) : "0"(p)); \ ++ \ ++ switch (__CORE_RELO(s, field, BYTE_SIZE)) { \ ++ case 1: val = *(const unsigned char *)p; break; \ ++ case 2: val = *(const unsigned short *)p; break; \ ++ case 4: val = *(const unsigned int *)p; break; \ ++ case 8: val = *(const unsigned long long *)p; break; \ ++ } \ ++ val <<= __CORE_RELO(s, field, LSHIFT_U64); \ ++ if (__CORE_RELO(s, field, SIGNED)) \ ++ val = ((long long)val) >> __CORE_RELO(s, field, RSHIFT_U64); \ ++ else \ ++ val = val >> __CORE_RELO(s, field, RSHIFT_U64); \ ++ val; \ ++}) ++ ++#define ___bpf_field_ref1(field) (field) ++#define ___bpf_field_ref2(type, field) (((typeof(type) *)0)->field) ++#define ___bpf_field_ref(args...) \ ++ ___bpf_apply(___bpf_field_ref, ___bpf_narg(args))(args) ++ ++/* ++ * Convenience macro to check that field actually exists in target kernel's. ++ * Returns: ++ * 1, if matching field is present in target kernel; ++ * 0, if no matching field found. ++ * ++ * Supports two forms: ++ * - field reference through variable access: ++ * bpf_core_field_exists(p->my_field); ++ * - field reference through type and field names: ++ * bpf_core_field_exists(struct my_type, my_field). ++ */ ++#define bpf_core_field_exists(field...) \ ++ __builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_EXISTS) ++ ++/* ++ * Convenience macro to get the byte size of a field. Works for integers, ++ * struct/unions, pointers, arrays, and enums. ++ * ++ * Supports two forms: ++ * - field reference through variable access: ++ * bpf_core_field_size(p->my_field); ++ * - field reference through type and field names: ++ * bpf_core_field_size(struct my_type, my_field). ++ */ ++#define bpf_core_field_size(field...) \ ++ __builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_BYTE_SIZE) ++ ++/* ++ * Convenience macro to get field's byte offset. ++ * ++ * Supports two forms: ++ * - field reference through variable access: ++ * bpf_core_field_offset(p->my_field); ++ * - field reference through type and field names: ++ * bpf_core_field_offset(struct my_type, my_field). ++ */ ++#define bpf_core_field_offset(field...) \ ++ __builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_BYTE_OFFSET) ++ ++/* ++ * Convenience macro to get BTF type ID of a specified type, using a local BTF ++ * information. Return 32-bit unsigned integer with type ID from program's own ++ * BTF. Always succeeds. 
++ */ ++#define bpf_core_type_id_local(type) \ ++ __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_LOCAL) ++ ++/* ++ * Convenience macro to get BTF type ID of a target kernel's type that matches ++ * specified local type. ++ * Returns: ++ * - valid 32-bit unsigned type ID in kernel BTF; ++ * - 0, if no matching type was found in a target kernel BTF. ++ */ ++#define bpf_core_type_id_kernel(type) \ ++ __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_TARGET) ++ ++/* ++ * Convenience macro to check that provided named type ++ * (struct/union/enum/typedef) exists in a target kernel. ++ * Returns: ++ * 1, if such type is present in target kernel's BTF; ++ * 0, if no matching type is found. ++ */ ++#define bpf_core_type_exists(type) \ ++ __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_EXISTS) ++ ++/* ++ * Convenience macro to check that provided named type ++ * (struct/union/enum/typedef) "matches" that in a target kernel. ++ * Returns: ++ * 1, if the type matches in the target kernel's BTF; ++ * 0, if the type does not match any in the target kernel ++ */ ++#define bpf_core_type_matches(type) \ ++ __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_MATCHES) ++ ++/* ++ * Convenience macro to get the byte size of a provided named type ++ * (struct/union/enum/typedef) in a target kernel. ++ * Returns: ++ * >= 0 size (in bytes), if type is present in target kernel's BTF; ++ * 0, if no matching type is found. ++ */ ++#define bpf_core_type_size(type) \ ++ __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_SIZE) ++ ++/* ++ * Convenience macro to check that provided enumerator value is defined in ++ * a target kernel. ++ * Returns: ++ * 1, if specified enum type and its enumerator value are present in target ++ * kernel's BTF; ++ * 0, if no matching enum and/or enum value within that enum is found. ++ */ ++#define bpf_core_enum_value_exists(enum_type, enum_value) \ ++ __builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_EXISTS) ++ ++/* ++ * Convenience macro to get the integer value of an enumerator value in ++ * a target kernel. ++ * Returns: ++ * 64-bit value, if specified enum type and its enumerator value are ++ * present in target kernel's BTF; ++ * 0, if no matching enum and/or enum value within that enum is found. ++ */ ++#define bpf_core_enum_value(enum_type, enum_value) \ ++ __builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_VALUE) ++ ++/* ++ * bpf_core_read() abstracts away bpf_probe_read_kernel() call and captures ++ * offset relocation for source address using __builtin_preserve_access_index() ++ * built-in, provided by Clang. ++ * ++ * __builtin_preserve_access_index() takes as an argument an expression of ++ * taking an address of a field within struct/union. It makes compiler emit ++ * a relocation, which records BTF type ID describing root struct/union and an ++ * accessor string which describes exact embedded field that was used to take ++ * an address. See detailed description of this relocation format and ++ * semantics in comments to struct bpf_field_reloc in libbpf_internal.h. ++ * ++ * This relocation allows libbpf to adjust BPF instruction to use correct ++ * actual field offset, based on target kernel BTF type that matches original ++ * (local) BTF, used to record relocation. ++ */ ++#define bpf_core_read(dst, sz, src) \ ++ bpf_probe_read_kernel(dst, sz, (const void *)__builtin_preserve_access_index(src)) ++ ++/* NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. 
*/ ++#define bpf_core_read_user(dst, sz, src) \ ++ bpf_probe_read_user(dst, sz, (const void *)__builtin_preserve_access_index(src)) ++/* ++ * bpf_core_read_str() is a thin wrapper around bpf_probe_read_str() ++ * additionally emitting BPF CO-RE field relocation for specified source ++ * argument. ++ */ ++#define bpf_core_read_str(dst, sz, src) \ ++ bpf_probe_read_kernel_str(dst, sz, (const void *)__builtin_preserve_access_index(src)) ++ ++/* NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. */ ++#define bpf_core_read_user_str(dst, sz, src) \ ++ bpf_probe_read_user_str(dst, sz, (const void *)__builtin_preserve_access_index(src)) ++ ++#define ___concat(a, b) a ## b ++#define ___apply(fn, n) ___concat(fn, n) ++#define ___nth(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, __11, N, ...) N ++ ++/* ++ * return number of provided arguments; used for switch-based variadic macro ++ * definitions (see ___last, ___arrow, etc below) ++ */ ++#define ___narg(...) ___nth(_, ##__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) ++/* ++ * return 0 if no arguments are passed, N - otherwise; used for ++ * recursively-defined macros to specify termination (0) case, and generic ++ * (N) case (e.g., ___read_ptrs, ___core_read) ++ */ ++#define ___empty(...) ___nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0) ++ ++#define ___last1(x) x ++#define ___last2(a, x) x ++#define ___last3(a, b, x) x ++#define ___last4(a, b, c, x) x ++#define ___last5(a, b, c, d, x) x ++#define ___last6(a, b, c, d, e, x) x ++#define ___last7(a, b, c, d, e, f, x) x ++#define ___last8(a, b, c, d, e, f, g, x) x ++#define ___last9(a, b, c, d, e, f, g, h, x) x ++#define ___last10(a, b, c, d, e, f, g, h, i, x) x ++#define ___last(...) ___apply(___last, ___narg(__VA_ARGS__))(__VA_ARGS__) ++ ++#define ___nolast2(a, _) a ++#define ___nolast3(a, b, _) a, b ++#define ___nolast4(a, b, c, _) a, b, c ++#define ___nolast5(a, b, c, d, _) a, b, c, d ++#define ___nolast6(a, b, c, d, e, _) a, b, c, d, e ++#define ___nolast7(a, b, c, d, e, f, _) a, b, c, d, e, f ++#define ___nolast8(a, b, c, d, e, f, g, _) a, b, c, d, e, f, g ++#define ___nolast9(a, b, c, d, e, f, g, h, _) a, b, c, d, e, f, g, h ++#define ___nolast10(a, b, c, d, e, f, g, h, i, _) a, b, c, d, e, f, g, h, i ++#define ___nolast(...) ___apply(___nolast, ___narg(__VA_ARGS__))(__VA_ARGS__) ++ ++#define ___arrow1(a) a ++#define ___arrow2(a, b) a->b ++#define ___arrow3(a, b, c) a->b->c ++#define ___arrow4(a, b, c, d) a->b->c->d ++#define ___arrow5(a, b, c, d, e) a->b->c->d->e ++#define ___arrow6(a, b, c, d, e, f) a->b->c->d->e->f ++#define ___arrow7(a, b, c, d, e, f, g) a->b->c->d->e->f->g ++#define ___arrow8(a, b, c, d, e, f, g, h) a->b->c->d->e->f->g->h ++#define ___arrow9(a, b, c, d, e, f, g, h, i) a->b->c->d->e->f->g->h->i ++#define ___arrow10(a, b, c, d, e, f, g, h, i, j) a->b->c->d->e->f->g->h->i->j ++#define ___arrow(...) ___apply(___arrow, ___narg(__VA_ARGS__))(__VA_ARGS__) ++ ++#define ___type(...) typeof(___arrow(__VA_ARGS__)) ++ ++#define ___read(read_fn, dst, src_type, src, accessor) \ ++ read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor) ++ ++/* "recursively" read a sequence of inner pointers using local __t var */ ++#define ___rd_first(fn, src, a) ___read(fn, &__t, ___type(src), src, a); ++#define ___rd_last(fn, ...) \ ++ ___read(fn, &__t, ___type(___nolast(__VA_ARGS__)), __t, ___last(__VA_ARGS__)); ++#define ___rd_p1(fn, ...) const void *__t; ___rd_first(fn, __VA_ARGS__) ++#define ___rd_p2(fn, ...) 
___rd_p1(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) ++#define ___rd_p3(fn, ...) ___rd_p2(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) ++#define ___rd_p4(fn, ...) ___rd_p3(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) ++#define ___rd_p5(fn, ...) ___rd_p4(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) ++#define ___rd_p6(fn, ...) ___rd_p5(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) ++#define ___rd_p7(fn, ...) ___rd_p6(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) ++#define ___rd_p8(fn, ...) ___rd_p7(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) ++#define ___rd_p9(fn, ...) ___rd_p8(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) ++#define ___read_ptrs(fn, src, ...) \ ++ ___apply(___rd_p, ___narg(__VA_ARGS__))(fn, src, __VA_ARGS__) ++ ++#define ___core_read0(fn, fn_ptr, dst, src, a) \ ++ ___read(fn, dst, ___type(src), src, a); ++#define ___core_readN(fn, fn_ptr, dst, src, ...) \ ++ ___read_ptrs(fn_ptr, src, ___nolast(__VA_ARGS__)) \ ++ ___read(fn, dst, ___type(src, ___nolast(__VA_ARGS__)), __t, \ ++ ___last(__VA_ARGS__)); ++#define ___core_read(fn, fn_ptr, dst, src, a, ...) \ ++ ___apply(___core_read, ___empty(__VA_ARGS__))(fn, fn_ptr, dst, \ ++ src, a, ##__VA_ARGS__) ++ ++/* ++ * BPF_CORE_READ_INTO() is a more performance-conscious variant of ++ * BPF_CORE_READ(), in which final field is read into user-provided storage. ++ * See BPF_CORE_READ() below for more details on general usage. ++ */ ++#define BPF_CORE_READ_INTO(dst, src, a, ...) ({ \ ++ ___core_read(bpf_core_read, bpf_core_read, \ ++ dst, (src), a, ##__VA_ARGS__) \ ++}) ++ ++/* ++ * Variant of BPF_CORE_READ_INTO() for reading from user-space memory. ++ * ++ * NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. ++ */ ++#define BPF_CORE_READ_USER_INTO(dst, src, a, ...) ({ \ ++ ___core_read(bpf_core_read_user, bpf_core_read_user, \ ++ dst, (src), a, ##__VA_ARGS__) \ ++}) ++ ++/* Non-CO-RE variant of BPF_CORE_READ_INTO() */ ++#define BPF_PROBE_READ_INTO(dst, src, a, ...) ({ \ ++ ___core_read(bpf_probe_read, bpf_probe_read, \ ++ dst, (src), a, ##__VA_ARGS__) \ ++}) ++ ++/* Non-CO-RE variant of BPF_CORE_READ_USER_INTO(). ++ * ++ * As no CO-RE relocations are emitted, source types can be arbitrary and are ++ * not restricted to kernel types only. ++ */ ++#define BPF_PROBE_READ_USER_INTO(dst, src, a, ...) ({ \ ++ ___core_read(bpf_probe_read_user, bpf_probe_read_user, \ ++ dst, (src), a, ##__VA_ARGS__) \ ++}) ++ ++/* ++ * BPF_CORE_READ_STR_INTO() does same "pointer chasing" as ++ * BPF_CORE_READ() for intermediate pointers, but then executes (and returns ++ * corresponding error code) bpf_core_read_str() for final string read. ++ */ ++#define BPF_CORE_READ_STR_INTO(dst, src, a, ...) ({ \ ++ ___core_read(bpf_core_read_str, bpf_core_read, \ ++ dst, (src), a, ##__VA_ARGS__) \ ++}) ++ ++/* ++ * Variant of BPF_CORE_READ_STR_INTO() for reading from user-space memory. ++ * ++ * NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. ++ */ ++#define BPF_CORE_READ_USER_STR_INTO(dst, src, a, ...) ({ \ ++ ___core_read(bpf_core_read_user_str, bpf_core_read_user, \ ++ dst, (src), a, ##__VA_ARGS__) \ ++}) ++ ++/* Non-CO-RE variant of BPF_CORE_READ_STR_INTO() */ ++#define BPF_PROBE_READ_STR_INTO(dst, src, a, ...) ({ \ ++ ___core_read(bpf_probe_read_str, bpf_probe_read, \ ++ dst, (src), a, ##__VA_ARGS__) \ ++}) ++ ++/* ++ * Non-CO-RE variant of BPF_CORE_READ_USER_STR_INTO(). 
++ * ++ * As no CO-RE relocations are emitted, source types can be arbitrary and are ++ * not restricted to kernel types only. ++ */ ++#define BPF_PROBE_READ_USER_STR_INTO(dst, src, a, ...) ({ \ ++ ___core_read(bpf_probe_read_user_str, bpf_probe_read_user, \ ++ dst, (src), a, ##__VA_ARGS__) \ ++}) ++ ++/* ++ * BPF_CORE_READ() is used to simplify BPF CO-RE relocatable read, especially ++ * when there are few pointer chasing steps. ++ * E.g., what in non-BPF world (or in BPF w/ BCC) would be something like: ++ * int x = s->a.b.c->d.e->f->g; ++ * can be succinctly achieved using BPF_CORE_READ as: ++ * int x = BPF_CORE_READ(s, a.b.c, d.e, f, g); ++ * ++ * BPF_CORE_READ will decompose above statement into 4 bpf_core_read (BPF ++ * CO-RE relocatable bpf_probe_read_kernel() wrapper) calls, logically ++ * equivalent to: ++ * 1. const void *__t = s->a.b.c; ++ * 2. __t = __t->d.e; ++ * 3. __t = __t->f; ++ * 4. return __t->g; ++ * ++ * Equivalence is logical, because there is a heavy type casting/preservation ++ * involved, as well as all the reads are happening through ++ * bpf_probe_read_kernel() calls using __builtin_preserve_access_index() to ++ * emit CO-RE relocations. ++ * ++ * N.B. Only up to 9 "field accessors" are supported, which should be more ++ * than enough for any practical purpose. ++ */ ++#define BPF_CORE_READ(src, a, ...) ({ \ ++ ___type((src), a, ##__VA_ARGS__) __r; \ ++ BPF_CORE_READ_INTO(&__r, (src), a, ##__VA_ARGS__); \ ++ __r; \ ++}) ++ ++/* ++ * Variant of BPF_CORE_READ() for reading from user-space memory. ++ * ++ * NOTE: all the source types involved are still *kernel types* and need to ++ * exist in kernel (or kernel module) BTF, otherwise CO-RE relocation will ++ * fail. Custom user types are not relocatable with CO-RE. ++ * The typical situation in which BPF_CORE_READ_USER() might be used is to ++ * read kernel UAPI types from the user-space memory passed in as a syscall ++ * input argument. ++ */ ++#define BPF_CORE_READ_USER(src, a, ...) ({ \ ++ ___type((src), a, ##__VA_ARGS__) __r; \ ++ BPF_CORE_READ_USER_INTO(&__r, (src), a, ##__VA_ARGS__); \ ++ __r; \ ++}) ++ ++/* Non-CO-RE variant of BPF_CORE_READ() */ ++#define BPF_PROBE_READ(src, a, ...) ({ \ ++ ___type((src), a, ##__VA_ARGS__) __r; \ ++ BPF_PROBE_READ_INTO(&__r, (src), a, ##__VA_ARGS__); \ ++ __r; \ ++}) ++ ++/* ++ * Non-CO-RE variant of BPF_CORE_READ_USER(). ++ * ++ * As no CO-RE relocations are emitted, source types can be arbitrary and are ++ * not restricted to kernel types only. ++ */ ++#define BPF_PROBE_READ_USER(src, a, ...) ({ \ ++ ___type((src), a, ##__VA_ARGS__) __r; \ ++ BPF_PROBE_READ_USER_INTO(&__r, (src), a, ##__VA_ARGS__); \ ++ __r; \ ++}) ++ ++#endif ++ +diff --git a/src/cc/libbpf/src/bpf_endian.h b/src/cc/libbpf/src/bpf_endian.h +new file mode 100644 +index 0000000..ec9db4f +--- /dev/null ++++ b/src/cc/libbpf/src/bpf_endian.h +@@ -0,0 +1,99 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++#ifndef __BPF_ENDIAN__ ++#define __BPF_ENDIAN__ ++ ++/* ++ * Isolate byte #n and put it into byte #m, for __u##b type. 
++ * E.g., moving byte #6 (nnnnnnnn) into byte #1 (mmmmmmmm) for __u64: ++ * 1) xxxxxxxx nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx ++ * 2) nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx 00000000 ++ * 3) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn ++ * 4) 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn 00000000 ++ */ ++#define ___bpf_mvb(x, b, n, m) ((__u##b)(x) << (b-(n+1)*8) >> (b-8) << (m*8)) ++ ++#define ___bpf_swab16(x) ((__u16)( \ ++ ___bpf_mvb(x, 16, 0, 1) | \ ++ ___bpf_mvb(x, 16, 1, 0))) ++ ++#define ___bpf_swab32(x) ((__u32)( \ ++ ___bpf_mvb(x, 32, 0, 3) | \ ++ ___bpf_mvb(x, 32, 1, 2) | \ ++ ___bpf_mvb(x, 32, 2, 1) | \ ++ ___bpf_mvb(x, 32, 3, 0))) ++ ++#define ___bpf_swab64(x) ((__u64)( \ ++ ___bpf_mvb(x, 64, 0, 7) | \ ++ ___bpf_mvb(x, 64, 1, 6) | \ ++ ___bpf_mvb(x, 64, 2, 5) | \ ++ ___bpf_mvb(x, 64, 3, 4) | \ ++ ___bpf_mvb(x, 64, 4, 3) | \ ++ ___bpf_mvb(x, 64, 5, 2) | \ ++ ___bpf_mvb(x, 64, 6, 1) | \ ++ ___bpf_mvb(x, 64, 7, 0))) ++ ++/* LLVM's BPF target selects the endianness of the CPU ++ * it compiles on, or the user specifies (bpfel/bpfeb), ++ * respectively. The used __BYTE_ORDER__ is defined by ++ * the compiler, we cannot rely on __BYTE_ORDER from ++ * libc headers, since it doesn't reflect the actual ++ * requested byte order. ++ * ++ * Note, LLVM's BPF target has different __builtin_bswapX() ++ * semantics. It does map to BPF_ALU | BPF_END | BPF_TO_BE ++ * in bpfel and bpfeb case, which means below, that we map ++ * to cpu_to_be16(). We could use it unconditionally in BPF ++ * case, but better not rely on it, so that this header here ++ * can be used from application and BPF program side, which ++ * use different targets. ++ */ ++#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ++# define __bpf_ntohs(x) __builtin_bswap16(x) ++# define __bpf_htons(x) __builtin_bswap16(x) ++# define __bpf_constant_ntohs(x) ___bpf_swab16(x) ++# define __bpf_constant_htons(x) ___bpf_swab16(x) ++# define __bpf_ntohl(x) __builtin_bswap32(x) ++# define __bpf_htonl(x) __builtin_bswap32(x) ++# define __bpf_constant_ntohl(x) ___bpf_swab32(x) ++# define __bpf_constant_htonl(x) ___bpf_swab32(x) ++# define __bpf_be64_to_cpu(x) __builtin_bswap64(x) ++# define __bpf_cpu_to_be64(x) __builtin_bswap64(x) ++# define __bpf_constant_be64_to_cpu(x) ___bpf_swab64(x) ++# define __bpf_constant_cpu_to_be64(x) ___bpf_swab64(x) ++#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ ++# define __bpf_ntohs(x) (x) ++# define __bpf_htons(x) (x) ++# define __bpf_constant_ntohs(x) (x) ++# define __bpf_constant_htons(x) (x) ++# define __bpf_ntohl(x) (x) ++# define __bpf_htonl(x) (x) ++# define __bpf_constant_ntohl(x) (x) ++# define __bpf_constant_htonl(x) (x) ++# define __bpf_be64_to_cpu(x) (x) ++# define __bpf_cpu_to_be64(x) (x) ++# define __bpf_constant_be64_to_cpu(x) (x) ++# define __bpf_constant_cpu_to_be64(x) (x) ++#else ++# error "Fix your compiler's __BYTE_ORDER__?!" ++#endif ++ ++#define bpf_htons(x) \ ++ (__builtin_constant_p(x) ? \ ++ __bpf_constant_htons(x) : __bpf_htons(x)) ++#define bpf_ntohs(x) \ ++ (__builtin_constant_p(x) ? \ ++ __bpf_constant_ntohs(x) : __bpf_ntohs(x)) ++#define bpf_htonl(x) \ ++ (__builtin_constant_p(x) ? \ ++ __bpf_constant_htonl(x) : __bpf_htonl(x)) ++#define bpf_ntohl(x) \ ++ (__builtin_constant_p(x) ? \ ++ __bpf_constant_ntohl(x) : __bpf_ntohl(x)) ++#define bpf_cpu_to_be64(x) \ ++ (__builtin_constant_p(x) ? 
\ ++ __bpf_constant_cpu_to_be64(x) : __bpf_cpu_to_be64(x)) ++#define bpf_be64_to_cpu(x) \ ++ (__builtin_constant_p(x) ? \ ++ __bpf_constant_be64_to_cpu(x) : __bpf_be64_to_cpu(x)) ++ ++#endif /* __BPF_ENDIAN__ */ +diff --git a/src/cc/libbpf/src/bpf_gen_internal.h b/src/cc/libbpf/src/bpf_gen_internal.h +new file mode 100644 +index 0000000..2233089 +--- /dev/null ++++ b/src/cc/libbpf/src/bpf_gen_internal.h +@@ -0,0 +1,72 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++/* Copyright (c) 2021 Facebook */ ++#ifndef __BPF_GEN_INTERNAL_H ++#define __BPF_GEN_INTERNAL_H ++ ++#include "bpf.h" ++ ++struct ksym_relo_desc { ++ const char *name; ++ int kind; ++ int insn_idx; ++ bool is_weak; ++ bool is_typeless; ++}; ++ ++struct ksym_desc { ++ const char *name; ++ int ref; ++ int kind; ++ union { ++ /* used for kfunc */ ++ int off; ++ /* used for typeless ksym */ ++ bool typeless; ++ }; ++ int insn; ++}; ++ ++struct bpf_gen { ++ struct gen_loader_opts *opts; ++ void *data_start; ++ void *data_cur; ++ void *insn_start; ++ void *insn_cur; ++ ssize_t cleanup_label; ++ __u32 nr_progs; ++ __u32 nr_maps; ++ int log_level; ++ int error; ++ struct ksym_relo_desc *relos; ++ int relo_cnt; ++ struct bpf_core_relo *core_relos; ++ int core_relo_cnt; ++ char attach_target[128]; ++ int attach_kind; ++ struct ksym_desc *ksyms; ++ __u32 nr_ksyms; ++ int fd_array; ++ int nr_fd_array; ++}; ++ ++void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps); ++int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps); ++void bpf_gen__free(struct bpf_gen *gen); ++void bpf_gen__load_btf(struct bpf_gen *gen, const void *raw_data, __u32 raw_size); ++void bpf_gen__map_create(struct bpf_gen *gen, ++ enum bpf_map_type map_type, const char *map_name, ++ __u32 key_size, __u32 value_size, __u32 max_entries, ++ struct bpf_map_create_opts *map_attr, int map_idx); ++void bpf_gen__prog_load(struct bpf_gen *gen, ++ enum bpf_prog_type prog_type, const char *prog_name, ++ const char *license, struct bpf_insn *insns, size_t insn_cnt, ++ struct bpf_prog_load_opts *load_attr, int prog_idx); ++void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size); ++void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx); ++void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type); ++void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, ++ bool is_typeless, int kind, int insn_idx); ++void bpf_gen__record_relo_core(struct bpf_gen *gen, const struct bpf_core_relo *core_relo); ++void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int key, int inner_map_idx); ++ ++#endif +diff --git a/src/cc/libbpf/src/bpf_helper_defs.h b/src/cc/libbpf/src/bpf_helper_defs.h +new file mode 100644 +index 0000000..0916f7b +--- /dev/null ++++ b/src/cc/libbpf/src/bpf_helper_defs.h +@@ -0,0 +1,4582 @@ ++/* This is auto-generated file. See bpf_doc.py for details. 
*/ ++ ++/* Forward declarations of BPF structs */ ++struct bpf_fib_lookup; ++struct bpf_sk_lookup; ++struct bpf_perf_event_data; ++struct bpf_perf_event_value; ++struct bpf_pidns_info; ++struct bpf_redir_neigh; ++struct bpf_sock; ++struct bpf_sock_addr; ++struct bpf_sock_ops; ++struct bpf_sock_tuple; ++struct bpf_spin_lock; ++struct bpf_sysctl; ++struct bpf_tcp_sock; ++struct bpf_tunnel_key; ++struct bpf_xfrm_state; ++struct linux_binprm; ++struct pt_regs; ++struct sk_reuseport_md; ++struct sockaddr; ++struct tcphdr; ++struct seq_file; ++struct tcp6_sock; ++struct tcp_sock; ++struct tcp_timewait_sock; ++struct tcp_request_sock; ++struct udp6_sock; ++struct unix_sock; ++struct task_struct; ++struct __sk_buff; ++struct sk_msg_md; ++struct xdp_md; ++struct path; ++struct btf_ptr; ++struct inode; ++struct socket; ++struct file; ++struct bpf_timer; ++struct mptcp_sock; ++struct bpf_dynptr; ++struct iphdr; ++struct ipv6hdr; ++ ++/* ++ * bpf_map_lookup_elem ++ * ++ * Perform a lookup in *map* for an entry associated to *key*. ++ * ++ * Returns ++ * Map value associated to *key*, or **NULL** if no entry was ++ * found. ++ */ ++static void *(*bpf_map_lookup_elem)(void *map, const void *key) = (void *) 1; ++ ++/* ++ * bpf_map_update_elem ++ * ++ * Add or update the value of the entry associated to *key* in ++ * *map* with *value*. *flags* is one of: ++ * ++ * **BPF_NOEXIST** ++ * The entry for *key* must not exist in the map. ++ * **BPF_EXIST** ++ * The entry for *key* must already exist in the map. ++ * **BPF_ANY** ++ * No condition on the existence of the entry for *key*. ++ * ++ * Flag value **BPF_NOEXIST** cannot be used for maps of types ++ * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all ++ * elements always exist), the helper would return an error. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_map_update_elem)(void *map, const void *key, const void *value, __u64 flags) = (void *) 2; ++ ++/* ++ * bpf_map_delete_elem ++ * ++ * Delete entry with *key* from *map*. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_map_delete_elem)(void *map, const void *key) = (void *) 3; ++ ++/* ++ * bpf_probe_read ++ * ++ * For tracing programs, safely attempt to read *size* bytes from ++ * kernel space address *unsafe_ptr* and store the data in *dst*. ++ * ++ * Generally, use **bpf_probe_read_user**\ () or ++ * **bpf_probe_read_kernel**\ () instead. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_probe_read)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 4; ++ ++/* ++ * bpf_ktime_get_ns ++ * ++ * Return the time elapsed since system boot, in nanoseconds. ++ * Does not include time the system was suspended. ++ * See: **clock_gettime**\ (**CLOCK_MONOTONIC**) ++ * ++ * Returns ++ * Current *ktime*. ++ */ ++static __u64 (*bpf_ktime_get_ns)(void) = (void *) 5; ++ ++/* ++ * bpf_trace_printk ++ * ++ * This helper is a "printk()-like" facility for debugging. It ++ * prints a message defined by format *fmt* (of size *fmt_size*) ++ * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if ++ * available. It can take up to three additional **u64** ++ * arguments (as an eBPF helpers, the total number of arguments is ++ * limited to five). ++ * ++ * Each time the helper is called, it appends a line to the trace. 
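++ *
++ * A minimal editorial usage sketch (not part of the upstream
++ * description; the format string and argument are arbitrary):
++ *
++ *      ::
++ *
++ *              char fmt[] = "tgid %d hit the probe\n";
++ *
++ *              bpf_trace_printk(fmt, sizeof(fmt),
++ *                               bpf_get_current_pid_tgid() >> 32);
++ *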
++ * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is ++ * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this. ++ * The format of the trace is customizable, and the exact output ++ * one will get depends on the options set in ++ * *\/sys/kernel/debug/tracing/trace_options* (see also the ++ * *README* file under the same directory). However, it usually ++ * defaults to something like: ++ * ++ * :: ++ * ++ * telnet-470 [001] .N.. 419421.045894: 0x00000001: ++ * ++ * In the above: ++ * ++ * * ``telnet`` is the name of the current task. ++ * * ``470`` is the PID of the current task. ++ * * ``001`` is the CPU number on which the task is ++ * running. ++ * * In ``.N..``, each character refers to a set of ++ * options (whether irqs are enabled, scheduling ++ * options, whether hard/softirqs are running, level of ++ * preempt_disabled respectively). **N** means that ++ * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED** ++ * are set. ++ * * ``419421.045894`` is a timestamp. ++ * * ``0x00000001`` is a fake value used by BPF for the ++ * instruction pointer register. ++ * * ```` is the message formatted with ++ * *fmt*. ++ * ++ * The conversion specifiers supported by *fmt* are similar, but ++ * more limited than for printk(). They are **%d**, **%i**, ++ * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**, ++ * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size ++ * of field, padding with zeroes, etc.) is available, and the ++ * helper will return **-EINVAL** (but print nothing) if it ++ * encounters an unknown specifier. ++ * ++ * Also, note that **bpf_trace_printk**\ () is slow, and should ++ * only be used for debugging purposes. For this reason, a notice ++ * block (spanning several lines) is printed to kernel logs and ++ * states that the helper should not be used "for production use" ++ * the first time this helper is used (or more precisely, when ++ * **trace_printk**\ () buffers are allocated). For passing values ++ * to user space, perf events should be preferred. ++ * ++ * Returns ++ * The number of bytes written to the buffer, or a negative error ++ * in case of failure. ++ */ ++static long (*bpf_trace_printk)(const char *fmt, __u32 fmt_size, ...) = (void *) 6; ++ ++/* ++ * bpf_get_prandom_u32 ++ * ++ * Get a pseudo-random number. ++ * ++ * From a security point of view, this helper uses its own ++ * pseudo-random internal state, and cannot be used to infer the ++ * seed of other random functions in the kernel. However, it is ++ * essential to note that the generator used by the helper is not ++ * cryptographically secure. ++ * ++ * Returns ++ * A random 32-bit unsigned value. ++ */ ++static __u32 (*bpf_get_prandom_u32)(void) = (void *) 7; ++ ++/* ++ * bpf_get_smp_processor_id ++ * ++ * Get the SMP (symmetric multiprocessing) processor id. Note that ++ * all programs run with migration disabled, which means that the ++ * SMP processor id is stable during all the execution of the ++ * program. ++ * ++ * Returns ++ * The SMP id of the processor running the program. ++ */ ++static __u32 (*bpf_get_smp_processor_id)(void) = (void *) 8; ++ ++/* ++ * bpf_skb_store_bytes ++ * ++ * Store *len* bytes from address *from* into the packet ++ * associated to *skb*, at *offset*. *flags* are a combination of ++ * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the ++ * checksum for the packet after storing the bytes) and ++ * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ ++ * **->swhash** and *skb*\ **->l4hash** to 0). 
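++ *
++ * An editorial sketch (not part of the upstream description), assuming
++ * a TC classifier that overwrites the destination MAC address at the
++ * start of the frame:
++ *
++ *      ::
++ *
++ *              __u8 dst[6] = { 0x02, 0, 0, 0, 0, 0x01 };
++ *
++ *              bpf_skb_store_bytes(skb, 0, dst, sizeof(dst), 0);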
++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_skb_store_bytes)(struct __sk_buff *skb, __u32 offset, const void *from, __u32 len, __u64 flags) = (void *) 9; ++ ++/* ++ * bpf_l3_csum_replace ++ * ++ * Recompute the layer 3 (e.g. IP) checksum for the packet ++ * associated to *skb*. Computation is incremental, so the helper ++ * must know the former value of the header field that was ++ * modified (*from*), the new value of this field (*to*), and the ++ * number of bytes (2 or 4) for this field, stored in *size*. ++ * Alternatively, it is possible to store the difference between ++ * the previous and the new values of the header field in *to*, by ++ * setting *from* and *size* to 0. For both methods, *offset* ++ * indicates the location of the IP checksum within the packet. ++ * ++ * This helper works in combination with **bpf_csum_diff**\ (), ++ * which does not update the checksum in-place, but offers more ++ * flexibility and can handle sizes larger than 2 or 4 for the ++ * checksum to update. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_l3_csum_replace)(struct __sk_buff *skb, __u32 offset, __u64 from, __u64 to, __u64 size) = (void *) 10; ++ ++/* ++ * bpf_l4_csum_replace ++ * ++ * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the ++ * packet associated to *skb*. Computation is incremental, so the ++ * helper must know the former value of the header field that was ++ * modified (*from*), the new value of this field (*to*), and the ++ * number of bytes (2 or 4) for this field, stored on the lowest ++ * four bits of *flags*. Alternatively, it is possible to store ++ * the difference between the previous and the new values of the ++ * header field in *to*, by setting *from* and the four lowest ++ * bits of *flags* to 0. For both methods, *offset* indicates the ++ * location of the IP checksum within the packet. In addition to ++ * the size of the field, *flags* can be added (bitwise OR) actual ++ * flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left ++ * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and ++ * for updates resulting in a null checksum the value is set to ++ * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates ++ * the checksum is to be computed against a pseudo-header. ++ * ++ * This helper works in combination with **bpf_csum_diff**\ (), ++ * which does not update the checksum in-place, but offers more ++ * flexibility and can handle sizes larger than 2 or 4 for the ++ * checksum to update. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. 
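++ *
++ * An editorial sketch (not part of the upstream description), assuming
++ * *l4_off* is the TCP header offset, <linux/tcp.h> is included, and the
++ * destination port was just rewritten from *old_port* to *new_port*
++ * (both __be16):
++ *
++ *      ::
++ *
++ *              bpf_l4_csum_replace(skb,
++ *                                  l4_off + offsetof(struct tcphdr, check),
++ *                                  old_port, new_port, sizeof(new_port));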
++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_l4_csum_replace)(struct __sk_buff *skb, __u32 offset, __u64 from, __u64 to, __u64 flags) = (void *) 11; ++ ++/* ++ * bpf_tail_call ++ * ++ * This special helper is used to trigger a "tail call", or in ++ * other words, to jump into another eBPF program. The same stack ++ * frame is used (but values on stack and in registers for the ++ * caller are not accessible to the callee). This mechanism allows ++ * for program chaining, either for raising the maximum number of ++ * available eBPF instructions, or to execute given programs in ++ * conditional blocks. For security reasons, there is an upper ++ * limit to the number of successive tail calls that can be ++ * performed. ++ * ++ * Upon call of this helper, the program attempts to jump into a ++ * program referenced at index *index* in *prog_array_map*, a ++ * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes ++ * *ctx*, a pointer to the context. ++ * ++ * If the call succeeds, the kernel immediately runs the first ++ * instruction of the new program. This is not a function call, ++ * and it never returns to the previous program. If the call ++ * fails, then the helper has no effect, and the caller continues ++ * to run its subsequent instructions. A call can fail if the ++ * destination program for the jump does not exist (i.e. *index* ++ * is superior to the number of entries in *prog_array_map*), or ++ * if the maximum number of tail calls has been reached for this ++ * chain of programs. This limit is defined in the kernel by the ++ * macro **MAX_TAIL_CALL_CNT** (not accessible to user space), ++ * which is currently set to 33. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_tail_call)(void *ctx, void *prog_array_map, __u32 index) = (void *) 12; ++ ++/* ++ * bpf_clone_redirect ++ * ++ * Clone and redirect the packet associated to *skb* to another ++ * net device of index *ifindex*. Both ingress and egress ++ * interfaces can be used for redirection. The **BPF_F_INGRESS** ++ * value in *flags* is used to make the distinction (ingress path ++ * is selected if the flag is present, egress path otherwise). ++ * This is the only flag supported for now. ++ * ++ * In comparison with **bpf_redirect**\ () helper, ++ * **bpf_clone_redirect**\ () has the associated cost of ++ * duplicating the packet buffer, but this can be executed out of ++ * the eBPF program. Conversely, **bpf_redirect**\ () is more ++ * efficient, but it is handled through an action code where the ++ * redirection happens only after the eBPF program has returned. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_clone_redirect)(struct __sk_buff *skb, __u32 ifindex, __u64 flags) = (void *) 13; ++ ++/* ++ * bpf_get_current_pid_tgid ++ * ++ * Get the current pid and tgid. ++ * ++ * Returns ++ * A 64-bit integer containing the current tgid and pid, and ++ * created as such: ++ * *current_task*\ **->tgid << 32 \|** ++ * *current_task*\ **->pid**. 
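++ *
++ * An editorial sketch (not part of the upstream description) showing
++ * how the two halves are usually split apart:
++ *
++ *      ::
++ *
++ *              __u64 id = bpf_get_current_pid_tgid();
++ *              __u32 tgid = id >> 32;   // what user space calls the PID
++ *              __u32 pid = (__u32)id;   // the thread id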
++ */ ++static __u64 (*bpf_get_current_pid_tgid)(void) = (void *) 14; ++ ++/* ++ * bpf_get_current_uid_gid ++ * ++ * Get the current uid and gid. ++ * ++ * Returns ++ * A 64-bit integer containing the current GID and UID, and ++ * created as such: *current_gid* **<< 32 \|** *current_uid*. ++ */ ++static __u64 (*bpf_get_current_uid_gid)(void) = (void *) 15; ++ ++/* ++ * bpf_get_current_comm ++ * ++ * Copy the **comm** attribute of the current task into *buf* of ++ * *size_of_buf*. The **comm** attribute contains the name of ++ * the executable (excluding the path) for the current task. The ++ * *size_of_buf* must be strictly positive. On success, the ++ * helper makes sure that the *buf* is NUL-terminated. On failure, ++ * it is filled with zeroes. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_get_current_comm)(void *buf, __u32 size_of_buf) = (void *) 16; ++ ++/* ++ * bpf_get_cgroup_classid ++ * ++ * Retrieve the classid for the current task, i.e. for the net_cls ++ * cgroup to which *skb* belongs. ++ * ++ * This helper can be used on TC egress path, but not on ingress. ++ * ++ * The net_cls cgroup provides an interface to tag network packets ++ * based on a user-provided identifier for all traffic coming from ++ * the tasks belonging to the related cgroup. See also the related ++ * kernel documentation, available from the Linux sources in file ++ * *Documentation/admin-guide/cgroup-v1/net_cls.rst*. ++ * ++ * The Linux kernel has two versions for cgroups: there are ++ * cgroups v1 and cgroups v2. Both are available to users, who can ++ * use a mixture of them, but note that the net_cls cgroup is for ++ * cgroup v1 only. This makes it incompatible with BPF programs ++ * run on cgroups, which is a cgroup-v2-only feature (a socket can ++ * only hold data for one version of cgroups at a time). ++ * ++ * This helper is only available is the kernel was compiled with ++ * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to ++ * "**y**" or to "**m**". ++ * ++ * Returns ++ * The classid, or 0 for the default unconfigured classid. ++ */ ++static __u32 (*bpf_get_cgroup_classid)(struct __sk_buff *skb) = (void *) 17; ++ ++/* ++ * bpf_skb_vlan_push ++ * ++ * Push a *vlan_tci* (VLAN tag control information) of protocol ++ * *vlan_proto* to the packet associated to *skb*, then update ++ * the checksum. Note that if *vlan_proto* is different from ++ * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to ++ * be **ETH_P_8021Q**. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_skb_vlan_push)(struct __sk_buff *skb, __be16 vlan_proto, __u16 vlan_tci) = (void *) 18; ++ ++/* ++ * bpf_skb_vlan_pop ++ * ++ * Pop a VLAN header from the packet associated to *skb*. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. 
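++ *
++ * An editorial sketch (not part of the upstream description), assuming
++ * a TC classifier that strips an outer VLAN tag before further
++ * processing:
++ *
++ *      ::
++ *
++ *              if (skb->vlan_present)
++ *                      bpf_skb_vlan_pop(skb);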
++ */ ++static long (*bpf_skb_vlan_pop)(struct __sk_buff *skb) = (void *) 19; ++ ++/* ++ * bpf_skb_get_tunnel_key ++ * ++ * Get tunnel metadata. This helper takes a pointer *key* to an ++ * empty **struct bpf_tunnel_key** of **size**, that will be ++ * filled with tunnel metadata for the packet associated to *skb*. ++ * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which ++ * indicates that the tunnel is based on IPv6 protocol instead of ++ * IPv4. ++ * ++ * The **struct bpf_tunnel_key** is an object that generalizes the ++ * principal parameters used by various tunneling protocols into a ++ * single struct. This way, it can be used to easily make a ++ * decision based on the contents of the encapsulation header, ++ * "summarized" in this struct. In particular, it holds the IP ++ * address of the remote end (IPv4 or IPv6, depending on the case) ++ * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also, ++ * this struct exposes the *key*\ **->tunnel_id**, which is ++ * generally mapped to a VNI (Virtual Network Identifier), making ++ * it programmable together with the **bpf_skb_set_tunnel_key**\ ++ * () helper. ++ * ++ * Let's imagine that the following code is part of a program ++ * attached to the TC ingress interface, on one end of a GRE ++ * tunnel, and is supposed to filter out all messages coming from ++ * remote ends with IPv4 address other than 10.0.0.1: ++ * ++ * :: ++ * ++ * int ret; ++ * struct bpf_tunnel_key key = {}; ++ * ++ * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); ++ * if (ret < 0) ++ * return TC_ACT_SHOT; // drop packet ++ * ++ * if (key.remote_ipv4 != 0x0a000001) ++ * return TC_ACT_SHOT; // drop packet ++ * ++ * return TC_ACT_OK; // accept packet ++ * ++ * This interface can also be used with all encapsulation devices ++ * that can operate in "collect metadata" mode: instead of having ++ * one network device per specific configuration, the "collect ++ * metadata" mode only requires a single device where the ++ * configuration can be extracted from this helper. ++ * ++ * This can be used together with various tunnels such as VXLan, ++ * Geneve, GRE or IP in IP (IPIP). ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_skb_get_tunnel_key)(struct __sk_buff *skb, struct bpf_tunnel_key *key, __u32 size, __u64 flags) = (void *) 20; ++ ++/* ++ * bpf_skb_set_tunnel_key ++ * ++ * Populate tunnel metadata for packet associated to *skb.* The ++ * tunnel metadata is set to the contents of *key*, of *size*. The ++ * *flags* can be set to a combination of the following values: ++ * ++ * **BPF_F_TUNINFO_IPV6** ++ * Indicate that the tunnel is based on IPv6 protocol ++ * instead of IPv4. ++ * **BPF_F_ZERO_CSUM_TX** ++ * For IPv4 packets, add a flag to tunnel metadata ++ * indicating that checksum computation should be skipped ++ * and checksum set to zeroes. ++ * **BPF_F_DONT_FRAGMENT** ++ * Add a flag to tunnel metadata indicating that the ++ * packet should not be fragmented. ++ * **BPF_F_SEQ_NUMBER** ++ * Add a flag to tunnel metadata indicating that a ++ * sequence number should be added to tunnel header before ++ * sending the packet. This flag was added for GRE ++ * encapsulation, but might be used with other protocols ++ * as well in the future. ++ * ++ * Here is a typical usage on the transmit path: ++ * ++ * :: ++ * ++ * struct bpf_tunnel_key key; ++ * populate key ... 
++ * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); ++ * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0); ++ * ++ * See also the description of the **bpf_skb_get_tunnel_key**\ () ++ * helper for additional information. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_skb_set_tunnel_key)(struct __sk_buff *skb, struct bpf_tunnel_key *key, __u32 size, __u64 flags) = (void *) 21; ++ ++/* ++ * bpf_perf_event_read ++ * ++ * Read the value of a perf event counter. This helper relies on a ++ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of ++ * the perf event counter is selected when *map* is updated with ++ * perf event file descriptors. The *map* is an array whose size ++ * is the number of available CPUs, and each cell contains a value ++ * relative to one CPU. The value to retrieve is indicated by ++ * *flags*, that contains the index of the CPU to look up, masked ++ * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to ++ * **BPF_F_CURRENT_CPU** to indicate that the value for the ++ * current CPU should be retrieved. ++ * ++ * Note that before Linux 4.13, only hardware perf event can be ++ * retrieved. ++ * ++ * Also, be aware that the newer helper ++ * **bpf_perf_event_read_value**\ () is recommended over ++ * **bpf_perf_event_read**\ () in general. The latter has some ABI ++ * quirks where error and counter value are used as a return code ++ * (which is wrong to do since ranges may overlap). This issue is ++ * fixed with **bpf_perf_event_read_value**\ (), which at the same ++ * time provides more features over the **bpf_perf_event_read**\ ++ * () interface. Please refer to the description of ++ * **bpf_perf_event_read_value**\ () for details. ++ * ++ * Returns ++ * The value of the perf event counter read from the map, or a ++ * negative error code in case of failure. ++ */ ++static __u64 (*bpf_perf_event_read)(void *map, __u64 flags) = (void *) 22; ++ ++/* ++ * bpf_redirect ++ * ++ * Redirect the packet to another net device of index *ifindex*. ++ * This helper is somewhat similar to **bpf_clone_redirect**\ ++ * (), except that the packet is not cloned, which provides ++ * increased performance. ++ * ++ * Except for XDP, both ingress and egress interfaces can be used ++ * for redirection. The **BPF_F_INGRESS** value in *flags* is used ++ * to make the distinction (ingress path is selected if the flag ++ * is present, egress path otherwise). Currently, XDP only ++ * supports redirection to the egress interface, and accepts no ++ * flag at all. ++ * ++ * The same effect can also be attained with the more generic ++ * **bpf_redirect_map**\ (), which uses a BPF map to store the ++ * redirect target instead of providing it directly to the helper. ++ * ++ * Returns ++ * For XDP, the helper returns **XDP_REDIRECT** on success or ++ * **XDP_ABORTED** on error. For other program types, the values ++ * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on ++ * error. ++ */ ++static long (*bpf_redirect)(__u32 ifindex, __u64 flags) = (void *) 23; ++ ++/* ++ * bpf_get_route_realm ++ * ++ * Retrieve the realm or the route, that is to say the ++ * **tclassid** field of the destination for the *skb*. The ++ * identifier retrieved is a user-provided tag, similar to the ++ * one used with the net_cls cgroup (see description for ++ * **bpf_get_cgroup_classid**\ () helper), but here this tag is ++ * held by a route (a destination entry), not by a task. 
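++ *
++ * An editorial sketch (not part of the upstream description), assuming
++ * an egress classifier that drops traffic routed via an arbitrary
++ * realm value of 42:
++ *
++ *      ::
++ *
++ *              if (bpf_get_route_realm(skb) == 42)
++ *                      return TC_ACT_SHOT; // drop packet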
++ * ++ * Retrieving this identifier works with the clsact TC egress hook ++ * (see also **tc-bpf(8)**), or alternatively on conventional ++ * classful egress qdiscs, but not on TC ingress path. In case of ++ * clsact TC egress hook, this has the advantage that, internally, ++ * the destination entry has not been dropped yet in the transmit ++ * path. Therefore, the destination entry does not need to be ++ * artificially held via **netif_keep_dst**\ () for a classful ++ * qdisc until the *skb* is freed. ++ * ++ * This helper is available only if the kernel was compiled with ++ * **CONFIG_IP_ROUTE_CLASSID** configuration option. ++ * ++ * Returns ++ * The realm of the route for the packet associated to *skb*, or 0 ++ * if none was found. ++ */ ++static __u32 (*bpf_get_route_realm)(struct __sk_buff *skb) = (void *) 24; ++ ++/* ++ * bpf_perf_event_output ++ * ++ * Write raw *data* blob into a special BPF perf event held by ++ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf ++ * event must have the following attributes: **PERF_SAMPLE_RAW** ++ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and ++ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. ++ * ++ * The *flags* are used to indicate the index in *map* for which ++ * the value must be put, masked with **BPF_F_INDEX_MASK**. ++ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** ++ * to indicate that the index of the current CPU core should be ++ * used. ++ * ++ * The value to write, of *size*, is passed through eBPF stack and ++ * pointed by *data*. ++ * ++ * The context of the program *ctx* needs also be passed to the ++ * helper. ++ * ++ * On user space, a program willing to read the values needs to ++ * call **perf_event_open**\ () on the perf event (either for ++ * one or for all CPUs) and to store the file descriptor into the ++ * *map*. This must be done before the eBPF program can send data ++ * into it. An example is available in file ++ * *samples/bpf/trace_output_user.c* in the Linux kernel source ++ * tree (the eBPF program counterpart is in ++ * *samples/bpf/trace_output_kern.c*). ++ * ++ * **bpf_perf_event_output**\ () achieves better performance ++ * than **bpf_trace_printk**\ () for sharing data with user ++ * space, and is much better suitable for streaming data from eBPF ++ * programs. ++ * ++ * Note that this helper is not restricted to tracing use cases ++ * and can be used with programs attached to TC or XDP as well, ++ * where it allows for passing data to user space listeners. Data ++ * can be: ++ * ++ * * Only custom structs, ++ * * Only the packet payload, or ++ * * A combination of both. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_perf_event_output)(void *ctx, void *map, __u64 flags, void *data, __u64 size) = (void *) 25; ++ ++/* ++ * bpf_skb_load_bytes ++ * ++ * This helper was provided as an easy way to load data from a ++ * packet. It can be used to load *len* bytes from *offset* from ++ * the packet associated to *skb*, into the buffer pointed by ++ * *to*. ++ * ++ * Since Linux 4.7, usage of this helper has mostly been replaced ++ * by "direct packet access", enabling packet data to be ++ * manipulated with *skb*\ **->data** and *skb*\ **->data_end** ++ * pointing respectively to the first byte of packet data and to ++ * the byte after the last byte of packet data. However, it ++ * remains useful if one wishes to read large quantities of data ++ * at once from a packet into the eBPF stack. 
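++ *
++ * An editorial sketch (not part of the upstream description), assuming
++ * <linux/if_ether.h> is included for **struct ethhdr**:
++ *
++ *      ::
++ *
++ *              struct ethhdr eth;
++ *
++ *              if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
++ *                      return TC_ACT_OK; // let the packet through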
++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_skb_load_bytes)(const void *skb, __u32 offset, void *to, __u32 len) = (void *) 26; ++ ++/* ++ * bpf_get_stackid ++ * ++ * Walk a user or a kernel stack and return its id. To achieve ++ * this, the helper needs *ctx*, which is a pointer to the context ++ * on which the tracing program is executed, and a pointer to a ++ * *map* of type **BPF_MAP_TYPE_STACK_TRACE**. ++ * ++ * The last argument, *flags*, holds the number of stack frames to ++ * skip (from 0 to 255), masked with ++ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set ++ * a combination of the following flags: ++ * ++ * **BPF_F_USER_STACK** ++ * Collect a user space stack instead of a kernel stack. ++ * **BPF_F_FAST_STACK_CMP** ++ * Compare stacks by hash only. ++ * **BPF_F_REUSE_STACKID** ++ * If two different stacks hash into the same *stackid*, ++ * discard the old one. ++ * ++ * The stack id retrieved is a 32 bit long integer handle which ++ * can be further combined with other data (including other stack ++ * ids) and used as a key into maps. This can be useful for ++ * generating a variety of graphs (such as flame graphs or off-cpu ++ * graphs). ++ * ++ * For walking a stack, this helper is an improvement over ++ * **bpf_probe_read**\ (), which can be used with unrolled loops ++ * but is not efficient and consumes a lot of eBPF instructions. ++ * Instead, **bpf_get_stackid**\ () can collect up to ++ * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that ++ * this limit can be controlled with the **sysctl** program, and ++ * that it should be manually increased in order to profile long ++ * user stacks (such as stacks for Java programs). To do so, use: ++ * ++ * :: ++ * ++ * # sysctl kernel.perf_event_max_stack= ++ * ++ * Returns ++ * The positive or null stack id on success, or a negative error ++ * in case of failure. ++ */ ++static long (*bpf_get_stackid)(void *ctx, void *map, __u64 flags) = (void *) 27; ++ ++/* ++ * bpf_csum_diff ++ * ++ * Compute a checksum difference, from the raw buffer pointed by ++ * *from*, of length *from_size* (that must be a multiple of 4), ++ * towards the raw buffer pointed by *to*, of size *to_size* ++ * (same remark). An optional *seed* can be added to the value ++ * (this can be cascaded, the seed may come from a previous call ++ * to the helper). ++ * ++ * This is flexible enough to be used in several ways: ++ * ++ * * With *from_size* == 0, *to_size* > 0 and *seed* set to ++ * checksum, it can be used when pushing new data. ++ * * With *from_size* > 0, *to_size* == 0 and *seed* set to ++ * checksum, it can be used when removing data from a packet. ++ * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it ++ * can be used to compute a diff. Note that *from_size* and ++ * *to_size* do not need to be equal. ++ * ++ * This helper can be used in combination with ++ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to ++ * which one can feed in the difference computed with ++ * **bpf_csum_diff**\ (). ++ * ++ * Returns ++ * The checksum result, or a negative error code in case of ++ * failure. ++ */ ++static __s64 (*bpf_csum_diff)(__be32 *from, __u32 from_size, __be32 *to, __u32 to_size, __wsum seed) = (void *) 28; ++ ++/* ++ * bpf_skb_get_tunnel_opt ++ * ++ * Retrieve tunnel options metadata for the packet associated to ++ * *skb*, and store the raw tunnel option data to the buffer *opt* ++ * of *size*. 
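++ *
++ * An editorial sketch (not part of the upstream description); the
++ * 32-byte buffer size is an arbitrary assumption:
++ *
++ *      ::
++ *
++ *              __u8 opt[32] = {};
++ *              int len;
++ *
++ *              len = bpf_skb_get_tunnel_opt(skb, opt, sizeof(opt));
++ *              if (len < 0)
++ *                      return TC_ACT_OK;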
++ * ++ * This helper can be used with encapsulation devices that can ++ * operate in "collect metadata" mode (please refer to the related ++ * note in the description of **bpf_skb_get_tunnel_key**\ () for ++ * more details). A particular example where this can be used is ++ * in combination with the Geneve encapsulation protocol, where it ++ * allows for pushing (with **bpf_skb_get_tunnel_opt**\ () helper) ++ * and retrieving arbitrary TLVs (Type-Length-Value headers) from ++ * the eBPF program. This allows for full customization of these ++ * headers. ++ * ++ * Returns ++ * The size of the option data retrieved. ++ */ ++static long (*bpf_skb_get_tunnel_opt)(struct __sk_buff *skb, void *opt, __u32 size) = (void *) 29; ++ ++/* ++ * bpf_skb_set_tunnel_opt ++ * ++ * Set tunnel options metadata for the packet associated to *skb* ++ * to the option data contained in the raw buffer *opt* of *size*. ++ * ++ * See also the description of the **bpf_skb_get_tunnel_opt**\ () ++ * helper for additional information. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_skb_set_tunnel_opt)(struct __sk_buff *skb, void *opt, __u32 size) = (void *) 30; ++ ++/* ++ * bpf_skb_change_proto ++ * ++ * Change the protocol of the *skb* to *proto*. Currently ++ * supported are transition from IPv4 to IPv6, and from IPv6 to ++ * IPv4. The helper takes care of the groundwork for the ++ * transition, including resizing the socket buffer. The eBPF ++ * program is expected to fill the new headers, if any, via ++ * **skb_store_bytes**\ () and to recompute the checksums with ++ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ ++ * (). The main case for this helper is to perform NAT64 ++ * operations out of an eBPF program. ++ * ++ * Internally, the GSO type is marked as dodgy so that headers are ++ * checked and segments are recalculated by the GSO/GRO engine. ++ * The size for GSO target is adapted as well. ++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_skb_change_proto)(struct __sk_buff *skb, __be16 proto, __u64 flags) = (void *) 31; ++ ++/* ++ * bpf_skb_change_type ++ * ++ * Change the packet type for the packet associated to *skb*. This ++ * comes down to setting *skb*\ **->pkt_type** to *type*, except ++ * the eBPF program does not have a write access to *skb*\ ++ * **->pkt_type** beside this helper. Using a helper here allows ++ * for graceful handling of errors. ++ * ++ * The major use case is to change incoming *skb*s to ++ * **PACKET_HOST** in a programmatic way instead of having to ++ * recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for ++ * example. ++ * ++ * Note that *type* only allows certain values. At this time, they ++ * are: ++ * ++ * **PACKET_HOST** ++ * Packet is for us. ++ * **PACKET_BROADCAST** ++ * Send packet to all. ++ * **PACKET_MULTICAST** ++ * Send packet to group. ++ * **PACKET_OTHERHOST** ++ * Send packet to someone else. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. 
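++ *
++ * An editorial sketch (not part of the upstream description), marking
++ * an incoming frame as destined to the local host:
++ *
++ *      ::
++ *
++ *              bpf_skb_change_type(skb, PACKET_HOST);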
++ */ ++static long (*bpf_skb_change_type)(struct __sk_buff *skb, __u32 type) = (void *) 32; ++ ++/* ++ * bpf_skb_under_cgroup ++ * ++ * Check whether *skb* is a descendant of the cgroup2 held by ++ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. ++ * ++ * Returns ++ * The return value depends on the result of the test, and can be: ++ * ++ * * 0, if the *skb* failed the cgroup2 descendant test. ++ * * 1, if the *skb* succeeded the cgroup2 descendant test. ++ * * A negative error code, if an error occurred. ++ */ ++static long (*bpf_skb_under_cgroup)(struct __sk_buff *skb, void *map, __u32 index) = (void *) 33; ++ ++/* ++ * bpf_get_hash_recalc ++ * ++ * Retrieve the hash of the packet, *skb*\ **->hash**. If it is ++ * not set, in particular if the hash was cleared due to mangling, ++ * recompute this hash. Later accesses to the hash can be done ++ * directly with *skb*\ **->hash**. ++ * ++ * Calling **bpf_set_hash_invalid**\ (), changing a packet ++ * prototype with **bpf_skb_change_proto**\ (), or calling ++ * **bpf_skb_store_bytes**\ () with the ++ * **BPF_F_INVALIDATE_HASH** are actions susceptible to clear ++ * the hash and to trigger a new computation for the next call to ++ * **bpf_get_hash_recalc**\ (). ++ * ++ * Returns ++ * The 32-bit hash. ++ */ ++static __u32 (*bpf_get_hash_recalc)(struct __sk_buff *skb) = (void *) 34; ++ ++/* ++ * bpf_get_current_task ++ * ++ * Get the current task. ++ * ++ * Returns ++ * A pointer to the current task struct. ++ */ ++static __u64 (*bpf_get_current_task)(void) = (void *) 35; ++ ++/* ++ * bpf_probe_write_user ++ * ++ * Attempt in a safe way to write *len* bytes from the buffer ++ * *src* to *dst* in memory. It only works for threads that are in ++ * user context, and *dst* must be a valid user space address. ++ * ++ * This helper should not be used to implement any kind of ++ * security mechanism because of TOC-TOU attacks, but rather to ++ * debug, divert, and manipulate execution of semi-cooperative ++ * processes. ++ * ++ * Keep in mind that this feature is meant for experiments, and it ++ * has a risk of crashing the system and running programs. ++ * Therefore, when an eBPF program using this helper is attached, ++ * a warning including PID and process name is printed to kernel ++ * logs. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_probe_write_user)(void *dst, const void *src, __u32 len) = (void *) 36; ++ ++/* ++ * bpf_current_task_under_cgroup ++ * ++ * Check whether the probe is being run is the context of a given ++ * subset of the cgroup2 hierarchy. The cgroup2 to test is held by ++ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. ++ * ++ * Returns ++ * The return value depends on the result of the test, and can be: ++ * ++ * * 1, if current task belongs to the cgroup2. ++ * * 0, if current task does not belong to the cgroup2. ++ * * A negative error code, if an error occurred. ++ */ ++static long (*bpf_current_task_under_cgroup)(void *map, __u32 index) = (void *) 37; ++ ++/* ++ * bpf_skb_change_tail ++ * ++ * Resize (trim or grow) the packet associated to *skb* to the ++ * new *len*. The *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * The basic idea is that the helper performs the needed work to ++ * change the size of the packet, then the eBPF program rewrites ++ * the rest via helpers like **bpf_skb_store_bytes**\ (), ++ * **bpf_l3_csum_replace**\ (), **bpf_l3_csum_replace**\ () ++ * and others. 
This helper is a slow path utility intended for ++ * replies with control messages. And because it is targeted for ++ * slow path, the helper itself can afford to be slow: it ++ * implicitly linearizes, unclones and drops offloads from the ++ * *skb*. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_skb_change_tail)(struct __sk_buff *skb, __u32 len, __u64 flags) = (void *) 38; ++ ++/* ++ * bpf_skb_pull_data ++ * ++ * Pull in non-linear data in case the *skb* is non-linear and not ++ * all of *len* are part of the linear section. Make *len* bytes ++ * from *skb* readable and writable. If a zero value is passed for ++ * *len*, then all bytes in the linear part of *skb* will be made ++ * readable and writable. ++ * ++ * This helper is only needed for reading and writing with direct ++ * packet access. ++ * ++ * For direct packet access, testing that offsets to access ++ * are within packet boundaries (test on *skb*\ **->data_end**) is ++ * susceptible to fail if offsets are invalid, or if the requested ++ * data is in non-linear parts of the *skb*. On failure the ++ * program can just bail out, or in the case of a non-linear ++ * buffer, use a helper to make the data available. The ++ * **bpf_skb_load_bytes**\ () helper is a first solution to access ++ * the data. Another one consists in using **bpf_skb_pull_data** ++ * to pull in once the non-linear parts, then retesting and ++ * eventually access the data. ++ * ++ * At the same time, this also makes sure the *skb* is uncloned, ++ * which is a necessary condition for direct write. As this needs ++ * to be an invariant for the write part only, the verifier ++ * detects writes and adds a prologue that is calling ++ * **bpf_skb_pull_data()** to effectively unclone the *skb* from ++ * the very beginning in case it is indeed cloned. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_skb_pull_data)(struct __sk_buff *skb, __u32 len) = (void *) 39; ++ ++/* ++ * bpf_csum_update ++ * ++ * Add the checksum *csum* into *skb*\ **->csum** in case the ++ * driver has supplied a checksum for the entire packet into that ++ * field. Return an error otherwise. This helper is intended to be ++ * used in combination with **bpf_csum_diff**\ (), in particular ++ * when the checksum needs to be updated after data has been ++ * written into the packet through direct packet access. ++ * ++ * Returns ++ * The checksum on success, or a negative error code in case of ++ * failure. ++ */ ++static __s64 (*bpf_csum_update)(struct __sk_buff *skb, __wsum csum) = (void *) 40; ++ ++/* ++ * bpf_set_hash_invalid ++ * ++ * Invalidate the current *skb*\ **->hash**. 
It can be used after ++ * mangling on headers through direct packet access, in order to ++ * indicate that the hash is outdated and to trigger a ++ * recalculation the next time the kernel tries to access this ++ * hash or when the **bpf_get_hash_recalc**\ () helper is called. ++ * ++ * Returns ++ * void. ++ */ ++static void (*bpf_set_hash_invalid)(struct __sk_buff *skb) = (void *) 41; ++ ++/* ++ * bpf_get_numa_node_id ++ * ++ * Return the id of the current NUMA node. The primary use case ++ * for this helper is the selection of sockets for the local NUMA ++ * node, when the program is attached to sockets using the ++ * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**), ++ * but the helper is also available to other eBPF program types, ++ * similarly to **bpf_get_smp_processor_id**\ (). ++ * ++ * Returns ++ * The id of current NUMA node. ++ */ ++static long (*bpf_get_numa_node_id)(void) = (void *) 42; ++ ++/* ++ * bpf_skb_change_head ++ * ++ * Grows headroom of packet associated to *skb* and adjusts the ++ * offset of the MAC header accordingly, adding *len* bytes of ++ * space. It automatically extends and reallocates memory as ++ * required. ++ * ++ * This helper can be used on a layer 3 *skb* to push a MAC header ++ * for redirection into a layer 2 device. ++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_skb_change_head)(struct __sk_buff *skb, __u32 len, __u64 flags) = (void *) 43; ++ ++/* ++ * bpf_xdp_adjust_head ++ * ++ * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that ++ * it is possible to use a negative value for *delta*. This helper ++ * can be used to prepare the packet for pushing or popping ++ * headers. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_xdp_adjust_head)(struct xdp_md *xdp_md, int delta) = (void *) 44; ++ ++/* ++ * bpf_probe_read_str ++ * ++ * Copy a NUL terminated string from an unsafe kernel address ++ * *unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for ++ * more details. ++ * ++ * Generally, use **bpf_probe_read_user_str**\ () or ++ * **bpf_probe_read_kernel_str**\ () instead. ++ * ++ * Returns ++ * On success, the strictly positive length of the string, ++ * including the trailing NUL character. On error, a negative ++ * value. ++ */ ++static long (*bpf_probe_read_str)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 45; ++ ++/* ++ * bpf_get_socket_cookie ++ * ++ * If the **struct sk_buff** pointed by *skb* has a known socket, ++ * retrieve the cookie (generated by the kernel) of this socket. ++ * If no cookie has been set yet, generate a new cookie. Once ++ * generated, the socket cookie remains stable for the life of the ++ * socket. 
This helper can be useful for monitoring per socket ++ * networking traffic statistics as it provides a global socket ++ * identifier that can be assumed unique. ++ * ++ * Returns ++ * A 8-byte long unique number on success, or 0 if the socket ++ * field is missing inside *skb*. ++ */ ++static __u64 (*bpf_get_socket_cookie)(void *ctx) = (void *) 46; ++ ++/* ++ * bpf_get_socket_uid ++ * ++ * Get the owner UID of the socked associated to *skb*. ++ * ++ * Returns ++ * The owner UID of the socket associated to *skb*. If the socket ++ * is **NULL**, or if it is not a full socket (i.e. if it is a ++ * time-wait or a request socket instead), **overflowuid** value ++ * is returned (note that **overflowuid** might also be the actual ++ * UID value for the socket). ++ */ ++static __u32 (*bpf_get_socket_uid)(struct __sk_buff *skb) = (void *) 47; ++ ++/* ++ * bpf_set_hash ++ * ++ * Set the full hash for *skb* (set the field *skb*\ **->hash**) ++ * to value *hash*. ++ * ++ * Returns ++ * 0 ++ */ ++static long (*bpf_set_hash)(struct __sk_buff *skb, __u32 hash) = (void *) 48; ++ ++/* ++ * bpf_setsockopt ++ * ++ * Emulate a call to **setsockopt()** on the socket associated to ++ * *bpf_socket*, which must be a full socket. The *level* at ++ * which the option resides and the name *optname* of the option ++ * must be specified, see **setsockopt(2)** for more information. ++ * The option value of length *optlen* is pointed by *optval*. ++ * ++ * *bpf_socket* should be one of the following: ++ * ++ * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. ++ * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** ++ * and **BPF_CGROUP_INET6_CONNECT**. ++ * ++ * This helper actually implements a subset of **setsockopt()**. ++ * It supports the following *level*\ s: ++ * ++ * * **SOL_SOCKET**, which supports the following *optname*\ s: ++ * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, ++ * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**, ++ * **SO_BINDTODEVICE**, **SO_KEEPALIVE**. ++ * * **IPPROTO_TCP**, which supports the following *optname*\ s: ++ * **TCP_CONGESTION**, **TCP_BPF_IW**, ++ * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**, ++ * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**, ++ * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**. ++ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. ++ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_setsockopt)(void *bpf_socket, int level, int optname, void *optval, int optlen) = (void *) 49; ++ ++/* ++ * bpf_skb_adjust_room ++ * ++ * Grow or shrink the room for data in the packet associated to ++ * *skb* by *len_diff*, and according to the selected *mode*. ++ * ++ * By default, the helper will reset any offloaded checksum ++ * indicator of the skb to CHECKSUM_NONE. This can be avoided ++ * by the following flag: ++ * ++ * * **BPF_F_ADJ_ROOM_NO_CSUM_RESET**: Do not reset offloaded ++ * checksum data of the skb to CHECKSUM_NONE. ++ * ++ * There are two supported modes at this time: ++ * ++ * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer ++ * (room space is added or removed below the layer 2 header). ++ * ++ * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer ++ * (room space is added or removed below the layer 3 header). ++ * ++ * The following flags are supported at this time: ++ * ++ * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size. 
++ * Adjusting mss in this way is not allowed for datagrams. ++ * ++ * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4**, ++ * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6**: ++ * Any new space is reserved to hold a tunnel header. ++ * Configure skb offsets and other fields accordingly. ++ * ++ * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE**, ++ * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP**: ++ * Use with ENCAP_L3 flags to further specify the tunnel type. ++ * ++ * * **BPF_F_ADJ_ROOM_ENCAP_L2**\ (*len*): ++ * Use with ENCAP_L3/L4 flags to further specify the tunnel ++ * type; *len* is the length of the inner MAC header. ++ * ++ * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**: ++ * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the ++ * L2 type as Ethernet. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_skb_adjust_room)(struct __sk_buff *skb, __s32 len_diff, __u32 mode, __u64 flags) = (void *) 50; ++ ++/* ++ * bpf_redirect_map ++ * ++ * Redirect the packet to the endpoint referenced by *map* at ++ * index *key*. Depending on its type, this *map* can contain ++ * references to net devices (for forwarding packets through other ++ * ports), or to CPUs (for redirecting XDP frames to another CPU; ++ * but this is only implemented for native XDP (with driver ++ * support) as of this writing). ++ * ++ * The lower two bits of *flags* are used as the return code if ++ * the map lookup fails. This is so that the return value can be ++ * one of the XDP program return codes up to **XDP_TX**, as chosen ++ * by the caller. The higher bits of *flags* can be set to ++ * BPF_F_BROADCAST or BPF_F_EXCLUDE_INGRESS as defined below. ++ * ++ * With BPF_F_BROADCAST the packet will be broadcasted to all the ++ * interfaces in the map, with BPF_F_EXCLUDE_INGRESS the ingress ++ * interface will be excluded when do broadcasting. ++ * ++ * See also **bpf_redirect**\ (), which only supports redirecting ++ * to an ifindex, but doesn't require a map to do so. ++ * ++ * Returns ++ * **XDP_REDIRECT** on success, or the value of the two lower bits ++ * of the *flags* argument on error. ++ */ ++static long (*bpf_redirect_map)(void *map, __u32 key, __u64 flags) = (void *) 51; ++ ++/* ++ * bpf_sk_redirect_map ++ * ++ * Redirect the packet to the socket referenced by *map* (of type ++ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and ++ * egress interfaces can be used for redirection. The ++ * **BPF_F_INGRESS** value in *flags* is used to make the ++ * distinction (ingress path is selected if the flag is present, ++ * egress path otherwise). This is the only flag supported for now. ++ * ++ * Returns ++ * **SK_PASS** on success, or **SK_DROP** on error. ++ */ ++static long (*bpf_sk_redirect_map)(struct __sk_buff *skb, void *map, __u32 key, __u64 flags) = (void *) 52; ++ ++/* ++ * bpf_sock_map_update ++ * ++ * Add an entry to, or update a *map* referencing sockets. The ++ * *skops* is used as a new value for the entry associated to ++ * *key*. *flags* is one of: ++ * ++ * **BPF_NOEXIST** ++ * The entry for *key* must not exist in the map. ++ * **BPF_EXIST** ++ * The entry for *key* must already exist in the map. ++ * **BPF_ANY** ++ * No condition on the existence of the entry for *key*. 
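++ *
++ * An editorial sketch (not part of the upstream description), assuming
++ * a **BPF_PROG_TYPE_SOCK_OPS** program *skops* and a sockmap named
++ * *sock_map* defined elsewhere, keyed by a local index:
++ *
++ *      ::
++ *
++ *              __u32 key = 0;
++ *
++ *              if (skops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
++ *                      bpf_sock_map_update(skops, &sock_map, &key, BPF_ANY);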
++ * ++ * If the *map* has eBPF programs (parser and verdict), those will ++ * be inherited by the socket being added. If the socket is ++ * already attached to eBPF programs, this results in an error. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_sock_map_update)(struct bpf_sock_ops *skops, void *map, void *key, __u64 flags) = (void *) 53; ++ ++/* ++ * bpf_xdp_adjust_meta ++ * ++ * Adjust the address pointed by *xdp_md*\ **->data_meta** by ++ * *delta* (which can be positive or negative). Note that this ++ * operation modifies the address stored in *xdp_md*\ **->data**, ++ * so the latter must be loaded only after the helper has been ++ * called. ++ * ++ * The use of *xdp_md*\ **->data_meta** is optional and programs ++ * are not required to use it. The rationale is that when the ++ * packet is processed with XDP (e.g. as DoS filter), it is ++ * possible to push further meta data along with it before passing ++ * to the stack, and to give the guarantee that an ingress eBPF ++ * program attached as a TC classifier on the same device can pick ++ * this up for further post-processing. Since TC works with socket ++ * buffers, it remains possible to set from XDP the **mark** or ++ * **priority** pointers, or other pointers for the socket buffer. ++ * Having this scratch space generic and programmable allows for ++ * more flexibility as the user is free to store whatever meta ++ * data they need. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_xdp_adjust_meta)(struct xdp_md *xdp_md, int delta) = (void *) 54; ++ ++/* ++ * bpf_perf_event_read_value ++ * ++ * Read the value of a perf event counter, and store it into *buf* ++ * of size *buf_size*. This helper relies on a *map* of type ++ * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event ++ * counter is selected when *map* is updated with perf event file ++ * descriptors. The *map* is an array whose size is the number of ++ * available CPUs, and each cell contains a value relative to one ++ * CPU. The value to retrieve is indicated by *flags*, that ++ * contains the index of the CPU to look up, masked with ++ * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to ++ * **BPF_F_CURRENT_CPU** to indicate that the value for the ++ * current CPU should be retrieved. ++ * ++ * This helper behaves in a way close to ++ * **bpf_perf_event_read**\ () helper, save that instead of ++ * just returning the value observed, it fills the *buf* ++ * structure. This allows for additional data to be retrieved: in ++ * particular, the enabled and running times (in *buf*\ ++ * **->enabled** and *buf*\ **->running**, respectively) are ++ * copied. In general, **bpf_perf_event_read_value**\ () is ++ * recommended over **bpf_perf_event_read**\ (), which has some ++ * ABI issues and provides fewer functionalities. ++ * ++ * These values are interesting, because hardware PMU (Performance ++ * Monitoring Unit) counters are limited resources. When there are ++ * more PMU based perf events opened than available counters, ++ * kernel will multiplex these events so each event gets certain ++ * percentage (but not all) of the PMU time. 
In case that ++ * multiplexing happens, the number of samples or counter value ++ * will not reflect the case compared to when no multiplexing ++ * occurs. This makes comparison between different runs difficult. ++ * Typically, the counter value should be normalized before ++ * comparing to other experiments. The usual normalization is done ++ * as follows. ++ * ++ * :: ++ * ++ * normalized_counter = counter * t_enabled / t_running ++ * ++ * Where t_enabled is the time enabled for event and t_running is ++ * the time running for event since last normalization. The ++ * enabled and running times are accumulated since the perf event ++ * open. To achieve scaling factor between two invocations of an ++ * eBPF program, users can use CPU id as the key (which is ++ * typical for perf array usage model) to remember the previous ++ * value and do the calculation inside the eBPF program. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_perf_event_read_value)(void *map, __u64 flags, struct bpf_perf_event_value *buf, __u32 buf_size) = (void *) 55; ++ ++/* ++ * bpf_perf_prog_read_value ++ * ++ * For en eBPF program attached to a perf event, retrieve the ++ * value of the event counter associated to *ctx* and store it in ++ * the structure pointed by *buf* and of size *buf_size*. Enabled ++ * and running times are also stored in the structure (see ++ * description of helper **bpf_perf_event_read_value**\ () for ++ * more details). ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_perf_prog_read_value)(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, __u32 buf_size) = (void *) 56; ++ ++/* ++ * bpf_getsockopt ++ * ++ * Emulate a call to **getsockopt()** on the socket associated to ++ * *bpf_socket*, which must be a full socket. The *level* at ++ * which the option resides and the name *optname* of the option ++ * must be specified, see **getsockopt(2)** for more information. ++ * The retrieved value is stored in the structure pointed by ++ * *opval* and of length *optlen*. ++ * ++ * *bpf_socket* should be one of the following: ++ * ++ * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. ++ * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** ++ * and **BPF_CGROUP_INET6_CONNECT**. ++ * ++ * This helper actually implements a subset of **getsockopt()**. ++ * It supports the following *level*\ s: ++ * ++ * * **IPPROTO_TCP**, which supports *optname* ++ * **TCP_CONGESTION**. ++ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. ++ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_getsockopt)(void *bpf_socket, int level, int optname, void *optval, int optlen) = (void *) 57; ++ ++/* ++ * bpf_override_return ++ * ++ * Used for error injection, this helper uses kprobes to override ++ * the return value of the probed function, and to set it to *rc*. ++ * The first argument is the context *regs* on which the kprobe ++ * works. ++ * ++ * This helper works by setting the PC (program counter) ++ * to an override function which is run in place of the original ++ * probed function. This means the probed function is not run at ++ * all. The replacement function just returns with the required ++ * value. ++ * ++ * This helper has security implications, and thus is subject to ++ * restrictions. 
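The multiplexing normalization described for bpf_perf_event_read_value() can be applied directly inside the program. A minimal sketch, assuming user space has populated a BPF_MAP_TYPE_PERF_EVENT_ARRAY named counters with one hardware-counter fd per CPU; the kprobe attach point is only an example.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
    __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
    __uint(max_entries, 128);            /* sized for up to 128 CPUs here */
    __type(key, int);
    __type(value, __u32);
} counters SEC(".maps");

SEC("kprobe/do_sys_openat2")             /* example attach point only */
int read_scaled_counter(void *ctx)
{
    struct bpf_perf_event_value buf = {};
    __u64 scaled;

    if (bpf_perf_event_read_value(&counters, BPF_F_CURRENT_CPU,
                                  &buf, sizeof(buf)))
        return 0;

    /* normalized_counter = counter * t_enabled / t_running */
    scaled = buf.running ? buf.counter * buf.enabled / buf.running
                         : buf.counter;
    bpf_printk("scaled counter: %llu", scaled);
    return 0;
}

char LICENSE[] SEC("license") = "GPL";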
It is only available if the kernel was compiled ++ * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration ++ * option, and in this case it only works on functions tagged with ++ * **ALLOW_ERROR_INJECTION** in the kernel code. ++ * ++ * Also, the helper is only available for the architectures having ++ * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing, ++ * x86 architecture is the only one to support this feature. ++ * ++ * Returns ++ * 0 ++ */ ++static long (*bpf_override_return)(struct pt_regs *regs, __u64 rc) = (void *) 58; ++ ++/* ++ * bpf_sock_ops_cb_flags_set ++ * ++ * Attempt to set the value of the **bpf_sock_ops_cb_flags** field ++ * for the full TCP socket associated to *bpf_sock_ops* to ++ * *argval*. ++ * ++ * The primary use of this field is to determine if there should ++ * be calls to eBPF programs of type ++ * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP ++ * code. A program of the same type can change its value, per ++ * connection and as necessary, when the connection is ++ * established. This field is directly accessible for reading, but ++ * this helper must be used for updates in order to return an ++ * error if an eBPF program tries to set a callback that is not ++ * supported in the current kernel. ++ * ++ * *argval* is a flag array which can combine these flags: ++ * ++ * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) ++ * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) ++ * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) ++ * * **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT) ++ * ++ * Therefore, this function can be used to clear a callback flag by ++ * setting the appropriate bit to zero. e.g. to disable the RTO ++ * callback: ++ * ++ * **bpf_sock_ops_cb_flags_set(bpf_sock,** ++ * **bpf_sock->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)** ++ * ++ * Here are some examples of where one could call such eBPF ++ * program: ++ * ++ * * When RTO fires. ++ * * When a packet is retransmitted. ++ * * When the connection terminates. ++ * * When a packet is sent. ++ * * When a packet is received. ++ * ++ * Returns ++ * Code **-EINVAL** if the socket is not a full TCP socket; ++ * otherwise, a positive number containing the bits that could not ++ * be set is returned (which comes down to 0 if all bits were set ++ * as required). ++ */ ++static long (*bpf_sock_ops_cb_flags_set)(struct bpf_sock_ops *bpf_sock, int argval) = (void *) 59; ++ ++/* ++ * bpf_msg_redirect_map ++ * ++ * This helper is used in programs implementing policies at the ++ * socket level. If the message *msg* is allowed to pass (i.e. if ++ * the verdict eBPF program returns **SK_PASS**), redirect it to ++ * the socket referenced by *map* (of type ++ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and ++ * egress interfaces can be used for redirection. The ++ * **BPF_F_INGRESS** value in *flags* is used to make the ++ * distinction (ingress path is selected if the flag is present, ++ * egress path otherwise). This is the only flag supported for now. ++ * ++ * Returns ++ * **SK_PASS** on success, or **SK_DROP** on error. ++ */ ++static long (*bpf_msg_redirect_map)(struct sk_msg_md *msg, void *map, __u32 key, __u64 flags) = (void *) 60; ++ ++/* ++ * bpf_msg_apply_bytes ++ * ++ * For socket policies, apply the verdict of the eBPF program to ++ * the next *bytes* (number of bytes) of message *msg*. 
++ * ++ * For example, this helper can be used in the following cases: ++ * ++ * * A single **sendmsg**\ () or **sendfile**\ () system call ++ * contains multiple logical messages that the eBPF program is ++ * supposed to read and for which it should apply a verdict. ++ * * An eBPF program only cares to read the first *bytes* of a ++ * *msg*. If the message has a large payload, then setting up ++ * and calling the eBPF program repeatedly for all bytes, even ++ * though the verdict is already known, would create unnecessary ++ * overhead. ++ * ++ * When called from within an eBPF program, the helper sets a ++ * counter internal to the BPF infrastructure, that is used to ++ * apply the last verdict to the next *bytes*. If *bytes* is ++ * smaller than the current data being processed from a ++ * **sendmsg**\ () or **sendfile**\ () system call, the first ++ * *bytes* will be sent and the eBPF program will be re-run with ++ * the pointer for start of data pointing to byte number *bytes* ++ * **+ 1**. If *bytes* is larger than the current data being ++ * processed, then the eBPF verdict will be applied to multiple ++ * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are ++ * consumed. ++ * ++ * Note that if a socket closes with the internal counter holding ++ * a non-zero value, this is not a problem because data is not ++ * being buffered for *bytes* and is sent as it is received. ++ * ++ * Returns ++ * 0 ++ */ ++static long (*bpf_msg_apply_bytes)(struct sk_msg_md *msg, __u32 bytes) = (void *) 61; ++ ++/* ++ * bpf_msg_cork_bytes ++ * ++ * For socket policies, prevent the execution of the verdict eBPF ++ * program for message *msg* until *bytes* (byte number) have been ++ * accumulated. ++ * ++ * This can be used when one needs a specific number of bytes ++ * before a verdict can be assigned, even if the data spans ++ * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme ++ * case would be a user calling **sendmsg**\ () repeatedly with ++ * 1-byte long message segments. Obviously, this is bad for ++ * performance, but it is still valid. If the eBPF program needs ++ * *bytes* bytes to validate a header, this helper can be used to ++ * prevent the eBPF program to be called again until *bytes* have ++ * been accumulated. ++ * ++ * Returns ++ * 0 ++ */ ++static long (*bpf_msg_cork_bytes)(struct sk_msg_md *msg, __u32 bytes) = (void *) 62; ++ ++/* ++ * bpf_msg_pull_data ++ * ++ * For socket policies, pull in non-linear data from user space ++ * for *msg* and set pointers *msg*\ **->data** and *msg*\ ++ * **->data_end** to *start* and *end* bytes offsets into *msg*, ++ * respectively. ++ * ++ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a ++ * *msg* it can only parse data that the (**data**, **data_end**) ++ * pointers have already consumed. For **sendmsg**\ () hooks this ++ * is likely the first scatterlist element. But for calls relying ++ * on the **sendpage** handler (e.g. **sendfile**\ ()) this will ++ * be the range (**0**, **0**) because the data is shared with ++ * user space and by default the objective is to avoid allowing ++ * user space to modify data while (or after) eBPF verdict is ++ * being decided. This helper can be used to pull in data and to ++ * set the start and end pointer to given values. Data will be ++ * copied if necessary (i.e. if data was not linear and if start ++ * and end pointers do not point to the same chunk). ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. 
Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_msg_pull_data)(struct sk_msg_md *msg, __u32 start, __u32 end, __u64 flags) = (void *) 63; ++ ++/* ++ * bpf_bind ++ * ++ * Bind the socket associated to *ctx* to the address pointed by ++ * *addr*, of length *addr_len*. This allows for making outgoing ++ * connection from the desired IP address, which can be useful for ++ * example when all processes inside a cgroup should use one ++ * single IP address on a host that has multiple IP configured. ++ * ++ * This helper works for IPv4 and IPv6, TCP and UDP sockets. The ++ * domain (*addr*\ **->sa_family**) must be **AF_INET** (or ++ * **AF_INET6**). It's advised to pass zero port (**sin_port** ++ * or **sin6_port**) which triggers IP_BIND_ADDRESS_NO_PORT-like ++ * behavior and lets the kernel efficiently pick up an unused ++ * port as long as 4-tuple is unique. Passing non-zero port might ++ * lead to degraded performance. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_bind)(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) = (void *) 64; ++ ++/* ++ * bpf_xdp_adjust_tail ++ * ++ * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is ++ * possible to both shrink and grow the packet tail. ++ * Shrink done via *delta* being a negative integer. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_xdp_adjust_tail)(struct xdp_md *xdp_md, int delta) = (void *) 65; ++ ++/* ++ * bpf_skb_get_xfrm_state ++ * ++ * Retrieve the XFRM state (IP transform framework, see also ++ * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*. ++ * ++ * The retrieved value is stored in the **struct bpf_xfrm_state** ++ * pointed by *xfrm_state* and of length *size*. ++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * This helper is available only if the kernel was compiled with ++ * **CONFIG_XFRM** configuration option. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_skb_get_xfrm_state)(struct __sk_buff *skb, __u32 index, struct bpf_xfrm_state *xfrm_state, __u32 size, __u64 flags) = (void *) 66; ++ ++/* ++ * bpf_get_stack ++ * ++ * Return a user or a kernel stack in bpf program provided buffer. ++ * To achieve this, the helper needs *ctx*, which is a pointer ++ * to the context on which the tracing program is executed. ++ * To store the stacktrace, the bpf program provides *buf* with ++ * a nonnegative *size*. ++ * ++ * The last argument, *flags*, holds the number of stack frames to ++ * skip (from 0 to 255), masked with ++ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set ++ * the following flags: ++ * ++ * **BPF_F_USER_STACK** ++ * Collect a user space stack instead of a kernel stack. 
++ * **BPF_F_USER_BUILD_ID** ++ * Collect buildid+offset instead of ips for user stack, ++ * only valid if **BPF_F_USER_STACK** is also specified. ++ * ++ * **bpf_get_stack**\ () can collect up to ++ * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject ++ * to sufficient large buffer size. Note that ++ * this limit can be controlled with the **sysctl** program, and ++ * that it should be manually increased in order to profile long ++ * user stacks (such as stacks for Java programs). To do so, use: ++ * ++ * :: ++ * ++ * # sysctl kernel.perf_event_max_stack= ++ * ++ * Returns ++ * The non-negative copied *buf* length equal to or less than ++ * *size* on success, or a negative error in case of failure. ++ */ ++static long (*bpf_get_stack)(void *ctx, void *buf, __u32 size, __u64 flags) = (void *) 67; ++ ++/* ++ * bpf_skb_load_bytes_relative ++ * ++ * This helper is similar to **bpf_skb_load_bytes**\ () in that ++ * it provides an easy way to load *len* bytes from *offset* ++ * from the packet associated to *skb*, into the buffer pointed ++ * by *to*. The difference to **bpf_skb_load_bytes**\ () is that ++ * a fifth argument *start_header* exists in order to select a ++ * base offset to start from. *start_header* can be one of: ++ * ++ * **BPF_HDR_START_MAC** ++ * Base offset to load data from is *skb*'s mac header. ++ * **BPF_HDR_START_NET** ++ * Base offset to load data from is *skb*'s network header. ++ * ++ * In general, "direct packet access" is the preferred method to ++ * access packet data, however, this helper is in particular useful ++ * in socket filters where *skb*\ **->data** does not always point ++ * to the start of the mac header and where "direct packet access" ++ * is not available. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_skb_load_bytes_relative)(const void *skb, __u32 offset, void *to, __u32 len, __u32 start_header) = (void *) 68; ++ ++/* ++ * bpf_fib_lookup ++ * ++ * Do FIB lookup in kernel tables using parameters in *params*. ++ * If lookup is successful and result shows packet is to be ++ * forwarded, the neighbor tables are searched for the nexthop. ++ * If successful (ie., FIB lookup shows forwarding and nexthop ++ * is resolved), the nexthop address is returned in ipv4_dst ++ * or ipv6_dst based on family, smac is set to mac address of ++ * egress device, dmac is set to nexthop mac address, rt_metric ++ * is set to metric from route (IPv4/IPv6 only), and ifindex ++ * is set to the device index of the nexthop from the FIB lookup. ++ * ++ * *plen* argument is the size of the passed in struct. ++ * *flags* argument can be a combination of one or more of the ++ * following values: ++ * ++ * **BPF_FIB_LOOKUP_DIRECT** ++ * Do a direct table lookup vs full lookup using FIB ++ * rules. ++ * **BPF_FIB_LOOKUP_OUTPUT** ++ * Perform lookup from an egress perspective (default is ++ * ingress). ++ * ++ * *ctx* is either **struct xdp_md** for XDP programs or ++ * **struct sk_buff** tc cls_act programs. ++ * ++ * Returns ++ * * < 0 if any input argument is invalid ++ * * 0 on success (packet is forwarded, nexthop neighbor exists) ++ * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the ++ * packet is not forwarded or needs assist from full stack ++ * ++ * If lookup fails with BPF_FIB_LKUP_RET_FRAG_NEEDED, then the MTU ++ * was exceeded and output params->mtu_result contains the MTU. 
++ */ ++static long (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params, int plen, __u32 flags) = (void *) 69; ++ ++/* ++ * bpf_sock_hash_update ++ * ++ * Add an entry to, or update a sockhash *map* referencing sockets. ++ * The *skops* is used as a new value for the entry associated to ++ * *key*. *flags* is one of: ++ * ++ * **BPF_NOEXIST** ++ * The entry for *key* must not exist in the map. ++ * **BPF_EXIST** ++ * The entry for *key* must already exist in the map. ++ * **BPF_ANY** ++ * No condition on the existence of the entry for *key*. ++ * ++ * If the *map* has eBPF programs (parser and verdict), those will ++ * be inherited by the socket being added. If the socket is ++ * already attached to eBPF programs, this results in an error. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_sock_hash_update)(struct bpf_sock_ops *skops, void *map, void *key, __u64 flags) = (void *) 70; ++ ++/* ++ * bpf_msg_redirect_hash ++ * ++ * This helper is used in programs implementing policies at the ++ * socket level. If the message *msg* is allowed to pass (i.e. if ++ * the verdict eBPF program returns **SK_PASS**), redirect it to ++ * the socket referenced by *map* (of type ++ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and ++ * egress interfaces can be used for redirection. The ++ * **BPF_F_INGRESS** value in *flags* is used to make the ++ * distinction (ingress path is selected if the flag is present, ++ * egress path otherwise). This is the only flag supported for now. ++ * ++ * Returns ++ * **SK_PASS** on success, or **SK_DROP** on error. ++ */ ++static long (*bpf_msg_redirect_hash)(struct sk_msg_md *msg, void *map, void *key, __u64 flags) = (void *) 71; ++ ++/* ++ * bpf_sk_redirect_hash ++ * ++ * This helper is used in programs implementing policies at the ++ * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. ++ * if the verdict eBPF program returns **SK_PASS**), redirect it ++ * to the socket referenced by *map* (of type ++ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and ++ * egress interfaces can be used for redirection. The ++ * **BPF_F_INGRESS** value in *flags* is used to make the ++ * distinction (ingress path is selected if the flag is present, ++ * egress otherwise). This is the only flag supported for now. ++ * ++ * Returns ++ * **SK_PASS** on success, or **SK_DROP** on error. ++ */ ++static long (*bpf_sk_redirect_hash)(struct __sk_buff *skb, void *map, void *key, __u64 flags) = (void *) 72; ++ ++/* ++ * bpf_lwt_push_encap ++ * ++ * Encapsulate the packet associated to *skb* within a Layer 3 ++ * protocol header. This header is provided in the buffer at ++ * address *hdr*, with *len* its size in bytes. *type* indicates ++ * the protocol of the header and can be one of: ++ * ++ * **BPF_LWT_ENCAP_SEG6** ++ * IPv6 encapsulation with Segment Routing Header ++ * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH, ++ * the IPv6 header is computed by the kernel. ++ * **BPF_LWT_ENCAP_SEG6_INLINE** ++ * Only works if *skb* contains an IPv6 packet. Insert a ++ * Segment Routing Header (**struct ipv6_sr_hdr**) inside ++ * the IPv6 header. ++ * **BPF_LWT_ENCAP_IP** ++ * IP encapsulation (GRE/GUE/IPIP/etc). The outer header ++ * must be IPv4 or IPv6, followed by zero or more ++ * additional headers, up to **LWT_BPF_MAX_HEADROOM** ++ * total bytes in all prepended headers. 
Please note that ++ * if **skb_is_gso**\ (*skb*) is true, no more than two ++ * headers can be prepended, and the inner header, if ++ * present, should be either GRE or UDP/GUE. ++ * ++ * **BPF_LWT_ENCAP_SEG6**\ \* types can be called by BPF programs ++ * of type **BPF_PROG_TYPE_LWT_IN**; **BPF_LWT_ENCAP_IP** type can ++ * be called by bpf programs of types **BPF_PROG_TYPE_LWT_IN** and ++ * **BPF_PROG_TYPE_LWT_XMIT**. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_lwt_push_encap)(struct __sk_buff *skb, __u32 type, void *hdr, __u32 len) = (void *) 73; ++ ++/* ++ * bpf_lwt_seg6_store_bytes ++ * ++ * Store *len* bytes from address *from* into the packet ++ * associated to *skb*, at *offset*. Only the flags, tag and TLVs ++ * inside the outermost IPv6 Segment Routing Header can be ++ * modified through this helper. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_lwt_seg6_store_bytes)(struct __sk_buff *skb, __u32 offset, const void *from, __u32 len) = (void *) 74; ++ ++/* ++ * bpf_lwt_seg6_adjust_srh ++ * ++ * Adjust the size allocated to TLVs in the outermost IPv6 ++ * Segment Routing Header contained in the packet associated to ++ * *skb*, at position *offset* by *delta* bytes. Only offsets ++ * after the segments are accepted. *delta* can be as well ++ * positive (growing) as negative (shrinking). ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_lwt_seg6_adjust_srh)(struct __sk_buff *skb, __u32 offset, __s32 delta) = (void *) 75; ++ ++/* ++ * bpf_lwt_seg6_action ++ * ++ * Apply an IPv6 Segment Routing action of type *action* to the ++ * packet associated to *skb*. Each action takes a parameter ++ * contained at address *param*, and of length *param_len* bytes. ++ * *action* can be one of: ++ * ++ * **SEG6_LOCAL_ACTION_END_X** ++ * End.X action: Endpoint with Layer-3 cross-connect. ++ * Type of *param*: **struct in6_addr**. ++ * **SEG6_LOCAL_ACTION_END_T** ++ * End.T action: Endpoint with specific IPv6 table lookup. ++ * Type of *param*: **int**. ++ * **SEG6_LOCAL_ACTION_END_B6** ++ * End.B6 action: Endpoint bound to an SRv6 policy. ++ * Type of *param*: **struct ipv6_sr_hdr**. ++ * **SEG6_LOCAL_ACTION_END_B6_ENCAP** ++ * End.B6.Encap action: Endpoint bound to an SRv6 ++ * encapsulation policy. ++ * Type of *param*: **struct ipv6_sr_hdr**. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. 
Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_lwt_seg6_action)(struct __sk_buff *skb, __u32 action, void *param, __u32 param_len) = (void *) 76; ++ ++/* ++ * bpf_rc_repeat ++ * ++ * This helper is used in programs implementing IR decoding, to ++ * report a successfully decoded repeat key message. This delays ++ * the generation of a key up event for previously generated ++ * key down event. ++ * ++ * Some IR protocols like NEC have a special IR message for ++ * repeating last button, for when a button is held down. ++ * ++ * The *ctx* should point to the lirc sample as passed into ++ * the program. ++ * ++ * This helper is only available is the kernel was compiled with ++ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to ++ * "**y**". ++ * ++ * Returns ++ * 0 ++ */ ++static long (*bpf_rc_repeat)(void *ctx) = (void *) 77; ++ ++/* ++ * bpf_rc_keydown ++ * ++ * This helper is used in programs implementing IR decoding, to ++ * report a successfully decoded key press with *scancode*, ++ * *toggle* value in the given *protocol*. The scancode will be ++ * translated to a keycode using the rc keymap, and reported as ++ * an input key down event. After a period a key up event is ++ * generated. This period can be extended by calling either ++ * **bpf_rc_keydown**\ () again with the same values, or calling ++ * **bpf_rc_repeat**\ (). ++ * ++ * Some protocols include a toggle bit, in case the button was ++ * released and pressed again between consecutive scancodes. ++ * ++ * The *ctx* should point to the lirc sample as passed into ++ * the program. ++ * ++ * The *protocol* is the decoded protocol number (see ++ * **enum rc_proto** for some predefined values). ++ * ++ * This helper is only available is the kernel was compiled with ++ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to ++ * "**y**". ++ * ++ * Returns ++ * 0 ++ */ ++static long (*bpf_rc_keydown)(void *ctx, __u32 protocol, __u64 scancode, __u32 toggle) = (void *) 78; ++ ++/* ++ * bpf_skb_cgroup_id ++ * ++ * Return the cgroup v2 id of the socket associated with the *skb*. ++ * This is roughly similar to the **bpf_get_cgroup_classid**\ () ++ * helper for cgroup v1 by providing a tag resp. identifier that ++ * can be matched on or used for map lookups e.g. to implement ++ * policy. The cgroup v2 id of a given path in the hierarchy is ++ * exposed in user space through the f_handle API in order to get ++ * to the same 64-bit id. ++ * ++ * This helper can be used on TC egress path, but not on ingress, ++ * and is available only if the kernel was compiled with the ++ * **CONFIG_SOCK_CGROUP_DATA** configuration option. ++ * ++ * Returns ++ * The id is returned or 0 in case the id could not be retrieved. ++ */ ++static __u64 (*bpf_skb_cgroup_id)(struct __sk_buff *skb) = (void *) 79; ++ ++/* ++ * bpf_get_current_cgroup_id ++ * ++ * Get the current cgroup id based on the cgroup within which ++ * the current task is running. ++ * ++ * Returns ++ * A 64-bit integer containing the current cgroup id based ++ * on the cgroup within which the current task is running. ++ */ ++static __u64 (*bpf_get_current_cgroup_id)(void) = (void *) 80; ++ ++/* ++ * bpf_get_local_storage ++ * ++ * Get the pointer to the local storage area. 
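As a usage sketch for bpf_skb_cgroup_id() on the TC egress path: the allowed_cgroup_id global is hypothetical and would be filled in by the loader, for example from a cgroup directory's inode number.

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

/* Hypothetical id set by user space before attaching the program. */
const volatile __u64 allowed_cgroup_id = 0;

SEC("tc")
int tc_egress_filter(struct __sk_buff *skb)
{
    __u64 id = bpf_skb_cgroup_id(skb);

    /* Drop egress traffic from sockets outside the allowed cgroup. */
    if (allowed_cgroup_id && id != allowed_cgroup_id)
        return TC_ACT_SHOT;
    return TC_ACT_OK;
}

char LICENSE[] SEC("license") = "GPL";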
++ * The type and the size of the local storage is defined ++ * by the *map* argument. ++ * The *flags* meaning is specific for each map type, ++ * and has to be 0 for cgroup local storage. ++ * ++ * Depending on the BPF program type, a local storage area ++ * can be shared between multiple instances of the BPF program, ++ * running simultaneously. ++ * ++ * A user should care about the synchronization by himself. ++ * For example, by using the **BPF_ATOMIC** instructions to alter ++ * the shared data. ++ * ++ * Returns ++ * A pointer to the local storage area. ++ */ ++static void *(*bpf_get_local_storage)(void *map, __u64 flags) = (void *) 81; ++ ++/* ++ * bpf_sk_select_reuseport ++ * ++ * Select a **SO_REUSEPORT** socket from a ++ * **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*. ++ * It checks the selected socket is matching the incoming ++ * request in the socket buffer. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_sk_select_reuseport)(struct sk_reuseport_md *reuse, void *map, void *key, __u64 flags) = (void *) 82; ++ ++/* ++ * bpf_skb_ancestor_cgroup_id ++ * ++ * Return id of cgroup v2 that is ancestor of cgroup associated ++ * with the *skb* at the *ancestor_level*. The root cgroup is at ++ * *ancestor_level* zero and each step down the hierarchy ++ * increments the level. If *ancestor_level* == level of cgroup ++ * associated with *skb*, then return value will be same as that ++ * of **bpf_skb_cgroup_id**\ (). ++ * ++ * The helper is useful to implement policies based on cgroups ++ * that are upper in hierarchy than immediate cgroup associated ++ * with *skb*. ++ * ++ * The format of returned id and helper limitations are same as in ++ * **bpf_skb_cgroup_id**\ (). ++ * ++ * Returns ++ * The id is returned or 0 in case the id could not be retrieved. ++ */ ++static __u64 (*bpf_skb_ancestor_cgroup_id)(struct __sk_buff *skb, int ancestor_level) = (void *) 83; ++ ++/* ++ * bpf_sk_lookup_tcp ++ * ++ * Look for TCP socket matching *tuple*, optionally in a child ++ * network namespace *netns*. The return value must be checked, ++ * and if non-**NULL**, released via **bpf_sk_release**\ (). ++ * ++ * The *ctx* should point to the context of the program, such as ++ * the skb or socket (depending on the hook in use). This is used ++ * to determine the base network namespace for the lookup. ++ * ++ * *tuple_size* must be one of: ++ * ++ * **sizeof**\ (*tuple*\ **->ipv4**) ++ * Look for an IPv4 socket. ++ * **sizeof**\ (*tuple*\ **->ipv6**) ++ * Look for an IPv6 socket. ++ * ++ * If the *netns* is a negative signed 32-bit integer, then the ++ * socket lookup table in the netns associated with the *ctx* ++ * will be used. For the TC hooks, this is the netns of the device ++ * in the skb. For socket hooks, this is the netns of the socket. ++ * If *netns* is any other signed 32-bit value greater than or ++ * equal to zero then it specifies the ID of the netns relative to ++ * the netns associated with the *ctx*. *netns* values beyond the ++ * range of 32-bit integers are reserved for future use. ++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * This helper is available only if the kernel was compiled with ++ * **CONFIG_NET** configuration option. ++ * ++ * Returns ++ * Pointer to **struct bpf_sock**, or **NULL** in case of failure. ++ * For sockets with reuseport option, the **struct bpf_sock** ++ * result is from *reuse*\ **->socks**\ [] using the hash of the ++ * tuple. 
++ */ ++static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx, struct bpf_sock_tuple *tuple, __u32 tuple_size, __u64 netns, __u64 flags) = (void *) 84; ++ ++/* ++ * bpf_sk_lookup_udp ++ * ++ * Look for UDP socket matching *tuple*, optionally in a child ++ * network namespace *netns*. The return value must be checked, ++ * and if non-**NULL**, released via **bpf_sk_release**\ (). ++ * ++ * The *ctx* should point to the context of the program, such as ++ * the skb or socket (depending on the hook in use). This is used ++ * to determine the base network namespace for the lookup. ++ * ++ * *tuple_size* must be one of: ++ * ++ * **sizeof**\ (*tuple*\ **->ipv4**) ++ * Look for an IPv4 socket. ++ * **sizeof**\ (*tuple*\ **->ipv6**) ++ * Look for an IPv6 socket. ++ * ++ * If the *netns* is a negative signed 32-bit integer, then the ++ * socket lookup table in the netns associated with the *ctx* ++ * will be used. For the TC hooks, this is the netns of the device ++ * in the skb. For socket hooks, this is the netns of the socket. ++ * If *netns* is any other signed 32-bit value greater than or ++ * equal to zero then it specifies the ID of the netns relative to ++ * the netns associated with the *ctx*. *netns* values beyond the ++ * range of 32-bit integers are reserved for future use. ++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * This helper is available only if the kernel was compiled with ++ * **CONFIG_NET** configuration option. ++ * ++ * Returns ++ * Pointer to **struct bpf_sock**, or **NULL** in case of failure. ++ * For sockets with reuseport option, the **struct bpf_sock** ++ * result is from *reuse*\ **->socks**\ [] using the hash of the ++ * tuple. ++ */ ++static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx, struct bpf_sock_tuple *tuple, __u32 tuple_size, __u64 netns, __u64 flags) = (void *) 85; ++ ++/* ++ * bpf_sk_release ++ * ++ * Release the reference held by *sock*. *sock* must be a ++ * non-**NULL** pointer that was returned from ++ * **bpf_sk_lookup_xxx**\ (). ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_sk_release)(void *sock) = (void *) 86; ++ ++/* ++ * bpf_map_push_elem ++ * ++ * Push an element *value* in *map*. *flags* is one of: ++ * ++ * **BPF_EXIST** ++ * If the queue/stack is full, the oldest element is ++ * removed to make room for this. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_map_push_elem)(void *map, const void *value, __u64 flags) = (void *) 87; ++ ++/* ++ * bpf_map_pop_elem ++ * ++ * Pop an element from *map*. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_map_pop_elem)(void *map, void *value) = (void *) 88; ++ ++/* ++ * bpf_map_peek_elem ++ * ++ * Get an element from *map* without removing it. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_map_peek_elem)(void *map, void *value) = (void *) 89; ++ ++/* ++ * bpf_msg_push_data ++ * ++ * For socket policies, insert *len* bytes into *msg* at offset ++ * *start*. ++ * ++ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a ++ * *msg* it may want to insert metadata or options into the *msg*. ++ * This can later be read and used by any of the lower layer BPF ++ * hooks. 
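Putting the lookup/release pair above together in a TC classifier (IPv4 without IP options assumed; the program only logs, any drop/accept policy is left out):

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("tc")
int tc_sk_lookup(struct __sk_buff *skb)
{
    void *data = (void *)(long)skb->data;
    void *data_end = (void *)(long)skb->data_end;
    struct ethhdr *eth = data;
    struct iphdr *iph = data + sizeof(*eth);
    struct tcphdr *tcph = (void *)(iph + 1);   /* assumes no IP options */
    struct bpf_sock_tuple tuple = {};
    struct bpf_sock *sk;

    if ((void *)(tcph + 1) > data_end || eth->h_proto != bpf_htons(ETH_P_IP) ||
        iph->protocol != IPPROTO_TCP)
        return TC_ACT_OK;

    tuple.ipv4.saddr = iph->saddr;
    tuple.ipv4.daddr = iph->daddr;
    tuple.ipv4.sport = tcph->source;
    tuple.ipv4.dport = tcph->dest;

    /* BPF_F_CURRENT_NETNS: search the netns the skb's device belongs to. */
    sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4),
                           BPF_F_CURRENT_NETNS, 0);
    if (sk) {
        bpf_printk("matching TCP socket found");
        bpf_sk_release(sk);            /* mandatory for non-NULL results */
    }
    return TC_ACT_OK;
}

char LICENSE[] SEC("license") = "GPL";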
++ * ++ * This helper may fail if under memory pressure (a malloc ++ * fails) in these cases BPF programs will get an appropriate ++ * error and BPF programs will need to handle them. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_msg_push_data)(struct sk_msg_md *msg, __u32 start, __u32 len, __u64 flags) = (void *) 90; ++ ++/* ++ * bpf_msg_pop_data ++ * ++ * Will remove *len* bytes from a *msg* starting at byte *start*. ++ * This may result in **ENOMEM** errors under certain situations if ++ * an allocation and copy are required due to a full ring buffer. ++ * However, the helper will try to avoid doing the allocation ++ * if possible. Other errors can occur if input parameters are ++ * invalid either due to *start* byte not being valid part of *msg* ++ * payload and/or *pop* value being to large. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_msg_pop_data)(struct sk_msg_md *msg, __u32 start, __u32 len, __u64 flags) = (void *) 91; ++ ++/* ++ * bpf_rc_pointer_rel ++ * ++ * This helper is used in programs implementing IR decoding, to ++ * report a successfully decoded pointer movement. ++ * ++ * The *ctx* should point to the lirc sample as passed into ++ * the program. ++ * ++ * This helper is only available is the kernel was compiled with ++ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to ++ * "**y**". ++ * ++ * Returns ++ * 0 ++ */ ++static long (*bpf_rc_pointer_rel)(void *ctx, __s32 rel_x, __s32 rel_y) = (void *) 92; ++ ++/* ++ * bpf_spin_lock ++ * ++ * Acquire a spinlock represented by the pointer *lock*, which is ++ * stored as part of a value of a map. Taking the lock allows to ++ * safely update the rest of the fields in that value. The ++ * spinlock can (and must) later be released with a call to ++ * **bpf_spin_unlock**\ (\ *lock*\ ). ++ * ++ * Spinlocks in BPF programs come with a number of restrictions ++ * and constraints: ++ * ++ * * **bpf_spin_lock** objects are only allowed inside maps of ++ * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this ++ * list could be extended in the future). ++ * * BTF description of the map is mandatory. ++ * * The BPF program can take ONE lock at a time, since taking two ++ * or more could cause dead locks. ++ * * Only one **struct bpf_spin_lock** is allowed per map element. ++ * * When the lock is taken, calls (either BPF to BPF or helpers) ++ * are not allowed. ++ * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not ++ * allowed inside a spinlock-ed region. ++ * * The BPF program MUST call **bpf_spin_unlock**\ () to release ++ * the lock, on all execution paths, before it returns. ++ * * The BPF program can access **struct bpf_spin_lock** only via ++ * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ () ++ * helpers. Loading or storing data into the **struct ++ * bpf_spin_lock** *lock*\ **;** field of a map is not allowed. ++ * * To use the **bpf_spin_lock**\ () helper, the BTF description ++ * of the map value must be a struct and have **struct ++ * bpf_spin_lock** *anyname*\ **;** field at the top level. ++ * Nested lock inside another struct is not allowed. ++ * * The **struct bpf_spin_lock** *lock* field in a map value must ++ * be aligned on a multiple of 4 bytes in that value. ++ * * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy ++ * the **bpf_spin_lock** field to user space. 
++ * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from ++ * a BPF program, do not update the **bpf_spin_lock** field. ++ * * **bpf_spin_lock** cannot be on the stack or inside a ++ * networking packet (it can only be inside of a map values). ++ * * **bpf_spin_lock** is available to root only. ++ * * Tracing programs and socket filter programs cannot use ++ * **bpf_spin_lock**\ () due to insufficient preemption checks ++ * (but this may change in the future). ++ * * **bpf_spin_lock** is not allowed in inner maps of map-in-map. ++ * ++ * Returns ++ * 0 ++ */ ++static long (*bpf_spin_lock)(struct bpf_spin_lock *lock) = (void *) 93; ++ ++/* ++ * bpf_spin_unlock ++ * ++ * Release the *lock* previously locked by a call to ++ * **bpf_spin_lock**\ (\ *lock*\ ). ++ * ++ * Returns ++ * 0 ++ */ ++static long (*bpf_spin_unlock)(struct bpf_spin_lock *lock) = (void *) 94; ++ ++/* ++ * bpf_sk_fullsock ++ * ++ * This helper gets a **struct bpf_sock** pointer such ++ * that all the fields in this **bpf_sock** can be accessed. ++ * ++ * Returns ++ * A **struct bpf_sock** pointer on success, or **NULL** in ++ * case of failure. ++ */ ++static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) = (void *) 95; ++ ++/* ++ * bpf_tcp_sock ++ * ++ * This helper gets a **struct bpf_tcp_sock** pointer from a ++ * **struct bpf_sock** pointer. ++ * ++ * Returns ++ * A **struct bpf_tcp_sock** pointer on success, or **NULL** in ++ * case of failure. ++ */ ++static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) = (void *) 96; ++ ++/* ++ * bpf_skb_ecn_set_ce ++ * ++ * Set ECN (Explicit Congestion Notification) field of IP header ++ * to **CE** (Congestion Encountered) if current value is **ECT** ++ * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6 ++ * and IPv4. ++ * ++ * Returns ++ * 1 if the **CE** flag is set (either by the current helper call ++ * or because it was already present), 0 if it is not set. ++ */ ++static long (*bpf_skb_ecn_set_ce)(struct __sk_buff *skb) = (void *) 97; ++ ++/* ++ * bpf_get_listener_sock ++ * ++ * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state. ++ * **bpf_sk_release**\ () is unnecessary and not allowed. ++ * ++ * Returns ++ * A **struct bpf_sock** pointer on success, or **NULL** in ++ * case of failure. ++ */ ++static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) = (void *) 98; ++ ++/* ++ * bpf_skc_lookup_tcp ++ * ++ * Look for TCP socket matching *tuple*, optionally in a child ++ * network namespace *netns*. The return value must be checked, ++ * and if non-**NULL**, released via **bpf_sk_release**\ (). ++ * ++ * This function is identical to **bpf_sk_lookup_tcp**\ (), except ++ * that it also returns timewait or request sockets. Use ++ * **bpf_sk_fullsock**\ () or **bpf_tcp_sock**\ () to access the ++ * full structure. ++ * ++ * This helper is available only if the kernel was compiled with ++ * **CONFIG_NET** configuration option. ++ * ++ * Returns ++ * Pointer to **struct bpf_sock**, or **NULL** in case of failure. ++ * For sockets with reuseport option, the **struct bpf_sock** ++ * result is from *reuse*\ **->socks**\ [] using the hash of the ++ * tuple. ++ */ ++static struct bpf_sock *(*bpf_skc_lookup_tcp)(void *ctx, struct bpf_sock_tuple *tuple, __u32 tuple_size, __u64 netns, __u64 flags) = (void *) 99; ++ ++/* ++ * bpf_tcp_check_syncookie ++ * ++ * Check whether *iph* and *th* contain a valid SYN cookie ACK for ++ * the listening socket in *sk*. 
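A minimal sketch of the locking pattern the restrictions above allow: the lock lives at the top level of an array map's value, next to the fields it protects (map and program names are illustrative).

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

struct counters {
    struct bpf_spin_lock lock;   /* top-level field of the map value */
    __u64 packets;
    __u64 bytes;
};

struct {
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __uint(max_entries, 1);
    __type(key, __u32);
    __type(value, struct counters);
} stats SEC(".maps");

SEC("tc")
int tc_count(struct __sk_buff *skb)
{
    __u32 zero = 0;
    struct counters *val = bpf_map_lookup_elem(&stats, &zero);

    if (!val)
        return TC_ACT_OK;

    /* Update both fields consistently with respect to other CPUs. */
    bpf_spin_lock(&val->lock);
    val->packets++;
    val->bytes += skb->len;
    bpf_spin_unlock(&val->lock);

    return TC_ACT_OK;
}

char LICENSE[] SEC("license") = "GPL";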
++ * ++ * *iph* points to the start of the IPv4 or IPv6 header, while ++ * *iph_len* contains **sizeof**\ (**struct iphdr**) or ++ * **sizeof**\ (**struct ipv6hdr**). ++ * ++ * *th* points to the start of the TCP header, while *th_len* ++ * contains the length of the TCP header (at least ++ * **sizeof**\ (**struct tcphdr**)). ++ * ++ * Returns ++ * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative ++ * error otherwise. ++ */ ++static long (*bpf_tcp_check_syncookie)(void *sk, void *iph, __u32 iph_len, struct tcphdr *th, __u32 th_len) = (void *) 100; ++ ++/* ++ * bpf_sysctl_get_name ++ * ++ * Get name of sysctl in /proc/sys/ and copy it into provided by ++ * program buffer *buf* of size *buf_len*. ++ * ++ * The buffer is always NUL terminated, unless it's zero-sized. ++ * ++ * If *flags* is zero, full name (e.g. "net/ipv4/tcp_mem") is ++ * copied. Use **BPF_F_SYSCTL_BASE_NAME** flag to copy base name ++ * only (e.g. "tcp_mem"). ++ * ++ * Returns ++ * Number of character copied (not including the trailing NUL). ++ * ++ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain ++ * truncated name in this case). ++ */ ++static long (*bpf_sysctl_get_name)(struct bpf_sysctl *ctx, char *buf, unsigned long buf_len, __u64 flags) = (void *) 101; ++ ++/* ++ * bpf_sysctl_get_current_value ++ * ++ * Get current value of sysctl as it is presented in /proc/sys ++ * (incl. newline, etc), and copy it as a string into provided ++ * by program buffer *buf* of size *buf_len*. ++ * ++ * The whole value is copied, no matter what file position user ++ * space issued e.g. sys_read at. ++ * ++ * The buffer is always NUL terminated, unless it's zero-sized. ++ * ++ * Returns ++ * Number of character copied (not including the trailing NUL). ++ * ++ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain ++ * truncated name in this case). ++ * ++ * **-EINVAL** if current value was unavailable, e.g. because ++ * sysctl is uninitialized and read returns -EIO for it. ++ */ ++static long (*bpf_sysctl_get_current_value)(struct bpf_sysctl *ctx, char *buf, unsigned long buf_len) = (void *) 102; ++ ++/* ++ * bpf_sysctl_get_new_value ++ * ++ * Get new value being written by user space to sysctl (before ++ * the actual write happens) and copy it as a string into ++ * provided by program buffer *buf* of size *buf_len*. ++ * ++ * User space may write new value at file position > 0. ++ * ++ * The buffer is always NUL terminated, unless it's zero-sized. ++ * ++ * Returns ++ * Number of character copied (not including the trailing NUL). ++ * ++ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain ++ * truncated name in this case). ++ * ++ * **-EINVAL** if sysctl is being read. ++ */ ++static long (*bpf_sysctl_get_new_value)(struct bpf_sysctl *ctx, char *buf, unsigned long buf_len) = (void *) 103; ++ ++/* ++ * bpf_sysctl_set_new_value ++ * ++ * Override new value being written by user space to sysctl with ++ * value provided by program in buffer *buf* of size *buf_len*. ++ * ++ * *buf* should contain a string in same form as provided by user ++ * space on sysctl write. ++ * ++ * User space may write new value at file position > 0. To override ++ * the whole sysctl value file position should be set to zero. ++ * ++ * Returns ++ * 0 on success. ++ * ++ * **-E2BIG** if the *buf_len* is too big. ++ * ++ * **-EINVAL** if sysctl is being read. 
++ */ ++static long (*bpf_sysctl_set_new_value)(struct bpf_sysctl *ctx, const char *buf, unsigned long buf_len) = (void *) 104; ++ ++/* ++ * bpf_strtol ++ * ++ * Convert the initial part of the string from buffer *buf* of ++ * size *buf_len* to a long integer according to the given base ++ * and save the result in *res*. ++ * ++ * The string may begin with an arbitrary amount of white space ++ * (as determined by **isspace**\ (3)) followed by a single ++ * optional '**-**' sign. ++ * ++ * Five least significant bits of *flags* encode base, other bits ++ * are currently unused. ++ * ++ * Base must be either 8, 10, 16 or 0 to detect it automatically ++ * similar to user space **strtol**\ (3). ++ * ++ * Returns ++ * Number of characters consumed on success. Must be positive but ++ * no more than *buf_len*. ++ * ++ * **-EINVAL** if no valid digits were found or unsupported base ++ * was provided. ++ * ++ * **-ERANGE** if resulting value was out of range. ++ */ ++static long (*bpf_strtol)(const char *buf, unsigned long buf_len, __u64 flags, long *res) = (void *) 105; ++ ++/* ++ * bpf_strtoul ++ * ++ * Convert the initial part of the string from buffer *buf* of ++ * size *buf_len* to an unsigned long integer according to the ++ * given base and save the result in *res*. ++ * ++ * The string may begin with an arbitrary amount of white space ++ * (as determined by **isspace**\ (3)). ++ * ++ * Five least significant bits of *flags* encode base, other bits ++ * are currently unused. ++ * ++ * Base must be either 8, 10, 16 or 0 to detect it automatically ++ * similar to user space **strtoul**\ (3). ++ * ++ * Returns ++ * Number of characters consumed on success. Must be positive but ++ * no more than *buf_len*. ++ * ++ * **-EINVAL** if no valid digits were found or unsupported base ++ * was provided. ++ * ++ * **-ERANGE** if resulting value was out of range. ++ */ ++static long (*bpf_strtoul)(const char *buf, unsigned long buf_len, __u64 flags, unsigned long *res) = (void *) 106; ++ ++/* ++ * bpf_sk_storage_get ++ * ++ * Get a bpf-local-storage from a *sk*. ++ * ++ * Logically, it could be thought of getting the value from ++ * a *map* with *sk* as the **key**. From this ++ * perspective, the usage is not much different from ++ * **bpf_map_lookup_elem**\ (*map*, **&**\ *sk*) except this ++ * helper enforces the key must be a full socket and the map must ++ * be a **BPF_MAP_TYPE_SK_STORAGE** also. ++ * ++ * Underneath, the value is stored locally at *sk* instead of ++ * the *map*. The *map* is used as the bpf-local-storage ++ * "type". The bpf-local-storage "type" (i.e. the *map*) is ++ * searched against all bpf-local-storages residing at *sk*. ++ * ++ * *sk* is a kernel **struct sock** pointer for LSM program. ++ * *sk* is a **struct bpf_sock** pointer for other program types. ++ * ++ * An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be ++ * used such that a new bpf-local-storage will be ++ * created if one does not exist. *value* can be used ++ * together with **BPF_SK_STORAGE_GET_F_CREATE** to specify ++ * the initial value of a bpf-local-storage. If *value* is ++ * **NULL**, the new bpf-local-storage will be zero initialized. ++ * ++ * Returns ++ * A bpf-local-storage pointer is returned on success. ++ * ++ * **NULL** if not found or there was an error in adding ++ * a new bpf-local-storage. ++ */ ++static void *(*bpf_sk_storage_get)(void *map, void *sk, void *value, __u64 flags) = (void *) 107; ++ ++/* ++ * bpf_sk_storage_delete ++ * ++ * Delete a bpf-local-storage from a *sk*. 
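The bpf-local-storage model above, sketched as a per-socket event counter in a sock_ops program; struct and map names are illustrative, and skops->sk assumes a kernel that exposes the sk pointer in struct bpf_sock_ops.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct sock_stats {
    __u64 events;
};

/* One struct sock_stats per socket that ever touches this map. */
struct {
    __uint(type, BPF_MAP_TYPE_SK_STORAGE);
    __uint(map_flags, BPF_F_NO_PREALLOC);
    __type(key, int);
    __type(value, struct sock_stats);
} sk_stats SEC(".maps");

SEC("sockops")
int count_sock_ops(struct bpf_sock_ops *skops)
{
    struct bpf_sock *sk = skops->sk;
    struct sock_stats *stats;

    if (!sk)
        return 1;

    /* Create the storage on first use; a NULL value means zero-initialized. */
    stats = bpf_sk_storage_get(&sk_stats, sk, NULL,
                               BPF_SK_STORAGE_GET_F_CREATE);
    if (stats)
        __sync_fetch_and_add(&stats->events, 1);
    return 1;
}

char LICENSE[] SEC("license") = "GPL";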
++ * ++ * Returns ++ * 0 on success. ++ * ++ * **-ENOENT** if the bpf-local-storage cannot be found. ++ * **-EINVAL** if sk is not a fullsock (e.g. a request_sock). ++ */ ++static long (*bpf_sk_storage_delete)(void *map, void *sk) = (void *) 108; ++ ++/* ++ * bpf_send_signal ++ * ++ * Send signal *sig* to the process of the current task. ++ * The signal may be delivered to any of this process's threads. ++ * ++ * Returns ++ * 0 on success or successfully queued. ++ * ++ * **-EBUSY** if work queue under nmi is full. ++ * ++ * **-EINVAL** if *sig* is invalid. ++ * ++ * **-EPERM** if no permission to send the *sig*. ++ * ++ * **-EAGAIN** if bpf program can try again. ++ */ ++static long (*bpf_send_signal)(__u32 sig) = (void *) 109; ++ ++/* ++ * bpf_tcp_gen_syncookie ++ * ++ * Try to issue a SYN cookie for the packet with corresponding ++ * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*. ++ * ++ * *iph* points to the start of the IPv4 or IPv6 header, while ++ * *iph_len* contains **sizeof**\ (**struct iphdr**) or ++ * **sizeof**\ (**struct ipv6hdr**). ++ * ++ * *th* points to the start of the TCP header, while *th_len* ++ * contains the length of the TCP header with options (at least ++ * **sizeof**\ (**struct tcphdr**)). ++ * ++ * Returns ++ * On success, lower 32 bits hold the generated SYN cookie in ++ * followed by 16 bits which hold the MSS value for that cookie, ++ * and the top 16 bits are unused. ++ * ++ * On failure, the returned value is one of the following: ++ * ++ * **-EINVAL** SYN cookie cannot be issued due to error ++ * ++ * **-ENOENT** SYN cookie should not be issued (no SYN flood) ++ * ++ * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies ++ * ++ * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 ++ */ ++static __s64 (*bpf_tcp_gen_syncookie)(void *sk, void *iph, __u32 iph_len, struct tcphdr *th, __u32 th_len) = (void *) 110; ++ ++/* ++ * bpf_skb_output ++ * ++ * Write raw *data* blob into a special BPF perf event held by ++ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf ++ * event must have the following attributes: **PERF_SAMPLE_RAW** ++ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and ++ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. ++ * ++ * The *flags* are used to indicate the index in *map* for which ++ * the value must be put, masked with **BPF_F_INDEX_MASK**. ++ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** ++ * to indicate that the index of the current CPU core should be ++ * used. ++ * ++ * The value to write, of *size*, is passed through eBPF stack and ++ * pointed by *data*. ++ * ++ * *ctx* is a pointer to in-kernel struct sk_buff. ++ * ++ * This helper is similar to **bpf_perf_event_output**\ () but ++ * restricted to raw_tracepoint bpf programs. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_skb_output)(void *ctx, void *map, __u64 flags, void *data, __u64 size) = (void *) 111; ++ ++/* ++ * bpf_probe_read_user ++ * ++ * Safely attempt to read *size* bytes from user space address ++ * *unsafe_ptr* and store the data in *dst*. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_probe_read_user)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 112; ++ ++/* ++ * bpf_probe_read_kernel ++ * ++ * Safely attempt to read *size* bytes from kernel space address ++ * *unsafe_ptr* and store the data in *dst*. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. 
++ */ ++static long (*bpf_probe_read_kernel)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 113; ++ ++/* ++ * bpf_probe_read_user_str ++ * ++ * Copy a NUL terminated string from an unsafe user address ++ * *unsafe_ptr* to *dst*. The *size* should include the ++ * terminating NUL byte. In case the string length is smaller than ++ * *size*, the target is not padded with further NUL bytes. If the ++ * string length is larger than *size*, just *size*-1 bytes are ++ * copied and the last byte is set to NUL. ++ * ++ * On success, returns the number of bytes that were written, ++ * including the terminal NUL. This makes this helper useful in ++ * tracing programs for reading strings, and more importantly to ++ * get its length at runtime. See the following snippet: ++ * ++ * :: ++ * ++ * SEC("kprobe/sys_open") ++ * void bpf_sys_open(struct pt_regs *ctx) ++ * { ++ * char buf[PATHLEN]; // PATHLEN is defined to 256 ++ * int res = bpf_probe_read_user_str(buf, sizeof(buf), ++ * ctx->di); ++ * ++ * // Consume buf, for example push it to ++ * // userspace via bpf_perf_event_output(); we ++ * // can use res (the string length) as event ++ * // size, after checking its boundaries. ++ * } ++ * ++ * In comparison, using **bpf_probe_read_user**\ () helper here ++ * instead to read the string would require to estimate the length ++ * at compile time, and would often result in copying more memory ++ * than necessary. ++ * ++ * Another useful use case is when parsing individual process ++ * arguments or individual environment variables navigating ++ * *current*\ **->mm->arg_start** and *current*\ ++ * **->mm->env_start**: using this helper and the return value, ++ * one can quickly iterate at the right offset of the memory area. ++ * ++ * Returns ++ * On success, the strictly positive length of the output string, ++ * including the trailing NUL character. On error, a negative ++ * value. ++ */ ++static long (*bpf_probe_read_user_str)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 114; ++ ++/* ++ * bpf_probe_read_kernel_str ++ * ++ * Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr* ++ * to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply. ++ * ++ * Returns ++ * On success, the strictly positive length of the string, including ++ * the trailing NUL character. On error, a negative value. ++ */ ++static long (*bpf_probe_read_kernel_str)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 115; ++ ++/* ++ * bpf_tcp_send_ack ++ * ++ * Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**. ++ * *rcv_nxt* is the ack_seq to be sent out. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_tcp_send_ack)(void *tp, __u32 rcv_nxt) = (void *) 116; ++ ++/* ++ * bpf_send_signal_thread ++ * ++ * Send signal *sig* to the thread corresponding to the current task. ++ * ++ * Returns ++ * 0 on success or successfully queued. ++ * ++ * **-EBUSY** if work queue under nmi is full. ++ * ++ * **-EINVAL** if *sig* is invalid. ++ * ++ * **-EPERM** if no permission to send the *sig*. ++ * ++ * **-EAGAIN** if bpf program can try again. 
++ */ ++static long (*bpf_send_signal_thread)(__u32 sig) = (void *) 117; ++ ++/* ++ * bpf_jiffies64 ++ * ++ * Obtain the 64bit jiffies ++ * ++ * Returns ++ * The 64 bit jiffies ++ */ ++static __u64 (*bpf_jiffies64)(void) = (void *) 118; ++ ++/* ++ * bpf_read_branch_records ++ * ++ * For an eBPF program attached to a perf event, retrieve the ++ * branch records (**struct perf_branch_entry**) associated to *ctx* ++ * and store it in the buffer pointed by *buf* up to size ++ * *size* bytes. ++ * ++ * Returns ++ * On success, number of bytes written to *buf*. On error, a ++ * negative value. ++ * ++ * The *flags* can be set to **BPF_F_GET_BRANCH_RECORDS_SIZE** to ++ * instead return the number of bytes required to store all the ++ * branch entries. If this flag is set, *buf* may be NULL. ++ * ++ * **-EINVAL** if arguments invalid or **size** not a multiple ++ * of **sizeof**\ (**struct perf_branch_entry**\ ). ++ * ++ * **-ENOENT** if architecture does not support branch records. ++ */ ++static long (*bpf_read_branch_records)(struct bpf_perf_event_data *ctx, void *buf, __u32 size, __u64 flags) = (void *) 119; ++ ++/* ++ * bpf_get_ns_current_pid_tgid ++ * ++ * Returns 0 on success, values for *pid* and *tgid* as seen from the current ++ * *namespace* will be returned in *nsdata*. ++ * ++ * Returns ++ * 0 on success, or one of the following in case of failure: ++ * ++ * **-EINVAL** if dev and inum supplied don't match dev_t and inode number ++ * with nsfs of current task, or if dev conversion to dev_t lost high bits. ++ * ++ * **-ENOENT** if pidns does not exists for the current task. ++ */ ++static long (*bpf_get_ns_current_pid_tgid)(__u64 dev, __u64 ino, struct bpf_pidns_info *nsdata, __u32 size) = (void *) 120; ++ ++/* ++ * bpf_xdp_output ++ * ++ * Write raw *data* blob into a special BPF perf event held by ++ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf ++ * event must have the following attributes: **PERF_SAMPLE_RAW** ++ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and ++ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. ++ * ++ * The *flags* are used to indicate the index in *map* for which ++ * the value must be put, masked with **BPF_F_INDEX_MASK**. ++ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** ++ * to indicate that the index of the current CPU core should be ++ * used. ++ * ++ * The value to write, of *size*, is passed through eBPF stack and ++ * pointed by *data*. ++ * ++ * *ctx* is a pointer to in-kernel struct xdp_buff. ++ * ++ * This helper is similar to **bpf_perf_eventoutput**\ () but ++ * restricted to raw_tracepoint bpf programs. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_xdp_output)(void *ctx, void *map, __u64 flags, void *data, __u64 size) = (void *) 121; ++ ++/* ++ * bpf_get_netns_cookie ++ * ++ * Retrieve the cookie (generated by the kernel) of the network ++ * namespace the input *ctx* is associated with. The network ++ * namespace cookie remains stable for its lifetime and provides ++ * a global identifier that can be assumed unique. If *ctx* is ++ * NULL, then the helper returns the cookie for the initial ++ * network namespace. The cookie itself is very similar to that ++ * of **bpf_get_socket_cookie**\ () helper, but for network ++ * namespaces instead of sockets. ++ * ++ * Returns ++ * A 8-byte long opaque number. 
++ */ ++static __u64 (*bpf_get_netns_cookie)(void *ctx) = (void *) 122; ++ ++/* ++ * bpf_get_current_ancestor_cgroup_id ++ * ++ * Return id of cgroup v2 that is ancestor of the cgroup associated ++ * with the current task at the *ancestor_level*. The root cgroup ++ * is at *ancestor_level* zero and each step down the hierarchy ++ * increments the level. If *ancestor_level* == level of cgroup ++ * associated with the current task, then return value will be the ++ * same as that of **bpf_get_current_cgroup_id**\ (). ++ * ++ * The helper is useful to implement policies based on cgroups ++ * that are upper in hierarchy than immediate cgroup associated ++ * with the current task. ++ * ++ * The format of returned id and helper limitations are same as in ++ * **bpf_get_current_cgroup_id**\ (). ++ * ++ * Returns ++ * The id is returned or 0 in case the id could not be retrieved. ++ */ ++static __u64 (*bpf_get_current_ancestor_cgroup_id)(int ancestor_level) = (void *) 123; ++ ++/* ++ * bpf_sk_assign ++ * ++ * Helper is overloaded depending on BPF program type. This ++ * description applies to **BPF_PROG_TYPE_SCHED_CLS** and ++ * **BPF_PROG_TYPE_SCHED_ACT** programs. ++ * ++ * Assign the *sk* to the *skb*. When combined with appropriate ++ * routing configuration to receive the packet towards the socket, ++ * will cause *skb* to be delivered to the specified socket. ++ * Subsequent redirection of *skb* via **bpf_redirect**\ (), ++ * **bpf_clone_redirect**\ () or other methods outside of BPF may ++ * interfere with successful delivery to the socket. ++ * ++ * This operation is only valid from TC ingress path. ++ * ++ * The *flags* argument must be zero. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure: ++ * ++ * **-EINVAL** if specified *flags* are not supported. ++ * ++ * **-ENOENT** if the socket is unavailable for assignment. ++ * ++ * **-ENETUNREACH** if the socket is unreachable (wrong netns). ++ * ++ * **-EOPNOTSUPP** if the operation is not supported, for example ++ * a call from outside of TC ingress. ++ * ++ * **-ESOCKTNOSUPPORT** if the socket type is not supported ++ * (reuseport). ++ */ ++static long (*bpf_sk_assign)(void *ctx, void *sk, __u64 flags) = (void *) 124; ++ ++/* ++ * bpf_ktime_get_boot_ns ++ * ++ * Return the time elapsed since system boot, in nanoseconds. ++ * Does include the time the system was suspended. ++ * See: **clock_gettime**\ (**CLOCK_BOOTTIME**) ++ * ++ * Returns ++ * Current *ktime*. ++ */ ++static __u64 (*bpf_ktime_get_boot_ns)(void) = (void *) 125; ++ ++/* ++ * bpf_seq_printf ++ * ++ * **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print ++ * out the format string. ++ * The *m* represents the seq_file. The *fmt* and *fmt_size* are for ++ * the format string itself. The *data* and *data_len* are format string ++ * arguments. The *data* are a **u64** array and corresponding format string ++ * values are stored in the array. For strings and pointers where pointees ++ * are accessed, only the pointer values are stored in the *data* array. ++ * The *data_len* is the size of *data* in bytes - must be a multiple of 8. ++ * ++ * Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory. ++ * Reading kernel memory may fail due to either invalid address or ++ * valid address but requiring a major memory fault. If reading kernel memory ++ * fails, the string for **%s** will be an empty string, and the ip ++ * address for **%p{i,I}{4,6}** will be 0. 
Not returning error to ++ * bpf program is consistent with what **bpf_trace_printk**\ () does for now. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure: ++ * ++ * **-EBUSY** if per-CPU memory copy buffer is busy, can try again ++ * by returning 1 from bpf program. ++ * ++ * **-EINVAL** if arguments are invalid, or if *fmt* is invalid/unsupported. ++ * ++ * **-E2BIG** if *fmt* contains too many format specifiers. ++ * ++ * **-EOVERFLOW** if an overflow happened: The same object will be tried again. ++ */ ++static long (*bpf_seq_printf)(struct seq_file *m, const char *fmt, __u32 fmt_size, const void *data, __u32 data_len) = (void *) 126; ++ ++/* ++ * bpf_seq_write ++ * ++ * **bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data. ++ * The *m* represents the seq_file. The *data* and *len* represent the ++ * data to write in bytes. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure: ++ * ++ * **-EOVERFLOW** if an overflow happened: The same object will be tried again. ++ */ ++static long (*bpf_seq_write)(struct seq_file *m, const void *data, __u32 len) = (void *) 127; ++ ++/* ++ * bpf_sk_cgroup_id ++ * ++ * Return the cgroup v2 id of the socket *sk*. ++ * ++ * *sk* must be a non-**NULL** pointer to a socket, e.g. one ++ * returned from **bpf_sk_lookup_xxx**\ (), ++ * **bpf_sk_fullsock**\ (), etc. The format of returned id is ++ * same as in **bpf_skb_cgroup_id**\ (). ++ * ++ * This helper is available only if the kernel was compiled with ++ * the **CONFIG_SOCK_CGROUP_DATA** configuration option. ++ * ++ * Returns ++ * The id is returned or 0 in case the id could not be retrieved. ++ */ ++static __u64 (*bpf_sk_cgroup_id)(void *sk) = (void *) 128; ++ ++/* ++ * bpf_sk_ancestor_cgroup_id ++ * ++ * Return id of cgroup v2 that is ancestor of cgroup associated ++ * with the *sk* at the *ancestor_level*. The root cgroup is at ++ * *ancestor_level* zero and each step down the hierarchy ++ * increments the level. If *ancestor_level* == level of cgroup ++ * associated with *sk*, then return value will be same as that ++ * of **bpf_sk_cgroup_id**\ (). ++ * ++ * The helper is useful to implement policies based on cgroups ++ * that are upper in hierarchy than immediate cgroup associated ++ * with *sk*. ++ * ++ * The format of returned id and helper limitations are same as in ++ * **bpf_sk_cgroup_id**\ (). ++ * ++ * Returns ++ * The id is returned or 0 in case the id could not be retrieved. ++ */ ++static __u64 (*bpf_sk_ancestor_cgroup_id)(void *sk, int ancestor_level) = (void *) 129; ++ ++/* ++ * bpf_ringbuf_output ++ * ++ * Copy *size* bytes from *data* into a ring buffer *ringbuf*. ++ * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification ++ * of new data availability is sent. ++ * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification ++ * of new data availability is sent unconditionally. ++ * If **0** is specified in *flags*, an adaptive notification ++ * of new data availability is sent. ++ * ++ * An adaptive notification is a notification sent whenever the user-space ++ * process has caught up and consumed all available payloads. In case the user-space ++ * process is still processing a previous payload, then no notification is needed ++ * as it will process the newly added payload automatically. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. 
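++ *
++ * Illustrative sketch (editorial addition, not part of the upstream
++ * helper description): copying a small event into a
++ * **BPF_MAP_TYPE_RINGBUF** map from a tracepoint. The map name
++ * "events", the event layout and the attach point are assumptions
++ * made for the example.
++ *
++ * ::
++ *
++ * 	struct {
++ * 		__uint(type, BPF_MAP_TYPE_RINGBUF);
++ * 		__uint(max_entries, 256 * 1024); // bytes, power of two
++ * 	} events SEC(".maps");
++ *
++ * 	struct event { __u32 pid; };
++ *
++ * 	SEC("tracepoint/syscalls/sys_enter_execve")
++ * 	int on_execve(void *ctx)
++ * 	{
++ * 		struct event e = {
++ * 			.pid = bpf_get_current_pid_tgid() >> 32,
++ * 		};
++ *
++ * 		// flags == 0 requests an adaptive notification
++ * 		bpf_ringbuf_output(&events, &e, sizeof(e), 0);
++ * 		return 0;
++ * 	}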
++ */ ++static long (*bpf_ringbuf_output)(void *ringbuf, void *data, __u64 size, __u64 flags) = (void *) 130; ++ ++/* ++ * bpf_ringbuf_reserve ++ * ++ * Reserve *size* bytes of payload in a ring buffer *ringbuf*. ++ * *flags* must be 0. ++ * ++ * Returns ++ * Valid pointer with *size* bytes of memory available; NULL, ++ * otherwise. ++ */ ++static void *(*bpf_ringbuf_reserve)(void *ringbuf, __u64 size, __u64 flags) = (void *) 131; ++ ++/* ++ * bpf_ringbuf_submit ++ * ++ * Submit reserved ring buffer sample, pointed to by *data*. ++ * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification ++ * of new data availability is sent. ++ * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification ++ * of new data availability is sent unconditionally. ++ * If **0** is specified in *flags*, an adaptive notification ++ * of new data availability is sent. ++ * ++ * See 'bpf_ringbuf_output()' for the definition of adaptive notification. ++ * ++ * Returns ++ * Nothing. Always succeeds. ++ */ ++static void (*bpf_ringbuf_submit)(void *data, __u64 flags) = (void *) 132; ++ ++/* ++ * bpf_ringbuf_discard ++ * ++ * Discard reserved ring buffer sample, pointed to by *data*. ++ * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification ++ * of new data availability is sent. ++ * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification ++ * of new data availability is sent unconditionally. ++ * If **0** is specified in *flags*, an adaptive notification ++ * of new data availability is sent. ++ * ++ * See 'bpf_ringbuf_output()' for the definition of adaptive notification. ++ * ++ * Returns ++ * Nothing. Always succeeds. ++ */ ++static void (*bpf_ringbuf_discard)(void *data, __u64 flags) = (void *) 133; ++ ++/* ++ * bpf_ringbuf_query ++ * ++ * Query various characteristics of provided ring buffer. What ++ * exactly is queries is determined by *flags*: ++ * ++ * * **BPF_RB_AVAIL_DATA**: Amount of data not yet consumed. ++ * * **BPF_RB_RING_SIZE**: The size of ring buffer. ++ * * **BPF_RB_CONS_POS**: Consumer position (can wrap around). ++ * * **BPF_RB_PROD_POS**: Producer(s) position (can wrap around). ++ * ++ * Data returned is just a momentary snapshot of actual values ++ * and could be inaccurate, so this facility should be used to ++ * power heuristics and for reporting, not to make 100% correct ++ * calculation. ++ * ++ * Returns ++ * Requested value, or 0, if *flags* are not recognized. ++ */ ++static __u64 (*bpf_ringbuf_query)(void *ringbuf, __u64 flags) = (void *) 134; ++ ++/* ++ * bpf_csum_level ++ * ++ * Change the skbs checksum level by one layer up or down, or ++ * reset it entirely to none in order to have the stack perform ++ * checksum validation. The level is applicable to the following ++ * protocols: TCP, UDP, GRE, SCTP, FCOE. For example, a decap of ++ * | ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP | ++ * through **bpf_skb_adjust_room**\ () helper with passing in ++ * **BPF_F_ADJ_ROOM_NO_CSUM_RESET** flag would require one call ++ * to **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_DEC** since ++ * the UDP header is removed. Similarly, an encap of the latter ++ * into the former could be accompanied by a helper call to ++ * **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_INC** if the ++ * skb is still intended to be processed in higher layers of the ++ * stack instead of just egressing at tc. ++ * ++ * There are three supported level settings at this time: ++ * ++ * * **BPF_CSUM_LEVEL_INC**: Increases skb->csum_level for skbs ++ * with CHECKSUM_UNNECESSARY. 
++ * * **BPF_CSUM_LEVEL_DEC**: Decreases skb->csum_level for skbs ++ * with CHECKSUM_UNNECESSARY. ++ * * **BPF_CSUM_LEVEL_RESET**: Resets skb->csum_level to 0 and ++ * sets CHECKSUM_NONE to force checksum validation by the stack. ++ * * **BPF_CSUM_LEVEL_QUERY**: No-op, returns the current ++ * skb->csum_level. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. In the ++ * case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level ++ * is returned or the error code -EACCES in case the skb is not ++ * subject to CHECKSUM_UNNECESSARY. ++ */ ++static long (*bpf_csum_level)(struct __sk_buff *skb, __u64 level) = (void *) 135; ++ ++/* ++ * bpf_skc_to_tcp6_sock ++ * ++ * Dynamically cast a *sk* pointer to a *tcp6_sock* pointer. ++ * ++ * Returns ++ * *sk* if casting is valid, or **NULL** otherwise. ++ */ ++static struct tcp6_sock *(*bpf_skc_to_tcp6_sock)(void *sk) = (void *) 136; ++ ++/* ++ * bpf_skc_to_tcp_sock ++ * ++ * Dynamically cast a *sk* pointer to a *tcp_sock* pointer. ++ * ++ * Returns ++ * *sk* if casting is valid, or **NULL** otherwise. ++ */ ++static struct tcp_sock *(*bpf_skc_to_tcp_sock)(void *sk) = (void *) 137; ++ ++/* ++ * bpf_skc_to_tcp_timewait_sock ++ * ++ * Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer. ++ * ++ * Returns ++ * *sk* if casting is valid, or **NULL** otherwise. ++ */ ++static struct tcp_timewait_sock *(*bpf_skc_to_tcp_timewait_sock)(void *sk) = (void *) 138; ++ ++/* ++ * bpf_skc_to_tcp_request_sock ++ * ++ * Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer. ++ * ++ * Returns ++ * *sk* if casting is valid, or **NULL** otherwise. ++ */ ++static struct tcp_request_sock *(*bpf_skc_to_tcp_request_sock)(void *sk) = (void *) 139; ++ ++/* ++ * bpf_skc_to_udp6_sock ++ * ++ * Dynamically cast a *sk* pointer to a *udp6_sock* pointer. ++ * ++ * Returns ++ * *sk* if casting is valid, or **NULL** otherwise. ++ */ ++static struct udp6_sock *(*bpf_skc_to_udp6_sock)(void *sk) = (void *) 140; ++ ++/* ++ * bpf_get_task_stack ++ * ++ * Return a user or a kernel stack in bpf program provided buffer. ++ * To achieve this, the helper needs *task*, which is a valid ++ * pointer to **struct task_struct**. To store the stacktrace, the ++ * bpf program provides *buf* with a nonnegative *size*. ++ * ++ * The last argument, *flags*, holds the number of stack frames to ++ * skip (from 0 to 255), masked with ++ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set ++ * the following flags: ++ * ++ * **BPF_F_USER_STACK** ++ * Collect a user space stack instead of a kernel stack. ++ * **BPF_F_USER_BUILD_ID** ++ * Collect buildid+offset instead of ips for user stack, ++ * only valid if **BPF_F_USER_STACK** is also specified. ++ * ++ * **bpf_get_task_stack**\ () can collect up to ++ * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject ++ * to sufficient large buffer size. Note that ++ * this limit can be controlled with the **sysctl** program, and ++ * that it should be manually increased in order to profile long ++ * user stacks (such as stacks for Java programs). To do so, use: ++ * ++ * :: ++ * ++ * # sysctl kernel.perf_event_max_stack= ++ * ++ * Returns ++ * The non-negative copied *buf* length equal to or less than ++ * *size* on success, or a negative error in case of failure. ++ */ ++static long (*bpf_get_task_stack)(struct task_struct *task, void *buf, __u32 size, __u64 flags) = (void *) 141; ++ ++/* ++ * bpf_load_hdr_opt ++ * ++ * Load header option. 
Support reading a particular TCP header ++ * option for bpf program (**BPF_PROG_TYPE_SOCK_OPS**). ++ * ++ * If *flags* is 0, it will search the option from the ++ * *skops*\ **->skb_data**. The comment in **struct bpf_sock_ops** ++ * has details on what skb_data contains under different ++ * *skops*\ **->op**. ++ * ++ * The first byte of the *searchby_res* specifies the ++ * kind that it wants to search. ++ * ++ * If the searching kind is an experimental kind ++ * (i.e. 253 or 254 according to RFC6994). It also ++ * needs to specify the "magic" which is either ++ * 2 bytes or 4 bytes. It then also needs to ++ * specify the size of the magic by using ++ * the 2nd byte which is "kind-length" of a TCP ++ * header option and the "kind-length" also ++ * includes the first 2 bytes "kind" and "kind-length" ++ * itself as a normal TCP header option also does. ++ * ++ * For example, to search experimental kind 254 with ++ * 2 byte magic 0xeB9F, the searchby_res should be ++ * [ 254, 4, 0xeB, 0x9F, 0, 0, .... 0 ]. ++ * ++ * To search for the standard window scale option (3), ++ * the *searchby_res* should be [ 3, 0, 0, .... 0 ]. ++ * Note, kind-length must be 0 for regular option. ++ * ++ * Searching for No-Op (0) and End-of-Option-List (1) are ++ * not supported. ++ * ++ * *len* must be at least 2 bytes which is the minimal size ++ * of a header option. ++ * ++ * Supported flags: ++ * ++ * * **BPF_LOAD_HDR_OPT_TCP_SYN** to search from the ++ * saved_syn packet or the just-received syn packet. ++ * ++ * ++ * Returns ++ * > 0 when found, the header option is copied to *searchby_res*. ++ * The return value is the total length copied. On failure, a ++ * negative error code is returned: ++ * ++ * **-EINVAL** if a parameter is invalid. ++ * ++ * **-ENOMSG** if the option is not found. ++ * ++ * **-ENOENT** if no syn packet is available when ++ * **BPF_LOAD_HDR_OPT_TCP_SYN** is used. ++ * ++ * **-ENOSPC** if there is not enough space. Only *len* number of ++ * bytes are copied. ++ * ++ * **-EFAULT** on failure to parse the header options in the ++ * packet. ++ * ++ * **-EPERM** if the helper cannot be used under the current ++ * *skops*\ **->op**. ++ */ ++static long (*bpf_load_hdr_opt)(struct bpf_sock_ops *skops, void *searchby_res, __u32 len, __u64 flags) = (void *) 142; ++ ++/* ++ * bpf_store_hdr_opt ++ * ++ * Store header option. The data will be copied ++ * from buffer *from* with length *len* to the TCP header. ++ * ++ * The buffer *from* should have the whole option that ++ * includes the kind, kind-length, and the actual ++ * option data. The *len* must be at least kind-length ++ * long. The kind-length does not have to be 4 byte ++ * aligned. The kernel will take care of the padding ++ * and setting the 4 bytes aligned value to th->doff. ++ * ++ * This helper will check for duplicated option ++ * by searching the same option in the outgoing skb. ++ * ++ * This helper can only be called during ++ * **BPF_SOCK_OPS_WRITE_HDR_OPT_CB**. ++ * ++ * ++ * Returns ++ * 0 on success, or negative error in case of failure: ++ * ++ * **-EINVAL** If param is invalid. ++ * ++ * **-ENOSPC** if there is not enough space in the header. ++ * Nothing has been written ++ * ++ * **-EEXIST** if the option already exists. ++ * ++ * **-EFAULT** on failrue to parse the existing header options. ++ * ++ * **-EPERM** if the helper cannot be used under the current ++ * *skops*\ **->op**. 
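++ *
++ * Illustrative sketch (editorial addition, not part of the upstream
++ * helper description): a sockops program that reserves space for,
++ * and then writes, a 4-byte experimental option (kind 254). The
++ * option payload is an assumption, and the example presumes the
++ * header-option callbacks were enabled earlier via
++ * **bpf_sock_ops_cb_flags_set**\ ().
++ *
++ * ::
++ *
++ * 	SEC("sockops")
++ * 	int write_opt(struct bpf_sock_ops *skops)
++ * 	{
++ * 		// kind, kind-length, then 2 bytes of option data
++ * 		__u8 opt[4] = { 254, 4, 0xeb, 0x9f };
++ *
++ * 		switch (skops->op) {
++ * 		case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
++ * 			bpf_reserve_hdr_opt(skops, sizeof(opt), 0);
++ * 			break;
++ * 		case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
++ * 			bpf_store_hdr_opt(skops, opt, sizeof(opt), 0);
++ * 			break;
++ * 		}
++ * 		return 1;
++ * 	}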
++ */ ++static long (*bpf_store_hdr_opt)(struct bpf_sock_ops *skops, const void *from, __u32 len, __u64 flags) = (void *) 143; ++ ++/* ++ * bpf_reserve_hdr_opt ++ * ++ * Reserve *len* bytes for the bpf header option. The ++ * space will be used by **bpf_store_hdr_opt**\ () later in ++ * **BPF_SOCK_OPS_WRITE_HDR_OPT_CB**. ++ * ++ * If **bpf_reserve_hdr_opt**\ () is called multiple times, ++ * the total number of bytes will be reserved. ++ * ++ * This helper can only be called during ++ * **BPF_SOCK_OPS_HDR_OPT_LEN_CB**. ++ * ++ * ++ * Returns ++ * 0 on success, or negative error in case of failure: ++ * ++ * **-EINVAL** if a parameter is invalid. ++ * ++ * **-ENOSPC** if there is not enough space in the header. ++ * ++ * **-EPERM** if the helper cannot be used under the current ++ * *skops*\ **->op**. ++ */ ++static long (*bpf_reserve_hdr_opt)(struct bpf_sock_ops *skops, __u32 len, __u64 flags) = (void *) 144; ++ ++/* ++ * bpf_inode_storage_get ++ * ++ * Get a bpf_local_storage from an *inode*. ++ * ++ * Logically, it could be thought of as getting the value from ++ * a *map* with *inode* as the **key**. From this ++ * perspective, the usage is not much different from ++ * **bpf_map_lookup_elem**\ (*map*, **&**\ *inode*) except this ++ * helper enforces the key must be an inode and the map must also ++ * be a **BPF_MAP_TYPE_INODE_STORAGE**. ++ * ++ * Underneath, the value is stored locally at *inode* instead of ++ * the *map*. The *map* is used as the bpf-local-storage ++ * "type". The bpf-local-storage "type" (i.e. the *map*) is ++ * searched against all bpf_local_storage residing at *inode*. ++ * ++ * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be ++ * used such that a new bpf_local_storage will be ++ * created if one does not exist. *value* can be used ++ * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify ++ * the initial value of a bpf_local_storage. If *value* is ++ * **NULL**, the new bpf_local_storage will be zero initialized. ++ * ++ * Returns ++ * A bpf_local_storage pointer is returned on success. ++ * ++ * **NULL** if not found or there was an error in adding ++ * a new bpf_local_storage. ++ */ ++static void *(*bpf_inode_storage_get)(void *map, void *inode, void *value, __u64 flags) = (void *) 145; ++ ++/* ++ * bpf_inode_storage_delete ++ * ++ * Delete a bpf_local_storage from an *inode*. ++ * ++ * Returns ++ * 0 on success. ++ * ++ * **-ENOENT** if the bpf_local_storage cannot be found. ++ */ ++static int (*bpf_inode_storage_delete)(void *map, void *inode) = (void *) 146; ++ ++/* ++ * bpf_d_path ++ * ++ * Return full path for given **struct path** object, which ++ * needs to be the kernel BTF *path* object. The path is ++ * returned in the provided buffer *buf* of size *sz* and ++ * is zero terminated. ++ * ++ * ++ * Returns ++ * On success, the strictly positive length of the string, ++ * including the trailing NUL character. On error, a negative ++ * value. ++ */ ++static long (*bpf_d_path)(struct path *path, char *buf, __u32 sz) = (void *) 147; ++ ++/* ++ * bpf_copy_from_user ++ * ++ * Read *size* bytes from user space address *user_ptr* and store ++ * the data in *dst*. This is a wrapper of **copy_from_user**\ (). ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_copy_from_user)(void *dst, __u32 size, const void *user_ptr) = (void *) 148; ++ ++/* ++ * bpf_snprintf_btf ++ * ++ * Use BTF to store a string representation of *ptr*->ptr in *str*, ++ * using *ptr*->type_id. 
This value should specify the type ++ * that *ptr*->ptr points to. LLVM __builtin_btf_type_id(type, 1) ++ * can be used to look up vmlinux BTF type ids. Traversing the ++ * data structure using BTF, the type information and values are ++ * stored in the first *str_size* - 1 bytes of *str*. Safe copy of ++ * the pointer data is carried out to avoid kernel crashes during ++ * operation. Smaller types can use string space on the stack; ++ * larger programs can use map data to store the string ++ * representation. ++ * ++ * The string can be subsequently shared with userspace via ++ * bpf_perf_event_output() or ring buffer interfaces. ++ * bpf_trace_printk() is to be avoided as it places too small ++ * a limit on string size to be useful. ++ * ++ * *flags* is a combination of ++ * ++ * **BTF_F_COMPACT** ++ * no formatting around type information ++ * **BTF_F_NONAME** ++ * no struct/union member names/types ++ * **BTF_F_PTR_RAW** ++ * show raw (unobfuscated) pointer values; ++ * equivalent to printk specifier %px. ++ * **BTF_F_ZERO** ++ * show zero-valued struct/union members; they ++ * are not displayed by default ++ * ++ * ++ * Returns ++ * The number of bytes that were written (or would have been ++ * written if output had to be truncated due to string size), ++ * or a negative error in cases of failure. ++ */ ++static long (*bpf_snprintf_btf)(char *str, __u32 str_size, struct btf_ptr *ptr, __u32 btf_ptr_size, __u64 flags) = (void *) 149; ++ ++/* ++ * bpf_seq_printf_btf ++ * ++ * Use BTF to write to seq_write a string representation of ++ * *ptr*->ptr, using *ptr*->type_id as per bpf_snprintf_btf(). ++ * *flags* are identical to those used for bpf_snprintf_btf. ++ * ++ * Returns ++ * 0 on success or a negative error in case of failure. ++ */ ++static long (*bpf_seq_printf_btf)(struct seq_file *m, struct btf_ptr *ptr, __u32 ptr_size, __u64 flags) = (void *) 150; ++ ++/* ++ * bpf_skb_cgroup_classid ++ * ++ * See **bpf_get_cgroup_classid**\ () for the main description. ++ * This helper differs from **bpf_get_cgroup_classid**\ () in that ++ * the cgroup v1 net_cls class is retrieved only from the *skb*'s ++ * associated socket instead of the current process. ++ * ++ * Returns ++ * The id is returned or 0 in case the id could not be retrieved. ++ */ ++static __u64 (*bpf_skb_cgroup_classid)(struct __sk_buff *skb) = (void *) 151; ++ ++/* ++ * bpf_redirect_neigh ++ * ++ * Redirect the packet to another net device of index *ifindex* ++ * and fill in L2 addresses from neighboring subsystem. This helper ++ * is somewhat similar to **bpf_redirect**\ (), except that it ++ * populates L2 addresses as well, meaning, internally, the helper ++ * relies on the neighbor lookup for the L2 address of the nexthop. ++ * ++ * The helper will perform a FIB lookup based on the skb's ++ * networking header to get the address of the next hop, unless ++ * this is supplied by the caller in the *params* argument. The ++ * *plen* argument indicates the len of *params* and should be set ++ * to 0 if *params* is NULL. ++ * ++ * The *flags* argument is reserved and must be 0. The helper is ++ * currently only supported for tc BPF program types, and enabled ++ * for IPv4 and IPv6 protocols. ++ * ++ * Returns ++ * The helper returns **TC_ACT_REDIRECT** on success or ++ * **TC_ACT_SHOT** on error. 
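++ *
++ * Illustrative sketch (editorial addition, not part of the upstream
++ * helper description): a minimal tc classifier that forwards every
++ * packet out of interface index 2, letting the kernel resolve the
++ * nexthop L2 address. The ifindex value and the attach point are
++ * assumptions made for the example.
++ *
++ * ::
++ *
++ * 	SEC("tc")
++ * 	int fwd(struct __sk_buff *skb)
++ * 	{
++ * 		// NULL params with plen == 0: rely on the kernel FIB lookup
++ * 		return bpf_redirect_neigh(2, NULL, 0, 0);
++ * 	}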
++ */ ++static long (*bpf_redirect_neigh)(__u32 ifindex, struct bpf_redir_neigh *params, int plen, __u64 flags) = (void *) 152; ++ ++/* ++ * bpf_per_cpu_ptr ++ * ++ * Take a pointer to a percpu ksym, *percpu_ptr*, and return a ++ * pointer to the percpu kernel variable on *cpu*. A ksym is an ++ * extern variable decorated with '__ksym'. For ksym, there is a ++ * global var (either static or global) defined of the same name ++ * in the kernel. The ksym is percpu if the global var is percpu. ++ * The returned pointer points to the global percpu var on *cpu*. ++ * ++ * bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the ++ * kernel, except that bpf_per_cpu_ptr() may return NULL. This ++ * happens if *cpu* is larger than nr_cpu_ids. The caller of ++ * bpf_per_cpu_ptr() must check the returned value. ++ * ++ * Returns ++ * A pointer pointing to the kernel percpu variable on *cpu*, or ++ * NULL, if *cpu* is invalid. ++ */ ++static void *(*bpf_per_cpu_ptr)(const void *percpu_ptr, __u32 cpu) = (void *) 153; ++ ++/* ++ * bpf_this_cpu_ptr ++ * ++ * Take a pointer to a percpu ksym, *percpu_ptr*, and return a ++ * pointer to the percpu kernel variable on this cpu. See the ++ * description of 'ksym' in **bpf_per_cpu_ptr**\ (). ++ * ++ * bpf_this_cpu_ptr() has the same semantic as this_cpu_ptr() in ++ * the kernel. Different from **bpf_per_cpu_ptr**\ (), it would ++ * never return NULL. ++ * ++ * Returns ++ * A pointer pointing to the kernel percpu variable on this cpu. ++ */ ++static void *(*bpf_this_cpu_ptr)(const void *percpu_ptr) = (void *) 154; ++ ++/* ++ * bpf_redirect_peer ++ * ++ * Redirect the packet to another net device of index *ifindex*. ++ * This helper is somewhat similar to **bpf_redirect**\ (), except ++ * that the redirection happens to the *ifindex*' peer device and ++ * the netns switch takes place from ingress to ingress without ++ * going through the CPU's backlog queue. ++ * ++ * The *flags* argument is reserved and must be 0. The helper is ++ * currently only supported for tc BPF program types at the ingress ++ * hook and for veth device types. The peer device must reside in a ++ * different network namespace. ++ * ++ * Returns ++ * The helper returns **TC_ACT_REDIRECT** on success or ++ * **TC_ACT_SHOT** on error. ++ */ ++static long (*bpf_redirect_peer)(__u32 ifindex, __u64 flags) = (void *) 155; ++ ++/* ++ * bpf_task_storage_get ++ * ++ * Get a bpf_local_storage from the *task*. ++ * ++ * Logically, it could be thought of as getting the value from ++ * a *map* with *task* as the **key**. From this ++ * perspective, the usage is not much different from ++ * **bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this ++ * helper enforces the key must be an task_struct and the map must also ++ * be a **BPF_MAP_TYPE_TASK_STORAGE**. ++ * ++ * Underneath, the value is stored locally at *task* instead of ++ * the *map*. The *map* is used as the bpf-local-storage ++ * "type". The bpf-local-storage "type" (i.e. the *map*) is ++ * searched against all bpf_local_storage residing at *task*. ++ * ++ * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be ++ * used such that a new bpf_local_storage will be ++ * created if one does not exist. *value* can be used ++ * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify ++ * the initial value of a bpf_local_storage. If *value* is ++ * **NULL**, the new bpf_local_storage will be zero initialized. ++ * ++ * Returns ++ * A bpf_local_storage pointer is returned on success. 
++ * ++ * **NULL** if not found or there was an error in adding ++ * a new bpf_local_storage. ++ */ ++static void *(*bpf_task_storage_get)(void *map, struct task_struct *task, void *value, __u64 flags) = (void *) 156; ++ ++/* ++ * bpf_task_storage_delete ++ * ++ * Delete a bpf_local_storage from a *task*. ++ * ++ * Returns ++ * 0 on success. ++ * ++ * **-ENOENT** if the bpf_local_storage cannot be found. ++ */ ++static long (*bpf_task_storage_delete)(void *map, struct task_struct *task) = (void *) 157; ++ ++/* ++ * bpf_get_current_task_btf ++ * ++ * Return a BTF pointer to the "current" task. ++ * This pointer can also be used in helpers that accept an ++ * *ARG_PTR_TO_BTF_ID* of type *task_struct*. ++ * ++ * Returns ++ * Pointer to the current task. ++ */ ++static struct task_struct *(*bpf_get_current_task_btf)(void) = (void *) 158; ++ ++/* ++ * bpf_bprm_opts_set ++ * ++ * Set or clear certain options on *bprm*: ++ * ++ * **BPF_F_BPRM_SECUREEXEC** Set the secureexec bit ++ * which sets the **AT_SECURE** auxv for glibc. The bit ++ * is cleared if the flag is not specified. ++ * ++ * Returns ++ * **-EINVAL** if invalid *flags* are passed, zero otherwise. ++ */ ++static long (*bpf_bprm_opts_set)(struct linux_binprm *bprm, __u64 flags) = (void *) 159; ++ ++/* ++ * bpf_ktime_get_coarse_ns ++ * ++ * Return a coarse-grained version of the time elapsed since ++ * system boot, in nanoseconds. Does not include time the system ++ * was suspended. ++ * ++ * See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**) ++ * ++ * Returns ++ * Current *ktime*. ++ */ ++static __u64 (*bpf_ktime_get_coarse_ns)(void) = (void *) 160; ++ ++/* ++ * bpf_ima_inode_hash ++ * ++ * Returns the stored IMA hash of the *inode* (if it's avaialable). ++ * If the hash is larger than *size*, then only *size* ++ * bytes will be copied to *dst* ++ * ++ * Returns ++ * The **hash_algo** is returned on success, ++ * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if ++ * invalid arguments are passed. ++ */ ++static long (*bpf_ima_inode_hash)(struct inode *inode, void *dst, __u32 size) = (void *) 161; ++ ++/* ++ * bpf_sock_from_file ++ * ++ * If the given file represents a socket, returns the associated ++ * socket. ++ * ++ * Returns ++ * A pointer to a struct socket on success or NULL if the file is ++ * not a socket. ++ */ ++static struct socket *(*bpf_sock_from_file)(struct file *file) = (void *) 162; ++ ++/* ++ * bpf_check_mtu ++ * ++ * Check packet size against exceeding MTU of net device (based ++ * on *ifindex*). This helper will likely be used in combination ++ * with helpers that adjust/change the packet size. ++ * ++ * The argument *len_diff* can be used for querying with a planned ++ * size change. This allows to check MTU prior to changing packet ++ * ctx. Providing an *len_diff* adjustment that is larger than the ++ * actual packet size (resulting in negative packet size) will in ++ * principle not exceed the MTU, why it is not considered a ++ * failure. Other BPF-helpers are needed for performing the ++ * planned size change, why the responsability for catch a negative ++ * packet size belong in those helpers. ++ * ++ * Specifying *ifindex* zero means the MTU check is performed ++ * against the current net device. This is practical if this isn't ++ * used prior to redirect. ++ * ++ * On input *mtu_len* must be a valid pointer, else verifier will ++ * reject BPF program. If the value *mtu_len* is initialized to ++ * zero then the ctx packet size is use. 
When value *mtu_len* is ++ * provided as input this specify the L3 length that the MTU check ++ * is done against. Remember XDP and TC length operate at L2, but ++ * this value is L3 as this correlate to MTU and IP-header tot_len ++ * values which are L3 (similar behavior as bpf_fib_lookup). ++ * ++ * The Linux kernel route table can configure MTUs on a more ++ * specific per route level, which is not provided by this helper. ++ * For route level MTU checks use the **bpf_fib_lookup**\ () ++ * helper. ++ * ++ * *ctx* is either **struct xdp_md** for XDP programs or ++ * **struct sk_buff** for tc cls_act programs. ++ * ++ * The *flags* argument can be a combination of one or more of the ++ * following values: ++ * ++ * **BPF_MTU_CHK_SEGS** ++ * This flag will only works for *ctx* **struct sk_buff**. ++ * If packet context contains extra packet segment buffers ++ * (often knows as GSO skb), then MTU check is harder to ++ * check at this point, because in transmit path it is ++ * possible for the skb packet to get re-segmented ++ * (depending on net device features). This could still be ++ * a MTU violation, so this flag enables performing MTU ++ * check against segments, with a different violation ++ * return code to tell it apart. Check cannot use len_diff. ++ * ++ * On return *mtu_len* pointer contains the MTU value of the net ++ * device. Remember the net device configured MTU is the L3 size, ++ * which is returned here and XDP and TC length operate at L2. ++ * Helper take this into account for you, but remember when using ++ * MTU value in your BPF-code. ++ * ++ * ++ * Returns ++ * * 0 on success, and populate MTU value in *mtu_len* pointer. ++ * ++ * * < 0 if any input argument is invalid (*mtu_len* not updated) ++ * ++ * MTU violations return positive values, but also populate MTU ++ * value in *mtu_len* pointer, as this can be needed for ++ * implementing PMTU handing: ++ * ++ * * **BPF_MTU_CHK_RET_FRAG_NEEDED** ++ * * **BPF_MTU_CHK_RET_SEGS_TOOBIG** ++ */ ++static long (*bpf_check_mtu)(void *ctx, __u32 ifindex, __u32 *mtu_len, __s32 len_diff, __u64 flags) = (void *) 163; ++ ++/* ++ * bpf_for_each_map_elem ++ * ++ * For each element in **map**, call **callback_fn** function with ++ * **map**, **callback_ctx** and other map-specific parameters. ++ * The **callback_fn** should be a static function and ++ * the **callback_ctx** should be a pointer to the stack. ++ * The **flags** is used to control certain aspects of the helper. ++ * Currently, the **flags** must be 0. ++ * ++ * The following are a list of supported map types and their ++ * respective expected callback signatures: ++ * ++ * BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH, ++ * BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH, ++ * BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY ++ * ++ * long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx); ++ * ++ * For per_cpu maps, the map_value is the value on the cpu where the ++ * bpf_prog is running. ++ * ++ * If **callback_fn** return 0, the helper will continue to the next ++ * element. If return value is 1, the helper will skip the rest of ++ * elements and return. Other return values are not used now. ++ * ++ * ++ * Returns ++ * The number of traversed map elements for success, **-EINVAL** for ++ * invalid **flags**. 
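++ *
++ * Illustrative sketch (editorial addition, not part of the upstream
++ * helper description): summing the values of an array map with a
++ * static callback. The map name, value type, attach point and the
++ * availability of **struct bpf_map** (e.g. via vmlinux.h) are
++ * assumptions made for the example.
++ *
++ * ::
++ *
++ * 	struct {
++ * 		__uint(type, BPF_MAP_TYPE_ARRAY);
++ * 		__uint(max_entries, 16);
++ * 		__type(key, __u32);
++ * 		__type(value, __u64);
++ * 	} counters SEC(".maps");
++ *
++ * 	struct cb_ctx { __u64 sum; };
++ *
++ * 	static long sum_cb(struct bpf_map *map, const void *key,
++ * 			   void *value, void *ctx)
++ * 	{
++ * 		((struct cb_ctx *)ctx)->sum += *(__u64 *)value;
++ * 		return 0; // 0: continue with the next element
++ * 	}
++ *
++ * 	SEC("tracepoint/syscalls/sys_enter_getpid")
++ * 	int sum_counters(void *tp_ctx)
++ * 	{
++ * 		struct cb_ctx c = { .sum = 0 };
++ *
++ * 		bpf_for_each_map_elem(&counters, sum_cb, &c, 0);
++ * 		return 0;
++ * 	}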
++ */ ++static long (*bpf_for_each_map_elem)(void *map, void *callback_fn, void *callback_ctx, __u64 flags) = (void *) 164; ++ ++/* ++ * bpf_snprintf ++ * ++ * Outputs a string into the **str** buffer of size **str_size** ++ * based on a format string stored in a read-only map pointed by ++ * **fmt**. ++ * ++ * Each format specifier in **fmt** corresponds to one u64 element ++ * in the **data** array. For strings and pointers where pointees ++ * are accessed, only the pointer values are stored in the *data* ++ * array. The *data_len* is the size of *data* in bytes - must be ++ * a multiple of 8. ++ * ++ * Formats **%s** and **%p{i,I}{4,6}** require to read kernel ++ * memory. Reading kernel memory may fail due to either invalid ++ * address or valid address but requiring a major memory fault. If ++ * reading kernel memory fails, the string for **%s** will be an ++ * empty string, and the ip address for **%p{i,I}{4,6}** will be 0. ++ * Not returning error to bpf program is consistent with what ++ * **bpf_trace_printk**\ () does for now. ++ * ++ * ++ * Returns ++ * The strictly positive length of the formatted string, including ++ * the trailing zero character. If the return value is greater than ++ * **str_size**, **str** contains a truncated string, guaranteed to ++ * be zero-terminated except when **str_size** is 0. ++ * ++ * Or **-EBUSY** if the per-CPU memory copy buffer is busy. ++ */ ++static long (*bpf_snprintf)(char *str, __u32 str_size, const char *fmt, __u64 *data, __u32 data_len) = (void *) 165; ++ ++/* ++ * bpf_sys_bpf ++ * ++ * Execute bpf syscall with given arguments. ++ * ++ * Returns ++ * A syscall result. ++ */ ++static long (*bpf_sys_bpf)(__u32 cmd, void *attr, __u32 attr_size) = (void *) 166; ++ ++/* ++ * bpf_btf_find_by_name_kind ++ * ++ * Find BTF type with given name and kind in vmlinux BTF or in module's BTFs. ++ * ++ * Returns ++ * Returns btf_id and btf_obj_fd in lower and upper 32 bits. ++ */ ++static long (*bpf_btf_find_by_name_kind)(char *name, int name_sz, __u32 kind, int flags) = (void *) 167; ++ ++/* ++ * bpf_sys_close ++ * ++ * Execute close syscall for given FD. ++ * ++ * Returns ++ * A syscall result. ++ */ ++static long (*bpf_sys_close)(__u32 fd) = (void *) 168; ++ ++/* ++ * bpf_timer_init ++ * ++ * Initialize the timer. ++ * First 4 bits of *flags* specify clockid. ++ * Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed. ++ * All other bits of *flags* are reserved. ++ * The verifier will reject the program if *timer* is not from ++ * the same *map*. ++ * ++ * Returns ++ * 0 on success. ++ * **-EBUSY** if *timer* is already initialized. ++ * **-EINVAL** if invalid *flags* are passed. ++ * **-EPERM** if *timer* is in a map that doesn't have any user references. ++ * The user space should either hold a file descriptor to a map with timers ++ * or pin such map in bpffs. When map is unpinned or file descriptor is ++ * closed all timers in the map will be cancelled and freed. ++ */ ++static long (*bpf_timer_init)(struct bpf_timer *timer, void *map, __u64 flags) = (void *) 169; ++ ++/* ++ * bpf_timer_set_callback ++ * ++ * Configure the timer to call *callback_fn* static function. ++ * ++ * Returns ++ * 0 on success. ++ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. ++ * **-EPERM** if *timer* is in a map that doesn't have any user references. ++ * The user space should either hold a file descriptor to a map with timers ++ * or pin such map in bpffs. 
When map is unpinned or file descriptor is ++ * closed all timers in the map will be cancelled and freed. ++ */ ++static long (*bpf_timer_set_callback)(struct bpf_timer *timer, void *callback_fn) = (void *) 170; ++ ++/* ++ * bpf_timer_start ++ * ++ * Set timer expiration N nanoseconds from the current time. The ++ * configured callback will be invoked in soft irq context on some cpu ++ * and will not repeat unless another bpf_timer_start() is made. ++ * In such case the next invocation can migrate to a different cpu. ++ * Since struct bpf_timer is a field inside map element the map ++ * owns the timer. The bpf_timer_set_callback() will increment refcnt ++ * of BPF program to make sure that callback_fn code stays valid. ++ * When user space reference to a map reaches zero all timers ++ * in a map are cancelled and corresponding program's refcnts are ++ * decremented. This is done to make sure that Ctrl-C of a user ++ * process doesn't leave any timers running. If map is pinned in ++ * bpffs the callback_fn can re-arm itself indefinitely. ++ * bpf_map_update/delete_elem() helpers and user space sys_bpf commands ++ * cancel and free the timer in the given map element. ++ * The map can contain timers that invoke callback_fn-s from different ++ * programs. The same callback_fn can serve different timers from ++ * different maps if key/value layout matches across maps. ++ * Every bpf_timer_set_callback() can have different callback_fn. ++ * ++ * ++ * Returns ++ * 0 on success. ++ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier ++ * or invalid *flags* are passed. ++ */ ++static long (*bpf_timer_start)(struct bpf_timer *timer, __u64 nsecs, __u64 flags) = (void *) 171; ++ ++/* ++ * bpf_timer_cancel ++ * ++ * Cancel the timer and wait for callback_fn to finish if it was running. ++ * ++ * Returns ++ * 0 if the timer was not active. ++ * 1 if the timer was active. ++ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. ++ * **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its ++ * own timer which would have led to a deadlock otherwise. ++ */ ++static long (*bpf_timer_cancel)(struct bpf_timer *timer) = (void *) 172; ++ ++/* ++ * bpf_get_func_ip ++ * ++ * Get address of the traced function (for tracing and kprobe programs). ++ * ++ * Returns ++ * Address of the traced function. ++ */ ++static __u64 (*bpf_get_func_ip)(void *ctx) = (void *) 173; ++ ++/* ++ * bpf_get_attach_cookie ++ * ++ * Get bpf_cookie value provided (optionally) during the program ++ * attachment. It might be different for each individual ++ * attachment, even if BPF program itself is the same. ++ * Expects BPF program context *ctx* as a first argument. ++ * ++ * Supported for the following program types: ++ * - kprobe/uprobe; ++ * - tracepoint; ++ * - perf_event. ++ * ++ * Returns ++ * Value specified by user at BPF link creation/attachment time ++ * or 0, if it was not specified. ++ */ ++static __u64 (*bpf_get_attach_cookie)(void *ctx) = (void *) 174; ++ ++/* ++ * bpf_task_pt_regs ++ * ++ * Get the struct pt_regs associated with **task**. ++ * ++ * Returns ++ * A pointer to struct pt_regs. ++ */ ++static long (*bpf_task_pt_regs)(struct task_struct *task) = (void *) 175; ++ ++/* ++ * bpf_get_branch_snapshot ++ * ++ * Get branch trace from hardware engines like Intel LBR. The ++ * hardware engine is stopped shortly after the helper is ++ * called. Therefore, the user need to filter branch entries ++ * based on the actual use case. 
To capture branch trace ++ * before the trigger point of the BPF program, the helper ++ * should be called at the beginning of the BPF program. ++ * ++ * The data is stored as struct perf_branch_entry into output ++ * buffer *entries*. *size* is the size of *entries* in bytes. ++ * *flags* is reserved for now and must be zero. ++ * ++ * ++ * Returns ++ * On success, number of bytes written to *buf*. On error, a ++ * negative value. ++ * ++ * **-EINVAL** if *flags* is not zero. ++ * ++ * **-ENOENT** if architecture does not support branch records. ++ */ ++static long (*bpf_get_branch_snapshot)(void *entries, __u32 size, __u64 flags) = (void *) 176; ++ ++/* ++ * bpf_trace_vprintk ++ * ++ * Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64 ++ * to format and can handle more format args as a result. ++ * ++ * Arguments are to be used as in **bpf_seq_printf**\ () helper. ++ * ++ * Returns ++ * The number of bytes written to the buffer, or a negative error ++ * in case of failure. ++ */ ++static long (*bpf_trace_vprintk)(const char *fmt, __u32 fmt_size, const void *data, __u32 data_len) = (void *) 177; ++ ++/* ++ * bpf_skc_to_unix_sock ++ * ++ * Dynamically cast a *sk* pointer to a *unix_sock* pointer. ++ * ++ * Returns ++ * *sk* if casting is valid, or **NULL** otherwise. ++ */ ++static struct unix_sock *(*bpf_skc_to_unix_sock)(void *sk) = (void *) 178; ++ ++/* ++ * bpf_kallsyms_lookup_name ++ * ++ * Get the address of a kernel symbol, returned in *res*. *res* is ++ * set to 0 if the symbol is not found. ++ * ++ * Returns ++ * On success, zero. On error, a negative value. ++ * ++ * **-EINVAL** if *flags* is not zero. ++ * ++ * **-EINVAL** if string *name* is not the same size as *name_sz*. ++ * ++ * **-ENOENT** if symbol is not found. ++ * ++ * **-EPERM** if caller does not have permission to obtain kernel address. ++ */ ++static long (*bpf_kallsyms_lookup_name)(const char *name, int name_sz, int flags, __u64 *res) = (void *) 179; ++ ++/* ++ * bpf_find_vma ++ * ++ * Find vma of *task* that contains *addr*, call *callback_fn* ++ * function with *task*, *vma*, and *callback_ctx*. ++ * The *callback_fn* should be a static function and ++ * the *callback_ctx* should be a pointer to the stack. ++ * The *flags* is used to control certain aspects of the helper. ++ * Currently, the *flags* must be 0. ++ * ++ * The expected callback signature is ++ * ++ * long (\*callback_fn)(struct task_struct \*task, struct vm_area_struct \*vma, void \*callback_ctx); ++ * ++ * ++ * Returns ++ * 0 on success. ++ * **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*. ++ * **-EBUSY** if failed to try lock mmap_lock. ++ * **-EINVAL** for invalid **flags**. ++ */ ++static long (*bpf_find_vma)(struct task_struct *task, __u64 addr, void *callback_fn, void *callback_ctx, __u64 flags) = (void *) 180; ++ ++/* ++ * bpf_loop ++ * ++ * For **nr_loops**, call **callback_fn** function ++ * with **callback_ctx** as the context parameter. ++ * The **callback_fn** should be a static function and ++ * the **callback_ctx** should be a pointer to the stack. ++ * The **flags** is used to control certain aspects of the helper. ++ * Currently, the **flags** must be 0. Currently, nr_loops is ++ * limited to 1 << 23 (~8 million) loops. ++ * ++ * long (\*callback_fn)(u32 index, void \*ctx); ++ * ++ * where **index** is the current index in the loop. The index ++ * is zero-indexed. ++ * ++ * If **callback_fn** returns 0, the helper will continue to the next ++ * loop. 
If return value is 1, the helper will skip the rest of ++ * the loops and return. Other return values are not used now, ++ * and will be rejected by the verifier. ++ * ++ * ++ * Returns ++ * The number of loops performed, **-EINVAL** for invalid **flags**, ++ * **-E2BIG** if **nr_loops** exceeds the maximum number of loops. ++ */ ++static long (*bpf_loop)(__u32 nr_loops, void *callback_fn, void *callback_ctx, __u64 flags) = (void *) 181; ++ ++/* ++ * bpf_strncmp ++ * ++ * Do strncmp() between **s1** and **s2**. **s1** doesn't need ++ * to be null-terminated and **s1_sz** is the maximum storage ++ * size of **s1**. **s2** must be a read-only string. ++ * ++ * Returns ++ * An integer less than, equal to, or greater than zero ++ * if the first **s1_sz** bytes of **s1** is found to be ++ * less than, to match, or be greater than **s2**. ++ */ ++static long (*bpf_strncmp)(const char *s1, __u32 s1_sz, const char *s2) = (void *) 182; ++ ++/* ++ * bpf_get_func_arg ++ * ++ * Get **n**-th argument (zero based) of the traced function (for tracing programs) ++ * returned in **value**. ++ * ++ * ++ * Returns ++ * 0 on success. ++ * **-EINVAL** if n >= arguments count of traced function. ++ */ ++static long (*bpf_get_func_arg)(void *ctx, __u32 n, __u64 *value) = (void *) 183; ++ ++/* ++ * bpf_get_func_ret ++ * ++ * Get return value of the traced function (for tracing programs) ++ * in **value**. ++ * ++ * ++ * Returns ++ * 0 on success. ++ * **-EOPNOTSUPP** for tracing programs other than BPF_TRACE_FEXIT or BPF_MODIFY_RETURN. ++ */ ++static long (*bpf_get_func_ret)(void *ctx, __u64 *value) = (void *) 184; ++ ++/* ++ * bpf_get_func_arg_cnt ++ * ++ * Get number of arguments of the traced function (for tracing programs). ++ * ++ * ++ * Returns ++ * The number of arguments of the traced function. ++ */ ++static long (*bpf_get_func_arg_cnt)(void *ctx) = (void *) 185; ++ ++/* ++ * bpf_get_retval ++ * ++ * Get the syscall's return value that will be returned to userspace. ++ * ++ * This helper is currently supported by cgroup programs only. ++ * ++ * Returns ++ * The syscall's return value. ++ */ ++static int (*bpf_get_retval)(void) = (void *) 186; ++ ++/* ++ * bpf_set_retval ++ * ++ * Set the syscall's return value that will be returned to userspace. ++ * ++ * This helper is currently supported by cgroup programs only. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static int (*bpf_set_retval)(int retval) = (void *) 187; ++ ++/* ++ * bpf_xdp_get_buff_len ++ * ++ * Get the total size of a given xdp buff (linear and paged area) ++ * ++ * Returns ++ * The total size of a given xdp buffer. ++ */ ++static __u64 (*bpf_xdp_get_buff_len)(struct xdp_md *xdp_md) = (void *) 188; ++ ++/* ++ * bpf_xdp_load_bytes ++ * ++ * This helper is provided as an easy way to load data from a ++ * xdp buffer. It can be used to load *len* bytes from *offset* from ++ * the frame associated to *xdp_md*, into the buffer pointed by ++ * *buf*. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_xdp_load_bytes)(struct xdp_md *xdp_md, __u32 offset, void *buf, __u32 len) = (void *) 189; ++ ++/* ++ * bpf_xdp_store_bytes ++ * ++ * Store *len* bytes from buffer *buf* into the frame ++ * associated to *xdp_md*, at *offset*. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. 
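++ *
++ * Illustrative sketch (editorial addition, not part of the upstream
++ * helper description): copying the Ethernet header out of a possibly
++ * multi-buffer frame and writing it back. The 14-byte header length
++ * is an assumption made for the example.
++ *
++ * ::
++ *
++ * 	SEC("xdp")
++ * 	int touch_eth(struct xdp_md *xdp)
++ * 	{
++ * 		__u8 eth[14];
++ *
++ * 		if (bpf_xdp_load_bytes(xdp, 0, eth, sizeof(eth)))
++ * 			return XDP_PASS;
++ *
++ * 		// eth[] may be inspected or rewritten here ...
++ * 		bpf_xdp_store_bytes(xdp, 0, eth, sizeof(eth));
++ * 		return XDP_PASS;
++ * 	}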
++ */ ++static long (*bpf_xdp_store_bytes)(struct xdp_md *xdp_md, __u32 offset, void *buf, __u32 len) = (void *) 190; ++ ++/* ++ * bpf_copy_from_user_task ++ * ++ * Read *size* bytes from user space address *user_ptr* in *tsk*'s ++ * address space, and stores the data in *dst*. *flags* is not ++ * used yet and is provided for future extensibility. This helper ++ * can only be used by sleepable programs. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. On error ++ * *dst* buffer is zeroed out. ++ */ ++static long (*bpf_copy_from_user_task)(void *dst, __u32 size, const void *user_ptr, struct task_struct *tsk, __u64 flags) = (void *) 191; ++ ++/* ++ * bpf_skb_set_tstamp ++ * ++ * Change the __sk_buff->tstamp_type to *tstamp_type* ++ * and set *tstamp* to the __sk_buff->tstamp together. ++ * ++ * If there is no need to change the __sk_buff->tstamp_type, ++ * the tstamp value can be directly written to __sk_buff->tstamp ++ * instead. ++ * ++ * BPF_SKB_TSTAMP_DELIVERY_MONO is the only tstamp that ++ * will be kept during bpf_redirect_*(). A non zero ++ * *tstamp* must be used with the BPF_SKB_TSTAMP_DELIVERY_MONO ++ * *tstamp_type*. ++ * ++ * A BPF_SKB_TSTAMP_UNSPEC *tstamp_type* can only be used ++ * with a zero *tstamp*. ++ * ++ * Only IPv4 and IPv6 skb->protocol are supported. ++ * ++ * This function is most useful when it needs to set a ++ * mono delivery time to __sk_buff->tstamp and then ++ * bpf_redirect_*() to the egress of an iface. For example, ++ * changing the (rcv) timestamp in __sk_buff->tstamp at ++ * ingress to a mono delivery time and then bpf_redirect_*() ++ * to sch_fq@phy-dev. ++ * ++ * Returns ++ * 0 on success. ++ * **-EINVAL** for invalid input ++ * **-EOPNOTSUPP** for unsupported protocol ++ */ ++static long (*bpf_skb_set_tstamp)(struct __sk_buff *skb, __u64 tstamp, __u32 tstamp_type) = (void *) 192; ++ ++/* ++ * bpf_ima_file_hash ++ * ++ * Returns a calculated IMA hash of the *file*. ++ * If the hash is larger than *size*, then only *size* ++ * bytes will be copied to *dst* ++ * ++ * Returns ++ * The **hash_algo** is returned on success, ++ * **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if ++ * invalid arguments are passed. ++ */ ++static long (*bpf_ima_file_hash)(struct file *file, void *dst, __u32 size) = (void *) 193; ++ ++/* ++ * bpf_kptr_xchg ++ * ++ * Exchange kptr at pointer *map_value* with *ptr*, and return the ++ * old value. *ptr* can be NULL, otherwise it must be a referenced ++ * pointer which will be released when this helper is called. ++ * ++ * Returns ++ * The old value of kptr (which can be NULL). The returned pointer ++ * if not NULL, is a reference which must be released using its ++ * corresponding release function, or moved into a BPF map before ++ * program exit. ++ */ ++static void *(*bpf_kptr_xchg)(void *map_value, void *ptr) = (void *) 194; ++ ++/* ++ * bpf_map_lookup_percpu_elem ++ * ++ * Perform a lookup in *percpu map* for an entry associated to ++ * *key* on *cpu*. ++ * ++ * Returns ++ * Map value associated to *key* on *cpu*, or **NULL** if no entry ++ * was found or *cpu* is invalid. ++ */ ++static void *(*bpf_map_lookup_percpu_elem)(void *map, const void *key, __u32 cpu) = (void *) 195; ++ ++/* ++ * bpf_skc_to_mptcp_sock ++ * ++ * Dynamically cast a *sk* pointer to a *mptcp_sock* pointer. ++ * ++ * Returns ++ * *sk* if casting is valid, or **NULL** otherwise. 
++ */ ++static struct mptcp_sock *(*bpf_skc_to_mptcp_sock)(void *sk) = (void *) 196; ++ ++/* ++ * bpf_dynptr_from_mem ++ * ++ * Get a dynptr to local memory *data*. ++ * ++ * *data* must be a ptr to a map value. ++ * The maximum *size* supported is DYNPTR_MAX_SIZE. ++ * *flags* is currently unused. ++ * ++ * Returns ++ * 0 on success, -E2BIG if the size exceeds DYNPTR_MAX_SIZE, ++ * -EINVAL if flags is not 0. ++ */ ++static long (*bpf_dynptr_from_mem)(void *data, __u32 size, __u64 flags, struct bpf_dynptr *ptr) = (void *) 197; ++ ++/* ++ * bpf_ringbuf_reserve_dynptr ++ * ++ * Reserve *size* bytes of payload in a ring buffer *ringbuf* ++ * through the dynptr interface. *flags* must be 0. ++ * ++ * Please note that a corresponding bpf_ringbuf_submit_dynptr or ++ * bpf_ringbuf_discard_dynptr must be called on *ptr*, even if the ++ * reservation fails. This is enforced by the verifier. ++ * ++ * Returns ++ * 0 on success, or a negative error in case of failure. ++ */ ++static long (*bpf_ringbuf_reserve_dynptr)(void *ringbuf, __u32 size, __u64 flags, struct bpf_dynptr *ptr) = (void *) 198; ++ ++/* ++ * bpf_ringbuf_submit_dynptr ++ * ++ * Submit reserved ring buffer sample, pointed to by *data*, ++ * through the dynptr interface. This is a no-op if the dynptr is ++ * invalid/null. ++ * ++ * For more information on *flags*, please see ++ * 'bpf_ringbuf_submit'. ++ * ++ * Returns ++ * Nothing. Always succeeds. ++ */ ++static void (*bpf_ringbuf_submit_dynptr)(struct bpf_dynptr *ptr, __u64 flags) = (void *) 199; ++ ++/* ++ * bpf_ringbuf_discard_dynptr ++ * ++ * Discard reserved ring buffer sample through the dynptr ++ * interface. This is a no-op if the dynptr is invalid/null. ++ * ++ * For more information on *flags*, please see ++ * 'bpf_ringbuf_discard'. ++ * ++ * Returns ++ * Nothing. Always succeeds. ++ */ ++static void (*bpf_ringbuf_discard_dynptr)(struct bpf_dynptr *ptr, __u64 flags) = (void *) 200; ++ ++/* ++ * bpf_dynptr_read ++ * ++ * Read *len* bytes from *src* into *dst*, starting from *offset* ++ * into *src*. ++ * *flags* is currently unused. ++ * ++ * Returns ++ * 0 on success, -E2BIG if *offset* + *len* exceeds the length ++ * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if ++ * *flags* is not 0. ++ */ ++static long (*bpf_dynptr_read)(void *dst, __u32 len, struct bpf_dynptr *src, __u32 offset, __u64 flags) = (void *) 201; ++ ++/* ++ * bpf_dynptr_write ++ * ++ * Write *len* bytes from *src* into *dst*, starting from *offset* ++ * into *dst*. ++ * *flags* is currently unused. ++ * ++ * Returns ++ * 0 on success, -E2BIG if *offset* + *len* exceeds the length ++ * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* ++ * is a read-only dynptr or if *flags* is not 0. ++ */ ++static long (*bpf_dynptr_write)(struct bpf_dynptr *dst, __u32 offset, void *src, __u32 len, __u64 flags) = (void *) 202; ++ ++/* ++ * bpf_dynptr_data ++ * ++ * Get a pointer to the underlying dynptr data. ++ * ++ * *len* must be a statically known value. The returned data slice ++ * is invalidated whenever the dynptr is invalidated. ++ * ++ * Returns ++ * Pointer to the underlying dynptr data, NULL if the dynptr is ++ * read-only, if the dynptr is invalid, or if the offset and length ++ * is out of bounds. 
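++ *
++ * Illustrative sketch (editorial addition, not part of the upstream
++ * helper description): reserving a ring buffer sample through the
++ * dynptr interface, getting a direct data pointer, then submitting
++ * (or discarding) it. The map name "events" and the 8-byte sample
++ * are assumptions made for the example.
++ *
++ * ::
++ *
++ * 	struct {
++ * 		__uint(type, BPF_MAP_TYPE_RINGBUF);
++ * 		__uint(max_entries, 4096);
++ * 	} events SEC(".maps");
++ *
++ * 	SEC("tracepoint/syscalls/sys_enter_close")
++ * 	int use_dynptr(void *ctx)
++ * 	{
++ * 		struct bpf_dynptr ptr;
++ * 		__u64 *val;
++ *
++ * 		// a matching submit/discard is required even on failure
++ * 		bpf_ringbuf_reserve_dynptr(&events, sizeof(*val), 0, &ptr);
++ *
++ * 		val = bpf_dynptr_data(&ptr, 0, sizeof(*val));
++ * 		if (!val) {
++ * 			bpf_ringbuf_discard_dynptr(&ptr, 0);
++ * 			return 0;
++ * 		}
++ *
++ * 		*val = bpf_ktime_get_ns();
++ * 		bpf_ringbuf_submit_dynptr(&ptr, 0);
++ * 		return 0;
++ * 	}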
++ */ ++static void *(*bpf_dynptr_data)(struct bpf_dynptr *ptr, __u32 offset, __u32 len) = (void *) 203; ++ ++/* ++ * bpf_tcp_raw_gen_syncookie_ipv4 ++ * ++ * Try to issue a SYN cookie for the packet with corresponding ++ * IPv4/TCP headers, *iph* and *th*, without depending on a ++ * listening socket. ++ * ++ * *iph* points to the IPv4 header. ++ * ++ * *th* points to the start of the TCP header, while *th_len* ++ * contains the length of the TCP header (at least ++ * **sizeof**\ (**struct tcphdr**)). ++ * ++ * Returns ++ * On success, lower 32 bits hold the generated SYN cookie in ++ * followed by 16 bits which hold the MSS value for that cookie, ++ * and the top 16 bits are unused. ++ * ++ * On failure, the returned value is one of the following: ++ * ++ * **-EINVAL** if *th_len* is invalid. ++ */ ++static __s64 (*bpf_tcp_raw_gen_syncookie_ipv4)(struct iphdr *iph, struct tcphdr *th, __u32 th_len) = (void *) 204; ++ ++/* ++ * bpf_tcp_raw_gen_syncookie_ipv6 ++ * ++ * Try to issue a SYN cookie for the packet with corresponding ++ * IPv6/TCP headers, *iph* and *th*, without depending on a ++ * listening socket. ++ * ++ * *iph* points to the IPv6 header. ++ * ++ * *th* points to the start of the TCP header, while *th_len* ++ * contains the length of the TCP header (at least ++ * **sizeof**\ (**struct tcphdr**)). ++ * ++ * Returns ++ * On success, lower 32 bits hold the generated SYN cookie in ++ * followed by 16 bits which hold the MSS value for that cookie, ++ * and the top 16 bits are unused. ++ * ++ * On failure, the returned value is one of the following: ++ * ++ * **-EINVAL** if *th_len* is invalid. ++ * ++ * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. ++ */ ++static __s64 (*bpf_tcp_raw_gen_syncookie_ipv6)(struct ipv6hdr *iph, struct tcphdr *th, __u32 th_len) = (void *) 205; ++ ++/* ++ * bpf_tcp_raw_check_syncookie_ipv4 ++ * ++ * Check whether *iph* and *th* contain a valid SYN cookie ACK ++ * without depending on a listening socket. ++ * ++ * *iph* points to the IPv4 header. ++ * ++ * *th* points to the TCP header. ++ * ++ * Returns ++ * 0 if *iph* and *th* are a valid SYN cookie ACK. ++ * ++ * On failure, the returned value is one of the following: ++ * ++ * **-EACCES** if the SYN cookie is not valid. ++ */ ++static long (*bpf_tcp_raw_check_syncookie_ipv4)(struct iphdr *iph, struct tcphdr *th) = (void *) 206; ++ ++/* ++ * bpf_tcp_raw_check_syncookie_ipv6 ++ * ++ * Check whether *iph* and *th* contain a valid SYN cookie ACK ++ * without depending on a listening socket. ++ * ++ * *iph* points to the IPv6 header. ++ * ++ * *th* points to the TCP header. ++ * ++ * Returns ++ * 0 if *iph* and *th* are a valid SYN cookie ACK. ++ * ++ * On failure, the returned value is one of the following: ++ * ++ * **-EACCES** if the SYN cookie is not valid. ++ * ++ * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. ++ */ ++static long (*bpf_tcp_raw_check_syncookie_ipv6)(struct ipv6hdr *iph, struct tcphdr *th) = (void *) 207; ++ ++ +diff --git a/src/cc/libbpf/src/bpf_helpers.h b/src/cc/libbpf/src/bpf_helpers.h +new file mode 100644 +index 0000000..7349b16 +--- /dev/null ++++ b/src/cc/libbpf/src/bpf_helpers.h +@@ -0,0 +1,301 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++#ifndef __BPF_HELPERS__ ++#define __BPF_HELPERS__ ++ ++/* ++ * Note that bpf programs need to include either ++ * vmlinux.h (auto-generated from BTF) or linux/types.h ++ * in advance since bpf_helper_defs.h uses such types ++ * as __u64. 
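++ *
++ * A typical CO-RE program therefore starts with (illustrative,
++ * editorial note; assumes a pre-generated vmlinux.h and installed
++ * libbpf headers):
++ *
++ *   #include "vmlinux.h"          // or: #include <linux/types.h>
++ *   #include <bpf/bpf_helpers.h>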
++ */ ++#include "bpf_helper_defs.h" ++ ++#define __uint(name, val) int (*name)[val] ++#define __type(name, val) typeof(val) *name ++#define __array(name, val) typeof(val) *name[] ++ ++/* ++ * Helper macro to place programs, maps, license in ++ * different sections in elf_bpf file. Section names ++ * are interpreted by libbpf depending on the context (BPF programs, BPF maps, ++ * extern variables, etc). ++ * To allow use of SEC() with externs (e.g., for extern .maps declarations), ++ * make sure __attribute__((unused)) doesn't trigger compilation warning. ++ */ ++#if __GNUC__ && !__clang__ ++ ++/* ++ * Pragma macros are broken on GCC ++ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55578 ++ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90400 ++ */ ++#define SEC(name) __attribute__((section(name), used)) ++ ++#else ++ ++#define SEC(name) \ ++ _Pragma("GCC diagnostic push") \ ++ _Pragma("GCC diagnostic ignored \"-Wignored-attributes\"") \ ++ __attribute__((section(name), used)) \ ++ _Pragma("GCC diagnostic pop") \ ++ ++#endif ++ ++/* Avoid 'linux/stddef.h' definition of '__always_inline'. */ ++#undef __always_inline ++#define __always_inline inline __attribute__((always_inline)) ++ ++#ifndef __noinline ++#define __noinline __attribute__((noinline)) ++#endif ++#ifndef __weak ++#define __weak __attribute__((weak)) ++#endif ++ ++/* ++ * Use __hidden attribute to mark a non-static BPF subprogram effectively ++ * static for BPF verifier's verification algorithm purposes, allowing more ++ * extensive and permissive BPF verification process, taking into account ++ * subprogram's caller context. ++ */ ++#define __hidden __attribute__((visibility("hidden"))) ++ ++/* When utilizing vmlinux.h with BPF CO-RE, user BPF programs can't include ++ * any system-level headers (such as stddef.h, linux/version.h, etc), and ++ * commonly-used macros like NULL and KERNEL_VERSION aren't available through ++ * vmlinux.h. This just adds unnecessary hurdles and forces users to re-define ++ * them on their own. So as a convenience, provide such definitions here. ++ */ ++#ifndef NULL ++#define NULL ((void *)0) ++#endif ++ ++#ifndef KERNEL_VERSION ++#define KERNEL_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + ((c) > 255 ? 255 : (c))) ++#endif ++ ++/* ++ * Helper macros to manipulate data structures ++ */ ++#ifndef offsetof ++#define offsetof(TYPE, MEMBER) ((unsigned long)&((TYPE *)0)->MEMBER) ++#endif ++#ifndef container_of ++#define container_of(ptr, type, member) \ ++ ({ \ ++ void *__mptr = (void *)(ptr); \ ++ ((type *)(__mptr - offsetof(type, member))); \ ++ }) ++#endif ++ ++/* ++ * Compiler (optimization) barrier. ++ */ ++#ifndef barrier ++#define barrier() asm volatile("" ::: "memory") ++#endif ++ ++/* Variable-specific compiler (optimization) barrier. It's a no-op which makes ++ * compiler believe that there is some black box modification of a given ++ * variable and thus prevents compiler from making extra assumption about its ++ * value and potential simplifications and optimizations on this variable. ++ * ++ * E.g., compiler might often delay or even omit 32-bit to 64-bit casting of ++ * a variable, making some code patterns unverifiable. Putting barrier_var() ++ * in place will ensure that cast is performed before the barrier_var() ++ * invocation, because compiler has to pessimistically assume that embedded ++ * asm section might perform some extra operations on that variable. ++ * ++ * This is a variable-specific variant of more global barrier(). 
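A typical use of barrier_var() is pinning down a 32-bit length right before a bounds check, so the compiler cannot delay the 32-to-64-bit extension past the check and leave the verifier with a value it cannot track. The sketch below is an editor's illustration, not part of the vendored sources; it assumes vmlinux.h and bpf_helpers.h, and the const volatile l2_hdr_len knob is hypothetical.

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

const volatile __u32 l2_hdr_len = 14;    /* overridable from userspace via .rodata */

SEC("xdp")
int xdp_len_check(struct xdp_md *ctx)
{
        void *data = (void *)(long)ctx->data;
        void *data_end = (void *)(long)ctx->data_end;
        __u32 off = l2_hdr_len;

        /* force 'off' to be materialized (and zero-extended) here, before the
         * bounds check, instead of being folded into later pointer math */
        barrier_var(off);
        if (data + off > data_end)
                return XDP_DROP;

        return XDP_PASS;
}

char LICENSE[] SEC("license") = "GPL";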
++ */ ++#ifndef barrier_var ++#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) ++#endif ++ ++/* ++ * Helper macro to throw a compilation error if __bpf_unreachable() gets ++ * built into the resulting code. This works given BPF back end does not ++ * implement __builtin_trap(). This is useful to assert that certain paths ++ * of the program code are never used and hence eliminated by the compiler. ++ * ++ * For example, consider a switch statement that covers known cases used by ++ * the program. __bpf_unreachable() can then reside in the default case. If ++ * the program gets extended such that a case is not covered in the switch ++ * statement, then it will throw a build error due to the default case not ++ * being compiled out. ++ */ ++#ifndef __bpf_unreachable ++# define __bpf_unreachable() __builtin_trap() ++#endif ++ ++/* ++ * Helper function to perform a tail call with a constant/immediate map slot. ++ */ ++#if __clang_major__ >= 8 && defined(__bpf__) ++static __always_inline void ++bpf_tail_call_static(void *ctx, const void *map, const __u32 slot) ++{ ++ if (!__builtin_constant_p(slot)) ++ __bpf_unreachable(); ++ ++ /* ++ * Provide a hard guarantee that LLVM won't optimize setting r2 (map ++ * pointer) and r3 (constant map index) from _different paths_ ending ++ * up at the _same_ call insn as otherwise we won't be able to use the ++ * jmpq/nopl retpoline-free patching by the x86-64 JIT in the kernel ++ * given they mismatch. See also d2e4c1e6c294 ("bpf: Constant map key ++ * tracking for prog array pokes") for details on verifier tracking. ++ * ++ * Note on clobber list: we need to stay in-line with BPF calling ++ * convention, so even if we don't end up using r0, r4, r5, we need ++ * to mark them as clobber so that LLVM doesn't end up using them ++ * before / after the call. ++ */ ++ asm volatile("r1 = %[ctx]\n\t" ++ "r2 = %[map]\n\t" ++ "r3 = %[slot]\n\t" ++ "call 12" ++ :: [ctx]"r"(ctx), [map]"r"(map), [slot]"i"(slot) ++ : "r0", "r1", "r2", "r3", "r4", "r5"); ++} ++#endif ++ ++/* ++ * Helper structure used by eBPF C program ++ * to describe BPF map attributes to libbpf loader ++ */ ++struct bpf_map_def { ++ unsigned int type; ++ unsigned int key_size; ++ unsigned int value_size; ++ unsigned int max_entries; ++ unsigned int map_flags; ++} __attribute__((deprecated("use BTF-defined maps in .maps section"))); ++ ++enum libbpf_pin_type { ++ LIBBPF_PIN_NONE, ++ /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ ++ LIBBPF_PIN_BY_NAME, ++}; ++ ++enum libbpf_tristate { ++ TRI_NO = 0, ++ TRI_YES = 1, ++ TRI_MODULE = 2, ++}; ++ ++#define __kconfig __attribute__((section(".kconfig"))) ++#define __ksym __attribute__((section(".ksyms"))) ++#define __kptr __attribute__((btf_type_tag("kptr"))) ++#define __kptr_ref __attribute__((btf_type_tag("kptr_ref"))) ++ ++#ifndef ___bpf_concat ++#define ___bpf_concat(a, b) a ## b ++#endif ++#ifndef ___bpf_apply ++#define ___bpf_apply(fn, n) ___bpf_concat(fn, n) ++#endif ++#ifndef ___bpf_nth ++#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N ++#endif ++#ifndef ___bpf_narg ++#define ___bpf_narg(...) \ ++ ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) ++#endif ++ ++#define ___bpf_fill0(arr, p, x) do {} while (0) ++#define ___bpf_fill1(arr, p, x) arr[p] = x ++#define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args) ++#define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args) ++#define ___bpf_fill4(arr, p, x, args...) 
arr[p] = x; ___bpf_fill3(arr, p + 1, args) ++#define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args) ++#define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args) ++#define ___bpf_fill7(arr, p, x, args...) arr[p] = x; ___bpf_fill6(arr, p + 1, args) ++#define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args) ++#define ___bpf_fill9(arr, p, x, args...) arr[p] = x; ___bpf_fill8(arr, p + 1, args) ++#define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args) ++#define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args) ++#define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args) ++#define ___bpf_fill(arr, args...) \ ++ ___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args) ++ ++/* ++ * BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values ++ * in a structure. ++ */ ++#define BPF_SEQ_PRINTF(seq, fmt, args...) \ ++({ \ ++ static const char ___fmt[] = fmt; \ ++ unsigned long long ___param[___bpf_narg(args)]; \ ++ \ ++ _Pragma("GCC diagnostic push") \ ++ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ ++ ___bpf_fill(___param, args); \ ++ _Pragma("GCC diagnostic pop") \ ++ \ ++ bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \ ++ ___param, sizeof(___param)); \ ++}) ++ ++/* ++ * BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of ++ * an array of u64. ++ */ ++#define BPF_SNPRINTF(out, out_size, fmt, args...) \ ++({ \ ++ static const char ___fmt[] = fmt; \ ++ unsigned long long ___param[___bpf_narg(args)]; \ ++ \ ++ _Pragma("GCC diagnostic push") \ ++ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ ++ ___bpf_fill(___param, args); \ ++ _Pragma("GCC diagnostic pop") \ ++ \ ++ bpf_snprintf(out, out_size, ___fmt, \ ++ ___param, sizeof(___param)); \ ++}) ++ ++#ifdef BPF_NO_GLOBAL_DATA ++#define BPF_PRINTK_FMT_MOD ++#else ++#define BPF_PRINTK_FMT_MOD static const ++#endif ++ ++#define __bpf_printk(fmt, ...) \ ++({ \ ++ BPF_PRINTK_FMT_MOD char ____fmt[] = fmt; \ ++ bpf_trace_printk(____fmt, sizeof(____fmt), \ ++ ##__VA_ARGS__); \ ++}) ++ ++/* ++ * __bpf_vprintk wraps the bpf_trace_vprintk helper with variadic arguments ++ * instead of an array of u64. ++ */ ++#define __bpf_vprintk(fmt, args...) \ ++({ \ ++ static const char ___fmt[] = fmt; \ ++ unsigned long long ___param[___bpf_narg(args)]; \ ++ \ ++ _Pragma("GCC diagnostic push") \ ++ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ ++ ___bpf_fill(___param, args); \ ++ _Pragma("GCC diagnostic pop") \ ++ \ ++ bpf_trace_vprintk(___fmt, sizeof(___fmt), \ ++ ___param, sizeof(___param)); \ ++}) ++ ++/* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args ++ * Otherwise use __bpf_vprintk ++ */ ++#define ___bpf_pick_printk(...) \ ++ ___bpf_nth(_, ##__VA_ARGS__, __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, \ ++ __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, \ ++ __bpf_vprintk, __bpf_vprintk, __bpf_printk /*3*/, __bpf_printk /*2*/,\ ++ __bpf_printk /*1*/, __bpf_printk /*0*/) ++ ++/* Helper macro to print out debug messages */ ++#define bpf_printk(fmt, args...) 
___bpf_pick_printk(args)(fmt, ##args) ++ ++#endif +diff --git a/src/cc/libbpf/src/bpf_prog_linfo.c b/src/cc/libbpf/src/bpf_prog_linfo.c +new file mode 100644 +index 0000000..5c50309 +--- /dev/null ++++ b/src/cc/libbpf/src/bpf_prog_linfo.c +@@ -0,0 +1,246 @@ ++// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) ++/* Copyright (c) 2018 Facebook */ ++ ++#include ++#include ++#include ++#include ++#include "libbpf.h" ++#include "libbpf_internal.h" ++ ++struct bpf_prog_linfo { ++ void *raw_linfo; ++ void *raw_jited_linfo; ++ __u32 *nr_jited_linfo_per_func; ++ __u32 *jited_linfo_func_idx; ++ __u32 nr_linfo; ++ __u32 nr_jited_func; ++ __u32 rec_size; ++ __u32 jited_rec_size; ++}; ++ ++static int dissect_jited_func(struct bpf_prog_linfo *prog_linfo, ++ const __u64 *ksym_func, const __u32 *ksym_len) ++{ ++ __u32 nr_jited_func, nr_linfo; ++ const void *raw_jited_linfo; ++ const __u64 *jited_linfo; ++ __u64 last_jited_linfo; ++ /* ++ * Index to raw_jited_linfo: ++ * i: Index for searching the next ksym_func ++ * prev_i: Index to the last found ksym_func ++ */ ++ __u32 i, prev_i; ++ __u32 f; /* Index to ksym_func */ ++ ++ raw_jited_linfo = prog_linfo->raw_jited_linfo; ++ jited_linfo = raw_jited_linfo; ++ if (ksym_func[0] != *jited_linfo) ++ goto errout; ++ ++ prog_linfo->jited_linfo_func_idx[0] = 0; ++ nr_jited_func = prog_linfo->nr_jited_func; ++ nr_linfo = prog_linfo->nr_linfo; ++ ++ for (prev_i = 0, i = 1, f = 1; ++ i < nr_linfo && f < nr_jited_func; ++ i++) { ++ raw_jited_linfo += prog_linfo->jited_rec_size; ++ last_jited_linfo = *jited_linfo; ++ jited_linfo = raw_jited_linfo; ++ ++ if (ksym_func[f] == *jited_linfo) { ++ prog_linfo->jited_linfo_func_idx[f] = i; ++ ++ /* Sanity check */ ++ if (last_jited_linfo - ksym_func[f - 1] + 1 > ++ ksym_len[f - 1]) ++ goto errout; ++ ++ prog_linfo->nr_jited_linfo_per_func[f - 1] = ++ i - prev_i; ++ prev_i = i; ++ ++ /* ++ * The ksym_func[f] is found in jited_linfo. ++ * Look for the next one. ++ */ ++ f++; ++ } else if (*jited_linfo <= last_jited_linfo) { ++ /* Ensure the addr is increasing _within_ a func */ ++ goto errout; ++ } ++ } ++ ++ if (f != nr_jited_func) ++ goto errout; ++ ++ prog_linfo->nr_jited_linfo_per_func[nr_jited_func - 1] = ++ nr_linfo - prev_i; ++ ++ return 0; ++ ++errout: ++ return -EINVAL; ++} ++ ++void bpf_prog_linfo__free(struct bpf_prog_linfo *prog_linfo) ++{ ++ if (!prog_linfo) ++ return; ++ ++ free(prog_linfo->raw_linfo); ++ free(prog_linfo->raw_jited_linfo); ++ free(prog_linfo->nr_jited_linfo_per_func); ++ free(prog_linfo->jited_linfo_func_idx); ++ free(prog_linfo); ++} ++ ++struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info) ++{ ++ struct bpf_prog_linfo *prog_linfo; ++ __u32 nr_linfo, nr_jited_func; ++ __u64 data_sz; ++ ++ nr_linfo = info->nr_line_info; ++ ++ if (!nr_linfo) ++ return errno = EINVAL, NULL; ++ ++ /* ++ * The min size that bpf_prog_linfo has to access for ++ * searching purpose. 
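For context on what this function expects from its caller: the struct bpf_prog_info must already carry the line_info array, which takes the usual two-call bpf_obj_get_info_by_fd() dance. The sketch below is an editor's addition, not vendored code; dump_line_for_insn and its error handling are illustrative only.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

static int dump_line_for_insn(int prog_fd, __u32 insn_off)
{
        struct bpf_prog_info info = {};
        __u32 info_len = sizeof(info);
        struct bpf_prog_linfo *linfo;
        const struct bpf_line_info *rec;
        __u32 nr, rec_sz;
        void *buf;
        int err;

        /* first call: learn how many line_info records the program has */
        err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
        if (err || !info.nr_line_info)
                return err ?: -ENOENT;

        nr = info.nr_line_info;
        rec_sz = info.line_info_rec_size;
        buf = calloc(nr, rec_sz);
        if (!buf)
                return -ENOMEM;

        /* second call: fetch the records into our buffer */
        memset(&info, 0, sizeof(info));
        info.nr_line_info = nr;
        info.line_info_rec_size = rec_sz;
        info.line_info = (__u64)(uintptr_t)buf;
        info_len = sizeof(info);
        err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
        if (err)
                goto out;

        linfo = bpf_prog_linfo__new(&info);
        if (!linfo) {
                err = -errno;
                goto out;
        }
        rec = bpf_prog_linfo__lfind(linfo, insn_off, 0);
        if (rec)
                printf("insn %u -> line %u\n", insn_off,
                       BPF_LINE_INFO_LINE_NUM(rec->line_col));
        bpf_prog_linfo__free(linfo);
out:
        free(buf);
        return err;
}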
++ */ ++ if (info->line_info_rec_size < ++ offsetof(struct bpf_line_info, file_name_off)) ++ return errno = EINVAL, NULL; ++ ++ prog_linfo = calloc(1, sizeof(*prog_linfo)); ++ if (!prog_linfo) ++ return errno = ENOMEM, NULL; ++ ++ /* Copy xlated line_info */ ++ prog_linfo->nr_linfo = nr_linfo; ++ prog_linfo->rec_size = info->line_info_rec_size; ++ data_sz = (__u64)nr_linfo * prog_linfo->rec_size; ++ prog_linfo->raw_linfo = malloc(data_sz); ++ if (!prog_linfo->raw_linfo) ++ goto err_free; ++ memcpy(prog_linfo->raw_linfo, (void *)(long)info->line_info, data_sz); ++ ++ nr_jited_func = info->nr_jited_ksyms; ++ if (!nr_jited_func || ++ !info->jited_line_info || ++ info->nr_jited_line_info != nr_linfo || ++ info->jited_line_info_rec_size < sizeof(__u64) || ++ info->nr_jited_func_lens != nr_jited_func || ++ !info->jited_ksyms || ++ !info->jited_func_lens) ++ /* Not enough info to provide jited_line_info */ ++ return prog_linfo; ++ ++ /* Copy jited_line_info */ ++ prog_linfo->nr_jited_func = nr_jited_func; ++ prog_linfo->jited_rec_size = info->jited_line_info_rec_size; ++ data_sz = (__u64)nr_linfo * prog_linfo->jited_rec_size; ++ prog_linfo->raw_jited_linfo = malloc(data_sz); ++ if (!prog_linfo->raw_jited_linfo) ++ goto err_free; ++ memcpy(prog_linfo->raw_jited_linfo, ++ (void *)(long)info->jited_line_info, data_sz); ++ ++ /* Number of jited_line_info per jited func */ ++ prog_linfo->nr_jited_linfo_per_func = malloc(nr_jited_func * ++ sizeof(__u32)); ++ if (!prog_linfo->nr_jited_linfo_per_func) ++ goto err_free; ++ ++ /* ++ * For each jited func, ++ * the start idx to the "linfo" and "jited_linfo" array, ++ */ ++ prog_linfo->jited_linfo_func_idx = malloc(nr_jited_func * ++ sizeof(__u32)); ++ if (!prog_linfo->jited_linfo_func_idx) ++ goto err_free; ++ ++ if (dissect_jited_func(prog_linfo, ++ (__u64 *)(long)info->jited_ksyms, ++ (__u32 *)(long)info->jited_func_lens)) ++ goto err_free; ++ ++ return prog_linfo; ++ ++err_free: ++ bpf_prog_linfo__free(prog_linfo); ++ return errno = EINVAL, NULL; ++} ++ ++const struct bpf_line_info * ++bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo, ++ __u64 addr, __u32 func_idx, __u32 nr_skip) ++{ ++ __u32 jited_rec_size, rec_size, nr_linfo, start, i; ++ const void *raw_jited_linfo, *raw_linfo; ++ const __u64 *jited_linfo; ++ ++ if (func_idx >= prog_linfo->nr_jited_func) ++ return errno = ENOENT, NULL; ++ ++ nr_linfo = prog_linfo->nr_jited_linfo_per_func[func_idx]; ++ if (nr_skip >= nr_linfo) ++ return errno = ENOENT, NULL; ++ ++ start = prog_linfo->jited_linfo_func_idx[func_idx] + nr_skip; ++ jited_rec_size = prog_linfo->jited_rec_size; ++ raw_jited_linfo = prog_linfo->raw_jited_linfo + ++ (start * jited_rec_size); ++ jited_linfo = raw_jited_linfo; ++ if (addr < *jited_linfo) ++ return errno = ENOENT, NULL; ++ ++ nr_linfo -= nr_skip; ++ rec_size = prog_linfo->rec_size; ++ raw_linfo = prog_linfo->raw_linfo + (start * rec_size); ++ for (i = 0; i < nr_linfo; i++) { ++ if (addr < *jited_linfo) ++ break; ++ ++ raw_linfo += rec_size; ++ raw_jited_linfo += jited_rec_size; ++ jited_linfo = raw_jited_linfo; ++ } ++ ++ return raw_linfo - rec_size; ++} ++ ++const struct bpf_line_info * ++bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo, ++ __u32 insn_off, __u32 nr_skip) ++{ ++ const struct bpf_line_info *linfo; ++ __u32 rec_size, nr_linfo, i; ++ const void *raw_linfo; ++ ++ nr_linfo = prog_linfo->nr_linfo; ++ if (nr_skip >= nr_linfo) ++ return errno = ENOENT, NULL; ++ ++ rec_size = prog_linfo->rec_size; ++ raw_linfo = prog_linfo->raw_linfo + 
(nr_skip * rec_size); ++ linfo = raw_linfo; ++ if (insn_off < linfo->insn_off) ++ return errno = ENOENT, NULL; ++ ++ nr_linfo -= nr_skip; ++ for (i = 0; i < nr_linfo; i++) { ++ if (insn_off < linfo->insn_off) ++ break; ++ ++ raw_linfo += rec_size; ++ linfo = raw_linfo; ++ } ++ ++ return raw_linfo - rec_size; ++} +diff --git a/src/cc/libbpf/src/bpf_tracing.h b/src/cc/libbpf/src/bpf_tracing.h +new file mode 100644 +index 0000000..43ca3af +--- /dev/null ++++ b/src/cc/libbpf/src/bpf_tracing.h +@@ -0,0 +1,563 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++#ifndef __BPF_TRACING_H__ ++#define __BPF_TRACING_H__ ++ ++#include ++ ++/* Scan the ARCH passed in from ARCH env variable (see Makefile) */ ++#if defined(__TARGET_ARCH_x86) ++ #define bpf_target_x86 ++ #define bpf_target_defined ++#elif defined(__TARGET_ARCH_s390) ++ #define bpf_target_s390 ++ #define bpf_target_defined ++#elif defined(__TARGET_ARCH_arm) ++ #define bpf_target_arm ++ #define bpf_target_defined ++#elif defined(__TARGET_ARCH_arm64) ++ #define bpf_target_arm64 ++ #define bpf_target_defined ++#elif defined(__TARGET_ARCH_mips) ++ #define bpf_target_mips ++ #define bpf_target_defined ++#elif defined(__TARGET_ARCH_powerpc) ++ #define bpf_target_powerpc ++ #define bpf_target_defined ++#elif defined(__TARGET_ARCH_sparc) ++ #define bpf_target_sparc ++ #define bpf_target_defined ++#elif defined(__TARGET_ARCH_riscv) ++ #define bpf_target_riscv ++ #define bpf_target_defined ++#elif defined(__TARGET_ARCH_arc) ++ #define bpf_target_arc ++ #define bpf_target_defined ++#else ++ ++/* Fall back to what the compiler says */ ++#if defined(__x86_64__) ++ #define bpf_target_x86 ++ #define bpf_target_defined ++#elif defined(__s390__) ++ #define bpf_target_s390 ++ #define bpf_target_defined ++#elif defined(__arm__) ++ #define bpf_target_arm ++ #define bpf_target_defined ++#elif defined(__aarch64__) ++ #define bpf_target_arm64 ++ #define bpf_target_defined ++#elif defined(__mips__) ++ #define bpf_target_mips ++ #define bpf_target_defined ++#elif defined(__powerpc__) ++ #define bpf_target_powerpc ++ #define bpf_target_defined ++#elif defined(__sparc__) ++ #define bpf_target_sparc ++ #define bpf_target_defined ++#elif defined(__riscv) && __riscv_xlen == 64 ++ #define bpf_target_riscv ++ #define bpf_target_defined ++#elif defined(__arc__) ++ #define bpf_target_arc ++ #define bpf_target_defined ++#endif /* no compiler target */ ++ ++#endif ++ ++#ifndef __BPF_TARGET_MISSING ++#define __BPF_TARGET_MISSING "GCC error \"Must specify a BPF target arch via __TARGET_ARCH_xxx\"" ++#endif ++ ++#if defined(bpf_target_x86) ++ ++#if defined(__KERNEL__) || defined(__VMLINUX_H__) ++ ++#define __PT_PARM1_REG di ++#define __PT_PARM2_REG si ++#define __PT_PARM3_REG dx ++#define __PT_PARM4_REG cx ++#define __PT_PARM5_REG r8 ++#define __PT_RET_REG sp ++#define __PT_FP_REG bp ++#define __PT_RC_REG ax ++#define __PT_SP_REG sp ++#define __PT_IP_REG ip ++/* syscall uses r10 for PARM4 */ ++#define PT_REGS_PARM4_SYSCALL(x) ((x)->r10) ++#define PT_REGS_PARM4_CORE_SYSCALL(x) BPF_CORE_READ(x, r10) ++ ++#else ++ ++#ifdef __i386__ ++ ++#define __PT_PARM1_REG eax ++#define __PT_PARM2_REG edx ++#define __PT_PARM3_REG ecx ++/* i386 kernel is built with -mregparm=3 */ ++#define __PT_PARM4_REG __unsupported__ ++#define __PT_PARM5_REG __unsupported__ ++#define __PT_RET_REG esp ++#define __PT_FP_REG ebp ++#define __PT_RC_REG eax ++#define __PT_SP_REG esp ++#define __PT_IP_REG eip ++ ++#else /* __i386__ */ ++ ++#define __PT_PARM1_REG rdi ++#define __PT_PARM2_REG rsi ++#define 
__PT_PARM3_REG rdx ++#define __PT_PARM4_REG rcx ++#define __PT_PARM5_REG r8 ++#define __PT_RET_REG rsp ++#define __PT_FP_REG rbp ++#define __PT_RC_REG rax ++#define __PT_SP_REG rsp ++#define __PT_IP_REG rip ++/* syscall uses r10 for PARM4 */ ++#define PT_REGS_PARM4_SYSCALL(x) ((x)->r10) ++#define PT_REGS_PARM4_CORE_SYSCALL(x) BPF_CORE_READ(x, r10) ++ ++#endif /* __i386__ */ ++ ++#endif /* __KERNEL__ || __VMLINUX_H__ */ ++ ++#elif defined(bpf_target_s390) ++ ++struct pt_regs___s390 { ++ unsigned long orig_gpr2; ++}; ++ ++/* s390 provides user_pt_regs instead of struct pt_regs to userspace */ ++#define __PT_REGS_CAST(x) ((const user_pt_regs *)(x)) ++#define __PT_PARM1_REG gprs[2] ++#define __PT_PARM2_REG gprs[3] ++#define __PT_PARM3_REG gprs[4] ++#define __PT_PARM4_REG gprs[5] ++#define __PT_PARM5_REG gprs[6] ++#define __PT_RET_REG grps[14] ++#define __PT_FP_REG gprs[11] /* Works only with CONFIG_FRAME_POINTER */ ++#define __PT_RC_REG gprs[2] ++#define __PT_SP_REG gprs[15] ++#define __PT_IP_REG psw.addr ++#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x) ++#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___s390 *)(x), orig_gpr2) ++ ++#elif defined(bpf_target_arm) ++ ++#define __PT_PARM1_REG uregs[0] ++#define __PT_PARM2_REG uregs[1] ++#define __PT_PARM3_REG uregs[2] ++#define __PT_PARM4_REG uregs[3] ++#define __PT_PARM5_REG uregs[4] ++#define __PT_RET_REG uregs[14] ++#define __PT_FP_REG uregs[11] /* Works only with CONFIG_FRAME_POINTER */ ++#define __PT_RC_REG uregs[0] ++#define __PT_SP_REG uregs[13] ++#define __PT_IP_REG uregs[12] ++ ++#elif defined(bpf_target_arm64) ++ ++struct pt_regs___arm64 { ++ unsigned long orig_x0; ++}; ++ ++/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */ ++#define __PT_REGS_CAST(x) ((const struct user_pt_regs *)(x)) ++#define __PT_PARM1_REG regs[0] ++#define __PT_PARM2_REG regs[1] ++#define __PT_PARM3_REG regs[2] ++#define __PT_PARM4_REG regs[3] ++#define __PT_PARM5_REG regs[4] ++#define __PT_RET_REG regs[30] ++#define __PT_FP_REG regs[29] /* Works only with CONFIG_FRAME_POINTER */ ++#define __PT_RC_REG regs[0] ++#define __PT_SP_REG sp ++#define __PT_IP_REG pc ++#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x) ++#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___arm64 *)(x), orig_x0) ++ ++#elif defined(bpf_target_mips) ++ ++#define __PT_PARM1_REG regs[4] ++#define __PT_PARM2_REG regs[5] ++#define __PT_PARM3_REG regs[6] ++#define __PT_PARM4_REG regs[7] ++#define __PT_PARM5_REG regs[8] ++#define __PT_RET_REG regs[31] ++#define __PT_FP_REG regs[30] /* Works only with CONFIG_FRAME_POINTER */ ++#define __PT_RC_REG regs[2] ++#define __PT_SP_REG regs[29] ++#define __PT_IP_REG cp0_epc ++ ++#elif defined(bpf_target_powerpc) ++ ++#define __PT_PARM1_REG gpr[3] ++#define __PT_PARM2_REG gpr[4] ++#define __PT_PARM3_REG gpr[5] ++#define __PT_PARM4_REG gpr[6] ++#define __PT_PARM5_REG gpr[7] ++#define __PT_RET_REG regs[31] ++#define __PT_FP_REG __unsupported__ ++#define __PT_RC_REG gpr[3] ++#define __PT_SP_REG sp ++#define __PT_IP_REG nip ++/* powerpc does not select ARCH_HAS_SYSCALL_WRAPPER. 
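A note that is relevant to this loongarch64-enablement patch: the libbpf snapshot vendored here has no LoongArch entry in this per-architecture table, so the PT_REGS and kprobe macros fall through to the missing-target stubs on that platform. The block below is an editor's hedged sketch of what such an entry would look like, based on the LoongArch ELF psABI (a0-a7 in $r4-$r11, ra in $r1, sp in $r3, fp in $r22, PC in csr_era); verify it against arch/loongarch/include/asm/ptrace.h and current upstream libbpf before relying on it.

#elif defined(bpf_target_loongarch)   /* hypothetical; would also need a
                                         __TARGET_ARCH_loongarch / __loongarch__
                                         detection branch in the #if chain above */

#define __PT_PARM1_REG regs[4]
#define __PT_PARM2_REG regs[5]
#define __PT_PARM3_REG regs[6]
#define __PT_PARM4_REG regs[7]
#define __PT_PARM5_REG regs[8]
#define __PT_RET_REG regs[1]
#define __PT_FP_REG regs[22]
#define __PT_RC_REG regs[4]
#define __PT_SP_REG regs[3]
#define __PT_IP_REG csr_era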
*/ ++#define PT_REGS_SYSCALL_REGS(ctx) ctx ++ ++#elif defined(bpf_target_sparc) ++ ++#define __PT_PARM1_REG u_regs[UREG_I0] ++#define __PT_PARM2_REG u_regs[UREG_I1] ++#define __PT_PARM3_REG u_regs[UREG_I2] ++#define __PT_PARM4_REG u_regs[UREG_I3] ++#define __PT_PARM5_REG u_regs[UREG_I4] ++#define __PT_RET_REG u_regs[UREG_I7] ++#define __PT_FP_REG __unsupported__ ++#define __PT_RC_REG u_regs[UREG_I0] ++#define __PT_SP_REG u_regs[UREG_FP] ++/* Should this also be a bpf_target check for the sparc case? */ ++#if defined(__arch64__) ++#define __PT_IP_REG tpc ++#else ++#define __PT_IP_REG pc ++#endif ++ ++#elif defined(bpf_target_riscv) ++ ++#define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x)) ++#define __PT_PARM1_REG a0 ++#define __PT_PARM2_REG a1 ++#define __PT_PARM3_REG a2 ++#define __PT_PARM4_REG a3 ++#define __PT_PARM5_REG a4 ++#define __PT_RET_REG ra ++#define __PT_FP_REG s0 ++#define __PT_RC_REG a0 ++#define __PT_SP_REG sp ++#define __PT_IP_REG pc ++/* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. */ ++#define PT_REGS_SYSCALL_REGS(ctx) ctx ++ ++#elif defined(bpf_target_arc) ++ ++/* arc provides struct user_pt_regs instead of struct pt_regs to userspace */ ++#define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x)) ++#define __PT_PARM1_REG scratch.r0 ++#define __PT_PARM2_REG scratch.r1 ++#define __PT_PARM3_REG scratch.r2 ++#define __PT_PARM4_REG scratch.r3 ++#define __PT_PARM5_REG scratch.r4 ++#define __PT_RET_REG scratch.blink ++#define __PT_FP_REG __unsupported__ ++#define __PT_RC_REG scratch.r0 ++#define __PT_SP_REG scratch.sp ++#define __PT_IP_REG scratch.ret ++/* arc does not select ARCH_HAS_SYSCALL_WRAPPER. */ ++#define PT_REGS_SYSCALL_REGS(ctx) ctx ++ ++#endif ++ ++#if defined(bpf_target_defined) ++ ++struct pt_regs; ++ ++/* allow some architecutres to override `struct pt_regs` */ ++#ifndef __PT_REGS_CAST ++#define __PT_REGS_CAST(x) (x) ++#endif ++ ++#define PT_REGS_PARM1(x) (__PT_REGS_CAST(x)->__PT_PARM1_REG) ++#define PT_REGS_PARM2(x) (__PT_REGS_CAST(x)->__PT_PARM2_REG) ++#define PT_REGS_PARM3(x) (__PT_REGS_CAST(x)->__PT_PARM3_REG) ++#define PT_REGS_PARM4(x) (__PT_REGS_CAST(x)->__PT_PARM4_REG) ++#define PT_REGS_PARM5(x) (__PT_REGS_CAST(x)->__PT_PARM5_REG) ++#define PT_REGS_RET(x) (__PT_REGS_CAST(x)->__PT_RET_REG) ++#define PT_REGS_FP(x) (__PT_REGS_CAST(x)->__PT_FP_REG) ++#define PT_REGS_RC(x) (__PT_REGS_CAST(x)->__PT_RC_REG) ++#define PT_REGS_SP(x) (__PT_REGS_CAST(x)->__PT_SP_REG) ++#define PT_REGS_IP(x) (__PT_REGS_CAST(x)->__PT_IP_REG) ++ ++#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM1_REG) ++#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM2_REG) ++#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM3_REG) ++#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM4_REG) ++#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM5_REG) ++#define PT_REGS_RET_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_RET_REG) ++#define PT_REGS_FP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_FP_REG) ++#define PT_REGS_RC_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_RC_REG) ++#define PT_REGS_SP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_SP_REG) ++#define PT_REGS_IP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_IP_REG) ++ ++#if defined(bpf_target_powerpc) ++ ++#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) ++#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP ++ ++#elif defined(bpf_target_sparc) ++ ++#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = 
PT_REGS_RET(ctx); }) ++#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP ++ ++#else ++ ++#define BPF_KPROBE_READ_RET_IP(ip, ctx) \ ++ ({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) ++#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) \ ++ ({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) ++ ++#endif ++ ++#ifndef PT_REGS_PARM1_SYSCALL ++#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1(x) ++#endif ++#define PT_REGS_PARM2_SYSCALL(x) PT_REGS_PARM2(x) ++#define PT_REGS_PARM3_SYSCALL(x) PT_REGS_PARM3(x) ++#ifndef PT_REGS_PARM4_SYSCALL ++#define PT_REGS_PARM4_SYSCALL(x) PT_REGS_PARM4(x) ++#endif ++#define PT_REGS_PARM5_SYSCALL(x) PT_REGS_PARM5(x) ++ ++#ifndef PT_REGS_PARM1_CORE_SYSCALL ++#define PT_REGS_PARM1_CORE_SYSCALL(x) PT_REGS_PARM1_CORE(x) ++#endif ++#define PT_REGS_PARM2_CORE_SYSCALL(x) PT_REGS_PARM2_CORE(x) ++#define PT_REGS_PARM3_CORE_SYSCALL(x) PT_REGS_PARM3_CORE(x) ++#ifndef PT_REGS_PARM4_CORE_SYSCALL ++#define PT_REGS_PARM4_CORE_SYSCALL(x) PT_REGS_PARM4_CORE(x) ++#endif ++#define PT_REGS_PARM5_CORE_SYSCALL(x) PT_REGS_PARM5_CORE(x) ++ ++#else /* defined(bpf_target_defined) */ ++ ++#define PT_REGS_PARM1(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_PARM2(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_PARM3(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_PARM4(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_PARM5(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_RET(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_FP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_RC(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_SP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_IP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++ ++#define PT_REGS_PARM1_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_PARM2_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_PARM3_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_PARM4_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_PARM5_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_RET_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_FP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_RC_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_SP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_IP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++ ++#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++ ++#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_PARM2_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_PARM3_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_PARM4_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_PARM5_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++ ++#define PT_REGS_PARM1_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_PARM2_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_PARM3_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_PARM4_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++#define PT_REGS_PARM5_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) ++ ++#endif /* defined(bpf_target_defined) 
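Putting the accessors above to work, a plain kprobe handler can pull typed arguments out of struct pt_regs on any supported architecture. This is an editor's sketch in the spirit of the libbpf-bootstrap examples, not vendored code; it assumes vmlinux.h and a kernel where do_unlinkat() takes (int dfd, struct filename *name).

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>

SEC("kprobe/do_unlinkat")
int probe_unlinkat(struct pt_regs *ctx)
{
        struct filename *name = (struct filename *)PT_REGS_PARM2(ctx);
        const char *pathname;

        pathname = BPF_CORE_READ(name, name);
        bpf_printk("unlink %s", pathname);
        return 0;
}

char LICENSE[] SEC("license") = "GPL";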
*/ ++ ++/* ++ * When invoked from a syscall handler kprobe, returns a pointer to a ++ * struct pt_regs containing syscall arguments and suitable for passing to ++ * PT_REGS_PARMn_SYSCALL() and PT_REGS_PARMn_CORE_SYSCALL(). ++ */ ++#ifndef PT_REGS_SYSCALL_REGS ++/* By default, assume that the arch selects ARCH_HAS_SYSCALL_WRAPPER. */ ++#define PT_REGS_SYSCALL_REGS(ctx) ((struct pt_regs *)PT_REGS_PARM1(ctx)) ++#endif ++ ++#ifndef ___bpf_concat ++#define ___bpf_concat(a, b) a ## b ++#endif ++#ifndef ___bpf_apply ++#define ___bpf_apply(fn, n) ___bpf_concat(fn, n) ++#endif ++#ifndef ___bpf_nth ++#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N ++#endif ++#ifndef ___bpf_narg ++#define ___bpf_narg(...) ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) ++#endif ++ ++#define ___bpf_ctx_cast0() ctx ++#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] ++#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1] ++#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2] ++#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3] ++#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4] ++#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5] ++#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6] ++#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7] ++#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8] ++#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9] ++#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10] ++#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11] ++#define ___bpf_ctx_cast(args...) ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args) ++ ++/* ++ * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and ++ * similar kinds of BPF programs, that accept input arguments as a single ++ * pointer to untyped u64 array, where each u64 can actually be a typed ++ * pointer or integer of different size. Instead of requring user to write ++ * manual casts and work with array elements by index, BPF_PROG macro ++ * allows user to declare a list of named and typed input arguments in the ++ * same syntax as for normal C function. All the casting is hidden and ++ * performed transparently, while user code can just assume working with ++ * function arguments of specified type and name. ++ * ++ * Original raw context argument is preserved as well as 'ctx' argument. ++ * This is useful when using BPF helpers that expect original context ++ * as one of the parameters (e.g., for bpf_perf_event_output()). ++ */ ++#define BPF_PROG(name, args...) \ ++name(unsigned long long *ctx); \ ++static __attribute__((always_inline)) typeof(name(0)) \ ++____##name(unsigned long long *ctx, ##args); \ ++typeof(name(0)) name(unsigned long long *ctx) \ ++{ \ ++ _Pragma("GCC diagnostic push") \ ++ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ ++ return ____##name(___bpf_ctx_cast(args)); \ ++ _Pragma("GCC diagnostic pop") \ ++} \ ++static __attribute__((always_inline)) typeof(name(0)) \ ++____##name(unsigned long long *ctx, ##args) ++ ++struct pt_regs; ++ ++#define ___bpf_kprobe_args0() ctx ++#define ___bpf_kprobe_args1(x) ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) ++#define ___bpf_kprobe_args2(x, args...) 
___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) ++#define ___bpf_kprobe_args3(x, args...) ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) ++#define ___bpf_kprobe_args4(x, args...) ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) ++#define ___bpf_kprobe_args5(x, args...) ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) ++#define ___bpf_kprobe_args(args...) ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args) ++ ++/* ++ * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for ++ * tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific ++ * low-level way of getting kprobe input arguments from struct pt_regs, and ++ * provides a familiar typed and named function arguments syntax and ++ * semantics of accessing kprobe input paremeters. ++ * ++ * Original struct pt_regs* context is preserved as 'ctx' argument. This might ++ * be necessary when using BPF helpers like bpf_perf_event_output(). ++ */ ++#define BPF_KPROBE(name, args...) \ ++name(struct pt_regs *ctx); \ ++static __attribute__((always_inline)) typeof(name(0)) \ ++____##name(struct pt_regs *ctx, ##args); \ ++typeof(name(0)) name(struct pt_regs *ctx) \ ++{ \ ++ _Pragma("GCC diagnostic push") \ ++ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ ++ return ____##name(___bpf_kprobe_args(args)); \ ++ _Pragma("GCC diagnostic pop") \ ++} \ ++static __attribute__((always_inline)) typeof(name(0)) \ ++____##name(struct pt_regs *ctx, ##args) ++ ++#define ___bpf_kretprobe_args0() ctx ++#define ___bpf_kretprobe_args1(x) ___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx) ++#define ___bpf_kretprobe_args(args...) ___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args) ++ ++/* ++ * BPF_KRETPROBE is similar to BPF_KPROBE, except, it only provides optional ++ * return value (in addition to `struct pt_regs *ctx`), but no input ++ * arguments, because they will be clobbered by the time probed function ++ * returns. ++ */ ++#define BPF_KRETPROBE(name, args...) \ ++name(struct pt_regs *ctx); \ ++static __attribute__((always_inline)) typeof(name(0)) \ ++____##name(struct pt_regs *ctx, ##args); \ ++typeof(name(0)) name(struct pt_regs *ctx) \ ++{ \ ++ _Pragma("GCC diagnostic push") \ ++ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ ++ return ____##name(___bpf_kretprobe_args(args)); \ ++ _Pragma("GCC diagnostic pop") \ ++} \ ++static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) ++ ++/* If kernel has CONFIG_ARCH_HAS_SYSCALL_WRAPPER, read pt_regs directly */ ++#define ___bpf_syscall_args0() ctx ++#define ___bpf_syscall_args1(x) ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_SYSCALL(regs) ++#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_SYSCALL(regs) ++#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_SYSCALL(regs) ++#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_SYSCALL(regs) ++#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_SYSCALL(regs) ++#define ___bpf_syscall_args(args...) ___bpf_apply(___bpf_syscall_args, ___bpf_narg(args))(args) ++ ++/* If kernel doesn't have CONFIG_ARCH_HAS_SYSCALL_WRAPPER, we have to BPF_CORE_READ from pt_regs */ ++#define ___bpf_syswrap_args0() ctx ++#define ___bpf_syswrap_args1(x) ___bpf_syswrap_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs) ++#define ___bpf_syswrap_args2(x, args...) 
___bpf_syswrap_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs) ++#define ___bpf_syswrap_args3(x, args...) ___bpf_syswrap_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs) ++#define ___bpf_syswrap_args4(x, args...) ___bpf_syswrap_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs) ++#define ___bpf_syswrap_args5(x, args...) ___bpf_syswrap_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs) ++#define ___bpf_syswrap_args(args...) ___bpf_apply(___bpf_syswrap_args, ___bpf_narg(args))(args) ++ ++/* ++ * BPF_KSYSCALL is a variant of BPF_KPROBE, which is intended for ++ * tracing syscall functions, like __x64_sys_close. It hides the underlying ++ * platform-specific low-level way of getting syscall input arguments from ++ * struct pt_regs, and provides a familiar typed and named function arguments ++ * syntax and semantics of accessing syscall input parameters. ++ * ++ * Original struct pt_regs * context is preserved as 'ctx' argument. This might ++ * be necessary when using BPF helpers like bpf_perf_event_output(). ++ * ++ * At the moment BPF_KSYSCALL does not transparently handle all the calling ++ * convention quirks for the following syscalls: ++ * ++ * - mmap(): __ARCH_WANT_SYS_OLD_MMAP. ++ * - clone(): CONFIG_CLONE_BACKWARDS, CONFIG_CLONE_BACKWARDS2 and ++ * CONFIG_CLONE_BACKWARDS3. ++ * - socket-related syscalls: __ARCH_WANT_SYS_SOCKETCALL. ++ * - compat syscalls. ++ * ++ * This may or may not change in the future. User needs to take extra measures ++ * to handle such quirks explicitly, if necessary. ++ * ++ * This macro relies on BPF CO-RE support and virtual __kconfig externs. ++ */ ++#define BPF_KSYSCALL(name, args...) \ ++name(struct pt_regs *ctx); \ ++extern _Bool LINUX_HAS_SYSCALL_WRAPPER __kconfig; \ ++static __attribute__((always_inline)) typeof(name(0)) \ ++____##name(struct pt_regs *ctx, ##args); \ ++typeof(name(0)) name(struct pt_regs *ctx) \ ++{ \ ++ struct pt_regs *regs = LINUX_HAS_SYSCALL_WRAPPER \ ++ ? (struct pt_regs *)PT_REGS_PARM1(ctx) \ ++ : ctx; \ ++ _Pragma("GCC diagnostic push") \ ++ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ ++ if (LINUX_HAS_SYSCALL_WRAPPER) \ ++ return ____##name(___bpf_syswrap_args(args)); \ ++ else \ ++ return ____##name(___bpf_syscall_args(args)); \ ++ _Pragma("GCC diagnostic pop") \ ++} \ ++static __attribute__((always_inline)) typeof(name(0)) \ ++____##name(struct pt_regs *ctx, ##args) ++ ++#define BPF_KPROBE_SYSCALL BPF_KSYSCALL ++ ++#endif +diff --git a/src/cc/libbpf/src/btf.c b/src/cc/libbpf/src/btf.c +new file mode 100644 +index 0000000..2d14f1a +--- /dev/null ++++ b/src/cc/libbpf/src/btf.c +@@ -0,0 +1,4909 @@ ++// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) ++/* Copyright (c) 2018 Facebook */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "btf.h" ++#include "bpf.h" ++#include "libbpf.h" ++#include "libbpf_internal.h" ++#include "hashmap.h" ++#include "strset.h" ++ ++#define BTF_MAX_NR_TYPES 0x7fffffffU ++#define BTF_MAX_STR_OFFSET 0x7fffffffU ++ ++static struct btf_type btf_void; ++ ++struct btf { ++ /* raw BTF data in native endianness */ ++ void *raw_data; ++ /* raw BTF data in non-native endianness */ ++ void *raw_data_swapped; ++ __u32 raw_size; ++ /* whether target endianness differs from the native one */ ++ bool swapped_endian; ++ ++ /* ++ * When BTF is loaded from an ELF or raw memory it is stored ++ * in a contiguous memory block. 
The hdr, type_data, and, strs_data ++ * point inside that memory region to their respective parts of BTF ++ * representation: ++ * ++ * +--------------------------------+ ++ * | Header | Types | Strings | ++ * +--------------------------------+ ++ * ^ ^ ^ ++ * | | | ++ * hdr | | ++ * types_data-+ | ++ * strs_data------------+ ++ * ++ * If BTF data is later modified, e.g., due to types added or ++ * removed, BTF deduplication performed, etc, this contiguous ++ * representation is broken up into three independently allocated ++ * memory regions to be able to modify them independently. ++ * raw_data is nulled out at that point, but can be later allocated ++ * and cached again if user calls btf__raw_data(), at which point ++ * raw_data will contain a contiguous copy of header, types, and ++ * strings: ++ * ++ * +----------+ +---------+ +-----------+ ++ * | Header | | Types | | Strings | ++ * +----------+ +---------+ +-----------+ ++ * ^ ^ ^ ++ * | | | ++ * hdr | | ++ * types_data----+ | ++ * strset__data(strs_set)-----+ ++ * ++ * +----------+---------+-----------+ ++ * | Header | Types | Strings | ++ * raw_data----->+----------+---------+-----------+ ++ */ ++ struct btf_header *hdr; ++ ++ void *types_data; ++ size_t types_data_cap; /* used size stored in hdr->type_len */ ++ ++ /* type ID to `struct btf_type *` lookup index ++ * type_offs[0] corresponds to the first non-VOID type: ++ * - for base BTF it's type [1]; ++ * - for split BTF it's the first non-base BTF type. ++ */ ++ __u32 *type_offs; ++ size_t type_offs_cap; ++ /* number of types in this BTF instance: ++ * - doesn't include special [0] void type; ++ * - for split BTF counts number of types added on top of base BTF. ++ */ ++ __u32 nr_types; ++ /* if not NULL, points to the base BTF on top of which the current ++ * split BTF is based ++ */ ++ struct btf *base_btf; ++ /* BTF type ID of the first type in this BTF instance: ++ * - for base BTF it's equal to 1; ++ * - for split BTF it's equal to biggest type ID of base BTF plus 1. ++ */ ++ int start_id; ++ /* logical string offset of this BTF instance: ++ * - for base BTF it's equal to 0; ++ * - for split BTF it's equal to total size of base BTF's string section size. ++ */ ++ int start_str_off; ++ ++ /* only one of strs_data or strs_set can be non-NULL, depending on ++ * whether BTF is in a modifiable state (strs_set is used) or not ++ * (strs_data points inside raw_data) ++ */ ++ void *strs_data; ++ /* a set of unique strings */ ++ struct strset *strs_set; ++ /* whether strings are already deduplicated */ ++ bool strs_deduped; ++ ++ /* BTF object FD, if loaded into kernel */ ++ int fd; ++ ++ /* Pointer size (in bytes) for a target architecture of this BTF */ ++ int ptr_sz; ++}; ++ ++static inline __u64 ptr_to_u64(const void *ptr) ++{ ++ return (__u64) (unsigned long) ptr; ++} ++ ++/* Ensure given dynamically allocated memory region pointed to by *data* with ++ * capacity of *cap_cnt* elements each taking *elem_sz* bytes has enough ++ * memory to accommodate *add_cnt* new elements, assuming *cur_cnt* elements ++ * are already used. At most *max_cnt* elements can be ever allocated. ++ * If necessary, memory is reallocated and all existing data is copied over, ++ * new pointer to the memory region is stored at *data, new memory region ++ * capacity (in number of elements) is stored in *cap. ++ * On success, memory pointer to the beginning of unused memory is returned. ++ * On error, NULL is returned. 
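Read from the caller's side, the contract documented above boils down to: keep a data pointer, a capacity and a used count, and ask for room before each append. The sketch below shows that typical internal usage pattern (editor's addition; struct item_vec and add_item are made-up names, and this helper is declared in libbpf_internal.h rather than the public API).

#include <errno.h>
#include <stdint.h>
#include "libbpf_internal.h"

struct item { int v; };

struct item_vec {
        struct item *data;
        size_t cap;     /* allocated capacity, in elements */
        size_t cnt;     /* elements currently in use */
};

static int add_item(struct item_vec *vec, struct item it)
{
        struct item *slot;

        /* grows vec->data by ~25% (at least 16 elements) once cnt reaches cap */
        slot = libbpf_add_mem((void **)&vec->data, &vec->cap, sizeof(*vec->data),
                              vec->cnt, SIZE_MAX, 1);
        if (!slot)
                return -ENOMEM;

        *slot = it;
        vec->cnt++;
        return 0;
}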
++ */ ++void *libbpf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz, ++ size_t cur_cnt, size_t max_cnt, size_t add_cnt) ++{ ++ size_t new_cnt; ++ void *new_data; ++ ++ if (cur_cnt + add_cnt <= *cap_cnt) ++ return *data + cur_cnt * elem_sz; ++ ++ /* requested more than the set limit */ ++ if (cur_cnt + add_cnt > max_cnt) ++ return NULL; ++ ++ new_cnt = *cap_cnt; ++ new_cnt += new_cnt / 4; /* expand by 25% */ ++ if (new_cnt < 16) /* but at least 16 elements */ ++ new_cnt = 16; ++ if (new_cnt > max_cnt) /* but not exceeding a set limit */ ++ new_cnt = max_cnt; ++ if (new_cnt < cur_cnt + add_cnt) /* also ensure we have enough memory */ ++ new_cnt = cur_cnt + add_cnt; ++ ++ new_data = libbpf_reallocarray(*data, new_cnt, elem_sz); ++ if (!new_data) ++ return NULL; ++ ++ /* zero out newly allocated portion of memory */ ++ memset(new_data + (*cap_cnt) * elem_sz, 0, (new_cnt - *cap_cnt) * elem_sz); ++ ++ *data = new_data; ++ *cap_cnt = new_cnt; ++ return new_data + cur_cnt * elem_sz; ++} ++ ++/* Ensure given dynamically allocated memory region has enough allocated space ++ * to accommodate *need_cnt* elements of size *elem_sz* bytes each ++ */ ++int libbpf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt) ++{ ++ void *p; ++ ++ if (need_cnt <= *cap_cnt) ++ return 0; ++ ++ p = libbpf_add_mem(data, cap_cnt, elem_sz, *cap_cnt, SIZE_MAX, need_cnt - *cap_cnt); ++ if (!p) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++static void *btf_add_type_offs_mem(struct btf *btf, size_t add_cnt) ++{ ++ return libbpf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32), ++ btf->nr_types, BTF_MAX_NR_TYPES, add_cnt); ++} ++ ++static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off) ++{ ++ __u32 *p; ++ ++ p = btf_add_type_offs_mem(btf, 1); ++ if (!p) ++ return -ENOMEM; ++ ++ *p = type_off; ++ return 0; ++} ++ ++static void btf_bswap_hdr(struct btf_header *h) ++{ ++ h->magic = bswap_16(h->magic); ++ h->hdr_len = bswap_32(h->hdr_len); ++ h->type_off = bswap_32(h->type_off); ++ h->type_len = bswap_32(h->type_len); ++ h->str_off = bswap_32(h->str_off); ++ h->str_len = bswap_32(h->str_len); ++} ++ ++static int btf_parse_hdr(struct btf *btf) ++{ ++ struct btf_header *hdr = btf->hdr; ++ __u32 meta_left; ++ ++ if (btf->raw_size < sizeof(struct btf_header)) { ++ pr_debug("BTF header not found\n"); ++ return -EINVAL; ++ } ++ ++ if (hdr->magic == bswap_16(BTF_MAGIC)) { ++ btf->swapped_endian = true; ++ if (bswap_32(hdr->hdr_len) != sizeof(struct btf_header)) { ++ pr_warn("Can't load BTF with non-native endianness due to unsupported header length %u\n", ++ bswap_32(hdr->hdr_len)); ++ return -ENOTSUP; ++ } ++ btf_bswap_hdr(hdr); ++ } else if (hdr->magic != BTF_MAGIC) { ++ pr_debug("Invalid BTF magic: %x\n", hdr->magic); ++ return -EINVAL; ++ } ++ ++ if (btf->raw_size < hdr->hdr_len) { ++ pr_debug("BTF header len %u larger than data size %u\n", ++ hdr->hdr_len, btf->raw_size); ++ return -EINVAL; ++ } ++ ++ meta_left = btf->raw_size - hdr->hdr_len; ++ if (meta_left < (long long)hdr->str_off + hdr->str_len) { ++ pr_debug("Invalid BTF total size: %u\n", btf->raw_size); ++ return -EINVAL; ++ } ++ ++ if ((long long)hdr->type_off + hdr->type_len > hdr->str_off) { ++ pr_debug("Invalid BTF data sections layout: type data at %u + %u, strings data at %u + %u\n", ++ hdr->type_off, hdr->type_len, hdr->str_off, hdr->str_len); ++ return -EINVAL; ++ } ++ ++ if (hdr->type_off % 4) { ++ pr_debug("BTF type section is not aligned to 4 bytes\n"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static 
int btf_parse_str_sec(struct btf *btf) ++{ ++ const struct btf_header *hdr = btf->hdr; ++ const char *start = btf->strs_data; ++ const char *end = start + btf->hdr->str_len; ++ ++ if (btf->base_btf && hdr->str_len == 0) ++ return 0; ++ if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET || end[-1]) { ++ pr_debug("Invalid BTF string section\n"); ++ return -EINVAL; ++ } ++ if (!btf->base_btf && start[0]) { ++ pr_debug("Invalid BTF string section\n"); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++static int btf_type_size(const struct btf_type *t) ++{ ++ const int base_size = sizeof(struct btf_type); ++ __u16 vlen = btf_vlen(t); ++ ++ switch (btf_kind(t)) { ++ case BTF_KIND_FWD: ++ case BTF_KIND_CONST: ++ case BTF_KIND_VOLATILE: ++ case BTF_KIND_RESTRICT: ++ case BTF_KIND_PTR: ++ case BTF_KIND_TYPEDEF: ++ case BTF_KIND_FUNC: ++ case BTF_KIND_FLOAT: ++ case BTF_KIND_TYPE_TAG: ++ return base_size; ++ case BTF_KIND_INT: ++ return base_size + sizeof(__u32); ++ case BTF_KIND_ENUM: ++ return base_size + vlen * sizeof(struct btf_enum); ++ case BTF_KIND_ENUM64: ++ return base_size + vlen * sizeof(struct btf_enum64); ++ case BTF_KIND_ARRAY: ++ return base_size + sizeof(struct btf_array); ++ case BTF_KIND_STRUCT: ++ case BTF_KIND_UNION: ++ return base_size + vlen * sizeof(struct btf_member); ++ case BTF_KIND_FUNC_PROTO: ++ return base_size + vlen * sizeof(struct btf_param); ++ case BTF_KIND_VAR: ++ return base_size + sizeof(struct btf_var); ++ case BTF_KIND_DATASEC: ++ return base_size + vlen * sizeof(struct btf_var_secinfo); ++ case BTF_KIND_DECL_TAG: ++ return base_size + sizeof(struct btf_decl_tag); ++ default: ++ pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t)); ++ return -EINVAL; ++ } ++} ++ ++static void btf_bswap_type_base(struct btf_type *t) ++{ ++ t->name_off = bswap_32(t->name_off); ++ t->info = bswap_32(t->info); ++ t->type = bswap_32(t->type); ++} ++ ++static int btf_bswap_type_rest(struct btf_type *t) ++{ ++ struct btf_var_secinfo *v; ++ struct btf_enum64 *e64; ++ struct btf_member *m; ++ struct btf_array *a; ++ struct btf_param *p; ++ struct btf_enum *e; ++ __u16 vlen = btf_vlen(t); ++ int i; ++ ++ switch (btf_kind(t)) { ++ case BTF_KIND_FWD: ++ case BTF_KIND_CONST: ++ case BTF_KIND_VOLATILE: ++ case BTF_KIND_RESTRICT: ++ case BTF_KIND_PTR: ++ case BTF_KIND_TYPEDEF: ++ case BTF_KIND_FUNC: ++ case BTF_KIND_FLOAT: ++ case BTF_KIND_TYPE_TAG: ++ return 0; ++ case BTF_KIND_INT: ++ *(__u32 *)(t + 1) = bswap_32(*(__u32 *)(t + 1)); ++ return 0; ++ case BTF_KIND_ENUM: ++ for (i = 0, e = btf_enum(t); i < vlen; i++, e++) { ++ e->name_off = bswap_32(e->name_off); ++ e->val = bswap_32(e->val); ++ } ++ return 0; ++ case BTF_KIND_ENUM64: ++ for (i = 0, e64 = btf_enum64(t); i < vlen; i++, e64++) { ++ e64->name_off = bswap_32(e64->name_off); ++ e64->val_lo32 = bswap_32(e64->val_lo32); ++ e64->val_hi32 = bswap_32(e64->val_hi32); ++ } ++ return 0; ++ case BTF_KIND_ARRAY: ++ a = btf_array(t); ++ a->type = bswap_32(a->type); ++ a->index_type = bswap_32(a->index_type); ++ a->nelems = bswap_32(a->nelems); ++ return 0; ++ case BTF_KIND_STRUCT: ++ case BTF_KIND_UNION: ++ for (i = 0, m = btf_members(t); i < vlen; i++, m++) { ++ m->name_off = bswap_32(m->name_off); ++ m->type = bswap_32(m->type); ++ m->offset = bswap_32(m->offset); ++ } ++ return 0; ++ case BTF_KIND_FUNC_PROTO: ++ for (i = 0, p = btf_params(t); i < vlen; i++, p++) { ++ p->name_off = bswap_32(p->name_off); ++ p->type = bswap_32(p->type); ++ } ++ return 0; ++ case BTF_KIND_VAR: ++ btf_var(t)->linkage = bswap_32(btf_var(t)->linkage); ++ return 
0; ++ case BTF_KIND_DATASEC: ++ for (i = 0, v = btf_var_secinfos(t); i < vlen; i++, v++) { ++ v->type = bswap_32(v->type); ++ v->offset = bswap_32(v->offset); ++ v->size = bswap_32(v->size); ++ } ++ return 0; ++ case BTF_KIND_DECL_TAG: ++ btf_decl_tag(t)->component_idx = bswap_32(btf_decl_tag(t)->component_idx); ++ return 0; ++ default: ++ pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t)); ++ return -EINVAL; ++ } ++} ++ ++static int btf_parse_type_sec(struct btf *btf) ++{ ++ struct btf_header *hdr = btf->hdr; ++ void *next_type = btf->types_data; ++ void *end_type = next_type + hdr->type_len; ++ int err, type_size; ++ ++ while (next_type + sizeof(struct btf_type) <= end_type) { ++ if (btf->swapped_endian) ++ btf_bswap_type_base(next_type); ++ ++ type_size = btf_type_size(next_type); ++ if (type_size < 0) ++ return type_size; ++ if (next_type + type_size > end_type) { ++ pr_warn("BTF type [%d] is malformed\n", btf->start_id + btf->nr_types); ++ return -EINVAL; ++ } ++ ++ if (btf->swapped_endian && btf_bswap_type_rest(next_type)) ++ return -EINVAL; ++ ++ err = btf_add_type_idx_entry(btf, next_type - btf->types_data); ++ if (err) ++ return err; ++ ++ next_type += type_size; ++ btf->nr_types++; ++ } ++ ++ if (next_type != end_type) { ++ pr_warn("BTF types data is malformed\n"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++__u32 btf__type_cnt(const struct btf *btf) ++{ ++ return btf->start_id + btf->nr_types; ++} ++ ++const struct btf *btf__base_btf(const struct btf *btf) ++{ ++ return btf->base_btf; ++} ++ ++/* internal helper returning non-const pointer to a type */ ++struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id) ++{ ++ if (type_id == 0) ++ return &btf_void; ++ if (type_id < btf->start_id) ++ return btf_type_by_id(btf->base_btf, type_id); ++ return btf->types_data + btf->type_offs[type_id - btf->start_id]; ++} ++ ++const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id) ++{ ++ if (type_id >= btf->start_id + btf->nr_types) ++ return errno = EINVAL, NULL; ++ return btf_type_by_id((struct btf *)btf, type_id); ++} ++ ++static int determine_ptr_size(const struct btf *btf) ++{ ++ static const char * const long_aliases[] = { ++ "long", ++ "long int", ++ "int long", ++ "unsigned long", ++ "long unsigned", ++ "unsigned long int", ++ "unsigned int long", ++ "long unsigned int", ++ "long int unsigned", ++ "int unsigned long", ++ "int long unsigned", ++ }; ++ const struct btf_type *t; ++ const char *name; ++ int i, j, n; ++ ++ if (btf->base_btf && btf->base_btf->ptr_sz > 0) ++ return btf->base_btf->ptr_sz; ++ ++ n = btf__type_cnt(btf); ++ for (i = 1; i < n; i++) { ++ t = btf__type_by_id(btf, i); ++ if (!btf_is_int(t)) ++ continue; ++ ++ if (t->size != 4 && t->size != 8) ++ continue; ++ ++ name = btf__name_by_offset(btf, t->name_off); ++ if (!name) ++ continue; ++ ++ for (j = 0; j < ARRAY_SIZE(long_aliases); j++) { ++ if (strcmp(name, long_aliases[j]) == 0) ++ return t->size; ++ } ++ } ++ ++ return -1; ++} ++ ++static size_t btf_ptr_sz(const struct btf *btf) ++{ ++ if (!btf->ptr_sz) ++ ((struct btf *)btf)->ptr_sz = determine_ptr_size(btf); ++ return btf->ptr_sz < 0 ? sizeof(void *) : btf->ptr_sz; ++} ++ ++/* Return pointer size this BTF instance assumes. The size is heuristically ++ * determined by looking for 'long' or 'unsigned long' integer type and ++ * recording its size in bytes. If BTF type information doesn't have any such ++ * type, this function returns 0. 
In the latter case, native architecture's ++ * pointer size is assumed, so will be either 4 or 8, depending on ++ * architecture that libbpf was compiled for. It's possible to override ++ * guessed value by using btf__set_pointer_size() API. ++ */ ++size_t btf__pointer_size(const struct btf *btf) ++{ ++ if (!btf->ptr_sz) ++ ((struct btf *)btf)->ptr_sz = determine_ptr_size(btf); ++ ++ if (btf->ptr_sz < 0) ++ /* not enough BTF type info to guess */ ++ return 0; ++ ++ return btf->ptr_sz; ++} ++ ++/* Override or set pointer size in bytes. Only values of 4 and 8 are ++ * supported. ++ */ ++int btf__set_pointer_size(struct btf *btf, size_t ptr_sz) ++{ ++ if (ptr_sz != 4 && ptr_sz != 8) ++ return libbpf_err(-EINVAL); ++ btf->ptr_sz = ptr_sz; ++ return 0; ++} ++ ++static bool is_host_big_endian(void) ++{ ++#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ++ return false; ++#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ ++ return true; ++#else ++# error "Unrecognized __BYTE_ORDER__" ++#endif ++} ++ ++enum btf_endianness btf__endianness(const struct btf *btf) ++{ ++ if (is_host_big_endian()) ++ return btf->swapped_endian ? BTF_LITTLE_ENDIAN : BTF_BIG_ENDIAN; ++ else ++ return btf->swapped_endian ? BTF_BIG_ENDIAN : BTF_LITTLE_ENDIAN; ++} ++ ++int btf__set_endianness(struct btf *btf, enum btf_endianness endian) ++{ ++ if (endian != BTF_LITTLE_ENDIAN && endian != BTF_BIG_ENDIAN) ++ return libbpf_err(-EINVAL); ++ ++ btf->swapped_endian = is_host_big_endian() != (endian == BTF_BIG_ENDIAN); ++ if (!btf->swapped_endian) { ++ free(btf->raw_data_swapped); ++ btf->raw_data_swapped = NULL; ++ } ++ return 0; ++} ++ ++static bool btf_type_is_void(const struct btf_type *t) ++{ ++ return t == &btf_void || btf_is_fwd(t); ++} ++ ++static bool btf_type_is_void_or_null(const struct btf_type *t) ++{ ++ return !t || btf_type_is_void(t); ++} ++ ++#define MAX_RESOLVE_DEPTH 32 ++ ++__s64 btf__resolve_size(const struct btf *btf, __u32 type_id) ++{ ++ const struct btf_array *array; ++ const struct btf_type *t; ++ __u32 nelems = 1; ++ __s64 size = -1; ++ int i; ++ ++ t = btf__type_by_id(btf, type_id); ++ for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t); i++) { ++ switch (btf_kind(t)) { ++ case BTF_KIND_INT: ++ case BTF_KIND_STRUCT: ++ case BTF_KIND_UNION: ++ case BTF_KIND_ENUM: ++ case BTF_KIND_ENUM64: ++ case BTF_KIND_DATASEC: ++ case BTF_KIND_FLOAT: ++ size = t->size; ++ goto done; ++ case BTF_KIND_PTR: ++ size = btf_ptr_sz(btf); ++ goto done; ++ case BTF_KIND_TYPEDEF: ++ case BTF_KIND_VOLATILE: ++ case BTF_KIND_CONST: ++ case BTF_KIND_RESTRICT: ++ case BTF_KIND_VAR: ++ case BTF_KIND_DECL_TAG: ++ case BTF_KIND_TYPE_TAG: ++ type_id = t->type; ++ break; ++ case BTF_KIND_ARRAY: ++ array = btf_array(t); ++ if (nelems && array->nelems > UINT32_MAX / nelems) ++ return libbpf_err(-E2BIG); ++ nelems *= array->nelems; ++ type_id = array->type; ++ break; ++ default: ++ return libbpf_err(-EINVAL); ++ } ++ ++ t = btf__type_by_id(btf, type_id); ++ } ++ ++done: ++ if (size < 0) ++ return libbpf_err(-EINVAL); ++ if (nelems && size > UINT32_MAX / nelems) ++ return libbpf_err(-E2BIG); ++ ++ return nelems * size; ++} ++ ++int btf__align_of(const struct btf *btf, __u32 id) ++{ ++ const struct btf_type *t = btf__type_by_id(btf, id); ++ __u16 kind = btf_kind(t); ++ ++ switch (kind) { ++ case BTF_KIND_INT: ++ case BTF_KIND_ENUM: ++ case BTF_KIND_ENUM64: ++ case BTF_KIND_FLOAT: ++ return min(btf_ptr_sz(btf), (size_t)t->size); ++ case BTF_KIND_PTR: ++ return btf_ptr_sz(btf); ++ case BTF_KIND_TYPEDEF: ++ case BTF_KIND_VOLATILE: ++ case 
BTF_KIND_CONST: ++ case BTF_KIND_RESTRICT: ++ case BTF_KIND_TYPE_TAG: ++ return btf__align_of(btf, t->type); ++ case BTF_KIND_ARRAY: ++ return btf__align_of(btf, btf_array(t)->type); ++ case BTF_KIND_STRUCT: ++ case BTF_KIND_UNION: { ++ const struct btf_member *m = btf_members(t); ++ __u16 vlen = btf_vlen(t); ++ int i, max_align = 1, align; ++ ++ for (i = 0; i < vlen; i++, m++) { ++ align = btf__align_of(btf, m->type); ++ if (align <= 0) ++ return libbpf_err(align); ++ max_align = max(max_align, align); ++ } ++ ++ return max_align; ++ } ++ default: ++ pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t)); ++ return errno = EINVAL, 0; ++ } ++} ++ ++int btf__resolve_type(const struct btf *btf, __u32 type_id) ++{ ++ const struct btf_type *t; ++ int depth = 0; ++ ++ t = btf__type_by_id(btf, type_id); ++ while (depth < MAX_RESOLVE_DEPTH && ++ !btf_type_is_void_or_null(t) && ++ (btf_is_mod(t) || btf_is_typedef(t) || btf_is_var(t))) { ++ type_id = t->type; ++ t = btf__type_by_id(btf, type_id); ++ depth++; ++ } ++ ++ if (depth == MAX_RESOLVE_DEPTH || btf_type_is_void_or_null(t)) ++ return libbpf_err(-EINVAL); ++ ++ return type_id; ++} ++ ++__s32 btf__find_by_name(const struct btf *btf, const char *type_name) ++{ ++ __u32 i, nr_types = btf__type_cnt(btf); ++ ++ if (!strcmp(type_name, "void")) ++ return 0; ++ ++ for (i = 1; i < nr_types; i++) { ++ const struct btf_type *t = btf__type_by_id(btf, i); ++ const char *name = btf__name_by_offset(btf, t->name_off); ++ ++ if (name && !strcmp(type_name, name)) ++ return i; ++ } ++ ++ return libbpf_err(-ENOENT); ++} ++ ++static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id, ++ const char *type_name, __u32 kind) ++{ ++ __u32 i, nr_types = btf__type_cnt(btf); ++ ++ if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void")) ++ return 0; ++ ++ for (i = start_id; i < nr_types; i++) { ++ const struct btf_type *t = btf__type_by_id(btf, i); ++ const char *name; ++ ++ if (btf_kind(t) != kind) ++ continue; ++ name = btf__name_by_offset(btf, t->name_off); ++ if (name && !strcmp(type_name, name)) ++ return i; ++ } ++ ++ return libbpf_err(-ENOENT); ++} ++ ++__s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name, ++ __u32 kind) ++{ ++ return btf_find_by_name_kind(btf, btf->start_id, type_name, kind); ++} ++ ++__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, ++ __u32 kind) ++{ ++ return btf_find_by_name_kind(btf, 1, type_name, kind); ++} ++ ++static bool btf_is_modifiable(const struct btf *btf) ++{ ++ return (void *)btf->hdr != btf->raw_data; ++} ++ ++void btf__free(struct btf *btf) ++{ ++ if (IS_ERR_OR_NULL(btf)) ++ return; ++ ++ if (btf->fd >= 0) ++ close(btf->fd); ++ ++ if (btf_is_modifiable(btf)) { ++ /* if BTF was modified after loading, it will have a split ++ * in-memory representation for header, types, and strings ++ * sections, so we need to free all of them individually. It ++ * might still have a cached contiguous raw data present, ++ * which will be unconditionally freed below. 
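For reference, a minimal caller-side sketch of the lookup and sizing helpers above (btf__find_by_name_kind(), btf__resolve_size(), btf__align_of(), btf__pointer_size()); the helper name, the struct name and the <bpf/btf.h> include path are illustrative assumptions, and *btf* is assumed to come from one of the constructors defined later in this file:

#include <errno.h>
#include <stdio.h>
#include <bpf/btf.h>

/* hypothetical helper: print layout info for a named struct */
static int print_struct_layout(const struct btf *btf, const char *name)
{
        __s32 id = btf__find_by_name_kind(btf, name, BTF_KIND_STRUCT);

        if (id <= 0)
                return -ENOENT;
        printf("%s: size=%lld, align=%d, pointer size=%zu\n", name,
               (long long)btf__resolve_size(btf, id),
               btf__align_of(btf, id),
               btf__pointer_size(btf));
        return 0;
}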
++ */ ++ free(btf->hdr); ++ free(btf->types_data); ++ strset__free(btf->strs_set); ++ } ++ free(btf->raw_data); ++ free(btf->raw_data_swapped); ++ free(btf->type_offs); ++ free(btf); ++} ++ ++static struct btf *btf_new_empty(struct btf *base_btf) ++{ ++ struct btf *btf; ++ ++ btf = calloc(1, sizeof(*btf)); ++ if (!btf) ++ return ERR_PTR(-ENOMEM); ++ ++ btf->nr_types = 0; ++ btf->start_id = 1; ++ btf->start_str_off = 0; ++ btf->fd = -1; ++ btf->ptr_sz = sizeof(void *); ++ btf->swapped_endian = false; ++ ++ if (base_btf) { ++ btf->base_btf = base_btf; ++ btf->start_id = btf__type_cnt(base_btf); ++ btf->start_str_off = base_btf->hdr->str_len; ++ } ++ ++ /* +1 for empty string at offset 0 */ ++ btf->raw_size = sizeof(struct btf_header) + (base_btf ? 0 : 1); ++ btf->raw_data = calloc(1, btf->raw_size); ++ if (!btf->raw_data) { ++ free(btf); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ btf->hdr = btf->raw_data; ++ btf->hdr->hdr_len = sizeof(struct btf_header); ++ btf->hdr->magic = BTF_MAGIC; ++ btf->hdr->version = BTF_VERSION; ++ ++ btf->types_data = btf->raw_data + btf->hdr->hdr_len; ++ btf->strs_data = btf->raw_data + btf->hdr->hdr_len; ++ btf->hdr->str_len = base_btf ? 0 : 1; /* empty string at offset 0 */ ++ ++ return btf; ++} ++ ++struct btf *btf__new_empty(void) ++{ ++ return libbpf_ptr(btf_new_empty(NULL)); ++} ++ ++struct btf *btf__new_empty_split(struct btf *base_btf) ++{ ++ return libbpf_ptr(btf_new_empty(base_btf)); ++} ++ ++static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) ++{ ++ struct btf *btf; ++ int err; ++ ++ btf = calloc(1, sizeof(struct btf)); ++ if (!btf) ++ return ERR_PTR(-ENOMEM); ++ ++ btf->nr_types = 0; ++ btf->start_id = 1; ++ btf->start_str_off = 0; ++ btf->fd = -1; ++ ++ if (base_btf) { ++ btf->base_btf = base_btf; ++ btf->start_id = btf__type_cnt(base_btf); ++ btf->start_str_off = base_btf->hdr->str_len; ++ } ++ ++ btf->raw_data = malloc(size); ++ if (!btf->raw_data) { ++ err = -ENOMEM; ++ goto done; ++ } ++ memcpy(btf->raw_data, data, size); ++ btf->raw_size = size; ++ ++ btf->hdr = btf->raw_data; ++ err = btf_parse_hdr(btf); ++ if (err) ++ goto done; ++ ++ btf->strs_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->str_off; ++ btf->types_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->type_off; ++ ++ err = btf_parse_str_sec(btf); ++ err = err ?: btf_parse_type_sec(btf); ++ if (err) ++ goto done; ++ ++done: ++ if (err) { ++ btf__free(btf); ++ return ERR_PTR(err); ++ } ++ ++ return btf; ++} ++ ++struct btf *btf__new(const void *data, __u32 size) ++{ ++ return libbpf_ptr(btf_new(data, size, NULL)); ++} ++ ++static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, ++ struct btf_ext **btf_ext) ++{ ++ Elf_Data *btf_data = NULL, *btf_ext_data = NULL; ++ int err = 0, fd = -1, idx = 0; ++ struct btf *btf = NULL; ++ Elf_Scn *scn = NULL; ++ Elf *elf = NULL; ++ GElf_Ehdr ehdr; ++ size_t shstrndx; ++ ++ if (elf_version(EV_CURRENT) == EV_NONE) { ++ pr_warn("failed to init libelf for %s\n", path); ++ return ERR_PTR(-LIBBPF_ERRNO__LIBELF); ++ } ++ ++ fd = open(path, O_RDONLY | O_CLOEXEC); ++ if (fd < 0) { ++ err = -errno; ++ pr_warn("failed to open %s: %s\n", path, strerror(errno)); ++ return ERR_PTR(err); ++ } ++ ++ err = -LIBBPF_ERRNO__FORMAT; ++ ++ elf = elf_begin(fd, ELF_C_READ, NULL); ++ if (!elf) { ++ pr_warn("failed to open %s as ELF file\n", path); ++ goto done; ++ } ++ if (!gelf_getehdr(elf, &ehdr)) { ++ pr_warn("failed to get EHDR from %s\n", path); ++ goto done; ++ } ++ ++ if (elf_getshdrstrndx(elf, &shstrndx)) { ++ 
pr_warn("failed to get section names section index for %s\n", ++ path); ++ goto done; ++ } ++ ++ if (!elf_rawdata(elf_getscn(elf, shstrndx), NULL)) { ++ pr_warn("failed to get e_shstrndx from %s\n", path); ++ goto done; ++ } ++ ++ while ((scn = elf_nextscn(elf, scn)) != NULL) { ++ GElf_Shdr sh; ++ char *name; ++ ++ idx++; ++ if (gelf_getshdr(scn, &sh) != &sh) { ++ pr_warn("failed to get section(%d) header from %s\n", ++ idx, path); ++ goto done; ++ } ++ name = elf_strptr(elf, shstrndx, sh.sh_name); ++ if (!name) { ++ pr_warn("failed to get section(%d) name from %s\n", ++ idx, path); ++ goto done; ++ } ++ if (strcmp(name, BTF_ELF_SEC) == 0) { ++ btf_data = elf_getdata(scn, 0); ++ if (!btf_data) { ++ pr_warn("failed to get section(%d, %s) data from %s\n", ++ idx, name, path); ++ goto done; ++ } ++ continue; ++ } else if (btf_ext && strcmp(name, BTF_EXT_ELF_SEC) == 0) { ++ btf_ext_data = elf_getdata(scn, 0); ++ if (!btf_ext_data) { ++ pr_warn("failed to get section(%d, %s) data from %s\n", ++ idx, name, path); ++ goto done; ++ } ++ continue; ++ } ++ } ++ ++ err = 0; ++ ++ if (!btf_data) { ++ err = -ENOENT; ++ goto done; ++ } ++ btf = btf_new(btf_data->d_buf, btf_data->d_size, base_btf); ++ err = libbpf_get_error(btf); ++ if (err) ++ goto done; ++ ++ switch (gelf_getclass(elf)) { ++ case ELFCLASS32: ++ btf__set_pointer_size(btf, 4); ++ break; ++ case ELFCLASS64: ++ btf__set_pointer_size(btf, 8); ++ break; ++ default: ++ pr_warn("failed to get ELF class (bitness) for %s\n", path); ++ break; ++ } ++ ++ if (btf_ext && btf_ext_data) { ++ *btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size); ++ err = libbpf_get_error(*btf_ext); ++ if (err) ++ goto done; ++ } else if (btf_ext) { ++ *btf_ext = NULL; ++ } ++done: ++ if (elf) ++ elf_end(elf); ++ close(fd); ++ ++ if (!err) ++ return btf; ++ ++ if (btf_ext) ++ btf_ext__free(*btf_ext); ++ btf__free(btf); ++ ++ return ERR_PTR(err); ++} ++ ++struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext) ++{ ++ return libbpf_ptr(btf_parse_elf(path, NULL, btf_ext)); ++} ++ ++struct btf *btf__parse_elf_split(const char *path, struct btf *base_btf) ++{ ++ return libbpf_ptr(btf_parse_elf(path, base_btf, NULL)); ++} ++ ++static struct btf *btf_parse_raw(const char *path, struct btf *base_btf) ++{ ++ struct btf *btf = NULL; ++ void *data = NULL; ++ FILE *f = NULL; ++ __u16 magic; ++ int err = 0; ++ long sz; ++ ++ f = fopen(path, "rb"); ++ if (!f) { ++ err = -errno; ++ goto err_out; ++ } ++ ++ /* check BTF magic */ ++ if (fread(&magic, 1, sizeof(magic), f) < sizeof(magic)) { ++ err = -EIO; ++ goto err_out; ++ } ++ if (magic != BTF_MAGIC && magic != bswap_16(BTF_MAGIC)) { ++ /* definitely not a raw BTF */ ++ err = -EPROTO; ++ goto err_out; ++ } ++ ++ /* get file size */ ++ if (fseek(f, 0, SEEK_END)) { ++ err = -errno; ++ goto err_out; ++ } ++ sz = ftell(f); ++ if (sz < 0) { ++ err = -errno; ++ goto err_out; ++ } ++ /* rewind to the start */ ++ if (fseek(f, 0, SEEK_SET)) { ++ err = -errno; ++ goto err_out; ++ } ++ ++ /* pre-alloc memory and read all of BTF data */ ++ data = malloc(sz); ++ if (!data) { ++ err = -ENOMEM; ++ goto err_out; ++ } ++ if (fread(data, 1, sz, f) < sz) { ++ err = -EIO; ++ goto err_out; ++ } ++ ++ /* finally parse BTF data */ ++ btf = btf_new(data, sz, base_btf); ++ ++err_out: ++ free(data); ++ if (f) ++ fclose(f); ++ return err ? 
ERR_PTR(err) : btf; ++} ++ ++struct btf *btf__parse_raw(const char *path) ++{ ++ return libbpf_ptr(btf_parse_raw(path, NULL)); ++} ++ ++struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf) ++{ ++ return libbpf_ptr(btf_parse_raw(path, base_btf)); ++} ++ ++static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext) ++{ ++ struct btf *btf; ++ int err; ++ ++ if (btf_ext) ++ *btf_ext = NULL; ++ ++ btf = btf_parse_raw(path, base_btf); ++ err = libbpf_get_error(btf); ++ if (!err) ++ return btf; ++ if (err != -EPROTO) ++ return ERR_PTR(err); ++ return btf_parse_elf(path, base_btf, btf_ext); ++} ++ ++struct btf *btf__parse(const char *path, struct btf_ext **btf_ext) ++{ ++ return libbpf_ptr(btf_parse(path, NULL, btf_ext)); ++} ++ ++struct btf *btf__parse_split(const char *path, struct btf *base_btf) ++{ ++ return libbpf_ptr(btf_parse(path, base_btf, NULL)); ++} ++ ++static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); ++ ++int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level) ++{ ++ LIBBPF_OPTS(bpf_btf_load_opts, opts); ++ __u32 buf_sz = 0, raw_size; ++ char *buf = NULL, *tmp; ++ void *raw_data; ++ int err = 0; ++ ++ if (btf->fd >= 0) ++ return libbpf_err(-EEXIST); ++ if (log_sz && !log_buf) ++ return libbpf_err(-EINVAL); ++ ++ /* cache native raw data representation */ ++ raw_data = btf_get_raw_data(btf, &raw_size, false); ++ if (!raw_data) { ++ err = -ENOMEM; ++ goto done; ++ } ++ btf->raw_size = raw_size; ++ btf->raw_data = raw_data; ++ ++retry_load: ++ /* if log_level is 0, we won't provide log_buf/log_size to the kernel, ++ * initially. Only if BTF loading fails, we bump log_level to 1 and ++ * retry, using either auto-allocated or custom log_buf. This way ++ * non-NULL custom log_buf provides a buffer just in case, but hopes ++ * for successful load and no need for log_buf. ++ */ ++ if (log_level) { ++ /* if caller didn't provide custom log_buf, we'll keep ++ * allocating our own progressively bigger buffers for BTF ++ * verification log ++ */ ++ if (!log_buf) { ++ buf_sz = max((__u32)BPF_LOG_BUF_SIZE, buf_sz * 2); ++ tmp = realloc(buf, buf_sz); ++ if (!tmp) { ++ err = -ENOMEM; ++ goto done; ++ } ++ buf = tmp; ++ buf[0] = '\0'; ++ } ++ ++ opts.log_buf = log_buf ? log_buf : buf; ++ opts.log_size = log_buf ? log_sz : buf_sz; ++ opts.log_level = log_level; ++ } ++ ++ btf->fd = bpf_btf_load(raw_data, raw_size, &opts); ++ if (btf->fd < 0) { ++ /* time to turn on verbose mode and try again */ ++ if (log_level == 0) { ++ log_level = 1; ++ goto retry_load; ++ } ++ /* only retry if caller didn't provide custom log_buf, but ++ * make sure we can never overflow buf_sz ++ */ ++ if (!log_buf && errno == ENOSPC && buf_sz <= UINT_MAX / 2) ++ goto retry_load; ++ ++ err = -errno; ++ pr_warn("BTF loading error: %d\n", err); ++ /* don't print out contents of custom log_buf */ ++ if (!log_buf && buf[0]) ++ pr_warn("-- BEGIN BTF LOAD LOG ---\n%s\n-- END BTF LOAD LOG --\n", buf); ++ } ++ ++done: ++ free(buf); ++ return libbpf_err(err); ++} ++ ++int btf__load_into_kernel(struct btf *btf) ++{ ++ return btf_load_into_kernel(btf, NULL, 0, 0); ++} ++ ++int btf__load(struct btf *) __attribute__((alias("btf__load_into_kernel"))); ++ ++int btf__fd(const struct btf *btf) ++{ ++ return btf->fd; ++} ++ ++void btf__set_fd(struct btf *btf, int fd) ++{ ++ btf->fd = fd; ++} ++ ++static const void *btf_strs_data(const struct btf *btf) ++{ ++ return btf->strs_data ? 
btf->strs_data : strset__data(btf->strs_set); ++} ++ ++static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian) ++{ ++ struct btf_header *hdr = btf->hdr; ++ struct btf_type *t; ++ void *data, *p; ++ __u32 data_sz; ++ int i; ++ ++ data = swap_endian ? btf->raw_data_swapped : btf->raw_data; ++ if (data) { ++ *size = btf->raw_size; ++ return data; ++ } ++ ++ data_sz = hdr->hdr_len + hdr->type_len + hdr->str_len; ++ data = calloc(1, data_sz); ++ if (!data) ++ return NULL; ++ p = data; ++ ++ memcpy(p, hdr, hdr->hdr_len); ++ if (swap_endian) ++ btf_bswap_hdr(p); ++ p += hdr->hdr_len; ++ ++ memcpy(p, btf->types_data, hdr->type_len); ++ if (swap_endian) { ++ for (i = 0; i < btf->nr_types; i++) { ++ t = p + btf->type_offs[i]; ++ /* btf_bswap_type_rest() relies on native t->info, so ++ * we swap base type info after we swapped all the ++ * additional information ++ */ ++ if (btf_bswap_type_rest(t)) ++ goto err_out; ++ btf_bswap_type_base(t); ++ } ++ } ++ p += hdr->type_len; ++ ++ memcpy(p, btf_strs_data(btf), hdr->str_len); ++ p += hdr->str_len; ++ ++ *size = data_sz; ++ return data; ++err_out: ++ free(data); ++ return NULL; ++} ++ ++const void *btf__raw_data(const struct btf *btf_ro, __u32 *size) ++{ ++ struct btf *btf = (struct btf *)btf_ro; ++ __u32 data_sz; ++ void *data; ++ ++ data = btf_get_raw_data(btf, &data_sz, btf->swapped_endian); ++ if (!data) ++ return errno = ENOMEM, NULL; ++ ++ btf->raw_size = data_sz; ++ if (btf->swapped_endian) ++ btf->raw_data_swapped = data; ++ else ++ btf->raw_data = data; ++ *size = data_sz; ++ return data; ++} ++ ++__attribute__((alias("btf__raw_data"))) ++const void *btf__get_raw_data(const struct btf *btf, __u32 *size); ++ ++const char *btf__str_by_offset(const struct btf *btf, __u32 offset) ++{ ++ if (offset < btf->start_str_off) ++ return btf__str_by_offset(btf->base_btf, offset); ++ else if (offset - btf->start_str_off < btf->hdr->str_len) ++ return btf_strs_data(btf) + (offset - btf->start_str_off); ++ else ++ return errno = EINVAL, NULL; ++} ++ ++const char *btf__name_by_offset(const struct btf *btf, __u32 offset) ++{ ++ return btf__str_by_offset(btf, offset); ++} ++ ++struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf) ++{ ++ struct bpf_btf_info btf_info; ++ __u32 len = sizeof(btf_info); ++ __u32 last_size; ++ struct btf *btf; ++ void *ptr; ++ int err; ++ ++ /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so ++ * let's start with a sane default - 4KiB here - and resize it only if ++ * bpf_obj_get_info_by_fd() needs a bigger buffer. ++ */ ++ last_size = 4096; ++ ptr = malloc(last_size); ++ if (!ptr) ++ return ERR_PTR(-ENOMEM); ++ ++ memset(&btf_info, 0, sizeof(btf_info)); ++ btf_info.btf = ptr_to_u64(ptr); ++ btf_info.btf_size = last_size; ++ err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); ++ ++ if (!err && btf_info.btf_size > last_size) { ++ void *temp_ptr; ++ ++ last_size = btf_info.btf_size; ++ temp_ptr = realloc(ptr, last_size); ++ if (!temp_ptr) { ++ btf = ERR_PTR(-ENOMEM); ++ goto exit_free; ++ } ++ ptr = temp_ptr; ++ ++ len = sizeof(btf_info); ++ memset(&btf_info, 0, sizeof(btf_info)); ++ btf_info.btf = ptr_to_u64(ptr); ++ btf_info.btf_size = last_size; ++ ++ err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); ++ } ++ ++ if (err || btf_info.btf_size > last_size) { ++ btf = err ? 
ERR_PTR(-errno) : ERR_PTR(-E2BIG); ++ goto exit_free; ++ } ++ ++ btf = btf_new(ptr, btf_info.btf_size, base_btf); ++ ++exit_free: ++ free(ptr); ++ return btf; ++} ++ ++struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf) ++{ ++ struct btf *btf; ++ int btf_fd; ++ ++ btf_fd = bpf_btf_get_fd_by_id(id); ++ if (btf_fd < 0) ++ return libbpf_err_ptr(-errno); ++ ++ btf = btf_get_from_fd(btf_fd, base_btf); ++ close(btf_fd); ++ ++ return libbpf_ptr(btf); ++} ++ ++struct btf *btf__load_from_kernel_by_id(__u32 id) ++{ ++ return btf__load_from_kernel_by_id_split(id, NULL); ++} ++ ++static void btf_invalidate_raw_data(struct btf *btf) ++{ ++ if (btf->raw_data) { ++ free(btf->raw_data); ++ btf->raw_data = NULL; ++ } ++ if (btf->raw_data_swapped) { ++ free(btf->raw_data_swapped); ++ btf->raw_data_swapped = NULL; ++ } ++} ++ ++/* Ensure BTF is ready to be modified (by splitting into a three memory ++ * regions for header, types, and strings). Also invalidate cached ++ * raw_data, if any. ++ */ ++static int btf_ensure_modifiable(struct btf *btf) ++{ ++ void *hdr, *types; ++ struct strset *set = NULL; ++ int err = -ENOMEM; ++ ++ if (btf_is_modifiable(btf)) { ++ /* any BTF modification invalidates raw_data */ ++ btf_invalidate_raw_data(btf); ++ return 0; ++ } ++ ++ /* split raw data into three memory regions */ ++ hdr = malloc(btf->hdr->hdr_len); ++ types = malloc(btf->hdr->type_len); ++ if (!hdr || !types) ++ goto err_out; ++ ++ memcpy(hdr, btf->hdr, btf->hdr->hdr_len); ++ memcpy(types, btf->types_data, btf->hdr->type_len); ++ ++ /* build lookup index for all strings */ ++ set = strset__new(BTF_MAX_STR_OFFSET, btf->strs_data, btf->hdr->str_len); ++ if (IS_ERR(set)) { ++ err = PTR_ERR(set); ++ goto err_out; ++ } ++ ++ /* only when everything was successful, update internal state */ ++ btf->hdr = hdr; ++ btf->types_data = types; ++ btf->types_data_cap = btf->hdr->type_len; ++ btf->strs_data = NULL; ++ btf->strs_set = set; ++ /* if BTF was created from scratch, all strings are guaranteed to be ++ * unique and deduplicated ++ */ ++ if (btf->hdr->str_len == 0) ++ btf->strs_deduped = true; ++ if (!btf->base_btf && btf->hdr->str_len == 1) ++ btf->strs_deduped = true; ++ ++ /* invalidate raw_data representation */ ++ btf_invalidate_raw_data(btf); ++ ++ return 0; ++ ++err_out: ++ strset__free(set); ++ free(hdr); ++ free(types); ++ return err; ++} ++ ++/* Find an offset in BTF string section that corresponds to a given string *s*. ++ * Returns: ++ * - >0 offset into string section, if string is found; ++ * - -ENOENT, if string is not in the string section; ++ * - <0, on any other error. ++ */ ++int btf__find_str(struct btf *btf, const char *s) ++{ ++ int off; ++ ++ if (btf->base_btf) { ++ off = btf__find_str(btf->base_btf, s); ++ if (off != -ENOENT) ++ return off; ++ } ++ ++ /* BTF needs to be in a modifiable state to build string lookup index */ ++ if (btf_ensure_modifiable(btf)) ++ return libbpf_err(-ENOMEM); ++ ++ off = strset__find_str(btf->strs_set, s); ++ if (off < 0) ++ return libbpf_err(off); ++ ++ return btf->start_str_off + off; ++} ++ ++/* Add a string s to the BTF string section. ++ * Returns: ++ * - > 0 offset into string section, on success; ++ * - < 0, on error. 
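A sketch of how the kernel-side constructor above might be consumed; the function name is illustrative and the BTF object id is assumed to have been obtained elsewhere (for example via bpf_btf_get_next_id()):

#include <stdio.h>
#include <bpf/btf.h>
#include <bpf/libbpf.h>

/* hypothetical: dump the names of all types of one in-kernel BTF object */
static void dump_kernel_btf(__u32 id)
{
        struct btf *btf = btf__load_from_kernel_by_id(id);
        __u32 i, n;

        if (libbpf_get_error(btf))
                return;
        n = btf__type_cnt(btf);
        for (i = 1; i < n; i++) {
                const struct btf_type *t = btf__type_by_id(btf, i);

                printf("[%u] %s\n", i, btf__name_by_offset(btf, t->name_off));
        }
        btf__free(btf);
}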
++ */ ++int btf__add_str(struct btf *btf, const char *s) ++{ ++ int off; ++ ++ if (btf->base_btf) { ++ off = btf__find_str(btf->base_btf, s); ++ if (off != -ENOENT) ++ return off; ++ } ++ ++ if (btf_ensure_modifiable(btf)) ++ return libbpf_err(-ENOMEM); ++ ++ off = strset__add_str(btf->strs_set, s); ++ if (off < 0) ++ return libbpf_err(off); ++ ++ btf->hdr->str_len = strset__data_size(btf->strs_set); ++ ++ return btf->start_str_off + off; ++} ++ ++static void *btf_add_type_mem(struct btf *btf, size_t add_sz) ++{ ++ return libbpf_add_mem(&btf->types_data, &btf->types_data_cap, 1, ++ btf->hdr->type_len, UINT_MAX, add_sz); ++} ++ ++static void btf_type_inc_vlen(struct btf_type *t) ++{ ++ t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, btf_kflag(t)); ++} ++ ++static int btf_commit_type(struct btf *btf, int data_sz) ++{ ++ int err; ++ ++ err = btf_add_type_idx_entry(btf, btf->hdr->type_len); ++ if (err) ++ return libbpf_err(err); ++ ++ btf->hdr->type_len += data_sz; ++ btf->hdr->str_off += data_sz; ++ btf->nr_types++; ++ return btf->start_id + btf->nr_types - 1; ++} ++ ++struct btf_pipe { ++ const struct btf *src; ++ struct btf *dst; ++ struct hashmap *str_off_map; /* map string offsets from src to dst */ ++}; ++ ++static int btf_rewrite_str(__u32 *str_off, void *ctx) ++{ ++ struct btf_pipe *p = ctx; ++ void *mapped_off; ++ int off, err; ++ ++ if (!*str_off) /* nothing to do for empty strings */ ++ return 0; ++ ++ if (p->str_off_map && ++ hashmap__find(p->str_off_map, (void *)(long)*str_off, &mapped_off)) { ++ *str_off = (__u32)(long)mapped_off; ++ return 0; ++ } ++ ++ off = btf__add_str(p->dst, btf__str_by_offset(p->src, *str_off)); ++ if (off < 0) ++ return off; ++ ++ /* Remember string mapping from src to dst. It avoids ++ * performing expensive string comparisons. 
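A small usage sketch of the string APIs above; the string literal and the assert-based round-trip check are purely illustrative:

#include <assert.h>
#include <string.h>
#include <bpf/btf.h>

/* add a string once, then show that find and offset-to-string agree */
static void string_roundtrip(struct btf *btf)
{
        int off = btf__add_str(btf, "example_name");

        if (off < 0)
                return;
        assert(btf__find_str(btf, "example_name") == off);
        assert(strcmp(btf__str_by_offset(btf, off), "example_name") == 0);
}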
++ */ ++ if (p->str_off_map) { ++ err = hashmap__append(p->str_off_map, (void *)(long)*str_off, (void *)(long)off); ++ if (err) ++ return err; ++ } ++ ++ *str_off = off; ++ return 0; ++} ++ ++int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type) ++{ ++ struct btf_pipe p = { .src = src_btf, .dst = btf }; ++ struct btf_type *t; ++ int sz, err; ++ ++ sz = btf_type_size(src_type); ++ if (sz < 0) ++ return libbpf_err(sz); ++ ++ /* deconstruct BTF, if necessary, and invalidate raw_data */ ++ if (btf_ensure_modifiable(btf)) ++ return libbpf_err(-ENOMEM); ++ ++ t = btf_add_type_mem(btf, sz); ++ if (!t) ++ return libbpf_err(-ENOMEM); ++ ++ memcpy(t, src_type, sz); ++ ++ err = btf_type_visit_str_offs(t, btf_rewrite_str, &p); ++ if (err) ++ return libbpf_err(err); ++ ++ return btf_commit_type(btf, sz); ++} ++ ++static int btf_rewrite_type_ids(__u32 *type_id, void *ctx) ++{ ++ struct btf *btf = ctx; ++ ++ if (!*type_id) /* nothing to do for VOID references */ ++ return 0; ++ ++ /* we haven't updated btf's type count yet, so ++ * btf->start_id + btf->nr_types - 1 is the type ID offset we should ++ * add to all newly added BTF types ++ */ ++ *type_id += btf->start_id + btf->nr_types - 1; ++ return 0; ++} ++ ++static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx); ++static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx); ++ ++int btf__add_btf(struct btf *btf, const struct btf *src_btf) ++{ ++ struct btf_pipe p = { .src = src_btf, .dst = btf }; ++ int data_sz, sz, cnt, i, err, old_strs_len; ++ __u32 *off; ++ void *t; ++ ++ /* appending split BTF isn't supported yet */ ++ if (src_btf->base_btf) ++ return libbpf_err(-ENOTSUP); ++ ++ /* deconstruct BTF, if necessary, and invalidate raw_data */ ++ if (btf_ensure_modifiable(btf)) ++ return libbpf_err(-ENOMEM); ++ ++ /* remember original strings section size if we have to roll back ++ * partial strings section changes ++ */ ++ old_strs_len = btf->hdr->str_len; ++ ++ data_sz = src_btf->hdr->type_len; ++ cnt = btf__type_cnt(src_btf) - 1; ++ ++ /* pre-allocate enough memory for new types */ ++ t = btf_add_type_mem(btf, data_sz); ++ if (!t) ++ return libbpf_err(-ENOMEM); ++ ++ /* pre-allocate enough memory for type offset index for new types */ ++ off = btf_add_type_offs_mem(btf, cnt); ++ if (!off) ++ return libbpf_err(-ENOMEM); ++ ++ /* Map the string offsets from src_btf to the offsets from btf to improve performance */ ++ p.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL); ++ if (IS_ERR(p.str_off_map)) ++ return libbpf_err(-ENOMEM); ++ ++ /* bulk copy types data for all types from src_btf */ ++ memcpy(t, src_btf->types_data, data_sz); ++ ++ for (i = 0; i < cnt; i++) { ++ sz = btf_type_size(t); ++ if (sz < 0) { ++ /* unlikely, has to be corrupted src_btf */ ++ err = sz; ++ goto err_out; ++ } ++ ++ /* fill out type ID to type offset mapping for lookups by type ID */ ++ *off = t - btf->types_data; ++ ++ /* add, dedup, and remap strings referenced by this BTF type */ ++ err = btf_type_visit_str_offs(t, btf_rewrite_str, &p); ++ if (err) ++ goto err_out; ++ ++ /* remap all type IDs referenced from this BTF type */ ++ err = btf_type_visit_type_ids(t, btf_rewrite_type_ids, btf); ++ if (err) ++ goto err_out; ++ ++ /* go to next type data and type offset index entry */ ++ t += sz; ++ off++; ++ } ++ ++ /* Up until now any of the copied type data was effectively invisible, ++ * so if we exited early before this point due to error, BTF would be ++ * effectively 
unmodified. There would be extra internal memory ++ * pre-allocated, but it would not be available for querying. But now ++ * that we've copied and rewritten all the data successfully, we can ++ * update type count and various internal offsets and sizes to ++ * "commit" the changes and made them visible to the outside world. ++ */ ++ btf->hdr->type_len += data_sz; ++ btf->hdr->str_off += data_sz; ++ btf->nr_types += cnt; ++ ++ hashmap__free(p.str_off_map); ++ ++ /* return type ID of the first added BTF type */ ++ return btf->start_id + btf->nr_types - cnt; ++err_out: ++ /* zero out preallocated memory as if it was just allocated with ++ * libbpf_add_mem() ++ */ ++ memset(btf->types_data + btf->hdr->type_len, 0, data_sz); ++ memset(btf->strs_data + old_strs_len, 0, btf->hdr->str_len - old_strs_len); ++ ++ /* and now restore original strings section size; types data size ++ * wasn't modified, so doesn't need restoring, see big comment above */ ++ btf->hdr->str_len = old_strs_len; ++ ++ hashmap__free(p.str_off_map); ++ ++ return libbpf_err(err); ++} ++ ++/* ++ * Append new BTF_KIND_INT type with: ++ * - *name* - non-empty, non-NULL type name; ++ * - *sz* - power-of-2 (1, 2, 4, ..) size of the type, in bytes; ++ * - encoding is a combination of BTF_INT_SIGNED, BTF_INT_CHAR, BTF_INT_BOOL. ++ * Returns: ++ * - >0, type ID of newly added BTF type; ++ * - <0, on error. ++ */ ++int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding) ++{ ++ struct btf_type *t; ++ int sz, name_off; ++ ++ /* non-empty name */ ++ if (!name || !name[0]) ++ return libbpf_err(-EINVAL); ++ /* byte_sz must be power of 2 */ ++ if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 16) ++ return libbpf_err(-EINVAL); ++ if (encoding & ~(BTF_INT_SIGNED | BTF_INT_CHAR | BTF_INT_BOOL)) ++ return libbpf_err(-EINVAL); ++ ++ /* deconstruct BTF, if necessary, and invalidate raw_data */ ++ if (btf_ensure_modifiable(btf)) ++ return libbpf_err(-ENOMEM); ++ ++ sz = sizeof(struct btf_type) + sizeof(int); ++ t = btf_add_type_mem(btf, sz); ++ if (!t) ++ return libbpf_err(-ENOMEM); ++ ++ /* if something goes wrong later, we might end up with an extra string, ++ * but that shouldn't be a problem, because BTF can't be constructed ++ * completely anyway and will most probably be just discarded ++ */ ++ name_off = btf__add_str(btf, name); ++ if (name_off < 0) ++ return name_off; ++ ++ t->name_off = name_off; ++ t->info = btf_type_info(BTF_KIND_INT, 0, 0); ++ t->size = byte_sz; ++ /* set INT info, we don't allow setting legacy bit offset/size */ ++ *(__u32 *)(t + 1) = (encoding << 24) | (byte_sz * 8); ++ ++ return btf_commit_type(btf, sz); ++} ++ ++/* ++ * Append new BTF_KIND_FLOAT type with: ++ * - *name* - non-empty, non-NULL type name; ++ * - *sz* - size of the type, in bytes; ++ * Returns: ++ * - >0, type ID of newly added BTF type; ++ * - <0, on error. 
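An illustrative sketch combining btf__add_int() with the btf__add_btf() bulk append described above; both BTF objects are created from scratch here purely for demonstration:

#include <bpf/btf.h>

static int merge_example(void)
{
        struct btf *src = btf__new_empty();
        struct btf *dst = btf__new_empty();
        int first_id;

        /* populate the source with a couple of base types */
        btf__add_int(src, "int", 4, BTF_INT_SIGNED);
        btf__add_int(src, "unsigned char", 1, 0);

        /* bulk-append all of src's types into dst; on success this returns
         * the ID of the first appended type in dst's ID space */
        first_id = btf__add_btf(dst, src);

        btf__free(src);
        btf__free(dst);
        return first_id;
}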
++ */ ++int btf__add_float(struct btf *btf, const char *name, size_t byte_sz) ++{ ++ struct btf_type *t; ++ int sz, name_off; ++ ++ /* non-empty name */ ++ if (!name || !name[0]) ++ return libbpf_err(-EINVAL); ++ ++ /* byte_sz must be one of the explicitly allowed values */ ++ if (byte_sz != 2 && byte_sz != 4 && byte_sz != 8 && byte_sz != 12 && ++ byte_sz != 16) ++ return libbpf_err(-EINVAL); ++ ++ if (btf_ensure_modifiable(btf)) ++ return libbpf_err(-ENOMEM); ++ ++ sz = sizeof(struct btf_type); ++ t = btf_add_type_mem(btf, sz); ++ if (!t) ++ return libbpf_err(-ENOMEM); ++ ++ name_off = btf__add_str(btf, name); ++ if (name_off < 0) ++ return name_off; ++ ++ t->name_off = name_off; ++ t->info = btf_type_info(BTF_KIND_FLOAT, 0, 0); ++ t->size = byte_sz; ++ ++ return btf_commit_type(btf, sz); ++} ++ ++/* it's completely legal to append BTF types with type IDs pointing forward to ++ * types that haven't been appended yet, so we only make sure that id looks ++ * sane, we can't guarantee that ID will always be valid ++ */ ++static int validate_type_id(int id) ++{ ++ if (id < 0 || id > BTF_MAX_NR_TYPES) ++ return -EINVAL; ++ return 0; ++} ++ ++/* generic append function for PTR, TYPEDEF, CONST/VOLATILE/RESTRICT */ ++static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id) ++{ ++ struct btf_type *t; ++ int sz, name_off = 0; ++ ++ if (validate_type_id(ref_type_id)) ++ return libbpf_err(-EINVAL); ++ ++ if (btf_ensure_modifiable(btf)) ++ return libbpf_err(-ENOMEM); ++ ++ sz = sizeof(struct btf_type); ++ t = btf_add_type_mem(btf, sz); ++ if (!t) ++ return libbpf_err(-ENOMEM); ++ ++ if (name && name[0]) { ++ name_off = btf__add_str(btf, name); ++ if (name_off < 0) ++ return name_off; ++ } ++ ++ t->name_off = name_off; ++ t->info = btf_type_info(kind, 0, 0); ++ t->type = ref_type_id; ++ ++ return btf_commit_type(btf, sz); ++} ++ ++/* ++ * Append new BTF_KIND_PTR type with: ++ * - *ref_type_id* - referenced type ID, it might not exist yet; ++ * Returns: ++ * - >0, type ID of newly added BTF type; ++ * - <0, on error. ++ */ ++int btf__add_ptr(struct btf *btf, int ref_type_id) ++{ ++ return btf_add_ref_kind(btf, BTF_KIND_PTR, NULL, ref_type_id); ++} ++ ++/* ++ * Append new BTF_KIND_ARRAY type with: ++ * - *index_type_id* - type ID of the type describing array index; ++ * - *elem_type_id* - type ID of the type describing array element; ++ * - *nr_elems* - the size of the array; ++ * Returns: ++ * - >0, type ID of newly added BTF type; ++ * - <0, on error. 
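For illustration, appending a float type and a pointer to it with the APIs above; the type names are arbitrary:

#include <bpf/btf.h>

/* "double" plus "double *" */
static void add_double_and_ptr(struct btf *btf)
{
        int dbl_id = btf__add_float(btf, "double", 8);

        if (dbl_id > 0)
                btf__add_ptr(btf, dbl_id); /* the referenced ID may also point forward */
}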
++ */ ++int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 nr_elems) ++{ ++ struct btf_type *t; ++ struct btf_array *a; ++ int sz; ++ ++ if (validate_type_id(index_type_id) || validate_type_id(elem_type_id)) ++ return libbpf_err(-EINVAL); ++ ++ if (btf_ensure_modifiable(btf)) ++ return libbpf_err(-ENOMEM); ++ ++ sz = sizeof(struct btf_type) + sizeof(struct btf_array); ++ t = btf_add_type_mem(btf, sz); ++ if (!t) ++ return libbpf_err(-ENOMEM); ++ ++ t->name_off = 0; ++ t->info = btf_type_info(BTF_KIND_ARRAY, 0, 0); ++ t->size = 0; ++ ++ a = btf_array(t); ++ a->type = elem_type_id; ++ a->index_type = index_type_id; ++ a->nelems = nr_elems; ++ ++ return btf_commit_type(btf, sz); ++} ++ ++/* generic STRUCT/UNION append function */ ++static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32 bytes_sz) ++{ ++ struct btf_type *t; ++ int sz, name_off = 0; ++ ++ if (btf_ensure_modifiable(btf)) ++ return libbpf_err(-ENOMEM); ++ ++ sz = sizeof(struct btf_type); ++ t = btf_add_type_mem(btf, sz); ++ if (!t) ++ return libbpf_err(-ENOMEM); ++ ++ if (name && name[0]) { ++ name_off = btf__add_str(btf, name); ++ if (name_off < 0) ++ return name_off; ++ } ++ ++ /* start out with vlen=0 and no kflag; this will be adjusted when ++ * adding each member ++ */ ++ t->name_off = name_off; ++ t->info = btf_type_info(kind, 0, 0); ++ t->size = bytes_sz; ++ ++ return btf_commit_type(btf, sz); ++} ++ ++/* ++ * Append new BTF_KIND_STRUCT type with: ++ * - *name* - name of the struct, can be NULL or empty for anonymous structs; ++ * - *byte_sz* - size of the struct, in bytes; ++ * ++ * Struct initially has no fields in it. Fields can be added by ++ * btf__add_field() right after btf__add_struct() succeeds. ++ * ++ * Returns: ++ * - >0, type ID of newly added BTF type; ++ * - <0, on error. ++ */ ++int btf__add_struct(struct btf *btf, const char *name, __u32 byte_sz) ++{ ++ return btf_add_composite(btf, BTF_KIND_STRUCT, name, byte_sz); ++} ++ ++/* ++ * Append new BTF_KIND_UNION type with: ++ * - *name* - name of the union, can be NULL or empty for anonymous union; ++ * - *byte_sz* - size of the union, in bytes; ++ * ++ * Union initially has no fields in it. Fields can be added by ++ * btf__add_field() right after btf__add_union() succeeds. All fields ++ * should have *bit_offset* of 0. ++ * ++ * Returns: ++ * - >0, type ID of newly added BTF type; ++ * - <0, on error. ++ */ ++int btf__add_union(struct btf *btf, const char *name, __u32 byte_sz) ++{ ++ return btf_add_composite(btf, BTF_KIND_UNION, name, byte_sz); ++} ++ ++static struct btf_type *btf_last_type(struct btf *btf) ++{ ++ return btf_type_by_id(btf, btf__type_cnt(btf) - 1); ++} ++ ++/* ++ * Append new field for the current STRUCT/UNION type with: ++ * - *name* - name of the field, can be NULL or empty for anonymous field; ++ * - *type_id* - type ID for the type describing field type; ++ * - *bit_offset* - bit offset of the start of the field within struct/union; ++ * - *bit_size* - bit size of a bitfield, 0 for non-bitfield fields; ++ * Returns: ++ * - 0, on success; ++ * - <0, on error. 
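A sketch of btf__add_array() usage; like a C array declaration it needs an index type, an element type and an element count (the "int[16]" shape below is illustrative):

#include <bpf/btf.h>

static int add_int_array(struct btf *btf)
{
        int int_id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);

        if (int_id < 0)
                return int_id;
        /* int is used both as index and element type; 16 elements */
        return btf__add_array(btf, int_id, int_id, 16);
}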
++ */ ++int btf__add_field(struct btf *btf, const char *name, int type_id, ++ __u32 bit_offset, __u32 bit_size) ++{ ++ struct btf_type *t; ++ struct btf_member *m; ++ bool is_bitfield; ++ int sz, name_off = 0; ++ ++ /* last type should be union/struct */ ++ if (btf->nr_types == 0) ++ return libbpf_err(-EINVAL); ++ t = btf_last_type(btf); ++ if (!btf_is_composite(t)) ++ return libbpf_err(-EINVAL); ++ ++ if (validate_type_id(type_id)) ++ return libbpf_err(-EINVAL); ++ /* best-effort bit field offset/size enforcement */ ++ is_bitfield = bit_size || (bit_offset % 8 != 0); ++ if (is_bitfield && (bit_size == 0 || bit_size > 255 || bit_offset > 0xffffff)) ++ return libbpf_err(-EINVAL); ++ ++ /* only offset 0 is allowed for unions */ ++ if (btf_is_union(t) && bit_offset) ++ return libbpf_err(-EINVAL); ++ ++ /* decompose and invalidate raw data */ ++ if (btf_ensure_modifiable(btf)) ++ return libbpf_err(-ENOMEM); ++ ++ sz = sizeof(struct btf_member); ++ m = btf_add_type_mem(btf, sz); ++ if (!m) ++ return libbpf_err(-ENOMEM); ++ ++ if (name && name[0]) { ++ name_off = btf__add_str(btf, name); ++ if (name_off < 0) ++ return name_off; ++ } ++ ++ m->name_off = name_off; ++ m->type = type_id; ++ m->offset = bit_offset | (bit_size << 24); ++ ++ /* btf_add_type_mem can invalidate t pointer */ ++ t = btf_last_type(btf); ++ /* update parent type's vlen and kflag */ ++ t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, is_bitfield || btf_kflag(t)); ++ ++ btf->hdr->type_len += sz; ++ btf->hdr->str_off += sz; ++ return 0; ++} ++ ++static int btf_add_enum_common(struct btf *btf, const char *name, __u32 byte_sz, ++ bool is_signed, __u8 kind) ++{ ++ struct btf_type *t; ++ int sz, name_off = 0; ++ ++ /* byte_sz must be power of 2 */ ++ if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 8) ++ return libbpf_err(-EINVAL); ++ ++ if (btf_ensure_modifiable(btf)) ++ return libbpf_err(-ENOMEM); ++ ++ sz = sizeof(struct btf_type); ++ t = btf_add_type_mem(btf, sz); ++ if (!t) ++ return libbpf_err(-ENOMEM); ++ ++ if (name && name[0]) { ++ name_off = btf__add_str(btf, name); ++ if (name_off < 0) ++ return name_off; ++ } ++ ++ /* start out with vlen=0; it will be adjusted when adding enum values */ ++ t->name_off = name_off; ++ t->info = btf_type_info(kind, 0, is_signed); ++ t->size = byte_sz; ++ ++ return btf_commit_type(btf, sz); ++} ++ ++/* ++ * Append new BTF_KIND_ENUM type with: ++ * - *name* - name of the enum, can be NULL or empty for anonymous enums; ++ * - *byte_sz* - size of the enum, in bytes. ++ * ++ * Enum initially has no enum values in it (and corresponds to enum forward ++ * declaration). Enumerator values can be added by btf__add_enum_value() ++ * immediately after btf__add_enum() succeeds. ++ * ++ * Returns: ++ * - >0, type ID of newly added BTF type; ++ * - <0, on error. ++ */ ++int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz) ++{ ++ /* ++ * set the signedness to be unsigned, it will change to signed ++ * if any later enumerator is negative. ++ */ ++ return btf_add_enum_common(btf, name, byte_sz, false, BTF_KIND_ENUM); ++} ++ ++/* ++ * Append new enum value for the current ENUM type with: ++ * - *name* - name of the enumerator value, can't be NULL or empty; ++ * - *value* - integer value corresponding to enum value *name*; ++ * Returns: ++ * - 0, on success; ++ * - <0, on error. 
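Tying btf__add_struct() and btf__add_field() together, a sketch that models "struct point { int x; int y; }" (8 bytes, fields at bit offsets 0 and 32); the names are illustrative:

#include <bpf/btf.h>

static int add_point_struct(struct btf *btf)
{
        int int_id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
        int sid = btf__add_struct(btf, "point", 8);

        if (int_id < 0 || sid < 0)
                return -1;
        btf__add_field(btf, "x", int_id, 0, 0);  /* bit_size 0: not a bitfield */
        btf__add_field(btf, "y", int_id, 32, 0);
        return sid;
}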
++ */ ++int btf__add_enum_value(struct btf *btf, const char *name, __s64 value) ++{ ++ struct btf_type *t; ++ struct btf_enum *v; ++ int sz, name_off; ++ ++ /* last type should be BTF_KIND_ENUM */ ++ if (btf->nr_types == 0) ++ return libbpf_err(-EINVAL); ++ t = btf_last_type(btf); ++ if (!btf_is_enum(t)) ++ return libbpf_err(-EINVAL); ++ ++ /* non-empty name */ ++ if (!name || !name[0]) ++ return libbpf_err(-EINVAL); ++ if (value < INT_MIN || value > UINT_MAX) ++ return libbpf_err(-E2BIG); ++ ++ /* decompose and invalidate raw data */ ++ if (btf_ensure_modifiable(btf)) ++ return libbpf_err(-ENOMEM); ++ ++ sz = sizeof(struct btf_enum); ++ v = btf_add_type_mem(btf, sz); ++ if (!v) ++ return libbpf_err(-ENOMEM); ++ ++ name_off = btf__add_str(btf, name); ++ if (name_off < 0) ++ return name_off; ++ ++ v->name_off = name_off; ++ v->val = value; ++ ++ /* update parent type's vlen */ ++ t = btf_last_type(btf); ++ btf_type_inc_vlen(t); ++ ++ /* if negative value, set signedness to signed */ ++ if (value < 0) ++ t->info = btf_type_info(btf_kind(t), btf_vlen(t), true); ++ ++ btf->hdr->type_len += sz; ++ btf->hdr->str_off += sz; ++ return 0; ++} ++ ++/* ++ * Append new BTF_KIND_ENUM64 type with: ++ * - *name* - name of the enum, can be NULL or empty for anonymous enums; ++ * - *byte_sz* - size of the enum, in bytes. ++ * - *is_signed* - whether the enum values are signed or not; ++ * ++ * Enum initially has no enum values in it (and corresponds to enum forward ++ * declaration). Enumerator values can be added by btf__add_enum64_value() ++ * immediately after btf__add_enum64() succeeds. ++ * ++ * Returns: ++ * - >0, type ID of newly added BTF type; ++ * - <0, on error. ++ */ ++int btf__add_enum64(struct btf *btf, const char *name, __u32 byte_sz, ++ bool is_signed) ++{ ++ return btf_add_enum_common(btf, name, byte_sz, is_signed, ++ BTF_KIND_ENUM64); ++} ++ ++/* ++ * Append new enum value for the current ENUM64 type with: ++ * - *name* - name of the enumerator value, can't be NULL or empty; ++ * - *value* - integer value corresponding to enum value *name*; ++ * Returns: ++ * - 0, on success; ++ * - <0, on error. ++ */ ++int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value) ++{ ++ struct btf_enum64 *v; ++ struct btf_type *t; ++ int sz, name_off; ++ ++ /* last type should be BTF_KIND_ENUM64 */ ++ if (btf->nr_types == 0) ++ return libbpf_err(-EINVAL); ++ t = btf_last_type(btf); ++ if (!btf_is_enum64(t)) ++ return libbpf_err(-EINVAL); ++ ++ /* non-empty name */ ++ if (!name || !name[0]) ++ return libbpf_err(-EINVAL); ++ ++ /* decompose and invalidate raw data */ ++ if (btf_ensure_modifiable(btf)) ++ return libbpf_err(-ENOMEM); ++ ++ sz = sizeof(struct btf_enum64); ++ v = btf_add_type_mem(btf, sz); ++ if (!v) ++ return libbpf_err(-ENOMEM); ++ ++ name_off = btf__add_str(btf, name); ++ if (name_off < 0) ++ return name_off; ++ ++ v->name_off = name_off; ++ v->val_lo32 = (__u32)value; ++ v->val_hi32 = value >> 32; ++ ++ /* update parent type's vlen */ ++ t = btf_last_type(btf); ++ btf_type_inc_vlen(t); ++ ++ btf->hdr->type_len += sz; ++ btf->hdr->str_off += sz; ++ return 0; ++} ++ ++/* ++ * Append new BTF_KIND_FWD type with: ++ * - *name*, non-empty/non-NULL name; ++ * - *fwd_kind*, kind of forward declaration, one of BTF_FWD_STRUCT, ++ * BTF_FWD_UNION, or BTF_FWD_ENUM; ++ * Returns: ++ * - >0, type ID of newly added BTF type; ++ * - <0, on error. 
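A sketch of the enum APIs documented above; the 32-bit enum picks up signedness automatically from negative enumerators, while the 64-bit variant takes it explicitly. Names and values are illustrative:

#include <bpf/btf.h>

static void add_enums(struct btf *btf)
{
        /* enum state { STATE_OFF = 0, STATE_ON = 1 }; */
        btf__add_enum(btf, "state", 4);
        btf__add_enum_value(btf, "STATE_OFF", 0);
        btf__add_enum_value(btf, "STATE_ON", 1);

        /* 64-bit enum with unsigned enumerators */
        btf__add_enum64(btf, "big_flags", 8, false);
        btf__add_enum64_value(btf, "BIG_FLAG_HI", 1ULL << 40);
}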
++ */ ++int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind) ++{ ++ if (!name || !name[0]) ++ return libbpf_err(-EINVAL); ++ ++ switch (fwd_kind) { ++ case BTF_FWD_STRUCT: ++ case BTF_FWD_UNION: { ++ struct btf_type *t; ++ int id; ++ ++ id = btf_add_ref_kind(btf, BTF_KIND_FWD, name, 0); ++ if (id <= 0) ++ return id; ++ t = btf_type_by_id(btf, id); ++ t->info = btf_type_info(BTF_KIND_FWD, 0, fwd_kind == BTF_FWD_UNION); ++ return id; ++ } ++ case BTF_FWD_ENUM: ++ /* enum forward in BTF currently is just an enum with no enum ++ * values; we also assume a standard 4-byte size for it ++ */ ++ return btf__add_enum(btf, name, sizeof(int)); ++ default: ++ return libbpf_err(-EINVAL); ++ } ++} ++ ++/* ++ * Append new BTF_KING_TYPEDEF type with: ++ * - *name*, non-empty/non-NULL name; ++ * - *ref_type_id* - referenced type ID, it might not exist yet; ++ * Returns: ++ * - >0, type ID of newly added BTF type; ++ * - <0, on error. ++ */ ++int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id) ++{ ++ if (!name || !name[0]) ++ return libbpf_err(-EINVAL); ++ ++ return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id); ++} ++ ++/* ++ * Append new BTF_KIND_VOLATILE type with: ++ * - *ref_type_id* - referenced type ID, it might not exist yet; ++ * Returns: ++ * - >0, type ID of newly added BTF type; ++ * - <0, on error. ++ */ ++int btf__add_volatile(struct btf *btf, int ref_type_id) ++{ ++ return btf_add_ref_kind(btf, BTF_KIND_VOLATILE, NULL, ref_type_id); ++} ++ ++/* ++ * Append new BTF_KIND_CONST type with: ++ * - *ref_type_id* - referenced type ID, it might not exist yet; ++ * Returns: ++ * - >0, type ID of newly added BTF type; ++ * - <0, on error. ++ */ ++int btf__add_const(struct btf *btf, int ref_type_id) ++{ ++ return btf_add_ref_kind(btf, BTF_KIND_CONST, NULL, ref_type_id); ++} ++ ++/* ++ * Append new BTF_KIND_RESTRICT type with: ++ * - *ref_type_id* - referenced type ID, it might not exist yet; ++ * Returns: ++ * - >0, type ID of newly added BTF type; ++ * - <0, on error. ++ */ ++int btf__add_restrict(struct btf *btf, int ref_type_id) ++{ ++ return btf_add_ref_kind(btf, BTF_KIND_RESTRICT, NULL, ref_type_id); ++} ++ ++/* ++ * Append new BTF_KIND_TYPE_TAG type with: ++ * - *value*, non-empty/non-NULL tag value; ++ * - *ref_type_id* - referenced type ID, it might not exist yet; ++ * Returns: ++ * - >0, type ID of newly added BTF type; ++ * - <0, on error. ++ */ ++int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id) ++{ ++ if (!value|| !value[0]) ++ return libbpf_err(-EINVAL); ++ ++ return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id); ++} ++ ++/* ++ * Append new BTF_KIND_FUNC type with: ++ * - *name*, non-empty/non-NULL name; ++ * - *proto_type_id* - FUNC_PROTO's type ID, it might not exist yet; ++ * Returns: ++ * - >0, type ID of newly added BTF type; ++ * - <0, on error. 
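A sketch chaining the reference-kind helpers above into roughly "typedef const struct foo *foo_cptr_t" over a forward-declared struct, with a type tag slotted between the pointer and its pointee; all names are illustrative and error checks are elided for brevity:

#include <bpf/btf.h>

static int add_typedef_chain(struct btf *btf)
{
        int fwd_id = btf__add_fwd(btf, "foo", BTF_FWD_STRUCT);
        int cst_id = btf__add_const(btf, fwd_id);
        /* a type tag sits between the pointer and the pointee it annotates */
        int tag_id = btf__add_type_tag(btf, "user", cst_id);
        int ptr_id = btf__add_ptr(btf, tag_id);

        return btf__add_typedef(btf, "foo_cptr_t", ptr_id);
}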
++ */ ++int btf__add_func(struct btf *btf, const char *name, ++ enum btf_func_linkage linkage, int proto_type_id) ++{ ++ int id; ++ ++ if (!name || !name[0]) ++ return libbpf_err(-EINVAL); ++ if (linkage != BTF_FUNC_STATIC && linkage != BTF_FUNC_GLOBAL && ++ linkage != BTF_FUNC_EXTERN) ++ return libbpf_err(-EINVAL); ++ ++ id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id); ++ if (id > 0) { ++ struct btf_type *t = btf_type_by_id(btf, id); ++ ++ t->info = btf_type_info(BTF_KIND_FUNC, linkage, 0); ++ } ++ return libbpf_err(id); ++} ++ ++/* ++ * Append new BTF_KIND_FUNC_PROTO with: ++ * - *ret_type_id* - type ID for return result of a function. ++ * ++ * Function prototype initially has no arguments, but they can be added by ++ * btf__add_func_param() one by one, immediately after ++ * btf__add_func_proto() succeeded. ++ * ++ * Returns: ++ * - >0, type ID of newly added BTF type; ++ * - <0, on error. ++ */ ++int btf__add_func_proto(struct btf *btf, int ret_type_id) ++{ ++ struct btf_type *t; ++ int sz; ++ ++ if (validate_type_id(ret_type_id)) ++ return libbpf_err(-EINVAL); ++ ++ if (btf_ensure_modifiable(btf)) ++ return libbpf_err(-ENOMEM); ++ ++ sz = sizeof(struct btf_type); ++ t = btf_add_type_mem(btf, sz); ++ if (!t) ++ return libbpf_err(-ENOMEM); ++ ++ /* start out with vlen=0; this will be adjusted when adding enum ++ * values, if necessary ++ */ ++ t->name_off = 0; ++ t->info = btf_type_info(BTF_KIND_FUNC_PROTO, 0, 0); ++ t->type = ret_type_id; ++ ++ return btf_commit_type(btf, sz); ++} ++ ++/* ++ * Append new function parameter for current FUNC_PROTO type with: ++ * - *name* - parameter name, can be NULL or empty; ++ * - *type_id* - type ID describing the type of the parameter. ++ * Returns: ++ * - 0, on success; ++ * - <0, on error. ++ */ ++int btf__add_func_param(struct btf *btf, const char *name, int type_id) ++{ ++ struct btf_type *t; ++ struct btf_param *p; ++ int sz, name_off = 0; ++ ++ if (validate_type_id(type_id)) ++ return libbpf_err(-EINVAL); ++ ++ /* last type should be BTF_KIND_FUNC_PROTO */ ++ if (btf->nr_types == 0) ++ return libbpf_err(-EINVAL); ++ t = btf_last_type(btf); ++ if (!btf_is_func_proto(t)) ++ return libbpf_err(-EINVAL); ++ ++ /* decompose and invalidate raw data */ ++ if (btf_ensure_modifiable(btf)) ++ return libbpf_err(-ENOMEM); ++ ++ sz = sizeof(struct btf_param); ++ p = btf_add_type_mem(btf, sz); ++ if (!p) ++ return libbpf_err(-ENOMEM); ++ ++ if (name && name[0]) { ++ name_off = btf__add_str(btf, name); ++ if (name_off < 0) ++ return name_off; ++ } ++ ++ p->name_off = name_off; ++ p->type = type_id; ++ ++ /* update parent type's vlen */ ++ t = btf_last_type(btf); ++ btf_type_inc_vlen(t); ++ ++ btf->hdr->type_len += sz; ++ btf->hdr->str_off += sz; ++ return 0; ++} ++ ++/* ++ * Append new BTF_KIND_VAR type with: ++ * - *name* - non-empty/non-NULL name; ++ * - *linkage* - variable linkage, one of BTF_VAR_STATIC, ++ * BTF_VAR_GLOBAL_ALLOCATED, or BTF_VAR_GLOBAL_EXTERN; ++ * - *type_id* - type ID of the type describing the type of the variable. ++ * Returns: ++ * - >0, type ID of newly added BTF type; ++ * - <0, on error. 
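A sketch that models "int sum(int a, int b)" with the FUNC_PROTO/FUNC APIs above; the function and parameter names are illustrative:

#include <bpf/btf.h>

static int add_sum_func(struct btf *btf)
{
        int int_id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
        int proto_id = btf__add_func_proto(btf, int_id);

        if (int_id < 0 || proto_id < 0)
                return -1;
        btf__add_func_param(btf, "a", int_id);
        btf__add_func_param(btf, "b", int_id);
        return btf__add_func(btf, "sum", BTF_FUNC_GLOBAL, proto_id);
}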
++ */ ++int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id) ++{ ++ struct btf_type *t; ++ struct btf_var *v; ++ int sz, name_off; ++ ++ /* non-empty name */ ++ if (!name || !name[0]) ++ return libbpf_err(-EINVAL); ++ if (linkage != BTF_VAR_STATIC && linkage != BTF_VAR_GLOBAL_ALLOCATED && ++ linkage != BTF_VAR_GLOBAL_EXTERN) ++ return libbpf_err(-EINVAL); ++ if (validate_type_id(type_id)) ++ return libbpf_err(-EINVAL); ++ ++ /* deconstruct BTF, if necessary, and invalidate raw_data */ ++ if (btf_ensure_modifiable(btf)) ++ return libbpf_err(-ENOMEM); ++ ++ sz = sizeof(struct btf_type) + sizeof(struct btf_var); ++ t = btf_add_type_mem(btf, sz); ++ if (!t) ++ return libbpf_err(-ENOMEM); ++ ++ name_off = btf__add_str(btf, name); ++ if (name_off < 0) ++ return name_off; ++ ++ t->name_off = name_off; ++ t->info = btf_type_info(BTF_KIND_VAR, 0, 0); ++ t->type = type_id; ++ ++ v = btf_var(t); ++ v->linkage = linkage; ++ ++ return btf_commit_type(btf, sz); ++} ++ ++/* ++ * Append new BTF_KIND_DATASEC type with: ++ * - *name* - non-empty/non-NULL name; ++ * - *byte_sz* - data section size, in bytes. ++ * ++ * Data section is initially empty. Variables info can be added with ++ * btf__add_datasec_var_info() calls, after btf__add_datasec() succeeds. ++ * ++ * Returns: ++ * - >0, type ID of newly added BTF type; ++ * - <0, on error. ++ */ ++int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz) ++{ ++ struct btf_type *t; ++ int sz, name_off; ++ ++ /* non-empty name */ ++ if (!name || !name[0]) ++ return libbpf_err(-EINVAL); ++ ++ if (btf_ensure_modifiable(btf)) ++ return libbpf_err(-ENOMEM); ++ ++ sz = sizeof(struct btf_type); ++ t = btf_add_type_mem(btf, sz); ++ if (!t) ++ return libbpf_err(-ENOMEM); ++ ++ name_off = btf__add_str(btf, name); ++ if (name_off < 0) ++ return name_off; ++ ++ /* start with vlen=0, which will be update as var_secinfos are added */ ++ t->name_off = name_off; ++ t->info = btf_type_info(BTF_KIND_DATASEC, 0, 0); ++ t->size = byte_sz; ++ ++ return btf_commit_type(btf, sz); ++} ++ ++/* ++ * Append new data section variable information entry for current DATASEC type: ++ * - *var_type_id* - type ID, describing type of the variable; ++ * - *offset* - variable offset within data section, in bytes; ++ * - *byte_sz* - variable size, in bytes. ++ * ++ * Returns: ++ * - 0, on success; ++ * - <0, on error. 
++ */ ++int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __u32 byte_sz) ++{ ++ struct btf_type *t; ++ struct btf_var_secinfo *v; ++ int sz; ++ ++ /* last type should be BTF_KIND_DATASEC */ ++ if (btf->nr_types == 0) ++ return libbpf_err(-EINVAL); ++ t = btf_last_type(btf); ++ if (!btf_is_datasec(t)) ++ return libbpf_err(-EINVAL); ++ ++ if (validate_type_id(var_type_id)) ++ return libbpf_err(-EINVAL); ++ ++ /* decompose and invalidate raw data */ ++ if (btf_ensure_modifiable(btf)) ++ return libbpf_err(-ENOMEM); ++ ++ sz = sizeof(struct btf_var_secinfo); ++ v = btf_add_type_mem(btf, sz); ++ if (!v) ++ return libbpf_err(-ENOMEM); ++ ++ v->type = var_type_id; ++ v->offset = offset; ++ v->size = byte_sz; ++ ++ /* update parent type's vlen */ ++ t = btf_last_type(btf); ++ btf_type_inc_vlen(t); ++ ++ btf->hdr->type_len += sz; ++ btf->hdr->str_off += sz; ++ return 0; ++} ++ ++/* ++ * Append new BTF_KIND_DECL_TAG type with: ++ * - *value* - non-empty/non-NULL string; ++ * - *ref_type_id* - referenced type ID, it might not exist yet; ++ * - *component_idx* - -1 for tagging reference type, otherwise struct/union ++ * member or function argument index; ++ * Returns: ++ * - >0, type ID of newly added BTF type; ++ * - <0, on error. ++ */ ++int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, ++ int component_idx) ++{ ++ struct btf_type *t; ++ int sz, value_off; ++ ++ if (!value || !value[0] || component_idx < -1) ++ return libbpf_err(-EINVAL); ++ ++ if (validate_type_id(ref_type_id)) ++ return libbpf_err(-EINVAL); ++ ++ if (btf_ensure_modifiable(btf)) ++ return libbpf_err(-ENOMEM); ++ ++ sz = sizeof(struct btf_type) + sizeof(struct btf_decl_tag); ++ t = btf_add_type_mem(btf, sz); ++ if (!t) ++ return libbpf_err(-ENOMEM); ++ ++ value_off = btf__add_str(btf, value); ++ if (value_off < 0) ++ return value_off; ++ ++ t->name_off = value_off; ++ t->info = btf_type_info(BTF_KIND_DECL_TAG, 0, false); ++ t->type = ref_type_id; ++ btf_decl_tag(t)->component_idx = component_idx; ++ ++ return btf_commit_type(btf, sz); ++} ++ ++struct btf_ext_sec_setup_param { ++ __u32 off; ++ __u32 len; ++ __u32 min_rec_size; ++ struct btf_ext_info *ext_info; ++ const char *desc; ++}; ++ ++static int btf_ext_setup_info(struct btf_ext *btf_ext, ++ struct btf_ext_sec_setup_param *ext_sec) ++{ ++ const struct btf_ext_info_sec *sinfo; ++ struct btf_ext_info *ext_info; ++ __u32 info_left, record_size; ++ size_t sec_cnt = 0; ++ /* The start of the info sec (including the __u32 record_size). 
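A sketch tying together btf__add_var(), btf__add_datasec(), btf__add_datasec_var_info() and btf__add_decl_tag() for a single 4-byte global placed in a hypothetical ".data" section; variable, section and tag names are illustrative:

#include <bpf/btf.h>

static int add_data_var(struct btf *btf)
{
        int int_id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
        int var_id = btf__add_var(btf, "my_var", BTF_VAR_GLOBAL_ALLOCATED, int_id);
        int sec_id = btf__add_datasec(btf, ".data", 4);

        if (int_id < 0 || var_id < 0 || sec_id < 0)
                return -1;
        btf__add_datasec_var_info(btf, var_id, 0, 4); /* offset 0, 4 bytes */
        btf__add_decl_tag(btf, "my_tag", var_id, -1); /* -1: tag the var itself */
        return sec_id;
}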
*/ ++ void *info; ++ ++ if (ext_sec->len == 0) ++ return 0; ++ ++ if (ext_sec->off & 0x03) { ++ pr_debug(".BTF.ext %s section is not aligned to 4 bytes\n", ++ ext_sec->desc); ++ return -EINVAL; ++ } ++ ++ info = btf_ext->data + btf_ext->hdr->hdr_len + ext_sec->off; ++ info_left = ext_sec->len; ++ ++ if (btf_ext->data + btf_ext->data_size < info + ext_sec->len) { ++ pr_debug("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n", ++ ext_sec->desc, ext_sec->off, ext_sec->len); ++ return -EINVAL; ++ } ++ ++ /* At least a record size */ ++ if (info_left < sizeof(__u32)) { ++ pr_debug(".BTF.ext %s record size not found\n", ext_sec->desc); ++ return -EINVAL; ++ } ++ ++ /* The record size needs to meet the minimum standard */ ++ record_size = *(__u32 *)info; ++ if (record_size < ext_sec->min_rec_size || ++ record_size & 0x03) { ++ pr_debug("%s section in .BTF.ext has invalid record size %u\n", ++ ext_sec->desc, record_size); ++ return -EINVAL; ++ } ++ ++ sinfo = info + sizeof(__u32); ++ info_left -= sizeof(__u32); ++ ++ /* If no records, return failure now so .BTF.ext won't be used. */ ++ if (!info_left) { ++ pr_debug("%s section in .BTF.ext has no records", ext_sec->desc); ++ return -EINVAL; ++ } ++ ++ while (info_left) { ++ unsigned int sec_hdrlen = sizeof(struct btf_ext_info_sec); ++ __u64 total_record_size; ++ __u32 num_records; ++ ++ if (info_left < sec_hdrlen) { ++ pr_debug("%s section header is not found in .BTF.ext\n", ++ ext_sec->desc); ++ return -EINVAL; ++ } ++ ++ num_records = sinfo->num_info; ++ if (num_records == 0) { ++ pr_debug("%s section has incorrect num_records in .BTF.ext\n", ++ ext_sec->desc); ++ return -EINVAL; ++ } ++ ++ total_record_size = sec_hdrlen + (__u64)num_records * record_size; ++ if (info_left < total_record_size) { ++ pr_debug("%s section has incorrect num_records in .BTF.ext\n", ++ ext_sec->desc); ++ return -EINVAL; ++ } ++ ++ info_left -= total_record_size; ++ sinfo = (void *)sinfo + total_record_size; ++ sec_cnt++; ++ } ++ ++ ext_info = ext_sec->ext_info; ++ ext_info->len = ext_sec->len - sizeof(__u32); ++ ext_info->rec_size = record_size; ++ ext_info->info = info + sizeof(__u32); ++ ext_info->sec_cnt = sec_cnt; ++ ++ return 0; ++} ++ ++static int btf_ext_setup_func_info(struct btf_ext *btf_ext) ++{ ++ struct btf_ext_sec_setup_param param = { ++ .off = btf_ext->hdr->func_info_off, ++ .len = btf_ext->hdr->func_info_len, ++ .min_rec_size = sizeof(struct bpf_func_info_min), ++ .ext_info = &btf_ext->func_info, ++ .desc = "func_info" ++ }; ++ ++ return btf_ext_setup_info(btf_ext, ¶m); ++} ++ ++static int btf_ext_setup_line_info(struct btf_ext *btf_ext) ++{ ++ struct btf_ext_sec_setup_param param = { ++ .off = btf_ext->hdr->line_info_off, ++ .len = btf_ext->hdr->line_info_len, ++ .min_rec_size = sizeof(struct bpf_line_info_min), ++ .ext_info = &btf_ext->line_info, ++ .desc = "line_info", ++ }; ++ ++ return btf_ext_setup_info(btf_ext, ¶m); ++} ++ ++static int btf_ext_setup_core_relos(struct btf_ext *btf_ext) ++{ ++ struct btf_ext_sec_setup_param param = { ++ .off = btf_ext->hdr->core_relo_off, ++ .len = btf_ext->hdr->core_relo_len, ++ .min_rec_size = sizeof(struct bpf_core_relo), ++ .ext_info = &btf_ext->core_relo_info, ++ .desc = "core_relo", ++ }; ++ ++ return btf_ext_setup_info(btf_ext, ¶m); ++} ++ ++static int btf_ext_parse_hdr(__u8 *data, __u32 data_size) ++{ ++ const struct btf_ext_header *hdr = (struct btf_ext_header *)data; ++ ++ if (data_size < offsetofend(struct btf_ext_header, hdr_len) || ++ data_size < hdr->hdr_len) { ++ 
pr_debug("BTF.ext header not found"); ++ return -EINVAL; ++ } ++ ++ if (hdr->magic == bswap_16(BTF_MAGIC)) { ++ pr_warn("BTF.ext in non-native endianness is not supported\n"); ++ return -ENOTSUP; ++ } else if (hdr->magic != BTF_MAGIC) { ++ pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic); ++ return -EINVAL; ++ } ++ ++ if (hdr->version != BTF_VERSION) { ++ pr_debug("Unsupported BTF.ext version:%u\n", hdr->version); ++ return -ENOTSUP; ++ } ++ ++ if (hdr->flags) { ++ pr_debug("Unsupported BTF.ext flags:%x\n", hdr->flags); ++ return -ENOTSUP; ++ } ++ ++ if (data_size == hdr->hdr_len) { ++ pr_debug("BTF.ext has no data\n"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++void btf_ext__free(struct btf_ext *btf_ext) ++{ ++ if (IS_ERR_OR_NULL(btf_ext)) ++ return; ++ free(btf_ext->func_info.sec_idxs); ++ free(btf_ext->line_info.sec_idxs); ++ free(btf_ext->core_relo_info.sec_idxs); ++ free(btf_ext->data); ++ free(btf_ext); ++} ++ ++struct btf_ext *btf_ext__new(const __u8 *data, __u32 size) ++{ ++ struct btf_ext *btf_ext; ++ int err; ++ ++ btf_ext = calloc(1, sizeof(struct btf_ext)); ++ if (!btf_ext) ++ return libbpf_err_ptr(-ENOMEM); ++ ++ btf_ext->data_size = size; ++ btf_ext->data = malloc(size); ++ if (!btf_ext->data) { ++ err = -ENOMEM; ++ goto done; ++ } ++ memcpy(btf_ext->data, data, size); ++ ++ err = btf_ext_parse_hdr(btf_ext->data, size); ++ if (err) ++ goto done; ++ ++ if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, line_info_len)) { ++ err = -EINVAL; ++ goto done; ++ } ++ ++ err = btf_ext_setup_func_info(btf_ext); ++ if (err) ++ goto done; ++ ++ err = btf_ext_setup_line_info(btf_ext); ++ if (err) ++ goto done; ++ ++ if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) ++ goto done; /* skip core relos parsing */ ++ ++ err = btf_ext_setup_core_relos(btf_ext); ++ if (err) ++ goto done; ++ ++done: ++ if (err) { ++ btf_ext__free(btf_ext); ++ return libbpf_err_ptr(err); ++ } ++ ++ return btf_ext; ++} ++ ++const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size) ++{ ++ *size = btf_ext->data_size; ++ return btf_ext->data; ++} ++ ++struct btf_dedup; ++ ++static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts); ++static void btf_dedup_free(struct btf_dedup *d); ++static int btf_dedup_prep(struct btf_dedup *d); ++static int btf_dedup_strings(struct btf_dedup *d); ++static int btf_dedup_prim_types(struct btf_dedup *d); ++static int btf_dedup_struct_types(struct btf_dedup *d); ++static int btf_dedup_ref_types(struct btf_dedup *d); ++static int btf_dedup_compact_types(struct btf_dedup *d); ++static int btf_dedup_remap_types(struct btf_dedup *d); ++ ++/* ++ * Deduplicate BTF types and strings. ++ * ++ * BTF dedup algorithm takes as an input `struct btf` representing `.BTF` ELF ++ * section with all BTF type descriptors and string data. It overwrites that ++ * memory in-place with deduplicated types and strings without any loss of ++ * information. If optional `struct btf_ext` representing '.BTF.ext' ELF section ++ * is provided, all the strings referenced from .BTF.ext section are honored ++ * and updated to point to the right offsets after deduplication. ++ * ++ * If function returns with error, type/string data might be garbled and should ++ * be discarded. 
++ * ++ * More verbose and detailed description of both problem btf_dedup is solving, ++ * as well as solution could be found at: ++ * https://facebookmicrosites.github.io/bpf/blog/2018/11/14/btf-enhancement.html ++ * ++ * Problem description and justification ++ * ===================================== ++ * ++ * BTF type information is typically emitted either as a result of conversion ++ * from DWARF to BTF or directly by compiler. In both cases, each compilation ++ * unit contains information about a subset of all the types that are used ++ * in an application. These subsets are frequently overlapping and contain a lot ++ * of duplicated information when later concatenated together into a single ++ * binary. This algorithm ensures that each unique type is represented by single ++ * BTF type descriptor, greatly reducing resulting size of BTF data. ++ * ++ * Compilation unit isolation and subsequent duplication of data is not the only ++ * problem. The same type hierarchy (e.g., struct and all the type that struct ++ * references) in different compilation units can be represented in BTF to ++ * various degrees of completeness (or, rather, incompleteness) due to ++ * struct/union forward declarations. ++ * ++ * Let's take a look at an example, that we'll use to better understand the ++ * problem (and solution). Suppose we have two compilation units, each using ++ * same `struct S`, but each of them having incomplete type information about ++ * struct's fields: ++ * ++ * // CU #1: ++ * struct S; ++ * struct A { ++ * int a; ++ * struct A* self; ++ * struct S* parent; ++ * }; ++ * struct B; ++ * struct S { ++ * struct A* a_ptr; ++ * struct B* b_ptr; ++ * }; ++ * ++ * // CU #2: ++ * struct S; ++ * struct A; ++ * struct B { ++ * int b; ++ * struct B* self; ++ * struct S* parent; ++ * }; ++ * struct S { ++ * struct A* a_ptr; ++ * struct B* b_ptr; ++ * }; ++ * ++ * In case of CU #1, BTF data will know only that `struct B` exist (but no ++ * more), but will know the complete type information about `struct A`. While ++ * for CU #2, it will know full type information about `struct B`, but will ++ * only know about forward declaration of `struct A` (in BTF terms, it will ++ * have `BTF_KIND_FWD` type descriptor with name `B`). ++ * ++ * This compilation unit isolation means that it's possible that there is no ++ * single CU with complete type information describing structs `S`, `A`, and ++ * `B`. Also, we might get tons of duplicated and redundant type information. ++ * ++ * Additional complication we need to keep in mind comes from the fact that ++ * types, in general, can form graphs containing cycles, not just DAGs. ++ * ++ * While algorithm does deduplication, it also merges and resolves type ++ * information (unless disabled throught `struct btf_opts`), whenever possible. ++ * E.g., in the example above with two compilation units having partial type ++ * information for structs `A` and `B`, the output of algorithm will emit ++ * a single copy of each BTF type that describes structs `A`, `B`, and `S` ++ * (as well as type information for `int` and pointers), as if they were defined ++ * in a single compilation unit as: ++ * ++ * struct A { ++ * int a; ++ * struct A* self; ++ * struct S* parent; ++ * }; ++ * struct B { ++ * int b; ++ * struct B* self; ++ * struct S* parent; ++ * }; ++ * struct S { ++ * struct A* a_ptr; ++ * struct B* b_ptr; ++ * }; ++ * ++ * Algorithm summary ++ * ================= ++ * ++ * Algorithm completes its work in 6 separate passes: ++ * ++ * 1. Strings deduplication. 
++ * 2. Primitive types deduplication (int, enum, fwd). ++ * 3. Struct/union types deduplication. ++ * 4. Reference types deduplication (pointers, typedefs, arrays, funcs, func ++ * protos, and const/volatile/restrict modifiers). ++ * 5. Types compaction. ++ * 6. Types remapping. ++ * ++ * Algorithm determines canonical type descriptor, which is a single ++ * representative type for each truly unique type. This canonical type is the ++ * one that will go into final deduplicated BTF type information. For ++ * struct/unions, it is also the type that algorithm will merge additional type ++ * information into (while resolving FWDs), as it discovers it from data in ++ * other CUs. Each input BTF type eventually gets either mapped to itself, if ++ * that type is canonical, or to some other type, if that type is equivalent ++ * and was chosen as canonical representative. This mapping is stored in ++ * `btf_dedup->map` array. This map is also used to record STRUCT/UNION that ++ * FWD type got resolved to. ++ * ++ * To facilitate fast discovery of canonical types, we also maintain canonical ++ * index (`btf_dedup->dedup_table`), which maps type descriptor's signature hash ++ * (i.e., hashed kind, name, size, fields, etc) into a list of canonical types ++ * that match that signature. With sufficiently good choice of type signature ++ * hashing function, we can limit number of canonical types for each unique type ++ * signature to a very small number, allowing to find canonical type for any ++ * duplicated type very quickly. ++ * ++ * Struct/union deduplication is the most critical part and algorithm for ++ * deduplicating structs/unions is described in greater details in comments for ++ * `btf_dedup_is_equiv` function. ++ */ ++int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts) ++{ ++ struct btf_dedup *d; ++ int err; ++ ++ if (!OPTS_VALID(opts, btf_dedup_opts)) ++ return libbpf_err(-EINVAL); ++ ++ d = btf_dedup_new(btf, opts); ++ if (IS_ERR(d)) { ++ pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d)); ++ return libbpf_err(-EINVAL); ++ } ++ ++ if (btf_ensure_modifiable(btf)) { ++ err = -ENOMEM; ++ goto done; ++ } ++ ++ err = btf_dedup_prep(d); ++ if (err) { ++ pr_debug("btf_dedup_prep failed:%d\n", err); ++ goto done; ++ } ++ err = btf_dedup_strings(d); ++ if (err < 0) { ++ pr_debug("btf_dedup_strings failed:%d\n", err); ++ goto done; ++ } ++ err = btf_dedup_prim_types(d); ++ if (err < 0) { ++ pr_debug("btf_dedup_prim_types failed:%d\n", err); ++ goto done; ++ } ++ err = btf_dedup_struct_types(d); ++ if (err < 0) { ++ pr_debug("btf_dedup_struct_types failed:%d\n", err); ++ goto done; ++ } ++ err = btf_dedup_ref_types(d); ++ if (err < 0) { ++ pr_debug("btf_dedup_ref_types failed:%d\n", err); ++ goto done; ++ } ++ err = btf_dedup_compact_types(d); ++ if (err < 0) { ++ pr_debug("btf_dedup_compact_types failed:%d\n", err); ++ goto done; ++ } ++ err = btf_dedup_remap_types(d); ++ if (err < 0) { ++ pr_debug("btf_dedup_remap_types failed:%d\n", err); ++ goto done; ++ } ++ ++done: ++ btf_dedup_free(d); ++ return libbpf_err(err); ++} ++ ++#define BTF_UNPROCESSED_ID ((__u32)-1) ++#define BTF_IN_PROGRESS_ID ((__u32)-2) ++ ++struct btf_dedup { ++ /* .BTF section to be deduped in-place */ ++ struct btf *btf; ++ /* ++ * Optional .BTF.ext section. When provided, any strings referenced ++ * from it will be taken into account when deduping strings ++ */ ++ struct btf_ext *btf_ext; ++ /* ++ * This is a map from any type's signature hash to a list of possible ++ * canonical representative type candidates. 
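A minimal usage sketch for the btf__dedup() entry point above: deduplicate a BTF object in place, optionally passing its .BTF.ext so string offsets referenced from there are rewritten too. Both objects are assumed to come from btf__new()/btf_ext__new(); LIBBPF_OPTS is the options initializer from libbpf_common.h.

#include "btf.h"

static int dedup_in_place(struct btf *btf, struct btf_ext *ext)
{
        /* ext may be NULL; force_collisions is a testing-only knob left at default */
        LIBBPF_OPTS(btf_dedup_opts, opts, .btf_ext = ext);

        return btf__dedup(btf, &opts);  /* 0 on success, negative errno on failure */
}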
Hash collisions are ++ * ignored, so even types of various kinds can share same list of ++ * candidates, which is fine because we rely on subsequent ++ * btf_xxx_equal() checks to authoritatively verify type equality. ++ */ ++ struct hashmap *dedup_table; ++ /* Canonical types map */ ++ __u32 *map; ++ /* Hypothetical mapping, used during type graph equivalence checks */ ++ __u32 *hypot_map; ++ __u32 *hypot_list; ++ size_t hypot_cnt; ++ size_t hypot_cap; ++ /* Whether hypothetical mapping, if successful, would need to adjust ++ * already canonicalized types (due to a new forward declaration to ++ * concrete type resolution). In such case, during split BTF dedup ++ * candidate type would still be considered as different, because base ++ * BTF is considered to be immutable. ++ */ ++ bool hypot_adjust_canon; ++ /* Various option modifying behavior of algorithm */ ++ struct btf_dedup_opts opts; ++ /* temporary strings deduplication state */ ++ struct strset *strs_set; ++}; ++ ++static long hash_combine(long h, long value) ++{ ++ return h * 31 + value; ++} ++ ++#define for_each_dedup_cand(d, node, hash) \ ++ hashmap__for_each_key_entry(d->dedup_table, node, (void *)hash) ++ ++static int btf_dedup_table_add(struct btf_dedup *d, long hash, __u32 type_id) ++{ ++ return hashmap__append(d->dedup_table, ++ (void *)hash, (void *)(long)type_id); ++} ++ ++static int btf_dedup_hypot_map_add(struct btf_dedup *d, ++ __u32 from_id, __u32 to_id) ++{ ++ if (d->hypot_cnt == d->hypot_cap) { ++ __u32 *new_list; ++ ++ d->hypot_cap += max((size_t)16, d->hypot_cap / 2); ++ new_list = libbpf_reallocarray(d->hypot_list, d->hypot_cap, sizeof(__u32)); ++ if (!new_list) ++ return -ENOMEM; ++ d->hypot_list = new_list; ++ } ++ d->hypot_list[d->hypot_cnt++] = from_id; ++ d->hypot_map[from_id] = to_id; ++ return 0; ++} ++ ++static void btf_dedup_clear_hypot_map(struct btf_dedup *d) ++{ ++ int i; ++ ++ for (i = 0; i < d->hypot_cnt; i++) ++ d->hypot_map[d->hypot_list[i]] = BTF_UNPROCESSED_ID; ++ d->hypot_cnt = 0; ++ d->hypot_adjust_canon = false; ++} ++ ++static void btf_dedup_free(struct btf_dedup *d) ++{ ++ hashmap__free(d->dedup_table); ++ d->dedup_table = NULL; ++ ++ free(d->map); ++ d->map = NULL; ++ ++ free(d->hypot_map); ++ d->hypot_map = NULL; ++ ++ free(d->hypot_list); ++ d->hypot_list = NULL; ++ ++ free(d); ++} ++ ++static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx) ++{ ++ return (size_t)key; ++} ++ ++static size_t btf_dedup_collision_hash_fn(const void *key, void *ctx) ++{ ++ return 0; ++} ++ ++static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx) ++{ ++ return k1 == k2; ++} ++ ++static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts) ++{ ++ struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup)); ++ hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn; ++ int i, err = 0, type_cnt; ++ ++ if (!d) ++ return ERR_PTR(-ENOMEM); ++ ++ if (OPTS_GET(opts, force_collisions, false)) ++ hash_fn = btf_dedup_collision_hash_fn; ++ ++ d->btf = btf; ++ d->btf_ext = OPTS_GET(opts, btf_ext, NULL); ++ ++ d->dedup_table = hashmap__new(hash_fn, btf_dedup_equal_fn, NULL); ++ if (IS_ERR(d->dedup_table)) { ++ err = PTR_ERR(d->dedup_table); ++ d->dedup_table = NULL; ++ goto done; ++ } ++ ++ type_cnt = btf__type_cnt(btf); ++ d->map = malloc(sizeof(__u32) * type_cnt); ++ if (!d->map) { ++ err = -ENOMEM; ++ goto done; ++ } ++ /* special BTF "void" type is made canonical immediately */ ++ d->map[0] = 0; ++ for (i = 1; i < type_cnt; i++) { ++ struct btf_type *t = 
btf_type_by_id(d->btf, i); ++ ++ /* VAR and DATASEC are never deduped and are self-canonical */ ++ if (btf_is_var(t) || btf_is_datasec(t)) ++ d->map[i] = i; ++ else ++ d->map[i] = BTF_UNPROCESSED_ID; ++ } ++ ++ d->hypot_map = malloc(sizeof(__u32) * type_cnt); ++ if (!d->hypot_map) { ++ err = -ENOMEM; ++ goto done; ++ } ++ for (i = 0; i < type_cnt; i++) ++ d->hypot_map[i] = BTF_UNPROCESSED_ID; ++ ++done: ++ if (err) { ++ btf_dedup_free(d); ++ return ERR_PTR(err); ++ } ++ ++ return d; ++} ++ ++/* ++ * Iterate over all possible places in .BTF and .BTF.ext that can reference ++ * string and pass pointer to it to a provided callback `fn`. ++ */ ++static int btf_for_each_str_off(struct btf_dedup *d, str_off_visit_fn fn, void *ctx) ++{ ++ int i, r; ++ ++ for (i = 0; i < d->btf->nr_types; i++) { ++ struct btf_type *t = btf_type_by_id(d->btf, d->btf->start_id + i); ++ ++ r = btf_type_visit_str_offs(t, fn, ctx); ++ if (r) ++ return r; ++ } ++ ++ if (!d->btf_ext) ++ return 0; ++ ++ r = btf_ext_visit_str_offs(d->btf_ext, fn, ctx); ++ if (r) ++ return r; ++ ++ return 0; ++} ++ ++static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx) ++{ ++ struct btf_dedup *d = ctx; ++ __u32 str_off = *str_off_ptr; ++ const char *s; ++ int off, err; ++ ++ /* don't touch empty string or string in main BTF */ ++ if (str_off == 0 || str_off < d->btf->start_str_off) ++ return 0; ++ ++ s = btf__str_by_offset(d->btf, str_off); ++ if (d->btf->base_btf) { ++ err = btf__find_str(d->btf->base_btf, s); ++ if (err >= 0) { ++ *str_off_ptr = err; ++ return 0; ++ } ++ if (err != -ENOENT) ++ return err; ++ } ++ ++ off = strset__add_str(d->strs_set, s); ++ if (off < 0) ++ return off; ++ ++ *str_off_ptr = d->btf->start_str_off + off; ++ return 0; ++} ++ ++/* ++ * Dedup string and filter out those that are not referenced from either .BTF ++ * or .BTF.ext (if provided) sections. ++ * ++ * This is done by building index of all strings in BTF's string section, ++ * then iterating over all entities that can reference strings (e.g., type ++ * names, struct field names, .BTF.ext line info, etc) and marking corresponding ++ * strings as used. After that all used strings are deduped and compacted into ++ * sequential blob of memory and new offsets are calculated. Then all the string ++ * references are iterated again and rewritten using new offsets. 
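The observable effect of the string pass, sketched through the public API: after deduplication every name_off still resolves to the same C string, even though the offsets and the string section size have changed. Assumes `btf` was already deduplicated as above.

#include <stdio.h>
#include "btf.h"

static void dump_type_names(const struct btf *btf)
{
        __u32 id, n = btf__type_cnt(btf);

        for (id = 1; id < n; id++) {
                const struct btf_type *t = btf__type_by_id(btf, id);

                /* name_off was rewritten by strs_dedup_remap_str_off(), but it
                 * still resolves against the compacted string section
                 */
                printf("[%u] %s\n", id, btf__name_by_offset(btf, t->name_off));
        }
}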
++ */ ++static int btf_dedup_strings(struct btf_dedup *d) ++{ ++ int err; ++ ++ if (d->btf->strs_deduped) ++ return 0; ++ ++ d->strs_set = strset__new(BTF_MAX_STR_OFFSET, NULL, 0); ++ if (IS_ERR(d->strs_set)) { ++ err = PTR_ERR(d->strs_set); ++ goto err_out; ++ } ++ ++ if (!d->btf->base_btf) { ++ /* insert empty string; we won't be looking it up during strings ++ * dedup, but it's good to have it for generic BTF string lookups ++ */ ++ err = strset__add_str(d->strs_set, ""); ++ if (err < 0) ++ goto err_out; ++ } ++ ++ /* remap string offsets */ ++ err = btf_for_each_str_off(d, strs_dedup_remap_str_off, d); ++ if (err) ++ goto err_out; ++ ++ /* replace BTF string data and hash with deduped ones */ ++ strset__free(d->btf->strs_set); ++ d->btf->hdr->str_len = strset__data_size(d->strs_set); ++ d->btf->strs_set = d->strs_set; ++ d->strs_set = NULL; ++ d->btf->strs_deduped = true; ++ return 0; ++ ++err_out: ++ strset__free(d->strs_set); ++ d->strs_set = NULL; ++ ++ return err; ++} ++ ++static long btf_hash_common(struct btf_type *t) ++{ ++ long h; ++ ++ h = hash_combine(0, t->name_off); ++ h = hash_combine(h, t->info); ++ h = hash_combine(h, t->size); ++ return h; ++} ++ ++static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2) ++{ ++ return t1->name_off == t2->name_off && ++ t1->info == t2->info && ++ t1->size == t2->size; ++} ++ ++/* Calculate type signature hash of INT or TAG. */ ++static long btf_hash_int_decl_tag(struct btf_type *t) ++{ ++ __u32 info = *(__u32 *)(t + 1); ++ long h; ++ ++ h = btf_hash_common(t); ++ h = hash_combine(h, info); ++ return h; ++} ++ ++/* Check structural equality of two INTs or TAGs. */ ++static bool btf_equal_int_tag(struct btf_type *t1, struct btf_type *t2) ++{ ++ __u32 info1, info2; ++ ++ if (!btf_equal_common(t1, t2)) ++ return false; ++ info1 = *(__u32 *)(t1 + 1); ++ info2 = *(__u32 *)(t2 + 1); ++ return info1 == info2; ++} ++ ++/* Calculate type signature hash of ENUM/ENUM64. */ ++static long btf_hash_enum(struct btf_type *t) ++{ ++ long h; ++ ++ /* don't hash vlen and enum members to support enum fwd resolving */ ++ h = hash_combine(0, t->name_off); ++ h = hash_combine(h, t->info & ~0xffff); ++ h = hash_combine(h, t->size); ++ return h; ++} ++ ++/* Check structural equality of two ENUMs. 
*/ ++static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2) ++{ ++ const struct btf_enum *m1, *m2; ++ __u16 vlen; ++ int i; ++ ++ if (!btf_equal_common(t1, t2)) ++ return false; ++ ++ vlen = btf_vlen(t1); ++ m1 = btf_enum(t1); ++ m2 = btf_enum(t2); ++ for (i = 0; i < vlen; i++) { ++ if (m1->name_off != m2->name_off || m1->val != m2->val) ++ return false; ++ m1++; ++ m2++; ++ } ++ return true; ++} ++ ++static bool btf_equal_enum64(struct btf_type *t1, struct btf_type *t2) ++{ ++ const struct btf_enum64 *m1, *m2; ++ __u16 vlen; ++ int i; ++ ++ if (!btf_equal_common(t1, t2)) ++ return false; ++ ++ vlen = btf_vlen(t1); ++ m1 = btf_enum64(t1); ++ m2 = btf_enum64(t2); ++ for (i = 0; i < vlen; i++) { ++ if (m1->name_off != m2->name_off || m1->val_lo32 != m2->val_lo32 || ++ m1->val_hi32 != m2->val_hi32) ++ return false; ++ m1++; ++ m2++; ++ } ++ return true; ++} ++ ++static inline bool btf_is_enum_fwd(struct btf_type *t) ++{ ++ return btf_is_any_enum(t) && btf_vlen(t) == 0; ++} ++ ++static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2) ++{ ++ if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2)) ++ return btf_equal_enum(t1, t2); ++ /* ignore vlen when comparing */ ++ return t1->name_off == t2->name_off && ++ (t1->info & ~0xffff) == (t2->info & ~0xffff) && ++ t1->size == t2->size; ++} ++ ++static bool btf_compat_enum64(struct btf_type *t1, struct btf_type *t2) ++{ ++ if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2)) ++ return btf_equal_enum64(t1, t2); ++ ++ /* ignore vlen when comparing */ ++ return t1->name_off == t2->name_off && ++ (t1->info & ~0xffff) == (t2->info & ~0xffff) && ++ t1->size == t2->size; ++} ++ ++/* ++ * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs, ++ * as referenced type IDs equivalence is established separately during type ++ * graph equivalence check algorithm. ++ */ ++static long btf_hash_struct(struct btf_type *t) ++{ ++ const struct btf_member *member = btf_members(t); ++ __u32 vlen = btf_vlen(t); ++ long h = btf_hash_common(t); ++ int i; ++ ++ for (i = 0; i < vlen; i++) { ++ h = hash_combine(h, member->name_off); ++ h = hash_combine(h, member->offset); ++ /* no hashing of referenced type ID, it can be unresolved yet */ ++ member++; ++ } ++ return h; ++} ++ ++/* ++ * Check structural compatibility of two STRUCTs/UNIONs, ignoring referenced ++ * type IDs. This check is performed during type graph equivalence check and ++ * referenced types equivalence is checked separately. ++ */ ++static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2) ++{ ++ const struct btf_member *m1, *m2; ++ __u16 vlen; ++ int i; ++ ++ if (!btf_equal_common(t1, t2)) ++ return false; ++ ++ vlen = btf_vlen(t1); ++ m1 = btf_members(t1); ++ m2 = btf_members(t2); ++ for (i = 0; i < vlen; i++) { ++ if (m1->name_off != m2->name_off || m1->offset != m2->offset) ++ return false; ++ m1++; ++ m2++; ++ } ++ return true; ++} ++ ++/* ++ * Calculate type signature hash of ARRAY, including referenced type IDs, ++ * under assumption that they were already resolved to canonical type IDs and ++ * are not going to change. 
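btf_compat_enum() above is what lets a forward-declared enum (vlen == 0) be folded into a full definition with the same name and size; a small sketch of that effect through the public API, with error checking elided:

#include "btf.h"

static void enum_fwd_demo(void)
{
        struct btf *btf = btf__new_empty();

        /* enum E;            -- forward declaration, vlen == 0 */
        btf__add_enum(btf, "E", 4);
        /* enum E { V = 1 };  -- full definition, same name and size */
        btf__add_enum(btf, "E", 4);
        btf__add_enum_value(btf, "V", 1);

        /* btf_compat_enum() matches the two, and the empty enum is expected to
         * resolve to the full one ("resolve fwd to full enum" branch of
         * btf_dedup_prim_type() below), leaving a single "E" after compaction
         */
        btf__dedup(btf, NULL);

        btf__free(btf);
}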
++ */ ++static long btf_hash_array(struct btf_type *t) ++{ ++ const struct btf_array *info = btf_array(t); ++ long h = btf_hash_common(t); ++ ++ h = hash_combine(h, info->type); ++ h = hash_combine(h, info->index_type); ++ h = hash_combine(h, info->nelems); ++ return h; ++} ++ ++/* ++ * Check exact equality of two ARRAYs, taking into account referenced ++ * type IDs, under assumption that they were already resolved to canonical ++ * type IDs and are not going to change. ++ * This function is called during reference types deduplication to compare ++ * ARRAY to potential canonical representative. ++ */ ++static bool btf_equal_array(struct btf_type *t1, struct btf_type *t2) ++{ ++ const struct btf_array *info1, *info2; ++ ++ if (!btf_equal_common(t1, t2)) ++ return false; ++ ++ info1 = btf_array(t1); ++ info2 = btf_array(t2); ++ return info1->type == info2->type && ++ info1->index_type == info2->index_type && ++ info1->nelems == info2->nelems; ++} ++ ++/* ++ * Check structural compatibility of two ARRAYs, ignoring referenced type ++ * IDs. This check is performed during type graph equivalence check and ++ * referenced types equivalence is checked separately. ++ */ ++static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2) ++{ ++ if (!btf_equal_common(t1, t2)) ++ return false; ++ ++ return btf_array(t1)->nelems == btf_array(t2)->nelems; ++} ++ ++/* ++ * Calculate type signature hash of FUNC_PROTO, including referenced type IDs, ++ * under assumption that they were already resolved to canonical type IDs and ++ * are not going to change. ++ */ ++static long btf_hash_fnproto(struct btf_type *t) ++{ ++ const struct btf_param *member = btf_params(t); ++ __u16 vlen = btf_vlen(t); ++ long h = btf_hash_common(t); ++ int i; ++ ++ for (i = 0; i < vlen; i++) { ++ h = hash_combine(h, member->name_off); ++ h = hash_combine(h, member->type); ++ member++; ++ } ++ return h; ++} ++ ++/* ++ * Check exact equality of two FUNC_PROTOs, taking into account referenced ++ * type IDs, under assumption that they were already resolved to canonical ++ * type IDs and are not going to change. ++ * This function is called during reference types deduplication to compare ++ * FUNC_PROTO to potential canonical representative. ++ */ ++static bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2) ++{ ++ const struct btf_param *m1, *m2; ++ __u16 vlen; ++ int i; ++ ++ if (!btf_equal_common(t1, t2)) ++ return false; ++ ++ vlen = btf_vlen(t1); ++ m1 = btf_params(t1); ++ m2 = btf_params(t2); ++ for (i = 0; i < vlen; i++) { ++ if (m1->name_off != m2->name_off || m1->type != m2->type) ++ return false; ++ m1++; ++ m2++; ++ } ++ return true; ++} ++ ++/* ++ * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type ++ * IDs. This check is performed during type graph equivalence check and ++ * referenced types equivalence is checked separately. ++ */ ++static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2) ++{ ++ const struct btf_param *m1, *m2; ++ __u16 vlen; ++ int i; ++ ++ /* skip return type ID */ ++ if (t1->name_off != t2->name_off || t1->info != t2->info) ++ return false; ++ ++ vlen = btf_vlen(t1); ++ m1 = btf_params(t1); ++ m2 = btf_params(t2); ++ for (i = 0; i < vlen; i++) { ++ if (m1->name_off != m2->name_off) ++ return false; ++ m1++; ++ m2++; ++ } ++ return true; ++} ++ ++/* Prepare split BTF for deduplication by calculating hashes of base BTF's ++ * types and initializing the rest of the state (canonical type mapping) for ++ * the fixed base BTF part. 
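btf_hash_array()/btf_equal_array() are consumed later by the reference-type pass; a brief sketch of the end result they enable, error checking elided:

#include "btf.h"

static void array_dedup_demo(void)
{
        struct btf *btf = btf__new_empty();
        int int_id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);

        btf__add_array(btf, int_id, int_id, 8);  /* int[8] */
        btf__add_array(btf, int_id, int_id, 8);  /* structurally identical int[8] */

        btf__dedup(btf, NULL);
        /* both ARRAYs hash (btf_hash_array) and compare (btf_equal_array) equal,
         * so only one is expected to survive compaction
         */
        btf__free(btf);
}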
++ */ ++static int btf_dedup_prep(struct btf_dedup *d) ++{ ++ struct btf_type *t; ++ int type_id; ++ long h; ++ ++ if (!d->btf->base_btf) ++ return 0; ++ ++ for (type_id = 1; type_id < d->btf->start_id; type_id++) { ++ t = btf_type_by_id(d->btf, type_id); ++ ++ /* all base BTF types are self-canonical by definition */ ++ d->map[type_id] = type_id; ++ ++ switch (btf_kind(t)) { ++ case BTF_KIND_VAR: ++ case BTF_KIND_DATASEC: ++ /* VAR and DATASEC are never hash/deduplicated */ ++ continue; ++ case BTF_KIND_CONST: ++ case BTF_KIND_VOLATILE: ++ case BTF_KIND_RESTRICT: ++ case BTF_KIND_PTR: ++ case BTF_KIND_FWD: ++ case BTF_KIND_TYPEDEF: ++ case BTF_KIND_FUNC: ++ case BTF_KIND_FLOAT: ++ case BTF_KIND_TYPE_TAG: ++ h = btf_hash_common(t); ++ break; ++ case BTF_KIND_INT: ++ case BTF_KIND_DECL_TAG: ++ h = btf_hash_int_decl_tag(t); ++ break; ++ case BTF_KIND_ENUM: ++ case BTF_KIND_ENUM64: ++ h = btf_hash_enum(t); ++ break; ++ case BTF_KIND_STRUCT: ++ case BTF_KIND_UNION: ++ h = btf_hash_struct(t); ++ break; ++ case BTF_KIND_ARRAY: ++ h = btf_hash_array(t); ++ break; ++ case BTF_KIND_FUNC_PROTO: ++ h = btf_hash_fnproto(t); ++ break; ++ default: ++ pr_debug("unknown kind %d for type [%d]\n", btf_kind(t), type_id); ++ return -EINVAL; ++ } ++ if (btf_dedup_table_add(d, h, type_id)) ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Deduplicate primitive types, that can't reference other types, by calculating ++ * their type signature hash and comparing them with any possible canonical ++ * candidate. If no canonical candidate matches, type itself is marked as ++ * canonical and is added into `btf_dedup->dedup_table` as another candidate. ++ */ ++static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) ++{ ++ struct btf_type *t = btf_type_by_id(d->btf, type_id); ++ struct hashmap_entry *hash_entry; ++ struct btf_type *cand; ++ /* if we don't find equivalent type, then we are canonical */ ++ __u32 new_id = type_id; ++ __u32 cand_id; ++ long h; ++ ++ switch (btf_kind(t)) { ++ case BTF_KIND_CONST: ++ case BTF_KIND_VOLATILE: ++ case BTF_KIND_RESTRICT: ++ case BTF_KIND_PTR: ++ case BTF_KIND_TYPEDEF: ++ case BTF_KIND_ARRAY: ++ case BTF_KIND_STRUCT: ++ case BTF_KIND_UNION: ++ case BTF_KIND_FUNC: ++ case BTF_KIND_FUNC_PROTO: ++ case BTF_KIND_VAR: ++ case BTF_KIND_DATASEC: ++ case BTF_KIND_DECL_TAG: ++ case BTF_KIND_TYPE_TAG: ++ return 0; ++ ++ case BTF_KIND_INT: ++ h = btf_hash_int_decl_tag(t); ++ for_each_dedup_cand(d, hash_entry, h) { ++ cand_id = (__u32)(long)hash_entry->value; ++ cand = btf_type_by_id(d->btf, cand_id); ++ if (btf_equal_int_tag(t, cand)) { ++ new_id = cand_id; ++ break; ++ } ++ } ++ break; ++ ++ case BTF_KIND_ENUM: ++ h = btf_hash_enum(t); ++ for_each_dedup_cand(d, hash_entry, h) { ++ cand_id = (__u32)(long)hash_entry->value; ++ cand = btf_type_by_id(d->btf, cand_id); ++ if (btf_equal_enum(t, cand)) { ++ new_id = cand_id; ++ break; ++ } ++ if (btf_compat_enum(t, cand)) { ++ if (btf_is_enum_fwd(t)) { ++ /* resolve fwd to full enum */ ++ new_id = cand_id; ++ break; ++ } ++ /* resolve canonical enum fwd to full enum */ ++ d->map[cand_id] = type_id; ++ } ++ } ++ break; ++ ++ case BTF_KIND_ENUM64: ++ h = btf_hash_enum(t); ++ for_each_dedup_cand(d, hash_entry, h) { ++ cand_id = (__u32)(long)hash_entry->value; ++ cand = btf_type_by_id(d->btf, cand_id); ++ if (btf_equal_enum64(t, cand)) { ++ new_id = cand_id; ++ break; ++ } ++ if (btf_compat_enum64(t, cand)) { ++ if (btf_is_enum_fwd(t)) { ++ /* resolve fwd to full enum */ ++ new_id = cand_id; ++ break; ++ } ++ /* resolve canonical 
enum fwd to full enum */ ++ d->map[cand_id] = type_id; ++ } ++ } ++ break; ++ ++ case BTF_KIND_FWD: ++ case BTF_KIND_FLOAT: ++ h = btf_hash_common(t); ++ for_each_dedup_cand(d, hash_entry, h) { ++ cand_id = (__u32)(long)hash_entry->value; ++ cand = btf_type_by_id(d->btf, cand_id); ++ if (btf_equal_common(t, cand)) { ++ new_id = cand_id; ++ break; ++ } ++ } ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ ++ d->map[type_id] = new_id; ++ if (type_id == new_id && btf_dedup_table_add(d, h, type_id)) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++static int btf_dedup_prim_types(struct btf_dedup *d) ++{ ++ int i, err; ++ ++ for (i = 0; i < d->btf->nr_types; i++) { ++ err = btf_dedup_prim_type(d, d->btf->start_id + i); ++ if (err) ++ return err; ++ } ++ return 0; ++} ++ ++/* ++ * Check whether type is already mapped into canonical one (could be to itself). ++ */ ++static inline bool is_type_mapped(struct btf_dedup *d, uint32_t type_id) ++{ ++ return d->map[type_id] <= BTF_MAX_NR_TYPES; ++} ++ ++/* ++ * Resolve type ID into its canonical type ID, if any; otherwise return original ++ * type ID. If type is FWD and is resolved into STRUCT/UNION already, follow ++ * STRUCT/UNION link and resolve it into canonical type ID as well. ++ */ ++static inline __u32 resolve_type_id(struct btf_dedup *d, __u32 type_id) ++{ ++ while (is_type_mapped(d, type_id) && d->map[type_id] != type_id) ++ type_id = d->map[type_id]; ++ return type_id; ++} ++ ++/* ++ * Resolve FWD to underlying STRUCT/UNION, if any; otherwise return original ++ * type ID. ++ */ ++static uint32_t resolve_fwd_id(struct btf_dedup *d, uint32_t type_id) ++{ ++ __u32 orig_type_id = type_id; ++ ++ if (!btf_is_fwd(btf__type_by_id(d->btf, type_id))) ++ return type_id; ++ ++ while (is_type_mapped(d, type_id) && d->map[type_id] != type_id) ++ type_id = d->map[type_id]; ++ ++ if (!btf_is_fwd(btf__type_by_id(d->btf, type_id))) ++ return type_id; ++ ++ return orig_type_id; ++} ++ ++ ++static inline __u16 btf_fwd_kind(struct btf_type *t) ++{ ++ return btf_kflag(t) ? BTF_KIND_UNION : BTF_KIND_STRUCT; ++} ++ ++/* Check if given two types are identical ARRAY definitions */ ++static int btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2) ++{ ++ struct btf_type *t1, *t2; ++ ++ t1 = btf_type_by_id(d->btf, id1); ++ t2 = btf_type_by_id(d->btf, id2); ++ if (!btf_is_array(t1) || !btf_is_array(t2)) ++ return 0; ++ ++ return btf_equal_array(t1, t2); ++} ++ ++/* Check if given two types are identical STRUCT/UNION definitions */ ++static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2) ++{ ++ const struct btf_member *m1, *m2; ++ struct btf_type *t1, *t2; ++ int n, i; ++ ++ t1 = btf_type_by_id(d->btf, id1); ++ t2 = btf_type_by_id(d->btf, id2); ++ ++ if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2)) ++ return false; ++ ++ if (!btf_shallow_equal_struct(t1, t2)) ++ return false; ++ ++ m1 = btf_members(t1); ++ m2 = btf_members(t2); ++ for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) { ++ if (m1->type != m2->type) ++ return false; ++ } ++ return true; ++} ++ ++/* ++ * Check equivalence of BTF type graph formed by candidate struct/union (we'll ++ * call it "candidate graph" in this description for brevity) to a type graph ++ * formed by (potential) canonical struct/union ("canonical graph" for brevity ++ * here, though keep in mind that not all types in canonical graph are ++ * necessarily canonical representatives themselves, some of them might be ++ * duplicates or its uniqueness might not have been established yet). 
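btf_fwd_kind() above reads the kflag bit to tell struct and union forward declarations apart; the same bit is visible through the public accessors, as in this small sketch (error checking elided):

#include <assert.h>
#include "btf.h"

static void fwd_kind_demo(void)
{
        struct btf *btf = btf__new_empty();
        int s_id = btf__add_fwd(btf, "S", BTF_FWD_STRUCT);
        int u_id = btf__add_fwd(btf, "U", BTF_FWD_UNION);

        /* kflag == 0 means the FWD stands for a struct, kflag == 1 for a union,
         * which is what btf_fwd_kind() maps to BTF_KIND_STRUCT/BTF_KIND_UNION
         */
        assert(btf_kflag(btf__type_by_id(btf, s_id)) == 0);
        assert(btf_kflag(btf__type_by_id(btf, u_id)) == 1);

        btf__free(btf);
}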
++ * Returns: ++ * - >0, if type graphs are equivalent; ++ * - 0, if not equivalent; ++ * - <0, on error. ++ * ++ * Algorithm performs side-by-side DFS traversal of both type graphs and checks ++ * equivalence of BTF types at each step. If at any point BTF types in candidate ++ * and canonical graphs are not compatible structurally, whole graphs are ++ * incompatible. If types are structurally equivalent (i.e., all information ++ * except referenced type IDs is exactly the same), a mapping from `canon_id` to ++ * a `cand_id` is recored in hypothetical mapping (`btf_dedup->hypot_map`). ++ * If a type references other types, then those referenced types are checked ++ * for equivalence recursively. ++ * ++ * During DFS traversal, if we find that for current `canon_id` type we ++ * already have some mapping in hypothetical map, we check for two possible ++ * situations: ++ * - `canon_id` is mapped to exactly the same type as `cand_id`. This will ++ * happen when type graphs have cycles. In this case we assume those two ++ * types are equivalent. ++ * - `canon_id` is mapped to different type. This is contradiction in our ++ * hypothetical mapping, because same graph in canonical graph corresponds ++ * to two different types in candidate graph, which for equivalent type ++ * graphs shouldn't happen. This condition terminates equivalence check ++ * with negative result. ++ * ++ * If type graphs traversal exhausts types to check and find no contradiction, ++ * then type graphs are equivalent. ++ * ++ * When checking types for equivalence, there is one special case: FWD types. ++ * If FWD type resolution is allowed and one of the types (either from canonical ++ * or candidate graph) is FWD and other is STRUCT/UNION (depending on FWD's kind ++ * flag) and their names match, hypothetical mapping is updated to point from ++ * FWD to STRUCT/UNION. If graphs will be determined as equivalent successfully, ++ * this mapping will be used to record FWD -> STRUCT/UNION mapping permanently. ++ * ++ * Technically, this could lead to incorrect FWD to STRUCT/UNION resolution, ++ * if there are two exactly named (or anonymous) structs/unions that are ++ * compatible structurally, one of which has FWD field, while other is concrete ++ * STRUCT/UNION, but according to C sources they are different structs/unions ++ * that are referencing different types with the same name. This is extremely ++ * unlikely to happen, but btf_dedup API allows to disable FWD resolution if ++ * this logic is causing problems. ++ * ++ * Doing FWD resolution means that both candidate and/or canonical graphs can ++ * consists of portions of the graph that come from multiple compilation units. ++ * This is due to the fact that types within single compilation unit are always ++ * deduplicated and FWDs are already resolved, if referenced struct/union ++ * definiton is available. So, if we had unresolved FWD and found corresponding ++ * STRUCT/UNION, they will be from different compilation units. This ++ * consequently means that when we "link" FWD to corresponding STRUCT/UNION, ++ * type graph will likely have at least two different BTF types that describe ++ * same type (e.g., most probably there will be two different BTF types for the ++ * same 'int' primitive type) and could even have "overlapping" parts of type ++ * graph that describe same subset of types. 
++ * ++ * This in turn means that our assumption that each type in canonical graph ++ * must correspond to exactly one type in candidate graph might not hold ++ * anymore and will make it harder to detect contradictions using hypothetical ++ * map. To handle this problem, we allow to follow FWD -> STRUCT/UNION ++ * resolution only in canonical graph. FWDs in candidate graphs are never ++ * resolved. To see why it's OK, let's check all possible situations w.r.t. FWDs ++ * that can occur: ++ * - Both types in canonical and candidate graphs are FWDs. If they are ++ * structurally equivalent, then they can either be both resolved to the ++ * same STRUCT/UNION or not resolved at all. In both cases they are ++ * equivalent and there is no need to resolve FWD on candidate side. ++ * - Both types in canonical and candidate graphs are concrete STRUCT/UNION, ++ * so nothing to resolve as well, algorithm will check equivalence anyway. ++ * - Type in canonical graph is FWD, while type in candidate is concrete ++ * STRUCT/UNION. In this case candidate graph comes from single compilation ++ * unit, so there is exactly one BTF type for each unique C type. After ++ * resolving FWD into STRUCT/UNION, there might be more than one BTF type ++ * in canonical graph mapping to single BTF type in candidate graph, but ++ * because hypothetical mapping maps from canonical to candidate types, it's ++ * alright, and we still maintain the property of having single `canon_id` ++ * mapping to single `cand_id` (there could be two different `canon_id` ++ * mapped to the same `cand_id`, but it's not contradictory). ++ * - Type in canonical graph is concrete STRUCT/UNION, while type in candidate ++ * graph is FWD. In this case we are just going to check compatibility of ++ * STRUCT/UNION and corresponding FWD, and if they are compatible, we'll ++ * assume that whatever STRUCT/UNION FWD resolves to must be equivalent to ++ * a concrete STRUCT/UNION from canonical graph. If the rest of type graphs ++ * turn out equivalent, we'll re-resolve FWD to concrete STRUCT/UNION from ++ * canonical graph. ++ */ ++static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, ++ __u32 canon_id) ++{ ++ struct btf_type *cand_type; ++ struct btf_type *canon_type; ++ __u32 hypot_type_id; ++ __u16 cand_kind; ++ __u16 canon_kind; ++ int i, eq; ++ ++ /* if both resolve to the same canonical, they must be equivalent */ ++ if (resolve_type_id(d, cand_id) == resolve_type_id(d, canon_id)) ++ return 1; ++ ++ canon_id = resolve_fwd_id(d, canon_id); ++ ++ hypot_type_id = d->hypot_map[canon_id]; ++ if (hypot_type_id <= BTF_MAX_NR_TYPES) { ++ if (hypot_type_id == cand_id) ++ return 1; ++ /* In some cases compiler will generate different DWARF types ++ * for *identical* array type definitions and use them for ++ * different fields within the *same* struct. This breaks type ++ * equivalence check, which makes an assumption that candidate ++ * types sub-graph has a consistent and deduped-by-compiler ++ * types within a single CU. So work around that by explicitly ++ * allowing identical array types here. ++ */ ++ if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id)) ++ return 1; ++ /* It turns out that similar situation can happen with ++ * struct/union sometimes, sigh... Handle the case where ++ * structs/unions are exactly the same, down to the referenced ++ * type IDs. Anything more complicated (e.g., if referenced ++ * types are different, but equivalent) is *way more* ++ * complicated and requires a many-to-many equivalence mapping. 
++ */ ++ if (btf_dedup_identical_structs(d, hypot_type_id, cand_id)) ++ return 1; ++ return 0; ++ } ++ ++ if (btf_dedup_hypot_map_add(d, canon_id, cand_id)) ++ return -ENOMEM; ++ ++ cand_type = btf_type_by_id(d->btf, cand_id); ++ canon_type = btf_type_by_id(d->btf, canon_id); ++ cand_kind = btf_kind(cand_type); ++ canon_kind = btf_kind(canon_type); ++ ++ if (cand_type->name_off != canon_type->name_off) ++ return 0; ++ ++ /* FWD <--> STRUCT/UNION equivalence check, if enabled */ ++ if ((cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD) ++ && cand_kind != canon_kind) { ++ __u16 real_kind; ++ __u16 fwd_kind; ++ ++ if (cand_kind == BTF_KIND_FWD) { ++ real_kind = canon_kind; ++ fwd_kind = btf_fwd_kind(cand_type); ++ } else { ++ real_kind = cand_kind; ++ fwd_kind = btf_fwd_kind(canon_type); ++ /* we'd need to resolve base FWD to STRUCT/UNION */ ++ if (fwd_kind == real_kind && canon_id < d->btf->start_id) ++ d->hypot_adjust_canon = true; ++ } ++ return fwd_kind == real_kind; ++ } ++ ++ if (cand_kind != canon_kind) ++ return 0; ++ ++ switch (cand_kind) { ++ case BTF_KIND_INT: ++ return btf_equal_int_tag(cand_type, canon_type); ++ ++ case BTF_KIND_ENUM: ++ return btf_compat_enum(cand_type, canon_type); ++ ++ case BTF_KIND_ENUM64: ++ return btf_compat_enum64(cand_type, canon_type); ++ ++ case BTF_KIND_FWD: ++ case BTF_KIND_FLOAT: ++ return btf_equal_common(cand_type, canon_type); ++ ++ case BTF_KIND_CONST: ++ case BTF_KIND_VOLATILE: ++ case BTF_KIND_RESTRICT: ++ case BTF_KIND_PTR: ++ case BTF_KIND_TYPEDEF: ++ case BTF_KIND_FUNC: ++ case BTF_KIND_TYPE_TAG: ++ if (cand_type->info != canon_type->info) ++ return 0; ++ return btf_dedup_is_equiv(d, cand_type->type, canon_type->type); ++ ++ case BTF_KIND_ARRAY: { ++ const struct btf_array *cand_arr, *canon_arr; ++ ++ if (!btf_compat_array(cand_type, canon_type)) ++ return 0; ++ cand_arr = btf_array(cand_type); ++ canon_arr = btf_array(canon_type); ++ eq = btf_dedup_is_equiv(d, cand_arr->index_type, canon_arr->index_type); ++ if (eq <= 0) ++ return eq; ++ return btf_dedup_is_equiv(d, cand_arr->type, canon_arr->type); ++ } ++ ++ case BTF_KIND_STRUCT: ++ case BTF_KIND_UNION: { ++ const struct btf_member *cand_m, *canon_m; ++ __u16 vlen; ++ ++ if (!btf_shallow_equal_struct(cand_type, canon_type)) ++ return 0; ++ vlen = btf_vlen(cand_type); ++ cand_m = btf_members(cand_type); ++ canon_m = btf_members(canon_type); ++ for (i = 0; i < vlen; i++) { ++ eq = btf_dedup_is_equiv(d, cand_m->type, canon_m->type); ++ if (eq <= 0) ++ return eq; ++ cand_m++; ++ canon_m++; ++ } ++ ++ return 1; ++ } ++ ++ case BTF_KIND_FUNC_PROTO: { ++ const struct btf_param *cand_p, *canon_p; ++ __u16 vlen; ++ ++ if (!btf_compat_fnproto(cand_type, canon_type)) ++ return 0; ++ eq = btf_dedup_is_equiv(d, cand_type->type, canon_type->type); ++ if (eq <= 0) ++ return eq; ++ vlen = btf_vlen(cand_type); ++ cand_p = btf_params(cand_type); ++ canon_p = btf_params(canon_type); ++ for (i = 0; i < vlen; i++) { ++ eq = btf_dedup_is_equiv(d, cand_p->type, canon_p->type); ++ if (eq <= 0) ++ return eq; ++ cand_p++; ++ canon_p++; ++ } ++ return 1; ++ } ++ ++ default: ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++/* ++ * Use hypothetical mapping, produced by successful type graph equivalence ++ * check, to augment existing struct/union canonical mapping, where possible. ++ * ++ * If BTF_KIND_FWD resolution is allowed, this mapping is also used to record ++ * FWD -> STRUCT/UNION correspondence as well. 
FWD resolution is bidirectional: ++ * it doesn't matter if FWD type was part of canonical graph or candidate one, ++ * we are recording the mapping anyway. As opposed to carefulness required ++ * for struct/union correspondence mapping (described below), for FWD resolution ++ * it's not important, as by the time that FWD type (reference type) will be ++ * deduplicated all structs/unions will be deduped already anyway. ++ * ++ * Recording STRUCT/UNION mapping is purely a performance optimization and is ++ * not required for correctness. It needs to be done carefully to ensure that ++ * struct/union from candidate's type graph is not mapped into corresponding ++ * struct/union from canonical type graph that itself hasn't been resolved into ++ * canonical representative. The only guarantee we have is that canonical ++ * struct/union was determined as canonical and that won't change. But any ++ * types referenced through that struct/union fields could have been not yet ++ * resolved, so in case like that it's too early to establish any kind of ++ * correspondence between structs/unions. ++ * ++ * No canonical correspondence is derived for primitive types (they are already ++ * deduplicated completely already anyway) or reference types (they rely on ++ * stability of struct/union canonical relationship for equivalence checks). ++ */ ++static void btf_dedup_merge_hypot_map(struct btf_dedup *d) ++{ ++ __u32 canon_type_id, targ_type_id; ++ __u16 t_kind, c_kind; ++ __u32 t_id, c_id; ++ int i; ++ ++ for (i = 0; i < d->hypot_cnt; i++) { ++ canon_type_id = d->hypot_list[i]; ++ targ_type_id = d->hypot_map[canon_type_id]; ++ t_id = resolve_type_id(d, targ_type_id); ++ c_id = resolve_type_id(d, canon_type_id); ++ t_kind = btf_kind(btf__type_by_id(d->btf, t_id)); ++ c_kind = btf_kind(btf__type_by_id(d->btf, c_id)); ++ /* ++ * Resolve FWD into STRUCT/UNION. ++ * It's ok to resolve FWD into STRUCT/UNION that's not yet ++ * mapped to canonical representative (as opposed to ++ * STRUCT/UNION <--> STRUCT/UNION mapping logic below), because ++ * eventually that struct is going to be mapped and all resolved ++ * FWDs will automatically resolve to correct canonical ++ * representative. This will happen before ref type deduping, ++ * which critically depends on stability of these mapping. This ++ * stability is not a requirement for STRUCT/UNION equivalence ++ * checks, though. ++ */ ++ ++ /* if it's the split BTF case, we still need to point base FWD ++ * to STRUCT/UNION in a split BTF, because FWDs from split BTF ++ * will be resolved against base FWD. If we don't point base ++ * canonical FWD to the resolved STRUCT/UNION, then all the ++ * FWDs in split BTF won't be correctly resolved to a proper ++ * STRUCT/UNION. ++ */ ++ if (t_kind != BTF_KIND_FWD && c_kind == BTF_KIND_FWD) ++ d->map[c_id] = t_id; ++ ++ /* if graph equivalence determined that we'd need to adjust ++ * base canonical types, then we need to only point base FWDs ++ * to STRUCTs/UNIONs and do no more modifications. For all ++ * other purposes the type graphs were not equivalent. ++ */ ++ if (d->hypot_adjust_canon) ++ continue; ++ ++ if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD) ++ d->map[t_id] = c_id; ++ ++ if ((t_kind == BTF_KIND_STRUCT || t_kind == BTF_KIND_UNION) && ++ c_kind != BTF_KIND_FWD && ++ is_type_mapped(d, c_id) && ++ !is_type_mapped(d, t_id)) { ++ /* ++ * as a perf optimization, we can map struct/union ++ * that's part of type graph we just verified for ++ * equivalence. 
We can do that for struct/union that has ++ * canonical representative only, though. ++ */ ++ d->map[t_id] = c_id; ++ } ++ } ++} ++ ++/* ++ * Deduplicate struct/union types. ++ * ++ * For each struct/union type its type signature hash is calculated, taking ++ * into account type's name, size, number, order and names of fields, but ++ * ignoring type ID's referenced from fields, because they might not be deduped ++ * completely until after reference types deduplication phase. This type hash ++ * is used to iterate over all potential canonical types, sharing same hash. ++ * For each canonical candidate we check whether type graphs that they form ++ * (through referenced types in fields and so on) are equivalent using algorithm ++ * implemented in `btf_dedup_is_equiv`. If such equivalence is found and ++ * BTF_KIND_FWD resolution is allowed, then hypothetical mapping ++ * (btf_dedup->hypot_map) produced by aforementioned type graph equivalence ++ * algorithm is used to record FWD -> STRUCT/UNION mapping. It's also used to ++ * potentially map other structs/unions to their canonical representatives, ++ * if such relationship hasn't yet been established. This speeds up algorithm ++ * by eliminating some of the duplicate work. ++ * ++ * If no matching canonical representative was found, struct/union is marked ++ * as canonical for itself and is added into btf_dedup->dedup_table hash map ++ * for further look ups. ++ */ ++static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) ++{ ++ struct btf_type *cand_type, *t; ++ struct hashmap_entry *hash_entry; ++ /* if we don't find equivalent type, then we are canonical */ ++ __u32 new_id = type_id; ++ __u16 kind; ++ long h; ++ ++ /* already deduped or is in process of deduping (loop detected) */ ++ if (d->map[type_id] <= BTF_MAX_NR_TYPES) ++ return 0; ++ ++ t = btf_type_by_id(d->btf, type_id); ++ kind = btf_kind(t); ++ ++ if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION) ++ return 0; ++ ++ h = btf_hash_struct(t); ++ for_each_dedup_cand(d, hash_entry, h) { ++ __u32 cand_id = (__u32)(long)hash_entry->value; ++ int eq; ++ ++ /* ++ * Even though btf_dedup_is_equiv() checks for ++ * btf_shallow_equal_struct() internally when checking two ++ * structs (unions) for equivalence, we need to guard here ++ * from picking matching FWD type as a dedup candidate. ++ * This can happen due to hash collision. In such case just ++ * relying on btf_dedup_is_equiv() would lead to potentially ++ * creating a loop (FWD -> STRUCT and STRUCT -> FWD), because ++ * FWD and compatible STRUCT/UNION are considered equivalent. ++ */ ++ cand_type = btf_type_by_id(d->btf, cand_id); ++ if (!btf_shallow_equal_struct(t, cand_type)) ++ continue; ++ ++ btf_dedup_clear_hypot_map(d); ++ eq = btf_dedup_is_equiv(d, type_id, cand_id); ++ if (eq < 0) ++ return eq; ++ if (!eq) ++ continue; ++ btf_dedup_merge_hypot_map(d); ++ if (d->hypot_adjust_canon) /* not really equivalent */ ++ continue; ++ new_id = cand_id; ++ break; ++ } ++ ++ d->map[type_id] = new_id; ++ if (type_id == new_id && btf_dedup_table_add(d, h, type_id)) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++static int btf_dedup_struct_types(struct btf_dedup *d) ++{ ++ int i, err; ++ ++ for (i = 0; i < d->btf->nr_types; i++) { ++ err = btf_dedup_struct_type(d, d->btf->start_id + i); ++ if (err) ++ return err; ++ } ++ return 0; ++} ++ ++/* ++ * Deduplicate reference type. ++ * ++ * Once all primitive and struct/union types got deduplicated, we can easily ++ * deduplicate all other (reference) BTF types. 
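A compact illustration of the struct/union pass described above: both copies of `S` hash identically (member type IDs are not hashed) and their type graphs are equivalent even though their members reference different duplicate ints, so one `S` and one `int` are expected to remain. Error checking elided; the IDs in the comments are only for orientation.

#include "btf.h"

static void struct_dedup_demo(void)
{
        struct btf *btf = btf__new_empty();
        int i1 = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);  /* id 1 */
        int i2 = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);  /* id 2, duplicate of id 1 */

        btf__add_struct(btf, "S", 4);                          /* id 3 */
        btf__add_field(btf, "x", i1, 0, 0);

        btf__add_struct(btf, "S", 4);                          /* id 4 */
        btf__add_field(btf, "x", i2, 0, 0);                    /* same shape, different ref ID */

        btf__dedup(btf, NULL);
        /* btf_hash_struct() ignores member type IDs and btf_dedup_is_equiv()
         * proves the graphs equal, so both "S" copies map to one canonical type
         */
        btf__free(btf);
}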
This is done in two steps: ++ * ++ * 1. Resolve all referenced type IDs into their canonical type IDs. This ++ * resolution can be done either immediately for primitive or struct/union types ++ * (because they were deduped in previous two phases) or recursively for ++ * reference types. Recursion will always terminate at either primitive or ++ * struct/union type, at which point we can "unwind" chain of reference types ++ * one by one. There is no danger of encountering cycles because in C type ++ * system the only way to form type cycle is through struct/union, so any chain ++ * of reference types, even those taking part in a type cycle, will inevitably ++ * reach struct/union at some point. ++ * ++ * 2. Once all referenced type IDs are resolved into canonical ones, BTF type ++ * becomes "stable", in the sense that no further deduplication will cause ++ * any changes to it. With that, it's now possible to calculate type's signature ++ * hash (this time taking into account referenced type IDs) and loop over all ++ * potential canonical representatives. If no match was found, current type ++ * will become canonical representative of itself and will be added into ++ * btf_dedup->dedup_table as another possible canonical representative. ++ */ ++static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) ++{ ++ struct hashmap_entry *hash_entry; ++ __u32 new_id = type_id, cand_id; ++ struct btf_type *t, *cand; ++ /* if we don't find equivalent type, then we are representative type */ ++ int ref_type_id; ++ long h; ++ ++ if (d->map[type_id] == BTF_IN_PROGRESS_ID) ++ return -ELOOP; ++ if (d->map[type_id] <= BTF_MAX_NR_TYPES) ++ return resolve_type_id(d, type_id); ++ ++ t = btf_type_by_id(d->btf, type_id); ++ d->map[type_id] = BTF_IN_PROGRESS_ID; ++ ++ switch (btf_kind(t)) { ++ case BTF_KIND_CONST: ++ case BTF_KIND_VOLATILE: ++ case BTF_KIND_RESTRICT: ++ case BTF_KIND_PTR: ++ case BTF_KIND_TYPEDEF: ++ case BTF_KIND_FUNC: ++ case BTF_KIND_TYPE_TAG: ++ ref_type_id = btf_dedup_ref_type(d, t->type); ++ if (ref_type_id < 0) ++ return ref_type_id; ++ t->type = ref_type_id; ++ ++ h = btf_hash_common(t); ++ for_each_dedup_cand(d, hash_entry, h) { ++ cand_id = (__u32)(long)hash_entry->value; ++ cand = btf_type_by_id(d->btf, cand_id); ++ if (btf_equal_common(t, cand)) { ++ new_id = cand_id; ++ break; ++ } ++ } ++ break; ++ ++ case BTF_KIND_DECL_TAG: ++ ref_type_id = btf_dedup_ref_type(d, t->type); ++ if (ref_type_id < 0) ++ return ref_type_id; ++ t->type = ref_type_id; ++ ++ h = btf_hash_int_decl_tag(t); ++ for_each_dedup_cand(d, hash_entry, h) { ++ cand_id = (__u32)(long)hash_entry->value; ++ cand = btf_type_by_id(d->btf, cand_id); ++ if (btf_equal_int_tag(t, cand)) { ++ new_id = cand_id; ++ break; ++ } ++ } ++ break; ++ ++ case BTF_KIND_ARRAY: { ++ struct btf_array *info = btf_array(t); ++ ++ ref_type_id = btf_dedup_ref_type(d, info->type); ++ if (ref_type_id < 0) ++ return ref_type_id; ++ info->type = ref_type_id; ++ ++ ref_type_id = btf_dedup_ref_type(d, info->index_type); ++ if (ref_type_id < 0) ++ return ref_type_id; ++ info->index_type = ref_type_id; ++ ++ h = btf_hash_array(t); ++ for_each_dedup_cand(d, hash_entry, h) { ++ cand_id = (__u32)(long)hash_entry->value; ++ cand = btf_type_by_id(d->btf, cand_id); ++ if (btf_equal_array(t, cand)) { ++ new_id = cand_id; ++ break; ++ } ++ } ++ break; ++ } ++ ++ case BTF_KIND_FUNC_PROTO: { ++ struct btf_param *param; ++ __u16 vlen; ++ int i; ++ ++ ref_type_id = btf_dedup_ref_type(d, t->type); ++ if (ref_type_id < 0) ++ return ref_type_id; ++ t->type = 
ref_type_id; ++ ++ vlen = btf_vlen(t); ++ param = btf_params(t); ++ for (i = 0; i < vlen; i++) { ++ ref_type_id = btf_dedup_ref_type(d, param->type); ++ if (ref_type_id < 0) ++ return ref_type_id; ++ param->type = ref_type_id; ++ param++; ++ } ++ ++ h = btf_hash_fnproto(t); ++ for_each_dedup_cand(d, hash_entry, h) { ++ cand_id = (__u32)(long)hash_entry->value; ++ cand = btf_type_by_id(d->btf, cand_id); ++ if (btf_equal_fnproto(t, cand)) { ++ new_id = cand_id; ++ break; ++ } ++ } ++ break; ++ } ++ ++ default: ++ return -EINVAL; ++ } ++ ++ d->map[type_id] = new_id; ++ if (type_id == new_id && btf_dedup_table_add(d, h, type_id)) ++ return -ENOMEM; ++ ++ return new_id; ++} ++ ++static int btf_dedup_ref_types(struct btf_dedup *d) ++{ ++ int i, err; ++ ++ for (i = 0; i < d->btf->nr_types; i++) { ++ err = btf_dedup_ref_type(d, d->btf->start_id + i); ++ if (err < 0) ++ return err; ++ } ++ /* we won't need d->dedup_table anymore */ ++ hashmap__free(d->dedup_table); ++ d->dedup_table = NULL; ++ return 0; ++} ++ ++/* ++ * Compact types. ++ * ++ * After we established for each type its corresponding canonical representative ++ * type, we now can eliminate types that are not canonical and leave only ++ * canonical ones layed out sequentially in memory by copying them over ++ * duplicates. During compaction btf_dedup->hypot_map array is reused to store ++ * a map from original type ID to a new compacted type ID, which will be used ++ * during next phase to "fix up" type IDs, referenced from struct/union and ++ * reference types. ++ */ ++static int btf_dedup_compact_types(struct btf_dedup *d) ++{ ++ __u32 *new_offs; ++ __u32 next_type_id = d->btf->start_id; ++ const struct btf_type *t; ++ void *p; ++ int i, id, len; ++ ++ /* we are going to reuse hypot_map to store compaction remapping */ ++ d->hypot_map[0] = 0; ++ /* base BTF types are not renumbered */ ++ for (id = 1; id < d->btf->start_id; id++) ++ d->hypot_map[id] = id; ++ for (i = 0, id = d->btf->start_id; i < d->btf->nr_types; i++, id++) ++ d->hypot_map[id] = BTF_UNPROCESSED_ID; ++ ++ p = d->btf->types_data; ++ ++ for (i = 0, id = d->btf->start_id; i < d->btf->nr_types; i++, id++) { ++ if (d->map[id] != id) ++ continue; ++ ++ t = btf__type_by_id(d->btf, id); ++ len = btf_type_size(t); ++ if (len < 0) ++ return len; ++ ++ memmove(p, t, len); ++ d->hypot_map[id] = next_type_id; ++ d->btf->type_offs[next_type_id - d->btf->start_id] = p - d->btf->types_data; ++ p += len; ++ next_type_id++; ++ } ++ ++ /* shrink struct btf's internal types index and update btf_header */ ++ d->btf->nr_types = next_type_id - d->btf->start_id; ++ d->btf->type_offs_cap = d->btf->nr_types; ++ d->btf->hdr->type_len = p - d->btf->types_data; ++ new_offs = libbpf_reallocarray(d->btf->type_offs, d->btf->type_offs_cap, ++ sizeof(*new_offs)); ++ if (d->btf->type_offs_cap && !new_offs) ++ return -ENOMEM; ++ d->btf->type_offs = new_offs; ++ d->btf->hdr->str_off = d->btf->hdr->type_len; ++ d->btf->raw_size = d->btf->hdr->hdr_len + d->btf->hdr->type_len + d->btf->hdr->str_len; ++ return 0; ++} ++ ++/* ++ * Figure out final (deduplicated and compacted) type ID for provided original ++ * `type_id` by first resolving it into corresponding canonical type ID and ++ * then mapping it to a deduplicated type ID, stored in btf_dedup->hypot_map, ++ * which is populated during compaction phase. 
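The combined effect of compaction and remapping is visible from the outside via btf__raw_data(); a sketch, assuming `btf` is a writable (non-split) BTF object that may contain duplicates:

#include <stdio.h>
#include "btf.h"

static void show_shrink(struct btf *btf)
{
        __u32 before, after;

        (void)btf__raw_data(btf, &before);
        btf__dedup(btf, NULL);
        (void)btf__raw_data(btf, &after);  /* re-serialized from compacted types and deduped strings */

        /* surviving types are renumbered sequentially and every referenced type
         * ID is rewritten by btf_dedup_remap_types(), so 'after' <= 'before'
         */
        printf("raw BTF size: %u -> %u bytes\n", before, after);
}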
++ */ ++static int btf_dedup_remap_type_id(__u32 *type_id, void *ctx) ++{ ++ struct btf_dedup *d = ctx; ++ __u32 resolved_type_id, new_type_id; ++ ++ resolved_type_id = resolve_type_id(d, *type_id); ++ new_type_id = d->hypot_map[resolved_type_id]; ++ if (new_type_id > BTF_MAX_NR_TYPES) ++ return -EINVAL; ++ ++ *type_id = new_type_id; ++ return 0; ++} ++ ++/* ++ * Remap referenced type IDs into deduped type IDs. ++ * ++ * After BTF types are deduplicated and compacted, their final type IDs may ++ * differ from original ones. The map from original to a corresponding ++ * deduped type ID is stored in btf_dedup->hypot_map and is populated during ++ * compaction phase. During remapping phase we are rewriting all type IDs ++ * referenced from any BTF type (e.g., struct fields, func proto args, etc) to ++ * their final deduped type IDs. ++ */ ++static int btf_dedup_remap_types(struct btf_dedup *d) ++{ ++ int i, r; ++ ++ for (i = 0; i < d->btf->nr_types; i++) { ++ struct btf_type *t = btf_type_by_id(d->btf, d->btf->start_id + i); ++ ++ r = btf_type_visit_type_ids(t, btf_dedup_remap_type_id, d); ++ if (r) ++ return r; ++ } ++ ++ if (!d->btf_ext) ++ return 0; ++ ++ r = btf_ext_visit_type_ids(d->btf_ext, btf_dedup_remap_type_id, d); ++ if (r) ++ return r; ++ ++ return 0; ++} ++ ++/* ++ * Probe few well-known locations for vmlinux kernel image and try to load BTF ++ * data out of it to use for target BTF. ++ */ ++struct btf *btf__load_vmlinux_btf(void) ++{ ++ struct { ++ const char *path_fmt; ++ bool raw_btf; ++ } locations[] = { ++ /* try canonical vmlinux BTF through sysfs first */ ++ { "/sys/kernel/btf/vmlinux", true /* raw BTF */ }, ++ /* fall back to trying to find vmlinux ELF on disk otherwise */ ++ { "/boot/vmlinux-%1$s" }, ++ { "/lib/modules/%1$s/vmlinux-%1$s" }, ++ { "/lib/modules/%1$s/build/vmlinux" }, ++ { "/usr/lib/modules/%1$s/kernel/vmlinux" }, ++ { "/usr/lib/debug/boot/vmlinux-%1$s" }, ++ { "/usr/lib/debug/boot/vmlinux-%1$s.debug" }, ++ { "/usr/lib/debug/lib/modules/%1$s/vmlinux" }, ++ }; ++ char path[PATH_MAX + 1]; ++ struct utsname buf; ++ struct btf *btf; ++ int i, err; ++ ++ uname(&buf); ++ ++ for (i = 0; i < ARRAY_SIZE(locations); i++) { ++ snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release); ++ ++ if (access(path, R_OK)) ++ continue; ++ ++ if (locations[i].raw_btf) ++ btf = btf__parse_raw(path); ++ else ++ btf = btf__parse_elf(path, NULL); ++ err = libbpf_get_error(btf); ++ pr_debug("loading kernel BTF '%s': %d\n", path, err); ++ if (err) ++ continue; ++ ++ return btf; ++ } ++ ++ pr_warn("failed to find valid kernel BTF\n"); ++ return libbpf_err_ptr(-ESRCH); ++} ++ ++struct btf *libbpf_find_kernel_btf(void) __attribute__((alias("btf__load_vmlinux_btf"))); ++ ++struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf) ++{ ++ char path[80]; ++ ++ snprintf(path, sizeof(path), "/sys/kernel/btf/%s", module_name); ++ return btf__parse_split(path, vmlinux_btf); ++} ++ ++int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx) ++{ ++ int i, n, err; ++ ++ switch (btf_kind(t)) { ++ case BTF_KIND_INT: ++ case BTF_KIND_FLOAT: ++ case BTF_KIND_ENUM: ++ case BTF_KIND_ENUM64: ++ return 0; ++ ++ case BTF_KIND_FWD: ++ case BTF_KIND_CONST: ++ case BTF_KIND_VOLATILE: ++ case BTF_KIND_RESTRICT: ++ case BTF_KIND_PTR: ++ case BTF_KIND_TYPEDEF: ++ case BTF_KIND_FUNC: ++ case BTF_KIND_VAR: ++ case BTF_KIND_DECL_TAG: ++ case BTF_KIND_TYPE_TAG: ++ return visit(&t->type, ctx); ++ ++ case BTF_KIND_ARRAY: { ++ struct btf_array *a = btf_array(t); ++ 
++ err = visit(&a->type, ctx); ++ err = err ?: visit(&a->index_type, ctx); ++ return err; ++ } ++ ++ case BTF_KIND_STRUCT: ++ case BTF_KIND_UNION: { ++ struct btf_member *m = btf_members(t); ++ ++ for (i = 0, n = btf_vlen(t); i < n; i++, m++) { ++ err = visit(&m->type, ctx); ++ if (err) ++ return err; ++ } ++ return 0; ++ } ++ ++ case BTF_KIND_FUNC_PROTO: { ++ struct btf_param *m = btf_params(t); ++ ++ err = visit(&t->type, ctx); ++ if (err) ++ return err; ++ for (i = 0, n = btf_vlen(t); i < n; i++, m++) { ++ err = visit(&m->type, ctx); ++ if (err) ++ return err; ++ } ++ return 0; ++ } ++ ++ case BTF_KIND_DATASEC: { ++ struct btf_var_secinfo *m = btf_var_secinfos(t); ++ ++ for (i = 0, n = btf_vlen(t); i < n; i++, m++) { ++ err = visit(&m->type, ctx); ++ if (err) ++ return err; ++ } ++ return 0; ++ } ++ ++ default: ++ return -EINVAL; ++ } ++} ++ ++int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx) ++{ ++ int i, n, err; ++ ++ err = visit(&t->name_off, ctx); ++ if (err) ++ return err; ++ ++ switch (btf_kind(t)) { ++ case BTF_KIND_STRUCT: ++ case BTF_KIND_UNION: { ++ struct btf_member *m = btf_members(t); ++ ++ for (i = 0, n = btf_vlen(t); i < n; i++, m++) { ++ err = visit(&m->name_off, ctx); ++ if (err) ++ return err; ++ } ++ break; ++ } ++ case BTF_KIND_ENUM: { ++ struct btf_enum *m = btf_enum(t); ++ ++ for (i = 0, n = btf_vlen(t); i < n; i++, m++) { ++ err = visit(&m->name_off, ctx); ++ if (err) ++ return err; ++ } ++ break; ++ } ++ case BTF_KIND_ENUM64: { ++ struct btf_enum64 *m = btf_enum64(t); ++ ++ for (i = 0, n = btf_vlen(t); i < n; i++, m++) { ++ err = visit(&m->name_off, ctx); ++ if (err) ++ return err; ++ } ++ break; ++ } ++ case BTF_KIND_FUNC_PROTO: { ++ struct btf_param *m = btf_params(t); ++ ++ for (i = 0, n = btf_vlen(t); i < n; i++, m++) { ++ err = visit(&m->name_off, ctx); ++ if (err) ++ return err; ++ } ++ break; ++ } ++ default: ++ break; ++ } ++ ++ return 0; ++} ++ ++int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx) ++{ ++ const struct btf_ext_info *seg; ++ struct btf_ext_info_sec *sec; ++ int i, err; ++ ++ seg = &btf_ext->func_info; ++ for_each_btf_ext_sec(seg, sec) { ++ struct bpf_func_info_min *rec; ++ ++ for_each_btf_ext_rec(seg, sec, i, rec) { ++ err = visit(&rec->type_id, ctx); ++ if (err < 0) ++ return err; ++ } ++ } ++ ++ seg = &btf_ext->core_relo_info; ++ for_each_btf_ext_sec(seg, sec) { ++ struct bpf_core_relo *rec; ++ ++ for_each_btf_ext_rec(seg, sec, i, rec) { ++ err = visit(&rec->type_id, ctx); ++ if (err < 0) ++ return err; ++ } ++ } ++ ++ return 0; ++} ++ ++int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx) ++{ ++ const struct btf_ext_info *seg; ++ struct btf_ext_info_sec *sec; ++ int i, err; ++ ++ seg = &btf_ext->func_info; ++ for_each_btf_ext_sec(seg, sec) { ++ err = visit(&sec->sec_name_off, ctx); ++ if (err) ++ return err; ++ } ++ ++ seg = &btf_ext->line_info; ++ for_each_btf_ext_sec(seg, sec) { ++ struct bpf_line_info_min *rec; ++ ++ err = visit(&sec->sec_name_off, ctx); ++ if (err) ++ return err; ++ ++ for_each_btf_ext_rec(seg, sec, i, rec) { ++ err = visit(&rec->file_name_off, ctx); ++ if (err) ++ return err; ++ err = visit(&rec->line_off, ctx); ++ if (err) ++ return err; ++ } ++ } ++ ++ seg = &btf_ext->core_relo_info; ++ for_each_btf_ext_sec(seg, sec) { ++ struct bpf_core_relo *rec; ++ ++ err = visit(&sec->sec_name_off, ctx); ++ if (err) ++ return err; ++ ++ for_each_btf_ext_rec(seg, sec, i, rec) { ++ err = visit(&rec->access_str_off, ctx); ++ if 
(err) ++ return err; ++ } ++ } ++ ++ return 0; ++} +diff --git a/src/cc/libbpf/src/btf.h b/src/cc/libbpf/src/btf.h +new file mode 100644 +index 0000000..583760d +--- /dev/null ++++ b/src/cc/libbpf/src/btf.h +@@ -0,0 +1,553 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++/* Copyright (c) 2018 Facebook */ ++/*! \file */ ++ ++#ifndef __LIBBPF_BTF_H ++#define __LIBBPF_BTF_H ++ ++#include ++#include ++#include ++#include ++ ++#include "libbpf_common.h" ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#define BTF_ELF_SEC ".BTF" ++#define BTF_EXT_ELF_SEC ".BTF.ext" ++#define MAPS_ELF_SEC ".maps" ++ ++struct btf; ++struct btf_ext; ++struct btf_type; ++ ++struct bpf_object; ++ ++enum btf_endianness { ++ BTF_LITTLE_ENDIAN = 0, ++ BTF_BIG_ENDIAN = 1, ++}; ++ ++/** ++ * @brief **btf__free()** frees all data of a BTF object ++ * @param btf BTF object to free ++ */ ++LIBBPF_API void btf__free(struct btf *btf); ++ ++/** ++ * @brief **btf__new()** creates a new instance of a BTF object from the raw ++ * bytes of an ELF's BTF section ++ * @param data raw bytes ++ * @param size number of bytes passed in `data` ++ * @return new BTF object instance which has to be eventually freed with ++ * **btf__free()** ++ * ++ * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract ++ * error code from such a pointer `libbpf_get_error()` should be used. If ++ * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is ++ * returned on error instead. In both cases thread-local `errno` variable is ++ * always set to error code as well. ++ */ ++LIBBPF_API struct btf *btf__new(const void *data, __u32 size); ++ ++/** ++ * @brief **btf__new_split()** create a new instance of a BTF object from the ++ * provided raw data bytes. It takes another BTF instance, **base_btf**, which ++ * serves as a base BTF, which is extended by types in a newly created BTF ++ * instance ++ * @param data raw bytes ++ * @param size length of raw bytes ++ * @param base_btf the base BTF object ++ * @return new BTF object instance which has to be eventually freed with ++ * **btf__free()** ++ * ++ * If *base_btf* is NULL, `btf__new_split()` is equivalent to `btf__new()` and ++ * creates non-split BTF. ++ * ++ * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract ++ * error code from such a pointer `libbpf_get_error()` should be used. If ++ * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is ++ * returned on error instead. In both cases thread-local `errno` variable is ++ * always set to error code as well. ++ */ ++LIBBPF_API struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf); ++ ++/** ++ * @brief **btf__new_empty()** creates an empty BTF object. Use ++ * `btf__add_*()` to populate such BTF object. ++ * @return new BTF object instance which has to be eventually freed with ++ * **btf__free()** ++ * ++ * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract ++ * error code from such a pointer `libbpf_get_error()` should be used. If ++ * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is ++ * returned on error instead. In both cases thread-local `errno` variable is ++ * always set to error code as well. 
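For illustration only (not part of the upstream sources): the error-return convention documented above is the same for all pointer-returning constructors in this header, so a caller typically funnels the result through libbpf_get_error(). A minimal sketch, assuming raw BTF bytes are already available in the hypothetical `data`/`size` variables:

#include <errno.h>
#include <stdio.h>
#include "btf.h"
#include "libbpf.h"	/* libbpf_get_error() */

/* Illustrative sketch: parse raw BTF bytes and normalize error handling,
 * working both with and without LIBBPF_STRICT_CLEAN_PTRS. */
static struct btf *parse_raw_btf(const void *data, __u32 size)
{
	struct btf *btf = btf__new(data, size);
	long err = libbpf_get_error(btf);

	if (err) {
		fprintf(stderr, "btf__new failed: %ld (errno: %d)\n", err, errno);
		return NULL;
	}
	return btf;
}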
++ */ ++LIBBPF_API struct btf *btf__new_empty(void); ++ ++/** ++ * @brief **btf__new_empty_split()** creates an unpopulated BTF object from an ++ * ELF BTF section except with a base BTF on top of which split BTF should be ++ * based ++ * @return new BTF object instance which has to be eventually freed with ++ * **btf__free()** ++ * ++ * If *base_btf* is NULL, `btf__new_empty_split()` is equivalent to ++ * `btf__new_empty()` and creates non-split BTF. ++ * ++ * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract ++ * error code from such a pointer `libbpf_get_error()` should be used. If ++ * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is ++ * returned on error instead. In both cases thread-local `errno` variable is ++ * always set to error code as well. ++ */ ++LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf); ++ ++LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext); ++LIBBPF_API struct btf *btf__parse_split(const char *path, struct btf *base_btf); ++LIBBPF_API struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext); ++LIBBPF_API struct btf *btf__parse_elf_split(const char *path, struct btf *base_btf); ++LIBBPF_API struct btf *btf__parse_raw(const char *path); ++LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf); ++ ++LIBBPF_API struct btf *btf__load_vmlinux_btf(void); ++LIBBPF_API struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf); ++LIBBPF_API struct btf *libbpf_find_kernel_btf(void); ++ ++LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id); ++LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf); ++ ++LIBBPF_API int btf__load_into_kernel(struct btf *btf); ++LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, ++ const char *type_name); ++LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf, ++ const char *type_name, __u32 kind); ++LIBBPF_API __u32 btf__type_cnt(const struct btf *btf); ++LIBBPF_API const struct btf *btf__base_btf(const struct btf *btf); ++LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, ++ __u32 id); ++LIBBPF_API size_t btf__pointer_size(const struct btf *btf); ++LIBBPF_API int btf__set_pointer_size(struct btf *btf, size_t ptr_sz); ++LIBBPF_API enum btf_endianness btf__endianness(const struct btf *btf); ++LIBBPF_API int btf__set_endianness(struct btf *btf, enum btf_endianness endian); ++LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); ++LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); ++LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id); ++LIBBPF_API int btf__fd(const struct btf *btf); ++LIBBPF_API void btf__set_fd(struct btf *btf, int fd); ++LIBBPF_API const void *btf__raw_data(const struct btf *btf, __u32 *size); ++LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); ++LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset); ++ ++LIBBPF_API struct btf_ext *btf_ext__new(const __u8 *data, __u32 size); ++LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext); ++LIBBPF_API const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size); ++ ++LIBBPF_API int btf__find_str(struct btf *btf, const char *s); ++LIBBPF_API int btf__add_str(struct btf *btf, const char *s); ++LIBBPF_API int btf__add_type(struct btf *btf, const struct btf *src_btf, ++ const struct btf_type *src_type); ++/** ++ * @brief 
**btf__add_btf()** appends all the BTF types from *src_btf* into *btf* ++ * @param btf BTF object which all the BTF types and strings are added to ++ * @param src_btf BTF object which all BTF types and referenced strings are copied from ++ * @return BTF type ID of the first appended BTF type, or negative error code ++ * ++ * **btf__add_btf()** can be used to simply and efficiently append the entire ++ * contents of one BTF object to another one. All the BTF type data is copied ++ * over, all referenced type IDs are adjusted by adding a necessary ID offset. ++ * Only strings referenced from BTF types are copied over and deduplicated, so ++ * if there were some unused strings in *src_btf*, those won't be copied over, ++ * which is consistent with the general string deduplication semantics of BTF ++ * writing APIs. ++ * ++ * If any error is encountered during this process, the contents of *btf* is ++ * left intact, which means that **btf__add_btf()** follows the transactional ++ * semantics and the operation as a whole is all-or-nothing. ++ * ++ * *src_btf* has to be non-split BTF, as of now copying types from split BTF ++ * is not supported and will result in -ENOTSUP error code returned. ++ */ ++LIBBPF_API int btf__add_btf(struct btf *btf, const struct btf *src_btf); ++ ++LIBBPF_API int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding); ++LIBBPF_API int btf__add_float(struct btf *btf, const char *name, size_t byte_sz); ++LIBBPF_API int btf__add_ptr(struct btf *btf, int ref_type_id); ++LIBBPF_API int btf__add_array(struct btf *btf, ++ int index_type_id, int elem_type_id, __u32 nr_elems); ++/* struct/union construction APIs */ ++LIBBPF_API int btf__add_struct(struct btf *btf, const char *name, __u32 sz); ++LIBBPF_API int btf__add_union(struct btf *btf, const char *name, __u32 sz); ++LIBBPF_API int btf__add_field(struct btf *btf, const char *name, int field_type_id, ++ __u32 bit_offset, __u32 bit_size); ++ ++/* enum construction APIs */ ++LIBBPF_API int btf__add_enum(struct btf *btf, const char *name, __u32 bytes_sz); ++LIBBPF_API int btf__add_enum_value(struct btf *btf, const char *name, __s64 value); ++LIBBPF_API int btf__add_enum64(struct btf *btf, const char *name, __u32 bytes_sz, bool is_signed); ++LIBBPF_API int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value); ++ ++enum btf_fwd_kind { ++ BTF_FWD_STRUCT = 0, ++ BTF_FWD_UNION = 1, ++ BTF_FWD_ENUM = 2, ++}; ++ ++LIBBPF_API int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind); ++LIBBPF_API int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id); ++LIBBPF_API int btf__add_volatile(struct btf *btf, int ref_type_id); ++LIBBPF_API int btf__add_const(struct btf *btf, int ref_type_id); ++LIBBPF_API int btf__add_restrict(struct btf *btf, int ref_type_id); ++LIBBPF_API int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id); ++ ++/* func and func_proto construction APIs */ ++LIBBPF_API int btf__add_func(struct btf *btf, const char *name, ++ enum btf_func_linkage linkage, int proto_type_id); ++LIBBPF_API int btf__add_func_proto(struct btf *btf, int ret_type_id); ++LIBBPF_API int btf__add_func_param(struct btf *btf, const char *name, int type_id); ++ ++/* var & datasec construction APIs */ ++LIBBPF_API int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id); ++LIBBPF_API int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz); ++LIBBPF_API int btf__add_datasec_var_info(struct btf *btf, int var_type_id, 
++ __u32 offset, __u32 byte_sz); ++ ++/* tag construction API */ ++LIBBPF_API int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, ++ int component_idx); ++ ++struct btf_dedup_opts { ++ size_t sz; ++ /* optional .BTF.ext info to dedup along the main BTF info */ ++ struct btf_ext *btf_ext; ++ /* force hash collisions (used for testing) */ ++ bool force_collisions; ++ size_t :0; ++}; ++#define btf_dedup_opts__last_field force_collisions ++ ++LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts); ++ ++struct btf_dump; ++ ++struct btf_dump_opts { ++ size_t sz; ++}; ++#define btf_dump_opts__last_field sz ++ ++typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args); ++ ++LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf, ++ btf_dump_printf_fn_t printf_fn, ++ void *ctx, ++ const struct btf_dump_opts *opts); ++ ++LIBBPF_API void btf_dump__free(struct btf_dump *d); ++ ++LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id); ++ ++struct btf_dump_emit_type_decl_opts { ++ /* size of this struct, for forward/backward compatiblity */ ++ size_t sz; ++ /* optional field name for type declaration, e.g.: ++ * - struct my_struct ++ * - void (*)(int) ++ * - char (*)[123] ++ */ ++ const char *field_name; ++ /* extra indentation level (in number of tabs) to emit for multi-line ++ * type declarations (e.g., anonymous struct); applies for lines ++ * starting from the second one (first line is assumed to have ++ * necessary indentation already ++ */ ++ int indent_level; ++ /* strip all the const/volatile/restrict mods */ ++ bool strip_mods; ++ size_t :0; ++}; ++#define btf_dump_emit_type_decl_opts__last_field strip_mods ++ ++LIBBPF_API int ++btf_dump__emit_type_decl(struct btf_dump *d, __u32 id, ++ const struct btf_dump_emit_type_decl_opts *opts); ++ ++ ++struct btf_dump_type_data_opts { ++ /* size of this struct, for forward/backward compatibility */ ++ size_t sz; ++ const char *indent_str; ++ int indent_level; ++ /* below match "show" flags for bpf_show_snprintf() */ ++ bool compact; /* no newlines/indentation */ ++ bool skip_names; /* skip member/type names */ ++ bool emit_zeroes; /* show 0-valued fields */ ++ size_t :0; ++}; ++#define btf_dump_type_data_opts__last_field emit_zeroes ++ ++LIBBPF_API int ++btf_dump__dump_type_data(struct btf_dump *d, __u32 id, ++ const void *data, size_t data_sz, ++ const struct btf_dump_type_data_opts *opts); ++ ++/* ++ * A set of helpers for easier BTF types handling. ++ * ++ * The inline functions below rely on constants from the kernel headers which ++ * may not be available for applications including this header file. To avoid ++ * compilation errors, we define all the constants here that were added after ++ * the initial introduction of the BTF_KIND* constants. 
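For illustration only (not part of the upstream sources): the btf__add_*() constructors declared above are meant to be combined with btf__new_empty() to build BTF programmatically. A minimal sketch that defines `struct point { int x; int y; };` — the type and field names are chosen purely for illustration:

#include <linux/btf.h>	/* BTF_INT_SIGNED */
#include "btf.h"

/* Illustrative sketch: add "int" and "struct point { int x; int y; }" to an
 * existing (possibly empty) BTF object; returns the struct's type ID. */
static int build_point_struct(struct btf *btf)
{
	int int_id, struct_id, err;

	int_id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
	if (int_id < 0)
		return int_id;

	struct_id = btf__add_struct(btf, "point", 8);	/* 8 bytes total */
	if (struct_id < 0)
		return struct_id;

	/* bit_offset is in bits; bit_size of 0 means "not a bitfield" */
	err = btf__add_field(btf, "x", int_id, 0, 0);
	if (!err)
		err = btf__add_field(btf, "y", int_id, 32, 0);
	return err ? err : struct_id;
}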
++ */ ++#ifndef BTF_KIND_FUNC ++#define BTF_KIND_FUNC 12 /* Function */ ++#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ ++#endif ++#ifndef BTF_KIND_VAR ++#define BTF_KIND_VAR 14 /* Variable */ ++#define BTF_KIND_DATASEC 15 /* Section */ ++#endif ++#ifndef BTF_KIND_FLOAT ++#define BTF_KIND_FLOAT 16 /* Floating point */ ++#endif ++/* The kernel header switched to enums, so the following were never #defined */ ++#define BTF_KIND_DECL_TAG 17 /* Decl Tag */ ++#define BTF_KIND_TYPE_TAG 18 /* Type Tag */ ++#define BTF_KIND_ENUM64 19 /* Enum for up-to 64bit values */ ++ ++static inline __u16 btf_kind(const struct btf_type *t) ++{ ++ return BTF_INFO_KIND(t->info); ++} ++ ++static inline __u16 btf_vlen(const struct btf_type *t) ++{ ++ return BTF_INFO_VLEN(t->info); ++} ++ ++static inline bool btf_kflag(const struct btf_type *t) ++{ ++ return BTF_INFO_KFLAG(t->info); ++} ++ ++static inline bool btf_is_void(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_UNKN; ++} ++ ++static inline bool btf_is_int(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_INT; ++} ++ ++static inline bool btf_is_ptr(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_PTR; ++} ++ ++static inline bool btf_is_array(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_ARRAY; ++} ++ ++static inline bool btf_is_struct(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_STRUCT; ++} ++ ++static inline bool btf_is_union(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_UNION; ++} ++ ++static inline bool btf_is_composite(const struct btf_type *t) ++{ ++ __u16 kind = btf_kind(t); ++ ++ return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION; ++} ++ ++static inline bool btf_is_enum(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_ENUM; ++} ++ ++static inline bool btf_is_enum64(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_ENUM64; ++} ++ ++static inline bool btf_is_fwd(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_FWD; ++} ++ ++static inline bool btf_is_typedef(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_TYPEDEF; ++} ++ ++static inline bool btf_is_volatile(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_VOLATILE; ++} ++ ++static inline bool btf_is_const(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_CONST; ++} ++ ++static inline bool btf_is_restrict(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_RESTRICT; ++} ++ ++static inline bool btf_is_mod(const struct btf_type *t) ++{ ++ __u16 kind = btf_kind(t); ++ ++ return kind == BTF_KIND_VOLATILE || ++ kind == BTF_KIND_CONST || ++ kind == BTF_KIND_RESTRICT || ++ kind == BTF_KIND_TYPE_TAG; ++} ++ ++static inline bool btf_is_func(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_FUNC; ++} ++ ++static inline bool btf_is_func_proto(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_FUNC_PROTO; ++} ++ ++static inline bool btf_is_var(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_VAR; ++} ++ ++static inline bool btf_is_datasec(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_DATASEC; ++} ++ ++static inline bool btf_is_float(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_FLOAT; ++} ++ ++static inline bool btf_is_decl_tag(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_DECL_TAG; ++} ++ ++static inline bool btf_is_type_tag(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_TYPE_TAG; ++} ++ ++static 
inline bool btf_is_any_enum(const struct btf_type *t) ++{ ++ return btf_is_enum(t) || btf_is_enum64(t); ++} ++ ++static inline bool btf_kind_core_compat(const struct btf_type *t1, ++ const struct btf_type *t2) ++{ ++ return btf_kind(t1) == btf_kind(t2) || ++ (btf_is_any_enum(t1) && btf_is_any_enum(t2)); ++} ++ ++static inline __u8 btf_int_encoding(const struct btf_type *t) ++{ ++ return BTF_INT_ENCODING(*(__u32 *)(t + 1)); ++} ++ ++static inline __u8 btf_int_offset(const struct btf_type *t) ++{ ++ return BTF_INT_OFFSET(*(__u32 *)(t + 1)); ++} ++ ++static inline __u8 btf_int_bits(const struct btf_type *t) ++{ ++ return BTF_INT_BITS(*(__u32 *)(t + 1)); ++} ++ ++static inline struct btf_array *btf_array(const struct btf_type *t) ++{ ++ return (struct btf_array *)(t + 1); ++} ++ ++static inline struct btf_enum *btf_enum(const struct btf_type *t) ++{ ++ return (struct btf_enum *)(t + 1); ++} ++ ++static inline struct btf_enum64 *btf_enum64(const struct btf_type *t) ++{ ++ return (struct btf_enum64 *)(t + 1); ++} ++ ++static inline __u64 btf_enum64_value(const struct btf_enum64 *e) ++{ ++ return ((__u64)e->val_hi32 << 32) | e->val_lo32; ++} ++ ++static inline struct btf_member *btf_members(const struct btf_type *t) ++{ ++ return (struct btf_member *)(t + 1); ++} ++ ++/* Get bit offset of a member with specified index. */ ++static inline __u32 btf_member_bit_offset(const struct btf_type *t, ++ __u32 member_idx) ++{ ++ const struct btf_member *m = btf_members(t) + member_idx; ++ bool kflag = btf_kflag(t); ++ ++ return kflag ? BTF_MEMBER_BIT_OFFSET(m->offset) : m->offset; ++} ++/* ++ * Get bitfield size of a member, assuming t is BTF_KIND_STRUCT or ++ * BTF_KIND_UNION. If member is not a bitfield, zero is returned. ++ */ ++static inline __u32 btf_member_bitfield_size(const struct btf_type *t, ++ __u32 member_idx) ++{ ++ const struct btf_member *m = btf_members(t) + member_idx; ++ bool kflag = btf_kflag(t); ++ ++ return kflag ? BTF_MEMBER_BITFIELD_SIZE(m->offset) : 0; ++} ++ ++static inline struct btf_param *btf_params(const struct btf_type *t) ++{ ++ return (struct btf_param *)(t + 1); ++} ++ ++static inline struct btf_var *btf_var(const struct btf_type *t) ++{ ++ return (struct btf_var *)(t + 1); ++} ++ ++static inline struct btf_var_secinfo * ++btf_var_secinfos(const struct btf_type *t) ++{ ++ return (struct btf_var_secinfo *)(t + 1); ++} ++ ++struct btf_decl_tag; ++static inline struct btf_decl_tag *btf_decl_tag(const struct btf_type *t) ++{ ++ return (struct btf_decl_tag *)(t + 1); ++} ++ ++#ifdef __cplusplus ++} /* extern "C" */ ++#endif ++ ++#endif /* __LIBBPF_BTF_H */ +diff --git a/src/cc/libbpf/src/btf_dump.c b/src/cc/libbpf/src/btf_dump.c +new file mode 100644 +index 0000000..627edb5 +--- /dev/null ++++ b/src/cc/libbpf/src/btf_dump.c +@@ -0,0 +1,2403 @@ ++// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) ++ ++/* ++ * BTF-to-C type converter. ++ * ++ * Copyright (c) 2019 Facebook ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "btf.h" ++#include "hashmap.h" ++#include "libbpf.h" ++#include "libbpf_internal.h" ++ ++static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t"; ++static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1; ++ ++static const char *pfx(int lvl) ++{ ++ return lvl >= PREFIX_CNT ? 
PREFIXES : &PREFIXES[PREFIX_CNT - lvl]; ++} ++ ++enum btf_dump_type_order_state { ++ NOT_ORDERED, ++ ORDERING, ++ ORDERED, ++}; ++ ++enum btf_dump_type_emit_state { ++ NOT_EMITTED, ++ EMITTING, ++ EMITTED, ++}; ++ ++/* per-type auxiliary state */ ++struct btf_dump_type_aux_state { ++ /* topological sorting state */ ++ enum btf_dump_type_order_state order_state: 2; ++ /* emitting state used to determine the need for forward declaration */ ++ enum btf_dump_type_emit_state emit_state: 2; ++ /* whether forward declaration was already emitted */ ++ __u8 fwd_emitted: 1; ++ /* whether unique non-duplicate name was already assigned */ ++ __u8 name_resolved: 1; ++ /* whether type is referenced from any other type */ ++ __u8 referenced: 1; ++}; ++ ++/* indent string length; one indent string is added for each indent level */ ++#define BTF_DATA_INDENT_STR_LEN 32 ++ ++/* ++ * Common internal data for BTF type data dump operations. ++ */ ++struct btf_dump_data { ++ const void *data_end; /* end of valid data to show */ ++ bool compact; ++ bool skip_names; ++ bool emit_zeroes; ++ __u8 indent_lvl; /* base indent level */ ++ char indent_str[BTF_DATA_INDENT_STR_LEN]; ++ /* below are used during iteration */ ++ int depth; ++ bool is_array_member; ++ bool is_array_terminated; ++ bool is_array_char; ++}; ++ ++struct btf_dump { ++ const struct btf *btf; ++ btf_dump_printf_fn_t printf_fn; ++ void *cb_ctx; ++ int ptr_sz; ++ bool strip_mods; ++ bool skip_anon_defs; ++ int last_id; ++ ++ /* per-type auxiliary state */ ++ struct btf_dump_type_aux_state *type_states; ++ size_t type_states_cap; ++ /* per-type optional cached unique name, must be freed, if present */ ++ const char **cached_names; ++ size_t cached_names_cap; ++ ++ /* topo-sorted list of dependent type definitions */ ++ __u32 *emit_queue; ++ int emit_queue_cap; ++ int emit_queue_cnt; ++ ++ /* ++ * stack of type declarations (e.g., chain of modifiers, arrays, ++ * funcs, etc) ++ */ ++ __u32 *decl_stack; ++ int decl_stack_cap; ++ int decl_stack_cnt; ++ ++ /* maps struct/union/enum name to a number of name occurrences */ ++ struct hashmap *type_names; ++ /* ++ * maps typedef identifiers and enum value names to a number of such ++ * name occurrences ++ */ ++ struct hashmap *ident_names; ++ /* ++ * data for typed display; allocated if needed. ++ */ ++ struct btf_dump_data *typed_dump; ++}; ++ ++static size_t str_hash_fn(const void *key, void *ctx) ++{ ++ return str_hash(key); ++} ++ ++static bool str_equal_fn(const void *a, const void *b, void *ctx) ++{ ++ return strcmp(a, b) == 0; ++} ++ ++static const char *btf_name_of(const struct btf_dump *d, __u32 name_off) ++{ ++ return btf__name_by_offset(d->btf, name_off); ++} ++ ++static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...) ++{ ++ va_list args; ++ ++ va_start(args, fmt); ++ d->printf_fn(d->cb_ctx, fmt, args); ++ va_end(args); ++} ++ ++static int btf_dump_mark_referenced(struct btf_dump *d); ++static int btf_dump_resize(struct btf_dump *d); ++ ++struct btf_dump *btf_dump__new(const struct btf *btf, ++ btf_dump_printf_fn_t printf_fn, ++ void *ctx, ++ const struct btf_dump_opts *opts) ++{ ++ struct btf_dump *d; ++ int err; ++ ++ if (!OPTS_VALID(opts, btf_dump_opts)) ++ return libbpf_err_ptr(-EINVAL); ++ ++ if (!printf_fn) ++ return libbpf_err_ptr(-EINVAL); ++ ++ d = calloc(1, sizeof(struct btf_dump)); ++ if (!d) ++ return libbpf_err_ptr(-ENOMEM); ++ ++ d->btf = btf; ++ d->printf_fn = printf_fn; ++ d->cb_ctx = ctx; ++ d->ptr_sz = btf__pointer_size(btf) ? 
: sizeof(void *); ++ ++ d->type_names = hashmap__new(str_hash_fn, str_equal_fn, NULL); ++ if (IS_ERR(d->type_names)) { ++ err = PTR_ERR(d->type_names); ++ d->type_names = NULL; ++ goto err; ++ } ++ d->ident_names = hashmap__new(str_hash_fn, str_equal_fn, NULL); ++ if (IS_ERR(d->ident_names)) { ++ err = PTR_ERR(d->ident_names); ++ d->ident_names = NULL; ++ goto err; ++ } ++ ++ err = btf_dump_resize(d); ++ if (err) ++ goto err; ++ ++ return d; ++err: ++ btf_dump__free(d); ++ return libbpf_err_ptr(err); ++} ++ ++static int btf_dump_resize(struct btf_dump *d) ++{ ++ int err, last_id = btf__type_cnt(d->btf) - 1; ++ ++ if (last_id <= d->last_id) ++ return 0; ++ ++ if (libbpf_ensure_mem((void **)&d->type_states, &d->type_states_cap, ++ sizeof(*d->type_states), last_id + 1)) ++ return -ENOMEM; ++ if (libbpf_ensure_mem((void **)&d->cached_names, &d->cached_names_cap, ++ sizeof(*d->cached_names), last_id + 1)) ++ return -ENOMEM; ++ ++ if (d->last_id == 0) { ++ /* VOID is special */ ++ d->type_states[0].order_state = ORDERED; ++ d->type_states[0].emit_state = EMITTED; ++ } ++ ++ /* eagerly determine referenced types for anon enums */ ++ err = btf_dump_mark_referenced(d); ++ if (err) ++ return err; ++ ++ d->last_id = last_id; ++ return 0; ++} ++ ++void btf_dump__free(struct btf_dump *d) ++{ ++ int i; ++ ++ if (IS_ERR_OR_NULL(d)) ++ return; ++ ++ free(d->type_states); ++ if (d->cached_names) { ++ /* any set cached name is owned by us and should be freed */ ++ for (i = 0; i <= d->last_id; i++) { ++ if (d->cached_names[i]) ++ free((void *)d->cached_names[i]); ++ } ++ } ++ free(d->cached_names); ++ free(d->emit_queue); ++ free(d->decl_stack); ++ hashmap__free(d->type_names); ++ hashmap__free(d->ident_names); ++ ++ free(d); ++} ++ ++static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr); ++static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id); ++ ++/* ++ * Dump BTF type in a compilable C syntax, including all the necessary ++ * dependent types, necessary for compilation. If some of the dependent types ++ * were already emitted as part of previous btf_dump__dump_type() invocation ++ * for another type, they won't be emitted again. This API allows callers to ++ * filter out BTF types according to user-defined criterias and emitted only ++ * minimal subset of types, necessary to compile everything. Full struct/union ++ * definitions will still be emitted, even if the only usage is through ++ * pointer and could be satisfied with just a forward declaration. ++ * ++ * Dumping is done in two high-level passes: ++ * 1. Topologically sort type definitions to satisfy C rules of compilation. ++ * 2. Emit type definitions in C syntax. ++ * ++ * Returns 0 on success; <0, otherwise. ++ */ ++int btf_dump__dump_type(struct btf_dump *d, __u32 id) ++{ ++ int err, i; ++ ++ if (id >= btf__type_cnt(d->btf)) ++ return libbpf_err(-EINVAL); ++ ++ err = btf_dump_resize(d); ++ if (err) ++ return libbpf_err(err); ++ ++ d->emit_queue_cnt = 0; ++ err = btf_dump_order_type(d, id, false); ++ if (err < 0) ++ return libbpf_err(err); ++ ++ for (i = 0; i < d->emit_queue_cnt; i++) ++ btf_dump_emit_type(d, d->emit_queue[i], 0 /*top-level*/); ++ ++ return 0; ++} ++ ++/* ++ * Mark all types that are referenced from any other type. This is used to ++ * determine top-level anonymous enums that need to be emitted as an ++ * independent type declarations. 
++ * Anonymous enums come in two flavors: either embedded in a struct's field ++ * definition, in which case they have to be declared inline as part of field ++ * type declaration; or as a top-level anonymous enum, typically used for ++ * declaring global constants. It's impossible to distinguish between two ++ * without knowning whether given enum type was referenced from other type: ++ * top-level anonymous enum won't be referenced by anything, while embedded ++ * one will. ++ */ ++static int btf_dump_mark_referenced(struct btf_dump *d) ++{ ++ int i, j, n = btf__type_cnt(d->btf); ++ const struct btf_type *t; ++ __u16 vlen; ++ ++ for (i = d->last_id + 1; i < n; i++) { ++ t = btf__type_by_id(d->btf, i); ++ vlen = btf_vlen(t); ++ ++ switch (btf_kind(t)) { ++ case BTF_KIND_INT: ++ case BTF_KIND_ENUM: ++ case BTF_KIND_ENUM64: ++ case BTF_KIND_FWD: ++ case BTF_KIND_FLOAT: ++ break; ++ ++ case BTF_KIND_VOLATILE: ++ case BTF_KIND_CONST: ++ case BTF_KIND_RESTRICT: ++ case BTF_KIND_PTR: ++ case BTF_KIND_TYPEDEF: ++ case BTF_KIND_FUNC: ++ case BTF_KIND_VAR: ++ case BTF_KIND_DECL_TAG: ++ case BTF_KIND_TYPE_TAG: ++ d->type_states[t->type].referenced = 1; ++ break; ++ ++ case BTF_KIND_ARRAY: { ++ const struct btf_array *a = btf_array(t); ++ ++ d->type_states[a->index_type].referenced = 1; ++ d->type_states[a->type].referenced = 1; ++ break; ++ } ++ case BTF_KIND_STRUCT: ++ case BTF_KIND_UNION: { ++ const struct btf_member *m = btf_members(t); ++ ++ for (j = 0; j < vlen; j++, m++) ++ d->type_states[m->type].referenced = 1; ++ break; ++ } ++ case BTF_KIND_FUNC_PROTO: { ++ const struct btf_param *p = btf_params(t); ++ ++ for (j = 0; j < vlen; j++, p++) ++ d->type_states[p->type].referenced = 1; ++ break; ++ } ++ case BTF_KIND_DATASEC: { ++ const struct btf_var_secinfo *v = btf_var_secinfos(t); ++ ++ for (j = 0; j < vlen; j++, v++) ++ d->type_states[v->type].referenced = 1; ++ break; ++ } ++ default: ++ return -EINVAL; ++ } ++ } ++ return 0; ++} ++ ++static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id) ++{ ++ __u32 *new_queue; ++ size_t new_cap; ++ ++ if (d->emit_queue_cnt >= d->emit_queue_cap) { ++ new_cap = max(16, d->emit_queue_cap * 3 / 2); ++ new_queue = libbpf_reallocarray(d->emit_queue, new_cap, sizeof(new_queue[0])); ++ if (!new_queue) ++ return -ENOMEM; ++ d->emit_queue = new_queue; ++ d->emit_queue_cap = new_cap; ++ } ++ ++ d->emit_queue[d->emit_queue_cnt++] = id; ++ return 0; ++} ++ ++/* ++ * Determine order of emitting dependent types and specified type to satisfy ++ * C compilation rules. This is done through topological sorting with an ++ * additional complication which comes from C rules. The main idea for C is ++ * that if some type is "embedded" into a struct/union, it's size needs to be ++ * known at the time of definition of containing type. E.g., for: ++ * ++ * struct A {}; ++ * struct B { struct A x; } ++ * ++ * struct A *HAS* to be defined before struct B, because it's "embedded", ++ * i.e., it is part of struct B layout. But in the following case: ++ * ++ * struct A; ++ * struct B { struct A *x; } ++ * struct A {}; ++ * ++ * it's enough to just have a forward declaration of struct A at the time of ++ * struct B definition, as struct B has a pointer to struct A, so the size of ++ * field x is known without knowing struct A size: it's sizeof(void *). 
++ * ++ * Unfortunately, there are some trickier cases we need to handle, e.g.: ++ * ++ * struct A {}; // if this was forward-declaration: compilation error ++ * struct B { ++ * struct { // anonymous struct ++ * struct A y; ++ * } *x; ++ * }; ++ * ++ * In this case, struct B's field x is a pointer, so it's size is known ++ * regardless of the size of (anonymous) struct it points to. But because this ++ * struct is anonymous and thus defined inline inside struct B, *and* it ++ * embeds struct A, compiler requires full definition of struct A to be known ++ * before struct B can be defined. This creates a transitive dependency ++ * between struct A and struct B. If struct A was forward-declared before ++ * struct B definition and fully defined after struct B definition, that would ++ * trigger compilation error. ++ * ++ * All this means that while we are doing topological sorting on BTF type ++ * graph, we need to determine relationships between different types (graph ++ * nodes): ++ * - weak link (relationship) between X and Y, if Y *CAN* be ++ * forward-declared at the point of X definition; ++ * - strong link, if Y *HAS* to be fully-defined before X can be defined. ++ * ++ * The rule is as follows. Given a chain of BTF types from X to Y, if there is ++ * BTF_KIND_PTR type in the chain and at least one non-anonymous type ++ * Z (excluding X, including Y), then link is weak. Otherwise, it's strong. ++ * Weak/strong relationship is determined recursively during DFS traversal and ++ * is returned as a result from btf_dump_order_type(). ++ * ++ * btf_dump_order_type() is trying to avoid unnecessary forward declarations, ++ * but it is not guaranteeing that no extraneous forward declarations will be ++ * emitted. ++ * ++ * To avoid extra work, algorithm marks some of BTF types as ORDERED, when ++ * it's done with them, but not for all (e.g., VOLATILE, CONST, RESTRICT, ++ * ARRAY, FUNC_PROTO), as weak/strong semantics for those depends on the ++ * entire graph path, so depending where from one came to that BTF type, it ++ * might cause weak or strong ordering. For types like STRUCT/UNION/INT/ENUM, ++ * once they are processed, there is no need to do it again, so they are ++ * marked as ORDERED. We can mark PTR as ORDERED as well, as it semi-forces ++ * weak link, unless subsequent referenced STRUCT/UNION/ENUM is anonymous. But ++ * in any case, once those are processed, no need to do it again, as the ++ * result won't change. ++ * ++ * Returns: ++ * - 1, if type is part of strong link (so there is strong topological ++ * ordering requirements); ++ * - 0, if type is part of weak link (so can be satisfied through forward ++ * declaration); ++ * - <0, on error (e.g., unsatisfiable type loop detected). ++ */ ++static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) ++{ ++ /* ++ * Order state is used to detect strong link cycles, but only for BTF ++ * kinds that are or could be an independent definition (i.e., ++ * stand-alone fwd decl, enum, typedef, struct, union). Ptrs, arrays, ++ * func_protos, modifiers are just means to get to these definitions. ++ * Int/void don't need definitions, they are assumed to be always ++ * properly defined. We also ignore datasec, var, and funcs for now. ++ * So for all non-defining kinds, we never even set ordering state, ++ * for defining kinds we set ORDERING and subsequently ORDERED if it ++ * forms a strong link. 
++ */ ++ struct btf_dump_type_aux_state *tstate = &d->type_states[id]; ++ const struct btf_type *t; ++ __u16 vlen; ++ int err, i; ++ ++ /* return true, letting typedefs know that it's ok to be emitted */ ++ if (tstate->order_state == ORDERED) ++ return 1; ++ ++ t = btf__type_by_id(d->btf, id); ++ ++ if (tstate->order_state == ORDERING) { ++ /* type loop, but resolvable through fwd declaration */ ++ if (btf_is_composite(t) && through_ptr && t->name_off != 0) ++ return 0; ++ pr_warn("unsatisfiable type cycle, id:[%u]\n", id); ++ return -ELOOP; ++ } ++ ++ switch (btf_kind(t)) { ++ case BTF_KIND_INT: ++ case BTF_KIND_FLOAT: ++ tstate->order_state = ORDERED; ++ return 0; ++ ++ case BTF_KIND_PTR: ++ err = btf_dump_order_type(d, t->type, true); ++ tstate->order_state = ORDERED; ++ return err; ++ ++ case BTF_KIND_ARRAY: ++ return btf_dump_order_type(d, btf_array(t)->type, false); ++ ++ case BTF_KIND_STRUCT: ++ case BTF_KIND_UNION: { ++ const struct btf_member *m = btf_members(t); ++ /* ++ * struct/union is part of strong link, only if it's embedded ++ * (so no ptr in a path) or it's anonymous (so has to be ++ * defined inline, even if declared through ptr) ++ */ ++ if (through_ptr && t->name_off != 0) ++ return 0; ++ ++ tstate->order_state = ORDERING; ++ ++ vlen = btf_vlen(t); ++ for (i = 0; i < vlen; i++, m++) { ++ err = btf_dump_order_type(d, m->type, false); ++ if (err < 0) ++ return err; ++ } ++ ++ if (t->name_off != 0) { ++ err = btf_dump_add_emit_queue_id(d, id); ++ if (err < 0) ++ return err; ++ } ++ ++ tstate->order_state = ORDERED; ++ return 1; ++ } ++ case BTF_KIND_ENUM: ++ case BTF_KIND_ENUM64: ++ case BTF_KIND_FWD: ++ /* ++ * non-anonymous or non-referenced enums are top-level ++ * declarations and should be emitted. Same logic can be ++ * applied to FWDs, it won't hurt anyways. ++ */ ++ if (t->name_off != 0 || !tstate->referenced) { ++ err = btf_dump_add_emit_queue_id(d, id); ++ if (err) ++ return err; ++ } ++ tstate->order_state = ORDERED; ++ return 1; ++ ++ case BTF_KIND_TYPEDEF: { ++ int is_strong; ++ ++ is_strong = btf_dump_order_type(d, t->type, through_ptr); ++ if (is_strong < 0) ++ return is_strong; ++ ++ /* typedef is similar to struct/union w.r.t. 
fwd-decls */ ++ if (through_ptr && !is_strong) ++ return 0; ++ ++ /* typedef is always a named definition */ ++ err = btf_dump_add_emit_queue_id(d, id); ++ if (err) ++ return err; ++ ++ d->type_states[id].order_state = ORDERED; ++ return 1; ++ } ++ case BTF_KIND_VOLATILE: ++ case BTF_KIND_CONST: ++ case BTF_KIND_RESTRICT: ++ case BTF_KIND_TYPE_TAG: ++ return btf_dump_order_type(d, t->type, through_ptr); ++ ++ case BTF_KIND_FUNC_PROTO: { ++ const struct btf_param *p = btf_params(t); ++ bool is_strong; ++ ++ err = btf_dump_order_type(d, t->type, through_ptr); ++ if (err < 0) ++ return err; ++ is_strong = err > 0; ++ ++ vlen = btf_vlen(t); ++ for (i = 0; i < vlen; i++, p++) { ++ err = btf_dump_order_type(d, p->type, through_ptr); ++ if (err < 0) ++ return err; ++ if (err > 0) ++ is_strong = true; ++ } ++ return is_strong; ++ } ++ case BTF_KIND_FUNC: ++ case BTF_KIND_VAR: ++ case BTF_KIND_DATASEC: ++ case BTF_KIND_DECL_TAG: ++ d->type_states[id].order_state = ORDERED; ++ return 0; ++ ++ default: ++ return -EINVAL; ++ } ++} ++ ++static void btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id, ++ const struct btf_type *t); ++ ++static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id, ++ const struct btf_type *t); ++static void btf_dump_emit_struct_def(struct btf_dump *d, __u32 id, ++ const struct btf_type *t, int lvl); ++ ++static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id, ++ const struct btf_type *t); ++static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id, ++ const struct btf_type *t, int lvl); ++ ++static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id, ++ const struct btf_type *t); ++ ++static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id, ++ const struct btf_type *t, int lvl); ++ ++/* a local view into a shared stack */ ++struct id_stack { ++ const __u32 *ids; ++ int cnt; ++}; ++ ++static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, ++ const char *fname, int lvl); ++static void btf_dump_emit_type_chain(struct btf_dump *d, ++ struct id_stack *decl_stack, ++ const char *fname, int lvl); ++ ++static const char *btf_dump_type_name(struct btf_dump *d, __u32 id); ++static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id); ++static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map, ++ const char *orig_name); ++ ++static bool btf_dump_is_blacklisted(struct btf_dump *d, __u32 id) ++{ ++ const struct btf_type *t = btf__type_by_id(d->btf, id); ++ ++ /* __builtin_va_list is a compiler built-in, which causes compilation ++ * errors, when compiling w/ different compiler, then used to compile ++ * original code (e.g., GCC to compile kernel, Clang to use generated ++ * C header from BTF). As it is built-in, it should be already defined ++ * properly internally in compiler. ++ */ ++ if (t->name_off == 0) ++ return false; ++ return strcmp(btf_name_of(d, t->name_off), "__builtin_va_list") == 0; ++} ++ ++/* ++ * Emit C-syntax definitions of types from chains of BTF types. ++ * ++ * High-level handling of determining necessary forward declarations are handled ++ * by btf_dump_emit_type() itself, but all nitty-gritty details of emitting type ++ * declarations/definitions in C syntax are handled by a combo of ++ * btf_dump_emit_type_decl()/btf_dump_emit_type_chain() w/ delegation to ++ * corresponding btf_dump_emit_*_{def,fwd}() functions. 
++ * ++ * We also keep track of "containing struct/union type ID" to determine when ++ * we reference it from inside and thus can avoid emitting unnecessary forward ++ * declaration. ++ * ++ * This algorithm is designed in such a way, that even if some error occurs ++ * (either technical, e.g., out of memory, or logical, i.e., malformed BTF ++ * that doesn't comply to C rules completely), algorithm will try to proceed ++ * and produce as much meaningful output as possible. ++ */ ++static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) ++{ ++ struct btf_dump_type_aux_state *tstate = &d->type_states[id]; ++ bool top_level_def = cont_id == 0; ++ const struct btf_type *t; ++ __u16 kind; ++ ++ if (tstate->emit_state == EMITTED) ++ return; ++ ++ t = btf__type_by_id(d->btf, id); ++ kind = btf_kind(t); ++ ++ if (tstate->emit_state == EMITTING) { ++ if (tstate->fwd_emitted) ++ return; ++ ++ switch (kind) { ++ case BTF_KIND_STRUCT: ++ case BTF_KIND_UNION: ++ /* ++ * if we are referencing a struct/union that we are ++ * part of - then no need for fwd declaration ++ */ ++ if (id == cont_id) ++ return; ++ if (t->name_off == 0) { ++ pr_warn("anonymous struct/union loop, id:[%u]\n", ++ id); ++ return; ++ } ++ btf_dump_emit_struct_fwd(d, id, t); ++ btf_dump_printf(d, ";\n\n"); ++ tstate->fwd_emitted = 1; ++ break; ++ case BTF_KIND_TYPEDEF: ++ /* ++ * for typedef fwd_emitted means typedef definition ++ * was emitted, but it can be used only for "weak" ++ * references through pointer only, not for embedding ++ */ ++ if (!btf_dump_is_blacklisted(d, id)) { ++ btf_dump_emit_typedef_def(d, id, t, 0); ++ btf_dump_printf(d, ";\n\n"); ++ } ++ tstate->fwd_emitted = 1; ++ break; ++ default: ++ break; ++ } ++ ++ return; ++ } ++ ++ switch (kind) { ++ case BTF_KIND_INT: ++ /* Emit type alias definitions if necessary */ ++ btf_dump_emit_missing_aliases(d, id, t); ++ ++ tstate->emit_state = EMITTED; ++ break; ++ case BTF_KIND_ENUM: ++ case BTF_KIND_ENUM64: ++ if (top_level_def) { ++ btf_dump_emit_enum_def(d, id, t, 0); ++ btf_dump_printf(d, ";\n\n"); ++ } ++ tstate->emit_state = EMITTED; ++ break; ++ case BTF_KIND_PTR: ++ case BTF_KIND_VOLATILE: ++ case BTF_KIND_CONST: ++ case BTF_KIND_RESTRICT: ++ case BTF_KIND_TYPE_TAG: ++ btf_dump_emit_type(d, t->type, cont_id); ++ break; ++ case BTF_KIND_ARRAY: ++ btf_dump_emit_type(d, btf_array(t)->type, cont_id); ++ break; ++ case BTF_KIND_FWD: ++ btf_dump_emit_fwd_def(d, id, t); ++ btf_dump_printf(d, ";\n\n"); ++ tstate->emit_state = EMITTED; ++ break; ++ case BTF_KIND_TYPEDEF: ++ tstate->emit_state = EMITTING; ++ btf_dump_emit_type(d, t->type, id); ++ /* ++ * typedef can server as both definition and forward ++ * declaration; at this stage someone depends on ++ * typedef as a forward declaration (refers to it ++ * through pointer), so unless we already did it, ++ * emit typedef as a forward declaration ++ */ ++ if (!tstate->fwd_emitted && !btf_dump_is_blacklisted(d, id)) { ++ btf_dump_emit_typedef_def(d, id, t, 0); ++ btf_dump_printf(d, ";\n\n"); ++ } ++ tstate->emit_state = EMITTED; ++ break; ++ case BTF_KIND_STRUCT: ++ case BTF_KIND_UNION: ++ tstate->emit_state = EMITTING; ++ /* if it's a top-level struct/union definition or struct/union ++ * is anonymous, then in C we'll be emitting all fields and ++ * their types (as opposed to just `struct X`), so we need to ++ * make sure that all types, referenced from struct/union ++ * members have necessary forward-declarations, where ++ * applicable ++ */ ++ if (top_level_def || t->name_off == 0) { ++ const struct 
btf_member *m = btf_members(t); ++ __u16 vlen = btf_vlen(t); ++ int i, new_cont_id; ++ ++ new_cont_id = t->name_off == 0 ? cont_id : id; ++ for (i = 0; i < vlen; i++, m++) ++ btf_dump_emit_type(d, m->type, new_cont_id); ++ } else if (!tstate->fwd_emitted && id != cont_id) { ++ btf_dump_emit_struct_fwd(d, id, t); ++ btf_dump_printf(d, ";\n\n"); ++ tstate->fwd_emitted = 1; ++ } ++ ++ if (top_level_def) { ++ btf_dump_emit_struct_def(d, id, t, 0); ++ btf_dump_printf(d, ";\n\n"); ++ tstate->emit_state = EMITTED; ++ } else { ++ tstate->emit_state = NOT_EMITTED; ++ } ++ break; ++ case BTF_KIND_FUNC_PROTO: { ++ const struct btf_param *p = btf_params(t); ++ __u16 n = btf_vlen(t); ++ int i; ++ ++ btf_dump_emit_type(d, t->type, cont_id); ++ for (i = 0; i < n; i++, p++) ++ btf_dump_emit_type(d, p->type, cont_id); ++ ++ break; ++ } ++ default: ++ break; ++ } ++} ++ ++static bool btf_is_struct_packed(const struct btf *btf, __u32 id, ++ const struct btf_type *t) ++{ ++ const struct btf_member *m; ++ int align, i, bit_sz; ++ __u16 vlen; ++ ++ align = btf__align_of(btf, id); ++ /* size of a non-packed struct has to be a multiple of its alignment*/ ++ if (align && t->size % align) ++ return true; ++ ++ m = btf_members(t); ++ vlen = btf_vlen(t); ++ /* all non-bitfield fields have to be naturally aligned */ ++ for (i = 0; i < vlen; i++, m++) { ++ align = btf__align_of(btf, m->type); ++ bit_sz = btf_member_bitfield_size(t, i); ++ if (align && bit_sz == 0 && m->offset % (8 * align) != 0) ++ return true; ++ } ++ ++ /* ++ * if original struct was marked as packed, but its layout is ++ * naturally aligned, we'll detect that it's not packed ++ */ ++ return false; ++} ++ ++static int chip_away_bits(int total, int at_most) ++{ ++ return total % at_most ? : at_most; ++} ++ ++static void btf_dump_emit_bit_padding(const struct btf_dump *d, ++ int cur_off, int m_off, int m_bit_sz, ++ int align, int lvl) ++{ ++ int off_diff = m_off - cur_off; ++ int ptr_bits = d->ptr_sz * 8; ++ ++ if (off_diff <= 0) ++ /* no gap */ ++ return; ++ if (m_bit_sz == 0 && off_diff < align * 8) ++ /* natural padding will take care of a gap */ ++ return; ++ ++ while (off_diff > 0) { ++ const char *pad_type; ++ int pad_bits; ++ ++ if (ptr_bits > 32 && off_diff > 32) { ++ pad_type = "long"; ++ pad_bits = chip_away_bits(off_diff, ptr_bits); ++ } else if (off_diff > 16) { ++ pad_type = "int"; ++ pad_bits = chip_away_bits(off_diff, 32); ++ } else if (off_diff > 8) { ++ pad_type = "short"; ++ pad_bits = chip_away_bits(off_diff, 16); ++ } else { ++ pad_type = "char"; ++ pad_bits = chip_away_bits(off_diff, 8); ++ } ++ btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits); ++ off_diff -= pad_bits; ++ } ++} ++ ++static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id, ++ const struct btf_type *t) ++{ ++ btf_dump_printf(d, "%s%s%s", ++ btf_is_struct(t) ? "struct" : "union", ++ t->name_off ? " " : "", ++ btf_dump_type_name(d, id)); ++} ++ ++static void btf_dump_emit_struct_def(struct btf_dump *d, ++ __u32 id, ++ const struct btf_type *t, ++ int lvl) ++{ ++ const struct btf_member *m = btf_members(t); ++ bool is_struct = btf_is_struct(t); ++ int align, i, packed, off = 0; ++ __u16 vlen = btf_vlen(t); ++ ++ packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0; ++ ++ btf_dump_printf(d, "%s%s%s {", ++ is_struct ? "struct" : "union", ++ t->name_off ? 
" " : "", ++ btf_dump_type_name(d, id)); ++ ++ for (i = 0; i < vlen; i++, m++) { ++ const char *fname; ++ int m_off, m_sz; ++ ++ fname = btf_name_of(d, m->name_off); ++ m_sz = btf_member_bitfield_size(t, i); ++ m_off = btf_member_bit_offset(t, i); ++ align = packed ? 1 : btf__align_of(d->btf, m->type); ++ ++ btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1); ++ btf_dump_printf(d, "\n%s", pfx(lvl + 1)); ++ btf_dump_emit_type_decl(d, m->type, fname, lvl + 1); ++ ++ if (m_sz) { ++ btf_dump_printf(d, ": %d", m_sz); ++ off = m_off + m_sz; ++ } else { ++ m_sz = max((__s64)0, btf__resolve_size(d->btf, m->type)); ++ off = m_off + m_sz * 8; ++ } ++ btf_dump_printf(d, ";"); ++ } ++ ++ /* pad at the end, if necessary */ ++ if (is_struct) { ++ align = packed ? 1 : btf__align_of(d->btf, id); ++ btf_dump_emit_bit_padding(d, off, t->size * 8, 0, align, ++ lvl + 1); ++ } ++ ++ if (vlen) ++ btf_dump_printf(d, "\n"); ++ btf_dump_printf(d, "%s}", pfx(lvl)); ++ if (packed) ++ btf_dump_printf(d, " __attribute__((packed))"); ++} ++ ++static const char *missing_base_types[][2] = { ++ /* ++ * GCC emits typedefs to its internal __PolyX_t types when compiling Arm ++ * SIMD intrinsics. Alias them to standard base types. ++ */ ++ { "__Poly8_t", "unsigned char" }, ++ { "__Poly16_t", "unsigned short" }, ++ { "__Poly64_t", "unsigned long long" }, ++ { "__Poly128_t", "unsigned __int128" }, ++}; ++ ++static void btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id, ++ const struct btf_type *t) ++{ ++ const char *name = btf_dump_type_name(d, id); ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(missing_base_types); i++) { ++ if (strcmp(name, missing_base_types[i][0]) == 0) { ++ btf_dump_printf(d, "typedef %s %s;\n\n", ++ missing_base_types[i][1], name); ++ break; ++ } ++ } ++} ++ ++static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id, ++ const struct btf_type *t) ++{ ++ btf_dump_printf(d, "enum %s", btf_dump_type_name(d, id)); ++} ++ ++static void btf_dump_emit_enum32_val(struct btf_dump *d, ++ const struct btf_type *t, ++ int lvl, __u16 vlen) ++{ ++ const struct btf_enum *v = btf_enum(t); ++ bool is_signed = btf_kflag(t); ++ const char *fmt_str; ++ const char *name; ++ size_t dup_cnt; ++ int i; ++ ++ for (i = 0; i < vlen; i++, v++) { ++ name = btf_name_of(d, v->name_off); ++ /* enumerators share namespace with typedef idents */ ++ dup_cnt = btf_dump_name_dups(d, d->ident_names, name); ++ if (dup_cnt > 1) { ++ fmt_str = is_signed ? "\n%s%s___%zd = %d," : "\n%s%s___%zd = %u,"; ++ btf_dump_printf(d, fmt_str, pfx(lvl + 1), name, dup_cnt, v->val); ++ } else { ++ fmt_str = is_signed ? "\n%s%s = %d," : "\n%s%s = %u,"; ++ btf_dump_printf(d, fmt_str, pfx(lvl + 1), name, v->val); ++ } ++ } ++} ++ ++static void btf_dump_emit_enum64_val(struct btf_dump *d, ++ const struct btf_type *t, ++ int lvl, __u16 vlen) ++{ ++ const struct btf_enum64 *v = btf_enum64(t); ++ bool is_signed = btf_kflag(t); ++ const char *fmt_str; ++ const char *name; ++ size_t dup_cnt; ++ __u64 val; ++ int i; ++ ++ for (i = 0; i < vlen; i++, v++) { ++ name = btf_name_of(d, v->name_off); ++ dup_cnt = btf_dump_name_dups(d, d->ident_names, name); ++ val = btf_enum64_value(v); ++ if (dup_cnt > 1) { ++ fmt_str = is_signed ? "\n%s%s___%zd = %lldLL," ++ : "\n%s%s___%zd = %lluULL,"; ++ btf_dump_printf(d, fmt_str, ++ pfx(lvl + 1), name, dup_cnt, ++ (unsigned long long)val); ++ } else { ++ fmt_str = is_signed ? 
"\n%s%s = %lldLL," ++ : "\n%s%s = %lluULL,"; ++ btf_dump_printf(d, fmt_str, ++ pfx(lvl + 1), name, ++ (unsigned long long)val); ++ } ++ } ++} ++static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id, ++ const struct btf_type *t, ++ int lvl) ++{ ++ __u16 vlen = btf_vlen(t); ++ ++ btf_dump_printf(d, "enum%s%s", ++ t->name_off ? " " : "", ++ btf_dump_type_name(d, id)); ++ ++ if (!vlen) ++ return; ++ ++ btf_dump_printf(d, " {"); ++ if (btf_is_enum(t)) ++ btf_dump_emit_enum32_val(d, t, lvl, vlen); ++ else ++ btf_dump_emit_enum64_val(d, t, lvl, vlen); ++ btf_dump_printf(d, "\n%s}", pfx(lvl)); ++} ++ ++static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id, ++ const struct btf_type *t) ++{ ++ const char *name = btf_dump_type_name(d, id); ++ ++ if (btf_kflag(t)) ++ btf_dump_printf(d, "union %s", name); ++ else ++ btf_dump_printf(d, "struct %s", name); ++} ++ ++static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id, ++ const struct btf_type *t, int lvl) ++{ ++ const char *name = btf_dump_ident_name(d, id); ++ ++ /* ++ * Old GCC versions are emitting invalid typedef for __gnuc_va_list ++ * pointing to VOID. This generates warnings from btf_dump() and ++ * results in uncompilable header file, so we are fixing it up here ++ * with valid typedef into __builtin_va_list. ++ */ ++ if (t->type == 0 && strcmp(name, "__gnuc_va_list") == 0) { ++ btf_dump_printf(d, "typedef __builtin_va_list __gnuc_va_list"); ++ return; ++ } ++ ++ btf_dump_printf(d, "typedef "); ++ btf_dump_emit_type_decl(d, t->type, name, lvl); ++} ++ ++static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id) ++{ ++ __u32 *new_stack; ++ size_t new_cap; ++ ++ if (d->decl_stack_cnt >= d->decl_stack_cap) { ++ new_cap = max(16, d->decl_stack_cap * 3 / 2); ++ new_stack = libbpf_reallocarray(d->decl_stack, new_cap, sizeof(new_stack[0])); ++ if (!new_stack) ++ return -ENOMEM; ++ d->decl_stack = new_stack; ++ d->decl_stack_cap = new_cap; ++ } ++ ++ d->decl_stack[d->decl_stack_cnt++] = id; ++ ++ return 0; ++} ++ ++/* ++ * Emit type declaration (e.g., field type declaration in a struct or argument ++ * declaration in function prototype) in correct C syntax. ++ * ++ * For most types it's trivial, but there are few quirky type declaration ++ * cases worth mentioning: ++ * - function prototypes (especially nesting of function prototypes); ++ * - arrays; ++ * - const/volatile/restrict for pointers vs other types. ++ * ++ * For a good discussion of *PARSING* C syntax (as a human), see ++ * Peter van der Linden's "Expert C Programming: Deep C Secrets", ++ * Ch.3 "Unscrambling Declarations in C". ++ * ++ * It won't help with BTF to C conversion much, though, as it's an opposite ++ * problem. So we came up with this algorithm in reverse to van der Linden's ++ * parsing algorithm. It goes from structured BTF representation of type ++ * declaration to a valid compilable C syntax. ++ * ++ * For instance, consider this C typedef: ++ * typedef const int * const * arr[10] arr_t; ++ * It will be represented in BTF with this chain of BTF types: ++ * [typedef] -> [array] -> [ptr] -> [const] -> [ptr] -> [const] -> [int] ++ * ++ * Notice how [const] modifier always goes before type it modifies in BTF type ++ * graph, but in C syntax, const/volatile/restrict modifiers are written to ++ * the right of pointers, but to the left of other types. There are also other ++ * quirks, like function pointers, arrays of them, functions returning other ++ * functions, etc. 
++ * ++ * We handle that by pushing all the types to a stack, until we hit "terminal" ++ * type (int/enum/struct/union/fwd). Then depending on the kind of a type on ++ * top of a stack, modifiers are handled differently. Array/function pointers ++ * have also wildly different syntax and how nesting of them are done. See ++ * code for authoritative definition. ++ * ++ * To avoid allocating new stack for each independent chain of BTF types, we ++ * share one bigger stack, with each chain working only on its own local view ++ * of a stack frame. Some care is required to "pop" stack frames after ++ * processing type declaration chain. ++ */ ++int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id, ++ const struct btf_dump_emit_type_decl_opts *opts) ++{ ++ const char *fname; ++ int lvl, err; ++ ++ if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts)) ++ return libbpf_err(-EINVAL); ++ ++ err = btf_dump_resize(d); ++ if (err) ++ return libbpf_err(err); ++ ++ fname = OPTS_GET(opts, field_name, ""); ++ lvl = OPTS_GET(opts, indent_level, 0); ++ d->strip_mods = OPTS_GET(opts, strip_mods, false); ++ btf_dump_emit_type_decl(d, id, fname, lvl); ++ d->strip_mods = false; ++ return 0; ++} ++ ++static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, ++ const char *fname, int lvl) ++{ ++ struct id_stack decl_stack; ++ const struct btf_type *t; ++ int err, stack_start; ++ ++ stack_start = d->decl_stack_cnt; ++ for (;;) { ++ t = btf__type_by_id(d->btf, id); ++ if (d->strip_mods && btf_is_mod(t)) ++ goto skip_mod; ++ ++ err = btf_dump_push_decl_stack_id(d, id); ++ if (err < 0) { ++ /* ++ * if we don't have enough memory for entire type decl ++ * chain, restore stack, emit warning, and try to ++ * proceed nevertheless ++ */ ++ pr_warn("not enough memory for decl stack:%d", err); ++ d->decl_stack_cnt = stack_start; ++ return; ++ } ++skip_mod: ++ /* VOID */ ++ if (id == 0) ++ break; ++ ++ switch (btf_kind(t)) { ++ case BTF_KIND_PTR: ++ case BTF_KIND_VOLATILE: ++ case BTF_KIND_CONST: ++ case BTF_KIND_RESTRICT: ++ case BTF_KIND_FUNC_PROTO: ++ case BTF_KIND_TYPE_TAG: ++ id = t->type; ++ break; ++ case BTF_KIND_ARRAY: ++ id = btf_array(t)->type; ++ break; ++ case BTF_KIND_INT: ++ case BTF_KIND_ENUM: ++ case BTF_KIND_ENUM64: ++ case BTF_KIND_FWD: ++ case BTF_KIND_STRUCT: ++ case BTF_KIND_UNION: ++ case BTF_KIND_TYPEDEF: ++ case BTF_KIND_FLOAT: ++ goto done; ++ default: ++ pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n", ++ btf_kind(t), id); ++ goto done; ++ } ++ } ++done: ++ /* ++ * We might be inside a chain of declarations (e.g., array of function ++ * pointers returning anonymous (so inlined) structs, having another ++ * array field). Each of those needs its own "stack frame" to handle ++ * emitting of declarations. Those stack frames are non-overlapping ++ * portions of shared btf_dump->decl_stack. To make it a bit nicer to ++ * handle this set of nested stacks, we create a view corresponding to ++ * our own "stack frame" and work with it as an independent stack. ++ * We'll need to clean up after emit_type_chain() returns, though. ++ */ ++ decl_stack.ids = d->decl_stack + stack_start; ++ decl_stack.cnt = d->decl_stack_cnt - stack_start; ++ btf_dump_emit_type_chain(d, &decl_stack, fname, lvl); ++ /* ++ * emit_type_chain() guarantees that it will pop its entire decl_stack ++ * frame before returning. But it works with a read-only view into ++ * decl_stack, so it doesn't actually pop anything from the ++ * perspective of shared btf_dump->decl_stack, per se. 
We need to ++ * reset decl_stack state to how it was before us to avoid it growing ++ * all the time. ++ */ ++ d->decl_stack_cnt = stack_start; ++} ++ ++static void btf_dump_emit_mods(struct btf_dump *d, struct id_stack *decl_stack) ++{ ++ const struct btf_type *t; ++ __u32 id; ++ ++ while (decl_stack->cnt) { ++ id = decl_stack->ids[decl_stack->cnt - 1]; ++ t = btf__type_by_id(d->btf, id); ++ ++ switch (btf_kind(t)) { ++ case BTF_KIND_VOLATILE: ++ btf_dump_printf(d, "volatile "); ++ break; ++ case BTF_KIND_CONST: ++ btf_dump_printf(d, "const "); ++ break; ++ case BTF_KIND_RESTRICT: ++ btf_dump_printf(d, "restrict "); ++ break; ++ default: ++ return; ++ } ++ decl_stack->cnt--; ++ } ++} ++ ++static void btf_dump_drop_mods(struct btf_dump *d, struct id_stack *decl_stack) ++{ ++ const struct btf_type *t; ++ __u32 id; ++ ++ while (decl_stack->cnt) { ++ id = decl_stack->ids[decl_stack->cnt - 1]; ++ t = btf__type_by_id(d->btf, id); ++ if (!btf_is_mod(t)) ++ return; ++ decl_stack->cnt--; ++ } ++} ++ ++static void btf_dump_emit_name(const struct btf_dump *d, ++ const char *name, bool last_was_ptr) ++{ ++ bool separate = name[0] && !last_was_ptr; ++ ++ btf_dump_printf(d, "%s%s", separate ? " " : "", name); ++} ++ ++static void btf_dump_emit_type_chain(struct btf_dump *d, ++ struct id_stack *decls, ++ const char *fname, int lvl) ++{ ++ /* ++ * last_was_ptr is used to determine if we need to separate pointer ++ * asterisk (*) from previous part of type signature with space, so ++ * that we get `int ***`, instead of `int * * *`. We default to true ++ * for cases where we have single pointer in a chain. E.g., in ptr -> ++ * func_proto case. func_proto will start a new emit_type_chain call ++ * with just ptr, which should be emitted as (*) or (*), so we ++ * don't want to prepend space for that last pointer. ++ */ ++ bool last_was_ptr = true; ++ const struct btf_type *t; ++ const char *name; ++ __u16 kind; ++ __u32 id; ++ ++ while (decls->cnt) { ++ id = decls->ids[--decls->cnt]; ++ if (id == 0) { ++ /* VOID is a special snowflake */ ++ btf_dump_emit_mods(d, decls); ++ btf_dump_printf(d, "void"); ++ last_was_ptr = false; ++ continue; ++ } ++ ++ t = btf__type_by_id(d->btf, id); ++ kind = btf_kind(t); ++ ++ switch (kind) { ++ case BTF_KIND_INT: ++ case BTF_KIND_FLOAT: ++ btf_dump_emit_mods(d, decls); ++ name = btf_name_of(d, t->name_off); ++ btf_dump_printf(d, "%s", name); ++ break; ++ case BTF_KIND_STRUCT: ++ case BTF_KIND_UNION: ++ btf_dump_emit_mods(d, decls); ++ /* inline anonymous struct/union */ ++ if (t->name_off == 0 && !d->skip_anon_defs) ++ btf_dump_emit_struct_def(d, id, t, lvl); ++ else ++ btf_dump_emit_struct_fwd(d, id, t); ++ break; ++ case BTF_KIND_ENUM: ++ case BTF_KIND_ENUM64: ++ btf_dump_emit_mods(d, decls); ++ /* inline anonymous enum */ ++ if (t->name_off == 0 && !d->skip_anon_defs) ++ btf_dump_emit_enum_def(d, id, t, lvl); ++ else ++ btf_dump_emit_enum_fwd(d, id, t); ++ break; ++ case BTF_KIND_FWD: ++ btf_dump_emit_mods(d, decls); ++ btf_dump_emit_fwd_def(d, id, t); ++ break; ++ case BTF_KIND_TYPEDEF: ++ btf_dump_emit_mods(d, decls); ++ btf_dump_printf(d, "%s", btf_dump_ident_name(d, id)); ++ break; ++ case BTF_KIND_PTR: ++ btf_dump_printf(d, "%s", last_was_ptr ? 
"*" : " *"); ++ break; ++ case BTF_KIND_VOLATILE: ++ btf_dump_printf(d, " volatile"); ++ break; ++ case BTF_KIND_CONST: ++ btf_dump_printf(d, " const"); ++ break; ++ case BTF_KIND_RESTRICT: ++ btf_dump_printf(d, " restrict"); ++ break; ++ case BTF_KIND_TYPE_TAG: ++ btf_dump_emit_mods(d, decls); ++ name = btf_name_of(d, t->name_off); ++ btf_dump_printf(d, " __attribute__((btf_type_tag(\"%s\")))", name); ++ break; ++ case BTF_KIND_ARRAY: { ++ const struct btf_array *a = btf_array(t); ++ const struct btf_type *next_t; ++ __u32 next_id; ++ bool multidim; ++ /* ++ * GCC has a bug ++ * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=8354) ++ * which causes it to emit extra const/volatile ++ * modifiers for an array, if array's element type has ++ * const/volatile modifiers. Clang doesn't do that. ++ * In general, it doesn't seem very meaningful to have ++ * a const/volatile modifier for array, so we are ++ * going to silently skip them here. ++ */ ++ btf_dump_drop_mods(d, decls); ++ ++ if (decls->cnt == 0) { ++ btf_dump_emit_name(d, fname, last_was_ptr); ++ btf_dump_printf(d, "[%u]", a->nelems); ++ return; ++ } ++ ++ next_id = decls->ids[decls->cnt - 1]; ++ next_t = btf__type_by_id(d->btf, next_id); ++ multidim = btf_is_array(next_t); ++ /* we need space if we have named non-pointer */ ++ if (fname[0] && !last_was_ptr) ++ btf_dump_printf(d, " "); ++ /* no parentheses for multi-dimensional array */ ++ if (!multidim) ++ btf_dump_printf(d, "("); ++ btf_dump_emit_type_chain(d, decls, fname, lvl); ++ if (!multidim) ++ btf_dump_printf(d, ")"); ++ btf_dump_printf(d, "[%u]", a->nelems); ++ return; ++ } ++ case BTF_KIND_FUNC_PROTO: { ++ const struct btf_param *p = btf_params(t); ++ __u16 vlen = btf_vlen(t); ++ int i; ++ ++ /* ++ * GCC emits extra volatile qualifier for ++ * __attribute__((noreturn)) function pointers. Clang ++ * doesn't do it. It's a GCC quirk for backwards ++ * compatibility with code written for GCC <2.5. So, ++ * similarly to extra qualifiers for array, just drop ++ * them, instead of handling them. ++ */ ++ btf_dump_drop_mods(d, decls); ++ if (decls->cnt) { ++ btf_dump_printf(d, " ("); ++ btf_dump_emit_type_chain(d, decls, fname, lvl); ++ btf_dump_printf(d, ")"); ++ } else { ++ btf_dump_emit_name(d, fname, last_was_ptr); ++ } ++ btf_dump_printf(d, "("); ++ /* ++ * Clang for BPF target generates func_proto with no ++ * args as a func_proto with a single void arg (e.g., ++ * `int (*f)(void)` vs just `int (*f)()`). We are ++ * going to pretend there are no args for such case. 
++ */ ++ if (vlen == 1 && p->type == 0) { ++ btf_dump_printf(d, ")"); ++ return; ++ } ++ ++ for (i = 0; i < vlen; i++, p++) { ++ if (i > 0) ++ btf_dump_printf(d, ", "); ++ ++ /* last arg of type void is vararg */ ++ if (i == vlen - 1 && p->type == 0) { ++ btf_dump_printf(d, "..."); ++ break; ++ } ++ ++ name = btf_name_of(d, p->name_off); ++ btf_dump_emit_type_decl(d, p->type, name, lvl); ++ } ++ ++ btf_dump_printf(d, ")"); ++ return; ++ } ++ default: ++ pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n", ++ kind, id); ++ return; ++ } ++ ++ last_was_ptr = kind == BTF_KIND_PTR; ++ } ++ ++ btf_dump_emit_name(d, fname, last_was_ptr); ++} ++ ++/* show type name as (type_name) */ ++static void btf_dump_emit_type_cast(struct btf_dump *d, __u32 id, ++ bool top_level) ++{ ++ const struct btf_type *t; ++ ++ /* for array members, we don't bother emitting type name for each ++ * member to avoid the redundancy of ++ * .name = (char[4])[(char)'f',(char)'o',(char)'o',] ++ */ ++ if (d->typed_dump->is_array_member) ++ return; ++ ++ /* avoid type name specification for variable/section; it will be done ++ * for the associated variable value(s). ++ */ ++ t = btf__type_by_id(d->btf, id); ++ if (btf_is_var(t) || btf_is_datasec(t)) ++ return; ++ ++ if (top_level) ++ btf_dump_printf(d, "("); ++ ++ d->skip_anon_defs = true; ++ d->strip_mods = true; ++ btf_dump_emit_type_decl(d, id, "", 0); ++ d->strip_mods = false; ++ d->skip_anon_defs = false; ++ ++ if (top_level) ++ btf_dump_printf(d, ")"); ++} ++ ++/* return number of duplicates (occurrences) of a given name */ ++static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map, ++ const char *orig_name) ++{ ++ size_t dup_cnt = 0; ++ ++ hashmap__find(name_map, orig_name, (void **)&dup_cnt); ++ dup_cnt++; ++ hashmap__set(name_map, orig_name, (void *)dup_cnt, NULL, NULL); ++ ++ return dup_cnt; ++} ++ ++static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id, ++ struct hashmap *name_map) ++{ ++ struct btf_dump_type_aux_state *s = &d->type_states[id]; ++ const struct btf_type *t = btf__type_by_id(d->btf, id); ++ const char *orig_name = btf_name_of(d, t->name_off); ++ const char **cached_name = &d->cached_names[id]; ++ size_t dup_cnt; ++ ++ if (t->name_off == 0) ++ return ""; ++ ++ if (s->name_resolved) ++ return *cached_name ? *cached_name : orig_name; ++ ++ if (btf_is_fwd(t) || (btf_is_enum(t) && btf_vlen(t) == 0)) { ++ s->name_resolved = 1; ++ return orig_name; ++ } ++ ++ dup_cnt = btf_dump_name_dups(d, name_map, orig_name); ++ if (dup_cnt > 1) { ++ const size_t max_len = 256; ++ char new_name[max_len]; ++ ++ snprintf(new_name, max_len, "%s___%zu", orig_name, dup_cnt); ++ *cached_name = strdup(new_name); ++ } ++ ++ s->name_resolved = 1; ++ return *cached_name ? *cached_name : orig_name; ++} ++ ++static const char *btf_dump_type_name(struct btf_dump *d, __u32 id) ++{ ++ return btf_dump_resolve_name(d, id, d->type_names); ++} ++ ++static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id) ++{ ++ return btf_dump_resolve_name(d, id, d->ident_names); ++} ++ ++static int btf_dump_dump_type_data(struct btf_dump *d, ++ const char *fname, ++ const struct btf_type *t, ++ __u32 id, ++ const void *data, ++ __u8 bits_offset, ++ __u8 bit_sz); ++ ++static const char *btf_dump_data_newline(struct btf_dump *d) ++{ ++ return d->typed_dump->compact || d->typed_dump->depth == 0 ? "" : "\n"; ++} ++ ++static const char *btf_dump_data_delim(struct btf_dump *d) ++{ ++ return d->typed_dump->depth == 0 ? 
"" : ","; ++} ++ ++static void btf_dump_data_pfx(struct btf_dump *d) ++{ ++ int i, lvl = d->typed_dump->indent_lvl + d->typed_dump->depth; ++ ++ if (d->typed_dump->compact) ++ return; ++ ++ for (i = 0; i < lvl; i++) ++ btf_dump_printf(d, "%s", d->typed_dump->indent_str); ++} ++ ++/* A macro is used here as btf_type_value[s]() appends format specifiers ++ * to the format specifier passed in; these do the work of appending ++ * delimiters etc while the caller simply has to specify the type values ++ * in the format specifier + value(s). ++ */ ++#define btf_dump_type_values(d, fmt, ...) \ ++ btf_dump_printf(d, fmt "%s%s", \ ++ ##__VA_ARGS__, \ ++ btf_dump_data_delim(d), \ ++ btf_dump_data_newline(d)) ++ ++static int btf_dump_unsupported_data(struct btf_dump *d, ++ const struct btf_type *t, ++ __u32 id) ++{ ++ btf_dump_printf(d, "", btf_kind(t)); ++ return -ENOTSUP; ++} ++ ++static int btf_dump_get_bitfield_value(struct btf_dump *d, ++ const struct btf_type *t, ++ const void *data, ++ __u8 bits_offset, ++ __u8 bit_sz, ++ __u64 *value) ++{ ++ __u16 left_shift_bits, right_shift_bits; ++ const __u8 *bytes = data; ++ __u8 nr_copy_bits; ++ __u64 num = 0; ++ int i; ++ ++ /* Maximum supported bitfield size is 64 bits */ ++ if (t->size > 8) { ++ pr_warn("unexpected bitfield size %d\n", t->size); ++ return -EINVAL; ++ } ++ ++ /* Bitfield value retrieval is done in two steps; first relevant bytes are ++ * stored in num, then we left/right shift num to eliminate irrelevant bits. ++ */ ++#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ++ for (i = t->size - 1; i >= 0; i--) ++ num = num * 256 + bytes[i]; ++ nr_copy_bits = bit_sz + bits_offset; ++#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ ++ for (i = 0; i < t->size; i++) ++ num = num * 256 + bytes[i]; ++ nr_copy_bits = t->size * 8 - bits_offset; ++#else ++# error "Unrecognized __BYTE_ORDER__" ++#endif ++ left_shift_bits = 64 - nr_copy_bits; ++ right_shift_bits = 64 - bit_sz; ++ ++ *value = (num << left_shift_bits) >> right_shift_bits; ++ ++ return 0; ++} ++ ++static int btf_dump_bitfield_check_zero(struct btf_dump *d, ++ const struct btf_type *t, ++ const void *data, ++ __u8 bits_offset, ++ __u8 bit_sz) ++{ ++ __u64 check_num; ++ int err; ++ ++ err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, &check_num); ++ if (err) ++ return err; ++ if (check_num == 0) ++ return -ENODATA; ++ return 0; ++} ++ ++static int btf_dump_bitfield_data(struct btf_dump *d, ++ const struct btf_type *t, ++ const void *data, ++ __u8 bits_offset, ++ __u8 bit_sz) ++{ ++ __u64 print_num; ++ int err; ++ ++ err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, &print_num); ++ if (err) ++ return err; ++ ++ btf_dump_type_values(d, "0x%llx", (unsigned long long)print_num); ++ ++ return 0; ++} ++ ++/* ints, floats and ptrs */ ++static int btf_dump_base_type_check_zero(struct btf_dump *d, ++ const struct btf_type *t, ++ __u32 id, ++ const void *data) ++{ ++ static __u8 bytecmp[16] = {}; ++ int nr_bytes; ++ ++ /* For pointer types, pointer size is not defined on a per-type basis. ++ * On dump creation however, we store the pointer size. 
++ */ ++ if (btf_kind(t) == BTF_KIND_PTR) ++ nr_bytes = d->ptr_sz; ++ else ++ nr_bytes = t->size; ++ ++ if (nr_bytes < 1 || nr_bytes > 16) { ++ pr_warn("unexpected size %d for id [%u]\n", nr_bytes, id); ++ return -EINVAL; ++ } ++ ++ if (memcmp(data, bytecmp, nr_bytes) == 0) ++ return -ENODATA; ++ return 0; ++} ++ ++static bool ptr_is_aligned(const struct btf *btf, __u32 type_id, ++ const void *data) ++{ ++ int alignment = btf__align_of(btf, type_id); ++ ++ if (alignment == 0) ++ return false; ++ ++ return ((uintptr_t)data) % alignment == 0; ++} ++ ++static int btf_dump_int_data(struct btf_dump *d, ++ const struct btf_type *t, ++ __u32 type_id, ++ const void *data, ++ __u8 bits_offset) ++{ ++ __u8 encoding = btf_int_encoding(t); ++ bool sign = encoding & BTF_INT_SIGNED; ++ char buf[16] __attribute__((aligned(16))); ++ int sz = t->size; ++ ++ if (sz == 0 || sz > sizeof(buf)) { ++ pr_warn("unexpected size %d for id [%u]\n", sz, type_id); ++ return -EINVAL; ++ } ++ ++ /* handle packed int data - accesses of integers not aligned on ++ * int boundaries can cause problems on some platforms. ++ */ ++ if (!ptr_is_aligned(d->btf, type_id, data)) { ++ memcpy(buf, data, sz); ++ data = buf; ++ } ++ ++ switch (sz) { ++ case 16: { ++ const __u64 *ints = data; ++ __u64 lsi, msi; ++ ++ /* avoid use of __int128 as some 32-bit platforms do not ++ * support it. ++ */ ++#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ++ lsi = ints[0]; ++ msi = ints[1]; ++#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ ++ lsi = ints[1]; ++ msi = ints[0]; ++#else ++# error "Unrecognized __BYTE_ORDER__" ++#endif ++ if (msi == 0) ++ btf_dump_type_values(d, "0x%llx", (unsigned long long)lsi); ++ else ++ btf_dump_type_values(d, "0x%llx%016llx", (unsigned long long)msi, ++ (unsigned long long)lsi); ++ break; ++ } ++ case 8: ++ if (sign) ++ btf_dump_type_values(d, "%lld", *(long long *)data); ++ else ++ btf_dump_type_values(d, "%llu", *(unsigned long long *)data); ++ break; ++ case 4: ++ if (sign) ++ btf_dump_type_values(d, "%d", *(__s32 *)data); ++ else ++ btf_dump_type_values(d, "%u", *(__u32 *)data); ++ break; ++ case 2: ++ if (sign) ++ btf_dump_type_values(d, "%d", *(__s16 *)data); ++ else ++ btf_dump_type_values(d, "%u", *(__u16 *)data); ++ break; ++ case 1: ++ if (d->typed_dump->is_array_char) { ++ /* check for null terminator */ ++ if (d->typed_dump->is_array_terminated) ++ break; ++ if (*(char *)data == '\0') { ++ d->typed_dump->is_array_terminated = true; ++ break; ++ } ++ if (isprint(*(char *)data)) { ++ btf_dump_type_values(d, "'%c'", *(char *)data); ++ break; ++ } ++ } ++ if (sign) ++ btf_dump_type_values(d, "%d", *(__s8 *)data); ++ else ++ btf_dump_type_values(d, "%u", *(__u8 *)data); ++ break; ++ default: ++ pr_warn("unexpected sz %d for id [%u]\n", sz, type_id); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++union float_data { ++ long double ld; ++ double d; ++ float f; ++}; ++ ++static int btf_dump_float_data(struct btf_dump *d, ++ const struct btf_type *t, ++ __u32 type_id, ++ const void *data) ++{ ++ const union float_data *flp = data; ++ union float_data fl; ++ int sz = t->size; ++ ++ /* handle unaligned data; copy to local union */ ++ if (!ptr_is_aligned(d->btf, type_id, data)) { ++ memcpy(&fl, data, sz); ++ flp = &fl; ++ } ++ ++ switch (sz) { ++ case 16: ++ btf_dump_type_values(d, "%Lf", flp->ld); ++ break; ++ case 8: ++ btf_dump_type_values(d, "%lf", flp->d); ++ break; ++ case 4: ++ btf_dump_type_values(d, "%f", flp->f); ++ break; ++ default: ++ pr_warn("unexpected size %d for id [%u]\n", sz, type_id); ++ return 
-EINVAL; ++ } ++ return 0; ++} ++ ++static int btf_dump_var_data(struct btf_dump *d, ++ const struct btf_type *v, ++ __u32 id, ++ const void *data) ++{ ++ enum btf_func_linkage linkage = btf_var(v)->linkage; ++ const struct btf_type *t; ++ const char *l; ++ __u32 type_id; ++ ++ switch (linkage) { ++ case BTF_FUNC_STATIC: ++ l = "static "; ++ break; ++ case BTF_FUNC_EXTERN: ++ l = "extern "; ++ break; ++ case BTF_FUNC_GLOBAL: ++ default: ++ l = ""; ++ break; ++ } ++ ++ /* format of output here is [linkage] [type] [varname] = (type)value, ++ * for example "static int cpu_profile_flip = (int)1" ++ */ ++ btf_dump_printf(d, "%s", l); ++ type_id = v->type; ++ t = btf__type_by_id(d->btf, type_id); ++ btf_dump_emit_type_cast(d, type_id, false); ++ btf_dump_printf(d, " %s = ", btf_name_of(d, v->name_off)); ++ return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0); ++} ++ ++static int btf_dump_array_data(struct btf_dump *d, ++ const struct btf_type *t, ++ __u32 id, ++ const void *data) ++{ ++ const struct btf_array *array = btf_array(t); ++ const struct btf_type *elem_type; ++ __u32 i, elem_type_id; ++ __s64 elem_size; ++ bool is_array_member; ++ ++ elem_type_id = array->type; ++ elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL); ++ elem_size = btf__resolve_size(d->btf, elem_type_id); ++ if (elem_size <= 0) { ++ pr_warn("unexpected elem size %zd for array type [%u]\n", ++ (ssize_t)elem_size, id); ++ return -EINVAL; ++ } ++ ++ if (btf_is_int(elem_type)) { ++ /* ++ * BTF_INT_CHAR encoding never seems to be set for ++ * char arrays, so if size is 1 and element is ++ * printable as a char, we'll do that. ++ */ ++ if (elem_size == 1) ++ d->typed_dump->is_array_char = true; ++ } ++ ++ /* note that we increment depth before calling btf_dump_print() below; ++ * this is intentional. btf_dump_data_newline() will not print a ++ * newline for depth 0 (since this leaves us with trailing newlines ++ * at the end of typed display), so depth is incremented first. ++ * For similar reasons, we decrement depth before showing the closing ++ * parenthesis. ++ */ ++ d->typed_dump->depth++; ++ btf_dump_printf(d, "[%s", btf_dump_data_newline(d)); ++ ++ /* may be a multidimensional array, so store current "is array member" ++ * status so we can restore it correctly later. ++ */ ++ is_array_member = d->typed_dump->is_array_member; ++ d->typed_dump->is_array_member = true; ++ for (i = 0; i < array->nelems; i++, data += elem_size) { ++ if (d->typed_dump->is_array_terminated) ++ break; ++ btf_dump_dump_type_data(d, NULL, elem_type, elem_type_id, data, 0, 0); ++ } ++ d->typed_dump->is_array_member = is_array_member; ++ d->typed_dump->depth--; ++ btf_dump_data_pfx(d); ++ btf_dump_type_values(d, "]"); ++ ++ return 0; ++} ++ ++static int btf_dump_struct_data(struct btf_dump *d, ++ const struct btf_type *t, ++ __u32 id, ++ const void *data) ++{ ++ const struct btf_member *m = btf_members(t); ++ __u16 n = btf_vlen(t); ++ int i, err; ++ ++ /* note that we increment depth before calling btf_dump_print() below; ++ * this is intentional. btf_dump_data_newline() will not print a ++ * newline for depth 0 (since this leaves us with trailing newlines ++ * at the end of typed display), so depth is incremented first. ++ * For similar reasons, we decrement depth before showing the closing ++ * parenthesis. 
++ */ ++ d->typed_dump->depth++; ++ btf_dump_printf(d, "{%s", btf_dump_data_newline(d)); ++ ++ for (i = 0; i < n; i++, m++) { ++ const struct btf_type *mtype; ++ const char *mname; ++ __u32 moffset; ++ __u8 bit_sz; ++ ++ mtype = btf__type_by_id(d->btf, m->type); ++ mname = btf_name_of(d, m->name_off); ++ moffset = btf_member_bit_offset(t, i); ++ ++ bit_sz = btf_member_bitfield_size(t, i); ++ err = btf_dump_dump_type_data(d, mname, mtype, m->type, data + moffset / 8, ++ moffset % 8, bit_sz); ++ if (err < 0) ++ return err; ++ } ++ d->typed_dump->depth--; ++ btf_dump_data_pfx(d); ++ btf_dump_type_values(d, "}"); ++ return err; ++} ++ ++union ptr_data { ++ unsigned int p; ++ unsigned long long lp; ++}; ++ ++static int btf_dump_ptr_data(struct btf_dump *d, ++ const struct btf_type *t, ++ __u32 id, ++ const void *data) ++{ ++ if (ptr_is_aligned(d->btf, id, data) && d->ptr_sz == sizeof(void *)) { ++ btf_dump_type_values(d, "%p", *(void **)data); ++ } else { ++ union ptr_data pt; ++ ++ memcpy(&pt, data, d->ptr_sz); ++ if (d->ptr_sz == 4) ++ btf_dump_type_values(d, "0x%x", pt.p); ++ else ++ btf_dump_type_values(d, "0x%llx", pt.lp); ++ } ++ return 0; ++} ++ ++static int btf_dump_get_enum_value(struct btf_dump *d, ++ const struct btf_type *t, ++ const void *data, ++ __u32 id, ++ __s64 *value) ++{ ++ bool is_signed = btf_kflag(t); ++ ++ if (!ptr_is_aligned(d->btf, id, data)) { ++ __u64 val; ++ int err; ++ ++ err = btf_dump_get_bitfield_value(d, t, data, 0, 0, &val); ++ if (err) ++ return err; ++ *value = (__s64)val; ++ return 0; ++ } ++ ++ switch (t->size) { ++ case 8: ++ *value = *(__s64 *)data; ++ return 0; ++ case 4: ++ *value = is_signed ? (__s64)*(__s32 *)data : *(__u32 *)data; ++ return 0; ++ case 2: ++ *value = is_signed ? *(__s16 *)data : *(__u16 *)data; ++ return 0; ++ case 1: ++ *value = is_signed ? *(__s8 *)data : *(__u8 *)data; ++ return 0; ++ default: ++ pr_warn("unexpected size %d for enum, id:[%u]\n", t->size, id); ++ return -EINVAL; ++ } ++} ++ ++static int btf_dump_enum_data(struct btf_dump *d, ++ const struct btf_type *t, ++ __u32 id, ++ const void *data) ++{ ++ bool is_signed; ++ __s64 value; ++ int i, err; ++ ++ err = btf_dump_get_enum_value(d, t, data, id, &value); ++ if (err) ++ return err; ++ ++ is_signed = btf_kflag(t); ++ if (btf_is_enum(t)) { ++ const struct btf_enum *e; ++ ++ for (i = 0, e = btf_enum(t); i < btf_vlen(t); i++, e++) { ++ if (value != e->val) ++ continue; ++ btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off)); ++ return 0; ++ } ++ ++ btf_dump_type_values(d, is_signed ? "%d" : "%u", value); ++ } else { ++ const struct btf_enum64 *e; ++ ++ for (i = 0, e = btf_enum64(t); i < btf_vlen(t); i++, e++) { ++ if (value != btf_enum64_value(e)) ++ continue; ++ btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off)); ++ return 0; ++ } ++ ++ btf_dump_type_values(d, is_signed ? 
"%lldLL" : "%lluULL", ++ (unsigned long long)value); ++ } ++ return 0; ++} ++ ++static int btf_dump_datasec_data(struct btf_dump *d, ++ const struct btf_type *t, ++ __u32 id, ++ const void *data) ++{ ++ const struct btf_var_secinfo *vsi; ++ const struct btf_type *var; ++ __u32 i; ++ int err; ++ ++ btf_dump_type_values(d, "SEC(\"%s\") ", btf_name_of(d, t->name_off)); ++ ++ for (i = 0, vsi = btf_var_secinfos(t); i < btf_vlen(t); i++, vsi++) { ++ var = btf__type_by_id(d->btf, vsi->type); ++ err = btf_dump_dump_type_data(d, NULL, var, vsi->type, data + vsi->offset, 0, 0); ++ if (err < 0) ++ return err; ++ btf_dump_printf(d, ";"); ++ } ++ return 0; ++} ++ ++/* return size of type, or if base type overflows, return -E2BIG. */ ++static int btf_dump_type_data_check_overflow(struct btf_dump *d, ++ const struct btf_type *t, ++ __u32 id, ++ const void *data, ++ __u8 bits_offset) ++{ ++ __s64 size = btf__resolve_size(d->btf, id); ++ ++ if (size < 0 || size >= INT_MAX) { ++ pr_warn("unexpected size [%zu] for id [%u]\n", ++ (size_t)size, id); ++ return -EINVAL; ++ } ++ ++ /* Only do overflow checking for base types; we do not want to ++ * avoid showing part of a struct, union or array, even if we ++ * do not have enough data to show the full object. By ++ * restricting overflow checking to base types we can ensure ++ * that partial display succeeds, while avoiding overflowing ++ * and using bogus data for display. ++ */ ++ t = skip_mods_and_typedefs(d->btf, id, NULL); ++ if (!t) { ++ pr_warn("unexpected error skipping mods/typedefs for id [%u]\n", ++ id); ++ return -EINVAL; ++ } ++ ++ switch (btf_kind(t)) { ++ case BTF_KIND_INT: ++ case BTF_KIND_FLOAT: ++ case BTF_KIND_PTR: ++ case BTF_KIND_ENUM: ++ case BTF_KIND_ENUM64: ++ if (data + bits_offset / 8 + size > d->typed_dump->data_end) ++ return -E2BIG; ++ break; ++ default: ++ break; ++ } ++ return (int)size; ++} ++ ++static int btf_dump_type_data_check_zero(struct btf_dump *d, ++ const struct btf_type *t, ++ __u32 id, ++ const void *data, ++ __u8 bits_offset, ++ __u8 bit_sz) ++{ ++ __s64 value; ++ int i, err; ++ ++ /* toplevel exceptions; we show zero values if ++ * - we ask for them (emit_zeros) ++ * - if we are at top-level so we see "struct empty { }" ++ * - or if we are an array member and the array is non-empty and ++ * not a char array; we don't want to be in a situation where we ++ * have an integer array 0, 1, 0, 1 and only show non-zero values. ++ * If the array contains zeroes only, or is a char array starting ++ * with a '\0', the array-level check_zero() will prevent showing it; ++ * we are concerned with determining zero value at the array member ++ * level here. 
++ */ ++ if (d->typed_dump->emit_zeroes || d->typed_dump->depth == 0 || ++ (d->typed_dump->is_array_member && ++ !d->typed_dump->is_array_char)) ++ return 0; ++ ++ t = skip_mods_and_typedefs(d->btf, id, NULL); ++ ++ switch (btf_kind(t)) { ++ case BTF_KIND_INT: ++ if (bit_sz) ++ return btf_dump_bitfield_check_zero(d, t, data, bits_offset, bit_sz); ++ return btf_dump_base_type_check_zero(d, t, id, data); ++ case BTF_KIND_FLOAT: ++ case BTF_KIND_PTR: ++ return btf_dump_base_type_check_zero(d, t, id, data); ++ case BTF_KIND_ARRAY: { ++ const struct btf_array *array = btf_array(t); ++ const struct btf_type *elem_type; ++ __u32 elem_type_id, elem_size; ++ bool ischar; ++ ++ elem_type_id = array->type; ++ elem_size = btf__resolve_size(d->btf, elem_type_id); ++ elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL); ++ ++ ischar = btf_is_int(elem_type) && elem_size == 1; ++ ++ /* check all elements; if _any_ element is nonzero, all ++ * of array is displayed. We make an exception however ++ * for char arrays where the first element is 0; these ++ * are considered zeroed also, even if later elements are ++ * non-zero because the string is terminated. ++ */ ++ for (i = 0; i < array->nelems; i++) { ++ if (i == 0 && ischar && *(char *)data == 0) ++ return -ENODATA; ++ err = btf_dump_type_data_check_zero(d, elem_type, ++ elem_type_id, ++ data + ++ (i * elem_size), ++ bits_offset, 0); ++ if (err != -ENODATA) ++ return err; ++ } ++ return -ENODATA; ++ } ++ case BTF_KIND_STRUCT: ++ case BTF_KIND_UNION: { ++ const struct btf_member *m = btf_members(t); ++ __u16 n = btf_vlen(t); ++ ++ /* if any struct/union member is non-zero, the struct/union ++ * is considered non-zero and dumped. ++ */ ++ for (i = 0; i < n; i++, m++) { ++ const struct btf_type *mtype; ++ __u32 moffset; ++ ++ mtype = btf__type_by_id(d->btf, m->type); ++ moffset = btf_member_bit_offset(t, i); ++ ++ /* btf_int_bits() does not store member bitfield size; ++ * bitfield size needs to be stored here so int display ++ * of member can retrieve it. ++ */ ++ bit_sz = btf_member_bitfield_size(t, i); ++ err = btf_dump_type_data_check_zero(d, mtype, m->type, data + moffset / 8, ++ moffset % 8, bit_sz); ++ if (err != ENODATA) ++ return err; ++ } ++ return -ENODATA; ++ } ++ case BTF_KIND_ENUM: ++ case BTF_KIND_ENUM64: ++ err = btf_dump_get_enum_value(d, t, data, id, &value); ++ if (err) ++ return err; ++ if (value == 0) ++ return -ENODATA; ++ return 0; ++ default: ++ return 0; ++ } ++} ++ ++/* returns size of data dumped, or error. */ ++static int btf_dump_dump_type_data(struct btf_dump *d, ++ const char *fname, ++ const struct btf_type *t, ++ __u32 id, ++ const void *data, ++ __u8 bits_offset, ++ __u8 bit_sz) ++{ ++ int size, err = 0; ++ ++ size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset); ++ if (size < 0) ++ return size; ++ err = btf_dump_type_data_check_zero(d, t, id, data, bits_offset, bit_sz); ++ if (err) { ++ /* zeroed data is expected and not an error, so simply skip ++ * dumping such data. Record other errors however. 
++ */ ++ if (err == -ENODATA) ++ return size; ++ return err; ++ } ++ btf_dump_data_pfx(d); ++ ++ if (!d->typed_dump->skip_names) { ++ if (fname && strlen(fname) > 0) ++ btf_dump_printf(d, ".%s = ", fname); ++ btf_dump_emit_type_cast(d, id, true); ++ } ++ ++ t = skip_mods_and_typedefs(d->btf, id, NULL); ++ ++ switch (btf_kind(t)) { ++ case BTF_KIND_UNKN: ++ case BTF_KIND_FWD: ++ case BTF_KIND_FUNC: ++ case BTF_KIND_FUNC_PROTO: ++ case BTF_KIND_DECL_TAG: ++ err = btf_dump_unsupported_data(d, t, id); ++ break; ++ case BTF_KIND_INT: ++ if (bit_sz) ++ err = btf_dump_bitfield_data(d, t, data, bits_offset, bit_sz); ++ else ++ err = btf_dump_int_data(d, t, id, data, bits_offset); ++ break; ++ case BTF_KIND_FLOAT: ++ err = btf_dump_float_data(d, t, id, data); ++ break; ++ case BTF_KIND_PTR: ++ err = btf_dump_ptr_data(d, t, id, data); ++ break; ++ case BTF_KIND_ARRAY: ++ err = btf_dump_array_data(d, t, id, data); ++ break; ++ case BTF_KIND_STRUCT: ++ case BTF_KIND_UNION: ++ err = btf_dump_struct_data(d, t, id, data); ++ break; ++ case BTF_KIND_ENUM: ++ case BTF_KIND_ENUM64: ++ /* handle bitfield and int enum values */ ++ if (bit_sz) { ++ __u64 print_num; ++ __s64 enum_val; ++ ++ err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, ++ &print_num); ++ if (err) ++ break; ++ enum_val = (__s64)print_num; ++ err = btf_dump_enum_data(d, t, id, &enum_val); ++ } else ++ err = btf_dump_enum_data(d, t, id, data); ++ break; ++ case BTF_KIND_VAR: ++ err = btf_dump_var_data(d, t, id, data); ++ break; ++ case BTF_KIND_DATASEC: ++ err = btf_dump_datasec_data(d, t, id, data); ++ break; ++ default: ++ pr_warn("unexpected kind [%u] for id [%u]\n", ++ BTF_INFO_KIND(t->info), id); ++ return -EINVAL; ++ } ++ if (err < 0) ++ return err; ++ return size; ++} ++ ++int btf_dump__dump_type_data(struct btf_dump *d, __u32 id, ++ const void *data, size_t data_sz, ++ const struct btf_dump_type_data_opts *opts) ++{ ++ struct btf_dump_data typed_dump = {}; ++ const struct btf_type *t; ++ int ret; ++ ++ if (!OPTS_VALID(opts, btf_dump_type_data_opts)) ++ return libbpf_err(-EINVAL); ++ ++ t = btf__type_by_id(d->btf, id); ++ if (!t) ++ return libbpf_err(-ENOENT); ++ ++ d->typed_dump = &typed_dump; ++ d->typed_dump->data_end = data + data_sz; ++ d->typed_dump->indent_lvl = OPTS_GET(opts, indent_level, 0); ++ ++ /* default indent string is a tab */ ++ if (!opts->indent_str) ++ d->typed_dump->indent_str[0] = '\t'; ++ else ++ libbpf_strlcpy(d->typed_dump->indent_str, opts->indent_str, ++ sizeof(d->typed_dump->indent_str)); ++ ++ d->typed_dump->compact = OPTS_GET(opts, compact, false); ++ d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false); ++ d->typed_dump->emit_zeroes = OPTS_GET(opts, emit_zeroes, false); ++ ++ ret = btf_dump_dump_type_data(d, NULL, t, id, data, 0, 0); ++ ++ d->typed_dump = NULL; ++ ++ return libbpf_err(ret); ++} +diff --git a/src/cc/libbpf/src/gen_loader.c b/src/cc/libbpf/src/gen_loader.c +new file mode 100644 +index 0000000..23f5c46 +--- /dev/null ++++ b/src/cc/libbpf/src/gen_loader.c +@@ -0,0 +1,1121 @@ ++// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) ++/* Copyright (c) 2021 Facebook */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "btf.h" ++#include "bpf.h" ++#include "libbpf.h" ++#include "libbpf_internal.h" ++#include "hashmap.h" ++#include "bpf_gen_internal.h" ++#include "skel_internal.h" ++#include ++ ++#define MAX_USED_MAPS 64 ++#define MAX_USED_PROGS 32 ++#define MAX_KFUNC_DESCS 256 ++#define MAX_FD_ARRAY_SZ (MAX_USED_MAPS + MAX_KFUNC_DESCS) ++ ++/* 
The following structure describes the stack layout of the loader program. ++ * In addition R6 contains the pointer to context. ++ * R7 contains the result of the last sys_bpf command (typically error or FD). ++ * R9 contains the result of the last sys_close command. ++ * ++ * Naming convention: ++ * ctx - bpf program context ++ * stack - bpf program stack ++ * blob - bpf_attr-s, strings, insns, map data. ++ * All the bytes that loader prog will use for read/write. ++ */ ++struct loader_stack { ++ __u32 btf_fd; ++ __u32 inner_map_fd; ++ __u32 prog_fd[MAX_USED_PROGS]; ++}; ++ ++#define stack_off(field) \ ++ (__s16)(-sizeof(struct loader_stack) + offsetof(struct loader_stack, field)) ++ ++#define attr_field(attr, field) (attr + offsetof(union bpf_attr, field)) ++ ++static int blob_fd_array_off(struct bpf_gen *gen, int index) ++{ ++ return gen->fd_array + index * sizeof(int); ++} ++ ++static int realloc_insn_buf(struct bpf_gen *gen, __u32 size) ++{ ++ size_t off = gen->insn_cur - gen->insn_start; ++ void *insn_start; ++ ++ if (gen->error) ++ return gen->error; ++ if (size > INT32_MAX || off + size > INT32_MAX) { ++ gen->error = -ERANGE; ++ return -ERANGE; ++ } ++ insn_start = realloc(gen->insn_start, off + size); ++ if (!insn_start) { ++ gen->error = -ENOMEM; ++ free(gen->insn_start); ++ gen->insn_start = NULL; ++ return -ENOMEM; ++ } ++ gen->insn_start = insn_start; ++ gen->insn_cur = insn_start + off; ++ return 0; ++} ++ ++static int realloc_data_buf(struct bpf_gen *gen, __u32 size) ++{ ++ size_t off = gen->data_cur - gen->data_start; ++ void *data_start; ++ ++ if (gen->error) ++ return gen->error; ++ if (size > INT32_MAX || off + size > INT32_MAX) { ++ gen->error = -ERANGE; ++ return -ERANGE; ++ } ++ data_start = realloc(gen->data_start, off + size); ++ if (!data_start) { ++ gen->error = -ENOMEM; ++ free(gen->data_start); ++ gen->data_start = NULL; ++ return -ENOMEM; ++ } ++ gen->data_start = data_start; ++ gen->data_cur = data_start + off; ++ return 0; ++} ++ ++static void emit(struct bpf_gen *gen, struct bpf_insn insn) ++{ ++ if (realloc_insn_buf(gen, sizeof(insn))) ++ return; ++ memcpy(gen->insn_cur, &insn, sizeof(insn)); ++ gen->insn_cur += sizeof(insn); ++} ++ ++static void emit2(struct bpf_gen *gen, struct bpf_insn insn1, struct bpf_insn insn2) ++{ ++ emit(gen, insn1); ++ emit(gen, insn2); ++} ++ ++static int add_data(struct bpf_gen *gen, const void *data, __u32 size); ++static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off); ++ ++void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps) ++{ ++ size_t stack_sz = sizeof(struct loader_stack), nr_progs_sz; ++ int i; ++ ++ gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int)); ++ gen->log_level = log_level; ++ /* save ctx pointer into R6 */ ++ emit(gen, BPF_MOV64_REG(BPF_REG_6, BPF_REG_1)); ++ ++ /* bzero stack */ ++ emit(gen, BPF_MOV64_REG(BPF_REG_1, BPF_REG_10)); ++ emit(gen, BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -stack_sz)); ++ emit(gen, BPF_MOV64_IMM(BPF_REG_2, stack_sz)); ++ emit(gen, BPF_MOV64_IMM(BPF_REG_3, 0)); ++ emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel)); ++ ++ /* amount of stack actually used, only used to calculate iterations, not stack offset */ ++ nr_progs_sz = offsetof(struct loader_stack, prog_fd[nr_progs]); ++ /* jump over cleanup code */ ++ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, ++ /* size of cleanup code below (including map fd cleanup) */ ++ (nr_progs_sz / 4) * 3 + 2 + ++ /* 6 insns for emit_sys_close_blob, ++ * 6 insns for debug_regs in emit_sys_close_blob ++ */ ++ 
nr_maps * (6 + (gen->log_level ? 6 : 0)))); ++ ++ /* remember the label where all error branches will jump to */ ++ gen->cleanup_label = gen->insn_cur - gen->insn_start; ++ /* emit cleanup code: close all temp FDs */ ++ for (i = 0; i < nr_progs_sz; i += 4) { ++ emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -stack_sz + i)); ++ emit(gen, BPF_JMP_IMM(BPF_JSLE, BPF_REG_1, 0, 1)); ++ emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_close)); ++ } ++ for (i = 0; i < nr_maps; i++) ++ emit_sys_close_blob(gen, blob_fd_array_off(gen, i)); ++ /* R7 contains the error code from sys_bpf. Copy it into R0 and exit. */ ++ emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); ++ emit(gen, BPF_EXIT_INSN()); ++} ++ ++static int add_data(struct bpf_gen *gen, const void *data, __u32 size) ++{ ++ __u32 size8 = roundup(size, 8); ++ __u64 zero = 0; ++ void *prev; ++ ++ if (realloc_data_buf(gen, size8)) ++ return 0; ++ prev = gen->data_cur; ++ if (data) { ++ memcpy(gen->data_cur, data, size); ++ memcpy(gen->data_cur + size, &zero, size8 - size); ++ } else { ++ memset(gen->data_cur, 0, size8); ++ } ++ gen->data_cur += size8; ++ return prev - gen->data_start; ++} ++ ++/* Get index for map_fd/btf_fd slot in reserved fd_array, or in data relative ++ * to start of fd_array. Caller can decide if it is usable or not. ++ */ ++static int add_map_fd(struct bpf_gen *gen) ++{ ++ if (gen->nr_maps == MAX_USED_MAPS) { ++ pr_warn("Total maps exceeds %d\n", MAX_USED_MAPS); ++ gen->error = -E2BIG; ++ return 0; ++ } ++ return gen->nr_maps++; ++} ++ ++static int add_kfunc_btf_fd(struct bpf_gen *gen) ++{ ++ int cur; ++ ++ if (gen->nr_fd_array == MAX_KFUNC_DESCS) { ++ cur = add_data(gen, NULL, sizeof(int)); ++ return (cur - gen->fd_array) / sizeof(int); ++ } ++ return MAX_USED_MAPS + gen->nr_fd_array++; ++} ++ ++static int insn_bytes_to_bpf_size(__u32 sz) ++{ ++ switch (sz) { ++ case 8: return BPF_DW; ++ case 4: return BPF_W; ++ case 2: return BPF_H; ++ case 1: return BPF_B; ++ default: return -1; ++ } ++} ++ ++/* *(u64 *)(blob + off) = (u64)(void *)(blob + data) */ ++static void emit_rel_store(struct bpf_gen *gen, int off, int data) ++{ ++ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, ++ 0, 0, 0, data)); ++ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, ++ 0, 0, 0, off)); ++ emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0)); ++} ++ ++static void move_blob2blob(struct bpf_gen *gen, int off, int size, int blob_off) ++{ ++ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_2, BPF_PSEUDO_MAP_IDX_VALUE, ++ 0, 0, 0, blob_off)); ++ emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_2, 0)); ++ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, ++ 0, 0, 0, off)); ++ emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_1, BPF_REG_0, 0)); ++} ++ ++static void move_blob2ctx(struct bpf_gen *gen, int ctx_off, int size, int blob_off) ++{ ++ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, ++ 0, 0, 0, blob_off)); ++ emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_1, 0)); ++ emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_6, BPF_REG_0, ctx_off)); ++} ++ ++static void move_ctx2blob(struct bpf_gen *gen, int off, int size, int ctx_off, ++ bool check_non_zero) ++{ ++ emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_6, ctx_off)); ++ if (check_non_zero) ++ /* If value in ctx is zero don't update the blob. 
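The frame-pointer-relative layout implied by struct loader_stack and stack_off() above can be made concrete with a small host-side program; the struct and macro are copied from the definitions above, only main() is added, and the printed offsets follow directly from MAX_USED_PROGS:

#include <stdio.h>
#include <stddef.h>
#include <linux/types.h>

#define MAX_USED_PROGS 32	/* same value as above */

struct loader_stack {
	__u32 btf_fd;
	__u32 inner_map_fd;
	__u32 prog_fd[MAX_USED_PROGS];
};

#define stack_off(field) \
	(__s16)(-sizeof(struct loader_stack) + offsetof(struct loader_stack, field))

int main(void)
{
	/* the loader program reads/writes these slots at R10 (frame pointer) + offset */
	printf("btf_fd        at r10%+d\n", stack_off(btf_fd));
	printf("inner_map_fd  at r10%+d\n", stack_off(inner_map_fd));
	printf("prog_fd[0]    at r10%+d\n", stack_off(prog_fd[0]));
	printf("prog_fd[%d]   at r10%+d\n", MAX_USED_PROGS - 1,
	       stack_off(prog_fd[MAX_USED_PROGS - 1]));
	return 0;
}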
++ * For example: when ctx->map.max_entries == 0, keep default max_entries from bpf.c ++ */ ++ emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3)); ++ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, ++ 0, 0, 0, off)); ++ emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_1, BPF_REG_0, 0)); ++} ++ ++static void move_stack2blob(struct bpf_gen *gen, int off, int size, int stack_off) ++{ ++ emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_10, stack_off)); ++ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, ++ 0, 0, 0, off)); ++ emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_1, BPF_REG_0, 0)); ++} ++ ++static void move_stack2ctx(struct bpf_gen *gen, int ctx_off, int size, int stack_off) ++{ ++ emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_10, stack_off)); ++ emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_6, BPF_REG_0, ctx_off)); ++} ++ ++static void emit_sys_bpf(struct bpf_gen *gen, int cmd, int attr, int attr_size) ++{ ++ emit(gen, BPF_MOV64_IMM(BPF_REG_1, cmd)); ++ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_2, BPF_PSEUDO_MAP_IDX_VALUE, ++ 0, 0, 0, attr)); ++ emit(gen, BPF_MOV64_IMM(BPF_REG_3, attr_size)); ++ emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_bpf)); ++ /* remember the result in R7 */ ++ emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0)); ++} ++ ++static bool is_simm16(__s64 value) ++{ ++ return value == (__s64)(__s16)value; ++} ++ ++static void emit_check_err(struct bpf_gen *gen) ++{ ++ __s64 off = -(gen->insn_cur - gen->insn_start - gen->cleanup_label) / 8 - 1; ++ ++ /* R7 contains result of last sys_bpf command. ++ * if (R7 < 0) goto cleanup; ++ */ ++ if (is_simm16(off)) { ++ emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, off)); ++ } else { ++ gen->error = -ERANGE; ++ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, -1)); ++ } ++} ++ ++/* reg1 and reg2 should not be R1 - R5. They can be R0, R6 - R10 */ ++static void emit_debug(struct bpf_gen *gen, int reg1, int reg2, ++ const char *fmt, va_list args) ++{ ++ char buf[1024]; ++ int addr, len, ret; ++ ++ if (!gen->log_level) ++ return; ++ ret = vsnprintf(buf, sizeof(buf), fmt, args); ++ if (ret < 1024 - 7 && reg1 >= 0 && reg2 < 0) ++ /* The special case to accommodate common debug_ret(): ++ * to avoid specifying BPF_REG_7 and adding " r=%%d" to ++ * prints explicitly. ++ */ ++ strcat(buf, " r=%d"); ++ len = strlen(buf) + 1; ++ addr = add_data(gen, buf, len); ++ ++ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, ++ 0, 0, 0, addr)); ++ emit(gen, BPF_MOV64_IMM(BPF_REG_2, len)); ++ if (reg1 >= 0) ++ emit(gen, BPF_MOV64_REG(BPF_REG_3, reg1)); ++ if (reg2 >= 0) ++ emit(gen, BPF_MOV64_REG(BPF_REG_4, reg2)); ++ emit(gen, BPF_EMIT_CALL(BPF_FUNC_trace_printk)); ++} ++ ++static void debug_regs(struct bpf_gen *gen, int reg1, int reg2, const char *fmt, ...) ++{ ++ va_list args; ++ ++ va_start(args, fmt); ++ emit_debug(gen, reg1, reg2, fmt, args); ++ va_end(args); ++} ++ ++static void debug_ret(struct bpf_gen *gen, const char *fmt, ...) ++{ ++ va_list args; ++ ++ va_start(args, fmt); ++ emit_debug(gen, BPF_REG_7, -1, fmt, args); ++ va_end(args); ++} ++ ++static void __emit_sys_close(struct bpf_gen *gen) ++{ ++ emit(gen, BPF_JMP_IMM(BPF_JSLE, BPF_REG_1, 0, ++ /* 2 is the number of the following insns ++ * * 6 is additional insns in debug_regs ++ */ ++ 2 + (gen->log_level ? 
6 : 0))); ++ emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_1)); ++ emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_close)); ++ debug_regs(gen, BPF_REG_9, BPF_REG_0, "close(%%d) = %%d"); ++} ++ ++static void emit_sys_close_stack(struct bpf_gen *gen, int stack_off) ++{ ++ emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, stack_off)); ++ __emit_sys_close(gen); ++} ++ ++static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off) ++{ ++ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, ++ 0, 0, 0, blob_off)); ++ emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0)); ++ __emit_sys_close(gen); ++} ++ ++int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps) ++{ ++ int i; ++ ++ if (nr_progs < gen->nr_progs || nr_maps != gen->nr_maps) { ++ pr_warn("nr_progs %d/%d nr_maps %d/%d mismatch\n", ++ nr_progs, gen->nr_progs, nr_maps, gen->nr_maps); ++ gen->error = -EFAULT; ++ return gen->error; ++ } ++ emit_sys_close_stack(gen, stack_off(btf_fd)); ++ for (i = 0; i < gen->nr_progs; i++) ++ move_stack2ctx(gen, ++ sizeof(struct bpf_loader_ctx) + ++ sizeof(struct bpf_map_desc) * gen->nr_maps + ++ sizeof(struct bpf_prog_desc) * i + ++ offsetof(struct bpf_prog_desc, prog_fd), 4, ++ stack_off(prog_fd[i])); ++ for (i = 0; i < gen->nr_maps; i++) ++ move_blob2ctx(gen, ++ sizeof(struct bpf_loader_ctx) + ++ sizeof(struct bpf_map_desc) * i + ++ offsetof(struct bpf_map_desc, map_fd), 4, ++ blob_fd_array_off(gen, i)); ++ emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0)); ++ emit(gen, BPF_EXIT_INSN()); ++ pr_debug("gen: finish %d\n", gen->error); ++ if (!gen->error) { ++ struct gen_loader_opts *opts = gen->opts; ++ ++ opts->insns = gen->insn_start; ++ opts->insns_sz = gen->insn_cur - gen->insn_start; ++ opts->data = gen->data_start; ++ opts->data_sz = gen->data_cur - gen->data_start; ++ } ++ return gen->error; ++} ++ ++void bpf_gen__free(struct bpf_gen *gen) ++{ ++ if (!gen) ++ return; ++ free(gen->data_start); ++ free(gen->insn_start); ++ free(gen); ++} ++ ++void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data, ++ __u32 btf_raw_size) ++{ ++ int attr_size = offsetofend(union bpf_attr, btf_log_level); ++ int btf_data, btf_load_attr; ++ union bpf_attr attr; ++ ++ memset(&attr, 0, attr_size); ++ pr_debug("gen: load_btf: size %d\n", btf_raw_size); ++ btf_data = add_data(gen, btf_raw_data, btf_raw_size); ++ ++ attr.btf_size = btf_raw_size; ++ btf_load_attr = add_data(gen, &attr, attr_size); ++ ++ /* populate union bpf_attr with user provided log details */ ++ move_ctx2blob(gen, attr_field(btf_load_attr, btf_log_level), 4, ++ offsetof(struct bpf_loader_ctx, log_level), false); ++ move_ctx2blob(gen, attr_field(btf_load_attr, btf_log_size), 4, ++ offsetof(struct bpf_loader_ctx, log_size), false); ++ move_ctx2blob(gen, attr_field(btf_load_attr, btf_log_buf), 8, ++ offsetof(struct bpf_loader_ctx, log_buf), false); ++ /* populate union bpf_attr with a pointer to the BTF data */ ++ emit_rel_store(gen, attr_field(btf_load_attr, btf), btf_data); ++ /* emit BTF_LOAD command */ ++ emit_sys_bpf(gen, BPF_BTF_LOAD, btf_load_attr, attr_size); ++ debug_ret(gen, "btf_load size %d", btf_raw_size); ++ emit_check_err(gen); ++ /* remember btf_fd in the stack, if successful */ ++ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, stack_off(btf_fd))); ++} ++ ++void bpf_gen__map_create(struct bpf_gen *gen, ++ enum bpf_map_type map_type, ++ const char *map_name, ++ __u32 key_size, __u32 value_size, __u32 max_entries, ++ struct bpf_map_create_opts *map_attr, int map_idx) ++{ ++ int attr_size = offsetofend(union 
bpf_attr, map_extra); ++ bool close_inner_map_fd = false; ++ int map_create_attr, idx; ++ union bpf_attr attr; ++ ++ memset(&attr, 0, attr_size); ++ attr.map_type = map_type; ++ attr.key_size = key_size; ++ attr.value_size = value_size; ++ attr.map_flags = map_attr->map_flags; ++ attr.map_extra = map_attr->map_extra; ++ if (map_name) ++ libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); ++ attr.numa_node = map_attr->numa_node; ++ attr.map_ifindex = map_attr->map_ifindex; ++ attr.max_entries = max_entries; ++ attr.btf_key_type_id = map_attr->btf_key_type_id; ++ attr.btf_value_type_id = map_attr->btf_value_type_id; ++ ++ pr_debug("gen: map_create: %s idx %d type %d value_type_id %d\n", ++ attr.map_name, map_idx, map_type, attr.btf_value_type_id); ++ ++ map_create_attr = add_data(gen, &attr, attr_size); ++ if (attr.btf_value_type_id) ++ /* populate union bpf_attr with btf_fd saved in the stack earlier */ ++ move_stack2blob(gen, attr_field(map_create_attr, btf_fd), 4, ++ stack_off(btf_fd)); ++ switch (attr.map_type) { ++ case BPF_MAP_TYPE_ARRAY_OF_MAPS: ++ case BPF_MAP_TYPE_HASH_OF_MAPS: ++ move_stack2blob(gen, attr_field(map_create_attr, inner_map_fd), 4, ++ stack_off(inner_map_fd)); ++ close_inner_map_fd = true; ++ break; ++ default: ++ break; ++ } ++ /* conditionally update max_entries */ ++ if (map_idx >= 0) ++ move_ctx2blob(gen, attr_field(map_create_attr, max_entries), 4, ++ sizeof(struct bpf_loader_ctx) + ++ sizeof(struct bpf_map_desc) * map_idx + ++ offsetof(struct bpf_map_desc, max_entries), ++ true /* check that max_entries != 0 */); ++ /* emit MAP_CREATE command */ ++ emit_sys_bpf(gen, BPF_MAP_CREATE, map_create_attr, attr_size); ++ debug_ret(gen, "map_create %s idx %d type %d value_size %d value_btf_id %d", ++ attr.map_name, map_idx, map_type, value_size, ++ attr.btf_value_type_id); ++ emit_check_err(gen); ++ /* remember map_fd in the stack, if successful */ ++ if (map_idx < 0) { ++ /* This bpf_gen__map_create() function is called with map_idx >= 0 ++ * for all maps that libbpf loading logic tracks. ++ * It's called with -1 to create an inner map. 
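For context on how these emitters get exercised, a caller opts in through bpf_object__gen_loader() before loading the object, which routes the load steps through bpf_gen__* instead of the kernel; a rough sketch (gen_loader_for and obj_path are illustrative names, and the buffer-lifetime note reflects my understanding of how bpftool's light-skeleton path consumes the result):

#include <bpf/libbpf.h>

/* sketch: generate a loader program for a BPF object instead of loading it */
int gen_loader_for(const char *obj_path)
{
	LIBBPF_OPTS(gen_loader_opts, gen);
	struct bpf_object *obj;
	int err;

	obj = bpf_object__open_file(obj_path, NULL);
	err = libbpf_get_error(obj);
	if (err)
		return err;

	err = bpf_object__gen_loader(obj, &gen);
	if (!err)
		err = bpf_object__load(obj);
	/* on success, gen.insns/gen.insns_sz and gen.data/gen.data_sz hold the
	 * generated loader program and its metadata blob; consume or copy them
	 * before closing the object
	 */
	bpf_object__close(obj);
	return err;
}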
++ */ ++ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, ++ stack_off(inner_map_fd))); ++ } else if (map_idx != gen->nr_maps) { ++ gen->error = -EDOM; /* internal bug */ ++ return; ++ } else { ++ /* add_map_fd does gen->nr_maps++ */ ++ idx = add_map_fd(gen); ++ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, ++ 0, 0, 0, blob_fd_array_off(gen, idx))); ++ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7, 0)); ++ } ++ if (close_inner_map_fd) ++ emit_sys_close_stack(gen, stack_off(inner_map_fd)); ++} ++ ++void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *attach_name, ++ enum bpf_attach_type type) ++{ ++ const char *prefix; ++ int kind, ret; ++ ++ btf_get_kernel_prefix_kind(type, &prefix, &kind); ++ gen->attach_kind = kind; ++ ret = snprintf(gen->attach_target, sizeof(gen->attach_target), "%s%s", ++ prefix, attach_name); ++ if (ret >= sizeof(gen->attach_target)) ++ gen->error = -ENOSPC; ++} ++ ++static void emit_find_attach_target(struct bpf_gen *gen) ++{ ++ int name, len = strlen(gen->attach_target) + 1; ++ ++ pr_debug("gen: find_attach_tgt %s %d\n", gen->attach_target, gen->attach_kind); ++ name = add_data(gen, gen->attach_target, len); ++ ++ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, ++ 0, 0, 0, name)); ++ emit(gen, BPF_MOV64_IMM(BPF_REG_2, len)); ++ emit(gen, BPF_MOV64_IMM(BPF_REG_3, gen->attach_kind)); ++ emit(gen, BPF_MOV64_IMM(BPF_REG_4, 0)); ++ emit(gen, BPF_EMIT_CALL(BPF_FUNC_btf_find_by_name_kind)); ++ emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0)); ++ debug_ret(gen, "find_by_name_kind(%s,%d)", ++ gen->attach_target, gen->attach_kind); ++ emit_check_err(gen); ++ /* if successful, btf_id is in lower 32-bit of R7 and ++ * btf_obj_fd is in upper 32-bit ++ */ ++} ++ ++void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, ++ bool is_typeless, int kind, int insn_idx) ++{ ++ struct ksym_relo_desc *relo; ++ ++ relo = libbpf_reallocarray(gen->relos, gen->relo_cnt + 1, sizeof(*relo)); ++ if (!relo) { ++ gen->error = -ENOMEM; ++ return; ++ } ++ gen->relos = relo; ++ relo += gen->relo_cnt; ++ relo->name = name; ++ relo->is_weak = is_weak; ++ relo->is_typeless = is_typeless; ++ relo->kind = kind; ++ relo->insn_idx = insn_idx; ++ gen->relo_cnt++; ++} ++ ++/* returns existing ksym_desc with ref incremented, or inserts a new one */ ++static struct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_desc *relo) ++{ ++ struct ksym_desc *kdesc; ++ int i; ++ ++ for (i = 0; i < gen->nr_ksyms; i++) { ++ if (!strcmp(gen->ksyms[i].name, relo->name)) { ++ gen->ksyms[i].ref++; ++ return &gen->ksyms[i]; ++ } ++ } ++ kdesc = libbpf_reallocarray(gen->ksyms, gen->nr_ksyms + 1, sizeof(*kdesc)); ++ if (!kdesc) { ++ gen->error = -ENOMEM; ++ return NULL; ++ } ++ gen->ksyms = kdesc; ++ kdesc = &gen->ksyms[gen->nr_ksyms++]; ++ kdesc->name = relo->name; ++ kdesc->kind = relo->kind; ++ kdesc->ref = 1; ++ kdesc->off = 0; ++ kdesc->insn = 0; ++ return kdesc; ++} ++ ++/* Overwrites BPF_REG_{0, 1, 2, 3, 4, 7} ++ * Returns result in BPF_REG_7 ++ */ ++static void emit_bpf_find_by_name_kind(struct bpf_gen *gen, struct ksym_relo_desc *relo) ++{ ++ int name_off, len = strlen(relo->name) + 1; ++ ++ name_off = add_data(gen, relo->name, len); ++ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, ++ 0, 0, 0, name_off)); ++ emit(gen, BPF_MOV64_IMM(BPF_REG_2, len)); ++ emit(gen, BPF_MOV64_IMM(BPF_REG_3, relo->kind)); ++ emit(gen, BPF_MOV64_IMM(BPF_REG_4, 0)); ++ emit(gen, 
BPF_EMIT_CALL(BPF_FUNC_btf_find_by_name_kind)); ++ emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0)); ++ debug_ret(gen, "find_by_name_kind(%s,%d)", relo->name, relo->kind); ++} ++ ++/* Overwrites BPF_REG_{0, 1, 2, 3, 4, 7} ++ * Returns result in BPF_REG_7 ++ * Returns u64 symbol addr in BPF_REG_9 ++ */ ++static void emit_bpf_kallsyms_lookup_name(struct bpf_gen *gen, struct ksym_relo_desc *relo) ++{ ++ int name_off, len = strlen(relo->name) + 1, res_off; ++ ++ name_off = add_data(gen, relo->name, len); ++ res_off = add_data(gen, NULL, 8); /* res is u64 */ ++ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, ++ 0, 0, 0, name_off)); ++ emit(gen, BPF_MOV64_IMM(BPF_REG_2, len)); ++ emit(gen, BPF_MOV64_IMM(BPF_REG_3, 0)); ++ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_4, BPF_PSEUDO_MAP_IDX_VALUE, ++ 0, 0, 0, res_off)); ++ emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_4)); ++ emit(gen, BPF_EMIT_CALL(BPF_FUNC_kallsyms_lookup_name)); ++ emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0)); ++ emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0)); ++ debug_ret(gen, "kallsyms_lookup_name(%s,%d)", relo->name, relo->kind); ++} ++ ++/* Expects: ++ * BPF_REG_8 - pointer to instruction ++ * ++ * We need to reuse BTF fd for same symbol otherwise each relocation takes a new ++ * index, while kernel limits total kfunc BTFs to 256. For duplicate symbols, ++ * this would mean a new BTF fd index for each entry. By pairing symbol name ++ * with index, we get the insn->imm, insn->off pairing that kernel uses for ++ * kfunc_tab, which becomes the effective limit even though all of them may ++ * share same index in fd_array (such that kfunc_btf_tab has 1 element). ++ */ ++static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn) ++{ ++ struct ksym_desc *kdesc; ++ int btf_fd_idx; ++ ++ kdesc = get_ksym_desc(gen, relo); ++ if (!kdesc) ++ return; ++ /* try to copy from existing bpf_insn */ ++ if (kdesc->ref > 1) { ++ move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4, ++ kdesc->insn + offsetof(struct bpf_insn, imm)); ++ move_blob2blob(gen, insn + offsetof(struct bpf_insn, off), 2, ++ kdesc->insn + offsetof(struct bpf_insn, off)); ++ goto log; ++ } ++ /* remember insn offset, so we can copy BTF ID and FD later */ ++ kdesc->insn = insn; ++ emit_bpf_find_by_name_kind(gen, relo); ++ if (!relo->is_weak) ++ emit_check_err(gen); ++ /* get index in fd_array to store BTF FD at */ ++ btf_fd_idx = add_kfunc_btf_fd(gen); ++ if (btf_fd_idx > INT16_MAX) { ++ pr_warn("BTF fd off %d for kfunc %s exceeds INT16_MAX, cannot process relocation\n", ++ btf_fd_idx, relo->name); ++ gen->error = -E2BIG; ++ return; ++ } ++ kdesc->off = btf_fd_idx; ++ /* jump to success case */ ++ emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3)); ++ /* set value for imm, off as 0 */ ++ emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0)); ++ emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); ++ /* skip success case for ret < 0 */ ++ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 10)); ++ /* store btf_id into insn[insn_idx].imm */ ++ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm))); ++ /* obtain fd in BPF_REG_9 */ ++ emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7)); ++ emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); ++ /* jump to fd_array store if fd denotes module BTF */ ++ emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2)); ++ /* set the default value for off */ ++ emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); 
++ /* skip BTF fd store for vmlinux BTF */ ++ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4)); ++ /* load fd_array slot pointer */ ++ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, ++ 0, 0, 0, blob_fd_array_off(gen, btf_fd_idx))); ++ /* store BTF fd in slot */ ++ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0)); ++ /* store index into insn[insn_idx].off */ ++ emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), btf_fd_idx)); ++log: ++ if (!gen->log_level) ++ return; ++ emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_8, ++ offsetof(struct bpf_insn, imm))); ++ emit(gen, BPF_LDX_MEM(BPF_H, BPF_REG_9, BPF_REG_8, ++ offsetof(struct bpf_insn, off))); ++ debug_regs(gen, BPF_REG_7, BPF_REG_9, " func (%s:count=%d): imm: %%d, off: %%d", ++ relo->name, kdesc->ref); ++ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, ++ 0, 0, 0, blob_fd_array_off(gen, kdesc->off))); ++ emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_0, 0)); ++ debug_regs(gen, BPF_REG_9, -1, " func (%s:count=%d): btf_fd", ++ relo->name, kdesc->ref); ++} ++ ++static void emit_ksym_relo_log(struct bpf_gen *gen, struct ksym_relo_desc *relo, ++ int ref) ++{ ++ if (!gen->log_level) ++ return; ++ emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_8, ++ offsetof(struct bpf_insn, imm))); ++ emit(gen, BPF_LDX_MEM(BPF_H, BPF_REG_9, BPF_REG_8, sizeof(struct bpf_insn) + ++ offsetof(struct bpf_insn, imm))); ++ debug_regs(gen, BPF_REG_7, BPF_REG_9, " var t=%d w=%d (%s:count=%d): imm[0]: %%d, imm[1]: %%d", ++ relo->is_typeless, relo->is_weak, relo->name, ref); ++ emit(gen, BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_8, offsetofend(struct bpf_insn, code))); ++ debug_regs(gen, BPF_REG_9, -1, " var t=%d w=%d (%s:count=%d): insn.reg", ++ relo->is_typeless, relo->is_weak, relo->name, ref); ++} ++ ++/* Expects: ++ * BPF_REG_8 - pointer to instruction ++ */ ++static void emit_relo_ksym_typeless(struct bpf_gen *gen, ++ struct ksym_relo_desc *relo, int insn) ++{ ++ struct ksym_desc *kdesc; ++ ++ kdesc = get_ksym_desc(gen, relo); ++ if (!kdesc) ++ return; ++ /* try to copy from existing ldimm64 insn */ ++ if (kdesc->ref > 1) { ++ move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4, ++ kdesc->insn + offsetof(struct bpf_insn, imm)); ++ move_blob2blob(gen, insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 4, ++ kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)); ++ goto log; ++ } ++ /* remember insn offset, so we can copy ksym addr later */ ++ kdesc->insn = insn; ++ /* skip typeless ksym_desc in fd closing loop in cleanup_relos */ ++ kdesc->typeless = true; ++ emit_bpf_kallsyms_lookup_name(gen, relo); ++ emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_7, -ENOENT, 1)); ++ emit_check_err(gen); ++ /* store lower half of addr into insn[insn_idx].imm */ ++ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_9, offsetof(struct bpf_insn, imm))); ++ /* store upper half of addr into insn[insn_idx + 1].imm */ ++ emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); ++ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_9, ++ sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm))); ++log: ++ emit_ksym_relo_log(gen, relo, kdesc->ref); ++} ++ ++static __u32 src_reg_mask(void) ++{ ++#if defined(__LITTLE_ENDIAN_BITFIELD) ++ return 0x0f; /* src_reg,dst_reg,... */ ++#elif defined(__BIG_ENDIAN_BITFIELD) ++ return 0xf0; /* dst_reg,src_reg,... 
*/ ++#else ++#error "Unsupported bit endianness, cannot proceed" ++#endif ++} ++ ++/* Expects: ++ * BPF_REG_8 - pointer to instruction ++ */ ++static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn) ++{ ++ struct ksym_desc *kdesc; ++ __u32 reg_mask; ++ ++ kdesc = get_ksym_desc(gen, relo); ++ if (!kdesc) ++ return; ++ /* try to copy from existing ldimm64 insn */ ++ if (kdesc->ref > 1) { ++ move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4, ++ kdesc->insn + offsetof(struct bpf_insn, imm)); ++ move_blob2blob(gen, insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 4, ++ kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)); ++ /* jump over src_reg adjustment if imm is not 0, reuse BPF_REG_0 from move_blob2blob */ ++ emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3)); ++ goto clear_src_reg; ++ } ++ /* remember insn offset, so we can copy BTF ID and FD later */ ++ kdesc->insn = insn; ++ emit_bpf_find_by_name_kind(gen, relo); ++ if (!relo->is_weak) ++ emit_check_err(gen); ++ /* jump to success case */ ++ emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3)); ++ /* set values for insn[insn_idx].imm, insn[insn_idx + 1].imm as 0 */ ++ emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0)); ++ emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 0)); ++ /* skip success case for ret < 0 */ ++ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4)); ++ /* store btf_id into insn[insn_idx].imm */ ++ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm))); ++ /* store btf_obj_fd into insn[insn_idx + 1].imm */ ++ emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32)); ++ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, ++ sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm))); ++ /* skip src_reg adjustment */ ++ emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3)); ++clear_src_reg: ++ /* clear bpf_object__relocate_data's src_reg assignment, otherwise we get a verifier failure */ ++ reg_mask = src_reg_mask(); ++ emit(gen, BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_8, offsetofend(struct bpf_insn, code))); ++ emit(gen, BPF_ALU32_IMM(BPF_AND, BPF_REG_9, reg_mask)); ++ emit(gen, BPF_STX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, offsetofend(struct bpf_insn, code))); ++ ++ emit_ksym_relo_log(gen, relo, kdesc->ref); ++} ++ ++void bpf_gen__record_relo_core(struct bpf_gen *gen, ++ const struct bpf_core_relo *core_relo) ++{ ++ struct bpf_core_relo *relos; ++ ++ relos = libbpf_reallocarray(gen->core_relos, gen->core_relo_cnt + 1, sizeof(*relos)); ++ if (!relos) { ++ gen->error = -ENOMEM; ++ return; ++ } ++ gen->core_relos = relos; ++ relos += gen->core_relo_cnt; ++ memcpy(relos, core_relo, sizeof(*relos)); ++ gen->core_relo_cnt++; ++} ++ ++static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns) ++{ ++ int insn; ++ ++ pr_debug("gen: emit_relo (%d): %s at %d\n", relo->kind, relo->name, relo->insn_idx); ++ insn = insns + sizeof(struct bpf_insn) * relo->insn_idx; ++ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_8, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, insn)); ++ switch (relo->kind) { ++ case BTF_KIND_VAR: ++ if (relo->is_typeless) ++ emit_relo_ksym_typeless(gen, relo, insn); ++ else ++ emit_relo_ksym_btf(gen, relo, insn); ++ break; ++ case BTF_KIND_FUNC: ++ emit_relo_kfunc_btf(gen, relo, insn); ++ break; ++ default: ++ pr_warn("Unknown relocation kind '%d'\n", relo->kind); ++ gen->error = -EDOM; ++ return; ++ } ++} ++ ++static void emit_relos(struct bpf_gen *gen, int insns) 
++{ ++ int i; ++ ++ for (i = 0; i < gen->relo_cnt; i++) ++ emit_relo(gen, gen->relos + i, insns); ++} ++ ++static void cleanup_core_relo(struct bpf_gen *gen) ++{ ++ if (!gen->core_relo_cnt) ++ return; ++ free(gen->core_relos); ++ gen->core_relo_cnt = 0; ++ gen->core_relos = NULL; ++} ++ ++static void cleanup_relos(struct bpf_gen *gen, int insns) ++{ ++ int i, insn; ++ ++ for (i = 0; i < gen->nr_ksyms; i++) { ++ /* only close fds for typed ksyms and kfuncs */ ++ if (gen->ksyms[i].kind == BTF_KIND_VAR && !gen->ksyms[i].typeless) { ++ /* close fd recorded in insn[insn_idx + 1].imm */ ++ insn = gen->ksyms[i].insn; ++ insn += sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm); ++ emit_sys_close_blob(gen, insn); ++ } else if (gen->ksyms[i].kind == BTF_KIND_FUNC) { ++ emit_sys_close_blob(gen, blob_fd_array_off(gen, gen->ksyms[i].off)); ++ if (gen->ksyms[i].off < MAX_FD_ARRAY_SZ) ++ gen->nr_fd_array--; ++ } ++ } ++ if (gen->nr_ksyms) { ++ free(gen->ksyms); ++ gen->nr_ksyms = 0; ++ gen->ksyms = NULL; ++ } ++ if (gen->relo_cnt) { ++ free(gen->relos); ++ gen->relo_cnt = 0; ++ gen->relos = NULL; ++ } ++ cleanup_core_relo(gen); ++} ++ ++void bpf_gen__prog_load(struct bpf_gen *gen, ++ enum bpf_prog_type prog_type, const char *prog_name, ++ const char *license, struct bpf_insn *insns, size_t insn_cnt, ++ struct bpf_prog_load_opts *load_attr, int prog_idx) ++{ ++ int prog_load_attr, license_off, insns_off, func_info, line_info, core_relos; ++ int attr_size = offsetofend(union bpf_attr, core_relo_rec_size); ++ union bpf_attr attr; ++ ++ memset(&attr, 0, attr_size); ++ pr_debug("gen: prog_load: type %d insns_cnt %zd progi_idx %d\n", ++ prog_type, insn_cnt, prog_idx); ++ /* add license string to blob of bytes */ ++ license_off = add_data(gen, license, strlen(license) + 1); ++ /* add insns to blob of bytes */ ++ insns_off = add_data(gen, insns, insn_cnt * sizeof(struct bpf_insn)); ++ ++ attr.prog_type = prog_type; ++ attr.expected_attach_type = load_attr->expected_attach_type; ++ attr.attach_btf_id = load_attr->attach_btf_id; ++ attr.prog_ifindex = load_attr->prog_ifindex; ++ attr.kern_version = 0; ++ attr.insn_cnt = (__u32)insn_cnt; ++ attr.prog_flags = load_attr->prog_flags; ++ ++ attr.func_info_rec_size = load_attr->func_info_rec_size; ++ attr.func_info_cnt = load_attr->func_info_cnt; ++ func_info = add_data(gen, load_attr->func_info, ++ attr.func_info_cnt * attr.func_info_rec_size); ++ ++ attr.line_info_rec_size = load_attr->line_info_rec_size; ++ attr.line_info_cnt = load_attr->line_info_cnt; ++ line_info = add_data(gen, load_attr->line_info, ++ attr.line_info_cnt * attr.line_info_rec_size); ++ ++ attr.core_relo_rec_size = sizeof(struct bpf_core_relo); ++ attr.core_relo_cnt = gen->core_relo_cnt; ++ core_relos = add_data(gen, gen->core_relos, ++ attr.core_relo_cnt * attr.core_relo_rec_size); ++ ++ libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); ++ prog_load_attr = add_data(gen, &attr, attr_size); ++ ++ /* populate union bpf_attr with a pointer to license */ ++ emit_rel_store(gen, attr_field(prog_load_attr, license), license_off); ++ ++ /* populate union bpf_attr with a pointer to instructions */ ++ emit_rel_store(gen, attr_field(prog_load_attr, insns), insns_off); ++ ++ /* populate union bpf_attr with a pointer to func_info */ ++ emit_rel_store(gen, attr_field(prog_load_attr, func_info), func_info); ++ ++ /* populate union bpf_attr with a pointer to line_info */ ++ emit_rel_store(gen, attr_field(prog_load_attr, line_info), line_info); ++ ++ /* populate union bpf_attr with a 
pointer to core_relos */ ++ emit_rel_store(gen, attr_field(prog_load_attr, core_relos), core_relos); ++ ++ /* populate union bpf_attr fd_array with a pointer to data where map_fds are saved */ ++ emit_rel_store(gen, attr_field(prog_load_attr, fd_array), gen->fd_array); ++ ++ /* populate union bpf_attr with user provided log details */ ++ move_ctx2blob(gen, attr_field(prog_load_attr, log_level), 4, ++ offsetof(struct bpf_loader_ctx, log_level), false); ++ move_ctx2blob(gen, attr_field(prog_load_attr, log_size), 4, ++ offsetof(struct bpf_loader_ctx, log_size), false); ++ move_ctx2blob(gen, attr_field(prog_load_attr, log_buf), 8, ++ offsetof(struct bpf_loader_ctx, log_buf), false); ++ /* populate union bpf_attr with btf_fd saved in the stack earlier */ ++ move_stack2blob(gen, attr_field(prog_load_attr, prog_btf_fd), 4, ++ stack_off(btf_fd)); ++ if (gen->attach_kind) { ++ emit_find_attach_target(gen); ++ /* populate union bpf_attr with btf_id and btf_obj_fd found by helper */ ++ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, ++ 0, 0, 0, prog_load_attr)); ++ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, ++ offsetof(union bpf_attr, attach_btf_id))); ++ emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32)); ++ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, ++ offsetof(union bpf_attr, attach_btf_obj_fd))); ++ } ++ emit_relos(gen, insns_off); ++ /* emit PROG_LOAD command */ ++ emit_sys_bpf(gen, BPF_PROG_LOAD, prog_load_attr, attr_size); ++ debug_ret(gen, "prog_load %s insn_cnt %d", attr.prog_name, attr.insn_cnt); ++ /* successful or not, close btf module FDs used in extern ksyms and attach_btf_obj_fd */ ++ cleanup_relos(gen, insns_off); ++ if (gen->attach_kind) { ++ emit_sys_close_blob(gen, ++ attr_field(prog_load_attr, attach_btf_obj_fd)); ++ gen->attach_kind = 0; ++ } ++ emit_check_err(gen); ++ /* remember prog_fd in the stack, if successful */ ++ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, ++ stack_off(prog_fd[gen->nr_progs]))); ++ gen->nr_progs++; ++} ++ ++void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, ++ __u32 value_size) ++{ ++ int attr_size = offsetofend(union bpf_attr, flags); ++ int map_update_attr, value, key; ++ union bpf_attr attr; ++ int zero = 0; ++ ++ memset(&attr, 0, attr_size); ++ pr_debug("gen: map_update_elem: idx %d\n", map_idx); ++ ++ value = add_data(gen, pvalue, value_size); ++ key = add_data(gen, &zero, sizeof(zero)); ++ ++ /* if (map_desc[map_idx].initial_value) { ++ * if (ctx->flags & BPF_SKEL_KERNEL) ++ * bpf_probe_read_kernel(value, value_size, initial_value); ++ * else ++ * bpf_copy_from_user(value, value_size, initial_value); ++ * } ++ */ ++ emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, ++ sizeof(struct bpf_loader_ctx) + ++ sizeof(struct bpf_map_desc) * map_idx + ++ offsetof(struct bpf_map_desc, initial_value))); ++ emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 8)); ++ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, ++ 0, 0, 0, value)); ++ emit(gen, BPF_MOV64_IMM(BPF_REG_2, value_size)); ++ emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, ++ offsetof(struct bpf_loader_ctx, flags))); ++ emit(gen, BPF_JMP_IMM(BPF_JSET, BPF_REG_0, BPF_SKEL_KERNEL, 2)); ++ emit(gen, BPF_EMIT_CALL(BPF_FUNC_copy_from_user)); ++ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 1)); ++ emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel)); ++ ++ map_update_attr = add_data(gen, &attr, attr_size); ++ move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, ++ blob_fd_array_off(gen, map_idx)); ++ 
emit_rel_store(gen, attr_field(map_update_attr, key), key);
++ emit_rel_store(gen, attr_field(map_update_attr, value), value);
++ /* emit MAP_UPDATE_ELEM command */
++ emit_sys_bpf(gen, BPF_MAP_UPDATE_ELEM, map_update_attr, attr_size);
++ debug_ret(gen, "update_elem idx %d value_size %d", map_idx, value_size);
++ emit_check_err(gen);
++}
++
++void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int slot,
++ int inner_map_idx)
++{
++ int attr_size = offsetofend(union bpf_attr, flags);
++ int map_update_attr, key;
++ union bpf_attr attr;
++
++ memset(&attr, 0, attr_size);
++ pr_debug("gen: populate_outer_map: outer %d key %d inner %d\n",
++ outer_map_idx, slot, inner_map_idx);
++
++ key = add_data(gen, &slot, sizeof(slot));
++
++ map_update_attr = add_data(gen, &attr, attr_size);
++ move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4,
++ blob_fd_array_off(gen, outer_map_idx));
++ emit_rel_store(gen, attr_field(map_update_attr, key), key);
++ emit_rel_store(gen, attr_field(map_update_attr, value),
++ blob_fd_array_off(gen, inner_map_idx));
++
++ /* emit MAP_UPDATE_ELEM command */
++ emit_sys_bpf(gen, BPF_MAP_UPDATE_ELEM, map_update_attr, attr_size);
++ debug_ret(gen, "populate_outer_map outer %d key %d inner %d",
++ outer_map_idx, slot, inner_map_idx);
++ emit_check_err(gen);
++}
++
++void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx)
++{
++ int attr_size = offsetofend(union bpf_attr, map_fd);
++ int map_freeze_attr;
++ union bpf_attr attr;
++
++ memset(&attr, 0, attr_size);
++ pr_debug("gen: map_freeze: idx %d\n", map_idx);
++ map_freeze_attr = add_data(gen, &attr, attr_size);
++ move_blob2blob(gen, attr_field(map_freeze_attr, map_fd), 4,
++ blob_fd_array_off(gen, map_idx));
++ /* emit MAP_FREEZE command */
++ emit_sys_bpf(gen, BPF_MAP_FREEZE, map_freeze_attr, attr_size);
++ debug_ret(gen, "map_freeze");
++ emit_check_err(gen);
++}
+diff --git a/src/cc/libbpf/src/hashmap.c b/src/cc/libbpf/src/hashmap.c
+new file mode 100644
+index 0000000..aeb09c2
+--- /dev/null
++++ b/src/cc/libbpf/src/hashmap.c
+@@ -0,0 +1,240 @@
++// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
++
++/*
++ * Generic non-thread safe hash map implementation.
++ * ++ * Copyright (c) 2019 Facebook ++ */ ++#include ++#include ++#include ++#include ++#include ++#include "hashmap.h" ++ ++/* make sure libbpf doesn't use kernel-only integer typedefs */ ++#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 ++ ++/* prevent accidental re-addition of reallocarray() */ ++#pragma GCC poison reallocarray ++ ++/* start with 4 buckets */ ++#define HASHMAP_MIN_CAP_BITS 2 ++ ++static void hashmap_add_entry(struct hashmap_entry **pprev, ++ struct hashmap_entry *entry) ++{ ++ entry->next = *pprev; ++ *pprev = entry; ++} ++ ++static void hashmap_del_entry(struct hashmap_entry **pprev, ++ struct hashmap_entry *entry) ++{ ++ *pprev = entry->next; ++ entry->next = NULL; ++} ++ ++void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn, ++ hashmap_equal_fn equal_fn, void *ctx) ++{ ++ map->hash_fn = hash_fn; ++ map->equal_fn = equal_fn; ++ map->ctx = ctx; ++ ++ map->buckets = NULL; ++ map->cap = 0; ++ map->cap_bits = 0; ++ map->sz = 0; ++} ++ ++struct hashmap *hashmap__new(hashmap_hash_fn hash_fn, ++ hashmap_equal_fn equal_fn, ++ void *ctx) ++{ ++ struct hashmap *map = malloc(sizeof(struct hashmap)); ++ ++ if (!map) ++ return ERR_PTR(-ENOMEM); ++ hashmap__init(map, hash_fn, equal_fn, ctx); ++ return map; ++} ++ ++void hashmap__clear(struct hashmap *map) ++{ ++ struct hashmap_entry *cur, *tmp; ++ size_t bkt; ++ ++ hashmap__for_each_entry_safe(map, cur, tmp, bkt) { ++ free(cur); ++ } ++ free(map->buckets); ++ map->buckets = NULL; ++ map->cap = map->cap_bits = map->sz = 0; ++} ++ ++void hashmap__free(struct hashmap *map) ++{ ++ if (IS_ERR_OR_NULL(map)) ++ return; ++ ++ hashmap__clear(map); ++ free(map); ++} ++ ++size_t hashmap__size(const struct hashmap *map) ++{ ++ return map->sz; ++} ++ ++size_t hashmap__capacity(const struct hashmap *map) ++{ ++ return map->cap; ++} ++ ++static bool hashmap_needs_to_grow(struct hashmap *map) ++{ ++ /* grow if empty or more than 75% filled */ ++ return (map->cap == 0) || ((map->sz + 1) * 4 / 3 > map->cap); ++} ++ ++static int hashmap_grow(struct hashmap *map) ++{ ++ struct hashmap_entry **new_buckets; ++ struct hashmap_entry *cur, *tmp; ++ size_t new_cap_bits, new_cap; ++ size_t h, bkt; ++ ++ new_cap_bits = map->cap_bits + 1; ++ if (new_cap_bits < HASHMAP_MIN_CAP_BITS) ++ new_cap_bits = HASHMAP_MIN_CAP_BITS; ++ ++ new_cap = 1UL << new_cap_bits; ++ new_buckets = calloc(new_cap, sizeof(new_buckets[0])); ++ if (!new_buckets) ++ return -ENOMEM; ++ ++ hashmap__for_each_entry_safe(map, cur, tmp, bkt) { ++ h = hash_bits(map->hash_fn(cur->key, map->ctx), new_cap_bits); ++ hashmap_add_entry(&new_buckets[h], cur); ++ } ++ ++ map->cap = new_cap; ++ map->cap_bits = new_cap_bits; ++ free(map->buckets); ++ map->buckets = new_buckets; ++ ++ return 0; ++} ++ ++static bool hashmap_find_entry(const struct hashmap *map, ++ const void *key, size_t hash, ++ struct hashmap_entry ***pprev, ++ struct hashmap_entry **entry) ++{ ++ struct hashmap_entry *cur, **prev_ptr; ++ ++ if (!map->buckets) ++ return false; ++ ++ for (prev_ptr = &map->buckets[hash], cur = *prev_ptr; ++ cur; ++ prev_ptr = &cur->next, cur = cur->next) { ++ if (map->equal_fn(cur->key, key, map->ctx)) { ++ if (pprev) ++ *pprev = prev_ptr; ++ *entry = cur; ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++int hashmap__insert(struct hashmap *map, const void *key, void *value, ++ enum hashmap_insert_strategy strategy, ++ const void **old_key, void **old_value) ++{ ++ struct hashmap_entry *entry; ++ size_t h; ++ int err; ++ ++ if (old_key) ++ *old_key = NULL; ++ if (old_value) ++ *old_value 
= NULL;
++
++ h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
++ if (strategy != HASHMAP_APPEND &&
++ hashmap_find_entry(map, key, h, NULL, &entry)) {
++ if (old_key)
++ *old_key = entry->key;
++ if (old_value)
++ *old_value = entry->value;
++
++ if (strategy == HASHMAP_SET || strategy == HASHMAP_UPDATE) {
++ entry->key = key;
++ entry->value = value;
++ return 0;
++ } else if (strategy == HASHMAP_ADD) {
++ return -EEXIST;
++ }
++ }
++
++ if (strategy == HASHMAP_UPDATE)
++ return -ENOENT;
++
++ if (hashmap_needs_to_grow(map)) {
++ err = hashmap_grow(map);
++ if (err)
++ return err;
++ h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
++ }
++
++ entry = malloc(sizeof(struct hashmap_entry));
++ if (!entry)
++ return -ENOMEM;
++
++ entry->key = key;
++ entry->value = value;
++ hashmap_add_entry(&map->buckets[h], entry);
++ map->sz++;
++
++ return 0;
++}
++
++bool hashmap__find(const struct hashmap *map, const void *key, void **value)
++{
++ struct hashmap_entry *entry;
++ size_t h;
++
++ h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
++ if (!hashmap_find_entry(map, key, h, NULL, &entry))
++ return false;
++
++ if (value)
++ *value = entry->value;
++ return true;
++}
++
++bool hashmap__delete(struct hashmap *map, const void *key,
++ const void **old_key, void **old_value)
++{
++ struct hashmap_entry **pprev, *entry;
++ size_t h;
++
++ h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
++ if (!hashmap_find_entry(map, key, h, &pprev, &entry))
++ return false;
++
++ if (old_key)
++ *old_key = entry->key;
++ if (old_value)
++ *old_value = entry->value;
++
++ hashmap_del_entry(pprev, entry);
++ free(entry);
++ map->sz--;
++
++ return true;
++}
+diff --git a/src/cc/libbpf/src/hashmap.h b/src/cc/libbpf/src/hashmap.h
+new file mode 100644
+index 0000000..10a4c4c
+--- /dev/null
++++ b/src/cc/libbpf/src/hashmap.h
+@@ -0,0 +1,195 @@
++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
++
++/*
++ * Generic non-thread safe hash map implementation.
++ *
++ * Copyright (c) 2019 Facebook
++ */
++#ifndef __LIBBPF_HASHMAP_H
++#define __LIBBPF_HASHMAP_H
++
++#include
++#include
++#include
++
++static inline size_t hash_bits(size_t h, int bits)
++{
++ /* shuffle bits and return requested number of upper bits */
++ if (bits == 0)
++ return 0;
++
++#if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__)
++ /* LP64 case */
++ return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits);
++#elif (__SIZEOF_SIZE_T__ <= __SIZEOF_LONG__)
++ return (h * 2654435769lu) >> (__SIZEOF_LONG__ * 8 - bits);
++#else
++# error "Unsupported size_t size"
++#endif
++}
++
++/* generic C-string hashing function */
++static inline size_t str_hash(const char *s)
++{
++ size_t h = 0;
++
++ while (*s) {
++ h = h * 31 + *s;
++ s++;
++ }
++ return h;
++}
++
++typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx);
++typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx);
++
++struct hashmap_entry {
++ const void *key;
++ void *value;
++ struct hashmap_entry *next;
++};
++
++struct hashmap {
++ hashmap_hash_fn hash_fn;
++ hashmap_equal_fn equal_fn;
++ void *ctx;
++
++ struct hashmap_entry **buckets;
++ size_t cap;
++ size_t cap_bits;
++ size_t sz;
++};
++
++#define HASHMAP_INIT(hash_fn, equal_fn, ctx) { \
++ .hash_fn = (hash_fn), \
++ .equal_fn = (equal_fn), \
++ .ctx = (ctx), \
++ .buckets = NULL, \
++ .cap = 0, \
++ .cap_bits = 0, \
++ .sz = 0, \
++}
++
++void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
++ hashmap_equal_fn equal_fn, void *ctx);
++struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
++ hashmap_equal_fn equal_fn,
++ void *ctx);
++void hashmap__clear(struct hashmap *map);
++void hashmap__free(struct hashmap *map);
++
++size_t hashmap__size(const struct hashmap *map);
++size_t hashmap__capacity(const struct hashmap *map);
++
++/*
++ * Hashmap insertion strategy:
++ * - HASHMAP_ADD - only add key/value if key doesn't exist yet;
++ * - HASHMAP_SET - add key/value pair if key doesn't exist yet; otherwise,
++ * update value;
++ * - HASHMAP_UPDATE - update value, if key already exists; otherwise, do
++ * nothing and return -ENOENT;
++ * - HASHMAP_APPEND - always add key/value pair, even if key already exists.
++ * This turns hashmap into a multimap by allowing multiple values to be
++ * associated with the same key. Most useful read API for such hashmap is
++ * hashmap__for_each_key_entry() iteration. If hashmap__find() is still
++ * used, it will return last inserted key/value entry (first in a bucket
++ * chain).
++ */
++enum hashmap_insert_strategy {
++ HASHMAP_ADD,
++ HASHMAP_SET,
++ HASHMAP_UPDATE,
++ HASHMAP_APPEND,
++};
++
++/*
++ * hashmap__insert() adds key/value entry w/ various semantics, depending on
++ * provided strategy value. If a given key/value pair replaced already
++ * existing key/value pair, both old key and old value will be returned
++ * through old_key and old_value to allow calling code do proper memory
++ * management.
++ */ ++int hashmap__insert(struct hashmap *map, const void *key, void *value, ++ enum hashmap_insert_strategy strategy, ++ const void **old_key, void **old_value); ++ ++static inline int hashmap__add(struct hashmap *map, ++ const void *key, void *value) ++{ ++ return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL); ++} ++ ++static inline int hashmap__set(struct hashmap *map, ++ const void *key, void *value, ++ const void **old_key, void **old_value) ++{ ++ return hashmap__insert(map, key, value, HASHMAP_SET, ++ old_key, old_value); ++} ++ ++static inline int hashmap__update(struct hashmap *map, ++ const void *key, void *value, ++ const void **old_key, void **old_value) ++{ ++ return hashmap__insert(map, key, value, HASHMAP_UPDATE, ++ old_key, old_value); ++} ++ ++static inline int hashmap__append(struct hashmap *map, ++ const void *key, void *value) ++{ ++ return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL); ++} ++ ++bool hashmap__delete(struct hashmap *map, const void *key, ++ const void **old_key, void **old_value); ++ ++bool hashmap__find(const struct hashmap *map, const void *key, void **value); ++ ++/* ++ * hashmap__for_each_entry - iterate over all entries in hashmap ++ * @map: hashmap to iterate ++ * @cur: struct hashmap_entry * used as a loop cursor ++ * @bkt: integer used as a bucket loop cursor ++ */ ++#define hashmap__for_each_entry(map, cur, bkt) \ ++ for (bkt = 0; bkt < map->cap; bkt++) \ ++ for (cur = map->buckets[bkt]; cur; cur = cur->next) ++ ++/* ++ * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe ++ * against removals ++ * @map: hashmap to iterate ++ * @cur: struct hashmap_entry * used as a loop cursor ++ * @tmp: struct hashmap_entry * used as a temporary next cursor storage ++ * @bkt: integer used as a bucket loop cursor ++ */ ++#define hashmap__for_each_entry_safe(map, cur, tmp, bkt) \ ++ for (bkt = 0; bkt < map->cap; bkt++) \ ++ for (cur = map->buckets[bkt]; \ ++ cur && ({tmp = cur->next; true; }); \ ++ cur = tmp) ++ ++/* ++ * hashmap__for_each_key_entry - iterate over entries associated with given key ++ * @map: hashmap to iterate ++ * @cur: struct hashmap_entry * used as a loop cursor ++ * @key: key to iterate entries for ++ */ ++#define hashmap__for_each_key_entry(map, cur, _key) \ ++ for (cur = map->buckets \ ++ ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ ++ : NULL; \ ++ cur; \ ++ cur = cur->next) \ ++ if (map->equal_fn(cur->key, (_key), map->ctx)) ++ ++#define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \ ++ for (cur = map->buckets \ ++ ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ ++ : NULL; \ ++ cur && ({ tmp = cur->next; true; }); \ ++ cur = tmp) \ ++ if (map->equal_fn(cur->key, (_key), map->ctx)) ++ ++#endif /* __LIBBPF_HASHMAP_H */ +diff --git a/src/cc/libbpf/src/libbpf.c b/src/cc/libbpf/src/libbpf.c +new file mode 100644 +index 0000000..50d4181 +--- /dev/null ++++ b/src/cc/libbpf/src/libbpf.c +@@ -0,0 +1,12388 @@ ++// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) ++ ++/* ++ * Common eBPF ELF object loading operations. ++ * ++ * Copyright (C) 2013-2015 Alexei Starovoitov ++ * Copyright (C) 2015 Wang Nan ++ * Copyright (C) 2015 Huawei Inc. ++ * Copyright (C) 2017 Nicira, Inc. ++ * Copyright (C) 2019 Isovalent, Inc. 
++ */ ++ ++#ifndef _GNU_SOURCE ++#define _GNU_SOURCE ++#endif ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "libbpf.h" ++#include "bpf.h" ++#include "btf.h" ++#include "str_error.h" ++#include "libbpf_internal.h" ++#include "hashmap.h" ++#include "bpf_gen_internal.h" ++ ++#ifndef BPF_FS_MAGIC ++#define BPF_FS_MAGIC 0xcafe4a11 ++#endif ++ ++#define BPF_INSN_SZ (sizeof(struct bpf_insn)) ++ ++/* vsprintf() in __base_pr() uses nonliteral format string. It may break ++ * compilation if user enables corresponding warning. Disable it explicitly. ++ */ ++#pragma GCC diagnostic ignored "-Wformat-nonliteral" ++ ++#define __printf(a, b) __attribute__((format(printf, a, b))) ++ ++static struct bpf_map *bpf_object__add_map(struct bpf_object *obj); ++static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog); ++ ++static const char * const attach_type_name[] = { ++ [BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress", ++ [BPF_CGROUP_INET_EGRESS] = "cgroup_inet_egress", ++ [BPF_CGROUP_INET_SOCK_CREATE] = "cgroup_inet_sock_create", ++ [BPF_CGROUP_INET_SOCK_RELEASE] = "cgroup_inet_sock_release", ++ [BPF_CGROUP_SOCK_OPS] = "cgroup_sock_ops", ++ [BPF_CGROUP_DEVICE] = "cgroup_device", ++ [BPF_CGROUP_INET4_BIND] = "cgroup_inet4_bind", ++ [BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind", ++ [BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect", ++ [BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect", ++ [BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind", ++ [BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind", ++ [BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername", ++ [BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername", ++ [BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname", ++ [BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname", ++ [BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg", ++ [BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg", ++ [BPF_CGROUP_SYSCTL] = "cgroup_sysctl", ++ [BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg", ++ [BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg", ++ [BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt", ++ [BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt", ++ [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser", ++ [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict", ++ [BPF_SK_SKB_VERDICT] = "sk_skb_verdict", ++ [BPF_SK_MSG_VERDICT] = "sk_msg_verdict", ++ [BPF_LIRC_MODE2] = "lirc_mode2", ++ [BPF_FLOW_DISSECTOR] = "flow_dissector", ++ [BPF_TRACE_RAW_TP] = "trace_raw_tp", ++ [BPF_TRACE_FENTRY] = "trace_fentry", ++ [BPF_TRACE_FEXIT] = "trace_fexit", ++ [BPF_MODIFY_RETURN] = "modify_return", ++ [BPF_LSM_MAC] = "lsm_mac", ++ [BPF_LSM_CGROUP] = "lsm_cgroup", ++ [BPF_SK_LOOKUP] = "sk_lookup", ++ [BPF_TRACE_ITER] = "trace_iter", ++ [BPF_XDP_DEVMAP] = "xdp_devmap", ++ [BPF_XDP_CPUMAP] = "xdp_cpumap", ++ [BPF_XDP] = "xdp", ++ [BPF_SK_REUSEPORT_SELECT] = "sk_reuseport_select", ++ [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_reuseport_select_or_migrate", ++ [BPF_PERF_EVENT] = "perf_event", ++ [BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi", ++}; ++ ++static const char * const link_type_name[] = { ++ [BPF_LINK_TYPE_UNSPEC] = "unspec", ++ [BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", ++ [BPF_LINK_TYPE_TRACING] 
= "tracing", ++ [BPF_LINK_TYPE_CGROUP] = "cgroup", ++ [BPF_LINK_TYPE_ITER] = "iter", ++ [BPF_LINK_TYPE_NETNS] = "netns", ++ [BPF_LINK_TYPE_XDP] = "xdp", ++ [BPF_LINK_TYPE_PERF_EVENT] = "perf_event", ++ [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi", ++ [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops", ++}; ++ ++static const char * const map_type_name[] = { ++ [BPF_MAP_TYPE_UNSPEC] = "unspec", ++ [BPF_MAP_TYPE_HASH] = "hash", ++ [BPF_MAP_TYPE_ARRAY] = "array", ++ [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array", ++ [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array", ++ [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash", ++ [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array", ++ [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace", ++ [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array", ++ [BPF_MAP_TYPE_LRU_HASH] = "lru_hash", ++ [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash", ++ [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie", ++ [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps", ++ [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", ++ [BPF_MAP_TYPE_DEVMAP] = "devmap", ++ [BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash", ++ [BPF_MAP_TYPE_SOCKMAP] = "sockmap", ++ [BPF_MAP_TYPE_CPUMAP] = "cpumap", ++ [BPF_MAP_TYPE_XSKMAP] = "xskmap", ++ [BPF_MAP_TYPE_SOCKHASH] = "sockhash", ++ [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage", ++ [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray", ++ [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage", ++ [BPF_MAP_TYPE_QUEUE] = "queue", ++ [BPF_MAP_TYPE_STACK] = "stack", ++ [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage", ++ [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops", ++ [BPF_MAP_TYPE_RINGBUF] = "ringbuf", ++ [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage", ++ [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage", ++ [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter", ++}; ++ ++static const char * const prog_type_name[] = { ++ [BPF_PROG_TYPE_UNSPEC] = "unspec", ++ [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", ++ [BPF_PROG_TYPE_KPROBE] = "kprobe", ++ [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls", ++ [BPF_PROG_TYPE_SCHED_ACT] = "sched_act", ++ [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint", ++ [BPF_PROG_TYPE_XDP] = "xdp", ++ [BPF_PROG_TYPE_PERF_EVENT] = "perf_event", ++ [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb", ++ [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock", ++ [BPF_PROG_TYPE_LWT_IN] = "lwt_in", ++ [BPF_PROG_TYPE_LWT_OUT] = "lwt_out", ++ [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", ++ [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", ++ [BPF_PROG_TYPE_SK_SKB] = "sk_skb", ++ [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", ++ [BPF_PROG_TYPE_SK_MSG] = "sk_msg", ++ [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", ++ [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr", ++ [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local", ++ [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2", ++ [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport", ++ [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", ++ [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl", ++ [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable", ++ [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt", ++ [BPF_PROG_TYPE_TRACING] = "tracing", ++ [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops", ++ [BPF_PROG_TYPE_EXT] = "ext", ++ [BPF_PROG_TYPE_LSM] = "lsm", ++ [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup", ++ [BPF_PROG_TYPE_SYSCALL] = "syscall", ++}; ++ ++static int __base_pr(enum libbpf_print_level level, const char *format, ++ va_list args) ++{ ++ if (level == LIBBPF_DEBUG) ++ return 0; ++ ++ return vfprintf(stderr, format, args); ++} ++ ++static libbpf_print_fn_t __libbpf_pr = __base_pr; ++ ++libbpf_print_fn_t 
libbpf_set_print(libbpf_print_fn_t fn) ++{ ++ libbpf_print_fn_t old_print_fn = __libbpf_pr; ++ ++ __libbpf_pr = fn; ++ return old_print_fn; ++} ++ ++__printf(2, 3) ++void libbpf_print(enum libbpf_print_level level, const char *format, ...) ++{ ++ va_list args; ++ ++ if (!__libbpf_pr) ++ return; ++ ++ va_start(args, format); ++ __libbpf_pr(level, format, args); ++ va_end(args); ++} ++ ++static void pr_perm_msg(int err) ++{ ++ struct rlimit limit; ++ char buf[100]; ++ ++ if (err != -EPERM || geteuid() != 0) ++ return; ++ ++ err = getrlimit(RLIMIT_MEMLOCK, &limit); ++ if (err) ++ return; ++ ++ if (limit.rlim_cur == RLIM_INFINITY) ++ return; ++ ++ if (limit.rlim_cur < 1024) ++ snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur); ++ else if (limit.rlim_cur < 1024*1024) ++ snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024); ++ else ++ snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024)); ++ ++ pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n", ++ buf); ++} ++ ++#define STRERR_BUFSIZE 128 ++ ++/* Copied from tools/perf/util/util.h */ ++#ifndef zfree ++# define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) ++#endif ++ ++#ifndef zclose ++# define zclose(fd) ({ \ ++ int ___err = 0; \ ++ if ((fd) >= 0) \ ++ ___err = close((fd)); \ ++ fd = -1; \ ++ ___err; }) ++#endif ++ ++static inline __u64 ptr_to_u64(const void *ptr) ++{ ++ return (__u64) (unsigned long) ptr; ++} ++ ++int libbpf_set_strict_mode(enum libbpf_strict_mode mode) ++{ ++ /* as of v1.0 libbpf_set_strict_mode() is a no-op */ ++ return 0; ++} ++ ++__u32 libbpf_major_version(void) ++{ ++ return LIBBPF_MAJOR_VERSION; ++} ++ ++__u32 libbpf_minor_version(void) ++{ ++ return LIBBPF_MINOR_VERSION; ++} ++ ++const char *libbpf_version_string(void) ++{ ++#define __S(X) #X ++#define _S(X) __S(X) ++ return "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION); ++#undef _S ++#undef __S ++} ++ ++enum reloc_type { ++ RELO_LD64, ++ RELO_CALL, ++ RELO_DATA, ++ RELO_EXTERN_VAR, ++ RELO_EXTERN_FUNC, ++ RELO_SUBPROG_ADDR, ++ RELO_CORE, ++}; ++ ++struct reloc_desc { ++ enum reloc_type type; ++ int insn_idx; ++ union { ++ const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */ ++ struct { ++ int map_idx; ++ int sym_off; ++ }; ++ }; ++}; ++ ++/* stored as sec_def->cookie for all libbpf-supported SEC()s */ ++enum sec_def_flags { ++ SEC_NONE = 0, ++ /* expected_attach_type is optional, if kernel doesn't support that */ ++ SEC_EXP_ATTACH_OPT = 1, ++ /* legacy, only used by libbpf_get_type_names() and ++ * libbpf_attach_type_by_name(), not used by libbpf itself at all. ++ * This used to be associated with cgroup (and few other) BPF programs ++ * that were attachable through BPF_PROG_ATTACH command. Pretty ++ * meaningless nowadays, though. 
++ */ ++ SEC_ATTACHABLE = 2, ++ SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT, ++ /* attachment target is specified through BTF ID in either kernel or ++ * other BPF program's BTF object */ ++ SEC_ATTACH_BTF = 4, ++ /* BPF program type allows sleeping/blocking in kernel */ ++ SEC_SLEEPABLE = 8, ++ /* BPF program support non-linear XDP buffer */ ++ SEC_XDP_FRAGS = 16, ++}; ++ ++struct bpf_sec_def { ++ char *sec; ++ enum bpf_prog_type prog_type; ++ enum bpf_attach_type expected_attach_type; ++ long cookie; ++ int handler_id; ++ ++ libbpf_prog_setup_fn_t prog_setup_fn; ++ libbpf_prog_prepare_load_fn_t prog_prepare_load_fn; ++ libbpf_prog_attach_fn_t prog_attach_fn; ++}; ++ ++/* ++ * bpf_prog should be a better name but it has been used in ++ * linux/filter.h. ++ */ ++struct bpf_program { ++ char *name; ++ char *sec_name; ++ size_t sec_idx; ++ const struct bpf_sec_def *sec_def; ++ /* this program's instruction offset (in number of instructions) ++ * within its containing ELF section ++ */ ++ size_t sec_insn_off; ++ /* number of original instructions in ELF section belonging to this ++ * program, not taking into account subprogram instructions possible ++ * appended later during relocation ++ */ ++ size_t sec_insn_cnt; ++ /* Offset (in number of instructions) of the start of instruction ++ * belonging to this BPF program within its containing main BPF ++ * program. For the entry-point (main) BPF program, this is always ++ * zero. For a sub-program, this gets reset before each of main BPF ++ * programs are processed and relocated and is used to determined ++ * whether sub-program was already appended to the main program, and ++ * if yes, at which instruction offset. ++ */ ++ size_t sub_insn_off; ++ ++ /* instructions that belong to BPF program; insns[0] is located at ++ * sec_insn_off instruction within its ELF section in ELF file, so ++ * when mapping ELF file instruction index to the local instruction, ++ * one needs to subtract sec_insn_off; and vice versa. ++ */ ++ struct bpf_insn *insns; ++ /* actual number of instruction in this BPF program's image; for ++ * entry-point BPF programs this includes the size of main program ++ * itself plus all the used sub-programs, appended at the end ++ */ ++ size_t insns_cnt; ++ ++ struct reloc_desc *reloc_desc; ++ int nr_reloc; ++ ++ /* BPF verifier log settings */ ++ char *log_buf; ++ size_t log_size; ++ __u32 log_level; ++ ++ struct bpf_object *obj; ++ ++ int fd; ++ bool autoload; ++ bool mark_btf_static; ++ enum bpf_prog_type type; ++ enum bpf_attach_type expected_attach_type; ++ ++ int prog_ifindex; ++ __u32 attach_btf_obj_fd; ++ __u32 attach_btf_id; ++ __u32 attach_prog_fd; ++ ++ void *func_info; ++ __u32 func_info_rec_size; ++ __u32 func_info_cnt; ++ ++ void *line_info; ++ __u32 line_info_rec_size; ++ __u32 line_info_cnt; ++ __u32 prog_flags; ++}; ++ ++struct bpf_struct_ops { ++ const char *tname; ++ const struct btf_type *type; ++ struct bpf_program **progs; ++ __u32 *kern_func_off; ++ /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */ ++ void *data; ++ /* e.g. struct bpf_struct_ops_tcp_congestion_ops in ++ * btf_vmlinux's format. ++ * struct bpf_struct_ops_tcp_congestion_ops { ++ * [... some other kernel fields ...] ++ * struct tcp_congestion_ops data; ++ * } ++ * kern_vdata-size == sizeof(struct bpf_struct_ops_tcp_congestion_ops) ++ * bpf_map__init_kern_struct_ops() will populate the "kern_vdata" ++ * from "data". 
++ */ ++ void *kern_vdata; ++ __u32 type_id; ++}; ++ ++#define DATA_SEC ".data" ++#define BSS_SEC ".bss" ++#define RODATA_SEC ".rodata" ++#define KCONFIG_SEC ".kconfig" ++#define KSYMS_SEC ".ksyms" ++#define STRUCT_OPS_SEC ".struct_ops" ++ ++enum libbpf_map_type { ++ LIBBPF_MAP_UNSPEC, ++ LIBBPF_MAP_DATA, ++ LIBBPF_MAP_BSS, ++ LIBBPF_MAP_RODATA, ++ LIBBPF_MAP_KCONFIG, ++}; ++ ++struct bpf_map_def { ++ unsigned int type; ++ unsigned int key_size; ++ unsigned int value_size; ++ unsigned int max_entries; ++ unsigned int map_flags; ++}; ++ ++struct bpf_map { ++ struct bpf_object *obj; ++ char *name; ++ /* real_name is defined for special internal maps (.rodata*, ++ * .data*, .bss, .kconfig) and preserves their original ELF section ++ * name. This is important to be be able to find corresponding BTF ++ * DATASEC information. ++ */ ++ char *real_name; ++ int fd; ++ int sec_idx; ++ size_t sec_offset; ++ int map_ifindex; ++ int inner_map_fd; ++ struct bpf_map_def def; ++ __u32 numa_node; ++ __u32 btf_var_idx; ++ __u32 btf_key_type_id; ++ __u32 btf_value_type_id; ++ __u32 btf_vmlinux_value_type_id; ++ enum libbpf_map_type libbpf_type; ++ void *mmaped; ++ struct bpf_struct_ops *st_ops; ++ struct bpf_map *inner_map; ++ void **init_slots; ++ int init_slots_sz; ++ char *pin_path; ++ bool pinned; ++ bool reused; ++ bool autocreate; ++ __u64 map_extra; ++}; ++ ++enum extern_type { ++ EXT_UNKNOWN, ++ EXT_KCFG, ++ EXT_KSYM, ++}; ++ ++enum kcfg_type { ++ KCFG_UNKNOWN, ++ KCFG_CHAR, ++ KCFG_BOOL, ++ KCFG_INT, ++ KCFG_TRISTATE, ++ KCFG_CHAR_ARR, ++}; ++ ++struct extern_desc { ++ enum extern_type type; ++ int sym_idx; ++ int btf_id; ++ int sec_btf_id; ++ const char *name; ++ bool is_set; ++ bool is_weak; ++ union { ++ struct { ++ enum kcfg_type type; ++ int sz; ++ int align; ++ int data_off; ++ bool is_signed; ++ } kcfg; ++ struct { ++ unsigned long long addr; ++ ++ /* target btf_id of the corresponding kernel var. */ ++ int kernel_btf_obj_fd; ++ int kernel_btf_id; ++ ++ /* local btf_id of the ksym extern's type. */ ++ __u32 type_id; ++ /* BTF fd index to be patched in for insn->off, this is ++ * 0 for vmlinux BTF, index in obj->fd_array for module ++ * BTF ++ */ ++ __s16 btf_fd_idx; ++ } ksym; ++ }; ++}; ++ ++struct module_btf { ++ struct btf *btf; ++ char *name; ++ __u32 id; ++ int fd; ++ int fd_array_idx; ++}; ++ ++enum sec_type { ++ SEC_UNUSED = 0, ++ SEC_RELO, ++ SEC_BSS, ++ SEC_DATA, ++ SEC_RODATA, ++}; ++ ++struct elf_sec_desc { ++ enum sec_type sec_type; ++ Elf64_Shdr *shdr; ++ Elf_Data *data; ++}; ++ ++struct elf_state { ++ int fd; ++ const void *obj_buf; ++ size_t obj_buf_sz; ++ Elf *elf; ++ Elf64_Ehdr *ehdr; ++ Elf_Data *symbols; ++ Elf_Data *st_ops_data; ++ size_t shstrndx; /* section index for section name strings */ ++ size_t strtabidx; ++ struct elf_sec_desc *secs; ++ int sec_cnt; ++ int maps_shndx; ++ int btf_maps_shndx; ++ __u32 btf_maps_sec_btf_id; ++ int text_shndx; ++ int symbols_shndx; ++ int st_ops_shndx; ++}; ++ ++struct usdt_manager; ++ ++struct bpf_object { ++ char name[BPF_OBJ_NAME_LEN]; ++ char license[64]; ++ __u32 kern_version; ++ ++ struct bpf_program *programs; ++ size_t nr_programs; ++ struct bpf_map *maps; ++ size_t nr_maps; ++ size_t maps_cap; ++ ++ char *kconfig; ++ struct extern_desc *externs; ++ int nr_extern; ++ int kconfig_map_idx; ++ ++ bool loaded; ++ bool has_subcalls; ++ bool has_rodata; ++ ++ struct bpf_gen *gen_loader; ++ ++ /* Information when doing ELF related work. 
Only valid if efile.elf is not NULL */ ++ struct elf_state efile; ++ ++ struct btf *btf; ++ struct btf_ext *btf_ext; ++ ++ /* Parse and load BTF vmlinux if any of the programs in the object need ++ * it at load time. ++ */ ++ struct btf *btf_vmlinux; ++ /* Path to the custom BTF to be used for BPF CO-RE relocations as an ++ * override for vmlinux BTF. ++ */ ++ char *btf_custom_path; ++ /* vmlinux BTF override for CO-RE relocations */ ++ struct btf *btf_vmlinux_override; ++ /* Lazily initialized kernel module BTFs */ ++ struct module_btf *btf_modules; ++ bool btf_modules_loaded; ++ size_t btf_module_cnt; ++ size_t btf_module_cap; ++ ++ /* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */ ++ char *log_buf; ++ size_t log_size; ++ __u32 log_level; ++ ++ int *fd_array; ++ size_t fd_array_cap; ++ size_t fd_array_cnt; ++ ++ struct usdt_manager *usdt_man; ++ ++ char path[]; ++}; ++ ++static const char *elf_sym_str(const struct bpf_object *obj, size_t off); ++static const char *elf_sec_str(const struct bpf_object *obj, size_t off); ++static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx); ++static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name); ++static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn); ++static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn); ++static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn); ++static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx); ++static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx); ++ ++void bpf_program__unload(struct bpf_program *prog) ++{ ++ if (!prog) ++ return; ++ ++ zclose(prog->fd); ++ ++ zfree(&prog->func_info); ++ zfree(&prog->line_info); ++} ++ ++static void bpf_program__exit(struct bpf_program *prog) ++{ ++ if (!prog) ++ return; ++ ++ bpf_program__unload(prog); ++ zfree(&prog->name); ++ zfree(&prog->sec_name); ++ zfree(&prog->insns); ++ zfree(&prog->reloc_desc); ++ ++ prog->nr_reloc = 0; ++ prog->insns_cnt = 0; ++ prog->sec_idx = -1; ++} ++ ++static bool insn_is_subprog_call(const struct bpf_insn *insn) ++{ ++ return BPF_CLASS(insn->code) == BPF_JMP && ++ BPF_OP(insn->code) == BPF_CALL && ++ BPF_SRC(insn->code) == BPF_K && ++ insn->src_reg == BPF_PSEUDO_CALL && ++ insn->dst_reg == 0 && ++ insn->off == 0; ++} ++ ++static bool is_call_insn(const struct bpf_insn *insn) ++{ ++ return insn->code == (BPF_JMP | BPF_CALL); ++} ++ ++static bool insn_is_pseudo_func(struct bpf_insn *insn) ++{ ++ return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC; ++} ++ ++static int ++bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, ++ const char *name, size_t sec_idx, const char *sec_name, ++ size_t sec_off, void *insn_data, size_t insn_data_sz) ++{ ++ if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) { ++ pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n", ++ sec_name, name, sec_off, insn_data_sz); ++ return -EINVAL; ++ } ++ ++ memset(prog, 0, sizeof(*prog)); ++ prog->obj = obj; ++ ++ prog->sec_idx = sec_idx; ++ prog->sec_insn_off = sec_off / BPF_INSN_SZ; ++ prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ; ++ /* insns_cnt can later be increased by appending used subprograms */ ++ prog->insns_cnt = prog->sec_insn_cnt; ++ ++ prog->type = BPF_PROG_TYPE_UNSPEC; ++ prog->fd = -1; ++ ++ /* libbpf's convention for SEC("?abc...") is that it's just like ++ * SEC("abc...") but the corresponding bpf_program starts out with ++ * autoload set to 
false. ++ */ ++ if (sec_name[0] == '?') { ++ prog->autoload = false; ++ /* from now on forget there was ? in section name */ ++ sec_name++; ++ } else { ++ prog->autoload = true; ++ } ++ ++ /* inherit object's log_level */ ++ prog->log_level = obj->log_level; ++ ++ prog->sec_name = strdup(sec_name); ++ if (!prog->sec_name) ++ goto errout; ++ ++ prog->name = strdup(name); ++ if (!prog->name) ++ goto errout; ++ ++ prog->insns = malloc(insn_data_sz); ++ if (!prog->insns) ++ goto errout; ++ memcpy(prog->insns, insn_data, insn_data_sz); ++ ++ return 0; ++errout: ++ pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name); ++ bpf_program__exit(prog); ++ return -ENOMEM; ++} ++ ++static int ++bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, ++ const char *sec_name, int sec_idx) ++{ ++ Elf_Data *symbols = obj->efile.symbols; ++ struct bpf_program *prog, *progs; ++ void *data = sec_data->d_buf; ++ size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms; ++ int nr_progs, err, i; ++ const char *name; ++ Elf64_Sym *sym; ++ ++ progs = obj->programs; ++ nr_progs = obj->nr_programs; ++ nr_syms = symbols->d_size / sizeof(Elf64_Sym); ++ sec_off = 0; ++ ++ for (i = 0; i < nr_syms; i++) { ++ sym = elf_sym_by_idx(obj, i); ++ ++ if (sym->st_shndx != sec_idx) ++ continue; ++ if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) ++ continue; ++ ++ prog_sz = sym->st_size; ++ sec_off = sym->st_value; ++ ++ name = elf_sym_str(obj, sym->st_name); ++ if (!name) { ++ pr_warn("sec '%s': failed to get symbol name for offset %zu\n", ++ sec_name, sec_off); ++ return -LIBBPF_ERRNO__FORMAT; ++ } ++ ++ if (sec_off + prog_sz > sec_sz) { ++ pr_warn("sec '%s': program at offset %zu crosses section boundary\n", ++ sec_name, sec_off); ++ return -LIBBPF_ERRNO__FORMAT; ++ } ++ ++ if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) { ++ pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name); ++ return -ENOTSUP; ++ } ++ ++ pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n", ++ sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz); ++ ++ progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs)); ++ if (!progs) { ++ /* ++ * In this case the original obj->programs ++ * is still valid, so don't need special treat for ++ * bpf_close_object(). ++ */ ++ pr_warn("sec '%s': failed to alloc memory for new program '%s'\n", ++ sec_name, name); ++ return -ENOMEM; ++ } ++ obj->programs = progs; ++ ++ prog = &progs[nr_progs]; ++ ++ err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name, ++ sec_off, data + sec_off, prog_sz); ++ if (err) ++ return err; ++ ++ /* if function is a global/weak symbol, but has restricted ++ * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC ++ * as static to enable more permissive BPF verification mode ++ * with more outside context available to BPF verifier ++ */ ++ if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL ++ && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN ++ || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)) ++ prog->mark_btf_static = true; ++ ++ nr_progs++; ++ obj->nr_programs = nr_progs; ++ } ++ ++ return 0; ++} ++ ++__u32 get_kernel_version(void) ++{ ++ /* On Ubuntu LINUX_VERSION_CODE doesn't correspond to info.release, ++ * but Ubuntu provides /proc/version_signature file, as described at ++ * https://ubuntu.com/kernel, with an example contents below, which we ++ * can use to get a proper LINUX_VERSION_CODE. 
++ * ++ * Ubuntu 5.4.0-12.15-generic 5.4.8 ++ * ++ * In the above, 5.4.8 is what kernel is actually expecting, while ++ * uname() call will return 5.4.0 in info.release. ++ */ ++ const char *ubuntu_kver_file = "/proc/version_signature"; ++ __u32 major, minor, patch; ++ struct utsname info; ++ ++ if (access(ubuntu_kver_file, R_OK) == 0) { ++ FILE *f; ++ ++ f = fopen(ubuntu_kver_file, "r"); ++ if (f) { ++ if (fscanf(f, "%*s %*s %d.%d.%d\n", &major, &minor, &patch) == 3) { ++ fclose(f); ++ return KERNEL_VERSION(major, minor, patch); ++ } ++ fclose(f); ++ } ++ /* something went wrong, fall back to uname() approach */ ++ } ++ ++ uname(&info); ++ if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3) ++ return 0; ++ return KERNEL_VERSION(major, minor, patch); ++} ++ ++static const struct btf_member * ++find_member_by_offset(const struct btf_type *t, __u32 bit_offset) ++{ ++ struct btf_member *m; ++ int i; ++ ++ for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { ++ if (btf_member_bit_offset(t, i) == bit_offset) ++ return m; ++ } ++ ++ return NULL; ++} ++ ++static const struct btf_member * ++find_member_by_name(const struct btf *btf, const struct btf_type *t, ++ const char *name) ++{ ++ struct btf_member *m; ++ int i; ++ ++ for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { ++ if (!strcmp(btf__name_by_offset(btf, m->name_off), name)) ++ return m; ++ } ++ ++ return NULL; ++} ++ ++#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_" ++static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, ++ const char *name, __u32 kind); ++ ++static int ++find_struct_ops_kern_types(const struct btf *btf, const char *tname, ++ const struct btf_type **type, __u32 *type_id, ++ const struct btf_type **vtype, __u32 *vtype_id, ++ const struct btf_member **data_member) ++{ ++ const struct btf_type *kern_type, *kern_vtype; ++ const struct btf_member *kern_data_member; ++ __s32 kern_vtype_id, kern_type_id; ++ __u32 i; ++ ++ kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT); ++ if (kern_type_id < 0) { ++ pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", ++ tname); ++ return kern_type_id; ++ } ++ kern_type = btf__type_by_id(btf, kern_type_id); ++ ++ /* Find the corresponding "map_value" type that will be used ++ * in map_update(BPF_MAP_TYPE_STRUCT_OPS). For example, ++ * find "struct bpf_struct_ops_tcp_congestion_ops" from the ++ * btf_vmlinux. ++ */ ++ kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX, ++ tname, BTF_KIND_STRUCT); ++ if (kern_vtype_id < 0) { ++ pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n", ++ STRUCT_OPS_VALUE_PREFIX, tname); ++ return kern_vtype_id; ++ } ++ kern_vtype = btf__type_by_id(btf, kern_vtype_id); ++ ++ /* Find "struct tcp_congestion_ops" from ++ * struct bpf_struct_ops_tcp_congestion_ops { ++ * [ ... 
] ++ * struct tcp_congestion_ops data; ++ * } ++ */ ++ kern_data_member = btf_members(kern_vtype); ++ for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) { ++ if (kern_data_member->type == kern_type_id) ++ break; ++ } ++ if (i == btf_vlen(kern_vtype)) { ++ pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n", ++ tname, STRUCT_OPS_VALUE_PREFIX, tname); ++ return -EINVAL; ++ } ++ ++ *type = kern_type; ++ *type_id = kern_type_id; ++ *vtype = kern_vtype; ++ *vtype_id = kern_vtype_id; ++ *data_member = kern_data_member; ++ ++ return 0; ++} ++ ++static bool bpf_map__is_struct_ops(const struct bpf_map *map) ++{ ++ return map->def.type == BPF_MAP_TYPE_STRUCT_OPS; ++} ++ ++/* Init the map's fields that depend on kern_btf */ ++static int bpf_map__init_kern_struct_ops(struct bpf_map *map, ++ const struct btf *btf, ++ const struct btf *kern_btf) ++{ ++ const struct btf_member *member, *kern_member, *kern_data_member; ++ const struct btf_type *type, *kern_type, *kern_vtype; ++ __u32 i, kern_type_id, kern_vtype_id, kern_data_off; ++ struct bpf_struct_ops *st_ops; ++ void *data, *kern_data; ++ const char *tname; ++ int err; ++ ++ st_ops = map->st_ops; ++ type = st_ops->type; ++ tname = st_ops->tname; ++ err = find_struct_ops_kern_types(kern_btf, tname, ++ &kern_type, &kern_type_id, ++ &kern_vtype, &kern_vtype_id, ++ &kern_data_member); ++ if (err) ++ return err; ++ ++ pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n", ++ map->name, st_ops->type_id, kern_type_id, kern_vtype_id); ++ ++ map->def.value_size = kern_vtype->size; ++ map->btf_vmlinux_value_type_id = kern_vtype_id; ++ ++ st_ops->kern_vdata = calloc(1, kern_vtype->size); ++ if (!st_ops->kern_vdata) ++ return -ENOMEM; ++ ++ data = st_ops->data; ++ kern_data_off = kern_data_member->offset / 8; ++ kern_data = st_ops->kern_vdata + kern_data_off; ++ ++ member = btf_members(type); ++ for (i = 0; i < btf_vlen(type); i++, member++) { ++ const struct btf_type *mtype, *kern_mtype; ++ __u32 mtype_id, kern_mtype_id; ++ void *mdata, *kern_mdata; ++ __s64 msize, kern_msize; ++ __u32 moff, kern_moff; ++ __u32 kern_member_idx; ++ const char *mname; ++ ++ mname = btf__name_by_offset(btf, member->name_off); ++ kern_member = find_member_by_name(kern_btf, kern_type, mname); ++ if (!kern_member) { ++ pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n", ++ map->name, mname); ++ return -ENOTSUP; ++ } ++ ++ kern_member_idx = kern_member - btf_members(kern_type); ++ if (btf_member_bitfield_size(type, i) || ++ btf_member_bitfield_size(kern_type, kern_member_idx)) { ++ pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n", ++ map->name, mname); ++ return -ENOTSUP; ++ } ++ ++ moff = member->offset / 8; ++ kern_moff = kern_member->offset / 8; ++ ++ mdata = data + moff; ++ kern_mdata = kern_data + kern_moff; ++ ++ mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id); ++ kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type, ++ &kern_mtype_id); ++ if (BTF_INFO_KIND(mtype->info) != ++ BTF_INFO_KIND(kern_mtype->info)) { ++ pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n", ++ map->name, mname, BTF_INFO_KIND(mtype->info), ++ BTF_INFO_KIND(kern_mtype->info)); ++ return -ENOTSUP; ++ } ++ ++ if (btf_is_ptr(mtype)) { ++ struct bpf_program *prog; ++ ++ prog = st_ops->progs[i]; ++ if (!prog) ++ continue; ++ ++ kern_mtype = skip_mods_and_typedefs(kern_btf, ++ kern_mtype->type, ++ &kern_mtype_id); ++ ++ /* mtype->type must be a func_proto which 
was ++ * guaranteed in bpf_object__collect_st_ops_relos(), ++ * so only check kern_mtype for func_proto here. ++ */ ++ if (!btf_is_func_proto(kern_mtype)) { ++ pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n", ++ map->name, mname); ++ return -ENOTSUP; ++ } ++ ++ prog->attach_btf_id = kern_type_id; ++ prog->expected_attach_type = kern_member_idx; ++ ++ st_ops->kern_func_off[i] = kern_data_off + kern_moff; ++ ++ pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n", ++ map->name, mname, prog->name, moff, ++ kern_moff); ++ ++ continue; ++ } ++ ++ msize = btf__resolve_size(btf, mtype_id); ++ kern_msize = btf__resolve_size(kern_btf, kern_mtype_id); ++ if (msize < 0 || kern_msize < 0 || msize != kern_msize) { ++ pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n", ++ map->name, mname, (ssize_t)msize, ++ (ssize_t)kern_msize); ++ return -ENOTSUP; ++ } ++ ++ pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n", ++ map->name, mname, (unsigned int)msize, ++ moff, kern_moff); ++ memcpy(kern_mdata, mdata, msize); ++ } ++ ++ return 0; ++} ++ ++static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj) ++{ ++ struct bpf_map *map; ++ size_t i; ++ int err; ++ ++ for (i = 0; i < obj->nr_maps; i++) { ++ map = &obj->maps[i]; ++ ++ if (!bpf_map__is_struct_ops(map)) ++ continue; ++ ++ err = bpf_map__init_kern_struct_ops(map, obj->btf, ++ obj->btf_vmlinux); ++ if (err) ++ return err; ++ } ++ ++ return 0; ++} ++ ++static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) ++{ ++ const struct btf_type *type, *datasec; ++ const struct btf_var_secinfo *vsi; ++ struct bpf_struct_ops *st_ops; ++ const char *tname, *var_name; ++ __s32 type_id, datasec_id; ++ const struct btf *btf; ++ struct bpf_map *map; ++ __u32 i; ++ ++ if (obj->efile.st_ops_shndx == -1) ++ return 0; ++ ++ btf = obj->btf; ++ datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC, ++ BTF_KIND_DATASEC); ++ if (datasec_id < 0) { ++ pr_warn("struct_ops init: DATASEC %s not found\n", ++ STRUCT_OPS_SEC); ++ return -EINVAL; ++ } ++ ++ datasec = btf__type_by_id(btf, datasec_id); ++ vsi = btf_var_secinfos(datasec); ++ for (i = 0; i < btf_vlen(datasec); i++, vsi++) { ++ type = btf__type_by_id(obj->btf, vsi->type); ++ var_name = btf__name_by_offset(obj->btf, type->name_off); ++ ++ type_id = btf__resolve_type(obj->btf, vsi->type); ++ if (type_id < 0) { ++ pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n", ++ vsi->type, STRUCT_OPS_SEC); ++ return -EINVAL; ++ } ++ ++ type = btf__type_by_id(obj->btf, type_id); ++ tname = btf__name_by_offset(obj->btf, type->name_off); ++ if (!tname[0]) { ++ pr_warn("struct_ops init: anonymous type is not supported\n"); ++ return -ENOTSUP; ++ } ++ if (!btf_is_struct(type)) { ++ pr_warn("struct_ops init: %s is not a struct\n", tname); ++ return -EINVAL; ++ } ++ ++ map = bpf_object__add_map(obj); ++ if (IS_ERR(map)) ++ return PTR_ERR(map); ++ ++ map->sec_idx = obj->efile.st_ops_shndx; ++ map->sec_offset = vsi->offset; ++ map->name = strdup(var_name); ++ if (!map->name) ++ return -ENOMEM; ++ ++ map->def.type = BPF_MAP_TYPE_STRUCT_OPS; ++ map->def.key_size = sizeof(int); ++ map->def.value_size = type->size; ++ map->def.max_entries = 1; ++ ++ map->st_ops = calloc(1, sizeof(*map->st_ops)); ++ if (!map->st_ops) ++ return -ENOMEM; ++ st_ops = map->st_ops; ++ st_ops->data = malloc(type->size); ++ st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs)); ++ 
st_ops->kern_func_off = malloc(btf_vlen(type) * ++ sizeof(*st_ops->kern_func_off)); ++ if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off) ++ return -ENOMEM; ++ ++ if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) { ++ pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n", ++ var_name, STRUCT_OPS_SEC); ++ return -EINVAL; ++ } ++ ++ memcpy(st_ops->data, ++ obj->efile.st_ops_data->d_buf + vsi->offset, ++ type->size); ++ st_ops->tname = tname; ++ st_ops->type = type; ++ st_ops->type_id = type_id; ++ ++ pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n", ++ tname, type_id, var_name, vsi->offset); ++ } ++ ++ return 0; ++} ++ ++static struct bpf_object *bpf_object__new(const char *path, ++ const void *obj_buf, ++ size_t obj_buf_sz, ++ const char *obj_name) ++{ ++ struct bpf_object *obj; ++ char *end; ++ ++ obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1); ++ if (!obj) { ++ pr_warn("alloc memory failed for %s\n", path); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ strcpy(obj->path, path); ++ if (obj_name) { ++ libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name)); ++ } else { ++ /* Using basename() GNU version which doesn't modify arg. */ ++ libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name)); ++ end = strchr(obj->name, '.'); ++ if (end) ++ *end = 0; ++ } ++ ++ obj->efile.fd = -1; ++ /* ++ * Caller of this function should also call ++ * bpf_object__elf_finish() after data collection to return ++ * obj_buf to user. If not, we should duplicate the buffer to ++ * avoid user freeing them before elf finish. ++ */ ++ obj->efile.obj_buf = obj_buf; ++ obj->efile.obj_buf_sz = obj_buf_sz; ++ obj->efile.maps_shndx = -1; ++ obj->efile.btf_maps_shndx = -1; ++ obj->efile.st_ops_shndx = -1; ++ obj->kconfig_map_idx = -1; ++ ++ obj->kern_version = get_kernel_version(); ++ obj->loaded = false; ++ ++ return obj; ++} ++ ++static void bpf_object__elf_finish(struct bpf_object *obj) ++{ ++ if (!obj->efile.elf) ++ return; ++ ++ elf_end(obj->efile.elf); ++ obj->efile.elf = NULL; ++ obj->efile.symbols = NULL; ++ obj->efile.st_ops_data = NULL; ++ ++ zfree(&obj->efile.secs); ++ obj->efile.sec_cnt = 0; ++ zclose(obj->efile.fd); ++ obj->efile.obj_buf = NULL; ++ obj->efile.obj_buf_sz = 0; ++} ++ ++static int bpf_object__elf_init(struct bpf_object *obj) ++{ ++ Elf64_Ehdr *ehdr; ++ int err = 0; ++ Elf *elf; ++ ++ if (obj->efile.elf) { ++ pr_warn("elf: init internal error\n"); ++ return -LIBBPF_ERRNO__LIBELF; ++ } ++ ++ if (obj->efile.obj_buf_sz > 0) { ++ /* obj_buf should have been validated by bpf_object__open_mem(). 
*/ ++ elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz); ++ } else { ++ obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC); ++ if (obj->efile.fd < 0) { ++ char errmsg[STRERR_BUFSIZE], *cp; ++ ++ err = -errno; ++ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); ++ pr_warn("elf: failed to open %s: %s\n", obj->path, cp); ++ return err; ++ } ++ ++ elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL); ++ } ++ ++ if (!elf) { ++ pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1)); ++ err = -LIBBPF_ERRNO__LIBELF; ++ goto errout; ++ } ++ ++ obj->efile.elf = elf; ++ ++ if (elf_kind(elf) != ELF_K_ELF) { ++ err = -LIBBPF_ERRNO__FORMAT; ++ pr_warn("elf: '%s' is not a proper ELF object\n", obj->path); ++ goto errout; ++ } ++ ++ if (gelf_getclass(elf) != ELFCLASS64) { ++ err = -LIBBPF_ERRNO__FORMAT; ++ pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path); ++ goto errout; ++ } ++ ++ obj->efile.ehdr = ehdr = elf64_getehdr(elf); ++ if (!obj->efile.ehdr) { ++ pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1)); ++ err = -LIBBPF_ERRNO__FORMAT; ++ goto errout; ++ } ++ ++ if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) { ++ pr_warn("elf: failed to get section names section index for %s: %s\n", ++ obj->path, elf_errmsg(-1)); ++ err = -LIBBPF_ERRNO__FORMAT; ++ goto errout; ++ } ++ ++ /* Elf is corrupted/truncated, avoid calling elf_strptr. */ ++ if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) { ++ pr_warn("elf: failed to get section names strings from %s: %s\n", ++ obj->path, elf_errmsg(-1)); ++ err = -LIBBPF_ERRNO__FORMAT; ++ goto errout; ++ } ++ ++ /* Old LLVM set e_machine to EM_NONE */ ++ if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) { ++ pr_warn("elf: %s is not a valid eBPF object file\n", obj->path); ++ err = -LIBBPF_ERRNO__FORMAT; ++ goto errout; ++ } ++ ++ return 0; ++errout: ++ bpf_object__elf_finish(obj); ++ return err; ++} ++ ++static int bpf_object__check_endianness(struct bpf_object *obj) ++{ ++#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ++ if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB) ++ return 0; ++#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ ++ if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB) ++ return 0; ++#else ++# error "Unrecognized __BYTE_ORDER__" ++#endif ++ pr_warn("elf: endianness mismatch in %s.\n", obj->path); ++ return -LIBBPF_ERRNO__ENDIAN; ++} ++ ++static int ++bpf_object__init_license(struct bpf_object *obj, void *data, size_t size) ++{ ++ /* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't ++ * go over allowed ELF data section buffer ++ */ ++ libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license))); ++ pr_debug("license of %s is %s\n", obj->path, obj->license); ++ return 0; ++} ++ ++static int ++bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size) ++{ ++ __u32 kver; ++ ++ if (size != sizeof(kver)) { ++ pr_warn("invalid kver section in %s\n", obj->path); ++ return -LIBBPF_ERRNO__FORMAT; ++ } ++ memcpy(&kver, data, sizeof(kver)); ++ obj->kern_version = kver; ++ pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version); ++ return 0; ++} ++ ++static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) ++{ ++ if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS || ++ type == BPF_MAP_TYPE_HASH_OF_MAPS) ++ return true; ++ return false; ++} ++ ++static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size) ++{ ++ Elf_Data *data; ++ Elf_Scn *scn; ++ ++ if 
(!name) ++ return -EINVAL; ++ ++ scn = elf_sec_by_name(obj, name); ++ data = elf_sec_data(obj, scn); ++ if (data) { ++ *size = data->d_size; ++ return 0; /* found it */ ++ } ++ ++ return -ENOENT; ++} ++ ++static int find_elf_var_offset(const struct bpf_object *obj, const char *name, __u32 *off) ++{ ++ Elf_Data *symbols = obj->efile.symbols; ++ const char *sname; ++ size_t si; ++ ++ if (!name || !off) ++ return -EINVAL; ++ ++ for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) { ++ Elf64_Sym *sym = elf_sym_by_idx(obj, si); ++ ++ if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT) ++ continue; ++ ++ if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && ++ ELF64_ST_BIND(sym->st_info) != STB_WEAK) ++ continue; ++ ++ sname = elf_sym_str(obj, sym->st_name); ++ if (!sname) { ++ pr_warn("failed to get sym name string for var %s\n", name); ++ return -EIO; ++ } ++ if (strcmp(name, sname) == 0) { ++ *off = sym->st_value; ++ return 0; ++ } ++ } ++ ++ return -ENOENT; ++} ++ ++static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) ++{ ++ struct bpf_map *map; ++ int err; ++ ++ err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap, ++ sizeof(*obj->maps), obj->nr_maps + 1); ++ if (err) ++ return ERR_PTR(err); ++ ++ map = &obj->maps[obj->nr_maps++]; ++ map->obj = obj; ++ map->fd = -1; ++ map->inner_map_fd = -1; ++ map->autocreate = true; ++ ++ return map; ++} ++ ++static size_t bpf_map_mmap_sz(const struct bpf_map *map) ++{ ++ long page_sz = sysconf(_SC_PAGE_SIZE); ++ size_t map_sz; ++ ++ map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries; ++ map_sz = roundup(map_sz, page_sz); ++ return map_sz; ++} ++ ++static char *internal_map_name(struct bpf_object *obj, const char *real_name) ++{ ++ char map_name[BPF_OBJ_NAME_LEN], *p; ++ int pfx_len, sfx_len = max((size_t)7, strlen(real_name)); ++ ++ /* This is one of the more confusing parts of libbpf for various ++ * reasons, some of which are historical. The original idea for naming ++ * internal names was to include as much of BPF object name prefix as ++ * possible, so that it can be distinguished from similar internal ++ * maps of a different BPF object. ++ * As an example, let's say we have bpf_object named 'my_object_name' ++ * and internal map corresponding to '.rodata' ELF section. The final ++ * map name advertised to user and to the kernel will be ++ * 'my_objec.rodata', taking first 8 characters of object name and ++ * entire 7 characters of '.rodata'. ++ * Somewhat confusingly, if internal map ELF section name is shorter ++ * than 7 characters, e.g., '.bss', we still reserve 7 characters ++ * for the suffix, even though we only have 4 actual characters, and ++ * resulting map will be called 'my_objec.bss', not even using all 15 ++ * characters allowed by the kernel. Oh well, at least the truncated ++ * object name is somewhat consistent in this case. But if the map ++ * name is '.kconfig', we'll still have entirety of '.kconfig' added ++ * (8 chars) and thus will be left with only first 7 characters of the ++ * object name ('my_obje'). Happy guessing, user, that the final map ++ * name will be "my_obje.kconfig". ++ * Now, with libbpf starting to support arbitrarily named .rodata.* ++ * and .data.* data sections, it's possible that ELF section name is ++ * longer than allowed 15 chars, so we now need to be careful to take ++ * only up to 15 first characters of ELF name, taking no BPF object ++ * name characters at all. So '.rodata.abracadabra' will result in ++ * '.rodata.abracad' kernel and user-visible name. 
++ * We need to keep this convoluted logic intact for .data, .bss and ++ * .rodata maps, but for new custom .data.custom and .rodata.custom ++ * maps we use their ELF names as is, not prepending bpf_object name ++ * in front. We still need to truncate them to 15 characters for the ++ * kernel. Full name can be recovered for such maps by using DATASEC ++ * BTF type associated with such map's value type, though. ++ */ ++ if (sfx_len >= BPF_OBJ_NAME_LEN) ++ sfx_len = BPF_OBJ_NAME_LEN - 1; ++ ++ /* if there are two or more dots in map name, it's a custom dot map */ ++ if (strchr(real_name + 1, '.') != NULL) ++ pfx_len = 0; ++ else ++ pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name)); ++ ++ snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name, ++ sfx_len, real_name); ++ ++ /* sanitise map name to characters allowed by kernel */ ++ for (p = map_name; *p && p < map_name + sizeof(map_name); p++) ++ if (!isalnum(*p) && *p != '_' && *p != '.') ++ *p = '_'; ++ ++ return strdup(map_name); ++} ++ ++static int ++bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map); ++ ++static int ++bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, ++ const char *real_name, int sec_idx, void *data, size_t data_sz) ++{ ++ struct bpf_map_def *def; ++ struct bpf_map *map; ++ int err; ++ ++ map = bpf_object__add_map(obj); ++ if (IS_ERR(map)) ++ return PTR_ERR(map); ++ ++ map->libbpf_type = type; ++ map->sec_idx = sec_idx; ++ map->sec_offset = 0; ++ map->real_name = strdup(real_name); ++ map->name = internal_map_name(obj, real_name); ++ if (!map->real_name || !map->name) { ++ zfree(&map->real_name); ++ zfree(&map->name); ++ return -ENOMEM; ++ } ++ ++ def = &map->def; ++ def->type = BPF_MAP_TYPE_ARRAY; ++ def->key_size = sizeof(int); ++ def->value_size = data_sz; ++ def->max_entries = 1; ++ def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG ++ ? BPF_F_RDONLY_PROG : 0; ++ def->map_flags |= BPF_F_MMAPABLE; ++ ++ pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", ++ map->name, map->sec_idx, map->sec_offset, def->map_flags); ++ ++ map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE, ++ MAP_SHARED | MAP_ANONYMOUS, -1, 0); ++ if (map->mmaped == MAP_FAILED) { ++ err = -errno; ++ map->mmaped = NULL; ++ pr_warn("failed to alloc map '%s' content buffer: %d\n", ++ map->name, err); ++ zfree(&map->real_name); ++ zfree(&map->name); ++ return err; ++ } ++ ++ /* failures are fine because of maps like .rodata.str1.1 */ ++ (void) bpf_map_find_btf_info(obj, map); ++ ++ if (data) ++ memcpy(map->mmaped, data, data_sz); ++ ++ pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name); ++ return 0; ++} ++ ++static int bpf_object__init_global_data_maps(struct bpf_object *obj) ++{ ++ struct elf_sec_desc *sec_desc; ++ const char *sec_name; ++ int err = 0, sec_idx; ++ ++ /* ++ * Populate obj->maps with libbpf internal maps. 
++ */ ++ for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) { ++ sec_desc = &obj->efile.secs[sec_idx]; ++ ++ switch (sec_desc->sec_type) { ++ case SEC_DATA: ++ sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); ++ err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA, ++ sec_name, sec_idx, ++ sec_desc->data->d_buf, ++ sec_desc->data->d_size); ++ break; ++ case SEC_RODATA: ++ obj->has_rodata = true; ++ sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); ++ err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA, ++ sec_name, sec_idx, ++ sec_desc->data->d_buf, ++ sec_desc->data->d_size); ++ break; ++ case SEC_BSS: ++ sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); ++ err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, ++ sec_name, sec_idx, ++ NULL, ++ sec_desc->data->d_size); ++ break; ++ default: ++ /* skip */ ++ break; ++ } ++ if (err) ++ return err; ++ } ++ return 0; ++} ++ ++ ++static struct extern_desc *find_extern_by_name(const struct bpf_object *obj, ++ const void *name) ++{ ++ int i; ++ ++ for (i = 0; i < obj->nr_extern; i++) { ++ if (strcmp(obj->externs[i].name, name) == 0) ++ return &obj->externs[i]; ++ } ++ return NULL; ++} ++ ++static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val, ++ char value) ++{ ++ switch (ext->kcfg.type) { ++ case KCFG_BOOL: ++ if (value == 'm') { ++ pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n", ++ ext->name, value); ++ return -EINVAL; ++ } ++ *(bool *)ext_val = value == 'y' ? true : false; ++ break; ++ case KCFG_TRISTATE: ++ if (value == 'y') ++ *(enum libbpf_tristate *)ext_val = TRI_YES; ++ else if (value == 'm') ++ *(enum libbpf_tristate *)ext_val = TRI_MODULE; ++ else /* value == 'n' */ ++ *(enum libbpf_tristate *)ext_val = TRI_NO; ++ break; ++ case KCFG_CHAR: ++ *(char *)ext_val = value; ++ break; ++ case KCFG_UNKNOWN: ++ case KCFG_INT: ++ case KCFG_CHAR_ARR: ++ default: ++ pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n", ++ ext->name, value); ++ return -EINVAL; ++ } ++ ext->is_set = true; ++ return 0; ++} ++ ++static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val, ++ const char *value) ++{ ++ size_t len; ++ ++ if (ext->kcfg.type != KCFG_CHAR_ARR) { ++ pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n", ++ ext->name, value); ++ return -EINVAL; ++ } ++ ++ len = strlen(value); ++ if (value[len - 1] != '"') { ++ pr_warn("extern (kcfg) '%s': invalid string config '%s'\n", ++ ext->name, value); ++ return -EINVAL; ++ } ++ ++ /* strip quotes */ ++ len -= 2; ++ if (len >= ext->kcfg.sz) { ++ pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n", ++ ext->name, value, len, ext->kcfg.sz - 1); ++ len = ext->kcfg.sz - 1; ++ } ++ memcpy(ext_val, value + 1, len); ++ ext_val[len] = '\0'; ++ ext->is_set = true; ++ return 0; ++} ++ ++static int parse_u64(const char *value, __u64 *res) ++{ ++ char *value_end; ++ int err; ++ ++ errno = 0; ++ *res = strtoull(value, &value_end, 0); ++ if (errno) { ++ err = -errno; ++ pr_warn("failed to parse '%s' as integer: %d\n", value, err); ++ return err; ++ } ++ if (*value_end) { ++ pr_warn("failed to parse '%s' as integer completely\n", value); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v) ++{ ++ int bit_sz = ext->kcfg.sz * 8; ++ ++ if (ext->kcfg.sz == 8) ++ return true; ++ ++ /* Validate that value stored in u64 fits in integer of `ext->sz` ++ * bytes size without any loss of 
information. If the target integer ++ * is signed, we rely on the following limits of integer type of ++ * Y bits and subsequent transformation: ++ * ++ * -2^(Y-1) <= X <= 2^(Y-1) - 1 ++ * 0 <= X + 2^(Y-1) <= 2^Y - 1 ++ * 0 <= X + 2^(Y-1) < 2^Y ++ * ++ * For unsigned target integer, check that all the (64 - Y) bits are ++ * zero. ++ */ ++ if (ext->kcfg.is_signed) ++ return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz); ++ else ++ return (v >> bit_sz) == 0; ++} ++ ++static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val, ++ __u64 value) ++{ ++ if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR && ++ ext->kcfg.type != KCFG_BOOL) { ++ pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n", ++ ext->name, (unsigned long long)value); ++ return -EINVAL; ++ } ++ if (ext->kcfg.type == KCFG_BOOL && value > 1) { ++ pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n", ++ ext->name, (unsigned long long)value); ++ return -EINVAL; ++ ++ } ++ if (!is_kcfg_value_in_range(ext, value)) { ++ pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n", ++ ext->name, (unsigned long long)value, ext->kcfg.sz); ++ return -ERANGE; ++ } ++ switch (ext->kcfg.sz) { ++ case 1: *(__u8 *)ext_val = value; break; ++ case 2: *(__u16 *)ext_val = value; break; ++ case 4: *(__u32 *)ext_val = value; break; ++ case 8: *(__u64 *)ext_val = value; break; ++ default: ++ return -EINVAL; ++ } ++ ext->is_set = true; ++ return 0; ++} ++ ++static int bpf_object__process_kconfig_line(struct bpf_object *obj, ++ char *buf, void *data) ++{ ++ struct extern_desc *ext; ++ char *sep, *value; ++ int len, err = 0; ++ void *ext_val; ++ __u64 num; ++ ++ if (!str_has_pfx(buf, "CONFIG_")) ++ return 0; ++ ++ sep = strchr(buf, '='); ++ if (!sep) { ++ pr_warn("failed to parse '%s': no separator\n", buf); ++ return -EINVAL; ++ } ++ ++ /* Trim ending '\n' */ ++ len = strlen(buf); ++ if (buf[len - 1] == '\n') ++ buf[len - 1] = '\0'; ++ /* Split on '=' and ensure that a value is present. */ ++ *sep = '\0'; ++ if (!sep[1]) { ++ *sep = '='; ++ pr_warn("failed to parse '%s': no value\n", buf); ++ return -EINVAL; ++ } ++ ++ ext = find_extern_by_name(obj, buf); ++ if (!ext || ext->is_set) ++ return 0; ++ ++ ext_val = data + ext->kcfg.data_off; ++ value = sep + 1; ++ ++ switch (*value) { ++ case 'y': case 'n': case 'm': ++ err = set_kcfg_value_tri(ext, ext_val, *value); ++ break; ++ case '"': ++ err = set_kcfg_value_str(ext, ext_val, value); ++ break; ++ default: ++ /* assume integer */ ++ err = parse_u64(value, &num); ++ if (err) { ++ pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value); ++ return err; ++ } ++ if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) { ++ pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value); ++ return -EINVAL; ++ } ++ err = set_kcfg_value_num(ext, ext_val, num); ++ break; ++ } ++ if (err) ++ return err; ++ pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value); ++ return 0; ++} ++ ++static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data) ++{ ++ char buf[PATH_MAX]; ++ struct utsname uts; ++ int len, err = 0; ++ gzFile file; ++ ++ uname(&uts); ++ len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release); ++ if (len < 0) ++ return -EINVAL; ++ else if (len >= PATH_MAX) ++ return -ENAMETOOLONG; ++ ++ /* gzopen also accepts uncompressed files. 
*/ ++ file = gzopen(buf, "r"); ++ if (!file) ++ file = gzopen("/proc/config.gz", "r"); ++ ++ if (!file) { ++ pr_warn("failed to open system Kconfig\n"); ++ return -ENOENT; ++ } ++ ++ while (gzgets(file, buf, sizeof(buf))) { ++ err = bpf_object__process_kconfig_line(obj, buf, data); ++ if (err) { ++ pr_warn("error parsing system Kconfig line '%s': %d\n", ++ buf, err); ++ goto out; ++ } ++ } ++ ++out: ++ gzclose(file); ++ return err; ++} ++ ++static int bpf_object__read_kconfig_mem(struct bpf_object *obj, ++ const char *config, void *data) ++{ ++ char buf[PATH_MAX]; ++ int err = 0; ++ FILE *file; ++ ++ file = fmemopen((void *)config, strlen(config), "r"); ++ if (!file) { ++ err = -errno; ++ pr_warn("failed to open in-memory Kconfig: %d\n", err); ++ return err; ++ } ++ ++ while (fgets(buf, sizeof(buf), file)) { ++ err = bpf_object__process_kconfig_line(obj, buf, data); ++ if (err) { ++ pr_warn("error parsing in-memory Kconfig line '%s': %d\n", ++ buf, err); ++ break; ++ } ++ } ++ ++ fclose(file); ++ return err; ++} ++ ++static int bpf_object__init_kconfig_map(struct bpf_object *obj) ++{ ++ struct extern_desc *last_ext = NULL, *ext; ++ size_t map_sz; ++ int i, err; ++ ++ for (i = 0; i < obj->nr_extern; i++) { ++ ext = &obj->externs[i]; ++ if (ext->type == EXT_KCFG) ++ last_ext = ext; ++ } ++ ++ if (!last_ext) ++ return 0; ++ ++ map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz; ++ err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG, ++ ".kconfig", obj->efile.symbols_shndx, ++ NULL, map_sz); ++ if (err) ++ return err; ++ ++ obj->kconfig_map_idx = obj->nr_maps - 1; ++ ++ return 0; ++} ++ ++const struct btf_type * ++skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id) ++{ ++ const struct btf_type *t = btf__type_by_id(btf, id); ++ ++ if (res_id) ++ *res_id = id; ++ ++ while (btf_is_mod(t) || btf_is_typedef(t)) { ++ if (res_id) ++ *res_id = t->type; ++ t = btf__type_by_id(btf, t->type); ++ } ++ ++ return t; ++} ++ ++static const struct btf_type * ++resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id) ++{ ++ const struct btf_type *t; ++ ++ t = skip_mods_and_typedefs(btf, id, NULL); ++ if (!btf_is_ptr(t)) ++ return NULL; ++ ++ t = skip_mods_and_typedefs(btf, t->type, res_id); ++ ++ return btf_is_func_proto(t) ? t : NULL; ++} ++ ++static const char *__btf_kind_str(__u16 kind) ++{ ++ switch (kind) { ++ case BTF_KIND_UNKN: return "void"; ++ case BTF_KIND_INT: return "int"; ++ case BTF_KIND_PTR: return "ptr"; ++ case BTF_KIND_ARRAY: return "array"; ++ case BTF_KIND_STRUCT: return "struct"; ++ case BTF_KIND_UNION: return "union"; ++ case BTF_KIND_ENUM: return "enum"; ++ case BTF_KIND_FWD: return "fwd"; ++ case BTF_KIND_TYPEDEF: return "typedef"; ++ case BTF_KIND_VOLATILE: return "volatile"; ++ case BTF_KIND_CONST: return "const"; ++ case BTF_KIND_RESTRICT: return "restrict"; ++ case BTF_KIND_FUNC: return "func"; ++ case BTF_KIND_FUNC_PROTO: return "func_proto"; ++ case BTF_KIND_VAR: return "var"; ++ case BTF_KIND_DATASEC: return "datasec"; ++ case BTF_KIND_FLOAT: return "float"; ++ case BTF_KIND_DECL_TAG: return "decl_tag"; ++ case BTF_KIND_TYPE_TAG: return "type_tag"; ++ case BTF_KIND_ENUM64: return "enum64"; ++ default: return "unknown"; ++ } ++} ++ ++const char *btf_kind_str(const struct btf_type *t) ++{ ++ return __btf_kind_str(btf_kind(t)); ++} ++ ++/* ++ * Fetch integer attribute of BTF map definition. Such attributes are ++ * represented using a pointer to an array, in which dimensionality of array ++ * encodes specified integer value. 
E.g., int (*type)[BPF_MAP_TYPE_ARRAY]; ++ * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF ++ * type definition, while using only sizeof(void *) space in ELF data section. ++ */ ++static bool get_map_field_int(const char *map_name, const struct btf *btf, ++ const struct btf_member *m, __u32 *res) ++{ ++ const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); ++ const char *name = btf__name_by_offset(btf, m->name_off); ++ const struct btf_array *arr_info; ++ const struct btf_type *arr_t; ++ ++ if (!btf_is_ptr(t)) { ++ pr_warn("map '%s': attr '%s': expected PTR, got %s.\n", ++ map_name, name, btf_kind_str(t)); ++ return false; ++ } ++ ++ arr_t = btf__type_by_id(btf, t->type); ++ if (!arr_t) { ++ pr_warn("map '%s': attr '%s': type [%u] not found.\n", ++ map_name, name, t->type); ++ return false; ++ } ++ if (!btf_is_array(arr_t)) { ++ pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n", ++ map_name, name, btf_kind_str(arr_t)); ++ return false; ++ } ++ arr_info = btf_array(arr_t); ++ *res = arr_info->nelems; ++ return true; ++} ++ ++static int build_map_pin_path(struct bpf_map *map, const char *path) ++{ ++ char buf[PATH_MAX]; ++ int len; ++ ++ if (!path) ++ path = "/sys/fs/bpf"; ++ ++ len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map)); ++ if (len < 0) ++ return -EINVAL; ++ else if (len >= PATH_MAX) ++ return -ENAMETOOLONG; ++ ++ return bpf_map__set_pin_path(map, buf); ++} ++ ++/* should match definition in bpf_helpers.h */ ++enum libbpf_pin_type { ++ LIBBPF_PIN_NONE, ++ /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ ++ LIBBPF_PIN_BY_NAME, ++}; ++ ++int parse_btf_map_def(const char *map_name, struct btf *btf, ++ const struct btf_type *def_t, bool strict, ++ struct btf_map_def *map_def, struct btf_map_def *inner_def) ++{ ++ const struct btf_type *t; ++ const struct btf_member *m; ++ bool is_inner = inner_def == NULL; ++ int vlen, i; ++ ++ vlen = btf_vlen(def_t); ++ m = btf_members(def_t); ++ for (i = 0; i < vlen; i++, m++) { ++ const char *name = btf__name_by_offset(btf, m->name_off); ++ ++ if (!name) { ++ pr_warn("map '%s': invalid field #%d.\n", map_name, i); ++ return -EINVAL; ++ } ++ if (strcmp(name, "type") == 0) { ++ if (!get_map_field_int(map_name, btf, m, &map_def->map_type)) ++ return -EINVAL; ++ map_def->parts |= MAP_DEF_MAP_TYPE; ++ } else if (strcmp(name, "max_entries") == 0) { ++ if (!get_map_field_int(map_name, btf, m, &map_def->max_entries)) ++ return -EINVAL; ++ map_def->parts |= MAP_DEF_MAX_ENTRIES; ++ } else if (strcmp(name, "map_flags") == 0) { ++ if (!get_map_field_int(map_name, btf, m, &map_def->map_flags)) ++ return -EINVAL; ++ map_def->parts |= MAP_DEF_MAP_FLAGS; ++ } else if (strcmp(name, "numa_node") == 0) { ++ if (!get_map_field_int(map_name, btf, m, &map_def->numa_node)) ++ return -EINVAL; ++ map_def->parts |= MAP_DEF_NUMA_NODE; ++ } else if (strcmp(name, "key_size") == 0) { ++ __u32 sz; ++ ++ if (!get_map_field_int(map_name, btf, m, &sz)) ++ return -EINVAL; ++ if (map_def->key_size && map_def->key_size != sz) { ++ pr_warn("map '%s': conflicting key size %u != %u.\n", ++ map_name, map_def->key_size, sz); ++ return -EINVAL; ++ } ++ map_def->key_size = sz; ++ map_def->parts |= MAP_DEF_KEY_SIZE; ++ } else if (strcmp(name, "key") == 0) { ++ __s64 sz; ++ ++ t = btf__type_by_id(btf, m->type); ++ if (!t) { ++ pr_warn("map '%s': key type [%d] not found.\n", ++ map_name, m->type); ++ return -EINVAL; ++ } ++ if (!btf_is_ptr(t)) { ++ pr_warn("map '%s': key spec is not PTR: %s.\n", ++ map_name, 
btf_kind_str(t)); ++ return -EINVAL; ++ } ++ sz = btf__resolve_size(btf, t->type); ++ if (sz < 0) { ++ pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n", ++ map_name, t->type, (ssize_t)sz); ++ return sz; ++ } ++ if (map_def->key_size && map_def->key_size != sz) { ++ pr_warn("map '%s': conflicting key size %u != %zd.\n", ++ map_name, map_def->key_size, (ssize_t)sz); ++ return -EINVAL; ++ } ++ map_def->key_size = sz; ++ map_def->key_type_id = t->type; ++ map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE; ++ } else if (strcmp(name, "value_size") == 0) { ++ __u32 sz; ++ ++ if (!get_map_field_int(map_name, btf, m, &sz)) ++ return -EINVAL; ++ if (map_def->value_size && map_def->value_size != sz) { ++ pr_warn("map '%s': conflicting value size %u != %u.\n", ++ map_name, map_def->value_size, sz); ++ return -EINVAL; ++ } ++ map_def->value_size = sz; ++ map_def->parts |= MAP_DEF_VALUE_SIZE; ++ } else if (strcmp(name, "value") == 0) { ++ __s64 sz; ++ ++ t = btf__type_by_id(btf, m->type); ++ if (!t) { ++ pr_warn("map '%s': value type [%d] not found.\n", ++ map_name, m->type); ++ return -EINVAL; ++ } ++ if (!btf_is_ptr(t)) { ++ pr_warn("map '%s': value spec is not PTR: %s.\n", ++ map_name, btf_kind_str(t)); ++ return -EINVAL; ++ } ++ sz = btf__resolve_size(btf, t->type); ++ if (sz < 0) { ++ pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n", ++ map_name, t->type, (ssize_t)sz); ++ return sz; ++ } ++ if (map_def->value_size && map_def->value_size != sz) { ++ pr_warn("map '%s': conflicting value size %u != %zd.\n", ++ map_name, map_def->value_size, (ssize_t)sz); ++ return -EINVAL; ++ } ++ map_def->value_size = sz; ++ map_def->value_type_id = t->type; ++ map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE; ++ } ++ else if (strcmp(name, "values") == 0) { ++ bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type); ++ bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY; ++ const char *desc = is_map_in_map ? 
"map-in-map inner" : "prog-array value"; ++ char inner_map_name[128]; ++ int err; ++ ++ if (is_inner) { ++ pr_warn("map '%s': multi-level inner maps not supported.\n", ++ map_name); ++ return -ENOTSUP; ++ } ++ if (i != vlen - 1) { ++ pr_warn("map '%s': '%s' member should be last.\n", ++ map_name, name); ++ return -EINVAL; ++ } ++ if (!is_map_in_map && !is_prog_array) { ++ pr_warn("map '%s': should be map-in-map or prog-array.\n", ++ map_name); ++ return -ENOTSUP; ++ } ++ if (map_def->value_size && map_def->value_size != 4) { ++ pr_warn("map '%s': conflicting value size %u != 4.\n", ++ map_name, map_def->value_size); ++ return -EINVAL; ++ } ++ map_def->value_size = 4; ++ t = btf__type_by_id(btf, m->type); ++ if (!t) { ++ pr_warn("map '%s': %s type [%d] not found.\n", ++ map_name, desc, m->type); ++ return -EINVAL; ++ } ++ if (!btf_is_array(t) || btf_array(t)->nelems) { ++ pr_warn("map '%s': %s spec is not a zero-sized array.\n", ++ map_name, desc); ++ return -EINVAL; ++ } ++ t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL); ++ if (!btf_is_ptr(t)) { ++ pr_warn("map '%s': %s def is of unexpected kind %s.\n", ++ map_name, desc, btf_kind_str(t)); ++ return -EINVAL; ++ } ++ t = skip_mods_and_typedefs(btf, t->type, NULL); ++ if (is_prog_array) { ++ if (!btf_is_func_proto(t)) { ++ pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n", ++ map_name, btf_kind_str(t)); ++ return -EINVAL; ++ } ++ continue; ++ } ++ if (!btf_is_struct(t)) { ++ pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n", ++ map_name, btf_kind_str(t)); ++ return -EINVAL; ++ } ++ ++ snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name); ++ err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL); ++ if (err) ++ return err; ++ ++ map_def->parts |= MAP_DEF_INNER_MAP; ++ } else if (strcmp(name, "pinning") == 0) { ++ __u32 val; ++ ++ if (is_inner) { ++ pr_warn("map '%s': inner def can't be pinned.\n", map_name); ++ return -EINVAL; ++ } ++ if (!get_map_field_int(map_name, btf, m, &val)) ++ return -EINVAL; ++ if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) { ++ pr_warn("map '%s': invalid pinning value %u.\n", ++ map_name, val); ++ return -EINVAL; ++ } ++ map_def->pinning = val; ++ map_def->parts |= MAP_DEF_PINNING; ++ } else if (strcmp(name, "map_extra") == 0) { ++ __u32 map_extra; ++ ++ if (!get_map_field_int(map_name, btf, m, &map_extra)) ++ return -EINVAL; ++ map_def->map_extra = map_extra; ++ map_def->parts |= MAP_DEF_MAP_EXTRA; ++ } else { ++ if (strict) { ++ pr_warn("map '%s': unknown field '%s'.\n", map_name, name); ++ return -ENOTSUP; ++ } ++ pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name); ++ } ++ } ++ ++ if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) { ++ pr_warn("map '%s': map type isn't specified.\n", map_name); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static size_t adjust_ringbuf_sz(size_t sz) ++{ ++ __u32 page_sz = sysconf(_SC_PAGE_SIZE); ++ __u32 mul; ++ ++ /* if user forgot to set any size, make sure they see error */ ++ if (sz == 0) ++ return 0; ++ /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be ++ * a power-of-2 multiple of kernel's page size. If user diligently ++ * satisified these conditions, pass the size through. ++ */ ++ if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz)) ++ return sz; ++ ++ /* Otherwise find closest (page_sz * power_of_2) product bigger than ++ * user-set size to satisfy both user size request and kernel ++ * requirements and substitute correct max_entries for map creation. 
++ */ ++ for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) { ++ if (mul * page_sz > sz) ++ return mul * page_sz; ++ } ++ ++ /* if it's impossible to satisfy the conditions (i.e., user size is ++ * very close to UINT_MAX but is not a power-of-2 multiple of ++ * page_size) then just return original size and let kernel reject it ++ */ ++ return sz; ++} ++ ++static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def) ++{ ++ map->def.type = def->map_type; ++ map->def.key_size = def->key_size; ++ map->def.value_size = def->value_size; ++ map->def.max_entries = def->max_entries; ++ map->def.map_flags = def->map_flags; ++ map->map_extra = def->map_extra; ++ ++ map->numa_node = def->numa_node; ++ map->btf_key_type_id = def->key_type_id; ++ map->btf_value_type_id = def->value_type_id; ++ ++ /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */ ++ if (map->def.type == BPF_MAP_TYPE_RINGBUF) ++ map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); ++ ++ if (def->parts & MAP_DEF_MAP_TYPE) ++ pr_debug("map '%s': found type = %u.\n", map->name, def->map_type); ++ ++ if (def->parts & MAP_DEF_KEY_TYPE) ++ pr_debug("map '%s': found key [%u], sz = %u.\n", ++ map->name, def->key_type_id, def->key_size); ++ else if (def->parts & MAP_DEF_KEY_SIZE) ++ pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size); ++ ++ if (def->parts & MAP_DEF_VALUE_TYPE) ++ pr_debug("map '%s': found value [%u], sz = %u.\n", ++ map->name, def->value_type_id, def->value_size); ++ else if (def->parts & MAP_DEF_VALUE_SIZE) ++ pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size); ++ ++ if (def->parts & MAP_DEF_MAX_ENTRIES) ++ pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries); ++ if (def->parts & MAP_DEF_MAP_FLAGS) ++ pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags); ++ if (def->parts & MAP_DEF_MAP_EXTRA) ++ pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name, ++ (unsigned long long)def->map_extra); ++ if (def->parts & MAP_DEF_PINNING) ++ pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning); ++ if (def->parts & MAP_DEF_NUMA_NODE) ++ pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node); ++ ++ if (def->parts & MAP_DEF_INNER_MAP) ++ pr_debug("map '%s': found inner map definition.\n", map->name); ++} ++ ++static const char *btf_var_linkage_str(__u32 linkage) ++{ ++ switch (linkage) { ++ case BTF_VAR_STATIC: return "static"; ++ case BTF_VAR_GLOBAL_ALLOCATED: return "global"; ++ case BTF_VAR_GLOBAL_EXTERN: return "extern"; ++ default: return "unknown"; ++ } ++} ++ ++static int bpf_object__init_user_btf_map(struct bpf_object *obj, ++ const struct btf_type *sec, ++ int var_idx, int sec_idx, ++ const Elf_Data *data, bool strict, ++ const char *pin_root_path) ++{ ++ struct btf_map_def map_def = {}, inner_def = {}; ++ const struct btf_type *var, *def; ++ const struct btf_var_secinfo *vi; ++ const struct btf_var *var_extra; ++ const char *map_name; ++ struct bpf_map *map; ++ int err; ++ ++ vi = btf_var_secinfos(sec) + var_idx; ++ var = btf__type_by_id(obj->btf, vi->type); ++ var_extra = btf_var(var); ++ map_name = btf__name_by_offset(obj->btf, var->name_off); ++ ++ if (map_name == NULL || map_name[0] == '\0') { ++ pr_warn("map #%d: empty name.\n", var_idx); ++ return -EINVAL; ++ } ++ if ((__u64)vi->offset + vi->size > data->d_size) { ++ pr_warn("map '%s' BTF data is corrupted.\n", map_name); ++ return -EINVAL; ++ } ++ if (!btf_is_var(var)) { ++ pr_warn("map '%s': 
unexpected var kind %s.\n", ++ map_name, btf_kind_str(var)); ++ return -EINVAL; ++ } ++ if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) { ++ pr_warn("map '%s': unsupported map linkage %s.\n", ++ map_name, btf_var_linkage_str(var_extra->linkage)); ++ return -EOPNOTSUPP; ++ } ++ ++ def = skip_mods_and_typedefs(obj->btf, var->type, NULL); ++ if (!btf_is_struct(def)) { ++ pr_warn("map '%s': unexpected def kind %s.\n", ++ map_name, btf_kind_str(var)); ++ return -EINVAL; ++ } ++ if (def->size > vi->size) { ++ pr_warn("map '%s': invalid def size.\n", map_name); ++ return -EINVAL; ++ } ++ ++ map = bpf_object__add_map(obj); ++ if (IS_ERR(map)) ++ return PTR_ERR(map); ++ map->name = strdup(map_name); ++ if (!map->name) { ++ pr_warn("map '%s': failed to alloc map name.\n", map_name); ++ return -ENOMEM; ++ } ++ map->libbpf_type = LIBBPF_MAP_UNSPEC; ++ map->def.type = BPF_MAP_TYPE_UNSPEC; ++ map->sec_idx = sec_idx; ++ map->sec_offset = vi->offset; ++ map->btf_var_idx = var_idx; ++ pr_debug("map '%s': at sec_idx %d, offset %zu.\n", ++ map_name, map->sec_idx, map->sec_offset); ++ ++ err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def); ++ if (err) ++ return err; ++ ++ fill_map_from_def(map, &map_def); ++ ++ if (map_def.pinning == LIBBPF_PIN_BY_NAME) { ++ err = build_map_pin_path(map, pin_root_path); ++ if (err) { ++ pr_warn("map '%s': couldn't build pin path.\n", map->name); ++ return err; ++ } ++ } ++ ++ if (map_def.parts & MAP_DEF_INNER_MAP) { ++ map->inner_map = calloc(1, sizeof(*map->inner_map)); ++ if (!map->inner_map) ++ return -ENOMEM; ++ map->inner_map->fd = -1; ++ map->inner_map->sec_idx = sec_idx; ++ map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1); ++ if (!map->inner_map->name) ++ return -ENOMEM; ++ sprintf(map->inner_map->name, "%s.inner", map_name); ++ ++ fill_map_from_def(map->inner_map, &inner_def); ++ } ++ ++ err = bpf_map_find_btf_info(obj, map); ++ if (err) ++ return err; ++ ++ return 0; ++} ++ ++static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, ++ const char *pin_root_path) ++{ ++ const struct btf_type *sec = NULL; ++ int nr_types, i, vlen, err; ++ const struct btf_type *t; ++ const char *name; ++ Elf_Data *data; ++ Elf_Scn *scn; ++ ++ if (obj->efile.btf_maps_shndx < 0) ++ return 0; ++ ++ scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx); ++ data = elf_sec_data(obj, scn); ++ if (!scn || !data) { ++ pr_warn("elf: failed to get %s map definitions for %s\n", ++ MAPS_ELF_SEC, obj->path); ++ return -EINVAL; ++ } ++ ++ nr_types = btf__type_cnt(obj->btf); ++ for (i = 1; i < nr_types; i++) { ++ t = btf__type_by_id(obj->btf, i); ++ if (!btf_is_datasec(t)) ++ continue; ++ name = btf__name_by_offset(obj->btf, t->name_off); ++ if (strcmp(name, MAPS_ELF_SEC) == 0) { ++ sec = t; ++ obj->efile.btf_maps_sec_btf_id = i; ++ break; ++ } ++ } ++ ++ if (!sec) { ++ pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC); ++ return -ENOENT; ++ } ++ ++ vlen = btf_vlen(sec); ++ for (i = 0; i < vlen; i++) { ++ err = bpf_object__init_user_btf_map(obj, sec, i, ++ obj->efile.btf_maps_shndx, ++ data, strict, ++ pin_root_path); ++ if (err) ++ return err; ++ } ++ ++ return 0; ++} ++ ++static int bpf_object__init_maps(struct bpf_object *obj, ++ const struct bpf_object_open_opts *opts) ++{ ++ const char *pin_root_path; ++ bool strict; ++ int err = 0; ++ ++ strict = !OPTS_GET(opts, relaxed_maps, false); ++ pin_root_path = OPTS_GET(opts, pin_root_path, NULL); ++ ++ err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path); ++ 
err = err ?: bpf_object__init_global_data_maps(obj); ++ err = err ?: bpf_object__init_kconfig_map(obj); ++ err = err ?: bpf_object__init_struct_ops_maps(obj); ++ ++ return err; ++} ++ ++static bool section_have_execinstr(struct bpf_object *obj, int idx) ++{ ++ Elf64_Shdr *sh; ++ ++ sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx)); ++ if (!sh) ++ return false; ++ ++ return sh->sh_flags & SHF_EXECINSTR; ++} ++ ++static bool btf_needs_sanitization(struct bpf_object *obj) ++{ ++ bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); ++ bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); ++ bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); ++ bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); ++ bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); ++ bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); ++ bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); ++ ++ return !has_func || !has_datasec || !has_func_global || !has_float || ++ !has_decl_tag || !has_type_tag || !has_enum64; ++} ++ ++static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) ++{ ++ bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); ++ bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); ++ bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); ++ bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); ++ bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); ++ bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); ++ bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); ++ int enum64_placeholder_id = 0; ++ struct btf_type *t; ++ int i, j, vlen; ++ ++ for (i = 1; i < btf__type_cnt(btf); i++) { ++ t = (struct btf_type *)btf__type_by_id(btf, i); ++ ++ if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) { ++ /* replace VAR/DECL_TAG with INT */ ++ t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0); ++ /* ++ * using size = 1 is the safest choice, 4 will be too ++ * big and cause kernel BTF validation failure if ++ * original variable took less than 4 bytes ++ */ ++ t->size = 1; ++ *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8); ++ } else if (!has_datasec && btf_is_datasec(t)) { ++ /* replace DATASEC with STRUCT */ ++ const struct btf_var_secinfo *v = btf_var_secinfos(t); ++ struct btf_member *m = btf_members(t); ++ struct btf_type *vt; ++ char *name; ++ ++ name = (char *)btf__name_by_offset(btf, t->name_off); ++ while (*name) { ++ if (*name == '.') ++ *name = '_'; ++ name++; ++ } ++ ++ vlen = btf_vlen(t); ++ t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen); ++ for (j = 0; j < vlen; j++, v++, m++) { ++ /* order of field assignments is important */ ++ m->offset = v->offset * 8; ++ m->type = v->type; ++ /* preserve variable name as member name */ ++ vt = (void *)btf__type_by_id(btf, v->type); ++ m->name_off = vt->name_off; ++ } ++ } else if (!has_func && btf_is_func_proto(t)) { ++ /* replace FUNC_PROTO with ENUM */ ++ vlen = btf_vlen(t); ++ t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen); ++ t->size = sizeof(__u32); /* kernel enforced */ ++ } else if (!has_func && btf_is_func(t)) { ++ /* replace FUNC with TYPEDEF */ ++ t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0); ++ } else if (!has_func_global && btf_is_func(t)) { ++ /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */ ++ t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0); ++ } else if (!has_float && btf_is_float(t)) { ++ /* replace FLOAT with an equally-sized empty STRUCT; ++ * since C compilers do not accept e.g. 
"float" as a ++ * valid struct name, make it anonymous ++ */ ++ t->name_off = 0; ++ t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0); ++ } else if (!has_type_tag && btf_is_type_tag(t)) { ++ /* replace TYPE_TAG with a CONST */ ++ t->name_off = 0; ++ t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0); ++ } else if (!has_enum64 && btf_is_enum(t)) { ++ /* clear the kflag */ ++ t->info = btf_type_info(btf_kind(t), btf_vlen(t), false); ++ } else if (!has_enum64 && btf_is_enum64(t)) { ++ /* replace ENUM64 with a union */ ++ struct btf_member *m; ++ ++ if (enum64_placeholder_id == 0) { ++ enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0); ++ if (enum64_placeholder_id < 0) ++ return enum64_placeholder_id; ++ ++ t = (struct btf_type *)btf__type_by_id(btf, i); ++ } ++ ++ m = btf_members(t); ++ vlen = btf_vlen(t); ++ t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen); ++ for (j = 0; j < vlen; j++, m++) { ++ m->type = enum64_placeholder_id; ++ m->offset = 0; ++ } ++ } ++ } ++ ++ return 0; ++} ++ ++static bool libbpf_needs_btf(const struct bpf_object *obj) ++{ ++ return obj->efile.btf_maps_shndx >= 0 || ++ obj->efile.st_ops_shndx >= 0 || ++ obj->nr_extern > 0; ++} ++ ++static bool kernel_needs_btf(const struct bpf_object *obj) ++{ ++ return obj->efile.st_ops_shndx >= 0; ++} ++ ++static int bpf_object__init_btf(struct bpf_object *obj, ++ Elf_Data *btf_data, ++ Elf_Data *btf_ext_data) ++{ ++ int err = -ENOENT; ++ ++ if (btf_data) { ++ obj->btf = btf__new(btf_data->d_buf, btf_data->d_size); ++ err = libbpf_get_error(obj->btf); ++ if (err) { ++ obj->btf = NULL; ++ pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err); ++ goto out; ++ } ++ /* enforce 8-byte pointers for BPF-targeted BTFs */ ++ btf__set_pointer_size(obj->btf, 8); ++ } ++ if (btf_ext_data) { ++ struct btf_ext_info *ext_segs[3]; ++ int seg_num, sec_num; ++ ++ if (!obj->btf) { ++ pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n", ++ BTF_EXT_ELF_SEC, BTF_ELF_SEC); ++ goto out; ++ } ++ obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size); ++ err = libbpf_get_error(obj->btf_ext); ++ if (err) { ++ pr_warn("Error loading ELF section %s: %d. 
Ignored and continue.\n", ++ BTF_EXT_ELF_SEC, err); ++ obj->btf_ext = NULL; ++ goto out; ++ } ++ ++ /* setup .BTF.ext to ELF section mapping */ ++ ext_segs[0] = &obj->btf_ext->func_info; ++ ext_segs[1] = &obj->btf_ext->line_info; ++ ext_segs[2] = &obj->btf_ext->core_relo_info; ++ for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) { ++ struct btf_ext_info *seg = ext_segs[seg_num]; ++ const struct btf_ext_info_sec *sec; ++ const char *sec_name; ++ Elf_Scn *scn; ++ ++ if (seg->sec_cnt == 0) ++ continue; ++ ++ seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs)); ++ if (!seg->sec_idxs) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ sec_num = 0; ++ for_each_btf_ext_sec(seg, sec) { ++ /* preventively increment index to avoid doing ++ * this before every continue below ++ */ ++ sec_num++; ++ ++ sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); ++ if (str_is_empty(sec_name)) ++ continue; ++ scn = elf_sec_by_name(obj, sec_name); ++ if (!scn) ++ continue; ++ ++ seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn); ++ } ++ } ++ } ++out: ++ if (err && libbpf_needs_btf(obj)) { ++ pr_warn("BTF is required, but is missing or corrupted.\n"); ++ return err; ++ } ++ return 0; ++} ++ ++static int compare_vsi_off(const void *_a, const void *_b) ++{ ++ const struct btf_var_secinfo *a = _a; ++ const struct btf_var_secinfo *b = _b; ++ ++ return a->offset - b->offset; ++} ++ ++static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, ++ struct btf_type *t) ++{ ++ __u32 size = 0, off = 0, i, vars = btf_vlen(t); ++ const char *name = btf__name_by_offset(btf, t->name_off); ++ const struct btf_type *t_var; ++ struct btf_var_secinfo *vsi; ++ const struct btf_var *var; ++ int ret; ++ ++ if (!name) { ++ pr_debug("No name found in string section for DATASEC kind.\n"); ++ return -ENOENT; ++ } ++ ++ /* .extern datasec size and var offsets were set correctly during ++ * extern collection step, so just skip straight to sorting variables ++ */ ++ if (t->size) ++ goto sort_vars; ++ ++ ret = find_elf_sec_sz(obj, name, &size); ++ if (ret || !size) { ++ pr_debug("Invalid size for section %s: %u bytes\n", name, size); ++ return -ENOENT; ++ } ++ ++ t->size = size; ++ ++ for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { ++ t_var = btf__type_by_id(btf, vsi->type); ++ if (!t_var || !btf_is_var(t_var)) { ++ pr_debug("Non-VAR type seen in section %s\n", name); ++ return -EINVAL; ++ } ++ ++ var = btf_var(t_var); ++ if (var->linkage == BTF_VAR_STATIC) ++ continue; ++ ++ name = btf__name_by_offset(btf, t_var->name_off); ++ if (!name) { ++ pr_debug("No name found in string section for VAR kind\n"); ++ return -ENOENT; ++ } ++ ++ ret = find_elf_var_offset(obj, name, &off); ++ if (ret) { ++ pr_debug("No offset found in symbol table for VAR %s\n", ++ name); ++ return -ENOENT; ++ } ++ ++ vsi->offset = off; ++ } ++ ++sort_vars: ++ qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off); ++ return 0; ++} ++ ++static int btf_finalize_data(struct bpf_object *obj, struct btf *btf) ++{ ++ int err = 0; ++ __u32 i, n = btf__type_cnt(btf); ++ ++ for (i = 1; i < n; i++) { ++ struct btf_type *t = btf_type_by_id(btf, i); ++ ++ /* Loader needs to fix up some of the things compiler ++ * couldn't get its hands on while emitting BTF. This ++ * is section size and global variable offset. We use ++ * the info from the ELF itself for this purpose. 
++ */ ++ if (btf_is_datasec(t)) { ++ err = btf_fixup_datasec(obj, btf, t); ++ if (err) ++ break; ++ } ++ } ++ ++ return libbpf_err(err); ++} ++ ++static int bpf_object__finalize_btf(struct bpf_object *obj) ++{ ++ int err; ++ ++ if (!obj->btf) ++ return 0; ++ ++ err = btf_finalize_data(obj, obj->btf); ++ if (err) { ++ pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err); ++ return err; ++ } ++ ++ return 0; ++} ++ ++static bool prog_needs_vmlinux_btf(struct bpf_program *prog) ++{ ++ if (prog->type == BPF_PROG_TYPE_STRUCT_OPS || ++ prog->type == BPF_PROG_TYPE_LSM) ++ return true; ++ ++ /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs ++ * also need vmlinux BTF ++ */ ++ if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd) ++ return true; ++ ++ return false; ++} ++ ++static bool obj_needs_vmlinux_btf(const struct bpf_object *obj) ++{ ++ struct bpf_program *prog; ++ int i; ++ ++ /* CO-RE relocations need kernel BTF, only when btf_custom_path ++ * is not specified ++ */ ++ if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path) ++ return true; ++ ++ /* Support for typed ksyms needs kernel BTF */ ++ for (i = 0; i < obj->nr_extern; i++) { ++ const struct extern_desc *ext; ++ ++ ext = &obj->externs[i]; ++ if (ext->type == EXT_KSYM && ext->ksym.type_id) ++ return true; ++ } ++ ++ bpf_object__for_each_program(prog, obj) { ++ if (!prog->autoload) ++ continue; ++ if (prog_needs_vmlinux_btf(prog)) ++ return true; ++ } ++ ++ return false; ++} ++ ++static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force) ++{ ++ int err; ++ ++ /* btf_vmlinux could be loaded earlier */ ++ if (obj->btf_vmlinux || obj->gen_loader) ++ return 0; ++ ++ if (!force && !obj_needs_vmlinux_btf(obj)) ++ return 0; ++ ++ obj->btf_vmlinux = btf__load_vmlinux_btf(); ++ err = libbpf_get_error(obj->btf_vmlinux); ++ if (err) { ++ pr_warn("Error loading vmlinux BTF: %d\n", err); ++ obj->btf_vmlinux = NULL; ++ return err; ++ } ++ return 0; ++} ++ ++static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) ++{ ++ struct btf *kern_btf = obj->btf; ++ bool btf_mandatory, sanitize; ++ int i, err = 0; ++ ++ if (!obj->btf) ++ return 0; ++ ++ if (!kernel_supports(obj, FEAT_BTF)) { ++ if (kernel_needs_btf(obj)) { ++ err = -EOPNOTSUPP; ++ goto report; ++ } ++ pr_debug("Kernel doesn't support BTF, skipping uploading it.\n"); ++ return 0; ++ } ++ ++ /* Even though some subprogs are global/weak, user might prefer more ++ * permissive BPF verification process that BPF verifier performs for ++ * static functions, taking into account more context from the caller ++ * functions. In such case, they need to mark such subprogs with ++ * __attribute__((visibility("hidden"))) and libbpf will adjust ++ * corresponding FUNC BTF type to be marked as static and trigger more ++ * involved BPF verification process. 
++ */ ++ for (i = 0; i < obj->nr_programs; i++) { ++ struct bpf_program *prog = &obj->programs[i]; ++ struct btf_type *t; ++ const char *name; ++ int j, n; ++ ++ if (!prog->mark_btf_static || !prog_is_subprog(obj, prog)) ++ continue; ++ ++ n = btf__type_cnt(obj->btf); ++ for (j = 1; j < n; j++) { ++ t = btf_type_by_id(obj->btf, j); ++ if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) ++ continue; ++ ++ name = btf__str_by_offset(obj->btf, t->name_off); ++ if (strcmp(name, prog->name) != 0) ++ continue; ++ ++ t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0); ++ break; ++ } ++ } ++ ++ sanitize = btf_needs_sanitization(obj); ++ if (sanitize) { ++ const void *raw_data; ++ __u32 sz; ++ ++ /* clone BTF to sanitize a copy and leave the original intact */ ++ raw_data = btf__raw_data(obj->btf, &sz); ++ kern_btf = btf__new(raw_data, sz); ++ err = libbpf_get_error(kern_btf); ++ if (err) ++ return err; ++ ++ /* enforce 8-byte pointers for BPF-targeted BTFs */ ++ btf__set_pointer_size(obj->btf, 8); ++ err = bpf_object__sanitize_btf(obj, kern_btf); ++ if (err) ++ return err; ++ } ++ ++ if (obj->gen_loader) { ++ __u32 raw_size = 0; ++ const void *raw_data = btf__raw_data(kern_btf, &raw_size); ++ ++ if (!raw_data) ++ return -ENOMEM; ++ bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size); ++ /* Pretend to have valid FD to pass various fd >= 0 checks. ++ * This fd == 0 will not be used with any syscall and will be reset to -1 eventually. ++ */ ++ btf__set_fd(kern_btf, 0); ++ } else { ++ /* currently BPF_BTF_LOAD only supports log_level 1 */ ++ err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size, ++ obj->log_level ? 1 : 0); ++ } ++ if (sanitize) { ++ if (!err) { ++ /* move fd to libbpf's BTF */ ++ btf__set_fd(obj->btf, btf__fd(kern_btf)); ++ btf__set_fd(kern_btf, -1); ++ } ++ btf__free(kern_btf); ++ } ++report: ++ if (err) { ++ btf_mandatory = kernel_needs_btf(obj); ++ pr_warn("Error loading .BTF into kernel: %d. %s\n", err, ++ btf_mandatory ? "BTF is mandatory, can't proceed." 
++ : "BTF is optional, ignoring."); ++ if (!btf_mandatory) ++ err = 0; ++ } ++ return err; ++} ++ ++static const char *elf_sym_str(const struct bpf_object *obj, size_t off) ++{ ++ const char *name; ++ ++ name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off); ++ if (!name) { ++ pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n", ++ off, obj->path, elf_errmsg(-1)); ++ return NULL; ++ } ++ ++ return name; ++} ++ ++static const char *elf_sec_str(const struct bpf_object *obj, size_t off) ++{ ++ const char *name; ++ ++ name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off); ++ if (!name) { ++ pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n", ++ off, obj->path, elf_errmsg(-1)); ++ return NULL; ++ } ++ ++ return name; ++} ++ ++static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx) ++{ ++ Elf_Scn *scn; ++ ++ scn = elf_getscn(obj->efile.elf, idx); ++ if (!scn) { ++ pr_warn("elf: failed to get section(%zu) from %s: %s\n", ++ idx, obj->path, elf_errmsg(-1)); ++ return NULL; ++ } ++ return scn; ++} ++ ++static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name) ++{ ++ Elf_Scn *scn = NULL; ++ Elf *elf = obj->efile.elf; ++ const char *sec_name; ++ ++ while ((scn = elf_nextscn(elf, scn)) != NULL) { ++ sec_name = elf_sec_name(obj, scn); ++ if (!sec_name) ++ return NULL; ++ ++ if (strcmp(sec_name, name) != 0) ++ continue; ++ ++ return scn; ++ } ++ return NULL; ++} ++ ++static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn) ++{ ++ Elf64_Shdr *shdr; ++ ++ if (!scn) ++ return NULL; ++ ++ shdr = elf64_getshdr(scn); ++ if (!shdr) { ++ pr_warn("elf: failed to get section(%zu) header from %s: %s\n", ++ elf_ndxscn(scn), obj->path, elf_errmsg(-1)); ++ return NULL; ++ } ++ ++ return shdr; ++} ++ ++static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn) ++{ ++ const char *name; ++ Elf64_Shdr *sh; ++ ++ if (!scn) ++ return NULL; ++ ++ sh = elf_sec_hdr(obj, scn); ++ if (!sh) ++ return NULL; ++ ++ name = elf_sec_str(obj, sh->sh_name); ++ if (!name) { ++ pr_warn("elf: failed to get section(%zu) name from %s: %s\n", ++ elf_ndxscn(scn), obj->path, elf_errmsg(-1)); ++ return NULL; ++ } ++ ++ return name; ++} ++ ++static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn) ++{ ++ Elf_Data *data; ++ ++ if (!scn) ++ return NULL; ++ ++ data = elf_getdata(scn, 0); ++ if (!data) { ++ pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n", ++ elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "", ++ obj->path, elf_errmsg(-1)); ++ return NULL; ++ } ++ ++ return data; ++} ++ ++static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx) ++{ ++ if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym)) ++ return NULL; ++ ++ return (Elf64_Sym *)obj->efile.symbols->d_buf + idx; ++} ++ ++static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx) ++{ ++ if (idx >= data->d_size / sizeof(Elf64_Rel)) ++ return NULL; ++ ++ return (Elf64_Rel *)data->d_buf + idx; ++} ++ ++static bool is_sec_name_dwarf(const char *name) ++{ ++ /* approximation, but the actual list is too long */ ++ return str_has_pfx(name, ".debug_"); ++} ++ ++static bool ignore_elf_section(Elf64_Shdr *hdr, const char *name) ++{ ++ /* no special handling of .strtab */ ++ if (hdr->sh_type == SHT_STRTAB) ++ return true; ++ ++ /* ignore .llvm_addrsig section as well */ ++ if (hdr->sh_type == SHT_LLVM_ADDRSIG) ++ return true; ++ ++ /* no subprograms will lead to an empty .text section, ignore it */ ++ if 
(hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 && ++ strcmp(name, ".text") == 0) ++ return true; ++ ++ /* DWARF sections */ ++ if (is_sec_name_dwarf(name)) ++ return true; ++ ++ if (str_has_pfx(name, ".rel")) { ++ name += sizeof(".rel") - 1; ++ /* DWARF section relocations */ ++ if (is_sec_name_dwarf(name)) ++ return true; ++ ++ /* .BTF and .BTF.ext don't need relocations */ ++ if (strcmp(name, BTF_ELF_SEC) == 0 || ++ strcmp(name, BTF_EXT_ELF_SEC) == 0) ++ return true; ++ } ++ ++ return false; ++} ++ ++static int cmp_progs(const void *_a, const void *_b) ++{ ++ const struct bpf_program *a = _a; ++ const struct bpf_program *b = _b; ++ ++ if (a->sec_idx != b->sec_idx) ++ return a->sec_idx < b->sec_idx ? -1 : 1; ++ ++ /* sec_insn_off can't be the same within the section */ ++ return a->sec_insn_off < b->sec_insn_off ? -1 : 1; ++} ++ ++static int bpf_object__elf_collect(struct bpf_object *obj) ++{ ++ struct elf_sec_desc *sec_desc; ++ Elf *elf = obj->efile.elf; ++ Elf_Data *btf_ext_data = NULL; ++ Elf_Data *btf_data = NULL; ++ int idx = 0, err = 0; ++ const char *name; ++ Elf_Data *data; ++ Elf_Scn *scn; ++ Elf64_Shdr *sh; ++ ++ /* ELF section indices are 0-based, but sec #0 is special "invalid" ++ * section. e_shnum does include sec #0, so e_shnum is the necessary ++ * size of an array to keep all the sections. ++ */ ++ obj->efile.sec_cnt = obj->efile.ehdr->e_shnum; ++ obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs)); ++ if (!obj->efile.secs) ++ return -ENOMEM; ++ ++ /* a bunch of ELF parsing functionality depends on processing symbols, ++ * so do the first pass and find the symbol table ++ */ ++ scn = NULL; ++ while ((scn = elf_nextscn(elf, scn)) != NULL) { ++ sh = elf_sec_hdr(obj, scn); ++ if (!sh) ++ return -LIBBPF_ERRNO__FORMAT; ++ ++ if (sh->sh_type == SHT_SYMTAB) { ++ if (obj->efile.symbols) { ++ pr_warn("elf: multiple symbol tables in %s\n", obj->path); ++ return -LIBBPF_ERRNO__FORMAT; ++ } ++ ++ data = elf_sec_data(obj, scn); ++ if (!data) ++ return -LIBBPF_ERRNO__FORMAT; ++ ++ idx = elf_ndxscn(scn); ++ ++ obj->efile.symbols = data; ++ obj->efile.symbols_shndx = idx; ++ obj->efile.strtabidx = sh->sh_link; ++ } ++ } ++ ++ if (!obj->efile.symbols) { ++ pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n", ++ obj->path); ++ return -ENOENT; ++ } ++ ++ scn = NULL; ++ while ((scn = elf_nextscn(elf, scn)) != NULL) { ++ idx = elf_ndxscn(scn); ++ sec_desc = &obj->efile.secs[idx]; ++ ++ sh = elf_sec_hdr(obj, scn); ++ if (!sh) ++ return -LIBBPF_ERRNO__FORMAT; ++ ++ name = elf_sec_str(obj, sh->sh_name); ++ if (!name) ++ return -LIBBPF_ERRNO__FORMAT; ++ ++ if (ignore_elf_section(sh, name)) ++ continue; ++ ++ data = elf_sec_data(obj, scn); ++ if (!data) ++ return -LIBBPF_ERRNO__FORMAT; ++ ++ pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", ++ idx, name, (unsigned long)data->d_size, ++ (int)sh->sh_link, (unsigned long)sh->sh_flags, ++ (int)sh->sh_type); ++ ++ if (strcmp(name, "license") == 0) { ++ err = bpf_object__init_license(obj, data->d_buf, data->d_size); ++ if (err) ++ return err; ++ } else if (strcmp(name, "version") == 0) { ++ err = bpf_object__init_kversion(obj, data->d_buf, data->d_size); ++ if (err) ++ return err; ++ } else if (strcmp(name, "maps") == 0) { ++ obj->efile.maps_shndx = idx; ++ } else if (strcmp(name, MAPS_ELF_SEC) == 0) { ++ obj->efile.btf_maps_shndx = idx; ++ } else if (strcmp(name, BTF_ELF_SEC) == 0) { ++ if (sh->sh_type != SHT_PROGBITS) ++ return -LIBBPF_ERRNO__FORMAT; ++ btf_data = data; ++ } else if 
(strcmp(name, BTF_EXT_ELF_SEC) == 0) { ++ if (sh->sh_type != SHT_PROGBITS) ++ return -LIBBPF_ERRNO__FORMAT; ++ btf_ext_data = data; ++ } else if (sh->sh_type == SHT_SYMTAB) { ++ /* already processed during the first pass above */ ++ } else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) { ++ if (sh->sh_flags & SHF_EXECINSTR) { ++ if (strcmp(name, ".text") == 0) ++ obj->efile.text_shndx = idx; ++ err = bpf_object__add_programs(obj, data, name, idx); ++ if (err) ++ return err; ++ } else if (strcmp(name, DATA_SEC) == 0 || ++ str_has_pfx(name, DATA_SEC ".")) { ++ sec_desc->sec_type = SEC_DATA; ++ sec_desc->shdr = sh; ++ sec_desc->data = data; ++ } else if (strcmp(name, RODATA_SEC) == 0 || ++ str_has_pfx(name, RODATA_SEC ".")) { ++ sec_desc->sec_type = SEC_RODATA; ++ sec_desc->shdr = sh; ++ sec_desc->data = data; ++ } else if (strcmp(name, STRUCT_OPS_SEC) == 0) { ++ obj->efile.st_ops_data = data; ++ obj->efile.st_ops_shndx = idx; ++ } else { ++ pr_info("elf: skipping unrecognized data section(%d) %s\n", ++ idx, name); ++ } ++ } else if (sh->sh_type == SHT_REL) { ++ int targ_sec_idx = sh->sh_info; /* points to other section */ ++ ++ if (sh->sh_entsize != sizeof(Elf64_Rel) || ++ targ_sec_idx >= obj->efile.sec_cnt) ++ return -LIBBPF_ERRNO__FORMAT; ++ ++ /* Only do relo for section with exec instructions */ ++ if (!section_have_execinstr(obj, targ_sec_idx) && ++ strcmp(name, ".rel" STRUCT_OPS_SEC) && ++ strcmp(name, ".rel" MAPS_ELF_SEC)) { ++ pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n", ++ idx, name, targ_sec_idx, ++ elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: ""); ++ continue; ++ } ++ ++ sec_desc->sec_type = SEC_RELO; ++ sec_desc->shdr = sh; ++ sec_desc->data = data; ++ } else if (sh->sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) { ++ sec_desc->sec_type = SEC_BSS; ++ sec_desc->shdr = sh; ++ sec_desc->data = data; ++ } else { ++ pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name, ++ (size_t)sh->sh_size); ++ } ++ } ++ ++ if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) { ++ pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path); ++ return -LIBBPF_ERRNO__FORMAT; ++ } ++ ++ /* sort BPF programs by section name and in-section instruction offset ++ * for faster search */ ++ if (obj->nr_programs) ++ qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs); ++ ++ return bpf_object__init_btf(obj, btf_data, btf_ext_data); ++} ++ ++static bool sym_is_extern(const Elf64_Sym *sym) ++{ ++ int bind = ELF64_ST_BIND(sym->st_info); ++ /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */ ++ return sym->st_shndx == SHN_UNDEF && ++ (bind == STB_GLOBAL || bind == STB_WEAK) && ++ ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE; ++} ++ ++static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx) ++{ ++ int bind = ELF64_ST_BIND(sym->st_info); ++ int type = ELF64_ST_TYPE(sym->st_info); ++ ++ /* in .text section */ ++ if (sym->st_shndx != text_shndx) ++ return false; ++ ++ /* local function */ ++ if (bind == STB_LOCAL && type == STT_SECTION) ++ return true; ++ ++ /* global function */ ++ return bind == STB_GLOBAL && type == STT_FUNC; ++} ++ ++static int find_extern_btf_id(const struct btf *btf, const char *ext_name) ++{ ++ const struct btf_type *t; ++ const char *tname; ++ int i, n; ++ ++ if (!btf) ++ return -ESRCH; ++ ++ n = btf__type_cnt(btf); ++ for (i = 1; i < n; i++) { ++ t = btf__type_by_id(btf, i); ++ ++ if (!btf_is_var(t) && !btf_is_func(t)) ++ continue; ++ ++ tname = btf__name_by_offset(btf, 
t->name_off); ++ if (strcmp(tname, ext_name)) ++ continue; ++ ++ if (btf_is_var(t) && ++ btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN) ++ return -EINVAL; ++ ++ if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN) ++ return -EINVAL; ++ ++ return i; ++ } ++ ++ return -ENOENT; ++} ++ ++static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) { ++ const struct btf_var_secinfo *vs; ++ const struct btf_type *t; ++ int i, j, n; ++ ++ if (!btf) ++ return -ESRCH; ++ ++ n = btf__type_cnt(btf); ++ for (i = 1; i < n; i++) { ++ t = btf__type_by_id(btf, i); ++ ++ if (!btf_is_datasec(t)) ++ continue; ++ ++ vs = btf_var_secinfos(t); ++ for (j = 0; j < btf_vlen(t); j++, vs++) { ++ if (vs->type == ext_btf_id) ++ return i; ++ } ++ } ++ ++ return -ENOENT; ++} ++ ++static enum kcfg_type find_kcfg_type(const struct btf *btf, int id, ++ bool *is_signed) ++{ ++ const struct btf_type *t; ++ const char *name; ++ ++ t = skip_mods_and_typedefs(btf, id, NULL); ++ name = btf__name_by_offset(btf, t->name_off); ++ ++ if (is_signed) ++ *is_signed = false; ++ switch (btf_kind(t)) { ++ case BTF_KIND_INT: { ++ int enc = btf_int_encoding(t); ++ ++ if (enc & BTF_INT_BOOL) ++ return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN; ++ if (is_signed) ++ *is_signed = enc & BTF_INT_SIGNED; ++ if (t->size == 1) ++ return KCFG_CHAR; ++ if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1))) ++ return KCFG_UNKNOWN; ++ return KCFG_INT; ++ } ++ case BTF_KIND_ENUM: ++ if (t->size != 4) ++ return KCFG_UNKNOWN; ++ if (strcmp(name, "libbpf_tristate")) ++ return KCFG_UNKNOWN; ++ return KCFG_TRISTATE; ++ case BTF_KIND_ENUM64: ++ if (strcmp(name, "libbpf_tristate")) ++ return KCFG_UNKNOWN; ++ return KCFG_TRISTATE; ++ case BTF_KIND_ARRAY: ++ if (btf_array(t)->nelems == 0) ++ return KCFG_UNKNOWN; ++ if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR) ++ return KCFG_UNKNOWN; ++ return KCFG_CHAR_ARR; ++ default: ++ return KCFG_UNKNOWN; ++ } ++} ++ ++static int cmp_externs(const void *_a, const void *_b) ++{ ++ const struct extern_desc *a = _a; ++ const struct extern_desc *b = _b; ++ ++ if (a->type != b->type) ++ return a->type < b->type ? -1 : 1; ++ ++ if (a->type == EXT_KCFG) { ++ /* descending order by alignment requirements */ ++ if (a->kcfg.align != b->kcfg.align) ++ return a->kcfg.align > b->kcfg.align ? -1 : 1; ++ /* ascending order by size, within same alignment class */ ++ if (a->kcfg.sz != b->kcfg.sz) ++ return a->kcfg.sz < b->kcfg.sz ? -1 : 1; ++ } ++ ++ /* resolve ties by name */ ++ return strcmp(a->name, b->name); ++} ++ ++static int find_int_btf_id(const struct btf *btf) ++{ ++ const struct btf_type *t; ++ int i, n; ++ ++ n = btf__type_cnt(btf); ++ for (i = 1; i < n; i++) { ++ t = btf__type_by_id(btf, i); ++ ++ if (btf_is_int(t) && btf_int_bits(t) == 32) ++ return i; ++ } ++ ++ return 0; ++} ++ ++static int add_dummy_ksym_var(struct btf *btf) ++{ ++ int i, int_btf_id, sec_btf_id, dummy_var_btf_id; ++ const struct btf_var_secinfo *vs; ++ const struct btf_type *sec; ++ ++ if (!btf) ++ return 0; ++ ++ sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC, ++ BTF_KIND_DATASEC); ++ if (sec_btf_id < 0) ++ return 0; ++ ++ sec = btf__type_by_id(btf, sec_btf_id); ++ vs = btf_var_secinfos(sec); ++ for (i = 0; i < btf_vlen(sec); i++, vs++) { ++ const struct btf_type *vt; ++ ++ vt = btf__type_by_id(btf, vs->type); ++ if (btf_is_func(vt)) ++ break; ++ } ++ ++ /* No func in ksyms sec. No need to add dummy var. 
*/ ++ if (i == btf_vlen(sec)) ++ return 0; ++ ++ int_btf_id = find_int_btf_id(btf); ++ dummy_var_btf_id = btf__add_var(btf, ++ "dummy_ksym", ++ BTF_VAR_GLOBAL_ALLOCATED, ++ int_btf_id); ++ if (dummy_var_btf_id < 0) ++ pr_warn("cannot create a dummy_ksym var\n"); ++ ++ return dummy_var_btf_id; ++} ++ ++static int bpf_object__collect_externs(struct bpf_object *obj) ++{ ++ struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL; ++ const struct btf_type *t; ++ struct extern_desc *ext; ++ int i, n, off, dummy_var_btf_id; ++ const char *ext_name, *sec_name; ++ Elf_Scn *scn; ++ Elf64_Shdr *sh; ++ ++ if (!obj->efile.symbols) ++ return 0; ++ ++ scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx); ++ sh = elf_sec_hdr(obj, scn); ++ if (!sh || sh->sh_entsize != sizeof(Elf64_Sym)) ++ return -LIBBPF_ERRNO__FORMAT; ++ ++ dummy_var_btf_id = add_dummy_ksym_var(obj->btf); ++ if (dummy_var_btf_id < 0) ++ return dummy_var_btf_id; ++ ++ n = sh->sh_size / sh->sh_entsize; ++ pr_debug("looking for externs among %d symbols...\n", n); ++ ++ for (i = 0; i < n; i++) { ++ Elf64_Sym *sym = elf_sym_by_idx(obj, i); ++ ++ if (!sym) ++ return -LIBBPF_ERRNO__FORMAT; ++ if (!sym_is_extern(sym)) ++ continue; ++ ext_name = elf_sym_str(obj, sym->st_name); ++ if (!ext_name || !ext_name[0]) ++ continue; ++ ++ ext = obj->externs; ++ ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext)); ++ if (!ext) ++ return -ENOMEM; ++ obj->externs = ext; ++ ext = &ext[obj->nr_extern]; ++ memset(ext, 0, sizeof(*ext)); ++ obj->nr_extern++; ++ ++ ext->btf_id = find_extern_btf_id(obj->btf, ext_name); ++ if (ext->btf_id <= 0) { ++ pr_warn("failed to find BTF for extern '%s': %d\n", ++ ext_name, ext->btf_id); ++ return ext->btf_id; ++ } ++ t = btf__type_by_id(obj->btf, ext->btf_id); ++ ext->name = btf__name_by_offset(obj->btf, t->name_off); ++ ext->sym_idx = i; ++ ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK; ++ ++ ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id); ++ if (ext->sec_btf_id <= 0) { ++ pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n", ++ ext_name, ext->btf_id, ext->sec_btf_id); ++ return ext->sec_btf_id; ++ } ++ sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id); ++ sec_name = btf__name_by_offset(obj->btf, sec->name_off); ++ ++ if (strcmp(sec_name, KCONFIG_SEC) == 0) { ++ if (btf_is_func(t)) { ++ pr_warn("extern function %s is unsupported under %s section\n", ++ ext->name, KCONFIG_SEC); ++ return -ENOTSUP; ++ } ++ kcfg_sec = sec; ++ ext->type = EXT_KCFG; ++ ext->kcfg.sz = btf__resolve_size(obj->btf, t->type); ++ if (ext->kcfg.sz <= 0) { ++ pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n", ++ ext_name, ext->kcfg.sz); ++ return ext->kcfg.sz; ++ } ++ ext->kcfg.align = btf__align_of(obj->btf, t->type); ++ if (ext->kcfg.align <= 0) { ++ pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n", ++ ext_name, ext->kcfg.align); ++ return -EINVAL; ++ } ++ ext->kcfg.type = find_kcfg_type(obj->btf, t->type, ++ &ext->kcfg.is_signed); ++ if (ext->kcfg.type == KCFG_UNKNOWN) { ++ pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name); ++ return -ENOTSUP; ++ } ++ } else if (strcmp(sec_name, KSYMS_SEC) == 0) { ++ ksym_sec = sec; ++ ext->type = EXT_KSYM; ++ skip_mods_and_typedefs(obj->btf, t->type, ++ &ext->ksym.type_id); ++ } else { ++ pr_warn("unrecognized extern section '%s'\n", sec_name); ++ return -ENOTSUP; ++ } ++ } ++ pr_debug("collected %d externs total\n", obj->nr_extern); ++ ++ if (!obj->nr_extern) ++ return 0; ++ ++ /* sort externs by type, for kcfg ones 
also by (align, size, name) */ ++ qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs); ++ ++ /* for .ksyms section, we need to turn all externs into allocated ++ * variables in BTF to pass kernel verification; we do this by ++ * pretending that each extern is a 8-byte variable ++ */ ++ if (ksym_sec) { ++ /* find existing 4-byte integer type in BTF to use for fake ++ * extern variables in DATASEC ++ */ ++ int int_btf_id = find_int_btf_id(obj->btf); ++ /* For extern function, a dummy_var added earlier ++ * will be used to replace the vs->type and ++ * its name string will be used to refill ++ * the missing param's name. ++ */ ++ const struct btf_type *dummy_var; ++ ++ dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id); ++ for (i = 0; i < obj->nr_extern; i++) { ++ ext = &obj->externs[i]; ++ if (ext->type != EXT_KSYM) ++ continue; ++ pr_debug("extern (ksym) #%d: symbol %d, name %s\n", ++ i, ext->sym_idx, ext->name); ++ } ++ ++ sec = ksym_sec; ++ n = btf_vlen(sec); ++ for (i = 0, off = 0; i < n; i++, off += sizeof(int)) { ++ struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; ++ struct btf_type *vt; ++ ++ vt = (void *)btf__type_by_id(obj->btf, vs->type); ++ ext_name = btf__name_by_offset(obj->btf, vt->name_off); ++ ext = find_extern_by_name(obj, ext_name); ++ if (!ext) { ++ pr_warn("failed to find extern definition for BTF %s '%s'\n", ++ btf_kind_str(vt), ext_name); ++ return -ESRCH; ++ } ++ if (btf_is_func(vt)) { ++ const struct btf_type *func_proto; ++ struct btf_param *param; ++ int j; ++ ++ func_proto = btf__type_by_id(obj->btf, ++ vt->type); ++ param = btf_params(func_proto); ++ /* Reuse the dummy_var string if the ++ * func proto does not have param name. ++ */ ++ for (j = 0; j < btf_vlen(func_proto); j++) ++ if (param[j].type && !param[j].name_off) ++ param[j].name_off = ++ dummy_var->name_off; ++ vs->type = dummy_var_btf_id; ++ vt->info &= ~0xffff; ++ vt->info |= BTF_FUNC_GLOBAL; ++ } else { ++ btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED; ++ vt->type = int_btf_id; ++ } ++ vs->offset = off; ++ vs->size = sizeof(int); ++ } ++ sec->size = off; ++ } ++ ++ if (kcfg_sec) { ++ sec = kcfg_sec; ++ /* for kcfg externs calculate their offsets within a .kconfig map */ ++ off = 0; ++ for (i = 0; i < obj->nr_extern; i++) { ++ ext = &obj->externs[i]; ++ if (ext->type != EXT_KCFG) ++ continue; ++ ++ ext->kcfg.data_off = roundup(off, ext->kcfg.align); ++ off = ext->kcfg.data_off + ext->kcfg.sz; ++ pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n", ++ i, ext->sym_idx, ext->kcfg.data_off, ext->name); ++ } ++ sec->size = off; ++ n = btf_vlen(sec); ++ for (i = 0; i < n; i++) { ++ struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; ++ ++ t = btf__type_by_id(obj->btf, vs->type); ++ ext_name = btf__name_by_offset(obj->btf, t->name_off); ++ ext = find_extern_by_name(obj, ext_name); ++ if (!ext) { ++ pr_warn("failed to find extern definition for BTF var '%s'\n", ++ ext_name); ++ return -ESRCH; ++ } ++ btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED; ++ vs->offset = ext->kcfg.data_off; ++ } ++ } ++ return 0; ++} ++ ++static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog) ++{ ++ return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1; ++} ++ ++struct bpf_program * ++bpf_object__find_program_by_name(const struct bpf_object *obj, ++ const char *name) ++{ ++ struct bpf_program *prog; ++ ++ bpf_object__for_each_program(prog, obj) { ++ if (prog_is_subprog(obj, prog)) ++ continue; ++ if (!strcmp(prog->name, name)) ++ return prog; ++ } 
++ return errno = ENOENT, NULL; ++} ++ ++static bool bpf_object__shndx_is_data(const struct bpf_object *obj, ++ int shndx) ++{ ++ switch (obj->efile.secs[shndx].sec_type) { ++ case SEC_BSS: ++ case SEC_DATA: ++ case SEC_RODATA: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, ++ int shndx) ++{ ++ return shndx == obj->efile.maps_shndx || ++ shndx == obj->efile.btf_maps_shndx; ++} ++ ++static enum libbpf_map_type ++bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) ++{ ++ if (shndx == obj->efile.symbols_shndx) ++ return LIBBPF_MAP_KCONFIG; ++ ++ switch (obj->efile.secs[shndx].sec_type) { ++ case SEC_BSS: ++ return LIBBPF_MAP_BSS; ++ case SEC_DATA: ++ return LIBBPF_MAP_DATA; ++ case SEC_RODATA: ++ return LIBBPF_MAP_RODATA; ++ default: ++ return LIBBPF_MAP_UNSPEC; ++ } ++} ++ ++static int bpf_program__record_reloc(struct bpf_program *prog, ++ struct reloc_desc *reloc_desc, ++ __u32 insn_idx, const char *sym_name, ++ const Elf64_Sym *sym, const Elf64_Rel *rel) ++{ ++ struct bpf_insn *insn = &prog->insns[insn_idx]; ++ size_t map_idx, nr_maps = prog->obj->nr_maps; ++ struct bpf_object *obj = prog->obj; ++ __u32 shdr_idx = sym->st_shndx; ++ enum libbpf_map_type type; ++ const char *sym_sec_name; ++ struct bpf_map *map; ++ ++ if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) { ++ pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n", ++ prog->name, sym_name, insn_idx, insn->code); ++ return -LIBBPF_ERRNO__RELOC; ++ } ++ ++ if (sym_is_extern(sym)) { ++ int sym_idx = ELF64_R_SYM(rel->r_info); ++ int i, n = obj->nr_extern; ++ struct extern_desc *ext; ++ ++ for (i = 0; i < n; i++) { ++ ext = &obj->externs[i]; ++ if (ext->sym_idx == sym_idx) ++ break; ++ } ++ if (i >= n) { ++ pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n", ++ prog->name, sym_name, sym_idx); ++ return -LIBBPF_ERRNO__RELOC; ++ } ++ pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n", ++ prog->name, i, ext->name, ext->sym_idx, insn_idx); ++ if (insn->code == (BPF_JMP | BPF_CALL)) ++ reloc_desc->type = RELO_EXTERN_FUNC; ++ else ++ reloc_desc->type = RELO_EXTERN_VAR; ++ reloc_desc->insn_idx = insn_idx; ++ reloc_desc->sym_off = i; /* sym_off stores extern index */ ++ return 0; ++ } ++ ++ /* sub-program call relocation */ ++ if (is_call_insn(insn)) { ++ if (insn->src_reg != BPF_PSEUDO_CALL) { ++ pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name); ++ return -LIBBPF_ERRNO__RELOC; ++ } ++ /* text_shndx can be 0, if no default "main" program exists */ ++ if (!shdr_idx || shdr_idx != obj->efile.text_shndx) { ++ sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); ++ pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n", ++ prog->name, sym_name, sym_sec_name); ++ return -LIBBPF_ERRNO__RELOC; ++ } ++ if (sym->st_value % BPF_INSN_SZ) { ++ pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n", ++ prog->name, sym_name, (size_t)sym->st_value); ++ return -LIBBPF_ERRNO__RELOC; ++ } ++ reloc_desc->type = RELO_CALL; ++ reloc_desc->insn_idx = insn_idx; ++ reloc_desc->sym_off = sym->st_value; ++ return 0; ++ } ++ ++ if (!shdr_idx || shdr_idx >= SHN_LORESERVE) { ++ pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n", ++ prog->name, sym_name, shdr_idx); ++ return -LIBBPF_ERRNO__RELOC; ++ } ++ ++ /* loading subprog addresses */ ++ if (sym_is_subprog(sym, obj->efile.text_shndx)) { ++ /* global_func: 
sym->st_value = offset in the section, insn->imm = 0. ++ * local_func: sym->st_value = 0, insn->imm = offset in the section. ++ */ ++ if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) { ++ pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n", ++ prog->name, sym_name, (size_t)sym->st_value, insn->imm); ++ return -LIBBPF_ERRNO__RELOC; ++ } ++ ++ reloc_desc->type = RELO_SUBPROG_ADDR; ++ reloc_desc->insn_idx = insn_idx; ++ reloc_desc->sym_off = sym->st_value; ++ return 0; ++ } ++ ++ type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); ++ sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); ++ ++ /* generic map reference relocation */ ++ if (type == LIBBPF_MAP_UNSPEC) { ++ if (!bpf_object__shndx_is_maps(obj, shdr_idx)) { ++ pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n", ++ prog->name, sym_name, sym_sec_name); ++ return -LIBBPF_ERRNO__RELOC; ++ } ++ for (map_idx = 0; map_idx < nr_maps; map_idx++) { ++ map = &obj->maps[map_idx]; ++ if (map->libbpf_type != type || ++ map->sec_idx != sym->st_shndx || ++ map->sec_offset != sym->st_value) ++ continue; ++ pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n", ++ prog->name, map_idx, map->name, map->sec_idx, ++ map->sec_offset, insn_idx); ++ break; ++ } ++ if (map_idx >= nr_maps) { ++ pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n", ++ prog->name, sym_sec_name, (size_t)sym->st_value); ++ return -LIBBPF_ERRNO__RELOC; ++ } ++ reloc_desc->type = RELO_LD64; ++ reloc_desc->insn_idx = insn_idx; ++ reloc_desc->map_idx = map_idx; ++ reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */ ++ return 0; ++ } ++ ++ /* global data map relocation */ ++ if (!bpf_object__shndx_is_data(obj, shdr_idx)) { ++ pr_warn("prog '%s': bad data relo against section '%s'\n", ++ prog->name, sym_sec_name); ++ return -LIBBPF_ERRNO__RELOC; ++ } ++ for (map_idx = 0; map_idx < nr_maps; map_idx++) { ++ map = &obj->maps[map_idx]; ++ if (map->libbpf_type != type || map->sec_idx != sym->st_shndx) ++ continue; ++ pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n", ++ prog->name, map_idx, map->name, map->sec_idx, ++ map->sec_offset, insn_idx); ++ break; ++ } ++ if (map_idx >= nr_maps) { ++ pr_warn("prog '%s': data relo failed to find map for section '%s'\n", ++ prog->name, sym_sec_name); ++ return -LIBBPF_ERRNO__RELOC; ++ } ++ ++ reloc_desc->type = RELO_DATA; ++ reloc_desc->insn_idx = insn_idx; ++ reloc_desc->map_idx = map_idx; ++ reloc_desc->sym_off = sym->st_value; ++ return 0; ++} ++ ++static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx) ++{ ++ return insn_idx >= prog->sec_insn_off && ++ insn_idx < prog->sec_insn_off + prog->sec_insn_cnt; ++} ++ ++static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj, ++ size_t sec_idx, size_t insn_idx) ++{ ++ int l = 0, r = obj->nr_programs - 1, m; ++ struct bpf_program *prog; ++ ++ while (l < r) { ++ m = l + (r - l + 1) / 2; ++ prog = &obj->programs[m]; ++ ++ if (prog->sec_idx < sec_idx || ++ (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx)) ++ l = m; ++ else ++ r = m - 1; ++ } ++ /* matching program could be at index l, but it still might be the ++ * wrong one, so we need to double check conditions for the last time ++ */ ++ prog = &obj->programs[l]; ++ if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx)) ++ return prog; ++ return NULL; ++} ++ ++static int ++bpf_object__collect_prog_relos(struct bpf_object *obj, 
Elf64_Shdr *shdr, Elf_Data *data) ++{ ++ const char *relo_sec_name, *sec_name; ++ size_t sec_idx = shdr->sh_info, sym_idx; ++ struct bpf_program *prog; ++ struct reloc_desc *relos; ++ int err, i, nrels; ++ const char *sym_name; ++ __u32 insn_idx; ++ Elf_Scn *scn; ++ Elf_Data *scn_data; ++ Elf64_Sym *sym; ++ Elf64_Rel *rel; ++ ++ if (sec_idx >= obj->efile.sec_cnt) ++ return -EINVAL; ++ ++ scn = elf_sec_by_idx(obj, sec_idx); ++ scn_data = elf_sec_data(obj, scn); ++ ++ relo_sec_name = elf_sec_str(obj, shdr->sh_name); ++ sec_name = elf_sec_name(obj, scn); ++ if (!relo_sec_name || !sec_name) ++ return -EINVAL; ++ ++ pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n", ++ relo_sec_name, sec_idx, sec_name); ++ nrels = shdr->sh_size / shdr->sh_entsize; ++ ++ for (i = 0; i < nrels; i++) { ++ rel = elf_rel_by_idx(data, i); ++ if (!rel) { ++ pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i); ++ return -LIBBPF_ERRNO__FORMAT; ++ } ++ ++ sym_idx = ELF64_R_SYM(rel->r_info); ++ sym = elf_sym_by_idx(obj, sym_idx); ++ if (!sym) { ++ pr_warn("sec '%s': symbol #%zu not found for relo #%d\n", ++ relo_sec_name, sym_idx, i); ++ return -LIBBPF_ERRNO__FORMAT; ++ } ++ ++ if (sym->st_shndx >= obj->efile.sec_cnt) { ++ pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n", ++ relo_sec_name, sym_idx, (size_t)sym->st_shndx, i); ++ return -LIBBPF_ERRNO__FORMAT; ++ } ++ ++ if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) { ++ pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n", ++ relo_sec_name, (size_t)rel->r_offset, i); ++ return -LIBBPF_ERRNO__FORMAT; ++ } ++ ++ insn_idx = rel->r_offset / BPF_INSN_SZ; ++ /* relocations against static functions are recorded as ++ * relocations against the section that contains a function; ++ * in such case, symbol will be STT_SECTION and sym.st_name ++ * will point to empty string (0), so fetch section name ++ * instead ++ */ ++ if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0) ++ sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx)); ++ else ++ sym_name = elf_sym_str(obj, sym->st_name); ++ sym_name = sym_name ?: "reloc_desc, ++ prog->nr_reloc + 1, sizeof(*relos)); ++ if (!relos) ++ return -ENOMEM; ++ prog->reloc_desc = relos; ++ ++ /* adjust insn_idx to local BPF program frame of reference */ ++ insn_idx -= prog->sec_insn_off; ++ err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc], ++ insn_idx, sym_name, sym, rel); ++ if (err) ++ return err; ++ ++ prog->nr_reloc++; ++ } ++ return 0; ++} ++ ++static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map) ++{ ++ int id; ++ ++ if (!obj->btf) ++ return -ENOENT; ++ ++ /* if it's BTF-defined map, we don't need to search for type IDs. ++ * For struct_ops map, it does not need btf_key_type_id and ++ * btf_value_type_id. ++ */ ++ if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map)) ++ return 0; ++ ++ /* ++ * LLVM annotates global data differently in BTF, that is, ++ * only as '.data', '.bss' or '.rodata'. 
++ */ ++ if (!bpf_map__is_internal(map)) ++ return -ENOENT; ++ ++ id = btf__find_by_name(obj->btf, map->real_name); ++ if (id < 0) ++ return id; ++ ++ map->btf_key_type_id = 0; ++ map->btf_value_type_id = id; ++ return 0; ++} ++ ++static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) ++{ ++ char file[PATH_MAX], buff[4096]; ++ FILE *fp; ++ __u32 val; ++ int err; ++ ++ snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); ++ memset(info, 0, sizeof(*info)); ++ ++ fp = fopen(file, "r"); ++ if (!fp) { ++ err = -errno; ++ pr_warn("failed to open %s: %d. No procfs support?\n", file, ++ err); ++ return err; ++ } ++ ++ while (fgets(buff, sizeof(buff), fp)) { ++ if (sscanf(buff, "map_type:\t%u", &val) == 1) ++ info->type = val; ++ else if (sscanf(buff, "key_size:\t%u", &val) == 1) ++ info->key_size = val; ++ else if (sscanf(buff, "value_size:\t%u", &val) == 1) ++ info->value_size = val; ++ else if (sscanf(buff, "max_entries:\t%u", &val) == 1) ++ info->max_entries = val; ++ else if (sscanf(buff, "map_flags:\t%i", &val) == 1) ++ info->map_flags = val; ++ } ++ ++ fclose(fp); ++ ++ return 0; ++} ++ ++bool bpf_map__autocreate(const struct bpf_map *map) ++{ ++ return map->autocreate; ++} ++ ++int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate) ++{ ++ if (map->obj->loaded) ++ return libbpf_err(-EBUSY); ++ ++ map->autocreate = autocreate; ++ return 0; ++} ++ ++int bpf_map__reuse_fd(struct bpf_map *map, int fd) ++{ ++ struct bpf_map_info info = {}; ++ __u32 len = sizeof(info), name_len; ++ int new_fd, err; ++ char *new_name; ++ ++ err = bpf_obj_get_info_by_fd(fd, &info, &len); ++ if (err && errno == EINVAL) ++ err = bpf_get_map_info_from_fdinfo(fd, &info); ++ if (err) ++ return libbpf_err(err); ++ ++ name_len = strlen(info.name); ++ if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0) ++ new_name = strdup(map->name); ++ else ++ new_name = strdup(info.name); ++ ++ if (!new_name) ++ return libbpf_err(-errno); ++ ++ new_fd = open("/", O_RDONLY | O_CLOEXEC); ++ if (new_fd < 0) { ++ err = -errno; ++ goto err_free_new_name; ++ } ++ ++ new_fd = dup3(fd, new_fd, O_CLOEXEC); ++ if (new_fd < 0) { ++ err = -errno; ++ goto err_close_new_fd; ++ } ++ ++ err = zclose(map->fd); ++ if (err) { ++ err = -errno; ++ goto err_close_new_fd; ++ } ++ free(map->name); ++ ++ map->fd = new_fd; ++ map->name = new_name; ++ map->def.type = info.type; ++ map->def.key_size = info.key_size; ++ map->def.value_size = info.value_size; ++ map->def.max_entries = info.max_entries; ++ map->def.map_flags = info.map_flags; ++ map->btf_key_type_id = info.btf_key_type_id; ++ map->btf_value_type_id = info.btf_value_type_id; ++ map->reused = true; ++ map->map_extra = info.map_extra; ++ ++ return 0; ++ ++err_close_new_fd: ++ close(new_fd); ++err_free_new_name: ++ free(new_name); ++ return libbpf_err(err); ++} ++ ++__u32 bpf_map__max_entries(const struct bpf_map *map) ++{ ++ return map->def.max_entries; ++} ++ ++struct bpf_map *bpf_map__inner_map(struct bpf_map *map) ++{ ++ if (!bpf_map_type__is_map_in_map(map->def.type)) ++ return errno = EINVAL, NULL; ++ ++ return map->inner_map; ++} ++ ++int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) ++{ ++ if (map->obj->loaded) ++ return libbpf_err(-EBUSY); ++ ++ map->def.max_entries = max_entries; ++ ++ /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */ ++ if (map->def.type == BPF_MAP_TYPE_RINGBUF) ++ map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); ++ ++ return 0; ++} ++ 
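++/*
++ * Editorial note, not part of upstream libbpf: the map setters above
++ * (bpf_map__set_autocreate(), bpf_map__set_max_entries()) only take effect
++ * between bpf_object__open_file() and bpf_object__load(); once obj->loaded
++ * is set they return -EBUSY, as the checks above show, and for
++ * BPF_MAP_TYPE_RINGBUF the requested size is auto-adjusted to a valid
++ * ringbuf size. A minimal usage sketch, assuming a hypothetical object file
++ * "prog.bpf.o" with a map named "events" (error handling trimmed):
++ *
++ *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
++ *	struct bpf_map *map = bpf_object__find_map_by_name(obj, "events");
++ *
++ *	bpf_map__set_max_entries(map, 4096);	// before bpf_object__load()
++ *	bpf_object__load(obj);
++ *	...
++ *	bpf_object__close(obj);
++ */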
++static int ++bpf_object__probe_loading(struct bpf_object *obj) ++{ ++ char *cp, errmsg[STRERR_BUFSIZE]; ++ struct bpf_insn insns[] = { ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ }; ++ int ret, insn_cnt = ARRAY_SIZE(insns); ++ ++ if (obj->gen_loader) ++ return 0; ++ ++ ret = bump_rlimit_memlock(); ++ if (ret) ++ pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); ++ ++ /* make sure basic loading works */ ++ ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); ++ if (ret < 0) ++ ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); ++ if (ret < 0) { ++ ret = errno; ++ cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); ++ pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF " ++ "program. Make sure your kernel supports BPF " ++ "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is " ++ "set to big enough value.\n", __func__, cp, ret); ++ return -ret; ++ } ++ close(ret); ++ ++ return 0; ++} ++ ++static int probe_fd(int fd) ++{ ++ if (fd >= 0) ++ close(fd); ++ return fd >= 0; ++} ++ ++static int probe_kern_prog_name(void) ++{ ++ struct bpf_insn insns[] = { ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ }; ++ int ret, insn_cnt = ARRAY_SIZE(insns); ++ ++ /* make sure loading with name works */ ++ ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "test", "GPL", insns, insn_cnt, NULL); ++ return probe_fd(ret); ++} ++ ++static int probe_kern_global_data(void) ++{ ++ char *cp, errmsg[STRERR_BUFSIZE]; ++ struct bpf_insn insns[] = { ++ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), ++ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ }; ++ int ret, map, insn_cnt = ARRAY_SIZE(insns); ++ ++ map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL); ++ if (map < 0) { ++ ret = -errno; ++ cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); ++ pr_warn("Error in %s():%s(%d). 
Couldn't create simple array map.\n", ++ __func__, cp, -ret); ++ return ret; ++ } ++ ++ insns[0].imm = map; ++ ++ ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); ++ close(map); ++ return probe_fd(ret); ++} ++ ++static int probe_kern_btf(void) ++{ ++ static const char strs[] = "\0int"; ++ __u32 types[] = { ++ /* int */ ++ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), ++ }; ++ ++ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), ++ strs, sizeof(strs))); ++} ++ ++static int probe_kern_btf_func(void) ++{ ++ static const char strs[] = "\0int\0x\0a"; ++ /* void x(int a) {} */ ++ __u32 types[] = { ++ /* int */ ++ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ ++ /* FUNC_PROTO */ /* [2] */ ++ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), ++ BTF_PARAM_ENC(7, 1), ++ /* FUNC x */ /* [3] */ ++ BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), ++ }; ++ ++ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), ++ strs, sizeof(strs))); ++} ++ ++static int probe_kern_btf_func_global(void) ++{ ++ static const char strs[] = "\0int\0x\0a"; ++ /* static void x(int a) {} */ ++ __u32 types[] = { ++ /* int */ ++ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ ++ /* FUNC_PROTO */ /* [2] */ ++ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), ++ BTF_PARAM_ENC(7, 1), ++ /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ ++ BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), ++ }; ++ ++ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), ++ strs, sizeof(strs))); ++} ++ ++static int probe_kern_btf_datasec(void) ++{ ++ static const char strs[] = "\0x\0.data"; ++ /* static int a; */ ++ __u32 types[] = { ++ /* int */ ++ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ ++ /* VAR x */ /* [2] */ ++ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), ++ BTF_VAR_STATIC, ++ /* DATASEC val */ /* [3] */ ++ BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), ++ BTF_VAR_SECINFO_ENC(2, 0, 4), ++ }; ++ ++ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), ++ strs, sizeof(strs))); ++} ++ ++static int probe_kern_btf_float(void) ++{ ++ static const char strs[] = "\0float"; ++ __u32 types[] = { ++ /* float */ ++ BTF_TYPE_FLOAT_ENC(1, 4), ++ }; ++ ++ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), ++ strs, sizeof(strs))); ++} ++ ++static int probe_kern_btf_decl_tag(void) ++{ ++ static const char strs[] = "\0tag"; ++ __u32 types[] = { ++ /* int */ ++ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ ++ /* VAR x */ /* [2] */ ++ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), ++ BTF_VAR_STATIC, ++ /* attr */ ++ BTF_TYPE_DECL_TAG_ENC(1, 2, -1), ++ }; ++ ++ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), ++ strs, sizeof(strs))); ++} ++ ++static int probe_kern_btf_type_tag(void) ++{ ++ static const char strs[] = "\0tag"; ++ __u32 types[] = { ++ /* int */ ++ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ ++ /* attr */ ++ BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ ++ /* ptr */ ++ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ ++ }; ++ ++ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), ++ strs, sizeof(strs))); ++} ++ ++static int probe_kern_array_mmap(void) ++{ ++ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); ++ int fd; ++ ++ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(int), 1, &opts); ++ return probe_fd(fd); ++} ++ ++static int 
probe_kern_exp_attach_type(void) ++{ ++ LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE); ++ struct bpf_insn insns[] = { ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ }; ++ int fd, insn_cnt = ARRAY_SIZE(insns); ++ ++ /* use any valid combination of program type and (optional) ++ * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) ++ * to see if kernel supports expected_attach_type field for ++ * BPF_PROG_LOAD command ++ */ ++ fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); ++ return probe_fd(fd); ++} ++ ++static int probe_kern_probe_read_kernel(void) ++{ ++ struct bpf_insn insns[] = { ++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ ++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ ++ BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ ++ BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ ++ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), ++ BPF_EXIT_INSN(), ++ }; ++ int fd, insn_cnt = ARRAY_SIZE(insns); ++ ++ fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); ++ return probe_fd(fd); ++} ++ ++static int probe_prog_bind_map(void) ++{ ++ char *cp, errmsg[STRERR_BUFSIZE]; ++ struct bpf_insn insns[] = { ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ }; ++ int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); ++ ++ map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL); ++ if (map < 0) { ++ ret = -errno; ++ cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); ++ pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", ++ __func__, cp, -ret); ++ return ret; ++ } ++ ++ prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); ++ if (prog < 0) { ++ close(map); ++ return 0; ++ } ++ ++ ret = bpf_prog_bind_map(prog, map, NULL); ++ ++ close(map); ++ close(prog); ++ ++ return ret >= 0; ++} ++ ++static int probe_module_btf(void) ++{ ++ static const char strs[] = "\0int"; ++ __u32 types[] = { ++ /* int */ ++ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), ++ }; ++ struct bpf_btf_info info; ++ __u32 len = sizeof(info); ++ char name[16]; ++ int fd, err; ++ ++ fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); ++ if (fd < 0) ++ return 0; /* BTF not supported at all */ ++ ++ memset(&info, 0, sizeof(info)); ++ info.name = ptr_to_u64(name); ++ info.name_len = sizeof(name); ++ ++ /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; ++ * kernel's module BTF support coincides with support for ++ * name/name_len fields in struct bpf_btf_info. 
++ */ ++ err = bpf_obj_get_info_by_fd(fd, &info, &len); ++ close(fd); ++ return !err; ++} ++ ++static int probe_perf_link(void) ++{ ++ struct bpf_insn insns[] = { ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ }; ++ int prog_fd, link_fd, err; ++ ++ prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", ++ insns, ARRAY_SIZE(insns), NULL); ++ if (prog_fd < 0) ++ return -errno; ++ ++ /* use invalid perf_event FD to get EBADF, if link is supported; ++ * otherwise EINVAL should be returned ++ */ ++ link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); ++ err = -errno; /* close() can clobber errno */ ++ ++ if (link_fd >= 0) ++ close(link_fd); ++ close(prog_fd); ++ ++ return link_fd < 0 && err == -EBADF; ++} ++ ++static int probe_kern_bpf_cookie(void) ++{ ++ struct bpf_insn insns[] = { ++ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), ++ BPF_EXIT_INSN(), ++ }; ++ int ret, insn_cnt = ARRAY_SIZE(insns); ++ ++ ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); ++ return probe_fd(ret); ++} ++ ++static int probe_kern_btf_enum64(void) ++{ ++ static const char strs[] = "\0enum64"; ++ __u32 types[] = { ++ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), ++ }; ++ ++ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), ++ strs, sizeof(strs))); ++} ++ ++static int probe_kern_syscall_wrapper(void); ++ ++enum kern_feature_result { ++ FEAT_UNKNOWN = 0, ++ FEAT_SUPPORTED = 1, ++ FEAT_MISSING = 2, ++}; ++ ++typedef int (*feature_probe_fn)(void); ++ ++static struct kern_feature_desc { ++ const char *desc; ++ feature_probe_fn probe; ++ enum kern_feature_result res; ++} feature_probes[__FEAT_CNT] = { ++ [FEAT_PROG_NAME] = { ++ "BPF program name", probe_kern_prog_name, ++ }, ++ [FEAT_GLOBAL_DATA] = { ++ "global variables", probe_kern_global_data, ++ }, ++ [FEAT_BTF] = { ++ "minimal BTF", probe_kern_btf, ++ }, ++ [FEAT_BTF_FUNC] = { ++ "BTF functions", probe_kern_btf_func, ++ }, ++ [FEAT_BTF_GLOBAL_FUNC] = { ++ "BTF global function", probe_kern_btf_func_global, ++ }, ++ [FEAT_BTF_DATASEC] = { ++ "BTF data section and variable", probe_kern_btf_datasec, ++ }, ++ [FEAT_ARRAY_MMAP] = { ++ "ARRAY map mmap()", probe_kern_array_mmap, ++ }, ++ [FEAT_EXP_ATTACH_TYPE] = { ++ "BPF_PROG_LOAD expected_attach_type attribute", ++ probe_kern_exp_attach_type, ++ }, ++ [FEAT_PROBE_READ_KERN] = { ++ "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, ++ }, ++ [FEAT_PROG_BIND_MAP] = { ++ "BPF_PROG_BIND_MAP support", probe_prog_bind_map, ++ }, ++ [FEAT_MODULE_BTF] = { ++ "module BTF support", probe_module_btf, ++ }, ++ [FEAT_BTF_FLOAT] = { ++ "BTF_KIND_FLOAT support", probe_kern_btf_float, ++ }, ++ [FEAT_PERF_LINK] = { ++ "BPF perf link support", probe_perf_link, ++ }, ++ [FEAT_BTF_DECL_TAG] = { ++ "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, ++ }, ++ [FEAT_BTF_TYPE_TAG] = { ++ "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, ++ }, ++ [FEAT_MEMCG_ACCOUNT] = { ++ "memcg-based memory accounting", probe_memcg_account, ++ }, ++ [FEAT_BPF_COOKIE] = { ++ "BPF cookie support", probe_kern_bpf_cookie, ++ }, ++ [FEAT_BTF_ENUM64] = { ++ "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, ++ }, ++ [FEAT_SYSCALL_WRAPPER] = { ++ "Kernel using syscall wrapper", probe_kern_syscall_wrapper, ++ }, ++}; ++ ++bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) ++{ ++ struct kern_feature_desc *feat = &feature_probes[feat_id]; ++ int ret; ++ ++ if (obj && obj->gen_loader) ++ /* To generate loader program assume 
the latest kernel ++ * to avoid doing extra prog_load, map_create syscalls. ++ */ ++ return true; ++ ++ if (READ_ONCE(feat->res) == FEAT_UNKNOWN) { ++ ret = feat->probe(); ++ if (ret > 0) { ++ WRITE_ONCE(feat->res, FEAT_SUPPORTED); ++ } else if (ret == 0) { ++ WRITE_ONCE(feat->res, FEAT_MISSING); ++ } else { ++ pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); ++ WRITE_ONCE(feat->res, FEAT_MISSING); ++ } ++ } ++ ++ return READ_ONCE(feat->res) == FEAT_SUPPORTED; ++} ++ ++static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) ++{ ++ struct bpf_map_info map_info = {}; ++ char msg[STRERR_BUFSIZE]; ++ __u32 map_info_len; ++ int err; ++ ++ map_info_len = sizeof(map_info); ++ ++ err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len); ++ if (err && errno == EINVAL) ++ err = bpf_get_map_info_from_fdinfo(map_fd, &map_info); ++ if (err) { ++ pr_warn("failed to get map info for map FD %d: %s\n", map_fd, ++ libbpf_strerror_r(errno, msg, sizeof(msg))); ++ return false; ++ } ++ ++ return (map_info.type == map->def.type && ++ map_info.key_size == map->def.key_size && ++ map_info.value_size == map->def.value_size && ++ map_info.max_entries == map->def.max_entries && ++ map_info.map_flags == map->def.map_flags && ++ map_info.map_extra == map->map_extra); ++} ++ ++static int ++bpf_object__reuse_map(struct bpf_map *map) ++{ ++ char *cp, errmsg[STRERR_BUFSIZE]; ++ int err, pin_fd; ++ ++ pin_fd = bpf_obj_get(map->pin_path); ++ if (pin_fd < 0) { ++ err = -errno; ++ if (err == -ENOENT) { ++ pr_debug("found no pinned map to reuse at '%s'\n", ++ map->pin_path); ++ return 0; ++ } ++ ++ cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); ++ pr_warn("couldn't retrieve pinned map '%s': %s\n", ++ map->pin_path, cp); ++ return err; ++ } ++ ++ if (!map_is_reuse_compat(map, pin_fd)) { ++ pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n", ++ map->pin_path); ++ close(pin_fd); ++ return -EINVAL; ++ } ++ ++ err = bpf_map__reuse_fd(map, pin_fd); ++ close(pin_fd); ++ if (err) { ++ return err; ++ } ++ map->pinned = true; ++ pr_debug("reused pinned map at '%s'\n", map->pin_path); ++ ++ return 0; ++} ++ ++static int ++bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) ++{ ++ enum libbpf_map_type map_type = map->libbpf_type; ++ char *cp, errmsg[STRERR_BUFSIZE]; ++ int err, zero = 0; ++ ++ if (obj->gen_loader) { ++ bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps, ++ map->mmaped, map->def.value_size); ++ if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) ++ bpf_gen__map_freeze(obj->gen_loader, map - obj->maps); ++ return 0; ++ } ++ err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0); ++ if (err) { ++ err = -errno; ++ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); ++ pr_warn("Error setting initial map(%s) contents: %s\n", ++ map->name, cp); ++ return err; ++ } ++ ++ /* Freeze .rodata and .kconfig map as read-only from syscall side. 
*/ ++ if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) { ++ err = bpf_map_freeze(map->fd); ++ if (err) { ++ err = -errno; ++ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); ++ pr_warn("Error freezing map(%s) as read-only: %s\n", ++ map->name, cp); ++ return err; ++ } ++ } ++ return 0; ++} ++ ++static void bpf_map__destroy(struct bpf_map *map); ++ ++static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) ++{ ++ LIBBPF_OPTS(bpf_map_create_opts, create_attr); ++ struct bpf_map_def *def = &map->def; ++ const char *map_name = NULL; ++ int err = 0; ++ ++ if (kernel_supports(obj, FEAT_PROG_NAME)) ++ map_name = map->name; ++ create_attr.map_ifindex = map->map_ifindex; ++ create_attr.map_flags = def->map_flags; ++ create_attr.numa_node = map->numa_node; ++ create_attr.map_extra = map->map_extra; ++ ++ if (bpf_map__is_struct_ops(map)) ++ create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; ++ ++ if (obj->btf && btf__fd(obj->btf) >= 0) { ++ create_attr.btf_fd = btf__fd(obj->btf); ++ create_attr.btf_key_type_id = map->btf_key_type_id; ++ create_attr.btf_value_type_id = map->btf_value_type_id; ++ } ++ ++ if (bpf_map_type__is_map_in_map(def->type)) { ++ if (map->inner_map) { ++ err = bpf_object__create_map(obj, map->inner_map, true); ++ if (err) { ++ pr_warn("map '%s': failed to create inner map: %d\n", ++ map->name, err); ++ return err; ++ } ++ map->inner_map_fd = bpf_map__fd(map->inner_map); ++ } ++ if (map->inner_map_fd >= 0) ++ create_attr.inner_map_fd = map->inner_map_fd; ++ } ++ ++ switch (def->type) { ++ case BPF_MAP_TYPE_PERF_EVENT_ARRAY: ++ case BPF_MAP_TYPE_CGROUP_ARRAY: ++ case BPF_MAP_TYPE_STACK_TRACE: ++ case BPF_MAP_TYPE_ARRAY_OF_MAPS: ++ case BPF_MAP_TYPE_HASH_OF_MAPS: ++ case BPF_MAP_TYPE_DEVMAP: ++ case BPF_MAP_TYPE_DEVMAP_HASH: ++ case BPF_MAP_TYPE_CPUMAP: ++ case BPF_MAP_TYPE_XSKMAP: ++ case BPF_MAP_TYPE_SOCKMAP: ++ case BPF_MAP_TYPE_SOCKHASH: ++ case BPF_MAP_TYPE_QUEUE: ++ case BPF_MAP_TYPE_STACK: ++ create_attr.btf_fd = 0; ++ create_attr.btf_key_type_id = 0; ++ create_attr.btf_value_type_id = 0; ++ map->btf_key_type_id = 0; ++ map->btf_value_type_id = 0; ++ default: ++ break; ++ } ++ ++ if (obj->gen_loader) { ++ bpf_gen__map_create(obj->gen_loader, def->type, map_name, ++ def->key_size, def->value_size, def->max_entries, ++ &create_attr, is_inner ? -1 : map - obj->maps); ++ /* Pretend to have valid FD to pass various fd >= 0 checks. ++ * This fd == 0 will not be used with any syscall and will be reset to -1 eventually. ++ */ ++ map->fd = 0; ++ } else { ++ map->fd = bpf_map_create(def->type, map_name, ++ def->key_size, def->value_size, ++ def->max_entries, &create_attr); ++ } ++ if (map->fd < 0 && (create_attr.btf_key_type_id || ++ create_attr.btf_value_type_id)) { ++ char *cp, errmsg[STRERR_BUFSIZE]; ++ ++ err = -errno; ++ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); ++ pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", ++ map->name, cp, err); ++ create_attr.btf_fd = 0; ++ create_attr.btf_key_type_id = 0; ++ create_attr.btf_value_type_id = 0; ++ map->btf_key_type_id = 0; ++ map->btf_value_type_id = 0; ++ map->fd = bpf_map_create(def->type, map_name, ++ def->key_size, def->value_size, ++ def->max_entries, &create_attr); ++ } ++ ++ err = map->fd < 0 ? 
-errno : 0; ++ ++ if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) { ++ if (obj->gen_loader) ++ map->inner_map->fd = -1; ++ bpf_map__destroy(map->inner_map); ++ zfree(&map->inner_map); ++ } ++ ++ return err; ++} ++ ++static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) ++{ ++ const struct bpf_map *targ_map; ++ unsigned int i; ++ int fd, err = 0; ++ ++ for (i = 0; i < map->init_slots_sz; i++) { ++ if (!map->init_slots[i]) ++ continue; ++ ++ targ_map = map->init_slots[i]; ++ fd = bpf_map__fd(targ_map); ++ ++ if (obj->gen_loader) { ++ bpf_gen__populate_outer_map(obj->gen_loader, ++ map - obj->maps, i, ++ targ_map - obj->maps); ++ } else { ++ err = bpf_map_update_elem(map->fd, &i, &fd, 0); ++ } ++ if (err) { ++ err = -errno; ++ pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n", ++ map->name, i, targ_map->name, fd, err); ++ return err; ++ } ++ pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n", ++ map->name, i, targ_map->name, fd); ++ } ++ ++ zfree(&map->init_slots); ++ map->init_slots_sz = 0; ++ ++ return 0; ++} ++ ++static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map) ++{ ++ const struct bpf_program *targ_prog; ++ unsigned int i; ++ int fd, err; ++ ++ if (obj->gen_loader) ++ return -ENOTSUP; ++ ++ for (i = 0; i < map->init_slots_sz; i++) { ++ if (!map->init_slots[i]) ++ continue; ++ ++ targ_prog = map->init_slots[i]; ++ fd = bpf_program__fd(targ_prog); ++ ++ err = bpf_map_update_elem(map->fd, &i, &fd, 0); ++ if (err) { ++ err = -errno; ++ pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n", ++ map->name, i, targ_prog->name, fd, err); ++ return err; ++ } ++ pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n", ++ map->name, i, targ_prog->name, fd); ++ } ++ ++ zfree(&map->init_slots); ++ map->init_slots_sz = 0; ++ ++ return 0; ++} ++ ++static int bpf_object_init_prog_arrays(struct bpf_object *obj) ++{ ++ struct bpf_map *map; ++ int i, err; ++ ++ for (i = 0; i < obj->nr_maps; i++) { ++ map = &obj->maps[i]; ++ ++ if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY) ++ continue; ++ ++ err = init_prog_array_slots(obj, map); ++ if (err < 0) { ++ zclose(map->fd); ++ return err; ++ } ++ } ++ return 0; ++} ++ ++static int map_set_def_max_entries(struct bpf_map *map) ++{ ++ if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) { ++ int nr_cpus; ++ ++ nr_cpus = libbpf_num_possible_cpus(); ++ if (nr_cpus < 0) { ++ pr_warn("map '%s': failed to determine number of system CPUs: %d\n", ++ map->name, nr_cpus); ++ return nr_cpus; ++ } ++ pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus); ++ map->def.max_entries = nr_cpus; ++ } ++ ++ return 0; ++} ++ ++static int ++bpf_object__create_maps(struct bpf_object *obj) ++{ ++ struct bpf_map *map; ++ char *cp, errmsg[STRERR_BUFSIZE]; ++ unsigned int i, j; ++ int err; ++ bool retried; ++ ++ for (i = 0; i < obj->nr_maps; i++) { ++ map = &obj->maps[i]; ++ ++ /* To support old kernels, we skip creating global data maps ++ * (.rodata, .data, .kconfig, etc); later on, during program ++ * loading, if we detect that at least one of the to-be-loaded ++ * programs is referencing any global data map, we'll error ++ * out with program name and relocation index logged. ++ * This approach allows to accommodate Clang emitting ++ * unnecessary .rodata.str1.1 sections for string literals, ++ * but also it allows to have CO-RE applications that use ++ * global variables in some of BPF programs, but not others. 
++ * If those global variable-using programs are not loaded at ++ * runtime due to bpf_program__set_autoload(prog, false), ++ * bpf_object loading will succeed just fine even on old ++ * kernels. ++ */ ++ if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA)) ++ map->autocreate = false; ++ ++ if (!map->autocreate) { ++ pr_debug("map '%s': skipped auto-creating...\n", map->name); ++ continue; ++ } ++ ++ err = map_set_def_max_entries(map); ++ if (err) ++ goto err_out; ++ ++ retried = false; ++retry: ++ if (map->pin_path) { ++ err = bpf_object__reuse_map(map); ++ if (err) { ++ pr_warn("map '%s': error reusing pinned map\n", ++ map->name); ++ goto err_out; ++ } ++ if (retried && map->fd < 0) { ++ pr_warn("map '%s': cannot find pinned map\n", ++ map->name); ++ err = -ENOENT; ++ goto err_out; ++ } ++ } ++ ++ if (map->fd >= 0) { ++ pr_debug("map '%s': skipping creation (preset fd=%d)\n", ++ map->name, map->fd); ++ } else { ++ err = bpf_object__create_map(obj, map, false); ++ if (err) ++ goto err_out; ++ ++ pr_debug("map '%s': created successfully, fd=%d\n", ++ map->name, map->fd); ++ ++ if (bpf_map__is_internal(map)) { ++ err = bpf_object__populate_internal_map(obj, map); ++ if (err < 0) { ++ zclose(map->fd); ++ goto err_out; ++ } ++ } ++ ++ if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) { ++ err = init_map_in_map_slots(obj, map); ++ if (err < 0) { ++ zclose(map->fd); ++ goto err_out; ++ } ++ } ++ } ++ ++ if (map->pin_path && !map->pinned) { ++ err = bpf_map__pin(map, NULL); ++ if (err) { ++ zclose(map->fd); ++ if (!retried && err == -EEXIST) { ++ retried = true; ++ goto retry; ++ } ++ pr_warn("map '%s': failed to auto-pin at '%s': %d\n", ++ map->name, map->pin_path, err); ++ goto err_out; ++ } ++ } ++ } ++ ++ return 0; ++ ++err_out: ++ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); ++ pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err); ++ pr_perm_msg(err); ++ for (j = 0; j < i; j++) ++ zclose(obj->maps[j].fd); ++ return err; ++} ++ ++static bool bpf_core_is_flavor_sep(const char *s) ++{ ++ /* check X___Y name pattern, where X and Y are not underscores */ ++ return s[0] != '_' && /* X */ ++ s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */ ++ s[4] != '_'; /* Y */ ++} ++ ++/* Given 'some_struct_name___with_flavor' return the length of a name prefix ++ * before last triple underscore. Struct name part after last triple ++ * underscore is ignored by BPF CO-RE relocation during relocation matching. 
++ */ ++size_t bpf_core_essential_name_len(const char *name) ++{ ++ size_t n = strlen(name); ++ int i; ++ ++ for (i = n - 5; i >= 0; i--) { ++ if (bpf_core_is_flavor_sep(name + i)) ++ return i + 1; ++ } ++ return n; ++} ++ ++void bpf_core_free_cands(struct bpf_core_cand_list *cands) ++{ ++ if (!cands) ++ return; ++ ++ free(cands->cands); ++ free(cands); ++} ++ ++int bpf_core_add_cands(struct bpf_core_cand *local_cand, ++ size_t local_essent_len, ++ const struct btf *targ_btf, ++ const char *targ_btf_name, ++ int targ_start_id, ++ struct bpf_core_cand_list *cands) ++{ ++ struct bpf_core_cand *new_cands, *cand; ++ const struct btf_type *t, *local_t; ++ const char *targ_name, *local_name; ++ size_t targ_essent_len; ++ int n, i; ++ ++ local_t = btf__type_by_id(local_cand->btf, local_cand->id); ++ local_name = btf__str_by_offset(local_cand->btf, local_t->name_off); ++ ++ n = btf__type_cnt(targ_btf); ++ for (i = targ_start_id; i < n; i++) { ++ t = btf__type_by_id(targ_btf, i); ++ if (!btf_kind_core_compat(t, local_t)) ++ continue; ++ ++ targ_name = btf__name_by_offset(targ_btf, t->name_off); ++ if (str_is_empty(targ_name)) ++ continue; ++ ++ targ_essent_len = bpf_core_essential_name_len(targ_name); ++ if (targ_essent_len != local_essent_len) ++ continue; ++ ++ if (strncmp(local_name, targ_name, local_essent_len) != 0) ++ continue; ++ ++ pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n", ++ local_cand->id, btf_kind_str(local_t), ++ local_name, i, btf_kind_str(t), targ_name, ++ targ_btf_name); ++ new_cands = libbpf_reallocarray(cands->cands, cands->len + 1, ++ sizeof(*cands->cands)); ++ if (!new_cands) ++ return -ENOMEM; ++ ++ cand = &new_cands[cands->len]; ++ cand->btf = targ_btf; ++ cand->id = i; ++ ++ cands->cands = new_cands; ++ cands->len++; ++ } ++ return 0; ++} ++ ++static int load_module_btfs(struct bpf_object *obj) ++{ ++ struct bpf_btf_info info; ++ struct module_btf *mod_btf; ++ struct btf *btf; ++ char name[64]; ++ __u32 id = 0, len; ++ int err, fd; ++ ++ if (obj->btf_modules_loaded) ++ return 0; ++ ++ if (obj->gen_loader) ++ return 0; ++ ++ /* don't do this again, even if we find no module BTFs */ ++ obj->btf_modules_loaded = true; ++ ++ /* kernel too old to support module BTFs */ ++ if (!kernel_supports(obj, FEAT_MODULE_BTF)) ++ return 0; ++ ++ while (true) { ++ err = bpf_btf_get_next_id(id, &id); ++ if (err && errno == ENOENT) ++ return 0; ++ if (err) { ++ err = -errno; ++ pr_warn("failed to iterate BTF objects: %d\n", err); ++ return err; ++ } ++ ++ fd = bpf_btf_get_fd_by_id(id); ++ if (fd < 0) { ++ if (errno == ENOENT) ++ continue; /* expected race: BTF was unloaded */ ++ err = -errno; ++ pr_warn("failed to get BTF object #%d FD: %d\n", id, err); ++ return err; ++ } ++ ++ len = sizeof(info); ++ memset(&info, 0, sizeof(info)); ++ info.name = ptr_to_u64(name); ++ info.name_len = sizeof(name); ++ ++ err = bpf_obj_get_info_by_fd(fd, &info, &len); ++ if (err) { ++ err = -errno; ++ pr_warn("failed to get BTF object #%d info: %d\n", id, err); ++ goto err_out; ++ } ++ ++ /* ignore non-module BTFs */ ++ if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) { ++ close(fd); ++ continue; ++ } ++ ++ btf = btf_get_from_fd(fd, obj->btf_vmlinux); ++ err = libbpf_get_error(btf); ++ if (err) { ++ pr_warn("failed to load module [%s]'s BTF object #%d: %d\n", ++ name, id, err); ++ goto err_out; ++ } ++ ++ err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap, ++ sizeof(*obj->btf_modules), obj->btf_module_cnt + 1); ++ if (err) ++ goto err_out; ++ ++ 
mod_btf = &obj->btf_modules[obj->btf_module_cnt++]; ++ ++ mod_btf->btf = btf; ++ mod_btf->id = id; ++ mod_btf->fd = fd; ++ mod_btf->name = strdup(name); ++ if (!mod_btf->name) { ++ err = -ENOMEM; ++ goto err_out; ++ } ++ continue; ++ ++err_out: ++ close(fd); ++ return err; ++ } ++ ++ return 0; ++} ++ ++static struct bpf_core_cand_list * ++bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id) ++{ ++ struct bpf_core_cand local_cand = {}; ++ struct bpf_core_cand_list *cands; ++ const struct btf *main_btf; ++ const struct btf_type *local_t; ++ const char *local_name; ++ size_t local_essent_len; ++ int err, i; ++ ++ local_cand.btf = local_btf; ++ local_cand.id = local_type_id; ++ local_t = btf__type_by_id(local_btf, local_type_id); ++ if (!local_t) ++ return ERR_PTR(-EINVAL); ++ ++ local_name = btf__name_by_offset(local_btf, local_t->name_off); ++ if (str_is_empty(local_name)) ++ return ERR_PTR(-EINVAL); ++ local_essent_len = bpf_core_essential_name_len(local_name); ++ ++ cands = calloc(1, sizeof(*cands)); ++ if (!cands) ++ return ERR_PTR(-ENOMEM); ++ ++ /* Attempt to find target candidates in vmlinux BTF first */ ++ main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux; ++ err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands); ++ if (err) ++ goto err_out; ++ ++ /* if vmlinux BTF has any candidate, don't got for module BTFs */ ++ if (cands->len) ++ return cands; ++ ++ /* if vmlinux BTF was overridden, don't attempt to load module BTFs */ ++ if (obj->btf_vmlinux_override) ++ return cands; ++ ++ /* now look through module BTFs, trying to still find candidates */ ++ err = load_module_btfs(obj); ++ if (err) ++ goto err_out; ++ ++ for (i = 0; i < obj->btf_module_cnt; i++) { ++ err = bpf_core_add_cands(&local_cand, local_essent_len, ++ obj->btf_modules[i].btf, ++ obj->btf_modules[i].name, ++ btf__type_cnt(obj->btf_vmlinux), ++ cands); ++ if (err) ++ goto err_out; ++ } ++ ++ return cands; ++err_out: ++ bpf_core_free_cands(cands); ++ return ERR_PTR(err); ++} ++ ++/* Check local and target types for compatibility. This check is used for ++ * type-based CO-RE relocations and follow slightly different rules than ++ * field-based relocations. This function assumes that root types were already ++ * checked for name match. Beyond that initial root-level name check, names ++ * are completely ignored. Compatibility rules are as follows: ++ * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but ++ * kind should match for local and target types (i.e., STRUCT is not ++ * compatible with UNION); ++ * - for ENUMs, the size is ignored; ++ * - for INT, size and signedness are ignored; ++ * - for ARRAY, dimensionality is ignored, element types are checked for ++ * compatibility recursively; ++ * - CONST/VOLATILE/RESTRICT modifiers are ignored; ++ * - TYPEDEFs/PTRs are compatible if types they pointing to are compatible; ++ * - FUNC_PROTOs are compatible if they have compatible signature: same ++ * number of input args and compatible return and argument types. ++ * These rules are not set in stone and probably will be adjusted as we get ++ * more experience with using BPF CO-RE relocations. 
++ */ ++int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, ++ const struct btf *targ_btf, __u32 targ_id) ++{ ++ return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32); ++} ++ ++int bpf_core_types_match(const struct btf *local_btf, __u32 local_id, ++ const struct btf *targ_btf, __u32 targ_id) ++{ ++ return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32); ++} ++ ++static size_t bpf_core_hash_fn(const void *key, void *ctx) ++{ ++ return (size_t)key; ++} ++ ++static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx) ++{ ++ return k1 == k2; ++} ++ ++static void *u32_as_hash_key(__u32 x) ++{ ++ return (void *)(uintptr_t)x; ++} ++ ++static int record_relo_core(struct bpf_program *prog, ++ const struct bpf_core_relo *core_relo, int insn_idx) ++{ ++ struct reloc_desc *relos, *relo; ++ ++ relos = libbpf_reallocarray(prog->reloc_desc, ++ prog->nr_reloc + 1, sizeof(*relos)); ++ if (!relos) ++ return -ENOMEM; ++ relo = &relos[prog->nr_reloc]; ++ relo->type = RELO_CORE; ++ relo->insn_idx = insn_idx; ++ relo->core_relo = core_relo; ++ prog->reloc_desc = relos; ++ prog->nr_reloc++; ++ return 0; ++} ++ ++static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx) ++{ ++ struct reloc_desc *relo; ++ int i; ++ ++ for (i = 0; i < prog->nr_reloc; i++) { ++ relo = &prog->reloc_desc[i]; ++ if (relo->type != RELO_CORE || relo->insn_idx != insn_idx) ++ continue; ++ ++ return relo->core_relo; ++ } ++ ++ return NULL; ++} ++ ++static int bpf_core_resolve_relo(struct bpf_program *prog, ++ const struct bpf_core_relo *relo, ++ int relo_idx, ++ const struct btf *local_btf, ++ struct hashmap *cand_cache, ++ struct bpf_core_relo_res *targ_res) ++{ ++ struct bpf_core_spec specs_scratch[3] = {}; ++ const void *type_key = u32_as_hash_key(relo->type_id); ++ struct bpf_core_cand_list *cands = NULL; ++ const char *prog_name = prog->name; ++ const struct btf_type *local_type; ++ const char *local_name; ++ __u32 local_id = relo->type_id; ++ int err; ++ ++ local_type = btf__type_by_id(local_btf, local_id); ++ if (!local_type) ++ return -EINVAL; ++ ++ local_name = btf__name_by_offset(local_btf, local_type->name_off); ++ if (!local_name) ++ return -EINVAL; ++ ++ if (relo->kind != BPF_CORE_TYPE_ID_LOCAL && ++ !hashmap__find(cand_cache, type_key, (void **)&cands)) { ++ cands = bpf_core_find_cands(prog->obj, local_btf, local_id); ++ if (IS_ERR(cands)) { ++ pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n", ++ prog_name, relo_idx, local_id, btf_kind_str(local_type), ++ local_name, PTR_ERR(cands)); ++ return PTR_ERR(cands); ++ } ++ err = hashmap__set(cand_cache, type_key, cands, NULL, NULL); ++ if (err) { ++ bpf_core_free_cands(cands); ++ return err; ++ } ++ } ++ ++ return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch, ++ targ_res); ++} ++ ++static int ++bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) ++{ ++ const struct btf_ext_info_sec *sec; ++ struct bpf_core_relo_res targ_res; ++ const struct bpf_core_relo *rec; ++ const struct btf_ext_info *seg; ++ struct hashmap_entry *entry; ++ struct hashmap *cand_cache = NULL; ++ struct bpf_program *prog; ++ struct bpf_insn *insn; ++ const char *sec_name; ++ int i, err = 0, insn_idx, sec_idx, sec_num; ++ ++ if (obj->btf_ext->core_relo_info.len == 0) ++ return 0; ++ ++ if (targ_btf_path) { ++ obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL); ++ err = 
libbpf_get_error(obj->btf_vmlinux_override); ++ if (err) { ++ pr_warn("failed to parse target BTF: %d\n", err); ++ return err; ++ } ++ } ++ ++ cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL); ++ if (IS_ERR(cand_cache)) { ++ err = PTR_ERR(cand_cache); ++ goto out; ++ } ++ ++ seg = &obj->btf_ext->core_relo_info; ++ sec_num = 0; ++ for_each_btf_ext_sec(seg, sec) { ++ sec_idx = seg->sec_idxs[sec_num]; ++ sec_num++; ++ ++ sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); ++ if (str_is_empty(sec_name)) { ++ err = -EINVAL; ++ goto out; ++ } ++ ++ pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info); ++ ++ for_each_btf_ext_rec(seg, sec, i, rec) { ++ if (rec->insn_off % BPF_INSN_SZ) ++ return -EINVAL; ++ insn_idx = rec->insn_off / BPF_INSN_SZ; ++ prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); ++ if (!prog) { ++ /* When __weak subprog is "overridden" by another instance ++ * of the subprog from a different object file, linker still ++ * appends all the .BTF.ext info that used to belong to that ++ * eliminated subprogram. ++ * This is similar to what x86-64 linker does for relocations. ++ * So just ignore such relocations just like we ignore ++ * subprog instructions when discovering subprograms. ++ */ ++ pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n", ++ sec_name, i, insn_idx); ++ continue; ++ } ++ /* no need to apply CO-RE relocation if the program is ++ * not going to be loaded ++ */ ++ if (!prog->autoload) ++ continue; ++ ++ /* adjust insn_idx from section frame of reference to the local ++ * program's frame of reference; (sub-)program code is not yet ++ * relocated, so it's enough to just subtract in-section offset ++ */ ++ insn_idx = insn_idx - prog->sec_insn_off; ++ if (insn_idx >= prog->insns_cnt) ++ return -EINVAL; ++ insn = &prog->insns[insn_idx]; ++ ++ err = record_relo_core(prog, rec, insn_idx); ++ if (err) { ++ pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n", ++ prog->name, i, err); ++ goto out; ++ } ++ ++ if (prog->obj->gen_loader) ++ continue; ++ ++ err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res); ++ if (err) { ++ pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", ++ prog->name, i, err); ++ goto out; ++ } ++ ++ err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res); ++ if (err) { ++ pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n", ++ prog->name, i, insn_idx, err); ++ goto out; ++ } ++ } ++ } ++ ++out: ++ /* obj->btf_vmlinux and module BTFs are freed after object load */ ++ btf__free(obj->btf_vmlinux_override); ++ obj->btf_vmlinux_override = NULL; ++ ++ if (!IS_ERR_OR_NULL(cand_cache)) { ++ hashmap__for_each_entry(cand_cache, entry, i) { ++ bpf_core_free_cands(entry->value); ++ } ++ hashmap__free(cand_cache); ++ } ++ return err; ++} ++ ++/* base map load ldimm64 special constant, used also for log fixup logic */ ++#define MAP_LDIMM64_POISON_BASE 2001000000 ++#define MAP_LDIMM64_POISON_PFX "200100" ++ ++static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx, ++ int insn_idx, struct bpf_insn *insn, ++ int map_idx, const struct bpf_map *map) ++{ ++ int i; ++ ++ pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n", ++ prog->name, relo_idx, insn_idx, map_idx, map->name); ++ ++ /* we turn single ldimm64 into two identical invalid calls */ ++ for (i = 0; i < 2; i++) { ++ insn->code = BPF_JMP | BPF_CALL; ++ insn->dst_reg = 0; ++ insn->src_reg = 0; ++ 
insn->off = 0; ++ /* if this instruction is reachable (not a dead code), ++ * verifier will complain with something like: ++ * invalid func unknown#2001000123 ++ * where lower 123 is map index into obj->maps[] array ++ */ ++ insn->imm = MAP_LDIMM64_POISON_BASE + map_idx; ++ ++ insn++; ++ } ++} ++ ++/* Relocate data references within program code: ++ * - map references; ++ * - global variable references; ++ * - extern references. ++ */ ++static int ++bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) ++{ ++ int i; ++ ++ for (i = 0; i < prog->nr_reloc; i++) { ++ struct reloc_desc *relo = &prog->reloc_desc[i]; ++ struct bpf_insn *insn = &prog->insns[relo->insn_idx]; ++ const struct bpf_map *map; ++ struct extern_desc *ext; ++ ++ switch (relo->type) { ++ case RELO_LD64: ++ map = &obj->maps[relo->map_idx]; ++ if (obj->gen_loader) { ++ insn[0].src_reg = BPF_PSEUDO_MAP_IDX; ++ insn[0].imm = relo->map_idx; ++ } else if (map->autocreate) { ++ insn[0].src_reg = BPF_PSEUDO_MAP_FD; ++ insn[0].imm = map->fd; ++ } else { ++ poison_map_ldimm64(prog, i, relo->insn_idx, insn, ++ relo->map_idx, map); ++ } ++ break; ++ case RELO_DATA: ++ map = &obj->maps[relo->map_idx]; ++ insn[1].imm = insn[0].imm + relo->sym_off; ++ if (obj->gen_loader) { ++ insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; ++ insn[0].imm = relo->map_idx; ++ } else if (map->autocreate) { ++ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; ++ insn[0].imm = map->fd; ++ } else { ++ poison_map_ldimm64(prog, i, relo->insn_idx, insn, ++ relo->map_idx, map); ++ } ++ break; ++ case RELO_EXTERN_VAR: ++ ext = &obj->externs[relo->sym_off]; ++ if (ext->type == EXT_KCFG) { ++ if (obj->gen_loader) { ++ insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; ++ insn[0].imm = obj->kconfig_map_idx; ++ } else { ++ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; ++ insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; ++ } ++ insn[1].imm = ext->kcfg.data_off; ++ } else /* EXT_KSYM */ { ++ if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */ ++ insn[0].src_reg = BPF_PSEUDO_BTF_ID; ++ insn[0].imm = ext->ksym.kernel_btf_id; ++ insn[1].imm = ext->ksym.kernel_btf_obj_fd; ++ } else { /* typeless ksyms or unresolved typed ksyms */ ++ insn[0].imm = (__u32)ext->ksym.addr; ++ insn[1].imm = ext->ksym.addr >> 32; ++ } ++ } ++ break; ++ case RELO_EXTERN_FUNC: ++ ext = &obj->externs[relo->sym_off]; ++ insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL; ++ if (ext->is_set) { ++ insn[0].imm = ext->ksym.kernel_btf_id; ++ insn[0].off = ext->ksym.btf_fd_idx; ++ } else { /* unresolved weak kfunc */ ++ insn[0].imm = 0; ++ insn[0].off = 0; ++ } ++ break; ++ case RELO_SUBPROG_ADDR: ++ if (insn[0].src_reg != BPF_PSEUDO_FUNC) { ++ pr_warn("prog '%s': relo #%d: bad insn\n", ++ prog->name, i); ++ return -EINVAL; ++ } ++ /* handled already */ ++ break; ++ case RELO_CALL: ++ /* handled already */ ++ break; ++ case RELO_CORE: ++ /* will be handled by bpf_program_record_relos() */ ++ break; ++ default: ++ pr_warn("prog '%s': relo #%d: bad relo type %d\n", ++ prog->name, i, relo->type); ++ return -EINVAL; ++ } ++ } ++ ++ return 0; ++} ++ ++static int adjust_prog_btf_ext_info(const struct bpf_object *obj, ++ const struct bpf_program *prog, ++ const struct btf_ext_info *ext_info, ++ void **prog_info, __u32 *prog_rec_cnt, ++ __u32 *prog_rec_sz) ++{ ++ void *copy_start = NULL, *copy_end = NULL; ++ void *rec, *rec_end, *new_prog_info; ++ const struct btf_ext_info_sec *sec; ++ size_t old_sz, new_sz; ++ int i, sec_num, sec_idx, off_adj; ++ ++ sec_num = 0; ++ for_each_btf_ext_sec(ext_info, sec) { ++ sec_idx = 
ext_info->sec_idxs[sec_num]; ++ sec_num++; ++ if (prog->sec_idx != sec_idx) ++ continue; ++ ++ for_each_btf_ext_rec(ext_info, sec, i, rec) { ++ __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ; ++ ++ if (insn_off < prog->sec_insn_off) ++ continue; ++ if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt) ++ break; ++ ++ if (!copy_start) ++ copy_start = rec; ++ copy_end = rec + ext_info->rec_size; ++ } ++ ++ if (!copy_start) ++ return -ENOENT; ++ ++ /* append func/line info of a given (sub-)program to the main ++ * program func/line info ++ */ ++ old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size; ++ new_sz = old_sz + (copy_end - copy_start); ++ new_prog_info = realloc(*prog_info, new_sz); ++ if (!new_prog_info) ++ return -ENOMEM; ++ *prog_info = new_prog_info; ++ *prog_rec_cnt = new_sz / ext_info->rec_size; ++ memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start); ++ ++ /* Kernel instruction offsets are in units of 8-byte ++ * instructions, while .BTF.ext instruction offsets generated ++ * by Clang are in units of bytes. So convert Clang offsets ++ * into kernel offsets and adjust offset according to program ++ * relocated position. ++ */ ++ off_adj = prog->sub_insn_off - prog->sec_insn_off; ++ rec = new_prog_info + old_sz; ++ rec_end = new_prog_info + new_sz; ++ for (; rec < rec_end; rec += ext_info->rec_size) { ++ __u32 *insn_off = rec; ++ ++ *insn_off = *insn_off / BPF_INSN_SZ + off_adj; ++ } ++ *prog_rec_sz = ext_info->rec_size; ++ return 0; ++ } ++ ++ return -ENOENT; ++} ++ ++static int ++reloc_prog_func_and_line_info(const struct bpf_object *obj, ++ struct bpf_program *main_prog, ++ const struct bpf_program *prog) ++{ ++ int err; ++ ++ /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't ++ * supprot func/line info ++ */ ++ if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC)) ++ return 0; ++ ++ /* only attempt func info relocation if main program's func_info ++ * relocation was successful ++ */ ++ if (main_prog != prog && !main_prog->func_info) ++ goto line_info; ++ ++ err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info, ++ &main_prog->func_info, ++ &main_prog->func_info_cnt, ++ &main_prog->func_info_rec_size); ++ if (err) { ++ if (err != -ENOENT) { ++ pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n", ++ prog->name, err); ++ return err; ++ } ++ if (main_prog->func_info) { ++ /* ++ * Some info has already been found but has problem ++ * in the last btf_ext reloc. Must have to error out. ++ */ ++ pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name); ++ return err; ++ } ++ /* Have problem loading the very first info. Ignore the rest. */ ++ pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n", ++ prog->name); ++ } ++ ++line_info: ++ /* don't relocate line info if main program's relocation failed */ ++ if (main_prog != prog && !main_prog->line_info) ++ return 0; ++ ++ err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info, ++ &main_prog->line_info, ++ &main_prog->line_info_cnt, ++ &main_prog->line_info_rec_size); ++ if (err) { ++ if (err != -ENOENT) { ++ pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n", ++ prog->name, err); ++ return err; ++ } ++ if (main_prog->line_info) { ++ /* ++ * Some info has already been found but has problem ++ * in the last btf_ext reloc. Must have to error out. 
++ */ ++ pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name); ++ return err; ++ } ++ /* Have problem loading the very first info. Ignore the rest. */ ++ pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n", ++ prog->name); ++ } ++ return 0; ++} ++ ++static int cmp_relo_by_insn_idx(const void *key, const void *elem) ++{ ++ size_t insn_idx = *(const size_t *)key; ++ const struct reloc_desc *relo = elem; ++ ++ if (insn_idx == relo->insn_idx) ++ return 0; ++ return insn_idx < relo->insn_idx ? -1 : 1; ++} ++ ++static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx) ++{ ++ if (!prog->nr_reloc) ++ return NULL; ++ return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc, ++ sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx); ++} ++ ++static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog) ++{ ++ int new_cnt = main_prog->nr_reloc + subprog->nr_reloc; ++ struct reloc_desc *relos; ++ int i; ++ ++ if (main_prog == subprog) ++ return 0; ++ relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos)); ++ if (!relos) ++ return -ENOMEM; ++ if (subprog->nr_reloc) ++ memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc, ++ sizeof(*relos) * subprog->nr_reloc); ++ ++ for (i = main_prog->nr_reloc; i < new_cnt; i++) ++ relos[i].insn_idx += subprog->sub_insn_off; ++ /* After insn_idx adjustment the 'relos' array is still sorted ++ * by insn_idx and doesn't break bsearch. ++ */ ++ main_prog->reloc_desc = relos; ++ main_prog->nr_reloc = new_cnt; ++ return 0; ++} ++ ++static int ++bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, ++ struct bpf_program *prog) ++{ ++ size_t sub_insn_idx, insn_idx, new_cnt; ++ struct bpf_program *subprog; ++ struct bpf_insn *insns, *insn; ++ struct reloc_desc *relo; ++ int err; ++ ++ err = reloc_prog_func_and_line_info(obj, main_prog, prog); ++ if (err) ++ return err; ++ ++ for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) { ++ insn = &main_prog->insns[prog->sub_insn_off + insn_idx]; ++ if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn)) ++ continue; ++ ++ relo = find_prog_insn_relo(prog, insn_idx); ++ if (relo && relo->type == RELO_EXTERN_FUNC) ++ /* kfunc relocations will be handled later ++ * in bpf_object__relocate_data() ++ */ ++ continue; ++ if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) { ++ pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n", ++ prog->name, insn_idx, relo->type); ++ return -LIBBPF_ERRNO__RELOC; ++ } ++ if (relo) { ++ /* sub-program instruction index is a combination of ++ * an offset of a symbol pointed to by relocation and ++ * call instruction's imm field; for global functions, ++ * call always has imm = -1, but for static functions ++ * relocation is against STT_SECTION and insn->imm ++ * points to a start of a static function ++ * ++ * for subprog addr relocation, the relo->sym_off + insn->imm is ++ * the byte offset in the corresponding section. ++ */ ++ if (relo->type == RELO_CALL) ++ sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1; ++ else ++ sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ; ++ } else if (insn_is_pseudo_func(insn)) { ++ /* ++ * RELO_SUBPROG_ADDR relo is always emitted even if both ++ * functions are in the same section, so it shouldn't reach here. 
++ */ ++ pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n", ++ prog->name, insn_idx); ++ return -LIBBPF_ERRNO__RELOC; ++ } else { ++ /* if subprogram call is to a static function within ++ * the same ELF section, there won't be any relocation ++ * emitted, but it also means there is no additional ++ * offset necessary, insns->imm is relative to ++ * instruction's original position within the section ++ */ ++ sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1; ++ } ++ ++ /* we enforce that sub-programs should be in .text section */ ++ subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx); ++ if (!subprog) { ++ pr_warn("prog '%s': no .text section found yet sub-program call exists\n", ++ prog->name); ++ return -LIBBPF_ERRNO__RELOC; ++ } ++ ++ /* if it's the first call instruction calling into this ++ * subprogram (meaning this subprog hasn't been processed ++ * yet) within the context of current main program: ++ * - append it at the end of main program's instructions blog; ++ * - process is recursively, while current program is put on hold; ++ * - if that subprogram calls some other not yet processes ++ * subprogram, same thing will happen recursively until ++ * there are no more unprocesses subprograms left to append ++ * and relocate. ++ */ ++ if (subprog->sub_insn_off == 0) { ++ subprog->sub_insn_off = main_prog->insns_cnt; ++ ++ new_cnt = main_prog->insns_cnt + subprog->insns_cnt; ++ insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns)); ++ if (!insns) { ++ pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name); ++ return -ENOMEM; ++ } ++ main_prog->insns = insns; ++ main_prog->insns_cnt = new_cnt; ++ ++ memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns, ++ subprog->insns_cnt * sizeof(*insns)); ++ ++ pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", ++ main_prog->name, subprog->insns_cnt, subprog->name); ++ ++ /* The subprog insns are now appended. Append its relos too. */ ++ err = append_subprog_relos(main_prog, subprog); ++ if (err) ++ return err; ++ err = bpf_object__reloc_code(obj, main_prog, subprog); ++ if (err) ++ return err; ++ } ++ ++ /* main_prog->insns memory could have been re-allocated, so ++ * calculate pointer again ++ */ ++ insn = &main_prog->insns[prog->sub_insn_off + insn_idx]; ++ /* calculate correct instruction position within current main ++ * prog; each main prog can have a different set of ++ * subprograms appended (potentially in different order as ++ * well), so position of any subprog can be different for ++ * different main programs */ ++ insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1; ++ ++ pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n", ++ prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off); ++ } ++ ++ return 0; ++} ++ ++/* ++ * Relocate sub-program calls. ++ * ++ * Algorithm operates as follows. Each entry-point BPF program (referred to as ++ * main prog) is processed separately. For each subprog (non-entry functions, ++ * that can be called from either entry progs or other subprogs) gets their ++ * sub_insn_off reset to zero. This serves as indicator that this subprogram ++ * hasn't been yet appended and relocated within current main prog. Once its ++ * relocated, sub_insn_off will point at the position within current main prog ++ * where given subprog was appended. This will further be used to relocate all ++ * the call instructions jumping into this subprog. 
++ * ++ * We start with main program and process all call instructions. If the call ++ * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off ++ * is zero), subprog instructions are appended at the end of main program's ++ * instruction array. Then main program is "put on hold" while we recursively ++ * process newly appended subprogram. If that subprogram calls into another ++ * subprogram that hasn't been appended, new subprogram is appended again to ++ * the *main* prog's instructions (subprog's instructions are always left ++ * untouched, as they need to be in unmodified state for subsequent main progs ++ * and subprog instructions are always sent only as part of a main prog) and ++ * the process continues recursively. Once all the subprogs called from a main ++ * prog or any of its subprogs are appended (and relocated), all their ++ * positions within finalized instructions array are known, so it's easy to ++ * rewrite call instructions with correct relative offsets, corresponding to ++ * desired target subprog. ++ * ++ * It's important to realize that some subprogs might not be called from some ++ * main prog and any of its called/used subprogs. Those will keep their ++ * subprog->sub_insn_off as zero at all times and won't be appended to current ++ * main prog and won't be relocated within the context of current main prog. ++ * They might still be used from other main progs later. ++ * ++ * Visually this process can be shown as below. Suppose we have two main ++ * programs mainA and mainB and BPF object contains three subprogs: subA, ++ * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and ++ * subC both call subB: ++ * ++ * +--------+ +-------+ ++ * | v v | ++ * +--+---+ +--+-+-+ +---+--+ ++ * | subA | | subB | | subC | ++ * +--+---+ +------+ +---+--+ ++ * ^ ^ ++ * | | ++ * +---+-------+ +------+----+ ++ * | mainA | | mainB | ++ * +-----------+ +-----------+ ++ * ++ * We'll start relocating mainA, will find subA, append it and start ++ * processing sub A recursively: ++ * ++ * +-----------+------+ ++ * | mainA | subA | ++ * +-----------+------+ ++ * ++ * At this point we notice that subB is used from subA, so we append it and ++ * relocate (there are no further subcalls from subB): ++ * ++ * +-----------+------+------+ ++ * | mainA | subA | subB | ++ * +-----------+------+------+ ++ * ++ * At this point, we relocate subA calls, then go one level up and finish with ++ * relocating mainA calls. mainA is done. ++ * ++ * For mainB process is similar but results in different order. We start with ++ * mainB and skip subA and subB, as mainB never calls them (at least ++ * directly), but we see subC is needed, so we append and start processing it: ++ * ++ * +-----------+------+ ++ * | mainB | subC | ++ * +-----------+------+ ++ * Now we see subC needs subB, so we go back to it, append and relocate it: ++ * ++ * +-----------+------+------+ ++ * | mainB | subC | subB | ++ * +-----------+------+------+ ++ * ++ * At this point we unwind recursion, relocate calls in subC, then in mainB. 
++ */ ++static int ++bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog) ++{ ++ struct bpf_program *subprog; ++ int i, err; ++ ++ /* mark all subprogs as not relocated (yet) within the context of ++ * current main program ++ */ ++ for (i = 0; i < obj->nr_programs; i++) { ++ subprog = &obj->programs[i]; ++ if (!prog_is_subprog(obj, subprog)) ++ continue; ++ ++ subprog->sub_insn_off = 0; ++ } ++ ++ err = bpf_object__reloc_code(obj, prog, prog); ++ if (err) ++ return err; ++ ++ return 0; ++} ++ ++static void ++bpf_object__free_relocs(struct bpf_object *obj) ++{ ++ struct bpf_program *prog; ++ int i; ++ ++ /* free up relocation descriptors */ ++ for (i = 0; i < obj->nr_programs; i++) { ++ prog = &obj->programs[i]; ++ zfree(&prog->reloc_desc); ++ prog->nr_reloc = 0; ++ } ++} ++ ++static int cmp_relocs(const void *_a, const void *_b) ++{ ++ const struct reloc_desc *a = _a; ++ const struct reloc_desc *b = _b; ++ ++ if (a->insn_idx != b->insn_idx) ++ return a->insn_idx < b->insn_idx ? -1 : 1; ++ ++ /* no two relocations should have the same insn_idx, but ... */ ++ if (a->type != b->type) ++ return a->type < b->type ? -1 : 1; ++ ++ return 0; ++} ++ ++static void bpf_object__sort_relos(struct bpf_object *obj) ++{ ++ int i; ++ ++ for (i = 0; i < obj->nr_programs; i++) { ++ struct bpf_program *p = &obj->programs[i]; ++ ++ if (!p->nr_reloc) ++ continue; ++ ++ qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs); ++ } ++} ++ ++static int ++bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) ++{ ++ struct bpf_program *prog; ++ size_t i, j; ++ int err; ++ ++ if (obj->btf_ext) { ++ err = bpf_object__relocate_core(obj, targ_btf_path); ++ if (err) { ++ pr_warn("failed to perform CO-RE relocations: %d\n", ++ err); ++ return err; ++ } ++ bpf_object__sort_relos(obj); ++ } ++ ++ /* Before relocating calls pre-process relocations and mark ++ * few ld_imm64 instructions that points to subprogs. ++ * Otherwise bpf_object__reloc_code() later would have to consider ++ * all ld_imm64 insns as relocation candidates. That would ++ * reduce relocation speed, since amount of find_prog_insn_relo() ++ * would increase and most of them will fail to find a relo. ++ */ ++ for (i = 0; i < obj->nr_programs; i++) { ++ prog = &obj->programs[i]; ++ for (j = 0; j < prog->nr_reloc; j++) { ++ struct reloc_desc *relo = &prog->reloc_desc[j]; ++ struct bpf_insn *insn = &prog->insns[relo->insn_idx]; ++ ++ /* mark the insn, so it's recognized by insn_is_pseudo_func() */ ++ if (relo->type == RELO_SUBPROG_ADDR) ++ insn[0].src_reg = BPF_PSEUDO_FUNC; ++ } ++ } ++ ++ /* relocate subprogram calls and append used subprograms to main ++ * programs; each copy of subprogram code needs to be relocated ++ * differently for each main program, because its code location might ++ * have changed. ++ * Append subprog relos to main programs to allow data relos to be ++ * processed after text is completely relocated. 
++ */ ++ for (i = 0; i < obj->nr_programs; i++) { ++ prog = &obj->programs[i]; ++ /* sub-program's sub-calls are relocated within the context of ++ * its main program only ++ */ ++ if (prog_is_subprog(obj, prog)) ++ continue; ++ if (!prog->autoload) ++ continue; ++ ++ err = bpf_object__relocate_calls(obj, prog); ++ if (err) { ++ pr_warn("prog '%s': failed to relocate calls: %d\n", ++ prog->name, err); ++ return err; ++ } ++ } ++ /* Process data relos for main programs */ ++ for (i = 0; i < obj->nr_programs; i++) { ++ prog = &obj->programs[i]; ++ if (prog_is_subprog(obj, prog)) ++ continue; ++ if (!prog->autoload) ++ continue; ++ err = bpf_object__relocate_data(obj, prog); ++ if (err) { ++ pr_warn("prog '%s': failed to relocate data references: %d\n", ++ prog->name, err); ++ return err; ++ } ++ } ++ ++ return 0; ++} ++ ++static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, ++ Elf64_Shdr *shdr, Elf_Data *data); ++ ++static int bpf_object__collect_map_relos(struct bpf_object *obj, ++ Elf64_Shdr *shdr, Elf_Data *data) ++{ ++ const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *); ++ int i, j, nrels, new_sz; ++ const struct btf_var_secinfo *vi = NULL; ++ const struct btf_type *sec, *var, *def; ++ struct bpf_map *map = NULL, *targ_map = NULL; ++ struct bpf_program *targ_prog = NULL; ++ bool is_prog_array, is_map_in_map; ++ const struct btf_member *member; ++ const char *name, *mname, *type; ++ unsigned int moff; ++ Elf64_Sym *sym; ++ Elf64_Rel *rel; ++ void *tmp; ++ ++ if (!obj->efile.btf_maps_sec_btf_id || !obj->btf) ++ return -EINVAL; ++ sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id); ++ if (!sec) ++ return -EINVAL; ++ ++ nrels = shdr->sh_size / shdr->sh_entsize; ++ for (i = 0; i < nrels; i++) { ++ rel = elf_rel_by_idx(data, i); ++ if (!rel) { ++ pr_warn(".maps relo #%d: failed to get ELF relo\n", i); ++ return -LIBBPF_ERRNO__FORMAT; ++ } ++ ++ sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); ++ if (!sym) { ++ pr_warn(".maps relo #%d: symbol %zx not found\n", ++ i, (size_t)ELF64_R_SYM(rel->r_info)); ++ return -LIBBPF_ERRNO__FORMAT; ++ } ++ name = elf_sym_str(obj, sym->st_name) ?: ""; ++ ++ pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n", ++ i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value, ++ (size_t)rel->r_offset, sym->st_name, name); ++ ++ for (j = 0; j < obj->nr_maps; j++) { ++ map = &obj->maps[j]; ++ if (map->sec_idx != obj->efile.btf_maps_shndx) ++ continue; ++ ++ vi = btf_var_secinfos(sec) + map->btf_var_idx; ++ if (vi->offset <= rel->r_offset && ++ rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size) ++ break; ++ } ++ if (j == obj->nr_maps) { ++ pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n", ++ i, name, (size_t)rel->r_offset); ++ return -EINVAL; ++ } ++ ++ is_map_in_map = bpf_map_type__is_map_in_map(map->def.type); ++ is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY; ++ type = is_map_in_map ? 
"map" : "prog"; ++ if (is_map_in_map) { ++ if (sym->st_shndx != obj->efile.btf_maps_shndx) { ++ pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n", ++ i, name); ++ return -LIBBPF_ERRNO__RELOC; ++ } ++ if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS && ++ map->def.key_size != sizeof(int)) { ++ pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n", ++ i, map->name, sizeof(int)); ++ return -EINVAL; ++ } ++ targ_map = bpf_object__find_map_by_name(obj, name); ++ if (!targ_map) { ++ pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n", ++ i, name); ++ return -ESRCH; ++ } ++ } else if (is_prog_array) { ++ targ_prog = bpf_object__find_program_by_name(obj, name); ++ if (!targ_prog) { ++ pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n", ++ i, name); ++ return -ESRCH; ++ } ++ if (targ_prog->sec_idx != sym->st_shndx || ++ targ_prog->sec_insn_off * 8 != sym->st_value || ++ prog_is_subprog(obj, targ_prog)) { ++ pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n", ++ i, name); ++ return -LIBBPF_ERRNO__RELOC; ++ } ++ } else { ++ return -EINVAL; ++ } ++ ++ var = btf__type_by_id(obj->btf, vi->type); ++ def = skip_mods_and_typedefs(obj->btf, var->type, NULL); ++ if (btf_vlen(def) == 0) ++ return -EINVAL; ++ member = btf_members(def) + btf_vlen(def) - 1; ++ mname = btf__name_by_offset(obj->btf, member->name_off); ++ if (strcmp(mname, "values")) ++ return -EINVAL; ++ ++ moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8; ++ if (rel->r_offset - vi->offset < moff) ++ return -EINVAL; ++ ++ moff = rel->r_offset - vi->offset - moff; ++ /* here we use BPF pointer size, which is always 64 bit, as we ++ * are parsing ELF that was built for BPF target ++ */ ++ if (moff % bpf_ptr_sz) ++ return -EINVAL; ++ moff /= bpf_ptr_sz; ++ if (moff >= map->init_slots_sz) { ++ new_sz = moff + 1; ++ tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz); ++ if (!tmp) ++ return -ENOMEM; ++ map->init_slots = tmp; ++ memset(map->init_slots + map->init_slots_sz, 0, ++ (new_sz - map->init_slots_sz) * host_ptr_sz); ++ map->init_slots_sz = new_sz; ++ } ++ map->init_slots[moff] = is_map_in_map ? 
(void *)targ_map : (void *)targ_prog; ++ ++ pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n", ++ i, map->name, moff, type, name); ++ } ++ ++ return 0; ++} ++ ++static int bpf_object__collect_relos(struct bpf_object *obj) ++{ ++ int i, err; ++ ++ for (i = 0; i < obj->efile.sec_cnt; i++) { ++ struct elf_sec_desc *sec_desc = &obj->efile.secs[i]; ++ Elf64_Shdr *shdr; ++ Elf_Data *data; ++ int idx; ++ ++ if (sec_desc->sec_type != SEC_RELO) ++ continue; ++ ++ shdr = sec_desc->shdr; ++ data = sec_desc->data; ++ idx = shdr->sh_info; ++ ++ if (shdr->sh_type != SHT_REL) { ++ pr_warn("internal error at %d\n", __LINE__); ++ return -LIBBPF_ERRNO__INTERNAL; ++ } ++ ++ if (idx == obj->efile.st_ops_shndx) ++ err = bpf_object__collect_st_ops_relos(obj, shdr, data); ++ else if (idx == obj->efile.btf_maps_shndx) ++ err = bpf_object__collect_map_relos(obj, shdr, data); ++ else ++ err = bpf_object__collect_prog_relos(obj, shdr, data); ++ if (err) ++ return err; ++ } ++ ++ bpf_object__sort_relos(obj); ++ return 0; ++} ++ ++static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id) ++{ ++ if (BPF_CLASS(insn->code) == BPF_JMP && ++ BPF_OP(insn->code) == BPF_CALL && ++ BPF_SRC(insn->code) == BPF_K && ++ insn->src_reg == 0 && ++ insn->dst_reg == 0) { ++ *func_id = insn->imm; ++ return true; ++ } ++ return false; ++} ++ ++static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog) ++{ ++ struct bpf_insn *insn = prog->insns; ++ enum bpf_func_id func_id; ++ int i; ++ ++ if (obj->gen_loader) ++ return 0; ++ ++ for (i = 0; i < prog->insns_cnt; i++, insn++) { ++ if (!insn_is_helper_call(insn, &func_id)) ++ continue; ++ ++ /* on kernels that don't yet support ++ * bpf_probe_read_{kernel,user}[_str] helpers, fall back ++ * to bpf_probe_read() which works well for old kernels ++ */ ++ switch (func_id) { ++ case BPF_FUNC_probe_read_kernel: ++ case BPF_FUNC_probe_read_user: ++ if (!kernel_supports(obj, FEAT_PROBE_READ_KERN)) ++ insn->imm = BPF_FUNC_probe_read; ++ break; ++ case BPF_FUNC_probe_read_kernel_str: ++ case BPF_FUNC_probe_read_user_str: ++ if (!kernel_supports(obj, FEAT_PROBE_READ_KERN)) ++ insn->imm = BPF_FUNC_probe_read_str; ++ break; ++ default: ++ break; ++ } ++ } ++ return 0; ++} ++ ++static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name, ++ int *btf_obj_fd, int *btf_type_id); ++ ++/* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */ ++static int libbpf_prepare_prog_load(struct bpf_program *prog, ++ struct bpf_prog_load_opts *opts, long cookie) ++{ ++ enum sec_def_flags def = cookie; ++ ++ /* old kernels might not support specifying expected_attach_type */ ++ if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE)) ++ opts->expected_attach_type = 0; ++ ++ if (def & SEC_SLEEPABLE) ++ opts->prog_flags |= BPF_F_SLEEPABLE; ++ ++ if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS)) ++ opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; ++ ++ if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) { ++ int btf_obj_fd = 0, btf_type_id = 0, err; ++ const char *attach_name; ++ ++ attach_name = strchr(prog->sec_name, '/'); ++ if (!attach_name) { ++ /* if BPF program is annotated with just SEC("fentry") ++ * (or similar) without declaratively specifying ++ * target, then it is expected that target will be ++ * specified with bpf_program__set_attach_target() at ++ * runtime before BPF object load step. 
If not, then ++ * there is nothing to load into the kernel as BPF ++ * verifier won't be able to validate BPF program ++ * correctness anyways. ++ */ ++ pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n", ++ prog->name); ++ return -EINVAL; ++ } ++ attach_name++; /* skip over / */ ++ ++ err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id); ++ if (err) ++ return err; ++ ++ /* cache resolved BTF FD and BTF type ID in the prog */ ++ prog->attach_btf_obj_fd = btf_obj_fd; ++ prog->attach_btf_id = btf_type_id; ++ ++ /* but by now libbpf common logic is not utilizing ++ * prog->atach_btf_obj_fd/prog->attach_btf_id anymore because ++ * this callback is called after opts were populated by ++ * libbpf, so this callback has to update opts explicitly here ++ */ ++ opts->attach_btf_obj_fd = btf_obj_fd; ++ opts->attach_btf_id = btf_type_id; ++ } ++ return 0; ++} ++ ++static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz); ++ ++static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog, ++ struct bpf_insn *insns, int insns_cnt, ++ const char *license, __u32 kern_version, int *prog_fd) ++{ ++ LIBBPF_OPTS(bpf_prog_load_opts, load_attr); ++ const char *prog_name = NULL; ++ char *cp, errmsg[STRERR_BUFSIZE]; ++ size_t log_buf_size = 0; ++ char *log_buf = NULL, *tmp; ++ int btf_fd, ret, err; ++ bool own_log_buf = true; ++ __u32 log_level = prog->log_level; ++ ++ if (prog->type == BPF_PROG_TYPE_UNSPEC) { ++ /* ++ * The program type must be set. Most likely we couldn't find a proper ++ * section definition at load time, and thus we didn't infer the type. ++ */ ++ pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n", ++ prog->name, prog->sec_name); ++ return -EINVAL; ++ } ++ ++ if (!insns || !insns_cnt) ++ return -EINVAL; ++ ++ load_attr.expected_attach_type = prog->expected_attach_type; ++ if (kernel_supports(obj, FEAT_PROG_NAME)) ++ prog_name = prog->name; ++ load_attr.attach_prog_fd = prog->attach_prog_fd; ++ load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd; ++ load_attr.attach_btf_id = prog->attach_btf_id; ++ load_attr.kern_version = kern_version; ++ load_attr.prog_ifindex = prog->prog_ifindex; ++ ++ /* specify func_info/line_info only if kernel supports them */ ++ btf_fd = bpf_object__btf_fd(obj); ++ if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) { ++ load_attr.prog_btf_fd = btf_fd; ++ load_attr.func_info = prog->func_info; ++ load_attr.func_info_rec_size = prog->func_info_rec_size; ++ load_attr.func_info_cnt = prog->func_info_cnt; ++ load_attr.line_info = prog->line_info; ++ load_attr.line_info_rec_size = prog->line_info_rec_size; ++ load_attr.line_info_cnt = prog->line_info_cnt; ++ } ++ load_attr.log_level = log_level; ++ load_attr.prog_flags = prog->prog_flags; ++ load_attr.fd_array = obj->fd_array; ++ ++ /* adjust load_attr if sec_def provides custom preload callback */ ++ if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) { ++ err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie); ++ if (err < 0) { ++ pr_warn("prog '%s': failed to prepare load attributes: %d\n", ++ prog->name, err); ++ return err; ++ } ++ insns = prog->insns; ++ insns_cnt = prog->insns_cnt; ++ } ++ ++ if (obj->gen_loader) { ++ bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name, ++ license, insns, insns_cnt, &load_attr, ++ prog - obj->programs); ++ *prog_fd = -1; ++ return 0; ++ } ++ ++retry_load: ++ /* if log_level is zero, we don't 
request logs initially even if ++ * custom log_buf is specified; if the program load fails, then we'll ++ * bump log_level to 1 and use either custom log_buf or we'll allocate ++ * our own and retry the load to get details on what failed ++ */ ++ if (log_level) { ++ if (prog->log_buf) { ++ log_buf = prog->log_buf; ++ log_buf_size = prog->log_size; ++ own_log_buf = false; ++ } else if (obj->log_buf) { ++ log_buf = obj->log_buf; ++ log_buf_size = obj->log_size; ++ own_log_buf = false; ++ } else { ++ log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2); ++ tmp = realloc(log_buf, log_buf_size); ++ if (!tmp) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ log_buf = tmp; ++ log_buf[0] = '\0'; ++ own_log_buf = true; ++ } ++ } ++ ++ load_attr.log_buf = log_buf; ++ load_attr.log_size = log_buf_size; ++ load_attr.log_level = log_level; ++ ++ ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr); ++ if (ret >= 0) { ++ if (log_level && own_log_buf) { ++ pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", ++ prog->name, log_buf); ++ } ++ ++ if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) { ++ struct bpf_map *map; ++ int i; ++ ++ for (i = 0; i < obj->nr_maps; i++) { ++ map = &prog->obj->maps[i]; ++ if (map->libbpf_type != LIBBPF_MAP_RODATA) ++ continue; ++ ++ if (bpf_prog_bind_map(ret, bpf_map__fd(map), NULL)) { ++ cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); ++ pr_warn("prog '%s': failed to bind map '%s': %s\n", ++ prog->name, map->real_name, cp); ++ /* Don't fail hard if can't bind rodata. */ ++ } ++ } ++ } ++ ++ *prog_fd = ret; ++ ret = 0; ++ goto out; ++ } ++ ++ if (log_level == 0) { ++ log_level = 1; ++ goto retry_load; ++ } ++ /* On ENOSPC, increase log buffer size and retry, unless custom ++ * log_buf is specified. ++ * Be careful to not overflow u32, though. Kernel's log buf size limit ++ * isn't part of UAPI so it can always be bumped to full 4GB. So don't ++ * multiply by 2 unless we are sure we'll fit within 32 bits. ++ * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2). ++ */ ++ if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2) ++ goto retry_load; ++ ++ ret = -errno; ++ ++ /* post-process verifier log to improve error descriptions */ ++ fixup_verifier_log(prog, log_buf, log_buf_size); ++ ++ cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); ++ pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp); ++ pr_perm_msg(ret); ++ ++ if (own_log_buf && log_buf && log_buf[0] != '\0') { ++ pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", ++ prog->name, log_buf); ++ } ++ ++out: ++ if (own_log_buf) ++ free(log_buf); ++ return ret; ++} ++ ++static char *find_prev_line(char *buf, char *cur) ++{ ++ char *p; ++ ++ if (cur == buf) /* end of a log buf */ ++ return NULL; ++ ++ p = cur - 1; ++ while (p - 1 >= buf && *(p - 1) != '\n') ++ p--; ++ ++ return p; ++} ++ ++static void patch_log(char *buf, size_t buf_sz, size_t log_sz, ++ char *orig, size_t orig_sz, const char *patch) ++{ ++ /* size of the remaining log content to the right from the to-be-replaced part */ ++ size_t rem_sz = (buf + log_sz) - (orig + orig_sz); ++ size_t patch_sz = strlen(patch); ++ ++ if (patch_sz != orig_sz) { ++ /* If patch line(s) are longer than original piece of verifier log, ++ * shift log contents by (patch_sz - orig_sz) bytes to the right ++ * starting from after to-be-replaced part of the log. 
++ * ++ * If patch line(s) are shorter than original piece of verifier log, ++ * shift log contents by (orig_sz - patch_sz) bytes to the left ++ * starting from after to-be-replaced part of the log ++ * ++ * We need to be careful about not overflowing available ++ * buf_sz capacity. If that's the case, we'll truncate the end ++ * of the original log, as necessary. ++ */ ++ if (patch_sz > orig_sz) { ++ if (orig + patch_sz >= buf + buf_sz) { ++ /* patch is big enough to cover remaining space completely */ ++ patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1; ++ rem_sz = 0; ++ } else if (patch_sz - orig_sz > buf_sz - log_sz) { ++ /* patch causes part of remaining log to be truncated */ ++ rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz); ++ } ++ } ++ /* shift remaining log to the right by calculated amount */ ++ memmove(orig + patch_sz, orig + orig_sz, rem_sz); ++ } ++ ++ memcpy(orig, patch, patch_sz); ++} ++ ++static void fixup_log_failed_core_relo(struct bpf_program *prog, ++ char *buf, size_t buf_sz, size_t log_sz, ++ char *line1, char *line2, char *line3) ++{ ++ /* Expected log for failed and not properly guarded CO-RE relocation: ++ * line1 -> 123: (85) call unknown#195896080 ++ * line2 -> invalid func unknown#195896080 ++ * line3 -> ++ * ++ * "123" is the index of the instruction that was poisoned. We extract ++ * instruction index to find corresponding CO-RE relocation and ++ * replace this part of the log with more relevant information about ++ * failed CO-RE relocation. ++ */ ++ const struct bpf_core_relo *relo; ++ struct bpf_core_spec spec; ++ char patch[512], spec_buf[256]; ++ int insn_idx, err, spec_len; ++ ++ if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1) ++ return; ++ ++ relo = find_relo_core(prog, insn_idx); ++ if (!relo) ++ return; ++ ++ err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec); ++ if (err) ++ return; ++ ++ spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec); ++ snprintf(patch, sizeof(patch), ++ "%d: \n" ++ "failed to resolve CO-RE relocation %s%s\n", ++ insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : ""); ++ ++ patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); ++} ++ ++static void fixup_log_missing_map_load(struct bpf_program *prog, ++ char *buf, size_t buf_sz, size_t log_sz, ++ char *line1, char *line2, char *line3) ++{ ++ /* Expected log for failed and not properly guarded CO-RE relocation: ++ * line1 -> 123: (85) call unknown#2001000345 ++ * line2 -> invalid func unknown#2001000345 ++ * line3 -> ++ * ++ * "123" is the index of the instruction that was poisoned. ++ * "345" in "2001000345" are map index in obj->maps to fetch map name. 
++ */ ++ struct bpf_object *obj = prog->obj; ++ const struct bpf_map *map; ++ int insn_idx, map_idx; ++ char patch[128]; ++ ++ if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2) ++ return; ++ ++ map_idx -= MAP_LDIMM64_POISON_BASE; ++ if (map_idx < 0 || map_idx >= obj->nr_maps) ++ return; ++ map = &obj->maps[map_idx]; ++ ++ snprintf(patch, sizeof(patch), ++ "%d: \n" ++ "BPF map '%s' is referenced but wasn't created\n", ++ insn_idx, map->name); ++ ++ patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); ++} ++ ++static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz) ++{ ++ /* look for familiar error patterns in last N lines of the log */ ++ const size_t max_last_line_cnt = 10; ++ char *prev_line, *cur_line, *next_line; ++ size_t log_sz; ++ int i; ++ ++ if (!buf) ++ return; ++ ++ log_sz = strlen(buf) + 1; ++ next_line = buf + log_sz - 1; ++ ++ for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) { ++ cur_line = find_prev_line(buf, next_line); ++ if (!cur_line) ++ return; ++ ++ /* failed CO-RE relocation case */ ++ if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) { ++ prev_line = find_prev_line(buf, cur_line); ++ if (!prev_line) ++ continue; ++ ++ fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz, ++ prev_line, cur_line, next_line); ++ return; ++ } else if (str_has_pfx(cur_line, "invalid func unknown#"MAP_LDIMM64_POISON_PFX)) { ++ prev_line = find_prev_line(buf, cur_line); ++ if (!prev_line) ++ continue; ++ ++ fixup_log_missing_map_load(prog, buf, buf_sz, log_sz, ++ prev_line, cur_line, next_line); ++ return; ++ } ++ } ++} ++ ++static int bpf_program_record_relos(struct bpf_program *prog) ++{ ++ struct bpf_object *obj = prog->obj; ++ int i; ++ ++ for (i = 0; i < prog->nr_reloc; i++) { ++ struct reloc_desc *relo = &prog->reloc_desc[i]; ++ struct extern_desc *ext = &obj->externs[relo->sym_off]; ++ ++ switch (relo->type) { ++ case RELO_EXTERN_VAR: ++ if (ext->type != EXT_KSYM) ++ continue; ++ bpf_gen__record_extern(obj->gen_loader, ext->name, ++ ext->is_weak, !ext->ksym.type_id, ++ BTF_KIND_VAR, relo->insn_idx); ++ break; ++ case RELO_EXTERN_FUNC: ++ bpf_gen__record_extern(obj->gen_loader, ext->name, ++ ext->is_weak, false, BTF_KIND_FUNC, ++ relo->insn_idx); ++ break; ++ case RELO_CORE: { ++ struct bpf_core_relo cr = { ++ .insn_off = relo->insn_idx * 8, ++ .type_id = relo->core_relo->type_id, ++ .access_str_off = relo->core_relo->access_str_off, ++ .kind = relo->core_relo->kind, ++ }; ++ ++ bpf_gen__record_relo_core(obj->gen_loader, &cr); ++ break; ++ } ++ default: ++ continue; ++ } ++ } ++ return 0; ++} ++ ++static int ++bpf_object__load_progs(struct bpf_object *obj, int log_level) ++{ ++ struct bpf_program *prog; ++ size_t i; ++ int err; ++ ++ for (i = 0; i < obj->nr_programs; i++) { ++ prog = &obj->programs[i]; ++ err = bpf_object__sanitize_prog(obj, prog); ++ if (err) ++ return err; ++ } ++ ++ for (i = 0; i < obj->nr_programs; i++) { ++ prog = &obj->programs[i]; ++ if (prog_is_subprog(obj, prog)) ++ continue; ++ if (!prog->autoload) { ++ pr_debug("prog '%s': skipped loading\n", prog->name); ++ continue; ++ } ++ prog->log_level |= log_level; ++ ++ if (obj->gen_loader) ++ bpf_program_record_relos(prog); ++ ++ err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt, ++ obj->license, obj->kern_version, &prog->fd); ++ if (err) { ++ pr_warn("prog '%s': failed to load: %d\n", prog->name, err); ++ return err; ++ } ++ } ++ ++ bpf_object__free_relocs(obj); ++ return 0; ++} ++ ++static const struct 
bpf_sec_def *find_sec_def(const char *sec_name); ++ ++static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts) ++{ ++ struct bpf_program *prog; ++ int err; ++ ++ bpf_object__for_each_program(prog, obj) { ++ prog->sec_def = find_sec_def(prog->sec_name); ++ if (!prog->sec_def) { ++ /* couldn't guess, but user might manually specify */ ++ pr_debug("prog '%s': unrecognized ELF section name '%s'\n", ++ prog->name, prog->sec_name); ++ continue; ++ } ++ ++ prog->type = prog->sec_def->prog_type; ++ prog->expected_attach_type = prog->sec_def->expected_attach_type; ++ ++ /* sec_def can have custom callback which should be called ++ * after bpf_program is initialized to adjust its properties ++ */ ++ if (prog->sec_def->prog_setup_fn) { ++ err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie); ++ if (err < 0) { ++ pr_warn("prog '%s': failed to initialize: %d\n", ++ prog->name, err); ++ return err; ++ } ++ } ++ } ++ ++ return 0; ++} ++ ++static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, ++ const struct bpf_object_open_opts *opts) ++{ ++ const char *obj_name, *kconfig, *btf_tmp_path; ++ struct bpf_object *obj; ++ char tmp_name[64]; ++ int err; ++ char *log_buf; ++ size_t log_size; ++ __u32 log_level; ++ ++ if (elf_version(EV_CURRENT) == EV_NONE) { ++ pr_warn("failed to init libelf for %s\n", ++ path ? : "(mem buf)"); ++ return ERR_PTR(-LIBBPF_ERRNO__LIBELF); ++ } ++ ++ if (!OPTS_VALID(opts, bpf_object_open_opts)) ++ return ERR_PTR(-EINVAL); ++ ++ obj_name = OPTS_GET(opts, object_name, NULL); ++ if (obj_buf) { ++ if (!obj_name) { ++ snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx", ++ (unsigned long)obj_buf, ++ (unsigned long)obj_buf_sz); ++ obj_name = tmp_name; ++ } ++ path = obj_name; ++ pr_debug("loading object '%s' from buffer\n", obj_name); ++ } ++ ++ log_buf = OPTS_GET(opts, kernel_log_buf, NULL); ++ log_size = OPTS_GET(opts, kernel_log_size, 0); ++ log_level = OPTS_GET(opts, kernel_log_level, 0); ++ if (log_size > UINT_MAX) ++ return ERR_PTR(-EINVAL); ++ if (log_size && !log_buf) ++ return ERR_PTR(-EINVAL); ++ ++ obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); ++ if (IS_ERR(obj)) ++ return obj; ++ ++ obj->log_buf = log_buf; ++ obj->log_size = log_size; ++ obj->log_level = log_level; ++ ++ btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); ++ if (btf_tmp_path) { ++ if (strlen(btf_tmp_path) >= PATH_MAX) { ++ err = -ENAMETOOLONG; ++ goto out; ++ } ++ obj->btf_custom_path = strdup(btf_tmp_path); ++ if (!obj->btf_custom_path) { ++ err = -ENOMEM; ++ goto out; ++ } ++ } ++ ++ kconfig = OPTS_GET(opts, kconfig, NULL); ++ if (kconfig) { ++ obj->kconfig = strdup(kconfig); ++ if (!obj->kconfig) { ++ err = -ENOMEM; ++ goto out; ++ } ++ } ++ ++ err = bpf_object__elf_init(obj); ++ err = err ? : bpf_object__check_endianness(obj); ++ err = err ? : bpf_object__elf_collect(obj); ++ err = err ? : bpf_object__collect_externs(obj); ++ err = err ? : bpf_object__finalize_btf(obj); ++ err = err ? : bpf_object__init_maps(obj, opts); ++ err = err ? : bpf_object_init_progs(obj, opts); ++ err = err ? 
: bpf_object__collect_relos(obj); ++ if (err) ++ goto out; ++ ++ bpf_object__elf_finish(obj); ++ ++ return obj; ++out: ++ bpf_object__close(obj); ++ return ERR_PTR(err); ++} ++ ++struct bpf_object * ++bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts) ++{ ++ if (!path) ++ return libbpf_err_ptr(-EINVAL); ++ ++ pr_debug("loading %s\n", path); ++ ++ return libbpf_ptr(bpf_object_open(path, NULL, 0, opts)); ++} ++ ++struct bpf_object *bpf_object__open(const char *path) ++{ ++ return bpf_object__open_file(path, NULL); ++} ++ ++struct bpf_object * ++bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, ++ const struct bpf_object_open_opts *opts) ++{ ++ if (!obj_buf || obj_buf_sz == 0) ++ return libbpf_err_ptr(-EINVAL); ++ ++ return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts)); ++} ++ ++static int bpf_object_unload(struct bpf_object *obj) ++{ ++ size_t i; ++ ++ if (!obj) ++ return libbpf_err(-EINVAL); ++ ++ for (i = 0; i < obj->nr_maps; i++) { ++ zclose(obj->maps[i].fd); ++ if (obj->maps[i].st_ops) ++ zfree(&obj->maps[i].st_ops->kern_vdata); ++ } ++ ++ for (i = 0; i < obj->nr_programs; i++) ++ bpf_program__unload(&obj->programs[i]); ++ ++ return 0; ++} ++ ++int bpf_object__unload(struct bpf_object *obj) __attribute__((alias("bpf_object_unload"))); ++ ++static int bpf_object__sanitize_maps(struct bpf_object *obj) ++{ ++ struct bpf_map *m; ++ ++ bpf_object__for_each_map(m, obj) { ++ if (!bpf_map__is_internal(m)) ++ continue; ++ if (!kernel_supports(obj, FEAT_ARRAY_MMAP)) ++ m->def.map_flags ^= BPF_F_MMAPABLE; ++ } ++ ++ return 0; ++} ++ ++int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx) ++{ ++ char sym_type, sym_name[500]; ++ unsigned long long sym_addr; ++ int ret, err = 0; ++ FILE *f; ++ ++ f = fopen("/proc/kallsyms", "r"); ++ if (!f) { ++ err = -errno; ++ pr_warn("failed to open /proc/kallsyms: %d\n", err); ++ return err; ++ } ++ ++ while (true) { ++ ret = fscanf(f, "%llx %c %499s%*[^\n]\n", ++ &sym_addr, &sym_type, sym_name); ++ if (ret == EOF && feof(f)) ++ break; ++ if (ret != 3) { ++ pr_warn("failed to read kallsyms entry: %d\n", ret); ++ err = -EINVAL; ++ break; ++ } ++ ++ err = cb(sym_addr, sym_type, sym_name, ctx); ++ if (err) ++ break; ++ } ++ ++ fclose(f); ++ return err; ++} ++ ++static int kallsyms_cb(unsigned long long sym_addr, char sym_type, ++ const char *sym_name, void *ctx) ++{ ++ struct bpf_object *obj = ctx; ++ const struct btf_type *t; ++ struct extern_desc *ext; ++ ++ ext = find_extern_by_name(obj, sym_name); ++ if (!ext || ext->type != EXT_KSYM) ++ return 0; ++ ++ t = btf__type_by_id(obj->btf, ext->btf_id); ++ if (!btf_is_var(t)) ++ return 0; ++ ++ if (ext->is_set && ext->ksym.addr != sym_addr) { ++ pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n", ++ sym_name, ext->ksym.addr, sym_addr); ++ return -EINVAL; ++ } ++ if (!ext->is_set) { ++ ext->is_set = true; ++ ext->ksym.addr = sym_addr; ++ pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr); ++ } ++ return 0; ++} ++ ++static int bpf_object__read_kallsyms_file(struct bpf_object *obj) ++{ ++ return libbpf_kallsyms_parse(kallsyms_cb, obj); ++} ++ ++static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, ++ __u16 kind, struct btf **res_btf, ++ struct module_btf **res_mod_btf) ++{ ++ struct module_btf *mod_btf; ++ struct btf *btf; ++ int i, id, err; ++ ++ btf = obj->btf_vmlinux; ++ mod_btf = NULL; ++ id = btf__find_by_name_kind(btf, ksym_name, kind); ++ ++ if (id == -ENOENT) { ++ err = load_module_btfs(obj); ++ if 
(err) ++ return err; ++ ++ for (i = 0; i < obj->btf_module_cnt; i++) { ++ /* we assume module_btf's BTF FD is always >0 */ ++ mod_btf = &obj->btf_modules[i]; ++ btf = mod_btf->btf; ++ id = btf__find_by_name_kind_own(btf, ksym_name, kind); ++ if (id != -ENOENT) ++ break; ++ } ++ } ++ if (id <= 0) ++ return -ESRCH; ++ ++ *res_btf = btf; ++ *res_mod_btf = mod_btf; ++ return id; ++} ++ ++static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj, ++ struct extern_desc *ext) ++{ ++ const struct btf_type *targ_var, *targ_type; ++ __u32 targ_type_id, local_type_id; ++ struct module_btf *mod_btf = NULL; ++ const char *targ_var_name; ++ struct btf *btf = NULL; ++ int id, err; ++ ++ id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf); ++ if (id < 0) { ++ if (id == -ESRCH && ext->is_weak) ++ return 0; ++ pr_warn("extern (var ksym) '%s': not found in kernel BTF\n", ++ ext->name); ++ return id; ++ } ++ ++ /* find local type_id */ ++ local_type_id = ext->ksym.type_id; ++ ++ /* find target type_id */ ++ targ_var = btf__type_by_id(btf, id); ++ targ_var_name = btf__name_by_offset(btf, targ_var->name_off); ++ targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id); ++ ++ err = bpf_core_types_are_compat(obj->btf, local_type_id, ++ btf, targ_type_id); ++ if (err <= 0) { ++ const struct btf_type *local_type; ++ const char *targ_name, *local_name; ++ ++ local_type = btf__type_by_id(obj->btf, local_type_id); ++ local_name = btf__name_by_offset(obj->btf, local_type->name_off); ++ targ_name = btf__name_by_offset(btf, targ_type->name_off); ++ ++ pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n", ++ ext->name, local_type_id, ++ btf_kind_str(local_type), local_name, targ_type_id, ++ btf_kind_str(targ_type), targ_name); ++ return -EINVAL; ++ } ++ ++ ext->is_set = true; ++ ext->ksym.kernel_btf_obj_fd = mod_btf ? 
mod_btf->fd : 0; ++ ext->ksym.kernel_btf_id = id; ++ pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n", ++ ext->name, id, btf_kind_str(targ_var), targ_var_name); ++ ++ return 0; ++} ++ ++static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, ++ struct extern_desc *ext) ++{ ++ int local_func_proto_id, kfunc_proto_id, kfunc_id; ++ struct module_btf *mod_btf = NULL; ++ const struct btf_type *kern_func; ++ struct btf *kern_btf = NULL; ++ int ret; ++ ++ local_func_proto_id = ext->ksym.type_id; ++ ++ kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, &kern_btf, &mod_btf); ++ if (kfunc_id < 0) { ++ if (kfunc_id == -ESRCH && ext->is_weak) ++ return 0; ++ pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n", ++ ext->name); ++ return kfunc_id; ++ } ++ ++ kern_func = btf__type_by_id(kern_btf, kfunc_id); ++ kfunc_proto_id = kern_func->type; ++ ++ ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id, ++ kern_btf, kfunc_proto_id); ++ if (ret <= 0) { ++ pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with kernel [%d]\n", ++ ext->name, local_func_proto_id, kfunc_proto_id); ++ return -EINVAL; ++ } ++ ++ /* set index for module BTF fd in fd_array, if unset */ ++ if (mod_btf && !mod_btf->fd_array_idx) { ++ /* insn->off is s16 */ ++ if (obj->fd_array_cnt == INT16_MAX) { ++ pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n", ++ ext->name, mod_btf->fd_array_idx); ++ return -E2BIG; ++ } ++ /* Cannot use index 0 for module BTF fd */ ++ if (!obj->fd_array_cnt) ++ obj->fd_array_cnt = 1; ++ ++ ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int), ++ obj->fd_array_cnt + 1); ++ if (ret) ++ return ret; ++ mod_btf->fd_array_idx = obj->fd_array_cnt; ++ /* we assume module BTF FD is always >0 */ ++ obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd; ++ } ++ ++ ext->is_set = true; ++ ext->ksym.kernel_btf_id = kfunc_id; ++ ext->ksym.btf_fd_idx = mod_btf ? 
mod_btf->fd_array_idx : 0; ++ pr_debug("extern (func ksym) '%s': resolved to kernel [%d]\n", ++ ext->name, kfunc_id); ++ ++ return 0; ++} ++ ++static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj) ++{ ++ const struct btf_type *t; ++ struct extern_desc *ext; ++ int i, err; ++ ++ for (i = 0; i < obj->nr_extern; i++) { ++ ext = &obj->externs[i]; ++ if (ext->type != EXT_KSYM || !ext->ksym.type_id) ++ continue; ++ ++ if (obj->gen_loader) { ++ ext->is_set = true; ++ ext->ksym.kernel_btf_obj_fd = 0; ++ ext->ksym.kernel_btf_id = 0; ++ continue; ++ } ++ t = btf__type_by_id(obj->btf, ext->btf_id); ++ if (btf_is_var(t)) ++ err = bpf_object__resolve_ksym_var_btf_id(obj, ext); ++ else ++ err = bpf_object__resolve_ksym_func_btf_id(obj, ext); ++ if (err) ++ return err; ++ } ++ return 0; ++} ++ ++static int bpf_object__resolve_externs(struct bpf_object *obj, ++ const char *extra_kconfig) ++{ ++ bool need_config = false, need_kallsyms = false; ++ bool need_vmlinux_btf = false; ++ struct extern_desc *ext; ++ void *kcfg_data = NULL; ++ int err, i; ++ ++ if (obj->nr_extern == 0) ++ return 0; ++ ++ if (obj->kconfig_map_idx >= 0) ++ kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped; ++ ++ for (i = 0; i < obj->nr_extern; i++) { ++ ext = &obj->externs[i]; ++ ++ if (ext->type == EXT_KSYM) { ++ if (ext->ksym.type_id) ++ need_vmlinux_btf = true; ++ else ++ need_kallsyms = true; ++ continue; ++ } else if (ext->type == EXT_KCFG) { ++ void *ext_ptr = kcfg_data + ext->kcfg.data_off; ++ __u64 value = 0; ++ ++ /* Kconfig externs need actual /proc/config.gz */ ++ if (str_has_pfx(ext->name, "CONFIG_")) { ++ need_config = true; ++ continue; ++ } ++ ++ /* Virtual kcfg externs are customly handled by libbpf */ ++ if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { ++ value = get_kernel_version(); ++ if (!value) { ++ pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name); ++ return -EINVAL; ++ } ++ } else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) { ++ value = kernel_supports(obj, FEAT_BPF_COOKIE); ++ } else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) { ++ value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER); ++ } else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) { ++ /* Currently libbpf supports only CONFIG_ and LINUX_ prefixed ++ * __kconfig externs, where LINUX_ ones are virtual and filled out ++ * customly by libbpf (their values don't come from Kconfig). ++ * If LINUX_xxx variable is not recognized by libbpf, but is marked ++ * __weak, it defaults to zero value, just like for CONFIG_xxx ++ * externs. 
++ */ ++ pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name); ++ return -EINVAL; ++ } ++ ++ err = set_kcfg_value_num(ext, ext_ptr, value); ++ if (err) ++ return err; ++ pr_debug("extern (kcfg) '%s': set to 0x%llx\n", ++ ext->name, (long long)value); ++ } else { ++ pr_warn("extern '%s': unrecognized extern kind\n", ext->name); ++ return -EINVAL; ++ } ++ } ++ if (need_config && extra_kconfig) { ++ err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data); ++ if (err) ++ return -EINVAL; ++ need_config = false; ++ for (i = 0; i < obj->nr_extern; i++) { ++ ext = &obj->externs[i]; ++ if (ext->type == EXT_KCFG && !ext->is_set) { ++ need_config = true; ++ break; ++ } ++ } ++ } ++ if (need_config) { ++ err = bpf_object__read_kconfig_file(obj, kcfg_data); ++ if (err) ++ return -EINVAL; ++ } ++ if (need_kallsyms) { ++ err = bpf_object__read_kallsyms_file(obj); ++ if (err) ++ return -EINVAL; ++ } ++ if (need_vmlinux_btf) { ++ err = bpf_object__resolve_ksyms_btf_id(obj); ++ if (err) ++ return -EINVAL; ++ } ++ for (i = 0; i < obj->nr_extern; i++) { ++ ext = &obj->externs[i]; ++ ++ if (!ext->is_set && !ext->is_weak) { ++ pr_warn("extern '%s' (strong): not resolved\n", ext->name); ++ return -ESRCH; ++ } else if (!ext->is_set) { ++ pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n", ++ ext->name); ++ } ++ } ++ ++ return 0; ++} ++ ++static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path) ++{ ++ int err, i; ++ ++ if (!obj) ++ return libbpf_err(-EINVAL); ++ ++ if (obj->loaded) { ++ pr_warn("object '%s': load can't be attempted twice\n", obj->name); ++ return libbpf_err(-EINVAL); ++ } ++ ++ if (obj->gen_loader) ++ bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); ++ ++ err = bpf_object__probe_loading(obj); ++ err = err ? : bpf_object__load_vmlinux_btf(obj, false); ++ err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); ++ err = err ? : bpf_object__sanitize_and_load_btf(obj); ++ err = err ? : bpf_object__sanitize_maps(obj); ++ err = err ? : bpf_object__init_kern_struct_ops_maps(obj); ++ err = err ? : bpf_object__create_maps(obj); ++ err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); ++ err = err ? : bpf_object__load_progs(obj, extra_log_level); ++ err = err ? 
: bpf_object_init_prog_arrays(obj); ++ ++ if (obj->gen_loader) { ++ /* reset FDs */ ++ if (obj->btf) ++ btf__set_fd(obj->btf, -1); ++ for (i = 0; i < obj->nr_maps; i++) ++ obj->maps[i].fd = -1; ++ if (!err) ++ err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps); ++ } ++ ++ /* clean up fd_array */ ++ zfree(&obj->fd_array); ++ ++ /* clean up module BTFs */ ++ for (i = 0; i < obj->btf_module_cnt; i++) { ++ close(obj->btf_modules[i].fd); ++ btf__free(obj->btf_modules[i].btf); ++ free(obj->btf_modules[i].name); ++ } ++ free(obj->btf_modules); ++ ++ /* clean up vmlinux BTF */ ++ btf__free(obj->btf_vmlinux); ++ obj->btf_vmlinux = NULL; ++ ++ obj->loaded = true; /* doesn't matter if successfully or not */ ++ ++ if (err) ++ goto out; ++ ++ return 0; ++out: ++ /* unpin any maps that were auto-pinned during load */ ++ for (i = 0; i < obj->nr_maps; i++) ++ if (obj->maps[i].pinned && !obj->maps[i].reused) ++ bpf_map__unpin(&obj->maps[i], NULL); ++ ++ bpf_object_unload(obj); ++ pr_warn("failed to load object '%s'\n", obj->path); ++ return libbpf_err(err); ++} ++ ++int bpf_object__load(struct bpf_object *obj) ++{ ++ return bpf_object_load(obj, 0, NULL); ++} ++ ++static int make_parent_dir(const char *path) ++{ ++ char *cp, errmsg[STRERR_BUFSIZE]; ++ char *dname, *dir; ++ int err = 0; ++ ++ dname = strdup(path); ++ if (dname == NULL) ++ return -ENOMEM; ++ ++ dir = dirname(dname); ++ if (mkdir(dir, 0700) && errno != EEXIST) ++ err = -errno; ++ ++ free(dname); ++ if (err) { ++ cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); ++ pr_warn("failed to mkdir %s: %s\n", path, cp); ++ } ++ return err; ++} ++ ++static int check_path(const char *path) ++{ ++ char *cp, errmsg[STRERR_BUFSIZE]; ++ struct statfs st_fs; ++ char *dname, *dir; ++ int err = 0; ++ ++ if (path == NULL) ++ return -EINVAL; ++ ++ dname = strdup(path); ++ if (dname == NULL) ++ return -ENOMEM; ++ ++ dir = dirname(dname); ++ if (statfs(dir, &st_fs)) { ++ cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); ++ pr_warn("failed to statfs %s: %s\n", dir, cp); ++ err = -errno; ++ } ++ free(dname); ++ ++ if (!err && st_fs.f_type != BPF_FS_MAGIC) { ++ pr_warn("specified path %s is not on BPF FS\n", path); ++ err = -EINVAL; ++ } ++ ++ return err; ++} ++ ++int bpf_program__pin(struct bpf_program *prog, const char *path) ++{ ++ char *cp, errmsg[STRERR_BUFSIZE]; ++ int err; ++ ++ if (prog->fd < 0) { ++ pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name); ++ return libbpf_err(-EINVAL); ++ } ++ ++ err = make_parent_dir(path); ++ if (err) ++ return libbpf_err(err); ++ ++ err = check_path(path); ++ if (err) ++ return libbpf_err(err); ++ ++ if (bpf_obj_pin(prog->fd, path)) { ++ err = -errno; ++ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); ++ pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp); ++ return libbpf_err(err); ++ } ++ ++ pr_debug("prog '%s': pinned at '%s'\n", prog->name, path); ++ return 0; ++} ++ ++int bpf_program__unpin(struct bpf_program *prog, const char *path) ++{ ++ int err; ++ ++ if (prog->fd < 0) { ++ pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name); ++ return libbpf_err(-EINVAL); ++ } ++ ++ err = check_path(path); ++ if (err) ++ return libbpf_err(err); ++ ++ err = unlink(path); ++ if (err) ++ return libbpf_err(-errno); ++ ++ pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path); ++ return 0; ++} ++ ++int bpf_map__pin(struct bpf_map *map, const char *path) ++{ ++ char *cp, errmsg[STRERR_BUFSIZE]; ++ int err; ++ ++ if (map == NULL) { ++ 
pr_warn("invalid map pointer\n"); ++ return libbpf_err(-EINVAL); ++ } ++ ++ if (map->pin_path) { ++ if (path && strcmp(path, map->pin_path)) { ++ pr_warn("map '%s' already has pin path '%s' different from '%s'\n", ++ bpf_map__name(map), map->pin_path, path); ++ return libbpf_err(-EINVAL); ++ } else if (map->pinned) { ++ pr_debug("map '%s' already pinned at '%s'; not re-pinning\n", ++ bpf_map__name(map), map->pin_path); ++ return 0; ++ } ++ } else { ++ if (!path) { ++ pr_warn("missing a path to pin map '%s' at\n", ++ bpf_map__name(map)); ++ return libbpf_err(-EINVAL); ++ } else if (map->pinned) { ++ pr_warn("map '%s' already pinned\n", bpf_map__name(map)); ++ return libbpf_err(-EEXIST); ++ } ++ ++ map->pin_path = strdup(path); ++ if (!map->pin_path) { ++ err = -errno; ++ goto out_err; ++ } ++ } ++ ++ err = make_parent_dir(map->pin_path); ++ if (err) ++ return libbpf_err(err); ++ ++ err = check_path(map->pin_path); ++ if (err) ++ return libbpf_err(err); ++ ++ if (bpf_obj_pin(map->fd, map->pin_path)) { ++ err = -errno; ++ goto out_err; ++ } ++ ++ map->pinned = true; ++ pr_debug("pinned map '%s'\n", map->pin_path); ++ ++ return 0; ++ ++out_err: ++ cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); ++ pr_warn("failed to pin map: %s\n", cp); ++ return libbpf_err(err); ++} ++ ++int bpf_map__unpin(struct bpf_map *map, const char *path) ++{ ++ int err; ++ ++ if (map == NULL) { ++ pr_warn("invalid map pointer\n"); ++ return libbpf_err(-EINVAL); ++ } ++ ++ if (map->pin_path) { ++ if (path && strcmp(path, map->pin_path)) { ++ pr_warn("map '%s' already has pin path '%s' different from '%s'\n", ++ bpf_map__name(map), map->pin_path, path); ++ return libbpf_err(-EINVAL); ++ } ++ path = map->pin_path; ++ } else if (!path) { ++ pr_warn("no path to unpin map '%s' from\n", ++ bpf_map__name(map)); ++ return libbpf_err(-EINVAL); ++ } ++ ++ err = check_path(path); ++ if (err) ++ return libbpf_err(err); ++ ++ err = unlink(path); ++ if (err != 0) ++ return libbpf_err(-errno); ++ ++ map->pinned = false; ++ pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path); ++ ++ return 0; ++} ++ ++int bpf_map__set_pin_path(struct bpf_map *map, const char *path) ++{ ++ char *new = NULL; ++ ++ if (path) { ++ new = strdup(path); ++ if (!new) ++ return libbpf_err(-errno); ++ } ++ ++ free(map->pin_path); ++ map->pin_path = new; ++ return 0; ++} ++ ++__alias(bpf_map__pin_path) ++const char *bpf_map__get_pin_path(const struct bpf_map *map); ++ ++const char *bpf_map__pin_path(const struct bpf_map *map) ++{ ++ return map->pin_path; ++} ++ ++bool bpf_map__is_pinned(const struct bpf_map *map) ++{ ++ return map->pinned; ++} ++ ++static void sanitize_pin_path(char *s) ++{ ++ /* bpffs disallows periods in path names */ ++ while (*s) { ++ if (*s == '.') ++ *s = '_'; ++ s++; ++ } ++} ++ ++int bpf_object__pin_maps(struct bpf_object *obj, const char *path) ++{ ++ struct bpf_map *map; ++ int err; ++ ++ if (!obj) ++ return libbpf_err(-ENOENT); ++ ++ if (!obj->loaded) { ++ pr_warn("object not yet loaded; load it first\n"); ++ return libbpf_err(-ENOENT); ++ } ++ ++ bpf_object__for_each_map(map, obj) { ++ char *pin_path = NULL; ++ char buf[PATH_MAX]; ++ ++ if (!map->autocreate) ++ continue; ++ ++ if (path) { ++ int len; ++ ++ len = snprintf(buf, PATH_MAX, "%s/%s", path, ++ bpf_map__name(map)); ++ if (len < 0) { ++ err = -EINVAL; ++ goto err_unpin_maps; ++ } else if (len >= PATH_MAX) { ++ err = -ENAMETOOLONG; ++ goto err_unpin_maps; ++ } ++ sanitize_pin_path(buf); ++ pin_path = buf; ++ } else if (!map->pin_path) { ++ continue; ++ } ++ 
++ err = bpf_map__pin(map, pin_path); ++ if (err) ++ goto err_unpin_maps; ++ } ++ ++ return 0; ++ ++err_unpin_maps: ++ while ((map = bpf_object__prev_map(obj, map))) { ++ if (!map->pin_path) ++ continue; ++ ++ bpf_map__unpin(map, NULL); ++ } ++ ++ return libbpf_err(err); ++} ++ ++int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) ++{ ++ struct bpf_map *map; ++ int err; ++ ++ if (!obj) ++ return libbpf_err(-ENOENT); ++ ++ bpf_object__for_each_map(map, obj) { ++ char *pin_path = NULL; ++ char buf[PATH_MAX]; ++ ++ if (path) { ++ int len; ++ ++ len = snprintf(buf, PATH_MAX, "%s/%s", path, ++ bpf_map__name(map)); ++ if (len < 0) ++ return libbpf_err(-EINVAL); ++ else if (len >= PATH_MAX) ++ return libbpf_err(-ENAMETOOLONG); ++ sanitize_pin_path(buf); ++ pin_path = buf; ++ } else if (!map->pin_path) { ++ continue; ++ } ++ ++ err = bpf_map__unpin(map, pin_path); ++ if (err) ++ return libbpf_err(err); ++ } ++ ++ return 0; ++} ++ ++int bpf_object__pin_programs(struct bpf_object *obj, const char *path) ++{ ++ struct bpf_program *prog; ++ int err; ++ ++ if (!obj) ++ return libbpf_err(-ENOENT); ++ ++ if (!obj->loaded) { ++ pr_warn("object not yet loaded; load it first\n"); ++ return libbpf_err(-ENOENT); ++ } ++ ++ bpf_object__for_each_program(prog, obj) { ++ char buf[PATH_MAX]; ++ int len; ++ ++ len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name); ++ if (len < 0) { ++ err = -EINVAL; ++ goto err_unpin_programs; ++ } else if (len >= PATH_MAX) { ++ err = -ENAMETOOLONG; ++ goto err_unpin_programs; ++ } ++ ++ err = bpf_program__pin(prog, buf); ++ if (err) ++ goto err_unpin_programs; ++ } ++ ++ return 0; ++ ++err_unpin_programs: ++ while ((prog = bpf_object__prev_program(obj, prog))) { ++ char buf[PATH_MAX]; ++ int len; ++ ++ len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name); ++ if (len < 0) ++ continue; ++ else if (len >= PATH_MAX) ++ continue; ++ ++ bpf_program__unpin(prog, buf); ++ } ++ ++ return libbpf_err(err); ++} ++ ++int bpf_object__unpin_programs(struct bpf_object *obj, const char *path) ++{ ++ struct bpf_program *prog; ++ int err; ++ ++ if (!obj) ++ return libbpf_err(-ENOENT); ++ ++ bpf_object__for_each_program(prog, obj) { ++ char buf[PATH_MAX]; ++ int len; ++ ++ len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name); ++ if (len < 0) ++ return libbpf_err(-EINVAL); ++ else if (len >= PATH_MAX) ++ return libbpf_err(-ENAMETOOLONG); ++ ++ err = bpf_program__unpin(prog, buf); ++ if (err) ++ return libbpf_err(err); ++ } ++ ++ return 0; ++} ++ ++int bpf_object__pin(struct bpf_object *obj, const char *path) ++{ ++ int err; ++ ++ err = bpf_object__pin_maps(obj, path); ++ if (err) ++ return libbpf_err(err); ++ ++ err = bpf_object__pin_programs(obj, path); ++ if (err) { ++ bpf_object__unpin_maps(obj, path); ++ return libbpf_err(err); ++ } ++ ++ return 0; ++} ++ ++static void bpf_map__destroy(struct bpf_map *map) ++{ ++ if (map->inner_map) { ++ bpf_map__destroy(map->inner_map); ++ zfree(&map->inner_map); ++ } ++ ++ zfree(&map->init_slots); ++ map->init_slots_sz = 0; ++ ++ if (map->mmaped) { ++ munmap(map->mmaped, bpf_map_mmap_sz(map)); ++ map->mmaped = NULL; ++ } ++ ++ if (map->st_ops) { ++ zfree(&map->st_ops->data); ++ zfree(&map->st_ops->progs); ++ zfree(&map->st_ops->kern_func_off); ++ zfree(&map->st_ops); ++ } ++ ++ zfree(&map->name); ++ zfree(&map->real_name); ++ zfree(&map->pin_path); ++ ++ if (map->fd >= 0) ++ zclose(map->fd); ++} ++ ++void bpf_object__close(struct bpf_object *obj) ++{ ++ size_t i; ++ ++ if (IS_ERR_OR_NULL(obj)) ++ return; ++ ++ 
usdt_manager_free(obj->usdt_man); ++ obj->usdt_man = NULL; ++ ++ bpf_gen__free(obj->gen_loader); ++ bpf_object__elf_finish(obj); ++ bpf_object_unload(obj); ++ btf__free(obj->btf); ++ btf_ext__free(obj->btf_ext); ++ ++ for (i = 0; i < obj->nr_maps; i++) ++ bpf_map__destroy(&obj->maps[i]); ++ ++ zfree(&obj->btf_custom_path); ++ zfree(&obj->kconfig); ++ zfree(&obj->externs); ++ obj->nr_extern = 0; ++ ++ zfree(&obj->maps); ++ obj->nr_maps = 0; ++ ++ if (obj->programs && obj->nr_programs) { ++ for (i = 0; i < obj->nr_programs; i++) ++ bpf_program__exit(&obj->programs[i]); ++ } ++ zfree(&obj->programs); ++ ++ free(obj); ++} ++ ++const char *bpf_object__name(const struct bpf_object *obj) ++{ ++ return obj ? obj->name : libbpf_err_ptr(-EINVAL); ++} ++ ++unsigned int bpf_object__kversion(const struct bpf_object *obj) ++{ ++ return obj ? obj->kern_version : 0; ++} ++ ++struct btf *bpf_object__btf(const struct bpf_object *obj) ++{ ++ return obj ? obj->btf : NULL; ++} ++ ++int bpf_object__btf_fd(const struct bpf_object *obj) ++{ ++ return obj->btf ? btf__fd(obj->btf) : -1; ++} ++ ++int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version) ++{ ++ if (obj->loaded) ++ return libbpf_err(-EINVAL); ++ ++ obj->kern_version = kern_version; ++ ++ return 0; ++} ++ ++int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts) ++{ ++ struct bpf_gen *gen; ++ ++ if (!opts) ++ return -EFAULT; ++ if (!OPTS_VALID(opts, gen_loader_opts)) ++ return -EINVAL; ++ gen = calloc(sizeof(*gen), 1); ++ if (!gen) ++ return -ENOMEM; ++ gen->opts = opts; ++ obj->gen_loader = gen; ++ return 0; ++} ++ ++static struct bpf_program * ++__bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj, ++ bool forward) ++{ ++ size_t nr_programs = obj->nr_programs; ++ ssize_t idx; ++ ++ if (!nr_programs) ++ return NULL; ++ ++ if (!p) ++ /* Iter from the beginning */ ++ return forward ? &obj->programs[0] : ++ &obj->programs[nr_programs - 1]; ++ ++ if (p->obj != obj) { ++ pr_warn("error: program handler doesn't match object\n"); ++ return errno = EINVAL, NULL; ++ } ++ ++ idx = (p - obj->programs) + (forward ? 
1 : -1); ++ if (idx >= obj->nr_programs || idx < 0) ++ return NULL; ++ return &obj->programs[idx]; ++} ++ ++struct bpf_program * ++bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) ++{ ++ struct bpf_program *prog = prev; ++ ++ do { ++ prog = __bpf_program__iter(prog, obj, true); ++ } while (prog && prog_is_subprog(obj, prog)); ++ ++ return prog; ++} ++ ++struct bpf_program * ++bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next) ++{ ++ struct bpf_program *prog = next; ++ ++ do { ++ prog = __bpf_program__iter(prog, obj, false); ++ } while (prog && prog_is_subprog(obj, prog)); ++ ++ return prog; ++} ++ ++void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex) ++{ ++ prog->prog_ifindex = ifindex; ++} ++ ++const char *bpf_program__name(const struct bpf_program *prog) ++{ ++ return prog->name; ++} ++ ++const char *bpf_program__section_name(const struct bpf_program *prog) ++{ ++ return prog->sec_name; ++} ++ ++bool bpf_program__autoload(const struct bpf_program *prog) ++{ ++ return prog->autoload; ++} ++ ++int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) ++{ ++ if (prog->obj->loaded) ++ return libbpf_err(-EINVAL); ++ ++ prog->autoload = autoload; ++ return 0; ++} ++ ++const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog) ++{ ++ return prog->insns; ++} ++ ++size_t bpf_program__insn_cnt(const struct bpf_program *prog) ++{ ++ return prog->insns_cnt; ++} ++ ++int bpf_program__set_insns(struct bpf_program *prog, ++ struct bpf_insn *new_insns, size_t new_insn_cnt) ++{ ++ struct bpf_insn *insns; ++ ++ if (prog->obj->loaded) ++ return -EBUSY; ++ ++ insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns)); ++ if (!insns) { ++ pr_warn("prog '%s': failed to realloc prog code\n", prog->name); ++ return -ENOMEM; ++ } ++ memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns)); ++ ++ prog->insns = insns; ++ prog->insns_cnt = new_insn_cnt; ++ return 0; ++} ++ ++int bpf_program__fd(const struct bpf_program *prog) ++{ ++ if (!prog) ++ return libbpf_err(-EINVAL); ++ ++ if (prog->fd < 0) ++ return libbpf_err(-ENOENT); ++ ++ return prog->fd; ++} ++ ++__alias(bpf_program__type) ++enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog); ++ ++enum bpf_prog_type bpf_program__type(const struct bpf_program *prog) ++{ ++ return prog->type; ++} ++ ++int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) ++{ ++ if (prog->obj->loaded) ++ return libbpf_err(-EBUSY); ++ ++ prog->type = type; ++ return 0; ++} ++ ++__alias(bpf_program__expected_attach_type) ++enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog); ++ ++enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog) ++{ ++ return prog->expected_attach_type; ++} ++ ++int bpf_program__set_expected_attach_type(struct bpf_program *prog, ++ enum bpf_attach_type type) ++{ ++ if (prog->obj->loaded) ++ return libbpf_err(-EBUSY); ++ ++ prog->expected_attach_type = type; ++ return 0; ++} ++ ++__u32 bpf_program__flags(const struct bpf_program *prog) ++{ ++ return prog->prog_flags; ++} ++ ++int bpf_program__set_flags(struct bpf_program *prog, __u32 flags) ++{ ++ if (prog->obj->loaded) ++ return libbpf_err(-EBUSY); ++ ++ prog->prog_flags = flags; ++ return 0; ++} ++ ++__u32 bpf_program__log_level(const struct bpf_program *prog) ++{ ++ return prog->log_level; ++} ++ ++int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level) ++{ ++ if 
(prog->obj->loaded) ++ return libbpf_err(-EBUSY); ++ ++ prog->log_level = log_level; ++ return 0; ++} ++ ++const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size) ++{ ++ *log_size = prog->log_size; ++ return prog->log_buf; ++} ++ ++int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size) ++{ ++ if (log_size && !log_buf) ++ return -EINVAL; ++ if (prog->log_size > UINT_MAX) ++ return -EINVAL; ++ if (prog->obj->loaded) ++ return -EBUSY; ++ ++ prog->log_buf = log_buf; ++ prog->log_size = log_size; ++ return 0; ++} ++ ++#define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \ ++ .sec = (char *)sec_pfx, \ ++ .prog_type = BPF_PROG_TYPE_##ptype, \ ++ .expected_attach_type = atype, \ ++ .cookie = (long)(flags), \ ++ .prog_prepare_load_fn = libbpf_prepare_prog_load, \ ++ __VA_ARGS__ \ ++} ++ ++static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); ++static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); ++static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link); ++static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link); ++static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); ++static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); ++static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link); ++static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); ++static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link); ++static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link); ++ ++static const struct bpf_sec_def section_defs[] = { ++ SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE), ++ SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE), ++ SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE), ++ SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), ++ SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), ++ SEC_DEF("uprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), ++ SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), ++ SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), ++ SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), ++ SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), ++ SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), ++ SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), ++ SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), ++ SEC_DEF("usdt+", KPROBE, 0, SEC_NONE, attach_usdt), ++ SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), ++ SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), ++ SEC_DEF("action", SCHED_ACT, 0, SEC_NONE), ++ SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp), ++ SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp), ++ SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), ++ SEC_DEF("raw_tp+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), ++ SEC_DEF("raw_tracepoint.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), ++ SEC_DEF("raw_tp.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), ++ SEC_DEF("tp_btf+", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), ++ SEC_DEF("fentry+", TRACING, BPF_TRACE_FENTRY, 
SEC_ATTACH_BTF, attach_trace), ++ SEC_DEF("fmod_ret+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), ++ SEC_DEF("fexit+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace), ++ SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), ++ SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), ++ SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), ++ SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace), ++ SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), ++ SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), ++ SEC_DEF("lsm_cgroup+", LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF), ++ SEC_DEF("iter+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), ++ SEC_DEF("iter.s+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter), ++ SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE), ++ SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS), ++ SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), ++ SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS), ++ SEC_DEF("xdp/cpumap", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), ++ SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS), ++ SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT), ++ SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE), ++ SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE), ++ SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE), ++ SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE), ++ SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE), ++ SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT), ++ SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT), ++ SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT), ++ SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE), ++ SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT), ++ SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT), ++ SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT), ++ SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT), ++ SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT), ++ SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE), ++ SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE), ++ SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE), ++ SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT), ++ SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE), ++ SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE), ++ SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE), ++ SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE), ++ SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE), ++ SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE), ++ SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE), ++ SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE), ++ SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE), ++ SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE), ++ SEC_DEF("cgroup/getpeername4", 
CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE), ++ SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE), ++ SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE), ++ SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE), ++ SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE), ++ SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE), ++ SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE), ++ SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT), ++ SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE), ++ SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), ++}; ++ ++static size_t custom_sec_def_cnt; ++static struct bpf_sec_def *custom_sec_defs; ++static struct bpf_sec_def custom_fallback_def; ++static bool has_custom_fallback_def; ++ ++static int last_custom_sec_def_handler_id; ++ ++int libbpf_register_prog_handler(const char *sec, ++ enum bpf_prog_type prog_type, ++ enum bpf_attach_type exp_attach_type, ++ const struct libbpf_prog_handler_opts *opts) ++{ ++ struct bpf_sec_def *sec_def; ++ ++ if (!OPTS_VALID(opts, libbpf_prog_handler_opts)) ++ return libbpf_err(-EINVAL); ++ ++ if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */ ++ return libbpf_err(-E2BIG); ++ ++ if (sec) { ++ sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1, ++ sizeof(*sec_def)); ++ if (!sec_def) ++ return libbpf_err(-ENOMEM); ++ ++ custom_sec_defs = sec_def; ++ sec_def = &custom_sec_defs[custom_sec_def_cnt]; ++ } else { ++ if (has_custom_fallback_def) ++ return libbpf_err(-EBUSY); ++ ++ sec_def = &custom_fallback_def; ++ } ++ ++ sec_def->sec = sec ? 
strdup(sec) : NULL; ++ if (sec && !sec_def->sec) ++ return libbpf_err(-ENOMEM); ++ ++ sec_def->prog_type = prog_type; ++ sec_def->expected_attach_type = exp_attach_type; ++ sec_def->cookie = OPTS_GET(opts, cookie, 0); ++ ++ sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL); ++ sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL); ++ sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL); ++ ++ sec_def->handler_id = ++last_custom_sec_def_handler_id; ++ ++ if (sec) ++ custom_sec_def_cnt++; ++ else ++ has_custom_fallback_def = true; ++ ++ return sec_def->handler_id; ++} ++ ++int libbpf_unregister_prog_handler(int handler_id) ++{ ++ struct bpf_sec_def *sec_defs; ++ int i; ++ ++ if (handler_id <= 0) ++ return libbpf_err(-EINVAL); ++ ++ if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) { ++ memset(&custom_fallback_def, 0, sizeof(custom_fallback_def)); ++ has_custom_fallback_def = false; ++ return 0; ++ } ++ ++ for (i = 0; i < custom_sec_def_cnt; i++) { ++ if (custom_sec_defs[i].handler_id == handler_id) ++ break; ++ } ++ ++ if (i == custom_sec_def_cnt) ++ return libbpf_err(-ENOENT); ++ ++ free(custom_sec_defs[i].sec); ++ for (i = i + 1; i < custom_sec_def_cnt; i++) ++ custom_sec_defs[i - 1] = custom_sec_defs[i]; ++ custom_sec_def_cnt--; ++ ++ /* try to shrink the array, but it's ok if we couldn't */ ++ sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs)); ++ if (sec_defs) ++ custom_sec_defs = sec_defs; ++ ++ return 0; ++} ++ ++static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name) ++{ ++ size_t len = strlen(sec_def->sec); ++ ++ /* "type/" always has to have proper SEC("type/extras") form */ ++ if (sec_def->sec[len - 1] == '/') { ++ if (str_has_pfx(sec_name, sec_def->sec)) ++ return true; ++ return false; ++ } ++ ++ /* "type+" means it can be either exact SEC("type") or ++ * well-formed SEC("type/extras") with proper '/' separator ++ */ ++ if (sec_def->sec[len - 1] == '+') { ++ len--; ++ /* not even a prefix */ ++ if (strncmp(sec_name, sec_def->sec, len) != 0) ++ return false; ++ /* exact match or has '/' separator */ ++ if (sec_name[len] == '\0' || sec_name[len] == '/') ++ return true; ++ return false; ++ } ++ ++ return strcmp(sec_name, sec_def->sec) == 0; ++} ++ ++static const struct bpf_sec_def *find_sec_def(const char *sec_name) ++{ ++ const struct bpf_sec_def *sec_def; ++ int i, n; ++ ++ n = custom_sec_def_cnt; ++ for (i = 0; i < n; i++) { ++ sec_def = &custom_sec_defs[i]; ++ if (sec_def_matches(sec_def, sec_name)) ++ return sec_def; ++ } ++ ++ n = ARRAY_SIZE(section_defs); ++ for (i = 0; i < n; i++) { ++ sec_def = &section_defs[i]; ++ if (sec_def_matches(sec_def, sec_name)) ++ return sec_def; ++ } ++ ++ if (has_custom_fallback_def) ++ return &custom_fallback_def; ++ ++ return NULL; ++} ++ ++#define MAX_TYPE_NAME_SIZE 32 ++ ++static char *libbpf_get_type_names(bool attach_type) ++{ ++ int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE; ++ char *buf; ++ ++ buf = malloc(len); ++ if (!buf) ++ return NULL; ++ ++ buf[0] = '\0'; ++ /* Forge string buf with all available names */ ++ for (i = 0; i < ARRAY_SIZE(section_defs); i++) { ++ const struct bpf_sec_def *sec_def = &section_defs[i]; ++ ++ if (attach_type) { ++ if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load) ++ continue; ++ ++ if (!(sec_def->cookie & SEC_ATTACHABLE)) ++ continue; ++ } ++ ++ if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) { ++ free(buf); ++ return NULL; ++ } ++ strcat(buf, " 
"); ++ strcat(buf, section_defs[i].sec); ++ } ++ ++ return buf; ++} ++ ++int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, ++ enum bpf_attach_type *expected_attach_type) ++{ ++ const struct bpf_sec_def *sec_def; ++ char *type_names; ++ ++ if (!name) ++ return libbpf_err(-EINVAL); ++ ++ sec_def = find_sec_def(name); ++ if (sec_def) { ++ *prog_type = sec_def->prog_type; ++ *expected_attach_type = sec_def->expected_attach_type; ++ return 0; ++ } ++ ++ pr_debug("failed to guess program type from ELF section '%s'\n", name); ++ type_names = libbpf_get_type_names(false); ++ if (type_names != NULL) { ++ pr_debug("supported section(type) names are:%s\n", type_names); ++ free(type_names); ++ } ++ ++ return libbpf_err(-ESRCH); ++} ++ ++const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t) ++{ ++ if (t < 0 || t >= ARRAY_SIZE(attach_type_name)) ++ return NULL; ++ ++ return attach_type_name[t]; ++} ++ ++const char *libbpf_bpf_link_type_str(enum bpf_link_type t) ++{ ++ if (t < 0 || t >= ARRAY_SIZE(link_type_name)) ++ return NULL; ++ ++ return link_type_name[t]; ++} ++ ++const char *libbpf_bpf_map_type_str(enum bpf_map_type t) ++{ ++ if (t < 0 || t >= ARRAY_SIZE(map_type_name)) ++ return NULL; ++ ++ return map_type_name[t]; ++} ++ ++const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t) ++{ ++ if (t < 0 || t >= ARRAY_SIZE(prog_type_name)) ++ return NULL; ++ ++ return prog_type_name[t]; ++} ++ ++static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, ++ size_t offset) ++{ ++ struct bpf_map *map; ++ size_t i; ++ ++ for (i = 0; i < obj->nr_maps; i++) { ++ map = &obj->maps[i]; ++ if (!bpf_map__is_struct_ops(map)) ++ continue; ++ if (map->sec_offset <= offset && ++ offset - map->sec_offset < map->def.value_size) ++ return map; ++ } ++ ++ return NULL; ++} ++ ++/* Collect the reloc from ELF and populate the st_ops->progs[] */ ++static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, ++ Elf64_Shdr *shdr, Elf_Data *data) ++{ ++ const struct btf_member *member; ++ struct bpf_struct_ops *st_ops; ++ struct bpf_program *prog; ++ unsigned int shdr_idx; ++ const struct btf *btf; ++ struct bpf_map *map; ++ unsigned int moff, insn_idx; ++ const char *name; ++ __u32 member_idx; ++ Elf64_Sym *sym; ++ Elf64_Rel *rel; ++ int i, nrels; ++ ++ btf = obj->btf; ++ nrels = shdr->sh_size / shdr->sh_entsize; ++ for (i = 0; i < nrels; i++) { ++ rel = elf_rel_by_idx(data, i); ++ if (!rel) { ++ pr_warn("struct_ops reloc: failed to get %d reloc\n", i); ++ return -LIBBPF_ERRNO__FORMAT; ++ } ++ ++ sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); ++ if (!sym) { ++ pr_warn("struct_ops reloc: symbol %zx not found\n", ++ (size_t)ELF64_R_SYM(rel->r_info)); ++ return -LIBBPF_ERRNO__FORMAT; ++ } ++ ++ name = elf_sym_str(obj, sym->st_name) ?: ""; ++ map = find_struct_ops_map_by_offset(obj, rel->r_offset); ++ if (!map) { ++ pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n", ++ (size_t)rel->r_offset); ++ return -EINVAL; ++ } ++ ++ moff = rel->r_offset - map->sec_offset; ++ shdr_idx = sym->st_shndx; ++ st_ops = map->st_ops; ++ pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n", ++ map->name, ++ (long long)(rel->r_info >> 32), ++ (long long)sym->st_value, ++ shdr_idx, (size_t)rel->r_offset, ++ map->sec_offset, sym->st_name, name); ++ ++ if (shdr_idx >= SHN_LORESERVE) { ++ pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n", ++ map->name, 
(size_t)rel->r_offset, shdr_idx); ++ return -LIBBPF_ERRNO__RELOC; ++ } ++ if (sym->st_value % BPF_INSN_SZ) { ++ pr_warn("struct_ops reloc %s: invalid target program offset %llu\n", ++ map->name, (unsigned long long)sym->st_value); ++ return -LIBBPF_ERRNO__FORMAT; ++ } ++ insn_idx = sym->st_value / BPF_INSN_SZ; ++ ++ member = find_member_by_offset(st_ops->type, moff * 8); ++ if (!member) { ++ pr_warn("struct_ops reloc %s: cannot find member at moff %u\n", ++ map->name, moff); ++ return -EINVAL; ++ } ++ member_idx = member - btf_members(st_ops->type); ++ name = btf__name_by_offset(btf, member->name_off); ++ ++ if (!resolve_func_ptr(btf, member->type, NULL)) { ++ pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n", ++ map->name, name); ++ return -EINVAL; ++ } ++ ++ prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx); ++ if (!prog) { ++ pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n", ++ map->name, shdr_idx, name); ++ return -EINVAL; ++ } ++ ++ /* prevent the use of BPF prog with invalid type */ ++ if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) { ++ pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n", ++ map->name, prog->name); ++ return -EINVAL; ++ } ++ ++ /* if we haven't yet processed this BPF program, record proper ++ * attach_btf_id and member_idx ++ */ ++ if (!prog->attach_btf_id) { ++ prog->attach_btf_id = st_ops->type_id; ++ prog->expected_attach_type = member_idx; ++ } ++ ++ /* struct_ops BPF prog can be re-used between multiple ++ * .struct_ops as long as it's the same struct_ops struct ++ * definition and the same function pointer field ++ */ ++ if (prog->attach_btf_id != st_ops->type_id || ++ prog->expected_attach_type != member_idx) { ++ pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n", ++ map->name, prog->name, prog->sec_name, prog->type, ++ prog->attach_btf_id, prog->expected_attach_type, name); ++ return -EINVAL; ++ } ++ ++ st_ops->progs[member_idx] = prog; ++ } ++ ++ return 0; ++} ++ ++#define BTF_TRACE_PREFIX "btf_trace_" ++#define BTF_LSM_PREFIX "bpf_lsm_" ++#define BTF_ITER_PREFIX "bpf_iter_" ++#define BTF_MAX_NAME_SIZE 128 ++ ++void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, ++ const char **prefix, int *kind) ++{ ++ switch (attach_type) { ++ case BPF_TRACE_RAW_TP: ++ *prefix = BTF_TRACE_PREFIX; ++ *kind = BTF_KIND_TYPEDEF; ++ break; ++ case BPF_LSM_MAC: ++ case BPF_LSM_CGROUP: ++ *prefix = BTF_LSM_PREFIX; ++ *kind = BTF_KIND_FUNC; ++ break; ++ case BPF_TRACE_ITER: ++ *prefix = BTF_ITER_PREFIX; ++ *kind = BTF_KIND_FUNC; ++ break; ++ default: ++ *prefix = ""; ++ *kind = BTF_KIND_FUNC; ++ } ++} ++ ++static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, ++ const char *name, __u32 kind) ++{ ++ char btf_type_name[BTF_MAX_NAME_SIZE]; ++ int ret; ++ ++ ret = snprintf(btf_type_name, sizeof(btf_type_name), ++ "%s%s", prefix, name); ++ /* snprintf returns the number of characters written excluding the ++ * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it ++ * indicates truncation. 
++ */ ++ if (ret < 0 || ret >= sizeof(btf_type_name)) ++ return -ENAMETOOLONG; ++ return btf__find_by_name_kind(btf, btf_type_name, kind); ++} ++ ++static inline int find_attach_btf_id(struct btf *btf, const char *name, ++ enum bpf_attach_type attach_type) ++{ ++ const char *prefix; ++ int kind; ++ ++ btf_get_kernel_prefix_kind(attach_type, &prefix, &kind); ++ return find_btf_by_prefix_kind(btf, prefix, name, kind); ++} ++ ++int libbpf_find_vmlinux_btf_id(const char *name, ++ enum bpf_attach_type attach_type) ++{ ++ struct btf *btf; ++ int err; ++ ++ btf = btf__load_vmlinux_btf(); ++ err = libbpf_get_error(btf); ++ if (err) { ++ pr_warn("vmlinux BTF is not found\n"); ++ return libbpf_err(err); ++ } ++ ++ err = find_attach_btf_id(btf, name, attach_type); ++ if (err <= 0) ++ pr_warn("%s is not found in vmlinux BTF\n", name); ++ ++ btf__free(btf); ++ return libbpf_err(err); ++} ++ ++static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) ++{ ++ struct bpf_prog_info info = {}; ++ __u32 info_len = sizeof(info); ++ struct btf *btf; ++ int err; ++ ++ err = bpf_obj_get_info_by_fd(attach_prog_fd, &info, &info_len); ++ if (err) { ++ pr_warn("failed bpf_obj_get_info_by_fd for FD %d: %d\n", ++ attach_prog_fd, err); ++ return err; ++ } ++ ++ err = -EINVAL; ++ if (!info.btf_id) { ++ pr_warn("The target program doesn't have BTF\n"); ++ goto out; ++ } ++ btf = btf__load_from_kernel_by_id(info.btf_id); ++ err = libbpf_get_error(btf); ++ if (err) { ++ pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err); ++ goto out; ++ } ++ err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); ++ btf__free(btf); ++ if (err <= 0) { ++ pr_warn("%s is not found in prog's BTF\n", name); ++ goto out; ++ } ++out: ++ return err; ++} ++ ++static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name, ++ enum bpf_attach_type attach_type, ++ int *btf_obj_fd, int *btf_type_id) ++{ ++ int ret, i; ++ ++ ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type); ++ if (ret > 0) { ++ *btf_obj_fd = 0; /* vmlinux BTF */ ++ *btf_type_id = ret; ++ return 0; ++ } ++ if (ret != -ENOENT) ++ return ret; ++ ++ ret = load_module_btfs(obj); ++ if (ret) ++ return ret; ++ ++ for (i = 0; i < obj->btf_module_cnt; i++) { ++ const struct module_btf *mod = &obj->btf_modules[i]; ++ ++ ret = find_attach_btf_id(mod->btf, attach_name, attach_type); ++ if (ret > 0) { ++ *btf_obj_fd = mod->fd; ++ *btf_type_id = ret; ++ return 0; ++ } ++ if (ret == -ENOENT) ++ continue; ++ ++ return ret; ++ } ++ ++ return -ESRCH; ++} ++ ++static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name, ++ int *btf_obj_fd, int *btf_type_id) ++{ ++ enum bpf_attach_type attach_type = prog->expected_attach_type; ++ __u32 attach_prog_fd = prog->attach_prog_fd; ++ int err = 0; ++ ++ /* BPF program's BTF ID */ ++ if (attach_prog_fd) { ++ err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd); ++ if (err < 0) { ++ pr_warn("failed to find BPF program (FD %d) BTF ID for '%s': %d\n", ++ attach_prog_fd, attach_name, err); ++ return err; ++ } ++ *btf_obj_fd = 0; ++ *btf_type_id = err; ++ return 0; ++ } ++ ++ /* kernel/module BTF ID */ ++ if (prog->obj->gen_loader) { ++ bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type); ++ *btf_obj_fd = 0; ++ *btf_type_id = 1; ++ } else { ++ err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id); ++ } ++ if (err) { ++ pr_warn("failed to find kernel BTF type ID of '%s': %d\n", attach_name, err); ++ return 
err; ++ } ++ return 0; ++} ++ ++int libbpf_attach_type_by_name(const char *name, ++ enum bpf_attach_type *attach_type) ++{ ++ char *type_names; ++ const struct bpf_sec_def *sec_def; ++ ++ if (!name) ++ return libbpf_err(-EINVAL); ++ ++ sec_def = find_sec_def(name); ++ if (!sec_def) { ++ pr_debug("failed to guess attach type based on ELF section name '%s'\n", name); ++ type_names = libbpf_get_type_names(true); ++ if (type_names != NULL) { ++ pr_debug("attachable section(type) names are:%s\n", type_names); ++ free(type_names); ++ } ++ ++ return libbpf_err(-EINVAL); ++ } ++ ++ if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load) ++ return libbpf_err(-EINVAL); ++ if (!(sec_def->cookie & SEC_ATTACHABLE)) ++ return libbpf_err(-EINVAL); ++ ++ *attach_type = sec_def->expected_attach_type; ++ return 0; ++} ++ ++int bpf_map__fd(const struct bpf_map *map) ++{ ++ return map ? map->fd : libbpf_err(-EINVAL); ++} ++ ++static bool map_uses_real_name(const struct bpf_map *map) ++{ ++ /* Since libbpf started to support custom .data.* and .rodata.* maps, ++ * their user-visible name differs from kernel-visible name. Users see ++ * such map's corresponding ELF section name as a map name. ++ * This check distinguishes .data/.rodata from .data.* and .rodata.* ++ * maps to know which name has to be returned to the user. ++ */ ++ if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0) ++ return true; ++ if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0) ++ return true; ++ return false; ++} ++ ++const char *bpf_map__name(const struct bpf_map *map) ++{ ++ if (!map) ++ return NULL; ++ ++ if (map_uses_real_name(map)) ++ return map->real_name; ++ ++ return map->name; ++} ++ ++enum bpf_map_type bpf_map__type(const struct bpf_map *map) ++{ ++ return map->def.type; ++} ++ ++int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type) ++{ ++ if (map->fd >= 0) ++ return libbpf_err(-EBUSY); ++ map->def.type = type; ++ return 0; ++} ++ ++__u32 bpf_map__map_flags(const struct bpf_map *map) ++{ ++ return map->def.map_flags; ++} ++ ++int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags) ++{ ++ if (map->fd >= 0) ++ return libbpf_err(-EBUSY); ++ map->def.map_flags = flags; ++ return 0; ++} ++ ++__u64 bpf_map__map_extra(const struct bpf_map *map) ++{ ++ return map->map_extra; ++} ++ ++int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra) ++{ ++ if (map->fd >= 0) ++ return libbpf_err(-EBUSY); ++ map->map_extra = map_extra; ++ return 0; ++} ++ ++__u32 bpf_map__numa_node(const struct bpf_map *map) ++{ ++ return map->numa_node; ++} ++ ++int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node) ++{ ++ if (map->fd >= 0) ++ return libbpf_err(-EBUSY); ++ map->numa_node = numa_node; ++ return 0; ++} ++ ++__u32 bpf_map__key_size(const struct bpf_map *map) ++{ ++ return map->def.key_size; ++} ++ ++int bpf_map__set_key_size(struct bpf_map *map, __u32 size) ++{ ++ if (map->fd >= 0) ++ return libbpf_err(-EBUSY); ++ map->def.key_size = size; ++ return 0; ++} ++ ++__u32 bpf_map__value_size(const struct bpf_map *map) ++{ ++ return map->def.value_size; ++} ++ ++int bpf_map__set_value_size(struct bpf_map *map, __u32 size) ++{ ++ if (map->fd >= 0) ++ return libbpf_err(-EBUSY); ++ map->def.value_size = size; ++ return 0; ++} ++ ++__u32 bpf_map__btf_key_type_id(const struct bpf_map *map) ++{ ++ return map ? map->btf_key_type_id : 0; ++} ++ ++__u32 bpf_map__btf_value_type_id(const struct bpf_map *map) ++{ ++ return map ? 
map->btf_value_type_id : 0; ++} ++ ++int bpf_map__set_initial_value(struct bpf_map *map, ++ const void *data, size_t size) ++{ ++ if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG || ++ size != map->def.value_size || map->fd >= 0) ++ return libbpf_err(-EINVAL); ++ ++ memcpy(map->mmaped, data, size); ++ return 0; ++} ++ ++const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize) ++{ ++ if (!map->mmaped) ++ return NULL; ++ *psize = map->def.value_size; ++ return map->mmaped; ++} ++ ++bool bpf_map__is_internal(const struct bpf_map *map) ++{ ++ return map->libbpf_type != LIBBPF_MAP_UNSPEC; ++} ++ ++__u32 bpf_map__ifindex(const struct bpf_map *map) ++{ ++ return map->map_ifindex; ++} ++ ++int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) ++{ ++ if (map->fd >= 0) ++ return libbpf_err(-EBUSY); ++ map->map_ifindex = ifindex; ++ return 0; ++} ++ ++int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) ++{ ++ if (!bpf_map_type__is_map_in_map(map->def.type)) { ++ pr_warn("error: unsupported map type\n"); ++ return libbpf_err(-EINVAL); ++ } ++ if (map->inner_map_fd != -1) { ++ pr_warn("error: inner_map_fd already specified\n"); ++ return libbpf_err(-EINVAL); ++ } ++ if (map->inner_map) { ++ bpf_map__destroy(map->inner_map); ++ zfree(&map->inner_map); ++ } ++ map->inner_map_fd = fd; ++ return 0; ++} ++ ++static struct bpf_map * ++__bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) ++{ ++ ssize_t idx; ++ struct bpf_map *s, *e; ++ ++ if (!obj || !obj->maps) ++ return errno = EINVAL, NULL; ++ ++ s = obj->maps; ++ e = obj->maps + obj->nr_maps; ++ ++ if ((m < s) || (m >= e)) { ++ pr_warn("error in %s: map handler doesn't belong to object\n", ++ __func__); ++ return errno = EINVAL, NULL; ++ } ++ ++ idx = (m - obj->maps) + i; ++ if (idx >= obj->nr_maps || idx < 0) ++ return NULL; ++ return &obj->maps[idx]; ++} ++ ++struct bpf_map * ++bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) ++{ ++ if (prev == NULL) ++ return obj->maps; ++ ++ return __bpf_map__iter(prev, obj, 1); ++} ++ ++struct bpf_map * ++bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next) ++{ ++ if (next == NULL) { ++ if (!obj->nr_maps) ++ return NULL; ++ return obj->maps + obj->nr_maps - 1; ++ } ++ ++ return __bpf_map__iter(next, obj, -1); ++} ++ ++struct bpf_map * ++bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name) ++{ ++ struct bpf_map *pos; ++ ++ bpf_object__for_each_map(pos, obj) { ++ /* if it's a special internal map name (which always starts ++ * with dot) then check if that special name matches the ++ * real map name (ELF section name) ++ */ ++ if (name[0] == '.') { ++ if (pos->real_name && strcmp(pos->real_name, name) == 0) ++ return pos; ++ continue; ++ } ++ /* otherwise map name has to be an exact match */ ++ if (map_uses_real_name(pos)) { ++ if (strcmp(pos->real_name, name) == 0) ++ return pos; ++ continue; ++ } ++ if (strcmp(pos->name, name) == 0) ++ return pos; ++ } ++ return errno = ENOENT, NULL; ++} ++ ++int ++bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name) ++{ ++ return bpf_map__fd(bpf_object__find_map_by_name(obj, name)); ++} ++ ++static int validate_map_op(const struct bpf_map *map, size_t key_sz, ++ size_t value_sz, bool check_value_sz) ++{ ++ if (map->fd <= 0) ++ return -ENOENT; ++ ++ if (map->def.key_size != key_sz) { ++ pr_warn("map '%s': unexpected key size %zu provided, expected %u\n", ++ map->name, key_sz, map->def.key_size); ++ return -EINVAL; ++ 
} ++ ++ if (!check_value_sz) ++ return 0; ++ ++ switch (map->def.type) { ++ case BPF_MAP_TYPE_PERCPU_ARRAY: ++ case BPF_MAP_TYPE_PERCPU_HASH: ++ case BPF_MAP_TYPE_LRU_PERCPU_HASH: ++ case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: { ++ int num_cpu = libbpf_num_possible_cpus(); ++ size_t elem_sz = roundup(map->def.value_size, 8); ++ ++ if (value_sz != num_cpu * elem_sz) { ++ pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n", ++ map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz); ++ return -EINVAL; ++ } ++ break; ++ } ++ default: ++ if (map->def.value_size != value_sz) { ++ pr_warn("map '%s': unexpected value size %zu provided, expected %u\n", ++ map->name, value_sz, map->def.value_size); ++ return -EINVAL; ++ } ++ break; ++ } ++ return 0; ++} ++ ++int bpf_map__lookup_elem(const struct bpf_map *map, ++ const void *key, size_t key_sz, ++ void *value, size_t value_sz, __u64 flags) ++{ ++ int err; ++ ++ err = validate_map_op(map, key_sz, value_sz, true); ++ if (err) ++ return libbpf_err(err); ++ ++ return bpf_map_lookup_elem_flags(map->fd, key, value, flags); ++} ++ ++int bpf_map__update_elem(const struct bpf_map *map, ++ const void *key, size_t key_sz, ++ const void *value, size_t value_sz, __u64 flags) ++{ ++ int err; ++ ++ err = validate_map_op(map, key_sz, value_sz, true); ++ if (err) ++ return libbpf_err(err); ++ ++ return bpf_map_update_elem(map->fd, key, value, flags); ++} ++ ++int bpf_map__delete_elem(const struct bpf_map *map, ++ const void *key, size_t key_sz, __u64 flags) ++{ ++ int err; ++ ++ err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); ++ if (err) ++ return libbpf_err(err); ++ ++ return bpf_map_delete_elem_flags(map->fd, key, flags); ++} ++ ++int bpf_map__lookup_and_delete_elem(const struct bpf_map *map, ++ const void *key, size_t key_sz, ++ void *value, size_t value_sz, __u64 flags) ++{ ++ int err; ++ ++ err = validate_map_op(map, key_sz, value_sz, true); ++ if (err) ++ return libbpf_err(err); ++ ++ return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags); ++} ++ ++int bpf_map__get_next_key(const struct bpf_map *map, ++ const void *cur_key, void *next_key, size_t key_sz) ++{ ++ int err; ++ ++ err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); ++ if (err) ++ return libbpf_err(err); ++ ++ return bpf_map_get_next_key(map->fd, cur_key, next_key); ++} ++ ++long libbpf_get_error(const void *ptr) ++{ ++ if (!IS_ERR_OR_NULL(ptr)) ++ return 0; ++ ++ if (IS_ERR(ptr)) ++ errno = -PTR_ERR(ptr); ++ ++ /* If ptr == NULL, then errno should be already set by the failing ++ * API, because libbpf never returns NULL on success and it now always ++ * sets errno on error. So no extra errno handling for ptr == NULL ++ * case. ++ */ ++ return -errno; ++} ++ ++/* Replace link's underlying BPF program with the new one */ ++int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog) ++{ ++ int ret; ++ ++ ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL); ++ return libbpf_err_errno(ret); ++} ++ ++/* Release "ownership" of underlying BPF resource (typically, BPF program ++ * attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected ++ * link, when destructed through bpf_link__destroy() call won't attempt to ++ * detach/unregisted that BPF resource. This is useful in situations where, ++ * say, attached BPF program has to outlive userspace program that attached it ++ * in the system. 
Depending on type of BPF program, though, there might be ++ * additional steps (like pinning BPF program in BPF FS) necessary to ensure ++ * exit of userspace program doesn't trigger automatic detachment and clean up ++ * inside the kernel. ++ */ ++void bpf_link__disconnect(struct bpf_link *link) ++{ ++ link->disconnected = true; ++} ++ ++int bpf_link__destroy(struct bpf_link *link) ++{ ++ int err = 0; ++ ++ if (IS_ERR_OR_NULL(link)) ++ return 0; ++ ++ if (!link->disconnected && link->detach) ++ err = link->detach(link); ++ if (link->pin_path) ++ free(link->pin_path); ++ if (link->dealloc) ++ link->dealloc(link); ++ else ++ free(link); ++ ++ return libbpf_err(err); ++} ++ ++int bpf_link__fd(const struct bpf_link *link) ++{ ++ return link->fd; ++} ++ ++const char *bpf_link__pin_path(const struct bpf_link *link) ++{ ++ return link->pin_path; ++} ++ ++static int bpf_link__detach_fd(struct bpf_link *link) ++{ ++ return libbpf_err_errno(close(link->fd)); ++} ++ ++struct bpf_link *bpf_link__open(const char *path) ++{ ++ struct bpf_link *link; ++ int fd; ++ ++ fd = bpf_obj_get(path); ++ if (fd < 0) { ++ fd = -errno; ++ pr_warn("failed to open link at %s: %d\n", path, fd); ++ return libbpf_err_ptr(fd); ++ } ++ ++ link = calloc(1, sizeof(*link)); ++ if (!link) { ++ close(fd); ++ return libbpf_err_ptr(-ENOMEM); ++ } ++ link->detach = &bpf_link__detach_fd; ++ link->fd = fd; ++ ++ link->pin_path = strdup(path); ++ if (!link->pin_path) { ++ bpf_link__destroy(link); ++ return libbpf_err_ptr(-ENOMEM); ++ } ++ ++ return link; ++} ++ ++int bpf_link__detach(struct bpf_link *link) ++{ ++ return bpf_link_detach(link->fd) ? -errno : 0; ++} ++ ++int bpf_link__pin(struct bpf_link *link, const char *path) ++{ ++ int err; ++ ++ if (link->pin_path) ++ return libbpf_err(-EBUSY); ++ err = make_parent_dir(path); ++ if (err) ++ return libbpf_err(err); ++ err = check_path(path); ++ if (err) ++ return libbpf_err(err); ++ ++ link->pin_path = strdup(path); ++ if (!link->pin_path) ++ return libbpf_err(-ENOMEM); ++ ++ if (bpf_obj_pin(link->fd, link->pin_path)) { ++ err = -errno; ++ zfree(&link->pin_path); ++ return libbpf_err(err); ++ } ++ ++ pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path); ++ return 0; ++} ++ ++int bpf_link__unpin(struct bpf_link *link) ++{ ++ int err; ++ ++ if (!link->pin_path) ++ return libbpf_err(-EINVAL); ++ ++ err = unlink(link->pin_path); ++ if (err != 0) ++ return -errno; ++ ++ pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path); ++ zfree(&link->pin_path); ++ return 0; ++} ++ ++struct bpf_link_perf { ++ struct bpf_link link; ++ int perf_event_fd; ++ /* legacy kprobe support: keep track of probe identifier and type */ ++ char *legacy_probe_name; ++ bool legacy_is_kprobe; ++ bool legacy_is_retprobe; ++}; ++ ++static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe); ++static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe); ++ ++static int bpf_link_perf_detach(struct bpf_link *link) ++{ ++ struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); ++ int err = 0; ++ ++ if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0) ++ err = -errno; ++ ++ if (perf_link->perf_event_fd != link->fd) ++ close(perf_link->perf_event_fd); ++ close(link->fd); ++ ++ /* legacy uprobe/kprobe needs to be removed after perf event fd closure */ ++ if (perf_link->legacy_probe_name) { ++ if (perf_link->legacy_is_kprobe) { ++ err = remove_kprobe_event_legacy(perf_link->legacy_probe_name, ++ 
perf_link->legacy_is_retprobe); ++ } else { ++ err = remove_uprobe_event_legacy(perf_link->legacy_probe_name, ++ perf_link->legacy_is_retprobe); ++ } ++ } ++ ++ return err; ++} ++ ++static void bpf_link_perf_dealloc(struct bpf_link *link) ++{ ++ struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); ++ ++ free(perf_link->legacy_probe_name); ++ free(perf_link); ++} ++ ++struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd, ++ const struct bpf_perf_event_opts *opts) ++{ ++ char errmsg[STRERR_BUFSIZE]; ++ struct bpf_link_perf *link; ++ int prog_fd, link_fd = -1, err; ++ ++ if (!OPTS_VALID(opts, bpf_perf_event_opts)) ++ return libbpf_err_ptr(-EINVAL); ++ ++ if (pfd < 0) { ++ pr_warn("prog '%s': invalid perf event FD %d\n", ++ prog->name, pfd); ++ return libbpf_err_ptr(-EINVAL); ++ } ++ prog_fd = bpf_program__fd(prog); ++ if (prog_fd < 0) { ++ pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n", ++ prog->name); ++ return libbpf_err_ptr(-EINVAL); ++ } ++ ++ link = calloc(1, sizeof(*link)); ++ if (!link) ++ return libbpf_err_ptr(-ENOMEM); ++ link->link.detach = &bpf_link_perf_detach; ++ link->link.dealloc = &bpf_link_perf_dealloc; ++ link->perf_event_fd = pfd; ++ ++ if (kernel_supports(prog->obj, FEAT_PERF_LINK)) { ++ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts, ++ .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0)); ++ ++ link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts); ++ if (link_fd < 0) { ++ err = -errno; ++ pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n", ++ prog->name, pfd, ++ err, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); ++ goto err_out; ++ } ++ link->link.fd = link_fd; ++ } else { ++ if (OPTS_GET(opts, bpf_cookie, 0)) { ++ pr_warn("prog '%s': user context value is not supported\n", prog->name); ++ err = -EOPNOTSUPP; ++ goto err_out; ++ } ++ ++ if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { ++ err = -errno; ++ pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n", ++ prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); ++ if (err == -EPROTO) ++ pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n", ++ prog->name, pfd); ++ goto err_out; ++ } ++ link->link.fd = pfd; ++ } ++ if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { ++ err = -errno; ++ pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n", ++ prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); ++ goto err_out; ++ } ++ ++ return &link->link; ++err_out: ++ if (link_fd >= 0) ++ close(link_fd); ++ free(link); ++ return libbpf_err_ptr(err); ++} ++ ++struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd) ++{ ++ return bpf_program__attach_perf_event_opts(prog, pfd, NULL); ++} ++ ++/* ++ * this function is expected to parse integer in the range of [0, 2^31-1] from ++ * given file using scanf format string fmt. If actual parsed value is ++ * negative, the result might be indistinguishable from error ++ */ ++static int parse_uint_from_file(const char *file, const char *fmt) ++{ ++ char buf[STRERR_BUFSIZE]; ++ int err, ret; ++ FILE *f; ++ ++ f = fopen(file, "r"); ++ if (!f) { ++ err = -errno; ++ pr_debug("failed to open '%s': %s\n", file, ++ libbpf_strerror_r(err, buf, sizeof(buf))); ++ return err; ++ } ++ err = fscanf(f, fmt, &ret); ++ if (err != 1) { ++ err = err == EOF ? 
-EIO : -errno; ++ pr_debug("failed to parse '%s': %s\n", file, ++ libbpf_strerror_r(err, buf, sizeof(buf))); ++ fclose(f); ++ return err; ++ } ++ fclose(f); ++ return ret; ++} ++ ++static int determine_kprobe_perf_type(void) ++{ ++ const char *file = "/sys/bus/event_source/devices/kprobe/type"; ++ ++ return parse_uint_from_file(file, "%d\n"); ++} ++ ++static int determine_uprobe_perf_type(void) ++{ ++ const char *file = "/sys/bus/event_source/devices/uprobe/type"; ++ ++ return parse_uint_from_file(file, "%d\n"); ++} ++ ++static int determine_kprobe_retprobe_bit(void) ++{ ++ const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe"; ++ ++ return parse_uint_from_file(file, "config:%d\n"); ++} ++ ++static int determine_uprobe_retprobe_bit(void) ++{ ++ const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe"; ++ ++ return parse_uint_from_file(file, "config:%d\n"); ++} ++ ++#define PERF_UPROBE_REF_CTR_OFFSET_BITS 32 ++#define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32 ++ ++static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, ++ uint64_t offset, int pid, size_t ref_ctr_off) ++{ ++ struct perf_event_attr attr = {}; ++ char errmsg[STRERR_BUFSIZE]; ++ int type, pfd; ++ ++ if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS)) ++ return -EINVAL; ++ ++ type = uprobe ? determine_uprobe_perf_type() ++ : determine_kprobe_perf_type(); ++ if (type < 0) { ++ pr_warn("failed to determine %s perf type: %s\n", ++ uprobe ? "uprobe" : "kprobe", ++ libbpf_strerror_r(type, errmsg, sizeof(errmsg))); ++ return type; ++ } ++ if (retprobe) { ++ int bit = uprobe ? determine_uprobe_retprobe_bit() ++ : determine_kprobe_retprobe_bit(); ++ ++ if (bit < 0) { ++ pr_warn("failed to determine %s retprobe bit: %s\n", ++ uprobe ? "uprobe" : "kprobe", ++ libbpf_strerror_r(bit, errmsg, sizeof(errmsg))); ++ return bit; ++ } ++ attr.config |= 1 << bit; ++ } ++ attr.size = sizeof(attr); ++ attr.type = type; ++ attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT; ++ attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */ ++ attr.config2 = offset; /* kprobe_addr or probe_offset */ ++ ++ /* pid filter is meaningful only for uprobes */ ++ pfd = syscall(__NR_perf_event_open, &attr, ++ pid < 0 ? -1 : pid /* pid */, ++ pid == -1 ? 0 : -1 /* cpu */, ++ -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); ++ return pfd >= 0 ? pfd : -errno; ++} ++ ++static int append_to_file(const char *file, const char *fmt, ...) ++{ ++ int fd, n, err = 0; ++ va_list ap; ++ ++ fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0); ++ if (fd < 0) ++ return -errno; ++ ++ va_start(ap, fmt); ++ n = vdprintf(fd, fmt, ap); ++ va_end(ap); ++ ++ if (n < 0) ++ err = -errno; ++ ++ close(fd); ++ return err; ++} ++ ++#define DEBUGFS "/sys/kernel/debug/tracing" ++#define TRACEFS "/sys/kernel/tracing" ++ ++static bool use_debugfs(void) ++{ ++ static int has_debugfs = -1; ++ ++ if (has_debugfs < 0) ++ has_debugfs = access(DEBUGFS, F_OK) == 0; ++ ++ return has_debugfs == 1; ++} ++ ++static const char *tracefs_path(void) ++{ ++ return use_debugfs() ? DEBUGFS : TRACEFS; ++} ++ ++static const char *tracefs_kprobe_events(void) ++{ ++ return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events"; ++} ++ ++static const char *tracefs_uprobe_events(void) ++{ ++ return use_debugfs() ? 
DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events"; ++} ++ ++static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, ++ const char *kfunc_name, size_t offset) ++{ ++ static int index = 0; ++ ++ snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset, ++ __sync_fetch_and_add(&index, 1)); ++} ++ ++static int add_kprobe_event_legacy(const char *probe_name, bool retprobe, ++ const char *kfunc_name, size_t offset) ++{ ++ return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx", ++ retprobe ? 'r' : 'p', ++ retprobe ? "kretprobes" : "kprobes", ++ probe_name, kfunc_name, offset); ++} ++ ++static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe) ++{ ++ return append_to_file(tracefs_kprobe_events(), "-:%s/%s", ++ retprobe ? "kretprobes" : "kprobes", probe_name); ++} ++ ++static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe) ++{ ++ char file[256]; ++ ++ snprintf(file, sizeof(file), "%s/events/%s/%s/id", ++ tracefs_path(), retprobe ? "kretprobes" : "kprobes", probe_name); ++ ++ return parse_uint_from_file(file, "%d\n"); ++} ++ ++static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, ++ const char *kfunc_name, size_t offset, int pid) ++{ ++ struct perf_event_attr attr = {}; ++ char errmsg[STRERR_BUFSIZE]; ++ int type, pfd, err; ++ ++ err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset); ++ if (err < 0) { ++ pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n", ++ kfunc_name, offset, ++ libbpf_strerror_r(err, errmsg, sizeof(errmsg))); ++ return err; ++ } ++ type = determine_kprobe_perf_type_legacy(probe_name, retprobe); ++ if (type < 0) { ++ err = type; ++ pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n", ++ kfunc_name, offset, ++ libbpf_strerror_r(err, errmsg, sizeof(errmsg))); ++ goto err_clean_legacy; ++ } ++ attr.size = sizeof(attr); ++ attr.config = type; ++ attr.type = PERF_TYPE_TRACEPOINT; ++ ++ pfd = syscall(__NR_perf_event_open, &attr, ++ pid < 0 ? -1 : pid, /* pid */ ++ pid == -1 ? 0 : -1, /* cpu */ ++ -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); ++ if (pfd < 0) { ++ err = -errno; ++ pr_warn("legacy kprobe perf_event_open() failed: %s\n", ++ libbpf_strerror_r(err, errmsg, sizeof(errmsg))); ++ goto err_clean_legacy; ++ } ++ return pfd; ++ ++err_clean_legacy: ++ /* Clear the newly added legacy kprobe_event */ ++ remove_kprobe_event_legacy(probe_name, retprobe); ++ return err; ++} ++ ++static const char *arch_specific_syscall_pfx(void) ++{ ++#if defined(__x86_64__) ++ return "x64"; ++#elif defined(__i386__) ++ return "ia32"; ++#elif defined(__s390x__) ++ return "s390x"; ++#elif defined(__s390__) ++ return "s390"; ++#elif defined(__arm__) ++ return "arm"; ++#elif defined(__aarch64__) ++ return "arm64"; ++#elif defined(__mips__) ++ return "mips"; ++#elif defined(__riscv) ++ return "riscv"; ++#elif defined(__powerpc__) ++ return "powerpc"; ++#elif defined(__powerpc64__) ++ return "powerpc64"; ++#else ++ return NULL; ++#endif ++} ++ ++static int probe_kern_syscall_wrapper(void) ++{ ++ char syscall_name[64]; ++ const char *ksys_pfx; ++ ++ ksys_pfx = arch_specific_syscall_pfx(); ++ if (!ksys_pfx) ++ return 0; ++ ++ snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx); ++ ++ if (determine_kprobe_perf_type() >= 0) { ++ int pfd; ++ ++ pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0); ++ if (pfd >= 0) ++ close(pfd); ++ ++ return pfd >= 0 ? 
1 : 0; ++ } else { /* legacy mode */ ++ char probe_name[128]; ++ ++ gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); ++ if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0) ++ return 0; ++ ++ (void)remove_kprobe_event_legacy(probe_name, false); ++ return 1; ++ } ++} ++ ++struct bpf_link * ++bpf_program__attach_kprobe_opts(const struct bpf_program *prog, ++ const char *func_name, ++ const struct bpf_kprobe_opts *opts) ++{ ++ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); ++ char errmsg[STRERR_BUFSIZE]; ++ char *legacy_probe = NULL; ++ struct bpf_link *link; ++ size_t offset; ++ bool retprobe, legacy; ++ int pfd, err; ++ ++ if (!OPTS_VALID(opts, bpf_kprobe_opts)) ++ return libbpf_err_ptr(-EINVAL); ++ ++ retprobe = OPTS_GET(opts, retprobe, false); ++ offset = OPTS_GET(opts, offset, 0); ++ pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); ++ ++ legacy = determine_kprobe_perf_type() < 0; ++ if (!legacy) { ++ pfd = perf_event_open_probe(false /* uprobe */, retprobe, ++ func_name, offset, ++ -1 /* pid */, 0 /* ref_ctr_off */); ++ } else { ++ char probe_name[256]; ++ ++ gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), ++ func_name, offset); ++ ++ legacy_probe = strdup(probe_name); ++ if (!legacy_probe) ++ return libbpf_err_ptr(-ENOMEM); ++ ++ pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name, ++ offset, -1 /* pid */); ++ } ++ if (pfd < 0) { ++ err = -errno; ++ pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n", ++ prog->name, retprobe ? "kretprobe" : "kprobe", ++ func_name, offset, ++ libbpf_strerror_r(err, errmsg, sizeof(errmsg))); ++ goto err_out; ++ } ++ link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); ++ err = libbpf_get_error(link); ++ if (err) { ++ close(pfd); ++ pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n", ++ prog->name, retprobe ? "kretprobe" : "kprobe", ++ func_name, offset, ++ libbpf_strerror_r(err, errmsg, sizeof(errmsg))); ++ goto err_clean_legacy; ++ } ++ if (legacy) { ++ struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); ++ ++ perf_link->legacy_probe_name = legacy_probe; ++ perf_link->legacy_is_kprobe = true; ++ perf_link->legacy_is_retprobe = retprobe; ++ } ++ ++ return link; ++ ++err_clean_legacy: ++ if (legacy) ++ remove_kprobe_event_legacy(legacy_probe, retprobe); ++err_out: ++ free(legacy_probe); ++ return libbpf_err_ptr(err); ++} ++ ++struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog, ++ bool retprobe, ++ const char *func_name) ++{ ++ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts, ++ .retprobe = retprobe, ++ ); ++ ++ return bpf_program__attach_kprobe_opts(prog, func_name, &opts); ++} ++ ++struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, ++ const char *syscall_name, ++ const struct bpf_ksyscall_opts *opts) ++{ ++ LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts); ++ char func_name[128]; ++ ++ if (!OPTS_VALID(opts, bpf_ksyscall_opts)) ++ return libbpf_err_ptr(-EINVAL); ++ ++ if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) { ++ /* arch_specific_syscall_pfx() should never return NULL here ++ * because it is guarded by kernel_supports(). However, since ++ * compiler does not know that we have an explicit conditional ++ * as well. ++ */ ++ snprintf(func_name, sizeof(func_name), "__%s_sys_%s", ++ arch_specific_syscall_pfx() ? 
: "", syscall_name); ++ } else { ++ snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name); ++ } ++ ++ kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false); ++ kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); ++ ++ return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts); ++} ++ ++/* Adapted from perf/util/string.c */ ++static bool glob_match(const char *str, const char *pat) ++{ ++ while (*str && *pat && *pat != '*') { ++ if (*pat == '?') { /* Matches any single character */ ++ str++; ++ pat++; ++ continue; ++ } ++ if (*str != *pat) ++ return false; ++ str++; ++ pat++; ++ } ++ /* Check wild card */ ++ if (*pat == '*') { ++ while (*pat == '*') ++ pat++; ++ if (!*pat) /* Tail wild card matches all */ ++ return true; ++ while (*str) ++ if (glob_match(str++, pat)) ++ return true; ++ } ++ return !*str && !*pat; ++} ++ ++struct kprobe_multi_resolve { ++ const char *pattern; ++ unsigned long *addrs; ++ size_t cap; ++ size_t cnt; ++}; ++ ++static int ++resolve_kprobe_multi_cb(unsigned long long sym_addr, char sym_type, ++ const char *sym_name, void *ctx) ++{ ++ struct kprobe_multi_resolve *res = ctx; ++ int err; ++ ++ if (!glob_match(sym_name, res->pattern)) ++ return 0; ++ ++ err = libbpf_ensure_mem((void **) &res->addrs, &res->cap, sizeof(unsigned long), ++ res->cnt + 1); ++ if (err) ++ return err; ++ ++ res->addrs[res->cnt++] = (unsigned long) sym_addr; ++ return 0; ++} ++ ++struct bpf_link * ++bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, ++ const char *pattern, ++ const struct bpf_kprobe_multi_opts *opts) ++{ ++ LIBBPF_OPTS(bpf_link_create_opts, lopts); ++ struct kprobe_multi_resolve res = { ++ .pattern = pattern, ++ }; ++ struct bpf_link *link = NULL; ++ char errmsg[STRERR_BUFSIZE]; ++ const unsigned long *addrs; ++ int err, link_fd, prog_fd; ++ const __u64 *cookies; ++ const char **syms; ++ bool retprobe; ++ size_t cnt; ++ ++ if (!OPTS_VALID(opts, bpf_kprobe_multi_opts)) ++ return libbpf_err_ptr(-EINVAL); ++ ++ syms = OPTS_GET(opts, syms, false); ++ addrs = OPTS_GET(opts, addrs, false); ++ cnt = OPTS_GET(opts, cnt, false); ++ cookies = OPTS_GET(opts, cookies, false); ++ ++ if (!pattern && !addrs && !syms) ++ return libbpf_err_ptr(-EINVAL); ++ if (pattern && (addrs || syms || cookies || cnt)) ++ return libbpf_err_ptr(-EINVAL); ++ if (!pattern && !cnt) ++ return libbpf_err_ptr(-EINVAL); ++ if (addrs && syms) ++ return libbpf_err_ptr(-EINVAL); ++ ++ if (pattern) { ++ err = libbpf_kallsyms_parse(resolve_kprobe_multi_cb, &res); ++ if (err) ++ goto error; ++ if (!res.cnt) { ++ err = -ENOENT; ++ goto error; ++ } ++ addrs = res.addrs; ++ cnt = res.cnt; ++ } ++ ++ retprobe = OPTS_GET(opts, retprobe, false); ++ ++ lopts.kprobe_multi.syms = syms; ++ lopts.kprobe_multi.addrs = addrs; ++ lopts.kprobe_multi.cookies = cookies; ++ lopts.kprobe_multi.cnt = cnt; ++ lopts.kprobe_multi.flags = retprobe ? 
BPF_F_KPROBE_MULTI_RETURN : 0; ++ ++ link = calloc(1, sizeof(*link)); ++ if (!link) { ++ err = -ENOMEM; ++ goto error; ++ } ++ link->detach = &bpf_link__detach_fd; ++ ++ prog_fd = bpf_program__fd(prog); ++ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &lopts); ++ if (link_fd < 0) { ++ err = -errno; ++ pr_warn("prog '%s': failed to attach: %s\n", ++ prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); ++ goto error; ++ } ++ link->fd = link_fd; ++ free(res.addrs); ++ return link; ++ ++error: ++ free(link); ++ free(res.addrs); ++ return libbpf_err_ptr(err); ++} ++ ++static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) ++{ ++ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts); ++ unsigned long offset = 0; ++ const char *func_name; ++ char *func; ++ int n; ++ ++ *link = NULL; ++ ++ /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */ ++ if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0) ++ return 0; ++ ++ opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/"); ++ if (opts.retprobe) ++ func_name = prog->sec_name + sizeof("kretprobe/") - 1; ++ else ++ func_name = prog->sec_name + sizeof("kprobe/") - 1; ++ ++ n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset); ++ if (n < 1) { ++ pr_warn("kprobe name is invalid: %s\n", func_name); ++ return -EINVAL; ++ } ++ if (opts.retprobe && offset != 0) { ++ free(func); ++ pr_warn("kretprobes do not support offset specification\n"); ++ return -EINVAL; ++ } ++ ++ opts.offset = offset; ++ *link = bpf_program__attach_kprobe_opts(prog, func, &opts); ++ free(func); ++ return libbpf_get_error(*link); ++} ++ ++static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link) ++{ ++ LIBBPF_OPTS(bpf_ksyscall_opts, opts); ++ const char *syscall_name; ++ ++ *link = NULL; ++ ++ /* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */ ++ if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0) ++ return 0; ++ ++ opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/"); ++ if (opts.retprobe) ++ syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1; ++ else ++ syscall_name = prog->sec_name + sizeof("ksyscall/") - 1; ++ ++ *link = bpf_program__attach_ksyscall(prog, syscall_name, &opts); ++ return *link ? 
0 : -errno; ++} ++ ++static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link) ++{ ++ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); ++ const char *spec; ++ char *pattern; ++ int n; ++ ++ *link = NULL; ++ ++ /* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */ ++ if (strcmp(prog->sec_name, "kprobe.multi") == 0 || ++ strcmp(prog->sec_name, "kretprobe.multi") == 0) ++ return 0; ++ ++ opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/"); ++ if (opts.retprobe) ++ spec = prog->sec_name + sizeof("kretprobe.multi/") - 1; ++ else ++ spec = prog->sec_name + sizeof("kprobe.multi/") - 1; ++ ++ n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern); ++ if (n < 1) { ++ pr_warn("kprobe multi pattern is invalid: %s\n", pattern); ++ return -EINVAL; ++ } ++ ++ *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts); ++ free(pattern); ++ return libbpf_get_error(*link); ++} ++ ++static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz, ++ const char *binary_path, uint64_t offset) ++{ ++ int i; ++ ++ snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset); ++ ++ /* sanitize binary_path in the probe name */ ++ for (i = 0; buf[i]; i++) { ++ if (!isalnum(buf[i])) ++ buf[i] = '_'; ++ } ++} ++ ++static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, ++ const char *binary_path, size_t offset) ++{ ++ return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx", ++ retprobe ? 'r' : 'p', ++ retprobe ? "uretprobes" : "uprobes", ++ probe_name, binary_path, offset); ++} ++ ++static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe) ++{ ++ return append_to_file(tracefs_uprobe_events(), "-:%s/%s", ++ retprobe ? "uretprobes" : "uprobes", probe_name); ++} ++ ++static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe) ++{ ++ char file[512]; ++ ++ snprintf(file, sizeof(file), "%s/events/%s/%s/id", ++ tracefs_path(), retprobe ? "uretprobes" : "uprobes", probe_name); ++ ++ return parse_uint_from_file(file, "%d\n"); ++} ++ ++static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, ++ const char *binary_path, size_t offset, int pid) ++{ ++ struct perf_event_attr attr; ++ int type, pfd, err; ++ ++ err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset); ++ if (err < 0) { ++ pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n", ++ binary_path, (size_t)offset, err); ++ return err; ++ } ++ type = determine_uprobe_perf_type_legacy(probe_name, retprobe); ++ if (type < 0) { ++ err = type; ++ pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n", ++ binary_path, offset, err); ++ goto err_clean_legacy; ++ } ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.size = sizeof(attr); ++ attr.config = type; ++ attr.type = PERF_TYPE_TRACEPOINT; ++ ++ pfd = syscall(__NR_perf_event_open, &attr, ++ pid < 0 ? -1 : pid, /* pid */ ++ pid == -1 ? 0 : -1, /* cpu */ ++ -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); ++ if (pfd < 0) { ++ err = -errno; ++ pr_warn("legacy uprobe perf_event_open() failed: %d\n", err); ++ goto err_clean_legacy; ++ } ++ return pfd; ++ ++err_clean_legacy: ++ /* Clear the newly added legacy uprobe_event */ ++ remove_uprobe_event_legacy(probe_name, retprobe); ++ return err; ++} ++ ++/* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. 
*/ ++static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn) ++{ ++ while ((scn = elf_nextscn(elf, scn)) != NULL) { ++ GElf_Shdr sh; ++ ++ if (!gelf_getshdr(scn, &sh)) ++ continue; ++ if (sh.sh_type == sh_type) ++ return scn; ++ } ++ return NULL; ++} ++ ++/* Find offset of function name in object specified by path. "name" matches ++ * symbol name or name@@LIB for library functions. ++ */ ++static long elf_find_func_offset(const char *binary_path, const char *name) ++{ ++ int fd, i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB }; ++ bool is_shared_lib, is_name_qualified; ++ char errmsg[STRERR_BUFSIZE]; ++ long ret = -ENOENT; ++ size_t name_len; ++ GElf_Ehdr ehdr; ++ Elf *elf; ++ ++ fd = open(binary_path, O_RDONLY | O_CLOEXEC); ++ if (fd < 0) { ++ ret = -errno; ++ pr_warn("failed to open %s: %s\n", binary_path, ++ libbpf_strerror_r(ret, errmsg, sizeof(errmsg))); ++ return ret; ++ } ++ elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); ++ if (!elf) { ++ pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1)); ++ close(fd); ++ return -LIBBPF_ERRNO__FORMAT; ++ } ++ if (!gelf_getehdr(elf, &ehdr)) { ++ pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1)); ++ ret = -LIBBPF_ERRNO__FORMAT; ++ goto out; ++ } ++ /* for shared lib case, we do not need to calculate relative offset */ ++ is_shared_lib = ehdr.e_type == ET_DYN; ++ ++ name_len = strlen(name); ++ /* Does name specify "@@LIB"? */ ++ is_name_qualified = strstr(name, "@@") != NULL; ++ ++ /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if ++ * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically ++ * linked binary may not have SHT_DYMSYM, so absence of a section should not be ++ * reported as a warning/error. ++ */ ++ for (i = 0; i < ARRAY_SIZE(sh_types); i++) { ++ size_t nr_syms, strtabidx, idx; ++ Elf_Data *symbols = NULL; ++ Elf_Scn *scn = NULL; ++ int last_bind = -1; ++ const char *sname; ++ GElf_Shdr sh; ++ ++ scn = elf_find_next_scn_by_type(elf, sh_types[i], NULL); ++ if (!scn) { ++ pr_debug("elf: failed to find symbol table ELF sections in '%s'\n", ++ binary_path); ++ continue; ++ } ++ if (!gelf_getshdr(scn, &sh)) ++ continue; ++ strtabidx = sh.sh_link; ++ symbols = elf_getdata(scn, 0); ++ if (!symbols) { ++ pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n", ++ binary_path, elf_errmsg(-1)); ++ ret = -LIBBPF_ERRNO__FORMAT; ++ goto out; ++ } ++ nr_syms = symbols->d_size / sh.sh_entsize; ++ ++ for (idx = 0; idx < nr_syms; idx++) { ++ int curr_bind; ++ GElf_Sym sym; ++ Elf_Scn *sym_scn; ++ GElf_Shdr sym_sh; ++ ++ if (!gelf_getsym(symbols, idx, &sym)) ++ continue; ++ ++ if (GELF_ST_TYPE(sym.st_info) != STT_FUNC) ++ continue; ++ ++ sname = elf_strptr(elf, strtabidx, sym.st_name); ++ if (!sname) ++ continue; ++ ++ curr_bind = GELF_ST_BIND(sym.st_info); ++ ++ /* User can specify func, func@@LIB or func@@LIB_VERSION. */ ++ if (strncmp(sname, name, name_len) != 0) ++ continue; ++ /* ...but we don't want a search for "foo" to match 'foo2" also, so any ++ * additional characters in sname should be of the form "@@LIB". ++ */ ++ if (!is_name_qualified && sname[name_len] != '\0' && sname[name_len] != '@') ++ continue; ++ ++ if (ret >= 0) { ++ /* handle multiple matches */ ++ if (last_bind != STB_WEAK && curr_bind != STB_WEAK) { ++ /* Only accept one non-weak bind. 
*/ ++ pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n", ++ sname, name, binary_path); ++ ret = -LIBBPF_ERRNO__FORMAT; ++ goto out; ++ } else if (curr_bind == STB_WEAK) { ++ /* already have a non-weak bind, and ++ * this is a weak bind, so ignore. ++ */ ++ continue; ++ } ++ } ++ ++ /* Transform symbol's virtual address (absolute for ++ * binaries and relative for shared libs) into file ++ * offset, which is what kernel is expecting for ++ * uprobe/uretprobe attachment. ++ * See Documentation/trace/uprobetracer.rst for more ++ * details. ++ * This is done by looking up symbol's containing ++ * section's header and using it's virtual address ++ * (sh_addr) and corresponding file offset (sh_offset) ++ * to transform sym.st_value (virtual address) into ++ * desired final file offset. ++ */ ++ sym_scn = elf_getscn(elf, sym.st_shndx); ++ if (!sym_scn) ++ continue; ++ if (!gelf_getshdr(sym_scn, &sym_sh)) ++ continue; ++ ++ ret = sym.st_value - sym_sh.sh_addr + sym_sh.sh_offset; ++ last_bind = curr_bind; ++ } ++ if (ret > 0) ++ break; ++ } ++ ++ if (ret > 0) { ++ pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path, ++ ret); ++ } else { ++ if (ret == 0) { ++ pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path, ++ is_shared_lib ? "should not be 0 in a shared library" : ++ "try using shared library path instead"); ++ ret = -ENOENT; ++ } else { ++ pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path); ++ } ++ } ++out: ++ elf_end(elf); ++ close(fd); ++ return ret; ++} ++ ++static const char *arch_specific_lib_paths(void) ++{ ++ /* ++ * Based on https://packages.debian.org/sid/libc6. ++ * ++ * Assume that the traced program is built for the same architecture ++ * as libbpf, which should cover the vast majority of cases. ++ */ ++#if defined(__x86_64__) ++ return "/lib/x86_64-linux-gnu"; ++#elif defined(__i386__) ++ return "/lib/i386-linux-gnu"; ++#elif defined(__s390x__) ++ return "/lib/s390x-linux-gnu"; ++#elif defined(__s390__) ++ return "/lib/s390-linux-gnu"; ++#elif defined(__arm__) && defined(__SOFTFP__) ++ return "/lib/arm-linux-gnueabi"; ++#elif defined(__arm__) && !defined(__SOFTFP__) ++ return "/lib/arm-linux-gnueabihf"; ++#elif defined(__aarch64__) ++ return "/lib/aarch64-linux-gnu"; ++#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64 ++ return "/lib/mips64el-linux-gnuabi64"; ++#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32 ++ return "/lib/mipsel-linux-gnu"; ++#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ++ return "/lib/powerpc64le-linux-gnu"; ++#elif defined(__sparc__) && defined(__arch64__) ++ return "/lib/sparc64-linux-gnu"; ++#elif defined(__riscv) && __riscv_xlen == 64 ++ return "/lib/riscv64-linux-gnu"; ++#else ++ return NULL; ++#endif ++} ++ ++/* Get full path to program/shared library. 
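 * (Editorial addition.) For shared libraries (a name ending in ".so" or
 * containing ".so."), resolve_full_path() below searches LD_LIBRARY_PATH,
 * then "/usr/lib64:/usr/lib", then the Debian-style multiarch directory
 * returned by arch_specific_lib_paths(); for executables it searches PATH,
 * then "/usr/bin:/usr/sbin". The first match that is readable and
 * executable is used.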
*/ ++static int resolve_full_path(const char *file, char *result, size_t result_sz) ++{ ++ const char *search_paths[3] = {}; ++ int i; ++ ++ if (str_has_sfx(file, ".so") || strstr(file, ".so.")) { ++ search_paths[0] = getenv("LD_LIBRARY_PATH"); ++ search_paths[1] = "/usr/lib64:/usr/lib"; ++ search_paths[2] = arch_specific_lib_paths(); ++ } else { ++ search_paths[0] = getenv("PATH"); ++ search_paths[1] = "/usr/bin:/usr/sbin"; ++ } ++ ++ for (i = 0; i < ARRAY_SIZE(search_paths); i++) { ++ const char *s; ++ ++ if (!search_paths[i]) ++ continue; ++ for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) { ++ char *next_path; ++ int seg_len; ++ ++ if (s[0] == ':') ++ s++; ++ next_path = strchr(s, ':'); ++ seg_len = next_path ? next_path - s : strlen(s); ++ if (!seg_len) ++ continue; ++ snprintf(result, result_sz, "%.*s/%s", seg_len, s, file); ++ /* ensure it is an executable file/link */ ++ if (access(result, R_OK | X_OK) < 0) ++ continue; ++ pr_debug("resolved '%s' to '%s'\n", file, result); ++ return 0; ++ } ++ } ++ return -ENOENT; ++} ++ ++LIBBPF_API struct bpf_link * ++bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, ++ const char *binary_path, size_t func_offset, ++ const struct bpf_uprobe_opts *opts) ++{ ++ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); ++ char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL; ++ char full_binary_path[PATH_MAX]; ++ struct bpf_link *link; ++ size_t ref_ctr_off; ++ int pfd, err; ++ bool retprobe, legacy; ++ const char *func_name; ++ ++ if (!OPTS_VALID(opts, bpf_uprobe_opts)) ++ return libbpf_err_ptr(-EINVAL); ++ ++ retprobe = OPTS_GET(opts, retprobe, false); ++ ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0); ++ pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); ++ ++ if (!binary_path) ++ return libbpf_err_ptr(-EINVAL); ++ ++ if (!strchr(binary_path, '/')) { ++ err = resolve_full_path(binary_path, full_binary_path, ++ sizeof(full_binary_path)); ++ if (err) { ++ pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", ++ prog->name, binary_path, err); ++ return libbpf_err_ptr(err); ++ } ++ binary_path = full_binary_path; ++ } ++ func_name = OPTS_GET(opts, func_name, NULL); ++ if (func_name) { ++ long sym_off; ++ ++ sym_off = elf_find_func_offset(binary_path, func_name); ++ if (sym_off < 0) ++ return libbpf_err_ptr(sym_off); ++ func_offset += sym_off; ++ } ++ ++ legacy = determine_uprobe_perf_type() < 0; ++ if (!legacy) { ++ pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, ++ func_offset, pid, ref_ctr_off); ++ } else { ++ char probe_name[PATH_MAX + 64]; ++ ++ if (ref_ctr_off) ++ return libbpf_err_ptr(-EINVAL); ++ ++ gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name), ++ binary_path, func_offset); ++ ++ legacy_probe = strdup(probe_name); ++ if (!legacy_probe) ++ return libbpf_err_ptr(-ENOMEM); ++ ++ pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe, ++ binary_path, func_offset, pid); ++ } ++ if (pfd < 0) { ++ err = -errno; ++ pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n", ++ prog->name, retprobe ? "uretprobe" : "uprobe", ++ binary_path, func_offset, ++ libbpf_strerror_r(err, errmsg, sizeof(errmsg))); ++ goto err_out; ++ } ++ ++ link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); ++ err = libbpf_get_error(link); ++ if (err) { ++ close(pfd); ++ pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n", ++ prog->name, retprobe ? 
"uretprobe" : "uprobe", ++ binary_path, func_offset, ++ libbpf_strerror_r(err, errmsg, sizeof(errmsg))); ++ goto err_clean_legacy; ++ } ++ if (legacy) { ++ struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); ++ ++ perf_link->legacy_probe_name = legacy_probe; ++ perf_link->legacy_is_kprobe = false; ++ perf_link->legacy_is_retprobe = retprobe; ++ } ++ return link; ++ ++err_clean_legacy: ++ if (legacy) ++ remove_uprobe_event_legacy(legacy_probe, retprobe); ++err_out: ++ free(legacy_probe); ++ return libbpf_err_ptr(err); ++} ++ ++/* Format of u[ret]probe section definition supporting auto-attach: ++ * u[ret]probe/binary:function[+offset] ++ * ++ * binary can be an absolute/relative path or a filename; the latter is resolved to a ++ * full binary path via bpf_program__attach_uprobe_opts. ++ * ++ * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be ++ * specified (and auto-attach is not possible) or the above format is specified for ++ * auto-attach. ++ */ ++static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) ++{ ++ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); ++ char *probe_type = NULL, *binary_path = NULL, *func_name = NULL; ++ int n, ret = -EINVAL; ++ long offset = 0; ++ ++ *link = NULL; ++ ++ n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%li", ++ &probe_type, &binary_path, &func_name, &offset); ++ switch (n) { ++ case 1: ++ /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ ++ ret = 0; ++ break; ++ case 2: ++ pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n", ++ prog->name, prog->sec_name); ++ break; ++ case 3: ++ case 4: ++ opts.retprobe = strcmp(probe_type, "uretprobe") == 0 || ++ strcmp(probe_type, "uretprobe.s") == 0; ++ if (opts.retprobe && offset != 0) { ++ pr_warn("prog '%s': uretprobes do not support offset specification\n", ++ prog->name); ++ break; ++ } ++ opts.func_name = func_name; ++ *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts); ++ ret = libbpf_get_error(*link); ++ break; ++ default: ++ pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name, ++ prog->sec_name); ++ break; ++ } ++ free(probe_type); ++ free(binary_path); ++ free(func_name); ++ ++ return ret; ++} ++ ++struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog, ++ bool retprobe, pid_t pid, ++ const char *binary_path, ++ size_t func_offset) ++{ ++ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe); ++ ++ return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts); ++} ++ ++struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog, ++ pid_t pid, const char *binary_path, ++ const char *usdt_provider, const char *usdt_name, ++ const struct bpf_usdt_opts *opts) ++{ ++ char resolved_path[512]; ++ struct bpf_object *obj = prog->obj; ++ struct bpf_link *link; ++ __u64 usdt_cookie; ++ int err; ++ ++ if (!OPTS_VALID(opts, bpf_uprobe_opts)) ++ return libbpf_err_ptr(-EINVAL); ++ ++ if (bpf_program__fd(prog) < 0) { ++ pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n", ++ prog->name); ++ return libbpf_err_ptr(-EINVAL); ++ } ++ ++ if (!binary_path) ++ return libbpf_err_ptr(-EINVAL); ++ ++ if (!strchr(binary_path, '/')) { ++ err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path)); ++ if (err) { ++ pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", ++ prog->name, binary_path, err); ++ 
return libbpf_err_ptr(err); ++ } ++ binary_path = resolved_path; ++ } ++ ++ /* USDT manager is instantiated lazily on first USDT attach. It will ++ * be destroyed together with BPF object in bpf_object__close(). ++ */ ++ if (IS_ERR(obj->usdt_man)) ++ return libbpf_ptr(obj->usdt_man); ++ if (!obj->usdt_man) { ++ obj->usdt_man = usdt_manager_new(obj); ++ if (IS_ERR(obj->usdt_man)) ++ return libbpf_ptr(obj->usdt_man); ++ } ++ ++ usdt_cookie = OPTS_GET(opts, usdt_cookie, 0); ++ link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path, ++ usdt_provider, usdt_name, usdt_cookie); ++ err = libbpf_get_error(link); ++ if (err) ++ return libbpf_err_ptr(err); ++ return link; ++} ++ ++static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link) ++{ ++ char *path = NULL, *provider = NULL, *name = NULL; ++ const char *sec_name; ++ int n, err; ++ ++ sec_name = bpf_program__section_name(prog); ++ if (strcmp(sec_name, "usdt") == 0) { ++ /* no auto-attach for just SEC("usdt") */ ++ *link = NULL; ++ return 0; ++ } ++ ++ n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name); ++ if (n != 3) { ++ pr_warn("invalid section '%s', expected SEC(\"usdt/::\")\n", ++ sec_name); ++ err = -EINVAL; ++ } else { ++ *link = bpf_program__attach_usdt(prog, -1 /* any process */, path, ++ provider, name, NULL); ++ err = libbpf_get_error(*link); ++ } ++ free(path); ++ free(provider); ++ free(name); ++ return err; ++} ++ ++static int determine_tracepoint_id(const char *tp_category, ++ const char *tp_name) ++{ ++ char file[PATH_MAX]; ++ int ret; ++ ++ ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id", ++ tracefs_path(), tp_category, tp_name); ++ if (ret < 0) ++ return -errno; ++ if (ret >= sizeof(file)) { ++ pr_debug("tracepoint %s/%s path is too long\n", ++ tp_category, tp_name); ++ return -E2BIG; ++ } ++ return parse_uint_from_file(file, "%d\n"); ++} ++ ++static int perf_event_open_tracepoint(const char *tp_category, ++ const char *tp_name) ++{ ++ struct perf_event_attr attr = {}; ++ char errmsg[STRERR_BUFSIZE]; ++ int tp_id, pfd, err; ++ ++ tp_id = determine_tracepoint_id(tp_category, tp_name); ++ if (tp_id < 0) { ++ pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n", ++ tp_category, tp_name, ++ libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg))); ++ return tp_id; ++ } ++ ++ attr.type = PERF_TYPE_TRACEPOINT; ++ attr.size = sizeof(attr); ++ attr.config = tp_id; ++ ++ pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */, ++ -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); ++ if (pfd < 0) { ++ err = -errno; ++ pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n", ++ tp_category, tp_name, ++ libbpf_strerror_r(err, errmsg, sizeof(errmsg))); ++ return err; ++ } ++ return pfd; ++} ++ ++struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog, ++ const char *tp_category, ++ const char *tp_name, ++ const struct bpf_tracepoint_opts *opts) ++{ ++ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); ++ char errmsg[STRERR_BUFSIZE]; ++ struct bpf_link *link; ++ int pfd, err; ++ ++ if (!OPTS_VALID(opts, bpf_tracepoint_opts)) ++ return libbpf_err_ptr(-EINVAL); ++ ++ pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); ++ ++ pfd = perf_event_open_tracepoint(tp_category, tp_name); ++ if (pfd < 0) { ++ pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n", ++ prog->name, tp_category, tp_name, ++ libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); ++ return libbpf_err_ptr(pfd); ++ } ++ link = 
bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); ++ err = libbpf_get_error(link); ++ if (err) { ++ close(pfd); ++ pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n", ++ prog->name, tp_category, tp_name, ++ libbpf_strerror_r(err, errmsg, sizeof(errmsg))); ++ return libbpf_err_ptr(err); ++ } ++ return link; ++} ++ ++struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog, ++ const char *tp_category, ++ const char *tp_name) ++{ ++ return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL); ++} ++ ++static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link) ++{ ++ char *sec_name, *tp_cat, *tp_name; ++ ++ *link = NULL; ++ ++ /* no auto-attach for SEC("tp") or SEC("tracepoint") */ ++ if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0) ++ return 0; ++ ++ sec_name = strdup(prog->sec_name); ++ if (!sec_name) ++ return -ENOMEM; ++ ++ /* extract "tp//" or "tracepoint//" */ ++ if (str_has_pfx(prog->sec_name, "tp/")) ++ tp_cat = sec_name + sizeof("tp/") - 1; ++ else ++ tp_cat = sec_name + sizeof("tracepoint/") - 1; ++ tp_name = strchr(tp_cat, '/'); ++ if (!tp_name) { ++ free(sec_name); ++ return -EINVAL; ++ } ++ *tp_name = '\0'; ++ tp_name++; ++ ++ *link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name); ++ free(sec_name); ++ return libbpf_get_error(*link); ++} ++ ++struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, ++ const char *tp_name) ++{ ++ char errmsg[STRERR_BUFSIZE]; ++ struct bpf_link *link; ++ int prog_fd, pfd; ++ ++ prog_fd = bpf_program__fd(prog); ++ if (prog_fd < 0) { ++ pr_warn("prog '%s': can't attach before loaded\n", prog->name); ++ return libbpf_err_ptr(-EINVAL); ++ } ++ ++ link = calloc(1, sizeof(*link)); ++ if (!link) ++ return libbpf_err_ptr(-ENOMEM); ++ link->detach = &bpf_link__detach_fd; ++ ++ pfd = bpf_raw_tracepoint_open(tp_name, prog_fd); ++ if (pfd < 0) { ++ pfd = -errno; ++ free(link); ++ pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n", ++ prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); ++ return libbpf_err_ptr(pfd); ++ } ++ link->fd = pfd; ++ return link; ++} ++ ++static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link) ++{ ++ static const char *const prefixes[] = { ++ "raw_tp", ++ "raw_tracepoint", ++ "raw_tp.w", ++ "raw_tracepoint.w", ++ }; ++ size_t i; ++ const char *tp_name = NULL; ++ ++ *link = NULL; ++ ++ for (i = 0; i < ARRAY_SIZE(prefixes); i++) { ++ size_t pfx_len; ++ ++ if (!str_has_pfx(prog->sec_name, prefixes[i])) ++ continue; ++ ++ pfx_len = strlen(prefixes[i]); ++ /* no auto-attach case of, e.g., SEC("raw_tp") */ ++ if (prog->sec_name[pfx_len] == '\0') ++ return 0; ++ ++ if (prog->sec_name[pfx_len] != '/') ++ continue; ++ ++ tp_name = prog->sec_name + pfx_len + 1; ++ break; ++ } ++ ++ if (!tp_name) { ++ pr_warn("prog '%s': invalid section name '%s'\n", ++ prog->name, prog->sec_name); ++ return -EINVAL; ++ } ++ ++ *link = bpf_program__attach_raw_tracepoint(prog, tp_name); ++ return libbpf_get_error(link); ++} ++ ++/* Common logic for all BPF program types that attach to a btf_id */ ++static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog, ++ const struct bpf_trace_opts *opts) ++{ ++ LIBBPF_OPTS(bpf_link_create_opts, link_opts); ++ char errmsg[STRERR_BUFSIZE]; ++ struct bpf_link *link; ++ int prog_fd, pfd; ++ ++ if (!OPTS_VALID(opts, bpf_trace_opts)) ++ return libbpf_err_ptr(-EINVAL); ++ ++ 
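/*
 * Editorial usage sketch, not part of the upstream libbpf sources vendored
 * by this patch: the tracepoint helpers above can be driven either through
 * auto-attach or explicitly. The program body and tracepoint names are
 * hypothetical placeholders.
 *
 * Auto-attach via the section name parsed by attach_tp():
 *
 *   SEC("tp/syscalls/sys_enter_openat")
 *   int handle_openat(void *ctx) { return 0; }
 *
 * Explicit attach from user space:
 *
 *   struct bpf_link *link;
 *
 *   link = bpf_program__attach_tracepoint(prog, "syscalls",
 *                                         "sys_enter_openat");
 *   if (libbpf_get_error(link))
 *           fprintf(stderr, "attach failed\n");
 *   ...
 *   bpf_link__destroy(link);
 */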
prog_fd = bpf_program__fd(prog); ++ if (prog_fd < 0) { ++ pr_warn("prog '%s': can't attach before loaded\n", prog->name); ++ return libbpf_err_ptr(-EINVAL); ++ } ++ ++ link = calloc(1, sizeof(*link)); ++ if (!link) ++ return libbpf_err_ptr(-ENOMEM); ++ link->detach = &bpf_link__detach_fd; ++ ++ /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */ ++ link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0); ++ pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts); ++ if (pfd < 0) { ++ pfd = -errno; ++ free(link); ++ pr_warn("prog '%s': failed to attach: %s\n", ++ prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); ++ return libbpf_err_ptr(pfd); ++ } ++ link->fd = pfd; ++ return link; ++} ++ ++struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog) ++{ ++ return bpf_program__attach_btf_id(prog, NULL); ++} ++ ++struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog, ++ const struct bpf_trace_opts *opts) ++{ ++ return bpf_program__attach_btf_id(prog, opts); ++} ++ ++struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog) ++{ ++ return bpf_program__attach_btf_id(prog, NULL); ++} ++ ++static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link) ++{ ++ *link = bpf_program__attach_trace(prog); ++ return libbpf_get_error(*link); ++} ++ ++static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link) ++{ ++ *link = bpf_program__attach_lsm(prog); ++ return libbpf_get_error(*link); ++} ++ ++static struct bpf_link * ++bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id, ++ const char *target_name) ++{ ++ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts, ++ .target_btf_id = btf_id); ++ enum bpf_attach_type attach_type; ++ char errmsg[STRERR_BUFSIZE]; ++ struct bpf_link *link; ++ int prog_fd, link_fd; ++ ++ prog_fd = bpf_program__fd(prog); ++ if (prog_fd < 0) { ++ pr_warn("prog '%s': can't attach before loaded\n", prog->name); ++ return libbpf_err_ptr(-EINVAL); ++ } ++ ++ link = calloc(1, sizeof(*link)); ++ if (!link) ++ return libbpf_err_ptr(-ENOMEM); ++ link->detach = &bpf_link__detach_fd; ++ ++ attach_type = bpf_program__expected_attach_type(prog); ++ link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts); ++ if (link_fd < 0) { ++ link_fd = -errno; ++ free(link); ++ pr_warn("prog '%s': failed to attach to %s: %s\n", ++ prog->name, target_name, ++ libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); ++ return libbpf_err_ptr(link_fd); ++ } ++ link->fd = link_fd; ++ return link; ++} ++ ++struct bpf_link * ++bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd) ++{ ++ return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup"); ++} ++ ++struct bpf_link * ++bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd) ++{ ++ return bpf_program__attach_fd(prog, netns_fd, 0, "netns"); ++} ++ ++struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex) ++{ ++ /* target_fd/target_ifindex use the same field in LINK_CREATE */ ++ return bpf_program__attach_fd(prog, ifindex, 0, "xdp"); ++} ++ ++struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog, ++ int target_fd, ++ const char *attach_func_name) ++{ ++ int btf_id; ++ ++ if (!!target_fd != !!attach_func_name) { ++ pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n", ++ prog->name); ++ return libbpf_err_ptr(-EINVAL); ++ } ++ ++ if 
(prog->type != BPF_PROG_TYPE_EXT) { ++ pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace", ++ prog->name); ++ return libbpf_err_ptr(-EINVAL); ++ } ++ ++ if (target_fd) { ++ btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd); ++ if (btf_id < 0) ++ return libbpf_err_ptr(btf_id); ++ ++ return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace"); ++ } else { ++ /* no target, so use raw_tracepoint_open for compatibility ++ * with old kernels ++ */ ++ return bpf_program__attach_trace(prog); ++ } ++} ++ ++struct bpf_link * ++bpf_program__attach_iter(const struct bpf_program *prog, ++ const struct bpf_iter_attach_opts *opts) ++{ ++ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); ++ char errmsg[STRERR_BUFSIZE]; ++ struct bpf_link *link; ++ int prog_fd, link_fd; ++ __u32 target_fd = 0; ++ ++ if (!OPTS_VALID(opts, bpf_iter_attach_opts)) ++ return libbpf_err_ptr(-EINVAL); ++ ++ link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0); ++ link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0); ++ ++ prog_fd = bpf_program__fd(prog); ++ if (prog_fd < 0) { ++ pr_warn("prog '%s': can't attach before loaded\n", prog->name); ++ return libbpf_err_ptr(-EINVAL); ++ } ++ ++ link = calloc(1, sizeof(*link)); ++ if (!link) ++ return libbpf_err_ptr(-ENOMEM); ++ link->detach = &bpf_link__detach_fd; ++ ++ link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER, ++ &link_create_opts); ++ if (link_fd < 0) { ++ link_fd = -errno; ++ free(link); ++ pr_warn("prog '%s': failed to attach to iterator: %s\n", ++ prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); ++ return libbpf_err_ptr(link_fd); ++ } ++ link->fd = link_fd; ++ return link; ++} ++ ++static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link) ++{ ++ *link = bpf_program__attach_iter(prog, NULL); ++ return libbpf_get_error(*link); ++} ++ ++struct bpf_link *bpf_program__attach(const struct bpf_program *prog) ++{ ++ struct bpf_link *link = NULL; ++ int err; ++ ++ if (!prog->sec_def || !prog->sec_def->prog_attach_fn) ++ return libbpf_err_ptr(-EOPNOTSUPP); ++ ++ err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link); ++ if (err) ++ return libbpf_err_ptr(err); ++ ++ /* When calling bpf_program__attach() explicitly, auto-attach support ++ * is expected to work, so NULL returned link is considered an error. ++ * This is different for skeleton's attach, see comment in ++ * bpf_object__attach_skeleton(). 
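 *
 * (Editorial addition.) A minimal caller-side sketch, assuming the program
 * was declared with an auto-attachable section name such as
 * SEC("kprobe/do_sys_openat2") (hypothetical example):
 *
 *   struct bpf_link *link = bpf_program__attach(prog);
 *
 *   if (libbpf_get_error(link))
 *           fprintf(stderr, "failed to attach %s\n",
 *                   bpf_program__name(prog));
 *   ...
 *   bpf_link__destroy(link);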
++ */ ++ if (!link) ++ return libbpf_err_ptr(-EOPNOTSUPP); ++ ++ return link; ++} ++ ++static int bpf_link__detach_struct_ops(struct bpf_link *link) ++{ ++ __u32 zero = 0; ++ ++ if (bpf_map_delete_elem(link->fd, &zero)) ++ return -errno; ++ ++ return 0; ++} ++ ++struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) ++{ ++ struct bpf_struct_ops *st_ops; ++ struct bpf_link *link; ++ __u32 i, zero = 0; ++ int err; ++ ++ if (!bpf_map__is_struct_ops(map) || map->fd == -1) ++ return libbpf_err_ptr(-EINVAL); ++ ++ link = calloc(1, sizeof(*link)); ++ if (!link) ++ return libbpf_err_ptr(-EINVAL); ++ ++ st_ops = map->st_ops; ++ for (i = 0; i < btf_vlen(st_ops->type); i++) { ++ struct bpf_program *prog = st_ops->progs[i]; ++ void *kern_data; ++ int prog_fd; ++ ++ if (!prog) ++ continue; ++ ++ prog_fd = bpf_program__fd(prog); ++ kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i]; ++ *(unsigned long *)kern_data = prog_fd; ++ } ++ ++ err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0); ++ if (err) { ++ err = -errno; ++ free(link); ++ return libbpf_err_ptr(err); ++ } ++ ++ link->detach = bpf_link__detach_struct_ops; ++ link->fd = map->fd; ++ ++ return link; ++} ++ ++typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr, ++ void *private_data); ++ ++static enum bpf_perf_event_ret ++perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, ++ void **copy_mem, size_t *copy_size, ++ bpf_perf_event_print_t fn, void *private_data) ++{ ++ struct perf_event_mmap_page *header = mmap_mem; ++ __u64 data_head = ring_buffer_read_head(header); ++ __u64 data_tail = header->data_tail; ++ void *base = ((__u8 *)header) + page_size; ++ int ret = LIBBPF_PERF_EVENT_CONT; ++ struct perf_event_header *ehdr; ++ size_t ehdr_size; ++ ++ while (data_head != data_tail) { ++ ehdr = base + (data_tail & (mmap_size - 1)); ++ ehdr_size = ehdr->size; ++ ++ if (((void *)ehdr) + ehdr_size > base + mmap_size) { ++ void *copy_start = ehdr; ++ size_t len_first = base + mmap_size - copy_start; ++ size_t len_secnd = ehdr_size - len_first; ++ ++ if (*copy_size < ehdr_size) { ++ free(*copy_mem); ++ *copy_mem = malloc(ehdr_size); ++ if (!*copy_mem) { ++ *copy_size = 0; ++ ret = LIBBPF_PERF_EVENT_ERROR; ++ break; ++ } ++ *copy_size = ehdr_size; ++ } ++ ++ memcpy(*copy_mem, copy_start, len_first); ++ memcpy(*copy_mem + len_first, base, len_secnd); ++ ehdr = *copy_mem; ++ } ++ ++ ret = fn(ehdr, private_data); ++ data_tail += ehdr_size; ++ if (ret != LIBBPF_PERF_EVENT_CONT) ++ break; ++ } ++ ++ ring_buffer_write_tail(header, data_tail); ++ return libbpf_err(ret); ++} ++ ++struct perf_buffer; ++ ++struct perf_buffer_params { ++ struct perf_event_attr *attr; ++ /* if event_cb is specified, it takes precendence */ ++ perf_buffer_event_fn event_cb; ++ /* sample_cb and lost_cb are higher-level common-case callbacks */ ++ perf_buffer_sample_fn sample_cb; ++ perf_buffer_lost_fn lost_cb; ++ void *ctx; ++ int cpu_cnt; ++ int *cpus; ++ int *map_keys; ++}; ++ ++struct perf_cpu_buf { ++ struct perf_buffer *pb; ++ void *base; /* mmap()'ed memory */ ++ void *buf; /* for reconstructing segmented data */ ++ size_t buf_size; ++ int fd; ++ int cpu; ++ int map_key; ++}; ++ ++struct perf_buffer { ++ perf_buffer_event_fn event_cb; ++ perf_buffer_sample_fn sample_cb; ++ perf_buffer_lost_fn lost_cb; ++ void *ctx; /* passed into callbacks */ ++ ++ size_t page_size; ++ size_t mmap_size; ++ struct perf_cpu_buf **cpu_bufs; ++ struct epoll_event *events; ++ int cpu_cnt; /* number of allocated 
CPU buffers */ ++ int epoll_fd; /* perf event FD */ ++ int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */ ++}; ++ ++static void perf_buffer__free_cpu_buf(struct perf_buffer *pb, ++ struct perf_cpu_buf *cpu_buf) ++{ ++ if (!cpu_buf) ++ return; ++ if (cpu_buf->base && ++ munmap(cpu_buf->base, pb->mmap_size + pb->page_size)) ++ pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu); ++ if (cpu_buf->fd >= 0) { ++ ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0); ++ close(cpu_buf->fd); ++ } ++ free(cpu_buf->buf); ++ free(cpu_buf); ++} ++ ++void perf_buffer__free(struct perf_buffer *pb) ++{ ++ int i; ++ ++ if (IS_ERR_OR_NULL(pb)) ++ return; ++ if (pb->cpu_bufs) { ++ for (i = 0; i < pb->cpu_cnt; i++) { ++ struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i]; ++ ++ if (!cpu_buf) ++ continue; ++ ++ bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key); ++ perf_buffer__free_cpu_buf(pb, cpu_buf); ++ } ++ free(pb->cpu_bufs); ++ } ++ if (pb->epoll_fd >= 0) ++ close(pb->epoll_fd); ++ free(pb->events); ++ free(pb); ++} ++ ++static struct perf_cpu_buf * ++perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, ++ int cpu, int map_key) ++{ ++ struct perf_cpu_buf *cpu_buf; ++ char msg[STRERR_BUFSIZE]; ++ int err; ++ ++ cpu_buf = calloc(1, sizeof(*cpu_buf)); ++ if (!cpu_buf) ++ return ERR_PTR(-ENOMEM); ++ ++ cpu_buf->pb = pb; ++ cpu_buf->cpu = cpu; ++ cpu_buf->map_key = map_key; ++ ++ cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu, ++ -1, PERF_FLAG_FD_CLOEXEC); ++ if (cpu_buf->fd < 0) { ++ err = -errno; ++ pr_warn("failed to open perf buffer event on cpu #%d: %s\n", ++ cpu, libbpf_strerror_r(err, msg, sizeof(msg))); ++ goto error; ++ } ++ ++ cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size, ++ PROT_READ | PROT_WRITE, MAP_SHARED, ++ cpu_buf->fd, 0); ++ if (cpu_buf->base == MAP_FAILED) { ++ cpu_buf->base = NULL; ++ err = -errno; ++ pr_warn("failed to mmap perf buffer on cpu #%d: %s\n", ++ cpu, libbpf_strerror_r(err, msg, sizeof(msg))); ++ goto error; ++ } ++ ++ if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) { ++ err = -errno; ++ pr_warn("failed to enable perf buffer event on cpu #%d: %s\n", ++ cpu, libbpf_strerror_r(err, msg, sizeof(msg))); ++ goto error; ++ } ++ ++ return cpu_buf; ++ ++error: ++ perf_buffer__free_cpu_buf(pb, cpu_buf); ++ return (struct perf_cpu_buf *)ERR_PTR(err); ++} ++ ++static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, ++ struct perf_buffer_params *p); ++ ++struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, ++ perf_buffer_sample_fn sample_cb, ++ perf_buffer_lost_fn lost_cb, ++ void *ctx, ++ const struct perf_buffer_opts *opts) ++{ ++ struct perf_buffer_params p = {}; ++ struct perf_event_attr attr = {}; ++ ++ if (!OPTS_VALID(opts, perf_buffer_opts)) ++ return libbpf_err_ptr(-EINVAL); ++ ++ attr.config = PERF_COUNT_SW_BPF_OUTPUT; ++ attr.type = PERF_TYPE_SOFTWARE; ++ attr.sample_type = PERF_SAMPLE_RAW; ++ attr.sample_period = 1; ++ attr.wakeup_events = 1; ++ ++ p.attr = &attr; ++ p.sample_cb = sample_cb; ++ p.lost_cb = lost_cb; ++ p.ctx = ctx; ++ ++ return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); ++} ++ ++struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt, ++ struct perf_event_attr *attr, ++ perf_buffer_event_fn event_cb, void *ctx, ++ const struct perf_buffer_raw_opts *opts) ++{ ++ struct perf_buffer_params p = {}; ++ ++ if (!attr) ++ return libbpf_err_ptr(-EINVAL); ++ ++ if (!OPTS_VALID(opts, perf_buffer_raw_opts)) ++ return libbpf_err_ptr(-EINVAL); ++ ++ p.attr 
= attr; ++ p.event_cb = event_cb; ++ p.ctx = ctx; ++ p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0); ++ p.cpus = OPTS_GET(opts, cpus, NULL); ++ p.map_keys = OPTS_GET(opts, map_keys, NULL); ++ ++ return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); ++} ++ ++static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, ++ struct perf_buffer_params *p) ++{ ++ const char *online_cpus_file = "/sys/devices/system/cpu/online"; ++ struct bpf_map_info map; ++ char msg[STRERR_BUFSIZE]; ++ struct perf_buffer *pb; ++ bool *online = NULL; ++ __u32 map_info_len; ++ int err, i, j, n; ++ ++ if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) { ++ pr_warn("page count should be power of two, but is %zu\n", ++ page_cnt); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ /* best-effort sanity checks */ ++ memset(&map, 0, sizeof(map)); ++ map_info_len = sizeof(map); ++ err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len); ++ if (err) { ++ err = -errno; ++ /* if BPF_OBJ_GET_INFO_BY_FD is supported, will return ++ * -EBADFD, -EFAULT, or -E2BIG on real error ++ */ ++ if (err != -EINVAL) { ++ pr_warn("failed to get map info for map FD %d: %s\n", ++ map_fd, libbpf_strerror_r(err, msg, sizeof(msg))); ++ return ERR_PTR(err); ++ } ++ pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n", ++ map_fd); ++ } else { ++ if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { ++ pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n", ++ map.name); ++ return ERR_PTR(-EINVAL); ++ } ++ } ++ ++ pb = calloc(1, sizeof(*pb)); ++ if (!pb) ++ return ERR_PTR(-ENOMEM); ++ ++ pb->event_cb = p->event_cb; ++ pb->sample_cb = p->sample_cb; ++ pb->lost_cb = p->lost_cb; ++ pb->ctx = p->ctx; ++ ++ pb->page_size = getpagesize(); ++ pb->mmap_size = pb->page_size * page_cnt; ++ pb->map_fd = map_fd; ++ ++ pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC); ++ if (pb->epoll_fd < 0) { ++ err = -errno; ++ pr_warn("failed to create epoll instance: %s\n", ++ libbpf_strerror_r(err, msg, sizeof(msg))); ++ goto error; ++ } ++ ++ if (p->cpu_cnt > 0) { ++ pb->cpu_cnt = p->cpu_cnt; ++ } else { ++ pb->cpu_cnt = libbpf_num_possible_cpus(); ++ if (pb->cpu_cnt < 0) { ++ err = pb->cpu_cnt; ++ goto error; ++ } ++ if (map.max_entries && map.max_entries < pb->cpu_cnt) ++ pb->cpu_cnt = map.max_entries; ++ } ++ ++ pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events)); ++ if (!pb->events) { ++ err = -ENOMEM; ++ pr_warn("failed to allocate events: out of memory\n"); ++ goto error; ++ } ++ pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs)); ++ if (!pb->cpu_bufs) { ++ err = -ENOMEM; ++ pr_warn("failed to allocate buffers: out of memory\n"); ++ goto error; ++ } ++ ++ err = parse_cpu_mask_file(online_cpus_file, &online, &n); ++ if (err) { ++ pr_warn("failed to get online CPU mask: %d\n", err); ++ goto error; ++ } ++ ++ for (i = 0, j = 0; i < pb->cpu_cnt; i++) { ++ struct perf_cpu_buf *cpu_buf; ++ int cpu, map_key; ++ ++ cpu = p->cpu_cnt > 0 ? p->cpus[i] : i; ++ map_key = p->cpu_cnt > 0 ? 
p->map_keys[i] : i; ++ ++ /* in case user didn't explicitly requested particular CPUs to ++ * be attached to, skip offline/not present CPUs ++ */ ++ if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu])) ++ continue; ++ ++ cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key); ++ if (IS_ERR(cpu_buf)) { ++ err = PTR_ERR(cpu_buf); ++ goto error; ++ } ++ ++ pb->cpu_bufs[j] = cpu_buf; ++ ++ err = bpf_map_update_elem(pb->map_fd, &map_key, ++ &cpu_buf->fd, 0); ++ if (err) { ++ err = -errno; ++ pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n", ++ cpu, map_key, cpu_buf->fd, ++ libbpf_strerror_r(err, msg, sizeof(msg))); ++ goto error; ++ } ++ ++ pb->events[j].events = EPOLLIN; ++ pb->events[j].data.ptr = cpu_buf; ++ if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd, ++ &pb->events[j]) < 0) { ++ err = -errno; ++ pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n", ++ cpu, cpu_buf->fd, ++ libbpf_strerror_r(err, msg, sizeof(msg))); ++ goto error; ++ } ++ j++; ++ } ++ pb->cpu_cnt = j; ++ free(online); ++ ++ return pb; ++ ++error: ++ free(online); ++ if (pb) ++ perf_buffer__free(pb); ++ return ERR_PTR(err); ++} ++ ++struct perf_sample_raw { ++ struct perf_event_header header; ++ uint32_t size; ++ char data[]; ++}; ++ ++struct perf_sample_lost { ++ struct perf_event_header header; ++ uint64_t id; ++ uint64_t lost; ++ uint64_t sample_id; ++}; ++ ++static enum bpf_perf_event_ret ++perf_buffer__process_record(struct perf_event_header *e, void *ctx) ++{ ++ struct perf_cpu_buf *cpu_buf = ctx; ++ struct perf_buffer *pb = cpu_buf->pb; ++ void *data = e; ++ ++ /* user wants full control over parsing perf event */ ++ if (pb->event_cb) ++ return pb->event_cb(pb->ctx, cpu_buf->cpu, e); ++ ++ switch (e->type) { ++ case PERF_RECORD_SAMPLE: { ++ struct perf_sample_raw *s = data; ++ ++ if (pb->sample_cb) ++ pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size); ++ break; ++ } ++ case PERF_RECORD_LOST: { ++ struct perf_sample_lost *s = data; ++ ++ if (pb->lost_cb) ++ pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost); ++ break; ++ } ++ default: ++ pr_warn("unknown perf sample type %d\n", e->type); ++ return LIBBPF_PERF_EVENT_ERROR; ++ } ++ return LIBBPF_PERF_EVENT_CONT; ++} ++ ++static int perf_buffer__process_records(struct perf_buffer *pb, ++ struct perf_cpu_buf *cpu_buf) ++{ ++ enum bpf_perf_event_ret ret; ++ ++ ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size, ++ pb->page_size, &cpu_buf->buf, ++ &cpu_buf->buf_size, ++ perf_buffer__process_record, cpu_buf); ++ if (ret != LIBBPF_PERF_EVENT_CONT) ++ return ret; ++ return 0; ++} ++ ++int perf_buffer__epoll_fd(const struct perf_buffer *pb) ++{ ++ return pb->epoll_fd; ++} ++ ++int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms) ++{ ++ int i, cnt, err; ++ ++ cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms); ++ if (cnt < 0) ++ return -errno; ++ ++ for (i = 0; i < cnt; i++) { ++ struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr; ++ ++ err = perf_buffer__process_records(pb, cpu_buf); ++ if (err) { ++ pr_warn("error while processing records: %d\n", err); ++ return libbpf_err(err); ++ } ++ } ++ return cnt; ++} ++ ++/* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer ++ * manager. ++ */ ++size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb) ++{ ++ return pb->cpu_cnt; ++} ++ ++/* ++ * Return perf_event FD of a ring buffer in *buf_idx* slot of ++ * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using ++ * select()/poll()/epoll() Linux syscalls. 
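++ * A minimal polling sketch (pb is assumed to have been created with
++ * perf_buffer__new() elsewhere, and <poll.h> to be included; most callers
++ * can simply use perf_buffer__poll() instead of polling individual FDs):
++ *
++ *        int fd = perf_buffer__buffer_fd(pb, 0);
++ *        struct pollfd pfd = { .fd = fd, .events = POLLIN };
++ *
++ *        if (poll(&pfd, 1, 100) > 0)
++ *                perf_buffer__consume_buffer(pb, 0);
++ *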
++ */ ++int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx) ++{ ++ struct perf_cpu_buf *cpu_buf; ++ ++ if (buf_idx >= pb->cpu_cnt) ++ return libbpf_err(-EINVAL); ++ ++ cpu_buf = pb->cpu_bufs[buf_idx]; ++ if (!cpu_buf) ++ return libbpf_err(-ENOENT); ++ ++ return cpu_buf->fd; ++} ++ ++int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size) ++{ ++ struct perf_cpu_buf *cpu_buf; ++ ++ if (buf_idx >= pb->cpu_cnt) ++ return libbpf_err(-EINVAL); ++ ++ cpu_buf = pb->cpu_bufs[buf_idx]; ++ if (!cpu_buf) ++ return libbpf_err(-ENOENT); ++ ++ *buf = cpu_buf->base; ++ *buf_size = pb->mmap_size; ++ return 0; ++} ++ ++/* ++ * Consume data from perf ring buffer corresponding to slot *buf_idx* in ++ * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to ++ * consume, do nothing and return success. ++ * Returns: ++ * - 0 on success; ++ * - <0 on failure. ++ */ ++int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx) ++{ ++ struct perf_cpu_buf *cpu_buf; ++ ++ if (buf_idx >= pb->cpu_cnt) ++ return libbpf_err(-EINVAL); ++ ++ cpu_buf = pb->cpu_bufs[buf_idx]; ++ if (!cpu_buf) ++ return libbpf_err(-ENOENT); ++ ++ return perf_buffer__process_records(pb, cpu_buf); ++} ++ ++int perf_buffer__consume(struct perf_buffer *pb) ++{ ++ int i, err; ++ ++ for (i = 0; i < pb->cpu_cnt; i++) { ++ struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i]; ++ ++ if (!cpu_buf) ++ continue; ++ ++ err = perf_buffer__process_records(pb, cpu_buf); ++ if (err) { ++ pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err); ++ return libbpf_err(err); ++ } ++ } ++ return 0; ++} ++ ++int bpf_program__set_attach_target(struct bpf_program *prog, ++ int attach_prog_fd, ++ const char *attach_func_name) ++{ ++ int btf_obj_fd = 0, btf_id = 0, err; ++ ++ if (!prog || attach_prog_fd < 0) ++ return libbpf_err(-EINVAL); ++ ++ if (prog->obj->loaded) ++ return libbpf_err(-EINVAL); ++ ++ if (attach_prog_fd && !attach_func_name) { ++ /* remember attach_prog_fd and let bpf_program__load() find ++ * BTF ID during the program load ++ */ ++ prog->attach_prog_fd = attach_prog_fd; ++ return 0; ++ } ++ ++ if (attach_prog_fd) { ++ btf_id = libbpf_find_prog_btf_id(attach_func_name, ++ attach_prog_fd); ++ if (btf_id < 0) ++ return libbpf_err(btf_id); ++ } else { ++ if (!attach_func_name) ++ return libbpf_err(-EINVAL); ++ ++ /* load btf_vmlinux, if not yet */ ++ err = bpf_object__load_vmlinux_btf(prog->obj, true); ++ if (err) ++ return libbpf_err(err); ++ err = find_kernel_btf_id(prog->obj, attach_func_name, ++ prog->expected_attach_type, ++ &btf_obj_fd, &btf_id); ++ if (err) ++ return libbpf_err(err); ++ } ++ ++ prog->attach_btf_id = btf_id; ++ prog->attach_btf_obj_fd = btf_obj_fd; ++ prog->attach_prog_fd = attach_prog_fd; ++ return 0; ++} ++ ++int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz) ++{ ++ int err = 0, n, len, start, end = -1; ++ bool *tmp; ++ ++ *mask = NULL; ++ *mask_sz = 0; ++ ++ /* Each sub string separated by ',' has format \d+-\d+ or \d+ */ ++ while (*s) { ++ if (*s == ',' || *s == '\n') { ++ s++; ++ continue; ++ } ++ n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len); ++ if (n <= 0 || n > 2) { ++ pr_warn("Failed to get CPU range %s: %d\n", s, n); ++ err = -EINVAL; ++ goto cleanup; ++ } else if (n == 1) { ++ end = start; ++ } ++ if (start < 0 || start > end) { ++ pr_warn("Invalid CPU range [%d,%d] in %s\n", ++ start, end, s); ++ err = -EINVAL; ++ goto cleanup; ++ } ++ tmp = realloc(*mask, end + 1); ++ if (!tmp) { ++ err = -ENOMEM; 
++ goto cleanup; ++ } ++ *mask = tmp; ++ memset(tmp + *mask_sz, 0, start - *mask_sz); ++ memset(tmp + start, 1, end - start + 1); ++ *mask_sz = end + 1; ++ s += len; ++ } ++ if (!*mask_sz) { ++ pr_warn("Empty CPU range\n"); ++ return -EINVAL; ++ } ++ return 0; ++cleanup: ++ free(*mask); ++ *mask = NULL; ++ return err; ++} ++ ++int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz) ++{ ++ int fd, err = 0, len; ++ char buf[128]; ++ ++ fd = open(fcpu, O_RDONLY | O_CLOEXEC); ++ if (fd < 0) { ++ err = -errno; ++ pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err); ++ return err; ++ } ++ len = read(fd, buf, sizeof(buf)); ++ close(fd); ++ if (len <= 0) { ++ err = len ? -errno : -EINVAL; ++ pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err); ++ return err; ++ } ++ if (len >= sizeof(buf)) { ++ pr_warn("CPU mask is too big in file %s\n", fcpu); ++ return -E2BIG; ++ } ++ buf[len] = '\0'; ++ ++ return parse_cpu_mask_str(buf, mask, mask_sz); ++} ++ ++int libbpf_num_possible_cpus(void) ++{ ++ static const char *fcpu = "/sys/devices/system/cpu/possible"; ++ static int cpus; ++ int err, n, i, tmp_cpus; ++ bool *mask; ++ ++ tmp_cpus = READ_ONCE(cpus); ++ if (tmp_cpus > 0) ++ return tmp_cpus; ++ ++ err = parse_cpu_mask_file(fcpu, &mask, &n); ++ if (err) ++ return libbpf_err(err); ++ ++ tmp_cpus = 0; ++ for (i = 0; i < n; i++) { ++ if (mask[i]) ++ tmp_cpus++; ++ } ++ free(mask); ++ ++ WRITE_ONCE(cpus, tmp_cpus); ++ return tmp_cpus; ++} ++ ++static int populate_skeleton_maps(const struct bpf_object *obj, ++ struct bpf_map_skeleton *maps, ++ size_t map_cnt) ++{ ++ int i; ++ ++ for (i = 0; i < map_cnt; i++) { ++ struct bpf_map **map = maps[i].map; ++ const char *name = maps[i].name; ++ void **mmaped = maps[i].mmaped; ++ ++ *map = bpf_object__find_map_by_name(obj, name); ++ if (!*map) { ++ pr_warn("failed to find skeleton map '%s'\n", name); ++ return -ESRCH; ++ } ++ ++ /* externs shouldn't be pre-setup from user code */ ++ if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG) ++ *mmaped = (*map)->mmaped; ++ } ++ return 0; ++} ++ ++static int populate_skeleton_progs(const struct bpf_object *obj, ++ struct bpf_prog_skeleton *progs, ++ size_t prog_cnt) ++{ ++ int i; ++ ++ for (i = 0; i < prog_cnt; i++) { ++ struct bpf_program **prog = progs[i].prog; ++ const char *name = progs[i].name; ++ ++ *prog = bpf_object__find_program_by_name(obj, name); ++ if (!*prog) { ++ pr_warn("failed to find skeleton program '%s'\n", name); ++ return -ESRCH; ++ } ++ } ++ return 0; ++} ++ ++int bpf_object__open_skeleton(struct bpf_object_skeleton *s, ++ const struct bpf_object_open_opts *opts) ++{ ++ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts, ++ .object_name = s->name, ++ ); ++ struct bpf_object *obj; ++ int err; ++ ++ /* Attempt to preserve opts->object_name, unless overriden by user ++ * explicitly. Overwriting object name for skeletons is discouraged, ++ * as it breaks global data maps, because they contain object name ++ * prefix as their own map name prefix. When skeleton is generated, ++ * bpftool is making an assumption that this name will stay the same. 
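++ *
++ * E.g., a caller tweaking unrelated open options would normally leave
++ * object_name unset and let the skeleton's own name be used (sketch only;
++ * "my_skel" stands for a bpftool-generated skeleton):
++ *
++ *        DECLARE_LIBBPF_OPTS(bpf_object_open_opts, user_opts,
++ *                .kconfig = "CONFIG_DEBUG_INFO_BTF=y",
++ *        );
++ *        struct my_skel *skel = my_skel__open_opts(&user_opts);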
++ */ ++ if (opts) { ++ memcpy(&skel_opts, opts, sizeof(*opts)); ++ if (!opts->object_name) ++ skel_opts.object_name = s->name; ++ } ++ ++ obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts); ++ err = libbpf_get_error(obj); ++ if (err) { ++ pr_warn("failed to initialize skeleton BPF object '%s': %d\n", ++ s->name, err); ++ return libbpf_err(err); ++ } ++ ++ *s->obj = obj; ++ err = populate_skeleton_maps(obj, s->maps, s->map_cnt); ++ if (err) { ++ pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err); ++ return libbpf_err(err); ++ } ++ ++ err = populate_skeleton_progs(obj, s->progs, s->prog_cnt); ++ if (err) { ++ pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err); ++ return libbpf_err(err); ++ } ++ ++ return 0; ++} ++ ++int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s) ++{ ++ int err, len, var_idx, i; ++ const char *var_name; ++ const struct bpf_map *map; ++ struct btf *btf; ++ __u32 map_type_id; ++ const struct btf_type *map_type, *var_type; ++ const struct bpf_var_skeleton *var_skel; ++ struct btf_var_secinfo *var; ++ ++ if (!s->obj) ++ return libbpf_err(-EINVAL); ++ ++ btf = bpf_object__btf(s->obj); ++ if (!btf) { ++ pr_warn("subskeletons require BTF at runtime (object %s)\n", ++ bpf_object__name(s->obj)); ++ return libbpf_err(-errno); ++ } ++ ++ err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt); ++ if (err) { ++ pr_warn("failed to populate subskeleton maps: %d\n", err); ++ return libbpf_err(err); ++ } ++ ++ err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt); ++ if (err) { ++ pr_warn("failed to populate subskeleton maps: %d\n", err); ++ return libbpf_err(err); ++ } ++ ++ for (var_idx = 0; var_idx < s->var_cnt; var_idx++) { ++ var_skel = &s->vars[var_idx]; ++ map = *var_skel->map; ++ map_type_id = bpf_map__btf_value_type_id(map); ++ map_type = btf__type_by_id(btf, map_type_id); ++ ++ if (!btf_is_datasec(map_type)) { ++ pr_warn("type for map '%1$s' is not a datasec: %2$s", ++ bpf_map__name(map), ++ __btf_kind_str(btf_kind(map_type))); ++ return libbpf_err(-EINVAL); ++ } ++ ++ len = btf_vlen(map_type); ++ var = btf_var_secinfos(map_type); ++ for (i = 0; i < len; i++, var++) { ++ var_type = btf__type_by_id(btf, var->type); ++ var_name = btf__name_by_offset(btf, var_type->name_off); ++ if (strcmp(var_name, var_skel->name) == 0) { ++ *var_skel->addr = map->mmaped + var->offset; ++ break; ++ } ++ } ++ } ++ return 0; ++} ++ ++void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s) ++{ ++ if (!s) ++ return; ++ free(s->maps); ++ free(s->progs); ++ free(s->vars); ++ free(s); ++} ++ ++int bpf_object__load_skeleton(struct bpf_object_skeleton *s) ++{ ++ int i, err; ++ ++ err = bpf_object__load(*s->obj); ++ if (err) { ++ pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err); ++ return libbpf_err(err); ++ } ++ ++ for (i = 0; i < s->map_cnt; i++) { ++ struct bpf_map *map = *s->maps[i].map; ++ size_t mmap_sz = bpf_map_mmap_sz(map); ++ int prot, map_fd = bpf_map__fd(map); ++ void **mmaped = s->maps[i].mmaped; ++ ++ if (!mmaped) ++ continue; ++ ++ if (!(map->def.map_flags & BPF_F_MMAPABLE)) { ++ *mmaped = NULL; ++ continue; ++ } ++ ++ if (map->def.map_flags & BPF_F_RDONLY_PROG) ++ prot = PROT_READ; ++ else ++ prot = PROT_READ | PROT_WRITE; ++ ++ /* Remap anonymous mmap()-ed "map initialization image" as ++ * a BPF map-backed mmap()-ed memory, but preserving the same ++ * memory address. 
This will cause kernel to change process' ++ * page table to point to a different piece of kernel memory, ++ * but from userspace point of view memory address (and its ++ * contents, being identical at this point) will stay the ++ * same. This mapping will be released by bpf_object__close() ++ * as per normal clean up procedure, so we don't need to worry ++ * about it from skeleton's clean up perspective. ++ */ ++ *mmaped = mmap(map->mmaped, mmap_sz, prot, ++ MAP_SHARED | MAP_FIXED, map_fd, 0); ++ if (*mmaped == MAP_FAILED) { ++ err = -errno; ++ *mmaped = NULL; ++ pr_warn("failed to re-mmap() map '%s': %d\n", ++ bpf_map__name(map), err); ++ return libbpf_err(err); ++ } ++ } ++ ++ return 0; ++} ++ ++int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) ++{ ++ int i, err; ++ ++ for (i = 0; i < s->prog_cnt; i++) { ++ struct bpf_program *prog = *s->progs[i].prog; ++ struct bpf_link **link = s->progs[i].link; ++ ++ if (!prog->autoload) ++ continue; ++ ++ /* auto-attaching not supported for this program */ ++ if (!prog->sec_def || !prog->sec_def->prog_attach_fn) ++ continue; ++ ++ /* if user already set the link manually, don't attempt auto-attach */ ++ if (*link) ++ continue; ++ ++ err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link); ++ if (err) { ++ pr_warn("prog '%s': failed to auto-attach: %d\n", ++ bpf_program__name(prog), err); ++ return libbpf_err(err); ++ } ++ ++ /* It's possible that for some SEC() definitions auto-attach ++ * is supported in some cases (e.g., if definition completely ++ * specifies target information), but is not in other cases. ++ * SEC("uprobe") is one such case. If user specified target ++ * binary and function name, such BPF program can be ++ * auto-attached. But if not, it shouldn't trigger skeleton's ++ * attach to fail. It should just be skipped. ++ * attach_fn signals such case with returning 0 (no error) and ++ * setting link to NULL. ++ */ ++ } ++ ++ return 0; ++} ++ ++void bpf_object__detach_skeleton(struct bpf_object_skeleton *s) ++{ ++ int i; ++ ++ for (i = 0; i < s->prog_cnt; i++) { ++ struct bpf_link **link = s->progs[i].link; ++ ++ bpf_link__destroy(*link); ++ *link = NULL; ++ } ++} ++ ++void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) ++{ ++ if (!s) ++ return; ++ ++ if (s->progs) ++ bpf_object__detach_skeleton(s); ++ if (s->obj) ++ bpf_object__close(*s->obj); ++ free(s->maps); ++ free(s->progs); ++ free(s); ++} +diff --git a/src/cc/libbpf/src/libbpf.h b/src/cc/libbpf/src/libbpf.h +new file mode 100644 +index 0000000..61493c4 +--- /dev/null ++++ b/src/cc/libbpf/src/libbpf.h +@@ -0,0 +1,1418 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++ ++/* ++ * Common eBPF ELF object loading operations. ++ * ++ * Copyright (C) 2013-2015 Alexei Starovoitov ++ * Copyright (C) 2015 Wang Nan ++ * Copyright (C) 2015 Huawei Inc. 
++ */ ++#ifndef __LIBBPF_LIBBPF_H ++#define __LIBBPF_LIBBPF_H ++ ++#include ++#include ++#include ++#include ++#include // for size_t ++#include ++ ++#include "libbpf_common.h" ++#include "libbpf_legacy.h" ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++LIBBPF_API __u32 libbpf_major_version(void); ++LIBBPF_API __u32 libbpf_minor_version(void); ++LIBBPF_API const char *libbpf_version_string(void); ++ ++enum libbpf_errno { ++ __LIBBPF_ERRNO__START = 4000, ++ ++ /* Something wrong in libelf */ ++ LIBBPF_ERRNO__LIBELF = __LIBBPF_ERRNO__START, ++ LIBBPF_ERRNO__FORMAT, /* BPF object format invalid */ ++ LIBBPF_ERRNO__KVERSION, /* Incorrect or no 'version' section */ ++ LIBBPF_ERRNO__ENDIAN, /* Endian mismatch */ ++ LIBBPF_ERRNO__INTERNAL, /* Internal error in libbpf */ ++ LIBBPF_ERRNO__RELOC, /* Relocation failed */ ++ LIBBPF_ERRNO__LOAD, /* Load program failure for unknown reason */ ++ LIBBPF_ERRNO__VERIFY, /* Kernel verifier blocks program loading */ ++ LIBBPF_ERRNO__PROG2BIG, /* Program too big */ ++ LIBBPF_ERRNO__KVER, /* Incorrect kernel version */ ++ LIBBPF_ERRNO__PROGTYPE, /* Kernel doesn't support this program type */ ++ LIBBPF_ERRNO__WRNGPID, /* Wrong pid in netlink message */ ++ LIBBPF_ERRNO__INVSEQ, /* Invalid netlink sequence */ ++ LIBBPF_ERRNO__NLPARSE, /* netlink parsing error */ ++ __LIBBPF_ERRNO__END, ++}; ++ ++LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size); ++ ++/** ++ * @brief **libbpf_bpf_attach_type_str()** converts the provided attach type ++ * value into a textual representation. ++ * @param t The attach type. ++ * @return Pointer to a static string identifying the attach type. NULL is ++ * returned for unknown **bpf_attach_type** values. ++ */ ++LIBBPF_API const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t); ++ ++/** ++ * @brief **libbpf_bpf_link_type_str()** converts the provided link type value ++ * into a textual representation. ++ * @param t The link type. ++ * @return Pointer to a static string identifying the link type. NULL is ++ * returned for unknown **bpf_link_type** values. ++ */ ++LIBBPF_API const char *libbpf_bpf_link_type_str(enum bpf_link_type t); ++ ++/** ++ * @brief **libbpf_bpf_map_type_str()** converts the provided map type value ++ * into a textual representation. ++ * @param t The map type. ++ * @return Pointer to a static string identifying the map type. NULL is ++ * returned for unknown **bpf_map_type** values. ++ */ ++LIBBPF_API const char *libbpf_bpf_map_type_str(enum bpf_map_type t); ++ ++/** ++ * @brief **libbpf_bpf_prog_type_str()** converts the provided program type ++ * value into a textual representation. ++ * @param t The program type. ++ * @return Pointer to a static string identifying the program type. NULL is ++ * returned for unknown **bpf_prog_type** values. 
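++ *
++ * A quick sketch of how the libbpf_bpf_*_str() helpers above are meant
++ * to be used (the exact string is whatever libbpf reports, e.g. "xdp"):
++ *
++ *        const char *s = libbpf_bpf_prog_type_str(BPF_PROG_TYPE_XDP);
++ *
++ *        printf("prog type: %s\n", s ? s : "<unknown>");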
++ */ ++LIBBPF_API const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t); ++ ++enum libbpf_print_level { ++ LIBBPF_WARN, ++ LIBBPF_INFO, ++ LIBBPF_DEBUG, ++}; ++ ++typedef int (*libbpf_print_fn_t)(enum libbpf_print_level level, ++ const char *, va_list ap); ++ ++LIBBPF_API libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn); ++ ++/* Hide internal to user */ ++struct bpf_object; ++ ++struct bpf_object_open_opts { ++ /* size of this struct, for forward/backward compatibility */ ++ size_t sz; ++ /* object name override, if provided: ++ * - for object open from file, this will override setting object ++ * name from file path's base name; ++ * - for object open from memory buffer, this will specify an object ++ * name and will override default "-" name; ++ */ ++ const char *object_name; ++ /* parse map definitions non-strictly, allowing extra attributes/data */ ++ bool relaxed_maps; ++ /* maps that set the 'pinning' attribute in their definition will have ++ * their pin_path attribute set to a file in this directory, and be ++ * auto-pinned to that path on load; defaults to "/sys/fs/bpf". ++ */ ++ const char *pin_root_path; ++ long :0; ++ /* Additional kernel config content that augments and overrides ++ * system Kconfig for CONFIG_xxx externs. ++ */ ++ const char *kconfig; ++ /* Path to the custom BTF to be used for BPF CO-RE relocations. ++ * This custom BTF completely replaces the use of vmlinux BTF ++ * for the purpose of CO-RE relocations. ++ * NOTE: any other BPF feature (e.g., fentry/fexit programs, ++ * struct_ops, etc) will need actual kernel BTF at /sys/kernel/btf/vmlinux. ++ */ ++ const char *btf_custom_path; ++ /* Pointer to a buffer for storing kernel logs for applicable BPF ++ * commands. Valid kernel_log_size has to be specified as well and are ++ * passed-through to bpf() syscall. Keep in mind that kernel might ++ * fail operation with -ENOSPC error if provided buffer is too small ++ * to contain entire log output. ++ * See the comment below for kernel_log_level for interaction between ++ * log_buf and log_level settings. ++ * ++ * If specified, this log buffer will be passed for: ++ * - each BPF progral load (BPF_PROG_LOAD) attempt, unless overriden ++ * with bpf_program__set_log() on per-program level, to get ++ * BPF verifier log output. ++ * - during BPF object's BTF load into kernel (BPF_BTF_LOAD) to get ++ * BTF sanity checking log. ++ * ++ * Each BPF command (BPF_BTF_LOAD or BPF_PROG_LOAD) will overwrite ++ * previous contents, so if you need more fine-grained control, set ++ * per-program buffer with bpf_program__set_log_buf() to preserve each ++ * individual program's verification log. Keep using kernel_log_buf ++ * for BTF verification log, if necessary. ++ */ ++ char *kernel_log_buf; ++ size_t kernel_log_size; ++ /* ++ * Log level can be set independently from log buffer. Log_level=0 ++ * means that libbpf will attempt loading BTF or program without any ++ * logging requested, but will retry with either its own or custom log ++ * buffer, if provided, and log_level=1 on any error. ++ * And vice versa, setting log_level>0 will request BTF or prog ++ * loading with verbose log from the first attempt (and as such also ++ * for successfully loaded BTF or program), and the actual log buffer ++ * could be either libbpf's own auto-allocated log buffer, if ++ * kernel_log_buffer is NULL, or user-provided custom kernel_log_buf. ++ * If user didn't provide custom log buffer, libbpf will emit captured ++ * logs through its print callback. 
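++ *
++ * For example, requesting a verbose verifier log into a caller-owned
++ * buffer could look like this (buffer size and object path are
++ * illustrative):
++ *
++ *        static char log_buf[1024 * 1024];
++ *        DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
++ *                .kernel_log_buf = log_buf,
++ *                .kernel_log_size = sizeof(log_buf),
++ *                .kernel_log_level = 1,
++ *        );
++ *        struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", &opts);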
++ */ ++ __u32 kernel_log_level; ++ ++ size_t :0; ++}; ++#define bpf_object_open_opts__last_field kernel_log_level ++ ++LIBBPF_API struct bpf_object *bpf_object__open(const char *path); ++ ++/** ++ * @brief **bpf_object__open_file()** creates a bpf_object by opening ++ * the BPF ELF object file pointed to by the passed path and loading it ++ * into memory. ++ * @param path BPF object file path ++ * @param opts options for how to load the bpf object, this parameter is ++ * optional and can be set to NULL ++ * @return pointer to the new bpf_object; or NULL is returned on error, ++ * error code is stored in errno ++ */ ++LIBBPF_API struct bpf_object * ++bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts); ++ ++/** ++ * @brief **bpf_object__open_mem()** creates a bpf_object by reading ++ * the BPF objects raw bytes from a memory buffer containing a valid ++ * BPF ELF object file. ++ * @param obj_buf pointer to the buffer containing ELF file bytes ++ * @param obj_buf_sz number of bytes in the buffer ++ * @param opts options for how to load the bpf object ++ * @return pointer to the new bpf_object; or NULL is returned on error, ++ * error code is stored in errno ++ */ ++LIBBPF_API struct bpf_object * ++bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, ++ const struct bpf_object_open_opts *opts); ++ ++/* Load/unload object into/from kernel */ ++LIBBPF_API int bpf_object__load(struct bpf_object *obj); ++ ++LIBBPF_API void bpf_object__close(struct bpf_object *object); ++ ++/* pin_maps and unpin_maps can both be called with a NULL path, in which case ++ * they will use the pin_path attribute of each map (and ignore all maps that ++ * don't have a pin_path set). ++ */ ++LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path); ++LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj, ++ const char *path); ++LIBBPF_API int bpf_object__pin_programs(struct bpf_object *obj, ++ const char *path); ++LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj, ++ const char *path); ++LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path); ++ ++LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj); ++LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj); ++LIBBPF_API int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version); ++ ++struct btf; ++LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj); ++LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj); ++ ++LIBBPF_API struct bpf_program * ++bpf_object__find_program_by_name(const struct bpf_object *obj, ++ const char *name); ++ ++LIBBPF_API int ++libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, ++ enum bpf_attach_type *expected_attach_type); ++LIBBPF_API int libbpf_attach_type_by_name(const char *name, ++ enum bpf_attach_type *attach_type); ++LIBBPF_API int libbpf_find_vmlinux_btf_id(const char *name, ++ enum bpf_attach_type attach_type); ++ ++/* Accessors of bpf_program */ ++struct bpf_program; ++ ++LIBBPF_API struct bpf_program * ++bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prog); ++ ++#define bpf_object__for_each_program(pos, obj) \ ++ for ((pos) = bpf_object__next_program((obj), NULL); \ ++ (pos) != NULL; \ ++ (pos) = bpf_object__next_program((obj), (pos))) ++ ++LIBBPF_API struct bpf_program * ++bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *prog); ++ ++LIBBPF_API void bpf_program__set_ifindex(struct 
bpf_program *prog, ++ __u32 ifindex); ++ ++LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog); ++LIBBPF_API const char *bpf_program__section_name(const struct bpf_program *prog); ++LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog); ++LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload); ++ ++struct bpf_insn; ++ ++/** ++ * @brief **bpf_program__insns()** gives read-only access to BPF program's ++ * underlying BPF instructions. ++ * @param prog BPF program for which to return instructions ++ * @return a pointer to an array of BPF instructions that belong to the ++ * specified BPF program ++ * ++ * Returned pointer is always valid and not NULL. Number of `struct bpf_insn` ++ * pointed to can be fetched using **bpf_program__insn_cnt()** API. ++ * ++ * Keep in mind, libbpf can modify and append/delete BPF program's ++ * instructions as it processes BPF object file and prepares everything for ++ * uploading into the kernel. So depending on the point in BPF object ++ * lifetime, **bpf_program__insns()** can return different sets of ++ * instructions. As an example, during BPF object load phase BPF program ++ * instructions will be CO-RE-relocated, BPF subprograms instructions will be ++ * appended, ldimm64 instructions will have FDs embedded, etc. So instructions ++ * returned before **bpf_object__load()** and after it might be quite ++ * different. ++ */ ++LIBBPF_API const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog); ++ ++/** ++ * @brief **bpf_program__set_insns()** can set BPF program's underlying ++ * BPF instructions. ++ * ++ * WARNING: This is a very advanced libbpf API and users need to know ++ * what they are doing. This should be used from prog_prepare_load_fn ++ * callback only. ++ * ++ * @param prog BPF program for which to return instructions ++ * @param new_insns a pointer to an array of BPF instructions ++ * @param new_insn_cnt number of `struct bpf_insn`'s that form ++ * specified BPF program ++ * @return 0, on success; negative error code, otherwise ++ */ ++LIBBPF_API int bpf_program__set_insns(struct bpf_program *prog, ++ struct bpf_insn *new_insns, size_t new_insn_cnt); ++ ++/** ++ * @brief **bpf_program__insn_cnt()** returns number of `struct bpf_insn`'s ++ * that form specified BPF program. ++ * @param prog BPF program for which to return number of BPF instructions ++ * ++ * See **bpf_program__insns()** documentation for notes on how libbpf can ++ * change instructions and their count during different phases of ++ * **bpf_object** lifetime. ++ */ ++LIBBPF_API size_t bpf_program__insn_cnt(const struct bpf_program *prog); ++ ++LIBBPF_API int bpf_program__fd(const struct bpf_program *prog); ++ ++/** ++ * @brief **bpf_program__pin()** pins the BPF program to a file ++ * in the BPF FS specified by a path. This increments the programs ++ * reference count, allowing it to stay loaded after the process ++ * which loaded it has exited. ++ * ++ * @param prog BPF program to pin, must already be loaded ++ * @param path file path in a BPF file system ++ * @return 0, on success; negative error code, otherwise ++ */ ++LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path); ++ ++/** ++ * @brief **bpf_program__unpin()** unpins the BPF program from a file ++ * in the BPFFS specified by a path. This decrements the programs ++ * reference count. 
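++ *
++ * A typical pin/unpin pair might look like this (the pin path is purely
++ * illustrative):
++ *
++ *        bpf_program__pin(prog, "/sys/fs/bpf/my_prog");
++ *        ...
++ *        bpf_program__unpin(prog, "/sys/fs/bpf/my_prog");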
++ * ++ * The file pinning the BPF program can also be unlinked by a different ++ * process in which case this function will return an error. ++ * ++ * @param prog BPF program to unpin ++ * @param path file path to the pin in a BPF file system ++ * @return 0, on success; negative error code, otherwise ++ */ ++LIBBPF_API int bpf_program__unpin(struct bpf_program *prog, const char *path); ++LIBBPF_API void bpf_program__unload(struct bpf_program *prog); ++ ++struct bpf_link; ++ ++LIBBPF_API struct bpf_link *bpf_link__open(const char *path); ++LIBBPF_API int bpf_link__fd(const struct bpf_link *link); ++LIBBPF_API const char *bpf_link__pin_path(const struct bpf_link *link); ++/** ++ * @brief **bpf_link__pin()** pins the BPF link to a file ++ * in the BPF FS specified by a path. This increments the links ++ * reference count, allowing it to stay loaded after the process ++ * which loaded it has exited. ++ * ++ * @param link BPF link to pin, must already be loaded ++ * @param path file path in a BPF file system ++ * @return 0, on success; negative error code, otherwise ++ */ ++ ++LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path); ++ ++/** ++ * @brief **bpf_link__unpin()** unpins the BPF link from a file ++ * in the BPFFS specified by a path. This decrements the links ++ * reference count. ++ * ++ * The file pinning the BPF link can also be unlinked by a different ++ * process in which case this function will return an error. ++ * ++ * @param prog BPF program to unpin ++ * @param path file path to the pin in a BPF file system ++ * @return 0, on success; negative error code, otherwise ++ */ ++LIBBPF_API int bpf_link__unpin(struct bpf_link *link); ++LIBBPF_API int bpf_link__update_program(struct bpf_link *link, ++ struct bpf_program *prog); ++LIBBPF_API void bpf_link__disconnect(struct bpf_link *link); ++LIBBPF_API int bpf_link__detach(struct bpf_link *link); ++LIBBPF_API int bpf_link__destroy(struct bpf_link *link); ++ ++/** ++ * @brief **bpf_program__attach()** is a generic function for attaching ++ * a BPF program based on auto-detection of program type, attach type, ++ * and extra paremeters, where applicable. 
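++ *
++ * In the common case attaching boils down to the following (error
++ * handling elided; "handle_tp" stands for whatever the program is named
++ * in the BPF-side code):
++ *
++ *        struct bpf_program *prog;
++ *        struct bpf_link *link;
++ *
++ *        prog = bpf_object__find_program_by_name(obj, "handle_tp");
++ *        link = bpf_program__attach(prog);
++ *        if (!link)
++ *                return -errno;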
++ * ++ * @param prog BPF program to attach ++ * @return Reference to the newly created BPF link; or NULL is returned on error, ++ * error code is stored in errno ++ * ++ * This is supported for: ++ * - kprobe/kretprobe (depends on SEC() definition) ++ * - uprobe/uretprobe (depends on SEC() definition) ++ * - tracepoint ++ * - raw tracepoint ++ * - tracing programs (typed raw TP/fentry/fexit/fmod_ret) ++ */ ++LIBBPF_API struct bpf_link * ++bpf_program__attach(const struct bpf_program *prog); ++ ++struct bpf_perf_event_opts { ++ /* size of this struct, for forward/backward compatiblity */ ++ size_t sz; ++ /* custom user-provided value fetchable through bpf_get_attach_cookie() */ ++ __u64 bpf_cookie; ++}; ++#define bpf_perf_event_opts__last_field bpf_cookie ++ ++LIBBPF_API struct bpf_link * ++bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd); ++ ++LIBBPF_API struct bpf_link * ++bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd, ++ const struct bpf_perf_event_opts *opts); ++ ++struct bpf_kprobe_opts { ++ /* size of this struct, for forward/backward compatiblity */ ++ size_t sz; ++ /* custom user-provided value fetchable through bpf_get_attach_cookie() */ ++ __u64 bpf_cookie; ++ /* function's offset to install kprobe to */ ++ size_t offset; ++ /* kprobe is return probe */ ++ bool retprobe; ++ size_t :0; ++}; ++#define bpf_kprobe_opts__last_field retprobe ++ ++LIBBPF_API struct bpf_link * ++bpf_program__attach_kprobe(const struct bpf_program *prog, bool retprobe, ++ const char *func_name); ++LIBBPF_API struct bpf_link * ++bpf_program__attach_kprobe_opts(const struct bpf_program *prog, ++ const char *func_name, ++ const struct bpf_kprobe_opts *opts); ++ ++struct bpf_kprobe_multi_opts { ++ /* size of this struct, for forward/backward compatibility */ ++ size_t sz; ++ /* array of function symbols to attach */ ++ const char **syms; ++ /* array of function addresses to attach */ ++ const unsigned long *addrs; ++ /* array of user-provided values fetchable through bpf_get_attach_cookie */ ++ const __u64 *cookies; ++ /* number of elements in syms/addrs/cookies arrays */ ++ size_t cnt; ++ /* create return kprobes */ ++ bool retprobe; ++ size_t :0; ++}; ++ ++#define bpf_kprobe_multi_opts__last_field retprobe ++ ++LIBBPF_API struct bpf_link * ++bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, ++ const char *pattern, ++ const struct bpf_kprobe_multi_opts *opts); ++ ++struct bpf_ksyscall_opts { ++ /* size of this struct, for forward/backward compatiblity */ ++ size_t sz; ++ /* custom user-provided value fetchable through bpf_get_attach_cookie() */ ++ __u64 bpf_cookie; ++ /* attach as return probe? */ ++ bool retprobe; ++ size_t :0; ++}; ++#define bpf_ksyscall_opts__last_field retprobe ++ ++/** ++ * @brief **bpf_program__attach_ksyscall()** attaches a BPF program ++ * to kernel syscall handler of a specified syscall. Optionally it's possible ++ * to request to install retprobe that will be triggered at syscall exit. It's ++ * also possible to associate BPF cookie (though options). ++ * ++ * Libbpf automatically will determine correct full kernel function name, ++ * which depending on system architecture and kernel version/configuration ++ * could be of the form ___sys_ or __se_sys_, and will ++ * attach specified program using kprobe/kretprobe mechanism. ++ * ++ * **bpf_program__attach_ksyscall()** is an API counterpart of declarative ++ * **SEC("ksyscall/")** annotation of BPF programs. 
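++ *
++ * For instance, attaching to the entry of the bpf() syscall with a BPF
++ * cookie might look like this (the cookie value is arbitrary):
++ *
++ *        DECLARE_LIBBPF_OPTS(bpf_ksyscall_opts, opts, .bpf_cookie = 123);
++ *        struct bpf_link *link;
++ *
++ *        link = bpf_program__attach_ksyscall(prog, "bpf", &opts);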
++ * ++ * At the moment **SEC("ksyscall")** and **bpf_program__attach_ksyscall()** do ++ * not handle all the calling convention quirks for mmap(), clone() and compat ++ * syscalls. It also only attaches to "native" syscall interfaces. If host ++ * system supports compat syscalls or defines 32-bit syscalls in 64-bit ++ * kernel, such syscall interfaces won't be attached to by libbpf. ++ * ++ * These limitations may or may not change in the future. Therefore it is ++ * recommended to use SEC("kprobe") for these syscalls or if working with ++ * compat and 32-bit interfaces is required. ++ * ++ * @param prog BPF program to attach ++ * @param syscall_name Symbolic name of the syscall (e.g., "bpf") ++ * @param opts Additional options (see **struct bpf_ksyscall_opts**) ++ * @return Reference to the newly created BPF link; or NULL is returned on ++ * error, error code is stored in errno ++ */ ++LIBBPF_API struct bpf_link * ++bpf_program__attach_ksyscall(const struct bpf_program *prog, ++ const char *syscall_name, ++ const struct bpf_ksyscall_opts *opts); ++ ++struct bpf_uprobe_opts { ++ /* size of this struct, for forward/backward compatiblity */ ++ size_t sz; ++ /* offset of kernel reference counted USDT semaphore, added in ++ * a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe") ++ */ ++ size_t ref_ctr_offset; ++ /* custom user-provided value fetchable through bpf_get_attach_cookie() */ ++ __u64 bpf_cookie; ++ /* uprobe is return probe, invoked at function return time */ ++ bool retprobe; ++ /* Function name to attach to. Could be an unqualified ("abc") or library-qualified ++ * "abc@LIBXYZ" name. To specify function entry, func_name should be set while ++ * func_offset argument to bpf_prog__attach_uprobe_opts() should be 0. To trace an ++ * offset within a function, specify func_name and use func_offset argument to specify ++ * offset within the function. Shared library functions must specify the shared library ++ * binary_path. ++ */ ++ const char *func_name; ++ size_t :0; ++}; ++#define bpf_uprobe_opts__last_field func_name ++ ++/** ++ * @brief **bpf_program__attach_uprobe()** attaches a BPF program ++ * to the userspace function which is found by binary path and ++ * offset. You can optionally specify a particular proccess to attach ++ * to. You can also optionally attach the program to the function ++ * exit instead of entry. ++ * ++ * @param prog BPF program to attach ++ * @param retprobe Attach to function exit ++ * @param pid Process ID to attach the uprobe to, 0 for self (own process), ++ * -1 for all processes ++ * @param binary_path Path to binary that contains the function symbol ++ * @param func_offset Offset within the binary of the function symbol ++ * @return Reference to the newly created BPF link; or NULL is returned on error, ++ * error code is stored in errno ++ */ ++LIBBPF_API struct bpf_link * ++bpf_program__attach_uprobe(const struct bpf_program *prog, bool retprobe, ++ pid_t pid, const char *binary_path, ++ size_t func_offset); ++ ++/** ++ * @brief **bpf_program__attach_uprobe_opts()** is just like ++ * bpf_program__attach_uprobe() except with a options struct ++ * for various configurations. 
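++ *
++ * For example, attaching by function name across all processes might
++ * look like this (library and function names are illustrative):
++ *
++ *        DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .func_name = "malloc");
++ *        struct bpf_link *link;
++ *
++ *        link = bpf_program__attach_uprobe_opts(prog, -1, "libc.so.6", 0, &opts);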
++ * ++ * @param prog BPF program to attach ++ * @param pid Process ID to attach the uprobe to, 0 for self (own process), ++ * -1 for all processes ++ * @param binary_path Path to binary that contains the function symbol ++ * @param func_offset Offset within the binary of the function symbol ++ * @param opts Options for altering program attachment ++ * @return Reference to the newly created BPF link; or NULL is returned on error, ++ * error code is stored in errno ++ */ ++LIBBPF_API struct bpf_link * ++bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, ++ const char *binary_path, size_t func_offset, ++ const struct bpf_uprobe_opts *opts); ++ ++struct bpf_usdt_opts { ++ /* size of this struct, for forward/backward compatibility */ ++ size_t sz; ++ /* custom user-provided value accessible through usdt_cookie() */ ++ __u64 usdt_cookie; ++ size_t :0; ++}; ++#define bpf_usdt_opts__last_field usdt_cookie ++ ++/** ++ * @brief **bpf_program__attach_usdt()** is just like ++ * bpf_program__attach_uprobe_opts() except it covers USDT (User-space ++ * Statically Defined Tracepoint) attachment, instead of attaching to ++ * user-space function entry or exit. ++ * ++ * @param prog BPF program to attach ++ * @param pid Process ID to attach the uprobe to, 0 for self (own process), ++ * -1 for all processes ++ * @param binary_path Path to binary that contains provided USDT probe ++ * @param usdt_provider USDT provider name ++ * @param usdt_name USDT probe name ++ * @param opts Options for altering program attachment ++ * @return Reference to the newly created BPF link; or NULL is returned on error, ++ * error code is stored in errno ++ */ ++LIBBPF_API struct bpf_link * ++bpf_program__attach_usdt(const struct bpf_program *prog, ++ pid_t pid, const char *binary_path, ++ const char *usdt_provider, const char *usdt_name, ++ const struct bpf_usdt_opts *opts); ++ ++struct bpf_tracepoint_opts { ++ /* size of this struct, for forward/backward compatiblity */ ++ size_t sz; ++ /* custom user-provided value fetchable through bpf_get_attach_cookie() */ ++ __u64 bpf_cookie; ++}; ++#define bpf_tracepoint_opts__last_field bpf_cookie ++ ++LIBBPF_API struct bpf_link * ++bpf_program__attach_tracepoint(const struct bpf_program *prog, ++ const char *tp_category, ++ const char *tp_name); ++LIBBPF_API struct bpf_link * ++bpf_program__attach_tracepoint_opts(const struct bpf_program *prog, ++ const char *tp_category, ++ const char *tp_name, ++ const struct bpf_tracepoint_opts *opts); ++ ++LIBBPF_API struct bpf_link * ++bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, ++ const char *tp_name); ++ ++struct bpf_trace_opts { ++ /* size of this struct, for forward/backward compatibility */ ++ size_t sz; ++ /* custom user-provided value fetchable through bpf_get_attach_cookie() */ ++ __u64 cookie; ++}; ++#define bpf_trace_opts__last_field cookie ++ ++LIBBPF_API struct bpf_link * ++bpf_program__attach_trace(const struct bpf_program *prog); ++LIBBPF_API struct bpf_link * ++bpf_program__attach_trace_opts(const struct bpf_program *prog, const struct bpf_trace_opts *opts); ++ ++LIBBPF_API struct bpf_link * ++bpf_program__attach_lsm(const struct bpf_program *prog); ++LIBBPF_API struct bpf_link * ++bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd); ++LIBBPF_API struct bpf_link * ++bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd); ++LIBBPF_API struct bpf_link * ++bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex); ++LIBBPF_API struct 
bpf_link * ++bpf_program__attach_freplace(const struct bpf_program *prog, ++ int target_fd, const char *attach_func_name); ++ ++struct bpf_map; ++ ++LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map); ++ ++struct bpf_iter_attach_opts { ++ size_t sz; /* size of this struct for forward/backward compatibility */ ++ union bpf_iter_link_info *link_info; ++ __u32 link_info_len; ++}; ++#define bpf_iter_attach_opts__last_field link_info_len ++ ++LIBBPF_API struct bpf_link * ++bpf_program__attach_iter(const struct bpf_program *prog, ++ const struct bpf_iter_attach_opts *opts); ++ ++LIBBPF_API enum bpf_prog_type bpf_program__type(const struct bpf_program *prog); ++ ++/** ++ * @brief **bpf_program__set_type()** sets the program ++ * type of the passed BPF program. ++ * @param prog BPF program to set the program type for ++ * @param type program type to set the BPF map to have ++ * @return error code; or 0 if no error. An error occurs ++ * if the object is already loaded. ++ * ++ * This must be called before the BPF object is loaded, ++ * otherwise it has no effect and an error is returned. ++ */ ++LIBBPF_API int bpf_program__set_type(struct bpf_program *prog, ++ enum bpf_prog_type type); ++ ++LIBBPF_API enum bpf_attach_type ++bpf_program__expected_attach_type(const struct bpf_program *prog); ++ ++/** ++ * @brief **bpf_program__set_expected_attach_type()** sets the ++ * attach type of the passed BPF program. This is used for ++ * auto-detection of attachment when programs are loaded. ++ * @param prog BPF program to set the attach type for ++ * @param type attach type to set the BPF map to have ++ * @return error code; or 0 if no error. An error occurs ++ * if the object is already loaded. ++ * ++ * This must be called before the BPF object is loaded, ++ * otherwise it has no effect and an error is returned. ++ */ ++LIBBPF_API int ++bpf_program__set_expected_attach_type(struct bpf_program *prog, ++ enum bpf_attach_type type); ++ ++LIBBPF_API __u32 bpf_program__flags(const struct bpf_program *prog); ++LIBBPF_API int bpf_program__set_flags(struct bpf_program *prog, __u32 flags); ++ ++/* Per-program log level and log buffer getters/setters. ++ * See bpf_object_open_opts comments regarding log_level and log_buf ++ * interactions. ++ */ ++LIBBPF_API __u32 bpf_program__log_level(const struct bpf_program *prog); ++LIBBPF_API int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level); ++LIBBPF_API const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size); ++LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size); ++ ++/** ++ * @brief **bpf_program__set_attach_target()** sets BTF-based attach target ++ * for supported BPF program types: ++ * - BTF-aware raw tracepoints (tp_btf); ++ * - fentry/fexit/fmod_ret; ++ * - lsm; ++ * - freplace. ++ * @param prog BPF program to set the attach type for ++ * @param type attach type to set the BPF map to have ++ * @return error code; or 0 if no error occurred. ++ */ ++LIBBPF_API int ++bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd, ++ const char *attach_func_name); ++ ++/** ++ * @brief **bpf_object__find_map_by_name()** returns BPF map of ++ * the given name, if it exists within the passed BPF object ++ * @param obj BPF object ++ * @param name name of the BPF map ++ * @return BPF map instance, if such map exists within the BPF object; ++ * or NULL otherwise. 
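++ *
++ * A short lookup sketch ("counters" is an assumed map name from the
++ * BPF-side code):
++ *
++ *        struct bpf_map *map = bpf_object__find_map_by_name(obj, "counters");
++ *
++ *        if (!map)
++ *                fprintf(stderr, "map 'counters' not found\n");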
++ */ ++LIBBPF_API struct bpf_map * ++bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name); ++ ++LIBBPF_API int ++bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name); ++ ++LIBBPF_API struct bpf_map * ++bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *map); ++ ++#define bpf_object__for_each_map(pos, obj) \ ++ for ((pos) = bpf_object__next_map((obj), NULL); \ ++ (pos) != NULL; \ ++ (pos) = bpf_object__next_map((obj), (pos))) ++#define bpf_map__for_each bpf_object__for_each_map ++ ++LIBBPF_API struct bpf_map * ++bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map); ++ ++/** ++ * @brief **bpf_map__set_autocreate()** sets whether libbpf has to auto-create ++ * BPF map during BPF object load phase. ++ * @param map the BPF map instance ++ * @param autocreate whether to create BPF map during BPF object load ++ * @return 0 on success; -EBUSY if BPF object was already loaded ++ * ++ * **bpf_map__set_autocreate()** allows to opt-out from libbpf auto-creating ++ * BPF map. By default, libbpf will attempt to create every single BPF map ++ * defined in BPF object file using BPF_MAP_CREATE command of bpf() syscall ++ * and fill in map FD in BPF instructions. ++ * ++ * This API allows to opt-out of this process for specific map instance. This ++ * can be useful if host kernel doesn't support such BPF map type or used ++ * combination of flags and user application wants to avoid creating such ++ * a map in the first place. User is still responsible to make sure that their ++ * BPF-side code that expects to use such missing BPF map is recognized by BPF ++ * verifier as dead code, otherwise BPF verifier will reject such BPF program. ++ */ ++LIBBPF_API int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate); ++LIBBPF_API bool bpf_map__autocreate(const struct bpf_map *map); ++ ++/** ++ * @brief **bpf_map__fd()** gets the file descriptor of the passed ++ * BPF map ++ * @param map the BPF map instance ++ * @return the file descriptor; or -EINVAL in case of an error ++ */ ++LIBBPF_API int bpf_map__fd(const struct bpf_map *map); ++LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); ++/* get map name */ ++LIBBPF_API const char *bpf_map__name(const struct bpf_map *map); ++/* get/set map type */ ++LIBBPF_API enum bpf_map_type bpf_map__type(const struct bpf_map *map); ++LIBBPF_API int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type); ++/* get/set map size (max_entries) */ ++LIBBPF_API __u32 bpf_map__max_entries(const struct bpf_map *map); ++LIBBPF_API int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries); ++/* get/set map flags */ ++LIBBPF_API __u32 bpf_map__map_flags(const struct bpf_map *map); ++LIBBPF_API int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags); ++/* get/set map NUMA node */ ++LIBBPF_API __u32 bpf_map__numa_node(const struct bpf_map *map); ++LIBBPF_API int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node); ++/* get/set map key size */ ++LIBBPF_API __u32 bpf_map__key_size(const struct bpf_map *map); ++LIBBPF_API int bpf_map__set_key_size(struct bpf_map *map, __u32 size); ++/* get/set map value size */ ++LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map); ++LIBBPF_API int bpf_map__set_value_size(struct bpf_map *map, __u32 size); ++/* get map key/value BTF type IDs */ ++LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map); ++LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map); ++/* get/set 
map if_index */ ++LIBBPF_API __u32 bpf_map__ifindex(const struct bpf_map *map); ++LIBBPF_API int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); ++/* get/set map map_extra flags */ ++LIBBPF_API __u64 bpf_map__map_extra(const struct bpf_map *map); ++LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra); ++ ++LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map, ++ const void *data, size_t size); ++LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize); ++ ++/** ++ * @brief **bpf_map__is_internal()** tells the caller whether or not the ++ * passed map is a special map created by libbpf automatically for things like ++ * global variables, __ksym externs, Kconfig values, etc ++ * @param map the bpf_map ++ * @return true, if the map is an internal map; false, otherwise ++ */ ++LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map); ++LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path); ++LIBBPF_API const char *bpf_map__pin_path(const struct bpf_map *map); ++LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map); ++LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); ++LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); ++ ++LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd); ++LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map); ++ ++/** ++ * @brief **bpf_map__lookup_elem()** allows to lookup BPF map value ++ * corresponding to provided key. ++ * @param map BPF map to lookup element in ++ * @param key pointer to memory containing bytes of the key used for lookup ++ * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** ++ * @param value pointer to memory in which looked up value will be stored ++ * @param value_sz size in byte of value data memory; it has to match BPF map ++ * definition's **value_size**. For per-CPU BPF maps value size has to be ++ * a product of BPF map value size and number of possible CPUs in the system ++ * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for ++ * per-CPU values value size has to be aligned up to closest 8 bytes for ++ * alignment reasons, so expected size is: `round_up(value_size, 8) ++ * * libbpf_num_possible_cpus()`. ++ * @flags extra flags passed to kernel for this operation ++ * @return 0, on success; negative error, otherwise ++ * ++ * **bpf_map__lookup_elem()** is high-level equivalent of ++ * **bpf_map_lookup_elem()** API with added check for key and value size. ++ */ ++LIBBPF_API int bpf_map__lookup_elem(const struct bpf_map *map, ++ const void *key, size_t key_sz, ++ void *value, size_t value_sz, __u64 flags); ++ ++/** ++ * @brief **bpf_map__update_elem()** allows to insert or update value in BPF ++ * map that corresponds to provided key. ++ * @param map BPF map to insert to or update element in ++ * @param key pointer to memory containing bytes of the key ++ * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** ++ * @param value pointer to memory containing bytes of the value ++ * @param value_sz size in byte of value data memory; it has to match BPF map ++ * definition's **value_size**. For per-CPU BPF maps value size has to be ++ * a product of BPF map value size and number of possible CPUs in the system ++ * (could be fetched with **libbpf_num_possible_cpus()**). 
Note also that for ++ * per-CPU values value size has to be aligned up to closest 8 bytes for ++ * alignment reasons, so expected size is: `round_up(value_size, 8) ++ * * libbpf_num_possible_cpus()`. ++ * @flags extra flags passed to kernel for this operation ++ * @return 0, on success; negative error, otherwise ++ * ++ * **bpf_map__update_elem()** is high-level equivalent of ++ * **bpf_map_update_elem()** API with added check for key and value size. ++ */ ++LIBBPF_API int bpf_map__update_elem(const struct bpf_map *map, ++ const void *key, size_t key_sz, ++ const void *value, size_t value_sz, __u64 flags); ++ ++/** ++ * @brief **bpf_map__delete_elem()** allows to delete element in BPF map that ++ * corresponds to provided key. ++ * @param map BPF map to delete element from ++ * @param key pointer to memory containing bytes of the key ++ * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** ++ * @flags extra flags passed to kernel for this operation ++ * @return 0, on success; negative error, otherwise ++ * ++ * **bpf_map__delete_elem()** is high-level equivalent of ++ * **bpf_map_delete_elem()** API with added check for key size. ++ */ ++LIBBPF_API int bpf_map__delete_elem(const struct bpf_map *map, ++ const void *key, size_t key_sz, __u64 flags); ++ ++/** ++ * @brief **bpf_map__lookup_and_delete_elem()** allows to lookup BPF map value ++ * corresponding to provided key and atomically delete it afterwards. ++ * @param map BPF map to lookup element in ++ * @param key pointer to memory containing bytes of the key used for lookup ++ * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** ++ * @param value pointer to memory in which looked up value will be stored ++ * @param value_sz size in byte of value data memory; it has to match BPF map ++ * definition's **value_size**. For per-CPU BPF maps value size has to be ++ * a product of BPF map value size and number of possible CPUs in the system ++ * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for ++ * per-CPU values value size has to be aligned up to closest 8 bytes for ++ * alignment reasons, so expected size is: `round_up(value_size, 8) ++ * * libbpf_num_possible_cpus()`. ++ * @flags extra flags passed to kernel for this operation ++ * @return 0, on success; negative error, otherwise ++ * ++ * **bpf_map__lookup_and_delete_elem()** is high-level equivalent of ++ * **bpf_map_lookup_and_delete_elem()** API with added check for key and value size. ++ */ ++LIBBPF_API int bpf_map__lookup_and_delete_elem(const struct bpf_map *map, ++ const void *key, size_t key_sz, ++ void *value, size_t value_sz, __u64 flags); ++ ++/** ++ * @brief **bpf_map__get_next_key()** allows to iterate BPF map keys by ++ * fetching next key that follows current key. ++ * @param map BPF map to fetch next key from ++ * @param cur_key pointer to memory containing bytes of current key or NULL to ++ * fetch the first key ++ * @param next_key pointer to memory to write next key into ++ * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** ++ * @return 0, on success; -ENOENT if **cur_key** is the last key in BPF map; ++ * negative error, otherwise ++ * ++ * **bpf_map__get_next_key()** is high-level equivalent of ++ * **bpf_map_get_next_key()** API with added check for key size. 
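++ *
++ * Iterating all keys of a map with __u32 keys could look roughly like
++ * this (error handling trimmed):
++ *
++ *        __u32 cur, next;
++ *        int err;
++ *
++ *        for (err = bpf_map__get_next_key(map, NULL, &next, sizeof(next));
++ *             !err;
++ *             err = bpf_map__get_next_key(map, &cur, &next, sizeof(next))) {
++ *                cur = next;
++ *                // "cur" is a valid key of the map here
++ *        }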
++ */ ++LIBBPF_API int bpf_map__get_next_key(const struct bpf_map *map, ++ const void *cur_key, void *next_key, size_t key_sz); ++ ++struct bpf_xdp_set_link_opts { ++ size_t sz; ++ int old_fd; ++ size_t :0; ++}; ++#define bpf_xdp_set_link_opts__last_field old_fd ++ ++struct bpf_xdp_attach_opts { ++ size_t sz; ++ int old_prog_fd; ++ size_t :0; ++}; ++#define bpf_xdp_attach_opts__last_field old_prog_fd ++ ++struct bpf_xdp_query_opts { ++ size_t sz; ++ __u32 prog_id; /* output */ ++ __u32 drv_prog_id; /* output */ ++ __u32 hw_prog_id; /* output */ ++ __u32 skb_prog_id; /* output */ ++ __u8 attach_mode; /* output */ ++ size_t :0; ++}; ++#define bpf_xdp_query_opts__last_field attach_mode ++ ++LIBBPF_API int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags, ++ const struct bpf_xdp_attach_opts *opts); ++LIBBPF_API int bpf_xdp_detach(int ifindex, __u32 flags, ++ const struct bpf_xdp_attach_opts *opts); ++LIBBPF_API int bpf_xdp_query(int ifindex, int flags, struct bpf_xdp_query_opts *opts); ++LIBBPF_API int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id); ++ ++/* TC related API */ ++enum bpf_tc_attach_point { ++ BPF_TC_INGRESS = 1 << 0, ++ BPF_TC_EGRESS = 1 << 1, ++ BPF_TC_CUSTOM = 1 << 2, ++}; ++ ++#define BPF_TC_PARENT(a, b) \ ++ ((((a) << 16) & 0xFFFF0000U) | ((b) & 0x0000FFFFU)) ++ ++enum bpf_tc_flags { ++ BPF_TC_F_REPLACE = 1 << 0, ++}; ++ ++struct bpf_tc_hook { ++ size_t sz; ++ int ifindex; ++ enum bpf_tc_attach_point attach_point; ++ __u32 parent; ++ size_t :0; ++}; ++#define bpf_tc_hook__last_field parent ++ ++struct bpf_tc_opts { ++ size_t sz; ++ int prog_fd; ++ __u32 flags; ++ __u32 prog_id; ++ __u32 handle; ++ __u32 priority; ++ size_t :0; ++}; ++#define bpf_tc_opts__last_field priority ++ ++LIBBPF_API int bpf_tc_hook_create(struct bpf_tc_hook *hook); ++LIBBPF_API int bpf_tc_hook_destroy(struct bpf_tc_hook *hook); ++LIBBPF_API int bpf_tc_attach(const struct bpf_tc_hook *hook, ++ struct bpf_tc_opts *opts); ++LIBBPF_API int bpf_tc_detach(const struct bpf_tc_hook *hook, ++ const struct bpf_tc_opts *opts); ++LIBBPF_API int bpf_tc_query(const struct bpf_tc_hook *hook, ++ struct bpf_tc_opts *opts); ++ ++/* Ring buffer APIs */ ++struct ring_buffer; ++ ++typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size); ++ ++struct ring_buffer_opts { ++ size_t sz; /* size of this struct, for forward/backward compatiblity */ ++}; ++ ++#define ring_buffer_opts__last_field sz ++ ++LIBBPF_API struct ring_buffer * ++ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx, ++ const struct ring_buffer_opts *opts); ++LIBBPF_API void ring_buffer__free(struct ring_buffer *rb); ++LIBBPF_API int ring_buffer__add(struct ring_buffer *rb, int map_fd, ++ ring_buffer_sample_fn sample_cb, void *ctx); ++LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms); ++LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb); ++LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb); ++ ++/* Perf buffer APIs */ ++struct perf_buffer; ++ ++typedef void (*perf_buffer_sample_fn)(void *ctx, int cpu, ++ void *data, __u32 size); ++typedef void (*perf_buffer_lost_fn)(void *ctx, int cpu, __u64 cnt); ++ ++/* common use perf buffer options */ ++struct perf_buffer_opts { ++ size_t sz; ++}; ++#define perf_buffer_opts__last_field sz ++ ++/** ++ * @brief **perf_buffer__new()** creates BPF perfbuf manager for a specified ++ * BPF_PERF_EVENT_ARRAY map ++ * @param map_fd FD of BPF_PERF_EVENT_ARRAY BPF map that will be used by BPF ++ * code to send data over to user-space ++ 
* @param page_cnt number of memory pages allocated for each per-CPU buffer ++ * @param sample_cb function called on each received data record ++ * @param lost_cb function called when record loss has occurred ++ * @param ctx user-provided extra context passed into *sample_cb* and *lost_cb* ++ * @return a new instance of struct perf_buffer on success, NULL on error with ++ * *errno* containing an error code ++ */ ++LIBBPF_API struct perf_buffer * ++perf_buffer__new(int map_fd, size_t page_cnt, ++ perf_buffer_sample_fn sample_cb, perf_buffer_lost_fn lost_cb, void *ctx, ++ const struct perf_buffer_opts *opts); ++ ++enum bpf_perf_event_ret { ++ LIBBPF_PERF_EVENT_DONE = 0, ++ LIBBPF_PERF_EVENT_ERROR = -1, ++ LIBBPF_PERF_EVENT_CONT = -2, ++}; ++ ++struct perf_event_header; ++ ++typedef enum bpf_perf_event_ret ++(*perf_buffer_event_fn)(void *ctx, int cpu, struct perf_event_header *event); ++ ++/* raw perf buffer options, giving most power and control */ ++struct perf_buffer_raw_opts { ++ size_t sz; ++ long :0; ++ long :0; ++ /* if cpu_cnt == 0, open all on all possible CPUs (up to the number of ++ * max_entries of given PERF_EVENT_ARRAY map) ++ */ ++ int cpu_cnt; ++ /* if cpu_cnt > 0, cpus is an array of CPUs to open ring buffers on */ ++ int *cpus; ++ /* if cpu_cnt > 0, map_keys specify map keys to set per-CPU FDs for */ ++ int *map_keys; ++}; ++#define perf_buffer_raw_opts__last_field map_keys ++ ++struct perf_event_attr; ++ ++LIBBPF_API struct perf_buffer * ++perf_buffer__new_raw(int map_fd, size_t page_cnt, struct perf_event_attr *attr, ++ perf_buffer_event_fn event_cb, void *ctx, ++ const struct perf_buffer_raw_opts *opts); ++ ++LIBBPF_API void perf_buffer__free(struct perf_buffer *pb); ++LIBBPF_API int perf_buffer__epoll_fd(const struct perf_buffer *pb); ++LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms); ++LIBBPF_API int perf_buffer__consume(struct perf_buffer *pb); ++LIBBPF_API int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx); ++LIBBPF_API size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb); ++LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx); ++/** ++ * @brief **perf_buffer__buffer()** returns the per-cpu raw mmap()'ed underlying ++ * memory region of the ring buffer. ++ * This ring buffer can be used to implement a custom events consumer. ++ * The ring buffer starts with the *struct perf_event_mmap_page*, which ++ * holds the ring buffer managment fields, when accessing the header ++ * structure it's important to be SMP aware. ++ * You can refer to *perf_event_read_simple* for a simple example. 
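++ *
++ * A rough usage sketch (*pb* is an already created perf buffer and
++ * consume_cpu_buffer() is a stand-in for the caller's own consumer logic):
++ *
++ *	void *base;
++ *	size_t buf_sz, i;
++ *
++ *	for (i = 0; i < perf_buffer__buffer_cnt(pb); i++) {
++ *		if (perf_buffer__buffer(pb, i, &base, &buf_sz) < 0)
++ *			continue;
++ *		consume_cpu_buffer(base, buf_sz);
++ *	}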
++ * @param pb the perf buffer structure ++ * @param buf_idx the buffer index to retreive ++ * @param buf (out) gets the base pointer of the mmap()'ed memory ++ * @param buf_size (out) gets the size of the mmap()'ed region ++ * @return 0 on success, negative error code for failure ++ */ ++LIBBPF_API int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, ++ size_t *buf_size); ++ ++struct bpf_prog_linfo; ++struct bpf_prog_info; ++ ++LIBBPF_API void bpf_prog_linfo__free(struct bpf_prog_linfo *prog_linfo); ++LIBBPF_API struct bpf_prog_linfo * ++bpf_prog_linfo__new(const struct bpf_prog_info *info); ++LIBBPF_API const struct bpf_line_info * ++bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo, ++ __u64 addr, __u32 func_idx, __u32 nr_skip); ++LIBBPF_API const struct bpf_line_info * ++bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo, ++ __u32 insn_off, __u32 nr_skip); ++ ++/* ++ * Probe for supported system features ++ * ++ * Note that running many of these probes in a short amount of time can cause ++ * the kernel to reach the maximal size of lockable memory allowed for the ++ * user, causing subsequent probes to fail. In this case, the caller may want ++ * to adjust that limit with setrlimit(). ++ */ ++ ++/** ++ * @brief **libbpf_probe_bpf_prog_type()** detects if host kernel supports ++ * BPF programs of a given type. ++ * @param prog_type BPF program type to detect kernel support for ++ * @param opts reserved for future extensibility, should be NULL ++ * @return 1, if given program type is supported; 0, if given program type is ++ * not supported; negative error code if feature detection failed or can't be ++ * performed ++ * ++ * Make sure the process has required set of CAP_* permissions (or runs as ++ * root) when performing feature checking. ++ */ ++LIBBPF_API int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts); ++/** ++ * @brief **libbpf_probe_bpf_map_type()** detects if host kernel supports ++ * BPF maps of a given type. ++ * @param map_type BPF map type to detect kernel support for ++ * @param opts reserved for future extensibility, should be NULL ++ * @return 1, if given map type is supported; 0, if given map type is ++ * not supported; negative error code if feature detection failed or can't be ++ * performed ++ * ++ * Make sure the process has required set of CAP_* permissions (or runs as ++ * root) when performing feature checking. ++ */ ++LIBBPF_API int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void *opts); ++/** ++ * @brief **libbpf_probe_bpf_helper()** detects if host kernel supports the ++ * use of a given BPF helper from specified BPF program type. ++ * @param prog_type BPF program type used to check the support of BPF helper ++ * @param helper_id BPF helper ID (enum bpf_func_id) to check support for ++ * @param opts reserved for future extensibility, should be NULL ++ * @return 1, if given combination of program type and helper is supported; 0, ++ * if the combination is not supported; negative error code if feature ++ * detection for provided input arguments failed or can't be performed ++ * ++ * Make sure the process has required set of CAP_* permissions (or runs as ++ * root) when performing feature checking. 
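++ *
++ * Example usage (a minimal sketch; the program type and helper ID are
++ * arbitrary picks for illustration):
++ *
++ *	int ret = libbpf_probe_bpf_helper(BPF_PROG_TYPE_XDP,
++ *					  BPF_FUNC_get_current_cgroup_id,
++ *					  NULL);
++ *	ret > 0 means the helper can be used from XDP programs on this
++ *	kernel, ret == 0 means it can't, ret < 0 means the probe failed.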
++ */ ++LIBBPF_API int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, ++ enum bpf_func_id helper_id, const void *opts); ++ ++/** ++ * @brief **libbpf_num_possible_cpus()** is a helper function to get the ++ * number of possible CPUs that the host kernel supports and expects. ++ * @return number of possible CPUs; or error code on failure ++ * ++ * Example usage: ++ * ++ * int ncpus = libbpf_num_possible_cpus(); ++ * if (ncpus < 0) { ++ * // error handling ++ * } ++ * long values[ncpus]; ++ * bpf_map_lookup_elem(per_cpu_map_fd, key, values); ++ */ ++LIBBPF_API int libbpf_num_possible_cpus(void); ++ ++struct bpf_map_skeleton { ++ const char *name; ++ struct bpf_map **map; ++ void **mmaped; ++}; ++ ++struct bpf_prog_skeleton { ++ const char *name; ++ struct bpf_program **prog; ++ struct bpf_link **link; ++}; ++ ++struct bpf_object_skeleton { ++ size_t sz; /* size of this struct, for forward/backward compatibility */ ++ ++ const char *name; ++ const void *data; ++ size_t data_sz; ++ ++ struct bpf_object **obj; ++ ++ int map_cnt; ++ int map_skel_sz; /* sizeof(struct bpf_map_skeleton) */ ++ struct bpf_map_skeleton *maps; ++ ++ int prog_cnt; ++ int prog_skel_sz; /* sizeof(struct bpf_prog_skeleton) */ ++ struct bpf_prog_skeleton *progs; ++}; ++ ++LIBBPF_API int ++bpf_object__open_skeleton(struct bpf_object_skeleton *s, ++ const struct bpf_object_open_opts *opts); ++LIBBPF_API int bpf_object__load_skeleton(struct bpf_object_skeleton *s); ++LIBBPF_API int bpf_object__attach_skeleton(struct bpf_object_skeleton *s); ++LIBBPF_API void bpf_object__detach_skeleton(struct bpf_object_skeleton *s); ++LIBBPF_API void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s); ++ ++struct bpf_var_skeleton { ++ const char *name; ++ struct bpf_map **map; ++ void **addr; ++}; ++ ++struct bpf_object_subskeleton { ++ size_t sz; /* size of this struct, for forward/backward compatibility */ ++ ++ const struct bpf_object *obj; ++ ++ int map_cnt; ++ int map_skel_sz; /* sizeof(struct bpf_map_skeleton) */ ++ struct bpf_map_skeleton *maps; ++ ++ int prog_cnt; ++ int prog_skel_sz; /* sizeof(struct bpf_prog_skeleton) */ ++ struct bpf_prog_skeleton *progs; ++ ++ int var_cnt; ++ int var_skel_sz; /* sizeof(struct bpf_var_skeleton) */ ++ struct bpf_var_skeleton *vars; ++}; ++ ++LIBBPF_API int ++bpf_object__open_subskeleton(struct bpf_object_subskeleton *s); ++LIBBPF_API void ++bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s); ++ ++struct gen_loader_opts { ++ size_t sz; /* size of this struct, for forward/backward compatiblity */ ++ const char *data; ++ const char *insns; ++ __u32 data_sz; ++ __u32 insns_sz; ++}; ++ ++#define gen_loader_opts__last_field insns_sz ++LIBBPF_API int bpf_object__gen_loader(struct bpf_object *obj, ++ struct gen_loader_opts *opts); ++ ++enum libbpf_tristate { ++ TRI_NO = 0, ++ TRI_YES = 1, ++ TRI_MODULE = 2, ++}; ++ ++struct bpf_linker_opts { ++ /* size of this struct, for forward/backward compatiblity */ ++ size_t sz; ++}; ++#define bpf_linker_opts__last_field sz ++ ++struct bpf_linker_file_opts { ++ /* size of this struct, for forward/backward compatiblity */ ++ size_t sz; ++}; ++#define bpf_linker_file_opts__last_field sz ++ ++struct bpf_linker; ++ ++LIBBPF_API struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts *opts); ++LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, ++ const char *filename, ++ const struct bpf_linker_file_opts *opts); ++LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); ++LIBBPF_API void 
bpf_linker__free(struct bpf_linker *linker); ++ ++/* ++ * Custom handling of BPF program's SEC() definitions ++ */ ++ ++struct bpf_prog_load_opts; /* defined in bpf.h */ ++ ++/* Called during bpf_object__open() for each recognized BPF program. Callback ++ * can use various bpf_program__set_*() setters to adjust whatever properties ++ * are necessary. ++ */ ++typedef int (*libbpf_prog_setup_fn_t)(struct bpf_program *prog, long cookie); ++ ++/* Called right before libbpf performs bpf_prog_load() to load BPF program ++ * into the kernel. Callback can adjust opts as necessary. ++ */ ++typedef int (*libbpf_prog_prepare_load_fn_t)(struct bpf_program *prog, ++ struct bpf_prog_load_opts *opts, long cookie); ++ ++/* Called during skeleton attach or through bpf_program__attach(). If ++ * auto-attach is not supported, callback should return 0 and set link to ++ * NULL (it's not considered an error during skeleton attach, but it will be ++ * an error for bpf_program__attach() calls). On error, error should be ++ * returned directly and link set to NULL. On success, return 0 and set link ++ * to a valid struct bpf_link. ++ */ ++typedef int (*libbpf_prog_attach_fn_t)(const struct bpf_program *prog, long cookie, ++ struct bpf_link **link); ++ ++struct libbpf_prog_handler_opts { ++ /* size of this struct, for forward/backward compatiblity */ ++ size_t sz; ++ /* User-provided value that is passed to prog_setup_fn, ++ * prog_prepare_load_fn, and prog_attach_fn callbacks. Allows user to ++ * register one set of callbacks for multiple SEC() definitions and ++ * still be able to distinguish them, if necessary. For example, ++ * libbpf itself is using this to pass necessary flags (e.g., ++ * sleepable flag) to a common internal SEC() handler. ++ */ ++ long cookie; ++ /* BPF program initialization callback (see libbpf_prog_setup_fn_t). ++ * Callback is optional, pass NULL if it's not necessary. ++ */ ++ libbpf_prog_setup_fn_t prog_setup_fn; ++ /* BPF program loading callback (see libbpf_prog_prepare_load_fn_t). ++ * Callback is optional, pass NULL if it's not necessary. ++ */ ++ libbpf_prog_prepare_load_fn_t prog_prepare_load_fn; ++ /* BPF program attach callback (see libbpf_prog_attach_fn_t). ++ * Callback is optional, pass NULL if it's not necessary. ++ */ ++ libbpf_prog_attach_fn_t prog_attach_fn; ++}; ++#define libbpf_prog_handler_opts__last_field prog_attach_fn ++ ++/** ++ * @brief **libbpf_register_prog_handler()** registers a custom BPF program ++ * SEC() handler. ++ * @param sec section prefix for which custom handler is registered ++ * @param prog_type BPF program type associated with specified section ++ * @param exp_attach_type Expected BPF attach type associated with specified section ++ * @param opts optional cookie, callbacks, and other extra options ++ * @return Non-negative handler ID is returned on success. This handler ID has ++ * to be passed to *libbpf_unregister_prog_handler()* to unregister such ++ * custom handler. Negative error code is returned on error. ++ * ++ * *sec* defines which SEC() definitions are handled by this custom handler ++ * registration. *sec* can have few different forms: ++ * - if *sec* is just a plain string (e.g., "abc"), it will match only ++ * SEC("abc"). 
If BPF program specifies SEC("abc/whatever") it will result ++ * in an error; ++ * - if *sec* is of the form "abc/", proper SEC() form is ++ * SEC("abc/something"), where acceptable "something" should be checked by ++ * *prog_init_fn* callback, if there are additional restrictions; ++ * - if *sec* is of the form "abc+", it will successfully match both ++ * SEC("abc") and SEC("abc/whatever") forms; ++ * - if *sec* is NULL, custom handler is registered for any BPF program that ++ * doesn't match any of the registered (custom or libbpf's own) SEC() ++ * handlers. There could be only one such generic custom handler registered ++ * at any given time. ++ * ++ * All custom handlers (except the one with *sec* == NULL) are processed ++ * before libbpf's own SEC() handlers. It is allowed to "override" libbpf's ++ * SEC() handlers by registering custom ones for the same section prefix ++ * (i.e., it's possible to have custom SEC("perf_event/LLC-load-misses") ++ * handler). ++ * ++ * Note, like much of global libbpf APIs (e.g., libbpf_set_print(), ++ * libbpf_set_strict_mode(), etc)) these APIs are not thread-safe. User needs ++ * to ensure synchronization if there is a risk of running this API from ++ * multiple threads simultaneously. ++ */ ++LIBBPF_API int libbpf_register_prog_handler(const char *sec, ++ enum bpf_prog_type prog_type, ++ enum bpf_attach_type exp_attach_type, ++ const struct libbpf_prog_handler_opts *opts); ++/** ++ * @brief *libbpf_unregister_prog_handler()* unregisters previously registered ++ * custom BPF program SEC() handler. ++ * @param handler_id handler ID returned by *libbpf_register_prog_handler()* ++ * after successful registration ++ * @return 0 on success, negative error code if handler isn't found ++ * ++ * Note, like much of global libbpf APIs (e.g., libbpf_set_print(), ++ * libbpf_set_strict_mode(), etc)) these APIs are not thread-safe. User needs ++ * to ensure synchronization if there is a risk of running this API from ++ * multiple threads simultaneously. 
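++ *
++ * Example usage (a minimal sketch; the "myprobe+" prefix and the plain
++ * kprobe program type are arbitrary, and a real handler would typically
++ * pass callbacks through *opts* instead of NULL):
++ *
++ *	int id = libbpf_register_prog_handler("myprobe+",
++ *					      BPF_PROG_TYPE_KPROBE, 0, NULL);
++ *	if (id >= 0) {
++ *		open and load BPF objects that use SEC("myprobe") here, then:
++ *		libbpf_unregister_prog_handler(id);
++ *	}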
++ */ ++LIBBPF_API int libbpf_unregister_prog_handler(int handler_id); ++ ++#ifdef __cplusplus ++} /* extern "C" */ ++#endif ++ ++#endif /* __LIBBPF_LIBBPF_H */ +diff --git a/src/cc/libbpf/src/libbpf.map b/src/cc/libbpf/src/libbpf.map +new file mode 100644 +index 0000000..119e6e1 +--- /dev/null ++++ b/src/cc/libbpf/src/libbpf.map +@@ -0,0 +1,368 @@ ++LIBBPF_0.0.1 { ++ global: ++ bpf_btf_get_fd_by_id; ++ bpf_map__btf_key_type_id; ++ bpf_map__btf_value_type_id; ++ bpf_map__fd; ++ bpf_map__name; ++ bpf_map__pin; ++ bpf_map__reuse_fd; ++ bpf_map__set_ifindex; ++ bpf_map__set_inner_map_fd; ++ bpf_map__unpin; ++ bpf_map_delete_elem; ++ bpf_map_get_fd_by_id; ++ bpf_map_get_next_id; ++ bpf_map_get_next_key; ++ bpf_map_lookup_and_delete_elem; ++ bpf_map_lookup_elem; ++ bpf_map_update_elem; ++ bpf_obj_get; ++ bpf_obj_get_info_by_fd; ++ bpf_obj_pin; ++ bpf_object__btf_fd; ++ bpf_object__close; ++ bpf_object__find_map_by_name; ++ bpf_object__kversion; ++ bpf_object__load; ++ bpf_object__name; ++ bpf_object__open; ++ bpf_object__pin; ++ bpf_object__pin_maps; ++ bpf_object__pin_programs; ++ bpf_object__unpin_maps; ++ bpf_object__unpin_programs; ++ bpf_prog_attach; ++ bpf_prog_detach; ++ bpf_prog_detach2; ++ bpf_prog_get_fd_by_id; ++ bpf_prog_get_next_id; ++ bpf_prog_query; ++ bpf_program__fd; ++ bpf_program__pin; ++ bpf_program__set_expected_attach_type; ++ bpf_program__set_ifindex; ++ bpf_program__set_type; ++ bpf_program__unload; ++ bpf_program__unpin; ++ bpf_prog_linfo__free; ++ bpf_prog_linfo__new; ++ bpf_prog_linfo__lfind_addr_func; ++ bpf_prog_linfo__lfind; ++ bpf_raw_tracepoint_open; ++ bpf_task_fd_query; ++ btf__fd; ++ btf__find_by_name; ++ btf__free; ++ btf__name_by_offset; ++ btf__new; ++ btf__resolve_size; ++ btf__resolve_type; ++ btf__type_by_id; ++ libbpf_attach_type_by_name; ++ libbpf_get_error; ++ libbpf_prog_type_by_name; ++ libbpf_set_print; ++ libbpf_strerror; ++ local: ++ *; ++}; ++ ++LIBBPF_0.0.2 { ++ global: ++ bpf_map_lookup_elem_flags; ++ bpf_object__btf; ++ bpf_object__find_map_fd_by_name; ++ btf__get_raw_data; ++ btf_ext__free; ++ btf_ext__get_raw_data; ++ btf_ext__new; ++} LIBBPF_0.0.1; ++ ++LIBBPF_0.0.3 { ++ global: ++ bpf_map__is_internal; ++ bpf_map_freeze; ++} LIBBPF_0.0.2; ++ ++LIBBPF_0.0.4 { ++ global: ++ bpf_link__destroy; ++ bpf_program__attach_kprobe; ++ bpf_program__attach_perf_event; ++ bpf_program__attach_raw_tracepoint; ++ bpf_program__attach_tracepoint; ++ bpf_program__attach_uprobe; ++ btf_dump__dump_type; ++ btf_dump__free; ++ btf__parse_elf; ++ libbpf_num_possible_cpus; ++ perf_buffer__free; ++ perf_buffer__poll; ++} LIBBPF_0.0.3; ++ ++LIBBPF_0.0.5 { ++ global: ++ bpf_btf_get_next_id; ++} LIBBPF_0.0.4; ++ ++LIBBPF_0.0.6 { ++ global: ++ bpf_map__get_pin_path; ++ bpf_map__is_pinned; ++ bpf_map__set_pin_path; ++ bpf_object__open_file; ++ bpf_object__open_mem; ++ bpf_program__attach_trace; ++ bpf_program__get_expected_attach_type; ++ bpf_program__get_type; ++ btf__find_by_name_kind; ++ libbpf_find_vmlinux_btf_id; ++} LIBBPF_0.0.5; ++ ++LIBBPF_0.0.7 { ++ global: ++ btf_dump__emit_type_decl; ++ bpf_link__disconnect; ++ bpf_map__attach_struct_ops; ++ bpf_map_delete_batch; ++ bpf_map_lookup_and_delete_batch; ++ bpf_map_lookup_batch; ++ bpf_map_update_batch; ++ bpf_object__find_program_by_name; ++ bpf_object__attach_skeleton; ++ bpf_object__destroy_skeleton; ++ bpf_object__detach_skeleton; ++ bpf_object__load_skeleton; ++ bpf_object__open_skeleton; ++ bpf_program__attach; ++ bpf_program__name; ++ btf__align_of; ++ libbpf_find_kernel_btf; ++} LIBBPF_0.0.6; ++ 
++LIBBPF_0.0.8 { ++ global: ++ bpf_link__fd; ++ bpf_link__open; ++ bpf_link__pin; ++ bpf_link__pin_path; ++ bpf_link__unpin; ++ bpf_link__update_program; ++ bpf_link_create; ++ bpf_link_update; ++ bpf_map__set_initial_value; ++ bpf_prog_attach_opts; ++ bpf_program__attach_cgroup; ++ bpf_program__attach_lsm; ++ bpf_program__set_attach_target; ++} LIBBPF_0.0.7; ++ ++LIBBPF_0.0.9 { ++ global: ++ bpf_enable_stats; ++ bpf_iter_create; ++ bpf_link_get_fd_by_id; ++ bpf_link_get_next_id; ++ bpf_program__attach_iter; ++ bpf_program__attach_netns; ++ perf_buffer__consume; ++ ring_buffer__add; ++ ring_buffer__consume; ++ ring_buffer__free; ++ ring_buffer__new; ++ ring_buffer__poll; ++} LIBBPF_0.0.8; ++ ++LIBBPF_0.1.0 { ++ global: ++ bpf_link__detach; ++ bpf_link_detach; ++ bpf_map__ifindex; ++ bpf_map__key_size; ++ bpf_map__map_flags; ++ bpf_map__max_entries; ++ bpf_map__numa_node; ++ bpf_map__set_key_size; ++ bpf_map__set_map_flags; ++ bpf_map__set_max_entries; ++ bpf_map__set_numa_node; ++ bpf_map__set_type; ++ bpf_map__set_value_size; ++ bpf_map__type; ++ bpf_map__value_size; ++ bpf_program__attach_xdp; ++ bpf_program__autoload; ++ bpf_program__set_autoload; ++ btf__parse; ++ btf__parse_raw; ++ btf__pointer_size; ++ btf__set_fd; ++ btf__set_pointer_size; ++} LIBBPF_0.0.9; ++ ++LIBBPF_0.2.0 { ++ global: ++ bpf_prog_bind_map; ++ bpf_prog_test_run_opts; ++ bpf_program__attach_freplace; ++ bpf_program__section_name; ++ btf__add_array; ++ btf__add_const; ++ btf__add_enum; ++ btf__add_enum_value; ++ btf__add_datasec; ++ btf__add_datasec_var_info; ++ btf__add_field; ++ btf__add_func; ++ btf__add_func_param; ++ btf__add_func_proto; ++ btf__add_fwd; ++ btf__add_int; ++ btf__add_ptr; ++ btf__add_restrict; ++ btf__add_str; ++ btf__add_struct; ++ btf__add_typedef; ++ btf__add_union; ++ btf__add_var; ++ btf__add_volatile; ++ btf__endianness; ++ btf__find_str; ++ btf__new_empty; ++ btf__set_endianness; ++ btf__str_by_offset; ++ perf_buffer__buffer_cnt; ++ perf_buffer__buffer_fd; ++ perf_buffer__epoll_fd; ++ perf_buffer__consume_buffer; ++} LIBBPF_0.1.0; ++ ++LIBBPF_0.3.0 { ++ global: ++ btf__base_btf; ++ btf__parse_elf_split; ++ btf__parse_raw_split; ++ btf__parse_split; ++ btf__new_empty_split; ++ btf__new_split; ++ ring_buffer__epoll_fd; ++} LIBBPF_0.2.0; ++ ++LIBBPF_0.4.0 { ++ global: ++ btf__add_float; ++ btf__add_type; ++ bpf_linker__add_file; ++ bpf_linker__finalize; ++ bpf_linker__free; ++ bpf_linker__new; ++ bpf_map__inner_map; ++ bpf_object__set_kversion; ++ bpf_tc_attach; ++ bpf_tc_detach; ++ bpf_tc_hook_create; ++ bpf_tc_hook_destroy; ++ bpf_tc_query; ++} LIBBPF_0.3.0; ++ ++LIBBPF_0.5.0 { ++ global: ++ bpf_map__initial_value; ++ bpf_map__pin_path; ++ bpf_map_lookup_and_delete_elem_flags; ++ bpf_program__attach_kprobe_opts; ++ bpf_program__attach_perf_event_opts; ++ bpf_program__attach_tracepoint_opts; ++ bpf_program__attach_uprobe_opts; ++ bpf_object__gen_loader; ++ btf__load_from_kernel_by_id; ++ btf__load_from_kernel_by_id_split; ++ btf__load_into_kernel; ++ btf__load_module_btf; ++ btf__load_vmlinux_btf; ++ btf_dump__dump_type_data; ++ libbpf_set_strict_mode; ++} LIBBPF_0.4.0; ++ ++LIBBPF_0.6.0 { ++ global: ++ bpf_map__map_extra; ++ bpf_map__set_map_extra; ++ bpf_map_create; ++ bpf_object__next_map; ++ bpf_object__next_program; ++ bpf_object__prev_map; ++ bpf_object__prev_program; ++ bpf_prog_load; ++ bpf_program__flags; ++ bpf_program__insn_cnt; ++ bpf_program__insns; ++ bpf_program__set_flags; ++ btf__add_btf; ++ btf__add_decl_tag; ++ btf__add_type_tag; ++ btf__dedup; ++ btf__raw_data; ++ 
btf__type_cnt; ++ btf_dump__new; ++ libbpf_major_version; ++ libbpf_minor_version; ++ libbpf_version_string; ++ perf_buffer__new; ++ perf_buffer__new_raw; ++} LIBBPF_0.5.0; ++ ++LIBBPF_0.7.0 { ++ global: ++ bpf_btf_load; ++ bpf_program__expected_attach_type; ++ bpf_program__log_buf; ++ bpf_program__log_level; ++ bpf_program__set_log_buf; ++ bpf_program__set_log_level; ++ bpf_program__type; ++ bpf_xdp_attach; ++ bpf_xdp_detach; ++ bpf_xdp_query; ++ bpf_xdp_query_id; ++ btf_ext__raw_data; ++ libbpf_probe_bpf_helper; ++ libbpf_probe_bpf_map_type; ++ libbpf_probe_bpf_prog_type; ++ libbpf_set_memlock_rlim; ++} LIBBPF_0.6.0; ++ ++LIBBPF_0.8.0 { ++ global: ++ bpf_map__autocreate; ++ bpf_map__get_next_key; ++ bpf_map__delete_elem; ++ bpf_map__lookup_and_delete_elem; ++ bpf_map__lookup_elem; ++ bpf_map__set_autocreate; ++ bpf_map__update_elem; ++ bpf_map_delete_elem_flags; ++ bpf_object__destroy_subskeleton; ++ bpf_object__open_subskeleton; ++ bpf_program__attach_kprobe_multi_opts; ++ bpf_program__attach_trace_opts; ++ bpf_program__attach_usdt; ++ bpf_program__set_insns; ++ libbpf_register_prog_handler; ++ libbpf_unregister_prog_handler; ++} LIBBPF_0.7.0; ++ ++LIBBPF_1.0.0 { ++ global: ++ bpf_obj_get_opts; ++ bpf_prog_query_opts; ++ bpf_program__attach_ksyscall; ++ btf__add_enum64; ++ btf__add_enum64_value; ++ libbpf_bpf_attach_type_str; ++ libbpf_bpf_link_type_str; ++ libbpf_bpf_map_type_str; ++ libbpf_bpf_prog_type_str; ++ perf_buffer__buffer; ++}; +diff --git a/src/cc/libbpf/src/libbpf.pc.template b/src/cc/libbpf/src/libbpf.pc.template +new file mode 100644 +index 0000000..b45ed53 +--- /dev/null ++++ b/src/cc/libbpf/src/libbpf.pc.template +@@ -0,0 +1,12 @@ ++# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) ++ ++prefix=@PREFIX@ ++libdir=@LIBDIR@ ++includedir=${prefix}/include ++ ++Name: libbpf ++Description: BPF library ++Version: @VERSION@ ++Libs: -L${libdir} -lbpf ++Requires.private: libelf zlib ++Cflags: -I${includedir} +diff --git a/src/cc/libbpf/src/libbpf_common.h b/src/cc/libbpf/src/libbpf_common.h +new file mode 100644 +index 0000000..9a7937f +--- /dev/null ++++ b/src/cc/libbpf/src/libbpf_common.h +@@ -0,0 +1,73 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++ ++/* ++ * Common user-facing libbpf helpers. ++ * ++ * Copyright (c) 2019 Facebook ++ */ ++ ++#ifndef __LIBBPF_LIBBPF_COMMON_H ++#define __LIBBPF_LIBBPF_COMMON_H ++ ++#include ++#include "libbpf_version.h" ++ ++#ifndef LIBBPF_API ++#define LIBBPF_API __attribute__((visibility("default"))) ++#endif ++ ++#define LIBBPF_DEPRECATED(msg) __attribute__((deprecated(msg))) ++ ++/* Mark a symbol as deprecated when libbpf version is >= {major}.{minor} */ ++#define LIBBPF_DEPRECATED_SINCE(major, minor, msg) \ ++ __LIBBPF_MARK_DEPRECATED_ ## major ## _ ## minor \ ++ (LIBBPF_DEPRECATED("libbpf v" # major "." # minor "+: " msg)) ++ ++#define __LIBBPF_CURRENT_VERSION_GEQ(major, minor) \ ++ (LIBBPF_MAJOR_VERSION > (major) || \ ++ (LIBBPF_MAJOR_VERSION == (major) && LIBBPF_MINOR_VERSION >= (minor))) ++ ++/* Add checks for other versions below when planning deprecation of API symbols ++ * with the LIBBPF_DEPRECATED_SINCE macro. 
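++ *
++ * For illustration (the symbol below is hypothetical), a declaration
++ * deprecated starting with v1.0 would look like:
++ *
++ *	LIBBPF_DEPRECATED_SINCE(1, 0, "use bpf_map__fd() instead")
++ *	LIBBPF_API int bpf_map__legacy_fd(const struct bpf_map *map);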
++ */ ++#if __LIBBPF_CURRENT_VERSION_GEQ(1, 0) ++#define __LIBBPF_MARK_DEPRECATED_1_0(X) X ++#else ++#define __LIBBPF_MARK_DEPRECATED_1_0(X) ++#endif ++ ++/* This set of internal macros allows to do "function overloading" based on ++ * number of arguments provided by used in backwards-compatible way during the ++ * transition to libbpf 1.0 ++ * It's ugly but necessary evil that will be cleaned up when we get to 1.0. ++ * See bpf_prog_load() overload for example. ++ */ ++#define ___libbpf_cat(A, B) A ## B ++#define ___libbpf_select(NAME, NUM) ___libbpf_cat(NAME, NUM) ++#define ___libbpf_nth(_1, _2, _3, _4, _5, _6, N, ...) N ++#define ___libbpf_cnt(...) ___libbpf_nth(__VA_ARGS__, 6, 5, 4, 3, 2, 1) ++#define ___libbpf_overload(NAME, ...) ___libbpf_select(NAME, ___libbpf_cnt(__VA_ARGS__))(__VA_ARGS__) ++ ++/* Helper macro to declare and initialize libbpf options struct ++ * ++ * This dance with uninitialized declaration, followed by memset to zero, ++ * followed by assignment using compound literal syntax is done to preserve ++ * ability to use a nice struct field initialization syntax and **hopefully** ++ * have all the padding bytes initialized to zero. It's not guaranteed though, ++ * when copying literal, that compiler won't copy garbage in literal's padding ++ * bytes, but that's the best way I've found and it seems to work in practice. ++ * ++ * Macro declares opts struct of given type and name, zero-initializes, ++ * including any extra padding, it with memset() and then assigns initial ++ * values provided by users in struct initializer-syntax as varargs. ++ */ ++#define LIBBPF_OPTS(TYPE, NAME, ...) \ ++ struct TYPE NAME = ({ \ ++ memset(&NAME, 0, sizeof(struct TYPE)); \ ++ (struct TYPE) { \ ++ .sz = sizeof(struct TYPE), \ ++ __VA_ARGS__ \ ++ }; \ ++ }) ++ ++#endif /* __LIBBPF_LIBBPF_COMMON_H */ +diff --git a/src/cc/libbpf/src/libbpf_errno.c b/src/cc/libbpf/src/libbpf_errno.c +new file mode 100644 +index 0000000..96f67a7 +--- /dev/null ++++ b/src/cc/libbpf/src/libbpf_errno.c +@@ -0,0 +1,67 @@ ++// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) ++ ++/* ++ * Copyright (C) 2013-2015 Alexei Starovoitov ++ * Copyright (C) 2015 Wang Nan ++ * Copyright (C) 2015 Huawei Inc. ++ * Copyright (C) 2017 Nicira, Inc. 
++ */ ++ ++#undef _GNU_SOURCE ++#include ++#include ++ ++#include "libbpf.h" ++#include "libbpf_internal.h" ++ ++/* make sure libbpf doesn't use kernel-only integer typedefs */ ++#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 ++ ++#define ERRNO_OFFSET(e) ((e) - __LIBBPF_ERRNO__START) ++#define ERRCODE_OFFSET(c) ERRNO_OFFSET(LIBBPF_ERRNO__##c) ++#define NR_ERRNO (__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START) ++ ++static const char *libbpf_strerror_table[NR_ERRNO] = { ++ [ERRCODE_OFFSET(LIBELF)] = "Something wrong in libelf", ++ [ERRCODE_OFFSET(FORMAT)] = "BPF object format invalid", ++ [ERRCODE_OFFSET(KVERSION)] = "'version' section incorrect or lost", ++ [ERRCODE_OFFSET(ENDIAN)] = "Endian mismatch", ++ [ERRCODE_OFFSET(INTERNAL)] = "Internal error in libbpf", ++ [ERRCODE_OFFSET(RELOC)] = "Relocation failed", ++ [ERRCODE_OFFSET(VERIFY)] = "Kernel verifier blocks program loading", ++ [ERRCODE_OFFSET(PROG2BIG)] = "Program too big", ++ [ERRCODE_OFFSET(KVER)] = "Incorrect kernel version", ++ [ERRCODE_OFFSET(PROGTYPE)] = "Kernel doesn't support this program type", ++ [ERRCODE_OFFSET(WRNGPID)] = "Wrong pid in netlink message", ++ [ERRCODE_OFFSET(INVSEQ)] = "Invalid netlink sequence", ++ [ERRCODE_OFFSET(NLPARSE)] = "Incorrect netlink message parsing", ++}; ++ ++int libbpf_strerror(int err, char *buf, size_t size) ++{ ++ if (!buf || !size) ++ return libbpf_err(-EINVAL); ++ ++ err = err > 0 ? err : -err; ++ ++ if (err < __LIBBPF_ERRNO__START) { ++ int ret; ++ ++ ret = strerror_r(err, buf, size); ++ buf[size - 1] = '\0'; ++ return libbpf_err_errno(ret); ++ } ++ ++ if (err < __LIBBPF_ERRNO__END) { ++ const char *msg; ++ ++ msg = libbpf_strerror_table[ERRNO_OFFSET(err)]; ++ snprintf(buf, size, "%s", msg); ++ buf[size - 1] = '\0'; ++ return 0; ++ } ++ ++ snprintf(buf, size, "Unknown libbpf error %d", err); ++ buf[size - 1] = '\0'; ++ return libbpf_err(-ENOENT); ++} +diff --git a/src/cc/libbpf/src/libbpf_internal.h b/src/cc/libbpf/src/libbpf_internal.h +new file mode 100644 +index 0000000..4135ae0 +--- /dev/null ++++ b/src/cc/libbpf/src/libbpf_internal.h +@@ -0,0 +1,576 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++ ++/* ++ * Internal libbpf helpers. 
++ * ++ * Copyright (c) 2019 Facebook ++ */ ++ ++#ifndef __LIBBPF_LIBBPF_INTERNAL_H ++#define __LIBBPF_LIBBPF_INTERNAL_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "relo_core.h" ++ ++/* make sure libbpf doesn't use kernel-only integer typedefs */ ++#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 ++ ++/* prevent accidental re-addition of reallocarray() */ ++#pragma GCC poison reallocarray ++ ++#include "libbpf.h" ++#include "btf.h" ++ ++#ifndef EM_BPF ++#define EM_BPF 247 ++#endif ++ ++#ifndef R_BPF_64_64 ++#define R_BPF_64_64 1 ++#endif ++#ifndef R_BPF_64_ABS64 ++#define R_BPF_64_ABS64 2 ++#endif ++#ifndef R_BPF_64_ABS32 ++#define R_BPF_64_ABS32 3 ++#endif ++#ifndef R_BPF_64_32 ++#define R_BPF_64_32 10 ++#endif ++ ++#ifndef SHT_LLVM_ADDRSIG ++#define SHT_LLVM_ADDRSIG 0x6FFF4C03 ++#endif ++ ++/* if libelf is old and doesn't support mmap(), fall back to read() */ ++#ifndef ELF_C_READ_MMAP ++#define ELF_C_READ_MMAP ELF_C_READ ++#endif ++ ++/* Older libelf all end up in this expression, for both 32 and 64 bit */ ++#ifndef ELF64_ST_VISIBILITY ++#define ELF64_ST_VISIBILITY(o) ((o) & 0x03) ++#endif ++ ++#define BTF_INFO_ENC(kind, kind_flag, vlen) \ ++ ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) ++#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type) ++#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \ ++ ((encoding) << 24 | (bits_offset) << 16 | (nr_bits)) ++#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \ ++ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ ++ BTF_INT_ENC(encoding, bits_offset, bits) ++#define BTF_MEMBER_ENC(name, type, bits_offset) (name), (type), (bits_offset) ++#define BTF_PARAM_ENC(name, type) (name), (type) ++#define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size) ++#define BTF_TYPE_FLOAT_ENC(name, sz) \ ++ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz) ++#define BTF_TYPE_DECL_TAG_ENC(value, type, component_idx) \ ++ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx) ++#define BTF_TYPE_TYPE_TAG_ENC(value, type) \ ++ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 0, 0), type) ++ ++#ifndef likely ++#define likely(x) __builtin_expect(!!(x), 1) ++#endif ++#ifndef unlikely ++#define unlikely(x) __builtin_expect(!!(x), 0) ++#endif ++#ifndef min ++# define min(x, y) ((x) < (y) ? (x) : (y)) ++#endif ++#ifndef max ++# define max(x, y) ((x) < (y) ? (y) : (x)) ++#endif ++#ifndef offsetofend ++# define offsetofend(TYPE, FIELD) \ ++ (offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD)) ++#endif ++#ifndef __alias ++#define __alias(symbol) __attribute__((alias(#symbol))) ++#endif ++ ++/* Check whether a string `str` has prefix `pfx`, regardless if `pfx` is ++ * a string literal known at compilation time or char * pointer known only at ++ * runtime. ++ */ ++#define str_has_pfx(str, pfx) \ ++ (strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0) ++ ++/* suffix check */ ++static inline bool str_has_sfx(const char *str, const char *sfx) ++{ ++ size_t str_len = strlen(str); ++ size_t sfx_len = strlen(sfx); ++ ++ if (sfx_len > str_len) ++ return false; ++ return strcmp(str + str_len - sfx_len, sfx) == 0; ++} ++ ++/* Symbol versioning is different between static and shared library. ++ * Properly versioned symbols are needed for shared library, but ++ * only the symbol of the new version is needed for static library. 
++ * Starting with GNU C 10, use symver attribute instead of .symver assembler ++ * directive, which works better with GCC LTO builds. ++ */ ++#if defined(SHARED) && defined(__GNUC__) && __GNUC__ >= 10 ++ ++#define DEFAULT_VERSION(internal_name, api_name, version) \ ++ __attribute__((symver(#api_name "@@" #version))) ++#define COMPAT_VERSION(internal_name, api_name, version) \ ++ __attribute__((symver(#api_name "@" #version))) ++ ++#elif defined(SHARED) ++ ++#define COMPAT_VERSION(internal_name, api_name, version) \ ++ asm(".symver " #internal_name "," #api_name "@" #version); ++#define DEFAULT_VERSION(internal_name, api_name, version) \ ++ asm(".symver " #internal_name "," #api_name "@@" #version); ++ ++#else /* !SHARED */ ++ ++#define COMPAT_VERSION(internal_name, api_name, version) ++#define DEFAULT_VERSION(internal_name, api_name, version) \ ++ extern typeof(internal_name) api_name \ ++ __attribute__((alias(#internal_name))); ++ ++#endif ++ ++extern void libbpf_print(enum libbpf_print_level level, ++ const char *format, ...) ++ __attribute__((format(printf, 2, 3))); ++ ++#define __pr(level, fmt, ...) \ ++do { \ ++ libbpf_print(level, "libbpf: " fmt, ##__VA_ARGS__); \ ++} while (0) ++ ++#define pr_warn(fmt, ...) __pr(LIBBPF_WARN, fmt, ##__VA_ARGS__) ++#define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__) ++#define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) ++ ++#ifndef __has_builtin ++#define __has_builtin(x) 0 ++#endif ++ ++struct bpf_link { ++ int (*detach)(struct bpf_link *link); ++ void (*dealloc)(struct bpf_link *link); ++ char *pin_path; /* NULL, if not pinned */ ++ int fd; /* hook FD, -1 if not applicable */ ++ bool disconnected; ++}; ++ ++/* ++ * Re-implement glibc's reallocarray() for libbpf internal-only use. ++ * reallocarray(), unfortunately, is not available in all versions of glibc, ++ * so requires extra feature detection and using reallocarray() stub from ++ * and COMPAT_NEED_REALLOCARRAY. All this complicates ++ * build of libbpf unnecessarily and is just a maintenance burden. Instead, ++ * it's trivial to implement libbpf-specific internal version and use it ++ * throughout libbpf. ++ */ ++static inline void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size) ++{ ++ size_t total; ++ ++#if __has_builtin(__builtin_mul_overflow) ++ if (unlikely(__builtin_mul_overflow(nmemb, size, &total))) ++ return NULL; ++#else ++ if (size == 0 || nmemb > ULONG_MAX / size) ++ return NULL; ++ total = nmemb * size; ++#endif ++ return realloc(ptr, total); ++} ++ ++/* Copy up to sz - 1 bytes from zero-terminated src string and ensure that dst ++ * is zero-terminated string no matter what (unless sz == 0, in which case ++ * it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but differs ++ * in what is returned. Given this is internal helper, it's trivial to extend ++ * this, when necessary. Use this instead of strncpy inside libbpf source code. 
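++ *
++ * For example (the buffer size and string are arbitrary), the destination
++ * is always NUL-terminated even when the source doesn't fit:
++ *
++ *	char buf[16];
++ *
++ *	libbpf_strlcpy(buf, "some_very_long_map_name", sizeof(buf));
++ *	buf now holds "some_very_long_" (15 bytes) plus the terminating NUL.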
++ */ ++static inline void libbpf_strlcpy(char *dst, const char *src, size_t sz) ++{ ++ size_t i; ++ ++ if (sz == 0) ++ return; ++ ++ sz--; ++ for (i = 0; i < sz && src[i]; i++) ++ dst[i] = src[i]; ++ dst[i] = '\0'; ++} ++ ++__u32 get_kernel_version(void); ++ ++struct btf; ++struct btf_type; ++ ++struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id); ++const char *btf_kind_str(const struct btf_type *t); ++const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id); ++ ++static inline enum btf_func_linkage btf_func_linkage(const struct btf_type *t) ++{ ++ return (enum btf_func_linkage)(int)btf_vlen(t); ++} ++ ++static inline __u32 btf_type_info(int kind, int vlen, int kflag) ++{ ++ return (kflag << 31) | (kind << 24) | vlen; ++} ++ ++enum map_def_parts { ++ MAP_DEF_MAP_TYPE = 0x001, ++ MAP_DEF_KEY_TYPE = 0x002, ++ MAP_DEF_KEY_SIZE = 0x004, ++ MAP_DEF_VALUE_TYPE = 0x008, ++ MAP_DEF_VALUE_SIZE = 0x010, ++ MAP_DEF_MAX_ENTRIES = 0x020, ++ MAP_DEF_MAP_FLAGS = 0x040, ++ MAP_DEF_NUMA_NODE = 0x080, ++ MAP_DEF_PINNING = 0x100, ++ MAP_DEF_INNER_MAP = 0x200, ++ MAP_DEF_MAP_EXTRA = 0x400, ++ ++ MAP_DEF_ALL = 0x7ff, /* combination of all above */ ++}; ++ ++struct btf_map_def { ++ enum map_def_parts parts; ++ __u32 map_type; ++ __u32 key_type_id; ++ __u32 key_size; ++ __u32 value_type_id; ++ __u32 value_size; ++ __u32 max_entries; ++ __u32 map_flags; ++ __u32 numa_node; ++ __u32 pinning; ++ __u64 map_extra; ++}; ++ ++int parse_btf_map_def(const char *map_name, struct btf *btf, ++ const struct btf_type *def_t, bool strict, ++ struct btf_map_def *map_def, struct btf_map_def *inner_def); ++ ++void *libbpf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz, ++ size_t cur_cnt, size_t max_cnt, size_t add_cnt); ++int libbpf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt); ++ ++static inline bool libbpf_is_mem_zeroed(const char *p, ssize_t len) ++{ ++ while (len > 0) { ++ if (*p) ++ return false; ++ p++; ++ len--; ++ } ++ return true; ++} ++ ++static inline bool libbpf_validate_opts(const char *opts, ++ size_t opts_sz, size_t user_sz, ++ const char *type_name) ++{ ++ if (user_sz < sizeof(size_t)) { ++ pr_warn("%s size (%zu) is too small\n", type_name, user_sz); ++ return false; ++ } ++ if (!libbpf_is_mem_zeroed(opts + opts_sz, (ssize_t)user_sz - opts_sz)) { ++ pr_warn("%s has non-zero extra bytes\n", type_name); ++ return false; ++ } ++ return true; ++} ++ ++#define OPTS_VALID(opts, type) \ ++ (!(opts) || libbpf_validate_opts((const char *)opts, \ ++ offsetofend(struct type, \ ++ type##__last_field), \ ++ (opts)->sz, #type)) ++#define OPTS_HAS(opts, field) \ ++ ((opts) && opts->sz >= offsetofend(typeof(*(opts)), field)) ++#define OPTS_GET(opts, field, fallback_value) \ ++ (OPTS_HAS(opts, field) ? (opts)->field : fallback_value) ++#define OPTS_SET(opts, field, value) \ ++ do { \ ++ if (OPTS_HAS(opts, field)) \ ++ (opts)->field = value; \ ++ } while (0) ++ ++#define OPTS_ZEROED(opts, last_nonzero_field) \ ++({ \ ++ ssize_t __off = offsetofend(typeof(*(opts)), last_nonzero_field); \ ++ !(opts) || libbpf_is_mem_zeroed((const void *)opts + __off, \ ++ (opts)->sz - __off); \ ++}) ++ ++enum kern_feature_id { ++ /* v4.14: kernel support for program & map names. */ ++ FEAT_PROG_NAME, ++ /* v5.2: kernel support for global data sections. 
*/ ++ FEAT_GLOBAL_DATA, ++ /* BTF support */ ++ FEAT_BTF, ++ /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */ ++ FEAT_BTF_FUNC, ++ /* BTF_KIND_VAR and BTF_KIND_DATASEC support */ ++ FEAT_BTF_DATASEC, ++ /* BTF_FUNC_GLOBAL is supported */ ++ FEAT_BTF_GLOBAL_FUNC, ++ /* BPF_F_MMAPABLE is supported for arrays */ ++ FEAT_ARRAY_MMAP, ++ /* kernel support for expected_attach_type in BPF_PROG_LOAD */ ++ FEAT_EXP_ATTACH_TYPE, ++ /* bpf_probe_read_{kernel,user}[_str] helpers */ ++ FEAT_PROBE_READ_KERN, ++ /* BPF_PROG_BIND_MAP is supported */ ++ FEAT_PROG_BIND_MAP, ++ /* Kernel support for module BTFs */ ++ FEAT_MODULE_BTF, ++ /* BTF_KIND_FLOAT support */ ++ FEAT_BTF_FLOAT, ++ /* BPF perf link support */ ++ FEAT_PERF_LINK, ++ /* BTF_KIND_DECL_TAG support */ ++ FEAT_BTF_DECL_TAG, ++ /* BTF_KIND_TYPE_TAG support */ ++ FEAT_BTF_TYPE_TAG, ++ /* memcg-based accounting for BPF maps and progs */ ++ FEAT_MEMCG_ACCOUNT, ++ /* BPF cookie (bpf_get_attach_cookie() BPF helper) support */ ++ FEAT_BPF_COOKIE, ++ /* BTF_KIND_ENUM64 support and BTF_KIND_ENUM kflag support */ ++ FEAT_BTF_ENUM64, ++ /* Kernel uses syscall wrapper (CONFIG_ARCH_HAS_SYSCALL_WRAPPER) */ ++ FEAT_SYSCALL_WRAPPER, ++ __FEAT_CNT, ++}; ++ ++int probe_memcg_account(void); ++bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); ++int bump_rlimit_memlock(void); ++ ++int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); ++int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); ++int libbpf__load_raw_btf(const char *raw_types, size_t types_len, ++ const char *str_sec, size_t str_len); ++int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level); ++ ++struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); ++void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, ++ const char **prefix, int *kind); ++ ++struct btf_ext_info { ++ /* ++ * info points to the individual info section (e.g. func_info and ++ * line_info) from the .BTF.ext. It does not include the __u32 rec_size. ++ */ ++ void *info; ++ __u32 rec_size; ++ __u32 len; ++ /* optional (maintained internally by libbpf) mapping between .BTF.ext ++ * section and corresponding ELF section. This is used to join ++ * information like CO-RE relocation records with corresponding BPF ++ * programs defined in ELF sections ++ */ ++ __u32 *sec_idxs; ++ int sec_cnt; ++}; ++ ++#define for_each_btf_ext_sec(seg, sec) \ ++ for (sec = (seg)->info; \ ++ (void *)sec < (seg)->info + (seg)->len; \ ++ sec = (void *)sec + sizeof(struct btf_ext_info_sec) + \ ++ (seg)->rec_size * sec->num_info) ++ ++#define for_each_btf_ext_rec(seg, sec, i, rec) \ ++ for (i = 0, rec = (void *)&(sec)->data; \ ++ i < (sec)->num_info; \ ++ i++, rec = (void *)rec + (seg)->rec_size) ++ ++/* ++ * The .BTF.ext ELF section layout defined as ++ * struct btf_ext_header ++ * func_info subsection ++ * ++ * The func_info subsection layout: ++ * record size for struct bpf_func_info in the func_info subsection ++ * struct btf_sec_func_info for section #1 ++ * a list of bpf_func_info records for section #1 ++ * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h ++ * but may not be identical ++ * struct btf_sec_func_info for section #2 ++ * a list of bpf_func_info records for section #2 ++ * ...... ++ * ++ * Note that the bpf_func_info record size in .BTF.ext may not ++ * be the same as the one defined in include/uapi/linux/bpf.h. 
++ * The loader should ensure that record_size meets minimum ++ * requirement and pass the record as is to the kernel. The ++ * kernel will handle the func_info properly based on its contents. ++ */ ++struct btf_ext_header { ++ __u16 magic; ++ __u8 version; ++ __u8 flags; ++ __u32 hdr_len; ++ ++ /* All offsets are in bytes relative to the end of this header */ ++ __u32 func_info_off; ++ __u32 func_info_len; ++ __u32 line_info_off; ++ __u32 line_info_len; ++ ++ /* optional part of .BTF.ext header */ ++ __u32 core_relo_off; ++ __u32 core_relo_len; ++}; ++ ++struct btf_ext { ++ union { ++ struct btf_ext_header *hdr; ++ void *data; ++ }; ++ struct btf_ext_info func_info; ++ struct btf_ext_info line_info; ++ struct btf_ext_info core_relo_info; ++ __u32 data_size; ++}; ++ ++struct btf_ext_info_sec { ++ __u32 sec_name_off; ++ __u32 num_info; ++ /* Followed by num_info * record_size number of bytes */ ++ __u8 data[]; ++}; ++ ++/* The minimum bpf_func_info checked by the loader */ ++struct bpf_func_info_min { ++ __u32 insn_off; ++ __u32 type_id; ++}; ++ ++/* The minimum bpf_line_info checked by the loader */ ++struct bpf_line_info_min { ++ __u32 insn_off; ++ __u32 file_name_off; ++ __u32 line_off; ++ __u32 line_col; ++}; ++ ++ ++typedef int (*type_id_visit_fn)(__u32 *type_id, void *ctx); ++typedef int (*str_off_visit_fn)(__u32 *str_off, void *ctx); ++int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx); ++int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx); ++int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx); ++int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx); ++__s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name, ++ __u32 kind); ++ ++typedef int (*kallsyms_cb_t)(unsigned long long sym_addr, char sym_type, ++ const char *sym_name, void *ctx); ++ ++int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *arg); ++ ++/* handle direct returned errors */ ++static inline int libbpf_err(int ret) ++{ ++ if (ret < 0) ++ errno = -ret; ++ return ret; ++} ++ ++/* handle errno-based (e.g., syscall or libc) errors according to libbpf's ++ * strict mode settings ++ */ ++static inline int libbpf_err_errno(int ret) ++{ ++ /* errno is already assumed to be set on error */ ++ return ret < 0 ? -errno : ret; ++} ++ ++/* handle error for pointer-returning APIs, err is assumed to be < 0 always */ ++static inline void *libbpf_err_ptr(int err) ++{ ++ /* set errno on error, this doesn't break anything */ ++ errno = -err; ++ return NULL; ++} ++ ++/* handle pointer-returning APIs' error handling */ ++static inline void *libbpf_ptr(void *ret) ++{ ++ /* set errno on error, this doesn't break anything */ ++ if (IS_ERR(ret)) ++ errno = -PTR_ERR(ret); ++ ++ return IS_ERR(ret) ? NULL : ret; ++} ++ ++static inline bool str_is_empty(const char *s) ++{ ++ return !s || !s[0]; ++} ++ ++static inline bool is_ldimm64_insn(struct bpf_insn *insn) ++{ ++ return insn->code == (BPF_LD | BPF_IMM | BPF_DW); ++} ++ ++/* if fd is stdin, stdout, or stderr, dup to a fd greater than 2 ++ * Takes ownership of the fd passed in, and closes it if calling ++ * fcntl(fd, F_DUPFD_CLOEXEC, 3). 
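++ *
++ * Typical call pattern (the path is illustrative): wrap fds obtained from
++ * open() so that later code never works on fd 0, 1 or 2:
++ *
++ *	int fd = ensure_good_fd(open("/sys/kernel/btf/vmlinux", O_RDONLY));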
++ */ ++static inline int ensure_good_fd(int fd) ++{ ++ int old_fd = fd, saved_errno; ++ ++ if (fd < 0) ++ return fd; ++ if (fd < 3) { ++ fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); ++ saved_errno = errno; ++ close(old_fd); ++ if (fd < 0) { ++ pr_warn("failed to dup FD %d to FD > 2: %d\n", old_fd, -saved_errno); ++ errno = saved_errno; ++ } ++ } ++ return fd; ++} ++ ++/* The following two functions are exposed to bpftool */ ++int bpf_core_add_cands(struct bpf_core_cand *local_cand, ++ size_t local_essent_len, ++ const struct btf *targ_btf, ++ const char *targ_btf_name, ++ int targ_start_id, ++ struct bpf_core_cand_list *cands); ++void bpf_core_free_cands(struct bpf_core_cand_list *cands); ++ ++struct usdt_manager *usdt_manager_new(struct bpf_object *obj); ++void usdt_manager_free(struct usdt_manager *man); ++struct bpf_link * usdt_manager_attach_usdt(struct usdt_manager *man, ++ const struct bpf_program *prog, ++ pid_t pid, const char *path, ++ const char *usdt_provider, const char *usdt_name, ++ __u64 usdt_cookie); ++ ++static inline bool is_pow_of_2(size_t x) ++{ ++ return x && (x & (x - 1)) == 0; ++} ++ ++#endif /* __LIBBPF_LIBBPF_INTERNAL_H */ +diff --git a/src/cc/libbpf/src/libbpf_legacy.h b/src/cc/libbpf/src/libbpf_legacy.h +new file mode 100644 +index 0000000..5b7e015 +--- /dev/null ++++ b/src/cc/libbpf/src/libbpf_legacy.h +@@ -0,0 +1,138 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++ ++/* ++ * Libbpf legacy APIs (either discouraged or deprecated, as mentioned in [0]) ++ * ++ * [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY ++ * ++ * Copyright (C) 2021 Facebook ++ */ ++#ifndef __LIBBPF_LEGACY_BPF_H ++#define __LIBBPF_LEGACY_BPF_H ++ ++#include ++#include ++#include ++#include ++#include "libbpf_common.h" ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* As of libbpf 1.0 libbpf_set_strict_mode() and enum libbpf_struct_mode have ++ * no effect. But they are left in libbpf_legacy.h so that applications that ++ * prepared for libbpf 1.0 before final release by using ++ * libbpf_set_strict_mode() still work with libbpf 1.0+ without any changes. ++ */ ++enum libbpf_strict_mode { ++ /* Turn on all supported strict features of libbpf to simulate libbpf ++ * v1.0 behavior. ++ * This will be the default behavior in libbpf v1.0. ++ */ ++ LIBBPF_STRICT_ALL = 0xffffffff, ++ ++ /* ++ * Disable any libbpf 1.0 behaviors. This is the default before libbpf ++ * v1.0. It won't be supported anymore in v1.0, please update your ++ * code so that it handles LIBBPF_STRICT_ALL mode before libbpf v1.0. ++ */ ++ LIBBPF_STRICT_NONE = 0x00, ++ /* ++ * Return NULL pointers on error, not ERR_PTR(err). ++ * Additionally, libbpf also always sets errno to corresponding Exx ++ * (positive) error code. ++ */ ++ LIBBPF_STRICT_CLEAN_PTRS = 0x01, ++ /* ++ * Return actual error codes from low-level APIs directly, not just -1. ++ * Additionally, libbpf also always sets errno to corresponding Exx ++ * (positive) error code. ++ */ ++ LIBBPF_STRICT_DIRECT_ERRS = 0x02, ++ /* ++ * Enforce strict BPF program section (SEC()) names. ++ * E.g., while prefiously SEC("xdp_whatever") or SEC("perf_event_blah") were ++ * allowed, with LIBBPF_STRICT_SEC_PREFIX this will become ++ * unrecognized by libbpf and would have to be just SEC("xdp") and ++ * SEC("xdp") and SEC("perf_event"). ++ * ++ * Note, in this mode the program pin path will be based on the ++ * function name instead of section name. 
++ * ++ * Additionally, routines in the .text section are always considered ++ * sub-programs. Legacy behavior allows for a single routine in .text ++ * to be a program. ++ */ ++ LIBBPF_STRICT_SEC_NAME = 0x04, ++ /* ++ * Disable the global 'bpf_objects_list'. Maintaining this list adds ++ * a race condition to bpf_object__open() and bpf_object__close(). ++ * Clients can maintain it on their own if it is valuable for them. ++ */ ++ LIBBPF_STRICT_NO_OBJECT_LIST = 0x08, ++ /* ++ * Automatically bump RLIMIT_MEMLOCK using setrlimit() before the ++ * first BPF program or map creation operation. This is done only if ++ * kernel is too old to support memcg-based memory accounting for BPF ++ * subsystem. By default, RLIMIT_MEMLOCK limit is set to RLIM_INFINITY, ++ * but it can be overriden with libbpf_set_memlock_rlim() API. ++ * Note that libbpf_set_memlock_rlim() needs to be called before ++ * the very first bpf_prog_load(), bpf_map_create() or bpf_object__load() ++ * operation. ++ */ ++ LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK = 0x10, ++ /* ++ * Error out on any SEC("maps") map definition, which are deprecated ++ * in favor of BTF-defined map definitions in SEC(".maps"). ++ */ ++ LIBBPF_STRICT_MAP_DEFINITIONS = 0x20, ++ ++ __LIBBPF_STRICT_LAST, ++}; ++ ++LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode); ++ ++/** ++ * @brief **libbpf_get_error()** extracts the error code from the passed ++ * pointer ++ * @param ptr pointer returned from libbpf API function ++ * @return error code; or 0 if no error occured ++ * ++ * Note, as of libbpf 1.0 this function is not necessary and not recommended ++ * to be used. Libbpf doesn't return error code embedded into the pointer ++ * itself. Instead, NULL is returned on error and error code is passed through ++ * thread-local errno variable. **libbpf_get_error()** is just returning -errno ++ * value if it receives NULL, which is correct only if errno hasn't been ++ * modified between libbpf API call and corresponding **libbpf_get_error()** ++ * call. Prefer to check return for NULL and use errno directly. ++ * ++ * This API is left in libbpf 1.0 to allow applications that were 1.0-ready ++ * before final libbpf 1.0 without needing to change them. ++ */ ++LIBBPF_API long libbpf_get_error(const void *ptr); ++ ++#define DECLARE_LIBBPF_OPTS LIBBPF_OPTS ++ ++/* "Discouraged" APIs which don't follow consistent libbpf naming patterns. ++ * They are normally a trivial aliases or wrappers for proper APIs and are ++ * left to minimize unnecessary disruption for users of libbpf. But they ++ * shouldn't be used going forward. ++ */ ++ ++struct bpf_program; ++struct bpf_map; ++struct btf; ++struct btf_ext; ++ ++LIBBPF_API enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog); ++LIBBPF_API enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog); ++LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); ++LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); ++LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size); ++ ++#ifdef __cplusplus ++} /* extern "C" */ ++#endif ++ ++#endif /* __LIBBPF_LEGACY_BPF_H */ +diff --git a/src/cc/libbpf/src/libbpf_probes.c b/src/cc/libbpf/src/libbpf_probes.c +new file mode 100644 +index 0000000..0b53987 +--- /dev/null ++++ b/src/cc/libbpf/src/libbpf_probes.c +@@ -0,0 +1,362 @@ ++// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) ++/* Copyright (c) 2019 Netronome Systems, Inc. 
*/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include "bpf.h" ++#include "libbpf.h" ++#include "libbpf_internal.h" ++ ++static int probe_prog_load(enum bpf_prog_type prog_type, ++ const struct bpf_insn *insns, size_t insns_cnt, ++ char *log_buf, size_t log_buf_sz) ++{ ++ LIBBPF_OPTS(bpf_prog_load_opts, opts, ++ .log_buf = log_buf, ++ .log_size = log_buf_sz, ++ .log_level = log_buf ? 1 : 0, ++ ); ++ int fd, err, exp_err = 0; ++ const char *exp_msg = NULL; ++ char buf[4096]; ++ ++ switch (prog_type) { ++ case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: ++ opts.expected_attach_type = BPF_CGROUP_INET4_CONNECT; ++ break; ++ case BPF_PROG_TYPE_CGROUP_SOCKOPT: ++ opts.expected_attach_type = BPF_CGROUP_GETSOCKOPT; ++ break; ++ case BPF_PROG_TYPE_SK_LOOKUP: ++ opts.expected_attach_type = BPF_SK_LOOKUP; ++ break; ++ case BPF_PROG_TYPE_KPROBE: ++ opts.kern_version = get_kernel_version(); ++ break; ++ case BPF_PROG_TYPE_LIRC_MODE2: ++ opts.expected_attach_type = BPF_LIRC_MODE2; ++ break; ++ case BPF_PROG_TYPE_TRACING: ++ case BPF_PROG_TYPE_LSM: ++ opts.log_buf = buf; ++ opts.log_size = sizeof(buf); ++ opts.log_level = 1; ++ if (prog_type == BPF_PROG_TYPE_TRACING) ++ opts.expected_attach_type = BPF_TRACE_FENTRY; ++ else ++ opts.expected_attach_type = BPF_MODIFY_RETURN; ++ opts.attach_btf_id = 1; ++ ++ exp_err = -EINVAL; ++ exp_msg = "attach_btf_id 1 is not a function"; ++ break; ++ case BPF_PROG_TYPE_EXT: ++ opts.log_buf = buf; ++ opts.log_size = sizeof(buf); ++ opts.log_level = 1; ++ opts.attach_btf_id = 1; ++ ++ exp_err = -EINVAL; ++ exp_msg = "Cannot replace kernel functions"; ++ break; ++ case BPF_PROG_TYPE_SYSCALL: ++ opts.prog_flags = BPF_F_SLEEPABLE; ++ break; ++ case BPF_PROG_TYPE_STRUCT_OPS: ++ exp_err = -524; /* -ENOTSUPP */ ++ break; ++ case BPF_PROG_TYPE_UNSPEC: ++ case BPF_PROG_TYPE_SOCKET_FILTER: ++ case BPF_PROG_TYPE_SCHED_CLS: ++ case BPF_PROG_TYPE_SCHED_ACT: ++ case BPF_PROG_TYPE_TRACEPOINT: ++ case BPF_PROG_TYPE_XDP: ++ case BPF_PROG_TYPE_PERF_EVENT: ++ case BPF_PROG_TYPE_CGROUP_SKB: ++ case BPF_PROG_TYPE_CGROUP_SOCK: ++ case BPF_PROG_TYPE_LWT_IN: ++ case BPF_PROG_TYPE_LWT_OUT: ++ case BPF_PROG_TYPE_LWT_XMIT: ++ case BPF_PROG_TYPE_SOCK_OPS: ++ case BPF_PROG_TYPE_SK_SKB: ++ case BPF_PROG_TYPE_CGROUP_DEVICE: ++ case BPF_PROG_TYPE_SK_MSG: ++ case BPF_PROG_TYPE_RAW_TRACEPOINT: ++ case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: ++ case BPF_PROG_TYPE_LWT_SEG6LOCAL: ++ case BPF_PROG_TYPE_SK_REUSEPORT: ++ case BPF_PROG_TYPE_FLOW_DISSECTOR: ++ case BPF_PROG_TYPE_CGROUP_SYSCTL: ++ break; ++ default: ++ return -EOPNOTSUPP; ++ } ++ ++ fd = bpf_prog_load(prog_type, NULL, "GPL", insns, insns_cnt, &opts); ++ err = -errno; ++ if (fd >= 0) ++ close(fd); ++ if (exp_err) { ++ if (fd >= 0 || err != exp_err) ++ return 0; ++ if (exp_msg && !strstr(buf, exp_msg)) ++ return 0; ++ return 1; ++ } ++ return fd >= 0 ? 
1 : 0; ++} ++ ++int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts) ++{ ++ struct bpf_insn insns[] = { ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN() ++ }; ++ const size_t insn_cnt = ARRAY_SIZE(insns); ++ int ret; ++ ++ if (opts) ++ return libbpf_err(-EINVAL); ++ ++ ret = probe_prog_load(prog_type, insns, insn_cnt, NULL, 0); ++ return libbpf_err(ret); ++} ++ ++int libbpf__load_raw_btf(const char *raw_types, size_t types_len, ++ const char *str_sec, size_t str_len) ++{ ++ struct btf_header hdr = { ++ .magic = BTF_MAGIC, ++ .version = BTF_VERSION, ++ .hdr_len = sizeof(struct btf_header), ++ .type_len = types_len, ++ .str_off = types_len, ++ .str_len = str_len, ++ }; ++ int btf_fd, btf_len; ++ __u8 *raw_btf; ++ ++ btf_len = hdr.hdr_len + hdr.type_len + hdr.str_len; ++ raw_btf = malloc(btf_len); ++ if (!raw_btf) ++ return -ENOMEM; ++ ++ memcpy(raw_btf, &hdr, sizeof(hdr)); ++ memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len); ++ memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len); ++ ++ btf_fd = bpf_btf_load(raw_btf, btf_len, NULL); ++ ++ free(raw_btf); ++ return btf_fd; ++} ++ ++static int load_local_storage_btf(void) ++{ ++ const char strs[] = "\0bpf_spin_lock\0val\0cnt\0l"; ++ /* struct bpf_spin_lock { ++ * int val; ++ * }; ++ * struct val { ++ * int cnt; ++ * struct bpf_spin_lock l; ++ * }; ++ */ ++ __u32 types[] = { ++ /* int */ ++ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ ++ /* struct bpf_spin_lock */ /* [2] */ ++ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), ++ BTF_MEMBER_ENC(15, 1, 0), /* int val; */ ++ /* struct val */ /* [3] */ ++ BTF_TYPE_ENC(15, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), ++ BTF_MEMBER_ENC(19, 1, 0), /* int cnt; */ ++ BTF_MEMBER_ENC(23, 2, 32),/* struct bpf_spin_lock l; */ ++ }; ++ ++ return libbpf__load_raw_btf((char *)types, sizeof(types), ++ strs, sizeof(strs)); ++} ++ ++static int probe_map_create(enum bpf_map_type map_type) ++{ ++ LIBBPF_OPTS(bpf_map_create_opts, opts); ++ int key_size, value_size, max_entries; ++ __u32 btf_key_type_id = 0, btf_value_type_id = 0; ++ int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err; ++ ++ key_size = sizeof(__u32); ++ value_size = sizeof(__u32); ++ max_entries = 1; ++ ++ switch (map_type) { ++ case BPF_MAP_TYPE_STACK_TRACE: ++ value_size = sizeof(__u64); ++ break; ++ case BPF_MAP_TYPE_LPM_TRIE: ++ key_size = sizeof(__u64); ++ value_size = sizeof(__u64); ++ opts.map_flags = BPF_F_NO_PREALLOC; ++ break; ++ case BPF_MAP_TYPE_CGROUP_STORAGE: ++ case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: ++ key_size = sizeof(struct bpf_cgroup_storage_key); ++ value_size = sizeof(__u64); ++ max_entries = 0; ++ break; ++ case BPF_MAP_TYPE_QUEUE: ++ case BPF_MAP_TYPE_STACK: ++ key_size = 0; ++ break; ++ case BPF_MAP_TYPE_SK_STORAGE: ++ case BPF_MAP_TYPE_INODE_STORAGE: ++ case BPF_MAP_TYPE_TASK_STORAGE: ++ btf_key_type_id = 1; ++ btf_value_type_id = 3; ++ value_size = 8; ++ max_entries = 0; ++ opts.map_flags = BPF_F_NO_PREALLOC; ++ btf_fd = load_local_storage_btf(); ++ if (btf_fd < 0) ++ return btf_fd; ++ break; ++ case BPF_MAP_TYPE_RINGBUF: ++ key_size = 0; ++ value_size = 0; ++ max_entries = 4096; ++ break; ++ case BPF_MAP_TYPE_STRUCT_OPS: ++ /* we'll get -ENOTSUPP for invalid BTF type ID for struct_ops */ ++ opts.btf_vmlinux_value_type_id = 1; ++ exp_err = -524; /* -ENOTSUPP */ ++ break; ++ case BPF_MAP_TYPE_BLOOM_FILTER: ++ key_size = 0; ++ max_entries = 1; ++ break; ++ case BPF_MAP_TYPE_HASH: ++ case BPF_MAP_TYPE_ARRAY: ++ case BPF_MAP_TYPE_PROG_ARRAY: ++ case 
BPF_MAP_TYPE_PERF_EVENT_ARRAY: ++ case BPF_MAP_TYPE_PERCPU_HASH: ++ case BPF_MAP_TYPE_PERCPU_ARRAY: ++ case BPF_MAP_TYPE_CGROUP_ARRAY: ++ case BPF_MAP_TYPE_LRU_HASH: ++ case BPF_MAP_TYPE_LRU_PERCPU_HASH: ++ case BPF_MAP_TYPE_ARRAY_OF_MAPS: ++ case BPF_MAP_TYPE_HASH_OF_MAPS: ++ case BPF_MAP_TYPE_DEVMAP: ++ case BPF_MAP_TYPE_DEVMAP_HASH: ++ case BPF_MAP_TYPE_SOCKMAP: ++ case BPF_MAP_TYPE_CPUMAP: ++ case BPF_MAP_TYPE_XSKMAP: ++ case BPF_MAP_TYPE_SOCKHASH: ++ case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: ++ break; ++ case BPF_MAP_TYPE_UNSPEC: ++ default: ++ return -EOPNOTSUPP; ++ } ++ ++ if (map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS || ++ map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { ++ fd_inner = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, ++ sizeof(__u32), sizeof(__u32), 1, NULL); ++ if (fd_inner < 0) ++ goto cleanup; ++ ++ opts.inner_map_fd = fd_inner; ++ } ++ ++ if (btf_fd >= 0) { ++ opts.btf_fd = btf_fd; ++ opts.btf_key_type_id = btf_key_type_id; ++ opts.btf_value_type_id = btf_value_type_id; ++ } ++ ++ fd = bpf_map_create(map_type, NULL, key_size, value_size, max_entries, &opts); ++ err = -errno; ++ ++cleanup: ++ if (fd >= 0) ++ close(fd); ++ if (fd_inner >= 0) ++ close(fd_inner); ++ if (btf_fd >= 0) ++ close(btf_fd); ++ ++ if (exp_err) ++ return fd < 0 && err == exp_err ? 1 : 0; ++ else ++ return fd >= 0 ? 1 : 0; ++} ++ ++int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void *opts) ++{ ++ int ret; ++ ++ if (opts) ++ return libbpf_err(-EINVAL); ++ ++ ret = probe_map_create(map_type); ++ return libbpf_err(ret); ++} ++ ++int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helper_id, ++ const void *opts) ++{ ++ struct bpf_insn insns[] = { ++ BPF_EMIT_CALL((__u32)helper_id), ++ BPF_EXIT_INSN(), ++ }; ++ const size_t insn_cnt = ARRAY_SIZE(insns); ++ char buf[4096]; ++ int ret; ++ ++ if (opts) ++ return libbpf_err(-EINVAL); ++ ++ /* we can't successfully load all prog types to check for BPF helper ++ * support, so bail out with -EOPNOTSUPP error ++ */ ++ switch (prog_type) { ++ case BPF_PROG_TYPE_TRACING: ++ case BPF_PROG_TYPE_EXT: ++ case BPF_PROG_TYPE_LSM: ++ case BPF_PROG_TYPE_STRUCT_OPS: ++ return -EOPNOTSUPP; ++ default: ++ break; ++ } ++ ++ buf[0] = '\0'; ++ ret = probe_prog_load(prog_type, insns, insn_cnt, buf, sizeof(buf)); ++ if (ret < 0) ++ return libbpf_err(ret); ++ ++ /* If BPF verifier doesn't recognize BPF helper ID (enum bpf_func_id) ++ * at all, it will emit something like "invalid func unknown#181". ++ * If BPF verifier recognizes BPF helper but it's not supported for ++ * given BPF program type, it will emit "unknown func bpf_sys_bpf#166". ++ * In both cases, provided combination of BPF program type and BPF ++ * helper is not supported by the kernel. ++ * In all other cases, probe_prog_load() above will either succeed (e.g., ++ * because BPF helper happens to accept no input arguments or it ++ * accepts one input argument and initial PTR_TO_CTX is fine for ++ * that), or we'll get some more specific BPF verifier error about ++ * some unsatisfied conditions. 
++ */ ++ if (ret == 0 && (strstr(buf, "invalid func ") || strstr(buf, "unknown func "))) ++ return 0; ++ return 1; /* assume supported */ ++} +diff --git a/src/cc/libbpf/src/libbpf_version.h b/src/cc/libbpf/src/libbpf_version.h +new file mode 100644 +index 0000000..2fb2f42 +--- /dev/null ++++ b/src/cc/libbpf/src/libbpf_version.h +@@ -0,0 +1,9 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++/* Copyright (C) 2021 Facebook */ ++#ifndef __LIBBPF_VERSION_H ++#define __LIBBPF_VERSION_H ++ ++#define LIBBPF_MAJOR_VERSION 1 ++#define LIBBPF_MINOR_VERSION 0 ++ ++#endif /* __LIBBPF_VERSION_H */ +diff --git a/src/cc/libbpf/src/linker.c b/src/cc/libbpf/src/linker.c +new file mode 100644 +index 0000000..4ac02c2 +--- /dev/null ++++ b/src/cc/libbpf/src/linker.c +@@ -0,0 +1,2900 @@ ++// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) ++/* ++ * BPF static linker ++ * ++ * Copyright (c) 2021 Facebook ++ */ ++#include <stdbool.h> ++#include <stddef.h> ++#include <stdio.h> ++#include <stdlib.h> ++#include <string.h> ++#include <errno.h> ++#include <linux/err.h> ++#include <linux/btf.h> ++#include <elf.h> ++#include <libelf.h> ++#include <gelf.h> ++#include <fcntl.h> ++#include "libbpf.h" ++#include "btf.h" ++#include "libbpf_internal.h" ++#include "strset.h" ++ ++#define BTF_EXTERN_SEC ".extern" ++ ++struct src_sec { ++ const char *sec_name; ++ /* positional (not necessarily ELF) index in an array of sections */ ++ int id; ++ /* positional (not necessarily ELF) index of a matching section in a final object file */ ++ int dst_id; ++ /* section data offset in a matching output section */ ++ int dst_off; ++ /* whether section is omitted from the final ELF file */ ++ bool skipped; ++ /* whether section is an ephemeral section, not mapped to an ELF section */ ++ bool ephemeral; ++ ++ /* ELF info */ ++ size_t sec_idx; ++ Elf_Scn *scn; ++ Elf64_Shdr *shdr; ++ Elf_Data *data; ++ ++ /* corresponding BTF DATASEC type ID */ ++ int sec_type_id; ++}; ++ ++struct src_obj { ++ const char *filename; ++ int fd; ++ Elf *elf; ++ /* Section header strings section index */ ++ size_t shstrs_sec_idx; ++ /* SYMTAB section index */ ++ size_t symtab_sec_idx; ++ ++ struct btf *btf; ++ struct btf_ext *btf_ext; ++ ++ /* List of sections (including ephemeral). Slot zero is unused.
*/ ++ struct src_sec *secs; ++ int sec_cnt; ++ ++ /* mapping of symbol indices from src to dst ELF */ ++ int *sym_map; ++ /* mapping from the src BTF type IDs to dst ones */ ++ int *btf_type_map; ++}; ++ ++/* single .BTF.ext data section */ ++struct btf_ext_sec_data { ++ size_t rec_cnt; ++ __u32 rec_sz; ++ void *recs; ++}; ++ ++struct glob_sym { ++ /* ELF symbol index */ ++ int sym_idx; ++ /* associated section id for .ksyms, .kconfig, etc, but not .extern */ ++ int sec_id; ++ /* extern name offset in STRTAB */ ++ int name_off; ++ /* optional associated BTF type ID */ ++ int btf_id; ++ /* BTF type ID to which VAR/FUNC type is pointing to; used for ++ * rewriting types when extern VAR/FUNC is resolved to a concrete ++ * definition ++ */ ++ int underlying_btf_id; ++ /* sec_var index in the corresponding dst_sec, if exists */ ++ int var_idx; ++ ++ /* extern or resolved/global symbol */ ++ bool is_extern; ++ /* weak or strong symbol, never goes back from strong to weak */ ++ bool is_weak; ++}; ++ ++struct dst_sec { ++ char *sec_name; ++ /* positional (not necessarily ELF) index in an array of sections */ ++ int id; ++ ++ bool ephemeral; ++ ++ /* ELF info */ ++ size_t sec_idx; ++ Elf_Scn *scn; ++ Elf64_Shdr *shdr; ++ Elf_Data *data; ++ ++ /* final output section size */ ++ int sec_sz; ++ /* final output contents of the section */ ++ void *raw_data; ++ ++ /* corresponding STT_SECTION symbol index in SYMTAB */ ++ int sec_sym_idx; ++ ++ /* section's DATASEC variable info, emitted on BTF finalization */ ++ bool has_btf; ++ int sec_var_cnt; ++ struct btf_var_secinfo *sec_vars; ++ ++ /* section's .BTF.ext data */ ++ struct btf_ext_sec_data func_info; ++ struct btf_ext_sec_data line_info; ++ struct btf_ext_sec_data core_relo_info; ++}; ++ ++struct bpf_linker { ++ char *filename; ++ int fd; ++ Elf *elf; ++ Elf64_Ehdr *elf_hdr; ++ ++ /* Output sections metadata */ ++ struct dst_sec *secs; ++ int sec_cnt; ++ ++ struct strset *strtab_strs; /* STRTAB unique strings */ ++ size_t strtab_sec_idx; /* STRTAB section index */ ++ size_t symtab_sec_idx; /* SYMTAB section index */ ++ ++ struct btf *btf; ++ struct btf_ext *btf_ext; ++ ++ /* global (including extern) ELF symbols */ ++ int glob_sym_cnt; ++ struct glob_sym *glob_syms; ++}; ++ ++#define pr_warn_elf(fmt, ...) 
\ ++ libbpf_print(LIBBPF_WARN, "libbpf: " fmt ": %s\n", ##__VA_ARGS__, elf_errmsg(-1)) ++ ++static int init_output_elf(struct bpf_linker *linker, const char *file); ++ ++static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, ++ const struct bpf_linker_file_opts *opts, ++ struct src_obj *obj); ++static int linker_sanity_check_elf(struct src_obj *obj); ++static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *sec); ++static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *sec); ++static int linker_sanity_check_btf(struct src_obj *obj); ++static int linker_sanity_check_btf_ext(struct src_obj *obj); ++static int linker_fixup_btf(struct src_obj *obj); ++static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj); ++static int linker_append_elf_syms(struct bpf_linker *linker, struct src_obj *obj); ++static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj, ++ Elf64_Sym *sym, const char *sym_name, int src_sym_idx); ++static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj); ++static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj); ++static int linker_append_btf_ext(struct bpf_linker *linker, struct src_obj *obj); ++ ++static int finalize_btf(struct bpf_linker *linker); ++static int finalize_btf_ext(struct bpf_linker *linker); ++ ++void bpf_linker__free(struct bpf_linker *linker) ++{ ++ int i; ++ ++ if (!linker) ++ return; ++ ++ free(linker->filename); ++ ++ if (linker->elf) ++ elf_end(linker->elf); ++ ++ if (linker->fd >= 0) ++ close(linker->fd); ++ ++ strset__free(linker->strtab_strs); ++ ++ btf__free(linker->btf); ++ btf_ext__free(linker->btf_ext); ++ ++ for (i = 1; i < linker->sec_cnt; i++) { ++ struct dst_sec *sec = &linker->secs[i]; ++ ++ free(sec->sec_name); ++ free(sec->raw_data); ++ free(sec->sec_vars); ++ ++ free(sec->func_info.recs); ++ free(sec->line_info.recs); ++ free(sec->core_relo_info.recs); ++ } ++ free(linker->secs); ++ ++ free(linker->glob_syms); ++ free(linker); ++} ++ ++struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts *opts) ++{ ++ struct bpf_linker *linker; ++ int err; ++ ++ if (!OPTS_VALID(opts, bpf_linker_opts)) ++ return errno = EINVAL, NULL; ++ ++ if (elf_version(EV_CURRENT) == EV_NONE) { ++ pr_warn_elf("libelf initialization failed"); ++ return errno = EINVAL, NULL; ++ } ++ ++ linker = calloc(1, sizeof(*linker)); ++ if (!linker) ++ return errno = ENOMEM, NULL; ++ ++ linker->fd = -1; ++ ++ err = init_output_elf(linker, filename); ++ if (err) ++ goto err_out; ++ ++ return linker; ++ ++err_out: ++ bpf_linker__free(linker); ++ return errno = -err, NULL; ++} ++ ++static struct dst_sec *add_dst_sec(struct bpf_linker *linker, const char *sec_name) ++{ ++ struct dst_sec *secs = linker->secs, *sec; ++ size_t new_cnt = linker->sec_cnt ? 
linker->sec_cnt + 1 : 2; ++ ++ secs = libbpf_reallocarray(secs, new_cnt, sizeof(*secs)); ++ if (!secs) ++ return NULL; ++ ++ /* zero out newly allocated memory */ ++ memset(secs + linker->sec_cnt, 0, (new_cnt - linker->sec_cnt) * sizeof(*secs)); ++ ++ linker->secs = secs; ++ linker->sec_cnt = new_cnt; ++ ++ sec = &linker->secs[new_cnt - 1]; ++ sec->id = new_cnt - 1; ++ sec->sec_name = strdup(sec_name); ++ if (!sec->sec_name) ++ return NULL; ++ ++ return sec; ++} ++ ++static Elf64_Sym *add_new_sym(struct bpf_linker *linker, size_t *sym_idx) ++{ ++ struct dst_sec *symtab = &linker->secs[linker->symtab_sec_idx]; ++ Elf64_Sym *syms, *sym; ++ size_t sym_cnt = symtab->sec_sz / sizeof(*sym); ++ ++ syms = libbpf_reallocarray(symtab->raw_data, sym_cnt + 1, sizeof(*sym)); ++ if (!syms) ++ return NULL; ++ ++ sym = &syms[sym_cnt]; ++ memset(sym, 0, sizeof(*sym)); ++ ++ symtab->raw_data = syms; ++ symtab->sec_sz += sizeof(*sym); ++ symtab->shdr->sh_size += sizeof(*sym); ++ symtab->data->d_size += sizeof(*sym); ++ ++ if (sym_idx) ++ *sym_idx = sym_cnt; ++ ++ return sym; ++} ++ ++static int init_output_elf(struct bpf_linker *linker, const char *file) ++{ ++ int err, str_off; ++ Elf64_Sym *init_sym; ++ struct dst_sec *sec; ++ ++ linker->filename = strdup(file); ++ if (!linker->filename) ++ return -ENOMEM; ++ ++ linker->fd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644); ++ if (linker->fd < 0) { ++ err = -errno; ++ pr_warn("failed to create '%s': %d\n", file, err); ++ return err; ++ } ++ ++ linker->elf = elf_begin(linker->fd, ELF_C_WRITE, NULL); ++ if (!linker->elf) { ++ pr_warn_elf("failed to create ELF object"); ++ return -EINVAL; ++ } ++ ++ /* ELF header */ ++ linker->elf_hdr = elf64_newehdr(linker->elf); ++ if (!linker->elf_hdr) { ++ pr_warn_elf("failed to create ELF header"); ++ return -EINVAL; ++ } ++ ++ linker->elf_hdr->e_machine = EM_BPF; ++ linker->elf_hdr->e_type = ET_REL; ++#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ++ linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2LSB; ++#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ ++ linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2MSB; ++#else ++#error "Unknown __BYTE_ORDER__" ++#endif ++ ++ /* STRTAB */ ++ /* initialize strset with an empty string to conform to ELF */ ++ linker->strtab_strs = strset__new(INT_MAX, "", sizeof("")); ++ if (libbpf_get_error(linker->strtab_strs)) ++ return libbpf_get_error(linker->strtab_strs); ++ ++ sec = add_dst_sec(linker, ".strtab"); ++ if (!sec) ++ return -ENOMEM; ++ ++ sec->scn = elf_newscn(linker->elf); ++ if (!sec->scn) { ++ pr_warn_elf("failed to create STRTAB section"); ++ return -EINVAL; ++ } ++ ++ sec->shdr = elf64_getshdr(sec->scn); ++ if (!sec->shdr) ++ return -EINVAL; ++ ++ sec->data = elf_newdata(sec->scn); ++ if (!sec->data) { ++ pr_warn_elf("failed to create STRTAB data"); ++ return -EINVAL; ++ } ++ ++ str_off = strset__add_str(linker->strtab_strs, sec->sec_name); ++ if (str_off < 0) ++ return str_off; ++ ++ sec->sec_idx = elf_ndxscn(sec->scn); ++ linker->elf_hdr->e_shstrndx = sec->sec_idx; ++ linker->strtab_sec_idx = sec->sec_idx; ++ ++ sec->shdr->sh_name = str_off; ++ sec->shdr->sh_type = SHT_STRTAB; ++ sec->shdr->sh_flags = SHF_STRINGS; ++ sec->shdr->sh_offset = 0; ++ sec->shdr->sh_link = 0; ++ sec->shdr->sh_info = 0; ++ sec->shdr->sh_addralign = 1; ++ sec->shdr->sh_size = sec->sec_sz = 0; ++ sec->shdr->sh_entsize = 0; ++ ++ /* SYMTAB */ ++ sec = add_dst_sec(linker, ".symtab"); ++ if (!sec) ++ return -ENOMEM; ++ ++ sec->scn = elf_newscn(linker->elf); ++ if (!sec->scn) { ++ pr_warn_elf("failed to 
create SYMTAB section"); ++ return -EINVAL; ++ } ++ ++ sec->shdr = elf64_getshdr(sec->scn); ++ if (!sec->shdr) ++ return -EINVAL; ++ ++ sec->data = elf_newdata(sec->scn); ++ if (!sec->data) { ++ pr_warn_elf("failed to create SYMTAB data"); ++ return -EINVAL; ++ } ++ ++ str_off = strset__add_str(linker->strtab_strs, sec->sec_name); ++ if (str_off < 0) ++ return str_off; ++ ++ sec->sec_idx = elf_ndxscn(sec->scn); ++ linker->symtab_sec_idx = sec->sec_idx; ++ ++ sec->shdr->sh_name = str_off; ++ sec->shdr->sh_type = SHT_SYMTAB; ++ sec->shdr->sh_flags = 0; ++ sec->shdr->sh_offset = 0; ++ sec->shdr->sh_link = linker->strtab_sec_idx; ++ /* sh_info should be one greater than the index of the last local ++ * symbol (i.e., binding is STB_LOCAL). But why and who cares? ++ */ ++ sec->shdr->sh_info = 0; ++ sec->shdr->sh_addralign = 8; ++ sec->shdr->sh_entsize = sizeof(Elf64_Sym); ++ ++ /* .BTF */ ++ linker->btf = btf__new_empty(); ++ err = libbpf_get_error(linker->btf); ++ if (err) ++ return err; ++ ++ /* add the special all-zero symbol */ ++ init_sym = add_new_sym(linker, NULL); ++ if (!init_sym) ++ return -EINVAL; ++ ++ init_sym->st_name = 0; ++ init_sym->st_info = 0; ++ init_sym->st_other = 0; ++ init_sym->st_shndx = SHN_UNDEF; ++ init_sym->st_value = 0; ++ init_sym->st_size = 0; ++ ++ return 0; ++} ++ ++int bpf_linker__add_file(struct bpf_linker *linker, const char *filename, ++ const struct bpf_linker_file_opts *opts) ++{ ++ struct src_obj obj = {}; ++ int err = 0; ++ ++ if (!OPTS_VALID(opts, bpf_linker_file_opts)) ++ return libbpf_err(-EINVAL); ++ ++ if (!linker->elf) ++ return libbpf_err(-EINVAL); ++ ++ err = err ?: linker_load_obj_file(linker, filename, opts, &obj); ++ err = err ?: linker_append_sec_data(linker, &obj); ++ err = err ?: linker_append_elf_syms(linker, &obj); ++ err = err ?: linker_append_elf_relos(linker, &obj); ++ err = err ?: linker_append_btf(linker, &obj); ++ err = err ?: linker_append_btf_ext(linker, &obj); ++ ++ /* free up src_obj resources */ ++ free(obj.btf_type_map); ++ btf__free(obj.btf); ++ btf_ext__free(obj.btf_ext); ++ free(obj.secs); ++ free(obj.sym_map); ++ if (obj.elf) ++ elf_end(obj.elf); ++ if (obj.fd >= 0) ++ close(obj.fd); ++ ++ return libbpf_err(err); ++} ++ ++static bool is_dwarf_sec_name(const char *name) ++{ ++ /* approximation, but the actual list is too long */ ++ return strncmp(name, ".debug_", sizeof(".debug_") - 1) == 0; ++} ++ ++static bool is_ignored_sec(struct src_sec *sec) ++{ ++ Elf64_Shdr *shdr = sec->shdr; ++ const char *name = sec->sec_name; ++ ++ /* no special handling of .strtab */ ++ if (shdr->sh_type == SHT_STRTAB) ++ return true; ++ ++ /* ignore .llvm_addrsig section as well */ ++ if (shdr->sh_type == SHT_LLVM_ADDRSIG) ++ return true; ++ ++ /* no subprograms will lead to an empty .text section, ignore it */ ++ if (shdr->sh_type == SHT_PROGBITS && shdr->sh_size == 0 && ++ strcmp(sec->sec_name, ".text") == 0) ++ return true; ++ ++ /* DWARF sections */ ++ if (is_dwarf_sec_name(sec->sec_name)) ++ return true; ++ ++ if (strncmp(name, ".rel", sizeof(".rel") - 1) == 0) { ++ name += sizeof(".rel") - 1; ++ /* DWARF section relocations */ ++ if (is_dwarf_sec_name(name)) ++ return true; ++ ++ /* .BTF and .BTF.ext don't need relocations */ ++ if (strcmp(name, BTF_ELF_SEC) == 0 || ++ strcmp(name, BTF_EXT_ELF_SEC) == 0) ++ return true; ++ } ++ ++ return false; ++} ++ ++static struct src_sec *add_src_sec(struct src_obj *obj, const char *sec_name) ++{ ++ struct src_sec *secs = obj->secs, *sec; ++ size_t new_cnt = obj->sec_cnt ? 
obj->sec_cnt + 1 : 2; ++ ++ secs = libbpf_reallocarray(secs, new_cnt, sizeof(*secs)); ++ if (!secs) ++ return NULL; ++ ++ /* zero out newly allocated memory */ ++ memset(secs + obj->sec_cnt, 0, (new_cnt - obj->sec_cnt) * sizeof(*secs)); ++ ++ obj->secs = secs; ++ obj->sec_cnt = new_cnt; ++ ++ sec = &obj->secs[new_cnt - 1]; ++ sec->id = new_cnt - 1; ++ sec->sec_name = sec_name; ++ ++ return sec; ++} ++ ++static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, ++ const struct bpf_linker_file_opts *opts, ++ struct src_obj *obj) ++{ ++#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ++ const int host_endianness = ELFDATA2LSB; ++#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ ++ const int host_endianness = ELFDATA2MSB; ++#else ++#error "Unknown __BYTE_ORDER__" ++#endif ++ int err = 0; ++ Elf_Scn *scn; ++ Elf_Data *data; ++ Elf64_Ehdr *ehdr; ++ Elf64_Shdr *shdr; ++ struct src_sec *sec; ++ ++ pr_debug("linker: adding object file '%s'...\n", filename); ++ ++ obj->filename = filename; ++ ++ obj->fd = open(filename, O_RDONLY | O_CLOEXEC); ++ if (obj->fd < 0) { ++ err = -errno; ++ pr_warn("failed to open file '%s': %d\n", filename, err); ++ return err; ++ } ++ obj->elf = elf_begin(obj->fd, ELF_C_READ_MMAP, NULL); ++ if (!obj->elf) { ++ err = -errno; ++ pr_warn_elf("failed to parse ELF file '%s'", filename); ++ return err; ++ } ++ ++ /* Sanity check ELF file high-level properties */ ++ ehdr = elf64_getehdr(obj->elf); ++ if (!ehdr) { ++ err = -errno; ++ pr_warn_elf("failed to get ELF header for %s", filename); ++ return err; ++ } ++ if (ehdr->e_ident[EI_DATA] != host_endianness) { ++ err = -EOPNOTSUPP; ++ pr_warn_elf("unsupported byte order of ELF file %s", filename); ++ return err; ++ } ++ if (ehdr->e_type != ET_REL ++ || ehdr->e_machine != EM_BPF ++ || ehdr->e_ident[EI_CLASS] != ELFCLASS64) { ++ err = -EOPNOTSUPP; ++ pr_warn_elf("unsupported kind of ELF file %s", filename); ++ return err; ++ } ++ ++ if (elf_getshdrstrndx(obj->elf, &obj->shstrs_sec_idx)) { ++ err = -errno; ++ pr_warn_elf("failed to get SHSTRTAB section index for %s", filename); ++ return err; ++ } ++ ++ scn = NULL; ++ while ((scn = elf_nextscn(obj->elf, scn)) != NULL) { ++ size_t sec_idx = elf_ndxscn(scn); ++ const char *sec_name; ++ ++ shdr = elf64_getshdr(scn); ++ if (!shdr) { ++ err = -errno; ++ pr_warn_elf("failed to get section #%zu header for %s", ++ sec_idx, filename); ++ return err; ++ } ++ ++ sec_name = elf_strptr(obj->elf, obj->shstrs_sec_idx, shdr->sh_name); ++ if (!sec_name) { ++ err = -errno; ++ pr_warn_elf("failed to get section #%zu name for %s", ++ sec_idx, filename); ++ return err; ++ } ++ ++ data = elf_getdata(scn, 0); ++ if (!data) { ++ err = -errno; ++ pr_warn_elf("failed to get section #%zu (%s) data from %s", ++ sec_idx, sec_name, filename); ++ return err; ++ } ++ ++ sec = add_src_sec(obj, sec_name); ++ if (!sec) ++ return -ENOMEM; ++ ++ sec->scn = scn; ++ sec->shdr = shdr; ++ sec->data = data; ++ sec->sec_idx = elf_ndxscn(scn); ++ ++ if (is_ignored_sec(sec)) { ++ sec->skipped = true; ++ continue; ++ } ++ ++ switch (shdr->sh_type) { ++ case SHT_SYMTAB: ++ if (obj->symtab_sec_idx) { ++ err = -EOPNOTSUPP; ++ pr_warn("multiple SYMTAB sections found, not supported\n"); ++ return err; ++ } ++ obj->symtab_sec_idx = sec_idx; ++ break; ++ case SHT_STRTAB: ++ /* we'll construct our own string table */ ++ break; ++ case SHT_PROGBITS: ++ if (strcmp(sec_name, BTF_ELF_SEC) == 0) { ++ obj->btf = btf__new(data->d_buf, shdr->sh_size); ++ err = libbpf_get_error(obj->btf); ++ if (err) { ++ pr_warn("failed to 
parse .BTF from %s: %d\n", filename, err); ++ return err; ++ } ++ sec->skipped = true; ++ continue; ++ } ++ if (strcmp(sec_name, BTF_EXT_ELF_SEC) == 0) { ++ obj->btf_ext = btf_ext__new(data->d_buf, shdr->sh_size); ++ err = libbpf_get_error(obj->btf_ext); ++ if (err) { ++ pr_warn("failed to parse .BTF.ext from '%s': %d\n", filename, err); ++ return err; ++ } ++ sec->skipped = true; ++ continue; ++ } ++ ++ /* data & code */ ++ break; ++ case SHT_NOBITS: ++ /* BSS */ ++ break; ++ case SHT_REL: ++ /* relocations */ ++ break; ++ default: ++ pr_warn("unrecognized section #%zu (%s) in %s\n", ++ sec_idx, sec_name, filename); ++ err = -EINVAL; ++ return err; ++ } ++ } ++ ++ err = err ?: linker_sanity_check_elf(obj); ++ err = err ?: linker_sanity_check_btf(obj); ++ err = err ?: linker_sanity_check_btf_ext(obj); ++ err = err ?: linker_fixup_btf(obj); ++ ++ return err; ++} ++ ++static int linker_sanity_check_elf(struct src_obj *obj) ++{ ++ struct src_sec *sec; ++ int i, err; ++ ++ if (!obj->symtab_sec_idx) { ++ pr_warn("ELF is missing SYMTAB section in %s\n", obj->filename); ++ return -EINVAL; ++ } ++ if (!obj->shstrs_sec_idx) { ++ pr_warn("ELF is missing section headers STRTAB section in %s\n", obj->filename); ++ return -EINVAL; ++ } ++ ++ for (i = 1; i < obj->sec_cnt; i++) { ++ sec = &obj->secs[i]; ++ ++ if (sec->sec_name[0] == '\0') { ++ pr_warn("ELF section #%zu has empty name in %s\n", sec->sec_idx, obj->filename); ++ return -EINVAL; ++ } ++ ++ if (sec->shdr->sh_addralign && !is_pow_of_2(sec->shdr->sh_addralign)) ++ return -EINVAL; ++ if (sec->shdr->sh_addralign != sec->data->d_align) ++ return -EINVAL; ++ ++ if (sec->shdr->sh_size != sec->data->d_size) ++ return -EINVAL; ++ ++ switch (sec->shdr->sh_type) { ++ case SHT_SYMTAB: ++ err = linker_sanity_check_elf_symtab(obj, sec); ++ if (err) ++ return err; ++ break; ++ case SHT_STRTAB: ++ break; ++ case SHT_PROGBITS: ++ if (sec->shdr->sh_flags & SHF_EXECINSTR) { ++ if (sec->shdr->sh_size % sizeof(struct bpf_insn) != 0) ++ return -EINVAL; ++ } ++ break; ++ case SHT_NOBITS: ++ break; ++ case SHT_REL: ++ err = linker_sanity_check_elf_relos(obj, sec); ++ if (err) ++ return err; ++ break; ++ case SHT_LLVM_ADDRSIG: ++ break; ++ default: ++ pr_warn("ELF section #%zu (%s) has unrecognized type %zu in %s\n", ++ sec->sec_idx, sec->sec_name, (size_t)sec->shdr->sh_type, obj->filename); ++ return -EINVAL; ++ } ++ } ++ ++ return 0; ++} ++ ++static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *sec) ++{ ++ struct src_sec *link_sec; ++ Elf64_Sym *sym; ++ int i, n; ++ ++ if (sec->shdr->sh_entsize != sizeof(Elf64_Sym)) ++ return -EINVAL; ++ if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0) ++ return -EINVAL; ++ ++ if (!sec->shdr->sh_link || sec->shdr->sh_link >= obj->sec_cnt) { ++ pr_warn("ELF SYMTAB section #%zu points to missing STRTAB section #%zu in %s\n", ++ sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename); ++ return -EINVAL; ++ } ++ link_sec = &obj->secs[sec->shdr->sh_link]; ++ if (link_sec->shdr->sh_type != SHT_STRTAB) { ++ pr_warn("ELF SYMTAB section #%zu points to invalid STRTAB section #%zu in %s\n", ++ sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename); ++ return -EINVAL; ++ } ++ ++ n = sec->shdr->sh_size / sec->shdr->sh_entsize; ++ sym = sec->data->d_buf; ++ for (i = 0; i < n; i++, sym++) { ++ int sym_type = ELF64_ST_TYPE(sym->st_info); ++ int sym_bind = ELF64_ST_BIND(sym->st_info); ++ int sym_vis = ELF64_ST_VISIBILITY(sym->st_other); ++ ++ if (i == 0) { ++ if (sym->st_name != 0 || sym->st_info != 0 ++ || 
sym->st_other != 0 || sym->st_shndx != 0 ++ || sym->st_value != 0 || sym->st_size != 0) { ++ pr_warn("ELF sym #0 is invalid in %s\n", obj->filename); ++ return -EINVAL; ++ } ++ continue; ++ } ++ if (sym_bind != STB_LOCAL && sym_bind != STB_GLOBAL && sym_bind != STB_WEAK) { ++ pr_warn("ELF sym #%d in section #%zu has unsupported symbol binding %d\n", ++ i, sec->sec_idx, sym_bind); ++ return -EINVAL; ++ } ++ if (sym_vis != STV_DEFAULT && sym_vis != STV_HIDDEN) { ++ pr_warn("ELF sym #%d in section #%zu has unsupported symbol visibility %d\n", ++ i, sec->sec_idx, sym_vis); ++ return -EINVAL; ++ } ++ if (sym->st_shndx == 0) { ++ if (sym_type != STT_NOTYPE || sym_bind == STB_LOCAL ++ || sym->st_value != 0 || sym->st_size != 0) { ++ pr_warn("ELF sym #%d is invalid extern symbol in %s\n", ++ i, obj->filename); ++ ++ return -EINVAL; ++ } ++ continue; ++ } ++ if (sym->st_shndx < SHN_LORESERVE && sym->st_shndx >= obj->sec_cnt) { ++ pr_warn("ELF sym #%d in section #%zu points to missing section #%zu in %s\n", ++ i, sec->sec_idx, (size_t)sym->st_shndx, obj->filename); ++ return -EINVAL; ++ } ++ if (sym_type == STT_SECTION) { ++ if (sym->st_value != 0) ++ return -EINVAL; ++ continue; ++ } ++ } ++ ++ return 0; ++} ++ ++static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *sec) ++{ ++ struct src_sec *link_sec, *sym_sec; ++ Elf64_Rel *relo; ++ int i, n; ++ ++ if (sec->shdr->sh_entsize != sizeof(Elf64_Rel)) ++ return -EINVAL; ++ if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0) ++ return -EINVAL; ++ ++ /* SHT_REL's sh_link should point to SYMTAB */ ++ if (sec->shdr->sh_link != obj->symtab_sec_idx) { ++ pr_warn("ELF relo section #%zu points to invalid SYMTAB section #%zu in %s\n", ++ sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename); ++ return -EINVAL; ++ } ++ ++ /* SHT_REL's sh_info points to relocated section */ ++ if (!sec->shdr->sh_info || sec->shdr->sh_info >= obj->sec_cnt) { ++ pr_warn("ELF relo section #%zu points to missing section #%zu in %s\n", ++ sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename); ++ return -EINVAL; ++ } ++ link_sec = &obj->secs[sec->shdr->sh_info]; ++ ++ /* .rel<secname> -> <secname> pattern is followed */ ++ if (strncmp(sec->sec_name, ".rel", sizeof(".rel") - 1) != 0 ++ || strcmp(sec->sec_name + sizeof(".rel") - 1, link_sec->sec_name) != 0) { ++ pr_warn("ELF relo section #%zu name has invalid name in %s\n", ++ sec->sec_idx, obj->filename); ++ return -EINVAL; ++ } ++ ++ /* don't further validate relocations for ignored sections */ ++ if (link_sec->skipped) ++ return 0; ++ ++ /* relocatable section is data or instructions */ ++ if (link_sec->shdr->sh_type != SHT_PROGBITS && link_sec->shdr->sh_type != SHT_NOBITS) { ++ pr_warn("ELF relo section #%zu points to invalid section #%zu in %s\n", ++ sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename); ++ return -EINVAL; ++ } ++ ++ /* check sanity of each relocation */ ++ n = sec->shdr->sh_size / sec->shdr->sh_entsize; ++ relo = sec->data->d_buf; ++ sym_sec = &obj->secs[obj->symtab_sec_idx]; ++ for (i = 0; i < n; i++, relo++) { ++ size_t sym_idx = ELF64_R_SYM(relo->r_info); ++ size_t sym_type = ELF64_R_TYPE(relo->r_info); ++ ++ if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32 && ++ sym_type != R_BPF_64_ABS64 && sym_type != R_BPF_64_ABS32) { ++ pr_warn("ELF relo #%d in section #%zu has unexpected type %zu in %s\n", ++ i, sec->sec_idx, sym_type, obj->filename); ++ return -EINVAL; ++ } ++ ++ if (!sym_idx || sym_idx * sizeof(Elf64_Sym) >= sym_sec->shdr->sh_size) { ++ pr_warn("ELF relo #%d in section #%zu points
to invalid symbol #%zu in %s\n", ++ i, sec->sec_idx, sym_idx, obj->filename); ++ return -EINVAL; ++ } ++ ++ if (link_sec->shdr->sh_flags & SHF_EXECINSTR) { ++ if (relo->r_offset % sizeof(struct bpf_insn) != 0) { ++ pr_warn("ELF relo #%d in section #%zu points to missing symbol #%zu in %s\n", ++ i, sec->sec_idx, sym_idx, obj->filename); ++ return -EINVAL; ++ } ++ } ++ } ++ ++ return 0; ++} ++ ++static int check_btf_type_id(__u32 *type_id, void *ctx) ++{ ++ struct btf *btf = ctx; ++ ++ if (*type_id >= btf__type_cnt(btf)) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int check_btf_str_off(__u32 *str_off, void *ctx) ++{ ++ struct btf *btf = ctx; ++ const char *s; ++ ++ s = btf__str_by_offset(btf, *str_off); ++ ++ if (!s) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int linker_sanity_check_btf(struct src_obj *obj) ++{ ++ struct btf_type *t; ++ int i, n, err = 0; ++ ++ if (!obj->btf) ++ return 0; ++ ++ n = btf__type_cnt(obj->btf); ++ for (i = 1; i < n; i++) { ++ t = btf_type_by_id(obj->btf, i); ++ ++ err = err ?: btf_type_visit_type_ids(t, check_btf_type_id, obj->btf); ++ err = err ?: btf_type_visit_str_offs(t, check_btf_str_off, obj->btf); ++ if (err) ++ return err; ++ } ++ ++ return 0; ++} ++ ++static int linker_sanity_check_btf_ext(struct src_obj *obj) ++{ ++ int err = 0; ++ ++ if (!obj->btf_ext) ++ return 0; ++ ++ /* can't use .BTF.ext without .BTF */ ++ if (!obj->btf) ++ return -EINVAL; ++ ++ err = err ?: btf_ext_visit_type_ids(obj->btf_ext, check_btf_type_id, obj->btf); ++ err = err ?: btf_ext_visit_str_offs(obj->btf_ext, check_btf_str_off, obj->btf); ++ if (err) ++ return err; ++ ++ return 0; ++} ++ ++static int init_sec(struct bpf_linker *linker, struct dst_sec *dst_sec, struct src_sec *src_sec) ++{ ++ Elf_Scn *scn; ++ Elf_Data *data; ++ Elf64_Shdr *shdr; ++ int name_off; ++ ++ dst_sec->sec_sz = 0; ++ dst_sec->sec_idx = 0; ++ dst_sec->ephemeral = src_sec->ephemeral; ++ ++ /* ephemeral sections are just thin section shells lacking most parts */ ++ if (src_sec->ephemeral) ++ return 0; ++ ++ scn = elf_newscn(linker->elf); ++ if (!scn) ++ return -ENOMEM; ++ data = elf_newdata(scn); ++ if (!data) ++ return -ENOMEM; ++ shdr = elf64_getshdr(scn); ++ if (!shdr) ++ return -ENOMEM; ++ ++ dst_sec->scn = scn; ++ dst_sec->shdr = shdr; ++ dst_sec->data = data; ++ dst_sec->sec_idx = elf_ndxscn(scn); ++ ++ name_off = strset__add_str(linker->strtab_strs, src_sec->sec_name); ++ if (name_off < 0) ++ return name_off; ++ ++ shdr->sh_name = name_off; ++ shdr->sh_type = src_sec->shdr->sh_type; ++ shdr->sh_flags = src_sec->shdr->sh_flags; ++ shdr->sh_size = 0; ++ /* sh_link and sh_info have different meaning for different types of ++ * sections, so we leave it up to the caller code to fill them in, if ++ * necessary ++ */ ++ shdr->sh_link = 0; ++ shdr->sh_info = 0; ++ shdr->sh_addralign = src_sec->shdr->sh_addralign; ++ shdr->sh_entsize = src_sec->shdr->sh_entsize; ++ ++ data->d_type = src_sec->data->d_type; ++ data->d_size = 0; ++ data->d_buf = NULL; ++ data->d_align = src_sec->data->d_align; ++ data->d_off = 0; ++ ++ return 0; ++} ++ ++static struct dst_sec *find_dst_sec_by_name(struct bpf_linker *linker, const char *sec_name) ++{ ++ struct dst_sec *sec; ++ int i; ++ ++ for (i = 1; i < linker->sec_cnt; i++) { ++ sec = &linker->secs[i]; ++ ++ if (strcmp(sec->sec_name, sec_name) == 0) ++ return sec; ++ } ++ ++ return NULL; ++} ++ ++static bool secs_match(struct dst_sec *dst, struct src_sec *src) ++{ ++ if (dst->ephemeral || src->ephemeral) ++ return true; ++ ++ if (dst->shdr->sh_type != 
src->shdr->sh_type) { ++ pr_warn("sec %s types mismatch\n", dst->sec_name); ++ return false; ++ } ++ if (dst->shdr->sh_flags != src->shdr->sh_flags) { ++ pr_warn("sec %s flags mismatch\n", dst->sec_name); ++ return false; ++ } ++ if (dst->shdr->sh_entsize != src->shdr->sh_entsize) { ++ pr_warn("sec %s entsize mismatch\n", dst->sec_name); ++ return false; ++ } ++ ++ return true; ++} ++ ++static bool sec_content_is_same(struct dst_sec *dst_sec, struct src_sec *src_sec) ++{ ++ if (dst_sec->sec_sz != src_sec->shdr->sh_size) ++ return false; ++ if (memcmp(dst_sec->raw_data, src_sec->data->d_buf, dst_sec->sec_sz) != 0) ++ return false; ++ return true; ++} ++ ++static int extend_sec(struct bpf_linker *linker, struct dst_sec *dst, struct src_sec *src) ++{ ++ void *tmp; ++ size_t dst_align, src_align; ++ size_t dst_align_sz, dst_final_sz; ++ int err; ++ ++ /* Ephemeral source section doesn't contribute anything to ELF ++ * section data. ++ */ ++ if (src->ephemeral) ++ return 0; ++ ++ /* Some sections (like .maps) can contain both externs (and thus be ++ * ephemeral) and non-externs (map definitions). So it's possible that ++ * it has to be "upgraded" from ephemeral to non-ephemeral when the ++ * first non-ephemeral entity appears. In such case, we add ELF ++ * section, data, etc. ++ */ ++ if (dst->ephemeral) { ++ err = init_sec(linker, dst, src); ++ if (err) ++ return err; ++ } ++ ++ dst_align = dst->shdr->sh_addralign; ++ src_align = src->shdr->sh_addralign; ++ if (dst_align == 0) ++ dst_align = 1; ++ if (dst_align < src_align) ++ dst_align = src_align; ++ ++ dst_align_sz = (dst->sec_sz + dst_align - 1) / dst_align * dst_align; ++ ++ /* no need to re-align final size */ ++ dst_final_sz = dst_align_sz + src->shdr->sh_size; ++ ++ if (src->shdr->sh_type != SHT_NOBITS) { ++ tmp = realloc(dst->raw_data, dst_final_sz); ++ if (!tmp) ++ return -ENOMEM; ++ dst->raw_data = tmp; ++ ++ /* pad dst section, if it's alignment forced size increase */ ++ memset(dst->raw_data + dst->sec_sz, 0, dst_align_sz - dst->sec_sz); ++ /* now copy src data at a properly aligned offset */ ++ memcpy(dst->raw_data + dst_align_sz, src->data->d_buf, src->shdr->sh_size); ++ } ++ ++ dst->sec_sz = dst_final_sz; ++ dst->shdr->sh_size = dst_final_sz; ++ dst->data->d_size = dst_final_sz; ++ ++ dst->shdr->sh_addralign = dst_align; ++ dst->data->d_align = dst_align; ++ ++ src->dst_off = dst_align_sz; ++ ++ return 0; ++} ++ ++static bool is_data_sec(struct src_sec *sec) ++{ ++ if (!sec || sec->skipped) ++ return false; ++ /* ephemeral sections are data sections, e.g., .kconfig, .ksyms */ ++ if (sec->ephemeral) ++ return true; ++ return sec->shdr->sh_type == SHT_PROGBITS || sec->shdr->sh_type == SHT_NOBITS; ++} ++ ++static bool is_relo_sec(struct src_sec *sec) ++{ ++ if (!sec || sec->skipped || sec->ephemeral) ++ return false; ++ return sec->shdr->sh_type == SHT_REL; ++} ++ ++static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj) ++{ ++ int i, err; ++ ++ for (i = 1; i < obj->sec_cnt; i++) { ++ struct src_sec *src_sec; ++ struct dst_sec *dst_sec; ++ ++ src_sec = &obj->secs[i]; ++ if (!is_data_sec(src_sec)) ++ continue; ++ ++ dst_sec = find_dst_sec_by_name(linker, src_sec->sec_name); ++ if (!dst_sec) { ++ dst_sec = add_dst_sec(linker, src_sec->sec_name); ++ if (!dst_sec) ++ return -ENOMEM; ++ err = init_sec(linker, dst_sec, src_sec); ++ if (err) { ++ pr_warn("failed to init section '%s'\n", src_sec->sec_name); ++ return err; ++ } ++ } else { ++ if (!secs_match(dst_sec, src_sec)) { ++ pr_warn("ELF sections %s are 
incompatible\n", src_sec->sec_name); ++ return -1; ++ } ++ ++ /* "license" and "version" sections are deduped */ ++ if (strcmp(src_sec->sec_name, "license") == 0 ++ || strcmp(src_sec->sec_name, "version") == 0) { ++ if (!sec_content_is_same(dst_sec, src_sec)) { ++ pr_warn("non-identical contents of section '%s' are not supported\n", src_sec->sec_name); ++ return -EINVAL; ++ } ++ src_sec->skipped = true; ++ src_sec->dst_id = dst_sec->id; ++ continue; ++ } ++ } ++ ++ /* record mapped section index */ ++ src_sec->dst_id = dst_sec->id; ++ ++ err = extend_sec(linker, dst_sec, src_sec); ++ if (err) ++ return err; ++ } ++ ++ return 0; ++} ++ ++static int linker_append_elf_syms(struct bpf_linker *linker, struct src_obj *obj) ++{ ++ struct src_sec *symtab = &obj->secs[obj->symtab_sec_idx]; ++ Elf64_Sym *sym = symtab->data->d_buf; ++ int i, n = symtab->shdr->sh_size / symtab->shdr->sh_entsize, err; ++ int str_sec_idx = symtab->shdr->sh_link; ++ const char *sym_name; ++ ++ obj->sym_map = calloc(n + 1, sizeof(*obj->sym_map)); ++ if (!obj->sym_map) ++ return -ENOMEM; ++ ++ for (i = 0; i < n; i++, sym++) { ++ /* We already validated all-zero symbol #0 and we already ++ * appended it preventively to the final SYMTAB, so skip it. ++ */ ++ if (i == 0) ++ continue; ++ ++ sym_name = elf_strptr(obj->elf, str_sec_idx, sym->st_name); ++ if (!sym_name) { ++ pr_warn("can't fetch symbol name for symbol #%d in '%s'\n", i, obj->filename); ++ return -EINVAL; ++ } ++ ++ err = linker_append_elf_sym(linker, obj, sym, sym_name, i); ++ if (err) ++ return err; ++ } ++ ++ return 0; ++} ++ ++static Elf64_Sym *get_sym_by_idx(struct bpf_linker *linker, size_t sym_idx) ++{ ++ struct dst_sec *symtab = &linker->secs[linker->symtab_sec_idx]; ++ Elf64_Sym *syms = symtab->raw_data; ++ ++ return &syms[sym_idx]; ++} ++ ++static struct glob_sym *find_glob_sym(struct bpf_linker *linker, const char *sym_name) ++{ ++ struct glob_sym *glob_sym; ++ const char *name; ++ int i; ++ ++ for (i = 0; i < linker->glob_sym_cnt; i++) { ++ glob_sym = &linker->glob_syms[i]; ++ name = strset__data(linker->strtab_strs) + glob_sym->name_off; ++ ++ if (strcmp(name, sym_name) == 0) ++ return glob_sym; ++ } ++ ++ return NULL; ++} ++ ++static struct glob_sym *add_glob_sym(struct bpf_linker *linker) ++{ ++ struct glob_sym *syms, *sym; ++ ++ syms = libbpf_reallocarray(linker->glob_syms, linker->glob_sym_cnt + 1, ++ sizeof(*linker->glob_syms)); ++ if (!syms) ++ return NULL; ++ ++ sym = &syms[linker->glob_sym_cnt]; ++ memset(sym, 0, sizeof(*sym)); ++ sym->var_idx = -1; ++ ++ linker->glob_syms = syms; ++ linker->glob_sym_cnt++; ++ ++ return sym; ++} ++ ++static bool glob_sym_btf_matches(const char *sym_name, bool exact, ++ const struct btf *btf1, __u32 id1, ++ const struct btf *btf2, __u32 id2) ++{ ++ const struct btf_type *t1, *t2; ++ bool is_static1, is_static2; ++ const char *n1, *n2; ++ int i, n; ++ ++recur: ++ n1 = n2 = NULL; ++ t1 = skip_mods_and_typedefs(btf1, id1, &id1); ++ t2 = skip_mods_and_typedefs(btf2, id2, &id2); ++ ++ /* check if only one side is FWD, otherwise handle with common logic */ ++ if (!exact && btf_is_fwd(t1) != btf_is_fwd(t2)) { ++ n1 = btf__str_by_offset(btf1, t1->name_off); ++ n2 = btf__str_by_offset(btf2, t2->name_off); ++ if (strcmp(n1, n2) != 0) { ++ pr_warn("global '%s': incompatible forward declaration names '%s' and '%s'\n", ++ sym_name, n1, n2); ++ return false; ++ } ++ /* validate if FWD kind matches concrete kind */ ++ if (btf_is_fwd(t1)) { ++ if (btf_kflag(t1) && btf_is_union(t2)) ++ return true; ++ if (!btf_kflag(t1) && 
btf_is_struct(t2)) ++ return true; ++ pr_warn("global '%s': incompatible %s forward declaration and concrete kind %s\n", ++ sym_name, btf_kflag(t1) ? "union" : "struct", btf_kind_str(t2)); ++ } else { ++ if (btf_kflag(t2) && btf_is_union(t1)) ++ return true; ++ if (!btf_kflag(t2) && btf_is_struct(t1)) ++ return true; ++ pr_warn("global '%s': incompatible %s forward declaration and concrete kind %s\n", ++ sym_name, btf_kflag(t2) ? "union" : "struct", btf_kind_str(t1)); ++ } ++ return false; ++ } ++ ++ if (btf_kind(t1) != btf_kind(t2)) { ++ pr_warn("global '%s': incompatible BTF kinds %s and %s\n", ++ sym_name, btf_kind_str(t1), btf_kind_str(t2)); ++ return false; ++ } ++ ++ switch (btf_kind(t1)) { ++ case BTF_KIND_STRUCT: ++ case BTF_KIND_UNION: ++ case BTF_KIND_ENUM: ++ case BTF_KIND_ENUM64: ++ case BTF_KIND_FWD: ++ case BTF_KIND_FUNC: ++ case BTF_KIND_VAR: ++ n1 = btf__str_by_offset(btf1, t1->name_off); ++ n2 = btf__str_by_offset(btf2, t2->name_off); ++ if (strcmp(n1, n2) != 0) { ++ pr_warn("global '%s': incompatible %s names '%s' and '%s'\n", ++ sym_name, btf_kind_str(t1), n1, n2); ++ return false; ++ } ++ break; ++ default: ++ break; ++ } ++ ++ switch (btf_kind(t1)) { ++ case BTF_KIND_UNKN: /* void */ ++ case BTF_KIND_FWD: ++ return true; ++ case BTF_KIND_INT: ++ case BTF_KIND_FLOAT: ++ case BTF_KIND_ENUM: ++ case BTF_KIND_ENUM64: ++ /* ignore encoding for int and enum values for enum */ ++ if (t1->size != t2->size) { ++ pr_warn("global '%s': incompatible %s '%s' size %u and %u\n", ++ sym_name, btf_kind_str(t1), n1, t1->size, t2->size); ++ return false; ++ } ++ return true; ++ case BTF_KIND_PTR: ++ /* just validate overall shape of the referenced type, so no ++ * contents comparison for struct/union, and allowd fwd vs ++ * struct/union ++ */ ++ exact = false; ++ id1 = t1->type; ++ id2 = t2->type; ++ goto recur; ++ case BTF_KIND_ARRAY: ++ /* ignore index type and array size */ ++ id1 = btf_array(t1)->type; ++ id2 = btf_array(t2)->type; ++ goto recur; ++ case BTF_KIND_FUNC: ++ /* extern and global linkages are compatible */ ++ is_static1 = btf_func_linkage(t1) == BTF_FUNC_STATIC; ++ is_static2 = btf_func_linkage(t2) == BTF_FUNC_STATIC; ++ if (is_static1 != is_static2) { ++ pr_warn("global '%s': incompatible func '%s' linkage\n", sym_name, n1); ++ return false; ++ } ++ ++ id1 = t1->type; ++ id2 = t2->type; ++ goto recur; ++ case BTF_KIND_VAR: ++ /* extern and global linkages are compatible */ ++ is_static1 = btf_var(t1)->linkage == BTF_VAR_STATIC; ++ is_static2 = btf_var(t2)->linkage == BTF_VAR_STATIC; ++ if (is_static1 != is_static2) { ++ pr_warn("global '%s': incompatible var '%s' linkage\n", sym_name, n1); ++ return false; ++ } ++ ++ id1 = t1->type; ++ id2 = t2->type; ++ goto recur; ++ case BTF_KIND_STRUCT: ++ case BTF_KIND_UNION: { ++ const struct btf_member *m1, *m2; ++ ++ if (!exact) ++ return true; ++ ++ if (btf_vlen(t1) != btf_vlen(t2)) { ++ pr_warn("global '%s': incompatible number of %s fields %u and %u\n", ++ sym_name, btf_kind_str(t1), btf_vlen(t1), btf_vlen(t2)); ++ return false; ++ } ++ ++ n = btf_vlen(t1); ++ m1 = btf_members(t1); ++ m2 = btf_members(t2); ++ for (i = 0; i < n; i++, m1++, m2++) { ++ n1 = btf__str_by_offset(btf1, m1->name_off); ++ n2 = btf__str_by_offset(btf2, m2->name_off); ++ if (strcmp(n1, n2) != 0) { ++ pr_warn("global '%s': incompatible field #%d names '%s' and '%s'\n", ++ sym_name, i, n1, n2); ++ return false; ++ } ++ if (m1->offset != m2->offset) { ++ pr_warn("global '%s': incompatible field #%d ('%s') offsets\n", ++ sym_name, i, n1); ++ return false; 
++ } ++ if (!glob_sym_btf_matches(sym_name, exact, btf1, m1->type, btf2, m2->type)) ++ return false; ++ } ++ ++ return true; ++ } ++ case BTF_KIND_FUNC_PROTO: { ++ const struct btf_param *m1, *m2; ++ ++ if (btf_vlen(t1) != btf_vlen(t2)) { ++ pr_warn("global '%s': incompatible number of %s params %u and %u\n", ++ sym_name, btf_kind_str(t1), btf_vlen(t1), btf_vlen(t2)); ++ return false; ++ } ++ ++ n = btf_vlen(t1); ++ m1 = btf_params(t1); ++ m2 = btf_params(t2); ++ for (i = 0; i < n; i++, m1++, m2++) { ++ /* ignore func arg names */ ++ if (!glob_sym_btf_matches(sym_name, exact, btf1, m1->type, btf2, m2->type)) ++ return false; ++ } ++ ++ /* now check return type as well */ ++ id1 = t1->type; ++ id2 = t2->type; ++ goto recur; ++ } ++ ++ /* skip_mods_and_typedefs() make this impossible */ ++ case BTF_KIND_TYPEDEF: ++ case BTF_KIND_VOLATILE: ++ case BTF_KIND_CONST: ++ case BTF_KIND_RESTRICT: ++ /* DATASECs are never compared with each other */ ++ case BTF_KIND_DATASEC: ++ default: ++ pr_warn("global '%s': unsupported BTF kind %s\n", ++ sym_name, btf_kind_str(t1)); ++ return false; ++ } ++} ++ ++static bool map_defs_match(const char *sym_name, ++ const struct btf *main_btf, ++ const struct btf_map_def *main_def, ++ const struct btf_map_def *main_inner_def, ++ const struct btf *extra_btf, ++ const struct btf_map_def *extra_def, ++ const struct btf_map_def *extra_inner_def) ++{ ++ const char *reason; ++ ++ if (main_def->map_type != extra_def->map_type) { ++ reason = "type"; ++ goto mismatch; ++ } ++ ++ /* check key type/size match */ ++ if (main_def->key_size != extra_def->key_size) { ++ reason = "key_size"; ++ goto mismatch; ++ } ++ if (!!main_def->key_type_id != !!extra_def->key_type_id) { ++ reason = "key type"; ++ goto mismatch; ++ } ++ if ((main_def->parts & MAP_DEF_KEY_TYPE) ++ && !glob_sym_btf_matches(sym_name, true /*exact*/, ++ main_btf, main_def->key_type_id, ++ extra_btf, extra_def->key_type_id)) { ++ reason = "key type"; ++ goto mismatch; ++ } ++ ++ /* validate value type/size match */ ++ if (main_def->value_size != extra_def->value_size) { ++ reason = "value_size"; ++ goto mismatch; ++ } ++ if (!!main_def->value_type_id != !!extra_def->value_type_id) { ++ reason = "value type"; ++ goto mismatch; ++ } ++ if ((main_def->parts & MAP_DEF_VALUE_TYPE) ++ && !glob_sym_btf_matches(sym_name, true /*exact*/, ++ main_btf, main_def->value_type_id, ++ extra_btf, extra_def->value_type_id)) { ++ reason = "key type"; ++ goto mismatch; ++ } ++ ++ if (main_def->max_entries != extra_def->max_entries) { ++ reason = "max_entries"; ++ goto mismatch; ++ } ++ if (main_def->map_flags != extra_def->map_flags) { ++ reason = "map_flags"; ++ goto mismatch; ++ } ++ if (main_def->numa_node != extra_def->numa_node) { ++ reason = "numa_node"; ++ goto mismatch; ++ } ++ if (main_def->pinning != extra_def->pinning) { ++ reason = "pinning"; ++ goto mismatch; ++ } ++ ++ if ((main_def->parts & MAP_DEF_INNER_MAP) != (extra_def->parts & MAP_DEF_INNER_MAP)) { ++ reason = "inner map"; ++ goto mismatch; ++ } ++ ++ if (main_def->parts & MAP_DEF_INNER_MAP) { ++ char inner_map_name[128]; ++ ++ snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", sym_name); ++ ++ return map_defs_match(inner_map_name, ++ main_btf, main_inner_def, NULL, ++ extra_btf, extra_inner_def, NULL); ++ } ++ ++ return true; ++ ++mismatch: ++ pr_warn("global '%s': map %s mismatch\n", sym_name, reason); ++ return false; ++} ++ ++static bool glob_map_defs_match(const char *sym_name, ++ struct bpf_linker *linker, struct glob_sym *glob_sym, ++ struct 
src_obj *obj, Elf64_Sym *sym, int btf_id) ++{ ++ struct btf_map_def dst_def = {}, dst_inner_def = {}; ++ struct btf_map_def src_def = {}, src_inner_def = {}; ++ const struct btf_type *t; ++ int err; ++ ++ t = btf__type_by_id(obj->btf, btf_id); ++ if (!btf_is_var(t)) { ++ pr_warn("global '%s': invalid map definition type [%d]\n", sym_name, btf_id); ++ return false; ++ } ++ t = skip_mods_and_typedefs(obj->btf, t->type, NULL); ++ ++ err = parse_btf_map_def(sym_name, obj->btf, t, true /*strict*/, &src_def, &src_inner_def); ++ if (err) { ++ pr_warn("global '%s': invalid map definition\n", sym_name); ++ return false; ++ } ++ ++ /* re-parse existing map definition */ ++ t = btf__type_by_id(linker->btf, glob_sym->btf_id); ++ t = skip_mods_and_typedefs(linker->btf, t->type, NULL); ++ err = parse_btf_map_def(sym_name, linker->btf, t, true /*strict*/, &dst_def, &dst_inner_def); ++ if (err) { ++ /* this should not happen, because we already validated it */ ++ pr_warn("global '%s': invalid dst map definition\n", sym_name); ++ return false; ++ } ++ ++ /* Currently extern map definition has to be complete and match ++ * concrete map definition exactly. This restriction might be lifted ++ * in the future. ++ */ ++ return map_defs_match(sym_name, linker->btf, &dst_def, &dst_inner_def, ++ obj->btf, &src_def, &src_inner_def); ++} ++ ++static bool glob_syms_match(const char *sym_name, ++ struct bpf_linker *linker, struct glob_sym *glob_sym, ++ struct src_obj *obj, Elf64_Sym *sym, size_t sym_idx, int btf_id) ++{ ++ const struct btf_type *src_t; ++ ++ /* if we are dealing with externs, BTF types describing both global ++ * and extern VARs/FUNCs should be completely present in all files ++ */ ++ if (!glob_sym->btf_id || !btf_id) { ++ pr_warn("BTF info is missing for global symbol '%s'\n", sym_name); ++ return false; ++ } ++ ++ src_t = btf__type_by_id(obj->btf, btf_id); ++ if (!btf_is_var(src_t) && !btf_is_func(src_t)) { ++ pr_warn("only extern variables and functions are supported, but got '%s' for '%s'\n", ++ btf_kind_str(src_t), sym_name); ++ return false; ++ } ++ ++ /* deal with .maps definitions specially */ ++ if (glob_sym->sec_id && strcmp(linker->secs[glob_sym->sec_id].sec_name, MAPS_ELF_SEC) == 0) ++ return glob_map_defs_match(sym_name, linker, glob_sym, obj, sym, btf_id); ++ ++ if (!glob_sym_btf_matches(sym_name, true /*exact*/, ++ linker->btf, glob_sym->btf_id, obj->btf, btf_id)) ++ return false; ++ ++ return true; ++} ++ ++static bool btf_is_non_static(const struct btf_type *t) ++{ ++ return (btf_is_var(t) && btf_var(t)->linkage != BTF_VAR_STATIC) ++ || (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_STATIC); ++} ++ ++static int find_glob_sym_btf(struct src_obj *obj, Elf64_Sym *sym, const char *sym_name, ++ int *out_btf_sec_id, int *out_btf_id) ++{ ++ int i, j, n, m, btf_id = 0; ++ const struct btf_type *t; ++ const struct btf_var_secinfo *vi; ++ const char *name; ++ ++ if (!obj->btf) { ++ pr_warn("failed to find BTF info for object '%s'\n", obj->filename); ++ return -EINVAL; ++ } ++ ++ n = btf__type_cnt(obj->btf); ++ for (i = 1; i < n; i++) { ++ t = btf__type_by_id(obj->btf, i); ++ ++ /* some global and extern FUNCs and VARs might not be associated with any ++ * DATASEC, so try to detect them in the same pass ++ */ ++ if (btf_is_non_static(t)) { ++ name = btf__str_by_offset(obj->btf, t->name_off); ++ if (strcmp(name, sym_name) != 0) ++ continue; ++ ++ /* remember and still try to find DATASEC */ ++ btf_id = i; ++ continue; ++ } ++ ++ if (!btf_is_datasec(t)) ++ continue; ++ ++ vi = 
btf_var_secinfos(t); ++ for (j = 0, m = btf_vlen(t); j < m; j++, vi++) { ++ t = btf__type_by_id(obj->btf, vi->type); ++ name = btf__str_by_offset(obj->btf, t->name_off); ++ ++ if (strcmp(name, sym_name) != 0) ++ continue; ++ if (btf_is_var(t) && btf_var(t)->linkage == BTF_VAR_STATIC) ++ continue; ++ if (btf_is_func(t) && btf_func_linkage(t) == BTF_FUNC_STATIC) ++ continue; ++ ++ if (btf_id && btf_id != vi->type) { ++ pr_warn("global/extern '%s' BTF is ambiguous: both types #%d and #%u match\n", ++ sym_name, btf_id, vi->type); ++ return -EINVAL; ++ } ++ ++ *out_btf_sec_id = i; ++ *out_btf_id = vi->type; ++ ++ return 0; ++ } ++ } ++ ++ /* free-floating extern or global FUNC */ ++ if (btf_id) { ++ *out_btf_sec_id = 0; ++ *out_btf_id = btf_id; ++ return 0; ++ } ++ ++ pr_warn("failed to find BTF info for global/extern symbol '%s'\n", sym_name); ++ return -ENOENT; ++} ++ ++static struct src_sec *find_src_sec_by_name(struct src_obj *obj, const char *sec_name) ++{ ++ struct src_sec *sec; ++ int i; ++ ++ for (i = 1; i < obj->sec_cnt; i++) { ++ sec = &obj->secs[i]; ++ ++ if (strcmp(sec->sec_name, sec_name) == 0) ++ return sec; ++ } ++ ++ return NULL; ++} ++ ++static int complete_extern_btf_info(struct btf *dst_btf, int dst_id, ++ struct btf *src_btf, int src_id) ++{ ++ struct btf_type *dst_t = btf_type_by_id(dst_btf, dst_id); ++ struct btf_type *src_t = btf_type_by_id(src_btf, src_id); ++ struct btf_param *src_p, *dst_p; ++ const char *s; ++ int i, n, off; ++ ++ /* We already made sure that source and destination types (FUNC or ++ * VAR) match in terms of types and argument names. ++ */ ++ if (btf_is_var(dst_t)) { ++ btf_var(dst_t)->linkage = BTF_VAR_GLOBAL_ALLOCATED; ++ return 0; ++ } ++ ++ dst_t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_GLOBAL, 0); ++ ++ /* now onto FUNC_PROTO types */ ++ src_t = btf_type_by_id(src_btf, src_t->type); ++ dst_t = btf_type_by_id(dst_btf, dst_t->type); ++ ++ /* Fill in all the argument names, which for extern FUNCs are missing. ++ * We'll end up with two copies of FUNCs/VARs for externs, but that ++ * will be taken care of by BTF dedup at the very end. ++ * It might be that BTF types for extern in one file has less/more BTF ++ * information (e.g., FWD instead of full STRUCT/UNION information), ++ * but that should be (in most cases, subject to BTF dedup rules) ++ * handled and resolved by BTF dedup algorithm as well, so we won't ++ * worry about it. Our only job is to make sure that argument names ++ * are populated on both sides, otherwise BTF dedup will pedantically ++ * consider them different. 
++ */ ++ src_p = btf_params(src_t); ++ dst_p = btf_params(dst_t); ++ for (i = 0, n = btf_vlen(dst_t); i < n; i++, src_p++, dst_p++) { ++ if (!src_p->name_off) ++ continue; ++ ++ /* src_btf has more complete info, so add name to dst_btf */ ++ s = btf__str_by_offset(src_btf, src_p->name_off); ++ off = btf__add_str(dst_btf, s); ++ if (off < 0) ++ return off; ++ dst_p->name_off = off; ++ } ++ return 0; ++} ++ ++static void sym_update_bind(Elf64_Sym *sym, int sym_bind) ++{ ++ sym->st_info = ELF64_ST_INFO(sym_bind, ELF64_ST_TYPE(sym->st_info)); ++} ++ ++static void sym_update_type(Elf64_Sym *sym, int sym_type) ++{ ++ sym->st_info = ELF64_ST_INFO(ELF64_ST_BIND(sym->st_info), sym_type); ++} ++ ++static void sym_update_visibility(Elf64_Sym *sym, int sym_vis) ++{ ++ /* libelf doesn't provide setters for ST_VISIBILITY, ++ * but it is stored in the lower 2 bits of st_other ++ */ ++ sym->st_other &= ~0x03; ++ sym->st_other |= sym_vis; ++} ++ ++static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj, ++ Elf64_Sym *sym, const char *sym_name, int src_sym_idx) ++{ ++ struct src_sec *src_sec = NULL; ++ struct dst_sec *dst_sec = NULL; ++ struct glob_sym *glob_sym = NULL; ++ int name_off, sym_type, sym_bind, sym_vis, err; ++ int btf_sec_id = 0, btf_id = 0; ++ size_t dst_sym_idx; ++ Elf64_Sym *dst_sym; ++ bool sym_is_extern; ++ ++ sym_type = ELF64_ST_TYPE(sym->st_info); ++ sym_bind = ELF64_ST_BIND(sym->st_info); ++ sym_vis = ELF64_ST_VISIBILITY(sym->st_other); ++ sym_is_extern = sym->st_shndx == SHN_UNDEF; ++ ++ if (sym_is_extern) { ++ if (!obj->btf) { ++ pr_warn("externs without BTF info are not supported\n"); ++ return -ENOTSUP; ++ } ++ } else if (sym->st_shndx < SHN_LORESERVE) { ++ src_sec = &obj->secs[sym->st_shndx]; ++ if (src_sec->skipped) ++ return 0; ++ dst_sec = &linker->secs[src_sec->dst_id]; ++ ++ /* allow only one STT_SECTION symbol per section */ ++ if (sym_type == STT_SECTION && dst_sec->sec_sym_idx) { ++ obj->sym_map[src_sym_idx] = dst_sec->sec_sym_idx; ++ return 0; ++ } ++ } ++ ++ if (sym_bind == STB_LOCAL) ++ goto add_sym; ++ ++ /* find matching BTF info */ ++ err = find_glob_sym_btf(obj, sym, sym_name, &btf_sec_id, &btf_id); ++ if (err) ++ return err; ++ ++ if (sym_is_extern && btf_sec_id) { ++ const char *sec_name = NULL; ++ const struct btf_type *t; ++ ++ t = btf__type_by_id(obj->btf, btf_sec_id); ++ sec_name = btf__str_by_offset(obj->btf, t->name_off); ++ ++ /* Clang puts unannotated extern vars into ++ * '.extern' BTF DATASEC. Treat them the same ++ * as unannotated extern funcs (which are ++ * currently not put into any DATASECs). ++ * Those don't have associated src_sec/dst_sec. ++ */ ++ if (strcmp(sec_name, BTF_EXTERN_SEC) != 0) { ++ src_sec = find_src_sec_by_name(obj, sec_name); ++ if (!src_sec) { ++ pr_warn("failed to find matching ELF sec '%s'\n", sec_name); ++ return -ENOENT; ++ } ++ dst_sec = &linker->secs[src_sec->dst_id]; ++ } ++ } ++ ++ glob_sym = find_glob_sym(linker, sym_name); ++ if (glob_sym) { ++ /* Preventively resolve to existing symbol. This is ++ * needed for further relocation symbol remapping in ++ * the next step of linking. ++ */ ++ obj->sym_map[src_sym_idx] = glob_sym->sym_idx; ++ ++ /* If both symbols are non-externs, at least one of ++ * them has to be STB_WEAK, otherwise they are in ++ * a conflict with each other. 
++ */ ++ if (!sym_is_extern && !glob_sym->is_extern ++ && !glob_sym->is_weak && sym_bind != STB_WEAK) { ++ pr_warn("conflicting non-weak symbol #%d (%s) definition in '%s'\n", ++ src_sym_idx, sym_name, obj->filename); ++ return -EINVAL; ++ } ++ ++ if (!glob_syms_match(sym_name, linker, glob_sym, obj, sym, src_sym_idx, btf_id)) ++ return -EINVAL; ++ ++ dst_sym = get_sym_by_idx(linker, glob_sym->sym_idx); ++ ++ /* If new symbol is strong, then force dst_sym to be strong as ++ * well; this way a mix of weak and non-weak extern ++ * definitions will end up being strong. ++ */ ++ if (sym_bind == STB_GLOBAL) { ++ /* We still need to preserve type (NOTYPE or ++ * OBJECT/FUNC, depending on whether the symbol is ++ * extern or not) ++ */ ++ sym_update_bind(dst_sym, STB_GLOBAL); ++ glob_sym->is_weak = false; ++ } ++ ++ /* Non-default visibility is "contaminating", with stricter ++ * visibility overwriting more permissive ones, even if more ++ * permissive visibility comes from just an extern definition. ++ * Currently only STV_DEFAULT and STV_HIDDEN are allowed and ++ * ensured by ELF symbol sanity checks above. ++ */ ++ if (sym_vis > ELF64_ST_VISIBILITY(dst_sym->st_other)) ++ sym_update_visibility(dst_sym, sym_vis); ++ ++ /* If the new symbol is extern, then regardless if ++ * existing symbol is extern or resolved global, just ++ * keep the existing one untouched. ++ */ ++ if (sym_is_extern) ++ return 0; ++ ++ /* If existing symbol is a strong resolved symbol, bail out, ++ * because we lost resolution battle have nothing to ++ * contribute. We already checked abover that there is no ++ * strong-strong conflict. We also already tightened binding ++ * and visibility, so nothing else to contribute at that point. ++ */ ++ if (!glob_sym->is_extern && sym_bind == STB_WEAK) ++ return 0; ++ ++ /* At this point, new symbol is strong non-extern, ++ * so overwrite glob_sym with new symbol information. ++ * Preserve binding and visibility. ++ */ ++ sym_update_type(dst_sym, sym_type); ++ dst_sym->st_shndx = dst_sec->sec_idx; ++ dst_sym->st_value = src_sec->dst_off + sym->st_value; ++ dst_sym->st_size = sym->st_size; ++ ++ /* see comment below about dst_sec->id vs dst_sec->sec_idx */ ++ glob_sym->sec_id = dst_sec->id; ++ glob_sym->is_extern = false; ++ ++ if (complete_extern_btf_info(linker->btf, glob_sym->btf_id, ++ obj->btf, btf_id)) ++ return -EINVAL; ++ ++ /* request updating VAR's/FUNC's underlying BTF type when appending BTF type */ ++ glob_sym->underlying_btf_id = 0; ++ ++ obj->sym_map[src_sym_idx] = glob_sym->sym_idx; ++ return 0; ++ } ++ ++add_sym: ++ name_off = strset__add_str(linker->strtab_strs, sym_name); ++ if (name_off < 0) ++ return name_off; ++ ++ dst_sym = add_new_sym(linker, &dst_sym_idx); ++ if (!dst_sym) ++ return -ENOMEM; ++ ++ dst_sym->st_name = name_off; ++ dst_sym->st_info = sym->st_info; ++ dst_sym->st_other = sym->st_other; ++ dst_sym->st_shndx = dst_sec ? dst_sec->sec_idx : sym->st_shndx; ++ dst_sym->st_value = (src_sec ? 
src_sec->dst_off : 0) + sym->st_value; ++ dst_sym->st_size = sym->st_size; ++ ++ obj->sym_map[src_sym_idx] = dst_sym_idx; ++ ++ if (sym_type == STT_SECTION && dst_sym) { ++ dst_sec->sec_sym_idx = dst_sym_idx; ++ dst_sym->st_value = 0; ++ } ++ ++ if (sym_bind != STB_LOCAL) { ++ glob_sym = add_glob_sym(linker); ++ if (!glob_sym) ++ return -ENOMEM; ++ ++ glob_sym->sym_idx = dst_sym_idx; ++ /* we use dst_sec->id (and not dst_sec->sec_idx), because ++ * ephemeral sections (.kconfig, .ksyms, etc) don't have ++ * sec_idx (as they don't have corresponding ELF section), but ++ * still have id. .extern doesn't have even ephemeral section ++ * associated with it, so dst_sec->id == dst_sec->sec_idx == 0. ++ */ ++ glob_sym->sec_id = dst_sec ? dst_sec->id : 0; ++ glob_sym->name_off = name_off; ++ /* we will fill btf_id in during BTF merging step */ ++ glob_sym->btf_id = 0; ++ glob_sym->is_extern = sym_is_extern; ++ glob_sym->is_weak = sym_bind == STB_WEAK; ++ } ++ ++ return 0; ++} ++ ++static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj) ++{ ++ struct src_sec *src_symtab = &obj->secs[obj->symtab_sec_idx]; ++ struct dst_sec *dst_symtab; ++ int i, err; ++ ++ for (i = 1; i < obj->sec_cnt; i++) { ++ struct src_sec *src_sec, *src_linked_sec; ++ struct dst_sec *dst_sec, *dst_linked_sec; ++ Elf64_Rel *src_rel, *dst_rel; ++ int j, n; ++ ++ src_sec = &obj->secs[i]; ++ if (!is_relo_sec(src_sec)) ++ continue; ++ ++ /* shdr->sh_info points to relocatable section */ ++ src_linked_sec = &obj->secs[src_sec->shdr->sh_info]; ++ if (src_linked_sec->skipped) ++ continue; ++ ++ dst_sec = find_dst_sec_by_name(linker, src_sec->sec_name); ++ if (!dst_sec) { ++ dst_sec = add_dst_sec(linker, src_sec->sec_name); ++ if (!dst_sec) ++ return -ENOMEM; ++ err = init_sec(linker, dst_sec, src_sec); ++ if (err) { ++ pr_warn("failed to init section '%s'\n", src_sec->sec_name); ++ return err; ++ } ++ } else if (!secs_match(dst_sec, src_sec)) { ++ pr_warn("sections %s are not compatible\n", src_sec->sec_name); ++ return -1; ++ } ++ ++ /* add_dst_sec() above could have invalidated linker->secs */ ++ dst_symtab = &linker->secs[linker->symtab_sec_idx]; ++ ++ /* shdr->sh_link points to SYMTAB */ ++ dst_sec->shdr->sh_link = linker->symtab_sec_idx; ++ ++ /* shdr->sh_info points to relocated section */ ++ dst_linked_sec = &linker->secs[src_linked_sec->dst_id]; ++ dst_sec->shdr->sh_info = dst_linked_sec->sec_idx; ++ ++ src_sec->dst_id = dst_sec->id; ++ err = extend_sec(linker, dst_sec, src_sec); ++ if (err) ++ return err; ++ ++ src_rel = src_sec->data->d_buf; ++ dst_rel = dst_sec->raw_data + src_sec->dst_off; ++ n = src_sec->shdr->sh_size / src_sec->shdr->sh_entsize; ++ for (j = 0; j < n; j++, src_rel++, dst_rel++) { ++ size_t src_sym_idx = ELF64_R_SYM(src_rel->r_info); ++ size_t sym_type = ELF64_R_TYPE(src_rel->r_info); ++ Elf64_Sym *src_sym, *dst_sym; ++ size_t dst_sym_idx; ++ ++ src_sym_idx = ELF64_R_SYM(src_rel->r_info); ++ src_sym = src_symtab->data->d_buf + sizeof(*src_sym) * src_sym_idx; ++ ++ dst_sym_idx = obj->sym_map[src_sym_idx]; ++ dst_sym = dst_symtab->raw_data + sizeof(*dst_sym) * dst_sym_idx; ++ dst_rel->r_offset += src_linked_sec->dst_off; ++ sym_type = ELF64_R_TYPE(src_rel->r_info); ++ dst_rel->r_info = ELF64_R_INFO(dst_sym_idx, sym_type); ++ ++ if (ELF64_ST_TYPE(src_sym->st_info) == STT_SECTION) { ++ struct src_sec *sec = &obj->secs[src_sym->st_shndx]; ++ struct bpf_insn *insn; ++ ++ if (src_linked_sec->shdr->sh_flags & SHF_EXECINSTR) { ++ /* calls to the very first static function inside ++ * .text 
section at offset 0 will ++ * reference section symbol, not the ++ * function symbol. Fix that up, ++ * otherwise it won't be possible to ++ * relocate calls to two different ++ * static functions with the same name ++ * (rom two different object files) ++ */ ++ insn = dst_linked_sec->raw_data + dst_rel->r_offset; ++ if (insn->code == (BPF_JMP | BPF_CALL)) ++ insn->imm += sec->dst_off / sizeof(struct bpf_insn); ++ else ++ insn->imm += sec->dst_off; ++ } else { ++ pr_warn("relocation against STT_SECTION in non-exec section is not supported!\n"); ++ return -EINVAL; ++ } ++ } ++ ++ } ++ } ++ ++ return 0; ++} ++ ++static Elf64_Sym *find_sym_by_name(struct src_obj *obj, size_t sec_idx, ++ int sym_type, const char *sym_name) ++{ ++ struct src_sec *symtab = &obj->secs[obj->symtab_sec_idx]; ++ Elf64_Sym *sym = symtab->data->d_buf; ++ int i, n = symtab->shdr->sh_size / symtab->shdr->sh_entsize; ++ int str_sec_idx = symtab->shdr->sh_link; ++ const char *name; ++ ++ for (i = 0; i < n; i++, sym++) { ++ if (sym->st_shndx != sec_idx) ++ continue; ++ if (ELF64_ST_TYPE(sym->st_info) != sym_type) ++ continue; ++ ++ name = elf_strptr(obj->elf, str_sec_idx, sym->st_name); ++ if (!name) ++ return NULL; ++ ++ if (strcmp(sym_name, name) != 0) ++ continue; ++ ++ return sym; ++ } ++ ++ return NULL; ++} ++ ++static int linker_fixup_btf(struct src_obj *obj) ++{ ++ const char *sec_name; ++ struct src_sec *sec; ++ int i, j, n, m; ++ ++ if (!obj->btf) ++ return 0; ++ ++ n = btf__type_cnt(obj->btf); ++ for (i = 1; i < n; i++) { ++ struct btf_var_secinfo *vi; ++ struct btf_type *t; ++ ++ t = btf_type_by_id(obj->btf, i); ++ if (btf_kind(t) != BTF_KIND_DATASEC) ++ continue; ++ ++ sec_name = btf__str_by_offset(obj->btf, t->name_off); ++ sec = find_src_sec_by_name(obj, sec_name); ++ if (sec) { ++ /* record actual section size, unless ephemeral */ ++ if (sec->shdr) ++ t->size = sec->shdr->sh_size; ++ } else { ++ /* BTF can have some sections that are not represented ++ * in ELF, e.g., .kconfig, .ksyms, .extern, which are used ++ * for special extern variables. ++ * ++ * For all but one such special (ephemeral) ++ * sections, we pre-create "section shells" to be able ++ * to keep track of extra per-section metadata later ++ * (e.g., those BTF extern variables). ++ * ++ * .extern is even more special, though, because it ++ * contains extern variables that need to be resolved ++ * by static linker, not libbpf and kernel. When such ++ * externs are resolved, we are going to remove them ++ * from .extern BTF section and might end up not ++ * needing it at all. Each resolved extern should have ++ * matching non-extern VAR/FUNC in other sections. ++ * ++ * We do support leaving some of the externs ++ * unresolved, though, to support cases of building ++ * libraries, which will later be linked against final ++ * BPF applications. So if at finalization we still ++ * see unresolved externs, we'll create .extern ++ * section on our own. 
++ */ ++ if (strcmp(sec_name, BTF_EXTERN_SEC) == 0) ++ continue; ++ ++ sec = add_src_sec(obj, sec_name); ++ if (!sec) ++ return -ENOMEM; ++ ++ sec->ephemeral = true; ++ sec->sec_idx = 0; /* will match UNDEF shndx in ELF */ ++ } ++ ++ /* remember ELF section and its BTF type ID match */ ++ sec->sec_type_id = i; ++ ++ /* fix up variable offsets */ ++ vi = btf_var_secinfos(t); ++ for (j = 0, m = btf_vlen(t); j < m; j++, vi++) { ++ const struct btf_type *vt = btf__type_by_id(obj->btf, vi->type); ++ const char *var_name = btf__str_by_offset(obj->btf, vt->name_off); ++ int var_linkage = btf_var(vt)->linkage; ++ Elf64_Sym *sym; ++ ++ /* no need to patch up static or extern vars */ ++ if (var_linkage != BTF_VAR_GLOBAL_ALLOCATED) ++ continue; ++ ++ sym = find_sym_by_name(obj, sec->sec_idx, STT_OBJECT, var_name); ++ if (!sym) { ++ pr_warn("failed to find symbol for variable '%s' in section '%s'\n", var_name, sec_name); ++ return -ENOENT; ++ } ++ ++ vi->offset = sym->st_value; ++ } ++ } ++ ++ return 0; ++} ++ ++static int remap_type_id(__u32 *type_id, void *ctx) ++{ ++ int *id_map = ctx; ++ int new_id = id_map[*type_id]; ++ ++ /* Error out if the type wasn't remapped. Ignore VOID which stays VOID. */ ++ if (new_id == 0 && *type_id != 0) { ++ pr_warn("failed to find new ID mapping for original BTF type ID %u\n", *type_id); ++ return -EINVAL; ++ } ++ ++ *type_id = id_map[*type_id]; ++ ++ return 0; ++} ++ ++static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) ++{ ++ const struct btf_type *t; ++ int i, j, n, start_id, id; ++ const char *name; ++ ++ if (!obj->btf) ++ return 0; ++ ++ start_id = btf__type_cnt(linker->btf); ++ n = btf__type_cnt(obj->btf); ++ ++ obj->btf_type_map = calloc(n + 1, sizeof(int)); ++ if (!obj->btf_type_map) ++ return -ENOMEM; ++ ++ for (i = 1; i < n; i++) { ++ struct glob_sym *glob_sym = NULL; ++ ++ t = btf__type_by_id(obj->btf, i); ++ ++ /* DATASECs are handled specially below */ ++ if (btf_kind(t) == BTF_KIND_DATASEC) ++ continue; ++ ++ if (btf_is_non_static(t)) { ++ /* there should be glob_sym already */ ++ name = btf__str_by_offset(obj->btf, t->name_off); ++ glob_sym = find_glob_sym(linker, name); ++ ++ /* VARs without corresponding glob_sym are those that ++ * belong to skipped/deduplicated sections (i.e., ++ * license and version), so just skip them ++ */ ++ if (!glob_sym) ++ continue; ++ ++ /* linker_append_elf_sym() might have requested ++ * updating underlying type ID, if extern was resolved ++ * to strong symbol or weak got upgraded to non-weak ++ */ ++ if (glob_sym->underlying_btf_id == 0) ++ glob_sym->underlying_btf_id = -t->type; ++ ++ /* globals from previous object files that match our ++ * VAR/FUNC already have a corresponding associated ++ * BTF type, so just make sure to use it ++ */ ++ if (glob_sym->btf_id) { ++ /* reuse existing BTF type for global var/func */ ++ obj->btf_type_map[i] = glob_sym->btf_id; ++ continue; ++ } ++ } ++ ++ id = btf__add_type(linker->btf, obj->btf, t); ++ if (id < 0) { ++ pr_warn("failed to append BTF type #%d from file '%s'\n", i, obj->filename); ++ return id; ++ } ++ ++ obj->btf_type_map[i] = id; ++ ++ /* record just appended BTF type for var/func */ ++ if (glob_sym) { ++ glob_sym->btf_id = id; ++ glob_sym->underlying_btf_id = -t->type; ++ } ++ } ++ ++ /* remap all the types except DATASECs */ ++ n = btf__type_cnt(linker->btf); ++ for (i = start_id; i < n; i++) { ++ struct btf_type *dst_t = btf_type_by_id(linker->btf, i); ++ ++ if (btf_type_visit_type_ids(dst_t, remap_type_id, obj->btf_type_map)) ++ return 
-EINVAL; ++ } ++ ++ /* Rewrite VAR/FUNC underlying types (i.e., FUNC's FUNC_PROTO and VAR's ++ * actual type), if necessary ++ */ ++ for (i = 0; i < linker->glob_sym_cnt; i++) { ++ struct glob_sym *glob_sym = &linker->glob_syms[i]; ++ struct btf_type *glob_t; ++ ++ if (glob_sym->underlying_btf_id >= 0) ++ continue; ++ ++ glob_sym->underlying_btf_id = obj->btf_type_map[-glob_sym->underlying_btf_id]; ++ ++ glob_t = btf_type_by_id(linker->btf, glob_sym->btf_id); ++ glob_t->type = glob_sym->underlying_btf_id; ++ } ++ ++ /* append DATASEC info */ ++ for (i = 1; i < obj->sec_cnt; i++) { ++ struct src_sec *src_sec; ++ struct dst_sec *dst_sec; ++ const struct btf_var_secinfo *src_var; ++ struct btf_var_secinfo *dst_var; ++ ++ src_sec = &obj->secs[i]; ++ if (!src_sec->sec_type_id || src_sec->skipped) ++ continue; ++ dst_sec = &linker->secs[src_sec->dst_id]; ++ ++ /* Mark section as having BTF regardless of the presence of ++ * variables. In some cases compiler might generate empty BTF ++ * with no variables information. E.g., when promoting local ++ * array/structure variable initial values and BPF object ++ * file otherwise has no read-only static variables in ++ * .rodata. We need to preserve such empty BTF and just set ++ * correct section size. ++ */ ++ dst_sec->has_btf = true; ++ ++ t = btf__type_by_id(obj->btf, src_sec->sec_type_id); ++ src_var = btf_var_secinfos(t); ++ n = btf_vlen(t); ++ for (j = 0; j < n; j++, src_var++) { ++ void *sec_vars = dst_sec->sec_vars; ++ int new_id = obj->btf_type_map[src_var->type]; ++ struct glob_sym *glob_sym = NULL; ++ ++ t = btf_type_by_id(linker->btf, new_id); ++ if (btf_is_non_static(t)) { ++ name = btf__str_by_offset(linker->btf, t->name_off); ++ glob_sym = find_glob_sym(linker, name); ++ if (glob_sym->sec_id != dst_sec->id) { ++ pr_warn("global '%s': section mismatch %d vs %d\n", ++ name, glob_sym->sec_id, dst_sec->id); ++ return -EINVAL; ++ } ++ } ++ ++ /* If there is already a member (VAR or FUNC) mapped ++ * to the same type, don't add a duplicate entry. ++ * This will happen when multiple object files define ++ * the same extern VARs/FUNCs. ++ */ ++ if (glob_sym && glob_sym->var_idx >= 0) { ++ __s64 sz; ++ ++ dst_var = &dst_sec->sec_vars[glob_sym->var_idx]; ++ /* Because underlying BTF type might have ++ * changed, so might its size have changed, so ++ * re-calculate and update it in sec_var. 
++ */ ++ sz = btf__resolve_size(linker->btf, glob_sym->underlying_btf_id); ++ if (sz < 0) { ++ pr_warn("global '%s': failed to resolve size of underlying type: %d\n", ++ name, (int)sz); ++ return -EINVAL; ++ } ++ dst_var->size = sz; ++ continue; ++ } ++ ++ sec_vars = libbpf_reallocarray(sec_vars, ++ dst_sec->sec_var_cnt + 1, ++ sizeof(*dst_sec->sec_vars)); ++ if (!sec_vars) ++ return -ENOMEM; ++ ++ dst_sec->sec_vars = sec_vars; ++ dst_sec->sec_var_cnt++; ++ ++ dst_var = &dst_sec->sec_vars[dst_sec->sec_var_cnt - 1]; ++ dst_var->type = obj->btf_type_map[src_var->type]; ++ dst_var->size = src_var->size; ++ dst_var->offset = src_sec->dst_off + src_var->offset; ++ ++ if (glob_sym) ++ glob_sym->var_idx = dst_sec->sec_var_cnt - 1; ++ } ++ } ++ ++ return 0; ++} ++ ++static void *add_btf_ext_rec(struct btf_ext_sec_data *ext_data, const void *src_rec) ++{ ++ void *tmp; ++ ++ tmp = libbpf_reallocarray(ext_data->recs, ext_data->rec_cnt + 1, ext_data->rec_sz); ++ if (!tmp) ++ return NULL; ++ ext_data->recs = tmp; ++ ++ tmp += ext_data->rec_cnt * ext_data->rec_sz; ++ memcpy(tmp, src_rec, ext_data->rec_sz); ++ ++ ext_data->rec_cnt++; ++ ++ return tmp; ++} ++ ++static int linker_append_btf_ext(struct bpf_linker *linker, struct src_obj *obj) ++{ ++ const struct btf_ext_info_sec *ext_sec; ++ const char *sec_name, *s; ++ struct src_sec *src_sec; ++ struct dst_sec *dst_sec; ++ int rec_sz, str_off, i; ++ ++ if (!obj->btf_ext) ++ return 0; ++ ++ rec_sz = obj->btf_ext->func_info.rec_size; ++ for_each_btf_ext_sec(&obj->btf_ext->func_info, ext_sec) { ++ struct bpf_func_info_min *src_rec, *dst_rec; ++ ++ sec_name = btf__name_by_offset(obj->btf, ext_sec->sec_name_off); ++ src_sec = find_src_sec_by_name(obj, sec_name); ++ if (!src_sec) { ++ pr_warn("can't find section '%s' referenced from .BTF.ext\n", sec_name); ++ return -EINVAL; ++ } ++ dst_sec = &linker->secs[src_sec->dst_id]; ++ ++ if (dst_sec->func_info.rec_sz == 0) ++ dst_sec->func_info.rec_sz = rec_sz; ++ if (dst_sec->func_info.rec_sz != rec_sz) { ++ pr_warn("incompatible .BTF.ext record sizes for section '%s'\n", sec_name); ++ return -EINVAL; ++ } ++ ++ for_each_btf_ext_rec(&obj->btf_ext->func_info, ext_sec, i, src_rec) { ++ dst_rec = add_btf_ext_rec(&dst_sec->func_info, src_rec); ++ if (!dst_rec) ++ return -ENOMEM; ++ ++ dst_rec->insn_off += src_sec->dst_off; ++ dst_rec->type_id = obj->btf_type_map[dst_rec->type_id]; ++ } ++ } ++ ++ rec_sz = obj->btf_ext->line_info.rec_size; ++ for_each_btf_ext_sec(&obj->btf_ext->line_info, ext_sec) { ++ struct bpf_line_info_min *src_rec, *dst_rec; ++ ++ sec_name = btf__name_by_offset(obj->btf, ext_sec->sec_name_off); ++ src_sec = find_src_sec_by_name(obj, sec_name); ++ if (!src_sec) { ++ pr_warn("can't find section '%s' referenced from .BTF.ext\n", sec_name); ++ return -EINVAL; ++ } ++ dst_sec = &linker->secs[src_sec->dst_id]; ++ ++ if (dst_sec->line_info.rec_sz == 0) ++ dst_sec->line_info.rec_sz = rec_sz; ++ if (dst_sec->line_info.rec_sz != rec_sz) { ++ pr_warn("incompatible .BTF.ext record sizes for section '%s'\n", sec_name); ++ return -EINVAL; ++ } ++ ++ for_each_btf_ext_rec(&obj->btf_ext->line_info, ext_sec, i, src_rec) { ++ dst_rec = add_btf_ext_rec(&dst_sec->line_info, src_rec); ++ if (!dst_rec) ++ return -ENOMEM; ++ ++ dst_rec->insn_off += src_sec->dst_off; ++ ++ s = btf__str_by_offset(obj->btf, src_rec->file_name_off); ++ str_off = btf__add_str(linker->btf, s); ++ if (str_off < 0) ++ return -ENOMEM; ++ dst_rec->file_name_off = str_off; ++ ++ s = btf__str_by_offset(obj->btf, src_rec->line_off); ++ str_off = 
btf__add_str(linker->btf, s); ++ if (str_off < 0) ++ return -ENOMEM; ++ dst_rec->line_off = str_off; ++ ++ /* dst_rec->line_col is fine */ ++ } ++ } ++ ++ rec_sz = obj->btf_ext->core_relo_info.rec_size; ++ for_each_btf_ext_sec(&obj->btf_ext->core_relo_info, ext_sec) { ++ struct bpf_core_relo *src_rec, *dst_rec; ++ ++ sec_name = btf__name_by_offset(obj->btf, ext_sec->sec_name_off); ++ src_sec = find_src_sec_by_name(obj, sec_name); ++ if (!src_sec) { ++ pr_warn("can't find section '%s' referenced from .BTF.ext\n", sec_name); ++ return -EINVAL; ++ } ++ dst_sec = &linker->secs[src_sec->dst_id]; ++ ++ if (dst_sec->core_relo_info.rec_sz == 0) ++ dst_sec->core_relo_info.rec_sz = rec_sz; ++ if (dst_sec->core_relo_info.rec_sz != rec_sz) { ++ pr_warn("incompatible .BTF.ext record sizes for section '%s'\n", sec_name); ++ return -EINVAL; ++ } ++ ++ for_each_btf_ext_rec(&obj->btf_ext->core_relo_info, ext_sec, i, src_rec) { ++ dst_rec = add_btf_ext_rec(&dst_sec->core_relo_info, src_rec); ++ if (!dst_rec) ++ return -ENOMEM; ++ ++ dst_rec->insn_off += src_sec->dst_off; ++ dst_rec->type_id = obj->btf_type_map[dst_rec->type_id]; ++ ++ s = btf__str_by_offset(obj->btf, src_rec->access_str_off); ++ str_off = btf__add_str(linker->btf, s); ++ if (str_off < 0) ++ return -ENOMEM; ++ dst_rec->access_str_off = str_off; ++ ++ /* dst_rec->kind is fine */ ++ } ++ } ++ ++ return 0; ++} ++ ++int bpf_linker__finalize(struct bpf_linker *linker) ++{ ++ struct dst_sec *sec; ++ size_t strs_sz; ++ const void *strs; ++ int err, i; ++ ++ if (!linker->elf) ++ return libbpf_err(-EINVAL); ++ ++ err = finalize_btf(linker); ++ if (err) ++ return libbpf_err(err); ++ ++ /* Finalize strings */ ++ strs_sz = strset__data_size(linker->strtab_strs); ++ strs = strset__data(linker->strtab_strs); ++ ++ sec = &linker->secs[linker->strtab_sec_idx]; ++ sec->data->d_align = 1; ++ sec->data->d_off = 0LL; ++ sec->data->d_buf = (void *)strs; ++ sec->data->d_type = ELF_T_BYTE; ++ sec->data->d_size = strs_sz; ++ sec->shdr->sh_size = strs_sz; ++ ++ for (i = 1; i < linker->sec_cnt; i++) { ++ sec = &linker->secs[i]; ++ ++ /* STRTAB is handled specially above */ ++ if (sec->sec_idx == linker->strtab_sec_idx) ++ continue; ++ ++ /* special ephemeral sections (.ksyms, .kconfig, etc) */ ++ if (!sec->scn) ++ continue; ++ ++ sec->data->d_buf = sec->raw_data; ++ } ++ ++ /* Finalize ELF layout */ ++ if (elf_update(linker->elf, ELF_C_NULL) < 0) { ++ err = -errno; ++ pr_warn_elf("failed to finalize ELF layout"); ++ return libbpf_err(err); ++ } ++ ++ /* Write out final ELF contents */ ++ if (elf_update(linker->elf, ELF_C_WRITE) < 0) { ++ err = -errno; ++ pr_warn_elf("failed to write ELF contents"); ++ return libbpf_err(err); ++ } ++ ++ elf_end(linker->elf); ++ close(linker->fd); ++ ++ linker->elf = NULL; ++ linker->fd = -1; ++ ++ return 0; ++} ++ ++static int emit_elf_data_sec(struct bpf_linker *linker, const char *sec_name, ++ size_t align, const void *raw_data, size_t raw_sz) ++{ ++ Elf_Scn *scn; ++ Elf_Data *data; ++ Elf64_Shdr *shdr; ++ int name_off; ++ ++ name_off = strset__add_str(linker->strtab_strs, sec_name); ++ if (name_off < 0) ++ return name_off; ++ ++ scn = elf_newscn(linker->elf); ++ if (!scn) ++ return -ENOMEM; ++ data = elf_newdata(scn); ++ if (!data) ++ return -ENOMEM; ++ shdr = elf64_getshdr(scn); ++ if (!shdr) ++ return -EINVAL; ++ ++ shdr->sh_name = name_off; ++ shdr->sh_type = SHT_PROGBITS; ++ shdr->sh_flags = 0; ++ shdr->sh_size = raw_sz; ++ shdr->sh_link = 0; ++ shdr->sh_info = 0; ++ shdr->sh_addralign = align; ++ shdr->sh_entsize = 0; ++ ++ 
data->d_type = ELF_T_BYTE; ++ data->d_size = raw_sz; ++ data->d_buf = (void *)raw_data; ++ data->d_align = align; ++ data->d_off = 0; ++ ++ return 0; ++} ++ ++static int finalize_btf(struct bpf_linker *linker) ++{ ++ LIBBPF_OPTS(btf_dedup_opts, opts); ++ struct btf *btf = linker->btf; ++ const void *raw_data; ++ int i, j, id, err; ++ __u32 raw_sz; ++ ++ /* bail out if no BTF data was produced */ ++ if (btf__type_cnt(linker->btf) == 1) ++ return 0; ++ ++ for (i = 1; i < linker->sec_cnt; i++) { ++ struct dst_sec *sec = &linker->secs[i]; ++ ++ if (!sec->has_btf) ++ continue; ++ ++ id = btf__add_datasec(btf, sec->sec_name, sec->sec_sz); ++ if (id < 0) { ++ pr_warn("failed to add consolidated BTF type for datasec '%s': %d\n", ++ sec->sec_name, id); ++ return id; ++ } ++ ++ for (j = 0; j < sec->sec_var_cnt; j++) { ++ struct btf_var_secinfo *vi = &sec->sec_vars[j]; ++ ++ if (btf__add_datasec_var_info(btf, vi->type, vi->offset, vi->size)) ++ return -EINVAL; ++ } ++ } ++ ++ err = finalize_btf_ext(linker); ++ if (err) { ++ pr_warn(".BTF.ext generation failed: %d\n", err); ++ return err; ++ } ++ ++ opts.btf_ext = linker->btf_ext; ++ err = btf__dedup(linker->btf, &opts); ++ if (err) { ++ pr_warn("BTF dedup failed: %d\n", err); ++ return err; ++ } ++ ++ /* Emit .BTF section */ ++ raw_data = btf__raw_data(linker->btf, &raw_sz); ++ if (!raw_data) ++ return -ENOMEM; ++ ++ err = emit_elf_data_sec(linker, BTF_ELF_SEC, 8, raw_data, raw_sz); ++ if (err) { ++ pr_warn("failed to write out .BTF ELF section: %d\n", err); ++ return err; ++ } ++ ++ /* Emit .BTF.ext section */ ++ if (linker->btf_ext) { ++ raw_data = btf_ext__get_raw_data(linker->btf_ext, &raw_sz); ++ if (!raw_data) ++ return -ENOMEM; ++ ++ err = emit_elf_data_sec(linker, BTF_EXT_ELF_SEC, 8, raw_data, raw_sz); ++ if (err) { ++ pr_warn("failed to write out .BTF.ext ELF section: %d\n", err); ++ return err; ++ } ++ } ++ ++ return 0; ++} ++ ++static int emit_btf_ext_data(struct bpf_linker *linker, void *output, ++ const char *sec_name, struct btf_ext_sec_data *sec_data) ++{ ++ struct btf_ext_info_sec *sec_info; ++ void *cur = output; ++ int str_off; ++ size_t sz; ++ ++ if (!sec_data->rec_cnt) ++ return 0; ++ ++ str_off = btf__add_str(linker->btf, sec_name); ++ if (str_off < 0) ++ return -ENOMEM; ++ ++ sec_info = cur; ++ sec_info->sec_name_off = str_off; ++ sec_info->num_info = sec_data->rec_cnt; ++ cur += sizeof(struct btf_ext_info_sec); ++ ++ sz = sec_data->rec_cnt * sec_data->rec_sz; ++ memcpy(cur, sec_data->recs, sz); ++ cur += sz; ++ ++ return cur - output; ++} ++ ++static int finalize_btf_ext(struct bpf_linker *linker) ++{ ++ size_t funcs_sz = 0, lines_sz = 0, core_relos_sz = 0, total_sz = 0; ++ size_t func_rec_sz = 0, line_rec_sz = 0, core_relo_rec_sz = 0; ++ struct btf_ext_header *hdr; ++ void *data, *cur; ++ int i, err, sz; ++ ++ /* validate that all sections have the same .BTF.ext record sizes ++ * and calculate total data size for each type of data (func info, ++ * line info, core relos) ++ */ ++ for (i = 1; i < linker->sec_cnt; i++) { ++ struct dst_sec *sec = &linker->secs[i]; ++ ++ if (sec->func_info.rec_cnt) { ++ if (func_rec_sz == 0) ++ func_rec_sz = sec->func_info.rec_sz; ++ if (func_rec_sz != sec->func_info.rec_sz) { ++ pr_warn("mismatch in func_info record size %zu != %u\n", ++ func_rec_sz, sec->func_info.rec_sz); ++ return -EINVAL; ++ } ++ ++ funcs_sz += sizeof(struct btf_ext_info_sec) + func_rec_sz * sec->func_info.rec_cnt; ++ } ++ if (sec->line_info.rec_cnt) { ++ if (line_rec_sz == 0) ++ line_rec_sz = sec->line_info.rec_sz; ++ if 
(line_rec_sz != sec->line_info.rec_sz) { ++ pr_warn("mismatch in line_info record size %zu != %u\n", ++ line_rec_sz, sec->line_info.rec_sz); ++ return -EINVAL; ++ } ++ ++ lines_sz += sizeof(struct btf_ext_info_sec) + line_rec_sz * sec->line_info.rec_cnt; ++ } ++ if (sec->core_relo_info.rec_cnt) { ++ if (core_relo_rec_sz == 0) ++ core_relo_rec_sz = sec->core_relo_info.rec_sz; ++ if (core_relo_rec_sz != sec->core_relo_info.rec_sz) { ++ pr_warn("mismatch in core_relo_info record size %zu != %u\n", ++ core_relo_rec_sz, sec->core_relo_info.rec_sz); ++ return -EINVAL; ++ } ++ ++ core_relos_sz += sizeof(struct btf_ext_info_sec) + core_relo_rec_sz * sec->core_relo_info.rec_cnt; ++ } ++ } ++ ++ if (!funcs_sz && !lines_sz && !core_relos_sz) ++ return 0; ++ ++ total_sz += sizeof(struct btf_ext_header); ++ if (funcs_sz) { ++ funcs_sz += sizeof(__u32); /* record size prefix */ ++ total_sz += funcs_sz; ++ } ++ if (lines_sz) { ++ lines_sz += sizeof(__u32); /* record size prefix */ ++ total_sz += lines_sz; ++ } ++ if (core_relos_sz) { ++ core_relos_sz += sizeof(__u32); /* record size prefix */ ++ total_sz += core_relos_sz; ++ } ++ ++ cur = data = calloc(1, total_sz); ++ if (!data) ++ return -ENOMEM; ++ ++ hdr = cur; ++ hdr->magic = BTF_MAGIC; ++ hdr->version = BTF_VERSION; ++ hdr->flags = 0; ++ hdr->hdr_len = sizeof(struct btf_ext_header); ++ cur += sizeof(struct btf_ext_header); ++ ++ /* All offsets are in bytes relative to the end of this header */ ++ hdr->func_info_off = 0; ++ hdr->func_info_len = funcs_sz; ++ hdr->line_info_off = funcs_sz; ++ hdr->line_info_len = lines_sz; ++ hdr->core_relo_off = funcs_sz + lines_sz; ++ hdr->core_relo_len = core_relos_sz; ++ ++ if (funcs_sz) { ++ *(__u32 *)cur = func_rec_sz; ++ cur += sizeof(__u32); ++ ++ for (i = 1; i < linker->sec_cnt; i++) { ++ struct dst_sec *sec = &linker->secs[i]; ++ ++ sz = emit_btf_ext_data(linker, cur, sec->sec_name, &sec->func_info); ++ if (sz < 0) { ++ err = sz; ++ goto out; ++ } ++ ++ cur += sz; ++ } ++ } ++ ++ if (lines_sz) { ++ *(__u32 *)cur = line_rec_sz; ++ cur += sizeof(__u32); ++ ++ for (i = 1; i < linker->sec_cnt; i++) { ++ struct dst_sec *sec = &linker->secs[i]; ++ ++ sz = emit_btf_ext_data(linker, cur, sec->sec_name, &sec->line_info); ++ if (sz < 0) { ++ err = sz; ++ goto out; ++ } ++ ++ cur += sz; ++ } ++ } ++ ++ if (core_relos_sz) { ++ *(__u32 *)cur = core_relo_rec_sz; ++ cur += sizeof(__u32); ++ ++ for (i = 1; i < linker->sec_cnt; i++) { ++ struct dst_sec *sec = &linker->secs[i]; ++ ++ sz = emit_btf_ext_data(linker, cur, sec->sec_name, &sec->core_relo_info); ++ if (sz < 0) { ++ err = sz; ++ goto out; ++ } ++ ++ cur += sz; ++ } ++ } ++ ++ linker->btf_ext = btf_ext__new(data, total_sz); ++ err = libbpf_get_error(linker->btf_ext); ++ if (err) { ++ linker->btf_ext = NULL; ++ pr_warn("failed to parse final .BTF.ext data: %d\n", err); ++ goto out; ++ } ++ ++out: ++ free(data); ++ return err; ++} +diff --git a/src/cc/libbpf/src/netlink.c b/src/cc/libbpf/src/netlink.c +new file mode 100644 +index 0000000..6c01316 +--- /dev/null ++++ b/src/cc/libbpf/src/netlink.c +@@ -0,0 +1,812 @@ ++// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) ++/* Copyright (c) 2018 Facebook */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "bpf.h" ++#include "libbpf.h" ++#include "libbpf_internal.h" ++#include "nlattr.h" ++ ++#ifndef SOL_NETLINK ++#define SOL_NETLINK 270 ++#endif ++ ++typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); ++ 
++typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t, ++ void *cookie); ++ ++struct xdp_link_info { ++ __u32 prog_id; ++ __u32 drv_prog_id; ++ __u32 hw_prog_id; ++ __u32 skb_prog_id; ++ __u8 attach_mode; ++}; ++ ++struct xdp_id_md { ++ int ifindex; ++ __u32 flags; ++ struct xdp_link_info info; ++}; ++ ++static int libbpf_netlink_open(__u32 *nl_pid) ++{ ++ struct sockaddr_nl sa; ++ socklen_t addrlen; ++ int one = 1, ret; ++ int sock; ++ ++ memset(&sa, 0, sizeof(sa)); ++ sa.nl_family = AF_NETLINK; ++ ++ sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE); ++ if (sock < 0) ++ return -errno; ++ ++ if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK, ++ &one, sizeof(one)) < 0) { ++ pr_warn("Netlink error reporting not supported\n"); ++ } ++ ++ if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { ++ ret = -errno; ++ goto cleanup; ++ } ++ ++ addrlen = sizeof(sa); ++ if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) { ++ ret = -errno; ++ goto cleanup; ++ } ++ ++ if (addrlen != sizeof(sa)) { ++ ret = -LIBBPF_ERRNO__INTERNAL; ++ goto cleanup; ++ } ++ ++ *nl_pid = sa.nl_pid; ++ return sock; ++ ++cleanup: ++ close(sock); ++ return ret; ++} ++ ++static void libbpf_netlink_close(int sock) ++{ ++ close(sock); ++} ++ ++enum { ++ NL_CONT, ++ NL_NEXT, ++ NL_DONE, ++}; ++ ++static int netlink_recvmsg(int sock, struct msghdr *mhdr, int flags) ++{ ++ int len; ++ ++ do { ++ len = recvmsg(sock, mhdr, flags); ++ } while (len < 0 && (errno == EINTR || errno == EAGAIN)); ++ ++ if (len < 0) ++ return -errno; ++ return len; ++} ++ ++static int alloc_iov(struct iovec *iov, int len) ++{ ++ void *nbuf; ++ ++ nbuf = realloc(iov->iov_base, len); ++ if (!nbuf) ++ return -ENOMEM; ++ ++ iov->iov_base = nbuf; ++ iov->iov_len = len; ++ return 0; ++} ++ ++static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq, ++ __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn, ++ void *cookie) ++{ ++ struct iovec iov = {}; ++ struct msghdr mhdr = { ++ .msg_iov = &iov, ++ .msg_iovlen = 1, ++ }; ++ bool multipart = true; ++ struct nlmsgerr *err; ++ struct nlmsghdr *nh; ++ int len, ret; ++ ++ ret = alloc_iov(&iov, 4096); ++ if (ret) ++ goto done; ++ ++ while (multipart) { ++start: ++ multipart = false; ++ len = netlink_recvmsg(sock, &mhdr, MSG_PEEK | MSG_TRUNC); ++ if (len < 0) { ++ ret = len; ++ goto done; ++ } ++ ++ if (len > iov.iov_len) { ++ ret = alloc_iov(&iov, len); ++ if (ret) ++ goto done; ++ } ++ ++ len = netlink_recvmsg(sock, &mhdr, 0); ++ if (len < 0) { ++ ret = len; ++ goto done; ++ } ++ ++ if (len == 0) ++ break; ++ ++ for (nh = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(nh, len); ++ nh = NLMSG_NEXT(nh, len)) { ++ if (nh->nlmsg_pid != nl_pid) { ++ ret = -LIBBPF_ERRNO__WRNGPID; ++ goto done; ++ } ++ if (nh->nlmsg_seq != seq) { ++ ret = -LIBBPF_ERRNO__INVSEQ; ++ goto done; ++ } ++ if (nh->nlmsg_flags & NLM_F_MULTI) ++ multipart = true; ++ switch (nh->nlmsg_type) { ++ case NLMSG_ERROR: ++ err = (struct nlmsgerr *)NLMSG_DATA(nh); ++ if (!err->error) ++ continue; ++ ret = err->error; ++ libbpf_nla_dump_errormsg(nh); ++ goto done; ++ case NLMSG_DONE: ++ ret = 0; ++ goto done; ++ default: ++ break; ++ } ++ if (_fn) { ++ ret = _fn(nh, fn, cookie); ++ switch (ret) { ++ case NL_CONT: ++ break; ++ case NL_NEXT: ++ goto start; ++ case NL_DONE: ++ ret = 0; ++ goto done; ++ default: ++ goto done; ++ } ++ } ++ } ++ } ++ ret = 0; ++done: ++ free(iov.iov_base); ++ return ret; ++} ++ ++static int libbpf_netlink_send_recv(struct libbpf_nla_req *req, ++ __dump_nlmsg_t parse_msg, ++ 
libbpf_dump_nlmsg_t parse_attr, ++ void *cookie) ++{ ++ __u32 nl_pid = 0; ++ int sock, ret; ++ ++ sock = libbpf_netlink_open(&nl_pid); ++ if (sock < 0) ++ return sock; ++ ++ req->nh.nlmsg_pid = 0; ++ req->nh.nlmsg_seq = time(NULL); ++ ++ if (send(sock, req, req->nh.nlmsg_len, 0) < 0) { ++ ret = -errno; ++ goto out; ++ } ++ ++ ret = libbpf_netlink_recv(sock, nl_pid, req->nh.nlmsg_seq, ++ parse_msg, parse_attr, cookie); ++out: ++ libbpf_netlink_close(sock); ++ return ret; ++} ++ ++static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd, ++ __u32 flags) ++{ ++ struct nlattr *nla; ++ int ret; ++ struct libbpf_nla_req req; ++ ++ memset(&req, 0, sizeof(req)); ++ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); ++ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; ++ req.nh.nlmsg_type = RTM_SETLINK; ++ req.ifinfo.ifi_family = AF_UNSPEC; ++ req.ifinfo.ifi_index = ifindex; ++ ++ nla = nlattr_begin_nested(&req, IFLA_XDP); ++ if (!nla) ++ return -EMSGSIZE; ++ ret = nlattr_add(&req, IFLA_XDP_FD, &fd, sizeof(fd)); ++ if (ret < 0) ++ return ret; ++ if (flags) { ++ ret = nlattr_add(&req, IFLA_XDP_FLAGS, &flags, sizeof(flags)); ++ if (ret < 0) ++ return ret; ++ } ++ if (flags & XDP_FLAGS_REPLACE) { ++ ret = nlattr_add(&req, IFLA_XDP_EXPECTED_FD, &old_fd, ++ sizeof(old_fd)); ++ if (ret < 0) ++ return ret; ++ } ++ nlattr_end_nested(&req, nla); ++ ++ return libbpf_netlink_send_recv(&req, NULL, NULL, NULL); ++} ++ ++int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags, const struct bpf_xdp_attach_opts *opts) ++{ ++ int old_prog_fd, err; ++ ++ if (!OPTS_VALID(opts, bpf_xdp_attach_opts)) ++ return libbpf_err(-EINVAL); ++ ++ old_prog_fd = OPTS_GET(opts, old_prog_fd, 0); ++ if (old_prog_fd) ++ flags |= XDP_FLAGS_REPLACE; ++ else ++ old_prog_fd = -1; ++ ++ err = __bpf_set_link_xdp_fd_replace(ifindex, prog_fd, old_prog_fd, flags); ++ return libbpf_err(err); ++} ++ ++int bpf_xdp_detach(int ifindex, __u32 flags, const struct bpf_xdp_attach_opts *opts) ++{ ++ return bpf_xdp_attach(ifindex, -1, flags, opts); ++} ++ ++static int __dump_link_nlmsg(struct nlmsghdr *nlh, ++ libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie) ++{ ++ struct nlattr *tb[IFLA_MAX + 1], *attr; ++ struct ifinfomsg *ifi = NLMSG_DATA(nlh); ++ int len; ++ ++ len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi)); ++ attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi))); ++ ++ if (libbpf_nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0) ++ return -LIBBPF_ERRNO__NLPARSE; ++ ++ return dump_link_nlmsg(cookie, ifi, tb); ++} ++ ++static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb) ++{ ++ struct nlattr *xdp_tb[IFLA_XDP_MAX + 1]; ++ struct xdp_id_md *xdp_id = cookie; ++ struct ifinfomsg *ifinfo = msg; ++ int ret; ++ ++ if (xdp_id->ifindex && xdp_id->ifindex != ifinfo->ifi_index) ++ return 0; ++ ++ if (!tb[IFLA_XDP]) ++ return 0; ++ ++ ret = libbpf_nla_parse_nested(xdp_tb, IFLA_XDP_MAX, tb[IFLA_XDP], NULL); ++ if (ret) ++ return ret; ++ ++ if (!xdp_tb[IFLA_XDP_ATTACHED]) ++ return 0; ++ ++ xdp_id->info.attach_mode = libbpf_nla_getattr_u8( ++ xdp_tb[IFLA_XDP_ATTACHED]); ++ ++ if (xdp_id->info.attach_mode == XDP_ATTACHED_NONE) ++ return 0; ++ ++ if (xdp_tb[IFLA_XDP_PROG_ID]) ++ xdp_id->info.prog_id = libbpf_nla_getattr_u32( ++ xdp_tb[IFLA_XDP_PROG_ID]); ++ ++ if (xdp_tb[IFLA_XDP_SKB_PROG_ID]) ++ xdp_id->info.skb_prog_id = libbpf_nla_getattr_u32( ++ xdp_tb[IFLA_XDP_SKB_PROG_ID]); ++ ++ if (xdp_tb[IFLA_XDP_DRV_PROG_ID]) ++ xdp_id->info.drv_prog_id = libbpf_nla_getattr_u32( ++ xdp_tb[IFLA_XDP_DRV_PROG_ID]); ++ ++ 
if (xdp_tb[IFLA_XDP_HW_PROG_ID]) ++ xdp_id->info.hw_prog_id = libbpf_nla_getattr_u32( ++ xdp_tb[IFLA_XDP_HW_PROG_ID]); ++ ++ return 0; ++} ++ ++int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts) ++{ ++ struct libbpf_nla_req req = { ++ .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), ++ .nh.nlmsg_type = RTM_GETLINK, ++ .nh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, ++ .ifinfo.ifi_family = AF_PACKET, ++ }; ++ struct xdp_id_md xdp_id = {}; ++ int err; ++ ++ if (!OPTS_VALID(opts, bpf_xdp_query_opts)) ++ return libbpf_err(-EINVAL); ++ ++ if (xdp_flags & ~XDP_FLAGS_MASK) ++ return libbpf_err(-EINVAL); ++ ++ /* Check whether the single {HW,DRV,SKB} mode is set */ ++ xdp_flags &= XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE; ++ if (xdp_flags & (xdp_flags - 1)) ++ return libbpf_err(-EINVAL); ++ ++ xdp_id.ifindex = ifindex; ++ xdp_id.flags = xdp_flags; ++ ++ err = libbpf_netlink_send_recv(&req, __dump_link_nlmsg, ++ get_xdp_info, &xdp_id); ++ if (err) ++ return libbpf_err(err); ++ ++ OPTS_SET(opts, prog_id, xdp_id.info.prog_id); ++ OPTS_SET(opts, drv_prog_id, xdp_id.info.drv_prog_id); ++ OPTS_SET(opts, hw_prog_id, xdp_id.info.hw_prog_id); ++ OPTS_SET(opts, skb_prog_id, xdp_id.info.skb_prog_id); ++ OPTS_SET(opts, attach_mode, xdp_id.info.attach_mode); ++ ++ return 0; ++} ++ ++int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id) ++{ ++ LIBBPF_OPTS(bpf_xdp_query_opts, opts); ++ int ret; ++ ++ ret = bpf_xdp_query(ifindex, flags, &opts); ++ if (ret) ++ return libbpf_err(ret); ++ ++ flags &= XDP_FLAGS_MODES; ++ ++ if (opts.attach_mode != XDP_ATTACHED_MULTI && !flags) ++ *prog_id = opts.prog_id; ++ else if (flags & XDP_FLAGS_DRV_MODE) ++ *prog_id = opts.drv_prog_id; ++ else if (flags & XDP_FLAGS_HW_MODE) ++ *prog_id = opts.hw_prog_id; ++ else if (flags & XDP_FLAGS_SKB_MODE) ++ *prog_id = opts.skb_prog_id; ++ else ++ *prog_id = 0; ++ ++ return 0; ++} ++ ++ ++typedef int (*qdisc_config_t)(struct libbpf_nla_req *req); ++ ++static int clsact_config(struct libbpf_nla_req *req) ++{ ++ req->tc.tcm_parent = TC_H_CLSACT; ++ req->tc.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0); ++ ++ return nlattr_add(req, TCA_KIND, "clsact", sizeof("clsact")); ++} ++ ++static int attach_point_to_config(struct bpf_tc_hook *hook, ++ qdisc_config_t *config) ++{ ++ switch (OPTS_GET(hook, attach_point, 0)) { ++ case BPF_TC_INGRESS: ++ case BPF_TC_EGRESS: ++ case BPF_TC_INGRESS | BPF_TC_EGRESS: ++ if (OPTS_GET(hook, parent, 0)) ++ return -EINVAL; ++ *config = &clsact_config; ++ return 0; ++ case BPF_TC_CUSTOM: ++ return -EOPNOTSUPP; ++ default: ++ return -EINVAL; ++ } ++} ++ ++static int tc_get_tcm_parent(enum bpf_tc_attach_point attach_point, ++ __u32 *parent) ++{ ++ switch (attach_point) { ++ case BPF_TC_INGRESS: ++ case BPF_TC_EGRESS: ++ if (*parent) ++ return -EINVAL; ++ *parent = TC_H_MAKE(TC_H_CLSACT, ++ attach_point == BPF_TC_INGRESS ? 
++ TC_H_MIN_INGRESS : TC_H_MIN_EGRESS); ++ break; ++ case BPF_TC_CUSTOM: ++ if (!*parent) ++ return -EINVAL; ++ break; ++ default: ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags) ++{ ++ qdisc_config_t config; ++ int ret; ++ struct libbpf_nla_req req; ++ ++ ret = attach_point_to_config(hook, &config); ++ if (ret < 0) ++ return ret; ++ ++ memset(&req, 0, sizeof(req)); ++ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); ++ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags; ++ req.nh.nlmsg_type = cmd; ++ req.tc.tcm_family = AF_UNSPEC; ++ req.tc.tcm_ifindex = OPTS_GET(hook, ifindex, 0); ++ ++ ret = config(&req); ++ if (ret < 0) ++ return ret; ++ ++ return libbpf_netlink_send_recv(&req, NULL, NULL, NULL); ++} ++ ++static int tc_qdisc_create_excl(struct bpf_tc_hook *hook) ++{ ++ return tc_qdisc_modify(hook, RTM_NEWQDISC, NLM_F_CREATE | NLM_F_EXCL); ++} ++ ++static int tc_qdisc_delete(struct bpf_tc_hook *hook) ++{ ++ return tc_qdisc_modify(hook, RTM_DELQDISC, 0); ++} ++ ++int bpf_tc_hook_create(struct bpf_tc_hook *hook) ++{ ++ int ret; ++ ++ if (!hook || !OPTS_VALID(hook, bpf_tc_hook) || ++ OPTS_GET(hook, ifindex, 0) <= 0) ++ return libbpf_err(-EINVAL); ++ ++ ret = tc_qdisc_create_excl(hook); ++ return libbpf_err(ret); ++} ++ ++static int __bpf_tc_detach(const struct bpf_tc_hook *hook, ++ const struct bpf_tc_opts *opts, ++ const bool flush); ++ ++int bpf_tc_hook_destroy(struct bpf_tc_hook *hook) ++{ ++ if (!hook || !OPTS_VALID(hook, bpf_tc_hook) || ++ OPTS_GET(hook, ifindex, 0) <= 0) ++ return libbpf_err(-EINVAL); ++ ++ switch (OPTS_GET(hook, attach_point, 0)) { ++ case BPF_TC_INGRESS: ++ case BPF_TC_EGRESS: ++ return libbpf_err(__bpf_tc_detach(hook, NULL, true)); ++ case BPF_TC_INGRESS | BPF_TC_EGRESS: ++ return libbpf_err(tc_qdisc_delete(hook)); ++ case BPF_TC_CUSTOM: ++ return libbpf_err(-EOPNOTSUPP); ++ default: ++ return libbpf_err(-EINVAL); ++ } ++} ++ ++struct bpf_cb_ctx { ++ struct bpf_tc_opts *opts; ++ bool processed; ++}; ++ ++static int __get_tc_info(void *cookie, struct tcmsg *tc, struct nlattr **tb, ++ bool unicast) ++{ ++ struct nlattr *tbb[TCA_BPF_MAX + 1]; ++ struct bpf_cb_ctx *info = cookie; ++ ++ if (!info || !info->opts) ++ return -EINVAL; ++ if (unicast && info->processed) ++ return -EINVAL; ++ if (!tb[TCA_OPTIONS]) ++ return NL_CONT; ++ ++ libbpf_nla_parse_nested(tbb, TCA_BPF_MAX, tb[TCA_OPTIONS], NULL); ++ if (!tbb[TCA_BPF_ID]) ++ return -EINVAL; ++ ++ OPTS_SET(info->opts, prog_id, libbpf_nla_getattr_u32(tbb[TCA_BPF_ID])); ++ OPTS_SET(info->opts, handle, tc->tcm_handle); ++ OPTS_SET(info->opts, priority, TC_H_MAJ(tc->tcm_info) >> 16); ++ ++ info->processed = true; ++ return unicast ? 
NL_NEXT : NL_DONE; ++} ++ ++static int get_tc_info(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn, ++ void *cookie) ++{ ++ struct tcmsg *tc = NLMSG_DATA(nh); ++ struct nlattr *tb[TCA_MAX + 1]; ++ ++ libbpf_nla_parse(tb, TCA_MAX, ++ (struct nlattr *)((void *)tc + NLMSG_ALIGN(sizeof(*tc))), ++ NLMSG_PAYLOAD(nh, sizeof(*tc)), NULL); ++ if (!tb[TCA_KIND]) ++ return NL_CONT; ++ return __get_tc_info(cookie, tc, tb, nh->nlmsg_flags & NLM_F_ECHO); ++} ++ ++static int tc_add_fd_and_name(struct libbpf_nla_req *req, int fd) ++{ ++ struct bpf_prog_info info = {}; ++ __u32 info_len = sizeof(info); ++ char name[256]; ++ int len, ret; ++ ++ ret = bpf_obj_get_info_by_fd(fd, &info, &info_len); ++ if (ret < 0) ++ return ret; ++ ++ ret = nlattr_add(req, TCA_BPF_FD, &fd, sizeof(fd)); ++ if (ret < 0) ++ return ret; ++ len = snprintf(name, sizeof(name), "%s:[%u]", info.name, info.id); ++ if (len < 0) ++ return -errno; ++ if (len >= sizeof(name)) ++ return -ENAMETOOLONG; ++ return nlattr_add(req, TCA_BPF_NAME, name, len + 1); ++} ++ ++int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts) ++{ ++ __u32 protocol, bpf_flags, handle, priority, parent, prog_id, flags; ++ int ret, ifindex, attach_point, prog_fd; ++ struct bpf_cb_ctx info = {}; ++ struct libbpf_nla_req req; ++ struct nlattr *nla; ++ ++ if (!hook || !opts || ++ !OPTS_VALID(hook, bpf_tc_hook) || ++ !OPTS_VALID(opts, bpf_tc_opts)) ++ return libbpf_err(-EINVAL); ++ ++ ifindex = OPTS_GET(hook, ifindex, 0); ++ parent = OPTS_GET(hook, parent, 0); ++ attach_point = OPTS_GET(hook, attach_point, 0); ++ ++ handle = OPTS_GET(opts, handle, 0); ++ priority = OPTS_GET(opts, priority, 0); ++ prog_fd = OPTS_GET(opts, prog_fd, 0); ++ prog_id = OPTS_GET(opts, prog_id, 0); ++ flags = OPTS_GET(opts, flags, 0); ++ ++ if (ifindex <= 0 || !prog_fd || prog_id) ++ return libbpf_err(-EINVAL); ++ if (priority > UINT16_MAX) ++ return libbpf_err(-EINVAL); ++ if (flags & ~BPF_TC_F_REPLACE) ++ return libbpf_err(-EINVAL); ++ ++ flags = (flags & BPF_TC_F_REPLACE) ? 
NLM_F_REPLACE : NLM_F_EXCL; ++ protocol = ETH_P_ALL; ++ ++ memset(&req, 0, sizeof(req)); ++ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); ++ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | ++ NLM_F_ECHO | flags; ++ req.nh.nlmsg_type = RTM_NEWTFILTER; ++ req.tc.tcm_family = AF_UNSPEC; ++ req.tc.tcm_ifindex = ifindex; ++ req.tc.tcm_handle = handle; ++ req.tc.tcm_info = TC_H_MAKE(priority << 16, htons(protocol)); ++ ++ ret = tc_get_tcm_parent(attach_point, &parent); ++ if (ret < 0) ++ return libbpf_err(ret); ++ req.tc.tcm_parent = parent; ++ ++ ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf")); ++ if (ret < 0) ++ return libbpf_err(ret); ++ nla = nlattr_begin_nested(&req, TCA_OPTIONS); ++ if (!nla) ++ return libbpf_err(-EMSGSIZE); ++ ret = tc_add_fd_and_name(&req, prog_fd); ++ if (ret < 0) ++ return libbpf_err(ret); ++ bpf_flags = TCA_BPF_FLAG_ACT_DIRECT; ++ ret = nlattr_add(&req, TCA_BPF_FLAGS, &bpf_flags, sizeof(bpf_flags)); ++ if (ret < 0) ++ return libbpf_err(ret); ++ nlattr_end_nested(&req, nla); ++ ++ info.opts = opts; ++ ++ ret = libbpf_netlink_send_recv(&req, get_tc_info, NULL, &info); ++ if (ret < 0) ++ return libbpf_err(ret); ++ if (!info.processed) ++ return libbpf_err(-ENOENT); ++ return ret; ++} ++ ++static int __bpf_tc_detach(const struct bpf_tc_hook *hook, ++ const struct bpf_tc_opts *opts, ++ const bool flush) ++{ ++ __u32 protocol = 0, handle, priority, parent, prog_id, flags; ++ int ret, ifindex, attach_point, prog_fd; ++ struct libbpf_nla_req req; ++ ++ if (!hook || ++ !OPTS_VALID(hook, bpf_tc_hook) || ++ !OPTS_VALID(opts, bpf_tc_opts)) ++ return -EINVAL; ++ ++ ifindex = OPTS_GET(hook, ifindex, 0); ++ parent = OPTS_GET(hook, parent, 0); ++ attach_point = OPTS_GET(hook, attach_point, 0); ++ ++ handle = OPTS_GET(opts, handle, 0); ++ priority = OPTS_GET(opts, priority, 0); ++ prog_fd = OPTS_GET(opts, prog_fd, 0); ++ prog_id = OPTS_GET(opts, prog_id, 0); ++ flags = OPTS_GET(opts, flags, 0); ++ ++ if (ifindex <= 0 || flags || prog_fd || prog_id) ++ return -EINVAL; ++ if (priority > UINT16_MAX) ++ return -EINVAL; ++ if (!flush) { ++ if (!handle || !priority) ++ return -EINVAL; ++ protocol = ETH_P_ALL; ++ } else { ++ if (handle || priority) ++ return -EINVAL; ++ } ++ ++ memset(&req, 0, sizeof(req)); ++ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); ++ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; ++ req.nh.nlmsg_type = RTM_DELTFILTER; ++ req.tc.tcm_family = AF_UNSPEC; ++ req.tc.tcm_ifindex = ifindex; ++ if (!flush) { ++ req.tc.tcm_handle = handle; ++ req.tc.tcm_info = TC_H_MAKE(priority << 16, htons(protocol)); ++ } ++ ++ ret = tc_get_tcm_parent(attach_point, &parent); ++ if (ret < 0) ++ return ret; ++ req.tc.tcm_parent = parent; ++ ++ if (!flush) { ++ ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf")); ++ if (ret < 0) ++ return ret; ++ } ++ ++ return libbpf_netlink_send_recv(&req, NULL, NULL, NULL); ++} ++ ++int bpf_tc_detach(const struct bpf_tc_hook *hook, ++ const struct bpf_tc_opts *opts) ++{ ++ int ret; ++ ++ if (!opts) ++ return libbpf_err(-EINVAL); ++ ++ ret = __bpf_tc_detach(hook, opts, false); ++ return libbpf_err(ret); ++} ++ ++int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts) ++{ ++ __u32 protocol, handle, priority, parent, prog_id, flags; ++ int ret, ifindex, attach_point, prog_fd; ++ struct bpf_cb_ctx info = {}; ++ struct libbpf_nla_req req; ++ ++ if (!hook || !opts || ++ !OPTS_VALID(hook, bpf_tc_hook) || ++ !OPTS_VALID(opts, bpf_tc_opts)) ++ return libbpf_err(-EINVAL); ++ ++ ifindex = OPTS_GET(hook, 
ifindex, 0);
++	parent = OPTS_GET(hook, parent, 0);
++	attach_point = OPTS_GET(hook, attach_point, 0);
++
++	handle = OPTS_GET(opts, handle, 0);
++	priority = OPTS_GET(opts, priority, 0);
++	prog_fd = OPTS_GET(opts, prog_fd, 0);
++	prog_id = OPTS_GET(opts, prog_id, 0);
++	flags = OPTS_GET(opts, flags, 0);
++
++	if (ifindex <= 0 || flags || prog_fd || prog_id ||
++	    !handle || !priority)
++		return libbpf_err(-EINVAL);
++	if (priority > UINT16_MAX)
++		return libbpf_err(-EINVAL);
++
++	protocol = ETH_P_ALL;
++
++	memset(&req, 0, sizeof(req));
++	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
++	req.nh.nlmsg_flags = NLM_F_REQUEST;
++	req.nh.nlmsg_type = RTM_GETTFILTER;
++	req.tc.tcm_family = AF_UNSPEC;
++	req.tc.tcm_ifindex = ifindex;
++	req.tc.tcm_handle = handle;
++	req.tc.tcm_info = TC_H_MAKE(priority << 16, htons(protocol));
++
++	ret = tc_get_tcm_parent(attach_point, &parent);
++	if (ret < 0)
++		return libbpf_err(ret);
++	req.tc.tcm_parent = parent;
++
++	ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
++	if (ret < 0)
++		return libbpf_err(ret);
++
++	info.opts = opts;
++
++	ret = libbpf_netlink_send_recv(&req, get_tc_info, NULL, &info);
++	if (ret < 0)
++		return libbpf_err(ret);
++	if (!info.processed)
++		return libbpf_err(-ENOENT);
++	return ret;
++}
+diff --git a/src/cc/libbpf/src/nlattr.c b/src/cc/libbpf/src/nlattr.c
+new file mode 100644
+index 0000000..f57e77a
+--- /dev/null
++++ b/src/cc/libbpf/src/nlattr.c
+@@ -0,0 +1,195 @@
++// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
++
++/*
++ * NETLINK	Netlink attributes
++ *
++ * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch>
++ */
++
++#include <errno.h>
++#include <string.h>
++#include <stdio.h>
++#include <linux/rtnetlink.h>
++#include "nlattr.h"
++#include "libbpf_internal.h"
++
++static uint16_t nla_attr_minlen[LIBBPF_NLA_TYPE_MAX+1] = {
++	[LIBBPF_NLA_U8]		= sizeof(uint8_t),
++	[LIBBPF_NLA_U16]	= sizeof(uint16_t),
++	[LIBBPF_NLA_U32]	= sizeof(uint32_t),
++	[LIBBPF_NLA_U64]	= sizeof(uint64_t),
++	[LIBBPF_NLA_STRING]	= 1,
++	[LIBBPF_NLA_FLAG]	= 0,
++};
++
++static struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
++{
++	int totlen = NLA_ALIGN(nla->nla_len);
++
++	*remaining -= totlen;
++	return (struct nlattr *)((void *)nla + totlen);
++}
++
++static int nla_ok(const struct nlattr *nla, int remaining)
++{
++	return remaining >= sizeof(*nla) &&
++	       nla->nla_len >= sizeof(*nla) &&
++	       nla->nla_len <= remaining;
++}
++
++static int nla_type(const struct nlattr *nla)
++{
++	return nla->nla_type & NLA_TYPE_MASK;
++}
++
++static int validate_nla(struct nlattr *nla, int maxtype,
++			struct libbpf_nla_policy *policy)
++{
++	struct libbpf_nla_policy *pt;
++	unsigned int minlen = 0;
++	int type = nla_type(nla);
++
++	if (type < 0 || type > maxtype)
++		return 0;
++
++	pt = &policy[type];
++
++	if (pt->type > LIBBPF_NLA_TYPE_MAX)
++		return 0;
++
++	if (pt->minlen)
++		minlen = pt->minlen;
++	else if (pt->type != LIBBPF_NLA_UNSPEC)
++		minlen = nla_attr_minlen[pt->type];
++
++	if (libbpf_nla_len(nla) < minlen)
++		return -1;
++
++	if (pt->maxlen && libbpf_nla_len(nla) > pt->maxlen)
++		return -1;
++
++	if (pt->type == LIBBPF_NLA_STRING) {
++		char *data = libbpf_nla_data(nla);
++
++		if (data[libbpf_nla_len(nla) - 1] != '\0')
++			return -1;
++	}
++
++	return 0;
++}
++
++static inline int nlmsg_len(const struct nlmsghdr *nlh)
++{
++	return nlh->nlmsg_len - NLMSG_HDRLEN;
++}
++
++/**
++ * Create attribute index based on a stream of attributes.
++ * @arg tb		Index array to be filled (maxtype+1 elements).
++ * @arg maxtype	Maximum attribute type expected and accepted.
++ * @arg head Head of attribute stream. ++ * @arg len Length of attribute stream. ++ * @arg policy Attribute validation policy. ++ * ++ * Iterates over the stream of attributes and stores a pointer to each ++ * attribute in the index array using the attribute type as index to ++ * the array. Attribute with a type greater than the maximum type ++ * specified will be silently ignored in order to maintain backwards ++ * compatibility. If \a policy is not NULL, the attribute will be ++ * validated using the specified policy. ++ * ++ * @see nla_validate ++ * @return 0 on success or a negative error code. ++ */ ++int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, ++ int len, struct libbpf_nla_policy *policy) ++{ ++ struct nlattr *nla; ++ int rem, err; ++ ++ memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); ++ ++ libbpf_nla_for_each_attr(nla, head, len, rem) { ++ int type = nla_type(nla); ++ ++ if (type > maxtype) ++ continue; ++ ++ if (policy) { ++ err = validate_nla(nla, maxtype, policy); ++ if (err < 0) ++ goto errout; ++ } ++ ++ if (tb[type]) ++ pr_warn("Attribute of type %#x found multiple times in message, " ++ "previous attribute is being ignored.\n", type); ++ ++ tb[type] = nla; ++ } ++ ++ err = 0; ++errout: ++ return err; ++} ++ ++/** ++ * Create attribute index based on nested attribute ++ * @arg tb Index array to be filled (maxtype+1 elements). ++ * @arg maxtype Maximum attribute type expected and accepted. ++ * @arg nla Nested Attribute. ++ * @arg policy Attribute validation policy. ++ * ++ * Feeds the stream of attributes nested into the specified attribute ++ * to libbpf_nla_parse(). ++ * ++ * @see libbpf_nla_parse ++ * @return 0 on success or a negative error code. ++ */ ++int libbpf_nla_parse_nested(struct nlattr *tb[], int maxtype, ++ struct nlattr *nla, ++ struct libbpf_nla_policy *policy) ++{ ++ return libbpf_nla_parse(tb, maxtype, libbpf_nla_data(nla), ++ libbpf_nla_len(nla), policy); ++} ++ ++/* dump netlink extended ack error message */ ++int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh) ++{ ++ struct libbpf_nla_policy extack_policy[NLMSGERR_ATTR_MAX + 1] = { ++ [NLMSGERR_ATTR_MSG] = { .type = LIBBPF_NLA_STRING }, ++ [NLMSGERR_ATTR_OFFS] = { .type = LIBBPF_NLA_U32 }, ++ }; ++ struct nlattr *tb[NLMSGERR_ATTR_MAX + 1], *attr; ++ struct nlmsgerr *err; ++ char *errmsg = NULL; ++ int hlen, alen; ++ ++ /* no TLVs, nothing to do here */ ++ if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) ++ return 0; ++ ++ err = (struct nlmsgerr *)NLMSG_DATA(nlh); ++ hlen = sizeof(*err); ++ ++ /* if NLM_F_CAPPED is set then the inner err msg was capped */ ++ if (!(nlh->nlmsg_flags & NLM_F_CAPPED)) ++ hlen += nlmsg_len(&err->msg); ++ ++ attr = (struct nlattr *) ((void *) err + hlen); ++ alen = nlh->nlmsg_len - hlen; ++ ++ if (libbpf_nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen, ++ extack_policy) != 0) { ++ pr_warn("Failed to parse extended error attributes\n"); ++ return 0; ++ } ++ ++ if (tb[NLMSGERR_ATTR_MSG]) ++ errmsg = (char *) libbpf_nla_data(tb[NLMSGERR_ATTR_MSG]); ++ ++ pr_warn("Kernel error message: %s\n", errmsg); ++ ++ return 0; ++} +diff --git a/src/cc/libbpf/src/nlattr.h b/src/cc/libbpf/src/nlattr.h +new file mode 100644 +index 0000000..4d15ae2 +--- /dev/null ++++ b/src/cc/libbpf/src/nlattr.h +@@ -0,0 +1,164 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++ ++/* ++ * NETLINK Netlink attributes ++ * ++ * Copyright (c) 2003-2013 Thomas Graf ++ */ ++ ++#ifndef __LIBBPF_NLATTR_H ++#define __LIBBPF_NLATTR_H ++ ++#include ++#include ++#include 
++#include ++#include ++ ++/* avoid multiple definition of netlink features */ ++#define __LINUX_NETLINK_H ++ ++/** ++ * Standard attribute types to specify validation policy ++ */ ++enum { ++ LIBBPF_NLA_UNSPEC, /**< Unspecified type, binary data chunk */ ++ LIBBPF_NLA_U8, /**< 8 bit integer */ ++ LIBBPF_NLA_U16, /**< 16 bit integer */ ++ LIBBPF_NLA_U32, /**< 32 bit integer */ ++ LIBBPF_NLA_U64, /**< 64 bit integer */ ++ LIBBPF_NLA_STRING, /**< NUL terminated character string */ ++ LIBBPF_NLA_FLAG, /**< Flag */ ++ LIBBPF_NLA_MSECS, /**< Micro seconds (64bit) */ ++ LIBBPF_NLA_NESTED, /**< Nested attributes */ ++ __LIBBPF_NLA_TYPE_MAX, ++}; ++ ++#define LIBBPF_NLA_TYPE_MAX (__LIBBPF_NLA_TYPE_MAX - 1) ++ ++/** ++ * @ingroup attr ++ * Attribute validation policy. ++ * ++ * See section @core_doc{core_attr_parse,Attribute Parsing} for more details. ++ */ ++struct libbpf_nla_policy { ++ /** Type of attribute or LIBBPF_NLA_UNSPEC */ ++ uint16_t type; ++ ++ /** Minimal length of payload required */ ++ uint16_t minlen; ++ ++ /** Maximal length of payload allowed */ ++ uint16_t maxlen; ++}; ++ ++struct libbpf_nla_req { ++ struct nlmsghdr nh; ++ union { ++ struct ifinfomsg ifinfo; ++ struct tcmsg tc; ++ }; ++ char buf[128]; ++}; ++ ++/** ++ * @ingroup attr ++ * Iterate over a stream of attributes ++ * @arg pos loop counter, set to current attribute ++ * @arg head head of attribute stream ++ * @arg len length of attribute stream ++ * @arg rem initialized to len, holds bytes currently remaining in stream ++ */ ++#define libbpf_nla_for_each_attr(pos, head, len, rem) \ ++ for (pos = head, rem = len; \ ++ nla_ok(pos, rem); \ ++ pos = nla_next(pos, &(rem))) ++ ++/** ++ * libbpf_nla_data - head of payload ++ * @nla: netlink attribute ++ */ ++static inline void *libbpf_nla_data(const struct nlattr *nla) ++{ ++ return (void *)nla + NLA_HDRLEN; ++} ++ ++static inline uint8_t libbpf_nla_getattr_u8(const struct nlattr *nla) ++{ ++ return *(uint8_t *)libbpf_nla_data(nla); ++} ++ ++static inline uint32_t libbpf_nla_getattr_u32(const struct nlattr *nla) ++{ ++ return *(uint32_t *)libbpf_nla_data(nla); ++} ++ ++static inline const char *libbpf_nla_getattr_str(const struct nlattr *nla) ++{ ++ return (const char *)libbpf_nla_data(nla); ++} ++ ++/** ++ * libbpf_nla_len - length of payload ++ * @nla: netlink attribute ++ */ ++static inline int libbpf_nla_len(const struct nlattr *nla) ++{ ++ return nla->nla_len - NLA_HDRLEN; ++} ++ ++int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, ++ int len, struct libbpf_nla_policy *policy); ++int libbpf_nla_parse_nested(struct nlattr *tb[], int maxtype, ++ struct nlattr *nla, ++ struct libbpf_nla_policy *policy); ++ ++int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh); ++ ++static inline struct nlattr *nla_data(struct nlattr *nla) ++{ ++ return (struct nlattr *)((void *)nla + NLA_HDRLEN); ++} ++ ++static inline struct nlattr *req_tail(struct libbpf_nla_req *req) ++{ ++ return (struct nlattr *)((void *)req + NLMSG_ALIGN(req->nh.nlmsg_len)); ++} ++ ++static inline int nlattr_add(struct libbpf_nla_req *req, int type, ++ const void *data, int len) ++{ ++ struct nlattr *nla; ++ ++ if (NLMSG_ALIGN(req->nh.nlmsg_len) + NLA_ALIGN(NLA_HDRLEN + len) > sizeof(*req)) ++ return -EMSGSIZE; ++ if (!!data != !!len) ++ return -EINVAL; ++ ++ nla = req_tail(req); ++ nla->nla_type = type; ++ nla->nla_len = NLA_HDRLEN + len; ++ if (data) ++ memcpy(nla_data(nla), data, len); ++ req->nh.nlmsg_len = NLMSG_ALIGN(req->nh.nlmsg_len) + NLA_ALIGN(nla->nla_len); ++ return 0; ++} ++ 
++static inline struct nlattr *nlattr_begin_nested(struct libbpf_nla_req *req, int type) ++{ ++ struct nlattr *tail; ++ ++ tail = req_tail(req); ++ if (nlattr_add(req, type | NLA_F_NESTED, NULL, 0)) ++ return NULL; ++ return tail; ++} ++ ++static inline void nlattr_end_nested(struct libbpf_nla_req *req, ++ struct nlattr *tail) ++{ ++ tail->nla_len = (void *)req_tail(req) - (void *)tail; ++} ++ ++#endif /* __LIBBPF_NLATTR_H */ +diff --git a/src/cc/libbpf/src/relo_core.c b/src/cc/libbpf/src/relo_core.c +new file mode 100644 +index 0000000..c4b0e81 +--- /dev/null ++++ b/src/cc/libbpf/src/relo_core.c +@@ -0,0 +1,1690 @@ ++// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) ++/* Copyright (c) 2019 Facebook */ ++ ++#ifdef __KERNEL__ ++#include ++#include ++#include ++#include ++#include "relo_core.h" ++ ++static const char *btf_kind_str(const struct btf_type *t) ++{ ++ return btf_type_str(t); ++} ++ ++static bool is_ldimm64_insn(struct bpf_insn *insn) ++{ ++ return insn->code == (BPF_LD | BPF_IMM | BPF_DW); ++} ++ ++static const struct btf_type * ++skip_mods_and_typedefs(const struct btf *btf, u32 id, u32 *res_id) ++{ ++ return btf_type_skip_modifiers(btf, id, res_id); ++} ++ ++static const char *btf__name_by_offset(const struct btf *btf, u32 offset) ++{ ++ return btf_name_by_offset(btf, offset); ++} ++ ++static s64 btf__resolve_size(const struct btf *btf, u32 type_id) ++{ ++ const struct btf_type *t; ++ int size; ++ ++ t = btf_type_by_id(btf, type_id); ++ t = btf_resolve_size(btf, t, &size); ++ if (IS_ERR(t)) ++ return PTR_ERR(t); ++ return size; ++} ++ ++enum libbpf_print_level { ++ LIBBPF_WARN, ++ LIBBPF_INFO, ++ LIBBPF_DEBUG, ++}; ++ ++#undef pr_warn ++#undef pr_info ++#undef pr_debug ++#define pr_warn(fmt, log, ...) bpf_log((void *)log, fmt, "", ##__VA_ARGS__) ++#define pr_info(fmt, log, ...) bpf_log((void *)log, fmt, "", ##__VA_ARGS__) ++#define pr_debug(fmt, log, ...) bpf_log((void *)log, fmt, "", ##__VA_ARGS__) ++#define libbpf_print(level, fmt, ...) 
bpf_log((void *)prog_name, fmt, ##__VA_ARGS__) ++#else ++#include ++#include ++#include ++#include ++#include ++ ++#include "libbpf.h" ++#include "bpf.h" ++#include "btf.h" ++#include "str_error.h" ++#include "libbpf_internal.h" ++#endif ++ ++static bool is_flex_arr(const struct btf *btf, ++ const struct bpf_core_accessor *acc, ++ const struct btf_array *arr) ++{ ++ const struct btf_type *t; ++ ++ /* not a flexible array, if not inside a struct or has non-zero size */ ++ if (!acc->name || arr->nelems > 0) ++ return false; ++ ++ /* has to be the last member of enclosing struct */ ++ t = btf_type_by_id(btf, acc->type_id); ++ return acc->idx == btf_vlen(t) - 1; ++} ++ ++static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) ++{ ++ switch (kind) { ++ case BPF_CORE_FIELD_BYTE_OFFSET: return "byte_off"; ++ case BPF_CORE_FIELD_BYTE_SIZE: return "byte_sz"; ++ case BPF_CORE_FIELD_EXISTS: return "field_exists"; ++ case BPF_CORE_FIELD_SIGNED: return "signed"; ++ case BPF_CORE_FIELD_LSHIFT_U64: return "lshift_u64"; ++ case BPF_CORE_FIELD_RSHIFT_U64: return "rshift_u64"; ++ case BPF_CORE_TYPE_ID_LOCAL: return "local_type_id"; ++ case BPF_CORE_TYPE_ID_TARGET: return "target_type_id"; ++ case BPF_CORE_TYPE_EXISTS: return "type_exists"; ++ case BPF_CORE_TYPE_MATCHES: return "type_matches"; ++ case BPF_CORE_TYPE_SIZE: return "type_size"; ++ case BPF_CORE_ENUMVAL_EXISTS: return "enumval_exists"; ++ case BPF_CORE_ENUMVAL_VALUE: return "enumval_value"; ++ default: return "unknown"; ++ } ++} ++ ++static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) ++{ ++ switch (kind) { ++ case BPF_CORE_FIELD_BYTE_OFFSET: ++ case BPF_CORE_FIELD_BYTE_SIZE: ++ case BPF_CORE_FIELD_EXISTS: ++ case BPF_CORE_FIELD_SIGNED: ++ case BPF_CORE_FIELD_LSHIFT_U64: ++ case BPF_CORE_FIELD_RSHIFT_U64: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) ++{ ++ switch (kind) { ++ case BPF_CORE_TYPE_ID_LOCAL: ++ case BPF_CORE_TYPE_ID_TARGET: ++ case BPF_CORE_TYPE_EXISTS: ++ case BPF_CORE_TYPE_MATCHES: ++ case BPF_CORE_TYPE_SIZE: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) ++{ ++ switch (kind) { ++ case BPF_CORE_ENUMVAL_EXISTS: ++ case BPF_CORE_ENUMVAL_VALUE: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, ++ const struct btf *targ_btf, __u32 targ_id, int level) ++{ ++ const struct btf_type *local_type, *targ_type; ++ int depth = 32; /* max recursion depth */ ++ ++ /* caller made sure that names match (ignoring flavor suffix) */ ++ local_type = btf_type_by_id(local_btf, local_id); ++ targ_type = btf_type_by_id(targ_btf, targ_id); ++ if (!btf_kind_core_compat(local_type, targ_type)) ++ return 0; ++ ++recur: ++ depth--; ++ if (depth < 0) ++ return -EINVAL; ++ ++ local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); ++ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); ++ if (!local_type || !targ_type) ++ return -EINVAL; ++ ++ if (!btf_kind_core_compat(local_type, targ_type)) ++ return 0; ++ ++ switch (btf_kind(local_type)) { ++ case BTF_KIND_UNKN: ++ case BTF_KIND_STRUCT: ++ case BTF_KIND_UNION: ++ case BTF_KIND_ENUM: ++ case BTF_KIND_FWD: ++ case BTF_KIND_ENUM64: ++ return 1; ++ case BTF_KIND_INT: ++ /* just reject deprecated bitfield-like integers; all other ++ * integers are by default compatible between each other ++ */ ++ return 
btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0; ++ case BTF_KIND_PTR: ++ local_id = local_type->type; ++ targ_id = targ_type->type; ++ goto recur; ++ case BTF_KIND_ARRAY: ++ local_id = btf_array(local_type)->type; ++ targ_id = btf_array(targ_type)->type; ++ goto recur; ++ case BTF_KIND_FUNC_PROTO: { ++ struct btf_param *local_p = btf_params(local_type); ++ struct btf_param *targ_p = btf_params(targ_type); ++ __u16 local_vlen = btf_vlen(local_type); ++ __u16 targ_vlen = btf_vlen(targ_type); ++ int i, err; ++ ++ if (local_vlen != targ_vlen) ++ return 0; ++ ++ for (i = 0; i < local_vlen; i++, local_p++, targ_p++) { ++ if (level <= 0) ++ return -EINVAL; ++ ++ skip_mods_and_typedefs(local_btf, local_p->type, &local_id); ++ skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id); ++ err = __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, ++ level - 1); ++ if (err <= 0) ++ return err; ++ } ++ ++ /* tail recurse for return type check */ ++ skip_mods_and_typedefs(local_btf, local_type->type, &local_id); ++ skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id); ++ goto recur; ++ } ++ default: ++ pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n", ++ btf_kind_str(local_type), local_id, targ_id); ++ return 0; ++ } ++} ++ ++/* ++ * Turn bpf_core_relo into a low- and high-level spec representation, ++ * validating correctness along the way, as well as calculating resulting ++ * field bit offset, specified by accessor string. Low-level spec captures ++ * every single level of nestedness, including traversing anonymous ++ * struct/union members. High-level one only captures semantically meaningful ++ * "turning points": named fields and array indicies. ++ * E.g., for this case: ++ * ++ * struct sample { ++ * int __unimportant; ++ * struct { ++ * int __1; ++ * int __2; ++ * int a[7]; ++ * }; ++ * }; ++ * ++ * struct sample *s = ...; ++ * ++ * int x = &s->a[3]; // access string = '0:1:2:3' ++ * ++ * Low-level spec has 1:1 mapping with each element of access string (it's ++ * just a parsed access string representation): [0, 1, 2, 3]. ++ * ++ * High-level spec will capture only 3 points: ++ * - initial zero-index access by pointer (&s->... is the same as &s[0]...); ++ * - field 'a' access (corresponds to '2' in low-level spec); ++ * - array element #3 access (corresponds to '3' in low-level spec). ++ * ++ * Type-based relocations (TYPE_EXISTS/TYPE_MATCHES/TYPE_SIZE, ++ * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their ++ * spec and raw_spec are kept empty. ++ * ++ * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access ++ * string to specify enumerator's value index that need to be relocated. 
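++ *
++ * As a concrete illustration of the field case (assuming 4-byte int and no
++ * extra struct padding; the exact layout is compiler-dependent): the access
++ * string '0:1:2:3' from the example above resolves to
++ * bit_offset = (4 + 8 + 3 * 4) * 8 = 192, i.e. 4 bytes to skip
++ * __unimportant, 8 bytes to reach 'a' inside the anonymous struct, and
++ * 12 bytes for array element #3.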
++ */ ++int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, ++ const struct bpf_core_relo *relo, ++ struct bpf_core_spec *spec) ++{ ++ int access_idx, parsed_len, i; ++ struct bpf_core_accessor *acc; ++ const struct btf_type *t; ++ const char *name, *spec_str; ++ __u32 id, name_off; ++ __s64 sz; ++ ++ spec_str = btf__name_by_offset(btf, relo->access_str_off); ++ if (str_is_empty(spec_str) || *spec_str == ':') ++ return -EINVAL; ++ ++ memset(spec, 0, sizeof(*spec)); ++ spec->btf = btf; ++ spec->root_type_id = relo->type_id; ++ spec->relo_kind = relo->kind; ++ ++ /* type-based relocations don't have a field access string */ ++ if (core_relo_is_type_based(relo->kind)) { ++ if (strcmp(spec_str, "0")) ++ return -EINVAL; ++ return 0; ++ } ++ ++ /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */ ++ while (*spec_str) { ++ if (*spec_str == ':') ++ ++spec_str; ++ if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1) ++ return -EINVAL; ++ if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) ++ return -E2BIG; ++ spec_str += parsed_len; ++ spec->raw_spec[spec->raw_len++] = access_idx; ++ } ++ ++ if (spec->raw_len == 0) ++ return -EINVAL; ++ ++ t = skip_mods_and_typedefs(btf, relo->type_id, &id); ++ if (!t) ++ return -EINVAL; ++ ++ access_idx = spec->raw_spec[0]; ++ acc = &spec->spec[0]; ++ acc->type_id = id; ++ acc->idx = access_idx; ++ spec->len++; ++ ++ if (core_relo_is_enumval_based(relo->kind)) { ++ if (!btf_is_any_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t)) ++ return -EINVAL; ++ ++ /* record enumerator name in a first accessor */ ++ name_off = btf_is_enum(t) ? btf_enum(t)[access_idx].name_off ++ : btf_enum64(t)[access_idx].name_off; ++ acc->name = btf__name_by_offset(btf, name_off); ++ return 0; ++ } ++ ++ if (!core_relo_is_field_based(relo->kind)) ++ return -EINVAL; ++ ++ sz = btf__resolve_size(btf, id); ++ if (sz < 0) ++ return sz; ++ spec->bit_offset = access_idx * sz * 8; ++ ++ for (i = 1; i < spec->raw_len; i++) { ++ t = skip_mods_and_typedefs(btf, id, &id); ++ if (!t) ++ return -EINVAL; ++ ++ access_idx = spec->raw_spec[i]; ++ acc = &spec->spec[spec->len]; ++ ++ if (btf_is_composite(t)) { ++ const struct btf_member *m; ++ __u32 bit_offset; ++ ++ if (access_idx >= btf_vlen(t)) ++ return -EINVAL; ++ ++ bit_offset = btf_member_bit_offset(t, access_idx); ++ spec->bit_offset += bit_offset; ++ ++ m = btf_members(t) + access_idx; ++ if (m->name_off) { ++ name = btf__name_by_offset(btf, m->name_off); ++ if (str_is_empty(name)) ++ return -EINVAL; ++ ++ acc->type_id = id; ++ acc->idx = access_idx; ++ acc->name = name; ++ spec->len++; ++ } ++ ++ id = m->type; ++ } else if (btf_is_array(t)) { ++ const struct btf_array *a = btf_array(t); ++ bool flex; ++ ++ t = skip_mods_and_typedefs(btf, a->type, &id); ++ if (!t) ++ return -EINVAL; ++ ++ flex = is_flex_arr(btf, acc - 1, a); ++ if (!flex && access_idx >= a->nelems) ++ return -EINVAL; ++ ++ spec->spec[spec->len].type_id = id; ++ spec->spec[spec->len].idx = access_idx; ++ spec->len++; ++ ++ sz = btf__resolve_size(btf, id); ++ if (sz < 0) ++ return sz; ++ spec->bit_offset += access_idx * sz * 8; ++ } else { ++ pr_warn("prog '%s': relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", ++ prog_name, relo->type_id, spec_str, i, id, btf_kind_str(t)); ++ return -EINVAL; ++ } ++ } ++ ++ return 0; ++} ++ ++/* Check two types for compatibility for the purpose of field access ++ * relocation. 
const/volatile/restrict and typedefs are skipped to ensure we ++ * are relocating semantically compatible entities: ++ * - any two STRUCTs/UNIONs are compatible and can be mixed; ++ * - any two FWDs are compatible, if their names match (modulo flavor suffix); ++ * - any two PTRs are always compatible; ++ * - for ENUMs, names should be the same (ignoring flavor suffix) or at ++ * least one of enums should be anonymous; ++ * - for ENUMs, check sizes, names are ignored; ++ * - for INT, size and signedness are ignored; ++ * - any two FLOATs are always compatible; ++ * - for ARRAY, dimensionality is ignored, element types are checked for ++ * compatibility recursively; ++ * - everything else shouldn't be ever a target of relocation. ++ * These rules are not set in stone and probably will be adjusted as we get ++ * more experience with using BPF CO-RE relocations. ++ */ ++static int bpf_core_fields_are_compat(const struct btf *local_btf, ++ __u32 local_id, ++ const struct btf *targ_btf, ++ __u32 targ_id) ++{ ++ const struct btf_type *local_type, *targ_type; ++ ++recur: ++ local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); ++ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); ++ if (!local_type || !targ_type) ++ return -EINVAL; ++ ++ if (btf_is_composite(local_type) && btf_is_composite(targ_type)) ++ return 1; ++ if (!btf_kind_core_compat(local_type, targ_type)) ++ return 0; ++ ++ switch (btf_kind(local_type)) { ++ case BTF_KIND_PTR: ++ case BTF_KIND_FLOAT: ++ return 1; ++ case BTF_KIND_FWD: ++ case BTF_KIND_ENUM64: ++ case BTF_KIND_ENUM: { ++ const char *local_name, *targ_name; ++ size_t local_len, targ_len; ++ ++ local_name = btf__name_by_offset(local_btf, ++ local_type->name_off); ++ targ_name = btf__name_by_offset(targ_btf, targ_type->name_off); ++ local_len = bpf_core_essential_name_len(local_name); ++ targ_len = bpf_core_essential_name_len(targ_name); ++ /* one of them is anonymous or both w/ same flavor-less names */ ++ return local_len == 0 || targ_len == 0 || ++ (local_len == targ_len && ++ strncmp(local_name, targ_name, local_len) == 0); ++ } ++ case BTF_KIND_INT: ++ /* just reject deprecated bitfield-like integers; all other ++ * integers are by default compatible between each other ++ */ ++ return btf_int_offset(local_type) == 0 && ++ btf_int_offset(targ_type) == 0; ++ case BTF_KIND_ARRAY: ++ local_id = btf_array(local_type)->type; ++ targ_id = btf_array(targ_type)->type; ++ goto recur; ++ default: ++ return 0; ++ } ++} ++ ++/* ++ * Given single high-level named field accessor in local type, find ++ * corresponding high-level accessor for a target type. Along the way, ++ * maintain low-level spec for target as well. Also keep updating target ++ * bit offset. ++ * ++ * Searching is performed through recursive exhaustive enumeration of all ++ * fields of a struct/union. If there are any anonymous (embedded) ++ * structs/unions, they are recursively searched as well. If field with ++ * desired name is found, check compatibility between local and target types, ++ * before returning result. ++ * ++ * 1 is returned, if field is found. ++ * 0 is returned if no compatible field is found. ++ * <0 is returned on error. 
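++ *
++ * E.g. (hypothetical types, for illustration only): a local accessor for
++ * field 'a' in 'struct s { int a; }' still matches a target
++ * 'struct s { struct { int a; }; }', because the anonymous wrapper is
++ * descended into transparently and only the named field itself has to
++ * match by name and compatible type.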
++ */ ++static int bpf_core_match_member(const struct btf *local_btf, ++ const struct bpf_core_accessor *local_acc, ++ const struct btf *targ_btf, ++ __u32 targ_id, ++ struct bpf_core_spec *spec, ++ __u32 *next_targ_id) ++{ ++ const struct btf_type *local_type, *targ_type; ++ const struct btf_member *local_member, *m; ++ const char *local_name, *targ_name; ++ __u32 local_id; ++ int i, n, found; ++ ++ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); ++ if (!targ_type) ++ return -EINVAL; ++ if (!btf_is_composite(targ_type)) ++ return 0; ++ ++ local_id = local_acc->type_id; ++ local_type = btf_type_by_id(local_btf, local_id); ++ local_member = btf_members(local_type) + local_acc->idx; ++ local_name = btf__name_by_offset(local_btf, local_member->name_off); ++ ++ n = btf_vlen(targ_type); ++ m = btf_members(targ_type); ++ for (i = 0; i < n; i++, m++) { ++ __u32 bit_offset; ++ ++ bit_offset = btf_member_bit_offset(targ_type, i); ++ ++ /* too deep struct/union/array nesting */ ++ if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) ++ return -E2BIG; ++ ++ /* speculate this member will be the good one */ ++ spec->bit_offset += bit_offset; ++ spec->raw_spec[spec->raw_len++] = i; ++ ++ targ_name = btf__name_by_offset(targ_btf, m->name_off); ++ if (str_is_empty(targ_name)) { ++ /* embedded struct/union, we need to go deeper */ ++ found = bpf_core_match_member(local_btf, local_acc, ++ targ_btf, m->type, ++ spec, next_targ_id); ++ if (found) /* either found or error */ ++ return found; ++ } else if (strcmp(local_name, targ_name) == 0) { ++ /* matching named field */ ++ struct bpf_core_accessor *targ_acc; ++ ++ targ_acc = &spec->spec[spec->len++]; ++ targ_acc->type_id = targ_id; ++ targ_acc->idx = i; ++ targ_acc->name = targ_name; ++ ++ *next_targ_id = m->type; ++ found = bpf_core_fields_are_compat(local_btf, ++ local_member->type, ++ targ_btf, m->type); ++ if (!found) ++ spec->len--; /* pop accessor */ ++ return found; ++ } ++ /* member turned out not to be what we looked for */ ++ spec->bit_offset -= bit_offset; ++ spec->raw_len--; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Try to match local spec to a target type and, if successful, produce full ++ * target spec (high-level, low-level + bit offset). 
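++ *
++ * Returns 1 if the specs match, 0 if they do not, and a negative error
++ * code on failure (e.g. malformed spec or too deep struct/union/array
++ * nesting).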
++ */ ++static int bpf_core_spec_match(struct bpf_core_spec *local_spec, ++ const struct btf *targ_btf, __u32 targ_id, ++ struct bpf_core_spec *targ_spec) ++{ ++ const struct btf_type *targ_type; ++ const struct bpf_core_accessor *local_acc; ++ struct bpf_core_accessor *targ_acc; ++ int i, sz, matched; ++ __u32 name_off; ++ ++ memset(targ_spec, 0, sizeof(*targ_spec)); ++ targ_spec->btf = targ_btf; ++ targ_spec->root_type_id = targ_id; ++ targ_spec->relo_kind = local_spec->relo_kind; ++ ++ if (core_relo_is_type_based(local_spec->relo_kind)) { ++ if (local_spec->relo_kind == BPF_CORE_TYPE_MATCHES) ++ return bpf_core_types_match(local_spec->btf, ++ local_spec->root_type_id, ++ targ_btf, targ_id); ++ else ++ return bpf_core_types_are_compat(local_spec->btf, ++ local_spec->root_type_id, ++ targ_btf, targ_id); ++ } ++ ++ local_acc = &local_spec->spec[0]; ++ targ_acc = &targ_spec->spec[0]; ++ ++ if (core_relo_is_enumval_based(local_spec->relo_kind)) { ++ size_t local_essent_len, targ_essent_len; ++ const char *targ_name; ++ ++ /* has to resolve to an enum */ ++ targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id); ++ if (!btf_is_any_enum(targ_type)) ++ return 0; ++ ++ local_essent_len = bpf_core_essential_name_len(local_acc->name); ++ ++ for (i = 0; i < btf_vlen(targ_type); i++) { ++ if (btf_is_enum(targ_type)) ++ name_off = btf_enum(targ_type)[i].name_off; ++ else ++ name_off = btf_enum64(targ_type)[i].name_off; ++ ++ targ_name = btf__name_by_offset(targ_spec->btf, name_off); ++ targ_essent_len = bpf_core_essential_name_len(targ_name); ++ if (targ_essent_len != local_essent_len) ++ continue; ++ if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) { ++ targ_acc->type_id = targ_id; ++ targ_acc->idx = i; ++ targ_acc->name = targ_name; ++ targ_spec->len++; ++ targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; ++ targ_spec->raw_len++; ++ return 1; ++ } ++ } ++ return 0; ++ } ++ ++ if (!core_relo_is_field_based(local_spec->relo_kind)) ++ return -EINVAL; ++ ++ for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) { ++ targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, ++ &targ_id); ++ if (!targ_type) ++ return -EINVAL; ++ ++ if (local_acc->name) { ++ matched = bpf_core_match_member(local_spec->btf, ++ local_acc, ++ targ_btf, targ_id, ++ targ_spec, &targ_id); ++ if (matched <= 0) ++ return matched; ++ } else { ++ /* for i=0, targ_id is already treated as array element ++ * type (because it's the original struct), for others ++ * we should find array element type first ++ */ ++ if (i > 0) { ++ const struct btf_array *a; ++ bool flex; ++ ++ if (!btf_is_array(targ_type)) ++ return 0; ++ ++ a = btf_array(targ_type); ++ flex = is_flex_arr(targ_btf, targ_acc - 1, a); ++ if (!flex && local_acc->idx >= a->nelems) ++ return 0; ++ if (!skip_mods_and_typedefs(targ_btf, a->type, ++ &targ_id)) ++ return -EINVAL; ++ } ++ ++ /* too deep struct/union/array nesting */ ++ if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN) ++ return -E2BIG; ++ ++ targ_acc->type_id = targ_id; ++ targ_acc->idx = local_acc->idx; ++ targ_acc->name = NULL; ++ targ_spec->len++; ++ targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; ++ targ_spec->raw_len++; ++ ++ sz = btf__resolve_size(targ_btf, targ_id); ++ if (sz < 0) ++ return sz; ++ targ_spec->bit_offset += local_acc->idx * sz * 8; ++ } ++ } ++ ++ return 1; ++} ++ ++static int bpf_core_calc_field_relo(const char *prog_name, ++ const struct bpf_core_relo *relo, ++ const struct bpf_core_spec *spec, ++ __u64 *val, __u32 *field_sz, __u32 
*type_id, ++ bool *validate) ++{ ++ const struct bpf_core_accessor *acc; ++ const struct btf_type *t; ++ __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id; ++ const struct btf_member *m; ++ const struct btf_type *mt; ++ bool bitfield; ++ __s64 sz; ++ ++ *field_sz = 0; ++ ++ if (relo->kind == BPF_CORE_FIELD_EXISTS) { ++ *val = spec ? 1 : 0; ++ return 0; ++ } ++ ++ if (!spec) ++ return -EUCLEAN; /* request instruction poisoning */ ++ ++ acc = &spec->spec[spec->len - 1]; ++ t = btf_type_by_id(spec->btf, acc->type_id); ++ ++ /* a[n] accessor needs special handling */ ++ if (!acc->name) { ++ if (relo->kind == BPF_CORE_FIELD_BYTE_OFFSET) { ++ *val = spec->bit_offset / 8; ++ /* remember field size for load/store mem size */ ++ sz = btf__resolve_size(spec->btf, acc->type_id); ++ if (sz < 0) ++ return -EINVAL; ++ *field_sz = sz; ++ *type_id = acc->type_id; ++ } else if (relo->kind == BPF_CORE_FIELD_BYTE_SIZE) { ++ sz = btf__resolve_size(spec->btf, acc->type_id); ++ if (sz < 0) ++ return -EINVAL; ++ *val = sz; ++ } else { ++ pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n", ++ prog_name, relo->kind, relo->insn_off / 8); ++ return -EINVAL; ++ } ++ if (validate) ++ *validate = true; ++ return 0; ++ } ++ ++ m = btf_members(t) + acc->idx; ++ mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id); ++ bit_off = spec->bit_offset; ++ bit_sz = btf_member_bitfield_size(t, acc->idx); ++ ++ bitfield = bit_sz > 0; ++ if (bitfield) { ++ byte_sz = mt->size; ++ byte_off = bit_off / 8 / byte_sz * byte_sz; ++ /* figure out smallest int size necessary for bitfield load */ ++ while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) { ++ if (byte_sz >= 8) { ++ /* bitfield can't be read with 64-bit read */ ++ pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n", ++ prog_name, relo->kind, relo->insn_off / 8); ++ return -E2BIG; ++ } ++ byte_sz *= 2; ++ byte_off = bit_off / 8 / byte_sz * byte_sz; ++ } ++ } else { ++ sz = btf__resolve_size(spec->btf, field_type_id); ++ if (sz < 0) ++ return -EINVAL; ++ byte_sz = sz; ++ byte_off = spec->bit_offset / 8; ++ bit_sz = byte_sz * 8; ++ } ++ ++ /* for bitfields, all the relocatable aspects are ambiguous and we ++ * might disagree with compiler, so turn off validation of expected ++ * value, except for signedness ++ */ ++ if (validate) ++ *validate = !bitfield; ++ ++ switch (relo->kind) { ++ case BPF_CORE_FIELD_BYTE_OFFSET: ++ *val = byte_off; ++ if (!bitfield) { ++ *field_sz = byte_sz; ++ *type_id = field_type_id; ++ } ++ break; ++ case BPF_CORE_FIELD_BYTE_SIZE: ++ *val = byte_sz; ++ break; ++ case BPF_CORE_FIELD_SIGNED: ++ *val = (btf_is_any_enum(mt) && BTF_INFO_KFLAG(mt->info)) || ++ (btf_int_encoding(mt) & BTF_INT_SIGNED); ++ if (validate) ++ *validate = true; /* signedness is never ambiguous */ ++ break; ++ case BPF_CORE_FIELD_LSHIFT_U64: ++#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ++ *val = 64 - (bit_off + bit_sz - byte_off * 8); ++#else ++ *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8); ++#endif ++ break; ++ case BPF_CORE_FIELD_RSHIFT_U64: ++ *val = 64 - bit_sz; ++ if (validate) ++ *validate = true; /* right shift is never ambiguous */ ++ break; ++ case BPF_CORE_FIELD_EXISTS: ++ default: ++ return -EOPNOTSUPP; ++ } ++ ++ return 0; ++} ++ ++static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, ++ const struct bpf_core_spec *spec, ++ __u64 *val, bool *validate) ++{ ++ __s64 sz; ++ ++ /* by default, always check expected value in bpf_insn */ ++ if (validate) ++ *validate = true; ++ ++ /* 
type-based relos return zero when target type is not found */ ++ if (!spec) { ++ *val = 0; ++ return 0; ++ } ++ ++ switch (relo->kind) { ++ case BPF_CORE_TYPE_ID_TARGET: ++ *val = spec->root_type_id; ++ /* type ID, embedded in bpf_insn, might change during linking, ++ * so enforcing it is pointless ++ */ ++ if (validate) ++ *validate = false; ++ break; ++ case BPF_CORE_TYPE_EXISTS: ++ case BPF_CORE_TYPE_MATCHES: ++ *val = 1; ++ break; ++ case BPF_CORE_TYPE_SIZE: ++ sz = btf__resolve_size(spec->btf, spec->root_type_id); ++ if (sz < 0) ++ return -EINVAL; ++ *val = sz; ++ break; ++ case BPF_CORE_TYPE_ID_LOCAL: ++ /* BPF_CORE_TYPE_ID_LOCAL is handled specially and shouldn't get here */ ++ default: ++ return -EOPNOTSUPP; ++ } ++ ++ return 0; ++} ++ ++static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, ++ const struct bpf_core_spec *spec, ++ __u64 *val) ++{ ++ const struct btf_type *t; ++ ++ switch (relo->kind) { ++ case BPF_CORE_ENUMVAL_EXISTS: ++ *val = spec ? 1 : 0; ++ break; ++ case BPF_CORE_ENUMVAL_VALUE: ++ if (!spec) ++ return -EUCLEAN; /* request instruction poisoning */ ++ t = btf_type_by_id(spec->btf, spec->spec[0].type_id); ++ if (btf_is_enum(t)) ++ *val = btf_enum(t)[spec->spec[0].idx].val; ++ else ++ *val = btf_enum64_value(btf_enum64(t) + spec->spec[0].idx); ++ break; ++ default: ++ return -EOPNOTSUPP; ++ } ++ ++ return 0; ++} ++ ++/* Calculate original and target relocation values, given local and target ++ * specs and relocation kind. These values are calculated for each candidate. ++ * If there are multiple candidates, resulting values should all be consistent ++ * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity. ++ * If instruction has to be poisoned, *poison will be set to true. ++ */ ++static int bpf_core_calc_relo(const char *prog_name, ++ const struct bpf_core_relo *relo, ++ int relo_idx, ++ const struct bpf_core_spec *local_spec, ++ const struct bpf_core_spec *targ_spec, ++ struct bpf_core_relo_res *res) ++{ ++ int err = -EOPNOTSUPP; ++ ++ res->orig_val = 0; ++ res->new_val = 0; ++ res->poison = false; ++ res->validate = true; ++ res->fail_memsz_adjust = false; ++ res->orig_sz = res->new_sz = 0; ++ res->orig_type_id = res->new_type_id = 0; ++ ++ if (core_relo_is_field_based(relo->kind)) { ++ err = bpf_core_calc_field_relo(prog_name, relo, local_spec, ++ &res->orig_val, &res->orig_sz, ++ &res->orig_type_id, &res->validate); ++ err = err ?: bpf_core_calc_field_relo(prog_name, relo, targ_spec, ++ &res->new_val, &res->new_sz, ++ &res->new_type_id, NULL); ++ if (err) ++ goto done; ++ /* Validate if it's safe to adjust load/store memory size. ++ * Adjustments are performed only if original and new memory ++ * sizes differ. ++ */ ++ res->fail_memsz_adjust = false; ++ if (res->orig_sz != res->new_sz) { ++ const struct btf_type *orig_t, *new_t; ++ ++ orig_t = btf_type_by_id(local_spec->btf, res->orig_type_id); ++ new_t = btf_type_by_id(targ_spec->btf, res->new_type_id); ++ ++ /* There are two use cases in which it's safe to ++ * adjust load/store's mem size: ++ * - reading a 32-bit kernel pointer, while on BPF ++ * size pointers are always 64-bit; in this case ++ * it's safe to "downsize" instruction size due to ++ * pointer being treated as unsigned integer with ++ * zero-extended upper 32-bits; ++ * - reading unsigned integers, again due to ++ * zero-extension is preserving the value correctly. 
++ * ++ * In all other cases it's incorrect to attempt to ++ * load/store field because read value will be ++ * incorrect, so we poison relocated instruction. ++ */ ++ if (btf_is_ptr(orig_t) && btf_is_ptr(new_t)) ++ goto done; ++ if (btf_is_int(orig_t) && btf_is_int(new_t) && ++ btf_int_encoding(orig_t) != BTF_INT_SIGNED && ++ btf_int_encoding(new_t) != BTF_INT_SIGNED) ++ goto done; ++ ++ /* mark as invalid mem size adjustment, but this will ++ * only be checked for LDX/STX/ST insns ++ */ ++ res->fail_memsz_adjust = true; ++ } ++ } else if (core_relo_is_type_based(relo->kind)) { ++ err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val, &res->validate); ++ err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val, NULL); ++ } else if (core_relo_is_enumval_based(relo->kind)) { ++ err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val); ++ err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val); ++ } ++ ++done: ++ if (err == -EUCLEAN) { ++ /* EUCLEAN is used to signal instruction poisoning request */ ++ res->poison = true; ++ err = 0; ++ } else if (err == -EOPNOTSUPP) { ++ /* EOPNOTSUPP means unknown/unsupported relocation */ ++ pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n", ++ prog_name, relo_idx, core_relo_kind_str(relo->kind), ++ relo->kind, relo->insn_off / 8); ++ } ++ ++ return err; ++} ++ ++/* ++ * Turn instruction for which CO_RE relocation failed into invalid one with ++ * distinct signature. ++ */ ++static void bpf_core_poison_insn(const char *prog_name, int relo_idx, ++ int insn_idx, struct bpf_insn *insn) ++{ ++ pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n", ++ prog_name, relo_idx, insn_idx); ++ insn->code = BPF_JMP | BPF_CALL; ++ insn->dst_reg = 0; ++ insn->src_reg = 0; ++ insn->off = 0; ++ /* if this instruction is reachable (not a dead code), ++ * verifier will complain with the following message: ++ * invalid func unknown#195896080 ++ */ ++ insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */ ++} ++ ++static int insn_bpf_size_to_bytes(struct bpf_insn *insn) ++{ ++ switch (BPF_SIZE(insn->code)) { ++ case BPF_DW: return 8; ++ case BPF_W: return 4; ++ case BPF_H: return 2; ++ case BPF_B: return 1; ++ default: return -1; ++ } ++} ++ ++static int insn_bytes_to_bpf_size(__u32 sz) ++{ ++ switch (sz) { ++ case 8: return BPF_DW; ++ case 4: return BPF_W; ++ case 2: return BPF_H; ++ case 1: return BPF_B; ++ default: return -1; ++ } ++} ++ ++/* ++ * Patch relocatable BPF instruction. ++ * ++ * Patched value is determined by relocation kind and target specification. ++ * For existence relocations target spec will be NULL if field/type is not found. ++ * Expected insn->imm value is determined using relocation kind and local ++ * spec, and is checked before patching instruction. If actual insn->imm value ++ * is wrong, bail out with error. ++ * ++ * Currently supported classes of BPF instruction are: ++ * 1. rX = (assignment with immediate operand); ++ * 2. rX += (arithmetic operations with immediate operand); ++ * 3. rX = (load with 64-bit immediate value); ++ * 4. rX = *(T *)(rY + ), where T is one of {u8, u16, u32, u64}; ++ * 5. *(T *)(rX + ) = rY, where T is one of {u8, u16, u32, u64}; ++ * 6. *(T *)(rX + ) = , where T is one of {u8, u16, u32, u64}. 
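++ *
++ * For illustration (offsets are hypothetical): a load such as
++ * r0 = *(u32 *)(r1 + 8), compiled against the local BTF, may be patched to
++ * r0 = *(u32 *)(r1 + 16) when the target kernel places the field at byte
++ * offset 16; in that case only the instruction's off field is rewritten.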
++ */ ++int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn, ++ int insn_idx, const struct bpf_core_relo *relo, ++ int relo_idx, const struct bpf_core_relo_res *res) ++{ ++ __u64 orig_val, new_val; ++ __u8 class; ++ ++ class = BPF_CLASS(insn->code); ++ ++ if (res->poison) { ++poison: ++ /* poison second part of ldimm64 to avoid confusing error from ++ * verifier about "unknown opcode 00" ++ */ ++ if (is_ldimm64_insn(insn)) ++ bpf_core_poison_insn(prog_name, relo_idx, insn_idx + 1, insn + 1); ++ bpf_core_poison_insn(prog_name, relo_idx, insn_idx, insn); ++ return 0; ++ } ++ ++ orig_val = res->orig_val; ++ new_val = res->new_val; ++ ++ switch (class) { ++ case BPF_ALU: ++ case BPF_ALU64: ++ if (BPF_SRC(insn->code) != BPF_K) ++ return -EINVAL; ++ if (res->validate && insn->imm != orig_val) { ++ pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %llu -> %llu\n", ++ prog_name, relo_idx, ++ insn_idx, insn->imm, (unsigned long long)orig_val, ++ (unsigned long long)new_val); ++ return -EINVAL; ++ } ++ orig_val = insn->imm; ++ insn->imm = new_val; ++ pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %llu -> %llu\n", ++ prog_name, relo_idx, insn_idx, ++ (unsigned long long)orig_val, (unsigned long long)new_val); ++ break; ++ case BPF_LDX: ++ case BPF_ST: ++ case BPF_STX: ++ if (res->validate && insn->off != orig_val) { ++ pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %llu -> %llu\n", ++ prog_name, relo_idx, insn_idx, insn->off, (unsigned long long)orig_val, ++ (unsigned long long)new_val); ++ return -EINVAL; ++ } ++ if (new_val > SHRT_MAX) { ++ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %llu\n", ++ prog_name, relo_idx, insn_idx, (unsigned long long)new_val); ++ return -ERANGE; ++ } ++ if (res->fail_memsz_adjust) { ++ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. 
" ++ "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n", ++ prog_name, relo_idx, insn_idx); ++ goto poison; ++ } ++ ++ orig_val = insn->off; ++ insn->off = new_val; ++ pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %llu -> %llu\n", ++ prog_name, relo_idx, insn_idx, (unsigned long long)orig_val, ++ (unsigned long long)new_val); ++ ++ if (res->new_sz != res->orig_sz) { ++ int insn_bytes_sz, insn_bpf_sz; ++ ++ insn_bytes_sz = insn_bpf_size_to_bytes(insn); ++ if (insn_bytes_sz != res->orig_sz) { ++ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n", ++ prog_name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz); ++ return -EINVAL; ++ } ++ ++ insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz); ++ if (insn_bpf_sz < 0) { ++ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n", ++ prog_name, relo_idx, insn_idx, res->new_sz); ++ return -EINVAL; ++ } ++ ++ insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code); ++ pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n", ++ prog_name, relo_idx, insn_idx, res->orig_sz, res->new_sz); ++ } ++ break; ++ case BPF_LD: { ++ __u64 imm; ++ ++ if (!is_ldimm64_insn(insn) || ++ insn[0].src_reg != 0 || insn[0].off != 0 || ++ insn[1].code != 0 || insn[1].dst_reg != 0 || ++ insn[1].src_reg != 0 || insn[1].off != 0) { ++ pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n", ++ prog_name, relo_idx, insn_idx); ++ return -EINVAL; ++ } ++ ++ imm = (__u32)insn[0].imm | ((__u64)insn[1].imm << 32); ++ if (res->validate && imm != orig_val) { ++ pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %llu -> %llu\n", ++ prog_name, relo_idx, ++ insn_idx, (unsigned long long)imm, ++ (unsigned long long)orig_val, (unsigned long long)new_val); ++ return -EINVAL; ++ } ++ ++ insn[0].imm = new_val; ++ insn[1].imm = new_val >> 32; ++ pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %llu\n", ++ prog_name, relo_idx, insn_idx, ++ (unsigned long long)imm, (unsigned long long)new_val); ++ break; ++ } ++ default: ++ pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n", ++ prog_name, relo_idx, insn_idx, insn->code, ++ insn->src_reg, insn->dst_reg, insn->off, insn->imm); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++/* Output spec definition in the format: ++ * [] () + => @, ++ * where is a C-syntax view of recorded field access, e.g.: x.a[3].b ++ */ ++int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec) ++{ ++ const struct btf_type *t; ++ const char *s; ++ __u32 type_id; ++ int i, len = 0; ++ ++#define append_buf(fmt, args...) \ ++ ({ \ ++ int r; \ ++ r = snprintf(buf, buf_sz, fmt, ##args); \ ++ len += r; \ ++ if (r >= buf_sz) \ ++ r = buf_sz; \ ++ buf += r; \ ++ buf_sz -= r; \ ++ }) ++ ++ type_id = spec->root_type_id; ++ t = btf_type_by_id(spec->btf, type_id); ++ s = btf__name_by_offset(spec->btf, t->name_off); ++ ++ append_buf("<%s> [%u] %s %s", ++ core_relo_kind_str(spec->relo_kind), ++ type_id, btf_kind_str(t), str_is_empty(s) ? 
"" : s); ++ ++ if (core_relo_is_type_based(spec->relo_kind)) ++ return len; ++ ++ if (core_relo_is_enumval_based(spec->relo_kind)) { ++ t = skip_mods_and_typedefs(spec->btf, type_id, NULL); ++ if (btf_is_enum(t)) { ++ const struct btf_enum *e; ++ const char *fmt_str; ++ ++ e = btf_enum(t) + spec->raw_spec[0]; ++ s = btf__name_by_offset(spec->btf, e->name_off); ++ fmt_str = BTF_INFO_KFLAG(t->info) ? "::%s = %d" : "::%s = %u"; ++ append_buf(fmt_str, s, e->val); ++ } else { ++ const struct btf_enum64 *e; ++ const char *fmt_str; ++ ++ e = btf_enum64(t) + spec->raw_spec[0]; ++ s = btf__name_by_offset(spec->btf, e->name_off); ++ fmt_str = BTF_INFO_KFLAG(t->info) ? "::%s = %lld" : "::%s = %llu"; ++ append_buf(fmt_str, s, (unsigned long long)btf_enum64_value(e)); ++ } ++ return len; ++ } ++ ++ if (core_relo_is_field_based(spec->relo_kind)) { ++ for (i = 0; i < spec->len; i++) { ++ if (spec->spec[i].name) ++ append_buf(".%s", spec->spec[i].name); ++ else if (i > 0 || spec->spec[i].idx > 0) ++ append_buf("[%u]", spec->spec[i].idx); ++ } ++ ++ append_buf(" ("); ++ for (i = 0; i < spec->raw_len; i++) ++ append_buf("%s%d", i == 0 ? "" : ":", spec->raw_spec[i]); ++ ++ if (spec->bit_offset % 8) ++ append_buf(" @ offset %u.%u)", spec->bit_offset / 8, spec->bit_offset % 8); ++ else ++ append_buf(" @ offset %u)", spec->bit_offset / 8); ++ return len; ++ } ++ ++ return len; ++#undef append_buf ++} ++ ++/* ++ * Calculate CO-RE relocation target result. ++ * ++ * The outline and important points of the algorithm: ++ * 1. For given local type, find corresponding candidate target types. ++ * Candidate type is a type with the same "essential" name, ignoring ++ * everything after last triple underscore (___). E.g., `sample`, ++ * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates ++ * for each other. Names with triple underscore are referred to as ++ * "flavors" and are useful, among other things, to allow to ++ * specify/support incompatible variations of the same kernel struct, which ++ * might differ between different kernel versions and/or build ++ * configurations. ++ * ++ * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C ++ * converter, when deduplicated BTF of a kernel still contains more than ++ * one different types with the same name. In that case, ___2, ___3, etc ++ * are appended starting from second name conflict. But start flavors are ++ * also useful to be defined "locally", in BPF program, to extract same ++ * data from incompatible changes between different kernel ++ * versions/configurations. For instance, to handle field renames between ++ * kernel versions, one can use two flavors of the struct name with the ++ * same common name and use conditional relocations to extract that field, ++ * depending on target kernel version. ++ * 2. For each candidate type, try to match local specification to this ++ * candidate target type. Matching involves finding corresponding ++ * high-level spec accessors, meaning that all named fields should match, ++ * as well as all array accesses should be within the actual bounds. Also, ++ * types should be compatible (see bpf_core_fields_are_compat for details). ++ * 3. It is supported and expected that there might be multiple flavors ++ * matching the spec. As long as all the specs resolve to the same set of ++ * offsets across all candidates, there is no error. If there is any ++ * ambiguity, CO-RE relocation will fail. 
This is necessary to accommodate ++ * imperfection of BTF deduplication, which can cause slight duplication of ++ * the same BTF type, if some directly or indirectly referenced (by ++ * pointer) type gets resolved to different actual types in different ++ * object files. If such a situation occurs, deduplicated BTF will end up ++ * with two (or more) structurally identical types, which differ only in ++ * types they refer to through pointer. This should be OK in most cases and ++ * is not an error. ++ * 4. Candidate types search is performed by linearly scanning through all ++ * types in target BTF. It is anticipated that this is overall more ++ * efficient memory-wise and not significantly worse (if not better) ++ * CPU-wise compared to prebuilding a map from all local type names to ++ * a list of candidate type names. It's also sped up by caching resolved ++ * list of matching candidates per each local "root" type ID, that has at ++ * least one bpf_core_relo associated with it. This list is shared ++ * between multiple relocations for the same type ID and is updated as some ++ * of the candidates are pruned due to structural incompatibility. ++ */ ++int bpf_core_calc_relo_insn(const char *prog_name, ++ const struct bpf_core_relo *relo, ++ int relo_idx, ++ const struct btf *local_btf, ++ struct bpf_core_cand_list *cands, ++ struct bpf_core_spec *specs_scratch, ++ struct bpf_core_relo_res *targ_res) ++{ ++ struct bpf_core_spec *local_spec = &specs_scratch[0]; ++ struct bpf_core_spec *cand_spec = &specs_scratch[1]; ++ struct bpf_core_spec *targ_spec = &specs_scratch[2]; ++ struct bpf_core_relo_res cand_res; ++ const struct btf_type *local_type; ++ const char *local_name; ++ __u32 local_id; ++ char spec_buf[256]; ++ int i, j, err; ++ ++ local_id = relo->type_id; ++ local_type = btf_type_by_id(local_btf, local_id); ++ local_name = btf__name_by_offset(local_btf, local_type->name_off); ++ if (!local_name) ++ return -EINVAL; ++ ++ err = bpf_core_parse_spec(prog_name, local_btf, relo, local_spec); ++ if (err) { ++ const char *spec_str; ++ ++ spec_str = btf__name_by_offset(local_btf, relo->access_str_off); ++ pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", ++ prog_name, relo_idx, local_id, btf_kind_str(local_type), ++ str_is_empty(local_name) ? 
"" : local_name, ++ spec_str ?: "", err); ++ return -EINVAL; ++ } ++ ++ bpf_core_format_spec(spec_buf, sizeof(spec_buf), local_spec); ++ pr_debug("prog '%s': relo #%d: %s\n", prog_name, relo_idx, spec_buf); ++ ++ /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ ++ if (relo->kind == BPF_CORE_TYPE_ID_LOCAL) { ++ /* bpf_insn's imm value could get out of sync during linking */ ++ memset(targ_res, 0, sizeof(*targ_res)); ++ targ_res->validate = false; ++ targ_res->poison = false; ++ targ_res->orig_val = local_spec->root_type_id; ++ targ_res->new_val = local_spec->root_type_id; ++ return 0; ++ } ++ ++ /* libbpf doesn't support candidate search for anonymous types */ ++ if (str_is_empty(local_name)) { ++ pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n", ++ prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind); ++ return -EOPNOTSUPP; ++ } ++ ++ for (i = 0, j = 0; i < cands->len; i++) { ++ err = bpf_core_spec_match(local_spec, cands->cands[i].btf, ++ cands->cands[i].id, cand_spec); ++ if (err < 0) { ++ bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec); ++ pr_warn("prog '%s': relo #%d: error matching candidate #%d %s: %d\n ", ++ prog_name, relo_idx, i, spec_buf, err); ++ return err; ++ } ++ ++ bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec); ++ pr_debug("prog '%s': relo #%d: %s candidate #%d %s\n", prog_name, ++ relo_idx, err == 0 ? "non-matching" : "matching", i, spec_buf); ++ ++ if (err == 0) ++ continue; ++ ++ err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, cand_spec, &cand_res); ++ if (err) ++ return err; ++ ++ if (j == 0) { ++ *targ_res = cand_res; ++ *targ_spec = *cand_spec; ++ } else if (cand_spec->bit_offset != targ_spec->bit_offset) { ++ /* if there are many field relo candidates, they ++ * should all resolve to the same bit offset ++ */ ++ pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n", ++ prog_name, relo_idx, cand_spec->bit_offset, ++ targ_spec->bit_offset); ++ return -EINVAL; ++ } else if (cand_res.poison != targ_res->poison || ++ cand_res.new_val != targ_res->new_val) { ++ /* all candidates should result in the same relocation ++ * decision and value, otherwise it's dangerous to ++ * proceed due to ambiguity ++ */ ++ pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %llu != %s %llu\n", ++ prog_name, relo_idx, ++ cand_res.poison ? "failure" : "success", ++ (unsigned long long)cand_res.new_val, ++ targ_res->poison ? "failure" : "success", ++ (unsigned long long)targ_res->new_val); ++ return -EINVAL; ++ } ++ ++ cands->cands[j++] = cands->cands[i]; ++ } ++ ++ /* ++ * For BPF_CORE_FIELD_EXISTS relo or when used BPF program has field ++ * existence checks or kernel version/config checks, it's expected ++ * that we might not find any candidates. In this case, if field ++ * wasn't found in any candidate, the list of candidates shouldn't ++ * change at all, we'll just handle relocating appropriately, ++ * depending on relo's kind. ++ */ ++ if (j > 0) ++ cands->len = j; ++ ++ /* ++ * If no candidates were found, it might be both a programmer error, ++ * as well as expected case, depending whether instruction w/ ++ * relocation is guarded in some way that makes it unreachable (dead ++ * code) if relocation can't be resolved. This is handled in ++ * bpf_core_patch_insn() uniformly by replacing that instruction with ++ * BPF helper call insn (using invalid helper ID). 
If that instruction ++ * is indeed unreachable, then it will be ignored and eliminated by ++ * verifier. If it was an error, then verifier will complain and point ++ * to a specific instruction number in its log. ++ */ ++ if (j == 0) { ++ pr_debug("prog '%s': relo #%d: no matching targets found\n", ++ prog_name, relo_idx); ++ ++ /* calculate single target relo result explicitly */ ++ err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, NULL, targ_res); ++ if (err) ++ return err; ++ } ++ ++ return 0; ++} ++ ++static bool bpf_core_names_match(const struct btf *local_btf, size_t local_name_off, ++ const struct btf *targ_btf, size_t targ_name_off) ++{ ++ const char *local_n, *targ_n; ++ size_t local_len, targ_len; ++ ++ local_n = btf__name_by_offset(local_btf, local_name_off); ++ targ_n = btf__name_by_offset(targ_btf, targ_name_off); ++ ++ if (str_is_empty(targ_n)) ++ return str_is_empty(local_n); ++ ++ targ_len = bpf_core_essential_name_len(targ_n); ++ local_len = bpf_core_essential_name_len(local_n); ++ ++ return targ_len == local_len && strncmp(local_n, targ_n, local_len) == 0; ++} ++ ++static int bpf_core_enums_match(const struct btf *local_btf, const struct btf_type *local_t, ++ const struct btf *targ_btf, const struct btf_type *targ_t) ++{ ++ __u16 local_vlen = btf_vlen(local_t); ++ __u16 targ_vlen = btf_vlen(targ_t); ++ int i, j; ++ ++ if (local_t->size != targ_t->size) ++ return 0; ++ ++ if (local_vlen > targ_vlen) ++ return 0; ++ ++ /* iterate over the local enum's variants and make sure each has ++ * a symbolic name correspondent in the target ++ */ ++ for (i = 0; i < local_vlen; i++) { ++ bool matched = false; ++ __u32 local_n_off, targ_n_off; ++ ++ local_n_off = btf_is_enum(local_t) ? btf_enum(local_t)[i].name_off : ++ btf_enum64(local_t)[i].name_off; ++ ++ for (j = 0; j < targ_vlen; j++) { ++ targ_n_off = btf_is_enum(targ_t) ? btf_enum(targ_t)[j].name_off : ++ btf_enum64(targ_t)[j].name_off; ++ ++ if (bpf_core_names_match(local_btf, local_n_off, targ_btf, targ_n_off)) { ++ matched = true; ++ break; ++ } ++ } ++ ++ if (!matched) ++ return 0; ++ } ++ return 1; ++} ++ ++static int bpf_core_composites_match(const struct btf *local_btf, const struct btf_type *local_t, ++ const struct btf *targ_btf, const struct btf_type *targ_t, ++ bool behind_ptr, int level) ++{ ++ const struct btf_member *local_m = btf_members(local_t); ++ __u16 local_vlen = btf_vlen(local_t); ++ __u16 targ_vlen = btf_vlen(targ_t); ++ int i, j, err; ++ ++ if (local_vlen > targ_vlen) ++ return 0; ++ ++ /* check that all local members have a match in the target */ ++ for (i = 0; i < local_vlen; i++, local_m++) { ++ const struct btf_member *targ_m = btf_members(targ_t); ++ bool matched = false; ++ ++ for (j = 0; j < targ_vlen; j++, targ_m++) { ++ if (!bpf_core_names_match(local_btf, local_m->name_off, ++ targ_btf, targ_m->name_off)) ++ continue; ++ ++ err = __bpf_core_types_match(local_btf, local_m->type, targ_btf, ++ targ_m->type, behind_ptr, level - 1); ++ if (err < 0) ++ return err; ++ if (err > 0) { ++ matched = true; ++ break; ++ } ++ } ++ ++ if (!matched) ++ return 0; ++ } ++ return 1; ++} ++ ++/* Check that two types "match". This function assumes that root types were ++ * already checked for name match. ++ * ++ * The matching relation is defined as follows: ++ * - modifiers and typedefs are stripped (and, hence, effectively ignored) ++ * - generally speaking types need to be of same kind (struct vs. struct, union ++ * vs. union, etc.) 
++ * - exceptions are struct/union behind a pointer which could also match a ++ * forward declaration of a struct or union, respectively, and enum vs. ++ * enum64 (see below) ++ * Then, depending on type: ++ * - integers: ++ * - match if size and signedness match ++ * - arrays & pointers: ++ * - target types are recursively matched ++ * - structs & unions: ++ * - local members need to exist in target with the same name ++ * - for each member we recursively check match unless it is already behind a ++ * pointer, in which case we only check matching names and compatible kind ++ * - enums: ++ * - local variants have to have a match in target by symbolic name (but not ++ * numeric value) ++ * - size has to match (but enum may match enum64 and vice versa) ++ * - function pointers: ++ * - number and position of arguments in local type has to match target ++ * - for each argument and the return value we recursively check match ++ */ ++int __bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, ++ __u32 targ_id, bool behind_ptr, int level) ++{ ++ const struct btf_type *local_t, *targ_t; ++ int depth = 32; /* max recursion depth */ ++ __u16 local_k, targ_k; ++ ++ if (level <= 0) ++ return -EINVAL; ++ ++ local_t = btf_type_by_id(local_btf, local_id); ++ targ_t = btf_type_by_id(targ_btf, targ_id); ++ ++recur: ++ depth--; ++ if (depth < 0) ++ return -EINVAL; ++ ++ local_t = skip_mods_and_typedefs(local_btf, local_id, &local_id); ++ targ_t = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); ++ if (!local_t || !targ_t) ++ return -EINVAL; ++ ++ /* While the name check happens after typedefs are skipped, root-level ++ * typedefs would still be name-matched as that's the contract with ++ * callers. ++ */ ++ if (!bpf_core_names_match(local_btf, local_t->name_off, targ_btf, targ_t->name_off)) ++ return 0; ++ ++ local_k = btf_kind(local_t); ++ targ_k = btf_kind(targ_t); ++ ++ switch (local_k) { ++ case BTF_KIND_UNKN: ++ return local_k == targ_k; ++ case BTF_KIND_FWD: { ++ bool local_f = BTF_INFO_KFLAG(local_t->info); ++ ++ if (behind_ptr) { ++ if (local_k == targ_k) ++ return local_f == BTF_INFO_KFLAG(targ_t->info); ++ ++ /* for forward declarations kflag dictates whether the ++ * target is a struct (0) or union (1) ++ */ ++ return (targ_k == BTF_KIND_STRUCT && !local_f) || ++ (targ_k == BTF_KIND_UNION && local_f); ++ } else { ++ if (local_k != targ_k) ++ return 0; ++ ++ /* match if the forward declaration is for the same kind */ ++ return local_f == BTF_INFO_KFLAG(targ_t->info); ++ } ++ } ++ case BTF_KIND_ENUM: ++ case BTF_KIND_ENUM64: ++ if (!btf_is_any_enum(targ_t)) ++ return 0; ++ ++ return bpf_core_enums_match(local_btf, local_t, targ_btf, targ_t); ++ case BTF_KIND_STRUCT: ++ case BTF_KIND_UNION: ++ if (behind_ptr) { ++ bool targ_f = BTF_INFO_KFLAG(targ_t->info); ++ ++ if (local_k == targ_k) ++ return 1; ++ ++ if (targ_k != BTF_KIND_FWD) ++ return 0; ++ ++ return (local_k == BTF_KIND_UNION) == targ_f; ++ } else { ++ if (local_k != targ_k) ++ return 0; ++ ++ return bpf_core_composites_match(local_btf, local_t, targ_btf, targ_t, ++ behind_ptr, level); ++ } ++ case BTF_KIND_INT: { ++ __u8 local_sgn; ++ __u8 targ_sgn; ++ ++ if (local_k != targ_k) ++ return 0; ++ ++ local_sgn = btf_int_encoding(local_t) & BTF_INT_SIGNED; ++ targ_sgn = btf_int_encoding(targ_t) & BTF_INT_SIGNED; ++ ++ return local_t->size == targ_t->size && local_sgn == targ_sgn; ++ } ++ case BTF_KIND_PTR: ++ if (local_k != targ_k) ++ return 0; ++ ++ behind_ptr = true; ++ ++ local_id = local_t->type; 
++ targ_id = targ_t->type; ++ goto recur; ++ case BTF_KIND_ARRAY: { ++ const struct btf_array *local_array = btf_array(local_t); ++ const struct btf_array *targ_array = btf_array(targ_t); ++ ++ if (local_k != targ_k) ++ return 0; ++ ++ if (local_array->nelems != targ_array->nelems) ++ return 0; ++ ++ local_id = local_array->type; ++ targ_id = targ_array->type; ++ goto recur; ++ } ++ case BTF_KIND_FUNC_PROTO: { ++ struct btf_param *local_p = btf_params(local_t); ++ struct btf_param *targ_p = btf_params(targ_t); ++ __u16 local_vlen = btf_vlen(local_t); ++ __u16 targ_vlen = btf_vlen(targ_t); ++ int i, err; ++ ++ if (local_k != targ_k) ++ return 0; ++ ++ if (local_vlen != targ_vlen) ++ return 0; ++ ++ for (i = 0; i < local_vlen; i++, local_p++, targ_p++) { ++ err = __bpf_core_types_match(local_btf, local_p->type, targ_btf, ++ targ_p->type, behind_ptr, level - 1); ++ if (err <= 0) ++ return err; ++ } ++ ++ /* tail recurse for return type check */ ++ local_id = local_t->type; ++ targ_id = targ_t->type; ++ goto recur; ++ } ++ default: ++ pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n", ++ btf_kind_str(local_t), local_id, targ_id); ++ return 0; ++ } ++} +diff --git a/src/cc/libbpf/src/relo_core.h b/src/cc/libbpf/src/relo_core.h +new file mode 100644 +index 0000000..1c0566d +--- /dev/null ++++ b/src/cc/libbpf/src/relo_core.h +@@ -0,0 +1,99 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++/* Copyright (c) 2019 Facebook */ ++ ++#ifndef __RELO_CORE_H ++#define __RELO_CORE_H ++ ++#include ++ ++struct bpf_core_cand { ++ const struct btf *btf; ++ __u32 id; ++}; ++ ++/* dynamically sized list of type IDs and its associated struct btf */ ++struct bpf_core_cand_list { ++ struct bpf_core_cand *cands; ++ int len; ++}; ++ ++#define BPF_CORE_SPEC_MAX_LEN 64 ++ ++/* represents BPF CO-RE field or array element accessor */ ++struct bpf_core_accessor { ++ __u32 type_id; /* struct/union type or array element type */ ++ __u32 idx; /* field index or array index */ ++ const char *name; /* field name or NULL for array accessor */ ++}; ++ ++struct bpf_core_spec { ++ const struct btf *btf; ++ /* high-level spec: named fields and array indices only */ ++ struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; ++ /* original unresolved (no skip_mods_or_typedefs) root type ID */ ++ __u32 root_type_id; ++ /* CO-RE relocation kind */ ++ enum bpf_core_relo_kind relo_kind; ++ /* high-level spec length */ ++ int len; ++ /* raw, low-level spec: 1-to-1 with accessor spec string */ ++ int raw_spec[BPF_CORE_SPEC_MAX_LEN]; ++ /* raw spec length */ ++ int raw_len; ++ /* field bit offset represented by spec */ ++ __u32 bit_offset; ++}; ++ ++struct bpf_core_relo_res { ++ /* expected value in the instruction, unless validate == false */ ++ __u64 orig_val; ++ /* new value that needs to be patched up to */ ++ __u64 new_val; ++ /* relocation unsuccessful, poison instruction, but don't fail load */ ++ bool poison; ++ /* some relocations can't be validated against orig_val */ ++ bool validate; ++ /* for field byte offset relocations or the forms: ++ * *(T *)(rX + ) = rY ++ * rX = *(T *)(rY + ), ++ * we remember original and resolved field size to adjust direct ++ * memory loads of pointers and integers; this is necessary for 32-bit ++ * host kernel architectures, but also allows to automatically ++ * relocate fields that were resized from, e.g., u32 to u64, etc. 
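++ *
++ * For example (sizes are illustrative): if the local BTF describes the
++ * field as a 4-byte unsigned int while the target kernel's BTF describes
++ * it as 8 bytes, then orig_sz = 4 and new_sz = 8, and the load's size
++ * modifier is widened accordingly when the relocation is applied.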
++ */ ++ bool fail_memsz_adjust; ++ __u32 orig_sz; ++ __u32 orig_type_id; ++ __u32 new_sz; ++ __u32 new_type_id; ++}; ++ ++int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, ++ const struct btf *targ_btf, __u32 targ_id, int level); ++int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, ++ const struct btf *targ_btf, __u32 targ_id); ++int __bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, ++ __u32 targ_id, bool behind_ptr, int level); ++int bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, ++ __u32 targ_id); ++ ++size_t bpf_core_essential_name_len(const char *name); ++ ++int bpf_core_calc_relo_insn(const char *prog_name, ++ const struct bpf_core_relo *relo, int relo_idx, ++ const struct btf *local_btf, ++ struct bpf_core_cand_list *cands, ++ struct bpf_core_spec *specs_scratch, ++ struct bpf_core_relo_res *targ_res); ++ ++int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn, ++ int insn_idx, const struct bpf_core_relo *relo, ++ int relo_idx, const struct bpf_core_relo_res *res); ++ ++int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, ++ const struct bpf_core_relo *relo, ++ struct bpf_core_spec *spec); ++ ++int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec); ++ ++#endif +diff --git a/src/cc/libbpf/src/ringbuf.c b/src/cc/libbpf/src/ringbuf.c +new file mode 100644 +index 0000000..8bc117b +--- /dev/null ++++ b/src/cc/libbpf/src/ringbuf.c +@@ -0,0 +1,302 @@ ++// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) ++/* ++ * Ring buffer operations. ++ * ++ * Copyright (C) 2020 Facebook, Inc. ++ */ ++#ifndef _GNU_SOURCE ++#define _GNU_SOURCE ++#endif ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "libbpf.h" ++#include "libbpf_internal.h" ++#include "bpf.h" ++ ++struct ring { ++ ring_buffer_sample_fn sample_cb; ++ void *ctx; ++ void *data; ++ unsigned long *consumer_pos; ++ unsigned long *producer_pos; ++ unsigned long mask; ++ int map_fd; ++}; ++ ++struct ring_buffer { ++ struct epoll_event *events; ++ struct ring *rings; ++ size_t page_size; ++ int epoll_fd; ++ int ring_cnt; ++}; ++ ++static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r) ++{ ++ if (r->consumer_pos) { ++ munmap(r->consumer_pos, rb->page_size); ++ r->consumer_pos = NULL; ++ } ++ if (r->producer_pos) { ++ munmap(r->producer_pos, rb->page_size + 2 * (r->mask + 1)); ++ r->producer_pos = NULL; ++ } ++} ++ ++/* Add extra RINGBUF maps to this ring buffer manager */ ++int ring_buffer__add(struct ring_buffer *rb, int map_fd, ++ ring_buffer_sample_fn sample_cb, void *ctx) ++{ ++ struct bpf_map_info info; ++ __u32 len = sizeof(info); ++ struct epoll_event *e; ++ struct ring *r; ++ void *tmp; ++ int err; ++ ++ memset(&info, 0, sizeof(info)); ++ ++ err = bpf_obj_get_info_by_fd(map_fd, &info, &len); ++ if (err) { ++ err = -errno; ++ pr_warn("ringbuf: failed to get map info for fd=%d: %d\n", ++ map_fd, err); ++ return libbpf_err(err); ++ } ++ ++ if (info.type != BPF_MAP_TYPE_RINGBUF) { ++ pr_warn("ringbuf: map fd=%d is not BPF_MAP_TYPE_RINGBUF\n", ++ map_fd); ++ return libbpf_err(-EINVAL); ++ } ++ ++ tmp = libbpf_reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings)); ++ if (!tmp) ++ return libbpf_err(-ENOMEM); ++ rb->rings = tmp; ++ ++ tmp = libbpf_reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events)); ++ if (!tmp) ++ return libbpf_err(-ENOMEM); 
++ rb->events = tmp; ++ ++ r = &rb->rings[rb->ring_cnt]; ++ memset(r, 0, sizeof(*r)); ++ ++ r->map_fd = map_fd; ++ r->sample_cb = sample_cb; ++ r->ctx = ctx; ++ r->mask = info.max_entries - 1; ++ ++ /* Map writable consumer page */ ++ tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, ++ map_fd, 0); ++ if (tmp == MAP_FAILED) { ++ err = -errno; ++ pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n", ++ map_fd, err); ++ return libbpf_err(err); ++ } ++ r->consumer_pos = tmp; ++ ++ /* Map read-only producer page and data pages. We map twice as big ++ * data size to allow simple reading of samples that wrap around the ++ * end of a ring buffer. See kernel implementation for details. ++ * */ ++ tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, PROT_READ, ++ MAP_SHARED, map_fd, rb->page_size); ++ if (tmp == MAP_FAILED) { ++ err = -errno; ++ ringbuf_unmap_ring(rb, r); ++ pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n", ++ map_fd, err); ++ return libbpf_err(err); ++ } ++ r->producer_pos = tmp; ++ r->data = tmp + rb->page_size; ++ ++ e = &rb->events[rb->ring_cnt]; ++ memset(e, 0, sizeof(*e)); ++ ++ e->events = EPOLLIN; ++ e->data.fd = rb->ring_cnt; ++ if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, e) < 0) { ++ err = -errno; ++ ringbuf_unmap_ring(rb, r); ++ pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n", ++ map_fd, err); ++ return libbpf_err(err); ++ } ++ ++ rb->ring_cnt++; ++ return 0; ++} ++ ++void ring_buffer__free(struct ring_buffer *rb) ++{ ++ int i; ++ ++ if (!rb) ++ return; ++ ++ for (i = 0; i < rb->ring_cnt; ++i) ++ ringbuf_unmap_ring(rb, &rb->rings[i]); ++ if (rb->epoll_fd >= 0) ++ close(rb->epoll_fd); ++ ++ free(rb->events); ++ free(rb->rings); ++ free(rb); ++} ++ ++struct ring_buffer * ++ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx, ++ const struct ring_buffer_opts *opts) ++{ ++ struct ring_buffer *rb; ++ int err; ++ ++ if (!OPTS_VALID(opts, ring_buffer_opts)) ++ return errno = EINVAL, NULL; ++ ++ rb = calloc(1, sizeof(*rb)); ++ if (!rb) ++ return errno = ENOMEM, NULL; ++ ++ rb->page_size = getpagesize(); ++ ++ rb->epoll_fd = epoll_create1(EPOLL_CLOEXEC); ++ if (rb->epoll_fd < 0) { ++ err = -errno; ++ pr_warn("ringbuf: failed to create epoll instance: %d\n", err); ++ goto err_out; ++ } ++ ++ err = ring_buffer__add(rb, map_fd, sample_cb, ctx); ++ if (err) ++ goto err_out; ++ ++ return rb; ++ ++err_out: ++ ring_buffer__free(rb); ++ return errno = -err, NULL; ++} ++ ++static inline int roundup_len(__u32 len) ++{ ++ /* clear out top 2 bits (discard and busy, if set) */ ++ len <<= 2; ++ len >>= 2; ++ /* add length prefix */ ++ len += BPF_RINGBUF_HDR_SZ; ++ /* round up to 8 byte alignment */ ++ return (len + 7) / 8 * 8; ++} ++ ++static int64_t ringbuf_process_ring(struct ring* r) ++{ ++ int *len_ptr, len, err; ++ /* 64-bit to avoid overflow in case of extreme application behavior */ ++ int64_t cnt = 0; ++ unsigned long cons_pos, prod_pos; ++ bool got_new_data; ++ void *sample; ++ ++ cons_pos = smp_load_acquire(r->consumer_pos); ++ do { ++ got_new_data = false; ++ prod_pos = smp_load_acquire(r->producer_pos); ++ while (cons_pos < prod_pos) { ++ len_ptr = r->data + (cons_pos & r->mask); ++ len = smp_load_acquire(len_ptr); ++ ++ /* sample not committed yet, bail out for now */ ++ if (len & BPF_RINGBUF_BUSY_BIT) ++ goto done; ++ ++ got_new_data = true; ++ cons_pos += roundup_len(len); ++ ++ if ((len & BPF_RINGBUF_DISCARD_BIT) == 0) { ++ sample = (void *)len_ptr + BPF_RINGBUF_HDR_SZ; ++ err = 
r->sample_cb(r->ctx, sample, len); ++ if (err < 0) { ++ /* update consumer pos and bail out */ ++ smp_store_release(r->consumer_pos, ++ cons_pos); ++ return err; ++ } ++ cnt++; ++ } ++ ++ smp_store_release(r->consumer_pos, cons_pos); ++ } ++ } while (got_new_data); ++done: ++ return cnt; ++} ++ ++/* Consume available ring buffer(s) data without event polling. ++ * Returns number of records consumed across all registered ring buffers (or ++ * INT_MAX, whichever is less), or negative number if any of the callbacks ++ * return error. ++ */ ++int ring_buffer__consume(struct ring_buffer *rb) ++{ ++ int64_t err, res = 0; ++ int i; ++ ++ for (i = 0; i < rb->ring_cnt; i++) { ++ struct ring *ring = &rb->rings[i]; ++ ++ err = ringbuf_process_ring(ring); ++ if (err < 0) ++ return libbpf_err(err); ++ res += err; ++ } ++ if (res > INT_MAX) ++ return INT_MAX; ++ return res; ++} ++ ++/* Poll for available data and consume records, if any are available. ++ * Returns number of records consumed (or INT_MAX, whichever is less), or ++ * negative number, if any of the registered callbacks returned error. ++ */ ++int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms) ++{ ++ int i, cnt; ++ int64_t err, res = 0; ++ ++ cnt = epoll_wait(rb->epoll_fd, rb->events, rb->ring_cnt, timeout_ms); ++ if (cnt < 0) ++ return libbpf_err(-errno); ++ ++ for (i = 0; i < cnt; i++) { ++ __u32 ring_id = rb->events[i].data.fd; ++ struct ring *ring = &rb->rings[ring_id]; ++ ++ err = ringbuf_process_ring(ring); ++ if (err < 0) ++ return libbpf_err(err); ++ res += err; ++ } ++ if (res > INT_MAX) ++ return INT_MAX; ++ return res; ++} ++ ++/* Get an fd that can be used to sleep until data is available in the ring(s) */ ++int ring_buffer__epoll_fd(const struct ring_buffer *rb) ++{ ++ return rb->epoll_fd; ++} +diff --git a/src/cc/libbpf/src/skel_internal.h b/src/cc/libbpf/src/skel_internal.h +new file mode 100644 +index 0000000..bd6f450 +--- /dev/null ++++ b/src/cc/libbpf/src/skel_internal.h +@@ -0,0 +1,349 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++/* Copyright (c) 2021 Facebook */ ++#ifndef __SKEL_INTERNAL_H ++#define __SKEL_INTERNAL_H ++ ++#ifdef __KERNEL__ ++#include ++#include ++#include ++#include ++#include ++#else ++#include ++#include ++#include ++#include ++#include "bpf.h" ++#endif ++ ++#ifndef __NR_bpf ++# if defined(__mips__) && defined(_ABIO32) ++# define __NR_bpf 4355 ++# elif defined(__mips__) && defined(_ABIN32) ++# define __NR_bpf 6319 ++# elif defined(__mips__) && defined(_ABI64) ++# define __NR_bpf 5315 ++# endif ++#endif ++ ++/* This file is a base header for auto-generated *.lskel.h files. ++ * Its contents will change and may become part of auto-generation in the future. ++ * ++ * The layout of bpf_[map|prog]_desc and bpf_loader_ctx is feature dependent ++ * and will change from one version of libbpf to another and features ++ * requested during loader program generation. 
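Editor's note: since ringbuf.c above completes the user-space consumer API (ring_buffer__new(), ring_buffer__poll(), ring_buffer__consume(), ring_buffer__free()), a minimal consumer-side sketch may help reviewers. The callback, the map-fd parameter, the 100ms timeout and the installed <bpf/libbpf.h> header path are illustrative assumptions, not part of this patch.

#include <errno.h>
#include <stdio.h>
#include <bpf/libbpf.h>

/* Hypothetical callback: 'data' points at one committed sample of 'size' bytes. */
static int handle_sample(void *ctx, void *data, size_t size)
{
	printf("got %zu-byte sample\n", size);
	return 0; /* a negative return would abort consumption with that error */
}

/* 'ringbuf_map_fd' is assumed to be the fd of a BPF_MAP_TYPE_RINGBUF map. */
int consume_samples(int ringbuf_map_fd)
{
	struct ring_buffer *rb;
	int err;

	rb = ring_buffer__new(ringbuf_map_fd, handle_sample, NULL, NULL);
	if (!rb)
		return -errno;

	/* poll until an error (including -EINTR on signal); >= 0 is records consumed */
	while ((err = ring_buffer__poll(rb, 100 /* ms */)) >= 0)
		;

	ring_buffer__free(rb);
	return err;
}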
++ */ ++struct bpf_map_desc { ++ /* output of the loader prog */ ++ int map_fd; ++ /* input for the loader prog */ ++ __u32 max_entries; ++ __aligned_u64 initial_value; ++}; ++struct bpf_prog_desc { ++ int prog_fd; ++}; ++ ++enum { ++ BPF_SKEL_KERNEL = (1ULL << 0), ++}; ++ ++struct bpf_loader_ctx { ++ __u32 sz; ++ __u32 flags; ++ __u32 log_level; ++ __u32 log_size; ++ __u64 log_buf; ++}; ++ ++struct bpf_load_and_run_opts { ++ struct bpf_loader_ctx *ctx; ++ const void *data; ++ const void *insns; ++ __u32 data_sz; ++ __u32 insns_sz; ++ const char *errstr; ++}; ++ ++long bpf_sys_bpf(__u32 cmd, void *attr, __u32 attr_size); ++ ++static inline int skel_sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, ++ unsigned int size) ++{ ++#ifdef __KERNEL__ ++ return bpf_sys_bpf(cmd, attr, size); ++#else ++ return syscall(__NR_bpf, cmd, attr, size); ++#endif ++} ++ ++#ifdef __KERNEL__ ++static inline int close(int fd) ++{ ++ return close_fd(fd); ++} ++ ++static inline void *skel_alloc(size_t size) ++{ ++ struct bpf_loader_ctx *ctx = kzalloc(size, GFP_KERNEL); ++ ++ if (!ctx) ++ return NULL; ++ ctx->flags |= BPF_SKEL_KERNEL; ++ return ctx; ++} ++ ++static inline void skel_free(const void *p) ++{ ++ kfree(p); ++} ++ ++/* skel->bss/rodata maps are populated the following way: ++ * ++ * For kernel use: ++ * skel_prep_map_data() allocates kernel memory that kernel module can directly access. ++ * Generated lskel stores the pointer in skel->rodata and in skel->maps.rodata.initial_value. ++ * The loader program will perform probe_read_kernel() from maps.rodata.initial_value. ++ * skel_finalize_map_data() sets skel->rodata to point to actual value in a bpf map and ++ * does maps.rodata.initial_value = ~0ULL to signal skel_free_map_data() that kvfree ++ * is not nessary. ++ * ++ * For user space: ++ * skel_prep_map_data() mmaps anon memory into skel->rodata that can be accessed directly. ++ * Generated lskel stores the pointer in skel->rodata and in skel->maps.rodata.initial_value. ++ * The loader program will perform copy_from_user() from maps.rodata.initial_value. ++ * skel_finalize_map_data() remaps bpf array map value from the kernel memory into ++ * skel->rodata address. ++ * ++ * The "bpftool gen skeleton -L" command generates lskel.h that is suitable for ++ * both kernel and user space. The generated loader program does ++ * either bpf_probe_read_kernel() or bpf_copy_from_user() from initial_value ++ * depending on bpf_loader_ctx->flags. ++ */ ++static inline void skel_free_map_data(void *p, __u64 addr, size_t sz) ++{ ++ if (addr != ~0ULL) ++ kvfree(p); ++ /* When addr == ~0ULL the 'p' points to ++ * ((struct bpf_array *)map)->value. See skel_finalize_map_data. ++ */ ++} ++ ++static inline void *skel_prep_map_data(const void *val, size_t mmap_sz, size_t val_sz) ++{ ++ void *addr; ++ ++ addr = kvmalloc(val_sz, GFP_KERNEL); ++ if (!addr) ++ return NULL; ++ memcpy(addr, val, val_sz); ++ return addr; ++} ++ ++static inline void *skel_finalize_map_data(__u64 *init_val, size_t mmap_sz, int flags, int fd) ++{ ++ struct bpf_map *map; ++ void *addr = NULL; ++ ++ kvfree((void *) (long) *init_val); ++ *init_val = ~0ULL; ++ ++ /* At this point bpf_load_and_run() finished without error and ++ * 'fd' is a valid bpf map FD. All sanity checks below should succeed. 
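Editor's note: to make the role of this header concrete, it is only ever consumed through bpftool-generated *.lskel.h light skeletons, whose entry points bottom out in the helpers defined here (ultimately bpf_load_and_run() further below). A hedged usage sketch follows; "myprog" and all generated identifiers are placeholders following bpftool's usual skeleton naming, not names added by this patch.

/* Hypothetical user-space usage; "myprog.lskel.h" would come from
 *   bpftool gen skeleton -L myprog.bpf.o > myprog.lskel.h
 * and its generated loader calls into bpf_load_and_run() defined below. */
#include "myprog.lskel.h"

int run_myprog(void)
{
	struct myprog *skel;
	int err;

	skel = myprog__open_and_load();
	if (!skel)
		return -1; /* open/load failed; generated code cleans up after itself */

	err = myprog__attach(skel);
	/* on success, fds are exposed as skel->progs.<prog>.prog_fd and
	 * skel->maps.<map>.map_fd (struct bpf_prog_desc / bpf_map_desc above) */
	myprog__destroy(skel);
	return err;
}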
++ */ ++ map = bpf_map_get(fd); ++ if (IS_ERR(map)) ++ return NULL; ++ if (map->map_type != BPF_MAP_TYPE_ARRAY) ++ goto out; ++ addr = ((struct bpf_array *)map)->value; ++ /* the addr stays valid, since FD is not closed */ ++out: ++ bpf_map_put(map); ++ return addr; ++} ++ ++#else ++ ++static inline void *skel_alloc(size_t size) ++{ ++ return calloc(1, size); ++} ++ ++static inline void skel_free(void *p) ++{ ++ free(p); ++} ++ ++static inline void skel_free_map_data(void *p, __u64 addr, size_t sz) ++{ ++ munmap(p, sz); ++} ++ ++static inline void *skel_prep_map_data(const void *val, size_t mmap_sz, size_t val_sz) ++{ ++ void *addr; ++ ++ addr = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, ++ MAP_SHARED | MAP_ANONYMOUS, -1, 0); ++ if (addr == (void *) -1) ++ return NULL; ++ memcpy(addr, val, val_sz); ++ return addr; ++} ++ ++static inline void *skel_finalize_map_data(__u64 *init_val, size_t mmap_sz, int flags, int fd) ++{ ++ void *addr; ++ ++ addr = mmap((void *) (long) *init_val, mmap_sz, flags, MAP_SHARED | MAP_FIXED, fd, 0); ++ if (addr == (void *) -1) ++ return NULL; ++ return addr; ++} ++#endif ++ ++static inline int skel_closenz(int fd) ++{ ++ if (fd > 0) ++ return close(fd); ++ return -EINVAL; ++} ++ ++#ifndef offsetofend ++#define offsetofend(TYPE, MEMBER) \ ++ (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER))) ++#endif ++ ++static inline int skel_map_create(enum bpf_map_type map_type, ++ const char *map_name, ++ __u32 key_size, ++ __u32 value_size, ++ __u32 max_entries) ++{ ++ const size_t attr_sz = offsetofend(union bpf_attr, map_extra); ++ union bpf_attr attr; ++ ++ memset(&attr, 0, attr_sz); ++ ++ attr.map_type = map_type; ++ strncpy(attr.map_name, map_name, sizeof(attr.map_name)); ++ attr.key_size = key_size; ++ attr.value_size = value_size; ++ attr.max_entries = max_entries; ++ ++ return skel_sys_bpf(BPF_MAP_CREATE, &attr, attr_sz); ++} ++ ++static inline int skel_map_update_elem(int fd, const void *key, ++ const void *value, __u64 flags) ++{ ++ const size_t attr_sz = offsetofend(union bpf_attr, flags); ++ union bpf_attr attr; ++ ++ memset(&attr, 0, attr_sz); ++ attr.map_fd = fd; ++ attr.key = (long) key; ++ attr.value = (long) value; ++ attr.flags = flags; ++ ++ return skel_sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, attr_sz); ++} ++ ++static inline int skel_raw_tracepoint_open(const char *name, int prog_fd) ++{ ++ const size_t attr_sz = offsetofend(union bpf_attr, raw_tracepoint.prog_fd); ++ union bpf_attr attr; ++ ++ memset(&attr, 0, attr_sz); ++ attr.raw_tracepoint.name = (long) name; ++ attr.raw_tracepoint.prog_fd = prog_fd; ++ ++ return skel_sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, attr_sz); ++} ++ ++static inline int skel_link_create(int prog_fd, int target_fd, ++ enum bpf_attach_type attach_type) ++{ ++ const size_t attr_sz = offsetofend(union bpf_attr, link_create.iter_info_len); ++ union bpf_attr attr; ++ ++ memset(&attr, 0, attr_sz); ++ attr.link_create.prog_fd = prog_fd; ++ attr.link_create.target_fd = target_fd; ++ attr.link_create.attach_type = attach_type; ++ ++ return skel_sys_bpf(BPF_LINK_CREATE, &attr, attr_sz); ++} ++ ++#ifdef __KERNEL__ ++#define set_err ++#else ++#define set_err err = -errno ++#endif ++ ++static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) ++{ ++ int map_fd = -1, prog_fd = -1, key = 0, err; ++ union bpf_attr attr; ++ ++ err = map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1); ++ if (map_fd < 0) { ++ opts->errstr = "failed to create loader map"; ++ set_err; ++ goto out; ++ } ++ ++ err = 
skel_map_update_elem(map_fd, &key, opts->data, 0); ++ if (err < 0) { ++ opts->errstr = "failed to update loader map"; ++ set_err; ++ goto out; ++ } ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.prog_type = BPF_PROG_TYPE_SYSCALL; ++ attr.insns = (long) opts->insns; ++ attr.insn_cnt = opts->insns_sz / sizeof(struct bpf_insn); ++ attr.license = (long) "Dual BSD/GPL"; ++ memcpy(attr.prog_name, "__loader.prog", sizeof("__loader.prog")); ++ attr.fd_array = (long) &map_fd; ++ attr.log_level = opts->ctx->log_level; ++ attr.log_size = opts->ctx->log_size; ++ attr.log_buf = opts->ctx->log_buf; ++ attr.prog_flags = BPF_F_SLEEPABLE; ++ err = prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); ++ if (prog_fd < 0) { ++ opts->errstr = "failed to load loader prog"; ++ set_err; ++ goto out; ++ } ++ ++ memset(&attr, 0, sizeof(attr)); ++ attr.test.prog_fd = prog_fd; ++ attr.test.ctx_in = (long) opts->ctx; ++ attr.test.ctx_size_in = opts->ctx->sz; ++ err = skel_sys_bpf(BPF_PROG_RUN, &attr, sizeof(attr)); ++ if (err < 0 || (int)attr.test.retval < 0) { ++ opts->errstr = "failed to execute loader prog"; ++ if (err < 0) { ++ set_err; ++ } else { ++ err = (int)attr.test.retval; ++#ifndef __KERNEL__ ++ errno = -err; ++#endif ++ } ++ goto out; ++ } ++ err = 0; ++out: ++ if (map_fd >= 0) ++ close(map_fd); ++ if (prog_fd >= 0) ++ close(prog_fd); ++ return err; ++} ++ ++#endif +diff --git a/src/cc/libbpf/src/str_error.c b/src/cc/libbpf/src/str_error.c +new file mode 100644 +index 0000000..146da01 +--- /dev/null ++++ b/src/cc/libbpf/src/str_error.c +@@ -0,0 +1,21 @@ ++// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) ++#undef _GNU_SOURCE ++#include ++#include ++#include "str_error.h" ++ ++/* make sure libbpf doesn't use kernel-only integer typedefs */ ++#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 ++ ++/* ++ * Wrapper to allow for building in non-GNU systems such as Alpine Linux's musl ++ * libc, while checking strerror_r() return to avoid having to check this in ++ * all places calling it. ++ */ ++char *libbpf_strerror_r(int err, char *dst, int len) ++{ ++ int ret = strerror_r(err < 0 ? 
-err : err, dst, len); ++ if (ret) ++ snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret); ++ return dst; ++} +diff --git a/src/cc/libbpf/src/str_error.h b/src/cc/libbpf/src/str_error.h +new file mode 100644 +index 0000000..a139334 +--- /dev/null ++++ b/src/cc/libbpf/src/str_error.h +@@ -0,0 +1,6 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++#ifndef __LIBBPF_STR_ERROR_H ++#define __LIBBPF_STR_ERROR_H ++ ++char *libbpf_strerror_r(int err, char *dst, int len); ++#endif /* __LIBBPF_STR_ERROR_H */ +diff --git a/src/cc/libbpf/src/strset.c b/src/cc/libbpf/src/strset.c +new file mode 100644 +index 0000000..ea65531 +--- /dev/null ++++ b/src/cc/libbpf/src/strset.c +@@ -0,0 +1,177 @@ ++// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) ++/* Copyright (c) 2021 Facebook */ ++#include ++#include ++#include ++#include ++#include ++#include "hashmap.h" ++#include "libbpf_internal.h" ++#include "strset.h" ++ ++struct strset { ++ void *strs_data; ++ size_t strs_data_len; ++ size_t strs_data_cap; ++ size_t strs_data_max_len; ++ ++ /* lookup index for each unique string in strings set */ ++ struct hashmap *strs_hash; ++}; ++ ++static size_t strset_hash_fn(const void *key, void *ctx) ++{ ++ const struct strset *s = ctx; ++ const char *str = s->strs_data + (long)key; ++ ++ return str_hash(str); ++} ++ ++static bool strset_equal_fn(const void *key1, const void *key2, void *ctx) ++{ ++ const struct strset *s = ctx; ++ const char *str1 = s->strs_data + (long)key1; ++ const char *str2 = s->strs_data + (long)key2; ++ ++ return strcmp(str1, str2) == 0; ++} ++ ++struct strset *strset__new(size_t max_data_sz, const char *init_data, size_t init_data_sz) ++{ ++ struct strset *set = calloc(1, sizeof(*set)); ++ struct hashmap *hash; ++ int err = -ENOMEM; ++ ++ if (!set) ++ return ERR_PTR(-ENOMEM); ++ ++ hash = hashmap__new(strset_hash_fn, strset_equal_fn, set); ++ if (IS_ERR(hash)) ++ goto err_out; ++ ++ set->strs_data_max_len = max_data_sz; ++ set->strs_hash = hash; ++ ++ if (init_data) { ++ long off; ++ ++ set->strs_data = malloc(init_data_sz); ++ if (!set->strs_data) ++ goto err_out; ++ ++ memcpy(set->strs_data, init_data, init_data_sz); ++ set->strs_data_len = init_data_sz; ++ set->strs_data_cap = init_data_sz; ++ ++ for (off = 0; off < set->strs_data_len; off += strlen(set->strs_data + off) + 1) { ++ /* hashmap__add() returns EEXIST if string with the same ++ * content already is in the hash map ++ */ ++ err = hashmap__add(hash, (void *)off, (void *)off); ++ if (err == -EEXIST) ++ continue; /* duplicate */ ++ if (err) ++ goto err_out; ++ } ++ } ++ ++ return set; ++err_out: ++ strset__free(set); ++ return ERR_PTR(err); ++} ++ ++void strset__free(struct strset *set) ++{ ++ if (IS_ERR_OR_NULL(set)) ++ return; ++ ++ hashmap__free(set->strs_hash); ++ free(set->strs_data); ++ free(set); ++} ++ ++size_t strset__data_size(const struct strset *set) ++{ ++ return set->strs_data_len; ++} ++ ++const char *strset__data(const struct strset *set) ++{ ++ return set->strs_data; ++} ++ ++static void *strset_add_str_mem(struct strset *set, size_t add_sz) ++{ ++ return libbpf_add_mem(&set->strs_data, &set->strs_data_cap, 1, ++ set->strs_data_len, set->strs_data_max_len, add_sz); ++} ++ ++/* Find string offset that corresponds to a given string *s*. ++ * Returns: ++ * - >0 offset into string data, if string is found; ++ * - -ENOENT, if string is not in the string data; ++ * - <0, on any other error. 
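Editor's note: the string-set helpers above (and strset__find_str()/strset__add_str() just below) are libbpf-internal, used by the BTF and linker code to deduplicate strings by offset. A minimal internal-usage sketch, assuming it is compiled inside libbpf's own tree where strset.h and the vendored linux/err.h are on the include path; the SIZE_MAX cap and the demo strings are arbitrary.

#include <stdint.h>
#include <linux/err.h>
#include "strset.h"

static int strset_demo(void)
{
	/* start, like BTF string sections do, with a lone empty string at offset 0 */
	struct strset *set = strset__new(SIZE_MAX, "", 1);
	int off1, off2;

	if (IS_ERR(set))
		return PTR_ERR(set);

	off1 = strset__add_str(set, "foo"); /* appended, its offset is returned */
	off2 = strset__add_str(set, "foo"); /* deduplicated, same offset returned */
	/* off1 == off2, and strset__data(set) + off1 points at "foo" */

	strset__free(set);
	return off1 == off2 ? 0 : -1;
}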
++ */ ++int strset__find_str(struct strset *set, const char *s) ++{ ++ long old_off, new_off, len; ++ void *p; ++ ++ /* see strset__add_str() for why we do this */ ++ len = strlen(s) + 1; ++ p = strset_add_str_mem(set, len); ++ if (!p) ++ return -ENOMEM; ++ ++ new_off = set->strs_data_len; ++ memcpy(p, s, len); ++ ++ if (hashmap__find(set->strs_hash, (void *)new_off, (void **)&old_off)) ++ return old_off; ++ ++ return -ENOENT; ++} ++ ++/* Add a string s to the string data. If the string already exists, return its ++ * offset within string data. ++ * Returns: ++ * - > 0 offset into string data, on success; ++ * - < 0, on error. ++ */ ++int strset__add_str(struct strset *set, const char *s) ++{ ++ long old_off, new_off, len; ++ void *p; ++ int err; ++ ++ /* Hashmap keys are always offsets within set->strs_data, so to even ++ * look up some string from the "outside", we need to first append it ++ * at the end, so that it can be addressed with an offset. Luckily, ++ * until set->strs_data_len is incremented, that string is just a piece ++ * of garbage for the rest of the code, so no harm, no foul. On the ++ * other hand, if the string is unique, it's already appended and ++ * ready to be used, only a simple set->strs_data_len increment away. ++ */ ++ len = strlen(s) + 1; ++ p = strset_add_str_mem(set, len); ++ if (!p) ++ return -ENOMEM; ++ ++ new_off = set->strs_data_len; ++ memcpy(p, s, len); ++ ++ /* Now attempt to add the string, but only if the string with the same ++ * contents doesn't exist already (HASHMAP_ADD strategy). If such ++ * string exists, we'll get its offset in old_off (that's old_key). ++ */ ++ err = hashmap__insert(set->strs_hash, (void *)new_off, (void *)new_off, ++ HASHMAP_ADD, (const void **)&old_off, NULL); ++ if (err == -EEXIST) ++ return old_off; /* duplicated string, return existing offset */ ++ if (err) ++ return err; ++ ++ set->strs_data_len += len; /* new unique string, adjust data length */ ++ return new_off; ++} +diff --git a/src/cc/libbpf/src/strset.h b/src/cc/libbpf/src/strset.h +new file mode 100644 +index 0000000..b6ddf77 +--- /dev/null ++++ b/src/cc/libbpf/src/strset.h +@@ -0,0 +1,21 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++ ++/* Copyright (c) 2021 Facebook */ ++#ifndef __LIBBPF_STRSET_H ++#define __LIBBPF_STRSET_H ++ ++#include ++#include ++ ++struct strset; ++ ++struct strset *strset__new(size_t max_data_sz, const char *init_data, size_t init_data_sz); ++void strset__free(struct strset *set); ++ ++const char *strset__data(const struct strset *set); ++size_t strset__data_size(const struct strset *set); ++ ++int strset__find_str(struct strset *set, const char *s); ++int strset__add_str(struct strset *set, const char *s); ++ ++#endif /* __LIBBPF_STRSET_H */ +diff --git a/src/cc/libbpf/src/usdt.bpf.h b/src/cc/libbpf/src/usdt.bpf.h +new file mode 100644 +index 0000000..4f2adc0 +--- /dev/null ++++ b/src/cc/libbpf/src/usdt.bpf.h +@@ -0,0 +1,247 @@ ++/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ ++/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ ++#ifndef __USDT_BPF_H__ ++#define __USDT_BPF_H__ ++ ++#include ++#include ++#include ++ ++/* Below types and maps are internal implementation details of libbpf's USDT ++ * support and are subjects to change. Also, bpf_usdt_xxx() API helpers should ++ * be considered an unstable API as well and might be adjusted based on user ++ * feedback from using libbpf's USDT support in production. 
++ */ ++ ++/* User can override BPF_USDT_MAX_SPEC_CNT to change default size of internal ++ * map that keeps track of USDT argument specifications. This might be ++ * necessary if there are a lot of USDT attachments. ++ */ ++#ifndef BPF_USDT_MAX_SPEC_CNT ++#define BPF_USDT_MAX_SPEC_CNT 256 ++#endif ++/* User can override BPF_USDT_MAX_IP_CNT to change default size of internal ++ * map that keeps track of IP (memory address) mapping to USDT argument ++ * specification. ++ * Note, if kernel supports BPF cookies, this map is not used and could be ++ * resized all the way to 1 to save a bit of memory. ++ */ ++#ifndef BPF_USDT_MAX_IP_CNT ++#define BPF_USDT_MAX_IP_CNT (4 * BPF_USDT_MAX_SPEC_CNT) ++#endif ++ ++enum __bpf_usdt_arg_type { ++ BPF_USDT_ARG_CONST, ++ BPF_USDT_ARG_REG, ++ BPF_USDT_ARG_REG_DEREF, ++}; ++ ++struct __bpf_usdt_arg_spec { ++ /* u64 scalar interpreted depending on arg_type, see below */ ++ __u64 val_off; ++ /* arg location case, see bpf_udst_arg() for details */ ++ enum __bpf_usdt_arg_type arg_type; ++ /* offset of referenced register within struct pt_regs */ ++ short reg_off; ++ /* whether arg should be interpreted as signed value */ ++ bool arg_signed; ++ /* number of bits that need to be cleared and, optionally, ++ * sign-extended to cast arguments that are 1, 2, or 4 bytes ++ * long into final 8-byte u64/s64 value returned to user ++ */ ++ char arg_bitshift; ++}; ++ ++/* should match USDT_MAX_ARG_CNT in usdt.c exactly */ ++#define BPF_USDT_MAX_ARG_CNT 12 ++struct __bpf_usdt_spec { ++ struct __bpf_usdt_arg_spec args[BPF_USDT_MAX_ARG_CNT]; ++ __u64 usdt_cookie; ++ short arg_cnt; ++}; ++ ++struct { ++ __uint(type, BPF_MAP_TYPE_ARRAY); ++ __uint(max_entries, BPF_USDT_MAX_SPEC_CNT); ++ __type(key, int); ++ __type(value, struct __bpf_usdt_spec); ++} __bpf_usdt_specs SEC(".maps") __weak; ++ ++struct { ++ __uint(type, BPF_MAP_TYPE_HASH); ++ __uint(max_entries, BPF_USDT_MAX_IP_CNT); ++ __type(key, long); ++ __type(value, __u32); ++} __bpf_usdt_ip_to_spec_id SEC(".maps") __weak; ++ ++extern const _Bool LINUX_HAS_BPF_COOKIE __kconfig; ++ ++static __always_inline ++int __bpf_usdt_spec_id(struct pt_regs *ctx) ++{ ++ if (!LINUX_HAS_BPF_COOKIE) { ++ long ip = PT_REGS_IP(ctx); ++ int *spec_id_ptr; ++ ++ spec_id_ptr = bpf_map_lookup_elem(&__bpf_usdt_ip_to_spec_id, &ip); ++ return spec_id_ptr ? *spec_id_ptr : -ESRCH; ++ } ++ ++ return bpf_get_attach_cookie(ctx); ++} ++ ++/* Return number of USDT arguments defined for currently traced USDT. */ ++__weak __hidden ++int bpf_usdt_arg_cnt(struct pt_regs *ctx) ++{ ++ struct __bpf_usdt_spec *spec; ++ int spec_id; ++ ++ spec_id = __bpf_usdt_spec_id(ctx); ++ if (spec_id < 0) ++ return -ESRCH; ++ ++ spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id); ++ if (!spec) ++ return -ESRCH; ++ ++ return spec->arg_cnt; ++} ++ ++/* Fetch USDT argument #*arg_num* (zero-indexed) and put its value into *res. ++ * Returns 0 on success; negative error, otherwise. ++ * On error *res is guaranteed to be set to zero. 
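Editor's note: as the override comments near the top of this header describe, both internal map sizes are compile-time knobs that can be tuned per BPF object simply by defining them before the include. A small illustration; the 1024 value and the installed <bpf/usdt.bpf.h> path are assumptions.

/* raise the USDT spec table for objects with very many attachments */
#define BPF_USDT_MAX_SPEC_CNT 1024
#include <bpf/usdt.bpf.h>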
++ */ ++__weak __hidden ++int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res) ++{ ++ struct __bpf_usdt_spec *spec; ++ struct __bpf_usdt_arg_spec *arg_spec; ++ unsigned long val; ++ int err, spec_id; ++ ++ *res = 0; ++ ++ spec_id = __bpf_usdt_spec_id(ctx); ++ if (spec_id < 0) ++ return -ESRCH; ++ ++ spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id); ++ if (!spec) ++ return -ESRCH; ++ ++ if (arg_num >= BPF_USDT_MAX_ARG_CNT || arg_num >= spec->arg_cnt) ++ return -ENOENT; ++ ++ arg_spec = &spec->args[arg_num]; ++ switch (arg_spec->arg_type) { ++ case BPF_USDT_ARG_CONST: ++ /* Arg is just a constant ("-4@$-9" in USDT arg spec). ++ * value is recorded in arg_spec->val_off directly. ++ */ ++ val = arg_spec->val_off; ++ break; ++ case BPF_USDT_ARG_REG: ++ /* Arg is in a register (e.g, "8@%rax" in USDT arg spec), ++ * so we read the contents of that register directly from ++ * struct pt_regs. To keep things simple user-space parts ++ * record offsetof(struct pt_regs, ) in arg_spec->reg_off. ++ */ ++ err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off); ++ if (err) ++ return err; ++ break; ++ case BPF_USDT_ARG_REG_DEREF: ++ /* Arg is in memory addressed by register, plus some offset ++ * (e.g., "-4@-1204(%rbp)" in USDT arg spec). Register is ++ * identified like with BPF_USDT_ARG_REG case, and the offset ++ * is in arg_spec->val_off. We first fetch register contents ++ * from pt_regs, then do another user-space probe read to ++ * fetch argument value itself. ++ */ ++ err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off); ++ if (err) ++ return err; ++ err = bpf_probe_read_user(&val, sizeof(val), (void *)val + arg_spec->val_off); ++ if (err) ++ return err; ++#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ ++ val >>= arg_spec->arg_bitshift; ++#endif ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ /* cast arg from 1, 2, or 4 bytes to final 8 byte size clearing ++ * necessary upper arg_bitshift bits, with sign extension if argument ++ * is signed ++ */ ++ val <<= arg_spec->arg_bitshift; ++ if (arg_spec->arg_signed) ++ val = ((long)val) >> arg_spec->arg_bitshift; ++ else ++ val = val >> arg_spec->arg_bitshift; ++ *res = val; ++ return 0; ++} ++ ++/* Retrieve user-specified cookie value provided during attach as ++ * bpf_usdt_opts.usdt_cookie. This serves the same purpose as BPF cookie ++ * returned by bpf_get_attach_cookie(). Libbpf's support for USDT is itself ++ * utilizing BPF cookies internally, so user can't use BPF cookie directly ++ * for USDT programs and has to use bpf_usdt_cookie() API instead. ++ */ ++__weak __hidden ++long bpf_usdt_cookie(struct pt_regs *ctx) ++{ ++ struct __bpf_usdt_spec *spec; ++ int spec_id; ++ ++ spec_id = __bpf_usdt_spec_id(ctx); ++ if (spec_id < 0) ++ return 0; ++ ++ spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id); ++ if (!spec) ++ return 0; ++ ++ return spec->usdt_cookie; ++} ++ ++/* we rely on ___bpf_apply() and ___bpf_narg() macros already defined in bpf_tracing.h */ ++#define ___bpf_usdt_args0() ctx ++#define ___bpf_usdt_args1(x) ___bpf_usdt_args0(), ({ long _x; bpf_usdt_arg(ctx, 0, &_x); (void *)_x; }) ++#define ___bpf_usdt_args2(x, args...) ___bpf_usdt_args1(args), ({ long _x; bpf_usdt_arg(ctx, 1, &_x); (void *)_x; }) ++#define ___bpf_usdt_args3(x, args...) ___bpf_usdt_args2(args), ({ long _x; bpf_usdt_arg(ctx, 2, &_x); (void *)_x; }) ++#define ___bpf_usdt_args4(x, args...) 
___bpf_usdt_args3(args), ({ long _x; bpf_usdt_arg(ctx, 3, &_x); (void *)_x; }) ++#define ___bpf_usdt_args5(x, args...) ___bpf_usdt_args4(args), ({ long _x; bpf_usdt_arg(ctx, 4, &_x); (void *)_x; }) ++#define ___bpf_usdt_args6(x, args...) ___bpf_usdt_args5(args), ({ long _x; bpf_usdt_arg(ctx, 5, &_x); (void *)_x; }) ++#define ___bpf_usdt_args7(x, args...) ___bpf_usdt_args6(args), ({ long _x; bpf_usdt_arg(ctx, 6, &_x); (void *)_x; }) ++#define ___bpf_usdt_args8(x, args...) ___bpf_usdt_args7(args), ({ long _x; bpf_usdt_arg(ctx, 7, &_x); (void *)_x; }) ++#define ___bpf_usdt_args9(x, args...) ___bpf_usdt_args8(args), ({ long _x; bpf_usdt_arg(ctx, 8, &_x); (void *)_x; }) ++#define ___bpf_usdt_args10(x, args...) ___bpf_usdt_args9(args), ({ long _x; bpf_usdt_arg(ctx, 9, &_x); (void *)_x; }) ++#define ___bpf_usdt_args11(x, args...) ___bpf_usdt_args10(args), ({ long _x; bpf_usdt_arg(ctx, 10, &_x); (void *)_x; }) ++#define ___bpf_usdt_args12(x, args...) ___bpf_usdt_args11(args), ({ long _x; bpf_usdt_arg(ctx, 11, &_x); (void *)_x; }) ++#define ___bpf_usdt_args(args...) ___bpf_apply(___bpf_usdt_args, ___bpf_narg(args))(args) ++ ++/* ++ * BPF_USDT serves the same purpose for USDT handlers as BPF_PROG for ++ * tp_btf/fentry/fexit BPF programs and BPF_KPROBE for kprobes. ++ * Original struct pt_regs * context is preserved as 'ctx' argument. ++ */ ++#define BPF_USDT(name, args...) \ ++name(struct pt_regs *ctx); \ ++static __attribute__((always_inline)) typeof(name(0)) \ ++____##name(struct pt_regs *ctx, ##args); \ ++typeof(name(0)) name(struct pt_regs *ctx) \ ++{ \ ++ _Pragma("GCC diagnostic push") \ ++ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ ++ return ____##name(___bpf_usdt_args(args)); \ ++ _Pragma("GCC diagnostic pop") \ ++} \ ++static __attribute__((always_inline)) typeof(name(0)) \ ++____##name(struct pt_regs *ctx, ##args) ++ ++#endif /* __USDT_BPF_H__ */ +diff --git a/src/cc/libbpf/src/usdt.c b/src/cc/libbpf/src/usdt.c +new file mode 100644 +index 0000000..d18e379 +--- /dev/null ++++ b/src/cc/libbpf/src/usdt.c +@@ -0,0 +1,1519 @@ ++// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) ++/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* s8 will be marked as poison while it's a reg of riscv */ ++#if defined(__riscv) ++#define rv_s8 s8 ++#endif ++ ++#include "bpf.h" ++#include "libbpf.h" ++#include "libbpf_common.h" ++#include "libbpf_internal.h" ++#include "hashmap.h" ++ ++/* libbpf's USDT support consists of BPF-side state/code and user-space ++ * state/code working together in concert. BPF-side parts are defined in ++ * usdt.bpf.h header library. User-space state is encapsulated by struct ++ * usdt_manager and all the supporting code centered around usdt_manager. ++ * ++ * usdt.bpf.h defines two BPF maps that usdt_manager expects: USDT spec map ++ * and IP-to-spec-ID map, which is auxiliary map necessary for kernels that ++ * don't support BPF cookie (see below). These two maps are implicitly ++ * embedded into user's end BPF object file when user's code included ++ * usdt.bpf.h. This means that libbpf doesn't do anything special to create ++ * these USDT support maps. They are created by normal libbpf logic of ++ * instantiating BPF maps when opening and loading BPF object. 
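Editor's note: before the implementation details continue, here is a hedged sketch of what a BPF-side consumer of usdt.bpf.h looks like. The program name, the plain SEC("usdt") manual-attach convention, the vmlinux.h include and the installed header paths are illustrative assumptions, not part of this patch; the matching user-space attach sketch appears further below.

/* Hypothetical BPF program built against the helpers above. */
#include "vmlinux.h"            /* or <linux/ptrace.h> for struct pt_regs */
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/usdt.bpf.h>

SEC("usdt")                     /* attached manually via bpf_program__attach_usdt() */
int BPF_USDT(handle_my_probe, long arg0)
{
	/* ctx (struct pt_regs *) is preserved by the BPF_USDT macro */
	bpf_printk("my_probe: cookie=%ld nargs=%d arg0=%ld",
		   bpf_usdt_cookie(ctx), bpf_usdt_arg_cnt(ctx), arg0);
	return 0;
}

char LICENSE[] SEC("license") = "Dual BSD/GPL";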
++ * ++ * As such, libbpf is basically unaware of the need to do anything ++ * USDT-related until the very first call to bpf_program__attach_usdt(), which ++ * can be called by user explicitly or happen automatically during skeleton ++ * attach (or, equivalently, through generic bpf_program__attach() call). At ++ * this point, libbpf will instantiate and initialize struct usdt_manager and ++ * store it in bpf_object. USDT manager is per-BPF object construct, as each ++ * independent BPF object might or might not have USDT programs, and thus all ++ * the expected USDT-related state. There is no coordination between two ++ * bpf_object in parts of USDT attachment, they are oblivious of each other's ++ * existence and libbpf is just oblivious, dealing with bpf_object-specific ++ * USDT state. ++ * ++ * Quick crash course on USDTs. ++ * ++ * From user-space application's point of view, USDT is essentially just ++ * a slightly special function call that normally has zero overhead, unless it ++ * is being traced by some external entity (e.g, BPF-based tool). Here's how ++ * a typical application can trigger USDT probe: ++ * ++ * #include // provided by systemtap-sdt-devel package ++ * // folly also provide similar functionality in folly/tracing/StaticTracepoint.h ++ * ++ * STAP_PROBE3(my_usdt_provider, my_usdt_probe_name, 123, x, &y); ++ * ++ * USDT is identified by it's : pair of names. Each ++ * individual USDT has a fixed number of arguments (3 in the above example) ++ * and specifies values of each argument as if it was a function call. ++ * ++ * USDT call is actually not a function call, but is instead replaced by ++ * a single NOP instruction (thus zero overhead, effectively). But in addition ++ * to that, those USDT macros generate special SHT_NOTE ELF records in ++ * .note.stapsdt ELF section. Here's an example USDT definition as emitted by ++ * `readelf -n `: ++ * ++ * stapsdt 0x00000089 NT_STAPSDT (SystemTap probe descriptors) ++ * Provider: test ++ * Name: usdt12 ++ * Location: 0x0000000000549df3, Base: 0x00000000008effa4, Semaphore: 0x0000000000a4606e ++ * Arguments: -4@-1204(%rbp) -4@%edi -8@-1216(%rbp) -8@%r8 -4@$5 -8@%r9 8@%rdx 8@%r10 -4@$-9 -2@%cx -2@%ax -1@%sil ++ * ++ * In this case we have USDT test:usdt12 with 12 arguments. ++ * ++ * Location and base are offsets used to calculate absolute IP address of that ++ * NOP instruction that kernel can replace with an interrupt instruction to ++ * trigger instrumentation code (BPF program for all that we care about). ++ * ++ * Semaphore above is and optional feature. It records an address of a 2-byte ++ * refcount variable (normally in '.probes' ELF section) used for signaling if ++ * there is anything that is attached to USDT. This is useful for user ++ * applications if, for example, they need to prepare some arguments that are ++ * passed only to USDTs and preparation is expensive. By checking if USDT is ++ * "activated", an application can avoid paying those costs unnecessarily. ++ * Recent enough kernel has built-in support for automatically managing this ++ * refcount, which libbpf expects and relies on. If USDT is defined without ++ * associated semaphore, this value will be zero. See selftests for semaphore ++ * examples. ++ * ++ * Arguments is the most interesting part. This USDT specification string is ++ * providing information about all the USDT arguments and their locations. 
The ++ * part before @ sign defined byte size of the argument (1, 2, 4, or 8) and ++ * whether the argument is signed or unsigned (negative size means signed). ++ * The part after @ sign is assembly-like definition of argument location ++ * (see [0] for more details). Technically, assembler can provide some pretty ++ * advanced definitions, but libbpf is currently supporting three most common ++ * cases: ++ * 1) immediate constant, see 5th and 9th args above (-4@$5 and -4@-9); ++ * 2) register value, e.g., 8@%rdx, which means "unsigned 8-byte integer ++ * whose value is in register %rdx"; ++ * 3) memory dereference addressed by register, e.g., -4@-1204(%rbp), which ++ * specifies signed 32-bit integer stored at offset -1204 bytes from ++ * memory address stored in %rbp. ++ * ++ * [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation ++ * ++ * During attachment, libbpf parses all the relevant USDT specifications and ++ * prepares `struct usdt_spec` (USDT spec), which is then provided to BPF-side ++ * code through spec map. This allows BPF applications to quickly fetch the ++ * actual value at runtime using a simple BPF-side code. ++ * ++ * With basics out of the way, let's go over less immediately obvious aspects ++ * of supporting USDTs. ++ * ++ * First, there is no special USDT BPF program type. It is actually just ++ * a uprobe BPF program (which for kernel, at least currently, is just a kprobe ++ * program, so BPF_PROG_TYPE_KPROBE program type). With the only difference ++ * that uprobe is usually attached at the function entry, while USDT will ++ * normally will be somewhere inside the function. But it should always be ++ * pointing to NOP instruction, which makes such uprobes the fastest uprobe ++ * kind. ++ * ++ * Second, it's important to realize that such STAP_PROBEn(provider, name, ...) ++ * macro invocations can end up being inlined many-many times, depending on ++ * specifics of each individual user application. So single conceptual USDT ++ * (identified by provider:name pair of identifiers) is, generally speaking, ++ * multiple uprobe locations (USDT call sites) in different places in user ++ * application. Further, again due to inlining, each USDT call site might end ++ * up having the same argument #N be located in a different place. In one call ++ * site it could be a constant, in another will end up in a register, and in ++ * yet another could be some other register or even somewhere on the stack. ++ * ++ * As such, "attaching to USDT" means (in general case) attaching the same ++ * uprobe BPF program to multiple target locations in user application, each ++ * potentially having a completely different USDT spec associated with it. ++ * To wire all this up together libbpf allocates a unique integer spec ID for ++ * each unique USDT spec. Spec IDs are allocated as sequential small integers ++ * so that they can be used as keys in array BPF map (for performance reasons). ++ * Spec ID allocation and accounting is big part of what usdt_manager is ++ * about. This state has to be maintained per-BPF object and coordinate ++ * between different USDT attachments within the same BPF object. ++ * ++ * Spec ID is the key in spec BPF map, value is the actual USDT spec layed out ++ * as struct usdt_spec. Each invocation of BPF program at runtime needs to ++ * know its associated spec ID. 
It gets it either through BPF cookie, which ++ * libbpf sets to spec ID during attach time, or, if kernel is too old to ++ * support BPF cookie, through IP-to-spec-ID map that libbpf maintains in such ++ * case. The latter means that some modes of operation can't be supported ++ * without BPF cookie. Such mode is attaching to shared library "generically", ++ * without specifying target process. In such case, it's impossible to ++ * calculate absolute IP addresses for IP-to-spec-ID map, and thus such mode ++ * is not supported without BPF cookie support. ++ * ++ * Note that libbpf is using BPF cookie functionality for its own internal ++ * needs, so user itself can't rely on BPF cookie feature. To that end, libbpf ++ * provides conceptually equivalent USDT cookie support. It's still u64 ++ * user-provided value that can be associated with USDT attachment. Note that ++ * this will be the same value for all USDT call sites within the same single ++ * *logical* USDT attachment. This makes sense because to user attaching to ++ * USDT is a single BPF program triggered for singular USDT probe. The fact ++ * that this is done at multiple actual locations is a mostly hidden ++ * implementation details. This USDT cookie value can be fetched with ++ * bpf_usdt_cookie(ctx) API provided by usdt.bpf.h ++ * ++ * Lastly, while single USDT can have tons of USDT call sites, it doesn't ++ * necessarily have that many different USDT specs. It very well might be ++ * that 1000 USDT call sites only need 5 different USDT specs, because all the ++ * arguments are typically contained in a small set of registers or stack ++ * locations. As such, it's wasteful to allocate as many USDT spec IDs as ++ * there are USDT call sites. So libbpf tries to be frugal and performs ++ * on-the-fly deduplication during a single USDT attachment to only allocate ++ * the minimal required amount of unique USDT specs (and thus spec IDs). This ++ * is trivially achieved by using USDT spec string (Arguments string from USDT ++ * note) as a lookup key in a hashmap. USDT spec string uniquely defines ++ * everything about how to fetch USDT arguments, so two USDT call sites ++ * sharing USDT spec string can safely share the same USDT spec and spec ID. ++ * Note, this spec string deduplication is happening only during the same USDT ++ * attachment, so each USDT spec shares the same USDT cookie value. This is ++ * not generally true for other USDT attachments within the same BPF object, ++ * as even if USDT spec string is the same, USDT cookie value can be ++ * different. It was deemed excessive to try to deduplicate across independent ++ * USDT attachments by taking into account USDT spec string *and* USDT cookie ++ * value, which would complicated spec ID accounting significantly for little ++ * gain. 
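Editor's note: to ground the attachment flow described in this comment, here is a minimal user-space sketch of one logical USDT attachment carrying a user-provided cookie. The object/program/provider/probe names, the library path and the 0x1234 cookie are placeholders, not part of this patch.

#include <errno.h>
#include <bpf/libbpf.h>

static struct bpf_link *attach_my_usdt(struct bpf_object *obj, pid_t pid)
{
	LIBBPF_OPTS(bpf_usdt_opts, opts, .usdt_cookie = 0x1234);
	struct bpf_program *prog;

	prog = bpf_object__find_program_by_name(obj, "handle_my_probe");
	if (!prog) {
		errno = ESRCH;
		return NULL;
	}

	/* one logical attachment; libbpf expands it to every inlined call site and
	 * multiplexes them onto this single program via spec IDs / BPF cookies */
	return bpf_program__attach_usdt(prog, pid, "/usr/lib64/libfoo.so",
					"my_provider", "my_probe", &opts);
}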
++ */ ++ ++#define USDT_BASE_SEC ".stapsdt.base" ++#define USDT_SEMA_SEC ".probes" ++#define USDT_NOTE_SEC ".note.stapsdt" ++#define USDT_NOTE_TYPE 3 ++#define USDT_NOTE_NAME "stapsdt" ++ ++/* should match exactly enum __bpf_usdt_arg_type from usdt.bpf.h */ ++enum usdt_arg_type { ++ USDT_ARG_CONST, ++ USDT_ARG_REG, ++ USDT_ARG_REG_DEREF, ++}; ++ ++/* should match exactly struct __bpf_usdt_arg_spec from usdt.bpf.h */ ++struct usdt_arg_spec { ++ __u64 val_off; ++ enum usdt_arg_type arg_type; ++ short reg_off; ++ bool arg_signed; ++ char arg_bitshift; ++}; ++ ++/* should match BPF_USDT_MAX_ARG_CNT in usdt.bpf.h */ ++#define USDT_MAX_ARG_CNT 12 ++ ++/* should match struct __bpf_usdt_spec from usdt.bpf.h */ ++struct usdt_spec { ++ struct usdt_arg_spec args[USDT_MAX_ARG_CNT]; ++ __u64 usdt_cookie; ++ short arg_cnt; ++}; ++ ++struct usdt_note { ++ const char *provider; ++ const char *name; ++ /* USDT args specification string, e.g.: ++ * "-4@%esi -4@-24(%rbp) -4@%ecx 2@%ax 8@%rdx" ++ */ ++ const char *args; ++ long loc_addr; ++ long base_addr; ++ long sema_addr; ++}; ++ ++struct usdt_target { ++ long abs_ip; ++ long rel_ip; ++ long sema_off; ++ struct usdt_spec spec; ++ const char *spec_str; ++}; ++ ++struct usdt_manager { ++ struct bpf_map *specs_map; ++ struct bpf_map *ip_to_spec_id_map; ++ ++ int *free_spec_ids; ++ size_t free_spec_cnt; ++ size_t next_free_spec_id; ++ ++ bool has_bpf_cookie; ++ bool has_sema_refcnt; ++}; ++ ++struct usdt_manager *usdt_manager_new(struct bpf_object *obj) ++{ ++ static const char *ref_ctr_sysfs_path = "/sys/bus/event_source/devices/uprobe/format/ref_ctr_offset"; ++ struct usdt_manager *man; ++ struct bpf_map *specs_map, *ip_to_spec_id_map; ++ ++ specs_map = bpf_object__find_map_by_name(obj, "__bpf_usdt_specs"); ++ ip_to_spec_id_map = bpf_object__find_map_by_name(obj, "__bpf_usdt_ip_to_spec_id"); ++ if (!specs_map || !ip_to_spec_id_map) { ++ pr_warn("usdt: failed to find USDT support BPF maps, did you forget to include bpf/usdt.bpf.h?\n"); ++ return ERR_PTR(-ESRCH); ++ } ++ ++ man = calloc(1, sizeof(*man)); ++ if (!man) ++ return ERR_PTR(-ENOMEM); ++ ++ man->specs_map = specs_map; ++ man->ip_to_spec_id_map = ip_to_spec_id_map; ++ ++ /* Detect if BPF cookie is supported for kprobes. ++ * We don't need IP-to-ID mapping if we can use BPF cookies. ++ * Added in: 7adfc6c9b315 ("bpf: Add bpf_get_attach_cookie() BPF helper to access bpf_cookie value") ++ */ ++ man->has_bpf_cookie = kernel_supports(obj, FEAT_BPF_COOKIE); ++ ++ /* Detect kernel support for automatic refcounting of USDT semaphore. ++ * If this is not supported, USDTs with semaphores will not be supported. 
++ * Added in: a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe") ++ */ ++ man->has_sema_refcnt = access(ref_ctr_sysfs_path, F_OK) == 0; ++ ++ return man; ++} ++ ++void usdt_manager_free(struct usdt_manager *man) ++{ ++ if (IS_ERR_OR_NULL(man)) ++ return; ++ ++ free(man->free_spec_ids); ++ free(man); ++} ++ ++static int sanity_check_usdt_elf(Elf *elf, const char *path) ++{ ++ GElf_Ehdr ehdr; ++ int endianness; ++ ++ if (elf_kind(elf) != ELF_K_ELF) { ++ pr_warn("usdt: unrecognized ELF kind %d for '%s'\n", elf_kind(elf), path); ++ return -EBADF; ++ } ++ ++ switch (gelf_getclass(elf)) { ++ case ELFCLASS64: ++ if (sizeof(void *) != 8) { ++ pr_warn("usdt: attaching to 64-bit ELF binary '%s' is not supported\n", path); ++ return -EBADF; ++ } ++ break; ++ case ELFCLASS32: ++ if (sizeof(void *) != 4) { ++ pr_warn("usdt: attaching to 32-bit ELF binary '%s' is not supported\n", path); ++ return -EBADF; ++ } ++ break; ++ default: ++ pr_warn("usdt: unsupported ELF class for '%s'\n", path); ++ return -EBADF; ++ } ++ ++ if (!gelf_getehdr(elf, &ehdr)) ++ return -EINVAL; ++ ++ if (ehdr.e_type != ET_EXEC && ehdr.e_type != ET_DYN) { ++ pr_warn("usdt: unsupported type of ELF binary '%s' (%d), only ET_EXEC and ET_DYN are supported\n", ++ path, ehdr.e_type); ++ return -EBADF; ++ } ++ ++#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ++ endianness = ELFDATA2LSB; ++#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ ++ endianness = ELFDATA2MSB; ++#else ++# error "Unrecognized __BYTE_ORDER__" ++#endif ++ if (endianness != ehdr.e_ident[EI_DATA]) { ++ pr_warn("usdt: ELF endianness mismatch for '%s'\n", path); ++ return -EBADF; ++ } ++ ++ return 0; ++} ++ ++static int find_elf_sec_by_name(Elf *elf, const char *sec_name, GElf_Shdr *shdr, Elf_Scn **scn) ++{ ++ Elf_Scn *sec = NULL; ++ size_t shstrndx; ++ ++ if (elf_getshdrstrndx(elf, &shstrndx)) ++ return -EINVAL; ++ ++ /* check if ELF is corrupted and avoid calling elf_strptr if yes */ ++ if (!elf_rawdata(elf_getscn(elf, shstrndx), NULL)) ++ return -EINVAL; ++ ++ while ((sec = elf_nextscn(elf, sec)) != NULL) { ++ char *name; ++ ++ if (!gelf_getshdr(sec, shdr)) ++ return -EINVAL; ++ ++ name = elf_strptr(elf, shstrndx, shdr->sh_name); ++ if (name && strcmp(sec_name, name) == 0) { ++ *scn = sec; ++ return 0; ++ } ++ } ++ ++ return -ENOENT; ++} ++ ++struct elf_seg { ++ long start; ++ long end; ++ long offset; ++ bool is_exec; ++}; ++ ++static int cmp_elf_segs(const void *_a, const void *_b) ++{ ++ const struct elf_seg *a = _a; ++ const struct elf_seg *b = _b; ++ ++ return a->start < b->start ? 
-1 : 1; ++} ++ ++static int parse_elf_segs(Elf *elf, const char *path, struct elf_seg **segs, size_t *seg_cnt) ++{ ++ GElf_Phdr phdr; ++ size_t n; ++ int i, err; ++ struct elf_seg *seg; ++ void *tmp; ++ ++ *seg_cnt = 0; ++ ++ if (elf_getphdrnum(elf, &n)) { ++ err = -errno; ++ return err; ++ } ++ ++ for (i = 0; i < n; i++) { ++ if (!gelf_getphdr(elf, i, &phdr)) { ++ err = -errno; ++ return err; ++ } ++ ++ pr_debug("usdt: discovered PHDR #%d in '%s': vaddr 0x%lx memsz 0x%lx offset 0x%lx type 0x%lx flags 0x%lx\n", ++ i, path, (long)phdr.p_vaddr, (long)phdr.p_memsz, (long)phdr.p_offset, ++ (long)phdr.p_type, (long)phdr.p_flags); ++ if (phdr.p_type != PT_LOAD) ++ continue; ++ ++ tmp = libbpf_reallocarray(*segs, *seg_cnt + 1, sizeof(**segs)); ++ if (!tmp) ++ return -ENOMEM; ++ ++ *segs = tmp; ++ seg = *segs + *seg_cnt; ++ (*seg_cnt)++; ++ ++ seg->start = phdr.p_vaddr; ++ seg->end = phdr.p_vaddr + phdr.p_memsz; ++ seg->offset = phdr.p_offset; ++ seg->is_exec = phdr.p_flags & PF_X; ++ } ++ ++ if (*seg_cnt == 0) { ++ pr_warn("usdt: failed to find PT_LOAD program headers in '%s'\n", path); ++ return -ESRCH; ++ } ++ ++ qsort(*segs, *seg_cnt, sizeof(**segs), cmp_elf_segs); ++ return 0; ++} ++ ++static int parse_vma_segs(int pid, const char *lib_path, struct elf_seg **segs, size_t *seg_cnt) ++{ ++ char path[PATH_MAX], line[PATH_MAX], mode[16]; ++ size_t seg_start, seg_end, seg_off; ++ struct elf_seg *seg; ++ int tmp_pid, i, err; ++ FILE *f; ++ ++ *seg_cnt = 0; ++ ++ /* Handle containerized binaries only accessible from ++ * /proc//root/. They will be reported as just / in ++ * /proc//maps. ++ */ ++ if (sscanf(lib_path, "/proc/%d/root%s", &tmp_pid, path) == 2 && pid == tmp_pid) ++ goto proceed; ++ ++ if (!realpath(lib_path, path)) { ++ pr_warn("usdt: failed to get absolute path of '%s' (err %d), using path as is...\n", ++ lib_path, -errno); ++ libbpf_strlcpy(path, lib_path, sizeof(path)); ++ } ++ ++proceed: ++ sprintf(line, "/proc/%d/maps", pid); ++ f = fopen(line, "r"); ++ if (!f) { ++ err = -errno; ++ pr_warn("usdt: failed to open '%s' to get base addr of '%s': %d\n", ++ line, lib_path, err); ++ return err; ++ } ++ ++ /* We need to handle lines with no path at the end: ++ * ++ * 7f5c6f5d1000-7f5c6f5d3000 rw-p 001c7000 08:04 21238613 /usr/lib64/libc-2.17.so ++ * 7f5c6f5d3000-7f5c6f5d8000 rw-p 00000000 00:00 0 ++ * 7f5c6f5d8000-7f5c6f5d9000 r-xp 00000000 103:01 362990598 /data/users/andriin/linux/tools/bpf/usdt/libhello_usdt.so ++ */ ++ while (fscanf(f, "%zx-%zx %s %zx %*s %*d%[^\n]\n", ++ &seg_start, &seg_end, mode, &seg_off, line) == 5) { ++ void *tmp; ++ ++ /* to handle no path case (see above) we need to capture line ++ * without skipping any whitespaces. 
So we need to strip ++ * leading whitespaces manually here ++ */ ++ i = 0; ++ while (isblank(line[i])) ++ i++; ++ if (strcmp(line + i, path) != 0) ++ continue; ++ ++ pr_debug("usdt: discovered segment for lib '%s': addrs %zx-%zx mode %s offset %zx\n", ++ path, seg_start, seg_end, mode, seg_off); ++ ++ /* ignore non-executable sections for shared libs */ ++ if (mode[2] != 'x') ++ continue; ++ ++ tmp = libbpf_reallocarray(*segs, *seg_cnt + 1, sizeof(**segs)); ++ if (!tmp) { ++ err = -ENOMEM; ++ goto err_out; ++ } ++ ++ *segs = tmp; ++ seg = *segs + *seg_cnt; ++ *seg_cnt += 1; ++ ++ seg->start = seg_start; ++ seg->end = seg_end; ++ seg->offset = seg_off; ++ seg->is_exec = true; ++ } ++ ++ if (*seg_cnt == 0) { ++ pr_warn("usdt: failed to find '%s' (resolved to '%s') within PID %d memory mappings\n", ++ lib_path, path, pid); ++ err = -ESRCH; ++ goto err_out; ++ } ++ ++ qsort(*segs, *seg_cnt, sizeof(**segs), cmp_elf_segs); ++ err = 0; ++err_out: ++ fclose(f); ++ return err; ++} ++ ++static struct elf_seg *find_elf_seg(struct elf_seg *segs, size_t seg_cnt, long virtaddr) ++{ ++ struct elf_seg *seg; ++ int i; ++ ++ /* for ELF binaries (both executables and shared libraries), we are ++ * given virtual address (absolute for executables, relative for ++ * libraries) which should match address range of [seg_start, seg_end) ++ */ ++ for (i = 0, seg = segs; i < seg_cnt; i++, seg++) { ++ if (seg->start <= virtaddr && virtaddr < seg->end) ++ return seg; ++ } ++ return NULL; ++} ++ ++static struct elf_seg *find_vma_seg(struct elf_seg *segs, size_t seg_cnt, long offset) ++{ ++ struct elf_seg *seg; ++ int i; ++ ++ /* for VMA segments from /proc//maps file, provided "address" is ++ * actually a file offset, so should be fall within logical ++ * offset-based range of [offset_start, offset_end) ++ */ ++ for (i = 0, seg = segs; i < seg_cnt; i++, seg++) { ++ if (seg->offset <= offset && offset < seg->offset + (seg->end - seg->start)) ++ return seg; ++ } ++ return NULL; ++} ++ ++static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr, ++ const char *data, size_t name_off, size_t desc_off, ++ struct usdt_note *usdt_note); ++ ++static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie); ++ ++static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *path, pid_t pid, ++ const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie, ++ struct usdt_target **out_targets, size_t *out_target_cnt) ++{ ++ size_t off, name_off, desc_off, seg_cnt = 0, vma_seg_cnt = 0, target_cnt = 0; ++ struct elf_seg *segs = NULL, *vma_segs = NULL; ++ struct usdt_target *targets = NULL, *target; ++ long base_addr = 0; ++ Elf_Scn *notes_scn, *base_scn; ++ GElf_Shdr base_shdr, notes_shdr; ++ GElf_Ehdr ehdr; ++ GElf_Nhdr nhdr; ++ Elf_Data *data; ++ int err; ++ ++ *out_targets = NULL; ++ *out_target_cnt = 0; ++ ++ err = find_elf_sec_by_name(elf, USDT_NOTE_SEC, ¬es_shdr, ¬es_scn); ++ if (err) { ++ pr_warn("usdt: no USDT notes section (%s) found in '%s'\n", USDT_NOTE_SEC, path); ++ return err; ++ } ++ ++ if (notes_shdr.sh_type != SHT_NOTE || !gelf_getehdr(elf, &ehdr)) { ++ pr_warn("usdt: invalid USDT notes section (%s) in '%s'\n", USDT_NOTE_SEC, path); ++ return -EINVAL; ++ } ++ ++ err = parse_elf_segs(elf, path, &segs, &seg_cnt); ++ if (err) { ++ pr_warn("usdt: failed to process ELF program segments for '%s': %d\n", path, err); ++ goto err_out; ++ } ++ ++ /* .stapsdt.base ELF section is optional, but is used for prelink ++ * offset compensation (see a big comment 
further below) ++ */ ++ if (find_elf_sec_by_name(elf, USDT_BASE_SEC, &base_shdr, &base_scn) == 0) ++ base_addr = base_shdr.sh_addr; ++ ++ data = elf_getdata(notes_scn, 0); ++ off = 0; ++ while ((off = gelf_getnote(data, off, &nhdr, &name_off, &desc_off)) > 0) { ++ long usdt_abs_ip, usdt_rel_ip, usdt_sema_off = 0; ++ struct usdt_note note; ++ struct elf_seg *seg = NULL; ++ void *tmp; ++ ++ err = parse_usdt_note(elf, path, &nhdr, data->d_buf, name_off, desc_off, ¬e); ++ if (err) ++ goto err_out; ++ ++ if (strcmp(note.provider, usdt_provider) != 0 || strcmp(note.name, usdt_name) != 0) ++ continue; ++ ++ /* We need to compensate "prelink effect". See [0] for details, ++ * relevant parts quoted here: ++ * ++ * Each SDT probe also expands into a non-allocated ELF note. You can ++ * find this by looking at SHT_NOTE sections and decoding the format; ++ * see below for details. Because the note is non-allocated, it means ++ * there is no runtime cost, and also preserved in both stripped files ++ * and .debug files. ++ * ++ * However, this means that prelink won't adjust the note's contents ++ * for address offsets. Instead, this is done via the .stapsdt.base ++ * section. This is a special section that is added to the text. We ++ * will only ever have one of these sections in a final link and it ++ * will only ever be one byte long. Nothing about this section itself ++ * matters, we just use it as a marker to detect prelink address ++ * adjustments. ++ * ++ * Each probe note records the link-time address of the .stapsdt.base ++ * section alongside the probe PC address. The decoder compares the ++ * base address stored in the note with the .stapsdt.base section's ++ * sh_addr. Initially these are the same, but the section header will ++ * be adjusted by prelink. So the decoder applies the difference to ++ * the probe PC address to get the correct prelinked PC address; the ++ * same adjustment is applied to the semaphore address, if any. ++ * ++ * [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation ++ */ ++ usdt_abs_ip = note.loc_addr; ++ if (base_addr) ++ usdt_abs_ip += base_addr - note.base_addr; ++ ++ /* When attaching uprobes (which is what USDTs basically are) ++ * kernel expects file offset to be specified, not a relative ++ * virtual address, so we need to translate virtual address to ++ * file offset, for both ET_EXEC and ET_DYN binaries. ++ */ ++ seg = find_elf_seg(segs, seg_cnt, usdt_abs_ip); ++ if (!seg) { ++ err = -ESRCH; ++ pr_warn("usdt: failed to find ELF program segment for '%s:%s' in '%s' at IP 0x%lx\n", ++ usdt_provider, usdt_name, path, usdt_abs_ip); ++ goto err_out; ++ } ++ if (!seg->is_exec) { ++ err = -ESRCH; ++ pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx) for '%s:%s' at IP 0x%lx is not executable\n", ++ path, seg->start, seg->end, usdt_provider, usdt_name, ++ usdt_abs_ip); ++ goto err_out; ++ } ++ /* translate from virtual address to file offset */ ++ usdt_rel_ip = usdt_abs_ip - seg->start + seg->offset; ++ ++ if (ehdr.e_type == ET_DYN && !man->has_bpf_cookie) { ++ /* If we don't have BPF cookie support but need to ++ * attach to a shared library, we'll need to know and ++ * record absolute addresses of attach points due to ++ * the need to lookup USDT spec by absolute IP of ++ * triggered uprobe. Doing this resolution is only ++ * possible when we have a specific PID of the process ++ * that's using specified shared library. 
BPF cookie ++ * removes the absolute address limitation as we don't ++ * need to do this lookup (we just use BPF cookie as ++ * an index of USDT spec), so for newer kernels with ++ * BPF cookie support libbpf supports USDT attachment ++ * to shared libraries with no PID filter. ++ */ ++ if (pid < 0) { ++ pr_warn("usdt: attaching to shared libraries without specific PID is not supported on current kernel\n"); ++ err = -ENOTSUP; ++ goto err_out; ++ } ++ ++ /* vma_segs are lazily initialized only if necessary */ ++ if (vma_seg_cnt == 0) { ++ err = parse_vma_segs(pid, path, &vma_segs, &vma_seg_cnt); ++ if (err) { ++ pr_warn("usdt: failed to get memory segments in PID %d for shared library '%s': %d\n", ++ pid, path, err); ++ goto err_out; ++ } ++ } ++ ++ seg = find_vma_seg(vma_segs, vma_seg_cnt, usdt_rel_ip); ++ if (!seg) { ++ err = -ESRCH; ++ pr_warn("usdt: failed to find shared lib memory segment for '%s:%s' in '%s' at relative IP 0x%lx\n", ++ usdt_provider, usdt_name, path, usdt_rel_ip); ++ goto err_out; ++ } ++ ++ usdt_abs_ip = seg->start - seg->offset + usdt_rel_ip; ++ } ++ ++ pr_debug("usdt: probe for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved abs_ip 0x%lx rel_ip 0x%lx) args '%s' in segment [0x%lx, 0x%lx) at offset 0x%lx\n", ++ usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? "exec" : "lib ", path, ++ note.loc_addr, note.base_addr, usdt_abs_ip, usdt_rel_ip, note.args, ++ seg ? seg->start : 0, seg ? seg->end : 0, seg ? seg->offset : 0); ++ ++ /* Adjust semaphore address to be a file offset */ ++ if (note.sema_addr) { ++ if (!man->has_sema_refcnt) { ++ pr_warn("usdt: kernel doesn't support USDT semaphore refcounting for '%s:%s' in '%s'\n", ++ usdt_provider, usdt_name, path); ++ err = -ENOTSUP; ++ goto err_out; ++ } ++ ++ seg = find_elf_seg(segs, seg_cnt, note.sema_addr); ++ if (!seg) { ++ err = -ESRCH; ++ pr_warn("usdt: failed to find ELF loadable segment with semaphore of '%s:%s' in '%s' at 0x%lx\n", ++ usdt_provider, usdt_name, path, note.sema_addr); ++ goto err_out; ++ } ++ if (seg->is_exec) { ++ err = -ESRCH; ++ pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx] for semaphore of '%s:%s' at 0x%lx is executable\n", ++ path, seg->start, seg->end, usdt_provider, usdt_name, ++ note.sema_addr); ++ goto err_out; ++ } ++ ++ usdt_sema_off = note.sema_addr - seg->start + seg->offset; ++ ++ pr_debug("usdt: sema for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved 0x%lx) in segment [0x%lx, 0x%lx] at offset 0x%lx\n", ++ usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? 
"exec" : "lib ", ++ path, note.sema_addr, note.base_addr, usdt_sema_off, ++ seg->start, seg->end, seg->offset); ++ } ++ ++ /* Record adjusted addresses and offsets and parse USDT spec */ ++ tmp = libbpf_reallocarray(targets, target_cnt + 1, sizeof(*targets)); ++ if (!tmp) { ++ err = -ENOMEM; ++ goto err_out; ++ } ++ targets = tmp; ++ ++ target = &targets[target_cnt]; ++ memset(target, 0, sizeof(*target)); ++ ++ target->abs_ip = usdt_abs_ip; ++ target->rel_ip = usdt_rel_ip; ++ target->sema_off = usdt_sema_off; ++ ++ /* notes.args references strings from Elf itself, so they can ++ * be referenced safely until elf_end() call ++ */ ++ target->spec_str = note.args; ++ ++ err = parse_usdt_spec(&target->spec, ¬e, usdt_cookie); ++ if (err) ++ goto err_out; ++ ++ target_cnt++; ++ } ++ ++ *out_targets = targets; ++ *out_target_cnt = target_cnt; ++ err = target_cnt; ++ ++err_out: ++ free(segs); ++ free(vma_segs); ++ if (err < 0) ++ free(targets); ++ return err; ++} ++ ++struct bpf_link_usdt { ++ struct bpf_link link; ++ ++ struct usdt_manager *usdt_man; ++ ++ size_t spec_cnt; ++ int *spec_ids; ++ ++ size_t uprobe_cnt; ++ struct { ++ long abs_ip; ++ struct bpf_link *link; ++ } *uprobes; ++}; ++ ++static int bpf_link_usdt_detach(struct bpf_link *link) ++{ ++ struct bpf_link_usdt *usdt_link = container_of(link, struct bpf_link_usdt, link); ++ struct usdt_manager *man = usdt_link->usdt_man; ++ int i; ++ ++ for (i = 0; i < usdt_link->uprobe_cnt; i++) { ++ /* detach underlying uprobe link */ ++ bpf_link__destroy(usdt_link->uprobes[i].link); ++ /* there is no need to update specs map because it will be ++ * unconditionally overwritten on subsequent USDT attaches, ++ * but if BPF cookies are not used we need to remove entry ++ * from ip_to_spec_id map, otherwise we'll run into false ++ * conflicting IP errors ++ */ ++ if (!man->has_bpf_cookie) { ++ /* not much we can do about errors here */ ++ (void)bpf_map_delete_elem(bpf_map__fd(man->ip_to_spec_id_map), ++ &usdt_link->uprobes[i].abs_ip); ++ } ++ } ++ ++ /* try to return the list of previously used spec IDs to usdt_manager ++ * for future reuse for subsequent USDT attaches ++ */ ++ if (!man->free_spec_ids) { ++ /* if there were no free spec IDs yet, just transfer our IDs */ ++ man->free_spec_ids = usdt_link->spec_ids; ++ man->free_spec_cnt = usdt_link->spec_cnt; ++ usdt_link->spec_ids = NULL; ++ } else { ++ /* otherwise concat IDs */ ++ size_t new_cnt = man->free_spec_cnt + usdt_link->spec_cnt; ++ int *new_free_ids; ++ ++ new_free_ids = libbpf_reallocarray(man->free_spec_ids, new_cnt, ++ sizeof(*new_free_ids)); ++ /* If we couldn't resize free_spec_ids, we'll just leak ++ * a bunch of free IDs; this is very unlikely to happen and if ++ * system is so exhausted on memory, it's the least of user's ++ * concerns, probably. ++ * So just do our best here to return those IDs to usdt_manager. 
++ */ ++ if (new_free_ids) { ++ memcpy(new_free_ids + man->free_spec_cnt, usdt_link->spec_ids, ++ usdt_link->spec_cnt * sizeof(*usdt_link->spec_ids)); ++ man->free_spec_ids = new_free_ids; ++ man->free_spec_cnt = new_cnt; ++ } ++ } ++ ++ return 0; ++} ++ ++static void bpf_link_usdt_dealloc(struct bpf_link *link) ++{ ++ struct bpf_link_usdt *usdt_link = container_of(link, struct bpf_link_usdt, link); ++ ++ free(usdt_link->spec_ids); ++ free(usdt_link->uprobes); ++ free(usdt_link); ++} ++ ++static size_t specs_hash_fn(const void *key, void *ctx) ++{ ++ const char *s = key; ++ ++ return str_hash(s); ++} ++ ++static bool specs_equal_fn(const void *key1, const void *key2, void *ctx) ++{ ++ const char *s1 = key1; ++ const char *s2 = key2; ++ ++ return strcmp(s1, s2) == 0; ++} ++ ++static int allocate_spec_id(struct usdt_manager *man, struct hashmap *specs_hash, ++ struct bpf_link_usdt *link, struct usdt_target *target, ++ int *spec_id, bool *is_new) ++{ ++ void *tmp; ++ int err; ++ ++ /* check if we already allocated spec ID for this spec string */ ++ if (hashmap__find(specs_hash, target->spec_str, &tmp)) { ++ *spec_id = (long)tmp; ++ *is_new = false; ++ return 0; ++ } ++ ++ /* otherwise it's a new ID that needs to be set up in specs map and ++ * returned back to usdt_manager when USDT link is detached ++ */ ++ tmp = libbpf_reallocarray(link->spec_ids, link->spec_cnt + 1, sizeof(*link->spec_ids)); ++ if (!tmp) ++ return -ENOMEM; ++ link->spec_ids = tmp; ++ ++ /* get next free spec ID, giving preference to free list, if not empty */ ++ if (man->free_spec_cnt) { ++ *spec_id = man->free_spec_ids[man->free_spec_cnt - 1]; ++ ++ /* cache spec ID for current spec string for future lookups */ ++ err = hashmap__add(specs_hash, target->spec_str, (void *)(long)*spec_id); ++ if (err) ++ return err; ++ ++ man->free_spec_cnt--; ++ } else { ++ /* don't allocate spec ID bigger than what fits in specs map */ ++ if (man->next_free_spec_id >= bpf_map__max_entries(man->specs_map)) ++ return -E2BIG; ++ ++ *spec_id = man->next_free_spec_id; ++ ++ /* cache spec ID for current spec string for future lookups */ ++ err = hashmap__add(specs_hash, target->spec_str, (void *)(long)*spec_id); ++ if (err) ++ return err; ++ ++ man->next_free_spec_id++; ++ } ++ ++ /* remember new spec ID in the link for later return back to free list on detach */ ++ link->spec_ids[link->spec_cnt] = *spec_id; ++ link->spec_cnt++; ++ *is_new = true; ++ return 0; ++} ++ ++struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct bpf_program *prog, ++ pid_t pid, const char *path, ++ const char *usdt_provider, const char *usdt_name, ++ __u64 usdt_cookie) ++{ ++ int i, fd, err, spec_map_fd, ip_map_fd; ++ LIBBPF_OPTS(bpf_uprobe_opts, opts); ++ struct hashmap *specs_hash = NULL; ++ struct bpf_link_usdt *link = NULL; ++ struct usdt_target *targets = NULL; ++ size_t target_cnt; ++ Elf *elf; ++ ++ spec_map_fd = bpf_map__fd(man->specs_map); ++ ip_map_fd = bpf_map__fd(man->ip_to_spec_id_map); ++ ++ /* TODO: perform path resolution similar to uprobe's */ ++ fd = open(path, O_RDONLY); ++ if (fd < 0) { ++ err = -errno; ++ pr_warn("usdt: failed to open ELF binary '%s': %d\n", path, err); ++ return libbpf_err_ptr(err); ++ } ++ ++ elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); ++ if (!elf) { ++ err = -EBADF; ++ pr_warn("usdt: failed to parse ELF binary '%s': %s\n", path, elf_errmsg(-1)); ++ goto err_out; ++ } ++ ++ err = sanity_check_usdt_elf(elf, path); ++ if (err) ++ goto err_out; ++ ++ /* normalize PID filter */ ++ if (pid < 0) ++ pid = -1; 
++ else if (pid == 0) ++ pid = getpid(); ++ ++ /* discover USDT in given binary, optionally limiting ++ * activations to a given PID, if pid > 0 ++ */ ++ err = collect_usdt_targets(man, elf, path, pid, usdt_provider, usdt_name, ++ usdt_cookie, &targets, &target_cnt); ++ if (err <= 0) { ++ err = (err == 0) ? -ENOENT : err; ++ goto err_out; ++ } ++ ++ specs_hash = hashmap__new(specs_hash_fn, specs_equal_fn, NULL); ++ if (IS_ERR(specs_hash)) { ++ err = PTR_ERR(specs_hash); ++ goto err_out; ++ } ++ ++ link = calloc(1, sizeof(*link)); ++ if (!link) { ++ err = -ENOMEM; ++ goto err_out; ++ } ++ ++ link->usdt_man = man; ++ link->link.detach = &bpf_link_usdt_detach; ++ link->link.dealloc = &bpf_link_usdt_dealloc; ++ ++ link->uprobes = calloc(target_cnt, sizeof(*link->uprobes)); ++ if (!link->uprobes) { ++ err = -ENOMEM; ++ goto err_out; ++ } ++ ++ for (i = 0; i < target_cnt; i++) { ++ struct usdt_target *target = &targets[i]; ++ struct bpf_link *uprobe_link; ++ bool is_new; ++ int spec_id; ++ ++ /* Spec ID can be either reused or newly allocated. If it is ++ * newly allocated, we'll need to fill out spec map, otherwise ++ * entire spec should be valid and can be just used by a new ++ * uprobe. We reuse spec when USDT arg spec is identical. We ++ * also never share specs between two different USDT ++ * attachments ("links"), so all the reused specs already ++ * share USDT cookie value implicitly. ++ */ ++ err = allocate_spec_id(man, specs_hash, link, target, &spec_id, &is_new); ++ if (err) ++ goto err_out; ++ ++ if (is_new && bpf_map_update_elem(spec_map_fd, &spec_id, &target->spec, BPF_ANY)) { ++ err = -errno; ++ pr_warn("usdt: failed to set USDT spec #%d for '%s:%s' in '%s': %d\n", ++ spec_id, usdt_provider, usdt_name, path, err); ++ goto err_out; ++ } ++ if (!man->has_bpf_cookie && ++ bpf_map_update_elem(ip_map_fd, &target->abs_ip, &spec_id, BPF_NOEXIST)) { ++ err = -errno; ++ if (err == -EEXIST) { ++ pr_warn("usdt: IP collision detected for spec #%d for '%s:%s' in '%s'\n", ++ spec_id, usdt_provider, usdt_name, path); ++ } else { ++ pr_warn("usdt: failed to map IP 0x%lx to spec #%d for '%s:%s' in '%s': %d\n", ++ target->abs_ip, spec_id, usdt_provider, usdt_name, ++ path, err); ++ } ++ goto err_out; ++ } ++ ++ opts.ref_ctr_offset = target->sema_off; ++ opts.bpf_cookie = man->has_bpf_cookie ? spec_id : 0; ++ uprobe_link = bpf_program__attach_uprobe_opts(prog, pid, path, ++ target->rel_ip, &opts); ++ err = libbpf_get_error(uprobe_link); ++ if (err) { ++ pr_warn("usdt: failed to attach uprobe #%d for '%s:%s' in '%s': %d\n", ++ i, usdt_provider, usdt_name, path, err); ++ goto err_out; ++ } ++ ++ link->uprobes[i].link = uprobe_link; ++ link->uprobes[i].abs_ip = target->abs_ip; ++ link->uprobe_cnt++; ++ } ++ ++ free(targets); ++ hashmap__free(specs_hash); ++ elf_end(elf); ++ close(fd); ++ ++ return &link->link; ++ ++err_out: ++ if (link) ++ bpf_link__destroy(&link->link); ++ free(targets); ++ hashmap__free(specs_hash); ++ if (elf) ++ elf_end(elf); ++ close(fd); ++ return libbpf_err_ptr(err); ++} ++ ++/* Parse out USDT ELF note from '.note.stapsdt' section. ++ * Logic inspired by perf's code. 
++ */ ++static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr, ++ const char *data, size_t name_off, size_t desc_off, ++ struct usdt_note *note) ++{ ++ const char *provider, *name, *args; ++ long addrs[3]; ++ size_t len; ++ ++ /* sanity check USDT note name and type first */ ++ if (strncmp(data + name_off, USDT_NOTE_NAME, nhdr->n_namesz) != 0) ++ return -EINVAL; ++ if (nhdr->n_type != USDT_NOTE_TYPE) ++ return -EINVAL; ++ ++ /* sanity check USDT note contents ("description" in ELF terminology) */ ++ len = nhdr->n_descsz; ++ data = data + desc_off; ++ ++ /* +3 is the very minimum required to store three empty strings */ ++ if (len < sizeof(addrs) + 3) ++ return -EINVAL; ++ ++ /* get location, base, and semaphore addrs */ ++ memcpy(&addrs, data, sizeof(addrs)); ++ ++ /* parse string fields: provider, name, args */ ++ provider = data + sizeof(addrs); ++ ++ name = (const char *)memchr(provider, '\0', data + len - provider); ++ if (!name) /* non-zero-terminated provider */ ++ return -EINVAL; ++ name++; ++ if (name >= data + len || *name == '\0') /* missing or empty name */ ++ return -EINVAL; ++ ++ args = memchr(name, '\0', data + len - name); ++ if (!args) /* non-zero-terminated name */ ++ return -EINVAL; ++ ++args; ++ if (args >= data + len) /* missing arguments spec */ ++ return -EINVAL; ++ ++ note->provider = provider; ++ note->name = name; ++ if (*args == '\0' || *args == ':') ++ note->args = ""; ++ else ++ note->args = args; ++ note->loc_addr = addrs[0]; ++ note->base_addr = addrs[1]; ++ note->sema_addr = addrs[2]; ++ ++ return 0; ++} ++ ++static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg); ++ ++static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie) ++{ ++ const char *s; ++ int len; ++ ++ spec->usdt_cookie = usdt_cookie; ++ spec->arg_cnt = 0; ++ ++ s = note->args; ++ while (s[0]) { ++ if (spec->arg_cnt >= USDT_MAX_ARG_CNT) { ++ pr_warn("usdt: too many USDT arguments (> %d) for '%s:%s' with args spec '%s'\n", ++ USDT_MAX_ARG_CNT, note->provider, note->name, note->args); ++ return -E2BIG; ++ } ++ ++ len = parse_usdt_arg(s, spec->arg_cnt, &spec->args[spec->arg_cnt]); ++ if (len < 0) ++ return len; ++ ++ s += len; ++ spec->arg_cnt++; ++ } ++ ++ return 0; ++} ++ ++/* Architecture-specific logic for parsing USDT argument location specs */ ++ ++#if defined(__x86_64__) || defined(__i386__) ++ ++static int calc_pt_regs_off(const char *reg_name) ++{ ++ static struct { ++ const char *names[4]; ++ size_t pt_regs_off; ++ } reg_map[] = { ++#ifdef __x86_64__ ++#define reg_off(reg64, reg32) offsetof(struct pt_regs, reg64) ++#else ++#define reg_off(reg64, reg32) offsetof(struct pt_regs, reg32) ++#endif ++ { {"rip", "eip", "", ""}, reg_off(rip, eip) }, ++ { {"rax", "eax", "ax", "al"}, reg_off(rax, eax) }, ++ { {"rbx", "ebx", "bx", "bl"}, reg_off(rbx, ebx) }, ++ { {"rcx", "ecx", "cx", "cl"}, reg_off(rcx, ecx) }, ++ { {"rdx", "edx", "dx", "dl"}, reg_off(rdx, edx) }, ++ { {"rsi", "esi", "si", "sil"}, reg_off(rsi, esi) }, ++ { {"rdi", "edi", "di", "dil"}, reg_off(rdi, edi) }, ++ { {"rbp", "ebp", "bp", "bpl"}, reg_off(rbp, ebp) }, ++ { {"rsp", "esp", "sp", "spl"}, reg_off(rsp, esp) }, ++#undef reg_off ++#ifdef __x86_64__ ++ { {"r8", "r8d", "r8w", "r8b"}, offsetof(struct pt_regs, r8) }, ++ { {"r9", "r9d", "r9w", "r9b"}, offsetof(struct pt_regs, r9) }, ++ { {"r10", "r10d", "r10w", "r10b"}, offsetof(struct pt_regs, r10) }, ++ { {"r11", "r11d", "r11w", "r11b"}, offsetof(struct pt_regs, r11) }, ++ { {"r12", "r12d", 
"r12w", "r12b"}, offsetof(struct pt_regs, r12) }, ++ { {"r13", "r13d", "r13w", "r13b"}, offsetof(struct pt_regs, r13) }, ++ { {"r14", "r14d", "r14w", "r14b"}, offsetof(struct pt_regs, r14) }, ++ { {"r15", "r15d", "r15w", "r15b"}, offsetof(struct pt_regs, r15) }, ++#endif ++ }; ++ int i, j; ++ ++ for (i = 0; i < ARRAY_SIZE(reg_map); i++) { ++ for (j = 0; j < ARRAY_SIZE(reg_map[i].names); j++) { ++ if (strcmp(reg_name, reg_map[i].names[j]) == 0) ++ return reg_map[i].pt_regs_off; ++ } ++ } ++ ++ pr_warn("usdt: unrecognized register '%s'\n", reg_name); ++ return -ENOENT; ++} ++ ++static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) ++{ ++ char *reg_name = NULL; ++ int arg_sz, len, reg_off; ++ long off; ++ ++ if (sscanf(arg_str, " %d @ %ld ( %%%m[^)] ) %n", &arg_sz, &off, ®_name, &len) == 3) { ++ /* Memory dereference case, e.g., -4@-20(%rbp) */ ++ arg->arg_type = USDT_ARG_REG_DEREF; ++ arg->val_off = off; ++ reg_off = calc_pt_regs_off(reg_name); ++ free(reg_name); ++ if (reg_off < 0) ++ return reg_off; ++ arg->reg_off = reg_off; ++ } else if (sscanf(arg_str, " %d @ %%%ms %n", &arg_sz, ®_name, &len) == 2) { ++ /* Register read case, e.g., -4@%eax */ ++ arg->arg_type = USDT_ARG_REG; ++ arg->val_off = 0; ++ ++ reg_off = calc_pt_regs_off(reg_name); ++ free(reg_name); ++ if (reg_off < 0) ++ return reg_off; ++ arg->reg_off = reg_off; ++ } else if (sscanf(arg_str, " %d @ $%ld %n", &arg_sz, &off, &len) == 2) { ++ /* Constant value case, e.g., 4@$71 */ ++ arg->arg_type = USDT_ARG_CONST; ++ arg->val_off = off; ++ arg->reg_off = 0; ++ } else { ++ pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); ++ return -EINVAL; ++ } ++ ++ arg->arg_signed = arg_sz < 0; ++ if (arg_sz < 0) ++ arg_sz = -arg_sz; ++ ++ switch (arg_sz) { ++ case 1: case 2: case 4: case 8: ++ arg->arg_bitshift = 64 - arg_sz * 8; ++ break; ++ default: ++ pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n", ++ arg_num, arg_str, arg_sz); ++ return -EINVAL; ++ } ++ ++ return len; ++} ++ ++#elif defined(__s390x__) ++ ++/* Do not support __s390__ for now, since user_pt_regs is broken with -m31. 
*/ ++ ++static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) ++{ ++ unsigned int reg; ++ int arg_sz, len; ++ long off; ++ ++ if (sscanf(arg_str, " %d @ %ld ( %%r%u ) %n", &arg_sz, &off, ®, &len) == 3) { ++ /* Memory dereference case, e.g., -2@-28(%r15) */ ++ arg->arg_type = USDT_ARG_REG_DEREF; ++ arg->val_off = off; ++ if (reg > 15) { ++ pr_warn("usdt: unrecognized register '%%r%u'\n", reg); ++ return -EINVAL; ++ } ++ arg->reg_off = offsetof(user_pt_regs, gprs[reg]); ++ } else if (sscanf(arg_str, " %d @ %%r%u %n", &arg_sz, ®, &len) == 2) { ++ /* Register read case, e.g., -8@%r0 */ ++ arg->arg_type = USDT_ARG_REG; ++ arg->val_off = 0; ++ if (reg > 15) { ++ pr_warn("usdt: unrecognized register '%%r%u'\n", reg); ++ return -EINVAL; ++ } ++ arg->reg_off = offsetof(user_pt_regs, gprs[reg]); ++ } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) { ++ /* Constant value case, e.g., 4@71 */ ++ arg->arg_type = USDT_ARG_CONST; ++ arg->val_off = off; ++ arg->reg_off = 0; ++ } else { ++ pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); ++ return -EINVAL; ++ } ++ ++ arg->arg_signed = arg_sz < 0; ++ if (arg_sz < 0) ++ arg_sz = -arg_sz; ++ ++ switch (arg_sz) { ++ case 1: case 2: case 4: case 8: ++ arg->arg_bitshift = 64 - arg_sz * 8; ++ break; ++ default: ++ pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n", ++ arg_num, arg_str, arg_sz); ++ return -EINVAL; ++ } ++ ++ return len; ++} ++ ++#elif defined(__aarch64__) ++ ++static int calc_pt_regs_off(const char *reg_name) ++{ ++ int reg_num; ++ ++ if (sscanf(reg_name, "x%d", ®_num) == 1) { ++ if (reg_num >= 0 && reg_num < 31) ++ return offsetof(struct user_pt_regs, regs[reg_num]); ++ } else if (strcmp(reg_name, "sp") == 0) { ++ return offsetof(struct user_pt_regs, sp); ++ } ++ pr_warn("usdt: unrecognized register '%s'\n", reg_name); ++ return -ENOENT; ++} ++ ++static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) ++{ ++ char *reg_name = NULL; ++ int arg_sz, len, reg_off; ++ long off; ++ ++ if (sscanf(arg_str, " %d @ \[ %m[a-z0-9], %ld ] %n", &arg_sz, ®_name, &off, &len) == 3) { ++ /* Memory dereference case, e.g., -4@[sp, 96] */ ++ arg->arg_type = USDT_ARG_REG_DEREF; ++ arg->val_off = off; ++ reg_off = calc_pt_regs_off(reg_name); ++ free(reg_name); ++ if (reg_off < 0) ++ return reg_off; ++ arg->reg_off = reg_off; ++ } else if (sscanf(arg_str, " %d @ \[ %m[a-z0-9] ] %n", &arg_sz, ®_name, &len) == 2) { ++ /* Memory dereference case, e.g., -4@[sp] */ ++ arg->arg_type = USDT_ARG_REG_DEREF; ++ arg->val_off = 0; ++ reg_off = calc_pt_regs_off(reg_name); ++ free(reg_name); ++ if (reg_off < 0) ++ return reg_off; ++ arg->reg_off = reg_off; ++ } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) { ++ /* Constant value case, e.g., 4@5 */ ++ arg->arg_type = USDT_ARG_CONST; ++ arg->val_off = off; ++ arg->reg_off = 0; ++ } else if (sscanf(arg_str, " %d @ %m[a-z0-9] %n", &arg_sz, ®_name, &len) == 2) { ++ /* Register read case, e.g., -8@x4 */ ++ arg->arg_type = USDT_ARG_REG; ++ arg->val_off = 0; ++ reg_off = calc_pt_regs_off(reg_name); ++ free(reg_name); ++ if (reg_off < 0) ++ return reg_off; ++ arg->reg_off = reg_off; ++ } else { ++ pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); ++ return -EINVAL; ++ } ++ ++ arg->arg_signed = arg_sz < 0; ++ if (arg_sz < 0) ++ arg_sz = -arg_sz; ++ ++ switch (arg_sz) { ++ case 1: case 2: case 4: case 8: ++ arg->arg_bitshift = 64 - arg_sz * 8; ++ break; ++ default: ++ pr_warn("usdt: unsupported 
arg #%d (spec '%s') size: %d\n", ++ arg_num, arg_str, arg_sz); ++ return -EINVAL; ++ } ++ ++ return len; ++} ++ ++#elif defined(__riscv) ++ ++static int calc_pt_regs_off(const char *reg_name) ++{ ++ static struct { ++ const char *name; ++ size_t pt_regs_off; ++ } reg_map[] = { ++ { "ra", offsetof(struct user_regs_struct, ra) }, ++ { "sp", offsetof(struct user_regs_struct, sp) }, ++ { "gp", offsetof(struct user_regs_struct, gp) }, ++ { "tp", offsetof(struct user_regs_struct, tp) }, ++ { "a0", offsetof(struct user_regs_struct, a0) }, ++ { "a1", offsetof(struct user_regs_struct, a1) }, ++ { "a2", offsetof(struct user_regs_struct, a2) }, ++ { "a3", offsetof(struct user_regs_struct, a3) }, ++ { "a4", offsetof(struct user_regs_struct, a4) }, ++ { "a5", offsetof(struct user_regs_struct, a5) }, ++ { "a6", offsetof(struct user_regs_struct, a6) }, ++ { "a7", offsetof(struct user_regs_struct, a7) }, ++ { "s0", offsetof(struct user_regs_struct, s0) }, ++ { "s1", offsetof(struct user_regs_struct, s1) }, ++ { "s2", offsetof(struct user_regs_struct, s2) }, ++ { "s3", offsetof(struct user_regs_struct, s3) }, ++ { "s4", offsetof(struct user_regs_struct, s4) }, ++ { "s5", offsetof(struct user_regs_struct, s5) }, ++ { "s6", offsetof(struct user_regs_struct, s6) }, ++ { "s7", offsetof(struct user_regs_struct, s7) }, ++ { "s8", offsetof(struct user_regs_struct, rv_s8) }, ++ { "s9", offsetof(struct user_regs_struct, s9) }, ++ { "s10", offsetof(struct user_regs_struct, s10) }, ++ { "s11", offsetof(struct user_regs_struct, s11) }, ++ { "t0", offsetof(struct user_regs_struct, t0) }, ++ { "t1", offsetof(struct user_regs_struct, t1) }, ++ { "t2", offsetof(struct user_regs_struct, t2) }, ++ { "t3", offsetof(struct user_regs_struct, t3) }, ++ { "t4", offsetof(struct user_regs_struct, t4) }, ++ { "t5", offsetof(struct user_regs_struct, t5) }, ++ { "t6", offsetof(struct user_regs_struct, t6) }, ++ }; ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(reg_map); i++) { ++ if (strcmp(reg_name, reg_map[i].name) == 0) ++ return reg_map[i].pt_regs_off; ++ } ++ ++ pr_warn("usdt: unrecognized register '%s'\n", reg_name); ++ return -ENOENT; ++} ++ ++static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) ++{ ++ char *reg_name = NULL; ++ int arg_sz, len, reg_off; ++ long off; ++ ++ if (sscanf(arg_str, " %d @ %ld ( %m[a-z0-9] ) %n", &arg_sz, &off, ®_name, &len) == 3) { ++ /* Memory dereference case, e.g., -8@-88(s0) */ ++ arg->arg_type = USDT_ARG_REG_DEREF; ++ arg->val_off = off; ++ reg_off = calc_pt_regs_off(reg_name); ++ free(reg_name); ++ if (reg_off < 0) ++ return reg_off; ++ arg->reg_off = reg_off; ++ } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) { ++ /* Constant value case, e.g., 4@5 */ ++ arg->arg_type = USDT_ARG_CONST; ++ arg->val_off = off; ++ arg->reg_off = 0; ++ } else if (sscanf(arg_str, " %d @ %m[a-z0-9] %n", &arg_sz, ®_name, &len) == 2) { ++ /* Register read case, e.g., -8@a1 */ ++ arg->arg_type = USDT_ARG_REG; ++ arg->val_off = 0; ++ reg_off = calc_pt_regs_off(reg_name); ++ free(reg_name); ++ if (reg_off < 0) ++ return reg_off; ++ arg->reg_off = reg_off; ++ } else { ++ pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); ++ return -EINVAL; ++ } ++ ++ arg->arg_signed = arg_sz < 0; ++ if (arg_sz < 0) ++ arg_sz = -arg_sz; ++ ++ switch (arg_sz) { ++ case 1: case 2: case 4: case 8: ++ arg->arg_bitshift = 64 - arg_sz * 8; ++ break; ++ default: ++ pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n", ++ arg_num, arg_str, arg_sz); ++ return -EINVAL; ++ } 
++ ++ return len; ++} ++ ++#else ++ ++static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) ++{ ++ pr_warn("usdt: libbpf doesn't support USDTs on current architecture\n"); ++ return -ENOTSUP; ++} ++ ++#endif +diff --git a/src/cc/libbpf/travis-ci/diffs/.do_not_use_dot_patch_here b/src/cc/libbpf/travis-ci/diffs/.do_not_use_dot_patch_here +new file mode 100644 +index 0000000..e69de29 +diff --git a/src/cc/libbpf/travis-ci/diffs/001-fix-oob-write-in-test_verifier.diff b/src/cc/libbpf/travis-ci/diffs/001-fix-oob-write-in-test_verifier.diff +new file mode 100644 +index 0000000..eb5ef26 +--- /dev/null ++++ b/src/cc/libbpf/travis-ci/diffs/001-fix-oob-write-in-test_verifier.diff +@@ -0,0 +1,35 @@ ++From: Kumar Kartikeya Dwivedi ++To: bpf@vger.kernel.org ++Cc: Alexei Starovoitov , ++ Daniel Borkmann , ++ Andrii Nakryiko ++Subject: [PATCH bpf-next] selftests/bpf: Fix OOB write in test_verifier ++Date: Tue, 14 Dec 2021 07:18:00 +0530 [thread overview] ++Message-ID: <20211214014800.78762-1-memxor@gmail.com> (raw) ++ ++The commit referenced below added fixup_map_timer support (to create a ++BPF map containing timers), but failed to increase the size of the ++map_fds array, leading to out of bounds write. Fix this by changing ++MAX_NR_MAPS to 22. ++ ++Fixes: e60e6962c503 ("selftests/bpf: Add tests for restricted helpers") ++Signed-off-by: Kumar Kartikeya Dwivedi ++--- ++ tools/testing/selftests/bpf/test_verifier.c | 2 +- ++ 1 file changed, 1 insertion(+), 1 deletion(-) ++ ++diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c ++index ad5d30bafd93..33e2ecb3bef9 100644 ++--- a/tools/testing/selftests/bpf/test_verifier.c +++++ b/tools/testing/selftests/bpf/test_verifier.c ++@@ -54,7 +54,7 @@ ++ #define MAX_INSNS BPF_MAXINSNS ++ #define MAX_TEST_INSNS 1000000 ++ #define MAX_FIXUPS 8 ++-#define MAX_NR_MAPS 21 +++#define MAX_NR_MAPS 22 ++ #define MAX_TEST_RUNS 8 ++ #define POINTER_VALUE 0xcafe4all ++ #define TEST_DATA_LEN 64 ++-- ++2.34.1 +diff --git a/src/cc/libbpf/travis-ci/managers/debian.sh b/src/cc/libbpf/travis-ci/managers/debian.sh +new file mode 100755 +index 0000000..bf1c060 +--- /dev/null ++++ b/src/cc/libbpf/travis-ci/managers/debian.sh +@@ -0,0 +1,90 @@ ++#!/bin/bash ++ ++PHASES=(${@:-SETUP RUN RUN_ASAN CLEANUP}) ++DEBIAN_RELEASE="${DEBIAN_RELEASE:-testing}" ++CONT_NAME="${CONT_NAME:-libbpf-debian-$DEBIAN_RELEASE}" ++ENV_VARS="${ENV_VARS:-}" ++DOCKER_RUN="${DOCKER_RUN:-docker run}" ++REPO_ROOT="${REPO_ROOT:-$PWD}" ++ADDITIONAL_DEPS=(clang pkg-config gcc-10) ++EXTRA_CFLAGS="" ++EXTRA_LDFLAGS="" ++ ++function info() { ++ echo -e "\033[33;1m$1\033[0m" ++} ++ ++function error() { ++ echo -e "\033[31;1m$1\033[0m" ++} ++ ++function docker_exec() { ++ docker exec $ENV_VARS $CONT_NAME "$@" ++} ++ ++set -eu ++ ++source "$(dirname $0)/travis_wait.bash" ++ ++for phase in "${PHASES[@]}"; do ++ case $phase in ++ SETUP) ++ info "Setup phase" ++ info "Using Debian $DEBIAN_RELEASE" ++ ++ docker --version ++ ++ docker pull debian:$DEBIAN_RELEASE ++ info "Starting container $CONT_NAME" ++ $DOCKER_RUN -v $REPO_ROOT:/build:rw \ ++ -w /build --privileged=true --name $CONT_NAME \ ++ -dit --net=host debian:$DEBIAN_RELEASE /bin/bash ++ echo -e "::group::Build Env Setup" ++ docker_exec bash -c "echo deb-src http://deb.debian.org/debian $DEBIAN_RELEASE main >>/etc/apt/sources.list" ++ docker_exec apt-get -y update ++ docker_exec apt-get -y install aptitude ++ docker_exec aptitude -y build-dep libelf-dev ++ docker_exec aptitude -y install 
libelf-dev ++ docker_exec aptitude -y install "${ADDITIONAL_DEPS[@]}" ++ echo -e "::endgroup::" ++ ;; ++ RUN|RUN_CLANG|RUN_GCC10|RUN_ASAN|RUN_CLANG_ASAN|RUN_GCC10_ASAN) ++ CC="cc" ++ if [[ "$phase" = *"CLANG"* ]]; then ++ ENV_VARS="-e CC=clang -e CXX=clang++" ++ CC="clang" ++ elif [[ "$phase" = *"GCC10"* ]]; then ++ ENV_VARS="-e CC=gcc-10 -e CXX=g++-10" ++ CC="gcc-10" ++ else ++ EXTRA_CFLAGS="${EXTRA_CFLAGS} -Wno-stringop-truncation" ++ fi ++ if [[ "$phase" = *"ASAN"* ]]; then ++ EXTRA_CFLAGS="${EXTRA_CFLAGS} -fsanitize=address,undefined" ++ EXTRA_LDFLAGS="${EXTRA_LDFLAGS} -fsanitize=address,undefined" ++ fi ++ docker_exec mkdir build install ++ docker_exec ${CC} --version ++ info "build" ++ docker_exec make -j$((4*$(nproc))) EXTRA_CFLAGS="${EXTRA_CFLAGS}" EXTRA_LDFLAGS="${EXTRA_LDFLAGS}" -C ./src -B OBJDIR=../build ++ info "ldd build/libbpf.so:" ++ docker_exec ldd build/libbpf.so ++ if ! docker_exec ldd build/libbpf.so | grep -q libelf; then ++ error "No reference to libelf.so in libbpf.so!" ++ exit 1 ++ fi ++ info "install" ++ docker_exec make -j$((4*$(nproc))) -C src OBJDIR=../build DESTDIR=../install install ++ info "link binary" ++ docker_exec bash -c "EXTRA_CFLAGS=\"${EXTRA_CFLAGS}\" EXTRA_LDFLAGS=\"${EXTRA_LDFLAGS}\" ./travis-ci/managers/test_compile.sh" ++ ;; ++ CLEANUP) ++ info "Cleanup phase" ++ docker stop $CONT_NAME ++ docker rm -f $CONT_NAME ++ ;; ++ *) ++ echo >&2 "Unknown phase '$phase'" ++ exit 1 ++ esac ++done +diff --git a/src/cc/libbpf/travis-ci/managers/test_compile.sh b/src/cc/libbpf/travis-ci/managers/test_compile.sh +new file mode 100755 +index 0000000..094ba3e +--- /dev/null ++++ b/src/cc/libbpf/travis-ci/managers/test_compile.sh +@@ -0,0 +1,15 @@ ++#!/bin/bash ++set -euox pipefail ++ ++EXTRA_CFLAGS=${EXTRA_CFLAGS:-} ++EXTRA_LDFLAGS=${EXTRA_LDFLAGS:-} ++ ++cat << EOF > main.c ++#include ++int main() { ++ return bpf_object__open(0) < 0; ++} ++EOF ++ ++# static linking ++${CC:-cc} ${EXTRA_CFLAGS} ${EXTRA_LDFLAGS} -o main -I./include/uapi -I./install/usr/include main.c ./build/libbpf.a -lelf -lz +diff --git a/src/cc/libbpf/travis-ci/managers/travis_wait.bash b/src/cc/libbpf/travis-ci/managers/travis_wait.bash +new file mode 100644 +index 0000000..acf6ad1 +--- /dev/null ++++ b/src/cc/libbpf/travis-ci/managers/travis_wait.bash +@@ -0,0 +1,61 @@ ++# This was borrowed from https://github.com/travis-ci/travis-build/tree/master/lib/travis/build/bash ++# to get around https://github.com/travis-ci/travis-ci/issues/9979. It should probably be removed ++# as soon as Travis CI has started to provide an easy way to export the functions to bash scripts. ++ ++travis_jigger() { ++ local cmd_pid="${1}" ++ shift ++ local timeout="${1}" ++ shift ++ local count=0 ++ ++ echo -e "\\n" ++ ++ while [[ "${count}" -lt "${timeout}" ]]; do ++ count="$((count + 1))" ++ echo -ne "Still running (${count} of ${timeout}): ${*}\\r" ++ sleep 60 ++ done ++ ++ echo -e "\\n${ANSI_RED}Timeout (${timeout} minutes) reached. 
Terminating \"${*}\"${ANSI_RESET}\\n" ++ kill -9 "${cmd_pid}" ++} ++ ++travis_wait() { ++ local timeout="${1}" ++ ++ if [[ "${timeout}" =~ ^[0-9]+$ ]]; then ++ shift ++ else ++ timeout=20 ++ fi ++ ++ local cmd=("${@}") ++ local log_file="travis_wait_${$}.log" ++ ++ "${cmd[@]}" &>"${log_file}" & ++ local cmd_pid="${!}" ++ ++ travis_jigger "${!}" "${timeout}" "${cmd[@]}" & ++ local jigger_pid="${!}" ++ local result ++ ++ { ++ set +e ++ wait "${cmd_pid}" 2>/dev/null ++ result="${?}" ++ ps -p"${jigger_pid}" &>/dev/null && kill "${jigger_pid}" ++ set -e ++ } ++ ++ if [[ "${result}" -eq 0 ]]; then ++ echo -e "\\n${ANSI_GREEN}The command ${cmd[*]} exited with ${result}.${ANSI_RESET}" ++ else ++ echo -e "\\n${ANSI_RED}The command ${cmd[*]} exited with ${result}.${ANSI_RESET}" ++ fi ++ ++ echo -e "\\n${ANSI_GREEN}Log:${ANSI_RESET}\\n" ++ cat "${log_file}" ++ ++ return "${result}" ++} +diff --git a/src/cc/libbpf/travis-ci/managers/ubuntu.sh b/src/cc/libbpf/travis-ci/managers/ubuntu.sh +new file mode 100755 +index 0000000..7fe1b3f +--- /dev/null ++++ b/src/cc/libbpf/travis-ci/managers/ubuntu.sh +@@ -0,0 +1,24 @@ ++#!/bin/bash ++set -eux ++ ++RELEASE="focal" ++ ++apt-get update ++apt-get install -y pkg-config ++ ++source "$(dirname $0)/travis_wait.bash" ++ ++cd $REPO_ROOT ++ ++EXTRA_CFLAGS="-Werror -Wall -fsanitize=address,undefined" ++EXTRA_LDFLAGS="-Werror -Wall -fsanitize=address,undefined" ++mkdir build install ++cc --version ++make -j$((4*$(nproc))) EXTRA_CFLAGS="${EXTRA_CFLAGS}" EXTRA_LDFLAGS="${EXTRA_LDFLAGS}" -C ./src -B OBJDIR=../build ++ldd build/libbpf.so ++if ! ldd build/libbpf.so | grep -q libelf; then ++ echo "FAIL: No reference to libelf.so in libbpf.so!" ++ exit 1 ++fi ++make -j$((4*$(nproc))) -C src OBJDIR=../build DESTDIR=../install install ++EXTRA_CFLAGS=${EXTRA_CFLAGS} EXTRA_LDFLAGS=${EXTRA_LDFLAGS} $(dirname $0)/test_compile.sh +diff --git a/src/cc/libbpf/travis-ci/rootfs/mkrootfs_arch.sh b/src/cc/libbpf/travis-ci/rootfs/mkrootfs_arch.sh +new file mode 100755 +index 0000000..90e2d58 +--- /dev/null ++++ b/src/cc/libbpf/travis-ci/rootfs/mkrootfs_arch.sh +@@ -0,0 +1,107 @@ ++#!/bin/bash ++ ++# This script is based on drgn script for generating Arch Linux bootstrap ++# images. ++# https://github.com/osandov/drgn/blob/master/scripts/vmtest/mkrootfs.sh ++ ++set -euo pipefail ++ ++usage () { ++ USAGE_STRING="usage: $0 [NAME] ++ $0 -h ++ ++Build an Arch Linux root filesystem image for testing libbpf in a virtual ++machine. ++ ++The image is generated as a zstd-compressed tarball. ++ ++This must be run as root, as most of the installation is done in a chroot. 
++ ++Arguments: ++ NAME name of generated image file (default: ++ libbpf-vmtest-rootfs-\$DATE.tar.zst) ++ ++Options: ++ -h display this help message and exit" ++ ++ case "$1" in ++ out) ++ echo "$USAGE_STRING" ++ exit 0 ++ ;; ++ err) ++ echo "$USAGE_STRING" >&2 ++ exit 1 ++ ;; ++ esac ++} ++ ++while getopts "h" OPT; do ++ case "$OPT" in ++ h) ++ usage out ++ ;; ++ *) ++ usage err ++ ;; ++ esac ++done ++if [[ $OPTIND -eq $# ]]; then ++ NAME="${!OPTIND}" ++elif [[ $OPTIND -gt $# ]]; then ++ NAME="libbpf-vmtest-rootfs-$(date +%Y.%m.%d).tar.zst" ++else ++ usage err ++fi ++ ++pacman_conf= ++root= ++trap 'rm -rf "$pacman_conf" "$root"' EXIT ++pacman_conf="$(mktemp -p "$PWD")" ++cat > "$pacman_conf" << "EOF" ++[options] ++Architecture = x86_64 ++CheckSpace ++SigLevel = Required DatabaseOptional ++[core] ++Include = /etc/pacman.d/mirrorlist ++[extra] ++Include = /etc/pacman.d/mirrorlist ++[community] ++Include = /etc/pacman.d/mirrorlist ++EOF ++root="$(mktemp -d -p "$PWD")" ++ ++packages=( ++ busybox ++ # libbpf dependencies. ++ libelf ++ zlib ++ # selftests test_progs dependencies. ++ binutils ++ elfutils ++ ethtool ++ glibc ++ iproute2 ++ # selftests test_verifier dependencies. ++ libcap ++) ++ ++pacstrap -C "$pacman_conf" -cGM "$root" "${packages[@]}" ++ ++# Remove unnecessary files from the chroot. ++ ++# We don't need the pacman databases anymore. ++rm -rf "$root/var/lib/pacman/sync/" ++# We don't need D, Fortran, or Go. ++ rm -f "$root/usr/lib/libgdruntime."* \ ++ "$root/usr/lib/libgphobos."* \ ++ "$root/usr/lib/libgfortran."* \ ++ "$root/usr/lib/libgo."* ++# We don't need any documentation. ++rm -rf "$root/usr/share/{doc,help,man,texinfo}" ++ ++"$(dirname "$0")"/mkrootfs_tweak.sh "$root" ++ ++tar -C "$root" -c . | zstd -T0 -19 -o "$NAME" ++chmod 644 "$NAME" +diff --git a/src/cc/libbpf/travis-ci/rootfs/mkrootfs_debian.sh b/src/cc/libbpf/travis-ci/rootfs/mkrootfs_debian.sh +new file mode 100755 +index 0000000..6dba868 +--- /dev/null ++++ b/src/cc/libbpf/travis-ci/rootfs/mkrootfs_debian.sh +@@ -0,0 +1,52 @@ ++#!/bin/bash ++# This script builds a Debian root filesystem image for testing libbpf in a ++# virtual machine. Requires debootstrap >= 1.0.95 and zstd. ++ ++# Use e.g. ./mkrootfs_debian.sh --arch=s390x to generate a rootfs for a ++# foreign architecture. Requires configured binfmt_misc, e.g. using ++# Debian/Ubuntu's qemu-user-binfmt package or ++# https://github.com/multiarch/qemu-user-static. ++ ++set -e -u -x -o pipefail ++ ++# Check whether we are root now in order to avoid confusing errors later. ++if [ "$(id -u)" != 0 ]; then ++ echo "$0 must run as root" >&2 ++ exit 1 ++fi ++ ++# Create a working directory and schedule its deletion. ++root=$(mktemp -d -p "$PWD") ++trap 'rm -r "$root"' EXIT ++ ++# Install packages. ++packages=( ++ binutils ++ busybox ++ elfutils ++ ethtool ++ iproute2 ++ iptables ++ libcap2 ++ libelf1 ++ strace ++ zlib1g ++) ++packages=$(IFS=, && echo "${packages[*]}") ++debootstrap --include="$packages" --variant=minbase "$@" bookworm "$root" ++ ++# Remove the init scripts (tests use their own). Also remove various ++# unnecessary files in order to save space. ++rm -rf \ ++ "$root"/etc/rcS.d \ ++ "$root"/usr/share/{doc,info,locale,man,zoneinfo} \ ++ "$root"/var/cache/apt/archives/* \ ++ "$root"/var/lib/apt/lists/* ++ ++# Apply common tweaks. ++"$(dirname "$0")"/mkrootfs_tweak.sh "$root" ++ ++# Save the result. ++name="libbpf-vmtest-rootfs-$(date +%Y.%m.%d).tar.zst" ++rm -f "$name" ++tar -C "$root" -c . 
| zstd -T0 -19 -o "$name" +diff --git a/src/cc/libbpf/travis-ci/rootfs/mkrootfs_tweak.sh b/src/cc/libbpf/travis-ci/rootfs/mkrootfs_tweak.sh +new file mode 100755 +index 0000000..2aafca1 +--- /dev/null ++++ b/src/cc/libbpf/travis-ci/rootfs/mkrootfs_tweak.sh +@@ -0,0 +1,61 @@ ++#!/bin/bash ++# This script prepares a mounted root filesystem for testing libbpf in a virtual ++# machine. ++set -e -u -x -o pipefail ++root=$1 ++shift ++ ++chroot "${root}" /bin/busybox --install ++ ++cat > "$root/etc/inittab" << "EOF" ++::sysinit:/etc/init.d/rcS ++::ctrlaltdel:/sbin/reboot ++::shutdown:/sbin/swapoff -a ++::shutdown:/bin/umount -a -r ++::restart:/sbin/init ++EOF ++chmod 644 "$root/etc/inittab" ++ ++mkdir -m 755 -p "$root/etc/init.d" "$root/etc/rcS.d" ++cat > "$root/etc/rcS.d/S10-mount" << "EOF" ++#!/bin/sh ++ ++set -eux ++ ++/bin/mount proc /proc -t proc ++ ++# Mount devtmpfs if not mounted ++if [[ -z $(/bin/mount -t devtmpfs) ]]; then ++ /bin/mount devtmpfs /dev -t devtmpfs ++fi ++ ++/bin/mount sysfs /sys -t sysfs ++/bin/mount bpffs /sys/fs/bpf -t bpf ++/bin/mount debugfs /sys/kernel/debug -t debugfs ++ ++echo 'Listing currently mounted file systems' ++/bin/mount ++EOF ++chmod 755 "$root/etc/rcS.d/S10-mount" ++ ++cat > "$root/etc/rcS.d/S40-network" << "EOF" ++#!/bin/sh ++ ++set -eux ++ ++ip link set lo up ++EOF ++chmod 755 "$root/etc/rcS.d/S40-network" ++ ++cat > "$root/etc/init.d/rcS" << "EOF" ++#!/bin/sh ++ ++set -eux ++ ++for path in /etc/rcS.d/S*; do ++ [ -x "$path" ] && "$path" ++done ++EOF ++chmod 755 "$root/etc/init.d/rcS" ++ ++chmod 755 "$root" +diff --git a/src/cc/libbpf/travis-ci/rootfs/s390x-self-hosted-builder/README.md b/src/cc/libbpf/travis-ci/rootfs/s390x-self-hosted-builder/README.md +new file mode 100644 +index 0000000..cfc1466 +--- /dev/null ++++ b/src/cc/libbpf/travis-ci/rootfs/s390x-self-hosted-builder/README.md +@@ -0,0 +1,107 @@ ++# IBM Z self-hosted builder ++ ++libbpf CI uses an IBM-provided z15 self-hosted builder. There are no IBM Z ++builds of GitHub (GH) Actions runner, and stable qemu-user has problems with .NET ++apps, so the builder runs the x86_64 runner version with qemu-user built from ++the master branch. ++ ++We are currently supporting runners for the following repositories: ++* libbpf/libbpf ++* kernel-patches/bpf ++* kernel-patches/vmtest ++ ++Below instructions are directly applicable to libbpf, and require minor ++modifications for kernel-patches repos. Currently, qemu-user-static Docker ++image is shared between all GitHub runners, but separate actions-runner-\* ++service / Docker image is created for each runner type. ++ ++## Configuring the builder. ++ ++### Install prerequisites. ++ ++``` ++$ sudo apt install -y docker.io # Ubuntu ++``` ++ ++### Add services. ++ ++``` ++$ sudo cp *.service /etc/systemd/system/ ++$ sudo systemctl daemon-reload ++``` ++ ++### Create a config file. ++ ++``` ++$ sudo tee /etc/actions-runner-libbpf ++repo=/ ++access_token= ++``` ++ ++Access token should have the repo scope, consult ++https://docs.github.com/en/rest/reference/actions#create-a-registration-token-for-a-repository ++for details. ++ ++### Autostart the x86_64 emulation support. ++ ++This step is important, you would not be able to build docker container ++without having this service running. If container build fails, make sure ++service is running properly. ++ ++``` ++$ sudo systemctl enable --now qemu-user-static ++``` ++ ++### Autostart the runner. 
++ ++``` ++$ sudo systemctl enable --now actions-runner-libbpf ++``` ++ ++## Rebuilding the image ++ ++In order to update the `iiilinuxibmcom/actions-runner-libbpf` image, e.g. to ++get the latest OS security fixes, use the following commands: ++ ++``` ++$ sudo docker build \ ++ --pull \ ++ -f actions-runner-libbpf.Dockerfile \ ++ -t iiilinuxibmcom/actions-runner-libbpf \ ++ . ++$ sudo systemctl restart actions-runner-libbpf ++``` ++ ++## Removing persistent data ++ ++The `actions-runner-libbpf` service stores various temporary data, such as ++runner registration information, work directories and logs, in the ++`actions-runner-libbpf` volume. In order to remove it and start from scratch, ++e.g. when upgrading the runner or switching it to a different repository, use ++the following commands: ++ ++``` ++$ sudo systemctl stop actions-runner-libbpf ++$ sudo docker rm -f actions-runner-libbpf ++$ sudo docker volume rm actions-runner-libbpf ++``` ++ ++## Troubleshooting ++ ++In order to check if service is running, use the following command: ++ ++``` ++$ sudo systemctl status ++``` ++ ++In order to get logs for service: ++ ++``` ++$ journalctl -u ++``` ++ ++In order to check which containers are currently active: ++ ++``` ++$ sudo docker ps ++``` +diff --git a/src/cc/libbpf/travis-ci/rootfs/s390x-self-hosted-builder/actions-runner-libbpf.Dockerfile b/src/cc/libbpf/travis-ci/rootfs/s390x-self-hosted-builder/actions-runner-libbpf.Dockerfile +new file mode 100644 +index 0000000..d830657 +--- /dev/null ++++ b/src/cc/libbpf/travis-ci/rootfs/s390x-self-hosted-builder/actions-runner-libbpf.Dockerfile +@@ -0,0 +1,50 @@ ++# Self-Hosted IBM Z Github Actions Runner. ++ ++# Temporary image: amd64 dependencies. ++FROM amd64/ubuntu:20.04 as ld-prefix ++ENV DEBIAN_FRONTEND=noninteractive ++RUN apt-get update && apt-get -y install ca-certificates libicu66 libssl1.1 ++ ++# Main image. ++FROM s390x/ubuntu:20.04 ++ ++# Packages for libbpf testing that are not installed by .github/actions/setup. ++ENV DEBIAN_FRONTEND=noninteractive ++RUN apt-get update && apt-get -y install \ ++ bc \ ++ bison \ ++ cmake \ ++ cpu-checker \ ++ curl \ ++ flex \ ++ git \ ++ jq \ ++ linux-image-generic \ ++ qemu-system-s390x \ ++ rsync \ ++ software-properties-common \ ++ sudo \ ++ tree ++ ++# amd64 dependencies. ++COPY --from=ld-prefix / /usr/x86_64-linux-gnu/ ++RUN ln -fs ../lib/x86_64-linux-gnu/ld-linux-x86-64.so.2 /usr/x86_64-linux-gnu/lib64/ ++RUN ln -fs /etc/resolv.conf /usr/x86_64-linux-gnu/etc/ ++ENV QEMU_LD_PREFIX=/usr/x86_64-linux-gnu ++ ++# amd64 Github Actions Runner. ++ARG version=2.285.0 ++RUN useradd -m actions-runner ++RUN echo "actions-runner ALL=(ALL) NOPASSWD: ALL" >>/etc/sudoers ++RUN echo "Defaults env_keep += \"DEBIAN_FRONTEND\"" >>/etc/sudoers ++RUN usermod -a -G kvm actions-runner ++USER actions-runner ++ENV USER=actions-runner ++WORKDIR /home/actions-runner ++RUN curl -L https://github.com/actions/runner/releases/download/v${version}/actions-runner-linux-x64-${version}.tar.gz | tar -xz ++VOLUME /home/actions-runner ++ ++# Scripts. 
++COPY fs/ / ++ENTRYPOINT ["/usr/bin/entrypoint"] ++CMD ["/usr/bin/actions-runner"] +diff --git a/src/cc/libbpf/travis-ci/rootfs/s390x-self-hosted-builder/actions-runner-libbpf.service b/src/cc/libbpf/travis-ci/rootfs/s390x-self-hosted-builder/actions-runner-libbpf.service +new file mode 100644 +index 0000000..88e0237 +--- /dev/null ++++ b/src/cc/libbpf/travis-ci/rootfs/s390x-self-hosted-builder/actions-runner-libbpf.service +@@ -0,0 +1,24 @@ ++[Unit] ++Description=Self-Hosted IBM Z Github Actions Runner ++Wants=qemu-user-static ++After=qemu-user-static ++StartLimitIntervalSec=0 ++ ++[Service] ++Type=simple ++Restart=always ++ExecStart=/usr/bin/docker run \ ++ --device=/dev/kvm \ ++ --env-file=/etc/actions-runner-libbpf \ ++ --init \ ++ --interactive \ ++ --name=actions-runner-libbpf \ ++ --rm \ ++ --volume=actions-runner-libbpf:/home/actions-runner \ ++ iiilinuxibmcom/actions-runner-libbpf ++ExecStop=/bin/sh -c "docker exec actions-runner-libbpf kill -INT -- -1" ++ExecStop=/bin/sh -c "docker wait actions-runner-libbpf" ++ExecStop=/bin/sh -c "docker rm actions-runner-libbpf" ++ ++[Install] ++WantedBy=multi-user.target +diff --git a/src/cc/libbpf/travis-ci/rootfs/s390x-self-hosted-builder/fs/usr/bin/actions-runner b/src/cc/libbpf/travis-ci/rootfs/s390x-self-hosted-builder/fs/usr/bin/actions-runner +new file mode 100755 +index 0000000..c9d8227 +--- /dev/null ++++ b/src/cc/libbpf/travis-ci/rootfs/s390x-self-hosted-builder/fs/usr/bin/actions-runner +@@ -0,0 +1,40 @@ ++#!/bin/bash ++ ++# ++# Ephemeral runner startup script. ++# ++# Expects the following environment variables: ++# ++# - repo=/ ++# - access_token= ++# ++ ++set -e -u ++ ++# Check the cached registration token. ++token_file=registration-token.json ++set +e ++expires_at=$(jq --raw-output .expires_at "$token_file" 2>/dev/null) ++status=$? ++set -e ++if [[ $status -ne 0 || $(date +%s) -ge $(date -d "$expires_at" +%s) ]]; then ++ # Refresh the cached registration token. ++ curl \ ++ -X POST \ ++ -H "Accept: application/vnd.github.v3+json" \ ++ -H "Authorization: token $access_token" \ ++ "https://api.github.com/repos/$repo/actions/runners/registration-token" \ ++ -o "$token_file" ++fi ++ ++# (Re-)register the runner. ++registration_token=$(jq --raw-output .token "$token_file") ++./config.sh remove --token "$registration_token" || true ++./config.sh \ ++ --url "https://github.com/$repo" \ ++ --token "$registration_token" \ ++ --labels z15 \ ++ --ephemeral ++ ++# Run one job. ++./run.sh +diff --git a/src/cc/libbpf/travis-ci/rootfs/s390x-self-hosted-builder/fs/usr/bin/entrypoint b/src/cc/libbpf/travis-ci/rootfs/s390x-self-hosted-builder/fs/usr/bin/entrypoint +new file mode 100755 +index 0000000..03cb61c +--- /dev/null ++++ b/src/cc/libbpf/travis-ci/rootfs/s390x-self-hosted-builder/fs/usr/bin/entrypoint +@@ -0,0 +1,35 @@ ++#!/bin/bash ++ ++# ++# Container entrypoint that waits for all spawned processes. ++# ++ ++set -e -u ++ ++# /dev/kvm has host permissions, fix it. ++if [ -e /dev/kvm ]; then ++ sudo chown root:kvm /dev/kvm ++fi ++ ++# Create a FIFO and start reading from its read end. ++tempdir=$(mktemp -d "/tmp/done.XXXXXXXXXX") ++trap 'rm -r "$tempdir"' EXIT ++done="$tempdir/pipe" ++mkfifo "$done" ++cat "$done" & waiter=$! ++ ++# Start the workload. Its descendants will inherit the FIFO's write end. ++status=0 ++if [ "$#" -eq 0 ]; then ++ bash 9>"$done" || status=$? ++else ++ "$@" 9>"$done" || status=$? ++fi ++ ++# When the workload and all of its descendants exit, the FIFO's write end will ++# be closed and `cat "$done"` will exit. 
Wait until it happens. This is needed ++# in order to handle SelfUpdater, which the workload may start in background ++# before exiting. ++wait "$waiter" ++ ++exit "$status" +diff --git a/src/cc/libbpf/travis-ci/rootfs/s390x-self-hosted-builder/qemu-user-static.service b/src/cc/libbpf/travis-ci/rootfs/s390x-self-hosted-builder/qemu-user-static.service +new file mode 100644 +index 0000000..301f3ed +--- /dev/null ++++ b/src/cc/libbpf/travis-ci/rootfs/s390x-self-hosted-builder/qemu-user-static.service +@@ -0,0 +1,11 @@ ++[Unit] ++Description=Support for transparent execution of non-native binaries with QEMU user emulation ++ ++[Service] ++Type=oneshot ++# The source code for iiilinuxibmcom/qemu-user-static is at https://github.com/iii-i/qemu-user-static/tree/v6.1.0-1 ++# TODO: replace it with multiarch/qemu-user-static once version >6.1 is available ++ExecStart=/usr/bin/docker run --rm --interactive --privileged iiilinuxibmcom/qemu-user-static --reset -p yes ++ ++[Install] ++WantedBy=multi-user.target +diff --git a/src/cc/libbpf/travis-ci/vmtest/configs/blacklist/BLACKLIST-5.5.0 b/src/cc/libbpf/travis-ci/vmtest/configs/blacklist/BLACKLIST-5.5.0 +new file mode 100644 +index 0000000..d32b52f +--- /dev/null ++++ b/src/cc/libbpf/travis-ci/vmtest/configs/blacklist/BLACKLIST-5.5.0 +@@ -0,0 +1,118 @@ ++# This file is not used and is there for historic purposes only. ++# See WHITELIST-5.5.0 instead. ++ ++# PERMANENTLY DISABLED ++align # verifier output format changed ++atomics # new atomic operations (v5.12+) ++atomic_bounds # new atomic operations (v5.12+) ++bind_perm # changed semantics of return values (v5.12+) ++bpf_cookie # 5.15+ ++bpf_iter # bpf_iter support is missing ++bpf_obj_id # bpf_link support missing for GET_OBJ_INFO, GET_FD_BY_ID, etc ++bpf_tcp_ca # STRUCT_OPS is missing ++btf_map_in_map # inner map leak fixed in 5.8 ++btf_skc_cls_ingress # v5.10+ functionality ++cg_storage_multi # v5.9+ functionality ++cgroup_attach_multi # BPF_F_REPLACE_PROG missing ++cgroup_link # LINK_CREATE is missing ++cgroup_skb_sk_lookup # bpf_sk_lookup_tcp() helper is missing ++check_mtu # missing BPF helper (v5.12+) ++cls_redirect # bpf_csum_level() helper is missing ++connect_force_port # cgroup/get{peer,sock}name{4,6} support is missing ++d_path # v5.10+ feature ++enable_stats # BPF_ENABLE_STATS support is missing ++fentry_fexit # bpf_prog_test_tracing missing ++fentry_test # bpf_prog_test_tracing missing ++fexit_bpf2bpf # freplace is missing ++fexit_sleep # relies on bpf_trampoline fix in 5.12+ ++fexit_test # bpf_prog_test_tracing missing ++flow_dissector # bpf_link-based flow dissector is in 5.8+ ++flow_dissector_reattach ++for_each # v5.12+ ++get_func_ip_test # v5.15+ ++get_stack_raw_tp # exercising BPF verifier bug causing infinite loop ++hash_large_key # v5.11+ ++ima # v5.11+ ++kfree_skb # 32-bit pointer arith in test_pkt_access ++ksyms # __start_BTF has different name ++kfunc_call # v5.13+ ++link_pinning # bpf_link is missing ++linked_vars # v5.13+ ++load_bytes_relative # new functionality in 5.8 ++lookup_and_delete # v5.14+ ++map_init # per-CPU LRU missing ++map_ptr # test uses BPF_MAP_TYPE_RINGBUF, added in 5.8 ++metadata # v5.10+ ++migrate_reuseport # v5.14+ ++mmap # 5.5 kernel is too permissive with re-mmaping ++modify_return # fmod_ret support is missing ++module_attach # module BTF support missing (v5.11+) ++netcnt ++netns_cookie # v5.15+ ++ns_current_pid_tgid # bpf_get_ns_current_pid_tgid() helper is missing ++pe_preserve_elems # v5.10+ ++perf_branches # bpf_read_branch_records() helper is 
missing ++perf_link # v5.15+ ++pkt_access # 32-bit pointer arith in test_pkt_access ++probe_read_user_str # kernel bug with garbage bytes at the end ++prog_run_xattr # 32-bit pointer arith in test_pkt_access ++raw_tp_test_run # v5.10+ ++recursion # v5.12+ ++ringbuf # BPF_MAP_TYPE_RINGBUF is supported in 5.8+ ++ ++# bug in verifier w/ tracking references ++#reference_tracking/classifier/sk_lookup_success ++reference_tracking ++ ++select_reuseport # UDP support is missing ++send_signal # bpf_send_signal_thread() helper is missing ++sk_assign # bpf_sk_assign helper missing ++sk_lookup # v5.9+ ++sk_storage_tracing # missing bpf_sk_storage_get() helper ++skb_ctx # ctx_{size, }_{in, out} in BPF_PROG_TEST_RUN is missing ++skb_helpers # helpers added in 5.8+ ++skeleton # creates too big ARRAY map ++snprintf # v5.13+ ++snprintf_btf # v5.10+ ++sock_fields # v5.10+ ++socket_cookie # v5.12+ ++sockmap_basic # uses new socket fields, 5.8+ ++sockmap_listen # no listen socket supportin SOCKMAP ++sockopt_sk ++sockopt_qos_to_cc # v5.15+ ++stacktrace_build_id # v5.9+ ++stack_var_off # v5.12+ ++syscall # v5.14+ ++task_local_storage # v5.12+ ++task_pt_regs # v5.15+ ++tcp_hdr_options # v5.10+, new TCP header options feature in BPF ++tcpbpf_user # LINK_CREATE is missing ++tc_redirect # v5.14+ ++test_bpffs # v5.10+, new CONFIG_BPF_PRELOAD=y and CONFIG_BPF_PRELOAD_UMG=y|m ++test_bprm_opts # v5.11+ ++test_global_funcs # kernel doesn't support BTF linkage=global on FUNCs ++test_local_storage # v5.10+ feature ++test_lsm # no BPF_LSM support ++test_overhead # no fmod_ret support ++test_profiler # needs verifier logic improvements from v5.10+ ++test_skb_pkt_end # v5.11+ ++timer # v5.15+ ++timer_mim # v5.15+ ++trace_ext # v5.10+ ++trace_printk # v5.14+ ++trampoline_count # v5.12+ have lower allowed limits ++udp_limit # no cgroup/sock_release BPF program type (5.9+) ++varlen # verifier bug fixed in later kernels ++vmlinux # hrtimer_nanosleep() signature changed incompatibly ++xdp_adjust_tail # new XDP functionality added in 5.8 ++xdp_attach # IFLA_XDP_EXPECTED_FD support is missing ++xdp_bonding # v5.15+ ++xdp_bpf2bpf # freplace is missing ++xdp_context_test_run # v5.15+ ++xdp_cpumap_attach # v5.9+ ++xdp_devmap_attach # new feature in 5.8 ++xdp_link # v5.9+ ++ ++# SUBTESTS FAILING (block entire test until blocking subtests works properly) ++btf # "size check test", "func (Non zero vlen)" ++tailcalls # tailcall_bpf2bpf_1, tailcall_bpf2bpf_2, tailcall_bpf2bpf_3 +diff --git a/src/cc/libbpf/travis-ci/vmtest/configs/blacklist/BLACKLIST-latest b/src/cc/libbpf/travis-ci/vmtest/configs/blacklist/BLACKLIST-latest +new file mode 100644 +index 0000000..939de57 +--- /dev/null ++++ b/src/cc/libbpf/travis-ci/vmtest/configs/blacklist/BLACKLIST-latest +@@ -0,0 +1,6 @@ ++# TEMPORARY ++get_stack_raw_tp # spams with kernel warnings until next bpf -> bpf-next merge ++stacktrace_build_id_nmi ++stacktrace_build_id ++task_fd_query_rawtp ++varlen +diff --git a/src/cc/libbpf/travis-ci/vmtest/configs/blacklist/BLACKLIST-latest.s390x b/src/cc/libbpf/travis-ci/vmtest/configs/blacklist/BLACKLIST-latest.s390x +new file mode 100644 +index 0000000..e33cab3 +--- /dev/null ++++ b/src/cc/libbpf/travis-ci/vmtest/configs/blacklist/BLACKLIST-latest.s390x +@@ -0,0 +1,67 @@ ++# TEMPORARY ++atomics # attach(add): actual -524 <= expected 0 (trampoline) ++bpf_iter_setsockopt # JIT does not support calling kernel function (kfunc) ++bloom_filter_map # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3 (?) 
++bpf_tcp_ca # JIT does not support calling kernel function (kfunc) ++bpf_loop # attaches to __x64_sys_nanosleep ++bpf_mod_race # BPF trampoline ++bpf_nf # JIT does not support calling kernel function ++core_read_macros # unknown func bpf_probe_read#4 (overlapping) ++d_path # failed to auto-attach program 'prog_stat': -524 (trampoline) ++dummy_st_ops # test_run unexpected error: -524 (errno 524) (trampoline) ++fentry_fexit # fentry attach failed: -524 (trampoline) ++fentry_test # fentry_first_attach unexpected error: -524 (trampoline) ++fexit_bpf2bpf # freplace_attach_trace unexpected error: -524 (trampoline) ++fexit_sleep # fexit_skel_load fexit skeleton failed (trampoline) ++fexit_stress # fexit attach failed prog 0 failed: -524 (trampoline) ++fexit_test # fexit_first_attach unexpected error: -524 (trampoline) ++get_func_args_test # trampoline ++get_func_ip_test # get_func_ip_test__attach unexpected error: -524 (trampoline) ++get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) ++kfree_skb # attach fentry unexpected error: -524 (trampoline) ++kfunc_call # 'bpf_prog_active': not found in kernel BTF (?) ++ksyms_module # test_ksyms_module__open_and_load unexpected error: -9 (?) ++ksyms_module_libbpf # JIT does not support calling kernel function (kfunc) ++ksyms_module_lskel # test_ksyms_module_lskel__open_and_load unexpected error: -9 (?) ++modify_return # modify_return attach failed: -524 (trampoline) ++module_attach # skel_attach skeleton attach failed: -524 (trampoline) ++mptcp ++kprobe_multi_test # relies on fentry ++netcnt # failed to load BPF skeleton 'netcnt_prog': -7 (?) ++probe_user # check_kprobe_res wrong kprobe res from probe read (?) ++recursion # skel_attach unexpected error: -524 (trampoline) ++ringbuf # skel_load skeleton load failed (?) ++sk_assign # Can't read on server: Invalid argument (?) ++sk_lookup # endianness problem ++sk_storage_tracing # test_sk_storage_tracing__attach unexpected error: -524 (trampoline) ++skc_to_unix_sock # could not attach BPF object unexpected error: -524 (trampoline) ++socket_cookie # prog_attach unexpected error: -524 (trampoline) ++stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?) ++tailcalls # tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls (?) ++task_local_storage # failed to auto-attach program 'trace_exit_creds': -524 (trampoline) ++test_bpffs # bpffs test failed 255 (iterator) ++test_bprm_opts # failed to auto-attach program 'secure_exec': -524 (trampoline) ++test_ima # failed to auto-attach program 'ima': -524 (trampoline) ++test_local_storage # failed to auto-attach program 'unlink_hook': -524 (trampoline) ++test_lsm # failed to find kernel BTF type ID of '__x64_sys_setdomainname': -3 (?) ++test_overhead # attach_fentry unexpected error: -524 (trampoline) ++test_profiler # unknown func bpf_probe_read_str#45 (overlapping) ++timer # failed to auto-attach program 'test1': -524 (trampoline) ++timer_crash # trampoline ++timer_mim # failed to auto-attach program 'test1': -524 (trampoline) ++trace_ext # failed to auto-attach program 'test_pkt_md_access_new': -524 (trampoline) ++trace_printk # trace_printk__load unexpected error: -2 (errno 2) (?) ++trace_vprintk # trace_vprintk__open_and_load unexpected error: -9 (?) ++trampoline_count # prog 'prog1': failed to attach: ERROR: strerror_r(-524)=22 (trampoline) ++verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?) 
++vmlinux # failed to auto-attach program 'handle__fentry': -524 (trampoline) ++xdp_adjust_tail # case-128 err 0 errno 28 retval 1 size 128 expect-size 3520 (?) ++xdp_bonding # failed to auto-attach program 'trace_on_entry': -524 (trampoline) ++xdp_bpf2bpf # failed to auto-attach program 'trace_on_entry': -524 (trampoline) ++map_kptr # failed to open_and_load program: -524 (trampoline) ++bpf_cookie # failed to open_and_load program: -524 (trampoline) ++xdp_do_redirect # prog_run_max_size unexpected error: -22 (errno 22) ++send_signal # intermittently fails to receive signal ++select_reuseport # intermittently fails on new s390x setup ++xdp_synproxy # JIT does not support calling kernel function (kfunc) ++unpriv_bpf_disabled # fentry +diff --git a/src/cc/libbpf/travis-ci/vmtest/configs/config-latest.s390x b/src/cc/libbpf/travis-ci/vmtest/configs/config-latest.s390x +new file mode 100644 +index 0000000..1e68df7 +--- /dev/null ++++ b/src/cc/libbpf/travis-ci/vmtest/configs/config-latest.s390x +@@ -0,0 +1,2711 @@ ++# ++# Automatically generated file; DO NOT EDIT. ++# Linux/s390 5.15.0 Kernel Configuration ++# ++CONFIG_CC_VERSION_TEXT="gcc (Debian 10.2.1-6) 10.2.1 20210110" ++CONFIG_CC_IS_GCC=y ++CONFIG_GCC_VERSION=100201 ++CONFIG_CLANG_VERSION=0 ++CONFIG_AS_IS_GNU=y ++CONFIG_AS_VERSION=23502 ++CONFIG_LD_IS_BFD=y ++CONFIG_LD_VERSION=23502 ++CONFIG_LLD_VERSION=0 ++CONFIG_CC_CAN_LINK=y ++CONFIG_CC_CAN_LINK_STATIC=y ++CONFIG_CC_HAS_ASM_GOTO=y ++CONFIG_CC_HAS_ASM_INLINE=y ++CONFIG_CC_HAS_NO_PROFILE_FN_ATTR=y ++CONFIG_IRQ_WORK=y ++CONFIG_BUILDTIME_TABLE_SORT=y ++CONFIG_THREAD_INFO_IN_TASK=y ++ ++# ++# General setup ++# ++CONFIG_INIT_ENV_ARG_LIMIT=32 ++# CONFIG_COMPILE_TEST is not set ++# CONFIG_WERROR is not set ++# CONFIG_UAPI_HEADER_TEST is not set ++CONFIG_LOCALVERSION="" ++CONFIG_LOCALVERSION_AUTO=y ++CONFIG_BUILD_SALT="" ++CONFIG_HAVE_KERNEL_GZIP=y ++CONFIG_HAVE_KERNEL_BZIP2=y ++CONFIG_HAVE_KERNEL_LZMA=y ++CONFIG_HAVE_KERNEL_XZ=y ++CONFIG_HAVE_KERNEL_LZO=y ++CONFIG_HAVE_KERNEL_LZ4=y ++CONFIG_HAVE_KERNEL_ZSTD=y ++CONFIG_HAVE_KERNEL_UNCOMPRESSED=y ++# CONFIG_KERNEL_GZIP is not set ++# CONFIG_KERNEL_BZIP2 is not set ++# CONFIG_KERNEL_LZMA is not set ++# CONFIG_KERNEL_XZ is not set ++# CONFIG_KERNEL_LZO is not set ++# CONFIG_KERNEL_LZ4 is not set ++# CONFIG_KERNEL_ZSTD is not set ++CONFIG_KERNEL_UNCOMPRESSED=y ++CONFIG_DEFAULT_INIT="" ++CONFIG_DEFAULT_HOSTNAME="(none)" ++CONFIG_SWAP=y ++CONFIG_SYSVIPC=y ++CONFIG_SYSVIPC_SYSCTL=y ++CONFIG_POSIX_MQUEUE=y ++CONFIG_POSIX_MQUEUE_SYSCTL=y ++# CONFIG_WATCH_QUEUE is not set ++CONFIG_CROSS_MEMORY_ATTACH=y ++CONFIG_USELIB=y ++CONFIG_AUDIT=y ++CONFIG_HAVE_ARCH_AUDITSYSCALL=y ++CONFIG_AUDITSYSCALL=y ++ ++# ++# IRQ subsystem ++# ++CONFIG_IRQ_DOMAIN=y ++CONFIG_IRQ_DOMAIN_HIERARCHY=y ++CONFIG_GENERIC_MSI_IRQ=y ++CONFIG_GENERIC_MSI_IRQ_DOMAIN=y ++CONFIG_SPARSE_IRQ=y ++# CONFIG_GENERIC_IRQ_DEBUGFS is not set ++# end of IRQ subsystem ++ ++CONFIG_GENERIC_TIME_VSYSCALL=y ++CONFIG_GENERIC_CLOCKEVENTS=y ++ ++# ++# Timers subsystem ++# ++CONFIG_TICK_ONESHOT=y ++CONFIG_NO_HZ_COMMON=y ++# CONFIG_HZ_PERIODIC is not set ++CONFIG_NO_HZ_IDLE=y ++# CONFIG_NO_HZ is not set ++CONFIG_HIGH_RES_TIMERS=y ++# end of Timers subsystem ++ ++CONFIG_BPF=y ++CONFIG_HAVE_EBPF_JIT=y ++CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y ++ ++# ++# BPF subsystem ++# ++CONFIG_BPF_SYSCALL=y ++CONFIG_BPF_JIT=y ++CONFIG_BPF_JIT_ALWAYS_ON=y ++CONFIG_BPF_JIT_DEFAULT_ON=y ++CONFIG_BPF_UNPRIV_DEFAULT_OFF=y ++CONFIG_USERMODE_DRIVER=y ++CONFIG_BPF_PRELOAD=y ++CONFIG_BPF_PRELOAD_UMD=y ++CONFIG_BPF_LSM=y ++# end of BPF 
subsystem ++ ++CONFIG_PREEMPT_NONE_BEHAVIOUR=y ++# CONFIG_PREEMPT_VOLUNTARY_BEHAVIOUR is not set ++# CONFIG_PREEMPT_BEHAVIOUR is not set ++CONFIG_PREEMPT_NONE=y ++CONFIG_PREEMPT_COUNT=y ++# CONFIG_SCHED_CORE is not set ++ ++# ++# CPU/Task time and stats accounting ++# ++CONFIG_VIRT_CPU_ACCOUNTING=y ++CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y ++# CONFIG_BSD_PROCESS_ACCT is not set ++CONFIG_TASKSTATS=y ++CONFIG_TASK_DELAY_ACCT=y ++CONFIG_TASK_XACCT=y ++CONFIG_TASK_IO_ACCOUNTING=y ++# CONFIG_PSI is not set ++# end of CPU/Task time and stats accounting ++ ++# CONFIG_CPU_ISOLATION is not set ++ ++# ++# RCU Subsystem ++# ++CONFIG_TREE_RCU=y ++# CONFIG_RCU_EXPERT is not set ++CONFIG_SRCU=y ++CONFIG_TREE_SRCU=y ++CONFIG_TASKS_RCU_GENERIC=y ++CONFIG_TASKS_RUDE_RCU=y ++CONFIG_TASKS_TRACE_RCU=y ++CONFIG_RCU_STALL_COMMON=y ++CONFIG_RCU_NEED_SEGCBLIST=y ++# end of RCU Subsystem ++ ++CONFIG_IKCONFIG=y ++CONFIG_IKCONFIG_PROC=y ++CONFIG_IKHEADERS=y ++CONFIG_LOG_BUF_SHIFT=17 ++CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 ++CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13 ++# CONFIG_PRINTK_INDEX is not set ++ ++# ++# Scheduler features ++# ++# end of Scheduler features ++ ++CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y ++CONFIG_CC_HAS_INT128=y ++# CONFIG_NUMA_BALANCING is not set ++CONFIG_CGROUPS=y ++CONFIG_PAGE_COUNTER=y ++CONFIG_MEMCG=y ++CONFIG_MEMCG_SWAP=y ++CONFIG_MEMCG_KMEM=y ++CONFIG_BLK_CGROUP=y ++CONFIG_CGROUP_WRITEBACK=y ++CONFIG_CGROUP_SCHED=y ++CONFIG_FAIR_GROUP_SCHED=y ++# CONFIG_CFS_BANDWIDTH is not set ++CONFIG_RT_GROUP_SCHED=y ++CONFIG_CGROUP_PIDS=y ++# CONFIG_CGROUP_RDMA is not set ++CONFIG_CGROUP_FREEZER=y ++CONFIG_CGROUP_HUGETLB=y ++CONFIG_CPUSETS=y ++CONFIG_PROC_PID_CPUSET=y ++CONFIG_CGROUP_DEVICE=y ++CONFIG_CGROUP_CPUACCT=y ++CONFIG_CGROUP_PERF=y ++CONFIG_CGROUP_BPF=y ++# CONFIG_CGROUP_MISC is not set ++# CONFIG_CGROUP_DEBUG is not set ++CONFIG_SOCK_CGROUP_DATA=y ++CONFIG_NAMESPACES=y ++CONFIG_UTS_NS=y ++CONFIG_TIME_NS=y ++CONFIG_IPC_NS=y ++CONFIG_USER_NS=y ++CONFIG_PID_NS=y ++CONFIG_NET_NS=y ++CONFIG_CHECKPOINT_RESTORE=y ++# CONFIG_SCHED_AUTOGROUP is not set ++# CONFIG_SYSFS_DEPRECATED is not set ++CONFIG_RELAY=y ++CONFIG_BLK_DEV_INITRD=y ++CONFIG_INITRAMFS_SOURCE="" ++# CONFIG_RD_GZIP is not set ++# CONFIG_RD_BZIP2 is not set ++# CONFIG_RD_LZMA is not set ++# CONFIG_RD_XZ is not set ++# CONFIG_RD_LZO is not set ++# CONFIG_RD_LZ4 is not set ++CONFIG_RD_ZSTD=y ++# CONFIG_BOOT_CONFIG is not set ++CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y ++# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set ++CONFIG_SYSCTL=y ++CONFIG_HAVE_UID16=y ++CONFIG_SYSCTL_EXCEPTION_TRACE=y ++CONFIG_EXPERT=y ++CONFIG_UID16=y ++CONFIG_MULTIUSER=y ++# CONFIG_SGETMASK_SYSCALL is not set ++# CONFIG_SYSFS_SYSCALL is not set ++CONFIG_FHANDLE=y ++CONFIG_POSIX_TIMERS=y ++CONFIG_PRINTK=y ++CONFIG_BUG=y ++CONFIG_ELF_CORE=y ++CONFIG_BASE_FULL=y ++CONFIG_FUTEX=y ++CONFIG_FUTEX_PI=y ++CONFIG_HAVE_FUTEX_CMPXCHG=y ++CONFIG_EPOLL=y ++CONFIG_SIGNALFD=y ++CONFIG_TIMERFD=y ++CONFIG_EVENTFD=y ++CONFIG_SHMEM=y ++CONFIG_AIO=y ++CONFIG_IO_URING=y ++CONFIG_ADVISE_SYSCALLS=y ++CONFIG_MEMBARRIER=y ++CONFIG_KALLSYMS=y ++CONFIG_KALLSYMS_ALL=y ++CONFIG_KALLSYMS_BASE_RELATIVE=y ++CONFIG_USERFAULTFD=y ++CONFIG_KCMP=y ++CONFIG_RSEQ=y ++# CONFIG_DEBUG_RSEQ is not set ++# CONFIG_EMBEDDED is not set ++CONFIG_HAVE_PERF_EVENTS=y ++# CONFIG_PC104 is not set ++ ++# ++# Kernel Performance Events And Counters ++# ++CONFIG_PERF_EVENTS=y ++# CONFIG_DEBUG_PERF_USE_VMALLOC is not set ++# end of Kernel Performance Events And Counters ++ ++CONFIG_VM_EVENT_COUNTERS=y ++CONFIG_SLUB_DEBUG=y ++# CONFIG_COMPAT_BRK is not 
set ++# CONFIG_SLAB is not set ++CONFIG_SLUB=y ++# CONFIG_SLOB is not set ++CONFIG_SLAB_MERGE_DEFAULT=y ++# CONFIG_SLAB_FREELIST_RANDOM is not set ++# CONFIG_SLAB_FREELIST_HARDENED is not set ++# CONFIG_SHUFFLE_PAGE_ALLOCATOR is not set ++CONFIG_SLUB_CPU_PARTIAL=y ++CONFIG_PROFILING=y ++CONFIG_TRACEPOINTS=y ++# end of General setup ++ ++CONFIG_MMU=y ++CONFIG_CPU_BIG_ENDIAN=y ++CONFIG_LOCKDEP_SUPPORT=y ++CONFIG_STACKTRACE_SUPPORT=y ++CONFIG_GENERIC_HWEIGHT=y ++CONFIG_GENERIC_BUG=y ++CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y ++CONFIG_PGSTE=y ++CONFIG_AUDIT_ARCH=y ++CONFIG_NO_IOPORT_MAP=y ++# CONFIG_PCI_QUIRKS is not set ++CONFIG_ARCH_SUPPORTS_UPROBES=y ++CONFIG_S390=y ++CONFIG_SCHED_OMIT_FRAME_POINTER=y ++CONFIG_PGTABLE_LEVELS=5 ++CONFIG_HAVE_LIVEPATCH=y ++CONFIG_LIVEPATCH=y ++ ++# ++# Processor type and features ++# ++CONFIG_HAVE_MARCH_Z900_FEATURES=y ++CONFIG_HAVE_MARCH_Z990_FEATURES=y ++CONFIG_HAVE_MARCH_Z9_109_FEATURES=y ++CONFIG_HAVE_MARCH_Z10_FEATURES=y ++CONFIG_HAVE_MARCH_Z196_FEATURES=y ++# CONFIG_MARCH_Z900 is not set ++# CONFIG_MARCH_Z990 is not set ++# CONFIG_MARCH_Z9_109 is not set ++# CONFIG_MARCH_Z10 is not set ++CONFIG_MARCH_Z196=y ++# CONFIG_MARCH_ZEC12 is not set ++# CONFIG_MARCH_Z13 is not set ++# CONFIG_MARCH_Z14 is not set ++# CONFIG_MARCH_Z15 is not set ++CONFIG_MARCH_Z196_TUNE=y ++CONFIG_TUNE_DEFAULT=y ++# CONFIG_TUNE_Z900 is not set ++# CONFIG_TUNE_Z990 is not set ++# CONFIG_TUNE_Z9_109 is not set ++# CONFIG_TUNE_Z10 is not set ++# CONFIG_TUNE_Z196 is not set ++# CONFIG_TUNE_ZEC12 is not set ++# CONFIG_TUNE_Z13 is not set ++# CONFIG_TUNE_Z14 is not set ++# CONFIG_TUNE_Z15 is not set ++CONFIG_64BIT=y ++CONFIG_COMPAT=y ++CONFIG_SYSVIPC_COMPAT=y ++CONFIG_SMP=y ++CONFIG_NR_CPUS=256 ++CONFIG_HOTPLUG_CPU=y ++CONFIG_NUMA=y ++CONFIG_NODES_SHIFT=1 ++CONFIG_SCHED_SMT=y ++CONFIG_SCHED_MC=y ++CONFIG_SCHED_BOOK=y ++CONFIG_SCHED_DRAWER=y ++CONFIG_SCHED_TOPOLOGY=y ++CONFIG_HZ_100=y ++# CONFIG_HZ_250 is not set ++# CONFIG_HZ_300 is not set ++# CONFIG_HZ_1000 is not set ++CONFIG_HZ=100 ++CONFIG_SCHED_HRTICK=y ++CONFIG_KEXEC=y ++CONFIG_ARCH_RANDOM=y ++# CONFIG_KERNEL_NOBP is not set ++# CONFIG_EXPOLINE is not set ++CONFIG_RELOCATABLE=y ++CONFIG_RANDOMIZE_BASE=y ++# end of Processor type and features ++ ++# ++# Memory setup ++# ++CONFIG_ARCH_SPARSEMEM_ENABLE=y ++CONFIG_ARCH_SPARSEMEM_DEFAULT=y ++CONFIG_MAX_PHYSMEM_BITS=46 ++CONFIG_PACK_STACK=y ++# end of Memory setup ++ ++# ++# I/O subsystem ++# ++CONFIG_QDIO=y ++CONFIG_PCI_NR_FUNCTIONS=512 ++CONFIG_HAS_IOMEM=y ++CONFIG_CHSC_SCH=m ++CONFIG_SCM_BUS=y ++# CONFIG_EADM_SCH is not set ++# end of I/O subsystem ++ ++# ++# Dump support ++# ++CONFIG_CRASH_DUMP=y ++# end of Dump support ++ ++CONFIG_CCW=y ++ ++# ++# Virtualization ++# ++# CONFIG_PROTECTED_VIRTUALIZATION_GUEST is not set ++CONFIG_PFAULT=y ++# CONFIG_CMM is not set ++# CONFIG_APPLDATA_BASE is not set ++CONFIG_S390_HYPFS_FS=y ++CONFIG_HAVE_KVM=y ++CONFIG_HAVE_KVM_IRQCHIP=y ++CONFIG_HAVE_KVM_IRQFD=y ++CONFIG_HAVE_KVM_IRQ_ROUTING=y ++CONFIG_HAVE_KVM_EVENTFD=y ++CONFIG_KVM_ASYNC_PF=y ++CONFIG_KVM_ASYNC_PF_SYNC=y ++CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT=y ++CONFIG_KVM_VFIO=y ++CONFIG_HAVE_KVM_INVALID_WAKEUPS=y ++CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL=y ++CONFIG_HAVE_KVM_NO_POLL=y ++CONFIG_VIRTUALIZATION=y ++CONFIG_KVM=y ++# CONFIG_KVM_S390_UCONTROL is not set ++CONFIG_S390_GUEST=y ++# end of Virtualization ++ ++# ++# Selftests ++# ++# CONFIG_S390_UNWIND_SELFTEST is not set ++# end of Selftests ++ ++# ++# General architecture-dependent options ++# ++CONFIG_CRASH_CORE=y ++CONFIG_KEXEC_CORE=y 
++CONFIG_GENERIC_ENTRY=y ++CONFIG_KPROBES=y ++CONFIG_JUMP_LABEL=y ++CONFIG_STATIC_KEYS_SELFTEST=y ++CONFIG_KPROBES_ON_FTRACE=y ++CONFIG_UPROBES=y ++CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y ++CONFIG_ARCH_USE_BUILTIN_BSWAP=y ++CONFIG_KRETPROBES=y ++CONFIG_HAVE_IOREMAP_PROT=y ++CONFIG_HAVE_KPROBES=y ++CONFIG_HAVE_KRETPROBES=y ++CONFIG_HAVE_KPROBES_ON_FTRACE=y ++CONFIG_HAVE_FUNCTION_ERROR_INJECTION=y ++CONFIG_HAVE_NMI=y ++CONFIG_TRACE_IRQFLAGS_SUPPORT=y ++CONFIG_HAVE_ARCH_TRACEHOOK=y ++CONFIG_HAVE_DMA_CONTIGUOUS=y ++CONFIG_GENERIC_SMP_IDLE_THREAD=y ++CONFIG_ARCH_HAS_FORTIFY_SOURCE=y ++CONFIG_ARCH_HAS_SET_MEMORY=y ++CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT=y ++CONFIG_ARCH_WANTS_NO_INSTR=y ++CONFIG_ARCH_32BIT_USTAT_F_TINODE=y ++CONFIG_HAVE_ASM_MODVERSIONS=y ++CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y ++CONFIG_HAVE_RSEQ=y ++CONFIG_HAVE_PERF_REGS=y ++CONFIG_HAVE_PERF_USER_STACK_DUMP=y ++CONFIG_HAVE_ARCH_JUMP_LABEL=y ++CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE=y ++CONFIG_MMU_GATHER_TABLE_FREE=y ++CONFIG_MMU_GATHER_RCU_TABLE_FREE=y ++CONFIG_MMU_GATHER_NO_GATHER=y ++CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y ++CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y ++CONFIG_HAVE_CMPXCHG_LOCAL=y ++CONFIG_HAVE_CMPXCHG_DOUBLE=y ++CONFIG_ARCH_WANT_IPC_PARSE_VERSION=y ++CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION=y ++CONFIG_ARCH_WANT_OLD_COMPAT_IPC=y ++CONFIG_HAVE_ARCH_SECCOMP=y ++CONFIG_HAVE_ARCH_SECCOMP_FILTER=y ++CONFIG_SECCOMP=y ++CONFIG_SECCOMP_FILTER=y ++# CONFIG_SECCOMP_CACHE_DEBUG is not set ++CONFIG_LTO_NONE=y ++CONFIG_HAVE_VIRT_CPU_ACCOUNTING=y ++CONFIG_HAVE_VIRT_CPU_ACCOUNTING_IDLE=y ++CONFIG_ARCH_HAS_SCALED_CPUTIME=y ++CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y ++CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y ++CONFIG_HAVE_ARCH_SOFT_DIRTY=y ++CONFIG_HAVE_MOD_ARCH_SPECIFIC=y ++CONFIG_MODULES_USE_ELF_RELA=y ++CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK=y ++CONFIG_ARCH_HAS_ELF_RANDOMIZE=y ++CONFIG_HAVE_RELIABLE_STACKTRACE=y ++CONFIG_CLONE_BACKWARDS2=y ++CONFIG_OLD_SIGSUSPEND3=y ++CONFIG_OLD_SIGACTION=y ++CONFIG_COMPAT_OLD_SIGACTION=y ++CONFIG_COMPAT_32BIT_TIME=y ++CONFIG_HAVE_ARCH_VMAP_STACK=y ++CONFIG_VMAP_STACK=y ++CONFIG_HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET=y ++# CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT is not set ++CONFIG_ARCH_HAS_STRICT_KERNEL_RWX=y ++CONFIG_STRICT_KERNEL_RWX=y ++CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y ++CONFIG_STRICT_MODULE_RWX=y ++# CONFIG_LOCK_EVENT_COUNTS is not set ++CONFIG_ARCH_HAS_MEM_ENCRYPT=y ++CONFIG_ARCH_HAS_VDSO_DATA=y ++CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y ++ ++# ++# GCOV-based kernel profiling ++# ++# CONFIG_GCOV_KERNEL is not set ++CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y ++# end of GCOV-based kernel profiling ++ ++CONFIG_HAVE_GCC_PLUGINS=y ++# end of General architecture-dependent options ++ ++CONFIG_RT_MUTEXES=y ++CONFIG_BASE_SMALL=0 ++CONFIG_MODULES=y ++# CONFIG_MODULE_FORCE_LOAD is not set ++CONFIG_MODULE_UNLOAD=y ++# CONFIG_MODULE_FORCE_UNLOAD is not set ++# CONFIG_MODVERSIONS is not set ++# CONFIG_MODULE_SRCVERSION_ALL is not set ++CONFIG_MODULE_SIG=y ++CONFIG_MODULE_COMPRESS_NONE=y ++# CONFIG_MODULE_COMPRESS_GZIP is not set ++# CONFIG_MODULE_COMPRESS_XZ is not set ++# CONFIG_MODULE_COMPRESS_ZSTD is not set ++# CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS is not set ++CONFIG_MODPROBE_PATH="/sbin/modprobe" ++# CONFIG_TRIM_UNUSED_KSYMS is not set ++CONFIG_MODULES_TREE_LOOKUP=y ++CONFIG_BLOCK=y ++# CONFIG_BLK_DEV_BSGLIB is not set ++# CONFIG_BLK_DEV_INTEGRITY is not set ++# CONFIG_BLK_DEV_ZONED is not set ++# CONFIG_BLK_DEV_THROTTLING is not set ++# CONFIG_BLK_WBT is not set ++# CONFIG_BLK_CGROUP_IOLATENCY is not set ++# 
CONFIG_BLK_CGROUP_IOCOST is not set ++# CONFIG_BLK_CGROUP_IOPRIO is not set ++# CONFIG_BLK_DEBUG_FS is not set ++# CONFIG_BLK_SED_OPAL is not set ++# CONFIG_BLK_INLINE_ENCRYPTION is not set ++ ++# ++# Partition Types ++# ++CONFIG_PARTITION_ADVANCED=y ++# CONFIG_ACORN_PARTITION is not set ++# CONFIG_AIX_PARTITION is not set ++# CONFIG_OSF_PARTITION is not set ++# CONFIG_AMIGA_PARTITION is not set ++# CONFIG_ATARI_PARTITION is not set ++# CONFIG_IBM_PARTITION is not set ++# CONFIG_MAC_PARTITION is not set ++# CONFIG_MSDOS_PARTITION is not set ++# CONFIG_LDM_PARTITION is not set ++# CONFIG_SGI_PARTITION is not set ++# CONFIG_ULTRIX_PARTITION is not set ++# CONFIG_SUN_PARTITION is not set ++# CONFIG_KARMA_PARTITION is not set ++CONFIG_EFI_PARTITION=y ++# CONFIG_SYSV68_PARTITION is not set ++# CONFIG_CMDLINE_PARTITION is not set ++# end of Partition Types ++ ++CONFIG_BLOCK_COMPAT=y ++CONFIG_BLK_MQ_PCI=y ++CONFIG_BLK_MQ_VIRTIO=y ++ ++# ++# IO Schedulers ++# ++CONFIG_MQ_IOSCHED_DEADLINE=y ++CONFIG_MQ_IOSCHED_KYBER=y ++# CONFIG_IOSCHED_BFQ is not set ++# end of IO Schedulers ++ ++CONFIG_PREEMPT_NOTIFIERS=y ++CONFIG_ARCH_INLINE_SPIN_TRYLOCK=y ++CONFIG_ARCH_INLINE_SPIN_TRYLOCK_BH=y ++CONFIG_ARCH_INLINE_SPIN_LOCK=y ++CONFIG_ARCH_INLINE_SPIN_LOCK_BH=y ++CONFIG_ARCH_INLINE_SPIN_LOCK_IRQ=y ++CONFIG_ARCH_INLINE_SPIN_LOCK_IRQSAVE=y ++CONFIG_ARCH_INLINE_SPIN_UNLOCK=y ++CONFIG_ARCH_INLINE_SPIN_UNLOCK_BH=y ++CONFIG_ARCH_INLINE_SPIN_UNLOCK_IRQ=y ++CONFIG_ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE=y ++CONFIG_ARCH_INLINE_READ_TRYLOCK=y ++CONFIG_ARCH_INLINE_READ_LOCK=y ++CONFIG_ARCH_INLINE_READ_LOCK_BH=y ++CONFIG_ARCH_INLINE_READ_LOCK_IRQ=y ++CONFIG_ARCH_INLINE_READ_LOCK_IRQSAVE=y ++CONFIG_ARCH_INLINE_READ_UNLOCK=y ++CONFIG_ARCH_INLINE_READ_UNLOCK_BH=y ++CONFIG_ARCH_INLINE_READ_UNLOCK_IRQ=y ++CONFIG_ARCH_INLINE_READ_UNLOCK_IRQRESTORE=y ++CONFIG_ARCH_INLINE_WRITE_TRYLOCK=y ++CONFIG_ARCH_INLINE_WRITE_LOCK=y ++CONFIG_ARCH_INLINE_WRITE_LOCK_BH=y ++CONFIG_ARCH_INLINE_WRITE_LOCK_IRQ=y ++CONFIG_ARCH_INLINE_WRITE_LOCK_IRQSAVE=y ++CONFIG_ARCH_INLINE_WRITE_UNLOCK=y ++CONFIG_ARCH_INLINE_WRITE_UNLOCK_BH=y ++CONFIG_ARCH_INLINE_WRITE_UNLOCK_IRQ=y ++CONFIG_ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE=y ++CONFIG_UNINLINE_SPIN_UNLOCK=y ++CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y ++CONFIG_MUTEX_SPIN_ON_OWNER=y ++CONFIG_RWSEM_SPIN_ON_OWNER=y ++CONFIG_LOCK_SPIN_ON_OWNER=y ++CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y ++CONFIG_FREEZER=y ++ ++# ++# Executable file formats ++# ++CONFIG_BINFMT_ELF=y ++CONFIG_COMPAT_BINFMT_ELF=y ++CONFIG_ARCH_BINFMT_ELF_STATE=y ++CONFIG_ELFCORE=y ++CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y ++CONFIG_BINFMT_SCRIPT=y ++# CONFIG_BINFMT_MISC is not set ++CONFIG_COREDUMP=y ++# end of Executable file formats ++ ++# ++# Memory Management options ++# ++CONFIG_SPARSEMEM=y ++CONFIG_SPARSEMEM_EXTREME=y ++CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y ++CONFIG_SPARSEMEM_VMEMMAP=y ++CONFIG_HAVE_MEMBLOCK_PHYS_MAP=y ++CONFIG_HAVE_FAST_GUP=y ++CONFIG_NUMA_KEEP_MEMINFO=y ++CONFIG_MEMORY_ISOLATION=y ++CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y ++CONFIG_MEMORY_HOTPLUG=y ++CONFIG_MEMORY_HOTPLUG_SPARSE=y ++# CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE is not set ++CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y ++CONFIG_MEMORY_HOTREMOVE=y ++CONFIG_SPLIT_PTLOCK_CPUS=4 ++CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y ++CONFIG_MEMORY_BALLOON=y ++CONFIG_BALLOON_COMPACTION=y ++CONFIG_COMPACTION=y ++CONFIG_PAGE_REPORTING=y ++CONFIG_MIGRATION=y ++CONFIG_CONTIG_ALLOC=y ++CONFIG_PHYS_ADDR_T_64BIT=y ++CONFIG_KSM=y ++CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 ++CONFIG_TRANSPARENT_HUGEPAGE=y 
++CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y ++# CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set ++CONFIG_CLEANCACHE=y ++CONFIG_FRONTSWAP=y ++# CONFIG_CMA is not set ++# CONFIG_MEM_SOFT_DIRTY is not set ++# CONFIG_ZSWAP is not set ++# CONFIG_ZPOOL is not set ++# CONFIG_ZSMALLOC is not set ++# CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set ++CONFIG_PAGE_IDLE_FLAG=y ++CONFIG_IDLE_PAGE_TRACKING=y ++CONFIG_ZONE_DMA=y ++# CONFIG_PERCPU_STATS is not set ++# CONFIG_GUP_TEST is not set ++# CONFIG_READ_ONLY_THP_FOR_FS is not set ++CONFIG_ARCH_HAS_PTE_SPECIAL=y ++ ++# ++# Data Access Monitoring ++# ++# CONFIG_DAMON is not set ++# end of Data Access Monitoring ++# end of Memory Management options ++ ++CONFIG_NET=y ++CONFIG_NET_INGRESS=y ++CONFIG_NET_EGRESS=y ++CONFIG_SKB_EXTENSIONS=y ++ ++# ++# Networking options ++# ++CONFIG_PACKET=y ++# CONFIG_PACKET_DIAG is not set ++CONFIG_UNIX=y ++CONFIG_UNIX_SCM=y ++CONFIG_AF_UNIX_OOB=y ++# CONFIG_UNIX_DIAG is not set ++CONFIG_TLS=y ++# CONFIG_TLS_DEVICE is not set ++# CONFIG_TLS_TOE is not set ++CONFIG_XFRM=y ++CONFIG_XFRM_ALGO=y ++CONFIG_XFRM_USER=y ++# CONFIG_XFRM_INTERFACE is not set ++# CONFIG_XFRM_SUB_POLICY is not set ++# CONFIG_XFRM_MIGRATE is not set ++# CONFIG_XFRM_STATISTICS is not set ++CONFIG_XFRM_ESP=y ++CONFIG_NET_KEY=y ++# CONFIG_NET_KEY_MIGRATE is not set ++# CONFIG_IUCV is not set ++# CONFIG_AFIUCV is not set ++CONFIG_XDP_SOCKETS=y ++# CONFIG_XDP_SOCKETS_DIAG is not set ++CONFIG_INET=y ++CONFIG_IP_MULTICAST=y ++CONFIG_IP_ADVANCED_ROUTER=y ++# CONFIG_IP_FIB_TRIE_STATS is not set ++CONFIG_IP_MULTIPLE_TABLES=y ++# CONFIG_IP_ROUTE_MULTIPATH is not set ++# CONFIG_IP_ROUTE_VERBOSE is not set ++# CONFIG_IP_PNP is not set ++CONFIG_NET_IPIP=y ++CONFIG_NET_IPGRE_DEMUX=y ++CONFIG_NET_IP_TUNNEL=y ++CONFIG_NET_IPGRE=y ++# CONFIG_NET_IPGRE_BROADCAST is not set ++# CONFIG_IP_MROUTE is not set ++CONFIG_SYN_COOKIES=y ++# CONFIG_NET_IPVTI is not set ++CONFIG_NET_UDP_TUNNEL=y ++CONFIG_NET_FOU=y ++CONFIG_NET_FOU_IP_TUNNELS=y ++# CONFIG_INET_AH is not set ++CONFIG_INET_ESP=y ++# CONFIG_INET_ESP_OFFLOAD is not set ++# CONFIG_INET_ESPINTCP is not set ++# CONFIG_INET_IPCOMP is not set ++CONFIG_INET_TUNNEL=y ++CONFIG_INET_DIAG=y ++CONFIG_INET_TCP_DIAG=y ++# CONFIG_INET_UDP_DIAG is not set ++# CONFIG_INET_RAW_DIAG is not set ++# CONFIG_INET_DIAG_DESTROY is not set ++CONFIG_TCP_CONG_ADVANCED=y ++# CONFIG_TCP_CONG_BIC is not set ++CONFIG_TCP_CONG_CUBIC=y ++# CONFIG_TCP_CONG_WESTWOOD is not set ++# CONFIG_TCP_CONG_HTCP is not set ++# CONFIG_TCP_CONG_HSTCP is not set ++# CONFIG_TCP_CONG_HYBLA is not set ++# CONFIG_TCP_CONG_VEGAS is not set ++# CONFIG_TCP_CONG_NV is not set ++# CONFIG_TCP_CONG_SCALABLE is not set ++# CONFIG_TCP_CONG_LP is not set ++# CONFIG_TCP_CONG_VENO is not set ++# CONFIG_TCP_CONG_YEAH is not set ++# CONFIG_TCP_CONG_ILLINOIS is not set ++CONFIG_TCP_CONG_DCTCP=y ++# CONFIG_TCP_CONG_CDG is not set ++# CONFIG_TCP_CONG_BBR is not set ++CONFIG_DEFAULT_CUBIC=y ++# CONFIG_DEFAULT_DCTCP is not set ++# CONFIG_DEFAULT_RENO is not set ++CONFIG_DEFAULT_TCP_CONG="cubic" ++# CONFIG_TCP_MD5SIG is not set ++CONFIG_IPV6=y ++# CONFIG_IPV6_ROUTER_PREF is not set ++# CONFIG_IPV6_OPTIMISTIC_DAD is not set ++# CONFIG_INET6_AH is not set ++CONFIG_INET6_ESP=y ++# CONFIG_INET6_ESP_OFFLOAD is not set ++# CONFIG_INET6_ESPINTCP is not set ++# CONFIG_INET6_IPCOMP is not set ++# CONFIG_IPV6_MIP6 is not set ++# CONFIG_IPV6_ILA is not set ++CONFIG_INET6_TUNNEL=y ++# CONFIG_IPV6_VTI is not set ++CONFIG_IPV6_SIT=y ++# CONFIG_IPV6_SIT_6RD is not set ++CONFIG_IPV6_NDISC_NODETYPE=y 
++CONFIG_IPV6_TUNNEL=y ++CONFIG_IPV6_GRE=y ++CONFIG_IPV6_FOU=y ++CONFIG_IPV6_FOU_TUNNEL=y ++CONFIG_IPV6_MULTIPLE_TABLES=y ++# CONFIG_IPV6_SUBTREES is not set ++# CONFIG_IPV6_MROUTE is not set ++CONFIG_IPV6_SEG6_LWTUNNEL=y ++# CONFIG_IPV6_SEG6_HMAC is not set ++CONFIG_IPV6_SEG6_BPF=y ++# CONFIG_IPV6_RPL_LWTUNNEL is not set ++# CONFIG_IPV6_IOAM6_LWTUNNEL is not set ++# CONFIG_NETLABEL is not set ++CONFIG_MPTCP=y ++CONFIG_MPTCP_IPV6=y ++# CONFIG_NETWORK_SECMARK is not set ++# CONFIG_NETWORK_PHY_TIMESTAMPING is not set ++CONFIG_NETFILTER=y ++CONFIG_NETFILTER_ADVANCED=y ++ ++# ++# Core Netfilter Configuration ++# ++CONFIG_NETFILTER_INGRESS=y ++CONFIG_NETFILTER_EGRESS=y ++CONFIG_NETFILTER_SKIP_EGRESS=y ++CONFIG_NETFILTER_NETLINK=y ++# CONFIG_NETFILTER_NETLINK_HOOK is not set ++# CONFIG_NETFILTER_NETLINK_ACCT is not set ++# CONFIG_NETFILTER_NETLINK_QUEUE is not set ++# CONFIG_NETFILTER_NETLINK_LOG is not set ++# CONFIG_NETFILTER_NETLINK_OSF is not set ++CONFIG_NF_CONNTRACK=y ++# CONFIG_NF_LOG_SYSLOG is not set ++CONFIG_NETFILTER_SYNPROXY=y ++CONFIG_NF_TABLES=y ++# CONFIG_NF_TABLES_INET is not set ++# CONFIG_NF_TABLES_NETDEV is not set ++# CONFIG_NFT_NUMGEN is not set ++# CONFIG_NFT_COUNTER is not set ++# CONFIG_NFT_LOG is not set ++# CONFIG_NFT_LIMIT is not set ++# CONFIG_NFT_TUNNEL is not set ++# CONFIG_NFT_OBJREF is not set ++# CONFIG_NFT_QUOTA is not set ++# CONFIG_NFT_REJECT is not set ++# CONFIG_NFT_COMPAT is not set ++# CONFIG_NFT_HASH is not set ++# CONFIG_NFT_XFRM is not set ++# CONFIG_NFT_SOCKET is not set ++# CONFIG_NFT_OSF is not set ++# CONFIG_NFT_TPROXY is not set ++CONFIG_NETFILTER_XTABLES=y ++CONFIG_NETFILTER_XTABLES_COMPAT=y ++ ++# ++# Xtables combined modules ++# ++CONFIG_NETFILTER_XT_MARK=y ++ ++# ++# Xtables targets ++# ++# CONFIG_NETFILTER_XT_TARGET_AUDIT is not set ++# CONFIG_NETFILTER_XT_TARGET_CLASSIFY is not set ++CONFIG_NETFILTER_XT_TARGET_CT=y ++# CONFIG_NETFILTER_XT_TARGET_HMARK is not set ++# CONFIG_NETFILTER_XT_TARGET_IDLETIMER is not set ++# CONFIG_NETFILTER_XT_TARGET_LOG is not set ++CONFIG_NETFILTER_XT_TARGET_MARK=y ++# CONFIG_NETFILTER_XT_TARGET_NFLOG is not set ++# CONFIG_NETFILTER_XT_TARGET_NFQUEUE is not set ++# CONFIG_NETFILTER_XT_TARGET_RATEEST is not set ++# CONFIG_NETFILTER_XT_TARGET_TEE is not set ++# CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set ++ ++# ++# Xtables matches ++# ++# CONFIG_NETFILTER_XT_MATCH_ADDRTYPE is not set ++CONFIG_NETFILTER_XT_MATCH_BPF=y ++# CONFIG_NETFILTER_XT_MATCH_CGROUP is not set ++# CONFIG_NETFILTER_XT_MATCH_COMMENT is not set ++# CONFIG_NETFILTER_XT_MATCH_CPU is not set ++# CONFIG_NETFILTER_XT_MATCH_DCCP is not set ++# CONFIG_NETFILTER_XT_MATCH_DEVGROUP is not set ++# CONFIG_NETFILTER_XT_MATCH_DSCP is not set ++# CONFIG_NETFILTER_XT_MATCH_ECN is not set ++# CONFIG_NETFILTER_XT_MATCH_ESP is not set ++# CONFIG_NETFILTER_XT_MATCH_HASHLIMIT is not set ++# CONFIG_NETFILTER_XT_MATCH_HL is not set ++# CONFIG_NETFILTER_XT_MATCH_IPCOMP is not set ++# CONFIG_NETFILTER_XT_MATCH_IPRANGE is not set ++# CONFIG_NETFILTER_XT_MATCH_L2TP is not set ++# CONFIG_NETFILTER_XT_MATCH_LENGTH is not set ++# CONFIG_NETFILTER_XT_MATCH_LIMIT is not set ++# CONFIG_NETFILTER_XT_MATCH_MAC is not set ++# CONFIG_NETFILTER_XT_MATCH_MARK is not set ++# CONFIG_NETFILTER_XT_MATCH_MULTIPORT is not set ++# CONFIG_NETFILTER_XT_MATCH_NFACCT is not set ++# CONFIG_NETFILTER_XT_MATCH_OSF is not set ++# CONFIG_NETFILTER_XT_MATCH_OWNER is not set ++# CONFIG_NETFILTER_XT_MATCH_POLICY is not set ++# CONFIG_NETFILTER_XT_MATCH_PKTTYPE is not set ++# 
CONFIG_NETFILTER_XT_MATCH_QUOTA is not set ++# CONFIG_NETFILTER_XT_MATCH_RATEEST is not set ++# CONFIG_NETFILTER_XT_MATCH_REALM is not set ++# CONFIG_NETFILTER_XT_MATCH_RECENT is not set ++# CONFIG_NETFILTER_XT_MATCH_SCTP is not set ++# CONFIG_NETFILTER_XT_MATCH_SOCKET is not set ++CONFIG_NETFILTER_XT_MATCH_STATE=y ++# CONFIG_NETFILTER_XT_MATCH_STATISTIC is not set ++# CONFIG_NETFILTER_XT_MATCH_STRING is not set ++# CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set ++# CONFIG_NETFILTER_XT_MATCH_TIME is not set ++# CONFIG_NETFILTER_XT_MATCH_U32 is not set ++# end of Core Netfilter Configuration ++ ++# CONFIG_IP_SET is not set ++# CONFIG_IP_VS is not set ++ ++# ++# IP: Netfilter Configuration ++# ++CONFIG_NF_DEFRAG_IPV4=y ++# CONFIG_NF_SOCKET_IPV4 is not set ++# CONFIG_NF_TPROXY_IPV4 is not set ++# CONFIG_NF_TABLES_IPV4 is not set ++# CONFIG_NF_TABLES_ARP is not set ++# CONFIG_NF_DUP_IPV4 is not set ++# CONFIG_NF_LOG_ARP is not set ++# CONFIG_NF_LOG_IPV4 is not set ++# CONFIG_NF_REJECT_IPV4 is not set ++CONFIG_IP_NF_IPTABLES=y ++# CONFIG_IP_NF_MATCH_AH is not set ++# CONFIG_IP_NF_MATCH_ECN is not set ++# CONFIG_IP_NF_MATCH_TTL is not set ++CONFIG_IP_NF_FILTER=y ++CONFIG_IP_NF_TARGET_SYNPROXY=y ++# CONFIG_IP_NF_TARGET_REJECT is not set ++# CONFIG_IP_NF_MANGLE is not set ++CONFIG_IP_NF_RAW=y ++# CONFIG_IP_NF_SECURITY is not set ++# CONFIG_IP_NF_ARPTABLES is not set ++# end of IP: Netfilter Configuration ++ ++# ++# IPv6: Netfilter Configuration ++# ++# CONFIG_NF_SOCKET_IPV6 is not set ++# CONFIG_NF_TPROXY_IPV6 is not set ++# CONFIG_NF_TABLES_IPV6 is not set ++# CONFIG_NF_DUP_IPV6 is not set ++# CONFIG_NF_REJECT_IPV6 is not set ++# CONFIG_NF_LOG_IPV6 is not set ++# CONFIG_IP6_NF_IPTABLES is not set ++# end of IPv6: Netfilter Configuration ++ ++CONFIG_NF_DEFRAG_IPV6=y ++CONFIG_BPFILTER=y ++CONFIG_BPFILTER_UMH=y ++# CONFIG_IP_DCCP is not set ++# CONFIG_IP_SCTP is not set ++# CONFIG_RDS is not set ++# CONFIG_TIPC is not set ++# CONFIG_ATM is not set ++# CONFIG_L2TP is not set ++# CONFIG_BRIDGE is not set ++# CONFIG_NET_DSA is not set ++CONFIG_VLAN_8021Q=y ++# CONFIG_VLAN_8021Q_GVRP is not set ++# CONFIG_VLAN_8021Q_MVRP is not set ++# CONFIG_DECNET is not set ++# CONFIG_LLC2 is not set ++# CONFIG_ATALK is not set ++# CONFIG_X25 is not set ++# CONFIG_LAPB is not set ++# CONFIG_PHONET is not set ++# CONFIG_6LOWPAN is not set ++# CONFIG_IEEE802154 is not set ++CONFIG_NET_SCHED=y ++ ++# ++# Queueing/Scheduling ++# ++# CONFIG_NET_SCH_CBQ is not set ++# CONFIG_NET_SCH_HTB is not set ++# CONFIG_NET_SCH_HFSC is not set ++# CONFIG_NET_SCH_PRIO is not set ++# CONFIG_NET_SCH_MULTIQ is not set ++# CONFIG_NET_SCH_RED is not set ++# CONFIG_NET_SCH_SFB is not set ++# CONFIG_NET_SCH_SFQ is not set ++# CONFIG_NET_SCH_TEQL is not set ++# CONFIG_NET_SCH_TBF is not set ++# CONFIG_NET_SCH_CBS is not set ++# CONFIG_NET_SCH_ETF is not set ++# CONFIG_NET_SCH_TAPRIO is not set ++# CONFIG_NET_SCH_GRED is not set ++# CONFIG_NET_SCH_DSMARK is not set ++# CONFIG_NET_SCH_NETEM is not set ++# CONFIG_NET_SCH_DRR is not set ++# CONFIG_NET_SCH_MQPRIO is not set ++# CONFIG_NET_SCH_SKBPRIO is not set ++# CONFIG_NET_SCH_CHOKE is not set ++# CONFIG_NET_SCH_QFQ is not set ++# CONFIG_NET_SCH_CODEL is not set ++# CONFIG_NET_SCH_FQ_CODEL is not set ++# CONFIG_NET_SCH_CAKE is not set ++CONFIG_NET_SCH_FQ=y ++# CONFIG_NET_SCH_HHF is not set ++# CONFIG_NET_SCH_PIE is not set ++CONFIG_NET_SCH_INGRESS=y ++# CONFIG_NET_SCH_PLUG is not set ++# CONFIG_NET_SCH_ETS is not set ++# CONFIG_NET_SCH_DEFAULT is not set ++ ++# ++# Classification ++# 
++CONFIG_NET_CLS=y ++# CONFIG_NET_CLS_BASIC is not set ++# CONFIG_NET_CLS_TCINDEX is not set ++# CONFIG_NET_CLS_ROUTE4 is not set ++# CONFIG_NET_CLS_FW is not set ++# CONFIG_NET_CLS_U32 is not set ++# CONFIG_NET_CLS_RSVP is not set ++# CONFIG_NET_CLS_RSVP6 is not set ++# CONFIG_NET_CLS_FLOW is not set ++# CONFIG_NET_CLS_CGROUP is not set ++CONFIG_NET_CLS_BPF=y ++CONFIG_NET_CLS_FLOWER=y ++# CONFIG_NET_CLS_MATCHALL is not set ++# CONFIG_NET_EMATCH is not set ++CONFIG_NET_CLS_ACT=y ++# CONFIG_NET_ACT_POLICE is not set ++CONFIG_NET_ACT_GACT=y ++# CONFIG_GACT_PROB is not set ++# CONFIG_NET_ACT_MIRRED is not set ++# CONFIG_NET_ACT_SAMPLE is not set ++# CONFIG_NET_ACT_IPT is not set ++# CONFIG_NET_ACT_NAT is not set ++# CONFIG_NET_ACT_PEDIT is not set ++# CONFIG_NET_ACT_SIMP is not set ++# CONFIG_NET_ACT_SKBEDIT is not set ++# CONFIG_NET_ACT_CSUM is not set ++# CONFIG_NET_ACT_MPLS is not set ++# CONFIG_NET_ACT_VLAN is not set ++CONFIG_NET_ACT_BPF=y ++# CONFIG_NET_ACT_SKBMOD is not set ++# CONFIG_NET_ACT_IFE is not set ++# CONFIG_NET_ACT_TUNNEL_KEY is not set ++# CONFIG_NET_ACT_GATE is not set ++# CONFIG_NET_TC_SKB_EXT is not set ++CONFIG_NET_SCH_FIFO=y ++# CONFIG_DCB is not set ++# CONFIG_DNS_RESOLVER is not set ++# CONFIG_BATMAN_ADV is not set ++# CONFIG_OPENVSWITCH is not set ++# CONFIG_VSOCKETS is not set ++# CONFIG_NETLINK_DIAG is not set ++CONFIG_MPLS=y ++CONFIG_NET_MPLS_GSO=y ++CONFIG_MPLS_ROUTING=y ++CONFIG_MPLS_IPTUNNEL=y ++# CONFIG_NET_NSH is not set ++# CONFIG_HSR is not set ++# CONFIG_NET_SWITCHDEV is not set ++CONFIG_NET_L3_MASTER_DEV=y ++# CONFIG_QRTR is not set ++# CONFIG_NET_NCSI is not set ++CONFIG_PCPU_DEV_REFCNT=y ++CONFIG_RPS=y ++CONFIG_RFS_ACCEL=y ++CONFIG_SOCK_RX_QUEUE_MAPPING=y ++CONFIG_XPS=y ++# CONFIG_CGROUP_NET_PRIO is not set ++CONFIG_CGROUP_NET_CLASSID=y ++CONFIG_NET_RX_BUSY_POLL=y ++CONFIG_BQL=y ++CONFIG_BPF_STREAM_PARSER=y ++CONFIG_NET_FLOW_LIMIT=y ++ ++# ++# Network testing ++# ++# CONFIG_NET_PKTGEN is not set ++# CONFIG_NET_DROP_MONITOR is not set ++# end of Network testing ++# end of Networking options ++ ++# CONFIG_CAN is not set ++# CONFIG_AF_RXRPC is not set ++# CONFIG_AF_KCM is not set ++CONFIG_STREAM_PARSER=y ++# CONFIG_MCTP is not set ++CONFIG_FIB_RULES=y ++# CONFIG_RFKILL is not set ++CONFIG_NET_9P=y ++CONFIG_NET_9P_VIRTIO=y ++# CONFIG_NET_9P_DEBUG is not set ++# CONFIG_CAIF is not set ++# CONFIG_CEPH_LIB is not set ++# CONFIG_NFC is not set ++# CONFIG_PSAMPLE is not set ++# CONFIG_NET_IFE is not set ++CONFIG_LWTUNNEL=y ++CONFIG_LWTUNNEL_BPF=y ++CONFIG_DST_CACHE=y ++CONFIG_GRO_CELLS=y ++CONFIG_NET_SOCK_MSG=y ++CONFIG_NET_DEVLINK=y ++CONFIG_FAILOVER=y ++CONFIG_ETHTOOL_NETLINK=y ++ ++# ++# Device Drivers ++# ++CONFIG_HAVE_PCI=y ++CONFIG_PCI=y ++CONFIG_PCI_DOMAINS=y ++# CONFIG_PCIEPORTBUS is not set ++CONFIG_PCIEASPM=y ++CONFIG_PCIEASPM_DEFAULT=y ++# CONFIG_PCIEASPM_POWERSAVE is not set ++# CONFIG_PCIEASPM_POWER_SUPERSAVE is not set ++# CONFIG_PCIEASPM_PERFORMANCE is not set ++# CONFIG_PCIE_PTM is not set ++CONFIG_PCI_MSI=y ++CONFIG_PCI_MSI_IRQ_DOMAIN=y ++CONFIG_PCI_MSI_ARCH_FALLBACKS=y ++# CONFIG_PCI_DEBUG is not set ++# CONFIG_PCI_STUB is not set ++# CONFIG_PCI_IOV is not set ++# CONFIG_PCI_PRI is not set ++# CONFIG_PCI_PASID is not set ++# CONFIG_PCIE_BUS_TUNE_OFF is not set ++CONFIG_PCIE_BUS_DEFAULT=y ++# CONFIG_PCIE_BUS_SAFE is not set ++# CONFIG_PCIE_BUS_PERFORMANCE is not set ++# CONFIG_PCIE_BUS_PEER2PEER is not set ++# CONFIG_HOTPLUG_PCI is not set ++ ++# ++# PCI controller drivers ++# ++ ++# ++# DesignWare PCI Core Support ++# ++# 
CONFIG_PCIE_DW_PLAT_HOST is not set ++# CONFIG_PCI_MESON is not set ++# end of DesignWare PCI Core Support ++ ++# ++# Mobiveil PCIe Core Support ++# ++# end of Mobiveil PCIe Core Support ++ ++# ++# Cadence PCIe controllers support ++# ++# end of Cadence PCIe controllers support ++# end of PCI controller drivers ++ ++# ++# PCI Endpoint ++# ++# CONFIG_PCI_ENDPOINT is not set ++# end of PCI Endpoint ++ ++# ++# PCI switch controller drivers ++# ++# CONFIG_PCI_SW_SWITCHTEC is not set ++# end of PCI switch controller drivers ++ ++# CONFIG_CXL_BUS is not set ++# CONFIG_PCCARD is not set ++# CONFIG_RAPIDIO is not set ++ ++# ++# Generic Driver Options ++# ++# CONFIG_UEVENT_HELPER is not set ++CONFIG_DEVTMPFS=y ++# CONFIG_DEVTMPFS_MOUNT is not set ++CONFIG_STANDALONE=y ++CONFIG_PREVENT_FIRMWARE_BUILD=y ++ ++# ++# Firmware loader ++# ++CONFIG_FW_LOADER=y ++CONFIG_EXTRA_FIRMWARE="" ++# CONFIG_FW_LOADER_USER_HELPER is not set ++# CONFIG_FW_LOADER_COMPRESS is not set ++# end of Firmware loader ++ ++CONFIG_ALLOW_DEV_COREDUMP=y ++# CONFIG_DEBUG_DRIVER is not set ++# CONFIG_DEBUG_DEVRES is not set ++# CONFIG_DEBUG_TEST_DRIVER_REMOVE is not set ++# CONFIG_TEST_ASYNC_DRIVER_PROBE is not set ++CONFIG_SYS_HYPERVISOR=y ++CONFIG_GENERIC_CPU_AUTOPROBE=y ++CONFIG_GENERIC_CPU_VULNERABILITIES=y ++# end of Generic Driver Options ++ ++# ++# Bus devices ++# ++# CONFIG_MHI_BUS is not set ++# end of Bus devices ++ ++# CONFIG_CONNECTOR is not set ++ ++# ++# Firmware Drivers ++# ++ ++# ++# ARM System Control and Management Interface Protocol ++# ++# end of ARM System Control and Management Interface Protocol ++ ++# CONFIG_FIRMWARE_MEMMAP is not set ++# CONFIG_GOOGLE_FIRMWARE is not set ++ ++# ++# Tegra firmware driver ++# ++# end of Tegra firmware driver ++# end of Firmware Drivers ++ ++# CONFIG_GNSS is not set ++# CONFIG_MTD is not set ++# CONFIG_OF is not set ++# CONFIG_PARPORT is not set ++CONFIG_BLK_DEV=y ++# CONFIG_BLK_DEV_NULL_BLK is not set ++# CONFIG_BLK_DEV_PCIESSD_MTIP32XX is not set ++CONFIG_BLK_DEV_LOOP=y ++CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 ++# CONFIG_BLK_DEV_DRBD is not set ++# CONFIG_BLK_DEV_NBD is not set ++# CONFIG_BLK_DEV_SX8 is not set ++CONFIG_BLK_DEV_RAM=y ++CONFIG_BLK_DEV_RAM_COUNT=16 ++CONFIG_BLK_DEV_RAM_SIZE=4096 ++# CONFIG_CDROM_PKTCDVD is not set ++# CONFIG_ATA_OVER_ETH is not set ++ ++# ++# S/390 block device drivers ++# ++# CONFIG_DCSSBLK is not set ++# CONFIG_DASD is not set ++CONFIG_VIRTIO_BLK=y ++# CONFIG_BLK_DEV_RBD is not set ++# CONFIG_BLK_DEV_RSXX is not set ++ ++# ++# NVME Support ++# ++# CONFIG_BLK_DEV_NVME is not set ++# CONFIG_NVME_FC is not set ++# CONFIG_NVME_TCP is not set ++# end of NVME Support ++ ++# ++# Misc devices ++# ++# CONFIG_DUMMY_IRQ is not set ++# CONFIG_PHANTOM is not set ++# CONFIG_TIFM_CORE is not set ++# CONFIG_ENCLOSURE_SERVICES is not set ++# CONFIG_HP_ILO is not set ++# CONFIG_SRAM is not set ++# CONFIG_DW_XDATA_PCIE is not set ++# CONFIG_PCI_ENDPOINT_TEST is not set ++# CONFIG_XILINX_SDFEC is not set ++# CONFIG_C2PORT is not set ++ ++# ++# EEPROM support ++# ++# CONFIG_EEPROM_93CX6 is not set ++# end of EEPROM support ++ ++# CONFIG_CB710_CORE is not set ++ ++# ++# Texas Instruments shared transport line discipline ++# ++# end of Texas Instruments shared transport line discipline ++ ++# ++# Altera FPGA firmware download module (requires I2C) ++# ++# CONFIG_GENWQE is not set ++# CONFIG_ECHO is not set ++# CONFIG_BCM_VK is not set ++# CONFIG_MISC_ALCOR_PCI is not set ++# CONFIG_MISC_RTSX_PCI is not set ++# CONFIG_HABANA_AI is not set ++# CONFIG_UACCE is not set ++# 
CONFIG_PVPANIC is not set ++# end of Misc devices ++ ++# ++# SCSI device support ++# ++CONFIG_SCSI_MOD=y ++# CONFIG_RAID_ATTRS is not set ++CONFIG_SCSI_COMMON=y ++CONFIG_SCSI=y ++CONFIG_SCSI_DMA=y ++# CONFIG_SCSI_PROC_FS is not set ++ ++# ++# SCSI support type (disk, tape, CD-ROM) ++# ++# CONFIG_BLK_DEV_SD is not set ++# CONFIG_CHR_DEV_ST is not set ++# CONFIG_BLK_DEV_SR is not set ++# CONFIG_CHR_DEV_SG is not set ++# CONFIG_BLK_DEV_BSG is not set ++# CONFIG_CHR_DEV_SCH is not set ++# CONFIG_SCSI_CONSTANTS is not set ++# CONFIG_SCSI_LOGGING is not set ++# CONFIG_SCSI_SCAN_ASYNC is not set ++ ++# ++# SCSI Transports ++# ++# CONFIG_SCSI_SPI_ATTRS is not set ++# CONFIG_SCSI_FC_ATTRS is not set ++# CONFIG_SCSI_ISCSI_ATTRS is not set ++# CONFIG_SCSI_SAS_ATTRS is not set ++# CONFIG_SCSI_SAS_LIBSAS is not set ++# CONFIG_SCSI_SRP_ATTRS is not set ++# end of SCSI Transports ++ ++CONFIG_SCSI_LOWLEVEL=y ++# CONFIG_ISCSI_TCP is not set ++# CONFIG_ISCSI_BOOT_SYSFS is not set ++# CONFIG_SCSI_CXGB3_ISCSI is not set ++# CONFIG_SCSI_BNX2_ISCSI is not set ++# CONFIG_BE2ISCSI is not set ++# CONFIG_BLK_DEV_3W_XXXX_RAID is not set ++# CONFIG_SCSI_HPSA is not set ++# CONFIG_SCSI_3W_9XXX is not set ++# CONFIG_SCSI_3W_SAS is not set ++# CONFIG_SCSI_ACARD is not set ++# CONFIG_SCSI_AACRAID is not set ++# CONFIG_SCSI_AIC7XXX is not set ++# CONFIG_SCSI_AIC79XX is not set ++# CONFIG_SCSI_AIC94XX is not set ++# CONFIG_SCSI_MVSAS is not set ++# CONFIG_SCSI_MVUMI is not set ++# CONFIG_SCSI_ADVANSYS is not set ++# CONFIG_SCSI_ARCMSR is not set ++# CONFIG_SCSI_ESAS2R is not set ++# CONFIG_MEGARAID_NEWGEN is not set ++# CONFIG_MEGARAID_LEGACY is not set ++# CONFIG_MEGARAID_SAS is not set ++# CONFIG_SCSI_MPT3SAS is not set ++# CONFIG_SCSI_MPT2SAS is not set ++# CONFIG_SCSI_MPI3MR is not set ++# CONFIG_SCSI_UFSHCD is not set ++# CONFIG_SCSI_HPTIOP is not set ++# CONFIG_SCSI_MYRB is not set ++# CONFIG_SCSI_SNIC is not set ++# CONFIG_SCSI_DMX3191D is not set ++# CONFIG_SCSI_FDOMAIN_PCI is not set ++# CONFIG_SCSI_IPS is not set ++# CONFIG_SCSI_INITIO is not set ++# CONFIG_SCSI_INIA100 is not set ++# CONFIG_SCSI_STEX is not set ++# CONFIG_SCSI_SYM53C8XX_2 is not set ++# CONFIG_SCSI_QLOGIC_1280 is not set ++# CONFIG_SCSI_QLA_ISCSI is not set ++# CONFIG_SCSI_DC395x is not set ++# CONFIG_SCSI_AM53C974 is not set ++# CONFIG_SCSI_WD719X is not set ++# CONFIG_SCSI_DEBUG is not set ++# CONFIG_SCSI_PMCRAID is not set ++# CONFIG_SCSI_PM8001 is not set ++CONFIG_SCSI_VIRTIO=y ++# CONFIG_SCSI_DH is not set ++# end of SCSI device support ++ ++# CONFIG_ATA is not set ++# CONFIG_MD is not set ++# CONFIG_TARGET_CORE is not set ++# CONFIG_FUSION is not set ++ ++# ++# IEEE 1394 (FireWire) support ++# ++# CONFIG_FIREWIRE is not set ++# CONFIG_FIREWIRE_NOSY is not set ++# end of IEEE 1394 (FireWire) support ++ ++CONFIG_NETDEVICES=y ++CONFIG_NET_CORE=y ++CONFIG_BONDING=y ++# CONFIG_DUMMY is not set ++# CONFIG_WIREGUARD is not set ++# CONFIG_EQUALIZER is not set ++# CONFIG_NET_FC is not set ++# CONFIG_NET_TEAM is not set ++CONFIG_MACVLAN=y ++CONFIG_MACVTAP=y ++CONFIG_IPVLAN_L3S=y ++CONFIG_IPVLAN=y ++# CONFIG_IPVTAP is not set ++CONFIG_VXLAN=y ++CONFIG_GENEVE=y ++# CONFIG_BAREUDP is not set ++# CONFIG_GTP is not set ++# CONFIG_AMT is not set ++# CONFIG_MACSEC is not set ++# CONFIG_NETCONSOLE is not set ++CONFIG_TUN=y ++CONFIG_TAP=y ++# CONFIG_TUN_VNET_CROSS_LE is not set ++CONFIG_VETH=y ++CONFIG_VIRTIO_NET=y ++# CONFIG_NLMON is not set ++CONFIG_NET_VRF=y ++# CONFIG_ARCNET is not set ++# CONFIG_ETHERNET is not set ++# CONFIG_FDDI is not set ++# 
CONFIG_HIPPI is not set ++# CONFIG_PHYLIB is not set ++# CONFIG_MDIO_DEVICE is not set ++ ++# ++# PCS device drivers ++# ++# end of PCS device drivers ++ ++# CONFIG_PPP is not set ++# CONFIG_SLIP is not set ++ ++# ++# S/390 network device drivers ++# ++# CONFIG_CTCM is not set ++# end of S/390 network device drivers ++ ++# ++# Host-side USB support is needed for USB Network Adapter support ++# ++# CONFIG_WAN is not set ++ ++# ++# Wireless WAN ++# ++# CONFIG_WWAN is not set ++# end of Wireless WAN ++ ++# CONFIG_VMXNET3 is not set ++CONFIG_NETDEVSIM=y ++CONFIG_NET_FAILOVER=y ++ ++# ++# Input device support ++# ++CONFIG_INPUT=y ++# CONFIG_INPUT_FF_MEMLESS is not set ++# CONFIG_INPUT_SPARSEKMAP is not set ++# CONFIG_INPUT_MATRIXKMAP is not set ++ ++# ++# Userland interfaces ++# ++# CONFIG_INPUT_MOUSEDEV is not set ++# CONFIG_INPUT_JOYDEV is not set ++# CONFIG_INPUT_EVDEV is not set ++# CONFIG_INPUT_EVBUG is not set ++ ++# ++# Input Device Drivers ++# ++CONFIG_INPUT_KEYBOARD=y ++CONFIG_KEYBOARD_ATKBD=y ++# CONFIG_KEYBOARD_LKKBD is not set ++# CONFIG_KEYBOARD_NEWTON is not set ++# CONFIG_KEYBOARD_OPENCORES is not set ++# CONFIG_KEYBOARD_STOWAWAY is not set ++# CONFIG_KEYBOARD_SUNKBD is not set ++# CONFIG_KEYBOARD_XTKBD is not set ++CONFIG_INPUT_MOUSE=y ++CONFIG_MOUSE_PS2=y ++CONFIG_MOUSE_PS2_ALPS=y ++CONFIG_MOUSE_PS2_BYD=y ++CONFIG_MOUSE_PS2_LOGIPS2PP=y ++CONFIG_MOUSE_PS2_SYNAPTICS=y ++CONFIG_MOUSE_PS2_CYPRESS=y ++CONFIG_MOUSE_PS2_TRACKPOINT=y ++# CONFIG_MOUSE_PS2_ELANTECH is not set ++# CONFIG_MOUSE_PS2_SENTELIC is not set ++# CONFIG_MOUSE_PS2_TOUCHKIT is not set ++CONFIG_MOUSE_PS2_FOCALTECH=y ++# CONFIG_MOUSE_SERIAL is not set ++# CONFIG_MOUSE_APPLETOUCH is not set ++# CONFIG_MOUSE_BCM5974 is not set ++# CONFIG_MOUSE_VSXXXAA is not set ++# CONFIG_MOUSE_SYNAPTICS_USB is not set ++# CONFIG_INPUT_JOYSTICK is not set ++# CONFIG_INPUT_TABLET is not set ++# CONFIG_INPUT_TOUCHSCREEN is not set ++# CONFIG_INPUT_MISC is not set ++# CONFIG_RMI4_CORE is not set ++ ++# ++# Hardware I/O ports ++# ++CONFIG_SERIO=y ++CONFIG_SERIO_SERPORT=y ++# CONFIG_SERIO_PCIPS2 is not set ++CONFIG_SERIO_LIBPS2=y ++# CONFIG_SERIO_RAW is not set ++# CONFIG_SERIO_ALTERA_PS2 is not set ++# CONFIG_SERIO_PS2MULT is not set ++# CONFIG_SERIO_ARC_PS2 is not set ++# CONFIG_USERIO is not set ++# CONFIG_GAMEPORT is not set ++# end of Hardware I/O ports ++# end of Input device support ++ ++# ++# Character devices ++# ++CONFIG_TTY=y ++# CONFIG_VT is not set ++CONFIG_UNIX98_PTYS=y ++CONFIG_LEGACY_PTYS=y ++CONFIG_LEGACY_PTY_COUNT=256 ++CONFIG_LDISC_AUTOLOAD=y ++ ++# ++# Serial drivers ++# ++ ++# ++# Non-8250 serial port support ++# ++# CONFIG_SERIAL_UARTLITE is not set ++# CONFIG_SERIAL_JSM is not set ++# CONFIG_SERIAL_SCCNXP is not set ++# CONFIG_SERIAL_ALTERA_JTAGUART is not set ++# CONFIG_SERIAL_ALTERA_UART is not set ++# CONFIG_SERIAL_ARC is not set ++# CONFIG_SERIAL_RP2 is not set ++# CONFIG_SERIAL_FSL_LPUART is not set ++# CONFIG_SERIAL_FSL_LINFLEXUART is not set ++# end of Serial drivers ++ ++# CONFIG_SERIAL_NONSTANDARD is not set ++# CONFIG_N_GSM is not set ++# CONFIG_NOZOMI is not set ++# CONFIG_NULL_TTY is not set ++# CONFIG_HVC_IUCV is not set ++# CONFIG_SERIAL_DEV_BUS is not set ++# CONFIG_TTY_PRINTK is not set ++# CONFIG_VIRTIO_CONSOLE is not set ++# CONFIG_IPMI_HANDLER is not set ++CONFIG_HW_RANDOM=y ++# CONFIG_HW_RANDOM_TIMERIOMEM is not set ++# CONFIG_HW_RANDOM_BA431 is not set ++# CONFIG_HW_RANDOM_VIRTIO is not set ++CONFIG_HW_RANDOM_S390=y ++# CONFIG_HW_RANDOM_XIPHERA is not set ++# CONFIG_APPLICOM is not set 
++CONFIG_DEVMEM=y ++CONFIG_DEVPORT=y ++# CONFIG_HANGCHECK_TIMER is not set ++CONFIG_TCG_TPM=y ++CONFIG_HW_RANDOM_TPM=y ++# CONFIG_TCG_VTPM_PROXY is not set ++ ++# ++# S/390 character device drivers ++# ++CONFIG_TN3270=y ++CONFIG_TN3270_TTY=y ++# CONFIG_TN3270_FS is not set ++CONFIG_TN3270_CONSOLE=y ++# CONFIG_TN3215 is not set ++CONFIG_CCW_CONSOLE=y ++# CONFIG_SCLP_TTY is not set ++CONFIG_SCLP_VT220_TTY=y ++CONFIG_SCLP_VT220_CONSOLE=y ++# CONFIG_HMC_DRV is not set ++# CONFIG_SCLP_OFB is not set ++# CONFIG_S390_TAPE is not set ++# CONFIG_VMCP is not set ++# CONFIG_MONWRITER is not set ++# CONFIG_S390_VMUR is not set ++# CONFIG_XILLYBUS is not set ++# CONFIG_RANDOM_TRUST_CPU is not set ++# CONFIG_RANDOM_TRUST_BOOTLOADER is not set ++# end of Character devices ++ ++# ++# I2C support ++# ++# CONFIG_I2C is not set ++# end of I2C support ++ ++# CONFIG_I3C is not set ++# CONFIG_SPI is not set ++# CONFIG_SPMI is not set ++# CONFIG_HSI is not set ++# CONFIG_PPS is not set ++ ++# ++# PTP clock support ++# ++# CONFIG_PTP_1588_CLOCK is not set ++CONFIG_PTP_1588_CLOCK_OPTIONAL=y ++ ++# ++# Enable PHYLIB and NETWORK_PHY_TIMESTAMPING to see the additional clocks. ++# ++# end of PTP clock support ++ ++# CONFIG_PINCTRL is not set ++# CONFIG_GPIOLIB is not set ++# CONFIG_W1 is not set ++# CONFIG_POWER_RESET is not set ++# CONFIG_POWER_SUPPLY is not set ++CONFIG_HWMON=y ++# CONFIG_HWMON_DEBUG_CHIP is not set ++ ++# ++# Native drivers ++# ++# CONFIG_SENSORS_AS370 is not set ++# CONFIG_SENSORS_AXI_FAN_CONTROL is not set ++# CONFIG_SENSORS_ASPEED is not set ++# CONFIG_SENSORS_CORSAIR_CPRO is not set ++# CONFIG_SENSORS_CORSAIR_PSU is not set ++# CONFIG_SENSORS_I5K_AMB is not set ++# CONFIG_SENSORS_F71805F is not set ++# CONFIG_SENSORS_F71882FG is not set ++# CONFIG_SENSORS_IT87 is not set ++# CONFIG_SENSORS_MAX197 is not set ++# CONFIG_SENSORS_MR75203 is not set ++# CONFIG_SENSORS_PC87360 is not set ++# CONFIG_SENSORS_PC87427 is not set ++# CONFIG_SENSORS_NTC_THERMISTOR is not set ++# CONFIG_SENSORS_NCT6683 is not set ++# CONFIG_SENSORS_NCT6775 is not set ++# CONFIG_SENSORS_NPCM7XX is not set ++# CONFIG_SENSORS_SIS5595 is not set ++# CONFIG_SENSORS_SMSC47M1 is not set ++# CONFIG_SENSORS_SMSC47B397 is not set ++# CONFIG_SENSORS_VIA686A is not set ++# CONFIG_SENSORS_VT1211 is not set ++# CONFIG_SENSORS_VT8231 is not set ++# CONFIG_SENSORS_W83627HF is not set ++# CONFIG_SENSORS_W83627EHF is not set ++# CONFIG_THERMAL is not set ++# CONFIG_WATCHDOG is not set ++CONFIG_SSB_POSSIBLE=y ++# CONFIG_SSB is not set ++CONFIG_BCMA_POSSIBLE=y ++# CONFIG_BCMA is not set ++ ++# ++# Multifunction device drivers ++# ++# CONFIG_MFD_MADERA is not set ++# CONFIG_HTC_PASIC3 is not set ++# CONFIG_LPC_ICH is not set ++# CONFIG_LPC_SCH is not set ++# CONFIG_MFD_INTEL_PMT is not set ++# CONFIG_MFD_JANZ_CMODIO is not set ++# CONFIG_MFD_KEMPLD is not set ++# CONFIG_MFD_MT6397 is not set ++# CONFIG_MFD_RDC321X is not set ++# CONFIG_MFD_SM501 is not set ++# CONFIG_MFD_SYSCON is not set ++# CONFIG_MFD_TI_AM335X_TSCADC is not set ++# CONFIG_MFD_TQMX86 is not set ++# CONFIG_MFD_VX855 is not set ++# end of Multifunction device drivers ++ ++# CONFIG_REGULATOR is not set ++CONFIG_RC_CORE=y ++# CONFIG_RC_MAP is not set ++CONFIG_LIRC=y ++CONFIG_BPF_LIRC_MODE2=y ++# CONFIG_RC_DECODERS is not set ++CONFIG_RC_DEVICES=y ++CONFIG_RC_LOOPBACK=y ++# CONFIG_IR_SERIAL is not set ++ ++# ++# CEC support ++# ++# CONFIG_MEDIA_CEC_SUPPORT is not set ++# end of CEC support ++ ++# CONFIG_MEDIA_SUPPORT is not set ++ ++# ++# Graphics support ++# ++# CONFIG_DRM is not 
set ++ ++# ++# ARM devices ++# ++# end of ARM devices ++ ++# ++# Frame buffer Devices ++# ++# CONFIG_FB is not set ++# end of Frame buffer Devices ++ ++# ++# Backlight & LCD device support ++# ++# CONFIG_LCD_CLASS_DEVICE is not set ++# CONFIG_BACKLIGHT_CLASS_DEVICE is not set ++# end of Backlight & LCD device support ++# end of Graphics support ++ ++# CONFIG_SOUND is not set ++ ++# ++# HID support ++# ++CONFIG_HID=y ++# CONFIG_HID_BATTERY_STRENGTH is not set ++# CONFIG_HIDRAW is not set ++# CONFIG_UHID is not set ++CONFIG_HID_GENERIC=y ++ ++# ++# Special HID drivers ++# ++# CONFIG_HID_A4TECH is not set ++# CONFIG_HID_ACRUX is not set ++# CONFIG_HID_APPLE is not set ++# CONFIG_HID_AUREAL is not set ++# CONFIG_HID_BELKIN is not set ++# CONFIG_HID_CHERRY is not set ++# CONFIG_HID_CHICONY is not set ++# CONFIG_HID_COUGAR is not set ++# CONFIG_HID_MACALLY is not set ++# CONFIG_HID_CMEDIA is not set ++# CONFIG_HID_CYPRESS is not set ++# CONFIG_HID_DRAGONRISE is not set ++# CONFIG_HID_EMS_FF is not set ++# CONFIG_HID_ELECOM is not set ++# CONFIG_HID_EZKEY is not set ++# CONFIG_HID_GEMBIRD is not set ++# CONFIG_HID_GFRM is not set ++# CONFIG_HID_GLORIOUS is not set ++# CONFIG_HID_VIVALDI is not set ++# CONFIG_HID_KEYTOUCH is not set ++# CONFIG_HID_KYE is not set ++# CONFIG_HID_WALTOP is not set ++# CONFIG_HID_VIEWSONIC is not set ++# CONFIG_HID_GYRATION is not set ++# CONFIG_HID_ICADE is not set ++# CONFIG_HID_ITE is not set ++# CONFIG_HID_JABRA is not set ++# CONFIG_HID_TWINHAN is not set ++# CONFIG_HID_KENSINGTON is not set ++# CONFIG_HID_LCPOWER is not set ++# CONFIG_HID_LENOVO is not set ++# CONFIG_HID_MAGICMOUSE is not set ++# CONFIG_HID_MALTRON is not set ++# CONFIG_HID_MAYFLASH is not set ++# CONFIG_HID_REDRAGON is not set ++# CONFIG_HID_MICROSOFT is not set ++# CONFIG_HID_MONTEREY is not set ++# CONFIG_HID_MULTITOUCH is not set ++# CONFIG_HID_NTI is not set ++# CONFIG_HID_ORTEK is not set ++# CONFIG_HID_PANTHERLORD is not set ++# CONFIG_HID_PETALYNX is not set ++# CONFIG_HID_PICOLCD is not set ++# CONFIG_HID_PLANTRONICS is not set ++# CONFIG_HID_PLAYSTATION is not set ++# CONFIG_HID_PRIMAX is not set ++# CONFIG_HID_SAITEK is not set ++# CONFIG_HID_SAMSUNG is not set ++# CONFIG_HID_SEMITEK is not set ++# CONFIG_HID_SPEEDLINK is not set ++# CONFIG_HID_STEAM is not set ++# CONFIG_HID_STEELSERIES is not set ++# CONFIG_HID_SUNPLUS is not set ++# CONFIG_HID_RMI is not set ++# CONFIG_HID_GREENASIA is not set ++# CONFIG_HID_SMARTJOYPLUS is not set ++# CONFIG_HID_TIVO is not set ++# CONFIG_HID_TOPSEED is not set ++# CONFIG_HID_UDRAW_PS3 is not set ++# CONFIG_HID_XINMO is not set ++# CONFIG_HID_ZEROPLUS is not set ++# CONFIG_HID_ZYDACRON is not set ++# CONFIG_HID_SENSOR_HUB is not set ++# CONFIG_HID_ALPS is not set ++# end of Special HID drivers ++# end of HID support ++ ++CONFIG_USB_OHCI_LITTLE_ENDIAN=y ++CONFIG_USB_SUPPORT=y ++# CONFIG_USB_ULPI_BUS is not set ++CONFIG_USB_ARCH_HAS_HCD=y ++# CONFIG_USB is not set ++CONFIG_USB_PCI=y ++ ++# ++# USB port drivers ++# ++ ++# ++# USB Physical Layer drivers ++# ++# CONFIG_NOP_USB_XCEIV is not set ++# end of USB Physical Layer drivers ++ ++# CONFIG_USB_GADGET is not set ++# CONFIG_TYPEC is not set ++# CONFIG_USB_ROLE_SWITCH is not set ++# CONFIG_MMC is not set ++# CONFIG_MEMSTICK is not set ++# CONFIG_NEW_LEDS is not set ++# CONFIG_ACCESSIBILITY is not set ++# CONFIG_INFINIBAND is not set ++# CONFIG_DMADEVICES is not set ++ ++# ++# DMABUF options ++# ++# CONFIG_SYNC_FILE is not set ++# CONFIG_DMABUF_HEAPS is not set ++# end of DMABUF options ++ ++# 
CONFIG_AUXDISPLAY is not set ++# CONFIG_UIO is not set ++# CONFIG_VFIO is not set ++# CONFIG_VIRT_DRIVERS is not set ++CONFIG_VIRTIO=y ++CONFIG_VIRTIO_PCI_LIB=y ++CONFIG_VIRTIO_MENU=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_VIRTIO_PCI_LEGACY=y ++CONFIG_VIRTIO_BALLOON=y ++# CONFIG_VIRTIO_INPUT is not set ++# CONFIG_VIRTIO_MMIO is not set ++# CONFIG_VDPA is not set ++CONFIG_VHOST_MENU=y ++# CONFIG_VHOST_NET is not set ++# CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set ++ ++# ++# Microsoft Hyper-V guest support ++# ++# end of Microsoft Hyper-V guest support ++ ++# CONFIG_GREYBUS is not set ++# CONFIG_COMEDI is not set ++# CONFIG_STAGING is not set ++# CONFIG_GOLDFISH is not set ++# CONFIG_COMMON_CLK is not set ++# CONFIG_HWSPINLOCK is not set ++ ++# ++# Clock Source drivers ++# ++# end of Clock Source drivers ++ ++# CONFIG_MAILBOX is not set ++CONFIG_IOMMU_API=y ++CONFIG_IOMMU_SUPPORT=y ++ ++# ++# Generic IOMMU Pagetable Support ++# ++# end of Generic IOMMU Pagetable Support ++ ++# CONFIG_IOMMU_DEBUGFS is not set ++CONFIG_IOMMU_DEFAULT_DMA_STRICT=y ++# CONFIG_IOMMU_DEFAULT_DMA_LAZY is not set ++# CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set ++CONFIG_S390_IOMMU=y ++# CONFIG_S390_CCW_IOMMU is not set ++ ++# ++# Remoteproc drivers ++# ++# CONFIG_REMOTEPROC is not set ++# end of Remoteproc drivers ++ ++# ++# Rpmsg drivers ++# ++# CONFIG_RPMSG_VIRTIO is not set ++# end of Rpmsg drivers ++ ++# ++# SOC (System On Chip) specific Drivers ++# ++ ++# ++# Amlogic SoC drivers ++# ++# end of Amlogic SoC drivers ++ ++# ++# Broadcom SoC drivers ++# ++# end of Broadcom SoC drivers ++ ++# ++# NXP/Freescale QorIQ SoC drivers ++# ++# end of NXP/Freescale QorIQ SoC drivers ++ ++# ++# i.MX SoC drivers ++# ++# end of i.MX SoC drivers ++ ++# ++# Enable LiteX SoC Builder specific drivers ++# ++# end of Enable LiteX SoC Builder specific drivers ++ ++# ++# Qualcomm SoC drivers ++# ++# end of Qualcomm SoC drivers ++ ++# CONFIG_SOC_TI is not set ++ ++# ++# Xilinx SoC drivers ++# ++# end of Xilinx SoC drivers ++# end of SOC (System On Chip) specific Drivers ++ ++# CONFIG_PM_DEVFREQ is not set ++# CONFIG_EXTCON is not set ++# CONFIG_MEMORY is not set ++# CONFIG_IIO is not set ++# CONFIG_NTB is not set ++# CONFIG_VME_BUS is not set ++# CONFIG_PWM is not set ++ ++# ++# IRQ chip support ++# ++# end of IRQ chip support ++ ++# CONFIG_IPACK_BUS is not set ++# CONFIG_RESET_CONTROLLER is not set ++ ++# ++# PHY Subsystem ++# ++# CONFIG_GENERIC_PHY is not set ++# CONFIG_PHY_CAN_TRANSCEIVER is not set ++# CONFIG_BCM_KONA_USB2_PHY is not set ++# CONFIG_PHY_PXA_28NM_HSIC is not set ++# CONFIG_PHY_PXA_28NM_USB2 is not set ++# end of PHY Subsystem ++ ++# CONFIG_POWERCAP is not set ++# CONFIG_MCB is not set ++ ++# ++# Performance monitor support ++# ++# end of Performance monitor support ++ ++# CONFIG_RAS is not set ++# CONFIG_USB4 is not set ++ ++# ++# Android ++# ++# CONFIG_ANDROID is not set ++# end of Android ++ ++# CONFIG_LIBNVDIMM is not set ++# CONFIG_DAX is not set ++# CONFIG_NVMEM is not set ++ ++# ++# HW tracing support ++# ++# CONFIG_STM is not set ++# CONFIG_INTEL_TH is not set ++# end of HW tracing support ++ ++# CONFIG_FPGA is not set ++# CONFIG_SIOX is not set ++# CONFIG_SLIMBUS is not set ++# CONFIG_INTERCONNECT is not set ++# CONFIG_COUNTER is not set ++# end of Device Drivers ++ ++# ++# File systems ++# ++# CONFIG_VALIDATE_FS_PARSER is not set ++CONFIG_FS_IOMAP=y ++# CONFIG_EXT2_FS is not set ++# CONFIG_EXT3_FS is not set ++CONFIG_EXT4_FS=y ++CONFIG_EXT4_USE_FOR_EXT2=y ++CONFIG_EXT4_FS_POSIX_ACL=y ++CONFIG_EXT4_FS_SECURITY=y ++# 
CONFIG_EXT4_DEBUG is not set ++CONFIG_JBD2=y ++# CONFIG_JBD2_DEBUG is not set ++CONFIG_FS_MBCACHE=y ++# CONFIG_REISERFS_FS is not set ++# CONFIG_JFS_FS is not set ++# CONFIG_XFS_FS is not set ++# CONFIG_GFS2_FS is not set ++# CONFIG_BTRFS_FS is not set ++# CONFIG_NILFS2_FS is not set ++# CONFIG_F2FS_FS is not set ++# CONFIG_FS_DAX is not set ++CONFIG_FS_POSIX_ACL=y ++CONFIG_EXPORTFS=y ++# CONFIG_EXPORTFS_BLOCK_OPS is not set ++CONFIG_FILE_LOCKING=y ++# CONFIG_FS_ENCRYPTION is not set ++# CONFIG_FS_VERITY is not set ++CONFIG_FSNOTIFY=y ++CONFIG_DNOTIFY=y ++CONFIG_INOTIFY_USER=y ++CONFIG_FANOTIFY=y ++# CONFIG_FANOTIFY_ACCESS_PERMISSIONS is not set ++# CONFIG_QUOTA is not set ++# CONFIG_AUTOFS4_FS is not set ++# CONFIG_AUTOFS_FS is not set ++# CONFIG_FUSE_FS is not set ++# CONFIG_OVERLAY_FS is not set ++ ++# ++# Caches ++# ++# CONFIG_FSCACHE is not set ++# end of Caches ++ ++# ++# CD-ROM/DVD Filesystems ++# ++# CONFIG_ISO9660_FS is not set ++# CONFIG_UDF_FS is not set ++# end of CD-ROM/DVD Filesystems ++ ++# ++# DOS/FAT/EXFAT/NT Filesystems ++# ++# CONFIG_MSDOS_FS is not set ++# CONFIG_VFAT_FS is not set ++# CONFIG_EXFAT_FS is not set ++# CONFIG_NTFS_FS is not set ++# CONFIG_NTFS3_FS is not set ++# end of DOS/FAT/EXFAT/NT Filesystems ++ ++# ++# Pseudo filesystems ++# ++CONFIG_PROC_FS=y ++CONFIG_PROC_KCORE=y ++CONFIG_PROC_VMCORE=y ++# CONFIG_PROC_VMCORE_DEVICE_DUMP is not set ++CONFIG_PROC_SYSCTL=y ++CONFIG_PROC_PAGE_MONITOR=y ++CONFIG_PROC_CHILDREN=y ++CONFIG_KERNFS=y ++CONFIG_SYSFS=y ++CONFIG_TMPFS=y ++CONFIG_TMPFS_POSIX_ACL=y ++CONFIG_TMPFS_XATTR=y ++# CONFIG_TMPFS_INODE64 is not set ++CONFIG_ARCH_SUPPORTS_HUGETLBFS=y ++CONFIG_HUGETLBFS=y ++CONFIG_HUGETLB_PAGE=y ++CONFIG_MEMFD_CREATE=y ++CONFIG_ARCH_HAS_GIGANTIC_PAGE=y ++# CONFIG_CONFIGFS_FS is not set ++# end of Pseudo filesystems ++ ++# CONFIG_MISC_FILESYSTEMS is not set ++CONFIG_NETWORK_FILESYSTEMS=y ++# CONFIG_NFS_FS is not set ++# CONFIG_NFSD is not set ++# CONFIG_CEPH_FS is not set ++# CONFIG_CIFS is not set ++# CONFIG_SMB_SERVER is not set ++# CONFIG_CODA_FS is not set ++# CONFIG_AFS_FS is not set ++CONFIG_9P_FS=y ++# CONFIG_9P_FS_POSIX_ACL is not set ++# CONFIG_9P_FS_SECURITY is not set ++# CONFIG_NLS is not set ++# CONFIG_UNICODE is not set ++CONFIG_IO_WQ=y ++# end of File systems ++ ++# ++# Security options ++# ++CONFIG_KEYS=y ++# CONFIG_KEYS_REQUEST_CACHE is not set ++# CONFIG_PERSISTENT_KEYRINGS is not set ++# CONFIG_TRUSTED_KEYS is not set ++# CONFIG_ENCRYPTED_KEYS is not set ++# CONFIG_KEY_DH_OPERATIONS is not set ++# CONFIG_SECURITY_DMESG_RESTRICT is not set ++CONFIG_SECURITY=y ++CONFIG_SECURITYFS=y ++CONFIG_SECURITY_NETWORK=y ++# CONFIG_SECURITY_NETWORK_XFRM is not set ++# CONFIG_SECURITY_PATH is not set ++CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y ++# CONFIG_HARDENED_USERCOPY is not set ++# CONFIG_FORTIFY_SOURCE is not set ++# CONFIG_STATIC_USERMODEHELPER is not set ++# CONFIG_SECURITY_SELINUX is not set ++# CONFIG_SECURITY_SMACK is not set ++# CONFIG_SECURITY_TOMOYO is not set ++# CONFIG_SECURITY_APPARMOR is not set ++# CONFIG_SECURITY_LOADPIN is not set ++# CONFIG_SECURITY_YAMA is not set ++# CONFIG_SECURITY_SAFESETID is not set ++# CONFIG_SECURITY_LOCKDOWN_LSM is not set ++# CONFIG_SECURITY_LANDLOCK is not set ++CONFIG_INTEGRITY=y ++# CONFIG_INTEGRITY_SIGNATURE is not set ++CONFIG_INTEGRITY_AUDIT=y ++CONFIG_IMA=y ++CONFIG_IMA_MEASURE_PCR_IDX=10 ++# CONFIG_IMA_TEMPLATE is not set ++CONFIG_IMA_NG_TEMPLATE=y ++# CONFIG_IMA_SIG_TEMPLATE is not set ++CONFIG_IMA_DEFAULT_TEMPLATE="ima-ng" ++CONFIG_IMA_DEFAULT_HASH_SHA1=y ++# 
CONFIG_IMA_DEFAULT_HASH_SHA256 is not set ++# CONFIG_IMA_DEFAULT_HASH_SHA512 is not set ++CONFIG_IMA_DEFAULT_HASH="sha1" ++# CONFIG_IMA_WRITE_POLICY is not set ++# CONFIG_IMA_READ_POLICY is not set ++# CONFIG_IMA_APPRAISE is not set ++# CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT is not set ++# CONFIG_IMA_DISABLE_HTABLE is not set ++# CONFIG_EVM is not set ++CONFIG_DEFAULT_SECURITY_DAC=y ++CONFIG_LSM="landlock,lockdown,yama,loadpin,safesetid,integrity,bpf" ++ ++# ++# Kernel hardening options ++# ++ ++# ++# Memory initialization ++# ++CONFIG_INIT_STACK_NONE=y ++# CONFIG_INIT_ON_ALLOC_DEFAULT_ON is not set ++# CONFIG_INIT_ON_FREE_DEFAULT_ON is not set ++# end of Memory initialization ++# end of Kernel hardening options ++# end of Security options ++ ++CONFIG_CRYPTO=y ++ ++# ++# Crypto core or helper ++# ++CONFIG_CRYPTO_ALGAPI=y ++CONFIG_CRYPTO_ALGAPI2=y ++CONFIG_CRYPTO_AEAD=y ++CONFIG_CRYPTO_AEAD2=y ++CONFIG_CRYPTO_SKCIPHER=y ++CONFIG_CRYPTO_SKCIPHER2=y ++CONFIG_CRYPTO_HASH=y ++CONFIG_CRYPTO_HASH2=y ++CONFIG_CRYPTO_RNG=y ++CONFIG_CRYPTO_RNG2=y ++CONFIG_CRYPTO_RNG_DEFAULT=y ++CONFIG_CRYPTO_AKCIPHER2=y ++CONFIG_CRYPTO_KPP2=y ++CONFIG_CRYPTO_ACOMP2=y ++CONFIG_CRYPTO_MANAGER=y ++CONFIG_CRYPTO_MANAGER2=y ++# CONFIG_CRYPTO_USER is not set ++CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y ++CONFIG_CRYPTO_GF128MUL=y ++CONFIG_CRYPTO_NULL=y ++CONFIG_CRYPTO_NULL2=y ++# CONFIG_CRYPTO_PCRYPT is not set ++# CONFIG_CRYPTO_CRYPTD is not set ++CONFIG_CRYPTO_AUTHENC=y ++# CONFIG_CRYPTO_TEST is not set ++ ++# ++# Public-key cryptography ++# ++# CONFIG_CRYPTO_RSA is not set ++# CONFIG_CRYPTO_DH is not set ++# CONFIG_CRYPTO_ECDH is not set ++# CONFIG_CRYPTO_ECDSA is not set ++# CONFIG_CRYPTO_ECRDSA is not set ++# CONFIG_CRYPTO_SM2 is not set ++# CONFIG_CRYPTO_CURVE25519 is not set ++ ++# ++# Authenticated Encryption with Associated Data ++# ++# CONFIG_CRYPTO_CCM is not set ++CONFIG_CRYPTO_GCM=y ++# CONFIG_CRYPTO_CHACHA20POLY1305 is not set ++# CONFIG_CRYPTO_AEGIS128 is not set ++CONFIG_CRYPTO_SEQIV=y ++CONFIG_CRYPTO_ECHAINIV=y ++ ++# ++# Block modes ++# ++CONFIG_CRYPTO_CBC=y ++# CONFIG_CRYPTO_CFB is not set ++CONFIG_CRYPTO_CTR=y ++# CONFIG_CRYPTO_CTS is not set ++# CONFIG_CRYPTO_ECB is not set ++# CONFIG_CRYPTO_LRW is not set ++# CONFIG_CRYPTO_OFB is not set ++# CONFIG_CRYPTO_PCBC is not set ++# CONFIG_CRYPTO_XTS is not set ++# CONFIG_CRYPTO_KEYWRAP is not set ++# CONFIG_CRYPTO_ADIANTUM is not set ++# CONFIG_CRYPTO_ESSIV is not set ++ ++# ++# Hash modes ++# ++# CONFIG_CRYPTO_CMAC is not set ++CONFIG_CRYPTO_HMAC=y ++# CONFIG_CRYPTO_XCBC is not set ++# CONFIG_CRYPTO_VMAC is not set ++ ++# ++# Digest ++# ++CONFIG_CRYPTO_CRC32C=y ++# CONFIG_CRYPTO_CRC32 is not set ++# CONFIG_CRYPTO_XXHASH is not set ++# CONFIG_CRYPTO_BLAKE2B is not set ++# CONFIG_CRYPTO_BLAKE2S is not set ++# CONFIG_CRYPTO_CRCT10DIF is not set ++CONFIG_CRYPTO_GHASH=y ++# CONFIG_CRYPTO_POLY1305 is not set ++# CONFIG_CRYPTO_MD4 is not set ++# CONFIG_CRYPTO_MD5 is not set ++# CONFIG_CRYPTO_MICHAEL_MIC is not set ++# CONFIG_CRYPTO_RMD160 is not set ++CONFIG_CRYPTO_SHA1=y ++CONFIG_CRYPTO_SHA256=y ++CONFIG_CRYPTO_SHA512=y ++# CONFIG_CRYPTO_SHA3 is not set ++# CONFIG_CRYPTO_SM3 is not set ++# CONFIG_CRYPTO_STREEBOG is not set ++# CONFIG_CRYPTO_WP512 is not set ++ ++# ++# Ciphers ++# ++CONFIG_CRYPTO_AES=y ++# CONFIG_CRYPTO_AES_TI is not set ++# CONFIG_CRYPTO_ANUBIS is not set ++# CONFIG_CRYPTO_ARC4 is not set ++# CONFIG_CRYPTO_BLOWFISH is not set ++# CONFIG_CRYPTO_CAMELLIA is not set ++# CONFIG_CRYPTO_CAST5 is not set ++# CONFIG_CRYPTO_CAST6 is not set ++# 
CONFIG_CRYPTO_DES is not set ++# CONFIG_CRYPTO_FCRYPT is not set ++# CONFIG_CRYPTO_KHAZAD is not set ++# CONFIG_CRYPTO_CHACHA20 is not set ++# CONFIG_CRYPTO_SEED is not set ++# CONFIG_CRYPTO_SERPENT is not set ++# CONFIG_CRYPTO_SM4 is not set ++# CONFIG_CRYPTO_TEA is not set ++# CONFIG_CRYPTO_TWOFISH is not set ++ ++# ++# Compression ++# ++# CONFIG_CRYPTO_DEFLATE is not set ++# CONFIG_CRYPTO_LZO is not set ++# CONFIG_CRYPTO_842 is not set ++# CONFIG_CRYPTO_LZ4 is not set ++# CONFIG_CRYPTO_LZ4HC is not set ++# CONFIG_CRYPTO_ZSTD is not set ++ ++# ++# Random Number Generation ++# ++# CONFIG_CRYPTO_ANSI_CPRNG is not set ++CONFIG_CRYPTO_DRBG_MENU=y ++CONFIG_CRYPTO_DRBG_HMAC=y ++# CONFIG_CRYPTO_DRBG_HASH is not set ++# CONFIG_CRYPTO_DRBG_CTR is not set ++CONFIG_CRYPTO_DRBG=y ++CONFIG_CRYPTO_JITTERENTROPY=y ++CONFIG_CRYPTO_USER_API=y ++CONFIG_CRYPTO_USER_API_HASH=y ++CONFIG_CRYPTO_USER_API_SKCIPHER=y ++CONFIG_CRYPTO_USER_API_RNG=y ++# CONFIG_CRYPTO_USER_API_RNG_CAVP is not set ++# CONFIG_CRYPTO_USER_API_AEAD is not set ++CONFIG_CRYPTO_USER_API_ENABLE_OBSOLETE=y ++CONFIG_CRYPTO_HASH_INFO=y ++ ++# ++# Crypto library routines ++# ++CONFIG_CRYPTO_LIB_AES=y ++# CONFIG_CRYPTO_LIB_BLAKE2S is not set ++# CONFIG_CRYPTO_LIB_CHACHA is not set ++# CONFIG_CRYPTO_LIB_CURVE25519 is not set ++CONFIG_CRYPTO_LIB_POLY1305_RSIZE=1 ++# CONFIG_CRYPTO_LIB_POLY1305 is not set ++# CONFIG_CRYPTO_LIB_CHACHA20POLY1305 is not set ++CONFIG_CRYPTO_LIB_SHA256=y ++CONFIG_CRYPTO_HW=y ++# CONFIG_ZCRYPT is not set ++# CONFIG_CRYPTO_SHA1_S390 is not set ++# CONFIG_CRYPTO_SHA256_S390 is not set ++# CONFIG_CRYPTO_SHA512_S390 is not set ++# CONFIG_CRYPTO_SHA3_256_S390 is not set ++# CONFIG_CRYPTO_SHA3_512_S390 is not set ++# CONFIG_CRYPTO_DES_S390 is not set ++# CONFIG_CRYPTO_AES_S390 is not set ++# CONFIG_S390_PRNG is not set ++# CONFIG_CRYPTO_GHASH_S390 is not set ++# CONFIG_CRYPTO_CRC32_S390 is not set ++# CONFIG_CRYPTO_DEV_NITROX_CNN55XX is not set ++# CONFIG_CRYPTO_DEV_VIRTIO is not set ++# CONFIG_CRYPTO_DEV_SAFEXCEL is not set ++# CONFIG_CRYPTO_DEV_AMLOGIC_GXL is not set ++# CONFIG_ASYMMETRIC_KEY_TYPE is not set ++ ++# ++# Certificates for signature checking ++# ++# CONFIG_SYSTEM_BLACKLIST_KEYRING is not set ++# end of Certificates for signature checking ++ ++CONFIG_BINARY_PRINTF=y ++ ++# ++# Library routines ++# ++# CONFIG_PACKING is not set ++CONFIG_BITREVERSE=y ++CONFIG_GENERIC_STRNCPY_FROM_USER=y ++CONFIG_GENERIC_STRNLEN_USER=y ++CONFIG_GENERIC_NET_UTILS=y ++CONFIG_GENERIC_FIND_FIRST_BIT=y ++# CONFIG_CORDIC is not set ++# CONFIG_PRIME_NUMBERS is not set ++CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y ++# CONFIG_CRC_CCITT is not set ++CONFIG_CRC16=y ++# CONFIG_CRC_T10DIF is not set ++# CONFIG_CRC_ITU_T is not set ++CONFIG_CRC32=y ++# CONFIG_CRC32_SELFTEST is not set ++CONFIG_CRC32_SLICEBY8=y ++# CONFIG_CRC32_SLICEBY4 is not set ++# CONFIG_CRC32_SARWATE is not set ++# CONFIG_CRC32_BIT is not set ++# CONFIG_CRC64 is not set ++# CONFIG_CRC4 is not set ++# CONFIG_CRC7 is not set ++CONFIG_LIBCRC32C=y ++# CONFIG_CRC8 is not set ++CONFIG_XXHASH=y ++# CONFIG_RANDOM32_SELFTEST is not set ++CONFIG_ZLIB_DFLTCC=y ++CONFIG_ZSTD_DECOMPRESS=y ++# CONFIG_XZ_DEC is not set ++CONFIG_DECOMPRESS_ZSTD=y ++CONFIG_GENERIC_ALLOCATOR=y ++CONFIG_XARRAY_MULTI=y ++CONFIG_ASSOCIATIVE_ARRAY=y ++CONFIG_HAS_DMA=y ++CONFIG_DMA_OPS=y ++CONFIG_NEED_SG_DMA_LENGTH=y ++CONFIG_NEED_DMA_MAP_STATE=y ++CONFIG_ARCH_DMA_ADDR_T_64BIT=y ++CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED=y ++CONFIG_SWIOTLB=y ++# CONFIG_DMA_API_DEBUG is not set ++# CONFIG_DMA_MAP_BENCHMARK is not set 
++CONFIG_SGL_ALLOC=y ++CONFIG_IOMMU_HELPER=y ++CONFIG_CPU_RMAP=y ++CONFIG_DQL=y ++CONFIG_GLOB=y ++# CONFIG_GLOB_SELFTEST is not set ++CONFIG_NLATTR=y ++# CONFIG_IRQ_POLL is not set ++CONFIG_HAVE_GENERIC_VDSO=y ++CONFIG_GENERIC_GETTIMEOFDAY=y ++CONFIG_GENERIC_VDSO_TIME_NS=y ++CONFIG_SG_POOL=y ++CONFIG_ARCH_STACKWALK=y ++CONFIG_SBITMAP=y ++# end of Library routines ++ ++# ++# Kernel hacking ++# ++ ++# ++# printk and dmesg options ++# ++# CONFIG_PRINTK_TIME is not set ++# CONFIG_PRINTK_CALLER is not set ++# CONFIG_STACKTRACE_BUILD_ID is not set ++CONFIG_CONSOLE_LOGLEVEL_DEFAULT=7 ++CONFIG_CONSOLE_LOGLEVEL_QUIET=4 ++CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 ++# CONFIG_DYNAMIC_DEBUG is not set ++# CONFIG_DYNAMIC_DEBUG_CORE is not set ++CONFIG_SYMBOLIC_ERRNAME=y ++CONFIG_DEBUG_BUGVERBOSE=y ++# end of printk and dmesg options ++ ++# ++# Compile-time checks and compiler options ++# ++CONFIG_DEBUG_INFO=y ++# CONFIG_DEBUG_INFO_REDUCED is not set ++# CONFIG_DEBUG_INFO_COMPRESSED is not set ++# CONFIG_DEBUG_INFO_SPLIT is not set ++# CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT is not set ++CONFIG_DEBUG_INFO_DWARF4=y ++CONFIG_DEBUG_INFO_BTF=y ++CONFIG_PAHOLE_HAS_SPLIT_BTF=y ++CONFIG_DEBUG_INFO_BTF_MODULES=y ++CONFIG_GDB_SCRIPTS=y ++CONFIG_FRAME_WARN=2048 ++# CONFIG_STRIP_ASM_SYMS is not set ++# CONFIG_READABLE_ASM is not set ++CONFIG_HEADERS_INSTALL=y ++CONFIG_DEBUG_SECTION_MISMATCH=y ++CONFIG_SECTION_MISMATCH_WARN_ONLY=y ++# CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B is not set ++# CONFIG_VMLINUX_MAP is not set ++# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set ++# end of Compile-time checks and compiler options ++ ++# ++# Generic Kernel Debugging Instruments ++# ++CONFIG_MAGIC_SYSRQ=y ++CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x1 ++CONFIG_MAGIC_SYSRQ_SERIAL=y ++CONFIG_MAGIC_SYSRQ_SERIAL_SEQUENCE="" ++CONFIG_DEBUG_FS=y ++CONFIG_DEBUG_FS_ALLOW_ALL=y ++# CONFIG_DEBUG_FS_DISALLOW_MOUNT is not set ++# CONFIG_DEBUG_FS_ALLOW_NONE is not set ++CONFIG_ARCH_HAS_UBSAN_SANITIZE_ALL=y ++# CONFIG_UBSAN is not set ++CONFIG_HAVE_ARCH_KCSAN=y ++# end of Generic Kernel Debugging Instruments ++ ++CONFIG_DEBUG_KERNEL=y ++CONFIG_DEBUG_MISC=y ++ ++# ++# Memory Debugging ++# ++# CONFIG_PAGE_EXTENSION is not set ++CONFIG_DEBUG_PAGEALLOC=y ++# CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT is not set ++# CONFIG_PAGE_OWNER is not set ++# CONFIG_PAGE_POISONING is not set ++# CONFIG_DEBUG_PAGE_REF is not set ++# CONFIG_DEBUG_RODATA_TEST is not set ++CONFIG_ARCH_HAS_DEBUG_WX=y ++# CONFIG_DEBUG_WX is not set ++CONFIG_GENERIC_PTDUMP=y ++CONFIG_PTDUMP_CORE=y ++CONFIG_PTDUMP_DEBUGFS=y ++# CONFIG_DEBUG_OBJECTS is not set ++# CONFIG_SLUB_DEBUG_ON is not set ++# CONFIG_SLUB_STATS is not set ++CONFIG_HAVE_DEBUG_KMEMLEAK=y ++# CONFIG_DEBUG_KMEMLEAK is not set ++# CONFIG_DEBUG_STACK_USAGE is not set ++# CONFIG_SCHED_STACK_END_CHECK is not set ++CONFIG_ARCH_HAS_DEBUG_VM_PGTABLE=y ++# CONFIG_DEBUG_VM is not set ++# CONFIG_DEBUG_VM_PGTABLE is not set ++# CONFIG_DEBUG_MEMORY_INIT is not set ++# CONFIG_DEBUG_PER_CPU_MAPS is not set ++CONFIG_HAVE_ARCH_KASAN=y ++CONFIG_HAVE_ARCH_KASAN_VMALLOC=y ++CONFIG_CC_HAS_KASAN_GENERIC=y ++CONFIG_CC_HAS_WORKING_NOSANITIZE_ADDRESS=y ++# CONFIG_KASAN is not set ++CONFIG_HAVE_ARCH_KFENCE=y ++# CONFIG_KFENCE is not set ++# end of Memory Debugging ++ ++# CONFIG_DEBUG_SHIRQ is not set ++ ++# ++# Debug Oops, Lockups and Hangs ++# ++CONFIG_PANIC_ON_OOPS=y ++CONFIG_PANIC_ON_OOPS_VALUE=1 ++CONFIG_PANIC_TIMEOUT=0 ++CONFIG_DETECT_HUNG_TASK=y ++CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120 ++# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set 
++CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 ++# CONFIG_WQ_WATCHDOG is not set ++# CONFIG_TEST_LOCKUP is not set ++# end of Debug Oops, Lockups and Hangs ++ ++# ++# Scheduler Debugging ++# ++CONFIG_SCHED_DEBUG=y ++CONFIG_SCHED_INFO=y ++CONFIG_SCHEDSTATS=y ++# end of Scheduler Debugging ++ ++# CONFIG_DEBUG_TIMEKEEPING is not set ++ ++# ++# Lock Debugging (spinlocks, mutexes, etc...) ++# ++CONFIG_LOCK_DEBUGGING_SUPPORT=y ++CONFIG_PROVE_LOCKING=y ++# CONFIG_PROVE_RAW_LOCK_NESTING is not set ++CONFIG_LOCK_STAT=y ++CONFIG_DEBUG_RT_MUTEXES=y ++CONFIG_DEBUG_SPINLOCK=y ++CONFIG_DEBUG_MUTEXES=y ++CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y ++CONFIG_DEBUG_RWSEMS=y ++CONFIG_DEBUG_LOCK_ALLOC=y ++CONFIG_LOCKDEP=y ++CONFIG_LOCKDEP_BITS=15 ++CONFIG_LOCKDEP_CHAINS_BITS=16 ++CONFIG_LOCKDEP_STACK_TRACE_BITS=19 ++CONFIG_LOCKDEP_STACK_TRACE_HASH_BITS=14 ++CONFIG_LOCKDEP_CIRCULAR_QUEUE_BITS=12 ++CONFIG_DEBUG_LOCKDEP=y ++CONFIG_DEBUG_ATOMIC_SLEEP=y ++# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set ++# CONFIG_LOCK_TORTURE_TEST is not set ++# CONFIG_WW_MUTEX_SELFTEST is not set ++# CONFIG_SCF_TORTURE_TEST is not set ++# CONFIG_CSD_LOCK_WAIT_DEBUG is not set ++# end of Lock Debugging (spinlocks, mutexes, etc...) ++ ++CONFIG_TRACE_IRQFLAGS=y ++CONFIG_DEBUG_IRQFLAGS=y ++CONFIG_STACKTRACE=y ++# CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set ++# CONFIG_DEBUG_KOBJECT is not set ++ ++# ++# Debug kernel data structures ++# ++CONFIG_DEBUG_LIST=y ++# CONFIG_DEBUG_PLIST is not set ++CONFIG_DEBUG_SG=y ++CONFIG_DEBUG_NOTIFIERS=y ++# CONFIG_BUG_ON_DATA_CORRUPTION is not set ++# end of Debug kernel data structures ++ ++# CONFIG_DEBUG_CREDENTIALS is not set ++ ++# ++# RCU Debugging ++# ++CONFIG_PROVE_RCU=y ++# CONFIG_RCU_SCALE_TEST is not set ++# CONFIG_RCU_TORTURE_TEST is not set ++# CONFIG_RCU_REF_SCALE_TEST is not set ++CONFIG_RCU_CPU_STALL_TIMEOUT=21 ++# CONFIG_RCU_TRACE is not set ++# CONFIG_RCU_EQS_DEBUG is not set ++# end of RCU Debugging ++ ++# CONFIG_DEBUG_WQ_FORCE_RR_CPU is not set ++# CONFIG_CPU_HOTPLUG_STATE_CONTROL is not set ++CONFIG_LATENCYTOP=y ++CONFIG_NOP_TRACER=y ++CONFIG_HAVE_FUNCTION_TRACER=y ++CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y ++CONFIG_HAVE_DYNAMIC_FTRACE=y ++CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y ++CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y ++CONFIG_HAVE_SYSCALL_TRACEPOINTS=y ++CONFIG_HAVE_FENTRY=y ++CONFIG_HAVE_NOP_MCOUNT=y ++CONFIG_TRACER_MAX_TRACE=y ++CONFIG_TRACE_CLOCK=y ++CONFIG_RING_BUFFER=y ++CONFIG_EVENT_TRACING=y ++CONFIG_CONTEXT_SWITCH_TRACER=y ++CONFIG_RING_BUFFER_ALLOW_SWAP=y ++CONFIG_PREEMPTIRQ_TRACEPOINTS=y ++CONFIG_TRACING=y ++CONFIG_GENERIC_TRACER=y ++CONFIG_TRACING_SUPPORT=y ++CONFIG_FTRACE=y ++# CONFIG_BOOTTIME_TRACING is not set ++CONFIG_FUNCTION_TRACER=y ++CONFIG_FUNCTION_GRAPH_TRACER=y ++CONFIG_DYNAMIC_FTRACE=y ++CONFIG_DYNAMIC_FTRACE_WITH_REGS=y ++CONFIG_FPROBE=y ++CONFIG_FUNCTION_PROFILER=y ++CONFIG_STACK_TRACER=y ++# CONFIG_IRQSOFF_TRACER is not set ++CONFIG_SCHED_TRACER=y ++# CONFIG_HWLAT_TRACER is not set ++# CONFIG_OSNOISE_TRACER is not set ++# CONFIG_TIMERLAT_TRACER is not set ++CONFIG_FTRACE_SYSCALLS=y ++CONFIG_TRACER_SNAPSHOT=y ++CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y ++CONFIG_BRANCH_PROFILE_NONE=y ++# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set ++# CONFIG_PROFILE_ALL_BRANCHES is not set ++CONFIG_BLK_DEV_IO_TRACE=y ++CONFIG_KPROBE_EVENTS=y ++# CONFIG_KPROBE_EVENTS_ON_NOTRACE is not set ++CONFIG_UPROBE_EVENTS=y ++CONFIG_BPF_EVENTS=y ++CONFIG_DYNAMIC_EVENTS=y ++CONFIG_PROBE_EVENTS=y ++# CONFIG_BPF_KPROBE_OVERRIDE is not set ++CONFIG_FTRACE_MCOUNT_RECORD=y ++CONFIG_FTRACE_MCOUNT_USE_CC=y ++# 
CONFIG_SYNTH_EVENTS is not set ++# CONFIG_HIST_TRIGGERS is not set ++# CONFIG_TRACE_EVENT_INJECT is not set ++# CONFIG_TRACEPOINT_BENCHMARK is not set ++# CONFIG_RING_BUFFER_BENCHMARK is not set ++# CONFIG_TRACE_EVAL_MAP_FILE is not set ++# CONFIG_FTRACE_RECORD_RECURSION is not set ++# CONFIG_FTRACE_STARTUP_TEST is not set ++# CONFIG_RING_BUFFER_STARTUP_TEST is not set ++# CONFIG_RING_BUFFER_VALIDATE_TIME_DELTAS is not set ++# CONFIG_PREEMPTIRQ_DELAY_TEST is not set ++# CONFIG_KPROBE_EVENT_GEN_TEST is not set ++CONFIG_SAMPLES=y ++# CONFIG_SAMPLE_AUXDISPLAY is not set ++# CONFIG_SAMPLE_TRACE_EVENTS is not set ++# CONFIG_SAMPLE_TRACE_PRINTK is not set ++# CONFIG_SAMPLE_TRACE_ARRAY is not set ++# CONFIG_SAMPLE_KOBJECT is not set ++# CONFIG_SAMPLE_KPROBES is not set ++# CONFIG_SAMPLE_KFIFO is not set ++# CONFIG_SAMPLE_LIVEPATCH is not set ++# CONFIG_SAMPLE_HIDRAW is not set ++# CONFIG_SAMPLE_LANDLOCK is not set ++# CONFIG_SAMPLE_PIDFD is not set ++CONFIG_SAMPLE_SECCOMP=y ++# CONFIG_SAMPLE_TIMER is not set ++# CONFIG_SAMPLE_UHID is not set ++# CONFIG_SAMPLE_ANDROID_BINDERFS is not set ++# CONFIG_SAMPLE_VFS is not set ++# CONFIG_SAMPLE_WATCHDOG is not set ++# CONFIG_SAMPLE_WATCH_QUEUE is not set ++CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y ++# CONFIG_STRICT_DEVMEM is not set ++ ++# ++# s390 Debugging ++# ++CONFIG_EARLY_PRINTK=y ++# CONFIG_DEBUG_ENTRY is not set ++# CONFIG_CIO_INJECT is not set ++# end of s390 Debugging ++ ++# ++# Kernel Testing and Coverage ++# ++# CONFIG_KUNIT is not set ++# CONFIG_NOTIFIER_ERROR_INJECTION is not set ++CONFIG_FUNCTION_ERROR_INJECTION=y ++# CONFIG_FAULT_INJECTION is not set ++CONFIG_ARCH_HAS_KCOV=y ++CONFIG_CC_HAS_SANCOV_TRACE_PC=y ++# CONFIG_KCOV is not set ++CONFIG_RUNTIME_TESTING_MENU=y ++# CONFIG_LKDTM is not set ++# CONFIG_TEST_MIN_HEAP is not set ++# CONFIG_TEST_DIV64 is not set ++# CONFIG_BACKTRACE_SELF_TEST is not set ++# CONFIG_RBTREE_TEST is not set ++# CONFIG_REED_SOLOMON_TEST is not set ++# CONFIG_INTERVAL_TREE_TEST is not set ++# CONFIG_PERCPU_TEST is not set ++# CONFIG_ATOMIC64_SELFTEST is not set ++# CONFIG_TEST_HEXDUMP is not set ++# CONFIG_STRING_SELFTEST is not set ++# CONFIG_TEST_STRING_HELPERS is not set ++# CONFIG_TEST_STRSCPY is not set ++# CONFIG_TEST_KSTRTOX is not set ++# CONFIG_TEST_PRINTF is not set ++# CONFIG_TEST_SCANF is not set ++# CONFIG_TEST_BITMAP is not set ++# CONFIG_TEST_UUID is not set ++# CONFIG_TEST_XARRAY is not set ++# CONFIG_TEST_OVERFLOW is not set ++# CONFIG_TEST_RHASHTABLE is not set ++# CONFIG_TEST_HASH is not set ++# CONFIG_TEST_IDA is not set ++# CONFIG_TEST_LKM is not set ++# CONFIG_TEST_BITOPS is not set ++# CONFIG_TEST_VMALLOC is not set ++# CONFIG_TEST_USER_COPY is not set ++CONFIG_TEST_BPF=m ++# CONFIG_TEST_BLACKHOLE_DEV is not set ++# CONFIG_FIND_BIT_BENCHMARK is not set ++# CONFIG_TEST_FIRMWARE is not set ++# CONFIG_TEST_SYSCTL is not set ++# CONFIG_TEST_UDELAY is not set ++# CONFIG_TEST_STATIC_KEYS is not set ++# CONFIG_TEST_KMOD is not set ++# CONFIG_TEST_MEMCAT_P is not set ++# CONFIG_TEST_STACKINIT is not set ++# CONFIG_TEST_MEMINIT is not set ++# CONFIG_TEST_FREE_PAGES is not set ++# end of Kernel Testing and Coverage ++# end of Kernel hacking +diff --git a/src/cc/libbpf/travis-ci/vmtest/configs/config-latest.x86_64 b/src/cc/libbpf/travis-ci/vmtest/configs/config-latest.x86_64 +new file mode 100644 +index 0000000..947a7b3 +--- /dev/null ++++ b/src/cc/libbpf/travis-ci/vmtest/configs/config-latest.x86_64 +@@ -0,0 +1,3073 @@ ++# ++# Automatically generated file; DO NOT EDIT. 
++# Linux/x86 5.9.0-rc1 Kernel Configuration ++# ++CONFIG_CC_VERSION_TEXT="gcc (GCC) 8.2.1 20180801 (Red Hat 8.2.1-2)" ++CONFIG_CC_IS_GCC=y ++CONFIG_GCC_VERSION=80201 ++CONFIG_LD_VERSION=230000000 ++CONFIG_CLANG_VERSION=0 ++CONFIG_CC_CAN_LINK=y ++CONFIG_CC_CAN_LINK_STATIC=y ++CONFIG_CC_HAS_ASM_GOTO=y ++CONFIG_IRQ_WORK=y ++CONFIG_BUILDTIME_TABLE_SORT=y ++CONFIG_THREAD_INFO_IN_TASK=y ++ ++# ++# General setup ++# ++CONFIG_INIT_ENV_ARG_LIMIT=32 ++# CONFIG_COMPILE_TEST is not set ++CONFIG_LOCALVERSION="" ++CONFIG_LOCALVERSION_AUTO=y ++CONFIG_BUILD_SALT="" ++CONFIG_HAVE_KERNEL_GZIP=y ++CONFIG_HAVE_KERNEL_BZIP2=y ++CONFIG_HAVE_KERNEL_LZMA=y ++CONFIG_HAVE_KERNEL_XZ=y ++CONFIG_HAVE_KERNEL_LZO=y ++CONFIG_HAVE_KERNEL_LZ4=y ++CONFIG_HAVE_KERNEL_ZSTD=y ++CONFIG_KERNEL_GZIP=y ++# CONFIG_KERNEL_BZIP2 is not set ++# CONFIG_KERNEL_LZMA is not set ++# CONFIG_KERNEL_XZ is not set ++# CONFIG_KERNEL_LZO is not set ++# CONFIG_KERNEL_LZ4 is not set ++# CONFIG_KERNEL_ZSTD is not set ++CONFIG_DEFAULT_INIT="" ++CONFIG_DEFAULT_HOSTNAME="(none)" ++CONFIG_SWAP=y ++CONFIG_SYSVIPC=y ++CONFIG_SYSVIPC_SYSCTL=y ++CONFIG_POSIX_MQUEUE=y ++CONFIG_POSIX_MQUEUE_SYSCTL=y ++# CONFIG_WATCH_QUEUE is not set ++CONFIG_CROSS_MEMORY_ATTACH=y ++# CONFIG_USELIB is not set ++CONFIG_AUDIT=y ++CONFIG_HAVE_ARCH_AUDITSYSCALL=y ++CONFIG_AUDITSYSCALL=y ++ ++# ++# IRQ subsystem ++# ++CONFIG_GENERIC_IRQ_PROBE=y ++CONFIG_GENERIC_IRQ_SHOW=y ++CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK=y ++CONFIG_GENERIC_PENDING_IRQ=y ++CONFIG_GENERIC_IRQ_MIGRATION=y ++CONFIG_HARDIRQS_SW_RESEND=y ++CONFIG_IRQ_DOMAIN=y ++CONFIG_IRQ_DOMAIN_HIERARCHY=y ++CONFIG_GENERIC_MSI_IRQ=y ++CONFIG_GENERIC_MSI_IRQ_DOMAIN=y ++CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR=y ++CONFIG_GENERIC_IRQ_RESERVATION_MODE=y ++CONFIG_IRQ_FORCED_THREADING=y ++CONFIG_SPARSE_IRQ=y ++# CONFIG_GENERIC_IRQ_DEBUGFS is not set ++# end of IRQ subsystem ++ ++CONFIG_CLOCKSOURCE_WATCHDOG=y ++CONFIG_ARCH_CLOCKSOURCE_INIT=y ++CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE=y ++CONFIG_GENERIC_TIME_VSYSCALL=y ++CONFIG_GENERIC_CLOCKEVENTS=y ++CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y ++CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST=y ++CONFIG_GENERIC_CMOS_UPDATE=y ++CONFIG_HAVE_POSIX_CPU_TIMERS_TASK_WORK=y ++CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y ++ ++# ++# Timers subsystem ++# ++CONFIG_TICK_ONESHOT=y ++CONFIG_NO_HZ_COMMON=y ++# CONFIG_HZ_PERIODIC is not set ++CONFIG_NO_HZ_IDLE=y ++# CONFIG_NO_HZ_FULL is not set ++CONFIG_NO_HZ=y ++CONFIG_HIGH_RES_TIMERS=y ++# end of Timers subsystem ++ ++# CONFIG_PREEMPT_NONE is not set ++# CONFIG_PREEMPT_VOLUNTARY is not set ++CONFIG_PREEMPT=y ++CONFIG_PREEMPT_COUNT=y ++CONFIG_PREEMPTION=y ++ ++# ++# CPU/Task time and stats accounting ++# ++CONFIG_TICK_CPU_ACCOUNTING=y ++# CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set ++# CONFIG_IRQ_TIME_ACCOUNTING is not set ++CONFIG_BSD_PROCESS_ACCT=y ++# CONFIG_BSD_PROCESS_ACCT_V3 is not set ++CONFIG_TASKSTATS=y ++CONFIG_TASK_DELAY_ACCT=y ++CONFIG_TASK_XACCT=y ++CONFIG_TASK_IO_ACCOUNTING=y ++# CONFIG_PSI is not set ++# end of CPU/Task time and stats accounting ++ ++# CONFIG_CPU_ISOLATION is not set ++ ++# ++# RCU Subsystem ++# ++CONFIG_TREE_RCU=y ++CONFIG_PREEMPT_RCU=y ++# CONFIG_RCU_EXPERT is not set ++CONFIG_SRCU=y ++CONFIG_TREE_SRCU=y ++CONFIG_TASKS_RCU_GENERIC=y ++CONFIG_TASKS_RCU=y ++CONFIG_TASKS_RUDE_RCU=y ++CONFIG_RCU_STALL_COMMON=y ++CONFIG_RCU_NEED_SEGCBLIST=y ++# end of RCU Subsystem ++ ++CONFIG_IKCONFIG=y ++CONFIG_IKCONFIG_PROC=y ++# CONFIG_IKHEADERS is not set ++CONFIG_LOG_BUF_SHIFT=21 ++CONFIG_LOG_CPU_MAX_BUF_SHIFT=0 ++CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13 
++CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y ++ ++# ++# Scheduler features ++# ++# CONFIG_UCLAMP_TASK is not set ++# end of Scheduler features ++ ++CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y ++CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH=y ++CONFIG_CC_HAS_INT128=y ++CONFIG_ARCH_SUPPORTS_INT128=y ++CONFIG_NUMA_BALANCING=y ++# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set ++CONFIG_CGROUPS=y ++CONFIG_PAGE_COUNTER=y ++CONFIG_MEMCG=y ++CONFIG_MEMCG_SWAP=y ++CONFIG_MEMCG_KMEM=y ++CONFIG_BLK_CGROUP=y ++CONFIG_CGROUP_WRITEBACK=y ++CONFIG_CGROUP_SCHED=y ++CONFIG_FAIR_GROUP_SCHED=y ++CONFIG_CFS_BANDWIDTH=y ++# CONFIG_RT_GROUP_SCHED is not set ++# CONFIG_CGROUP_PIDS is not set ++# CONFIG_CGROUP_RDMA is not set ++CONFIG_CGROUP_FREEZER=y ++CONFIG_CGROUP_HUGETLB=y ++CONFIG_CPUSETS=y ++CONFIG_PROC_PID_CPUSET=y ++CONFIG_CGROUP_DEVICE=y ++CONFIG_CGROUP_CPUACCT=y ++CONFIG_CGROUP_PERF=y ++CONFIG_CGROUP_BPF=y ++# CONFIG_CGROUP_DEBUG is not set ++CONFIG_SOCK_CGROUP_DATA=y ++CONFIG_NAMESPACES=y ++CONFIG_UTS_NS=y ++CONFIG_TIME_NS=y ++CONFIG_IPC_NS=y ++CONFIG_USER_NS=y ++CONFIG_PID_NS=y ++CONFIG_NET_NS=y ++# CONFIG_CHECKPOINT_RESTORE is not set ++# CONFIG_SCHED_AUTOGROUP is not set ++# CONFIG_SYSFS_DEPRECATED is not set ++CONFIG_RELAY=y ++CONFIG_BLK_DEV_INITRD=y ++CONFIG_INITRAMFS_SOURCE="" ++CONFIG_RD_GZIP=y ++CONFIG_RD_BZIP2=y ++CONFIG_RD_LZMA=y ++CONFIG_RD_XZ=y ++CONFIG_RD_LZO=y ++CONFIG_RD_LZ4=y ++CONFIG_RD_ZSTD=y ++CONFIG_BOOT_CONFIG=y ++CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y ++# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set ++CONFIG_SYSCTL=y ++CONFIG_SYSCTL_EXCEPTION_TRACE=y ++CONFIG_HAVE_PCSPKR_PLATFORM=y ++CONFIG_BPF=y ++CONFIG_EXPERT=y ++CONFIG_MULTIUSER=y ++CONFIG_SGETMASK_SYSCALL=y ++# CONFIG_SYSFS_SYSCALL is not set ++CONFIG_FHANDLE=y ++CONFIG_POSIX_TIMERS=y ++CONFIG_PRINTK=y ++CONFIG_PRINTK_NMI=y ++CONFIG_BUG=y ++CONFIG_ELF_CORE=y ++CONFIG_PCSPKR_PLATFORM=y ++CONFIG_BASE_FULL=y ++CONFIG_FUTEX=y ++CONFIG_FUTEX_PI=y ++CONFIG_EPOLL=y ++CONFIG_SIGNALFD=y ++CONFIG_TIMERFD=y ++CONFIG_EVENTFD=y ++CONFIG_SHMEM=y ++CONFIG_AIO=y ++CONFIG_IO_URING=y ++CONFIG_ADVISE_SYSCALLS=y ++CONFIG_HAVE_ARCH_USERFAULTFD_WP=y ++CONFIG_HAVE_ARCH_USERFAULTFD_MINOR=y ++CONFIG_MEMBARRIER=y ++CONFIG_KALLSYMS=y ++CONFIG_KALLSYMS_ALL=y ++CONFIG_KALLSYMS_ABSOLUTE_PERCPU=y ++CONFIG_KALLSYMS_BASE_RELATIVE=y ++CONFIG_BPF_LSM=y ++CONFIG_BPF_SYSCALL=y ++CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y ++CONFIG_BPF_JIT_ALWAYS_ON=y ++CONFIG_BPF_JIT_DEFAULT_ON=y ++CONFIG_USERMODE_DRIVER=y ++CONFIG_BPF_PRELOAD=y ++CONFIG_BPF_PRELOAD_UMD=y ++CONFIG_USERFAULTFD=y ++CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE=y ++CONFIG_RSEQ=y ++# CONFIG_DEBUG_RSEQ is not set ++# CONFIG_EMBEDDED is not set ++CONFIG_HAVE_PERF_EVENTS=y ++# CONFIG_PC104 is not set ++ ++# ++# Kernel Performance Events And Counters ++# ++CONFIG_PERF_EVENTS=y ++# CONFIG_DEBUG_PERF_USE_VMALLOC is not set ++# end of Kernel Performance Events And Counters ++ ++CONFIG_VM_EVENT_COUNTERS=y ++CONFIG_SLUB_DEBUG=y ++# CONFIG_SLUB_MEMCG_SYSFS_ON is not set ++CONFIG_COMPAT_BRK=y ++# CONFIG_SLAB is not set ++CONFIG_SLUB=y ++# CONFIG_SLOB is not set ++CONFIG_SLAB_MERGE_DEFAULT=y ++# CONFIG_SLAB_FREELIST_RANDOM is not set ++# CONFIG_SLAB_FREELIST_HARDENED is not set ++# CONFIG_SHUFFLE_PAGE_ALLOCATOR is not set ++CONFIG_SLUB_CPU_PARTIAL=y ++CONFIG_PROFILING=y ++CONFIG_TRACEPOINTS=y ++# end of General setup ++ ++CONFIG_64BIT=y ++CONFIG_X86_64=y ++CONFIG_X86=y ++CONFIG_INSTRUCTION_DECODER=y ++CONFIG_OUTPUT_FORMAT="elf64-x86-64" ++CONFIG_LOCKDEP_SUPPORT=y ++CONFIG_STACKTRACE_SUPPORT=y ++CONFIG_MMU=y ++CONFIG_ARCH_MMAP_RND_BITS_MIN=28 
++CONFIG_ARCH_MMAP_RND_BITS_MAX=32 ++CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8 ++CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16 ++CONFIG_GENERIC_ISA_DMA=y ++CONFIG_GENERIC_BUG=y ++CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y ++CONFIG_ARCH_MAY_HAVE_PC_FDC=y ++CONFIG_GENERIC_CALIBRATE_DELAY=y ++CONFIG_ARCH_HAS_CPU_RELAX=y ++CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y ++CONFIG_ARCH_HAS_FILTER_PGPROT=y ++CONFIG_HAVE_SETUP_PER_CPU_AREA=y ++CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y ++CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y ++CONFIG_ARCH_HIBERNATION_POSSIBLE=y ++CONFIG_ARCH_SUSPEND_POSSIBLE=y ++CONFIG_ARCH_WANT_GENERAL_HUGETLB=y ++CONFIG_ZONE_DMA32=y ++CONFIG_AUDIT_ARCH=y ++CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y ++CONFIG_X86_64_SMP=y ++CONFIG_ARCH_SUPPORTS_UPROBES=y ++CONFIG_FIX_EARLYCON_MEM=y ++CONFIG_PGTABLE_LEVELS=4 ++CONFIG_CC_HAS_SANE_STACKPROTECTOR=y ++ ++# ++# Processor type and features ++# ++CONFIG_ZONE_DMA=y ++CONFIG_SMP=y ++CONFIG_X86_FEATURE_NAMES=y ++CONFIG_X86_MPPARSE=y ++# CONFIG_GOLDFISH is not set ++# CONFIG_RETPOLINE is not set ++# CONFIG_X86_CPU_RESCTRL is not set ++CONFIG_X86_EXTENDED_PLATFORM=y ++# CONFIG_X86_VSMP is not set ++# CONFIG_X86_GOLDFISH is not set ++# CONFIG_X86_INTEL_LPSS is not set ++# CONFIG_X86_AMD_PLATFORM_DEVICE is not set ++# CONFIG_IOSF_MBI is not set ++CONFIG_X86_SUPPORTS_MEMORY_FAILURE=y ++CONFIG_SCHED_OMIT_FRAME_POINTER=y ++# CONFIG_HYPERVISOR_GUEST is not set ++# CONFIG_MK8 is not set ++# CONFIG_MPSC is not set ++CONFIG_MCORE2=y ++# CONFIG_MATOM is not set ++# CONFIG_GENERIC_CPU is not set ++CONFIG_X86_INTERNODE_CACHE_SHIFT=6 ++CONFIG_X86_L1_CACHE_SHIFT=6 ++CONFIG_X86_INTEL_USERCOPY=y ++CONFIG_X86_USE_PPRO_CHECKSUM=y ++CONFIG_X86_P6_NOP=y ++CONFIG_X86_TSC=y ++CONFIG_X86_CMPXCHG64=y ++CONFIG_X86_CMOV=y ++CONFIG_X86_MINIMUM_CPU_FAMILY=64 ++CONFIG_X86_DEBUGCTLMSR=y ++CONFIG_IA32_FEAT_CTL=y ++CONFIG_X86_VMX_FEATURE_NAMES=y ++# CONFIG_PROCESSOR_SELECT is not set ++CONFIG_CPU_SUP_INTEL=y ++CONFIG_CPU_SUP_AMD=y ++CONFIG_CPU_SUP_HYGON=y ++CONFIG_CPU_SUP_CENTAUR=y ++CONFIG_CPU_SUP_ZHAOXIN=y ++CONFIG_HPET_TIMER=y ++CONFIG_DMI=y ++CONFIG_GART_IOMMU=y ++# CONFIG_MAXSMP is not set ++CONFIG_NR_CPUS_RANGE_BEGIN=2 ++CONFIG_NR_CPUS_RANGE_END=512 ++CONFIG_NR_CPUS_DEFAULT=64 ++CONFIG_NR_CPUS=128 ++CONFIG_SCHED_SMT=y ++CONFIG_SCHED_MC=y ++CONFIG_SCHED_MC_PRIO=y ++CONFIG_X86_LOCAL_APIC=y ++CONFIG_X86_IO_APIC=y ++# CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS is not set ++CONFIG_X86_MCE=y ++# CONFIG_X86_MCELOG_LEGACY is not set ++CONFIG_X86_MCE_INTEL=y ++CONFIG_X86_MCE_AMD=y ++CONFIG_X86_MCE_THRESHOLD=y ++# CONFIG_X86_MCE_INJECT is not set ++CONFIG_X86_THERMAL_VECTOR=y ++ ++# ++# Performance monitoring ++# ++CONFIG_PERF_EVENTS_INTEL_UNCORE=y ++# CONFIG_PERF_EVENTS_INTEL_RAPL is not set ++# CONFIG_PERF_EVENTS_INTEL_CSTATE is not set ++# CONFIG_PERF_EVENTS_AMD_POWER is not set ++# end of Performance monitoring ++ ++# CONFIG_X86_16BIT is not set ++CONFIG_X86_VSYSCALL_EMULATION=y ++CONFIG_X86_IOPL_IOPERM=y ++# CONFIG_I8K is not set ++# CONFIG_MICROCODE is not set ++CONFIG_X86_MSR=y ++CONFIG_X86_CPUID=y ++# CONFIG_X86_5LEVEL is not set ++CONFIG_X86_DIRECT_GBPAGES=y ++# CONFIG_X86_CPA_STATISTICS is not set ++# CONFIG_AMD_MEM_ENCRYPT is not set ++CONFIG_NUMA=y ++CONFIG_AMD_NUMA=y ++CONFIG_X86_64_ACPI_NUMA=y ++# CONFIG_NUMA_EMU is not set ++CONFIG_NODES_SHIFT=6 ++CONFIG_ARCH_SPARSEMEM_ENABLE=y ++CONFIG_ARCH_SPARSEMEM_DEFAULT=y ++CONFIG_ARCH_SELECT_MEMORY_MODEL=y ++CONFIG_ARCH_PROC_KCORE_TEXT=y ++CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000 ++# CONFIG_X86_PMEM_LEGACY is not set ++# 
CONFIG_X86_CHECK_BIOS_CORRUPTION is not set ++CONFIG_X86_RESERVE_LOW=64 ++CONFIG_MTRR=y ++# CONFIG_MTRR_SANITIZER is not set ++CONFIG_X86_PAT=y ++CONFIG_ARCH_USES_PG_UNCACHED=y ++CONFIG_ARCH_RANDOM=y ++CONFIG_X86_SMAP=y ++CONFIG_X86_UMIP=y ++# CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS is not set ++CONFIG_X86_INTEL_TSX_MODE_OFF=y ++# CONFIG_X86_INTEL_TSX_MODE_ON is not set ++# CONFIG_X86_INTEL_TSX_MODE_AUTO is not set ++CONFIG_EFI=y ++CONFIG_EFI_STUB=y ++# CONFIG_EFI_MIXED is not set ++CONFIG_SECCOMP=y ++# CONFIG_HZ_100 is not set ++# CONFIG_HZ_250 is not set ++# CONFIG_HZ_300 is not set ++CONFIG_HZ_1000=y ++CONFIG_HZ=1000 ++CONFIG_SCHED_HRTICK=y ++CONFIG_KEXEC=y ++# CONFIG_KEXEC_FILE is not set ++# CONFIG_CRASH_DUMP is not set ++CONFIG_PHYSICAL_START=0x1000000 ++CONFIG_RELOCATABLE=y ++# CONFIG_RANDOMIZE_BASE is not set ++CONFIG_PHYSICAL_ALIGN=0x1000000 ++CONFIG_HOTPLUG_CPU=y ++# CONFIG_BOOTPARAM_HOTPLUG_CPU0 is not set ++# CONFIG_DEBUG_HOTPLUG_CPU0 is not set ++# CONFIG_LEGACY_VSYSCALL_EMULATE is not set ++# CONFIG_LEGACY_VSYSCALL_XONLY is not set ++CONFIG_LEGACY_VSYSCALL_NONE=y ++# CONFIG_CMDLINE_BOOL is not set ++CONFIG_MODIFY_LDT_SYSCALL=y ++CONFIG_HAVE_LIVEPATCH=y ++# CONFIG_LIVEPATCH is not set ++# end of Processor type and features ++ ++CONFIG_ARCH_HAS_ADD_PAGES=y ++CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y ++CONFIG_USE_PERCPU_NUMA_NODE_ID=y ++CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y ++CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION=y ++CONFIG_ARCH_ENABLE_THP_MIGRATION=y ++ ++# ++# Power management and ACPI options ++# ++# CONFIG_SUSPEND is not set ++# CONFIG_HIBERNATION is not set ++# CONFIG_PM is not set ++# CONFIG_ENERGY_MODEL is not set ++CONFIG_ARCH_SUPPORTS_ACPI=y ++CONFIG_ACPI=y ++CONFIG_ACPI_LEGACY_TABLES_LOOKUP=y ++CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC=y ++CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT=y ++# CONFIG_ACPI_DEBUGGER is not set ++# CONFIG_ACPI_SPCR_TABLE is not set ++CONFIG_ACPI_LPIT=y ++# CONFIG_ACPI_REV_OVERRIDE_POSSIBLE is not set ++# CONFIG_ACPI_EC_DEBUGFS is not set ++# CONFIG_ACPI_AC is not set ++# CONFIG_ACPI_BATTERY is not set ++# CONFIG_ACPI_BUTTON is not set ++# CONFIG_ACPI_TINY_POWER_BUTTON is not set ++# CONFIG_ACPI_VIDEO is not set ++# CONFIG_ACPI_FAN is not set ++# CONFIG_ACPI_DOCK is not set ++CONFIG_ACPI_CPU_FREQ_PSS=y ++CONFIG_ACPI_PROCESSOR_CSTATE=y ++CONFIG_ACPI_PROCESSOR_IDLE=y ++CONFIG_ACPI_CPPC_LIB=y ++CONFIG_ACPI_PROCESSOR=y ++CONFIG_ACPI_HOTPLUG_CPU=y ++# CONFIG_ACPI_PROCESSOR_AGGREGATOR is not set ++# CONFIG_ACPI_THERMAL is not set ++CONFIG_ARCH_HAS_ACPI_TABLE_UPGRADE=y ++# CONFIG_ACPI_TABLE_UPGRADE is not set ++# CONFIG_ACPI_DEBUG is not set ++# CONFIG_ACPI_PCI_SLOT is not set ++CONFIG_ACPI_CONTAINER=y ++CONFIG_ACPI_HOTPLUG_IOAPIC=y ++# CONFIG_ACPI_SBS is not set ++# CONFIG_ACPI_HED is not set ++# CONFIG_ACPI_CUSTOM_METHOD is not set ++# CONFIG_ACPI_BGRT is not set ++# CONFIG_ACPI_REDUCED_HARDWARE_ONLY is not set ++# CONFIG_ACPI_NFIT is not set ++CONFIG_ACPI_NUMA=y ++# CONFIG_ACPI_HMAT is not set ++CONFIG_HAVE_ACPI_APEI=y ++CONFIG_HAVE_ACPI_APEI_NMI=y ++# CONFIG_ACPI_APEI is not set ++# CONFIG_DPTF_POWER is not set ++# CONFIG_PMIC_OPREGION is not set ++# CONFIG_ACPI_CONFIGFS is not set ++# CONFIG_X86_PM_TIMER is not set ++# CONFIG_SFI is not set ++ ++# ++# CPU Frequency scaling ++# ++CONFIG_CPU_FREQ=y ++CONFIG_CPU_FREQ_GOV_ATTR_SET=y ++CONFIG_CPU_FREQ_GOV_COMMON=y ++CONFIG_CPU_FREQ_STAT=y ++CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y ++# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set ++# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set ++# 
CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set ++# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set ++# CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL is not set ++CONFIG_CPU_FREQ_GOV_PERFORMANCE=y ++# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set ++CONFIG_CPU_FREQ_GOV_USERSPACE=y ++CONFIG_CPU_FREQ_GOV_ONDEMAND=y ++CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y ++CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y ++ ++# ++# CPU frequency scaling drivers ++# ++CONFIG_X86_INTEL_PSTATE=y ++# CONFIG_X86_PCC_CPUFREQ is not set ++CONFIG_X86_ACPI_CPUFREQ=y ++CONFIG_X86_ACPI_CPUFREQ_CPB=y ++CONFIG_X86_POWERNOW_K8=y ++# CONFIG_X86_AMD_FREQ_SENSITIVITY is not set ++# CONFIG_X86_SPEEDSTEP_CENTRINO is not set ++# CONFIG_X86_P4_CLOCKMOD is not set ++ ++# ++# shared options ++# ++# end of CPU Frequency scaling ++ ++# ++# CPU Idle ++# ++CONFIG_CPU_IDLE=y ++CONFIG_CPU_IDLE_GOV_LADDER=y ++CONFIG_CPU_IDLE_GOV_MENU=y ++# CONFIG_CPU_IDLE_GOV_TEO is not set ++# end of CPU Idle ++ ++# CONFIG_INTEL_IDLE is not set ++# end of Power management and ACPI options ++ ++# ++# Bus options (PCI etc.) ++# ++CONFIG_PCI_DIRECT=y ++CONFIG_PCI_MMCONFIG=y ++CONFIG_MMCONF_FAM10H=y ++# CONFIG_PCI_CNB20LE_QUIRK is not set ++# CONFIG_ISA_BUS is not set ++CONFIG_ISA_DMA_API=y ++CONFIG_AMD_NB=y ++# CONFIG_X86_SYSFB is not set ++# end of Bus options (PCI etc.) ++ ++# ++# Binary Emulations ++# ++# CONFIG_IA32_EMULATION is not set ++# CONFIG_X86_X32 is not set ++# end of Binary Emulations ++ ++# ++# Firmware Drivers ++# ++# CONFIG_EDD is not set ++CONFIG_FIRMWARE_MEMMAP=y ++CONFIG_DMIID=y ++# CONFIG_DMI_SYSFS is not set ++CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK=y ++# CONFIG_FW_CFG_SYSFS is not set ++# CONFIG_GOOGLE_FIRMWARE is not set ++ ++# ++# EFI (Extensible Firmware Interface) Support ++# ++# CONFIG_EFI_VARS is not set ++CONFIG_EFI_ESRT=y ++CONFIG_EFI_RUNTIME_MAP=y ++# CONFIG_EFI_FAKE_MEMMAP is not set ++CONFIG_EFI_RUNTIME_WRAPPERS=y ++CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y ++# CONFIG_EFI_CAPSULE_LOADER is not set ++# CONFIG_EFI_TEST is not set ++# CONFIG_APPLE_PROPERTIES is not set ++# CONFIG_RESET_ATTACK_MITIGATION is not set ++# CONFIG_EFI_RCI2_TABLE is not set ++# CONFIG_EFI_DISABLE_PCI_DMA is not set ++# end of EFI (Extensible Firmware Interface) Support ++ ++CONFIG_EFI_EARLYCON=y ++ ++# ++# Tegra firmware driver ++# ++# end of Tegra firmware driver ++# end of Firmware Drivers ++ ++CONFIG_HAVE_KVM=y ++CONFIG_VIRTUALIZATION=y ++# CONFIG_KVM is not set ++CONFIG_KVM_WERROR=y ++CONFIG_AS_AVX512=y ++CONFIG_AS_SHA1_NI=y ++CONFIG_AS_SHA256_NI=y ++ ++# ++# General architecture-dependent options ++# ++CONFIG_CRASH_CORE=y ++CONFIG_KEXEC_CORE=y ++CONFIG_HOTPLUG_SMT=y ++CONFIG_GENERIC_ENTRY=y ++# CONFIG_OPROFILE is not set ++CONFIG_HAVE_OPROFILE=y ++CONFIG_OPROFILE_NMI_TIMER=y ++CONFIG_KPROBES=y ++CONFIG_JUMP_LABEL=y ++# CONFIG_STATIC_KEYS_SELFTEST is not set ++CONFIG_OPTPROBES=y ++CONFIG_KPROBES_ON_FTRACE=y ++CONFIG_UPROBES=y ++CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y ++CONFIG_ARCH_USE_BUILTIN_BSWAP=y ++CONFIG_KRETPROBES=y ++CONFIG_HAVE_IOREMAP_PROT=y ++CONFIG_HAVE_KPROBES=y ++CONFIG_HAVE_KRETPROBES=y ++CONFIG_HAVE_OPTPROBES=y ++CONFIG_HAVE_KPROBES_ON_FTRACE=y ++CONFIG_HAVE_FUNCTION_ERROR_INJECTION=y ++CONFIG_HAVE_NMI=y ++CONFIG_HAVE_ARCH_TRACEHOOK=y ++CONFIG_HAVE_DMA_CONTIGUOUS=y ++CONFIG_GENERIC_SMP_IDLE_THREAD=y ++CONFIG_ARCH_HAS_FORTIFY_SOURCE=y ++CONFIG_ARCH_HAS_SET_MEMORY=y ++CONFIG_ARCH_HAS_SET_DIRECT_MAP=y ++CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST=y ++CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT=y ++CONFIG_HAVE_ASM_MODVERSIONS=y 
++CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y ++CONFIG_HAVE_RSEQ=y ++CONFIG_HAVE_FUNCTION_ARG_ACCESS_API=y ++CONFIG_HAVE_HW_BREAKPOINT=y ++CONFIG_HAVE_MIXED_BREAKPOINTS_REGS=y ++CONFIG_HAVE_USER_RETURN_NOTIFIER=y ++CONFIG_HAVE_PERF_EVENTS_NMI=y ++CONFIG_HAVE_HARDLOCKUP_DETECTOR_PERF=y ++CONFIG_HAVE_PERF_REGS=y ++CONFIG_HAVE_PERF_USER_STACK_DUMP=y ++CONFIG_HAVE_ARCH_JUMP_LABEL=y ++CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE=y ++CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y ++CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y ++CONFIG_HAVE_CMPXCHG_LOCAL=y ++CONFIG_HAVE_CMPXCHG_DOUBLE=y ++CONFIG_HAVE_ARCH_SECCOMP_FILTER=y ++CONFIG_SECCOMP_FILTER=y ++CONFIG_HAVE_ARCH_STACKLEAK=y ++CONFIG_HAVE_STACKPROTECTOR=y ++# CONFIG_STACKPROTECTOR is not set ++CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES=y ++CONFIG_HAVE_CONTEXT_TRACKING=y ++CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y ++CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y ++CONFIG_HAVE_MOVE_PMD=y ++CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y ++CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD=y ++CONFIG_HAVE_ARCH_HUGE_VMAP=y ++CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y ++CONFIG_HAVE_ARCH_SOFT_DIRTY=y ++CONFIG_HAVE_MOD_ARCH_SPECIFIC=y ++CONFIG_MODULES_USE_ELF_RELA=y ++CONFIG_ARCH_HAS_ELF_RANDOMIZE=y ++CONFIG_HAVE_ARCH_MMAP_RND_BITS=y ++CONFIG_HAVE_EXIT_THREAD=y ++CONFIG_ARCH_MMAP_RND_BITS=28 ++CONFIG_HAVE_STACK_VALIDATION=y ++CONFIG_HAVE_RELIABLE_STACKTRACE=y ++CONFIG_COMPAT_32BIT_TIME=y ++CONFIG_HAVE_ARCH_VMAP_STACK=y ++CONFIG_VMAP_STACK=y ++CONFIG_ARCH_HAS_STRICT_KERNEL_RWX=y ++CONFIG_STRICT_KERNEL_RWX=y ++CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y ++CONFIG_STRICT_MODULE_RWX=y ++CONFIG_HAVE_ARCH_PREL32_RELOCATIONS=y ++CONFIG_ARCH_USE_MEMREMAP_PROT=y ++# CONFIG_LOCK_EVENT_COUNTS is not set ++CONFIG_ARCH_HAS_MEM_ENCRYPT=y ++ ++# ++# GCOV-based kernel profiling ++# ++# CONFIG_GCOV_KERNEL is not set ++CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y ++# end of GCOV-based kernel profiling ++ ++CONFIG_HAVE_GCC_PLUGINS=y ++# end of General architecture-dependent options ++ ++CONFIG_RT_MUTEXES=y ++CONFIG_BASE_SMALL=0 ++CONFIG_MODULES=y ++# CONFIG_MODULE_FORCE_LOAD is not set ++CONFIG_MODULE_UNLOAD=y ++# CONFIG_MODULE_FORCE_UNLOAD is not set ++CONFIG_MODVERSIONS=y ++CONFIG_ASM_MODVERSIONS=y ++CONFIG_MODULE_SRCVERSION_ALL=y ++CONFIG_MODULE_SIG=y ++# CONFIG_MODULE_COMPRESS is not set ++# CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS is not set ++# CONFIG_UNUSED_SYMBOLS is not set ++# CONFIG_TRIM_UNUSED_KSYMS is not set ++CONFIG_MODULES_TREE_LOOKUP=y ++CONFIG_BLOCK=y ++CONFIG_BLK_SCSI_REQUEST=y ++CONFIG_BLK_CGROUP_RWSTAT=y ++CONFIG_BLK_DEV_BSG=y ++CONFIG_BLK_DEV_BSGLIB=y ++# CONFIG_BLK_DEV_INTEGRITY is not set ++# CONFIG_BLK_DEV_ZONED is not set ++CONFIG_BLK_DEV_THROTTLING=y ++# CONFIG_BLK_DEV_THROTTLING_LOW is not set ++# CONFIG_BLK_CMDLINE_PARSER is not set ++# CONFIG_BLK_WBT is not set ++CONFIG_BLK_CGROUP_IOLATENCY=y ++# CONFIG_BLK_CGROUP_IOCOST is not set ++CONFIG_BLK_DEBUG_FS=y ++# CONFIG_BLK_SED_OPAL is not set ++# CONFIG_BLK_INLINE_ENCRYPTION is not set ++ ++# ++# Partition Types ++# ++CONFIG_PARTITION_ADVANCED=y ++# CONFIG_ACORN_PARTITION is not set ++# CONFIG_AIX_PARTITION is not set ++CONFIG_OSF_PARTITION=y ++CONFIG_AMIGA_PARTITION=y ++# CONFIG_ATARI_PARTITION is not set ++CONFIG_MAC_PARTITION=y ++CONFIG_MSDOS_PARTITION=y ++CONFIG_BSD_DISKLABEL=y ++CONFIG_MINIX_SUBPARTITION=y ++CONFIG_SOLARIS_X86_PARTITION=y ++CONFIG_UNIXWARE_DISKLABEL=y ++# CONFIG_LDM_PARTITION is not set ++CONFIG_SGI_PARTITION=y ++# CONFIG_ULTRIX_PARTITION is not set ++CONFIG_SUN_PARTITION=y ++CONFIG_KARMA_PARTITION=y ++CONFIG_EFI_PARTITION=y ++# CONFIG_SYSV68_PARTITION is not set 
++# CONFIG_CMDLINE_PARTITION is not set ++# end of Partition Types ++ ++CONFIG_BLK_MQ_PCI=y ++CONFIG_BLK_MQ_VIRTIO=y ++ ++# ++# IO Schedulers ++# ++CONFIG_MQ_IOSCHED_DEADLINE=y ++CONFIG_MQ_IOSCHED_KYBER=y ++# CONFIG_IOSCHED_BFQ is not set ++# end of IO Schedulers ++ ++CONFIG_ASN1=y ++CONFIG_UNINLINE_SPIN_UNLOCK=y ++CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y ++CONFIG_MUTEX_SPIN_ON_OWNER=y ++CONFIG_RWSEM_SPIN_ON_OWNER=y ++CONFIG_LOCK_SPIN_ON_OWNER=y ++CONFIG_ARCH_USE_QUEUED_SPINLOCKS=y ++CONFIG_QUEUED_SPINLOCKS=y ++CONFIG_ARCH_USE_QUEUED_RWLOCKS=y ++CONFIG_QUEUED_RWLOCKS=y ++CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE=y ++CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE=y ++CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y ++CONFIG_FREEZER=y ++ ++# ++# Executable file formats ++# ++CONFIG_BINFMT_ELF=y ++CONFIG_ELFCORE=y ++# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set ++CONFIG_BINFMT_SCRIPT=y ++CONFIG_BINFMT_MISC=y ++CONFIG_COREDUMP=y ++# end of Executable file formats ++ ++# ++# Memory Management options ++# ++CONFIG_SELECT_MEMORY_MODEL=y ++CONFIG_SPARSEMEM_MANUAL=y ++CONFIG_SPARSEMEM=y ++CONFIG_NEED_MULTIPLE_NODES=y ++CONFIG_SPARSEMEM_EXTREME=y ++CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y ++CONFIG_SPARSEMEM_VMEMMAP=y ++CONFIG_HAVE_FAST_GUP=y ++CONFIG_MEMORY_ISOLATION=y ++# CONFIG_MEMORY_HOTPLUG is not set ++CONFIG_SPLIT_PTLOCK_CPUS=4 ++CONFIG_MEMORY_BALLOON=y ++CONFIG_BALLOON_COMPACTION=y ++CONFIG_COMPACTION=y ++CONFIG_PAGE_REPORTING=y ++CONFIG_MIGRATION=y ++CONFIG_CONTIG_ALLOC=y ++CONFIG_PHYS_ADDR_T_64BIT=y ++CONFIG_BOUNCE=y ++CONFIG_VIRT_TO_BUS=y ++CONFIG_KSM=y ++CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 ++CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y ++CONFIG_MEMORY_FAILURE=y ++CONFIG_HWPOISON_INJECT=y ++CONFIG_TRANSPARENT_HUGEPAGE=y ++# CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS is not set ++CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y ++CONFIG_ARCH_WANTS_THP_SWAP=y ++CONFIG_THP_SWAP=y ++# CONFIG_CLEANCACHE is not set ++# CONFIG_FRONTSWAP is not set ++CONFIG_CMA=y ++# CONFIG_CMA_DEBUG is not set ++# CONFIG_CMA_DEBUGFS is not set ++CONFIG_CMA_AREAS=7 ++# CONFIG_ZPOOL is not set ++# CONFIG_ZBUD is not set ++# CONFIG_ZSMALLOC is not set ++CONFIG_GENERIC_EARLY_IOREMAP=y ++# CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set ++# CONFIG_IDLE_PAGE_TRACKING is not set ++CONFIG_ARCH_HAS_PTE_DEVMAP=y ++# CONFIG_PERCPU_STATS is not set ++# CONFIG_GUP_BENCHMARK is not set ++# CONFIG_READ_ONLY_THP_FOR_FS is not set ++CONFIG_ARCH_HAS_PTE_SPECIAL=y ++# end of Memory Management options ++ ++CONFIG_NET=y ++CONFIG_NET_INGRESS=y ++CONFIG_NET_EGRESS=y ++CONFIG_SKB_EXTENSIONS=y ++ ++# ++# Networking options ++# ++CONFIG_PACKET=y ++# CONFIG_PACKET_DIAG is not set ++CONFIG_UNIX=y ++CONFIG_UNIX_SCM=y ++# CONFIG_UNIX_DIAG is not set ++CONFIG_TLS=y ++# CONFIG_TLS_DEVICE is not set ++# CONFIG_TLS_TOE is not set ++CONFIG_XFRM=y ++CONFIG_XFRM_ALGO=y ++CONFIG_XFRM_USER=y ++# CONFIG_XFRM_INTERFACE is not set ++CONFIG_XFRM_SUB_POLICY=y ++# CONFIG_XFRM_MIGRATE is not set ++# CONFIG_XFRM_STATISTICS is not set ++# CONFIG_NET_KEY is not set ++CONFIG_XDP_SOCKETS=y ++CONFIG_XDP_SOCKETS_DIAG=y ++CONFIG_INET=y ++CONFIG_IP_MULTICAST=y ++CONFIG_IP_ADVANCED_ROUTER=y ++# CONFIG_IP_FIB_TRIE_STATS is not set ++CONFIG_IP_MULTIPLE_TABLES=y ++CONFIG_IP_ROUTE_MULTIPATH=y ++CONFIG_IP_ROUTE_VERBOSE=y ++# CONFIG_IP_PNP is not set ++CONFIG_NET_IPIP=y ++CONFIG_NET_IPGRE_DEMUX=y ++CONFIG_NET_IP_TUNNEL=y ++CONFIG_NET_IPGRE=y ++CONFIG_NET_IPGRE_BROADCAST=y ++CONFIG_IP_MROUTE_COMMON=y ++CONFIG_IP_MROUTE=y ++# CONFIG_IP_MROUTE_MULTIPLE_TABLES is not set ++CONFIG_IP_PIMSM_V1=y ++CONFIG_IP_PIMSM_V2=y 
++CONFIG_SYN_COOKIES=y ++# CONFIG_NET_IPVTI is not set ++CONFIG_NET_UDP_TUNNEL=y ++# CONFIG_NET_FOU is not set ++# CONFIG_NET_FOU_IP_TUNNELS is not set ++# CONFIG_INET_AH is not set ++# CONFIG_INET_ESP is not set ++# CONFIG_INET_IPCOMP is not set ++CONFIG_INET_TUNNEL=y ++CONFIG_INET_DIAG=y ++CONFIG_INET_TCP_DIAG=y ++# CONFIG_INET_UDP_DIAG is not set ++# CONFIG_INET_RAW_DIAG is not set ++# CONFIG_INET_DIAG_DESTROY is not set ++CONFIG_TCP_CONG_ADVANCED=y ++CONFIG_TCP_CONG_BIC=m ++CONFIG_TCP_CONG_CUBIC=y ++CONFIG_TCP_CONG_WESTWOOD=m ++CONFIG_TCP_CONG_HTCP=m ++# CONFIG_TCP_CONG_HSTCP is not set ++# CONFIG_TCP_CONG_HYBLA is not set ++# CONFIG_TCP_CONG_VEGAS is not set ++# CONFIG_TCP_CONG_NV is not set ++# CONFIG_TCP_CONG_SCALABLE is not set ++# CONFIG_TCP_CONG_LP is not set ++# CONFIG_TCP_CONG_VENO is not set ++# CONFIG_TCP_CONG_YEAH is not set ++# CONFIG_TCP_CONG_ILLINOIS is not set ++# CONFIG_TCP_CONG_DCTCP is not set ++# CONFIG_TCP_CONG_CDG is not set ++# CONFIG_TCP_CONG_BBR is not set ++# CONFIG_DEFAULT_CUBIC is not set ++CONFIG_DEFAULT_RENO=y ++CONFIG_DEFAULT_TCP_CONG="reno" ++CONFIG_TCP_MD5SIG=y ++CONFIG_IPV6=y ++CONFIG_IPV6_ROUTER_PREF=y ++CONFIG_IPV6_ROUTE_INFO=y ++# CONFIG_IPV6_OPTIMISTIC_DAD is not set ++# CONFIG_INET6_AH is not set ++# CONFIG_INET6_ESP is not set ++# CONFIG_INET6_IPCOMP is not set ++CONFIG_IPV6_MIP6=y ++# CONFIG_IPV6_ILA is not set ++CONFIG_INET6_TUNNEL=y ++# CONFIG_IPV6_VTI is not set ++CONFIG_IPV6_SIT=y ++# CONFIG_IPV6_SIT_6RD is not set ++CONFIG_IPV6_NDISC_NODETYPE=y ++CONFIG_IPV6_TUNNEL=y ++CONFIG_IPV6_GRE=y ++CONFIG_IPV6_MULTIPLE_TABLES=y ++CONFIG_IPV6_SUBTREES=y ++# CONFIG_IPV6_MROUTE is not set ++CONFIG_IPV6_SEG6_LWTUNNEL=y ++# CONFIG_IPV6_SEG6_HMAC is not set ++CONFIG_IPV6_SEG6_BPF=y ++# CONFIG_IPV6_RPL_LWTUNNEL is not set ++CONFIG_NETLABEL=y ++CONFIG_MPTCP=y ++CONFIG_MPTCP_IPV6=y ++CONFIG_NETWORK_SECMARK=y ++CONFIG_NET_PTP_CLASSIFY=y ++# CONFIG_NETWORK_PHY_TIMESTAMPING is not set ++CONFIG_NETFILTER=y ++CONFIG_NETFILTER_ADVANCED=y ++ ++# ++# Core Netfilter Configuration ++# ++CONFIG_NETFILTER_INGRESS=y ++CONFIG_NETFILTER_NETLINK=y ++# CONFIG_NETFILTER_NETLINK_ACCT is not set ++CONFIG_NETFILTER_NETLINK_QUEUE=y ++CONFIG_NETFILTER_NETLINK_LOG=y ++# CONFIG_NETFILTER_NETLINK_OSF is not set ++CONFIG_NF_CONNTRACK=y ++# CONFIG_NF_LOG_NETDEV is not set ++CONFIG_NETFILTER_SYNPROXY=y ++# CONFIG_NF_TABLES is not set ++CONFIG_NETFILTER_XTABLES=y ++ ++# ++# Xtables combined modules ++# ++# CONFIG_NETFILTER_XT_MARK is not set ++ ++# ++# Xtables targets ++# ++# CONFIG_NETFILTER_XT_TARGET_AUDIT is not set ++# CONFIG_NETFILTER_XT_TARGET_CLASSIFY is not set ++CONFIG_NETFILTER_XT_TARGET_CT=y ++# CONFIG_NETFILTER_XT_TARGET_HMARK is not set ++# CONFIG_NETFILTER_XT_TARGET_IDLETIMER is not set ++# CONFIG_NETFILTER_XT_TARGET_LOG is not set ++# CONFIG_NETFILTER_XT_TARGET_MARK is not set ++# CONFIG_NETFILTER_XT_TARGET_NFLOG is not set ++# CONFIG_NETFILTER_XT_TARGET_NFQUEUE is not set ++# CONFIG_NETFILTER_XT_TARGET_RATEEST is not set ++# CONFIG_NETFILTER_XT_TARGET_TEE is not set ++# CONFIG_NETFILTER_XT_TARGET_SECMARK is not set ++# CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set ++ ++# ++# Xtables matches ++# ++# CONFIG_NETFILTER_XT_MATCH_ADDRTYPE is not set ++CONFIG_NETFILTER_XT_MATCH_BPF=y ++# CONFIG_NETFILTER_XT_MATCH_CGROUP is not set ++# CONFIG_NETFILTER_XT_MATCH_COMMENT is not set ++# CONFIG_NETFILTER_XT_MATCH_CPU is not set ++# CONFIG_NETFILTER_XT_MATCH_DCCP is not set ++# CONFIG_NETFILTER_XT_MATCH_DEVGROUP is not set ++# CONFIG_NETFILTER_XT_MATCH_DSCP is not set ++# 
CONFIG_NETFILTER_XT_MATCH_ECN is not set ++# CONFIG_NETFILTER_XT_MATCH_ESP is not set ++# CONFIG_NETFILTER_XT_MATCH_HASHLIMIT is not set ++# CONFIG_NETFILTER_XT_MATCH_HL is not set ++# CONFIG_NETFILTER_XT_MATCH_IPCOMP is not set ++# CONFIG_NETFILTER_XT_MATCH_IPRANGE is not set ++# CONFIG_NETFILTER_XT_MATCH_L2TP is not set ++# CONFIG_NETFILTER_XT_MATCH_LENGTH is not set ++# CONFIG_NETFILTER_XT_MATCH_LIMIT is not set ++# CONFIG_NETFILTER_XT_MATCH_MAC is not set ++# CONFIG_NETFILTER_XT_MATCH_MARK is not set ++# CONFIG_NETFILTER_XT_MATCH_MULTIPORT is not set ++# CONFIG_NETFILTER_XT_MATCH_NFACCT is not set ++# CONFIG_NETFILTER_XT_MATCH_OSF is not set ++# CONFIG_NETFILTER_XT_MATCH_OWNER is not set ++# CONFIG_NETFILTER_XT_MATCH_POLICY is not set ++# CONFIG_NETFILTER_XT_MATCH_PKTTYPE is not set ++# CONFIG_NETFILTER_XT_MATCH_QUOTA is not set ++# CONFIG_NETFILTER_XT_MATCH_RATEEST is not set ++# CONFIG_NETFILTER_XT_MATCH_REALM is not set ++# CONFIG_NETFILTER_XT_MATCH_RECENT is not set ++# CONFIG_NETFILTER_XT_MATCH_SCTP is not set ++# CONFIG_NETFILTER_XT_MATCH_SOCKET is not set ++CONFIG_NETFILTER_XT_MATCH_STATE=y ++CONFIG_NETFILTER_XT_MATCH_STATISTIC=y ++# CONFIG_NETFILTER_XT_MATCH_STRING is not set ++# CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set ++# CONFIG_NETFILTER_XT_MATCH_TIME is not set ++# CONFIG_NETFILTER_XT_MATCH_U32 is not set ++# end of Core Netfilter Configuration ++ ++# CONFIG_IP_SET is not set ++# CONFIG_IP_VS is not set ++ ++# ++# IP: Netfilter Configuration ++# ++CONFIG_NF_DEFRAG_IPV4=y ++# CONFIG_NF_SOCKET_IPV4 is not set ++# CONFIG_NF_TPROXY_IPV4 is not set ++# CONFIG_NF_DUP_IPV4 is not set ++# CONFIG_NF_LOG_ARP is not set ++# CONFIG_NF_LOG_IPV4 is not set ++# CONFIG_NF_REJECT_IPV4 is not set ++CONFIG_IP_NF_IPTABLES=y ++# CONFIG_IP_NF_MATCH_AH is not set ++# CONFIG_IP_NF_MATCH_ECN is not set ++# CONFIG_IP_NF_MATCH_TTL is not set ++CONFIG_IP_NF_FILTER=y ++CONFIG_IP_NF_TARGET_SYNPROXY=y ++# CONFIG_IP_NF_MANGLE is not set ++CONFIG_IP_NF_RAW=y ++# CONFIG_IP_NF_SECURITY is not set ++# CONFIG_IP_NF_ARPTABLES is not set ++# end of IP: Netfilter Configuration ++ ++# ++# IPv6: Netfilter Configuration ++# ++# CONFIG_NF_SOCKET_IPV6 is not set ++# CONFIG_NF_TPROXY_IPV6 is not set ++# CONFIG_NF_DUP_IPV6 is not set ++# CONFIG_NF_REJECT_IPV6 is not set ++# CONFIG_NF_LOG_IPV6 is not set ++CONFIG_IP6_NF_IPTABLES=y ++# CONFIG_IP6_NF_MATCH_AH is not set ++# CONFIG_IP6_NF_MATCH_EUI64 is not set ++# CONFIG_IP6_NF_MATCH_FRAG is not set ++# CONFIG_IP6_NF_MATCH_OPTS is not set ++# CONFIG_IP6_NF_MATCH_HL is not set ++# CONFIG_IP6_NF_MATCH_IPV6HEADER is not set ++# CONFIG_IP6_NF_MATCH_MH is not set ++# CONFIG_IP6_NF_MATCH_RT is not set ++# CONFIG_IP6_NF_MATCH_SRH is not set ++# CONFIG_IP6_NF_FILTER is not set ++# CONFIG_IP6_NF_MANGLE is not set ++# CONFIG_IP6_NF_RAW is not set ++# CONFIG_IP6_NF_SECURITY is not set ++# end of IPv6: Netfilter Configuration ++ ++CONFIG_NF_DEFRAG_IPV6=y ++CONFIG_BPFILTER=y ++CONFIG_BPFILTER_UMH=m ++# CONFIG_IP_DCCP is not set ++# CONFIG_IP_SCTP is not set ++# CONFIG_RDS is not set ++# CONFIG_TIPC is not set ++# CONFIG_ATM is not set ++# CONFIG_L2TP is not set ++# CONFIG_BRIDGE is not set ++CONFIG_HAVE_NET_DSA=y ++# CONFIG_NET_DSA is not set ++CONFIG_VLAN_8021Q=y ++# CONFIG_VLAN_8021Q_GVRP is not set ++# CONFIG_VLAN_8021Q_MVRP is not set ++# CONFIG_DECNET is not set ++# CONFIG_LLC2 is not set ++# CONFIG_ATALK is not set ++# CONFIG_X25 is not set ++# CONFIG_LAPB is not set ++# CONFIG_PHONET is not set ++# CONFIG_6LOWPAN is not set ++# CONFIG_IEEE802154 is not set 
++CONFIG_NET_SCHED=y ++ ++# ++# Queueing/Scheduling ++# ++# CONFIG_NET_SCH_CBQ is not set ++# CONFIG_NET_SCH_HTB is not set ++# CONFIG_NET_SCH_HFSC is not set ++# CONFIG_NET_SCH_PRIO is not set ++# CONFIG_NET_SCH_MULTIQ is not set ++# CONFIG_NET_SCH_RED is not set ++# CONFIG_NET_SCH_SFB is not set ++# CONFIG_NET_SCH_SFQ is not set ++# CONFIG_NET_SCH_TEQL is not set ++# CONFIG_NET_SCH_TBF is not set ++# CONFIG_NET_SCH_CBS is not set ++# CONFIG_NET_SCH_ETF is not set ++# CONFIG_NET_SCH_TAPRIO is not set ++# CONFIG_NET_SCH_GRED is not set ++# CONFIG_NET_SCH_DSMARK is not set ++# CONFIG_NET_SCH_NETEM is not set ++# CONFIG_NET_SCH_DRR is not set ++# CONFIG_NET_SCH_MQPRIO is not set ++# CONFIG_NET_SCH_SKBPRIO is not set ++# CONFIG_NET_SCH_CHOKE is not set ++# CONFIG_NET_SCH_QFQ is not set ++# CONFIG_NET_SCH_CODEL is not set ++CONFIG_NET_SCH_FQ_CODEL=y ++# CONFIG_NET_SCH_CAKE is not set ++# CONFIG_NET_SCH_FQ is not set ++# CONFIG_NET_SCH_HHF is not set ++# CONFIG_NET_SCH_PIE is not set ++CONFIG_NET_SCH_INGRESS=y ++# CONFIG_NET_SCH_PLUG is not set ++# CONFIG_NET_SCH_ETS is not set ++CONFIG_NET_SCH_DEFAULT=y ++CONFIG_DEFAULT_FQ_CODEL=y ++# CONFIG_DEFAULT_PFIFO_FAST is not set ++CONFIG_DEFAULT_NET_SCH="fq_codel" ++ ++# ++# Classification ++# ++CONFIG_NET_CLS=y ++# CONFIG_NET_CLS_BASIC is not set ++# CONFIG_NET_CLS_TCINDEX is not set ++# CONFIG_NET_CLS_ROUTE4 is not set ++# CONFIG_NET_CLS_FW is not set ++# CONFIG_NET_CLS_U32 is not set ++# CONFIG_NET_CLS_RSVP is not set ++# CONFIG_NET_CLS_RSVP6 is not set ++# CONFIG_NET_CLS_FLOW is not set ++CONFIG_NET_CLS_CGROUP=y ++CONFIG_NET_CLS_BPF=y ++# CONFIG_NET_CLS_FLOWER is not set ++# CONFIG_NET_CLS_MATCHALL is not set ++CONFIG_NET_EMATCH=y ++CONFIG_NET_EMATCH_STACK=32 ++# CONFIG_NET_EMATCH_CMP is not set ++# CONFIG_NET_EMATCH_NBYTE is not set ++# CONFIG_NET_EMATCH_U32 is not set ++# CONFIG_NET_EMATCH_META is not set ++# CONFIG_NET_EMATCH_TEXT is not set ++# CONFIG_NET_EMATCH_IPT is not set ++CONFIG_NET_CLS_ACT=y ++# CONFIG_NET_ACT_POLICE is not set ++# CONFIG_NET_ACT_GACT is not set ++# CONFIG_NET_ACT_MIRRED is not set ++# CONFIG_NET_ACT_SAMPLE is not set ++# CONFIG_NET_ACT_IPT is not set ++# CONFIG_NET_ACT_NAT is not set ++# CONFIG_NET_ACT_PEDIT is not set ++# CONFIG_NET_ACT_SIMP is not set ++# CONFIG_NET_ACT_SKBEDIT is not set ++# CONFIG_NET_ACT_CSUM is not set ++# CONFIG_NET_ACT_MPLS is not set ++# CONFIG_NET_ACT_VLAN is not set ++CONFIG_NET_ACT_BPF=y ++# CONFIG_NET_ACT_SKBMOD is not set ++# CONFIG_NET_ACT_IFE is not set ++# CONFIG_NET_ACT_TUNNEL_KEY is not set ++# CONFIG_NET_ACT_GATE is not set ++CONFIG_NET_TC_SKB_EXT=y ++CONFIG_NET_SCH_FIFO=y ++CONFIG_DCB=y ++CONFIG_DNS_RESOLVER=y ++# CONFIG_BATMAN_ADV is not set ++# CONFIG_OPENVSWITCH is not set ++# CONFIG_VSOCKETS is not set ++# CONFIG_NETLINK_DIAG is not set ++CONFIG_MPLS=y ++# CONFIG_NET_MPLS_GSO is not set ++# CONFIG_MPLS_ROUTING is not set ++# CONFIG_NET_NSH is not set ++# CONFIG_HSR is not set ++# CONFIG_NET_SWITCHDEV is not set ++CONFIG_NET_L3_MASTER_DEV=y ++# CONFIG_QRTR is not set ++# CONFIG_NET_NCSI is not set ++CONFIG_RPS=y ++CONFIG_RFS_ACCEL=y ++CONFIG_XPS=y ++# CONFIG_CGROUP_NET_PRIO is not set ++CONFIG_CGROUP_NET_CLASSID=y ++CONFIG_NET_RX_BUSY_POLL=y ++CONFIG_BQL=y ++CONFIG_BPF_JIT=y ++CONFIG_BPF_STREAM_PARSER=y ++CONFIG_NET_FLOW_LIMIT=y ++ ++# ++# Network testing ++# ++# CONFIG_NET_PKTGEN is not set ++# CONFIG_NET_DROP_MONITOR is not set ++# end of Network testing ++# end of Networking options ++ ++# CONFIG_HAMRADIO is not set ++# CONFIG_CAN is not set ++# CONFIG_BT is not set ++# 
CONFIG_AF_RXRPC is not set ++# CONFIG_AF_KCM is not set ++CONFIG_STREAM_PARSER=y ++CONFIG_FIB_RULES=y ++CONFIG_WIRELESS=y ++# CONFIG_CFG80211 is not set ++ ++# ++# CFG80211 needs to be enabled for MAC80211 ++# ++CONFIG_MAC80211_STA_HASH_MAX_SIZE=0 ++# CONFIG_WIMAX is not set ++# CONFIG_RFKILL is not set ++CONFIG_NET_9P=y ++CONFIG_NET_9P_VIRTIO=y ++# CONFIG_NET_9P_DEBUG is not set ++# CONFIG_CAIF is not set ++# CONFIG_CEPH_LIB is not set ++# CONFIG_NFC is not set ++# CONFIG_PSAMPLE is not set ++# CONFIG_NET_IFE is not set ++CONFIG_LWTUNNEL=y ++CONFIG_LWTUNNEL_BPF=y ++CONFIG_DST_CACHE=y ++CONFIG_GRO_CELLS=y ++CONFIG_NET_SOCK_MSG=y ++CONFIG_NET_DEVLINK=y ++CONFIG_FAILOVER=y ++CONFIG_ETHTOOL_NETLINK=y ++CONFIG_HAVE_EBPF_JIT=y ++ ++# ++# Device Drivers ++# ++CONFIG_HAVE_EISA=y ++# CONFIG_EISA is not set ++CONFIG_HAVE_PCI=y ++CONFIG_PCI=y ++CONFIG_PCI_DOMAINS=y ++CONFIG_PCIEPORTBUS=y ++# CONFIG_PCIEAER is not set ++CONFIG_PCIEASPM=y ++CONFIG_PCIEASPM_DEFAULT=y ++# CONFIG_PCIEASPM_POWERSAVE is not set ++# CONFIG_PCIEASPM_POWER_SUPERSAVE is not set ++# CONFIG_PCIEASPM_PERFORMANCE is not set ++# CONFIG_PCIE_PTM is not set ++# CONFIG_PCIE_BW is not set ++CONFIG_PCI_MSI=y ++CONFIG_PCI_MSI_IRQ_DOMAIN=y ++CONFIG_PCI_QUIRKS=y ++# CONFIG_PCI_DEBUG is not set ++# CONFIG_PCI_REALLOC_ENABLE_AUTO is not set ++# CONFIG_PCI_STUB is not set ++# CONFIG_PCI_PF_STUB is not set ++CONFIG_PCI_ATS=y ++CONFIG_PCI_LOCKLESS_CONFIG=y ++CONFIG_PCI_IOV=y ++# CONFIG_PCI_PRI is not set ++# CONFIG_PCI_PASID is not set ++CONFIG_PCI_LABEL=y ++# CONFIG_HOTPLUG_PCI is not set ++ ++# ++# PCI controller drivers ++# ++# CONFIG_VMD is not set ++ ++# ++# DesignWare PCI Core Support ++# ++# CONFIG_PCIE_DW_PLAT_HOST is not set ++# CONFIG_PCI_MESON is not set ++# end of DesignWare PCI Core Support ++ ++# ++# Mobiveil PCIe Core Support ++# ++# end of Mobiveil PCIe Core Support ++ ++# ++# Cadence PCIe controllers support ++# ++# end of Cadence PCIe controllers support ++# end of PCI controller drivers ++ ++# ++# PCI Endpoint ++# ++# CONFIG_PCI_ENDPOINT is not set ++# end of PCI Endpoint ++ ++# ++# PCI switch controller drivers ++# ++# CONFIG_PCI_SW_SWITCHTEC is not set ++# end of PCI switch controller drivers ++ ++# CONFIG_PCCARD is not set ++# CONFIG_RAPIDIO is not set ++ ++# ++# Generic Driver Options ++# ++# CONFIG_UEVENT_HELPER is not set ++CONFIG_DEVTMPFS=y ++CONFIG_DEVTMPFS_MOUNT=y ++CONFIG_STANDALONE=y ++# CONFIG_PREVENT_FIRMWARE_BUILD is not set ++ ++# ++# Firmware loader ++# ++CONFIG_FW_LOADER=y ++CONFIG_FW_LOADER_PAGED_BUF=y ++CONFIG_EXTRA_FIRMWARE="" ++CONFIG_FW_LOADER_USER_HELPER=y ++# CONFIG_FW_LOADER_USER_HELPER_FALLBACK is not set ++# CONFIG_FW_LOADER_COMPRESS is not set ++# end of Firmware loader ++ ++CONFIG_ALLOW_DEV_COREDUMP=y ++# CONFIG_DEBUG_DRIVER is not set ++# CONFIG_DEBUG_DEVRES is not set ++# CONFIG_DEBUG_TEST_DRIVER_REMOVE is not set ++# CONFIG_TEST_ASYNC_DRIVER_PROBE is not set ++CONFIG_GENERIC_CPU_AUTOPROBE=y ++CONFIG_GENERIC_CPU_VULNERABILITIES=y ++CONFIG_DMA_SHARED_BUFFER=y ++# CONFIG_DMA_FENCE_TRACE is not set ++# end of Generic Driver Options ++ ++# ++# Bus devices ++# ++# CONFIG_MHI_BUS is not set ++# end of Bus devices ++ ++# CONFIG_CONNECTOR is not set ++# CONFIG_GNSS is not set ++# CONFIG_MTD is not set ++# CONFIG_OF is not set ++CONFIG_ARCH_MIGHT_HAVE_PC_PARPORT=y ++# CONFIG_PARPORT is not set ++CONFIG_PNP=y ++# CONFIG_PNP_DEBUG_MESSAGES is not set ++ ++# ++# Protocols ++# ++CONFIG_PNPACPI=y ++CONFIG_BLK_DEV=y ++# CONFIG_BLK_DEV_NULL_BLK is not set ++# CONFIG_BLK_DEV_FD is not set ++# 
CONFIG_BLK_DEV_PCIESSD_MTIP32XX is not set ++# CONFIG_BLK_DEV_UMEM is not set ++CONFIG_BLK_DEV_LOOP=y ++# CONFIG_BLK_DEV_DRBD is not set ++# CONFIG_BLK_DEV_NBD is not set ++# CONFIG_BLK_DEV_SKD is not set ++# CONFIG_BLK_DEV_SX8 is not set ++CONFIG_BLK_DEV_RAM=y ++CONFIG_BLK_DEV_RAM_COUNT=16 ++CONFIG_BLK_DEV_RAM_SIZE=16384 ++# CONFIG_CDROM_PKTCDVD is not set ++# CONFIG_ATA_OVER_ETH is not set ++CONFIG_VIRTIO_BLK=y ++# CONFIG_BLK_DEV_RBD is not set ++# CONFIG_BLK_DEV_RSXX is not set ++ ++# ++# NVME Support ++# ++# CONFIG_BLK_DEV_NVME is not set ++# CONFIG_NVME_FC is not set ++# end of NVME Support ++ ++# ++# Misc devices ++# ++# CONFIG_DUMMY_IRQ is not set ++# CONFIG_IBM_ASM is not set ++# CONFIG_PHANTOM is not set ++# CONFIG_TIFM_CORE is not set ++# CONFIG_ENCLOSURE_SERVICES is not set ++# CONFIG_HP_ILO is not set ++# CONFIG_SRAM is not set ++# CONFIG_PCI_ENDPOINT_TEST is not set ++# CONFIG_XILINX_SDFEC is not set ++# CONFIG_PVPANIC is not set ++# CONFIG_C2PORT is not set ++ ++# ++# EEPROM support ++# ++# CONFIG_EEPROM_93CX6 is not set ++# end of EEPROM support ++ ++# CONFIG_CB710_CORE is not set ++ ++# ++# Texas Instruments shared transport line discipline ++# ++# end of Texas Instruments shared transport line discipline ++ ++# ++# Altera FPGA firmware download module (requires I2C) ++# ++# CONFIG_INTEL_MEI is not set ++# CONFIG_INTEL_MEI_ME is not set ++# CONFIG_INTEL_MEI_TXE is not set ++# CONFIG_VMWARE_VMCI is not set ++ ++# ++# Intel MIC & related support ++# ++# CONFIG_INTEL_MIC_BUS is not set ++# CONFIG_SCIF_BUS is not set ++# CONFIG_VOP_BUS is not set ++# end of Intel MIC & related support ++ ++# CONFIG_GENWQE is not set ++# CONFIG_ECHO is not set ++# CONFIG_MISC_ALCOR_PCI is not set ++# CONFIG_MISC_RTSX_PCI is not set ++# CONFIG_HABANA_AI is not set ++# end of Misc devices ++ ++CONFIG_HAVE_IDE=y ++# CONFIG_IDE is not set ++ ++# ++# SCSI device support ++# ++CONFIG_SCSI_MOD=y ++# CONFIG_RAID_ATTRS is not set ++# CONFIG_SCSI is not set ++# end of SCSI device support ++ ++# CONFIG_ATA is not set ++# CONFIG_MD is not set ++# CONFIG_TARGET_CORE is not set ++# CONFIG_FUSION is not set ++ ++# ++# IEEE 1394 (FireWire) support ++# ++# CONFIG_FIREWIRE is not set ++# CONFIG_FIREWIRE_NOSY is not set ++# end of IEEE 1394 (FireWire) support ++ ++# CONFIG_MACINTOSH_DRIVERS is not set ++CONFIG_NETDEVICES=y ++CONFIG_NET_CORE=y ++CONFIG_BONDING=y ++# CONFIG_DUMMY is not set ++# CONFIG_WIREGUARD is not set ++# CONFIG_EQUALIZER is not set ++# CONFIG_IFB is not set ++# CONFIG_NET_TEAM is not set ++# CONFIG_MACVLAN is not set ++# CONFIG_IPVLAN is not set ++CONFIG_VXLAN=y ++# CONFIG_GENEVE is not set ++# CONFIG_BAREUDP is not set ++# CONFIG_GTP is not set ++# CONFIG_MACSEC is not set ++# CONFIG_NETCONSOLE is not set ++CONFIG_TUN=y ++# CONFIG_TUN_VNET_CROSS_LE is not set ++CONFIG_VETH=y ++CONFIG_VIRTIO_NET=y ++# CONFIG_NLMON is not set ++CONFIG_NET_VRF=y ++# CONFIG_ARCNET is not set ++ ++# ++# Distributed Switch Architecture drivers ++# ++# end of Distributed Switch Architecture drivers ++ ++# CONFIG_ETHERNET is not set ++# CONFIG_FDDI is not set ++# CONFIG_HIPPI is not set ++# CONFIG_NET_SB1000 is not set ++# CONFIG_MDIO_DEVICE is not set ++# CONFIG_PHYLIB is not set ++# CONFIG_PPP is not set ++# CONFIG_SLIP is not set ++ ++# ++# Host-side USB support is needed for USB Network Adapter support ++# ++# CONFIG_WLAN is not set ++ ++# ++# Enable WiMAX (Networking options) to see the WiMAX drivers ++# ++# CONFIG_WAN is not set ++# CONFIG_VMXNET3 is not set ++# CONFIG_FUJITSU_ES is not set ++CONFIG_NETDEVSIM=y 
++CONFIG_NET_FAILOVER=y ++# CONFIG_ISDN is not set ++# CONFIG_NVM is not set ++ ++# ++# Input device support ++# ++CONFIG_INPUT=y ++CONFIG_INPUT_FF_MEMLESS=y ++# CONFIG_INPUT_POLLDEV is not set ++# CONFIG_INPUT_SPARSEKMAP is not set ++# CONFIG_INPUT_MATRIXKMAP is not set ++ ++# ++# Userland interfaces ++# ++# CONFIG_INPUT_MOUSEDEV is not set ++# CONFIG_INPUT_JOYDEV is not set ++CONFIG_INPUT_EVDEV=y ++# CONFIG_INPUT_EVBUG is not set ++ ++# ++# Input Device Drivers ++# ++# CONFIG_INPUT_KEYBOARD is not set ++# CONFIG_INPUT_MOUSE is not set ++# CONFIG_INPUT_JOYSTICK is not set ++# CONFIG_INPUT_TABLET is not set ++# CONFIG_INPUT_TOUCHSCREEN is not set ++# CONFIG_INPUT_MISC is not set ++# CONFIG_RMI4_CORE is not set ++ ++# ++# Hardware I/O ports ++# ++CONFIG_SERIO=y ++CONFIG_ARCH_MIGHT_HAVE_PC_SERIO=y ++CONFIG_SERIO_I8042=y ++CONFIG_SERIO_SERPORT=y ++# CONFIG_SERIO_CT82C710 is not set ++# CONFIG_SERIO_PCIPS2 is not set ++CONFIG_SERIO_LIBPS2=y ++# CONFIG_SERIO_RAW is not set ++# CONFIG_SERIO_ALTERA_PS2 is not set ++# CONFIG_SERIO_PS2MULT is not set ++# CONFIG_SERIO_ARC_PS2 is not set ++# CONFIG_USERIO is not set ++# CONFIG_GAMEPORT is not set ++# end of Hardware I/O ports ++# end of Input device support ++ ++# ++# Character devices ++# ++CONFIG_TTY=y ++CONFIG_VT=y ++CONFIG_CONSOLE_TRANSLATIONS=y ++CONFIG_VT_CONSOLE=y ++CONFIG_HW_CONSOLE=y ++CONFIG_VT_HW_CONSOLE_BINDING=y ++CONFIG_UNIX98_PTYS=y ++# CONFIG_LEGACY_PTYS is not set ++CONFIG_LDISC_AUTOLOAD=y ++ ++# ++# Serial drivers ++# ++CONFIG_SERIAL_EARLYCON=y ++CONFIG_SERIAL_8250=y ++CONFIG_SERIAL_8250_DEPRECATED_OPTIONS=y ++CONFIG_SERIAL_8250_PNP=y ++# CONFIG_SERIAL_8250_16550A_VARIANTS is not set ++# CONFIG_SERIAL_8250_FINTEK is not set ++CONFIG_SERIAL_8250_CONSOLE=y ++CONFIG_SERIAL_8250_PCI=y ++CONFIG_SERIAL_8250_EXAR=y ++CONFIG_SERIAL_8250_NR_UARTS=32 ++CONFIG_SERIAL_8250_RUNTIME_UARTS=4 ++CONFIG_SERIAL_8250_EXTENDED=y ++CONFIG_SERIAL_8250_MANY_PORTS=y ++CONFIG_SERIAL_8250_SHARE_IRQ=y ++CONFIG_SERIAL_8250_DETECT_IRQ=y ++CONFIG_SERIAL_8250_RSA=y ++# CONFIG_SERIAL_8250_DW is not set ++# CONFIG_SERIAL_8250_RT288X is not set ++# CONFIG_SERIAL_8250_LPSS is not set ++# CONFIG_SERIAL_8250_MID is not set ++ ++# ++# Non-8250 serial port support ++# ++# CONFIG_SERIAL_UARTLITE is not set ++CONFIG_SERIAL_CORE=y ++CONFIG_SERIAL_CORE_CONSOLE=y ++# CONFIG_SERIAL_JSM is not set ++# CONFIG_SERIAL_LANTIQ is not set ++# CONFIG_SERIAL_SCCNXP is not set ++# CONFIG_SERIAL_ALTERA_JTAGUART is not set ++# CONFIG_SERIAL_ALTERA_UART is not set ++# CONFIG_SERIAL_ARC is not set ++# CONFIG_SERIAL_RP2 is not set ++# CONFIG_SERIAL_FSL_LPUART is not set ++# CONFIG_SERIAL_FSL_LINFLEXUART is not set ++# CONFIG_SERIAL_SPRD is not set ++# end of Serial drivers ++ ++CONFIG_SERIAL_NONSTANDARD=y ++# CONFIG_ROCKETPORT is not set ++# CONFIG_CYCLADES is not set ++# CONFIG_MOXA_INTELLIO is not set ++# CONFIG_MOXA_SMARTIO is not set ++# CONFIG_SYNCLINK is not set ++# CONFIG_SYNCLINKMP is not set ++# CONFIG_SYNCLINK_GT is not set ++# CONFIG_ISI is not set ++# CONFIG_N_HDLC is not set ++# CONFIG_N_GSM is not set ++# CONFIG_NOZOMI is not set ++# CONFIG_NULL_TTY is not set ++# CONFIG_TRACE_SINK is not set ++CONFIG_HVC_DRIVER=y ++# CONFIG_SERIAL_DEV_BUS is not set ++# CONFIG_TTY_PRINTK is not set ++CONFIG_VIRTIO_CONSOLE=y ++# CONFIG_IPMI_HANDLER is not set ++# CONFIG_HW_RANDOM is not set ++# CONFIG_APPLICOM is not set ++# CONFIG_MWAVE is not set ++CONFIG_DEVMEM=y ++CONFIG_DEVKMEM=y ++# CONFIG_NVRAM is not set ++# CONFIG_RAW_DRIVER is not set ++CONFIG_DEVPORT=y ++CONFIG_HPET=y ++# 
CONFIG_HPET_MMAP is not set ++# CONFIG_HANGCHECK_TIMER is not set ++CONFIG_TCG_TPM=y ++CONFIG_TCG_TIS_CORE=y ++CONFIG_TCG_TIS=y ++# CONFIG_TCG_NSC is not set ++# CONFIG_TCG_ATMEL is not set ++# CONFIG_TCG_INFINEON is not set ++CONFIG_TCG_CRB=y ++# CONFIG_TCG_VTPM_PROXY is not set ++# CONFIG_TELCLOCK is not set ++# CONFIG_XILLYBUS is not set ++# end of Character devices ++ ++# CONFIG_RANDOM_TRUST_CPU is not set ++# CONFIG_RANDOM_TRUST_BOOTLOADER is not set ++ ++# ++# I2C support ++# ++# CONFIG_I2C is not set ++# end of I2C support ++ ++# CONFIG_I3C is not set ++# CONFIG_SPI is not set ++# CONFIG_SPMI is not set ++# CONFIG_HSI is not set ++CONFIG_PPS=y ++# CONFIG_PPS_DEBUG is not set ++ ++# ++# PPS clients support ++# ++# CONFIG_PPS_CLIENT_KTIMER is not set ++# CONFIG_PPS_CLIENT_LDISC is not set ++# CONFIG_PPS_CLIENT_GPIO is not set ++ ++# ++# PPS generators support ++# ++ ++# ++# PTP clock support ++# ++CONFIG_PTP_1588_CLOCK=y ++ ++# ++# Enable PHYLIB and NETWORK_PHY_TIMESTAMPING to see the additional clocks. ++# ++# end of PTP clock support ++ ++# CONFIG_PINCTRL is not set ++# CONFIG_GPIOLIB is not set ++# CONFIG_W1 is not set ++# CONFIG_POWER_AVS is not set ++# CONFIG_POWER_RESET is not set ++CONFIG_POWER_SUPPLY=y ++# CONFIG_POWER_SUPPLY_DEBUG is not set ++# CONFIG_PDA_POWER is not set ++# CONFIG_TEST_POWER is not set ++# CONFIG_BATTERY_DS2780 is not set ++# CONFIG_BATTERY_DS2781 is not set ++# CONFIG_BATTERY_BQ27XXX is not set ++# CONFIG_CHARGER_MAX8903 is not set ++# CONFIG_HWMON is not set ++CONFIG_THERMAL=y ++# CONFIG_THERMAL_NETLINK is not set ++# CONFIG_THERMAL_STATISTICS is not set ++CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS=0 ++CONFIG_THERMAL_WRITABLE_TRIPS=y ++CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y ++# CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set ++# CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE is not set ++# CONFIG_THERMAL_GOV_FAIR_SHARE is not set ++CONFIG_THERMAL_GOV_STEP_WISE=y ++# CONFIG_THERMAL_GOV_BANG_BANG is not set ++CONFIG_THERMAL_GOV_USER_SPACE=y ++# CONFIG_THERMAL_EMULATION is not set ++ ++# ++# Intel thermal drivers ++# ++CONFIG_INTEL_POWERCLAMP=y ++CONFIG_X86_PKG_TEMP_THERMAL=m ++# CONFIG_INTEL_SOC_DTS_THERMAL is not set ++ ++# ++# ACPI INT340X thermal drivers ++# ++# CONFIG_INT340X_THERMAL is not set ++# end of ACPI INT340X thermal drivers ++ ++# CONFIG_INTEL_PCH_THERMAL is not set ++# end of Intel thermal drivers ++ ++# CONFIG_WATCHDOG is not set ++CONFIG_SSB_POSSIBLE=y ++# CONFIG_SSB is not set ++CONFIG_BCMA_POSSIBLE=y ++# CONFIG_BCMA is not set ++ ++# ++# Multifunction device drivers ++# ++# CONFIG_MFD_MADERA is not set ++# CONFIG_HTC_PASIC3 is not set ++# CONFIG_MFD_INTEL_QUARK_I2C_GPIO is not set ++# CONFIG_LPC_ICH is not set ++# CONFIG_LPC_SCH is not set ++# CONFIG_MFD_INTEL_LPSS_ACPI is not set ++# CONFIG_MFD_INTEL_LPSS_PCI is not set ++# CONFIG_MFD_JANZ_CMODIO is not set ++# CONFIG_MFD_KEMPLD is not set ++# CONFIG_MFD_MT6397 is not set ++# CONFIG_MFD_RDC321X is not set ++# CONFIG_MFD_SM501 is not set ++# CONFIG_ABX500_CORE is not set ++# CONFIG_MFD_SYSCON is not set ++# CONFIG_MFD_TI_AM335X_TSCADC is not set ++# CONFIG_MFD_TQMX86 is not set ++# CONFIG_MFD_VX855 is not set ++# end of Multifunction device drivers ++ ++# CONFIG_REGULATOR is not set ++CONFIG_RC_CORE=y ++# CONFIG_RC_MAP is not set ++CONFIG_LIRC=y ++CONFIG_BPF_LIRC_MODE2=y ++# CONFIG_RC_DECODERS is not set ++CONFIG_RC_DEVICES=y ++CONFIG_RC_LOOPBACK=y ++# CONFIG_IR_SERIAL is not set ++# CONFIG_MEDIA_CEC_SUPPORT is not set ++# CONFIG_MEDIA_SUPPORT is not set ++ ++# ++# Graphics support ++# 
++CONFIG_AGP=y ++CONFIG_AGP_AMD64=y ++CONFIG_AGP_INTEL=y ++CONFIG_AGP_SIS=y ++CONFIG_AGP_VIA=y ++CONFIG_INTEL_GTT=y ++CONFIG_VGA_ARB=y ++CONFIG_VGA_ARB_MAX_GPUS=16 ++# CONFIG_VGA_SWITCHEROO is not set ++# CONFIG_DRM is not set ++ ++# ++# ARM devices ++# ++# end of ARM devices ++ ++# ++# Frame buffer Devices ++# ++CONFIG_FB_CMDLINE=y ++CONFIG_FB_NOTIFY=y ++CONFIG_FB=y ++# CONFIG_FIRMWARE_EDID is not set ++CONFIG_FB_BOOT_VESA_SUPPORT=y ++CONFIG_FB_CFB_FILLRECT=y ++CONFIG_FB_CFB_COPYAREA=y ++CONFIG_FB_CFB_IMAGEBLIT=y ++# CONFIG_FB_FOREIGN_ENDIAN is not set ++CONFIG_FB_MODE_HELPERS=y ++CONFIG_FB_TILEBLITTING=y ++ ++# ++# Frame buffer hardware drivers ++# ++# CONFIG_FB_CIRRUS is not set ++# CONFIG_FB_PM2 is not set ++# CONFIG_FB_CYBER2000 is not set ++# CONFIG_FB_ARC is not set ++# CONFIG_FB_ASILIANT is not set ++# CONFIG_FB_IMSTT is not set ++# CONFIG_FB_VGA16 is not set ++CONFIG_FB_VESA=y ++# CONFIG_FB_EFI is not set ++# CONFIG_FB_N411 is not set ++# CONFIG_FB_HGA is not set ++# CONFIG_FB_OPENCORES is not set ++# CONFIG_FB_S1D13XXX is not set ++# CONFIG_FB_NVIDIA is not set ++# CONFIG_FB_RIVA is not set ++# CONFIG_FB_I740 is not set ++# CONFIG_FB_LE80578 is not set ++# CONFIG_FB_INTEL is not set ++# CONFIG_FB_MATROX is not set ++# CONFIG_FB_RADEON is not set ++# CONFIG_FB_ATY128 is not set ++# CONFIG_FB_ATY is not set ++# CONFIG_FB_S3 is not set ++# CONFIG_FB_SAVAGE is not set ++# CONFIG_FB_SIS is not set ++# CONFIG_FB_NEOMAGIC is not set ++# CONFIG_FB_KYRO is not set ++# CONFIG_FB_3DFX is not set ++# CONFIG_FB_VOODOO1 is not set ++# CONFIG_FB_VT8623 is not set ++# CONFIG_FB_TRIDENT is not set ++# CONFIG_FB_ARK is not set ++# CONFIG_FB_PM3 is not set ++# CONFIG_FB_CARMINE is not set ++# CONFIG_FB_IBM_GXT4500 is not set ++# CONFIG_FB_VIRTUAL is not set ++# CONFIG_FB_METRONOME is not set ++# CONFIG_FB_MB862XX is not set ++# CONFIG_FB_SIMPLE is not set ++# CONFIG_FB_SM712 is not set ++# end of Frame buffer Devices ++ ++# ++# Backlight & LCD device support ++# ++# CONFIG_LCD_CLASS_DEVICE is not set ++CONFIG_BACKLIGHT_CLASS_DEVICE=y ++# CONFIG_BACKLIGHT_APPLE is not set ++# CONFIG_BACKLIGHT_QCOM_WLED is not set ++# CONFIG_BACKLIGHT_SAHARA is not set ++# end of Backlight & LCD device support ++ ++# ++# Console display driver support ++# ++CONFIG_VGA_CONSOLE=y ++CONFIG_VGACON_SOFT_SCROLLBACK=y ++CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64 ++# CONFIG_VGACON_SOFT_SCROLLBACK_PERSISTENT_ENABLE_BY_DEFAULT is not set ++CONFIG_DUMMY_CONSOLE=y ++CONFIG_DUMMY_CONSOLE_COLUMNS=80 ++CONFIG_DUMMY_CONSOLE_ROWS=25 ++CONFIG_FRAMEBUFFER_CONSOLE=y ++CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y ++CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y ++# CONFIG_FRAMEBUFFER_CONSOLE_DEFERRED_TAKEOVER is not set ++# end of Console display driver support ++ ++CONFIG_LOGO=y ++# CONFIG_LOGO_LINUX_MONO is not set ++# CONFIG_LOGO_LINUX_VGA16 is not set ++CONFIG_LOGO_LINUX_CLUT224=y ++# end of Graphics support ++ ++# CONFIG_SOUND is not set ++ ++# ++# HID support ++# ++CONFIG_HID=y ++# CONFIG_HID_BATTERY_STRENGTH is not set ++# CONFIG_HIDRAW is not set ++# CONFIG_UHID is not set ++CONFIG_HID_GENERIC=y ++ ++# ++# Special HID drivers ++# ++CONFIG_HID_A4TECH=y ++# CONFIG_HID_ACRUX is not set ++CONFIG_HID_APPLE=y ++# CONFIG_HID_AUREAL is not set ++CONFIG_HID_BELKIN=y ++CONFIG_HID_CHERRY=y ++CONFIG_HID_CHICONY=y ++# CONFIG_HID_COUGAR is not set ++# CONFIG_HID_MACALLY is not set ++# CONFIG_HID_CMEDIA is not set ++CONFIG_HID_CYPRESS=y ++CONFIG_HID_DRAGONRISE=y ++# CONFIG_DRAGONRISE_FF is not set ++# CONFIG_HID_EMS_FF is not set ++# CONFIG_HID_ELECOM is not 
set ++CONFIG_HID_EZKEY=y ++# CONFIG_HID_GEMBIRD is not set ++# CONFIG_HID_GFRM is not set ++# CONFIG_HID_GLORIOUS is not set ++# CONFIG_HID_KEYTOUCH is not set ++CONFIG_HID_KYE=y ++# CONFIG_HID_WALTOP is not set ++# CONFIG_HID_VIEWSONIC is not set ++CONFIG_HID_GYRATION=y ++# CONFIG_HID_ICADE is not set ++# CONFIG_HID_ITE is not set ++# CONFIG_HID_JABRA is not set ++CONFIG_HID_TWINHAN=y ++CONFIG_HID_KENSINGTON=y ++# CONFIG_HID_LCPOWER is not set ++# CONFIG_HID_LENOVO is not set ++# CONFIG_HID_MAGICMOUSE is not set ++# CONFIG_HID_MALTRON is not set ++# CONFIG_HID_MAYFLASH is not set ++# CONFIG_HID_REDRAGON is not set ++CONFIG_HID_MICROSOFT=y ++CONFIG_HID_MONTEREY=y ++# CONFIG_HID_MULTITOUCH is not set ++# CONFIG_HID_NTI is not set ++# CONFIG_HID_ORTEK is not set ++CONFIG_HID_PANTHERLORD=y ++# CONFIG_PANTHERLORD_FF is not set ++CONFIG_HID_PETALYNX=y ++# CONFIG_HID_PICOLCD is not set ++# CONFIG_HID_PLANTRONICS is not set ++# CONFIG_HID_PRIMAX is not set ++# CONFIG_HID_SAITEK is not set ++CONFIG_HID_SAMSUNG=y ++# CONFIG_HID_SPEEDLINK is not set ++# CONFIG_HID_STEAM is not set ++# CONFIG_HID_STEELSERIES is not set ++CONFIG_HID_SUNPLUS=y ++# CONFIG_HID_RMI is not set ++CONFIG_HID_GREENASIA=y ++# CONFIG_GREENASIA_FF is not set ++CONFIG_HID_SMARTJOYPLUS=y ++# CONFIG_SMARTJOYPLUS_FF is not set ++# CONFIG_HID_TIVO is not set ++CONFIG_HID_TOPSEED=y ++CONFIG_HID_THRUSTMASTER=y ++CONFIG_THRUSTMASTER_FF=y ++# CONFIG_HID_UDRAW_PS3 is not set ++# CONFIG_HID_XINMO is not set ++CONFIG_HID_ZEROPLUS=y ++CONFIG_ZEROPLUS_FF=y ++# CONFIG_HID_ZYDACRON is not set ++# CONFIG_HID_SENSOR_HUB is not set ++# CONFIG_HID_ALPS is not set ++# end of Special HID drivers ++ ++# ++# Intel ISH HID support ++# ++# CONFIG_INTEL_ISH_HID is not set ++# end of Intel ISH HID support ++# end of HID support ++ ++CONFIG_USB_OHCI_LITTLE_ENDIAN=y ++# CONFIG_USB_SUPPORT is not set ++# CONFIG_MMC is not set ++# CONFIG_MEMSTICK is not set ++# CONFIG_NEW_LEDS is not set ++# CONFIG_ACCESSIBILITY is not set ++# CONFIG_INFINIBAND is not set ++CONFIG_EDAC_ATOMIC_SCRUB=y ++CONFIG_EDAC_SUPPORT=y ++# CONFIG_EDAC is not set ++CONFIG_RTC_LIB=y ++CONFIG_RTC_MC146818_LIB=y ++# CONFIG_RTC_CLASS is not set ++# CONFIG_DMADEVICES is not set ++ ++# ++# DMABUF options ++# ++CONFIG_SYNC_FILE=y ++# CONFIG_SW_SYNC is not set ++# CONFIG_UDMABUF is not set ++# CONFIG_DMABUF_MOVE_NOTIFY is not set ++# CONFIG_DMABUF_SELFTESTS is not set ++# CONFIG_DMABUF_HEAPS is not set ++# end of DMABUF options ++ ++# CONFIG_AUXDISPLAY is not set ++# CONFIG_UIO is not set ++CONFIG_VIRT_DRIVERS=y ++# CONFIG_VBOXGUEST is not set ++CONFIG_VIRTIO=y ++CONFIG_VIRTIO_MENU=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_VIRTIO_PCI_LEGACY=y ++CONFIG_VIRTIO_BALLOON=y ++# CONFIG_VIRTIO_INPUT is not set ++# CONFIG_VIRTIO_MMIO is not set ++# CONFIG_VDPA is not set ++CONFIG_VHOST_MENU=y ++# CONFIG_VHOST_NET is not set ++# CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set ++ ++# ++# Microsoft Hyper-V guest support ++# ++# end of Microsoft Hyper-V guest support ++ ++# CONFIG_GREYBUS is not set ++# CONFIG_STAGING is not set ++# CONFIG_X86_PLATFORM_DEVICES is not set ++CONFIG_PMC_ATOM=y ++# CONFIG_MFD_CROS_EC is not set ++# CONFIG_CHROME_PLATFORMS is not set ++# CONFIG_MELLANOX_PLATFORM is not set ++CONFIG_HAVE_CLK=y ++CONFIG_CLKDEV_LOOKUP=y ++CONFIG_HAVE_CLK_PREPARE=y ++CONFIG_COMMON_CLK=y ++# CONFIG_HWSPINLOCK is not set ++ ++# ++# Clock Source drivers ++# ++CONFIG_CLKEVT_I8253=y ++CONFIG_I8253_LOCK=y ++CONFIG_CLKBLD_I8253=y ++# end of Clock Source drivers ++ ++CONFIG_MAILBOX=y ++CONFIG_PCC=y ++# CONFIG_ALTERA_MBOX 
is not set ++# CONFIG_IOMMU_SUPPORT is not set ++ ++# ++# Remoteproc drivers ++# ++# CONFIG_REMOTEPROC is not set ++# end of Remoteproc drivers ++ ++# ++# Rpmsg drivers ++# ++# CONFIG_RPMSG_QCOM_GLINK_RPM is not set ++# CONFIG_RPMSG_VIRTIO is not set ++# end of Rpmsg drivers ++ ++# CONFIG_SOUNDWIRE is not set ++ ++# ++# SOC (System On Chip) specific Drivers ++# ++ ++# ++# Amlogic SoC drivers ++# ++# end of Amlogic SoC drivers ++ ++# ++# Aspeed SoC drivers ++# ++# end of Aspeed SoC drivers ++ ++# ++# Broadcom SoC drivers ++# ++# end of Broadcom SoC drivers ++ ++# ++# NXP/Freescale QorIQ SoC drivers ++# ++# end of NXP/Freescale QorIQ SoC drivers ++ ++# ++# i.MX SoC drivers ++# ++# end of i.MX SoC drivers ++ ++# ++# Qualcomm SoC drivers ++# ++# end of Qualcomm SoC drivers ++ ++# CONFIG_SOC_TI is not set ++ ++# ++# Xilinx SoC drivers ++# ++# CONFIG_XILINX_VCU is not set ++# end of Xilinx SoC drivers ++# end of SOC (System On Chip) specific Drivers ++ ++# CONFIG_PM_DEVFREQ is not set ++# CONFIG_EXTCON is not set ++# CONFIG_MEMORY is not set ++# CONFIG_IIO is not set ++# CONFIG_NTB is not set ++# CONFIG_VME_BUS is not set ++# CONFIG_PWM is not set ++ ++# ++# IRQ chip support ++# ++# end of IRQ chip support ++ ++# CONFIG_IPACK_BUS is not set ++# CONFIG_RESET_CONTROLLER is not set ++ ++# ++# PHY Subsystem ++# ++CONFIG_GENERIC_PHY=y ++# CONFIG_BCM_KONA_USB2_PHY is not set ++# CONFIG_PHY_PXA_28NM_HSIC is not set ++# CONFIG_PHY_PXA_28NM_USB2 is not set ++# CONFIG_PHY_INTEL_EMMC is not set ++# end of PHY Subsystem ++ ++# CONFIG_POWERCAP is not set ++# CONFIG_MCB is not set ++ ++# ++# Performance monitor support ++# ++# end of Performance monitor support ++ ++CONFIG_RAS=y ++# CONFIG_RAS_CEC is not set ++# CONFIG_USB4 is not set ++ ++# ++# Android ++# ++# CONFIG_ANDROID is not set ++# end of Android ++ ++# CONFIG_LIBNVDIMM is not set ++# CONFIG_DAX is not set ++CONFIG_NVMEM=y ++# CONFIG_NVMEM_SYSFS is not set ++ ++# ++# HW tracing support ++# ++# CONFIG_STM is not set ++# CONFIG_INTEL_TH is not set ++# end of HW tracing support ++ ++# CONFIG_FPGA is not set ++# CONFIG_TEE is not set ++# CONFIG_UNISYS_VISORBUS is not set ++# CONFIG_SIOX is not set ++# CONFIG_SLIMBUS is not set ++# CONFIG_INTERCONNECT is not set ++# CONFIG_COUNTER is not set ++# end of Device Drivers ++ ++# ++# File systems ++# ++CONFIG_DCACHE_WORD_ACCESS=y ++CONFIG_VALIDATE_FS_PARSER=y ++CONFIG_FS_IOMAP=y ++# CONFIG_EXT2_FS is not set ++# CONFIG_EXT3_FS is not set ++CONFIG_EXT4_FS=y ++CONFIG_EXT4_USE_FOR_EXT2=y ++CONFIG_EXT4_FS_POSIX_ACL=y ++CONFIG_EXT4_FS_SECURITY=y ++# CONFIG_EXT4_DEBUG is not set ++CONFIG_JBD2=y ++# CONFIG_JBD2_DEBUG is not set ++CONFIG_FS_MBCACHE=y ++# CONFIG_REISERFS_FS is not set ++# CONFIG_JFS_FS is not set ++# CONFIG_XFS_FS is not set ++# CONFIG_GFS2_FS is not set ++# CONFIG_BTRFS_FS is not set ++# CONFIG_NILFS2_FS is not set ++# CONFIG_F2FS_FS is not set ++# CONFIG_FS_DAX is not set ++CONFIG_FS_POSIX_ACL=y ++CONFIG_EXPORTFS=y ++# CONFIG_EXPORTFS_BLOCK_OPS is not set ++CONFIG_FILE_LOCKING=y ++CONFIG_MANDATORY_FILE_LOCKING=y ++# CONFIG_FS_ENCRYPTION is not set ++# CONFIG_FS_VERITY is not set ++CONFIG_FSNOTIFY=y ++CONFIG_DNOTIFY=y ++CONFIG_INOTIFY_USER=y ++# CONFIG_FANOTIFY is not set ++# CONFIG_QUOTA is not set ++# CONFIG_AUTOFS4_FS is not set ++# CONFIG_AUTOFS_FS is not set ++# CONFIG_FUSE_FS is not set ++# CONFIG_OVERLAY_FS is not set ++ ++# ++# Caches ++# ++# CONFIG_FSCACHE is not set ++# end of Caches ++ ++# ++# CD-ROM/DVD Filesystems ++# ++# CONFIG_ISO9660_FS is not set ++# CONFIG_UDF_FS is not set ++# end of 
CD-ROM/DVD Filesystems ++ ++# ++# DOS/FAT/EXFAT/NT Filesystems ++# ++# CONFIG_MSDOS_FS is not set ++# CONFIG_VFAT_FS is not set ++# CONFIG_EXFAT_FS is not set ++# CONFIG_NTFS_FS is not set ++# end of DOS/FAT/EXFAT/NT Filesystems ++ ++# ++# Pseudo filesystems ++# ++CONFIG_PROC_FS=y ++CONFIG_PROC_KCORE=y ++CONFIG_PROC_SYSCTL=y ++CONFIG_PROC_PAGE_MONITOR=y ++# CONFIG_PROC_CHILDREN is not set ++CONFIG_PROC_PID_ARCH_STATUS=y ++CONFIG_KERNFS=y ++CONFIG_SYSFS=y ++CONFIG_TMPFS=y ++CONFIG_TMPFS_POSIX_ACL=y ++CONFIG_TMPFS_XATTR=y ++# CONFIG_TMPFS_INODE64 is not set ++CONFIG_HUGETLBFS=y ++CONFIG_HUGETLB_PAGE=y ++CONFIG_MEMFD_CREATE=y ++CONFIG_ARCH_HAS_GIGANTIC_PAGE=y ++# CONFIG_CONFIGFS_FS is not set ++# CONFIG_EFIVAR_FS is not set ++# end of Pseudo filesystems ++ ++# CONFIG_MISC_FILESYSTEMS is not set ++CONFIG_NETWORK_FILESYSTEMS=y ++# CONFIG_NFS_FS is not set ++# CONFIG_NFSD is not set ++# CONFIG_CEPH_FS is not set ++# CONFIG_CIFS is not set ++# CONFIG_CODA_FS is not set ++# CONFIG_AFS_FS is not set ++CONFIG_9P_FS=y ++CONFIG_9P_FS_POSIX_ACL=y ++CONFIG_9P_FS_SECURITY=y ++CONFIG_NLS=y ++CONFIG_NLS_DEFAULT="utf8" ++CONFIG_NLS_CODEPAGE_437=y ++# CONFIG_NLS_CODEPAGE_737 is not set ++# CONFIG_NLS_CODEPAGE_775 is not set ++# CONFIG_NLS_CODEPAGE_850 is not set ++# CONFIG_NLS_CODEPAGE_852 is not set ++# CONFIG_NLS_CODEPAGE_855 is not set ++# CONFIG_NLS_CODEPAGE_857 is not set ++# CONFIG_NLS_CODEPAGE_860 is not set ++# CONFIG_NLS_CODEPAGE_861 is not set ++# CONFIG_NLS_CODEPAGE_862 is not set ++# CONFIG_NLS_CODEPAGE_863 is not set ++# CONFIG_NLS_CODEPAGE_864 is not set ++# CONFIG_NLS_CODEPAGE_865 is not set ++# CONFIG_NLS_CODEPAGE_866 is not set ++# CONFIG_NLS_CODEPAGE_869 is not set ++# CONFIG_NLS_CODEPAGE_936 is not set ++# CONFIG_NLS_CODEPAGE_950 is not set ++# CONFIG_NLS_CODEPAGE_932 is not set ++# CONFIG_NLS_CODEPAGE_949 is not set ++# CONFIG_NLS_CODEPAGE_874 is not set ++# CONFIG_NLS_ISO8859_8 is not set ++# CONFIG_NLS_CODEPAGE_1250 is not set ++# CONFIG_NLS_CODEPAGE_1251 is not set ++CONFIG_NLS_ASCII=y ++# CONFIG_NLS_ISO8859_1 is not set ++# CONFIG_NLS_ISO8859_2 is not set ++# CONFIG_NLS_ISO8859_3 is not set ++# CONFIG_NLS_ISO8859_4 is not set ++# CONFIG_NLS_ISO8859_5 is not set ++# CONFIG_NLS_ISO8859_6 is not set ++# CONFIG_NLS_ISO8859_7 is not set ++# CONFIG_NLS_ISO8859_9 is not set ++# CONFIG_NLS_ISO8859_13 is not set ++# CONFIG_NLS_ISO8859_14 is not set ++# CONFIG_NLS_ISO8859_15 is not set ++# CONFIG_NLS_KOI8_R is not set ++# CONFIG_NLS_KOI8_U is not set ++# CONFIG_NLS_MAC_ROMAN is not set ++# CONFIG_NLS_MAC_CELTIC is not set ++# CONFIG_NLS_MAC_CENTEURO is not set ++# CONFIG_NLS_MAC_CROATIAN is not set ++# CONFIG_NLS_MAC_CYRILLIC is not set ++# CONFIG_NLS_MAC_GAELIC is not set ++# CONFIG_NLS_MAC_GREEK is not set ++# CONFIG_NLS_MAC_ICELAND is not set ++# CONFIG_NLS_MAC_INUIT is not set ++# CONFIG_NLS_MAC_ROMANIAN is not set ++# CONFIG_NLS_MAC_TURKISH is not set ++# CONFIG_NLS_UTF8 is not set ++# CONFIG_UNICODE is not set ++CONFIG_IO_WQ=y ++# end of File systems ++ ++# ++# Security options ++# ++CONFIG_KEYS=y ++# CONFIG_KEYS_REQUEST_CACHE is not set ++# CONFIG_PERSISTENT_KEYRINGS is not set ++# CONFIG_TRUSTED_KEYS is not set ++# CONFIG_ENCRYPTED_KEYS is not set ++# CONFIG_KEY_DH_OPERATIONS is not set ++# CONFIG_SECURITY_DMESG_RESTRICT is not set ++CONFIG_SECURITY=y ++CONFIG_SECURITYFS=y ++CONFIG_SECURITY_NETWORK=y ++CONFIG_PAGE_TABLE_ISOLATION=y ++# CONFIG_SECURITY_NETWORK_XFRM is not set ++# CONFIG_SECURITY_PATH is not set ++CONFIG_LSM_MMAP_MIN_ADDR=65536 ++CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y 
++# CONFIG_HARDENED_USERCOPY is not set ++# CONFIG_FORTIFY_SOURCE is not set ++# CONFIG_STATIC_USERMODEHELPER is not set ++CONFIG_SECURITY_SELINUX=y ++# CONFIG_SECURITY_SELINUX_BOOTPARAM is not set ++# CONFIG_SECURITY_SELINUX_DISABLE is not set ++CONFIG_SECURITY_SELINUX_DEVELOP=y ++CONFIG_SECURITY_SELINUX_AVC_STATS=y ++CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=0 ++CONFIG_SECURITY_SELINUX_SIDTAB_HASH_BITS=9 ++CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE=256 ++# CONFIG_SECURITY_SMACK is not set ++# CONFIG_SECURITY_TOMOYO is not set ++# CONFIG_SECURITY_APPARMOR is not set ++# CONFIG_SECURITY_LOADPIN is not set ++# CONFIG_SECURITY_YAMA is not set ++# CONFIG_SECURITY_SAFESETID is not set ++# CONFIG_SECURITY_LOCKDOWN_LSM is not set ++CONFIG_INTEGRITY=y ++# CONFIG_INTEGRITY_SIGNATURE is not set ++CONFIG_INTEGRITY_AUDIT=y ++CONFIG_IMA=y ++CONFIG_IMA_MEASURE_PCR_IDX=10 ++CONFIG_IMA_LSM_RULES=y ++# CONFIG_IMA_TEMPLATE is not set ++CONFIG_IMA_NG_TEMPLATE=y ++# CONFIG_IMA_SIG_TEMPLATE is not set ++CONFIG_IMA_DEFAULT_TEMPLATE="ima-ng" ++CONFIG_IMA_DEFAULT_HASH_SHA1=y ++# CONFIG_IMA_DEFAULT_HASH_SHA256 is not set ++CONFIG_IMA_DEFAULT_HASH="sha1" ++CONFIG_IMA_WRITE_POLICY=y ++CONFIG_IMA_READ_POLICY=y ++# CONFIG_IMA_APPRAISE is not set ++CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS=y ++CONFIG_IMA_QUEUE_EARLY_BOOT_KEYS=y ++# CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT is not set ++# CONFIG_EVM is not set ++# CONFIG_DEFAULT_SECURITY_SELINUX is not set ++CONFIG_DEFAULT_SECURITY_DAC=y ++CONFIG_LSM="selinux,bpf,integrity" ++ ++# ++# Kernel hardening options ++# ++ ++# ++# Memory initialization ++# ++CONFIG_INIT_STACK_NONE=y ++# CONFIG_INIT_ON_ALLOC_DEFAULT_ON is not set ++# CONFIG_INIT_ON_FREE_DEFAULT_ON is not set ++# end of Memory initialization ++# end of Kernel hardening options ++# end of Security options ++ ++CONFIG_CRYPTO=y ++ ++# ++# Crypto core or helper ++# ++CONFIG_CRYPTO_ALGAPI=y ++CONFIG_CRYPTO_ALGAPI2=y ++CONFIG_CRYPTO_AEAD=y ++CONFIG_CRYPTO_AEAD2=y ++CONFIG_CRYPTO_SKCIPHER=y ++CONFIG_CRYPTO_SKCIPHER2=y ++CONFIG_CRYPTO_HASH=y ++CONFIG_CRYPTO_HASH2=y ++CONFIG_CRYPTO_RNG=y ++CONFIG_CRYPTO_RNG2=y ++CONFIG_CRYPTO_RNG_DEFAULT=y ++CONFIG_CRYPTO_AKCIPHER2=y ++CONFIG_CRYPTO_AKCIPHER=y ++CONFIG_CRYPTO_KPP2=y ++CONFIG_CRYPTO_ACOMP2=y ++CONFIG_CRYPTO_MANAGER=y ++CONFIG_CRYPTO_MANAGER2=y ++# CONFIG_CRYPTO_USER is not set ++CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y ++CONFIG_CRYPTO_GF128MUL=y ++CONFIG_CRYPTO_NULL=y ++CONFIG_CRYPTO_NULL2=y ++# CONFIG_CRYPTO_PCRYPT is not set ++# CONFIG_CRYPTO_CRYPTD is not set ++# CONFIG_CRYPTO_AUTHENC is not set ++# CONFIG_CRYPTO_TEST is not set ++CONFIG_CRYPTO_ENGINE=m ++ ++# ++# Public-key cryptography ++# ++CONFIG_CRYPTO_RSA=y ++# CONFIG_CRYPTO_DH is not set ++# CONFIG_CRYPTO_ECDH is not set ++# CONFIG_CRYPTO_ECRDSA is not set ++# CONFIG_CRYPTO_CURVE25519 is not set ++# CONFIG_CRYPTO_CURVE25519_X86 is not set ++ ++# ++# Authenticated Encryption with Associated Data ++# ++# CONFIG_CRYPTO_CCM is not set ++CONFIG_CRYPTO_GCM=y ++# CONFIG_CRYPTO_CHACHA20POLY1305 is not set ++# CONFIG_CRYPTO_AEGIS128 is not set ++# CONFIG_CRYPTO_AEGIS128_AESNI_SSE2 is not set ++CONFIG_CRYPTO_SEQIV=y ++# CONFIG_CRYPTO_ECHAINIV is not set ++ ++# ++# Block modes ++# ++# CONFIG_CRYPTO_CBC is not set ++# CONFIG_CRYPTO_CFB is not set ++CONFIG_CRYPTO_CTR=y ++# CONFIG_CRYPTO_CTS is not set ++# CONFIG_CRYPTO_ECB is not set ++# CONFIG_CRYPTO_LRW is not set ++# CONFIG_CRYPTO_OFB is not set ++# CONFIG_CRYPTO_PCBC is not set ++# CONFIG_CRYPTO_XTS is not set ++# CONFIG_CRYPTO_KEYWRAP is not set ++# CONFIG_CRYPTO_NHPOLY1305_SSE2 is 
not set ++# CONFIG_CRYPTO_NHPOLY1305_AVX2 is not set ++# CONFIG_CRYPTO_ADIANTUM is not set ++# CONFIG_CRYPTO_ESSIV is not set ++ ++# ++# Hash modes ++# ++# CONFIG_CRYPTO_CMAC is not set ++CONFIG_CRYPTO_HMAC=y ++# CONFIG_CRYPTO_XCBC is not set ++# CONFIG_CRYPTO_VMAC is not set ++ ++# ++# Digest ++# ++CONFIG_CRYPTO_CRC32C=y ++# CONFIG_CRYPTO_CRC32C_INTEL is not set ++# CONFIG_CRYPTO_CRC32 is not set ++# CONFIG_CRYPTO_CRC32_PCLMUL is not set ++CONFIG_CRYPTO_XXHASH=y ++CONFIG_CRYPTO_BLAKE2B=y ++# CONFIG_CRYPTO_BLAKE2S is not set ++# CONFIG_CRYPTO_BLAKE2S_X86 is not set ++CONFIG_CRYPTO_CRCT10DIF=y ++# CONFIG_CRYPTO_CRCT10DIF_PCLMUL is not set ++CONFIG_CRYPTO_GHASH=y ++# CONFIG_CRYPTO_POLY1305 is not set ++# CONFIG_CRYPTO_POLY1305_X86_64 is not set ++# CONFIG_CRYPTO_MD4 is not set ++CONFIG_CRYPTO_MD5=y ++# CONFIG_CRYPTO_MICHAEL_MIC is not set ++# CONFIG_CRYPTO_RMD128 is not set ++# CONFIG_CRYPTO_RMD160 is not set ++# CONFIG_CRYPTO_RMD256 is not set ++# CONFIG_CRYPTO_RMD320 is not set ++CONFIG_CRYPTO_SHA1=y ++# CONFIG_CRYPTO_SHA1_SSSE3 is not set ++# CONFIG_CRYPTO_SHA256_SSSE3 is not set ++# CONFIG_CRYPTO_SHA512_SSSE3 is not set ++CONFIG_CRYPTO_SHA256=y ++# CONFIG_CRYPTO_SHA512 is not set ++# CONFIG_CRYPTO_SHA3 is not set ++# CONFIG_CRYPTO_SM3 is not set ++# CONFIG_CRYPTO_STREEBOG is not set ++# CONFIG_CRYPTO_TGR192 is not set ++# CONFIG_CRYPTO_WP512 is not set ++# CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL is not set ++ ++# ++# Ciphers ++# ++CONFIG_CRYPTO_AES=y ++# CONFIG_CRYPTO_AES_TI is not set ++# CONFIG_CRYPTO_AES_NI_INTEL is not set ++# CONFIG_CRYPTO_ANUBIS is not set ++# CONFIG_CRYPTO_ARC4 is not set ++# CONFIG_CRYPTO_BLOWFISH is not set ++# CONFIG_CRYPTO_BLOWFISH_X86_64 is not set ++# CONFIG_CRYPTO_CAMELLIA is not set ++# CONFIG_CRYPTO_CAMELLIA_X86_64 is not set ++# CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64 is not set ++# CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64 is not set ++# CONFIG_CRYPTO_CAST5 is not set ++# CONFIG_CRYPTO_CAST5_AVX_X86_64 is not set ++# CONFIG_CRYPTO_CAST6 is not set ++# CONFIG_CRYPTO_CAST6_AVX_X86_64 is not set ++# CONFIG_CRYPTO_DES is not set ++# CONFIG_CRYPTO_DES3_EDE_X86_64 is not set ++# CONFIG_CRYPTO_FCRYPT is not set ++# CONFIG_CRYPTO_KHAZAD is not set ++# CONFIG_CRYPTO_SALSA20 is not set ++# CONFIG_CRYPTO_CHACHA20 is not set ++# CONFIG_CRYPTO_CHACHA20_X86_64 is not set ++# CONFIG_CRYPTO_SEED is not set ++# CONFIG_CRYPTO_SERPENT is not set ++# CONFIG_CRYPTO_SERPENT_SSE2_X86_64 is not set ++# CONFIG_CRYPTO_SERPENT_AVX_X86_64 is not set ++# CONFIG_CRYPTO_SERPENT_AVX2_X86_64 is not set ++# CONFIG_CRYPTO_SM4 is not set ++# CONFIG_CRYPTO_TEA is not set ++# CONFIG_CRYPTO_TWOFISH is not set ++# CONFIG_CRYPTO_TWOFISH_X86_64 is not set ++# CONFIG_CRYPTO_TWOFISH_X86_64_3WAY is not set ++# CONFIG_CRYPTO_TWOFISH_AVX_X86_64 is not set ++ ++# ++# Compression ++# ++# CONFIG_CRYPTO_DEFLATE is not set ++# CONFIG_CRYPTO_LZO is not set ++# CONFIG_CRYPTO_842 is not set ++# CONFIG_CRYPTO_LZ4 is not set ++# CONFIG_CRYPTO_LZ4HC is not set ++# CONFIG_CRYPTO_ZSTD is not set ++ ++# ++# Random Number Generation ++# ++# CONFIG_CRYPTO_ANSI_CPRNG is not set ++CONFIG_CRYPTO_DRBG_MENU=y ++CONFIG_CRYPTO_DRBG_HMAC=y ++# CONFIG_CRYPTO_DRBG_HASH is not set ++# CONFIG_CRYPTO_DRBG_CTR is not set ++CONFIG_CRYPTO_DRBG=y ++CONFIG_CRYPTO_JITTERENTROPY=y ++CONFIG_CRYPTO_USER_API=y ++CONFIG_CRYPTO_USER_API_HASH=y ++# CONFIG_CRYPTO_USER_API_SKCIPHER is not set ++# CONFIG_CRYPTO_USER_API_RNG is not set ++# CONFIG_CRYPTO_USER_API_AEAD is not set ++CONFIG_CRYPTO_HASH_INFO=y ++ ++# ++# Crypto library routines ++# 
++CONFIG_CRYPTO_LIB_AES=y ++# CONFIG_CRYPTO_LIB_BLAKE2S is not set ++# CONFIG_CRYPTO_LIB_CHACHA is not set ++# CONFIG_CRYPTO_LIB_CURVE25519 is not set ++CONFIG_CRYPTO_LIB_POLY1305_RSIZE=11 ++# CONFIG_CRYPTO_LIB_POLY1305 is not set ++# CONFIG_CRYPTO_LIB_CHACHA20POLY1305 is not set ++CONFIG_CRYPTO_LIB_SHA256=y ++CONFIG_CRYPTO_HW=y ++# CONFIG_CRYPTO_DEV_PADLOCK is not set ++# CONFIG_CRYPTO_DEV_CCP is not set ++# CONFIG_CRYPTO_DEV_QAT_DH895xCC is not set ++# CONFIG_CRYPTO_DEV_QAT_C3XXX is not set ++# CONFIG_CRYPTO_DEV_QAT_C62X is not set ++# CONFIG_CRYPTO_DEV_QAT_DH895xCCVF is not set ++# CONFIG_CRYPTO_DEV_QAT_C3XXXVF is not set ++# CONFIG_CRYPTO_DEV_QAT_C62XVF is not set ++# CONFIG_CRYPTO_DEV_NITROX_CNN55XX is not set ++CONFIG_CRYPTO_DEV_VIRTIO=m ++# CONFIG_CRYPTO_DEV_SAFEXCEL is not set ++# CONFIG_CRYPTO_DEV_AMLOGIC_GXL is not set ++CONFIG_ASYMMETRIC_KEY_TYPE=y ++CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y ++CONFIG_X509_CERTIFICATE_PARSER=y ++# CONFIG_PKCS8_PRIVATE_KEY_PARSER is not set ++CONFIG_PKCS7_MESSAGE_PARSER=y ++ ++# ++# Certificates for signature checking ++# ++CONFIG_SYSTEM_TRUSTED_KEYRING=y ++CONFIG_SYSTEM_TRUSTED_KEYS="" ++# CONFIG_SYSTEM_EXTRA_CERTIFICATE is not set ++# CONFIG_SECONDARY_TRUSTED_KEYRING is not set ++# CONFIG_SYSTEM_BLACKLIST_KEYRING is not set ++# end of Certificates for signature checking ++ ++CONFIG_BINARY_PRINTF=y ++ ++# ++# Library routines ++# ++# CONFIG_PACKING is not set ++CONFIG_BITREVERSE=y ++CONFIG_GENERIC_STRNCPY_FROM_USER=y ++CONFIG_GENERIC_STRNLEN_USER=y ++CONFIG_GENERIC_NET_UTILS=y ++CONFIG_GENERIC_FIND_FIRST_BIT=y ++# CONFIG_CORDIC is not set ++# CONFIG_PRIME_NUMBERS is not set ++CONFIG_RATIONAL=y ++CONFIG_GENERIC_PCI_IOMAP=y ++CONFIG_GENERIC_IOMAP=y ++CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y ++CONFIG_ARCH_HAS_FAST_MULTIPLIER=y ++CONFIG_ARCH_USE_SYM_ANNOTATIONS=y ++CONFIG_CRC_CCITT=y ++CONFIG_CRC16=y ++CONFIG_CRC_T10DIF=y ++# CONFIG_CRC_ITU_T is not set ++CONFIG_CRC32=y ++# CONFIG_CRC32_SELFTEST is not set ++CONFIG_CRC32_SLICEBY8=y ++# CONFIG_CRC32_SLICEBY4 is not set ++# CONFIG_CRC32_SARWATE is not set ++# CONFIG_CRC32_BIT is not set ++# CONFIG_CRC64 is not set ++# CONFIG_CRC4 is not set ++# CONFIG_CRC7 is not set ++CONFIG_LIBCRC32C=y ++# CONFIG_CRC8 is not set ++CONFIG_XXHASH=y ++# CONFIG_RANDOM32_SELFTEST is not set ++CONFIG_ZLIB_INFLATE=y ++CONFIG_LZO_DECOMPRESS=y ++CONFIG_LZ4_DECOMPRESS=y ++CONFIG_ZSTD_DECOMPRESS=y ++CONFIG_XZ_DEC=y ++CONFIG_XZ_DEC_X86=y ++# CONFIG_XZ_DEC_POWERPC is not set ++# CONFIG_XZ_DEC_IA64 is not set ++# CONFIG_XZ_DEC_ARM is not set ++# CONFIG_XZ_DEC_ARMTHUMB is not set ++# CONFIG_XZ_DEC_SPARC is not set ++CONFIG_XZ_DEC_BCJ=y ++# CONFIG_XZ_DEC_TEST is not set ++CONFIG_DECOMPRESS_GZIP=y ++CONFIG_DECOMPRESS_BZIP2=y ++CONFIG_DECOMPRESS_LZMA=y ++CONFIG_DECOMPRESS_XZ=y ++CONFIG_DECOMPRESS_LZO=y ++CONFIG_DECOMPRESS_LZ4=y ++CONFIG_DECOMPRESS_ZSTD=y ++CONFIG_GENERIC_ALLOCATOR=y ++CONFIG_XARRAY_MULTI=y ++CONFIG_ASSOCIATIVE_ARRAY=y ++CONFIG_HAS_IOMEM=y ++CONFIG_HAS_IOPORT_MAP=y ++CONFIG_HAS_DMA=y ++CONFIG_DMA_OPS=y ++CONFIG_NEED_SG_DMA_LENGTH=y ++CONFIG_NEED_DMA_MAP_STATE=y ++CONFIG_ARCH_DMA_ADDR_T_64BIT=y ++CONFIG_SWIOTLB=y ++CONFIG_DMA_CMA=y ++ ++# ++# Default contiguous memory area size: ++# ++CONFIG_CMA_SIZE_MBYTES=0 ++CONFIG_CMA_SIZE_SEL_MBYTES=y ++# CONFIG_CMA_SIZE_SEL_PERCENTAGE is not set ++# CONFIG_CMA_SIZE_SEL_MIN is not set ++# CONFIG_CMA_SIZE_SEL_MAX is not set ++CONFIG_CMA_ALIGNMENT=8 ++# CONFIG_DMA_API_DEBUG is not set ++CONFIG_SGL_ALLOC=y ++CONFIG_IOMMU_HELPER=y ++CONFIG_CPU_RMAP=y ++CONFIG_DQL=y ++CONFIG_GLOB=y ++# 
CONFIG_GLOB_SELFTEST is not set ++CONFIG_NLATTR=y ++CONFIG_CLZ_TAB=y ++CONFIG_IRQ_POLL=y ++CONFIG_MPILIB=y ++CONFIG_OID_REGISTRY=y ++CONFIG_UCS2_STRING=y ++CONFIG_HAVE_GENERIC_VDSO=y ++CONFIG_GENERIC_GETTIMEOFDAY=y ++CONFIG_GENERIC_VDSO_TIME_NS=y ++CONFIG_FONT_SUPPORT=y ++CONFIG_FONTS=y ++# CONFIG_FONT_8x8 is not set ++CONFIG_FONT_8x16=y ++# CONFIG_FONT_6x11 is not set ++# CONFIG_FONT_7x14 is not set ++# CONFIG_FONT_PEARL_8x8 is not set ++# CONFIG_FONT_ACORN_8x8 is not set ++CONFIG_FONT_MINI_4x6=y ++# CONFIG_FONT_6x10 is not set ++# CONFIG_FONT_10x18 is not set ++# CONFIG_FONT_SUN8x16 is not set ++# CONFIG_FONT_SUN12x22 is not set ++# CONFIG_FONT_TER16x32 is not set ++CONFIG_ARCH_HAS_PMEM_API=y ++CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE=y ++CONFIG_ARCH_HAS_UACCESS_MCSAFE=y ++CONFIG_ARCH_STACKWALK=y ++CONFIG_SBITMAP=y ++# CONFIG_STRING_SELFTEST is not set ++# end of Library routines ++ ++# ++# Kernel hacking ++# ++ ++# ++# printk and dmesg options ++# ++CONFIG_PRINTK_TIME=y ++# CONFIG_PRINTK_CALLER is not set ++CONFIG_CONSOLE_LOGLEVEL_DEFAULT=7 ++CONFIG_CONSOLE_LOGLEVEL_QUIET=4 ++CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 ++# CONFIG_BOOT_PRINTK_DELAY is not set ++# CONFIG_DYNAMIC_DEBUG is not set ++# CONFIG_DYNAMIC_DEBUG_CORE is not set ++CONFIG_SYMBOLIC_ERRNAME=y ++CONFIG_DEBUG_BUGVERBOSE=y ++# end of printk and dmesg options ++ ++# ++# Compile-time checks and compiler options ++# ++CONFIG_DEBUG_INFO=y ++# CONFIG_DEBUG_INFO_REDUCED is not set ++# CONFIG_DEBUG_INFO_COMPRESSED is not set ++# CONFIG_DEBUG_INFO_SPLIT is not set ++# CONFIG_DEBUG_INFO_DWARF4 is not set ++CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y ++CONFIG_DEBUG_INFO_BTF=y ++# CONFIG_GDB_SCRIPTS is not set ++CONFIG_ENABLE_MUST_CHECK=y ++CONFIG_FRAME_WARN=2048 ++# CONFIG_STRIP_ASM_SYMS is not set ++# CONFIG_READABLE_ASM is not set ++# CONFIG_HEADERS_INSTALL is not set ++# CONFIG_DEBUG_SECTION_MISMATCH is not set ++CONFIG_SECTION_MISMATCH_WARN_ONLY=y ++# CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_32B is not set ++CONFIG_STACK_VALIDATION=y ++# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set ++# end of Compile-time checks and compiler options ++ ++# ++# Generic Kernel Debugging Instruments ++# ++CONFIG_MAGIC_SYSRQ=y ++CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x1 ++CONFIG_MAGIC_SYSRQ_SERIAL=y ++CONFIG_MAGIC_SYSRQ_SERIAL_SEQUENCE="" ++CONFIG_DEBUG_FS=y ++CONFIG_DEBUG_FS_ALLOW_ALL=y ++# CONFIG_DEBUG_FS_DISALLOW_MOUNT is not set ++# CONFIG_DEBUG_FS_ALLOW_NONE is not set ++CONFIG_HAVE_ARCH_KGDB=y ++# CONFIG_KGDB is not set ++CONFIG_ARCH_HAS_UBSAN_SANITIZE_ALL=y ++# CONFIG_UBSAN is not set ++# end of Generic Kernel Debugging Instruments ++ ++CONFIG_DEBUG_KERNEL=y ++CONFIG_DEBUG_MISC=y ++ ++# ++# Memory Debugging ++# ++# CONFIG_PAGE_EXTENSION is not set ++# CONFIG_DEBUG_PAGEALLOC is not set ++# CONFIG_PAGE_OWNER is not set ++# CONFIG_PAGE_POISONING is not set ++# CONFIG_DEBUG_PAGE_REF is not set ++# CONFIG_DEBUG_RODATA_TEST is not set ++CONFIG_ARCH_HAS_DEBUG_WX=y ++# CONFIG_DEBUG_WX is not set ++CONFIG_GENERIC_PTDUMP=y ++# CONFIG_PTDUMP_DEBUGFS is not set ++# CONFIG_DEBUG_OBJECTS is not set ++# CONFIG_SLUB_DEBUG_ON is not set ++# CONFIG_SLUB_STATS is not set ++CONFIG_HAVE_DEBUG_KMEMLEAK=y ++# CONFIG_DEBUG_KMEMLEAK is not set ++# CONFIG_DEBUG_STACK_USAGE is not set ++CONFIG_SCHED_STACK_END_CHECK=y ++CONFIG_ARCH_HAS_DEBUG_VM_PGTABLE=y ++# CONFIG_DEBUG_VM is not set ++# CONFIG_DEBUG_VM_PGTABLE is not set ++CONFIG_ARCH_HAS_DEBUG_VIRTUAL=y ++# CONFIG_DEBUG_VIRTUAL is not set ++CONFIG_DEBUG_MEMORY_INIT=y ++# CONFIG_DEBUG_PER_CPU_MAPS is not set ++CONFIG_HAVE_ARCH_KASAN=y 
++CONFIG_HAVE_ARCH_KASAN_VMALLOC=y ++CONFIG_CC_HAS_KASAN_GENERIC=y ++# end of Memory Debugging ++ ++# CONFIG_DEBUG_SHIRQ is not set ++ ++# ++# Debug Oops, Lockups and Hangs ++# ++CONFIG_PANIC_ON_OOPS=y ++CONFIG_PANIC_ON_OOPS_VALUE=1 ++CONFIG_PANIC_TIMEOUT=0 ++CONFIG_LOCKUP_DETECTOR=y ++CONFIG_SOFTLOCKUP_DETECTOR=y ++# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set ++CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 ++CONFIG_HARDLOCKUP_DETECTOR_PERF=y ++CONFIG_HARDLOCKUP_CHECK_TIMESTAMP=y ++CONFIG_HARDLOCKUP_DETECTOR=y ++CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y ++CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE=1 ++CONFIG_DETECT_HUNG_TASK=y ++CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120 ++# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set ++CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 ++# CONFIG_WQ_WATCHDOG is not set ++# CONFIG_TEST_LOCKUP is not set ++# end of Debug Oops, Lockups and Hangs ++ ++# ++# Scheduler Debugging ++# ++CONFIG_SCHED_DEBUG=y ++CONFIG_SCHED_INFO=y ++CONFIG_SCHEDSTATS=y ++# end of Scheduler Debugging ++ ++# CONFIG_DEBUG_TIMEKEEPING is not set ++CONFIG_DEBUG_PREEMPT=y ++ ++# ++# Lock Debugging (spinlocks, mutexes, etc...) ++# ++CONFIG_LOCK_DEBUGGING_SUPPORT=y ++CONFIG_PROVE_LOCKING=y ++# CONFIG_PROVE_RAW_LOCK_NESTING is not set ++# CONFIG_LOCK_STAT is not set ++CONFIG_DEBUG_RT_MUTEXES=y ++CONFIG_DEBUG_SPINLOCK=y ++CONFIG_DEBUG_MUTEXES=y ++CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y ++CONFIG_DEBUG_RWSEMS=y ++CONFIG_DEBUG_LOCK_ALLOC=y ++CONFIG_LOCKDEP=y ++# CONFIG_DEBUG_LOCKDEP is not set ++CONFIG_DEBUG_ATOMIC_SLEEP=y ++# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set ++# CONFIG_LOCK_TORTURE_TEST is not set ++# CONFIG_WW_MUTEX_SELFTEST is not set ++# end of Lock Debugging (spinlocks, mutexes, etc...) ++ ++CONFIG_TRACE_IRQFLAGS=y ++CONFIG_TRACE_IRQFLAGS_NMI=y ++CONFIG_STACKTRACE=y ++# CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set ++# CONFIG_DEBUG_KOBJECT is not set ++ ++# ++# Debug kernel data structures ++# ++# CONFIG_DEBUG_LIST is not set ++# CONFIG_DEBUG_PLIST is not set ++# CONFIG_DEBUG_SG is not set ++# CONFIG_DEBUG_NOTIFIERS is not set ++# CONFIG_BUG_ON_DATA_CORRUPTION is not set ++# end of Debug kernel data structures ++ ++CONFIG_DEBUG_CREDENTIALS=y ++ ++# ++# RCU Debugging ++# ++CONFIG_PROVE_RCU=y ++# CONFIG_RCU_PERF_TEST is not set ++# CONFIG_RCU_TORTURE_TEST is not set ++# CONFIG_RCU_REF_SCALE_TEST is not set ++CONFIG_RCU_CPU_STALL_TIMEOUT=60 ++# CONFIG_RCU_TRACE is not set ++# CONFIG_RCU_EQS_DEBUG is not set ++# end of RCU Debugging ++ ++# CONFIG_DEBUG_WQ_FORCE_RR_CPU is not set ++# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set ++# CONFIG_CPU_HOTPLUG_STATE_CONTROL is not set ++# CONFIG_LATENCYTOP is not set ++CONFIG_USER_STACKTRACE_SUPPORT=y ++CONFIG_NOP_TRACER=y ++CONFIG_HAVE_FUNCTION_TRACER=y ++CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y ++CONFIG_HAVE_DYNAMIC_FTRACE=y ++CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y ++CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y ++CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y ++CONFIG_HAVE_SYSCALL_TRACEPOINTS=y ++CONFIG_HAVE_FENTRY=y ++CONFIG_HAVE_C_RECORDMCOUNT=y ++CONFIG_TRACE_CLOCK=y ++CONFIG_RING_BUFFER=y ++CONFIG_EVENT_TRACING=y ++CONFIG_CONTEXT_SWITCH_TRACER=y ++CONFIG_PREEMPTIRQ_TRACEPOINTS=y ++CONFIG_TRACING=y ++CONFIG_GENERIC_TRACER=y ++CONFIG_TRACING_SUPPORT=y ++CONFIG_FTRACE=y ++CONFIG_BOOTTIME_TRACING=y ++CONFIG_FUNCTION_TRACER=y ++CONFIG_FUNCTION_GRAPH_TRACER=y ++CONFIG_DYNAMIC_FTRACE=y ++CONFIG_DYNAMIC_FTRACE_WITH_REGS=y ++CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y ++CONFIG_FPROBE=y ++# CONFIG_FUNCTION_PROFILER is not set ++# CONFIG_STACK_TRACER is not set ++# CONFIG_IRQSOFF_TRACER is not set ++# 
CONFIG_PREEMPT_TRACER is not set ++# CONFIG_SCHED_TRACER is not set ++# CONFIG_HWLAT_TRACER is not set ++# CONFIG_MMIOTRACE is not set ++CONFIG_FTRACE_SYSCALLS=y ++# CONFIG_TRACER_SNAPSHOT is not set ++CONFIG_BRANCH_PROFILE_NONE=y ++# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set ++# CONFIG_PROFILE_ALL_BRANCHES is not set ++CONFIG_BLK_DEV_IO_TRACE=y ++CONFIG_KPROBE_EVENTS=y ++# CONFIG_KPROBE_EVENTS_ON_NOTRACE is not set ++CONFIG_UPROBE_EVENTS=y ++CONFIG_BPF_EVENTS=y ++CONFIG_DYNAMIC_EVENTS=y ++CONFIG_PROBE_EVENTS=y ++CONFIG_BPF_KPROBE_OVERRIDE=y ++CONFIG_FTRACE_MCOUNT_RECORD=y ++# CONFIG_SYNTH_EVENTS is not set ++# CONFIG_HIST_TRIGGERS is not set ++# CONFIG_TRACE_EVENT_INJECT is not set ++# CONFIG_TRACEPOINT_BENCHMARK is not set ++# CONFIG_RING_BUFFER_BENCHMARK is not set ++# CONFIG_TRACE_EVAL_MAP_FILE is not set ++# CONFIG_FTRACE_STARTUP_TEST is not set ++# CONFIG_RING_BUFFER_STARTUP_TEST is not set ++# CONFIG_PREEMPTIRQ_DELAY_TEST is not set ++# CONFIG_KPROBE_EVENT_GEN_TEST is not set ++# CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set ++# CONFIG_SAMPLES is not set ++CONFIG_HAVE_ARCH_KCSAN=y ++CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y ++# CONFIG_STRICT_DEVMEM is not set ++ ++# ++# x86 Debugging ++# ++CONFIG_TRACE_IRQFLAGS_SUPPORT=y ++CONFIG_TRACE_IRQFLAGS_NMI_SUPPORT=y ++CONFIG_X86_VERBOSE_BOOTUP=y ++CONFIG_EARLY_PRINTK=y ++# CONFIG_EARLY_PRINTK_DBGP is not set ++# CONFIG_EARLY_PRINTK_USB_XDBC is not set ++# CONFIG_EFI_PGT_DUMP is not set ++# CONFIG_DEBUG_TLBFLUSH is not set ++# CONFIG_IOMMU_DEBUG is not set ++CONFIG_HAVE_MMIOTRACE_SUPPORT=y ++# CONFIG_X86_DECODER_SELFTEST is not set ++CONFIG_IO_DELAY_0X80=y ++# CONFIG_IO_DELAY_0XED is not set ++# CONFIG_IO_DELAY_UDELAY is not set ++# CONFIG_IO_DELAY_NONE is not set ++# CONFIG_DEBUG_BOOT_PARAMS is not set ++# CONFIG_CPA_DEBUG is not set ++# CONFIG_DEBUG_ENTRY is not set ++# CONFIG_DEBUG_NMI_SELFTEST is not set ++CONFIG_X86_DEBUG_FPU=y ++# CONFIG_PUNIT_ATOM_DEBUG is not set ++CONFIG_UNWINDER_ORC=y ++# CONFIG_UNWINDER_FRAME_POINTER is not set ++# CONFIG_UNWINDER_GUESS is not set ++# end of x86 Debugging ++ ++# ++# Kernel Testing and Coverage ++# ++# CONFIG_KUNIT is not set ++# CONFIG_NOTIFIER_ERROR_INJECTION is not set ++CONFIG_FUNCTION_ERROR_INJECTION=y ++CONFIG_FAULT_INJECTION=y ++# CONFIG_FAILSLAB is not set ++# CONFIG_FAIL_PAGE_ALLOC is not set ++# CONFIG_FAIL_MAKE_REQUEST is not set ++# CONFIG_FAIL_IO_TIMEOUT is not set ++# CONFIG_FAIL_FUTEX is not set ++CONFIG_FAULT_INJECTION_DEBUG_FS=y ++CONFIG_FAIL_FUNCTION=y ++CONFIG_ARCH_HAS_KCOV=y ++CONFIG_CC_HAS_SANCOV_TRACE_PC=y ++# CONFIG_KCOV is not set ++# CONFIG_RUNTIME_TESTING_MENU is not set ++# CONFIG_MEMTEST is not set ++# end of Kernel Testing and Coverage ++# end of Kernel hacking +diff --git a/src/cc/libbpf/travis-ci/vmtest/configs/whitelist/WHITELIST-4.9.0 b/src/cc/libbpf/travis-ci/vmtest/configs/whitelist/WHITELIST-4.9.0 +new file mode 100644 +index 0000000..ee0d3db +--- /dev/null ++++ b/src/cc/libbpf/travis-ci/vmtest/configs/whitelist/WHITELIST-4.9.0 +@@ -0,0 +1,8 @@ ++# btf_dump -- need to disable data dump sub-tests ++core_retro ++cpu_mask ++hashmap ++legacy_printk ++perf_buffer ++section_names ++ +diff --git a/src/cc/libbpf/travis-ci/vmtest/configs/whitelist/WHITELIST-5.5.0 b/src/cc/libbpf/travis-ci/vmtest/configs/whitelist/WHITELIST-5.5.0 +new file mode 100644 +index 0000000..87f72f9 +--- /dev/null ++++ b/src/cc/libbpf/travis-ci/vmtest/configs/whitelist/WHITELIST-5.5.0 +@@ -0,0 +1,55 @@ ++# attach_probe ++autoload ++bpf_verif_scale ++cgroup_attach_autodetach ++cgroup_attach_override 
++core_autosize ++core_extern ++core_read_macros ++core_reloc ++core_retro ++cpu_mask ++endian ++get_branch_snapshot ++get_stackid_cannot_attach ++global_data ++global_data_init ++global_func_args ++hashmap ++l4lb_all ++legacy_printk ++linked_funcs ++linked_maps ++map_lock ++obj_name ++perf_buffer ++perf_event_stackmap ++pinning ++pkt_md_access ++probe_user ++queue_stack_map ++raw_tp_writable_reject_nbd_invalid ++raw_tp_writable_test_run ++rdonly_maps ++section_names ++signal_pending ++skeleton ++sockmap_ktls ++sockopt ++sockopt_inherit ++sockopt_multi ++spinlock ++stacktrace_map ++stacktrace_map_raw_tp ++static_linked ++task_fd_query_rawtp ++task_fd_query_tp ++tc_bpf ++tcp_estats ++tcp_rtt ++tp_attach_query ++usdt/urand_pid_attach ++xdp ++xdp_info ++xdp_noinline ++xdp_perf +diff --git a/src/cc/libbpf/travis-ci/vmtest/helpers.sh b/src/cc/libbpf/travis-ci/vmtest/helpers.sh +new file mode 100755 +index 0000000..3b2cda0 +--- /dev/null ++++ b/src/cc/libbpf/travis-ci/vmtest/helpers.sh +@@ -0,0 +1,36 @@ ++# $1 - start or end ++# $2 - fold identifier, no spaces ++# $3 - fold section description ++foldable() { ++ local YELLOW='\033[1;33m' ++ local NOCOLOR='\033[0m' ++ if [ $1 = "start" ]; then ++ line="::group::$2" ++ if [ ! -z "${3:-}" ]; then ++ line="$line - ${YELLOW}$3${NOCOLOR}" ++ fi ++ else ++ line="::endgroup::" ++ fi ++ echo -e "$line" ++} ++ ++__print() { ++ local TITLE="" ++ if [[ -n $2 ]]; then ++ TITLE=" title=$2" ++ fi ++ echo "::$1${TITLE}::$3" ++} ++ ++# $1 - title ++# $2 - message ++print_error() { ++ __print error $1 $2 ++} ++ ++# $1 - title ++# $2 - message ++print_notice() { ++ __print notice $1 $2 ++} +diff --git a/src/cc/libbpf/travis-ci/vmtest/run_selftests.sh b/src/cc/libbpf/travis-ci/vmtest/run_selftests.sh +new file mode 100755 +index 0000000..d5d44d1 +--- /dev/null ++++ b/src/cc/libbpf/travis-ci/vmtest/run_selftests.sh +@@ -0,0 +1,63 @@ ++#!/bin/bash ++ ++set -euo pipefail ++ ++source $(cd $(dirname $0) && pwd)/helpers.sh ++ ++ARCH=$(uname -m) ++ ++STATUS_FILE=/exitstatus ++ ++read_lists() { ++ (for path in "$@"; do ++ if [[ -s "$path" ]]; then ++ cat "$path" ++ fi; ++ done) | cut -d'#' -f1 | tr -s ' \t\n' ',' ++} ++ ++test_progs() { ++ if [[ "${KERNEL}" != '4.9.0' ]]; then ++ foldable start test_progs "Testing test_progs" ++ # "&& true" does not change the return code (it is not executed ++ # if the Python script fails), but it prevents exiting on a ++ # failure due to the "set -e". ++ ./test_progs ${BLACKLIST:+-d$BLACKLIST} ${WHITELIST:+-a$WHITELIST} && true ++ echo "test_progs:$?" >> "${STATUS_FILE}" ++ foldable end test_progs ++ fi ++ ++ foldable start test_progs-no_alu32 "Testing test_progs-no_alu32" ++ ./test_progs-no_alu32 ${BLACKLIST:+-d$BLACKLIST} ${WHITELIST:+-a$WHITELIST} && true ++ echo "test_progs-no_alu32:$?" >> "${STATUS_FILE}" ++ foldable end test_progs-no_alu32 ++} ++ ++test_maps() { ++ foldable start test_maps "Testing test_maps" ++ ./test_maps && true ++ echo "test_maps:$?" >> "${STATUS_FILE}" ++ foldable end test_maps ++} ++ ++test_verifier() { ++ foldable start test_verifier "Testing test_verifier" ++ ./test_verifier && true ++ echo "test_verifier:$?" 
>> "${STATUS_FILE}" ++ foldable end test_verifier ++} ++ ++foldable end vm_init ++ ++configs_path=${PROJECT_NAME}/vmtest/configs ++BLACKLIST=$(read_lists "$configs_path/blacklist/BLACKLIST-${KERNEL}" "$configs_path/blacklist/BLACKLIST-${KERNEL}.${ARCH}") ++WHITELIST=$(read_lists "$configs_path/whitelist/WHITELIST-${KERNEL}" "$configs_path/whitelist/WHITELIST-${KERNEL}.${ARCH}") ++ ++cd ${PROJECT_NAME}/selftests/bpf ++ ++test_progs ++ ++if [[ "${KERNEL}" == 'latest' ]]; then ++ # test_maps ++ test_verifier ++fi +-- +2.20.1 + diff --git a/bcc.spec b/bcc.spec index 862e041..ddcd48f 100644 --- a/bcc.spec +++ b/bcc.spec @@ -1,4 +1,4 @@ -%global anolis_release .0.2 +%global anolis_release .0.3 # luajit is not available RHEL 8 %bcond_with lua @@ -24,10 +24,11 @@ Patch4: %{name}-%{version}-Fix-clang-15-int-to-pointer-conversion-errors Patch5: %{name}-%{version}-Revert-tools-tcpaccept-Fix-support-for-v5.6-kernels.patch Patch6: %{name}-%{version}-Fix-get_kprobe_functions.patch +Patch1000: %{name}-%{version}-Add-libbpf-with-commit-0667206913b.patch + # Arches will be included as upstream support is added and dependencies are # satisfied in the respective arches ExcludeArch: i686 -Excludearch: loongarch64 BuildRequires: bison BuildRequires: cmake >= 2.8.7 @@ -44,9 +45,14 @@ BuildRequires: ncurses-devel %if %{with lua} BuildRequires: pkgconfig(luajit) %endif +%ifarch loongarch64 +BuildRequires: elfutils-devel +%else BuildRequires: libbpf-devel >= 0.5.0, libbpf-static >= 0.5.0 Requires: libbpf >= 0.5.0 +%endif + Requires: tar Recommends: kernel-devel Recommends: %{name}-tools = %{version}-%{release} @@ -111,13 +117,29 @@ Requires: python3-netaddr Command line tools for BPF Compiler Collection (BCC) %prep -%autosetup -p1 +%setup -q +%ifarch loongarch64 +%patch1000 -p1 +%else +%patch0 -p1 +%patch1 -p1 +%patch2 -p1 +%patch3 -p1 +%patch4 -p1 +%patch5 -p1 +%patch6 -p1 +%endif + %build %cmake . \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DREVISION_LAST=%{version} -DREVISION=%{version} -DPYTHON_CMD=python3 \ +%ifarch loongarch64 + -DCMAKE_USE_LIBBPF_PACKAGE:BOOL=FALSE \ +%else -DCMAKE_USE_LIBBPF_PACKAGE:BOOL=TRUE \ +%endif %{?with_llvm_shared:-DENABLE_LLVM_SHARED=1} %make_build @@ -221,6 +243,9 @@ done %changelog +* Mon Aug 28 2023 Hui Li - 0.25.0-2.0.3 +- Add support for loongarch64 + * Wed Aug 23 2023 Ren Bo - 5.14.0-4.0.2 - Rebuild with LLVM15 -- Gitee