From cdd2f99a4ec9c151c6d8180541e57a952093f8b8 Mon Sep 17 00:00:00 2001
From: waterwin
Date: Tue, 4 Jan 2022 12:53:25 +0000
Subject: [PATCH] ohos inclusion

category: feature
issue: #I4P72U
CVE: NA

Signed-off-by: qianjiaxing

----------------------------------------------

HMDFS is an overlay file system. Relying on the underlying file system
and a network connection between devices, it enables file exchange
across devices. Device view and merge view are provided: in the device
view, the shared directories of each device are exposed under
per-device directories; in the merge view, a collection of the shared
files of all devices is provided.

Signed-off-by: qianjiaxing
Signed-off-by: maojingjing
Signed-off-by: zhangzhiwei
Signed-off-by: ningzuobin
Signed-off-by: wangminmin
Signed-off-by: gaoshunli
Signed-off-by: weilongping
Signed-off-by: lvhao
Signed-off-by: xianghengliang
Signed-off-by: liuyu
Signed-off-by: zhouweilai
Signed-off-by: houtao
Signed-off-by: panqinxu
Signed-off-by: chenjinglong
---
 OAT.xml | 448 +++++
 README.OpenSource | 11 +
 fs/Kconfig | 1 +
 fs/Makefile | 1 +
 fs/hmdfs/Kconfig | 48 +
 fs/hmdfs/Makefile | 15 +
 fs/hmdfs/authority/authentication.c | 486 +++++
 fs/hmdfs/authority/authentication.h | 328 ++++
 fs/hmdfs/client_writeback.c | 519 ++++++
 fs/hmdfs/client_writeback.h | 136 ++
 fs/hmdfs/comm/connection.c | 1311 +++++++++++++
 fs/hmdfs/comm/connection.h | 356 ++++
 fs/hmdfs/comm/crypto.c | 260 +++
 fs/hmdfs/comm/crypto.h | 36 +
 fs/hmdfs/comm/device_node.c | 1665 +++++++++++++++++
 fs/hmdfs/comm/device_node.h | 101 +
 fs/hmdfs/comm/fault_inject.c | 134 ++
 fs/hmdfs/comm/fault_inject.h | 88 +
 fs/hmdfs/comm/message_verify.c | 985 ++++++++++
 fs/hmdfs/comm/message_verify.h | 27 +
 fs/hmdfs/comm/node_cb.c | 76 +
 fs/hmdfs/comm/node_cb.h | 44 +
 fs/hmdfs/comm/protocol.h | 489 +++++
 fs/hmdfs/comm/socket_adapter.c | 1151 ++++++++++++
 fs/hmdfs/comm/socket_adapter.h | 193 ++
 fs/hmdfs/comm/transport.c | 1220 ++++++++++++
 fs/hmdfs/comm/transport.h | 76 +
 fs/hmdfs/dentry.c | 303 +++
 fs/hmdfs/file_local.c | 246 +++
 fs/hmdfs/file_merge.c | 525 ++++++
 fs/hmdfs/file_remote.c | 1054 +++++++++++
 fs/hmdfs/file_remote.h | 26 +
 fs/hmdfs/file_root.c | 154 ++
 fs/hmdfs/hmdfs.h | 325 ++++
 fs/hmdfs/hmdfs_client.c | 1096 +++++++++++
 fs/hmdfs/hmdfs_client.h | 121 ++
 fs/hmdfs/hmdfs_dentryfile.c | 2680 +++++++++++++++++++++++++++
 fs/hmdfs/hmdfs_dentryfile.h | 342 ++++
 fs/hmdfs/hmdfs_device_view.h | 237 +++
 fs/hmdfs/hmdfs_merge_view.h | 153 ++
 fs/hmdfs/hmdfs_server.c | 2073 +++++++++++++++++++++
 fs/hmdfs/hmdfs_server.h | 75 +
 fs/hmdfs/hmdfs_trace.h | 800 ++++++++
 fs/hmdfs/inode.c | 254 +++
 fs/hmdfs/inode.h | 237 +++
 fs/hmdfs/inode_local.c | 963 ++++++++++
 fs/hmdfs/inode_merge.c | 1357 ++++++++++++++
 fs/hmdfs/inode_remote.c | 989 ++++++++++
 fs/hmdfs/inode_root.c | 307 +++
 fs/hmdfs/main.c | 1069 +++++++++++
 fs/hmdfs/server_writeback.c | 135 ++
 fs/hmdfs/server_writeback.h | 40 +
 fs/hmdfs/stash.c | 2247 ++++++++++++++++++++++
 fs/hmdfs/stash.h | 25 +
 fs/hmdfs/super.c | 153 ++
 55 files changed, 28191 insertions(+)
 create mode 100644 OAT.xml
 create mode 100644 README.OpenSource
 create mode 100644 fs/hmdfs/Kconfig
 create mode 100644 fs/hmdfs/Makefile
 create mode 100644 fs/hmdfs/authority/authentication.c
 create mode 100644 fs/hmdfs/authority/authentication.h
 create mode 100644 fs/hmdfs/client_writeback.c
 create mode 100644 fs/hmdfs/client_writeback.h
 create mode 100644 fs/hmdfs/comm/connection.c
 create mode 100644 fs/hmdfs/comm/connection.h
 create mode 100644 fs/hmdfs/comm/crypto.c
 create mode 100644
fs/hmdfs/comm/crypto.h create mode 100644 fs/hmdfs/comm/device_node.c create mode 100644 fs/hmdfs/comm/device_node.h create mode 100644 fs/hmdfs/comm/fault_inject.c create mode 100644 fs/hmdfs/comm/fault_inject.h create mode 100644 fs/hmdfs/comm/message_verify.c create mode 100644 fs/hmdfs/comm/message_verify.h create mode 100644 fs/hmdfs/comm/node_cb.c create mode 100644 fs/hmdfs/comm/node_cb.h create mode 100644 fs/hmdfs/comm/protocol.h create mode 100644 fs/hmdfs/comm/socket_adapter.c create mode 100644 fs/hmdfs/comm/socket_adapter.h create mode 100644 fs/hmdfs/comm/transport.c create mode 100644 fs/hmdfs/comm/transport.h create mode 100644 fs/hmdfs/dentry.c create mode 100644 fs/hmdfs/file_local.c create mode 100644 fs/hmdfs/file_merge.c create mode 100644 fs/hmdfs/file_remote.c create mode 100644 fs/hmdfs/file_remote.h create mode 100644 fs/hmdfs/file_root.c create mode 100644 fs/hmdfs/hmdfs.h create mode 100644 fs/hmdfs/hmdfs_client.c create mode 100644 fs/hmdfs/hmdfs_client.h create mode 100644 fs/hmdfs/hmdfs_dentryfile.c create mode 100644 fs/hmdfs/hmdfs_dentryfile.h create mode 100644 fs/hmdfs/hmdfs_device_view.h create mode 100644 fs/hmdfs/hmdfs_merge_view.h create mode 100644 fs/hmdfs/hmdfs_server.c create mode 100644 fs/hmdfs/hmdfs_server.h create mode 100644 fs/hmdfs/hmdfs_trace.h create mode 100644 fs/hmdfs/inode.c create mode 100644 fs/hmdfs/inode.h create mode 100644 fs/hmdfs/inode_local.c create mode 100644 fs/hmdfs/inode_merge.c create mode 100644 fs/hmdfs/inode_remote.c create mode 100644 fs/hmdfs/inode_root.c create mode 100644 fs/hmdfs/main.c create mode 100644 fs/hmdfs/server_writeback.c create mode 100644 fs/hmdfs/server_writeback.h create mode 100644 fs/hmdfs/stash.c create mode 100644 fs/hmdfs/stash.h create mode 100644 fs/hmdfs/super.c diff --git a/OAT.xml b/OAT.xml new file mode 100644 index 000000000000..4f2fcc9b1225 --- /dev/null +++ b/OAT.xml @@ -0,0 +1,448 @@ + + + + + + COPYING + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/README.OpenSource b/README.OpenSource new file mode 100644 index 000000000000..8313cf171abe --- /dev/null +++ b/README.OpenSource @@ -0,0 +1,11 @@ +[ + { + "Name": "linux-5.10", + "License": "GPL-2.0+", + "License File": "COPYING", + "Version Number": "5.10.79", + "Owner": "liuyu82@huawei.com", + "Upstream URL": "https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/log/?h=linux-5.10.y", + "Description": "linux kernel 5.10" + } +] \ No newline at end of file diff --git a/fs/Kconfig b/fs/Kconfig index da524c4d7b7e..b95f212be39e 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -22,6 +22,7 @@ config FS_IOMAP source "fs/ext2/Kconfig" source "fs/ext4/Kconfig" +source "fs/hmdfs/Kconfig" source "fs/jbd2/Kconfig" config FS_MBCACHE diff 
--git a/fs/Makefile b/fs/Makefile
index 999d1a23f036..d71954aaba20 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -71,6 +71,7 @@ obj-$(CONFIG_DLM)		+= dlm/
 obj-$(CONFIG_FSCACHE)		+= fscache/
 obj-$(CONFIG_REISERFS_FS)	+= reiserfs/
 obj-$(CONFIG_EXT4_FS)		+= ext4/
+obj-$(CONFIG_HMDFS_FS)		+= hmdfs/
 # We place ext4 before ext2 so that clean ext3 root fs's do NOT mount using the
 # ext2 driver, which doesn't know about journalling!  Explicitly request ext2
 # by giving the rootfstype= parameter.
diff --git a/fs/hmdfs/Kconfig b/fs/hmdfs/Kconfig
new file mode 100644
index 000000000000..379606a6f466
--- /dev/null
+++ b/fs/hmdfs/Kconfig
@@ -0,0 +1,48 @@
+config HMDFS_FS
+	tristate "HMDFS filesystem support"
+	help
+	  HMDFS is an overlay file system. Relying on the underlying file
+	  system, and given a network connection, it enables file exchange
+	  across devices. Device view and merge view are provided. In the
+	  device view, the shared directories of each device are exposed
+	  under per-device directories; in the merge view, a collection of
+	  the shared files of all devices is provided.
+
+config HMDFS_FS_PERMISSION
+	bool "HMDFS application permission management"
+	depends on HMDFS_FS
+	help
+	  HMDFS provides cross-device file and directory sharing. Only the
+	  same application can access the files and directories under the
+	  corresponding package directory. This option enables management
+	  and control of access permissions.
+
+	  If unsure, say N.
+
+config HMDFS_FS_ENCRYPTION
+	bool "HMDFS message encryption"
+	depends on HMDFS_FS && TLS
+	help
+	  HMDFS provides cross-device file and directory sharing by sending
+	  and receiving network messages. To ensure data security, TLS
+	  encryption is provided.
+
+	  If you want to improve performance, say N.
+
+config HMDFS_FS_DEBUG
+	bool "HMDFS debug log"
+	depends on HMDFS_FS
+	help
+	  HMDFS prints a lot of logs, many of which are debug messages that
+	  are unnecessary during normal operation but helpful when
+	  diagnosing problems.
+
+	  If unsure, say N.
+
+config HMDFS_FS_FAULT_INJECT
+	bool "HMDFS fault inject"
+	depends on HMDFS_FS
+	help
+	  HMDFS provides fault injection for testing.
+
+	  If unsure, say N.
diff --git a/fs/hmdfs/Makefile b/fs/hmdfs/Makefile
new file mode 100644
index 000000000000..25c3eef3dd9d
--- /dev/null
+++ b/fs/hmdfs/Makefile
@@ -0,0 +1,15 @@
+obj-$(CONFIG_HMDFS_FS) += hmdfs.o
+ccflags-y += -I$(src)
+
+hmdfs-y := main.o super.o inode.o dentry.o inode_root.o file_merge.o
+hmdfs-y += hmdfs_client.o hmdfs_server.o inode_local.o inode_remote.o
+hmdfs-y += inode_merge.o hmdfs_dentryfile.o file_root.o file_remote.o
+hmdfs-y += file_local.o client_writeback.o server_writeback.o stash.o
+
+hmdfs-y += comm/device_node.o comm/message_verify.o comm/node_cb.o
+hmdfs-y += comm/connection.o comm/socket_adapter.o comm/transport.o
+
+hmdfs-$(CONFIG_HMDFS_FS_ENCRYPTION) += comm/crypto.o
+hmdfs-$(CONFIG_HMDFS_FS_PERMISSION) += authority/authentication.o
+
+hmdfs-$(CONFIG_FS_FAULT_INJECTION) += comm/fault_inject.o
diff --git a/fs/hmdfs/authority/authentication.c b/fs/hmdfs/authority/authentication.c
new file mode 100644
index 000000000000..97d842147050
--- /dev/null
+++ b/fs/hmdfs/authority/authentication.c
@@ -0,0 +1,486 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/authority/authentication.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */ + +#include "authentication.h" +#include +#include + +#include "hmdfs.h" + +struct fs_struct *hmdfs_override_fsstruct(struct fs_struct *saved_fs) +{ +#if (defined CONFIG_HMDFS_FS_PERMISSION) && (defined CONFIG_SDCARD_FS) + struct fs_struct *copied_fs = copy_fs_struct(saved_fs); + + if (!copied_fs) + return NULL; + copied_fs->umask = 0; + task_lock(current); + current->fs = copied_fs; + task_unlock(current); + return copied_fs; +#else + return saved_fs; +#endif +} + +void hmdfs_revert_fsstruct(struct fs_struct *saved_fs, + struct fs_struct *copied_fs) +{ +#if (defined CONFIG_HMDFS_FS_PERMISSION) && (defined CONFIG_SDCARD_FS) + task_lock(current); + current->fs = saved_fs; + task_unlock(current); + free_fs_struct(copied_fs); +#endif +} + +const struct cred *hmdfs_override_fsids(bool is_recv_thread) +{ + struct cred *cred = NULL; + const struct cred *old_cred = NULL; + + cred = prepare_creds(); + if (!cred) + return NULL; + + cred->fsuid = MEDIA_RW_UID; + cred->fsgid = is_recv_thread ? + KGIDT_INIT((gid_t)AID_EVERYBODY) : MEDIA_RW_GID; + + old_cred = override_creds(cred); + + return old_cred; +} + +const struct cred *hmdfs_override_dir_fsids(struct inode *dir, + struct dentry *dentry, __u16 *_perm) +{ + struct hmdfs_inode_info *hii = hmdfs_i(dir); + struct cred *cred = NULL; + const struct cred *old_cred = NULL; + __u16 level = hmdfs_perm_get_next_level(hii->perm); + __u16 perm = 0; + + cred = prepare_creds(); + if (!cred) + return NULL; + + switch (level) { + case HMDFS_PERM_MNT: + /* system : media_rw */ + cred->fsuid = SYSTEM_UID; + perm = (hii->perm & HMDFS_DIR_TYPE_MASK) | level; + break; + case HMDFS_PERM_DFS: + /* + * data : system : media_rw + * system: system : media_rw, need authority + * other : media_rw : media_rw + **/ + if (!strcmp(dentry->d_name.name, PKG_ROOT_NAME)) { + cred->fsuid = SYSTEM_UID; + perm = HMDFS_DIR_DATA | level; + } else if (!strcmp(dentry->d_name.name, SYSTEM_NAME)) { + cred->fsuid = SYSTEM_UID; + perm = AUTH_SYSTEM | HMDFS_DIR_SYSTEM | level; + } else { + cred->fsuid = MEDIA_RW_UID; + perm = HMDFS_DIR_PUBLIC | level; + } + break; + case HMDFS_PERM_PKG: + if (is_data_dir(hii->perm)) { + /* + * Mkdir for app pkg. + * Get the appid by passing pkgname to configfs. + * Set ROOT + media_rw for remote install, + * local uninstall. + * Set appid + media_rw for local install. + */ + uid_t app_id = 0; + + if (app_id != 0) + cred->fsuid = KUIDT_INIT(app_id); + else + cred->fsuid = ROOT_UID; + perm = AUTH_PKG | HMDFS_DIR_PKG | level; + } else { + cred->fsuid = dir->i_uid; + perm = (hii->perm & AUTH_MASK) | HMDFS_DIR_DEFAULT | level; + } + break; + case HMDFS_PERM_OTHER: + cred->fsuid = dir->i_uid; + if (is_pkg_auth(hii->perm)) + perm = AUTH_PKG | HMDFS_DIR_PKG_SUB | level; + else + perm = (hii->perm & AUTH_MASK) | HMDFS_DIR_DEFAULT | level; + break; + default: + /* ! 
it should not get to here */ + hmdfs_err("hmdfs perm incorrect got default case, level:%u", level); + break; + } + + cred->fsgid = MEDIA_RW_GID; + *_perm = perm; + old_cred = override_creds(cred); + + return old_cred; +} + +int hmdfs_override_dir_id_fs(struct cache_fs_override *or, + struct inode *dir, + struct dentry *dentry, + __u16 *perm) +{ + or->saved_cred = hmdfs_override_dir_fsids(dir, dentry, perm); + if (!or->saved_cred) + return -ENOMEM; + + or->saved_fs = current->fs; + or->copied_fs = hmdfs_override_fsstruct(or->saved_fs); + if (!or->copied_fs) { + hmdfs_revert_fsids(or->saved_cred); + return -ENOMEM; + } + + return 0; +} + +void hmdfs_revert_dir_id_fs(struct cache_fs_override *or) +{ + hmdfs_revert_fsstruct(or->saved_fs, or->copied_fs); + hmdfs_revert_fsids(or->saved_cred); +} + +const struct cred *hmdfs_override_file_fsids(struct inode *dir, __u16 *_perm) +{ + struct hmdfs_inode_info *hii = hmdfs_i(dir); + struct cred *cred = NULL; + const struct cred *old_cred = NULL; + __u16 level = hmdfs_perm_get_next_level(hii->perm); + uint16_t perm; + + perm = HMDFS_FILE_DEFAULT | level; + + cred = prepare_creds(); + if (!cred) + return NULL; + + cred->fsuid = dir->i_uid; + cred->fsgid = dir->i_gid; + if (is_pkg_auth(hii->perm)) + perm = AUTH_PKG | HMDFS_FILE_PKG_SUB | level; + else + perm = (hii->perm & AUTH_MASK) | HMDFS_FILE_DEFAULT | level; + + *_perm = perm; + old_cred = override_creds(cred); + + return old_cred; +} + +void hmdfs_revert_fsids(const struct cred *old_cred) +{ + const struct cred *cur_cred; + + cur_cred = current->cred; + revert_creds(old_cred); + put_cred(cur_cred); +} + +int hmdfs_persist_perm(struct dentry *dentry, __u16 *perm) +{ + int err; + struct inode *minode = d_inode(dentry); + + if (!minode) + return -EINVAL; + + inode_lock(minode); + err = __vfs_setxattr(dentry, minode, HMDFS_PERM_XATTR, perm, + sizeof(*perm), XATTR_CREATE); + if (!err) + fsnotify_xattr(dentry); + else if (err && err != -EEXIST) + hmdfs_err("failed to setxattr, err=%d", err); + inode_unlock(minode); + return err; +} + +__u16 hmdfs_read_perm(struct inode *inode) +{ + __u16 ret = 0; + int size = 0; + struct dentry *dentry = d_find_alias(inode); + + if (!dentry) + return ret; + + size = __vfs_getxattr(dentry, inode, HMDFS_PERM_XATTR, &ret, + sizeof(ret)); + /* + * some file may not set setxattr with perm + * eg. files created in sdcard dir by other user + **/ + if (size < 0 || size != sizeof(ret)) + ret = HMDFS_ALL_MASK; + + dput(dentry); + return ret; +} + +static __u16 __inherit_perm_dir(struct inode *parent, struct inode *inode) +{ + __u16 perm = 0; + struct hmdfs_inode_info *info = hmdfs_i(parent); + __u16 level = hmdfs_perm_get_next_level(info->perm); + struct dentry *dentry = d_find_alias(inode); + + if (!dentry) + return perm; + + switch (level) { + case HMDFS_PERM_MNT: + /* system : media_rw */ + perm = (info->perm & HMDFS_DIR_TYPE_MASK) | level; + break; + case HMDFS_PERM_DFS: + /* + * data : system : media_rw + * system: system : media_rw, need authority + * other : media_rw : media_rw + **/ + if (!strcmp(dentry->d_name.name, PKG_ROOT_NAME)) { + // "data" + perm = HMDFS_DIR_DATA | level; + } else if (!strcmp(dentry->d_name.name, SYSTEM_NAME)) { + // "system" + perm = AUTH_SYSTEM | HMDFS_DIR_SYSTEM | level; + } else { + perm = HMDFS_DIR_PUBLIC | level; + } + break; + case HMDFS_PERM_PKG: + if (is_data_dir(info->perm)) { + /* + * Mkdir for app pkg. + * Get the appid by passing pkgname to configfs. + * Set ROOT + media_rw for remote install, + * local uninstall. 
+ * Set appid + media_rw for local install. + */ + perm = AUTH_PKG | HMDFS_DIR_PKG | level; + } else { + perm = (info->perm & AUTH_MASK) | HMDFS_DIR_DEFAULT | level; + } + break; + case HMDFS_PERM_OTHER: + if (is_pkg_auth(info->perm)) + perm = AUTH_PKG | HMDFS_DIR_PKG_SUB | level; + else + perm = (info->perm & AUTH_MASK) | HMDFS_DIR_DEFAULT | level; + break; + default: + /* ! it should not get to here */ + hmdfs_err("hmdfs perm incorrect got default case, level:%u", level); + break; + } + dput(dentry); + return perm; +} + +static __u16 __inherit_perm_file(struct inode *parent) +{ + struct hmdfs_inode_info *hii = hmdfs_i(parent); + __u16 level = hmdfs_perm_get_next_level(hii->perm); + uint16_t perm; + + perm = HMDFS_FILE_DEFAULT | level; + + if (is_pkg_auth(hii->perm)) + perm = AUTH_PKG | HMDFS_FILE_PKG_SUB | level; + else + perm = (hii->perm & AUTH_MASK) | HMDFS_FILE_DEFAULT | level; + + return perm; +} + +static void fixup_ownership(struct inode *child, struct dentry *lower_dentry, + uid_t uid) +{ + int err; + struct iattr newattrs; + + newattrs.ia_valid = ATTR_UID | ATTR_FORCE; + newattrs.ia_uid = KUIDT_INIT(uid); + if (!S_ISDIR(d_inode(lower_dentry)->i_mode)) + newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV; + + inode_lock(d_inode(lower_dentry)); + err = notify_change(lower_dentry, &newattrs, NULL); + inode_unlock(d_inode(lower_dentry)); + + if (!err) + child->i_uid = KUIDT_INIT(uid); + else + hmdfs_err("update PKG uid failed, err = %d", err); +} + +static void fixup_ownership_user_group(struct inode *child, struct dentry *lower_dentry, + uid_t uid, gid_t gid) +{ + int err; + struct iattr newattrs; + + newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_FORCE; + newattrs.ia_uid = KUIDT_INIT(uid); + newattrs.ia_gid = KGIDT_INIT(gid); + if (!S_ISDIR(d_inode(lower_dentry)->i_mode)) + newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; + + inode_lock(d_inode(lower_dentry)); + err = notify_change(lower_dentry, &newattrs, NULL); + inode_unlock(d_inode(lower_dentry)); + + if (!err) { + child->i_uid = KUIDT_INIT(uid); + child->i_gid = KGIDT_INIT(gid); + } else { + hmdfs_err("update PKG uid failed, err = %d", err); + } +} + +__u16 hmdfs_perm_inherit(struct inode *parent_inode, struct inode *child) +{ + __u16 perm; + + if (S_ISDIR(child->i_mode)) + perm = __inherit_perm_dir(parent_inode, child); + else + perm = __inherit_perm_file(parent_inode); + return perm; +} + +void check_and_fixup_ownership(struct inode *parent_inode, struct inode *child, + struct dentry *lower_dentry, const char *name) +{ + uid_t appid; + struct hmdfs_inode_info *info = hmdfs_i(child); + + if (info->perm == HMDFS_ALL_MASK) + info->perm = hmdfs_perm_inherit(parent_inode, child); + + switch (info->perm & HMDFS_DIR_TYPE_MASK) { + case HMDFS_DIR_PKG: + appid = 0; + if (appid != child->i_uid.val) + fixup_ownership(child, lower_dentry, appid); + + break; + case HMDFS_DIR_DATA: + case HMDFS_FILE_PKG_SUB: + case HMDFS_DIR_DEFAULT: + case HMDFS_FILE_DEFAULT: + if (parent_inode->i_uid.val != child->i_uid.val || + parent_inode->i_gid.val != child->i_gid.val) + fixup_ownership_user_group(child, lower_dentry, + parent_inode->i_uid.val, + parent_inode->i_gid.val); + break; + case HMDFS_DIR_PUBLIC: + fixup_ownership(child, lower_dentry, (uid_t)AID_MEDIA_RW); + + break; + default: + break; + } +} + +void check_and_fixup_ownership_remote(struct inode *dir, + struct dentry *dentry) +{ + struct hmdfs_inode_info *hii = hmdfs_i(dir); + struct inode *dinode = d_inode(dentry); + struct hmdfs_inode_info *dinfo = 
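+	/*
+	 * A note on the __u16 perm word manipulated above and below: it
+	 * packs four nibble fields defined in authentication.h, i.e.
+	 * mount point (0xF000) | auth (0x0F00) | dir/file type (0x00F0) |
+	 * level (0x000F). A minimal decoding sketch (illustrative helper,
+	 * not part of this patch):
+	 *
+	 *	static void hmdfs_perm_dump(__u16 perm)
+	 *	{
+	 *		pr_info("level=%u type=0x%02x auth=0x%02x\n",
+	 *			perm & HMDFS_PERM_MASK,
+	 *			perm & HMDFS_DIR_TYPE_MASK,
+	 *			perm & AUTH_MASK);
+	 *	}
+	 *
+	 * For example, a file created under a package directory ends up
+	 * with AUTH_PKG | HMDFS_FILE_PKG_SUB | HMDFS_PERM_OTHER, i.e.
+	 * 0x01B4.
+	 */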
hmdfs_i(dinode); + __u16 level = hmdfs_perm_get_next_level(hii->perm); + __u16 perm = 0; + + hmdfs_debug("level:0x%X", level); + switch (level) { + case HMDFS_PERM_MNT: + /* system : media_rw */ + dinode->i_uid = SYSTEM_UID; + perm = (hii->perm & HMDFS_DIR_TYPE_MASK) | level; + break; + case HMDFS_PERM_DFS: + /* + * data : system : media_rw + * system: system : media_rw, need authority + * other : media_rw : media_rw + **/ + if (!strcmp(dentry->d_name.name, PKG_ROOT_NAME)) { + // "data" + dinode->i_uid = SYSTEM_UID; + perm = HMDFS_DIR_DATA | level; + } else if (!strcmp(dentry->d_name.name, SYSTEM_NAME)) { + // "system" + dinode->i_uid = SYSTEM_UID; + perm = AUTH_SYSTEM | HMDFS_DIR_SYSTEM | level; + } else { + dinode->i_uid = MEDIA_RW_UID; + perm = HMDFS_DIR_PUBLIC | level; + } + break; + case HMDFS_PERM_PKG: + if (is_data_dir(hii->perm)) { + /* + * Mkdir for app pkg. + * Get the appid by passing pkgname to configfs. + * Set ROOT + media_rw for remote install, + * local uninstall. + * Set appid + media_rw for local install. + */ + uid_t app_id = 0; + + if (app_id != 0) + dinode->i_uid = KUIDT_INIT(app_id); + else + dinode->i_uid = ROOT_UID; + perm = AUTH_PKG | HMDFS_DIR_PKG | level; + } else { + dinode->i_uid = dir->i_uid; + perm = (hii->perm & AUTH_MASK) | HMDFS_DIR_DEFAULT | level; + } + break; + case HMDFS_PERM_OTHER: + dinode->i_uid = dir->i_uid; + if (is_pkg_auth(hii->perm)) + perm = AUTH_PKG | HMDFS_DIR_PKG_SUB | level; + else + perm = (hii->perm & AUTH_MASK) | HMDFS_DIR_DEFAULT | level; + break; + default: + /* ! it should not get to here */ + hmdfs_err("hmdfs perm incorrect got default case, level:%u", level); + break; + } + + dinode->i_gid = MEDIA_RW_GID; + dinfo->perm = perm; +} + +void hmdfs_root_inode_perm_init(struct inode *root_inode) +{ + struct hmdfs_inode_info *hii = hmdfs_i(root_inode); + + hii->perm = HMDFS_DIR_ROOT | HMDFS_PERM_MNT; + set_inode_uid(root_inode, SYSTEM_UID); + set_inode_gid(root_inode, MEDIA_RW_GID); +} diff --git a/fs/hmdfs/authority/authentication.h b/fs/hmdfs/authority/authentication.h new file mode 100644 index 000000000000..e8b7bed53fb9 --- /dev/null +++ b/fs/hmdfs/authority/authentication.h @@ -0,0 +1,328 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/authority/authentication.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef AUTHENTICATION_H +#define AUTHENTICATION_H + +#include +#include +#include +#include +#include +#include "hmdfs.h" + +struct cache_fs_override { + struct fs_struct *saved_fs; + struct fs_struct *copied_fs; + const struct cred *saved_cred; +}; + +#ifdef CONFIG_HMDFS_FS_PERMISSION + +#define AID_ROOT 0 +#define AID_SYSTEM 1000 +#define AID_SDCARD_RW 1015 +#define AID_MEDIA_RW 1023 +#define AID_EVERYBODY 9997 + +/* copied from sdcardfs/multiuser.h */ +#define AID_USER_OFFSET 100000 /* offset for uid ranges for each user */ + +#define HMDFS_PERM_XATTR "user.hmdfs.perm" + +#define ROOT_UID KUIDT_INIT(AID_ROOT) +#define SYSTEM_UID KUIDT_INIT(AID_SYSTEM) +#define MEDIA_RW_UID KUIDT_INIT(AID_MEDIA_RW) + +#define SYSTEM_GID KGIDT_INIT((gid_t) AID_SYSTEM) +#define MEDIA_RW_GID KGIDT_INIT(AID_MEDIA_RW) +#define SDCARD_RW_GID KGIDT_INIT(AID_SDCARD_RW) + +#define PKG_ROOT_NAME "data" +#define SYSTEM_NAME "system" + +/* + * | perm fix | permmnt | permdfs | permpkg | perm other + * /mnt/mdfs/ accoundID / device view / local / DATA / packageName /... + * / system /... + * / documents /... + * / devid /....... 
+ * / merge view / + * / sdcard / + **/ +#define HMDFS_PERM_MASK 0x000F + +#define HMDFS_PERM_FIX 0 +#define HMDFS_PERM_MNT 1 +#define HMDFS_PERM_DFS 2 +#define HMDFS_PERM_PKG 3 +#define HMDFS_PERM_OTHER 4 + +static inline bool is_perm_fix(__u16 perm) +{ + return (perm & HMDFS_PERM_MASK) == HMDFS_PERM_FIX; +} + +static inline bool is_perm_mnt(__u16 perm) +{ + return (perm & HMDFS_PERM_MASK) == HMDFS_PERM_MNT; +} + +static inline bool is_perm_dfs(__u16 perm) +{ + return (perm & HMDFS_PERM_MASK) == HMDFS_PERM_DFS; +} + +static inline bool is_perm_pkg(__u16 perm) +{ + return (perm & HMDFS_PERM_MASK) == HMDFS_PERM_PKG; +} + +static inline bool is_perm_other(__u16 perm) +{ + return (perm & HMDFS_PERM_MASK) == HMDFS_PERM_OTHER; +} + +static inline void hmdfs_check_cred(const struct cred *cred) +{ + if (cred->fsuid.val != AID_SYSTEM || cred->fsgid.val != AID_SYSTEM) + hmdfs_warning("uid is %u, gid is %u", cred->fsuid.val, + cred->fsgid.val); +} + +/* dir and file type mask for hmdfs */ +#define HMDFS_DIR_TYPE_MASK 0x00F0 + +/* LEVEL 0 perm fix - permmnt , only root dir */ +#define HMDFS_DIR_ROOT 0x0010 + +/* LEVEL 1 perm dfs */ +#define HMDFS_DIR_PUBLIC 0x0020 +#define HMDFS_DIR_DATA 0x0030 +#define HMDFS_DIR_SYSTEM 0x0040 + +/* LEVEL 2 HMDFS_PERM_PKG */ +#define HMDFS_DIR_PKG 0x0050 + +/* LEVEL 2~n HMDFS_PERM_OTHER */ +#define PUBLIC_FILE 0x0060 +#define PUBLIC_SUB_DIR 0x0070 +#define SYSTEM_SUB_DIR 0x0080 +#define SYSTEM_SUB_FILE 0x0090 + +#define HMDFS_DIR_PKG_SUB 0x00A0 +#define HMDFS_FILE_PKG_SUB 0x00B0 + +/* access right is derived + * PUBLIC_SUB_DIR SYSTEM_SUB_DIR HMDFS_DIR_PKG_SUB + * PUBLIC_FILE SYSTEM_SUB_FILE HMDFS_FILE_PKG_SUB + */ +#define HMDFS_DIR_DEFAULT 0x00C0 +#define HMDFS_FILE_DEFAULT 0x00D0 +#define HMDFS_TYPE_DEFAULT 0x0000 + +static inline bool is_data_dir(__u16 perm) +{ + return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_DIR_DATA; +} + +static inline bool is_pkg_dir(__u16 perm) +{ + return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_DIR_PKG; +} + +static inline bool is_pkg_sub_dir(__u16 perm) +{ + return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_DIR_PKG_SUB; +} + +static inline bool is_pkg_sub_file(__u16 perm) +{ + return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_FILE_PKG_SUB; +} + +static inline bool is_default_dir(__u16 perm) +{ + return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_DIR_DEFAULT; +} + +static inline bool is_default_file(__u16 perm) +{ + return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_FILE_DEFAULT; +} + +#define AUTH_MASK 0x0F00 +#define AUTH_PKG 0x0100 +#define AUTH_SYSTEM 0x0200 + +static inline bool is_pkg_auth(__u16 perm) +{ + return (perm & AUTH_MASK) == AUTH_PKG; +} + +static inline bool is_system_auth(__u16 perm) +{ + return (perm & AUTH_MASK) == AUTH_SYSTEM; +} + +#define HMDFS_MOUNT_POINT_MASK 0xF000 +#define HMDFS_MNT_COMMON 0x0000 // sdcard +#define HMDFS_MNT_SDCARD 0x1000 // sdcard +#define HMDFS_MNT_ACNTID 0x2000 // accound id + +#define HMDFS_ALL_MASK (HMDFS_MOUNT_POINT_MASK | AUTH_MASK | HMDFS_DIR_TYPE_MASK | HMDFS_PERM_MASK) + + +static inline void set_inode_gid(struct inode *inode, kgid_t gid) +{ + inode->i_gid = gid; +} + +static inline kuid_t get_inode_uid(struct inode *inode) +{ + kuid_t uid = inode->i_uid; + return uid; +} + +static inline void set_inode_uid(struct inode *inode, kuid_t uid) +{ + inode->i_uid = uid; +} + +static inline kuid_t hmdfs_override_inode_uid(struct inode *inode) +{ + kuid_t uid = get_inode_uid(inode); + + set_inode_uid(inode, current_fsuid()); + return uid; +} + +static inline void hmdfs_revert_inode_uid(struct inode *inode, kuid_t uid) +{ 
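+	/*
+	 * The override helpers declared here are used in strictly paired
+	 * save/restore fashion around operations on the lower filesystem.
+	 * A minimal usage sketch (do_lower_op() is a hypothetical lower-fs
+	 * operation, for illustration only):
+	 *
+	 *	const struct cred *old_cred = hmdfs_override_fsids(false);
+	 *	int err;
+	 *
+	 *	if (!old_cred)
+	 *		return -ENOMEM;
+	 *	err = do_lower_op();
+	 *	hmdfs_revert_fsids(old_cred);
+	 *
+	 * hmdfs_revert_fsids() both restores the saved cred and drops the
+	 * reference on the temporary cred installed via override_creds().
+	 */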
+ set_inode_uid(inode, uid); +} + +static inline const struct cred *hmdfs_override_creds(const struct cred *new) +{ + if (!new) + return NULL; + + return override_creds(new); +} + +static inline void hmdfs_revert_creds(const struct cred *old) +{ + if (old) + revert_creds(old); +} + +static inline __u16 hmdfs_perm_get_next_level(__u16 perm) +{ + __u16 level = (perm & HMDFS_PERM_MASK) + 1; + + if (level <= HMDFS_PERM_OTHER) + return level; + else + return HMDFS_PERM_OTHER; +} + +struct fs_struct *hmdfs_override_fsstruct(struct fs_struct *saved_fs); +void hmdfs_revert_fsstruct(struct fs_struct *saved_fs, + struct fs_struct *copied_fs); +const struct cred *hmdfs_override_fsids(bool is_recv_thread); +const struct cred *hmdfs_override_dir_fsids(struct inode *dir, + struct dentry *dentry, __u16 *perm); +const struct cred *hmdfs_override_file_fsids(struct inode *dir, __u16 *perm); +void hmdfs_revert_fsids(const struct cred *old_cred); +int hmdfs_persist_perm(struct dentry *dentry, __u16 *perm); +__u16 hmdfs_read_perm(struct inode *inode); +void hmdfs_root_inode_perm_init(struct inode *root_inode); +void check_and_fixup_ownership(struct inode *parent_inode, struct inode *child, + struct dentry *lower_dentry, const char *name); +int hmdfs_override_dir_id_fs(struct cache_fs_override *or, + struct inode *dir, + struct dentry *dentry, + __u16 *perm); +void hmdfs_revert_dir_id_fs(struct cache_fs_override *or); +void check_and_fixup_ownership_remote(struct inode *dir, + struct dentry *dentry); + +#else + +static inline +void hmdfs_root_inode_perm_init(struct inode *root_inode) +{ +} + +static inline +void hmdfs_revert_fsids(const struct cred *old_cred) +{ +} + +static inline +int hmdfs_override_dir_id_fs(struct cache_fs_override *or, + struct inode *dir, + struct dentry *dentry, + __u16 *perm) +{ + return 0; +} + +static inline +void hmdfs_revert_dir_id_fs(struct cache_fs_override *or) +{ +} + +static inline +void check_and_fixup_ownership(struct inode *parent_inode, struct inode *child, + struct dentry *lower_dentry, const char *name) +{ +} + +static inline +const struct cred *hmdfs_override_fsids(bool is_recv_thread) +{ + return ERR_PTR(-ENOTTY); +} + +static inline +const struct cred *hmdfs_override_creds(const struct cred *new) +{ + return ERR_PTR(-ENOTTY); +} + +static inline +void hmdfs_revert_creds(const struct cred *old) +{ + +} + +static inline +void check_and_fixup_ownership_remote(struct inode *dir, + struct dentry *dentry) +{ +} + +static inline +kuid_t hmdfs_override_inode_uid(struct inode *inode) +{ + return KUIDT_INIT((uid_t)0); +} + +static inline +void hmdfs_revert_inode_uid(struct inode *inode, kuid_t uid) +{ +} + +static inline +void hmdfs_check_cred(const struct cred *cred) +{ +} + +#endif /* CONFIG_HMDFS_FS_PERMISSION */ + +#endif diff --git a/fs/hmdfs/client_writeback.c b/fs/hmdfs/client_writeback.c new file mode 100644 index 000000000000..d4da7ec482a5 --- /dev/null +++ b/fs/hmdfs/client_writeback.c @@ -0,0 +1,519 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/client_writeback.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
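+ *
+ * The throttling below mirrors the core balance_dirty_pages() logic at
+ * two granularities, per file and per hmdfs instance. With the defaults
+ * from client_writeback.h (assuming 4 KiB pages), background writeback
+ * starts at 10 MB of dirty pages for a file and 50 MB for the whole fs,
+ * and writers are throttled at twice those values (20 MB / 100 MB).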
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hmdfs.h" +#include "hmdfs_trace.h" + +/* 200ms */ +#define HMDFS_MAX_PAUSE max((HZ / 5), 1) +#define HMDFS_BANDWIDTH_INTERVAL max((HZ / 5), 1) +/* Dirty type */ +#define HMDFS_DIRTY_FS 0 +#define HMDFS_DIRTY_FILE 1 +/* Exceed flags */ +#define HMDFS_FS_EXCEED (1 << HMDFS_DIRTY_FS) +#define HMDFS_FILE_EXCEED (1 << HMDFS_DIRTY_FILE) +/* Ratelimit calculate shift */ +#define HMDFS_LIMIT_SHIFT 10 + +void hmdfs_writeback_inodes_sb_handler(struct work_struct *work) +{ + struct hmdfs_writeback *hwb = container_of( + work, struct hmdfs_writeback, dirty_sb_writeback_work.work); + + try_to_writeback_inodes_sb(hwb->sbi->sb, WB_REASON_FS_FREE_SPACE); +} + +void hmdfs_writeback_inode_handler(struct work_struct *work) +{ + struct hmdfs_inode_info *info = NULL; + struct inode *inode = NULL; + struct hmdfs_writeback *hwb = container_of( + work, struct hmdfs_writeback, dirty_inode_writeback_work.work); + + spin_lock(&hwb->inode_list_lock); + while (likely(!list_empty(&hwb->inode_list_head))) { + info = list_first_entry(&hwb->inode_list_head, + struct hmdfs_inode_info, wb_list); + list_del_init(&info->wb_list); + spin_unlock(&hwb->inode_list_lock); + + inode = &info->vfs_inode; + write_inode_now(inode, 0); + iput(inode); + spin_lock(&hwb->inode_list_lock); + } + spin_unlock(&hwb->inode_list_lock); +} + +static void hmdfs_writeback_inodes_sb_delayed(struct super_block *sb, + unsigned int delay) +{ + struct hmdfs_sb_info *sbi = sb->s_fs_info; + unsigned long timeout; + + timeout = msecs_to_jiffies(delay); + if (!timeout || !work_busy(&sbi->h_wb->dirty_sb_writeback_work.work)) + mod_delayed_work(sbi->h_wb->dirty_sb_writeback_wq, + &sbi->h_wb->dirty_sb_writeback_work, timeout); +} + +static inline void hmdfs_writeback_inodes_sb(struct super_block *sb) +{ + hmdfs_writeback_inodes_sb_delayed(sb, 0); +} + +static void hmdfs_writeback_inode(struct super_block *sb, struct inode *inode) +{ + struct hmdfs_sb_info *sbi = sb->s_fs_info; + struct hmdfs_writeback *hwb = sbi->h_wb; + struct hmdfs_inode_info *info = hmdfs_i(inode); + + spin_lock(&hwb->inode_list_lock); + if (list_empty(&info->wb_list)) { + ihold(inode); + list_add_tail(&info->wb_list, &hwb->inode_list_head); + queue_delayed_work(hwb->dirty_inode_writeback_wq, + &hwb->dirty_inode_writeback_work, 0); + } + spin_unlock(&hwb->inode_list_lock); +} + +static unsigned long hmdfs_idirty_pages(struct inode *inode, int tag) +{ + struct pagevec pvec; + unsigned long nr_dirty_pages = 0; + pgoff_t index = 0; + +#if KERNEL_VERSION(4, 15, 0) <= LINUX_VERSION_CODE + pagevec_init(&pvec); +#else + pagevec_init(&pvec, 0); +#endif + while (pagevec_lookup_tag(&pvec, inode->i_mapping, &index, tag)) { + nr_dirty_pages += pagevec_count(&pvec); + pagevec_release(&pvec); + cond_resched(); + } + return nr_dirty_pages; +} + +static inline unsigned long hmdfs_ratio_thresh(unsigned long ratio, + unsigned long thresh) +{ + unsigned long ret = (ratio * thresh) >> HMDFS_LIMIT_SHIFT; + + return (ret == 0) ? 1 : ret; +} + +static inline unsigned long hmdfs_thresh_ratio(unsigned long base, + unsigned long thresh) +{ + unsigned long ratio = (base << HMDFS_LIMIT_SHIFT) / thresh; + + return (ratio == 0) ? 
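+	/*
+	 * hmdfs_ratio_thresh() and hmdfs_thresh_ratio() are inverses in
+	 * 10-bit fixed point (HMDFS_LIMIT_SHIFT). Worked example with the
+	 * defaults (4 KiB pages): file_bg_thresh = 2560 pages (10 MB) and
+	 * file_thresh = 5120 pages (20 MB) give
+	 * file_bg_ratio = (2560 << 10) / 5120 = 512, i.e. 0.5. If the
+	 * bandwidth logic later lowers file_thresh to 3000 pages, the
+	 * background thresh is rescaled to (512 * 3000) >> 10 = 1500
+	 * pages, preserving the 0.5 ratio.
+	 */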
1 : ratio;
+}
+
+void hmdfs_calculate_dirty_thresh(struct hmdfs_writeback *hwb)
+{
+	hwb->dirty_fs_thresh = DIV_ROUND_UP(hwb->dirty_fs_bytes, PAGE_SIZE);
+	hwb->dirty_file_thresh = DIV_ROUND_UP(hwb->dirty_file_bytes, PAGE_SIZE);
+	hwb->dirty_fs_bg_thresh =
+		DIV_ROUND_UP(hwb->dirty_fs_bg_bytes, PAGE_SIZE);
+	hwb->dirty_file_bg_thresh =
+		DIV_ROUND_UP(hwb->dirty_file_bg_bytes, PAGE_SIZE);
+
+	hwb->fs_bg_ratio = hmdfs_thresh_ratio(hwb->dirty_fs_bg_thresh,
+					      hwb->dirty_fs_thresh);
+	hwb->file_bg_ratio = hmdfs_thresh_ratio(hwb->dirty_file_bg_thresh,
+						hwb->dirty_file_thresh);
+	hwb->fs_file_ratio = hmdfs_thresh_ratio(hwb->dirty_file_thresh,
+						hwb->dirty_fs_thresh);
+}
+
+static void hmdfs_init_dirty_limit(struct hmdfs_dirty_throttle_control *hdtc)
+{
+	struct hmdfs_writeback *hwb = hdtc->hwb;
+
+	hdtc->fs_thresh = hdtc->hwb->dirty_fs_thresh;
+	hdtc->file_thresh = hdtc->hwb->dirty_file_thresh;
+	hdtc->fs_bg_thresh = hdtc->hwb->dirty_fs_bg_thresh;
+	hdtc->file_bg_thresh = hdtc->hwb->dirty_file_bg_thresh;
+
+	if (!hwb->dirty_auto_threshold)
+		return;
+
+	/*
+	 * Init thresh according to the previous bandwidth-adjusted thresh;
+	 * the thresh should be no more than the configured thresh.
+	 */
+	if (hwb->bw_fs_thresh < hdtc->fs_thresh) {
+		hdtc->fs_thresh = hwb->bw_fs_thresh;
+		hdtc->fs_bg_thresh = hmdfs_ratio_thresh(hwb->fs_bg_ratio,
+							hdtc->fs_thresh);
+	}
+	if (hwb->bw_file_thresh < hdtc->file_thresh) {
+		hdtc->file_thresh = hwb->bw_file_thresh;
+		hdtc->file_bg_thresh = hmdfs_ratio_thresh(hwb->file_bg_ratio,
+							  hdtc->file_thresh);
+	}
+	/*
+	 * The thresh should be updated the first time dirty pages exceed
+	 * the freerun ceiling.
+	 */
+	hdtc->thresh_time_stamp = jiffies - HMDFS_BANDWIDTH_INTERVAL - 1;
+}
+
+static void hmdfs_update_dirty_limit(struct hmdfs_dirty_throttle_control *hdtc)
+{
+	struct hmdfs_writeback *hwb = hdtc->hwb;
+	struct bdi_writeback *wb = hwb->wb;
+	unsigned int time_limit = hwb->writeback_timelimit;
+	unsigned long bw = wb->avg_write_bandwidth;
+	unsigned long thresh;
+
+	if (!hwb->dirty_auto_threshold)
+		return;
+
+	spin_lock(&hwb->write_bandwidth_lock);
+	if (bw > hwb->max_write_bandwidth)
+		hwb->max_write_bandwidth = bw;
+
+	if (bw < hwb->min_write_bandwidth)
+		hwb->min_write_bandwidth = bw;
+	hwb->avg_write_bandwidth = bw;
+	spin_unlock(&hwb->write_bandwidth_lock);
+
+	/*
+	 * If the bandwidth is lower than the lower limit, the peer has
+	 * probably gone offline, and it is meaningless to set such a low
+	 * thresh.
+ */ + bw = max(bw, hwb->bw_thresh_lowerlimit); + thresh = bw * time_limit / roundup_pow_of_two(HZ); + if (thresh >= hwb->dirty_fs_thresh) { + hdtc->fs_thresh = hwb->dirty_fs_thresh; + hdtc->file_thresh = hwb->dirty_file_thresh; + hdtc->fs_bg_thresh = hwb->dirty_fs_bg_thresh; + hdtc->file_bg_thresh = hwb->dirty_file_bg_thresh; + } else { + /* Adjust thresh according to current bandwidth */ + hdtc->fs_thresh = thresh; + hdtc->fs_bg_thresh = hmdfs_ratio_thresh(hwb->fs_bg_ratio, + hdtc->fs_thresh); + hdtc->file_thresh = hmdfs_ratio_thresh(hwb->fs_file_ratio, + hdtc->fs_thresh); + hdtc->file_bg_thresh = hmdfs_ratio_thresh(hwb->file_bg_ratio, + hdtc->file_thresh); + } + /* Save bandwidth adjusted thresh */ + hwb->bw_fs_thresh = hdtc->fs_thresh; + hwb->bw_file_thresh = hdtc->file_thresh; + /* Update time stamp */ + hdtc->thresh_time_stamp = jiffies; +} + +void hmdfs_update_ratelimit(struct hmdfs_writeback *hwb) +{ + struct hmdfs_dirty_throttle_control hdtc = {.hwb = hwb}; + + hmdfs_init_dirty_limit(&hdtc); + + /* hdtc.file_bg_thresh should be the lowest thresh */ + hwb->ratelimit_pages = hdtc.file_bg_thresh / + (num_online_cpus() * HMDFS_RATELIMIT_PAGES_GAP); + if (hwb->ratelimit_pages < HMDFS_MIN_RATELIMIT_PAGES) + hwb->ratelimit_pages = HMDFS_MIN_RATELIMIT_PAGES; +} + +/* This is a copy of wb_max_pause() */ +static unsigned long hmdfs_wb_pause(struct bdi_writeback *wb, + unsigned long wb_dirty) +{ + unsigned long bw = wb->avg_write_bandwidth; + unsigned long t; + + /* + * Limit pause time for small memory systems. If sleeping for too long + * time, a small pool of dirty/writeback pages may go empty and disk go + * idle. + * + * 8 serves as the safety ratio. + */ + t = wb_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8)); + t++; + + return min_t(unsigned long, t, HMDFS_MAX_PAUSE); +} + +static unsigned long +hmdfs_dirty_freerun_ceiling(struct hmdfs_dirty_throttle_control *hdtc, + unsigned int type) +{ + if (type == HMDFS_DIRTY_FS) + return (hdtc->fs_thresh + hdtc->fs_bg_thresh) / 2; + else /* HMDFS_DIRTY_FILE_TYPE */ + return (hdtc->file_thresh + hdtc->file_bg_thresh) / 2; +} + +/* This is a copy of dirty_poll_interval() */ +static inline unsigned long hmdfs_dirty_intv(unsigned long dirty, + unsigned long thresh) +{ + if (thresh > dirty) + return 1UL << (ilog2(thresh - dirty) >> 1); + return 1; +} + +static void hmdfs_balance_dirty_pages(struct address_space *mapping) +{ + struct inode *inode = mapping->host; + struct super_block *sb = inode->i_sb; + struct hmdfs_sb_info *sbi = sb->s_fs_info; + struct hmdfs_writeback *hwb = sbi->h_wb; + struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; + struct hmdfs_dirty_throttle_control hdtc = {.hwb = hwb}; + unsigned int dirty_exceeded = 0; + unsigned long start_time = jiffies; + unsigned long pause = 0; + + /* Add delay work to trigger timeout writeback */ + if (hwb->dirty_writeback_interval != 0) + hmdfs_writeback_inodes_sb_delayed( + sb, hwb->dirty_writeback_interval * 10); + + hmdfs_init_dirty_limit(&hdtc); + + while (1) { + unsigned long exceed = 0; + unsigned long diff; + + /* Per-filesystem overbalance writeback */ + hdtc.fs_nr_dirty = wb_stat_sum(wb, WB_RECLAIMABLE); + hdtc.fs_nr_reclaimable = + hdtc.fs_nr_dirty + wb_stat_sum(wb, WB_WRITEBACK); + if (hdtc.fs_nr_reclaimable < hdtc.file_bg_thresh) { + diff = hmdfs_dirty_intv(hdtc.fs_nr_reclaimable, + hdtc.file_thresh); + goto free_running; + } + + /* Per-file overbalance writeback */ + hdtc.file_nr_dirty = + hmdfs_idirty_pages(inode, PAGECACHE_TAG_DIRTY); + hdtc.file_nr_reclaimable = + 
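+		/*
+		 * Worked example for the bandwidth-based thresh above,
+		 * assuming HZ=100 and 4 KiB pages: with the default 5 s
+		 * writeback_timelimit (500 jiffies) and an average write
+		 * bandwidth of 12800 pages/s (50 MB/s),
+		 *	thresh = 12800 * 500 / roundup_pow_of_two(100)
+		 *	       = 12800 * 500 / 128 = 50000 pages (~195 MB),
+		 * which exceeds the 100 MB default dirty_fs_thresh, so the
+		 * configured thresholds are kept. On a link clamped to the
+		 * 1 MB/s lower limit (256 pages/s) the same formula yields
+		 * 1000 pages (~4 MB), bounding the dirty backlog so it can
+		 * be flushed within roughly the 5 s time limit.
+		 */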
hmdfs_idirty_pages(inode, PAGECACHE_TAG_WRITEBACK) + + hdtc.file_nr_dirty; + if ((hdtc.fs_nr_reclaimable < + hmdfs_dirty_freerun_ceiling(&hdtc, HMDFS_DIRTY_FS)) && + (hdtc.file_nr_reclaimable < + hmdfs_dirty_freerun_ceiling(&hdtc, HMDFS_DIRTY_FILE))) { + unsigned long fs_intv, file_intv; + + fs_intv = hmdfs_dirty_intv(hdtc.fs_nr_reclaimable, + hdtc.fs_thresh); + file_intv = hmdfs_dirty_intv(hdtc.file_nr_reclaimable, + hdtc.file_thresh); + diff = min(fs_intv, file_intv); +free_running: + current->nr_dirtied_pause = diff; + current->nr_dirtied = 0; + break; + } + + if (hdtc.fs_nr_reclaimable >= + hmdfs_dirty_freerun_ceiling(&hdtc, HMDFS_DIRTY_FS)) { + if (unlikely(!writeback_in_progress(wb))) + hmdfs_writeback_inodes_sb(sb); + } else { + hmdfs_writeback_inode(sb, inode); + } + + /* + * If dirty_auto_threshold is enabled, recalculate writeback + * thresh according to current bandwidth. Update bandwidth + * could be better if possible, but wb_update_bandwidth() is + * not exported, so we cannot update bandwidth here, so the + * bandwidth' update will be delayed if writing a lot to a + * single file. + */ + if (hwb->dirty_auto_threshold && + time_is_before_jiffies(hdtc.thresh_time_stamp + + HMDFS_BANDWIDTH_INTERVAL)) + hmdfs_update_dirty_limit(&hdtc); + + if (unlikely(hdtc.fs_nr_reclaimable >= hdtc.fs_thresh)) + exceed |= HMDFS_FS_EXCEED; + if (unlikely(hdtc.file_nr_reclaimable >= hdtc.file_thresh)) + exceed |= HMDFS_FILE_EXCEED; + + if (!exceed) { + trace_hmdfs_balance_dirty_pages(sbi, wb, &hdtc, + 0UL, start_time); + current->nr_dirtied = 0; + break; + } + /* + * Per-file or per-fs reclaimable pages exceed throttle limit, + * sleep pause time and check again. + */ + dirty_exceeded |= exceed; + if (dirty_exceeded && !hwb->dirty_exceeded) + hwb->dirty_exceeded = true; + + /* Pause */ + pause = hmdfs_wb_pause(wb, hdtc.fs_nr_reclaimable); + + trace_hmdfs_balance_dirty_pages(sbi, wb, &hdtc, pause, + start_time); + + __set_current_state(TASK_KILLABLE); + io_schedule_timeout(pause); + + if (fatal_signal_pending(current)) + break; + } + + if (!dirty_exceeded && hwb->dirty_exceeded) + hwb->dirty_exceeded = false; + + if (hdtc.fs_nr_reclaimable >= hdtc.fs_bg_thresh) { + if (unlikely(!writeback_in_progress(wb))) + hmdfs_writeback_inodes_sb(sb); + } else if (hdtc.file_nr_reclaimable >= hdtc.file_bg_thresh) { + hmdfs_writeback_inode(sb, inode); + } +} + +void hmdfs_balance_dirty_pages_ratelimited(struct address_space *mapping) +{ + struct hmdfs_sb_info *sbi = mapping->host->i_sb->s_fs_info; + struct hmdfs_writeback *hwb = sbi->h_wb; + int *bdp_ratelimits = NULL; + int ratelimit; + + if (!hwb->dirty_writeback_control) + return; + + /* Add delay work to trigger timeout writeback */ + if (hwb->dirty_writeback_interval != 0) + hmdfs_writeback_inodes_sb_delayed( + mapping->host->i_sb, + hwb->dirty_writeback_interval * 10); + + ratelimit = current->nr_dirtied_pause; + if (hwb->dirty_exceeded) + ratelimit = min(ratelimit, HMDFS_DIRTY_EXCEED_RATELIMIT); + + /* + * This prevents one CPU to accumulate too many dirtied pages + * without calling into hmdfs_balance_dirty_pages(), which can + * happen when there are 1000+ tasks, all of them start dirtying + * pages at exactly the same time, hence all honoured too large + * initial task->nr_dirtied_pause. 
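+ *
+ * Concretely: each task may dirty up to its own nr_dirtied_pause pages
+ * before calling in, but once the shared per-CPU bdp_ratelimits counter
+ * reaches hwb->ratelimit_pages, ratelimit is forced to 0 below and the
+ * current dirtier on this CPU enters hmdfs_balance_dirty_pages() even
+ * though its private budget is not yet used up.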
+ */ + preempt_disable(); + bdp_ratelimits = this_cpu_ptr(hwb->bdp_ratelimits); + + trace_hmdfs_balance_dirty_pages_ratelimited(sbi, hwb, *bdp_ratelimits); + + if (unlikely(current->nr_dirtied >= ratelimit)) { + *bdp_ratelimits = 0; + } else if (unlikely(*bdp_ratelimits >= hwb->ratelimit_pages)) { + *bdp_ratelimits = 0; + ratelimit = 0; + } + preempt_enable(); + + if (unlikely(current->nr_dirtied >= ratelimit)) + hmdfs_balance_dirty_pages(mapping); +} + +void hmdfs_destroy_writeback(struct hmdfs_sb_info *sbi) +{ + if (!sbi->h_wb) + return; + + flush_delayed_work(&sbi->h_wb->dirty_sb_writeback_work); + flush_delayed_work(&sbi->h_wb->dirty_inode_writeback_work); + destroy_workqueue(sbi->h_wb->dirty_sb_writeback_wq); + destroy_workqueue(sbi->h_wb->dirty_inode_writeback_wq); + free_percpu(sbi->h_wb->bdp_ratelimits); + kfree(sbi->h_wb); + sbi->h_wb = NULL; +} + +int hmdfs_init_writeback(struct hmdfs_sb_info *sbi) +{ + struct hmdfs_writeback *hwb; + char name[HMDFS_WQ_NAME_LEN]; + int ret = -ENOMEM; + + hwb = kzalloc(sizeof(struct hmdfs_writeback), GFP_KERNEL); + if (!hwb) + return ret; + + hwb->sbi = sbi; + hwb->wb = &sbi->sb->s_bdi->wb; + hwb->dirty_writeback_control = true; + hwb->dirty_writeback_interval = HM_DEFAULT_WRITEBACK_INTERVAL; + hwb->dirty_file_bg_bytes = HMDFS_FILE_BG_WB_BYTES; + hwb->dirty_fs_bg_bytes = HMDFS_FS_BG_WB_BYTES; + hwb->dirty_file_bytes = HMDFS_FILE_WB_BYTES; + hwb->dirty_fs_bytes = HMDFS_FS_WB_BYTES; + hmdfs_calculate_dirty_thresh(hwb); + hwb->bw_file_thresh = hwb->dirty_file_thresh; + hwb->bw_fs_thresh = hwb->dirty_fs_thresh; + spin_lock_init(&hwb->inode_list_lock); + INIT_LIST_HEAD(&hwb->inode_list_head); + hwb->dirty_exceeded = false; + hwb->ratelimit_pages = HMDFS_DEF_RATELIMIT_PAGES; + hwb->dirty_auto_threshold = true; + hwb->writeback_timelimit = HMDFS_DEF_WB_TIMELIMIT; + hwb->bw_thresh_lowerlimit = HMDFS_BW_THRESH_DEF_LIMIT; + spin_lock_init(&hwb->write_bandwidth_lock); + hwb->avg_write_bandwidth = 0; + hwb->max_write_bandwidth = 0; + hwb->min_write_bandwidth = ULONG_MAX; + hwb->bdp_ratelimits = alloc_percpu(int); + if (!hwb->bdp_ratelimits) + goto free_hwb; + + snprintf(name, sizeof(name), "dfs_ino_wb%u", sbi->seq); + hwb->dirty_inode_writeback_wq = create_singlethread_workqueue(name); + if (!hwb->dirty_inode_writeback_wq) { + hmdfs_err("Failed to create inode writeback workqueue!"); + goto free_bdp; + } + snprintf(name, sizeof(name), "dfs_sb_wb%u", sbi->seq); + hwb->dirty_sb_writeback_wq = create_singlethread_workqueue(name); + if (!hwb->dirty_sb_writeback_wq) { + hmdfs_err("Failed to create filesystem writeback workqueue!"); + goto free_i_wq; + } + INIT_DELAYED_WORK(&hwb->dirty_sb_writeback_work, + hmdfs_writeback_inodes_sb_handler); + INIT_DELAYED_WORK(&hwb->dirty_inode_writeback_work, + hmdfs_writeback_inode_handler); + sbi->h_wb = hwb; + return 0; +free_i_wq: + destroy_workqueue(hwb->dirty_inode_writeback_wq); +free_bdp: + free_percpu(sbi->h_wb->bdp_ratelimits); +free_hwb: + kfree(hwb); + return ret; +} diff --git a/fs/hmdfs/client_writeback.h b/fs/hmdfs/client_writeback.h new file mode 100644 index 000000000000..689a5e733ece --- /dev/null +++ b/fs/hmdfs/client_writeback.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/client_writeback.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */
+
+#ifndef CLIENT_WRITEBACK_H
+#define CLIENT_WRITEBACK_H
+
+#include "hmdfs.h"
+
+/*
+ * HM_DEFAULT_WRITEBACK_INTERVAL - centiseconds
+ * HMDFS_FILE_BG_WB_BYTES - background per-file threshold 10M
+ * HMDFS_FS_BG_WB_BYTES - background per-fs threshold 50M
+ * HMDFS_FILE_WB_BYTES - per-file throttle threshold
+ * HMDFS_FS_WB_BYTES - per-fs throttle threshold
+ */
+#define HM_DEFAULT_WRITEBACK_INTERVAL 500
+#define HMDFS_FILE_BG_WB_BYTES (10 * 1024 * 1024)
+#define HMDFS_FS_BG_WB_BYTES (50 * 1024 * 1024)
+#define HMDFS_FILE_WB_BYTES (HMDFS_FILE_BG_WB_BYTES << 1)
+#define HMDFS_FS_WB_BYTES (HMDFS_FS_BG_WB_BYTES << 1)
+
+/* writeback time limit (default 5s) */
+#define HMDFS_DEF_WB_TIMELIMIT (5 * HZ)
+#define HMDFS_MAX_WB_TIMELIMIT (30 * HZ)
+
+/* bandwidth adjusted lower limit (default 1MB/s) */
+#define HMDFS_BW_THRESH_MIN_LIMIT (1 << (20 - PAGE_SHIFT))
+#define HMDFS_BW_THRESH_MAX_LIMIT (100 << (20 - PAGE_SHIFT))
+#define HMDFS_BW_THRESH_DEF_LIMIT HMDFS_BW_THRESH_MIN_LIMIT
+
+#define HMDFS_DIRTY_EXCEED_RATELIMIT (32 >> (PAGE_SHIFT - 10))
+#define HMDFS_RATELIMIT_PAGES_GAP 16
+#define HMDFS_DEF_RATELIMIT_PAGES 32
+#define HMDFS_MIN_RATELIMIT_PAGES 1
+
+struct hmdfs_dirty_throttle_control {
+	struct hmdfs_writeback *hwb;
+	/* last time threshes are updated */
+	unsigned long thresh_time_stamp;
+
+	unsigned long file_bg_thresh;
+	unsigned long fs_bg_thresh;
+	unsigned long file_thresh;
+	unsigned long fs_thresh;
+
+	unsigned long file_nr_dirty;
+	unsigned long fs_nr_dirty;
+	unsigned long file_nr_reclaimable;
+	unsigned long fs_nr_reclaimable;
+};
+
+struct hmdfs_writeback {
+	struct hmdfs_sb_info *sbi;
+	struct bdi_writeback *wb;
+	/* enable hmdfs dirty writeback control */
+	bool dirty_writeback_control;
+
+	/* writeback per-file inode list */
+	struct list_head inode_list_head;
+	spinlock_t inode_list_lock;
+
+	/* centiseconds */
+	unsigned int dirty_writeback_interval;
+	/* per-file background threshold */
+	unsigned long dirty_file_bg_bytes;
+	unsigned long dirty_file_bg_thresh;
+	/* per-fs background threshold */
+	unsigned long dirty_fs_bg_bytes;
+	unsigned long dirty_fs_bg_thresh;
+	/* per-file throttle threshold */
+	unsigned long dirty_file_bytes;
+	unsigned long dirty_file_thresh;
+	/* per-fs throttle threshold */
+	unsigned long dirty_fs_bytes;
+	unsigned long dirty_fs_thresh;
+	/* ratio between background thresh and throttle thresh */
+	unsigned long fs_bg_ratio;
+	unsigned long file_bg_ratio;
+	/* ratio between file and fs throttle thresh */
+	unsigned long fs_file_ratio;
+
+	/*
+	 * Enable auto-thresh. If enabled, the background and throttle
+	 * thresh are no longer fixed values stored in dirty_*_bytes;
+	 * they are determined by the bandwidth of the network and the
+	 * writeback timelimit.
+ */ + bool dirty_auto_threshold; + unsigned int writeback_timelimit; + /* bandwitdh adjusted filesystem throttle thresh */ + unsigned long bw_fs_thresh; + /* bandwidth adjusted per-file throttle thresh */ + unsigned long bw_file_thresh; + /* bandwidth adjusted thresh lower limit */ + unsigned long bw_thresh_lowerlimit; + + /* reclaimable pages exceed throttle thresh */ + bool dirty_exceeded; + /* percpu dirty pages ratelimit */ + long ratelimit_pages; + /* count percpu dirty pages */ + int __percpu *bdp_ratelimits; + + /* per-fs writeback work */ + struct workqueue_struct *dirty_sb_writeback_wq; + struct delayed_work dirty_sb_writeback_work; + /* per-file writeback work */ + struct workqueue_struct *dirty_inode_writeback_wq; + struct delayed_work dirty_inode_writeback_work; + + /* per-fs writeback bandwidth */ + spinlock_t write_bandwidth_lock; + unsigned long max_write_bandwidth; + unsigned long min_write_bandwidth; + unsigned long avg_write_bandwidth; +}; + +void hmdfs_writeback_inodes_sb_handler(struct work_struct *work); + +void hmdfs_writeback_inode_handler(struct work_struct *work); + +void hmdfs_calculate_dirty_thresh(struct hmdfs_writeback *hwb); + +void hmdfs_update_ratelimit(struct hmdfs_writeback *hwb); + +void hmdfs_balance_dirty_pages_ratelimited(struct address_space *mapping); + +void hmdfs_destroy_writeback(struct hmdfs_sb_info *sbi); + +int hmdfs_init_writeback(struct hmdfs_sb_info *sbi); + +#endif diff --git a/fs/hmdfs/comm/connection.c b/fs/hmdfs/comm/connection.c new file mode 100644 index 000000000000..51e6f829eb34 --- /dev/null +++ b/fs/hmdfs/comm/connection.c @@ -0,0 +1,1311 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/comm/connection.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include "connection.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "device_node.h" +#include "hmdfs.h" +#include "message_verify.h" +#include "node_cb.h" +#include "protocol.h" +#include "socket_adapter.h" + +#ifdef CONFIG_HMDFS_FS_ENCRYPTION +#include "crypto.h" +#endif + +#define HMDFS_WAIT_REQUEST_END_MIN 20 +#define HMDFS_WAIT_REQUEST_END_MAX 30 + +#define HMDFS_WAIT_CONN_RELEASE (3 * HZ) + +#define HMDFS_RETRY_WB_WQ_MAX_ACTIVE 16 + +static void hs_fill_crypto_data(struct connection *conn_impl, __u8 ops, + void *data, __u32 len) +{ + struct crypto_body *body = NULL; + + if (len < sizeof(struct crypto_body)) { + hmdfs_info("crpto body len %u is err", len); + return; + } + body = (struct crypto_body *)data; + + /* this is only test, later need to fill right algorithm. */ + body->crypto |= HMDFS_HS_CRYPTO_KTLS_AES128; + body->crypto = cpu_to_le32(body->crypto); + + hmdfs_info("fill crypto. 
ccrtypto=0x%08x", body->crypto); +} + +static int hs_parse_crypto_data(struct connection *conn_impl, __u8 ops, + void *data, __u32 len) +{ + struct crypto_body *hs_crypto = NULL; + uint32_t crypto; + + if (len < sizeof(struct crypto_body)) { + hmdfs_info("handshake msg len error, len=%u", len); + return -1; + } + hs_crypto = (struct crypto_body *)data; + crypto = le16_to_cpu(hs_crypto->crypto); + conn_impl->crypto = crypto; + hmdfs_info("ops=%u, len=%u, crypto=0x%08x", ops, len, crypto); + return 0; +} + +static void hs_fill_case_sense_data(struct connection *conn_impl, __u8 ops, + void *data, __u32 len) +{ + struct case_sense_body *body = (struct case_sense_body *)data; + + if (len < sizeof(struct case_sense_body)) { + hmdfs_err("case sensitive len %u is err", len); + return; + } + body->case_sensitive = conn_impl->node->sbi->s_case_sensitive; +} + +static int hs_parse_case_sense_data(struct connection *conn_impl, __u8 ops, + void *data, __u32 len) +{ + struct case_sense_body *body = (struct case_sense_body *)data; + __u8 sensitive = conn_impl->node->sbi->s_case_sensitive ? 1 : 0; + + if (len < sizeof(struct case_sense_body)) { + hmdfs_info("case sensitive len %u is err", len); + return -1; + } + if (body->case_sensitive != sensitive) { + hmdfs_err("case sensitive inconsistent, server: %u,client: %u, ops: %u", + body->case_sensitive, sensitive, ops); + return -1; + } + return 0; +} + +static void hs_fill_feature_data(struct connection *conn_impl, __u8 ops, + void *data, __u32 len) +{ + struct feature_body *body = (struct feature_body *)data; + + if (len < sizeof(struct feature_body)) { + hmdfs_err("feature len %u is err", len); + return; + } + body->features = cpu_to_le64(conn_impl->node->sbi->s_features); + body->reserved = cpu_to_le64(0); +} + +static int hs_parse_feature_data(struct connection *conn_impl, __u8 ops, + void *data, __u32 len) +{ + struct feature_body *body = (struct feature_body *)data; + + if (len < sizeof(struct feature_body)) { + hmdfs_err("feature len %u is err", len); + return -1; + } + + conn_impl->node->features = le64_to_cpu(body->features); + return 0; +} + +/* should ensure len is small than 0xffff. */ +static const struct conn_hs_extend_reg s_hs_extend_reg[HS_EXTEND_CODE_COUNT] = { + [HS_EXTEND_CODE_CRYPTO] = { + .len = sizeof(struct crypto_body), + .resv = 0, + .filler = hs_fill_crypto_data, + .parser = hs_parse_crypto_data + }, + [HS_EXTEND_CODE_CASE_SENSE] = { + .len = sizeof(struct case_sense_body), + .resv = 0, + .filler = hs_fill_case_sense_data, + .parser = hs_parse_case_sense_data, + }, + [HS_EXTEND_CODE_FEATURE_SUPPORT] = { + .len = sizeof(struct feature_body), + .resv = 0, + .filler = hs_fill_feature_data, + .parser = hs_parse_feature_data, + }, + [HS_EXTEND_CODE_FEATURE_SUPPORT] = { + .len = sizeof(struct feature_body), + .resv = 0, + .filler = hs_fill_feature_data, + .parser = hs_parse_feature_data, + }, +}; + +static __u32 hs_get_extend_data_len(void) +{ + __u32 len; + int i; + + len = sizeof(struct conn_hs_extend_head); + + for (i = 0; i < HS_EXTEND_CODE_COUNT; i++) { + len += sizeof(struct extend_field_head); + len += s_hs_extend_reg[i].len; + } + + hmdfs_info("extend data total len is %u", len); + return len; +} + +static void hs_fill_extend_data(struct connection *conn_impl, __u8 ops, + void *extend_data, __u32 len) +{ + struct conn_hs_extend_head *extend_head = NULL; + struct extend_field_head *field = NULL; + uint8_t *body = NULL; + __u32 offset; + __u16 i; + + if (sizeof(struct conn_hs_extend_head) > len) { + hmdfs_info("len error. 
len=%u", len); + return; + } + extend_head = (struct conn_hs_extend_head *)extend_data; + extend_head->field_cn = 0; + offset = sizeof(struct conn_hs_extend_head); + + for (i = 0; i < HS_EXTEND_CODE_COUNT; i++) { + if (sizeof(struct extend_field_head) > (len - offset)) + break; + field = (struct extend_field_head *)((uint8_t *)extend_data + + offset); + offset += sizeof(struct extend_field_head); + + if (s_hs_extend_reg[i].len > (len - offset)) + break; + body = (uint8_t *)extend_data + offset; + offset += s_hs_extend_reg[i].len; + + field->code = cpu_to_le16(i); + field->len = cpu_to_le16(s_hs_extend_reg[i].len); + + if (s_hs_extend_reg[i].filler) + s_hs_extend_reg[i].filler(conn_impl, ops, + body, s_hs_extend_reg[i].len); + + extend_head->field_cn += 1; + } + + extend_head->field_cn = cpu_to_le32(extend_head->field_cn); +} + +static int hs_parse_extend_data(struct connection *conn_impl, __u8 ops, + void *extend_data, __u32 extend_len) +{ + struct conn_hs_extend_head *extend_head = NULL; + struct extend_field_head *field = NULL; + uint8_t *body = NULL; + __u32 offset; + __u32 field_cnt; + __u16 code; + __u16 len; + int i; + int ret; + + if (sizeof(struct conn_hs_extend_head) > extend_len) { + hmdfs_err("ops=%u,extend_len=%u", ops, extend_len); + return -1; + } + extend_head = (struct conn_hs_extend_head *)extend_data; + field_cnt = le32_to_cpu(extend_head->field_cn); + hmdfs_info("extend_len=%u,field_cnt=%u", extend_len, field_cnt); + + offset = sizeof(struct conn_hs_extend_head); + + for (i = 0; i < field_cnt; i++) { + if (sizeof(struct extend_field_head) > (extend_len - offset)) { + hmdfs_err("cnt err, op=%u, extend_len=%u, cnt=%u, i=%u", + ops, extend_len, field_cnt, i); + return -1; + } + field = (struct extend_field_head *)((uint8_t *)extend_data + + offset); + offset += sizeof(struct extend_field_head); + code = le16_to_cpu(field->code); + len = le16_to_cpu(field->len); + if (len > (extend_len - offset)) { + hmdfs_err("len err, op=%u, extend_len=%u, cnt=%u, i=%u", + ops, extend_len, field_cnt, i); + hmdfs_err("len err, code=%u, len=%u, offset=%u", code, + len, offset); + return -1; + } + + body = (uint8_t *)extend_data + offset; + offset += len; + if ((code < HS_EXTEND_CODE_COUNT) && + (s_hs_extend_reg[code].parser)) { + ret = s_hs_extend_reg[code].parser(conn_impl, ops, + body, len); + if (ret) + return ret; + } + } + return 0; +} + +static int hs_proc_msg_data(struct connection *conn_impl, __u8 ops, void *data, + __u32 data_len) +{ + struct connection_handshake_req *hs_req = NULL; + uint8_t *extend_data = NULL; + __u32 extend_len; + __u32 req_len; + int ret; + + if (!data) { + hmdfs_err("err, msg data is null"); + return -1; + } + + if (data_len < sizeof(struct connection_handshake_req)) { + hmdfs_err("ack msg data len error. data_len=%u, device_id=%llu", + data_len, conn_impl->node->device_id); + return -1; + } + + hs_req = (struct connection_handshake_req *)data; + req_len = le32_to_cpu(hs_req->len); + if (req_len > (data_len - sizeof(struct connection_handshake_req))) { + hmdfs_info( + "ack msg hs_req len(%u) error. 
data_len=%u, device_id=%llu", + req_len, data_len, conn_impl->node->device_id); + return -1; + } + extend_len = + data_len - sizeof(struct connection_handshake_req) - req_len; + extend_data = (uint8_t *)data + + sizeof(struct connection_handshake_req) + req_len; + ret = hs_parse_extend_data(conn_impl, ops, extend_data, extend_len); + if (!ret) + hmdfs_info( + "hs msg rcv, ops=%u, data_len=%u, device_id=%llu, req_len=%u", + ops, data_len, conn_impl->node->device_id, hs_req->len); + return ret; +} +#ifdef CONFIG_HMDFS_FS_ENCRYPTION +static int connection_handshake_init_tls(struct connection *conn_impl, __u8 ops) +{ + // init ktls config, use key1/key2 as init write-key of each direction + __u8 key1[HMDFS_KEY_SIZE]; + __u8 key2[HMDFS_KEY_SIZE]; + int ret; + + if ((ops != CONNECT_MESG_HANDSHAKE_RESPONSE) && + (ops != CONNECT_MESG_HANDSHAKE_ACK)) { + hmdfs_err("ops %u is err", ops); + return -EINVAL; + } + + update_key(conn_impl->master_key, key1, HKDF_TYPE_KEY_INITIATOR); + update_key(conn_impl->master_key, key2, HKDF_TYPE_KEY_ACCEPTER); + + if (ops == CONNECT_MESG_HANDSHAKE_ACK) { + memcpy(conn_impl->send_key, key1, HMDFS_KEY_SIZE); + memcpy(conn_impl->recv_key, key2, HMDFS_KEY_SIZE); + } else { + memcpy(conn_impl->send_key, key2, HMDFS_KEY_SIZE); + memcpy(conn_impl->recv_key, key1, HMDFS_KEY_SIZE); + } + + memset(key1, 0, HMDFS_KEY_SIZE); + memset(key2, 0, HMDFS_KEY_SIZE); + + hmdfs_info("hs: ops=%u start set crypto tls", ops); + ret = tls_crypto_info_init(conn_impl); + if (ret) + hmdfs_err("setting tls fail. ops is %u", ops); + + return ret; +} +#endif + +static int do_send_handshake(struct connection *conn_impl, __u8 ops, + __le16 request_id) +{ + int err; + struct connection_msg_head *hs_head = NULL; + struct connection_handshake_req *hs_data = NULL; + uint8_t *hs_extend_data = NULL; + struct hmdfs_send_data msg; + __u32 send_len; + __u32 len; + __u32 extend_len; + char buf[HMDFS_CID_SIZE] = { 0 }; + + len = scnprintf(buf, HMDFS_CID_SIZE, "%llu", 0ULL); + send_len = sizeof(struct connection_msg_head) + + sizeof(struct connection_handshake_req) + len; + + if (((ops == CONNECT_MESG_HANDSHAKE_RESPONSE) || + (ops == CONNECT_MESG_HANDSHAKE_ACK)) && + (conn_impl->node->version >= DFS_2_0)) { + extend_len = hs_get_extend_data_len(); + send_len += extend_len; + } + + hs_head = kzalloc(send_len, GFP_KERNEL); + if (!hs_head) + return -ENOMEM; + + hs_data = (struct connection_handshake_req + *)((uint8_t *)hs_head + + sizeof(struct connection_msg_head)); + + hs_data->len = cpu_to_le32(len); + memcpy(hs_data->dev_id, buf, len); + + if (((ops == CONNECT_MESG_HANDSHAKE_RESPONSE) || + ops == CONNECT_MESG_HANDSHAKE_ACK) && + (conn_impl->node->version >= DFS_2_0)) { + hs_extend_data = (uint8_t *)hs_data + + sizeof(struct connection_handshake_req) + len; + hs_fill_extend_data(conn_impl, ops, hs_extend_data, extend_len); + } + + hs_head->magic = HMDFS_MSG_MAGIC; + hs_head->version = DFS_2_0; + hs_head->flags |= 0x1; + hmdfs_info("Send handshake message: ops = %d, fd = %d", ops, + ((struct tcp_handle *)(conn_impl->connect_handle))->fd); + hs_head->operations = ops; + hs_head->request_id = request_id; + hs_head->datasize = cpu_to_le32(send_len); + hs_head->source = 0; + hs_head->msg_id = 0; + + msg.head = hs_head; + msg.head_len = sizeof(struct connection_msg_head); + msg.data = hs_data; + msg.len = send_len - msg.head_len; + msg.sdesc = NULL; + msg.sdesc_len = 0; + err = conn_impl->send_message(conn_impl, &msg); + kfree(hs_head); + return err; +} + +static int hmdfs_node_waiting_evt_sum(const struct 
hmdfs_peer *node) +{ + int sum = 0; + int i; + + for (i = 0; i < RAW_NODE_EVT_NR; i++) + sum += node->waiting_evt[i]; + + return sum; +} + +static int hmdfs_update_node_waiting_evt(struct hmdfs_peer *node, int evt, + unsigned int *seq) +{ + int last; + int sum; + unsigned int next; + + sum = hmdfs_node_waiting_evt_sum(node); + if (sum % RAW_NODE_EVT_NR) + last = !node->pending_evt; + else + last = node->pending_evt; + + /* duplicated event */ + if (evt == last) { + node->dup_evt[evt]++; + return 0; + } + + node->waiting_evt[evt]++; + hmdfs_debug("add node->waiting_evt[%d]=%d", evt, + node->waiting_evt[evt]); + + /* offline wait + online wait + offline wait = offline wait + * online wait + offline wait + online wait != online wait + * As the first online related resource (e.g. fd) must be invalidated + */ + if (node->waiting_evt[RAW_NODE_EVT_OFF] >= 2 && + node->waiting_evt[RAW_NODE_EVT_ON] >= 1) { + node->waiting_evt[RAW_NODE_EVT_OFF] -= 1; + node->waiting_evt[RAW_NODE_EVT_ON] -= 1; + node->seq_wr_idx -= 2; + node->merged_evt += 2; + } + + next = hmdfs_node_inc_evt_seq(node); + node->seq_tbl[(node->seq_wr_idx++) % RAW_NODE_EVT_MAX_NR] = next; + *seq = next; + + return 1; +} + +static void hmdfs_run_evt_cb_verbosely(struct hmdfs_peer *node, int raw_evt, + bool sync, unsigned int seq) +{ + int evt = (raw_evt == RAW_NODE_EVT_OFF) ? NODE_EVT_OFFLINE : + NODE_EVT_ONLINE; + int cur_evt_idx = sync ? 1 : 0; + + node->cur_evt[cur_evt_idx] = raw_evt; + node->cur_evt_seq[cur_evt_idx] = seq; + hmdfs_node_call_evt_cb(node, evt, sync, seq); + node->cur_evt[cur_evt_idx] = RAW_NODE_EVT_NR; +} + +static void hmdfs_node_evt_work(struct work_struct *work) +{ + struct hmdfs_peer *node = + container_of(work, struct hmdfs_peer, evt_dwork.work); + unsigned int seq; + + /* + * N-th sync cb completes before N-th async cb, + * so use seq_lock as a barrier in read & write path + * to ensure we can read the required seq. + */ + mutex_lock(&node->seq_lock); + seq = node->seq_tbl[(node->seq_rd_idx++) % RAW_NODE_EVT_MAX_NR]; + hmdfs_run_evt_cb_verbosely(node, node->pending_evt, false, seq); + mutex_unlock(&node->seq_lock); + + mutex_lock(&node->evt_lock); + if (hmdfs_node_waiting_evt_sum(node)) { + node->pending_evt = !node->pending_evt; + node->pending_evt_seq = + node->seq_tbl[node->seq_rd_idx % RAW_NODE_EVT_MAX_NR]; + node->waiting_evt[node->pending_evt]--; + /* sync cb has been done */ + schedule_delayed_work(&node->evt_dwork, + node->sbi->async_cb_delay * HZ); + } else { + node->last_evt = node->pending_evt; + node->pending_evt = RAW_NODE_EVT_NR; + } + mutex_unlock(&node->evt_lock); +} + +/* + * The running orders of cb are: + * + * (1) sync callbacks are invoked according to the queue order of raw events: + * ensured by seq_lock. + * (2) async callbacks are invoked according to the queue order of raw events: + * ensured by evt_lock & evt_dwork + * (3) async callback is invoked after sync callback of the same raw event: + * ensured by seq_lock. + * (4) async callback of N-th raw event and sync callback of (N+x)-th raw + * event can run concurrently. 
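+ *
+ * For example, with two queued raw events E1 = OFF and E2 = ON, rules
+ * (1)-(3) guarantee sync(E1) -> sync(E2), async(E1) -> async(E2) and
+ * sync(E1) -> async(E1), while rule (4) permits async(E1) to overlap
+ * with sync(E2).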
+ */ +static void hmdfs_queue_raw_node_evt(struct hmdfs_peer *node, int evt) +{ + unsigned int seq = 0; + + mutex_lock(&node->evt_lock); + if (node->pending_evt == RAW_NODE_EVT_NR) { + if (evt == node->last_evt) { + node->dup_evt[evt]++; + mutex_unlock(&node->evt_lock); + return; + } + node->pending_evt = evt; + seq = hmdfs_node_inc_evt_seq(node); + node->seq_tbl[(node->seq_wr_idx++) % RAW_NODE_EVT_MAX_NR] = seq; + node->pending_evt_seq = seq; + mutex_lock(&node->seq_lock); + mutex_unlock(&node->evt_lock); + /* call sync cb, then async cb */ + hmdfs_run_evt_cb_verbosely(node, evt, true, seq); + mutex_unlock(&node->seq_lock); + schedule_delayed_work(&node->evt_dwork, + node->sbi->async_cb_delay * HZ); + } else if (hmdfs_update_node_waiting_evt(node, evt, &seq) > 0) { + /* + * Take seq_lock firstly to ensure N-th sync cb + * is called before N-th async cb. + */ + mutex_lock(&node->seq_lock); + mutex_unlock(&node->evt_lock); + hmdfs_run_evt_cb_verbosely(node, evt, true, seq); + mutex_unlock(&node->seq_lock); + } else { + mutex_unlock(&node->evt_lock); + } +} + +void connection_send_handshake(struct connection *conn_impl, __u8 ops, + __le16 request_id) +{ + struct tcp_handle *tcp = NULL; + int err = do_send_handshake(conn_impl, ops, request_id); + + if (likely(err >= 0)) + return; + + tcp = conn_impl->connect_handle; + hmdfs_err("Failed to send handshake: err = %d, fd = %d", err, tcp->fd); + hmdfs_reget_connection(conn_impl); +} + +void connection_handshake_notify(struct hmdfs_peer *node, int notify_type) +{ + struct notify_param param; + + param.notify = notify_type; + param.fd = INVALID_SOCKET_FD; + memcpy(param.remote_cid, node->cid, HMDFS_CID_SIZE); + notify(node, ¶m); +} + + +void peer_online(struct hmdfs_peer *peer) +{ + // To evaluate if someone else has made the peer online + u8 prev_stat = xchg(&peer->status, NODE_STAT_ONLINE); + unsigned long jif_tmp = jiffies; + + if (prev_stat == NODE_STAT_ONLINE) + return; + WRITE_ONCE(peer->conn_time, jif_tmp); + WRITE_ONCE(peer->sbi->connections.recent_ol, jif_tmp); + hmdfs_queue_raw_node_evt(peer, RAW_NODE_EVT_ON); +} + +void connection_to_working(struct hmdfs_peer *node) +{ + struct connection *conn_impl = NULL; + struct tcp_handle *tcp = NULL; + + if (!node) + return; + mutex_lock(&node->conn_impl_list_lock); + list_for_each_entry(conn_impl, &node->conn_impl_list, list) { + if (conn_impl->type == CONNECT_TYPE_TCP && + conn_impl->status == CONNECT_STAT_WAIT_RESPONSE) { + tcp = conn_impl->connect_handle; + hmdfs_info("fd %d to working", tcp->fd); + conn_impl->status = CONNECT_STAT_WORKING; + } + } + mutex_unlock(&node->conn_impl_list_lock); + peer_online(node); +} + +static int connection_check_version(__u8 version) +{ + __u8 min_ver = USERSPACE_MAX_VER; + + if (version <= min_ver || version >= MAX_VERSION) { + hmdfs_info("version err. 
version %u", version); + return -1; + } + return 0; +} + +void connection_handshake_recv_handler(struct connection *conn_impl, void *buf, + void *data, __u32 data_len) +{ + __u8 version; + __u8 ops; + __u8 status; + int fd = ((struct tcp_handle *)(conn_impl->connect_handle))->fd; + struct connection_msg_head *head = (struct connection_msg_head *)buf; + int ret; + + version = head->version; + conn_impl->node->version = version; + if (connection_check_version(version) != 0) + goto out; + conn_impl->node->conn_operations = hmdfs_get_peer_operation(version); + ops = head->operations; + status = conn_impl->status; + switch (ops) { + case CONNECT_MESG_HANDSHAKE_REQUEST: + hmdfs_info( + "Recved handshake request: device_id = %llu, version = %d, head->len = %d, tcp->fd = %d", + conn_impl->node->device_id, version, head->datasize, fd); + connection_send_handshake(conn_impl, + CONNECT_MESG_HANDSHAKE_RESPONSE, + head->msg_id); + if (conn_impl->node->version >= DFS_2_0) { + conn_impl->status = CONNECT_STAT_WAIT_ACK; + conn_impl->node->status = NODE_STAT_SHAKING; + } else { + conn_impl->status = CONNECT_STAT_WORKING; + } + break; + case CONNECT_MESG_HANDSHAKE_RESPONSE: + hmdfs_info( + "Recved handshake response: device_id = %llu, cmd->status = %hhu, tcp->fd = %d", + conn_impl->node->device_id, status, fd); + if (status == CONNECT_STAT_WAIT_REQUEST) { + // must be 10.1 device, no need to set ktls + connection_to_working(conn_impl->node); + goto out; + } + + if (conn_impl->node->version >= DFS_2_0) { + ret = hs_proc_msg_data(conn_impl, ops, data, data_len); + if (ret) + goto nego_err; + connection_send_handshake(conn_impl, + CONNECT_MESG_HANDSHAKE_ACK, + head->msg_id); + hmdfs_info("respon rcv handle,conn_impl->crypto=0x%0x", + conn_impl->crypto); +#ifdef CONFIG_HMDFS_FS_ENCRYPTION + ret = connection_handshake_init_tls(conn_impl, ops); + if (ret) { + hmdfs_err("init_tls_key fail, ops %u", ops); + goto out; + } +#endif + } + + conn_impl->status = CONNECT_STAT_WORKING; + peer_online(conn_impl->node); + break; + case CONNECT_MESG_HANDSHAKE_ACK: + if (conn_impl->node->version >= DFS_2_0) { + ret = hs_proc_msg_data(conn_impl, ops, data, data_len); + if (ret) + goto nego_err; + hmdfs_info("ack rcv handle, conn_impl->crypto=0x%0x", + conn_impl->crypto); +#ifdef CONFIG_HMDFS_FS_ENCRYPTION + ret = connection_handshake_init_tls(conn_impl, ops); + if (ret) { + hmdfs_err("init_tls_key fail, ops %u", ops); + goto out; + } +#endif + conn_impl->status = CONNECT_STAT_WORKING; + peer_online(conn_impl->node); + break; + } + fallthrough; + default: + return; + } +out: + kfree(data); + return; +nego_err: + conn_impl->status = CONNECT_STAT_NEGO_FAIL; + connection_handshake_notify(conn_impl->node, + NOTIFY_OFFLINE); + hmdfs_err("protocol negotiation failed, remote device_id = %llu, tcp->fd = %d", + conn_impl->node->device_id, fd); + goto out; +} + +#ifdef CONFIG_HMDFS_FS_ENCRYPTION +static void update_tls_crypto_key(struct connection *conn, + struct hmdfs_head_cmd *head, void *data, + __u32 data_len) +{ + // rekey message handler + struct connection_rekey_request *rekey_req = NULL; + int ret = 0; + + if (hmdfs_message_verify(conn->node, head, data) < 0) { + hmdfs_err("Rekey msg %d has been abandoned", head->msg_id); + goto out_err; + } + + hmdfs_info("recv REKEY request"); + set_crypto_info(conn, SET_CRYPTO_RECV); + // update send key if requested + rekey_req = data; + if (le32_to_cpu(rekey_req->update_request) == UPDATE_REQUESTED) { + ret = tcp_send_rekey_request(conn); + if (ret == 0) + set_crypto_info(conn, 
SET_CRYPTO_SEND);
+	}
+out_err:
+	kfree(data);
+}
+
+static bool cmd_update_tls_crypto_key(struct connection *conn,
+				      struct hmdfs_head_cmd *head)
+{
+	__u8 version = conn->node->version;
+	struct tcp_handle *tcp = conn->connect_handle;
+
+	if (version < DFS_2_0 || conn->type != CONNECT_TYPE_TCP || !tcp)
+		return false;
+	return head->operations.command == F_CONNECT_REKEY;
+}
+#endif
+
+void connection_working_recv_handler(struct connection *conn_impl, void *buf,
+				     void *data, __u32 data_len)
+{
+#ifdef CONFIG_HMDFS_FS_ENCRYPTION
+	if (cmd_update_tls_crypto_key(conn_impl, buf)) {
+		update_tls_crypto_key(conn_impl, buf, data, data_len);
+		return;
+	}
+#endif
+	conn_impl->node->conn_operations->recvmsg(conn_impl->node, buf, data);
+}
+
+static void connection_release(struct kref *ref)
+{
+	struct tcp_handle *tcp = NULL;
+	struct connection *conn = container_of(ref, struct connection, ref_cnt);
+
+	hmdfs_info("connection release");
+	memset(conn->master_key, 0, HMDFS_KEY_SIZE);
+	memset(conn->send_key, 0, HMDFS_KEY_SIZE);
+	memset(conn->recv_key, 0, HMDFS_KEY_SIZE);
+	if (conn->close)
+		conn->close(conn);
+	tcp = conn->connect_handle;
+	crypto_free_aead(conn->tfm);
+	/* TODO: check and test whether fput(tcp->sock->file) is needed here */
+	if (tcp && tcp->sock) {
+		hmdfs_info("connection release: fd = %d, refcount %ld", tcp->fd,
+			   file_count(tcp->sock->file));
+		sockfd_put(tcp->sock);
+	}
+	if (tcp && tcp->recv_cache)
+		kmem_cache_destroy(tcp->recv_cache);
+
+	if (!list_empty(&conn->list)) {
+		mutex_lock(&conn->node->conn_impl_list_lock);
+		list_del(&conn->list);
+		mutex_unlock(&conn->node->conn_impl_list_lock);
+		/*
+		 * wake up hmdfs_disconnect_node so it can check
+		 * whether conn_deleting_list is empty.
+		 */
+		wake_up_interruptible(&conn->node->deleting_list_wq);
+	}
+
+	kfree(tcp);
+	kfree(conn);
+}
+
+static void hmdfs_peer_release(struct kref *ref)
+{
+	struct hmdfs_peer *peer = container_of(ref, struct hmdfs_peer, ref_cnt);
+	struct mutex *lock = &peer->sbi->connections.node_lock;
+
+	if (!list_empty(&peer->list))
+		hmdfs_info("releasing an on-sbi peer: device_id %llu",
+			   peer->device_id);
+	else
+		hmdfs_info("releasing a redundant peer: device_id %llu",
+			   peer->device_id);
+
+	cancel_delayed_work_sync(&peer->evt_dwork);
+	list_del(&peer->list);
+	idr_destroy(&peer->msg_idr);
+	idr_destroy(&peer->file_id_idr);
+	flush_workqueue(peer->req_handle_wq);
+	flush_workqueue(peer->async_wq);
+	flush_workqueue(peer->retry_wb_wq);
+	destroy_workqueue(peer->dentry_wq);
+	destroy_workqueue(peer->req_handle_wq);
+	destroy_workqueue(peer->async_wq);
+	destroy_workqueue(peer->retry_wb_wq);
+	destroy_workqueue(peer->reget_conn_wq);
+	kfree(peer);
+	mutex_unlock(lock);
+}
+
+void connection_put(struct connection *conn)
+{
+	struct mutex *lock = &conn->ref_lock;
+
+	kref_put_mutex(&conn->ref_cnt, connection_release, lock);
+}
+
+void peer_put(struct hmdfs_peer *peer)
+{
+	struct mutex *lock = &peer->sbi->connections.node_lock;
+
+	kref_put_mutex(&peer->ref_cnt, hmdfs_peer_release, lock);
+}
+
+static void hmdfs_dump_deleting_list(struct hmdfs_peer *node)
+{
+	struct connection *con = NULL;
+	struct tcp_handle *tcp = NULL;
+	int count = 0;
+
+	mutex_lock(&node->conn_impl_list_lock);
+	list_for_each_entry(con, &node->conn_deleting_list, list) {
+		tcp = con->connect_handle;
+		hmdfs_info("deleting list %d: device_id %llu tcp_fd %d refcnt %d",
+			   count, node->device_id, tcp ? 
tcp->fd : -1,
+			   kref_read(&con->ref_cnt));
+		count++;
+	}
+	mutex_unlock(&node->conn_impl_list_lock);
+}
+
+static bool hmdfs_conn_deleting_list_empty(struct hmdfs_peer *node)
+{
+	bool empty = false;
+
+	mutex_lock(&node->conn_impl_list_lock);
+	empty = list_empty(&node->conn_deleting_list);
+	mutex_unlock(&node->conn_impl_list_lock);
+
+	return empty;
+}
+
+void hmdfs_disconnect_node(struct hmdfs_peer *node)
+{
+	LIST_HEAD(local_conns);
+	struct connection *conn_impl = NULL;
+	struct connection *next = NULL;
+	struct tcp_handle *tcp = NULL;
+
+	if (unlikely(!node))
+		return;
+
+	hmdfs_node_inc_evt_seq(node);
+	/* Refer to comments in hmdfs_is_node_offlined() */
+	smp_mb__after_atomic();
+	node->status = NODE_STAT_OFFLINE;
+	hmdfs_info("Try to disconnect peer: device_id %llu", node->device_id);
+
+	mutex_lock(&node->conn_impl_list_lock);
+	if (!list_empty(&node->conn_impl_list))
+		list_replace_init(&node->conn_impl_list, &local_conns);
+	mutex_unlock(&node->conn_impl_list_lock);
+
+	list_for_each_entry_safe(conn_impl, next, &local_conns, list) {
+		tcp = conn_impl->connect_handle;
+		if (tcp && tcp->sock) {
+			kernel_sock_shutdown(tcp->sock, SHUT_RDWR);
+			hmdfs_info("shutdown sock: fd = %d, refcount %ld",
+				   tcp->fd, file_count(tcp->sock->file));
+		}
+		if (tcp)
+			tcp->fd = INVALID_SOCKET_FD;
+
+		tcp_close_socket(tcp);
+		list_del_init(&conn_impl->list);
+
+		connection_put(conn_impl);
+	}
+
+	if (wait_event_interruptible_timeout(node->deleting_list_wq,
+					     hmdfs_conn_deleting_list_empty(node),
+					     HMDFS_WAIT_CONN_RELEASE) <= 0)
+		hmdfs_dump_deleting_list(node);
+
+	/* wait for all in-flight requests to finish */
+	spin_lock(&node->idr_lock);
+	while (node->msg_idr_process) {
+		spin_unlock(&node->idr_lock);
+		usleep_range(HMDFS_WAIT_REQUEST_END_MIN,
+			     HMDFS_WAIT_REQUEST_END_MAX);
+		spin_lock(&node->idr_lock);
+	}
+	spin_unlock(&node->idr_lock);
+
+	hmdfs_queue_raw_node_evt(node, RAW_NODE_EVT_OFF);
+}
+
+static void hmdfs_run_simple_evt_cb(struct hmdfs_peer *node, int evt)
+{
+	unsigned int seq = hmdfs_node_inc_evt_seq(node);
+
+	mutex_lock(&node->seq_lock);
+	hmdfs_node_call_evt_cb(node, evt, true, seq);
+	mutex_unlock(&node->seq_lock);
+}
+
+static void hmdfs_del_peer(struct hmdfs_peer *node)
+{
+	/*
+	 * No need for offline evt cb, because all files must
+	 * have been flushed and closed, else the filesystem
+	 * will be un-mountable. 
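+	 * (In this file, hmdfs_del_peer() is reached only via
+	 * hmdfs_connections_stop().)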
+ */ + cancel_delayed_work_sync(&node->evt_dwork); + + hmdfs_run_simple_evt_cb(node, NODE_EVT_DEL); + + hmdfs_release_peer_sysfs(node); + + flush_workqueue(node->reget_conn_wq); + peer_put(node); +} + +void hmdfs_connections_stop(struct hmdfs_sb_info *sbi) +{ + struct hmdfs_peer *node = NULL; + struct hmdfs_peer *con_tmp = NULL; + + mutex_lock(&sbi->connections.node_lock); + list_for_each_entry_safe(node, con_tmp, &sbi->connections.node_list, + list) { + mutex_unlock(&sbi->connections.node_lock); + hmdfs_disconnect_node(node); + hmdfs_del_peer(node); + mutex_lock(&sbi->connections.node_lock); + } + mutex_unlock(&sbi->connections.node_lock); +} + +struct connection *get_conn_impl(struct hmdfs_peer *node, int connect_type) +{ + struct connection *conn_impl = NULL; + + if (!node) + return NULL; + mutex_lock(&node->conn_impl_list_lock); + list_for_each_entry(conn_impl, &node->conn_impl_list, list) { + if (conn_impl->type == connect_type && + conn_impl->status == CONNECT_STAT_WORKING) { + connection_get(conn_impl); + mutex_unlock(&node->conn_impl_list_lock); + return conn_impl; + } + } + mutex_unlock(&node->conn_impl_list_lock); + hmdfs_err_ratelimited("device %llu not find connection, type %d", + node->device_id, connect_type); + return NULL; +} + +void set_conn_sock_quickack(struct hmdfs_peer *node) +{ + struct connection *conn_impl = NULL; + struct tcp_handle *tcp = NULL; + int option = 1; + + if (!node) + return; + mutex_lock(&node->conn_impl_list_lock); + list_for_each_entry(conn_impl, &node->conn_impl_list, list) { + if (conn_impl->type == CONNECT_TYPE_TCP && + conn_impl->status == CONNECT_STAT_WORKING && + conn_impl->connect_handle) { + tcp = (struct tcp_handle *)(conn_impl->connect_handle); + tcp_sock_set_quickack(tcp->sock->sk, option); + } + } + mutex_unlock(&node->conn_impl_list_lock); +} + +struct hmdfs_peer *hmdfs_lookup_from_devid(struct hmdfs_sb_info *sbi, + uint64_t device_id) +{ + struct hmdfs_peer *con = NULL; + struct hmdfs_peer *lookup = NULL; + + if (!sbi) + return NULL; + mutex_lock(&sbi->connections.node_lock); + list_for_each_entry(con, &sbi->connections.node_list, list) { + if (con->status != NODE_STAT_ONLINE || + con->device_id != device_id) + continue; + lookup = con; + peer_get(lookup); + break; + } + mutex_unlock(&sbi->connections.node_lock); + return lookup; +} + +struct hmdfs_peer *hmdfs_lookup_from_cid(struct hmdfs_sb_info *sbi, + uint8_t *cid) +{ + struct hmdfs_peer *con = NULL; + struct hmdfs_peer *lookup = NULL; + + if (!sbi) + return NULL; + mutex_lock(&sbi->connections.node_lock); + list_for_each_entry(con, &sbi->connections.node_list, list) { + if (strncmp(con->cid, cid, HMDFS_CID_SIZE) != 0) + continue; + lookup = con; + peer_get(lookup); + break; + } + mutex_unlock(&sbi->connections.node_lock); + return lookup; +} + +static struct hmdfs_peer *lookup_peer_by_cid_unsafe(struct hmdfs_sb_info *sbi, + uint8_t *cid) +{ + struct hmdfs_peer *node = NULL; + + list_for_each_entry(node, &sbi->connections.node_list, list) + if (!strncmp(node->cid, cid, HMDFS_CID_SIZE)) { + peer_get(node); + return node; + } + return NULL; +} + +static struct hmdfs_peer *add_peer_unsafe(struct hmdfs_sb_info *sbi, + struct hmdfs_peer *peer2add) +{ + struct hmdfs_peer *peer; + int err; + + peer = lookup_peer_by_cid_unsafe(sbi, peer2add->cid); + if (peer) + return peer; + + err = hmdfs_register_peer_sysfs(sbi, peer2add); + if (err) { + hmdfs_err("register peer %llu sysfs err %d", + peer2add->device_id, err); + return ERR_PTR(err); + } + list_add_tail(&peer2add->list, 
&sbi->connections.node_list); + peer_get(peer2add); + hmdfs_run_simple_evt_cb(peer2add, NODE_EVT_ADD); + return peer2add; +} + +static struct hmdfs_peer * +alloc_peer(struct hmdfs_sb_info *sbi, uint8_t *cid, + const struct connection_operations *conn_operations) +{ + struct hmdfs_peer *node = kzalloc(sizeof(*node), GFP_KERNEL); + + if (!node) + return NULL; + + node->device_id = (u32)atomic_inc_return(&sbi->connections.conn_seq); + + node->async_wq = alloc_workqueue("dfs_async%u_%llu", WQ_MEM_RECLAIM, 0, + sbi->seq, node->device_id); + if (!node->async_wq) { + hmdfs_err("Failed to alloc async wq"); + goto out_err; + } + node->req_handle_wq = alloc_workqueue("dfs_req%u_%llu", + WQ_UNBOUND | WQ_MEM_RECLAIM, + sbi->async_req_max_active, + sbi->seq, node->device_id); + if (!node->req_handle_wq) { + hmdfs_err("Failed to alloc req wq"); + goto out_err; + } + node->dentry_wq = alloc_workqueue("dfs_dentry%u_%llu", + WQ_UNBOUND | WQ_MEM_RECLAIM, + 0, sbi->seq, node->device_id); + if (!node->dentry_wq) { + hmdfs_err("Failed to alloc dentry wq"); + goto out_err; + } + node->retry_wb_wq = alloc_workqueue("dfs_rwb%u_%llu", + WQ_UNBOUND | WQ_MEM_RECLAIM, + HMDFS_RETRY_WB_WQ_MAX_ACTIVE, + sbi->seq, node->device_id); + if (!node->retry_wb_wq) { + hmdfs_err("Failed to alloc retry writeback wq"); + goto out_err; + } + node->reget_conn_wq = alloc_workqueue("dfs_regetcon%u_%llu", + WQ_UNBOUND, 0, + sbi->seq, node->device_id); + if (!node->reget_conn_wq) { + hmdfs_err("Failed to alloc reget conn wq"); + goto out_err; + } + INIT_LIST_HEAD(&node->conn_impl_list); + mutex_init(&node->conn_impl_list_lock); + INIT_LIST_HEAD(&node->conn_deleting_list); + init_waitqueue_head(&node->deleting_list_wq); + idr_init(&node->msg_idr); + spin_lock_init(&node->idr_lock); + idr_init(&node->file_id_idr); + spin_lock_init(&node->file_id_lock); + INIT_LIST_HEAD(&node->list); + kref_init(&node->ref_cnt); + node->owner = sbi->seq; + node->conn_operations = conn_operations; + node->sbi = sbi; + node->status = NODE_STAT_SHAKING; + node->conn_time = jiffies; + memcpy(node->cid, cid, HMDFS_CID_SIZE); + atomic64_set(&node->sb_dirty_count, 0); + node->fid_cookie = 0; + atomic_set(&node->evt_seq, 0); + mutex_init(&node->seq_lock); + mutex_init(&node->offline_cb_lock); + mutex_init(&node->evt_lock); + node->pending_evt = RAW_NODE_EVT_NR; + node->last_evt = RAW_NODE_EVT_NR; + node->cur_evt[0] = RAW_NODE_EVT_NR; + node->cur_evt[1] = RAW_NODE_EVT_NR; + node->seq_wr_idx = (unsigned char)UINT_MAX; + node->seq_rd_idx = node->seq_wr_idx; + INIT_DELAYED_WORK(&node->evt_dwork, hmdfs_node_evt_work); + node->msg_idr_process = 0; + node->offline_start = false; + spin_lock_init(&node->wr_opened_inode_lock); + INIT_LIST_HEAD(&node->wr_opened_inode_list); + spin_lock_init(&node->stashed_inode_lock); + node->stashed_inode_nr = 0; + atomic_set(&node->rebuild_inode_status_nr, 0); + init_waitqueue_head(&node->rebuild_inode_status_wq); + INIT_LIST_HEAD(&node->stashed_inode_list); + node->need_rebuild_stash_list = false; + + return node; + +out_err: + if (node->async_wq) { + destroy_workqueue(node->async_wq); + node->async_wq = NULL; + } + if (node->req_handle_wq) { + destroy_workqueue(node->req_handle_wq); + node->req_handle_wq = NULL; + } + if (node->dentry_wq) { + destroy_workqueue(node->dentry_wq); + node->dentry_wq = NULL; + } + if (node->retry_wb_wq) { + destroy_workqueue(node->retry_wb_wq); + node->retry_wb_wq = NULL; + } + if (node->reget_conn_wq) { + destroy_workqueue(node->reget_conn_wq); + node->reget_conn_wq = NULL; + } + kfree(node); + return 
NULL; +} + +struct hmdfs_peer *hmdfs_get_peer(struct hmdfs_sb_info *sbi, uint8_t *cid) +{ + struct hmdfs_peer *peer = NULL, *on_sbi_peer = NULL; + const struct connection_operations *conn_opr_ptr = NULL; + + mutex_lock(&sbi->connections.node_lock); + peer = lookup_peer_by_cid_unsafe(sbi, cid); + mutex_unlock(&sbi->connections.node_lock); + if (peer) { + hmdfs_info("Got a existing peer: device_id = %llu", + peer->device_id); + goto out; + } + + conn_opr_ptr = hmdfs_get_peer_operation(DFS_2_0); + if (unlikely(!conn_opr_ptr)) { + hmdfs_info("Fatal! Cannot get peer operation"); + goto out; + } + peer = alloc_peer(sbi, cid, conn_opr_ptr); + if (unlikely(!peer)) { + hmdfs_info("Failed to alloc a peer"); + goto out; + } + + mutex_lock(&sbi->connections.node_lock); + on_sbi_peer = add_peer_unsafe(sbi, peer); + mutex_unlock(&sbi->connections.node_lock); + if (IS_ERR(on_sbi_peer)) { + peer_put(peer); + peer = NULL; + goto out; + } else if (unlikely(on_sbi_peer != peer)) { + hmdfs_info("Got a existing peer: device_id = %llu", + on_sbi_peer->device_id); + peer_put(peer); + peer = on_sbi_peer; + } else { + hmdfs_info("Got a newly allocated peer: device_id = %llu", + peer->device_id); + } + +out: + return peer; +} + +static void head_release(struct kref *kref) +{ + struct hmdfs_msg_idr_head *head; + struct hmdfs_peer *con; + + head = (struct hmdfs_msg_idr_head *)container_of(kref, + struct hmdfs_msg_idr_head, ref); + con = head->peer; + idr_remove(&con->msg_idr, head->msg_id); + spin_unlock(&con->idr_lock); + + kfree(head); +} + +void head_put(struct hmdfs_msg_idr_head *head) +{ + kref_put_lock(&head->ref, head_release, &head->peer->idr_lock); +} + +struct hmdfs_msg_idr_head *hmdfs_find_msg_head(struct hmdfs_peer *peer, int id) +{ + struct hmdfs_msg_idr_head *head = NULL; + + spin_lock(&peer->idr_lock); + head = idr_find(&peer->msg_idr, id); + if (head) + kref_get(&head->ref); + spin_unlock(&peer->idr_lock); + + return head; +} + +int hmdfs_alloc_msg_idr(struct hmdfs_peer *peer, enum MSG_IDR_TYPE type, + void *ptr) +{ + int ret = -EAGAIN; + struct hmdfs_msg_idr_head *head = ptr; + int end = peer->version < DFS_2_0 ? (USHRT_MAX + 1) : 0; + + idr_preload(GFP_KERNEL); + spin_lock(&peer->idr_lock); + if (!peer->offline_start) + ret = idr_alloc_cyclic(&peer->msg_idr, ptr, + 1, end, GFP_NOWAIT); + if (ret >= 0) { + kref_init(&head->ref); + head->msg_id = ret; + head->type = type; + head->peer = peer; + peer->msg_idr_process++; + ret = 0; + } + spin_unlock(&peer->idr_lock); + idr_preload_end(); + + return ret; +} diff --git a/fs/hmdfs/comm/connection.h b/fs/hmdfs/comm/connection.h new file mode 100644 index 000000000000..6f3ee1baddf2 --- /dev/null +++ b/fs/hmdfs/comm/connection.h @@ -0,0 +1,356 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/connection.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#ifndef HMDFS_CONNECTION_H +#define HMDFS_CONNECTION_H + +#ifdef CONFIG_HMDFS_FS_ENCRYPTION +#include +#endif + +#include +#include +#include "protocol.h" +#include "node_cb.h" + +#define HMDFS_KEY_SIZE 32 +#define HMDFS_IV_SIZE 12 +#define HMDFS_TAG_SIZE 16 +#define HMDFS_CID_SIZE 64 + +enum { + CONNECT_MESG_HANDSHAKE_REQUEST = 1, + CONNECT_MESG_HANDSHAKE_RESPONSE = 2, + CONNECT_MESG_HANDSHAKE_ACK = 3, +}; + +enum { + CONNECT_STAT_WAIT_REQUEST = 0, + CONNECT_STAT_WAIT_RESPONSE, + CONNECT_STAT_WORKING, + CONNECT_STAT_STOP, + CONNECT_STAT_WAIT_ACK, + CONNECT_STAT_NEGO_FAIL, + CONNECT_STAT_COUNT +}; + +enum { + CONNECT_TYPE_TCP = 0, + CONNECT_TYPE_UNSUPPORT, +}; + +struct connection_stat { + int64_t send_bytes; + int64_t recv_bytes; + int send_message_count; + int recv_message_count; + unsigned long rekey_time; +}; + +struct connection { + struct list_head list; + struct kref ref_cnt; + struct mutex ref_lock; + struct hmdfs_peer *node; + int type; + int status; + void *connect_handle; + struct crypto_aead *tfm; + u8 master_key[HMDFS_KEY_SIZE]; + u8 send_key[HMDFS_KEY_SIZE]; + u8 recv_key[HMDFS_KEY_SIZE]; + struct connection_stat stat; + struct work_struct reget_work; +#ifdef CONFIG_HMDFS_FS_ENCRYPTION + struct tls12_crypto_info_aes_gcm_128 send_crypto_info; + struct tls12_crypto_info_aes_gcm_128 recv_crypto_info; +#endif + void (*close)(struct connection *connect); + int (*send_message)(struct connection *connect, + struct hmdfs_send_data *msg); + uint32_t crypto; +}; + +enum { + NODE_STAT_SHAKING = 0, + NODE_STAT_ONLINE, + NODE_STAT_OFFLINE, +}; + +struct hmdfs_async_work { + struct hmdfs_msg_idr_head head; + struct page *page; + struct delayed_work d_work; + unsigned long start; +}; + +enum { + RAW_NODE_EVT_OFF = 0, + RAW_NODE_EVT_ON, + RAW_NODE_EVT_NR, +}; + +#define RAW_NODE_EVT_MAX_NR 4 + +struct hmdfs_stash_statistics { + unsigned int cur_ok; + unsigned int cur_nothing; + unsigned int cur_fail; + unsigned int total_ok; + unsigned int total_nothing; + unsigned int total_fail; + unsigned long long ok_pages; + unsigned long long fail_pages; +}; + +struct hmdfs_restore_statistics { + unsigned int cur_ok; + unsigned int cur_fail; + unsigned int cur_keep; + unsigned int total_ok; + unsigned int total_fail; + unsigned int total_keep; + unsigned long long ok_pages; + unsigned long long fail_pages; +}; + +struct hmdfs_rebuild_statistics { + unsigned int cur_ok; + unsigned int cur_fail; + unsigned int cur_invalid; + unsigned int total_ok; + unsigned int total_fail; + unsigned int total_invalid; + unsigned int time; +}; + +struct hmdfs_peer_statistics { + /* stash statistics */ + struct hmdfs_stash_statistics stash; + /* restore statistics */ + struct hmdfs_restore_statistics restore; + /* rebuild statistics */ + struct hmdfs_rebuild_statistics rebuild; +}; + +struct hmdfs_peer { + struct list_head list; + struct kref ref_cnt; + unsigned int owner; + uint64_t device_id; + unsigned long conn_time; + uint8_t version; + u8 status; + u64 features; + long long old_sb_dirty_count; + atomic64_t sb_dirty_count; + /* + * cookie for opened file id. 
+ * It will be increased if peer has offlined + */ + uint16_t fid_cookie; + struct mutex conn_impl_list_lock; + struct list_head conn_impl_list; + /* + * when async message process context call hmdfs_reget_connection + * add conn node to conn_deleting_list, so call hmdfs_disconnect_node + * can wait all receive thread exit + */ + struct list_head conn_deleting_list; + wait_queue_head_t deleting_list_wq; + struct idr msg_idr; + spinlock_t idr_lock; + struct idr file_id_idr; + spinlock_t file_id_lock; + int recvbuf_maxsize; + struct crypto_aead *tfm; + char cid[HMDFS_CID_SIZE + 1]; + const struct connection_operations *conn_operations; + struct hmdfs_sb_info *sbi; + struct workqueue_struct *async_wq; + struct workqueue_struct *req_handle_wq; + struct workqueue_struct *dentry_wq; + struct workqueue_struct *retry_wb_wq; + struct workqueue_struct *reget_conn_wq; + atomic_t evt_seq; + /* sync cb may be blocking */ + struct mutex seq_lock; + struct mutex offline_cb_lock; + struct mutex evt_lock; + unsigned char pending_evt; + unsigned char last_evt; + unsigned char waiting_evt[RAW_NODE_EVT_NR]; + unsigned char seq_rd_idx; + unsigned char seq_wr_idx; + unsigned int seq_tbl[RAW_NODE_EVT_MAX_NR]; + unsigned int pending_evt_seq; + unsigned char cur_evt[NODE_EVT_TYPE_NR]; + unsigned int cur_evt_seq[NODE_EVT_TYPE_NR]; + unsigned int merged_evt; + unsigned int dup_evt[RAW_NODE_EVT_NR]; + struct delayed_work evt_dwork; + /* protected by idr_lock */ + uint64_t msg_idr_process; + bool offline_start; + spinlock_t wr_opened_inode_lock; + struct list_head wr_opened_inode_list; + /* + * protect @stashed_inode_list and @stashed_inode_nr in stash process + * and fill_inode_remote->hmdfs_remote_init_stash_status process + */ + spinlock_t stashed_inode_lock; + unsigned int stashed_inode_nr; + struct list_head stashed_inode_list; + bool need_rebuild_stash_list; + /* how many inodes are rebuilding statsh status */ + atomic_t rebuild_inode_status_nr; + wait_queue_head_t rebuild_inode_status_wq; + struct hmdfs_peer_statistics stats; + /* sysfs */ + struct kobject kobj; + struct completion kobj_unregister; +}; + +#define HMDFS_DEVID_LOCAL 0 + +/* Be Compatible to DFS1.0, dont add packed attribute so far */ +struct connection_msg_head { + __u8 magic; + __u8 version; + __u8 operations; + __u8 flags; + __le32 datasize; + __le64 source; + __le16 msg_id; + __le16 request_id; + __le32 reserved1; +} __packed; + +struct connection_handshake_req { + __le32 len; + char dev_id[0]; +} __packed; + +enum { + HS_EXTEND_CODE_CRYPTO = 0, + HS_EXTEND_CODE_CASE_SENSE, + HS_EXTEND_CODE_FEATURE_SUPPORT, + HS_EXTEND_CODE_COUNT +}; + +struct conn_hs_extend_reg { + __u16 len; + __u16 resv; + void (*filler)(struct connection *conn_impl, __u8 ops, + void *data, __u32 len); + int (*parser)(struct connection *conn_impl, __u8 ops, + void *data, __u32 len); +}; + +struct conn_hs_extend_head { + __le32 field_cn; + char data[0]; +}; + +struct extend_field_head { + __le16 code; + __le16 len; +} __packed; + +struct crypto_body { + __le32 crypto; +} __packed; + +struct case_sense_body { + __u8 case_sensitive; +} __packed; + +struct feature_body { + __u64 features; + __u64 reserved; +} __packed; + +#define HMDFS_HS_CRYPTO_KTLS_AES128 0x00000001 +#define HMDFS_HS_CRYPTO_KTLS_AES256 0x00000002 + +static inline bool hmdfs_is_node_online(const struct hmdfs_peer *node) +{ + return READ_ONCE(node->status) == NODE_STAT_ONLINE; +} + +static inline unsigned int hmdfs_node_inc_evt_seq(struct hmdfs_peer *node) +{ + /* Use the atomic as an unsigned integer */ + 
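	/* i.e. let the sequence number wrap naturally on overflow */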
return atomic_inc_return(&node->evt_seq); +} + +static inline unsigned int hmdfs_node_evt_seq(const struct hmdfs_peer *node) +{ + return atomic_read(&node->evt_seq); +} + +struct connection *get_conn_impl(struct hmdfs_peer *node, int connect_type); + +void set_conn_sock_quickack(struct hmdfs_peer *node); + +struct hmdfs_peer *hmdfs_get_peer(struct hmdfs_sb_info *sbi, uint8_t *cid); + +struct hmdfs_peer *hmdfs_lookup_from_devid(struct hmdfs_sb_info *sbi, + uint64_t device_id); +struct hmdfs_peer *hmdfs_lookup_from_cid(struct hmdfs_sb_info *sbi, + uint8_t *cid); +void connection_send_handshake(struct connection *conn_impl, __u8 operations, + __le16 request_id); +void connection_handshake_recv_handler(struct connection *conn_impl, void *buf, + void *data, __u32 data_len); +void connection_working_recv_handler(struct connection *conn_impl, void *head, + void *data, __u32 data_len); +static inline void connection_get(struct connection *conn) +{ + kref_get(&conn->ref_cnt); +} + +void connection_put(struct connection *conn); +static inline void peer_get(struct hmdfs_peer *peer) +{ + kref_get(&peer->ref_cnt); +} + +void peer_put(struct hmdfs_peer *peer); + +int hmdfs_sendmessage(struct hmdfs_peer *node, struct hmdfs_send_data *msg); +void hmdfs_connections_stop(struct hmdfs_sb_info *sbi); + +void hmdfs_disconnect_node(struct hmdfs_peer *node); + +void connection_to_working(struct hmdfs_peer *node); + +int hmdfs_alloc_msg_idr(struct hmdfs_peer *peer, enum MSG_IDR_TYPE type, + void *ptr); +struct hmdfs_msg_idr_head *hmdfs_find_msg_head(struct hmdfs_peer *peer, int id); + +static inline void hmdfs_start_process_offline(struct hmdfs_peer *peer) +{ + spin_lock(&peer->idr_lock); + peer->offline_start = true; + spin_unlock(&peer->idr_lock); +} + +static inline void hmdfs_stop_process_offline(struct hmdfs_peer *peer) +{ + spin_lock(&peer->idr_lock); + peer->offline_start = false; + spin_unlock(&peer->idr_lock); +} + +static inline void hmdfs_dec_msg_idr_process(struct hmdfs_peer *peer) +{ + spin_lock(&peer->idr_lock); + peer->msg_idr_process--; + spin_unlock(&peer->idr_lock); +} +#endif diff --git a/fs/hmdfs/comm/crypto.c b/fs/hmdfs/comm/crypto.c new file mode 100644 index 000000000000..60bb08f1697f --- /dev/null +++ b/fs/hmdfs/comm/crypto.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/comm/crypto.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#include "crypto.h" + +#include +#include +#include +#include +#include +#include + +#include "hmdfs.h" + +static void tls_crypto_set_key(struct connection *conn_impl, int tx) +{ + int rc = 0; + struct tcp_handle *tcp = conn_impl->connect_handle; + struct tls_context *ctx = tls_get_ctx(tcp->sock->sk); + struct cipher_context *cctx = NULL; + struct tls_sw_context_tx *sw_ctx_tx = NULL; + struct tls_sw_context_rx *sw_ctx_rx = NULL; + struct crypto_aead **aead = NULL; + struct tls12_crypto_info_aes_gcm_128 *crypto_info = NULL; + + if (tx) { + crypto_info = &conn_impl->send_crypto_info; + cctx = &ctx->tx; + sw_ctx_tx = tls_sw_ctx_tx(ctx); + aead = &sw_ctx_tx->aead_send; + } else { + crypto_info = &conn_impl->recv_crypto_info; + cctx = &ctx->rx; + sw_ctx_rx = tls_sw_ctx_rx(ctx); + aead = &sw_ctx_rx->aead_recv; + } + + memcpy(cctx->iv, crypto_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, crypto_info->iv, + TLS_CIPHER_AES_GCM_128_IV_SIZE); + memcpy(cctx->rec_seq, crypto_info->rec_seq, + TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); + rc = crypto_aead_setkey(*aead, crypto_info->key, + TLS_CIPHER_AES_GCM_128_KEY_SIZE); + if (rc) + hmdfs_err("crypto set key error"); +} + +int tls_crypto_info_init(struct connection *conn_impl) +{ + int ret = 0; + u8 key_meterial[HMDFS_KEY_SIZE]; + struct tcp_handle *tcp = + (struct tcp_handle *)(conn_impl->connect_handle); + if (conn_impl->node->version < DFS_2_0 || !tcp) + return -EINVAL; + // send + update_key(conn_impl->send_key, key_meterial, HKDF_TYPE_IV); + ret = tcp->sock->ops->setsockopt(tcp->sock, SOL_TCP, TCP_ULP, + KERNEL_SOCKPTR("tls"), sizeof("tls")); + if (ret) + hmdfs_err("set tls error %d", ret); + tcp->connect->send_crypto_info.info.version = TLS_1_2_VERSION; + tcp->connect->send_crypto_info.info.cipher_type = + TLS_CIPHER_AES_GCM_128; + + memcpy(tcp->connect->send_crypto_info.key, tcp->connect->send_key, + TLS_CIPHER_AES_GCM_128_KEY_SIZE); + memcpy(tcp->connect->send_crypto_info.iv, + key_meterial + CRYPTO_IV_OFFSET, TLS_CIPHER_AES_GCM_128_IV_SIZE); + memcpy(tcp->connect->send_crypto_info.salt, + key_meterial + CRYPTO_SALT_OFFSET, + TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(tcp->connect->send_crypto_info.rec_seq, + key_meterial + CRYPTO_SEQ_OFFSET, + TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); + + ret = tcp->sock->ops->setsockopt(tcp->sock, SOL_TLS, TLS_TX, + KERNEL_SOCKPTR(&(tcp->connect->send_crypto_info)), + sizeof(tcp->connect->send_crypto_info)); + if (ret) + hmdfs_err("set tls send_crypto_info error %d", ret); + + // recv + update_key(tcp->connect->recv_key, key_meterial, HKDF_TYPE_IV); + tcp->connect->recv_crypto_info.info.version = TLS_1_2_VERSION; + tcp->connect->recv_crypto_info.info.cipher_type = + TLS_CIPHER_AES_GCM_128; + + memcpy(tcp->connect->recv_crypto_info.key, tcp->connect->recv_key, + TLS_CIPHER_AES_GCM_128_KEY_SIZE); + memcpy(tcp->connect->recv_crypto_info.iv, + key_meterial + CRYPTO_IV_OFFSET, TLS_CIPHER_AES_GCM_128_IV_SIZE); + memcpy(tcp->connect->recv_crypto_info.salt, + key_meterial + CRYPTO_SALT_OFFSET, + TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(tcp->connect->recv_crypto_info.rec_seq, + key_meterial + CRYPTO_SEQ_OFFSET, + TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); + memset(key_meterial, 0, HMDFS_KEY_SIZE); + + ret = tcp->sock->ops->setsockopt(tcp->sock, SOL_TLS, TLS_RX, + KERNEL_SOCKPTR(&(tcp->connect->recv_crypto_info)), + sizeof(tcp->connect->recv_crypto_info)); + if (ret) + hmdfs_err("set tls recv_crypto_info error %d", ret); + return ret; +} + +static int tls_set_tx(struct 
tcp_handle *tcp) +{ + int ret = 0; + u8 new_key[HMDFS_KEY_SIZE]; + u8 key_meterial[HMDFS_KEY_SIZE]; + + ret = update_key(tcp->connect->send_key, new_key, HKDF_TYPE_REKEY); + if (ret < 0) + return ret; + memcpy(tcp->connect->send_key, new_key, HMDFS_KEY_SIZE); + ret = update_key(tcp->connect->send_key, key_meterial, HKDF_TYPE_IV); + if (ret < 0) + return ret; + + memcpy(tcp->connect->send_crypto_info.key, tcp->connect->send_key, + TLS_CIPHER_AES_GCM_128_KEY_SIZE); + memcpy(tcp->connect->send_crypto_info.iv, + key_meterial + CRYPTO_IV_OFFSET, TLS_CIPHER_AES_GCM_128_IV_SIZE); + memcpy(tcp->connect->send_crypto_info.salt, + key_meterial + CRYPTO_SALT_OFFSET, + TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(tcp->connect->send_crypto_info.rec_seq, + key_meterial + CRYPTO_SEQ_OFFSET, + TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); + memset(new_key, 0, HMDFS_KEY_SIZE); + memset(key_meterial, 0, HMDFS_KEY_SIZE); + + tls_crypto_set_key(tcp->connect, 1); + return 0; +} + +static int tls_set_rx(struct tcp_handle *tcp) +{ + int ret = 0; + u8 new_key[HMDFS_KEY_SIZE]; + u8 key_meterial[HMDFS_KEY_SIZE]; + + ret = update_key(tcp->connect->recv_key, new_key, HKDF_TYPE_REKEY); + if (ret < 0) + return ret; + memcpy(tcp->connect->recv_key, new_key, HMDFS_KEY_SIZE); + ret = update_key(tcp->connect->recv_key, key_meterial, HKDF_TYPE_IV); + if (ret < 0) + return ret; + + memcpy(tcp->connect->recv_crypto_info.key, tcp->connect->recv_key, + TLS_CIPHER_AES_GCM_128_KEY_SIZE); + memcpy(tcp->connect->recv_crypto_info.iv, + key_meterial + CRYPTO_IV_OFFSET, TLS_CIPHER_AES_GCM_128_IV_SIZE); + memcpy(tcp->connect->recv_crypto_info.salt, + key_meterial + CRYPTO_SALT_OFFSET, + TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(tcp->connect->recv_crypto_info.rec_seq, + key_meterial + CRYPTO_SEQ_OFFSET, + TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); + memset(new_key, 0, HMDFS_KEY_SIZE); + memset(key_meterial, 0, HMDFS_KEY_SIZE); + tls_crypto_set_key(tcp->connect, 0); + return 0; +} + +int set_crypto_info(struct connection *conn_impl, int set_type) +{ + int ret = 0; + __u8 version = conn_impl->node->version; + struct tcp_handle *tcp = + (struct tcp_handle *)(conn_impl->connect_handle); + if (version < DFS_2_0 || !tcp) + return -EINVAL; + + if (set_type == SET_CRYPTO_SEND) { + ret = tls_set_tx(tcp); + if (ret) { + hmdfs_err("tls set tx fail"); + return ret; + } + } + if (set_type == SET_CRYPTO_RECV) { + ret = tls_set_rx(tcp); + if (ret) { + hmdfs_err("tls set rx fail"); + return ret; + } + } + hmdfs_info("KTLS setting success"); + return ret; +} + +static int hmac_sha256(u8 *key, u8 key_len, char *info, u8 info_len, u8 *output) +{ + struct crypto_shash *tfm = NULL; + struct shash_desc *shash = NULL; + int ret = 0; + + if (!key) + return -EINVAL; + + tfm = crypto_alloc_shash("hmac(sha256)", 0, 0); + if (IS_ERR(tfm)) { + hmdfs_err("crypto_alloc_ahash failed: err %ld", PTR_ERR(tfm)); + return PTR_ERR(tfm); + } + + ret = crypto_shash_setkey(tfm, key, key_len); + if (ret) { + hmdfs_err("crypto_ahash_setkey failed: err %d", ret); + goto failed; + } + + shash = kzalloc(sizeof(*shash) + crypto_shash_descsize(tfm), + GFP_KERNEL); + if (!shash) { + ret = -ENOMEM; + goto failed; + } + + shash->tfm = tfm; + + ret = crypto_shash_digest(shash, info, info_len, output); + + kfree(shash); + +failed: + crypto_free_shash(tfm); + return ret; +} + +static const char *const g_key_lable[] = { "ktls key initiator", + "ktls key accepter", + "ktls key update", "ktls iv&salt" }; +static const int g_key_lable_len[] = { 18, 17, 15, 12 }; + +int update_key(__u8 *old_key, __u8 
*new_key, int type) +{ + int ret = 0; + char lable[MAX_LABLE_SIZE]; + u8 lable_size; + + lable_size = g_key_lable_len[type] + sizeof(u16) + sizeof(char); + *((u16 *)lable) = HMDFS_KEY_SIZE; + memcpy(lable + sizeof(u16), g_key_lable[type], g_key_lable_len[type]); + *(lable + sizeof(u16) + g_key_lable_len[type]) = 0x01; + ret = hmac_sha256(old_key, HMDFS_KEY_SIZE, lable, lable_size, new_key); + if (ret < 0) + hmdfs_err("hmac sha256 error"); + return ret; +} diff --git a/fs/hmdfs/comm/crypto.h b/fs/hmdfs/comm/crypto.h new file mode 100644 index 000000000000..7549f3897336 --- /dev/null +++ b/fs/hmdfs/comm/crypto.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/crypto.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_FS_ENCRYPTION_H +#define HMDFS_FS_ENCRYPTION_H + +#include "transport.h" + +#define MAX_LABLE_SIZE 30 +#define CRYPTO_IV_OFFSET 0 +#define CRYPTO_SALT_OFFSET (CRYPTO_IV_OFFSET + TLS_CIPHER_AES_GCM_128_IV_SIZE) +#define CRYPTO_SEQ_OFFSET \ + (CRYPTO_SALT_OFFSET + TLS_CIPHER_AES_GCM_128_SALT_SIZE) +#define REKEY_LIFETIME (60 * 60 * HZ) + +enum HKDF_TYPE { + HKDF_TYPE_KEY_INITIATOR = 0, + HKDF_TYPE_KEY_ACCEPTER = 1, + HKDF_TYPE_REKEY = 2, + HKDF_TYPE_IV = 3, +}; + +enum SET_CRYPTO_TYPE { + SET_CRYPTO_SEND = 0, + SET_CRYPTO_RECV = 1, +}; + +int tls_crypto_info_init(struct connection *conn_impl); +int set_crypto_info(struct connection *conn_impl, int set_type); +int update_key(__u8 *old_key, __u8 *new_key, int type); + +#endif diff --git a/fs/hmdfs/comm/device_node.c b/fs/hmdfs/comm/device_node.c new file mode 100644 index 000000000000..54eaaf06f223 --- /dev/null +++ b/fs/hmdfs/comm/device_node.c @@ -0,0 +1,1665 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/comm/device_node.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#include "device_node.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "client_writeback.h" +#include "server_writeback.h" +#include "connection.h" +#include "hmdfs_client.h" +#include "socket_adapter.h" +#include "authority/authentication.h" + +DEFINE_MUTEX(hmdfs_sysfs_mutex); +static struct kset *hmdfs_kset; + +struct hmdfs_disconnect_node_work { + struct hmdfs_peer *conn; + struct work_struct work; + atomic_t *cnt; + struct wait_queue_head *waitq; +}; + +static void ctrl_cmd_update_socket_handler(const char *buf, size_t len, + struct hmdfs_sb_info *sbi) +{ + struct update_socket_param cmd; + struct hmdfs_peer *node = NULL; + struct connection *conn = NULL; + + if (unlikely(!buf || len != sizeof(cmd))) { + hmdfs_err("len/buf error"); + goto out; + } + memcpy(&cmd, buf, sizeof(cmd)); + + node = hmdfs_get_peer(sbi, cmd.cid); + if (unlikely(!node)) { + hmdfs_err("failed to update ctrl node: cannot get peer"); + goto out; + } + + conn = hmdfs_get_conn_tcp(node, cmd.newfd, cmd.masterkey, cmd.status); + if (unlikely(!conn)) { + hmdfs_err("failed to update ctrl node: cannot get conn"); + } else if (!sbi->system_cred) { + const struct cred *system_cred = get_cred(current_cred()); + + if (cmpxchg_relaxed(&sbi->system_cred, NULL, system_cred)) + put_cred(system_cred); + else + hmdfs_check_cred(system_cred); + } +out: + if (conn) + connection_put(conn); + if (node) + peer_put(node); +} + +static inline void hmdfs_disconnect_node_marked(struct hmdfs_peer *conn) +{ + hmdfs_start_process_offline(conn); + hmdfs_disconnect_node(conn); + hmdfs_stop_process_offline(conn); +} + +static void ctrl_cmd_off_line_handler(const char *buf, size_t len, + struct hmdfs_sb_info *sbi) +{ + struct offline_param cmd; + struct hmdfs_peer *node = NULL; + + if (unlikely(!buf || len != sizeof(cmd))) { + hmdfs_err("Recved a invalid userbuf"); + return; + } + memcpy(&cmd, buf, sizeof(cmd)); + node = hmdfs_lookup_from_cid(sbi, cmd.remote_cid); + if (unlikely(!node)) { + hmdfs_err("Cannot find node by device"); + return; + } + hmdfs_info("Found peer: device_id = %llu", node->device_id); + hmdfs_disconnect_node_marked(node); + peer_put(node); +} + +static void hmdfs_disconnect_node_work_fn(struct work_struct *base) +{ + struct hmdfs_disconnect_node_work *work = + container_of(base, struct hmdfs_disconnect_node_work, work); + + hmdfs_disconnect_node_marked(work->conn); + if (atomic_dec_and_test(work->cnt)) + wake_up(work->waitq); + kfree(work); +} + +static void ctrl_cmd_off_line_all_handler(const char *buf, size_t len, + struct hmdfs_sb_info *sbi) +{ + struct hmdfs_peer *node = NULL; + struct hmdfs_disconnect_node_work *work = NULL; + atomic_t cnt = ATOMIC_INIT(0); + wait_queue_head_t waitq; + + if (unlikely(len != sizeof(struct offline_all_param))) { + hmdfs_err("Recved a invalid userbuf, len %zu, expect %zu\n", + len, sizeof(struct offline_all_param)); + return; + } + + init_waitqueue_head(&waitq); + mutex_lock(&sbi->connections.node_lock); + list_for_each_entry(node, &sbi->connections.node_list, list) { + mutex_unlock(&sbi->connections.node_lock); + work = kmalloc(sizeof(*work), GFP_KERNEL); + if (work) { + atomic_inc(&cnt); + work->conn = node; + work->cnt = &cnt; + work->waitq = &waitq; + INIT_WORK(&work->work, hmdfs_disconnect_node_work_fn); + schedule_work(&work->work); + } else { + hmdfs_disconnect_node_marked(node); + } + mutex_lock(&sbi->connections.node_lock); + } + mutex_unlock(&sbi->connections.node_lock); + + wait_event(waitq, 
!atomic_read(&cnt)); +} + +typedef void (*ctrl_cmd_handler)(const char *buf, size_t len, + struct hmdfs_sb_info *sbi); + +static const ctrl_cmd_handler cmd_handler[CMD_CNT] = { + [CMD_UPDATE_SOCKET] = ctrl_cmd_update_socket_handler, + [CMD_OFF_LINE] = ctrl_cmd_off_line_handler, + [CMD_OFF_LINE_ALL] = ctrl_cmd_off_line_all_handler, +}; + +static ssize_t sbi_cmd_show(struct kobject *kobj, struct sbi_attribute *attr, + char *buf) +{ + struct notify_param param; + int out_len; + struct hmdfs_sb_info *sbi = to_sbi(kobj); + + memset(¶m, 0, sizeof(param)); + spin_lock(&sbi->notify_fifo_lock); + out_len = kfifo_out(&sbi->notify_fifo, ¶m, sizeof(param)); + spin_unlock(&sbi->notify_fifo_lock); + if (out_len != sizeof(param)) + param.notify = NOTIFY_NONE; + memcpy(buf, ¶m, sizeof(param)); + return sizeof(param); +} + +static const char *cmd2str(int cmd) +{ + switch (cmd) { + case 0: + return "CMD_UPDATE_SOCKET"; + case 1: + return "CMD_OFF_LINE"; + case 2: + return "CMD_OFF_LINE_ALL"; + default: + return "illegal cmd"; + } +} + +static ssize_t sbi_cmd_store(struct kobject *kobj, struct sbi_attribute *attr, + const char *buf, size_t len) +{ + int cmd; + struct hmdfs_sb_info *sbi = to_sbi(kobj); + + if (!sbi) { + hmdfs_info("Fatal! Empty sbi. Mount fs first"); + return len; + } + if (len < sizeof(int)) { + hmdfs_err("Illegal cmd: cmd len = %zu", len); + return len; + } + cmd = *(int *)buf; + if (cmd < 0 || cmd >= CMD_CNT) { + hmdfs_err("Illegal cmd : cmd = %d", cmd); + return len; + } + hmdfs_info("Recved cmd: %s", cmd2str(cmd)); + if (cmd_handler[cmd]) + cmd_handler[cmd](buf, len, sbi); + return len; +} + +static struct sbi_attribute sbi_cmd_attr = + __ATTR(cmd, 0664, sbi_cmd_show, sbi_cmd_store); + +static ssize_t sbi_status_show(struct kobject *kobj, struct sbi_attribute *attr, + char *buf) +{ + ssize_t size = 0; + struct hmdfs_sb_info *sbi = NULL; + struct hmdfs_peer *peer = NULL; + struct connection *conn_impl = NULL; + struct tcp_handle *tcp = NULL; + + sbi = to_sbi(kobj); + size += sprintf(buf + size, "peers version status\n"); + + mutex_lock(&sbi->connections.node_lock); + list_for_each_entry(peer, &sbi->connections.node_list, list) { + size += sprintf(buf + size, "%llu %d %d\n", peer->device_id, + peer->version, peer->status); + // connection information + size += sprintf( + buf + size, + "\t socket_fd connection_status tcp_status ... 
refcnt\n"); + mutex_lock(&peer->conn_impl_list_lock); + list_for_each_entry(conn_impl, &peer->conn_impl_list, list) { + tcp = conn_impl->connect_handle; + size += sprintf(buf + size, "\t %d \t%d \t%d \t%p \t%ld\n", + tcp->fd, conn_impl->status, + tcp->sock->state, tcp->sock, file_count(tcp->sock->file)); + } + mutex_unlock(&peer->conn_impl_list_lock); + } + mutex_unlock(&sbi->connections.node_lock); + return size; +} + +static ssize_t sbi_status_store(struct kobject *kobj, + struct sbi_attribute *attr, const char *buf, + size_t len) +{ + return len; +} + +static struct sbi_attribute sbi_status_attr = + __ATTR(status, 0664, sbi_status_show, sbi_status_store); + +static ssize_t sbi_stat_show(struct kobject *kobj, struct sbi_attribute *attr, + char *buf) +{ + ssize_t size = 0; + struct hmdfs_sb_info *sbi = NULL; + struct hmdfs_peer *peer = NULL; + struct connection *conn_impl = NULL; + struct tcp_handle *tcp = NULL; + + sbi = to_sbi(kobj); + mutex_lock(&sbi->connections.node_lock); + list_for_each_entry(peer, &sbi->connections.node_list, list) { + // connection information + mutex_lock(&peer->conn_impl_list_lock); + list_for_each_entry(conn_impl, &peer->conn_impl_list, list) { + tcp = conn_impl->connect_handle; + size += sprintf(buf + size, "socket_fd: %d\n", tcp->fd); + size += sprintf(buf + size, + "\tsend_msg %d \tsend_bytes %llu\n", + conn_impl->stat.send_message_count, + conn_impl->stat.send_bytes); + size += sprintf(buf + size, + "\trecv_msg %d \trecv_bytes %llu\n", + conn_impl->stat.recv_message_count, + conn_impl->stat.recv_bytes); + } + mutex_unlock(&peer->conn_impl_list_lock); + } + mutex_unlock(&sbi->connections.node_lock); + return size; +} + +static ssize_t sbi_stat_store(struct kobject *kobj, struct sbi_attribute *attr, + const char *buf, size_t len) +{ + struct hmdfs_sb_info *sbi = NULL; + struct hmdfs_peer *peer = NULL; + struct connection *conn_impl = NULL; + + sbi = to_sbi(kobj); + mutex_lock(&sbi->connections.node_lock); + list_for_each_entry(peer, &sbi->connections.node_list, list) { + // connection information + mutex_lock(&peer->conn_impl_list_lock); + list_for_each_entry(conn_impl, &peer->conn_impl_list, list) { + conn_impl->stat.send_message_count = 0; + conn_impl->stat.send_bytes = 0; + conn_impl->stat.recv_message_count = 0; + conn_impl->stat.recv_bytes = 0; + } + mutex_unlock(&peer->conn_impl_list_lock); + } + mutex_unlock(&sbi->connections.node_lock); + return len; +} + +static struct sbi_attribute sbi_statistic_attr = + __ATTR(statistic, 0664, sbi_stat_show, sbi_stat_store); + +static ssize_t sbi_dcache_precision_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", to_sbi(kobj)->dcache_precision); +} + +#define PRECISION_MAX 3600000 + +static ssize_t sbi_dcache_precision_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, size_t len) +{ + int ret; + unsigned int precision; + struct hmdfs_sb_info *sbi = to_sbi(kobj); + + ret = kstrtouint(skip_spaces(buf), 0, &precision); + if (!ret) { + if (precision <= PRECISION_MAX) + sbi->dcache_precision = precision; + else + ret = -EINVAL; + } + + return ret ? 
ret : len; +} + +static struct sbi_attribute sbi_dcache_precision_attr = + __ATTR(dcache_precision, 0664, sbi_dcache_precision_show, + sbi_dcache_precision_store); + +static ssize_t sbi_dcache_threshold_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%lu\n", + to_sbi(kobj)->dcache_threshold); +} + +static ssize_t sbi_dcache_threshold_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, size_t len) +{ + int ret; + unsigned long threshold; + struct hmdfs_sb_info *sbi = to_sbi(kobj); + + ret = kstrtoul(skip_spaces(buf), 0, &threshold); + if (!ret) + sbi->dcache_threshold = threshold; + + return ret ? ret : len; +} + +static struct sbi_attribute sbi_dcache_threshold_attr = + __ATTR(dcache_threshold, 0664, sbi_dcache_threshold_show, + sbi_dcache_threshold_store); + +static ssize_t server_statistic_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + int i, ret; + const size_t size = PAGE_SIZE - 1; + ssize_t pos = 0; + struct server_statistic *stat = to_sbi(kobj)->s_server_statis; + + for (i = 0; i < F_SIZE; i++) { + + ret = snprintf(buf + pos, size - pos, + "%llu %u %llu %llu\n", + stat[i].cnt, + jiffies_to_msecs(stat[i].max), + stat[i].snd_cnt, stat[i].snd_fail_cnt); + if (ret > size - pos) + break; + pos += ret; + } + + /* If break, we should add a new line */ + if (i < F_SIZE) { + ret = snprintf(buf + pos, size + 1 - pos, "\n"); + pos += ret; + } + return pos; +} + +static struct sbi_attribute sbi_local_op_attr = __ATTR_RO(server_statistic); + +static ssize_t client_statistic_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + int i, ret; + const size_t size = PAGE_SIZE - 1; + ssize_t pos = 0; + struct client_statistic *stat = to_sbi(kobj)->s_client_statis; + + for (i = 0; i < F_SIZE; i++) { + + ret = snprintf(buf + pos, size - pos, + "%llu %llu %llu %llu %llu %u\n", + stat[i].snd_cnt, + stat[i].snd_fail_cnt, + stat[i].resp_cnt, + stat[i].timeout_cnt, + stat[i].delay_resp_cnt, + jiffies_to_msecs(stat[i].max)); + if (ret > size - pos) + break; + pos += ret; + } + + /* If break, we should add a new line */ + if (i < F_SIZE) { + ret = snprintf(buf + pos, size + 1 - pos, "\n"); + pos += ret; + } + + return pos; +} + +static struct sbi_attribute sbi_delay_resp_attr = __ATTR_RO(client_statistic); + +static inline unsigned long pages_to_kbytes(unsigned long page) +{ + return page << (PAGE_SHIFT - 10); +} + +static ssize_t dirty_writeback_stats_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + struct hmdfs_writeback *hwb = sbi->h_wb; + unsigned long avg; + unsigned long max; + unsigned long min; + + spin_lock(&hwb->write_bandwidth_lock); + avg = hwb->avg_write_bandwidth; + max = hwb->max_write_bandwidth; + min = hwb->min_write_bandwidth; + spin_unlock(&hwb->write_bandwidth_lock); + + if (min == ULONG_MAX) + min = 0; + + return snprintf(buf, PAGE_SIZE, + "%10lu\n" + "%10lu\n" + "%10lu\n", + pages_to_kbytes(avg), + pages_to_kbytes(max), + pages_to_kbytes(min)); +} + +static struct sbi_attribute sbi_dirty_writeback_stats_attr = + __ATTR_RO(dirty_writeback_stats); + +static ssize_t sbi_wb_timeout_ms_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%u\n", sbi->wb_timeout_ms); +} + +static ssize_t sbi_wb_timeout_ms_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, size_t len) 
+{ + struct hmdfs_sb_info *sbi = to_sbi(kobj); + unsigned int val; + int err; + + err = kstrtouint(buf, 10, &val); + if (err) + return err; + + if (!val || val > HMDFS_MAX_WB_TIMEOUT_MS) + return -EINVAL; + + sbi->wb_timeout_ms = val; + + return len; +} + +static struct sbi_attribute sbi_wb_timeout_ms_attr = + __ATTR(wb_timeout_ms, 0664, sbi_wb_timeout_ms_show, + sbi_wb_timeout_ms_store); + +static ssize_t sbi_dirty_writeback_centisecs_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%u\n", + sbi->h_wb->dirty_writeback_interval); +} + +static ssize_t sbi_dirty_writeback_centisecs_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, size_t len) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + int err; + + err = kstrtouint(buf, 10, &sbi->h_wb->dirty_writeback_interval); + if (err) + return err; + return len; +} + +static struct sbi_attribute sbi_dirty_writeback_centisecs_attr = + __ATTR(dirty_writeback_centisecs, 0664, + sbi_dirty_writeback_centisecs_show, + sbi_dirty_writeback_centisecs_store); + +static ssize_t sbi_dirty_file_background_bytes_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%lu\n", + sbi->h_wb->dirty_file_bg_bytes); +} + +static ssize_t sbi_dirty_file_background_bytes_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, + size_t len) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + unsigned long file_background_bytes = 0; + int err; + + err = kstrtoul(buf, 10, &file_background_bytes); + if (err) + return err; + if (file_background_bytes == 0) + return -EINVAL; + + sbi->h_wb->dirty_fs_bytes = + max(sbi->h_wb->dirty_fs_bytes, file_background_bytes); + sbi->h_wb->dirty_fs_bg_bytes = + max(sbi->h_wb->dirty_fs_bg_bytes, file_background_bytes); + sbi->h_wb->dirty_file_bytes = + max(sbi->h_wb->dirty_file_bytes, file_background_bytes); + + sbi->h_wb->dirty_file_bg_bytes = file_background_bytes; + hmdfs_calculate_dirty_thresh(sbi->h_wb); + hmdfs_update_ratelimit(sbi->h_wb); + return len; +} + +static ssize_t sbi_dirty_fs_background_bytes_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->h_wb->dirty_fs_bg_bytes); +} + +static ssize_t sbi_dirty_fs_background_bytes_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, size_t len) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + unsigned long fs_background_bytes = 0; + int err; + + err = kstrtoul(buf, 10, &fs_background_bytes); + if (err) + return err; + if (fs_background_bytes == 0) + return -EINVAL; + + sbi->h_wb->dirty_file_bg_bytes = + min(sbi->h_wb->dirty_file_bg_bytes, fs_background_bytes); + sbi->h_wb->dirty_fs_bytes = + max(sbi->h_wb->dirty_fs_bytes, fs_background_bytes); + + sbi->h_wb->dirty_fs_bg_bytes = fs_background_bytes; + hmdfs_calculate_dirty_thresh(sbi->h_wb); + hmdfs_update_ratelimit(sbi->h_wb); + return len; +} + +static struct sbi_attribute sbi_dirty_file_background_bytes_attr = + __ATTR(dirty_file_background_bytes, 0644, + sbi_dirty_file_background_bytes_show, + sbi_dirty_file_background_bytes_store); +static struct sbi_attribute sbi_dirty_fs_background_bytes_attr = + __ATTR(dirty_fs_background_bytes, 0644, + sbi_dirty_fs_background_bytes_show, + sbi_dirty_fs_background_bytes_store); + 
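+/*
+ * The four dirty-bytes knobs in this block are kept mutually consistent:
+ * each store clamps the other three with min()/max() so that a background
+ * threshold never exceeds its foreground counterpart and the per-file
+ * thresholds never exceed the fs-wide ones, then re-derives the effective
+ * limits via hmdfs_calculate_dirty_thresh() and hmdfs_update_ratelimit().
+ *
+ * Example (the kset is registered as "hmdfs" under /sys/fs; <mount> is
+ * the name passed to hmdfs_register_sysfs()):
+ *   echo 4194304 > /sys/fs/hmdfs/<mount>/dirty_file_background_bytes
+ */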
+static ssize_t sbi_dirty_file_bytes_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->h_wb->dirty_file_bytes); +} + +static ssize_t sbi_dirty_file_bytes_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, size_t len) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + unsigned long file_bytes = 0; + int err; + + err = kstrtoul(buf, 10, &file_bytes); + if (err) + return err; + if (file_bytes == 0) + return -EINVAL; + + sbi->h_wb->dirty_file_bg_bytes = + min(sbi->h_wb->dirty_file_bg_bytes, file_bytes); + sbi->h_wb->dirty_fs_bytes = max(sbi->h_wb->dirty_fs_bytes, file_bytes); + + sbi->h_wb->dirty_file_bytes = file_bytes; + hmdfs_calculate_dirty_thresh(sbi->h_wb); + hmdfs_update_ratelimit(sbi->h_wb); + return len; +} + +static ssize_t sbi_dirty_fs_bytes_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->h_wb->dirty_fs_bytes); +} + +static ssize_t sbi_dirty_fs_bytes_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, size_t len) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + unsigned long fs_bytes = 0; + int err; + + err = kstrtoul(buf, 10, &fs_bytes); + if (err) + return err; + if (fs_bytes == 0) + return -EINVAL; + + sbi->h_wb->dirty_file_bg_bytes = + min(sbi->h_wb->dirty_file_bg_bytes, fs_bytes); + sbi->h_wb->dirty_file_bytes = + min(sbi->h_wb->dirty_file_bytes, fs_bytes); + sbi->h_wb->dirty_fs_bg_bytes = + min(sbi->h_wb->dirty_fs_bg_bytes, fs_bytes); + + sbi->h_wb->dirty_fs_bytes = fs_bytes; + hmdfs_calculate_dirty_thresh(sbi->h_wb); + hmdfs_update_ratelimit(sbi->h_wb); + return len; +} + +static struct sbi_attribute sbi_dirty_file_bytes_attr = + __ATTR(dirty_file_bytes, 0644, sbi_dirty_file_bytes_show, + sbi_dirty_file_bytes_store); +static struct sbi_attribute sbi_dirty_fs_bytes_attr = + __ATTR(dirty_fs_bytes, 0644, sbi_dirty_fs_bytes_show, + sbi_dirty_fs_bytes_store); + +static ssize_t sbi_dirty_writeback_timelimit_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%u\n", + sbi->h_wb->writeback_timelimit / HZ); +} + +static ssize_t sbi_dirty_writeback_timelimit_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, + size_t len) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + unsigned int time_limit = 0; + int err; + + err = kstrtouint(buf, 10, &time_limit); + if (err) + return err; + if (time_limit == 0 || time_limit > (HMDFS_MAX_WB_TIMELIMIT / HZ)) + return -EINVAL; + + sbi->h_wb->writeback_timelimit = time_limit * HZ; + return len; +} + +static struct sbi_attribute sbi_dirty_writeback_timelimit_attr = +__ATTR(dirty_writeback_timelimit, 0644, sbi_dirty_writeback_timelimit_show, + sbi_dirty_writeback_timelimit_store); + +static ssize_t sbi_dirty_thresh_lowerlimit_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%lu\n", + sbi->h_wb->bw_thresh_lowerlimit << PAGE_SHIFT); +} + +static ssize_t sbi_dirty_thresh_lowerlimit_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, + size_t len) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + unsigned long bw_thresh_lowerbytes = 0; + unsigned long bw_thresh_lowerlimit; + int 
err;
+
+	err = kstrtoul(buf, 10, &bw_thresh_lowerbytes);
+	if (err)
+		return err;
+
+	bw_thresh_lowerlimit = DIV_ROUND_UP(bw_thresh_lowerbytes, PAGE_SIZE);
+	if (bw_thresh_lowerlimit < HMDFS_BW_THRESH_MIN_LIMIT ||
+	    bw_thresh_lowerlimit > HMDFS_BW_THRESH_MAX_LIMIT)
+		return -EINVAL;
+
+	sbi->h_wb->bw_thresh_lowerlimit = bw_thresh_lowerlimit;
+	return len;
+}
+
+static struct sbi_attribute sbi_dirty_thresh_lowerlimit_attr =
+__ATTR(dirty_thresh_lowerlimit, 0644, sbi_dirty_thresh_lowerlimit_show,
+       sbi_dirty_thresh_lowerlimit_store);
+
+static ssize_t sbi_dirty_writeback_autothresh_show(struct kobject *kobj,
+						   struct sbi_attribute *attr,
+						   char *buf)
+{
+	const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+			sbi->h_wb->dirty_auto_threshold);
+}
+
+static ssize_t sbi_dirty_writeback_autothresh_store(struct kobject *kobj,
+						    struct sbi_attribute *attr,
+						    const char *buf,
+						    size_t len)
+{
+	const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+	bool dirty_auto_threshold = false;
+	int err;
+
+	err = kstrtobool(buf, &dirty_auto_threshold);
+	if (err)
+		return err;
+
+	sbi->h_wb->dirty_auto_threshold = dirty_auto_threshold;
+	return len;
+}
+
+static struct sbi_attribute sbi_dirty_writeback_autothresh_attr =
+__ATTR(dirty_writeback_autothresh, 0644, sbi_dirty_writeback_autothresh_show,
+       sbi_dirty_writeback_autothresh_store);
+
+static ssize_t sbi_dirty_writeback_control_show(struct kobject *kobj,
+						struct sbi_attribute *attr,
+						char *buf)
+{
+	const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+			sbi->h_wb->dirty_writeback_control);
+}
+
+static ssize_t sbi_dirty_writeback_control_store(struct kobject *kobj,
+						 struct sbi_attribute *attr,
+						 const char *buf, size_t len)
+{
+	const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+	unsigned int dirty_writeback_control = 0;
+	int err;
+
+	err = kstrtouint(buf, 10, &dirty_writeback_control);
+	if (err)
+		return err;
+
+	sbi->h_wb->dirty_writeback_control = (bool)dirty_writeback_control;
+	return len;
+}
+
+static struct sbi_attribute sbi_dirty_writeback_control_attr =
+	__ATTR(dirty_writeback_control, 0644, sbi_dirty_writeback_control_show,
+	       sbi_dirty_writeback_control_store);
+
+static ssize_t sbi_srv_dirty_thresh_show(struct kobject *kobj,
+					 struct sbi_attribute *attr,
+					 char *buf)
+{
+	const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+			sbi->h_swb->dirty_thresh_pg >> HMDFS_MB_TO_PAGE_SHIFT);
+}
+
+static ssize_t sbi_srv_dirty_thresh_store(struct kobject *kobj,
+					  struct sbi_attribute *attr,
+					  const char *buf,
+					  size_t len)
+{
+	struct hmdfs_server_writeback *hswb = to_sbi(kobj)->h_swb;
+	int dirty_thresh_mb;
+	unsigned long long pages;
+	int err;
+
+	err = kstrtoint(buf, 10, &dirty_thresh_mb);
+	if (err)
+		return err;
+
+	if (dirty_thresh_mb <= 0)
+		return -EINVAL;
+
+	pages = dirty_thresh_mb;
+	pages <<= HMDFS_MB_TO_PAGE_SHIFT;
+	if (pages > INT_MAX) {
+		hmdfs_err("Illegal dirty_thresh_mb %d, its page count exceeds INT_MAX",
+			  dirty_thresh_mb);
+		return -EINVAL;
+	}
+
+	hswb->dirty_thresh_pg = (unsigned int)pages;
+	return len;
+}
+
+static struct sbi_attribute sbi_srv_dirty_thresh_attr =
+__ATTR(srv_dirty_thresh, 0644, sbi_srv_dirty_thresh_show,
+       sbi_srv_dirty_thresh_store);
+
+
+static ssize_t sbi_srv_dirty_wb_control_show(struct kobject *kobj,
+					     struct sbi_attribute *attr,
+					     char *buf)
+{
+	const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+			sbi->h_swb->dirty_writeback_control);
+}
+
+static ssize_t sbi_srv_dirty_wb_control_store(struct kobject *kobj,
+					      struct sbi_attribute *attr,
+					      const char *buf,
+					      size_t len)
+{
+	struct hmdfs_server_writeback *hswb = to_sbi(kobj)->h_swb;
+	bool dirty_writeback_control = true;
+	int err;
+
+	err = kstrtobool(buf, &dirty_writeback_control);
+	if (err)
+		return err;
+
+	hswb->dirty_writeback_control = dirty_writeback_control;
+
+	return len;
+}
+
+static struct sbi_attribute sbi_srv_dirty_wb_control_attr =
+__ATTR(srv_dirty_writeback_control, 0644, sbi_srv_dirty_wb_control_show,
+       sbi_srv_dirty_wb_control_store);
+
+static ssize_t sbi_dcache_timeout_show(struct kobject *kobj,
+				       struct sbi_attribute *attr, char *buf)
+{
+	const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", sbi->dcache_timeout);
+}
+
+static ssize_t sbi_dcache_timeout_store(struct kobject *kobj,
+					struct sbi_attribute *attr,
+					const char *buf, size_t len)
+{
+	struct hmdfs_sb_info *sbi = to_sbi(kobj);
+	unsigned int timeout;
+	int err;
+
+	err = kstrtouint(buf, 0, &timeout);
+	if (err)
+		return err;
+
+	/* zero is invalid, and it doesn't mean no cache */
+	if (timeout == 0 || timeout > MAX_DCACHE_TIMEOUT)
+		return -EINVAL;
+
+	sbi->dcache_timeout = timeout;
+
+	return len;
+}
+
+static struct sbi_attribute sbi_dcache_timeout_attr =
+	__ATTR(dcache_timeout, 0644, sbi_dcache_timeout_show,
+	       sbi_dcache_timeout_store);
+
+static ssize_t sbi_write_cache_timeout_sec_show(struct kobject *kobj,
+		struct sbi_attribute *attr, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%u\n",
+			to_sbi(kobj)->write_cache_timeout);
+}
+
+static ssize_t sbi_write_cache_timeout_sec_store(struct kobject *kobj,
+		struct sbi_attribute *attr, const char *buf, size_t len)
+{
+	int ret;
+	unsigned int timeout;
+	struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	ret = kstrtouint(buf, 0, &timeout);
+	if (ret)
+		return ret;
+
+	/* setting write_cache_timeout to 0 disables this functionality */
+	sbi->write_cache_timeout = timeout;
+
+	return len;
+}
+
+static struct sbi_attribute sbi_write_cache_timeout_sec_attr =
+	__ATTR(write_cache_timeout_sec, 0664, sbi_write_cache_timeout_sec_show,
+	       sbi_write_cache_timeout_sec_store);
+
+static ssize_t sbi_node_evt_cb_delay_show(struct kobject *kobj,
+					  struct sbi_attribute *attr,
+					  char *buf)
+{
+	const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", sbi->async_cb_delay);
+}
+
+static ssize_t sbi_node_evt_cb_delay_store(struct kobject *kobj,
+					   struct sbi_attribute *attr,
+					   const char *buf,
+					   size_t len)
+{
+	struct hmdfs_sb_info *sbi = to_sbi(kobj);
+	unsigned int delay = 0;
+	int err;
+
+	err = kstrtouint(buf, 10, &delay);
+	if (err)
+		return err;
+
+	sbi->async_cb_delay = delay;
+
+	return len;
+}
+
+static struct sbi_attribute sbi_node_evt_cb_delay_attr =
+__ATTR(node_event_delay, 0644, sbi_node_evt_cb_delay_show,
+       sbi_node_evt_cb_delay_store);
+
+static int calc_idr_number(struct idr *idr)
+{
+	void *entry = NULL;
+	int id;
+	int number = 0;
+
+	idr_for_each_entry(idr, entry, id) {
+		number++;
+		if (number % HMDFS_IDR_RESCHED_COUNT == 0)
+			cond_resched();
+	}
+
+	return number;
+}
+
+static ssize_t sbi_show_idr_stats(struct kobject *kobj,
+				  struct sbi_attribute *attr,
+				  char *buf, bool showmsg)
+{
+	ssize_t size = 0;
+	int count;
+	struct hmdfs_sb_info *sbi = NULL;
+	struct hmdfs_peer *peer = NULL;
+	struct idr *idr = NULL;
+
+	sbi = to_sbi(kobj);
+
+	mutex_lock(&sbi->connections.node_lock);
+	list_for_each_entry(peer, &sbi->connections.node_list, list) {
+		idr = showmsg ?
&peer->msg_idr : &peer->file_id_idr; + count = calc_idr_number(idr); + size += snprintf(buf + size, PAGE_SIZE - size, + "device-id\tcount\tnext-id\n\t%llu\t\t%d\t%u\n", + peer->device_id, count, idr_get_cursor(idr)); + if (size >= PAGE_SIZE) { + size = PAGE_SIZE; + break; + } + } + mutex_unlock(&sbi->connections.node_lock); + + return size; +} + +static ssize_t pending_message_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + return sbi_show_idr_stats(kobj, attr, buf, true); +} + +static struct sbi_attribute sbi_pending_message_attr = + __ATTR_RO(pending_message); + +static ssize_t peer_opened_fd_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + return sbi_show_idr_stats(kobj, attr, buf, false); +} + +static struct sbi_attribute sbi_peer_opened_fd_attr = __ATTR_RO(peer_opened_fd); + +static ssize_t sbi_srv_req_max_active_attr_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%u\n", sbi->async_req_max_active); +} + +static ssize_t sbi_srv_req_max_active_attr_store(struct kobject *kobj, + struct sbi_attribute *attr, const char *buf, size_t len) +{ + int ret; + unsigned int max_active; + struct hmdfs_sb_info *sbi = to_sbi(kobj); + + ret = kstrtouint(buf, 0, &max_active); + if (ret) + return ret; + + sbi->async_req_max_active = max_active; + + return len; +} + +static struct sbi_attribute sbi_srv_req_max_active_attr = +__ATTR(srv_req_handle_max_active, 0644, sbi_srv_req_max_active_attr_show, + sbi_srv_req_max_active_attr_store); + + +static ssize_t cache_file_show(struct hmdfs_sb_info *sbi, + struct list_head *head, char *buf) +{ + struct cache_file_node *cfn = NULL; + ssize_t pos = 0; + + mutex_lock(&sbi->cache_list_lock); + list_for_each_entry(cfn, head, list) { + pos += snprintf(buf + pos, PAGE_SIZE - pos, + "dev_id: %s relative_path: %s\n", + cfn->cid, cfn->relative_path); + if (pos >= PAGE_SIZE) { + pos = PAGE_SIZE; + break; + } + } + mutex_unlock(&sbi->cache_list_lock); + + return pos; +} + +static ssize_t client_cache_file_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + return cache_file_show(to_sbi(kobj), &to_sbi(kobj)->client_cache, buf); +} +static ssize_t server_cache_file_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + return cache_file_show(to_sbi(kobj), &to_sbi(kobj)->server_cache, buf); +} + +static struct sbi_attribute sbi_server_cache_file_attr = + __ATTR_RO(server_cache_file); +static struct sbi_attribute sbi_client_cache_file_attr = + __ATTR_RO(client_cache_file); + +static ssize_t sb_seq_show(struct kobject *kobj, struct sbi_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", to_sbi(kobj)->seq); +} + +static struct sbi_attribute sbi_seq_attr = __ATTR_RO(sb_seq); + +static ssize_t peers_sum_attr_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + struct hmdfs_sb_info *sbi = to_sbi(kobj); + struct hmdfs_peer *node = NULL; + unsigned int stash_ok = 0, stash_fail = 0, restore_ok = 0, + restore_fail = 0, rebuild_ok = 0, rebuild_fail = 0, rebuild_invalid = 0, + rebuild_time = 0; + unsigned long long stash_ok_pages = 0, stash_fail_pages = 0, + restore_ok_pages = 0, restore_fail_pages = 0; + + mutex_lock(&sbi->connections.node_lock); + list_for_each_entry(node, &sbi->connections.node_list, list) { + peer_get(node); + mutex_unlock(&sbi->connections.node_lock); + stash_ok += node->stats.stash.total_ok; + stash_fail += 
node->stats.stash.total_fail;
+		stash_ok_pages += node->stats.stash.ok_pages;
+		stash_fail_pages += node->stats.stash.fail_pages;
+		restore_ok += node->stats.restore.total_ok;
+		restore_fail += node->stats.restore.total_fail;
+		restore_ok_pages += node->stats.restore.ok_pages;
+		restore_fail_pages += node->stats.restore.fail_pages;
+		rebuild_ok += node->stats.rebuild.total_ok;
+		rebuild_fail += node->stats.rebuild.total_fail;
+		rebuild_invalid += node->stats.rebuild.total_invalid;
+		rebuild_time += node->stats.rebuild.time;
+		peer_put(node);
+		mutex_lock(&sbi->connections.node_lock);
+	}
+	mutex_unlock(&sbi->connections.node_lock);
+
+	return snprintf(buf, PAGE_SIZE,
+			"%u %u %llu %llu\n"
+			"%u %u %llu %llu\n"
+			"%u %u %u %u\n",
+			stash_ok, stash_fail, stash_ok_pages, stash_fail_pages,
+			restore_ok, restore_fail, restore_ok_pages,
+			restore_fail_pages, rebuild_ok, rebuild_fail,
+			rebuild_invalid, rebuild_time);
+}
+
+static struct sbi_attribute sbi_peers_attr = __ATTR_RO(peers_sum_attr);
+
+const char * const flag_name[] = {
+	"READPAGES",
+	"READPAGES_OPEN",
+	"ATOMIC_OPEN",
+};
+
+static ssize_t fill_features(char *buf, unsigned long long flag)
+{
+	int i;
+	ssize_t pos = 0;
+	bool sep = false;
+	int flag_name_count = ARRAY_SIZE(flag_name);
+
+	for (i = 0; i < sizeof(flag) * BITS_PER_BYTE; ++i) {
+		if (!(flag & BIT(i)))
+			continue;
+
+		if (sep)
+			pos += snprintf(buf + pos, PAGE_SIZE - pos, "|");
+		sep = true;
+
+		if (pos >= PAGE_SIZE) {
+			pos = PAGE_SIZE;
+			break;
+		}
+
+		if (i < flag_name_count && flag_name[i])
+			pos += snprintf(buf + pos, PAGE_SIZE - pos, "%s",
+					flag_name[i]);
+		else
+			pos += snprintf(buf + pos, PAGE_SIZE - pos, "%d", i);
+
+		if (pos >= PAGE_SIZE) {
+			pos = PAGE_SIZE;
+			break;
+		}
+	}
+	pos += snprintf(buf + pos, PAGE_SIZE - pos, "\n");
+	if (pos >= PAGE_SIZE)
+		pos = PAGE_SIZE;
+
+	return pos;
+}
+
+static ssize_t sbi_features_show(struct kobject *kobj,
+				 struct sbi_attribute *attr, char *buf)
+{
+	struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	return fill_features(buf, sbi->s_features);
+}
+
+static struct sbi_attribute sbi_features_attr = __ATTR(features, 0444,
+						       sbi_features_show, NULL);
+
+static struct attribute *sbi_attrs[] = {
+	&sbi_cmd_attr.attr,
+	&sbi_status_attr.attr,
+	&sbi_statistic_attr.attr,
+	&sbi_dcache_precision_attr.attr,
+	&sbi_dcache_threshold_attr.attr,
+	&sbi_dcache_timeout_attr.attr,
+	&sbi_write_cache_timeout_sec_attr.attr,
+	&sbi_local_op_attr.attr,
+	&sbi_delay_resp_attr.attr,
+	&sbi_wb_timeout_ms_attr.attr,
+	&sbi_dirty_writeback_centisecs_attr.attr,
+	&sbi_dirty_file_background_bytes_attr.attr,
+	&sbi_dirty_fs_background_bytes_attr.attr,
+	&sbi_dirty_file_bytes_attr.attr,
+	&sbi_dirty_fs_bytes_attr.attr,
+	&sbi_dirty_writeback_autothresh_attr.attr,
+	&sbi_dirty_writeback_timelimit_attr.attr,
+	&sbi_dirty_thresh_lowerlimit_attr.attr,
+	&sbi_dirty_writeback_control_attr.attr,
+	&sbi_dirty_writeback_stats_attr.attr,
+	&sbi_srv_dirty_thresh_attr.attr,
+	&sbi_srv_dirty_wb_control_attr.attr,
+	&sbi_node_evt_cb_delay_attr.attr,
+	&sbi_srv_req_max_active_attr.attr,
+	&sbi_pending_message_attr.attr,
+	&sbi_peer_opened_fd_attr.attr,
+	&sbi_server_cache_file_attr.attr,
+	&sbi_client_cache_file_attr.attr,
+	&sbi_seq_attr.attr,
+	&sbi_peers_attr.attr,
+	&sbi_features_attr.attr,
+	NULL,
+};
+
+static ssize_t sbi_attr_show(struct kobject *kobj, struct attribute *attr,
+			     char *buf)
+{
+	struct sbi_attribute *sbi_attr = to_sbi_attr(attr);
+
+	if (!sbi_attr->show)
+		return -EIO;
+	return sbi_attr->show(kobj, sbi_attr, buf);
+}
+
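+/*
+ * Generic show/store dispatch for the sbi kobject: sysfs hands us a bare
+ * struct attribute, and container_of() (via to_sbi_attr()) recovers the
+ * enclosing sbi_attribute so the typed per-file callbacks can run.  A
+ * read of /sys/fs/hmdfs/<mount>/features, for instance, reaches
+ * sbi_features_show() through sbi_attr_show() above.
+ */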
+static ssize_t sbi_attr_store(struct kobject *kobj, struct attribute *attr,
+			      const char *buf, size_t len)
+{
+	struct sbi_attribute *sbi_attr = to_sbi_attr(attr);
+
+	if (!sbi_attr->store)
+		return -EIO;
+	return sbi_attr->store(kobj, sbi_attr, buf, len);
+}
+
+static const struct sysfs_ops sbi_sysfs_ops = {
+	.show = sbi_attr_show,
+	.store = sbi_attr_store,
+};
+
+static void sbi_release(struct kobject *kobj)
+{
+	struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	complete(&sbi->s_kobj_unregister);
+}
+
+static struct kobj_type sbi_ktype = {
+	.sysfs_ops = &sbi_sysfs_ops,
+	.default_attrs = sbi_attrs,
+	.release = sbi_release,
+};
+
+static inline struct sbi_cmd_attribute *to_sbi_cmd_attr(struct attribute *x)
+{
+	return container_of(x, struct sbi_cmd_attribute, attr);
+}
+
+static inline struct hmdfs_sb_info *cmd_kobj_to_sbi(struct kobject *x)
+{
+	return container_of(x, struct hmdfs_sb_info, s_cmd_timeout_kobj);
+}
+
+static ssize_t cmd_timeout_show(struct kobject *kobj, struct attribute *attr,
+				char *buf)
+{
+	int cmd = to_sbi_cmd_attr(attr)->command;
+	struct hmdfs_sb_info *sbi = cmd_kobj_to_sbi(kobj);
+
+	if (cmd < 0 || cmd >= F_SIZE)
+		return 0;
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", get_cmd_timeout(sbi, cmd));
+}
+
+static ssize_t cmd_timeout_store(struct kobject *kobj, struct attribute *attr,
+				 const char *buf, size_t len)
+{
+	unsigned int value;
+	int cmd = to_sbi_cmd_attr(attr)->command;
+	int ret = kstrtouint(skip_spaces(buf), 0, &value);
+	struct hmdfs_sb_info *sbi = cmd_kobj_to_sbi(kobj);
+
+	if (cmd < 0 || cmd >= F_SIZE)
+		return -EINVAL;
+
+	if (!ret)
+		set_cmd_timeout(sbi, cmd, value);
+
+	return ret ? ret : len;
+}
+
+#define HMDFS_CMD_ATTR(_name, _cmd) \
+	static struct sbi_cmd_attribute hmdfs_attr_##_name = { \
+		.attr = { .name = __stringify(_name), .mode = 0664 }, \
+		.command = (_cmd), \
+	}
+
+HMDFS_CMD_ATTR(open, F_OPEN);
+HMDFS_CMD_ATTR(release, F_RELEASE);
+HMDFS_CMD_ATTR(readpage, F_READPAGE);
+HMDFS_CMD_ATTR(writepage, F_WRITEPAGE);
+HMDFS_CMD_ATTR(iterate, F_ITERATE);
+HMDFS_CMD_ATTR(rmdir, F_RMDIR);
+HMDFS_CMD_ATTR(unlink, F_UNLINK);
+HMDFS_CMD_ATTR(rename, F_RENAME);
+HMDFS_CMD_ATTR(setattr, F_SETATTR);
+HMDFS_CMD_ATTR(statfs, F_STATFS);
+HMDFS_CMD_ATTR(drop_push, F_DROP_PUSH);
+HMDFS_CMD_ATTR(getattr, F_GETATTR);
+HMDFS_CMD_ATTR(fsync, F_FSYNC);
+HMDFS_CMD_ATTR(syncfs, F_SYNCFS);
+HMDFS_CMD_ATTR(getxattr, F_GETXATTR);
+HMDFS_CMD_ATTR(setxattr, F_SETXATTR);
+HMDFS_CMD_ATTR(listxattr, F_LISTXATTR);
+
+#define ATTR_LIST(_name) (&hmdfs_attr_##_name.attr)
+
+static struct attribute *sbi_timeout_attrs[] = {
+	ATTR_LIST(open), ATTR_LIST(release),
+	ATTR_LIST(readpage), ATTR_LIST(writepage),
+	ATTR_LIST(iterate), ATTR_LIST(rmdir),
+	ATTR_LIST(unlink), ATTR_LIST(rename),
+	ATTR_LIST(setattr),
+	ATTR_LIST(statfs), ATTR_LIST(drop_push),
+	ATTR_LIST(getattr), ATTR_LIST(fsync),
+	ATTR_LIST(syncfs), ATTR_LIST(getxattr),
+	ATTR_LIST(setxattr), ATTR_LIST(listxattr),
+	NULL
+};
+
+static const struct sysfs_ops sbi_cmd_sysfs_ops = {
+	.show = cmd_timeout_show,
+	.store = cmd_timeout_store,
+};
+
+static void sbi_timeout_release(struct kobject *kobj)
+{
+	struct hmdfs_sb_info *sbi = container_of(kobj, struct hmdfs_sb_info,
+						 s_cmd_timeout_kobj);
+
+	complete(&sbi->s_timeout_kobj_unregister);
+}
+
+static struct kobj_type sbi_timeout_ktype = {
+	.sysfs_ops = &sbi_cmd_sysfs_ops,
+	.default_attrs = sbi_timeout_attrs,
+	.release = sbi_timeout_release,
+};
+
+void hmdfs_release_sysfs(struct hmdfs_sb_info *sbi)
+{
+	kobject_put(&sbi->s_cmd_timeout_kobj);
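+	/*
+	 * kobject_put() may not drop the last reference right away (e.g.
+	 * while a sysfs file is still open), so wait until the ktype
+	 * ->release() callback has fired complete() before tearing down
+	 * more of sbi: sbi_timeout_release() signals
+	 * s_timeout_kobj_unregister and sbi_release() signals
+	 * s_kobj_unregister.
+	 */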
+	wait_for_completion(&sbi->s_timeout_kobj_unregister);
+	kobject_put(&sbi->kobj);
+	wait_for_completion(&sbi->s_kobj_unregister);
+}
+
+int hmdfs_register_sysfs(const char *name, struct hmdfs_sb_info *sbi)
+{
+	int ret;
+	struct kobject *kobj = NULL;
+
+	mutex_lock(&hmdfs_sysfs_mutex);
+	kobj = kset_find_obj(hmdfs_kset, name);
+	if (kobj) {
+		hmdfs_err("mount failed, sysfs entry already exists");
+		kobject_put(kobj);
+		mutex_unlock(&hmdfs_sysfs_mutex);
+		return -EEXIST;
+	}
+
+	sbi->kobj.kset = hmdfs_kset;
+	init_completion(&sbi->s_kobj_unregister);
+	ret = kobject_init_and_add(&sbi->kobj, &sbi_ktype,
+				   &hmdfs_kset->kobj, "%s", name);
+	mutex_unlock(&hmdfs_sysfs_mutex);
+
+	if (ret) {
+		kobject_put(&sbi->kobj);
+		wait_for_completion(&sbi->s_kobj_unregister);
+		return ret;
+	}
+
+	init_completion(&sbi->s_timeout_kobj_unregister);
+	ret = kobject_init_and_add(&sbi->s_cmd_timeout_kobj, &sbi_timeout_ktype,
+				   &sbi->kobj, "cmd_timeout");
+	if (ret) {
+		hmdfs_release_sysfs(sbi);
+		return ret;
+	}
+
+	kobject_uevent(&sbi->kobj, KOBJ_ADD);
+	return 0;
+}
+
+void hmdfs_unregister_sysfs(struct hmdfs_sb_info *sbi)
+{
+	kobject_del(&sbi->s_cmd_timeout_kobj);
+	kobject_del(&sbi->kobj);
+}
+
+static inline int to_sysfs_fmt_evt(unsigned int evt)
+{
+	return evt == RAW_NODE_EVT_NR ? -1 : evt;
+}
+
+static ssize_t features_show(struct kobject *kobj, struct peer_attribute *attr,
+			     char *buf)
+{
+	struct hmdfs_peer *peer = to_peer(kobj);
+
+	return fill_features(buf, peer->features);
+}
+
+static ssize_t event_show(struct kobject *kobj, struct peer_attribute *attr,
+			  char *buf)
+{
+	struct hmdfs_peer *peer = to_peer(kobj);
+
+	return snprintf(buf, PAGE_SIZE,
+			"cur_async evt %d seq %u\n"
+			"cur_sync evt %d seq %u\n"
+			"pending evt %d seq %u\n"
+			"merged evt %u\n"
+			"dup_drop evt %u %u\n"
+			"waiting evt %u %u\n"
+			"seq_tbl %u %u %u %u\n"
+			"seq_rd_idx %u\n"
+			"seq_wr_idx %u\n",
+			to_sysfs_fmt_evt(peer->cur_evt[0]),
+			peer->cur_evt_seq[0],
+			to_sysfs_fmt_evt(peer->cur_evt[1]),
+			peer->cur_evt_seq[1],
+			to_sysfs_fmt_evt(peer->pending_evt),
+			peer->pending_evt_seq,
+			peer->merged_evt,
+			peer->dup_evt[RAW_NODE_EVT_OFF],
+			peer->dup_evt[RAW_NODE_EVT_ON],
+			peer->waiting_evt[RAW_NODE_EVT_OFF],
+			peer->waiting_evt[RAW_NODE_EVT_ON],
+			peer->seq_tbl[0], peer->seq_tbl[1], peer->seq_tbl[2],
+			peer->seq_tbl[3],
+			peer->seq_rd_idx % RAW_NODE_EVT_MAX_NR,
+			peer->seq_wr_idx % RAW_NODE_EVT_MAX_NR);
+}
+
+static ssize_t stash_show(struct kobject *kobj, struct peer_attribute *attr,
+			  char *buf)
+{
+	struct hmdfs_peer *peer = to_peer(kobj);
+
+	return snprintf(buf, PAGE_SIZE,
+			"cur_ok %u\n"
+			"cur_nothing %u\n"
+			"cur_fail %u\n"
+			"total_ok %u\n"
+			"total_nothing %u\n"
+			"total_fail %u\n"
+			"ok_pages %llu\n"
+			"fail_pages %llu\n",
+			peer->stats.stash.cur_ok,
+			peer->stats.stash.cur_nothing,
+			peer->stats.stash.cur_fail,
+			peer->stats.stash.total_ok,
+			peer->stats.stash.total_nothing,
+			peer->stats.stash.total_fail,
+			peer->stats.stash.ok_pages,
+			peer->stats.stash.fail_pages);
+}
+
+static ssize_t restore_show(struct kobject *kobj, struct peer_attribute *attr,
+			    char *buf)
+{
+	struct hmdfs_peer *peer = to_peer(kobj);
+
+	return snprintf(buf, PAGE_SIZE,
+			"cur_ok %u\n"
+			"cur_fail %u\n"
+			"cur_keep %u\n"
+			"total_ok %u\n"
+			"total_fail %u\n"
+			"total_keep %u\n"
+			"ok_pages %llu\n"
+			"fail_pages %llu\n",
+			peer->stats.restore.cur_ok,
+			peer->stats.restore.cur_fail,
+			peer->stats.restore.cur_keep,
+			peer->stats.restore.total_ok,
+			peer->stats.restore.total_fail,
+			peer->stats.restore.total_keep,
+			peer->stats.restore.ok_pages,
peer->stats.restore.fail_pages); +} + +static ssize_t rebuild_show(struct kobject *kobj, struct peer_attribute *attr, + char *buf) +{ + struct hmdfs_peer *peer = to_peer(kobj); + + return snprintf(buf, PAGE_SIZE, + "cur_ok %u\n" + "cur_fail %u\n" + "cur_invalid %u\n" + "total_ok %u\n" + "total_fail %u\n" + "total_invalid %u\n" + "time %u\n", + peer->stats.rebuild.cur_ok, + peer->stats.rebuild.cur_fail, + peer->stats.rebuild.cur_invalid, + peer->stats.rebuild.total_ok, + peer->stats.rebuild.total_fail, + peer->stats.rebuild.total_invalid, + peer->stats.rebuild.time); +} + +static struct peer_attribute peer_features_attr = __ATTR_RO(features); +static struct peer_attribute peer_event_attr = __ATTR_RO(event); +static struct peer_attribute peer_stash_attr = __ATTR_RO(stash); +static struct peer_attribute peer_restore_attr = __ATTR_RO(restore); +static struct peer_attribute peer_rebuild_attr = __ATTR_RO(rebuild); + +static struct attribute *peer_attrs[] = { + &peer_features_attr.attr, + &peer_event_attr.attr, + &peer_stash_attr.attr, + &peer_restore_attr.attr, + &peer_rebuild_attr.attr, + NULL, +}; + +static ssize_t peer_attr_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct peer_attribute *peer_attr = to_peer_attr(attr); + + if (!peer_attr->show) + return -EIO; + return peer_attr->show(kobj, peer_attr, buf); +} + +static ssize_t peer_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct peer_attribute *peer_attr = to_peer_attr(attr); + + if (!peer_attr->store) + return -EIO; + return peer_attr->store(kobj, peer_attr, buf, len); +} + +static const struct sysfs_ops peer_sysfs_ops = { + .show = peer_attr_show, + .store = peer_attr_store, +}; + +static void peer_sysfs_release(struct kobject *kobj) +{ + struct hmdfs_peer *peer = to_peer(kobj); + + complete(&peer->kobj_unregister); +} + +static struct kobj_type peer_ktype = { + .sysfs_ops = &peer_sysfs_ops, + .default_attrs = peer_attrs, + .release = peer_sysfs_release, +}; + +int hmdfs_register_peer_sysfs(struct hmdfs_sb_info *sbi, + struct hmdfs_peer *peer) +{ + int err = 0; + + init_completion(&peer->kobj_unregister); + err = kobject_init_and_add(&peer->kobj, &peer_ktype, &sbi->kobj, + "peer_%llu", peer->device_id); + return err; +} + +void hmdfs_release_peer_sysfs(struct hmdfs_peer *peer) +{ + kobject_del(&peer->kobj); + kobject_put(&peer->kobj); + wait_for_completion(&peer->kobj_unregister); +} + +void notify(struct hmdfs_peer *node, struct notify_param *param) +{ + struct hmdfs_sb_info *sbi = node->sbi; + int in_len; + + if (!param) + return; + spin_lock(&sbi->notify_fifo_lock); + in_len = + kfifo_in(&sbi->notify_fifo, param, sizeof(struct notify_param)); + spin_unlock(&sbi->notify_fifo_lock); + if (in_len != sizeof(struct notify_param)) + return; + sysfs_notify(&sbi->kobj, NULL, "cmd"); +} + +int hmdfs_sysfs_init(void) +{ + hmdfs_kset = kset_create_and_add("hmdfs", NULL, fs_kobj); + if (!hmdfs_kset) + return -ENOMEM; + + return 0; +} + +void hmdfs_sysfs_exit(void) +{ + kset_unregister(hmdfs_kset); + hmdfs_kset = NULL; +} diff --git a/fs/hmdfs/comm/device_node.h b/fs/hmdfs/comm/device_node.h new file mode 100644 index 000000000000..3c99c7fb679f --- /dev/null +++ b/fs/hmdfs/comm/device_node.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/device_node.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#ifndef HMDFS_DEVICE_NODE_H +#define HMDFS_DEVICE_NODE_H + +#include "hmdfs.h" +#include "transport.h" + +enum CTRL_NODE_CMD { + CMD_UPDATE_SOCKET = 0, + CMD_OFF_LINE, + CMD_OFF_LINE_ALL, + CMD_CNT, +}; + +struct update_socket_param { + int32_t cmd; + int32_t newfd; + uint8_t status; + uint8_t masterkey[HMDFS_KEY_SIZE]; + uint8_t cid[HMDFS_CID_SIZE]; +} __packed; + +struct offline_param { + int32_t cmd; + uint8_t remote_cid[HMDFS_CID_SIZE]; +} __packed; + +struct offline_all_param { + int32_t cmd; +} __packed; + +enum NOTIFY { + NOTIFY_GET_SESSION, + NOTIFY_OFFLINE, + NOTIFY_NONE, + NOTIFY_CNT, +}; + +struct notify_param { + int32_t notify; + int32_t fd; + uint8_t remote_cid[HMDFS_CID_SIZE]; +} __packed; + +struct sbi_attribute { + struct attribute attr; + ssize_t (*show)(struct kobject *kobj, struct sbi_attribute *attr, + char *buf); + ssize_t (*store)(struct kobject *kobj, struct sbi_attribute *attr, + const char *buf, size_t len); +}; + +struct peer_attribute { + struct attribute attr; + ssize_t (*show)(struct kobject *kobj, struct peer_attribute *attr, + char *buf); + ssize_t (*store)(struct kobject *kobj, struct peer_attribute *attr, + const char *buf, size_t len); +}; + +struct sbi_cmd_attribute { + struct attribute attr; + int command; +}; + +void notify(struct hmdfs_peer *node, struct notify_param *param); +int hmdfs_register_sysfs(const char *name, struct hmdfs_sb_info *sbi); +void hmdfs_unregister_sysfs(struct hmdfs_sb_info *sbi); +void hmdfs_release_sysfs(struct hmdfs_sb_info *sbi); +int hmdfs_register_peer_sysfs(struct hmdfs_sb_info *sbi, + struct hmdfs_peer *peer); +void hmdfs_release_peer_sysfs(struct hmdfs_peer *peer); +int hmdfs_sysfs_init(void); +void hmdfs_sysfs_exit(void); + +static inline struct sbi_attribute *to_sbi_attr(struct attribute *x) +{ + return container_of(x, struct sbi_attribute, attr); +} + +static inline struct hmdfs_sb_info *to_sbi(struct kobject *x) +{ + return container_of(x, struct hmdfs_sb_info, kobj); +} + +static inline struct peer_attribute *to_peer_attr(struct attribute *x) +{ + return container_of(x, struct peer_attribute, attr); +} + +static inline struct hmdfs_peer *to_peer(struct kobject *x) +{ + return container_of(x, struct hmdfs_peer, kobj); +} +#endif diff --git a/fs/hmdfs/comm/fault_inject.c b/fs/hmdfs/comm/fault_inject.c new file mode 100644 index 000000000000..11779b53b0ea --- /dev/null +++ b/fs/hmdfs/comm/fault_inject.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/comm/fault_inject.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#include "hmdfs.h" +#include "fault_inject.h" +#include "connection.h" + +static DECLARE_FAULT_ATTR(fail_default_attr); +static struct dentry *hmdfs_debugfs_root; + +void __init hmdfs_create_debugfs_root(void) +{ + hmdfs_debugfs_root = debugfs_create_dir("hmdfs", NULL); + if (!hmdfs_debugfs_root) + hmdfs_warning("failed to create debugfs directory"); +} + +void hmdfs_destroy_debugfs_root(void) +{ + debugfs_remove_recursive(hmdfs_debugfs_root); + hmdfs_debugfs_root = NULL; +} + +void hmdfs_fault_inject_init(struct hmdfs_fault_inject *fault_inject, + const char *name) +{ + struct dentry *dir = NULL; + struct dentry *parent = NULL; + struct fault_attr *attr = &fault_inject->attr; + + if (!hmdfs_debugfs_root) + return; + + parent = debugfs_create_dir(name, hmdfs_debugfs_root); + if (!parent) { + hmdfs_warning("failed to create %s debugfs directory", name); + return; + } + + *attr = fail_default_attr; + dir = fault_create_debugfs_attr("fault_inject", parent, attr); + if (IS_ERR(dir)) { + hmdfs_warning("hmdfs: failed to create debugfs attr"); + debugfs_remove_recursive(parent); + return; + } + fault_inject->parent = parent; + debugfs_create_ulong("op_mask", 0600, dir, &fault_inject->op_mask); + debugfs_create_ulong("fail_send_message", 0600, dir, + &fault_inject->fail_send_message); + debugfs_create_ulong("fake_fid_ver", 0600, dir, + &fault_inject->fake_fid_ver); + debugfs_create_bool("fail_req", 0600, dir, &fault_inject->fail_req); +} + +void hmdfs_fault_inject_fini(struct hmdfs_fault_inject *fault_inject) +{ + debugfs_remove_recursive(fault_inject->parent); +} + +bool hmdfs_should_fail_sendmsg(struct hmdfs_fault_inject *fault_inject, + struct hmdfs_peer *con, + struct hmdfs_send_data *msg, int *err) +{ + struct hmdfs_head_cmd *head = (struct hmdfs_head_cmd *)msg->head; + unsigned long type = fault_inject->fail_send_message; + + if (!test_bit(head->operations.command, &fault_inject->op_mask)) + return false; + + if (type != T_MSG_FAIL && type != T_MSG_DISCARD) + return false; + + if (!should_fail(&fault_inject->attr, 1)) + return false; + + if (type == T_MSG_FAIL) + *err = -EINVAL; + else if (type == T_MSG_DISCARD) + *err = 0; + + hmdfs_err( + "fault injection err %d, %s message, device_id %llu, msg_id %u, cmd %d", + *err, (type == T_MSG_FAIL) ? "fail" : "discard", con->device_id, + le32_to_cpu(head->msg_id), head->operations.command); + return true; +} + +bool hmdfs_should_fail_req(struct hmdfs_fault_inject *fault_inject, + struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + int *err) +{ + if (!test_bit(cmd->operations.command, &fault_inject->op_mask)) + return false; + + if (!fault_inject->fail_req) + return false; + + if (!should_fail(&fault_inject->attr, 1)) + return false; + + *err = -EIO; + hmdfs_err("fault injection err %d, device_id %llu, msg_id %u, cmd %d", + *err, con->device_id, le32_to_cpu(cmd->msg_id), + cmd->operations.command); + return true; +} + +bool hmdfs_should_fake_fid_ver(struct hmdfs_fault_inject *fault_inject, + struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, + enum CHANGE_FID_VER_TYPE fake_type) +{ + unsigned long type = fault_inject->fake_fid_ver; + + if (!test_bit(cmd->operations.command, &fault_inject->op_mask)) + return false; + + if (type != fake_type) + return false; + + if (!should_fail(&fault_inject->attr, 1)) + return false; + + hmdfs_err( + "fault injection to change fid ver by %s cookie, device_id %llu, msg_id %u, cmd %d", + (type == T_BOOT_COOKIE) ? 
"boot" : "con", con->device_id, + le32_to_cpu(cmd->msg_id), cmd->operations.command); + return true; +} diff --git a/fs/hmdfs/comm/fault_inject.h b/fs/hmdfs/comm/fault_inject.h new file mode 100644 index 000000000000..be8876ab0328 --- /dev/null +++ b/fs/hmdfs/comm/fault_inject.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/fault_inject.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_FAULT_INJECT_H +#define HMDFS_FAULT_INJECT_H + +#include +#include "protocol.h" + +struct hmdfs_fault_inject { +#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS + struct fault_attr attr; + struct dentry *parent; + unsigned long op_mask; + unsigned long fail_send_message; + unsigned long fake_fid_ver; + bool fail_req; +#endif +}; + +enum FAIL_MESSAGE_TYPE { + T_MSG_FAIL = 1, + T_MSG_DISCARD = 2, +}; + +enum CHANGE_FID_VER_TYPE { + T_BOOT_COOKIE = 1, + T_CON_COOKIE = 2, +}; + +#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS +void __init hmdfs_create_debugfs_root(void); +void hmdfs_destroy_debugfs_root(void); + +void hmdfs_fault_inject_init(struct hmdfs_fault_inject *fault_inject, + const char *name); +void hmdfs_fault_inject_fini(struct hmdfs_fault_inject *fault_inject); +bool hmdfs_should_fail_sendmsg(struct hmdfs_fault_inject *fault_inject, + struct hmdfs_peer *con, + struct hmdfs_send_data *msg, int *err); +bool hmdfs_should_fail_req(struct hmdfs_fault_inject *fault_inject, + struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + int *err); +bool hmdfs_should_fake_fid_ver(struct hmdfs_fault_inject *fault_inject, + struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, + enum CHANGE_FID_VER_TYPE fake_type); +#else +static inline void __init hmdfs_create_debugfs_root(void) {} +static inline void hmdfs_destroy_debugfs_root(void) {} + +static inline void +hmdfs_fault_inject_init(struct hmdfs_fault_inject *fault_inject, + const char *name) +{ +} +static inline void +hmdfs_fault_inject_fini(struct hmdfs_fault_inject *fault_inject) +{ +} +static inline bool +hmdfs_should_fail_sendmsg(struct hmdfs_fault_inject *fault_inject, + struct hmdfs_peer *con, struct hmdfs_send_data *msg, + int *err) +{ + return false; +} +static inline bool +hmdfs_should_fail_req(struct hmdfs_fault_inject *fault_inject, + struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + int *err) +{ + return false; +} +static inline bool +hmdfs_should_fake_fid_ver(struct hmdfs_fault_inject *fault_inject, + struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + enum CHANGE_FID_VER_TYPE fake_type) +{ + return false; +} +#endif + +#endif // HMDFS_FAULT_INJECT_H diff --git a/fs/hmdfs/comm/message_verify.c b/fs/hmdfs/comm/message_verify.c new file mode 100644 index 000000000000..c9eb94d8b615 --- /dev/null +++ b/fs/hmdfs/comm/message_verify.c @@ -0,0 +1,985 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/comm/message_verify.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#include "message_verify.h" + +#include +#include +#include + +#include "connection.h" +#include "hmdfs.h" +#include "hmdfs_server.h" + +size_t message_length[C_FLAG_SIZE][F_SIZE][HMDFS_MESSAGE_MIN_MAX]; +bool need_response[F_SIZE]; + +void hmdfs_message_verify_init(void) +{ + int flag, cmd; + + for (cmd = 0; cmd < F_SIZE; cmd++) + need_response[cmd] = true; + need_response[F_RELEASE] = false; + need_response[F_CONNECT_REKEY] = false; + need_response[F_DROP_PUSH] = false; + + for (flag = 0; flag < C_FLAG_SIZE; flag++) { + for (cmd = 0; cmd < F_SIZE; cmd++) { + message_length[flag][cmd][HMDFS_MESSAGE_MIN_INDEX] = 1; + message_length[flag][cmd][HMDFS_MESSAGE_MAX_INDEX] = 0; + message_length[flag][cmd][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + } + } + + message_length[C_REQUEST][F_OPEN][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct open_request); + message_length[C_REQUEST][F_OPEN][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct open_request) + PATH_MAX + 1; + message_length[C_REQUEST][F_OPEN][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_OPEN][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_OPEN][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct open_response); + message_length[C_RESPONSE][F_OPEN][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_ATOMIC_OPEN][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct atomic_open_request); + message_length[C_REQUEST][F_ATOMIC_OPEN][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct atomic_open_request) + PATH_MAX + NAME_MAX + 1; + message_length[C_REQUEST][F_ATOMIC_OPEN][HMDFS_MESSAGE_LEN_JUDGE_INDEX] + = MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_ATOMIC_OPEN][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_ATOMIC_OPEN][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct atomic_open_response); + message_length[C_RESPONSE][F_ATOMIC_OPEN][HMDFS_MESSAGE_LEN_JUDGE_INDEX] + = MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_RELEASE][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct release_request); + message_length[C_REQUEST][F_RELEASE][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct release_request); + message_length[C_REQUEST][F_RELEASE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_FSYNC][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct fsync_request); + message_length[C_REQUEST][F_FSYNC][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct fsync_request); + message_length[C_REQUEST][F_FSYNC][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + message_length[C_RESPONSE][F_FSYNC][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_FSYNC][HMDFS_MESSAGE_MAX_INDEX] = 0; + message_length[C_RESPONSE][F_FSYNC][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_READPAGE][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct readpage_request); + message_length[C_REQUEST][F_READPAGE][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct readpage_request); + message_length[C_REQUEST][F_READPAGE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + message_length[C_RESPONSE][F_READPAGE][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_READPAGE][HMDFS_MESSAGE_MAX_INDEX] = + HMDFS_PAGE_SIZE; + message_length[C_RESPONSE][F_READPAGE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + + message_length[C_REQUEST][F_READPAGES][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct readpages_request); + message_length[C_REQUEST][F_READPAGES][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct 
readpages_request); + message_length[C_REQUEST][F_READPAGES][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + message_length[C_RESPONSE][F_READPAGES][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_READPAGES][HMDFS_MESSAGE_MAX_INDEX] = + HMDFS_READPAGES_NR_MAX * HMDFS_PAGE_SIZE; + message_length[C_RESPONSE][F_READPAGES][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + + message_length[C_REQUEST][F_READPAGES_OPEN][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct readpages_open_request); + message_length[C_REQUEST][F_READPAGES_OPEN][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct readpages_open_request) + PATH_MAX + 1; + message_length[C_REQUEST][F_READPAGES_OPEN][ + HMDFS_MESSAGE_LEN_JUDGE_INDEX] = MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_READPAGES_OPEN][HMDFS_MESSAGE_MIN_INDEX] = + 0; + message_length[C_RESPONSE][F_READPAGES_OPEN][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct readpages_open_response) + + HMDFS_READPAGES_NR_MAX * HMDFS_PAGE_SIZE; + message_length[C_RESPONSE][F_READPAGES_OPEN][ + HMDFS_MESSAGE_LEN_JUDGE_INDEX] = MESSAGE_LEN_JUDGE_RANGE; + + message_length[C_REQUEST][F_WRITEPAGE][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct writepage_request) + HMDFS_PAGE_SIZE; + message_length[C_REQUEST][F_WRITEPAGE][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct writepage_request) + HMDFS_PAGE_SIZE; + message_length[C_REQUEST][F_WRITEPAGE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + message_length[C_RESPONSE][F_WRITEPAGE][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_WRITEPAGE][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct writepage_response); + message_length[C_RESPONSE][F_WRITEPAGE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_ITERATE][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct readdir_request); + message_length[C_REQUEST][F_ITERATE][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct readdir_request) + PATH_MAX + 1; + message_length[C_REQUEST][F_ITERATE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_ITERATE][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_ITERATE][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(__le64) + HMDFS_MAX_MESSAGE_LEN; + message_length[C_RESPONSE][F_ITERATE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + + message_length[C_REQUEST][F_MKDIR][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct mkdir_request); + message_length[C_REQUEST][F_MKDIR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct mkdir_request) + PATH_MAX + NAME_MAX + 2; + message_length[C_REQUEST][F_MKDIR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_MKDIR][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct hmdfs_inodeinfo_response); + message_length[C_RESPONSE][F_MKDIR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct hmdfs_inodeinfo_response); + message_length[C_RESPONSE][F_MKDIR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_CREATE][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct create_request); + message_length[C_REQUEST][F_CREATE][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct create_request) + PATH_MAX + NAME_MAX + 2; + message_length[C_REQUEST][F_CREATE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_CREATE][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct hmdfs_inodeinfo_response); + message_length[C_RESPONSE][F_CREATE][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct hmdfs_inodeinfo_response); + 
message_length[C_RESPONSE][F_CREATE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_RMDIR][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct rmdir_request); + message_length[C_REQUEST][F_RMDIR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct rmdir_request) + PATH_MAX + NAME_MAX + 2; + message_length[C_REQUEST][F_RMDIR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_RMDIR][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_RMDIR][HMDFS_MESSAGE_MAX_INDEX] = 0; + message_length[C_RESPONSE][F_RMDIR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_UNLINK][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct unlink_request); + message_length[C_REQUEST][F_UNLINK][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct unlink_request) + PATH_MAX + NAME_MAX + 2; + message_length[C_REQUEST][F_UNLINK][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_UNLINK][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_UNLINK][HMDFS_MESSAGE_MAX_INDEX] = 0; + message_length[C_RESPONSE][F_UNLINK][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_RENAME][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct rename_request); + message_length[C_REQUEST][F_RENAME][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct rename_request) + 4 + 4 * PATH_MAX; + message_length[C_REQUEST][F_RENAME][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_RENAME][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_RENAME][HMDFS_MESSAGE_MAX_INDEX] = 0; + message_length[C_RESPONSE][F_RENAME][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_SETATTR][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct setattr_request); + message_length[C_REQUEST][F_SETATTR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct setattr_request) + PATH_MAX + 1; + message_length[C_REQUEST][F_SETATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_SETATTR][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_SETATTR][HMDFS_MESSAGE_MAX_INDEX] = 0; + message_length[C_RESPONSE][F_SETATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_GETATTR][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct getattr_request); + message_length[C_REQUEST][F_GETATTR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct getattr_request) + PATH_MAX + 1; + message_length[C_REQUEST][F_GETATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_GETATTR][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_GETATTR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct getattr_response); + message_length[C_RESPONSE][F_GETATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_STATFS][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct statfs_request); + message_length[C_REQUEST][F_STATFS][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct statfs_request) + PATH_MAX + 1; + message_length[C_REQUEST][F_STATFS][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_STATFS][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_STATFS][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct statfs_response); + message_length[C_RESPONSE][F_STATFS][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_SYNCFS][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct 
syncfs_request); + message_length[C_REQUEST][F_SYNCFS][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct syncfs_request); + message_length[C_REQUEST][F_SYNCFS][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + message_length[C_RESPONSE][F_SYNCFS][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_SYNCFS][HMDFS_MESSAGE_MAX_INDEX] = 0; + message_length[C_RESPONSE][F_SYNCFS][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_GETXATTR][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct getxattr_request); + message_length[C_REQUEST][F_GETXATTR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct getxattr_request) + PATH_MAX + XATTR_NAME_MAX + 2; + message_length[C_REQUEST][F_GETXATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_GETXATTR][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_GETXATTR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct getxattr_response) + HMDFS_XATTR_SIZE_MAX; + message_length[C_RESPONSE][F_GETXATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + + message_length[C_REQUEST][F_SETXATTR][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct setxattr_request); + message_length[C_REQUEST][F_SETXATTR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct setxattr_request) + PATH_MAX + XATTR_NAME_MAX + + HMDFS_XATTR_SIZE_MAX + 2; + message_length[C_REQUEST][F_SETXATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_SETXATTR][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_SETXATTR][HMDFS_MESSAGE_MAX_INDEX] = 0; + message_length[C_RESPONSE][F_SETXATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_LISTXATTR][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct listxattr_request); + message_length[C_REQUEST][F_LISTXATTR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct listxattr_request) + PATH_MAX + 1; + message_length[C_REQUEST][F_LISTXATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_LISTXATTR][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_LISTXATTR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct listxattr_response) + HMDFS_LISTXATTR_SIZE_MAX; + message_length[C_RESPONSE][F_LISTXATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + + message_length[C_REQUEST][F_CONNECT_REKEY][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct connection_rekey_request); + message_length[C_REQUEST][F_CONNECT_REKEY][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct connection_rekey_request); + message_length[C_REQUEST][F_CONNECT_REKEY] + [HMDFS_MESSAGE_LEN_JUDGE_INDEX] = MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_DROP_PUSH][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct drop_push_request); + message_length[C_REQUEST][F_DROP_PUSH][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct drop_push_request) + PATH_MAX + 1; + message_length[C_REQUEST][F_DROP_PUSH][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; +} + +static void find_first_no_slash(const char **name, int *len) +{ + const char *s = *name; + int l = *len; + + while (*s == '/' && l > 0) { + s++; + l--; + } + + *name = s; + *len = l; +} + +static void find_first_slash(const char **name, int *len) +{ + const char *s = *name; + int l = *len; + + while (*s != '/' && l > 0) { + s++; + l--; + } + + *name = s; + *len = l; +} + +static bool path_contain_dotdot(const char *name, int len) +{ + while (true) { + find_first_no_slash(&name, &len); + + if (len == 0) + return false; + + if (len >= 2 && name[0] == '.' 
&& name[1] == '.' &&
+		    (len == 2 || name[2] == '/'))
+			return true;
+
+		find_first_slash(&name, &len);
+	}
+}
+
+static int hmdfs_open_message_verify(int flag, size_t len, void *data)
+{
+	struct open_request *req = NULL;
+	size_t tmp_len = 0;
+	int path_len;
+
+	if (flag != C_REQUEST || !data)
+		return 0;
+
+	req = data;
+	path_len = le32_to_cpu(req->path_len);
+	tmp_len = strnlen(req->buf, PATH_MAX);
+	if (tmp_len == PATH_MAX ||
+	    tmp_len != len - sizeof(struct open_request) - 1 ||
+	    path_len != tmp_len) {
+		hmdfs_err("verify fail");
+		return -EINVAL;
+	}
+
+	/*
+	 * We only allow the server to open files inside hmdfs, so make
+	 * sure the path does not contain "..".
+	 */
+	if (path_contain_dotdot(req->buf, path_len)) {
+		hmdfs_err("verify fail, path contains dotdot");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int hmdfs_atomic_open_verify(int flag, size_t len, void *data)
+{
+	struct atomic_open_request *req = NULL;
+	size_t total_len;
+	size_t path_len;
+	size_t max_path_size;
+	size_t file_len;
+	size_t max_file_size;
+
+	if (flag != C_REQUEST || !data)
+		return 0;
+
+	req = data;
+	total_len = len - sizeof(*req);
+	max_path_size = min_t(size_t, PATH_MAX, total_len);
+	path_len = strnlen(req->buf, max_path_size);
+	/* the file name needs at least 2 bytes */
+	if (path_len == max_path_size || path_len + 3 > total_len) {
+		hmdfs_err("verify fail, len %zu, path_len %zu", len, path_len);
+		return -EINVAL;
+	}
+
+	max_file_size = min_t(size_t, NAME_MAX + 1, total_len - path_len - 1);
+	file_len = strnlen(req->buf + path_len + 1, max_file_size);
+
+	if (file_len == max_file_size ||
+	    total_len != path_len + 1 + file_len + 1 ||
+	    le32_to_cpu(req->path_len) != path_len ||
+	    le32_to_cpu(req->file_len) != file_len) {
+		hmdfs_err("verify fail, total_len %zu, path_len %zu, declared path_len %u, file_len %zu, declared file_len %u",
+			  total_len, path_len, le32_to_cpu(req->path_len),
+			  file_len, le32_to_cpu(req->file_len));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int hmdfs_iterate_verify(int flag, size_t len, void *data)
+{
+	int err = 0;
+	struct readdir_request *tmp_request = NULL;
+	char *tmp_char = NULL;
+	size_t tmp_len = 0;
+
+	if (flag == C_REQUEST) {
+		if (data) {
+			tmp_request = data;
+			tmp_char = tmp_request->path;
+			tmp_len = strnlen(tmp_char, PATH_MAX);
+		} else {
+			return err;
+		}
+
+		if (le32_to_cpu(tmp_request->path_len) != tmp_len ||
+		    len - sizeof(struct readdir_request) - 1 != tmp_len) {
+			err = -EINVAL;
+			hmdfs_err("verify fail");
+			return err;
+		}
+	}
+
+	return err;
+}
+
+static int hmdfs_mkdir_verify(int flag, size_t len, void *data)
+{
+	int err = 0;
+	struct mkdir_request *tmp_request = NULL;
+	char *tmp_char = NULL;
+	size_t tmp_path_len = 0;
+	size_t tmp_name_len = 0;
+	size_t tmp_char_path_len = 0;
+	size_t tmp_char_name_len = 0;
+
+	if (flag == C_REQUEST) {
+		if (data) {
+			tmp_request = data;
+			tmp_char = tmp_request->path;
+			tmp_path_len = le32_to_cpu(tmp_request->path_len);
+			tmp_name_len = le32_to_cpu(tmp_request->name_len);
+			tmp_char_path_len = strnlen(tmp_char, PATH_MAX);
+			tmp_char_name_len = strnlen(
+				tmp_char + tmp_char_path_len + 1, NAME_MAX);
+		} else {
+			return err;
+		}
+
+		if (tmp_path_len != tmp_char_path_len ||
+		    tmp_name_len != tmp_char_name_len ||
+		    len - sizeof(struct mkdir_request) !=
+			    tmp_path_len + 1 + tmp_name_len + 1) {
+			err = -EINVAL;
+			hmdfs_err("verify fail");
+			return err;
+		}
+	}
+	return err;
+}
+
+static int hmdfs_create_verify(int flag, size_t len, void *data)
+{
+	int err = 0;
+	struct create_request *tmp_request = NULL;
+ char *tmp_char = NULL; + size_t tmp_path_len = 0; + size_t tmp_name_len = 0; + size_t tmp_char_path_len = 0; + size_t tmp_char_name_len = 0; + + if (flag == C_REQUEST) { + if (data) { + tmp_request = data; + tmp_char = tmp_request->path; + tmp_path_len = le32_to_cpu(tmp_request->path_len); + tmp_name_len = le32_to_cpu(tmp_request->name_len); + tmp_char_path_len = strnlen(tmp_char, PATH_MAX); + tmp_char_name_len = strnlen( + tmp_char + tmp_char_path_len + 1, NAME_MAX); + } else { + return err; + } + + if (tmp_path_len != tmp_char_path_len || + tmp_name_len != tmp_char_name_len || + len - sizeof(struct create_request) != + tmp_path_len + 1 + tmp_name_len + 1) { + err = -EINVAL; + hmdfs_err("verify fail"); + return err; + } + } + return err; +} + +static int hmdfs_rmdir_verify(int flag, size_t len, void *data) +{ + int err = 0; + struct rmdir_request *tmp_request = NULL; + char *tmp_char = NULL; + size_t tmp_path_len = 0; + size_t tmp_name_len = 0; + size_t tmp_char_path_len = 0; + size_t tmp_char_name_len = 0; + + if (flag == C_REQUEST) { + if (data) { + tmp_request = data; + tmp_char = tmp_request->path; + tmp_path_len = le32_to_cpu(tmp_request->path_len); + tmp_name_len = le32_to_cpu(tmp_request->name_len); + tmp_char_path_len = strnlen(tmp_char, PATH_MAX); + tmp_char_name_len = strnlen( + tmp_char + tmp_char_path_len + 1, NAME_MAX); + } else { + return err; + } + + if (tmp_path_len != tmp_char_path_len || + tmp_name_len != tmp_char_name_len || + len - sizeof(struct rmdir_request) != + tmp_path_len + 1 + tmp_name_len + 1) { + err = -EINVAL; + hmdfs_err("verify fail"); + return err; + } + } + + return err; +} + +static int hmdfs_unlink_verify(int flag, size_t len, void *data) +{ + int err = 0; + struct unlink_request *tmp_request = NULL; + char *tmp_char = NULL; + size_t tmp_path_len = 0; + size_t tmp_name_len = 0; + size_t tmp_char_path_len = 0; + size_t tmp_char_name_len = 0; + + if (flag == C_REQUEST) { + if (data) { + tmp_request = data; + tmp_char = tmp_request->path; + tmp_path_len = le32_to_cpu(tmp_request->path_len); + tmp_name_len = le32_to_cpu(tmp_request->name_len); + tmp_char_path_len = strnlen(tmp_char, PATH_MAX); + tmp_char_name_len = strnlen( + tmp_char + tmp_char_path_len + 1, NAME_MAX); + } else { + return err; + } + + if (tmp_path_len != tmp_char_path_len || + tmp_name_len != tmp_char_name_len || + len - sizeof(struct unlink_request) != + tmp_path_len + 1 + tmp_name_len + 1) { + err = -EINVAL; + hmdfs_err("verify fail"); + return err; + } + } + + return err; +} + +static int hmdfs_rename_verify(int flag, size_t len, void *data) +{ + int err = 0; + struct rename_request *tmp_request = NULL; + char *tmp_char = NULL; + size_t tmp_old_path_len = 0; + size_t tmp_new_path_len = 0; + size_t tmp_old_name_len = 0; + size_t tmp_new_name_len = 0; + size_t tmp_char_old_path_len = 0; + size_t tmp_char_new_path_len = 0; + size_t tmp_char_old_name_len = 0; + size_t tmp_char_new_name_len = 0; + + if (flag == C_REQUEST) { + if (data) { + tmp_request = data; + tmp_char = tmp_request->path; + + tmp_old_path_len = + le32_to_cpu(tmp_request->old_path_len); + tmp_new_path_len = + le32_to_cpu(tmp_request->new_path_len); + tmp_old_name_len = + le32_to_cpu(tmp_request->old_name_len); + tmp_new_name_len = + le32_to_cpu(tmp_request->new_name_len); + + tmp_char_old_path_len = strnlen(tmp_char, PATH_MAX); + tmp_char_new_path_len = strnlen( + tmp_char + tmp_char_old_path_len + 1, PATH_MAX); + + tmp_char_old_name_len = + strnlen(tmp_char + tmp_char_old_path_len + 1 + + tmp_char_new_path_len + 1, + 
PATH_MAX); + tmp_char_new_name_len = + strnlen(tmp_char + tmp_char_old_path_len + 1 + + tmp_char_new_path_len + 1 + + tmp_char_old_name_len + 1, + PATH_MAX); + } else { + return err; + } + + if (tmp_new_name_len != tmp_char_new_name_len || + tmp_old_name_len != tmp_char_old_name_len || + tmp_new_path_len != tmp_char_new_path_len || + tmp_old_path_len != tmp_char_old_path_len || + len - sizeof(struct rename_request) != + tmp_new_name_len + 1 + tmp_old_name_len + 1 + + tmp_new_path_len + 1 + tmp_old_path_len + + 1) { + err = -EINVAL; + hmdfs_err("verify fail"); + return err; + } + } + + return err; +} + +static int hmdfs_setattr_verify(int flag, size_t len, void *data) +{ + int err = 0; + struct setattr_request *tmp_request = NULL; + char *tmp_char = NULL; + size_t tmp_len = 0; + + if (flag == C_REQUEST) { + if (data) { + tmp_request = data; + tmp_char = tmp_request->buf; + tmp_len = strnlen(tmp_char, PATH_MAX); + } else { + return err; + } + + if (tmp_len != len - sizeof(struct setattr_request) - 1 || + le32_to_cpu(tmp_request->path_len) != tmp_len) { + err = -EINVAL; + hmdfs_err("verify fail"); + return err; + } + } + + return err; +} + +static int hmdfs_getattr_verify(int flag, size_t len, void *data) +{ + struct getattr_request *req = NULL; + size_t tmp_len; + + if (flag != C_REQUEST || !data) + return 0; + + req = data; + tmp_len = strnlen(req->buf, PATH_MAX); + if (tmp_len != len - sizeof(struct getattr_request) - 1 || + le32_to_cpu(req->path_len) != tmp_len) { + hmdfs_err("verify fail"); + return -EINVAL; + } + + return 0; +} + +static int hmdfs_getxattr_verify(int flag, size_t len, void *data) +{ + struct getxattr_request *req = NULL; + struct getxattr_response *resp = NULL; + size_t path_len = 0; + size_t name_len = 0; + size_t size = 0; + + if (!data) + return 0; + + if (flag == C_REQUEST) { + req = data; + path_len = le32_to_cpu(req->path_len); + name_len = le32_to_cpu(req->name_len); + size = le32_to_cpu(req->size); + if (path_len >= PATH_MAX || + path_len != strnlen(req->buf, PATH_MAX) || + name_len != + strnlen(req->buf + path_len + 1, XATTR_NAME_MAX) || + size > HMDFS_XATTR_SIZE_MAX) + return -EINVAL; + } else { + resp = data; + size = le32_to_cpu(resp->size); + if (len != sizeof(struct getxattr_response) && + len < sizeof(struct getxattr_response) + size) + return -EINVAL; + } + + return 0; +} + +static int hmdfs_setxattr_verify(int flag, size_t len, void *data) +{ + struct setxattr_request *req = NULL; + size_t path_len = 0; + size_t name_len = 0; + size_t size = 0; + + /* No need to verify response */ + if (flag != C_REQUEST || !data) + return 0; + + req = data; + path_len = le32_to_cpu(req->path_len); + name_len = le32_to_cpu(req->name_len); + size = le32_to_cpu(req->size); + if (path_len >= PATH_MAX || path_len != strnlen(req->buf, PATH_MAX) || + name_len != strnlen(req->buf + path_len + 1, XATTR_NAME_MAX) || + len != path_len + name_len + size + 2 + + sizeof(struct setxattr_request) || + size > HMDFS_XATTR_SIZE_MAX) + return -EINVAL; + + return 0; +} + +static int hmdfs_listxattr_verify(int flag, size_t len, void *data) +{ + struct listxattr_request *req = NULL; + struct listxattr_response *resp = NULL; + size_t path_len = 0; + size_t size = 0; + + if (!data) + return 0; + + if (flag == C_REQUEST) { + req = data; + path_len = le32_to_cpu(req->path_len); + size = le32_to_cpu(req->size); + if (path_len >= PATH_MAX || + path_len != strnlen(req->buf, PATH_MAX) || + size > HMDFS_LISTXATTR_SIZE_MAX) + return -EINVAL; + } else { + resp = data; + size = le32_to_cpu(resp->size); 
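+		/*
+		 * A valid response is either a bare header (no xattr list
+		 * follows) or a header followed by at least "size" bytes.
+		 */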
+		if (len != sizeof(struct listxattr_response) &&
+		    len < sizeof(struct listxattr_response) + size)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int hmdfs_writepage_verify(int flag, size_t len, void *data)
+{
+	struct writepage_request *req = NULL;
+	__u32 count;
+
+	if (flag != C_REQUEST || !data)
+		return 0;
+
+	req = data;
+	count = le32_to_cpu(req->count);
+	if (count == 0 || count > HMDFS_PAGE_SIZE ||
+	    len - sizeof(struct writepage_request) != HMDFS_PAGE_SIZE) {
+		hmdfs_err("verify fail, count is %u", count);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int hmdfs_statfs_verify(int flag, size_t len, void *data)
+{
+	int err = 0;
+	struct statfs_request *tmp_request = NULL;
+	char *tmp_char = NULL;
+	size_t tmp_len = 0;
+
+	if (flag == C_REQUEST) {
+		if (data) {
+			tmp_request = data;
+			tmp_char = tmp_request->path;
+			tmp_len = strnlen(tmp_char, PATH_MAX);
+		} else {
+			return err;
+		}
+
+		if (le32_to_cpu(tmp_request->path_len) != tmp_len ||
+		    tmp_len != len - sizeof(struct statfs_request) - 1) {
+			err = -EINVAL;
+			hmdfs_err("verify fail");
+			return err;
+		}
+	}
+
+	return err;
+}
+
+static int hmdfs_readpages_verify(int flag, size_t len, void *data)
+{
+	struct readpages_request *req = NULL;
+	unsigned int size;
+
+	if (flag != C_REQUEST || !data)
+		return 0;
+
+	req = data;
+	size = le32_to_cpu(req->size);
+	if (size > HMDFS_READPAGES_NR_MAX * HMDFS_PAGE_SIZE) {
+		hmdfs_err("verify fail, invalid req->size %u", size);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int hmdfs_readpages_open_verify(int flag, size_t len, void *data)
+{
+	struct readpages_open_request *req = NULL;
+	unsigned int size;
+	size_t tmp_len;
+
+	if (flag != C_REQUEST || !data)
+		return 0;
+
+	req = data;
+	size = le32_to_cpu(req->size);
+	tmp_len = strnlen(req->buf, PATH_MAX);
+	if (tmp_len + 1 != len - sizeof(*req) ||
+	    le32_to_cpu(req->path_len) != tmp_len ||
+	    size > HMDFS_READPAGES_NR_MAX * HMDFS_PAGE_SIZE) {
+		hmdfs_err("verify fail, req->size %u", size);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+typedef int (*hmdfs_message_verify_func)(int, size_t, void *);
+
+static const hmdfs_message_verify_func message_verify[F_SIZE] = {
+	[F_OPEN] = hmdfs_open_message_verify,
+	[F_WRITEPAGE] = hmdfs_writepage_verify,
+	[F_ITERATE] = hmdfs_iterate_verify,
+	[F_MKDIR] = hmdfs_mkdir_verify,
+	[F_CREATE] = hmdfs_create_verify,
+	[F_RMDIR] = hmdfs_rmdir_verify,
+	[F_UNLINK] = hmdfs_unlink_verify,
+	[F_RENAME] = hmdfs_rename_verify,
+	[F_SETATTR] = hmdfs_setattr_verify,
+	[F_STATFS] = hmdfs_statfs_verify,
+	[F_GETATTR] = hmdfs_getattr_verify,
+	[F_GETXATTR] = hmdfs_getxattr_verify,
+	[F_SETXATTR] = hmdfs_setxattr_verify,
+	[F_LISTXATTR] = hmdfs_listxattr_verify,
+	[F_READPAGES] = hmdfs_readpages_verify,
+	[F_READPAGES_OPEN] = hmdfs_readpages_open_verify,
+	[F_ATOMIC_OPEN] = hmdfs_atomic_open_verify,
+};
+
+static void handle_bad_message(struct hmdfs_peer *con,
+			       struct hmdfs_head_cmd *head, int *err)
+{
+	/*
+	 * A bad message won't be noticed by the upper layer, so ETIME is
+	 * always given to it. It is preferable to pass EOPNOTSUPP to the
+	 * upper layer when a bad message (e.g. one with a wrong len) is
+	 * received.
+	 */
+	if (head->operations.cmd_flag == C_RESPONSE) {
+		/*
+		 * Change the msg ret code. To let the upper layer handle
+		 * EOPNOTSUPP, hmdfs_message_verify() should return
+		 * 0, so the err code is modified as well.
+		 */
+		head->ret_code = cpu_to_le32(-EOPNOTSUPP);
+		*err = 0;
+	} else {
+		if (head->operations.command >= F_SIZE)
+			return;
+		/*
+		 * Some request messages do not need to be responded to.
+ * Even if a response is returned, the response msg + * is automatically ignored in hmdfs_response_recv(). + * Therefore, it is normal to directly return a response. + */ + if (need_response[head->operations.command]) + hmdfs_send_err_response(con, head, -EOPNOTSUPP); + } +} + +int hmdfs_message_verify(struct hmdfs_peer *con, struct hmdfs_head_cmd *head, + void *data) +{ + int err = 0; + int flag, cmd, len_type; + size_t len, min, max; + + if (!head) + return -EINVAL; + + flag = head->operations.cmd_flag; + if (flag != C_REQUEST && flag != C_RESPONSE) + return -EINVAL; + + cmd = head->operations.command; + if (cmd >= F_SIZE || cmd < F_OPEN || cmd == F_RESERVED_0 || + (cmd >= F_RESERVED_1 && cmd <= F_RESERVED_4) || cmd == F_RESERVED_5) { + err = -EINVAL; + goto handle_bad_msg; + } + + if (head->version == DFS_2_0) { + len = le32_to_cpu(head->data_len) - + sizeof(struct hmdfs_head_cmd); + min = message_length[flag][cmd][HMDFS_MESSAGE_MIN_INDEX]; + if (head->operations.command == F_ITERATE && flag == C_RESPONSE) + max = sizeof(struct slice_descriptor) + PAGE_SIZE; + else + max = message_length[flag][cmd][HMDFS_MESSAGE_MAX_INDEX]; + len_type = + message_length[flag][cmd][HMDFS_MESSAGE_LEN_JUDGE_INDEX]; + + if (len_type == MESSAGE_LEN_JUDGE_RANGE) { + if (len < min || len > max) { + hmdfs_err( + "cmd %d -> %d message verify fail, len = %zu", + cmd, flag, len); + err = -EINVAL; + goto handle_bad_msg; + } + } else { + if (len != min && len != max) { + hmdfs_err( + "cmd %d -> %d message verify fail, len = %zu", + cmd, flag, len); + err = -EINVAL; + goto handle_bad_msg; + } + } + + if (message_verify[cmd]) + err = message_verify[cmd](flag, len, data); + + if (err) + goto handle_bad_msg; + + return err; + } + +handle_bad_msg: + if (err) { + handle_bad_message(con, head, &err); + return err; + } + + if (head->version == DFS_1_0) + return err; // now, DFS_1_0 version do not verify + + return -EINVAL; +} diff --git a/fs/hmdfs/comm/message_verify.h b/fs/hmdfs/comm/message_verify.h new file mode 100644 index 000000000000..99e696a448f1 --- /dev/null +++ b/fs/hmdfs/comm/message_verify.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/message_verify.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_MESSAGE_VERIFY_H +#define HMDFS_MESSAGE_VERIFY_H + +#include "protocol.h" + +enum MESSAGE_LEN_JUDGE_TYPE { + MESSAGE_LEN_JUDGE_RANGE = 0, + MESSAGE_LEN_JUDGE_BIN = 1, +}; + +#define HMDFS_MESSAGE_MIN_INDEX 0 +#define HMDFS_MESSAGE_MAX_INDEX 1 +#define HMDFS_MESSAGE_LEN_JUDGE_INDEX 2 +#define HMDFS_MESSAGE_MIN_MAX 3 + +void hmdfs_message_verify_init(void); +int hmdfs_message_verify(struct hmdfs_peer *con, struct hmdfs_head_cmd *head, + void *data); + +#endif diff --git a/fs/hmdfs/comm/node_cb.c b/fs/hmdfs/comm/node_cb.c new file mode 100644 index 000000000000..21b84d2fff82 --- /dev/null +++ b/fs/hmdfs/comm/node_cb.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/comm/node_cb.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#include + +#include "node_cb.h" +#include "connection.h" + +static struct list_head cb_head[NODE_EVT_NR][NODE_EVT_TYPE_NR]; + +static const char *evt_str_tbl[NODE_EVT_NR] = { + "add", "online", "offline", "del", +}; + +static inline bool hmdfs_is_valid_node_evt(int evt) +{ + return (evt >= 0 && evt < NODE_EVT_NR); +} + +static const char *hmdfs_evt_str(int evt) +{ + if (!hmdfs_is_valid_node_evt(evt)) + return "unknown"; + return evt_str_tbl[evt]; +} + +void hmdfs_node_evt_cb_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cb_head); i++) { + int j; + + for (j = 0; j < ARRAY_SIZE(cb_head[0]); j++) + INIT_LIST_HEAD(&cb_head[i][j]); + } +} + +void hmdfs_node_add_evt_cb(struct hmdfs_node_cb_desc *desc, int nr) +{ + int i; + + for (i = 0; i < nr; i++) { + int evt = desc[i].evt; + bool sync = desc[i].sync; + + if (!hmdfs_is_valid_node_evt(evt)) + continue; + + list_add_tail(&desc[i].list, &cb_head[evt][sync]); + } +} + +void hmdfs_node_call_evt_cb(struct hmdfs_peer *conn, int evt, bool sync, + unsigned int seq) +{ + struct hmdfs_node_cb_desc *desc = NULL; + + hmdfs_info("node 0x%x:0x%llx call %s %s cb seq %u", + conn->owner, conn->device_id, hmdfs_evt_str(evt), + sync ? "sync" : "async", seq); + + if (!hmdfs_is_valid_node_evt(evt)) + return; + + list_for_each_entry(desc, &cb_head[evt][sync], list) { + if (conn->version < desc->min_version) + continue; + + desc->fn(conn, evt, seq); + } +} diff --git a/fs/hmdfs/comm/node_cb.h b/fs/hmdfs/comm/node_cb.h new file mode 100644 index 000000000000..fe53b946f668 --- /dev/null +++ b/fs/hmdfs/comm/node_cb.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/node_cb.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_NODE_CB_H +#define HMDFS_NODE_CB_H + +#include "hmdfs.h" + +/* async & sync */ +#define NODE_EVT_TYPE_NR 2 + +enum { + NODE_EVT_ADD = 0, + NODE_EVT_ONLINE, + NODE_EVT_OFFLINE, + NODE_EVT_DEL, + NODE_EVT_NR, +}; + +struct hmdfs_peer; + +typedef void (*hmdfs_node_evt_cb)(struct hmdfs_peer *conn, + int evt, unsigned int seq); + +struct hmdfs_node_cb_desc { + int evt; + bool sync; + unsigned char min_version; + hmdfs_node_evt_cb fn; + struct list_head list; +}; + +extern void hmdfs_node_evt_cb_init(void); + +/* Only initialize during module init */ +extern void hmdfs_node_add_evt_cb(struct hmdfs_node_cb_desc *desc, int nr); +extern void hmdfs_node_call_evt_cb(struct hmdfs_peer *node, int evt, bool sync, + unsigned int seq); + +#endif /* HMDFS_NODE_CB_H */ diff --git a/fs/hmdfs/comm/protocol.h b/fs/hmdfs/comm/protocol.h new file mode 100644 index 000000000000..a873143f20d7 --- /dev/null +++ b/fs/hmdfs/comm/protocol.h @@ -0,0 +1,489 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/protocol.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#ifndef HMDFS_PROTOCOL_H +#define HMDFS_PROTOCOL_H + +#include +#include +#include +#include + +struct hmdfs_cmd { + __u8 reserved; + __u8 cmd_flag; + __u8 command; + __u8 reserved2; +} __packed; + +#define HMDFS_MSG_MAGIC 0xF7 +#define HMDFS_MAX_MESSAGE_LEN (8 * 1024 * 1024) + +struct hmdfs_head_cmd { + __u8 magic; + __u8 version; + __le16 reserved; + __le32 data_len; + struct hmdfs_cmd operations; + __le32 ret_code; + __le32 msg_id; + __le32 reserved1; +} __packed; + +enum FILE_RECV_STATE { + FILE_RECV_PROCESS = 0, + FILE_RECV_SUCC, + FILE_RECV_ERR_NET, + FILE_RECV_ERR_SPC, +}; + +struct file_recv_info { + void *local_filp; + atomic_t local_fslices; + atomic_t state; +}; + +enum MSG_IDR_TYPE { + MSG_IDR_1_0_NONE = 0, + MSG_IDR_1_0_MESSAGE_SYNC, + MSG_IDR_1_0_PAGE, + MSG_IDR_MESSAGE_SYNC, + MSG_IDR_MESSAGE_ASYNC, + MSG_IDR_PAGE, + MSG_IDR_MAX, +}; + +struct hmdfs_msg_idr_head { + __u32 type; + __u32 msg_id; + struct kref ref; + struct hmdfs_peer *peer; +}; + +struct sendmsg_wait_queue { + struct hmdfs_msg_idr_head head; + wait_queue_head_t response_q; + struct list_head async_msg; + atomic_t valid; + __u32 size; + void *buf; + __u32 ret; + unsigned long start; + struct file_recv_info recv_info; +}; + +struct hmdfs_send_command { + struct hmdfs_cmd operations; + void *data; + size_t len; + void *local_filp; + void *out_buf; + size_t out_len; + __u32 ret_code; +}; + +struct hmdfs_req { + struct hmdfs_cmd operations; + /* + * Normally, the caller ought set timeout to TIMEOUT_CONFIG, so that + * hmdfs_send_async_request will search s_cmd_timeout for the user- + * configured timeout values. + * + * However, consider the given scenery: + * The caller may want to issue multiple requests sharing the same + * timeout value, but the users may update the value during the gap. + * To ensure the "atomicty" of timeout-using for these requests, we + * provide the timeout field for hacking. 
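+	 *
+	 * A purely illustrative use (not code from this patch): snapshot
+	 * the configured value once, then reuse it for the whole batch:
+	 *
+	 *	req.timeout = get_cmd_timeout(sbi, req.operations.command);
+	 *	// every request in the batch now shares one timeout value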
+ */ + unsigned int timeout; + void *data; + size_t data_len; + + void *private; // optional + size_t private_len; // optional +}; + +struct hmdfs_resp { + void *out_buf; + size_t out_len; + __u32 ret_code; +}; + +struct hmdfs_msg_parasite { + struct hmdfs_msg_idr_head head; + struct delayed_work d_work; + bool wfired; + struct hmdfs_req req; + struct hmdfs_resp resp; + unsigned long start; +}; + +struct hmdfs_send_data { + // sect1: head + void *head; + size_t head_len; + + // sect2: slice descriptor + void *sdesc; + size_t sdesc_len; + + // sect3: request / response / file slice + void *data; + size_t len; +}; + +struct slice_descriptor { + __le32 num_slices; + __le32 slice_size; + __le32 slice_sn; + __le32 content_size; +} __packed; + +enum DFS_VERSION { + INVALID_VERSION = 0, + DFS_1_0, + + USERSPACE_MAX_VER = 0x3F, + DFS_2_0, + + MAX_VERSION = 0xFF +}; + +enum CONN_OPERATIONS_VERSION { USERDFS_VERSION, PROTOCOL_VERSION }; + +enum CMD_FLAG { C_REQUEST = 0, C_RESPONSE = 1, C_FLAG_SIZE }; + +enum FILE_CMD { + F_OPEN = 0, + F_RELEASE = 1, + F_READPAGE = 2, + F_WRITEPAGE = 3, + F_ITERATE = 4, + F_RESERVED_1 = 5, + F_RESERVED_2 = 6, + F_RESERVED_3 = 7, + F_RESERVED_4 = 8, + F_MKDIR = 9, + F_RMDIR = 10, + F_CREATE = 11, + F_UNLINK = 12, + F_RENAME = 13, + F_SETATTR = 14, + F_RESERVED_5 = 15, + F_STATFS = 16, + F_CONNECT_REKEY = 17, + F_DROP_PUSH = 18, + F_RESERVED_0 = 19, + F_GETATTR = 20, + F_FSYNC = 21, + F_SYNCFS = 22, + F_GETXATTR = 23, + F_SETXATTR = 24, + F_LISTXATTR = 25, + F_READPAGES = 26, + F_READPAGES_OPEN = 27, + F_ATOMIC_OPEN = 28, + F_SIZE, +}; + +struct open_request { + __u8 file_type; + __le32 flags; + __le32 path_len; + char buf[0]; +} __packed; + +struct open_response { + __le32 change_detect_cap; + __le64 file_ver; + __le32 file_id; + __le64 file_size; + __le64 ino; + __le64 ctime; + __le32 ctime_nsec; + __le64 mtime; + __le32 mtime_nsec; + __le64 stable_ctime; + __le32 stable_ctime_nsec; + __le64 ichange_count; +} __packed; + +enum hmdfs_open_flags { + HMDFS_O_TRUNC = O_TRUNC, + HMDFS_O_EXCL = O_EXCL, +}; + +struct atomic_open_request { + __le32 open_flags; + __le16 mode; + __le16 reserved1; + __le32 path_len; + __le32 file_len; + __le64 reserved2[4]; + char buf[0]; +} __packed; + +struct atomic_open_response { + __le32 fno; + __le16 i_mode; + __le16 reserved1; + __le32 i_flags; + __le32 reserved2; + __le64 reserved3[4]; + struct open_response open_resp; +} __packed; + +struct release_request { + __le64 file_ver; + __le32 file_id; +} __packed; + +struct fsync_request { + __le64 file_ver; + __le32 file_id; + __le32 datasync; + __le64 start; + __le64 end; +} __packed; + +struct readpage_request { + __le64 file_ver; + __le32 file_id; + __le32 size; + __le64 index; +} __packed; + +struct readpage_response { + char buf[0]; +} __packed; + +struct readpages_request { + __le64 file_ver; + __le32 file_id; + __le32 size; + __le64 index; + __le64 reserved; +} __packed; + +struct readpages_response { + char buf[0]; +} __packed; + +struct readpages_open_request { + __u8 file_type; + __u8 reserved1[3]; + __le32 flags; + __le32 path_len; + __le32 size; + __le64 index; + __le64 reserved2; + char buf[0]; +} __packed; + +struct readpages_open_response { + struct open_response open_resp; + __le64 reserved[4]; + char buf[0]; +} __packed; + +struct writepage_request { + __le64 file_ver; + __le32 file_id; + __le64 index; + __le32 count; + char buf[0]; +} __packed; + +struct writepage_response { + __le64 ichange_count; + __le64 ctime; + __le32 ctime_nsec; +} __packed; + +struct readdir_request 
{ + __le64 dcache_crtime; + __le64 dcache_crtime_nsec; + __le64 dentry_ctime; + __le64 dentry_ctime_nsec; + __le64 num; + __le32 verify_cache; + __le32 path_len; + char path[0]; +} __packed; + +struct hmdfs_inodeinfo_response { + __le64 i_size; + __le64 i_mtime; + __le32 i_mtime_nsec; + __le32 fno; + __le16 i_mode; + __le64 i_ino; + __le32 i_flags; + __le32 i_reserved; +} __packed; + +struct mkdir_request { + __le32 path_len; + __le32 name_len; + __le16 mode; + char path[0]; +} __packed; + +struct create_request { + __le32 path_len; + __le32 name_len; + __le16 mode; + __u8 want_excl; + char path[0]; +} __packed; + +struct rmdir_request { + __le32 path_len; + __le32 name_len; + char path[0]; +} __packed; + +struct unlink_request { + __le32 path_len; + __le32 name_len; + char path[0]; +} __packed; + +struct rename_request { + __le32 old_path_len; + __le32 new_path_len; + __le32 old_name_len; + __le32 new_name_len; + __le32 flags; + char path[0]; +} __packed; + +struct drop_push_request { + __le32 path_len; + char path[0]; +} __packed; + +struct setattr_request { + __le64 size; + __le32 valid; + __le16 mode; + __le32 uid; + __le32 gid; + __le64 atime; + __le32 atime_nsec; + __le64 mtime; + __le32 mtime_nsec; + __le32 path_len; + char buf[0]; +} __packed; + +struct getattr_request { + __le32 lookup_flags; + __le32 path_len; + char buf[0]; +} __packed; + +struct getattr_response { + __le32 change_detect_cap; + __le32 result_mask; + __le32 flags; + __le64 fsid; + __le16 mode; + __le32 nlink; + __le32 uid; + __le32 gid; + __le32 rdev; + __le64 ino; + __le64 size; + __le64 blocks; + __le32 blksize; + __le64 atime; + __le32 atime_nsec; + __le64 mtime; + __le32 mtime_nsec; + __le64 ctime; + __le32 ctime_nsec; + __le64 crtime; + __le32 crtime_nsec; + __le64 ichange_count; +} __packed; + +struct statfs_request { + __le32 path_len; + char path[0]; +} __packed; + +struct statfs_response { + __le64 f_type; + __le64 f_bsize; + __le64 f_blocks; + __le64 f_bfree; + __le64 f_bavail; + __le64 f_files; + __le64 f_ffree; + __le32 f_fsid_0; + __le32 f_fsid_1; + __le64 f_namelen; + __le64 f_frsize; + __le64 f_flags; + __le64 f_spare_0; + __le64 f_spare_1; + __le64 f_spare_2; + __le64 f_spare_3; +} __packed; + +struct syncfs_request { + __le64 version; + __le32 flags; +} __packed; + +struct getxattr_request { + __le32 path_len; + __le32 name_len; + __le32 size; + char buf[0]; +} __packed; + +struct getxattr_response { + __le32 size; + char value[0]; /* xattr value may non-printable */ +} __packed; + +struct setxattr_request { + __le32 path_len; + __le32 name_len; + __le32 size; + __le32 flags; + __u8 del; /* remove xattr */ + char buf[0]; +} __packed; + +struct listxattr_request { + __le32 path_len; + __le32 size; + char buf[0]; +} __packed; + +struct listxattr_response { + __le32 size; + char list[0]; +} __packed; + +struct connection_rekey_request { + __le32 update_request; +} __packed; + +enum CONNECTION_KEY_UPDATE_REQUEST { + UPDATE_NOT_REQUESTED = 0, + UPDATE_REQUESTED = 1 +}; + +enum MSG_QUEUE_STATUS { + MSG_Q_SEND = 0, + MSG_Q_END_RECV, +}; +#endif diff --git a/fs/hmdfs/comm/socket_adapter.c b/fs/hmdfs/comm/socket_adapter.c new file mode 100644 index 000000000000..769b6d28ebce --- /dev/null +++ b/fs/hmdfs/comm/socket_adapter.c @@ -0,0 +1,1151 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/comm/socket_adapter.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#include "socket_adapter.h" + +#include +#include +#include +#include +#include +#include + +#include "authority/authentication.h" +#include "comm/device_node.h" +#include "hmdfs_client.h" +#include "hmdfs_server.h" +#include "hmdfs_trace.h" +#include "message_verify.h" + +#define ACQUIRE_WFIRED_INTVAL_USEC_MIN 10 +#define ACQUIRE_WFIRED_INTVAL_USEC_MAX 30 + +typedef void (*request_callback)(struct hmdfs_peer *, struct hmdfs_head_cmd *, + void *); +typedef void (*response_callback)(struct hmdfs_peer *, + struct sendmsg_wait_queue *, void *, size_t); + +static const request_callback s_recv_callbacks[F_SIZE] = { + [F_OPEN] = hmdfs_server_open, + [F_READPAGE] = hmdfs_server_readpage, + [F_RELEASE] = hmdfs_server_release, + [F_WRITEPAGE] = hmdfs_server_writepage, + [F_ITERATE] = hmdfs_server_readdir, + [F_MKDIR] = hmdfs_server_mkdir, + [F_CREATE] = hmdfs_server_create, + [F_RMDIR] = hmdfs_server_rmdir, + [F_UNLINK] = hmdfs_server_unlink, + [F_RENAME] = hmdfs_server_rename, + [F_SETATTR] = hmdfs_server_setattr, + [F_STATFS] = hmdfs_server_statfs, + [F_DROP_PUSH] = hmdfs_server_get_drop_push, + [F_GETATTR] = hmdfs_server_getattr, + [F_FSYNC] = hmdfs_server_fsync, + [F_SYNCFS] = hmdfs_server_syncfs, + [F_GETXATTR] = hmdfs_server_getxattr, + [F_SETXATTR] = hmdfs_server_setxattr, + [F_LISTXATTR] = hmdfs_server_listxattr, + [F_READPAGES] = hmdfs_server_readpages, + [F_READPAGES_OPEN] = hmdfs_server_readpages_open, + [F_ATOMIC_OPEN] = hmdfs_server_atomic_open, +}; + +typedef void (*file_request_callback)(struct hmdfs_peer *, + struct hmdfs_send_command *); + +struct async_req_callbacks { + void (*on_wakeup)(struct hmdfs_peer *peer, const struct hmdfs_req *req, + const struct hmdfs_resp *resp); +}; + +static const struct async_req_callbacks g_async_req_callbacks[F_SIZE] = { + [F_SYNCFS] = { .on_wakeup = hmdfs_recv_syncfs_cb }, + [F_WRITEPAGE] = { .on_wakeup = hmdfs_writepage_cb }, +}; + +static void msg_release(struct kref *kref) +{ + struct sendmsg_wait_queue *msg_wq; + struct hmdfs_peer *con; + + msg_wq = (struct sendmsg_wait_queue *)container_of(kref, + struct hmdfs_msg_idr_head, ref); + con = msg_wq->head.peer; + idr_remove(&con->msg_idr, msg_wq->head.msg_id); + spin_unlock(&con->idr_lock); + + kfree(msg_wq->buf); + if (msg_wq->recv_info.local_filp) + fput(msg_wq->recv_info.local_filp); + kfree(msg_wq); +} + +// Always remember to find before put, and make sure con is avilable +void msg_put(struct sendmsg_wait_queue *msg_wq) +{ + kref_put_lock(&msg_wq->head.ref, msg_release, + &msg_wq->head.peer->idr_lock); +} + +static void recv_info_init(struct file_recv_info *recv_info) +{ + memset(recv_info, 0, sizeof(struct file_recv_info)); + atomic_set(&recv_info->local_fslices, 0); + atomic_set(&recv_info->state, FILE_RECV_PROCESS); +} + +static int msg_init(struct hmdfs_peer *con, struct sendmsg_wait_queue *msg_wq) +{ + int ret = 0; + struct file_recv_info *recv_info = &msg_wq->recv_info; + + ret = hmdfs_alloc_msg_idr(con, MSG_IDR_MESSAGE_SYNC, msg_wq); + if (unlikely(ret)) + return ret; + + atomic_set(&msg_wq->valid, MSG_Q_SEND); + init_waitqueue_head(&msg_wq->response_q); + recv_info_init(recv_info); + msg_wq->start = jiffies; + return 0; +} + +static inline void statistic_con_sb_dirty(struct hmdfs_peer *con, + const struct hmdfs_cmd *op) +{ + if (op->command == F_WRITEPAGE && op->cmd_flag == C_REQUEST) + atomic64_inc(&con->sb_dirty_count); +} + +int hmdfs_sendmessage(struct hmdfs_peer *node, struct hmdfs_send_data *msg) +{ + int ret = 0; + struct connection *connect = NULL; + struct tcp_handle 
*tcp = NULL; + struct hmdfs_head_cmd *head = msg->head; + const struct cred *old_cred; + + if (!node) { + hmdfs_err("node NULL when send cmd %d", + head->operations.command); + ret = -EAGAIN; + goto out_err; + } else if (node->status != NODE_STAT_ONLINE) { + hmdfs_err("device %llu OFFLINE %d when send cmd %d", + node->device_id, node->status, + head->operations.command); + ret = -EAGAIN; + goto out; + } + + if (hmdfs_should_fail_sendmsg(&node->sbi->fault_inject, node, msg, + &ret)) + goto out; + + old_cred = hmdfs_override_creds(node->sbi->system_cred); + + do { + connect = get_conn_impl(node, CONNECT_TYPE_TCP); + if (!connect) { + hmdfs_info_ratelimited( + "device %llu no connection available when send cmd %d, get new session", + node->device_id, head->operations.command); + if (node->status != NODE_STAT_OFFLINE) { + struct notify_param param; + + memcpy(param.remote_cid, node->cid, + HMDFS_CID_SIZE); + param.notify = NOTIFY_OFFLINE; + param.fd = INVALID_SOCKET_FD; + notify(node, ¶m); + } + ret = -EAGAIN; + goto revert_cred; + } + + ret = connect->send_message(connect, msg); + if (ret == -ESHUTDOWN) { + hmdfs_info("device %llu send cmd %d message fail, connection stop", + node->device_id, head->operations.command); + connect->status = CONNECT_STAT_STOP; + tcp = connect->connect_handle; + if (node->status != NODE_STAT_OFFLINE) { + connection_get(connect); + if (!queue_work(node->reget_conn_wq, + &connect->reget_work)) + connection_put(connect); + } + connection_put(connect); + /* + * node->status is OFFLINE can not ensure + * node_seq will be increased before + * hmdfs_sendmessage() returns. + */ + hmdfs_node_inc_evt_seq(node); + } else { + connection_put(connect); + goto revert_cred; + } + } while (node->status != NODE_STAT_OFFLINE); +revert_cred: + hmdfs_revert_creds(old_cred); + + if (!ret) + statistic_con_sb_dirty(node, &head->operations); +out: + if (node->version == DFS_2_0 && + head->operations.cmd_flag == C_REQUEST) + hmdfs_client_snd_statis(node->sbi, + head->operations.command, ret); + else if (node->version == DFS_2_0 && + head->operations.cmd_flag == C_RESPONSE) + hmdfs_server_snd_statis(node->sbi, + head->operations.command, ret); +out_err: + return ret; +} + +int hmdfs_sendmessage_response(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, __u32 data_len, + void *buf, __u32 ret_code) +{ + int ret; + struct hmdfs_send_data msg; + struct hmdfs_head_cmd head; + + head.magic = HMDFS_MSG_MAGIC; + head.version = DFS_2_0; + head.operations = cmd->operations; + head.operations.cmd_flag = C_RESPONSE; + head.data_len = cpu_to_le32(data_len + sizeof(struct hmdfs_head_cmd)); + head.ret_code = cpu_to_le32(ret_code); + head.msg_id = cmd->msg_id; + head.reserved = cmd->reserved; + head.reserved1 = cmd->reserved1; + msg.head = &head; + msg.head_len = sizeof(struct hmdfs_head_cmd); + msg.data = buf; + msg.len = data_len; + msg.sdesc = NULL; + msg.sdesc_len = 0; + + ret = hmdfs_sendmessage(con, &msg); + return ret; +} + +static void mp_release(struct kref *kref) +{ + struct hmdfs_msg_parasite *mp = NULL; + struct hmdfs_peer *peer = NULL; + + mp = (struct hmdfs_msg_parasite *)container_of(kref, + struct hmdfs_msg_idr_head, ref); + peer = mp->head.peer; + idr_remove(&peer->msg_idr, mp->head.msg_id); + spin_unlock(&peer->idr_lock); + + peer_put(peer); + kfree(mp->resp.out_buf); + kfree(mp); +} + +void mp_put(struct hmdfs_msg_parasite *mp) +{ + kref_put_lock(&mp->head.ref, mp_release, &mp->head.peer->idr_lock); +} + +static void async_request_cb_on_wakeup_fn(struct work_struct *w) +{ + struct 
hmdfs_msg_parasite *mp =
+		container_of(w, struct hmdfs_msg_parasite, d_work.work);
+	struct async_req_callbacks cbs;
+	const struct cred *old_cred =
+		hmdfs_override_creds(mp->head.peer->sbi->cred);
+
+	if (mp->resp.ret_code == -ETIME)
+		hmdfs_client_resp_statis(mp->head.peer->sbi,
+					 mp->req.operations.command,
+					 HMDFS_RESP_TIMEOUT, 0, 0);
+
+	cbs = g_async_req_callbacks[mp->req.operations.command];
+	if (cbs.on_wakeup)
+		(*cbs.on_wakeup)(mp->head.peer, &mp->req, &mp->resp);
+	mp_put(mp);
+	hmdfs_revert_creds(old_cred);
+}
+
+static struct hmdfs_msg_parasite *mp_alloc(struct hmdfs_peer *peer,
+					   const struct hmdfs_req *req)
+{
+	struct hmdfs_msg_parasite *mp = kzalloc(sizeof(*mp), GFP_KERNEL);
+	int ret;
+
+	if (unlikely(!mp))
+		return ERR_PTR(-ENOMEM);
+
+	ret = hmdfs_alloc_msg_idr(peer, MSG_IDR_MESSAGE_ASYNC, mp);
+	if (unlikely(ret)) {
+		kfree(mp);
+		return ERR_PTR(ret);
+	}
+
+	mp->start = jiffies;
+	peer_get(mp->head.peer);
+	mp->resp.ret_code = -ETIME;
+	INIT_DELAYED_WORK(&mp->d_work, async_request_cb_on_wakeup_fn);
+	mp->wfired = false;
+	mp->req = *req;
+	return mp;
+}
+
+/**
+ * hmdfs_send_async_request - send out an async request
+ * @peer: target device node
+ * @req: request descriptor + necessary contexts
+ *
+ * Send out a request synchronously and wait for its response asynchronously
+ * Return -ESHUTDOWN when the device node is unreachable
+ * Return -EAGAIN if the network is recovering
+ * Return -ENOMEM if out of memory
+ *
+ * Register a callback in g_async_req_callbacks to receive the response
+ */
+int hmdfs_send_async_request(struct hmdfs_peer *peer,
+			     const struct hmdfs_req *req)
+{
+	int ret = 0;
+	struct hmdfs_send_data msg;
+	struct hmdfs_head_cmd head;
+	struct hmdfs_msg_parasite *mp = NULL;
+	size_t msg_len = req->data_len + sizeof(struct hmdfs_head_cmd);
+	unsigned int timeout;
+
+	if (req->timeout == TIMEOUT_CONFIG)
+		timeout = get_cmd_timeout(peer->sbi, req->operations.command);
+	else
+		timeout = req->timeout;
+	if (timeout == TIMEOUT_UNINIT || timeout == TIMEOUT_NONE) {
+		hmdfs_err("send msg %d with uninitialized/invalid timeout",
+			  req->operations.command);
+		return -EINVAL;
+	}
+
+	if (!hmdfs_is_node_online(peer))
+		return -EAGAIN;
+
+	mp = mp_alloc(peer, req);
+	if (IS_ERR(mp))
+		return PTR_ERR(mp);
+	head.magic = HMDFS_MSG_MAGIC;
+	head.version = DFS_2_0;
+	head.data_len = cpu_to_le32(msg_len);
+	head.operations = mp->req.operations;
+	head.msg_id = cpu_to_le32(mp->head.msg_id);
+	head.reserved = 0;
+	head.reserved1 = 0;
+
+	msg.head = &head;
+	msg.head_len = sizeof(head);
+	msg.data = mp->req.data;
+	msg.len = mp->req.data_len;
+	msg.sdesc_len = 0;
+	msg.sdesc = NULL;
+
+	ret = hmdfs_sendmessage(peer, &msg);
+	if (unlikely(ret)) {
+		mp_put(mp);
+		goto out;
+	}
+
+	queue_delayed_work(peer->async_wq, &mp->d_work, timeout * HZ);
+	/*
+	 * The work may not have been queued yet when its response arrives,
+	 * resulting in meaningless waiting.
So we use the membar to tell the + * recv thread if the work has been queued + */ + smp_store_release(&mp->wfired, true); +out: + hmdfs_dec_msg_idr_process(peer); + return ret; +} + +static int hmdfs_record_async_readdir(struct hmdfs_peer *con, + struct sendmsg_wait_queue *msg_wq) +{ + struct hmdfs_sb_info *sbi = con->sbi; + + spin_lock(&sbi->async_readdir_msg_lock); + if (sbi->async_readdir_prohibit) { + spin_unlock(&sbi->async_readdir_msg_lock); + return -EINTR; + } + + list_add(&msg_wq->async_msg, &sbi->async_readdir_msg_list); + spin_unlock(&sbi->async_readdir_msg_lock); + + return 0; +} + +static void hmdfs_untrack_async_readdir(struct hmdfs_peer *con, + struct sendmsg_wait_queue *msg_wq) +{ + struct hmdfs_sb_info *sbi = con->sbi; + + spin_lock(&sbi->async_readdir_msg_lock); + list_del(&msg_wq->async_msg); + spin_unlock(&sbi->async_readdir_msg_lock); +} + +int hmdfs_sendmessage_request(struct hmdfs_peer *con, + struct hmdfs_send_command *sm) +{ + int time_left; + int ret = 0; + struct sendmsg_wait_queue *msg_wq = NULL; + struct hmdfs_send_data msg; + size_t outlen = sm->len + sizeof(struct hmdfs_head_cmd); + unsigned int timeout = + get_cmd_timeout(con->sbi, sm->operations.command); + struct hmdfs_head_cmd *head = NULL; + bool dec = false; + + if (!hmdfs_is_node_online(con)) + return -EAGAIN; + + if (timeout == TIMEOUT_UNINIT) { + hmdfs_err_ratelimited("send msg %d with uninitialized timeout", + sm->operations.command); + return -EINVAL; + } + + head = kzalloc(sizeof(struct hmdfs_head_cmd), GFP_KERNEL); + if (!head) + return -ENOMEM; + + sm->out_buf = NULL; + head->magic = HMDFS_MSG_MAGIC; + head->version = DFS_2_0; + head->operations = sm->operations; + head->data_len = cpu_to_le32(outlen); + head->ret_code = cpu_to_le32(sm->ret_code); + head->reserved = 0; + head->reserved1 = 0; + if (timeout != TIMEOUT_NONE) { + msg_wq = kzalloc(sizeof(*msg_wq), GFP_KERNEL); + if (!msg_wq) { + ret = -ENOMEM; + goto free; + } + ret = msg_init(con, msg_wq); + if (ret) { + kfree(msg_wq); + msg_wq = NULL; + goto free; + } + dec = true; + head->msg_id = cpu_to_le32(msg_wq->head.msg_id); + if (sm->operations.command == F_ITERATE) + msg_wq->recv_info.local_filp = sm->local_filp; + } + msg.head = head; + msg.head_len = sizeof(struct hmdfs_head_cmd); + msg.data = sm->data; + msg.len = sm->len; + msg.sdesc_len = 0; + msg.sdesc = NULL; + ret = hmdfs_sendmessage(con, &msg); + if (ret) { + hmdfs_err_ratelimited("send err sm->device_id, %lld, msg_id %u", + con->device_id, head->msg_id); + goto free; + } + + if (timeout == TIMEOUT_NONE) + goto free; + + hmdfs_dec_msg_idr_process(con); + dec = false; + + if (sm->operations.command == F_ITERATE) { + ret = hmdfs_record_async_readdir(con, msg_wq); + if (ret) { + atomic_set(&msg_wq->recv_info.state, FILE_RECV_ERR_SPC); + goto free; + } + } + + time_left = wait_event_interruptible_timeout( + msg_wq->response_q, + (atomic_read(&msg_wq->valid) == MSG_Q_END_RECV), timeout * HZ); + + if (sm->operations.command == F_ITERATE) + hmdfs_untrack_async_readdir(con, msg_wq); + + if (time_left == -ERESTARTSYS || time_left == 0) { + hmdfs_err("timeout err sm->device_id %lld, msg_id %d cmd %d", + con->device_id, head->msg_id, + head->operations.command); + if (sm->operations.command == F_ITERATE) + atomic_set(&msg_wq->recv_info.state, FILE_RECV_ERR_NET); + ret = -ETIME; + hmdfs_client_resp_statis(con->sbi, sm->operations.command, + HMDFS_RESP_TIMEOUT, 0, 0); + goto free; + } + sm->out_buf = msg_wq->buf; + msg_wq->buf = NULL; + sm->out_len = msg_wq->size - sizeof(struct 
hmdfs_head_cmd);
+	ret = msg_wq->ret;
+
+free:
+	if (msg_wq)
+		msg_put(msg_wq);
+	if (dec)
+		hmdfs_dec_msg_idr_process(con);
+	kfree(head);
+	return ret;
+}
+
+static int hmdfs_send_slice(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+			    struct slice_descriptor *sdesc, void *slice_buf)
+{
+	int ret;
+	struct hmdfs_send_data msg;
+	struct hmdfs_head_cmd head;
+	int content_size = le32_to_cpu(sdesc->content_size);
+	int msg_len = sizeof(struct hmdfs_head_cmd) + content_size +
+		      sizeof(struct slice_descriptor);
+
+	head.magic = HMDFS_MSG_MAGIC;
+	head.version = DFS_2_0;
+	head.operations = cmd->operations;
+	head.operations.cmd_flag = C_RESPONSE;
+	head.data_len = cpu_to_le32(msg_len);
+	head.ret_code = cpu_to_le32(0);
+	head.msg_id = cmd->msg_id;
+	head.reserved = cmd->reserved;
+	head.reserved1 = cmd->reserved1;
+
+	msg.head = &head;
+	msg.head_len = sizeof(struct hmdfs_head_cmd);
+	msg.sdesc = sdesc;
+	msg.sdesc_len = sizeof(struct slice_descriptor);
+	msg.data = slice_buf;
+	msg.len = content_size;
+
+	ret = hmdfs_sendmessage(con, &msg);
+
+	return ret;
+}
+
+int hmdfs_readfile_response(struct hmdfs_peer *con, struct hmdfs_head_cmd *head,
+			    struct file *filp)
+{
+	int ret;
+	const unsigned int slice_size = PAGE_SIZE;
+	char *slice_buf = NULL;
+	loff_t file_offset = 0, file_size;
+	ssize_t size;
+	struct slice_descriptor sdesc;
+	unsigned int slice_sn = 0;
+
+	if (!filp)
+		return hmdfs_sendmessage_response(con, head, 0, NULL, 0);
+
+	sdesc.slice_size = cpu_to_le32(slice_size);
+	file_size = i_size_read(file_inode(filp));
+	file_size = round_up(file_size, slice_size);
+	sdesc.num_slices = cpu_to_le32(file_size / slice_size);
+
+	slice_buf = kmalloc(slice_size, GFP_KERNEL);
+	if (!slice_buf) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	while (1) {
+		sdesc.slice_sn = cpu_to_le32(slice_sn++);
+		size = kernel_read(filp, slice_buf, (size_t)slice_size,
+				   &file_offset);
+		if (IS_ERR_VALUE(size)) {
+			ret = (int)size;
+			goto out;
+		}
+		sdesc.content_size = cpu_to_le32(size);
+		ret = hmdfs_send_slice(con, head, &sdesc, slice_buf);
+		if (ret) {
+			hmdfs_info("Cannot send file slice %d",
+				   le32_to_cpu(sdesc.slice_sn));
+			break;
+		}
+		if (file_offset >= i_size_read(file_inode(filp)))
+			break;
+	}
+
+out:
+	kfree(slice_buf);
+	if (ret)
+		hmdfs_sendmessage_response(con, head, 0, NULL, ret);
+	return ret;
+}
+
+static void asw_release(struct kref *kref)
+{
+	struct hmdfs_async_work *asw = NULL;
+	struct hmdfs_peer *peer = NULL;
+
+	asw = (struct hmdfs_async_work *)container_of(kref,
+			struct hmdfs_msg_idr_head, ref);
+	peer = asw->head.peer;
+	idr_remove(&peer->msg_idr, asw->head.msg_id);
+	spin_unlock(&peer->idr_lock);
+	kfree(asw);
+}
+
+void asw_put(struct hmdfs_async_work *asw)
+{
+	kref_put_lock(&asw->head.ref, asw_release, &asw->head.peer->idr_lock);
+}
+
+void hmdfs_recv_page_work_fn(struct work_struct *ptr)
+{
+	struct hmdfs_async_work *async_work =
+		container_of(ptr, struct hmdfs_async_work, d_work.work);
+
+	if (async_work->head.peer->version >= DFS_2_0)
+		hmdfs_client_resp_statis(async_work->head.peer->sbi,
+					 F_READPAGE, HMDFS_RESP_TIMEOUT, 0, 0);
+	hmdfs_err_ratelimited("timeout and release page, msg_id:%u",
+			      async_work->head.msg_id);
+	asw_done(async_work);
+}
+
+int hmdfs_sendpage_request(struct hmdfs_peer *con,
+			   struct hmdfs_send_command *sm)
+{
+	int ret = 0;
+	struct hmdfs_send_data msg;
+	struct hmdfs_async_work *async_work = NULL;
+	size_t outlen = sm->len + sizeof(struct hmdfs_head_cmd);
+	struct hmdfs_head_cmd head;
+	unsigned int timeout;
+	unsigned long start = jiffies;
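+	/*
+	 * sm->out_buf carries the locked page to be filled: it stays
+	 * locked until the response, a timeout or a send failure
+	 * unlocks it via asw_done() (or via the error paths below).
+	 */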
+
+	WARN_ON(!sm->out_buf);
+
+	timeout = get_cmd_timeout(con->sbi, sm->operations.command);
+	if (timeout == TIMEOUT_UNINIT) {
+		hmdfs_err("send msg %d with uninitialized timeout",
+			  sm->operations.command);
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	if (!hmdfs_is_node_online(con)) {
+		ret = -EAGAIN;
+		goto unlock;
+	}
+
+	memset(&head, 0, sizeof(head));
+	head.magic = HMDFS_MSG_MAGIC;
+	head.version = DFS_2_0;
+	head.operations = sm->operations;
+	head.data_len = cpu_to_le32(outlen);
+	head.ret_code = cpu_to_le32(sm->ret_code);
+	head.reserved = 0;
+	head.reserved1 = 0;
+
+	msg.head = &head;
+	msg.head_len = sizeof(struct hmdfs_head_cmd);
+	msg.data = sm->data;
+	msg.len = sm->len;
+	msg.sdesc_len = 0;
+	msg.sdesc = NULL;
+
+	async_work = kzalloc(sizeof(*async_work), GFP_KERNEL);
+	if (!async_work) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+	async_work->start = start;
+	ret = hmdfs_alloc_msg_idr(con, MSG_IDR_PAGE, async_work);
+	if (ret) {
+		hmdfs_err("alloc msg_id failed, err %d", ret);
+		goto unlock;
+	}
+	head.msg_id = cpu_to_le32(async_work->head.msg_id);
+	async_work->page = sm->out_buf;
+	asw_get(async_work);
+	INIT_DELAYED_WORK(&async_work->d_work, hmdfs_recv_page_work_fn);
+	ret = queue_delayed_work(con->async_wq, &async_work->d_work,
+				 timeout * HZ);
+	if (!ret) {
+		hmdfs_err("queue_delayed_work failed, msg_id %u", head.msg_id);
+		goto fail_and_unlock_page;
+	}
+	ret = hmdfs_sendmessage(con, &msg);
+	if (ret) {
+		hmdfs_err("send err sm->device_id, %lld, msg_id %u",
+			  con->device_id, head.msg_id);
+		if (!cancel_delayed_work(&async_work->d_work)) {
+			hmdfs_err("cancel async work err");
+			asw_put(async_work);
+			hmdfs_dec_msg_idr_process(con);
+			goto out;
+		}
+		goto fail_and_unlock_page;
+	}
+
+	asw_put(async_work);
+	hmdfs_dec_msg_idr_process(con);
+	return 0;
+
+fail_and_unlock_page:
+	asw_put(async_work);
+	asw_done(async_work);
+	hmdfs_dec_msg_idr_process(con);
+	return ret;
+unlock:
+	kfree(async_work);
+	unlock_page(sm->out_buf);
+out:
+	return ret;
+}
+
+static void hmdfs_request_handle_sync(struct hmdfs_peer *con,
+				      struct hmdfs_head_cmd *head, void *buf)
+{
+	unsigned long start = jiffies;
+	const struct cred *saved_cred = hmdfs_override_fsids(true);
+
+	if (!saved_cred) {
+		hmdfs_err("prepare cred failed!");
+		kfree(buf);
+		return;
+	}
+
+	s_recv_callbacks[head->operations.command](con, head, buf);
+	hmdfs_statistic(con->sbi, head->operations.command, jiffies - start);
+
+	kfree(buf);
+
+	hmdfs_revert_fsids(saved_cred);
+}
+
+static void hmdfs_msg_handle_sync(struct hmdfs_peer *con,
+				  struct hmdfs_head_cmd *head, void *buf)
+{
+	const struct cred *old_cred = hmdfs_override_creds(con->sbi->cred);
+
+	/*
+	 * Reuse PF_NPROC_EXCEEDED as an indication of hmdfs server context:
+	 * 1. PF_NPROC_EXCEEDED will be set by setreuid()/setuid()/setresuid();
+	 *    we assume kworkers will not call these syscalls.
+	 * 2. PF_NPROC_EXCEEDED will be cleared by execv(), and kworkers
+	 *    will not call it.
+ */ + current->flags |= PF_NPROC_EXCEEDED; + hmdfs_request_handle_sync(con, head, buf); + current->flags &= ~PF_NPROC_EXCEEDED; + + hmdfs_revert_creds(old_cred); +} + + +static void hmdfs_request_work_fn(struct work_struct *ptr) +{ + struct work_handler_desp *desp = + container_of(ptr, struct work_handler_desp, work); + + hmdfs_msg_handle_sync(desp->peer, desp->head, desp->buf); + peer_put(desp->peer); + kfree(desp->head); + kfree(desp); +} + +static int hmdfs_msg_handle_async(struct hmdfs_peer *con, + struct hmdfs_head_cmd *head, void *buf, + struct workqueue_struct *wq, + void (*work_fn)(struct work_struct *ptr)) +{ + struct work_handler_desp *desp = NULL; + struct hmdfs_head_cmd *dup_head = NULL; + int ret; + + desp = kzalloc(sizeof(*desp), GFP_KERNEL); + if (!desp) { + ret = -ENOMEM; + goto exit_desp; + } + + dup_head = kzalloc(sizeof(*dup_head), GFP_KERNEL); + if (!dup_head) { + ret = -ENOMEM; + goto exit_desp; + } + + *dup_head = *head; + desp->peer = con; + desp->head = dup_head; + desp->buf = buf; + INIT_WORK(&desp->work, work_fn); + + peer_get(con); + queue_work(wq, &desp->work); + + ret = 0; + return ret; + +exit_desp: + kfree(desp); + return ret; +} + +static int hmdfs_request_recv(struct hmdfs_peer *con, + struct hmdfs_head_cmd *head, void *buf) +{ + int ret; + + if (head->operations.command >= F_SIZE || + !s_recv_callbacks[head->operations.command]) { + ret = -EINVAL; + hmdfs_err("NULL callback, command %d", + head->operations.command); + goto out; + } + + switch (head->operations.command) { + case F_OPEN: + case F_RELEASE: + case F_ITERATE: + case F_MKDIR: + case F_RMDIR: + case F_CREATE: + case F_UNLINK: + case F_RENAME: + case F_SETATTR: + case F_STATFS: + case F_CONNECT_REKEY: + case F_DROP_PUSH: + case F_GETATTR: + case F_FSYNC: + case F_SYNCFS: + case F_GETXATTR: + case F_SETXATTR: + case F_LISTXATTR: + case F_READPAGES_OPEN: + case F_ATOMIC_OPEN: + ret = hmdfs_msg_handle_async(con, head, buf, con->req_handle_wq, + hmdfs_request_work_fn); + break; + case F_WRITEPAGE: + case F_READPAGE: + case F_READPAGES: + hmdfs_msg_handle_sync(con, head, buf); + ret = 0; + break; + default: + hmdfs_err("Fatal! Unexpected request command %d", + head->operations.command); + ret = -EINVAL; + } + +out: + return ret; +} + +void hmdfs_response_wakeup(struct sendmsg_wait_queue *msg_info, + __u32 ret_code, __u32 data_len, void *buf) +{ + msg_info->ret = ret_code; + msg_info->size = data_len; + msg_info->buf = buf; + atomic_set(&msg_info->valid, MSG_Q_END_RECV); + wake_up_interruptible(&msg_info->response_q); +} + +static int hmdfs_readfile_slice(struct sendmsg_wait_queue *msg_info, + struct work_handler_desp *desp) +{ + struct slice_descriptor *sdesc = desp->buf; + void *slice_buf = sdesc + 1; + struct file_recv_info *recv_info = &msg_info->recv_info; + struct file *filp = recv_info->local_filp; + loff_t offset; + ssize_t written_size; + + if (atomic_read(&recv_info->state) != FILE_RECV_PROCESS) + return -EBUSY; + + offset = le32_to_cpu(sdesc->slice_size) * le32_to_cpu(sdesc->slice_sn); + + written_size = kernel_write(filp, slice_buf, + le32_to_cpu(sdesc->content_size), &offset); + if (IS_ERR_VALUE(written_size)) { + atomic_set(&recv_info->state, FILE_RECV_ERR_SPC); + hmdfs_info("Fatal! 
Cannot store a file slice %d/%d, ret = %d", + le32_to_cpu(sdesc->slice_sn), + le32_to_cpu(sdesc->num_slices), (int)written_size); + return (int)written_size; + } + + if (atomic_inc_return(&recv_info->local_fslices) >= + le32_to_cpu(sdesc->num_slices)) + atomic_set(&recv_info->state, FILE_RECV_SUCC); + return 0; +} + +static void hmdfs_file_response_work_fn(struct work_struct *ptr) +{ + struct work_handler_desp *desp = + container_of(ptr, struct work_handler_desp, work); + struct sendmsg_wait_queue *msg_info = NULL; + int ret; + atomic_t *pstate = NULL; + u8 cmd = desp->head->operations.command; + const struct cred *old_cred = + hmdfs_override_creds(desp->peer->sbi->cred); + + msg_info = (struct sendmsg_wait_queue *)hmdfs_find_msg_head(desp->peer, + le32_to_cpu(desp->head->msg_id)); + if (!msg_info || atomic_read(&msg_info->valid) != MSG_Q_SEND) { + hmdfs_client_resp_statis(desp->peer->sbi, cmd, HMDFS_RESP_DELAY, + 0, 0); + hmdfs_info("cannot find msg(id %d)", + le32_to_cpu(desp->head->msg_id)); + goto free; + } + + ret = le32_to_cpu(desp->head->ret_code); + if (ret || le32_to_cpu(desp->head->data_len) == sizeof(*desp->head)) + goto wakeup; + ret = hmdfs_readfile_slice(msg_info, desp); + pstate = &msg_info->recv_info.state; + if (ret || atomic_read(pstate) != FILE_RECV_PROCESS) + goto wakeup; + goto free; + +wakeup: + hmdfs_response_wakeup(msg_info, ret, sizeof(struct hmdfs_head_cmd), + NULL); + hmdfs_client_resp_statis(desp->peer->sbi, cmd, HMDFS_RESP_NORMAL, + msg_info->start, jiffies); +free: + if (msg_info) + msg_put(msg_info); + peer_put(desp->peer); + hmdfs_revert_creds(old_cred); + + kfree(desp->buf); + kfree(desp->head); + kfree(desp); +} + +static void hmdfs_wait_mp_wfired(struct hmdfs_msg_parasite *mp) +{ + /* We just cancel queued works */ + while (unlikely(!smp_load_acquire(&mp->wfired))) + usleep_range(ACQUIRE_WFIRED_INTVAL_USEC_MIN, + ACQUIRE_WFIRED_INTVAL_USEC_MAX); +} + +int hmdfs_response_handle_sync(struct hmdfs_peer *con, + struct hmdfs_head_cmd *head, void *buf) +{ + struct sendmsg_wait_queue *msg_info = NULL; + struct hmdfs_msg_parasite *mp = NULL; + struct hmdfs_msg_idr_head *msg_head = NULL; + u32 msg_id = le32_to_cpu(head->msg_id); + bool woke = false; + u8 cmd = head->operations.command; + + msg_head = hmdfs_find_msg_head(con, msg_id); + if (!msg_head) + goto out; + + switch (msg_head->type) { + case MSG_IDR_MESSAGE_SYNC: + msg_info = (struct sendmsg_wait_queue *)msg_head; + if (atomic_read(&msg_info->valid) == MSG_Q_SEND) { + hmdfs_response_wakeup(msg_info, + le32_to_cpu(head->ret_code), + le32_to_cpu(head->data_len), buf); + hmdfs_client_resp_statis(con->sbi, cmd, + HMDFS_RESP_NORMAL, + msg_info->start, jiffies); + woke = true; + } + + msg_put(msg_info); + break; + case MSG_IDR_MESSAGE_ASYNC: + mp = (struct hmdfs_msg_parasite *)msg_head; + + hmdfs_wait_mp_wfired(mp); + if (cancel_delayed_work(&mp->d_work)) { + mp->resp.out_buf = buf; + mp->resp.out_len = + le32_to_cpu(head->data_len) - sizeof(*head); + mp->resp.ret_code = le32_to_cpu(head->ret_code); + queue_delayed_work(con->async_wq, &mp->d_work, 0); + hmdfs_client_resp_statis(con->sbi, cmd, + HMDFS_RESP_NORMAL, mp->start, + jiffies); + woke = true; + } + mp_put(mp); + break; + default: + hmdfs_err("receive incorrect msg type %d msg_id %d cmd %d", + msg_head->type, msg_id, cmd); + break; + } + + if (likely(woke)) + return 0; +out: + hmdfs_client_resp_statis(con->sbi, cmd, HMDFS_RESP_DELAY, 0, 0); + hmdfs_info("cannot find msg_id %d cmd %d", msg_id, cmd); + return -EINVAL; +} + +static int 
hmdfs_response_recv(struct hmdfs_peer *con, + struct hmdfs_head_cmd *head, void *buf) +{ + __u16 command = head->operations.command; + int ret; + + if (command >= F_SIZE) { + ret = -EINVAL; + return ret; + } + + switch (head->operations.command) { + case F_OPEN: + case F_RELEASE: + case F_READPAGE: + case F_WRITEPAGE: + case F_MKDIR: + case F_RMDIR: + case F_CREATE: + case F_UNLINK: + case F_RENAME: + case F_SETATTR: + case F_STATFS: + case F_CONNECT_REKEY: + case F_DROP_PUSH: + case F_GETATTR: + case F_FSYNC: + case F_SYNCFS: + case F_GETXATTR: + case F_SETXATTR: + case F_LISTXATTR: + ret = hmdfs_response_handle_sync(con, head, buf); + return ret; + + case F_ITERATE: + ret = hmdfs_msg_handle_async(con, head, buf, con->async_wq, + hmdfs_file_response_work_fn); + return ret; + + default: + hmdfs_err("Fatal! Unexpected response command %d", + head->operations.command); + ret = -EINVAL; + return ret; + } +} + +static void hmdfs_recv_mesg_callback(struct hmdfs_peer *con, void *head, + void *buf) +{ + struct hmdfs_head_cmd *hmdfs_head = (struct hmdfs_head_cmd *)head; + + trace_hmdfs_recv_mesg_callback(hmdfs_head); + + if (hmdfs_message_verify(con, hmdfs_head, buf) < 0) { + hmdfs_info("Message %d has been abandoned", hmdfs_head->msg_id); + goto out_err; + } + + switch (hmdfs_head->operations.cmd_flag) { + case C_REQUEST: + if (hmdfs_request_recv(con, hmdfs_head, buf) < 0) + goto out_err; + break; + + case C_RESPONSE: + if (hmdfs_response_recv(con, hmdfs_head, buf) < 0) + goto out_err; + break; + + default: + hmdfs_err("Fatal! Unexpected msg cmd %d", + hmdfs_head->operations.cmd_flag); + break; + } + return; + +out_err: + kfree(buf); +} + +static inline void hmdfs_recv_page_callback(struct hmdfs_peer *con, + struct hmdfs_head_cmd *head, + int err, void *data) +{ + if (head->operations.command == F_READPAGE) + hmdfs_client_recv_readpage(head, err, data); +} + +static const struct connection_operations conn_operations[] = { + [PROTOCOL_VERSION] = { + .recvmsg = hmdfs_recv_mesg_callback, + .recvpage = hmdfs_recv_page_callback, + /* remote device operations */ + .remote_file_fops = + &hmdfs_dev_file_fops_remote, + .remote_file_iops = + &hmdfs_dev_file_iops_remote, + .remote_file_aops = + &hmdfs_dev_file_aops_remote, + .remote_unlink = + hmdfs_dev_unlink_from_con, + .remote_readdir = + hmdfs_dev_readdir_from_con, + } +}; + +const struct connection_operations *hmdfs_get_peer_operation(__u8 version) +{ + if (version <= INVALID_VERSION || version >= MAX_VERSION) + return NULL; + + if (version <= USERSPACE_MAX_VER) + return &(conn_operations[USERDFS_VERSION]); + else + return &(conn_operations[PROTOCOL_VERSION]); +} + +void hmdfs_wakeup_parasite(struct hmdfs_msg_parasite *mp) +{ + hmdfs_wait_mp_wfired(mp); + if (!cancel_delayed_work(&mp->d_work)) + hmdfs_err("cancel parasite work err msg_id=%d cmd=%d", + mp->head.msg_id, mp->req.operations.command); + else + async_request_cb_on_wakeup_fn(&mp->d_work.work); +} + +void hmdfs_wakeup_async_work(struct hmdfs_async_work *async_work) +{ + if (!cancel_delayed_work(&async_work->d_work)) + hmdfs_err("cancel async work err msg_id=%d", + async_work->head.msg_id); + else + hmdfs_recv_page_work_fn(&async_work->d_work.work); +} diff --git a/fs/hmdfs/comm/socket_adapter.h b/fs/hmdfs/comm/socket_adapter.h new file mode 100644 index 000000000000..ba4c672d7bcc --- /dev/null +++ b/fs/hmdfs/comm/socket_adapter.h @@ -0,0 +1,193 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/socket_adapter.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#ifndef SOCKET_ADAPTER_H +#define SOCKET_ADAPTER_H + +#include +#include + +#include "connection.h" +#include "hmdfs.h" +#include "protocol.h" + +#define HMDFS_KEY_SIZE 32 +#define HMDFS_IV_SIZE 12 +#define HMDFS_TAG_SIZE 16 +#define HMDFS_CID_SIZE 64 +#define INVALID_SOCKET_FD (-1) + +#define HMDFS_IDR_RESCHED_COUNT 512 + +struct connection_operations { + void (*recvmsg)(struct hmdfs_peer *con, void *head, void *buf); + void (*recvpage)(struct hmdfs_peer *con, struct hmdfs_head_cmd *head, + int err, void *data); + const struct file_operations *remote_file_fops; + const struct inode_operations *remote_file_iops; + const struct address_space_operations *remote_file_aops; + int (*remote_unlink)(struct hmdfs_peer *con, struct dentry *dentry); + int (*remote_readdir)(struct hmdfs_peer *con, struct file *file, + struct dir_context *ctx); + struct hmdfs_lookup_ret *(*remote_lookup)(struct hmdfs_peer *con, + const char *relative_path, + const char *d_name); +}; + +/***************************************************************************** + * connections(TCP, UDP, .etc) adapter for RPC + *****************************************************************************/ + +struct work_handler_desp { + struct work_struct work; + struct hmdfs_peer *peer; + struct hmdfs_head_cmd *head; + void *buf; +}; + +struct work_readfile_request_async { + struct work_struct work; + struct hmdfs_peer *con; + struct hmdfs_send_command sm; +}; + +static inline void hmdfs_init_cmd(struct hmdfs_cmd *op, u8 cmd) +{ + op->reserved = 0; + op->cmd_flag = C_REQUEST; + op->command = cmd; + op->reserved2 = 0; +} + +int hmdfs_send_async_request(struct hmdfs_peer *peer, + const struct hmdfs_req *req); +int hmdfs_sendmessage_request(struct hmdfs_peer *con, + struct hmdfs_send_command *msg); +int hmdfs_sendpage_request(struct hmdfs_peer *con, + struct hmdfs_send_command *msg); + +int hmdfs_sendmessage_response(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, __u32 data_len, + void *buf, __u32 ret_code); +int hmdfs_readfile_response(struct hmdfs_peer *con, struct hmdfs_head_cmd *head, + struct file *filp); +const struct connection_operations *hmdfs_get_peer_operation(__u8 version); + +void hmdfs_recv_page_work_fn(struct work_struct *ptr); + +/***************************************************************************** + * statistics info for RPC + *****************************************************************************/ + +enum hmdfs_resp_type { + HMDFS_RESP_NORMAL, + HMDFS_RESP_DELAY, + HMDFS_RESP_TIMEOUT +}; + +struct server_statistic { + unsigned long long cnt; /* request received */ + unsigned long long max; /* max processing time */ + unsigned long long total; /* total processing time */ + unsigned long long snd_cnt; /* resp send to client */ + unsigned long long snd_fail_cnt; /* send resp to client failed cnt */ +}; + +struct client_statistic { + unsigned long long snd_cnt; /* request send to server */ + unsigned long long resp_cnt; /* response receive from server */ + unsigned long long timeout_cnt; /* no respone from server */ + unsigned long long delay_resp_cnt; /* delay response from server */ + unsigned long long max; /* max waiting time */ + unsigned long long total; /* total waiting time */ + unsigned long long snd_fail_cnt; /* request send failed to server */ +}; + + +static inline void hmdfs_statistic(struct hmdfs_sb_info *sbi, u8 cmd, + unsigned long jiff) +{ + if (cmd >= F_SIZE) + return; + + sbi->s_server_statis[cmd].cnt++; + sbi->s_server_statis[cmd].total += jiff; + if (jiff > 
sbi->s_server_statis[cmd].max) + sbi->s_server_statis[cmd].max = jiff; +} + +static inline void hmdfs_server_snd_statis(struct hmdfs_sb_info *sbi, + u8 cmd, int ret) +{ + if (cmd >= F_SIZE) + return; + ret ? sbi->s_server_statis[cmd].snd_fail_cnt++ : + sbi->s_server_statis[cmd].snd_cnt++; +} + +static inline void hmdfs_client_snd_statis(struct hmdfs_sb_info *sbi, + u8 cmd, int ret) +{ + if (cmd >= F_SIZE) + return; + ret ? sbi->s_client_statis[cmd].snd_fail_cnt++ : + sbi->s_client_statis[cmd].snd_cnt++; +} + +extern void hmdfs_client_resp_statis(struct hmdfs_sb_info *sbi, u8 cmd, + enum hmdfs_resp_type type, + unsigned long start, unsigned long end); + +/***************************************************************************** + * timeout configuration for RPC + *****************************************************************************/ + +enum HMDFS_TIME_OUT { + TIMEOUT_NONE = 0, + TIMEOUT_COMMON = 4, + TIMEOUT_6S = 6, + TIMEOUT_30S = 30, + TIMEOUT_1M = 60, + TIMEOUT_90S = 90, + TIMEOUT_CONFIG = UINT_MAX - 1, // for hmdfs_req to read from config + TIMEOUT_UNINIT = UINT_MAX, +}; + +static inline int get_cmd_timeout(struct hmdfs_sb_info *sbi, enum FILE_CMD cmd) +{ + return sbi->s_cmd_timeout[cmd]; +} + +static inline void set_cmd_timeout(struct hmdfs_sb_info *sbi, enum FILE_CMD cmd, + unsigned int value) +{ + sbi->s_cmd_timeout[cmd] = value; +} + +void hmdfs_response_wakeup(struct sendmsg_wait_queue *msg_info, + __u32 ret_code, __u32 data_len, void *buf); + +void hmdfs_wakeup_parasite(struct hmdfs_msg_parasite *mp); + +void hmdfs_wakeup_async_work(struct hmdfs_async_work *async_work); + +void msg_put(struct sendmsg_wait_queue *msg_wq); +void head_put(struct hmdfs_msg_idr_head *head); +void mp_put(struct hmdfs_msg_parasite *mp); +void asw_put(struct hmdfs_async_work *asw); +static inline void asw_done(struct hmdfs_async_work *asw) +{ + if (asw->page) + unlock_page(asw->page); + asw_put(asw); +} + +static inline void asw_get(struct hmdfs_async_work *asw) +{ + kref_get(&asw->head.ref); +} +#endif diff --git a/fs/hmdfs/comm/transport.c b/fs/hmdfs/comm/transport.c new file mode 100644 index 000000000000..cb57da2c53f8 --- /dev/null +++ b/fs/hmdfs/comm/transport.c @@ -0,0 +1,1220 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/comm/transport.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#include "transport.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "device_node.h" +#include "hmdfs_trace.h" +#include "socket_adapter.h" +#include "authority/authentication.h" + +#ifdef CONFIG_HMDFS_FS_ENCRYPTION +#include +#include "crypto.h" +#endif + +typedef void (*connect_recv_handler)(struct connection *, void *, void *, + __u32); + +static connect_recv_handler connect_recv_callback[CONNECT_STAT_COUNT] = { + [CONNECT_STAT_WAIT_REQUEST] = connection_handshake_recv_handler, + [CONNECT_STAT_WAIT_RESPONSE] = connection_handshake_recv_handler, + [CONNECT_STAT_WORKING] = connection_working_recv_handler, + [CONNECT_STAT_STOP] = NULL, + [CONNECT_STAT_WAIT_ACK] = connection_handshake_recv_handler, + [CONNECT_STAT_NEGO_FAIL] = NULL, +}; + +static int recvmsg_nofs(struct socket *sock, struct msghdr *msg, + struct kvec *vec, size_t num, size_t size, int flags) +{ + unsigned int nofs_flags; + int ret; + + /* enable NOFS for memory allocation */ + nofs_flags = memalloc_nofs_save(); + ret = kernel_recvmsg(sock, msg, vec, num, size, flags); + memalloc_nofs_restore(nofs_flags); + + return ret; +} + +static int sendmsg_nofs(struct socket *sock, struct msghdr *msg, + struct kvec *vec, size_t num, size_t size) +{ + unsigned int nofs_flags; + int ret; + + /* enable NOFS for memory allocation */ + nofs_flags = memalloc_nofs_save(); + ret = kernel_sendmsg(sock, msg, vec, num, size); + memalloc_nofs_restore(nofs_flags); + + return ret; +} + +static int tcp_set_recvtimeo(struct socket *sock, int timeout) +{ + long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC); + + tcp_sock_set_nodelay(sock->sk); + tcp_sock_set_user_timeout(sock->sk, jiffies_left); + return 0; +} + +uint32_t hmdfs_tcpi_rtt(struct hmdfs_peer *con) +{ + uint32_t rtt_us = 0; + struct connection *conn_impl = NULL; + struct tcp_handle *tcp = NULL; + + conn_impl = get_conn_impl(con, CONNECT_TYPE_TCP); + if (!conn_impl) + return rtt_us; + tcp = (struct tcp_handle *)(conn_impl->connect_handle); + if (tcp->sock) + rtt_us = tcp_sk(tcp->sock->sk)->srtt_us >> 3; + connection_put(conn_impl); + return rtt_us; +} + +static int tcp_read_head_from_socket(struct socket *sock, void *buf, + unsigned int to_read) +{ + int rc = 0; + struct msghdr hmdfs_msg; + struct kvec iov; + + iov.iov_base = buf; + iov.iov_len = to_read; + memset(&hmdfs_msg, 0, sizeof(hmdfs_msg)); + hmdfs_msg.msg_flags = MSG_WAITALL; + hmdfs_msg.msg_control = NULL; + hmdfs_msg.msg_controllen = 0; + rc = recvmsg_nofs(sock, &hmdfs_msg, &iov, 1, to_read, + hmdfs_msg.msg_flags); + if (rc == -EAGAIN || rc == -ETIMEDOUT || rc == -EINTR || + rc == -EBADMSG) { + usleep_range(1000, 2000); + return -EAGAIN; + } + // error occurred + if (rc != to_read) { + hmdfs_err("tcp recv error %d", rc); + return -ESHUTDOWN; + } + return 0; +} + +static int tcp_read_buffer_from_socket(struct socket *sock, void *buf, + unsigned int to_read) +{ + int read_cnt = 0; + int retry_time = 0; + int rc = 0; + struct msghdr hmdfs_msg; + struct kvec iov; + + do { + iov.iov_base = (char *)buf + read_cnt; + iov.iov_len = to_read - read_cnt; + memset(&hmdfs_msg, 0, sizeof(hmdfs_msg)); + hmdfs_msg.msg_flags = MSG_WAITALL; + hmdfs_msg.msg_control = NULL; + hmdfs_msg.msg_controllen = 0; + rc = recvmsg_nofs(sock, &hmdfs_msg, &iov, 1, + to_read - read_cnt, hmdfs_msg.msg_flags); + if (rc == -EBADMSG) { + usleep_range(1000, 2000); + continue; + } + if (rc == -EAGAIN || rc == -ETIMEDOUT || rc == -EINTR) { + retry_time++; + hmdfs_info("read again %d", rc); + 
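/* transient error: back off briefly before retrying the recv */ +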
usleep_range(1000, 2000); + continue; + } + // error occurred + if (rc <= 0) { + hmdfs_err("tcp recv error %d", rc); + return -ESHUTDOWN; + } + read_cnt += rc; + if (read_cnt != to_read) + hmdfs_info("read again %d/%d", read_cnt, to_read); + } while (read_cnt < to_read && retry_time < MAX_RECV_RETRY_TIMES); + if (read_cnt == to_read) + return 0; + return -ESHUTDOWN; +} + +static int hmdfs_drop_readpage_buffer(struct socket *sock, + struct hmdfs_head_cmd *recv) +{ + unsigned int len; + void *buf = NULL; + int err; + + len = le32_to_cpu(recv->data_len) - sizeof(struct hmdfs_head_cmd); + if (len > HMDFS_PAGE_SIZE || !len) { + hmdfs_err("recv invalid readpage length %u", len); + return -EINVAL; + } + + /* Abort the connection if no memory */ + buf = kmalloc(len, GFP_KERNEL); + if (!buf) + return -ESHUTDOWN; + + err = tcp_read_buffer_from_socket(sock, buf, len); + kfree(buf); + + return err; +} + +static int hmdfs_get_readpage_buffer(struct socket *sock, + struct hmdfs_head_cmd *recv, + struct page *page) +{ + char *page_buf = NULL; + unsigned int out_len; + int err; + + out_len = le32_to_cpu(recv->data_len) - sizeof(struct hmdfs_head_cmd); + if (out_len > HMDFS_PAGE_SIZE || !out_len) { + hmdfs_err("recv invalid readpage length %u", out_len); + return -EINVAL; + } + + page_buf = kmap(page); + err = tcp_read_buffer_from_socket(sock, page_buf, out_len); + if (err) + goto out_unmap; + if (out_len != HMDFS_PAGE_SIZE) + memset(page_buf + out_len, 0, HMDFS_PAGE_SIZE - out_len); + +out_unmap: + kunmap(page); + return err; +} + +static int tcp_recvpage_tls(struct connection *connect, + struct hmdfs_head_cmd *recv) +{ + int ret = 0; + struct tcp_handle *tcp = NULL; + struct hmdfs_peer *node = NULL; + struct page *page = NULL; + struct hmdfs_async_work *async_work = NULL; + int rd_err; + + if (!connect) { + hmdfs_err("tcp connect == NULL"); + return -ESHUTDOWN; + } + node = connect->node; + tcp = (struct tcp_handle *)(connect->connect_handle); + + rd_err = le32_to_cpu(recv->ret_code); + if (rd_err) + hmdfs_warning("tcp: readpage from peer %llu ret err %d", + node->device_id, rd_err); + + async_work = (struct hmdfs_async_work *)hmdfs_find_msg_head(node, + le32_to_cpu(recv->msg_id)); + if (!async_work || !cancel_delayed_work(&async_work->d_work)) + goto out; + + page = async_work->page; + if (!page) { + hmdfs_err("page not found"); + goto out; + } + + if (!rd_err) { + ret = hmdfs_get_readpage_buffer(tcp->sock, recv, page); + if (ret) + rd_err = ret; + } + node->conn_operations->recvpage(node, recv, rd_err, async_work); + asw_put(async_work); + return ret; + +out: + /* async_work will be released by recvpage in the normal procedure */ + if (async_work) + asw_put(async_work); + hmdfs_err_ratelimited("readpage timeout, dropping page"); + hmdfs_client_resp_statis(node->sbi, F_READPAGE, HMDFS_RESP_DELAY, 0, 0); + if (!rd_err) + ret = hmdfs_drop_readpage_buffer(tcp->sock, recv); + return ret; +} + +static void aeadcipher_cb(struct crypto_async_request *req, int error) +{ + struct aeadcrypt_result *result = req->data; + + if (error == -EINPROGRESS) + return; + result->err = error; + complete(&result->completion); +} + +static int aeadcipher_en_de(struct aead_request *req, + struct aeadcrypt_result *result, int flag) +{ + int rc = 0; + + if (flag) + rc = crypto_aead_encrypt(req); + else + rc = crypto_aead_decrypt(req); + switch (rc) { + case 0: + break; + case -EINPROGRESS: + case -EBUSY: + rc = wait_for_completion_interruptible(&result->completion); + if (!rc && !result->err) + reinit_completion(&result->completion); + break; + 
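/* any other return code is an immediate crypto failure */ +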
default: + hmdfs_err("returned rc %d result %d", rc, result->err); + break; + } + return rc; +} + +static int set_aeadcipher(struct crypto_aead *tfm, struct aead_request *req, + struct aeadcrypt_result *result) +{ + init_completion(&result->completion); + aead_request_set_callback( + req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + aeadcipher_cb, result); + return 0; +} + +int aeadcipher_encrypt_buffer(struct connection *con, __u8 *src_buf, + size_t src_len, __u8 *dst_buf, size_t dst_len) +{ + int ret = 0; + struct scatterlist src, dst; + struct aead_request *req = NULL; + struct aeadcrypt_result result; + __u8 cipher_iv[HMDFS_IV_SIZE]; + + if (src_len == 0) + return -EINVAL; + if (!virt_addr_valid(src_buf) || !virt_addr_valid(dst_buf)) { + WARN_ON(1); + hmdfs_err("encrypt address is invalid"); + return -EPERM; + } + + get_random_bytes(cipher_iv, HMDFS_IV_SIZE); + memcpy(dst_buf, cipher_iv, HMDFS_IV_SIZE); + req = aead_request_alloc(con->tfm, GFP_KERNEL); + if (!req) { + hmdfs_err("aead_request_alloc() failed"); + return -ENOMEM; + } + ret = set_aeadcipher(con->tfm, req, &result); + if (ret) { + hmdfs_err("set_aeadcipher failed for encryption"); + goto out; + } + + sg_init_one(&src, src_buf, src_len); + sg_init_one(&dst, dst_buf + HMDFS_IV_SIZE, dst_len - HMDFS_IV_SIZE); + aead_request_set_crypt(req, &src, &dst, src_len, cipher_iv); + aead_request_set_ad(req, 0); + ret = aeadcipher_en_de(req, &result, ENCRYPT_FLAG); +out: + aead_request_free(req); + return ret; +} + +int aeadcipher_decrypt_buffer(struct connection *con, __u8 *src_buf, + size_t src_len, __u8 *dst_buf, size_t dst_len) +{ + int ret = 0; + struct scatterlist src, dst; + struct aead_request *req = NULL; + struct aeadcrypt_result result; + __u8 cipher_iv[HMDFS_IV_SIZE]; + + if (src_len <= HMDFS_IV_SIZE + HMDFS_TAG_SIZE) + return -EINVAL; + if (!virt_addr_valid(src_buf) || !virt_addr_valid(dst_buf)) { + WARN_ON(1); + hmdfs_err("decrypt address is invalid"); + return -EPERM; + } + + memcpy(cipher_iv, src_buf, HMDFS_IV_SIZE); + req = aead_request_alloc(con->tfm, GFP_KERNEL); + if (!req) { + hmdfs_err("aead_request_alloc() failed"); + return -ENOMEM; + } + ret = set_aeadcipher(con->tfm, req, &result); + if (ret) { + hmdfs_err("set_aeadcipher failed for decryption"); + goto out; + } + + sg_init_one(&src, src_buf + HMDFS_IV_SIZE, src_len - HMDFS_IV_SIZE); + sg_init_one(&dst, dst_buf, dst_len); + aead_request_set_crypt(req, &src, &dst, src_len - HMDFS_IV_SIZE, + cipher_iv); + aead_request_set_ad(req, 0); + ret = aeadcipher_en_de(req, &result, DECRYPT_FLAG); +out: + aead_request_free(req); + return ret; +} + +static int tcp_recvbuffer_cipher(struct connection *connect, + struct hmdfs_head_cmd *recv) +{ + int ret = 0; + struct tcp_handle *tcp = NULL; + size_t cipherbuffer_len; + __u8 *cipherbuffer = NULL; + size_t outlen = 0; + __u8 *outdata = NULL; + __u32 recv_len = le32_to_cpu(recv->data_len); + + tcp = (struct tcp_handle *)(connect->connect_handle); + if (recv_len == sizeof(struct hmdfs_head_cmd)) + goto out_recv_head; + else if (recv_len > sizeof(struct hmdfs_head_cmd) && + recv_len <= ADAPTER_MESSAGE_LENGTH) + cipherbuffer_len = recv_len - sizeof(struct hmdfs_head_cmd) + + HMDFS_IV_SIZE + HMDFS_TAG_SIZE; + else + return -ENOMSG; + cipherbuffer = kzalloc(cipherbuffer_len, GFP_KERNEL); + if (!cipherbuffer) { + hmdfs_err("zalloc cipherbuffer error"); + return -ESHUTDOWN; + } + outlen = cipherbuffer_len - HMDFS_IV_SIZE - HMDFS_TAG_SIZE; + outdata = kzalloc(outlen, GFP_KERNEL); + if (!outdata) { + hmdfs_err("encrypt zalloc outdata error"); + 
kfree(cipherbuffer); + return -ESHUTDOWN; + } + + ret = tcp_read_buffer_from_socket(tcp->sock, cipherbuffer, + cipherbuffer_len); + if (ret) + goto out_recv; + ret = aeadcipher_decrypt_buffer(connect, cipherbuffer, cipherbuffer_len, + outdata, outlen); + if (ret) { + hmdfs_err("decrypt_buf fail"); + goto out_recv; + } +out_recv_head: + if (connect_recv_callback[connect->status]) { + connect_recv_callback[connect->status](connect, recv, outdata, + outlen); + } else { + kfree(outdata); + hmdfs_err("encrypt callback NULL status %d", connect->status); + } + kfree(cipherbuffer); + return ret; +out_recv: + kfree(cipherbuffer); + kfree(outdata); + return ret; +} + +static int tcp_recvbuffer_tls(struct connection *connect, + struct hmdfs_head_cmd *recv) +{ + int ret = 0; + struct tcp_handle *tcp = NULL; + size_t outlen; + __u8 *outdata = NULL; + __u32 recv_len = le32_to_cpu(recv->data_len); + + tcp = (struct tcp_handle *)(connect->connect_handle); + outlen = recv_len - sizeof(struct hmdfs_head_cmd); + if (outlen == 0) + goto out_recv_head; + + /* + * NOTE: Up to half of the allocated memory may be wasted due to + * internal fragmentation; however, this reduces the number of + * allocations and we don't have to adjust the existing message + * transport mechanism. + */ + outdata = kmalloc(outlen, GFP_KERNEL); + if (!outdata) + return -ESHUTDOWN; + + ret = tcp_read_buffer_from_socket(tcp->sock, outdata, outlen); + if (ret) { + kfree(outdata); + return ret; + } + tcp->connect->stat.recv_bytes += outlen; +out_recv_head: + if (connect_recv_callback[connect->status]) { + connect_recv_callback[connect->status](connect, recv, outdata, + outlen); + } else { + kfree(outdata); + hmdfs_err("callback NULL status %d", connect->status); + } + return 0; +} + +static int tcp_receive_from_sock(struct tcp_handle *tcp) +{ + struct hmdfs_head_cmd *recv = NULL; + int ret = 0; + + if (!tcp) { + hmdfs_info("tcp recv thread !tcp"); + return -ESHUTDOWN; + } + + if (!tcp->sock) { + hmdfs_info("tcp recv thread !sock"); + return -ESHUTDOWN; + } + + recv = kmem_cache_alloc(tcp->recv_cache, GFP_KERNEL); + if (!recv) { + hmdfs_info("tcp recv thread !cache"); + return -ESHUTDOWN; + } + + ret = tcp_read_head_from_socket(tcp->sock, recv, + sizeof(struct hmdfs_head_cmd)); + if (ret) + goto out; + + tcp->connect->stat.recv_bytes += sizeof(struct hmdfs_head_cmd); + tcp->connect->stat.recv_message_count++; + + if (recv->magic != HMDFS_MSG_MAGIC) { + hmdfs_info_ratelimited("tcp recv fd %d wrong magic. drop message", + tcp->fd); + goto out; + } + + if ((le32_to_cpu(recv->data_len) > + HMDFS_MAX_MESSAGE_LEN + sizeof(struct hmdfs_head_cmd)) || + (le32_to_cpu(recv->data_len) < sizeof(struct hmdfs_head_cmd))) { + hmdfs_info("tcp recv fd %d length error. 
drop message", + tcp->fd); + goto out; + } + + if (recv->version > USERSPACE_MAX_VER && + tcp->connect->status == CONNECT_STAT_WORKING && + recv->operations.command == F_READPAGE && + recv->operations.cmd_flag == C_RESPONSE) { + ret = tcp_recvpage_tls(tcp->connect, recv); + goto out; + } + + if (tcp->connect->status == CONNECT_STAT_WORKING && + recv->version > USERSPACE_MAX_VER) + ret = tcp_recvbuffer_tls(tcp->connect, recv); + else + ret = tcp_recvbuffer_cipher(tcp->connect, recv); + +out: + kmem_cache_free(tcp->recv_cache, recv); + return ret; +} + +static bool tcp_handle_is_available(struct tcp_handle *tcp) +{ +#ifdef CONFIG_HMDFS_FS_ENCRYPTION + struct tls_context *tls_ctx = NULL; + struct tls_sw_context_rx *ctx = NULL; + +#endif + if (!tcp || !tcp->sock || !tcp->sock->sk) { + hmdfs_err("Invalid tcp connection"); + return false; + } + + if (tcp->sock->sk->sk_state != TCP_ESTABLISHED) { + hmdfs_err("TCP conn %d is broken, current sk_state is %d", + tcp->fd, tcp->sock->sk->sk_state); + return false; + } + + if (tcp->sock->state != SS_CONNECTING && + tcp->sock->state != SS_CONNECTED) { + hmdfs_err("TCP conn %d is broken, current sock state is %d", + tcp->fd, tcp->sock->state); + return false; + } + +#ifdef CONFIG_HMDFS_FS_ENCRYPTION + tls_ctx = tls_get_ctx(tcp->sock->sk); + if (tls_ctx) { + ctx = tls_sw_ctx_rx(tls_ctx); + if (ctx && ctx->strp.stopped) { + hmdfs_err( + "TCP conn %d is broken, the strparser has stopped", + tcp->fd); + return false; + } + } +#endif + return true; +} + +static int tcp_recv_thread(void *arg) +{ + int ret = 0; + struct tcp_handle *tcp = (struct tcp_handle *)arg; + const struct cred *old_cred; + + WARN_ON(!tcp); + WARN_ON(!tcp->sock); + set_freezable(); + + old_cred = hmdfs_override_creds(tcp->connect->node->sbi->system_cred); + + while (!kthread_should_stop()) { + /* + * 1. In case the redundant connection has not been mounted on + * a peer + * 2. Lock is unnecessary since a transient state is acceptable + */ + if (tcp_handle_is_available(tcp) && + list_empty(&tcp->connect->list)) + goto freeze; + if (!mutex_trylock(&tcp->close_mutex)) + continue; + if (tcp_handle_is_available(tcp)) + ret = tcp_receive_from_sock(tcp); + else + ret = -ESHUTDOWN; + /* + * This kthread will exit if ret is -ESHUTDOWN, thus we need to + * set recv_task to NULL to avoid calling kthread_stop() from + * tcp_close_socket(). + */ + if (ret == -ESHUTDOWN) + tcp->recv_task = NULL; + mutex_unlock(&tcp->close_mutex); + if (ret == -ESHUTDOWN) { + hmdfs_node_inc_evt_seq(tcp->connect->node); + tcp->connect->status = CONNECT_STAT_STOP; + if (tcp->connect->node->status != NODE_STAT_OFFLINE) + hmdfs_reget_connection(tcp->connect); + break; + } +freeze: + schedule(); + try_to_freeze(); + } + + hmdfs_info("Exiting. 
Now, sock state = %d", tcp->sock->state); + hmdfs_revert_creds(old_cred); + connection_put(tcp->connect); + return 0; +} + +static int tcp_send_message_sock_cipher(struct tcp_handle *tcp, + struct hmdfs_send_data *msg) +{ + int ret = 0; + __u8 *outdata = NULL; + size_t outlen = 0; + int send_len = 0; + int send_vec_cnt = 0; + struct msghdr tcp_msg; + struct kvec iov[TCP_KVEC_ELE_DOUBLE]; + + memset(&tcp_msg, 0, sizeof(tcp_msg)); + if (!tcp || !tcp->sock) { + hmdfs_err("encrypt tcp socket = NULL"); + return -ESHUTDOWN; + } + iov[0].iov_base = msg->head; + iov[0].iov_len = msg->head_len; + send_vec_cnt = TCP_KVEC_HEAD; + if (msg->len == 0) + goto send; + + outlen = msg->len + HMDFS_IV_SIZE + HMDFS_TAG_SIZE; + outdata = kzalloc(outlen, GFP_KERNEL); + if (!outdata) { + hmdfs_err("tcp send message encrypt fail to alloc outdata"); + return -ENOMEM; + } + ret = aeadcipher_encrypt_buffer(tcp->connect, msg->data, msg->len, + outdata, outlen); + if (ret) { + hmdfs_err("encrypt_buf fail"); + goto out; + } + iov[1].iov_base = outdata; + iov[1].iov_len = outlen; + send_vec_cnt = TCP_KVEC_ELE_DOUBLE; +send: + mutex_lock(&tcp->send_mutex); + send_len = sendmsg_nofs(tcp->sock, &tcp_msg, iov, send_vec_cnt, + msg->head_len + outlen); + mutex_unlock(&tcp->send_mutex); + if (send_len <= 0) { + hmdfs_err("error %d", send_len); + ret = -ESHUTDOWN; + } else if (send_len != msg->head_len + outlen) { + hmdfs_err("send part of message. %d/%zu", send_len, + msg->head_len + outlen); + ret = -EAGAIN; + } else { + ret = 0; + } +out: + kfree(outdata); + return ret; +} + +static int tcp_send_message_sock_tls(struct tcp_handle *tcp, + struct hmdfs_send_data *msg) +{ + int send_len = 0; + int send_vec_cnt = 0; + struct msghdr tcp_msg; + struct kvec iov[TCP_KVEC_ELE_TRIPLE]; + + memset(&tcp_msg, 0, sizeof(tcp_msg)); + if (!tcp || !tcp->sock) { + hmdfs_err("tcp socket = NULL"); + return -ESHUTDOWN; + } + iov[TCP_KVEC_HEAD].iov_base = msg->head; + iov[TCP_KVEC_HEAD].iov_len = msg->head_len; + if (msg->len == 0 && msg->sdesc_len == 0) { + send_vec_cnt = TCP_KVEC_ELE_SINGLE; + } else if (msg->sdesc_len == 0) { + iov[TCP_KVEC_DATA].iov_base = msg->data; + iov[TCP_KVEC_DATA].iov_len = msg->len; + send_vec_cnt = TCP_KVEC_ELE_DOUBLE; + } else { + iov[TCP_KVEC_FILE_PARA].iov_base = msg->sdesc; + iov[TCP_KVEC_FILE_PARA].iov_len = msg->sdesc_len; + iov[TCP_KVEC_FILE_CONTENT].iov_base = msg->data; + iov[TCP_KVEC_FILE_CONTENT].iov_len = msg->len; + send_vec_cnt = TCP_KVEC_ELE_TRIPLE; + } + mutex_lock(&tcp->send_mutex); + send_len = sendmsg_nofs(tcp->sock, &tcp_msg, iov, send_vec_cnt, + msg->head_len + msg->len + msg->sdesc_len); + mutex_unlock(&tcp->send_mutex); + if (send_len == -EBADMSG) { + return -EBADMSG; + } else if (send_len <= 0) { + hmdfs_err("error %d", send_len); + return -ESHUTDOWN; + } else if (send_len != msg->head_len + msg->len + msg->sdesc_len) { + hmdfs_err("send part of message. 
%d/%zu", send_len, + msg->head_len + msg->len); + tcp->connect->stat.send_bytes += send_len; + return -EAGAIN; + } + tcp->connect->stat.send_bytes += send_len; + tcp->connect->stat.send_message_count++; + return 0; +} + +#ifdef CONFIG_HMDFS_FS_ENCRYPTION +int tcp_send_rekey_request(struct connection *connect) +{ + int ret = 0; + struct hmdfs_send_data msg; + struct tcp_handle *tcp = connect->connect_handle; + struct hmdfs_head_cmd *head = NULL; + struct connection_rekey_request *rekey_request_param = NULL; + struct hmdfs_cmd operations; + + hmdfs_init_cmd(&operations, F_CONNECT_REKEY); + head = kzalloc(sizeof(struct hmdfs_head_cmd) + + sizeof(struct connection_rekey_request), + GFP_KERNEL); + if (!head) + return -ENOMEM; + rekey_request_param = + (struct connection_rekey_request + *)((uint8_t *)head + sizeof(struct hmdfs_head_cmd)); + + rekey_request_param->update_request = cpu_to_le32(UPDATE_NOT_REQUESTED); + + head->magic = HMDFS_MSG_MAGIC; + head->version = DFS_2_0; + head->operations = operations; + head->data_len = + cpu_to_le32(sizeof(*head) + sizeof(*rekey_request_param)); + head->reserved = 0; + head->reserved1 = 0; + head->ret_code = 0; + + msg.head = head; + msg.head_len = sizeof(*head); + msg.data = rekey_request_param; + msg.len = sizeof(*rekey_request_param); + msg.sdesc = NULL; + msg.sdesc_len = 0; + ret = tcp_send_message_sock_tls(tcp, &msg); + if (ret != 0) + hmdfs_err("return error %d", ret); + kfree(head); + return ret; +} +#endif + +static int tcp_send_message(struct connection *connect, + struct hmdfs_send_data *msg) +{ + int ret = 0; +#ifdef CONFIG_HMDFS_FS_ENCRYPTION + unsigned long nowtime = jiffies; +#endif + struct tcp_handle *tcp = NULL; + + if (!connect) { + hmdfs_err("tcp connection = NULL "); + return -ESHUTDOWN; + } + if (!msg) { + hmdfs_err("msg = NULL"); + return -EINVAL; + } + if (msg->len > HMDFS_MAX_MESSAGE_LEN) { + hmdfs_err("message->len error: %zu", msg->len); + return -EINVAL; + } + tcp = (struct tcp_handle *)(connect->connect_handle); + if (connect->status == CONNECT_STAT_STOP) + return -EAGAIN; + + trace_hmdfs_tcp_send_message(msg->head); + + if (connect->status == CONNECT_STAT_WORKING && + connect->node->version > USERSPACE_MAX_VER) + ret = tcp_send_message_sock_tls(tcp, msg); + else + // Handshake status or version HMDFS1.0 + ret = tcp_send_message_sock_cipher(tcp, msg); + + if (ret != 0) { + hmdfs_err("return error %d", ret); + return ret; + } +#ifdef CONFIG_HMDFS_FS_ENCRYPTION + if (nowtime - connect->stat.rekey_time >= REKEY_LIFETIME && + connect->status == CONNECT_STAT_WORKING && + connect->node->version >= DFS_2_0) { + hmdfs_info("send rekey message to devid %llu", + connect->node->device_id); + ret = tcp_send_rekey_request(connect); + if (ret == 0) + set_crypto_info(connect, SET_CRYPTO_SEND); + connect->stat.rekey_time = nowtime; + } +#endif + return ret; +} + +void tcp_close_socket(struct tcp_handle *tcp) +{ + if (!tcp) + return; + mutex_lock(&tcp->close_mutex); + if (tcp->recv_task) { + kthread_stop(tcp->recv_task); + tcp->recv_task = NULL; + } + mutex_unlock(&tcp->close_mutex); +} + +static int set_tfm(__u8 *master_key, struct crypto_aead *tfm) +{ + int ret = 0; + int iv_len; + __u8 *sec_key = NULL; + + sec_key = master_key; + crypto_aead_clear_flags(tfm, ~0); + ret = crypto_aead_setkey(tfm, sec_key, HMDFS_KEY_SIZE); + if (ret) { + hmdfs_err("failed to set the key"); + goto out; + } + ret = crypto_aead_setauthsize(tfm, HMDFS_TAG_SIZE); + if (ret) { + hmdfs_err("authsize length is error"); + goto out; + } + + iv_len = 
crypto_aead_ivsize(tfm); + if (iv_len != HMDFS_IV_SIZE) { + hmdfs_err("unexpected aead ivsize %d", iv_len); + ret = -ENODATA; + } +out: + return ret; +} + +static int tcp_update_socket(struct tcp_handle *tcp, int fd, + uint8_t *master_key, struct socket *socket) +{ + int err = 0; + struct hmdfs_peer *node = NULL; + + if (!master_key || fd == 0) + return -EAGAIN; + + tcp->sock = socket; + tcp->fd = fd; + if (!tcp_handle_is_available(tcp)) { + err = -EPIPE; + goto put_sock; + } + + hmdfs_info("socket fd %d, state %d, refcount %ld", + fd, socket->state, file_count(socket->file)); + + tcp->recv_cache = kmem_cache_create("hmdfs_socket", + tcp->recvbuf_maxsize, + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!tcp->recv_cache) { + err = -ENOMEM; + goto put_sock; + } + + socket->sk->sk_user_data = tcp; + err = tcp_set_recvtimeo(socket, TCP_RECV_TIMEOUT); + if (err) { + hmdfs_err("tcp set timeout error"); + goto free_mem_cache; + } + + /* send key and recv key, default MASTER KEY */ + memcpy(tcp->connect->master_key, master_key, HMDFS_KEY_SIZE); + memcpy(tcp->connect->send_key, master_key, HMDFS_KEY_SIZE); + memcpy(tcp->connect->recv_key, master_key, HMDFS_KEY_SIZE); + tcp->connect->tfm = crypto_alloc_aead("gcm(aes)", 0, 0); + if (IS_ERR(tcp->connect->tfm)) { + err = PTR_ERR(tcp->connect->tfm); + tcp->connect->tfm = NULL; + hmdfs_err("failed to load transform for gcm(aes):%d", err); + goto free_mem_cache; + } + + err = set_tfm(master_key, tcp->connect->tfm); + if (err) { + hmdfs_err("tfm setting failed"); + goto free_crypto; + } + + connection_get(tcp->connect); + + node = tcp->connect->node; + tcp->recv_task = kthread_create(tcp_recv_thread, (void *)tcp, + "dfs_rcv%u_%llu_%d", + node->owner, node->device_id, fd); + if (IS_ERR(tcp->recv_task)) { + err = PTR_ERR(tcp->recv_task); + hmdfs_err("tcp->recv_task %d", err); + goto put_conn; + } + + return 0; + +put_conn: + tcp->recv_task = NULL; + connection_put(tcp->connect); +free_crypto: + crypto_free_aead(tcp->connect->tfm); + tcp->connect->tfm = NULL; +free_mem_cache: + kmem_cache_destroy(tcp->recv_cache); + tcp->recv_cache = NULL; +put_sock: + tcp->sock = NULL; + tcp->fd = 0; + + return err; +} + +static struct tcp_handle *tcp_alloc_handle(struct connection *connect, + int socket_fd, uint8_t *master_key, struct socket *socket) +{ + int ret = 0; + struct tcp_handle *tcp = kzalloc(sizeof(*tcp), GFP_KERNEL); + + if (!tcp) + return NULL; + tcp->connect = connect; + tcp->connect->connect_handle = (void *)tcp; + tcp->recvbuf_maxsize = MAX_RECV_SIZE; + tcp->recv_task = NULL; + tcp->recv_cache = NULL; + tcp->sock = NULL; + mutex_init(&tcp->close_mutex); + mutex_init(&tcp->send_mutex); + ret = tcp_update_socket(tcp, socket_fd, master_key, socket); + if (ret) { + kfree(tcp); + return NULL; + } + return tcp; +} + +void hmdfs_get_connection(struct hmdfs_peer *peer) +{ + struct notify_param param; + + if (!peer) + return; + param.notify = NOTIFY_GET_SESSION; + param.fd = INVALID_SOCKET_FD; + memcpy(param.remote_cid, peer->cid, HMDFS_CID_SIZE); + notify(peer, &param); +} + +static void connection_notify_to_close(struct connection *conn) +{ + struct notify_param param; + struct hmdfs_peer *peer = NULL; + struct tcp_handle *tcp = NULL; + + tcp = conn->connect_handle; + peer = conn->node; + + // libdistbus/src/TcpSession.cpp will close the socket + param.notify = NOTIFY_GET_SESSION; + param.fd = tcp->fd; + memcpy(param.remote_cid, peer->cid, HMDFS_CID_SIZE); + notify(peer, &param); +} + +void hmdfs_reget_connection(struct connection *conn) +{ + struct tcp_handle *tcp = 
NULL; + struct connection *conn_impl = NULL; + struct connection *next = NULL; + struct task_struct *recv_task = NULL; + bool should_put = false; + bool stop_thread = true; + + if (!conn) + return; + + // A connection may be put only by whoever took it off the list + mutex_lock(&conn->node->conn_impl_list_lock); + list_for_each_entry_safe(conn_impl, next, &conn->node->conn_impl_list, + list) { + if (conn_impl == conn) { + should_put = true; + list_move(&conn->list, &conn->node->conn_deleting_list); + break; + } + } + if (!should_put) { + mutex_unlock(&conn->node->conn_impl_list_lock); + return; + } + + tcp = conn->connect_handle; + if (tcp) { + recv_task = tcp->recv_task; + /* + * Don't let the receive thread stop itself: make sure the + * receive thread is stopped before the offline event is + * processed. + */ + if (!recv_task || recv_task->pid == current->pid) + stop_thread = false; + } + mutex_unlock(&conn->node->conn_impl_list_lock); + + if (tcp) { + if (tcp->sock) { + hmdfs_info("shutdown sock: fd = %d, sockref = %ld, connref = %u stop_thread = %d", + tcp->fd, file_count(tcp->sock->file), + kref_read(&conn->ref_cnt), stop_thread); + kernel_sock_shutdown(tcp->sock, SHUT_RDWR); + } + + if (stop_thread) + tcp_close_socket(tcp); + + if (tcp->fd != INVALID_SOCKET_FD) + connection_notify_to_close(conn); + } + connection_put(conn); +} + +static struct connection * +lookup_conn_by_socketfd_unsafe(struct hmdfs_peer *node, struct socket *socket) +{ + struct connection *tcp_conn = NULL; + struct tcp_handle *tcp = NULL; + + list_for_each_entry(tcp_conn, &node->conn_impl_list, list) { + if (tcp_conn->connect_handle) { + tcp = (struct tcp_handle *)(tcp_conn->connect_handle); + if (tcp->sock == socket) { + connection_get(tcp_conn); + return tcp_conn; + } + } + } + return NULL; +} + +static void hmdfs_reget_connection_work_fn(struct work_struct *work) +{ + struct connection *conn = + container_of(work, struct connection, reget_work); + + hmdfs_reget_connection(conn); + connection_put(conn); +} + +struct connection *alloc_conn_tcp(struct hmdfs_peer *node, int socket_fd, + uint8_t *master_key, uint8_t status, struct socket *socket) +{ + struct connection *tcp_conn = NULL; + unsigned long nowtime = jiffies; + + tcp_conn = kzalloc(sizeof(*tcp_conn), GFP_KERNEL); + if (!tcp_conn) + goto out_err; + + kref_init(&tcp_conn->ref_cnt); + mutex_init(&tcp_conn->ref_lock); + INIT_LIST_HEAD(&tcp_conn->list); + tcp_conn->node = node; + tcp_conn->close = tcp_stop_connect; + tcp_conn->send_message = tcp_send_message; + tcp_conn->type = CONNECT_TYPE_TCP; + tcp_conn->status = status; + tcp_conn->stat.rekey_time = nowtime; + tcp_conn->connect_handle = + (void *)tcp_alloc_handle(tcp_conn, socket_fd, master_key, socket); + INIT_WORK(&tcp_conn->reget_work, hmdfs_reget_connection_work_fn); + if (!tcp_conn->connect_handle) { + hmdfs_err("Failed to alloc tcp_handle for struct connection"); + goto out_err; + } + return tcp_conn; + +out_err: + kfree(tcp_conn); + return NULL; +} + +static struct connection *add_conn_tcp_unsafe(struct hmdfs_peer *node, + struct socket *socket, + struct connection *conn2add) +{ + struct connection *conn; + + conn = lookup_conn_by_socketfd_unsafe(node, socket); + if (conn) { + hmdfs_info("socket already in list"); + return conn; + } + + /* Prefer the socket opened by the local device */ + if (conn2add->status == CONNECT_STAT_WAIT_REQUEST) + list_add(&conn2add->list, &node->conn_impl_list); + else + list_add_tail(&conn2add->list, &node->conn_impl_list); + connection_get(conn2add); + return conn2add; +} + 
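+/* + * Usage sketch (illustrative only): a caller that has received a connected + * socket fd from userspace would typically attach it to a peer as + * + * conn = hmdfs_get_conn_tcp(node, fd, master_key, + * CONNECT_STAT_WAIT_REQUEST); + * if (conn) + * connection_put(conn); + * + * where the returned connection carries a reference for the caller, to be + * dropped with connection_put() once the fd has been handed over. + */ + 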
+struct connection *hmdfs_get_conn_tcp(struct hmdfs_peer *node, int fd, + uint8_t *master_key, uint8_t status) +{ + struct connection *tcp_conn = NULL, *on_peer_conn = NULL; + struct tcp_handle *tcp = NULL; + struct socket *socket = NULL; + int err = 0; + + socket = sockfd_lookup(fd, &err); + if (!socket) { + hmdfs_err("lookup socket fail, socket_fd %d, err %d", fd, err); + return NULL; + } + mutex_lock(&node->conn_impl_list_lock); + tcp_conn = lookup_conn_by_socketfd_unsafe(node, socket); + mutex_unlock(&node->conn_impl_list_lock); + if (tcp_conn) { + hmdfs_info("Got an existing tcp conn: socket_fd = %d", + fd); + sockfd_put(socket); + goto out; + } + + tcp_conn = alloc_conn_tcp(node, fd, master_key, status, socket); + if (!tcp_conn) { + hmdfs_info("Failed to alloc a tcp conn, socket_fd %d", fd); + sockfd_put(socket); + goto out; + } + + mutex_lock(&node->conn_impl_list_lock); + on_peer_conn = add_conn_tcp_unsafe(node, socket, tcp_conn); + mutex_unlock(&node->conn_impl_list_lock); + tcp = tcp_conn->connect_handle; + if (on_peer_conn == tcp_conn) { + hmdfs_info("Got a newly allocated tcp conn: socket_fd = %d", fd); + wake_up_process(tcp->recv_task); + if (status == CONNECT_STAT_WAIT_RESPONSE) + connection_send_handshake( + on_peer_conn, CONNECT_MESG_HANDSHAKE_REQUEST, + 0); + } else { + hmdfs_info("Got an existing tcp conn: socket_fd = %d", fd); + tcp->fd = INVALID_SOCKET_FD; + tcp_close_socket(tcp); + connection_put(tcp_conn); + + tcp_conn = on_peer_conn; + } + +out: + return tcp_conn; +} + +void tcp_stop_connect(struct connection *connect) +{ + hmdfs_info("now nothing to do"); +} diff --git a/fs/hmdfs/comm/transport.h b/fs/hmdfs/comm/transport.h new file mode 100644 index 000000000000..bce882cb6997 --- /dev/null +++ b/fs/hmdfs/comm/transport.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/transport.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_TRANSPORT_H +#define HMDFS_TRANSPORT_H + +#include "connection.h" + +#define ENCRYPT_FLAG 1 +#define DECRYPT_FLAG 0 + +struct aeadcrypt_result { + struct completion completion; + int err; +}; + +#define ADAPTER_MESSAGE_LENGTH (1024 * 1024 + 1024) // 1M + 1K +#define MAX_RECV_SIZE sizeof(struct hmdfs_head_cmd) + +#define TCP_KVEC_HEAD 0 +#define TCP_KVEC_DATA 1 + +enum TCP_KVEC_FILE_ELE_INDEX { + TCP_KVEC_FILE_PARA = 1, + TCP_KVEC_FILE_CONTENT = 2, +}; + +enum TCP_KVEC_TYPE { + TCP_KVEC_ELE_SINGLE = 1, + TCP_KVEC_ELE_DOUBLE = 2, + TCP_KVEC_ELE_TRIPLE = 3, +}; + +#define TCP_RECV_TIMEOUT 2 +#define MAX_RECV_RETRY_TIMES 2 + +#ifndef SO_RCVTIMEO +#define SO_RCVTIMEO SO_RCVTIMEO_OLD +#endif + +struct tcp_handle { + struct connection *connect; + int recvbuf_maxsize; + struct mutex close_mutex; + /* + * To achieve atomicity. + * + * The sock lock held at the tcp layer may be temporarily released at + * `sk_wait_event()` when waiting for sock buffer. From this point on, + * threads serialized at the initial call to `lock_sock()` contained + * in `tcp_sendmsg()` can proceed, resulting in intermixed messages.
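 + * Taking send_mutex around sendmsg_nofs() keeps each hmdfs message + * contiguous on the wire.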
+ */ + struct mutex send_mutex; + struct socket *sock; + int fd; + struct kmem_cache *recv_cache; + struct task_struct *recv_task; +}; + +void hmdfs_get_connection(struct hmdfs_peer *peer); +void hmdfs_reget_connection(struct connection *conn); +struct connection *hmdfs_get_conn_tcp(struct hmdfs_peer *node, int socket_fd, + uint8_t *master_key, uint8_t status); +void tcp_stop_connect(struct connection *connect); +uint32_t hmdfs_tcpi_rtt(struct hmdfs_peer *node); +void tcp_close_socket(struct tcp_handle *tcp); + +#ifdef CONFIG_HMDFS_FS_ENCRYPTION +int tcp_send_rekey_request(struct connection *connect); +#endif + +#endif diff --git a/fs/hmdfs/dentry.c b/fs/hmdfs/dentry.c new file mode 100644 index 000000000000..ac590df0982a --- /dev/null +++ b/fs/hmdfs/dentry.c @@ -0,0 +1,303 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/dentry.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include +#include + +#include "comm/connection.h" +#include "hmdfs_dentryfile.h" +#include "hmdfs_device_view.h" +#include "hmdfs_merge_view.h" + +extern struct kmem_cache *hmdfs_dentry_cachep; + +void hmdfs_set_time(struct dentry *dentry, unsigned long time) +{ + struct hmdfs_dentry_info *d_info = dentry->d_fsdata; + + if (d_info) + d_info->time = time; +} + +unsigned long hmdfs_get_time(struct dentry *dentry) +{ + struct hmdfs_dentry_info *d_info = dentry->d_fsdata; + + if (d_info) + return (unsigned long)d_info->time; + return 0; +} + +static int hmdfs_d_remote_revalidate(struct hmdfs_peer *conn, + struct dentry *target, + struct dentry *parent) +{ + unsigned int timeout = hmdfs_sb(target->d_sb)->dcache_timeout; + unsigned long dentry_time = hmdfs_get_time(target); + struct clearcache_item *item; + + item = hmdfs_find_cache_item(conn->device_id, parent); + if (!item) + return 0; + kref_put(&item->ref, release_cache_item); + + if (cache_item_revalidate(READ_ONCE(conn->conn_time), + dentry_time, timeout)) + return 1; + + return 0; +} + +static inline void lock_for_dname_cmp(struct dentry *dentry, + struct dentry *lower_dentry) +{ + if (dentry < lower_dentry) { + spin_lock(&dentry->d_lock); + spin_lock_nested(&lower_dentry->d_lock, DENTRY_D_LOCK_NESTED); + } else { + spin_lock(&lower_dentry->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + } +} + +static inline void unlock_for_dname_cmp(struct dentry *dentry, + struct dentry *lower_dentry) +{ + spin_unlock(&dentry->d_lock); + spin_unlock(&lower_dentry->d_lock); +} + +static int hmdfs_dev_d_revalidate(struct dentry *direntry, unsigned int flags) +{ + struct inode *dinode = NULL; + struct hmdfs_inode_info *info = NULL; + + spin_lock(&direntry->d_lock); + if (IS_ROOT(direntry)) { + spin_unlock(&direntry->d_lock); + return 1; + } + spin_unlock(&direntry->d_lock); + + dinode = d_inode(direntry); + if (!dinode) + return 0; + + info = hmdfs_i(dinode); + if (info->inode_type == HMDFS_LAYER_SECOND_LOCAL || + info->inode_type == HMDFS_LAYER_FIRST_DEVICE) { + return 1; + } + if (info->conn && info->conn->status == NODE_STAT_ONLINE) + return 1; + + return 0; +} + +static int hmdfs_d_revalidate(struct dentry *direntry, unsigned int flags) +{ + struct inode *dinode = NULL; + struct hmdfs_inode_info *info = NULL; + struct path lower_path, parent_lower_path; + struct dentry *parent_dentry = NULL; + struct dentry *parent_lower_dentry = NULL; + struct dentry *lower_cur_parent_dentry = NULL; + struct dentry *lower_dentry = NULL; + int ret; + + if (flags & LOOKUP_RCU) + return -ECHILD; + + if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET | 
LOOKUP_REVAL)) + return 0; + + dinode = d_inode(direntry); + if (!dinode) + return 0; + + /* remote dentry timeout */ + info = hmdfs_i(dinode); + parent_dentry = dget_parent(direntry); + if (info->conn) { + ret = hmdfs_d_remote_revalidate(info->conn, direntry, + parent_dentry); + dput(parent_dentry); + return ret; + } + + hmdfs_get_lower_path(direntry, &lower_path); + lower_dentry = lower_path.dentry; + lower_cur_parent_dentry = dget_parent(lower_dentry); + hmdfs_get_lower_path(parent_dentry, &parent_lower_path); + parent_lower_dentry = parent_lower_path.dentry; + if ((lower_dentry->d_flags & DCACHE_OP_REVALIDATE)) { + ret = lower_dentry->d_op->d_revalidate(lower_dentry, flags); + if (ret == 0) + goto out; + } + + spin_lock(&lower_dentry->d_lock); + if (d_unhashed(lower_dentry)) { + spin_unlock(&lower_dentry->d_lock); + ret = 0; + goto out; + } + spin_unlock(&lower_dentry->d_lock); + + if (parent_lower_dentry != lower_cur_parent_dentry) { + ret = 0; + goto out; + } + + ret = 1; + lock_for_dname_cmp(direntry, lower_dentry); + if (!qstr_case_eq(&direntry->d_name, &lower_dentry->d_name)) + ret = 0; + unlock_for_dname_cmp(direntry, lower_dentry); + +out: + hmdfs_put_lower_path(&parent_lower_path); + dput(lower_cur_parent_dentry); + hmdfs_put_lower_path(&lower_path); + dput(parent_dentry); + return ret; +} + +static void hmdfs_dev_d_release(struct dentry *dentry) +{ + if (!dentry || !dentry->d_fsdata) + return; + + switch (hmdfs_d(dentry)->dentry_type) { + case HMDFS_LAYER_SECOND_LOCAL: + hmdfs_clear_cache_dents(dentry, false); + hmdfs_drop_remote_cache_dents(dentry); + path_put(&(hmdfs_d(dentry)->lower_path)); + break; + case HMDFS_LAYER_ZERO: + hmdfs_put_reset_lower_path(dentry); + break; + case HMDFS_LAYER_FIRST_DEVICE: + break; + case HMDFS_LAYER_SECOND_REMOTE: + hmdfs_clear_cache_dents(dentry, false); + break; + default: + hmdfs_err("Unexpected dentry type %d", + hmdfs_d(dentry)->dentry_type); + return; + } + + kmem_cache_free(hmdfs_dentry_cachep, dentry->d_fsdata); + dentry->d_fsdata = NULL; +} + +static void hmdfs_d_release(struct dentry *dentry) +{ + if (!dentry || !dentry->d_fsdata) + return; + + hmdfs_clear_cache_dents(dentry, false); + hmdfs_drop_remote_cache_dents(dentry); + hmdfs_put_reset_lower_path(dentry); + kmem_cache_free(hmdfs_dentry_cachep, dentry->d_fsdata); + dentry->d_fsdata = NULL; +} + +static int hmdfs_cmp_ci(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name) +{ + struct hmdfs_sb_info *sbi = hmdfs_sb(dentry->d_sb); + + if (name->len != len) + return 1; + + if (!sbi->s_case_sensitive) { + if (str_n_case_eq(name->name, str, len)) + return 0; + } else { + if (!strncmp(name->name, str, len)) + return 0; + } + return 1; +} + +static int hmdfs_hash_ci(const struct dentry *dentry, struct qstr *qstr) +{ + const unsigned char *name = qstr->name; + unsigned int len = qstr->len; + unsigned long hash; + struct hmdfs_sb_info *sbi = hmdfs_sb(dentry->d_sb); + + if (sbi->s_case_sensitive) + return 0; + + hash = init_name_hash(dentry); + while (len--) + hash = partial_name_hash(tolower(*name++), hash); + qstr->hash = end_name_hash(hash); + return 0; +} + +void clear_comrades_locked(struct list_head *comrade_list) +{ + struct hmdfs_dentry_comrade *cc, *nc; + + WARN_ON(!comrade_list); + list_for_each_entry_safe(cc, nc, comrade_list, list) { + dput(cc->lo_d); + kfree(cc); + } + INIT_LIST_HEAD(comrade_list); +} + +void clear_comrades(struct dentry *dentry) +{ + struct hmdfs_dentry_info_merge *cdi = hmdfs_dm(dentry); + + 
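/* hold the list lock so concurrent walkers never see half-freed entries */ +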
mutex_lock(&cdi->comrade_list_lock); + clear_comrades_locked(&cdi->comrade_list); + mutex_unlock(&cdi->comrade_list_lock); +} + +/** + * d_revalidate_merge - revalidate a merge dentry + * + * Always return 0 to invalidate a dentry for fault-tolerance. + * The cost is acceptable for a overlay filesystem. + */ +static int d_revalidate_merge(struct dentry *direntry, unsigned int flags) +{ + return 0; +} + +static void d_release_merge(struct dentry *dentry) +{ + if (!dentry || !dentry->d_fsdata) + return; + + clear_comrades(dentry); + kmem_cache_free(hmdfs_dentry_merge_cachep, dentry->d_fsdata); + dentry->d_fsdata = NULL; +} + +const struct dentry_operations hmdfs_dops_merge = { + .d_revalidate = d_revalidate_merge, + .d_release = d_release_merge, +}; + +const struct dentry_operations hmdfs_dev_dops = { + .d_revalidate = hmdfs_dev_d_revalidate, + .d_release = hmdfs_dev_d_release, +}; + +const struct dentry_operations hmdfs_dops = { + .d_revalidate = hmdfs_d_revalidate, + .d_release = hmdfs_d_release, + .d_compare = hmdfs_cmp_ci, + .d_hash = hmdfs_hash_ci, +}; diff --git a/fs/hmdfs/file_local.c b/fs/hmdfs/file_local.c new file mode 100644 index 000000000000..893c6edbc93b --- /dev/null +++ b/fs/hmdfs/file_local.c @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/file_local.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "hmdfs_client.h" +#include "hmdfs_dentryfile.h" +#include "hmdfs_device_view.h" +#include "hmdfs_merge_view.h" +#include "hmdfs_trace.h" + +int hmdfs_file_open_local(struct inode *inode, struct file *file) +{ + int err = 0; + struct file *lower_file = NULL; + struct path lower_path; + struct super_block *sb = inode->i_sb; + const struct cred *cred = hmdfs_sb(sb)->cred; + struct hmdfs_file_info *gfi = kzalloc(sizeof(*gfi), GFP_KERNEL); + + if (!gfi) { + err = -ENOMEM; + goto out_err; + } + + hmdfs_get_lower_path(file->f_path.dentry, &lower_path); + lower_file = dentry_open(&lower_path, file->f_flags, cred); + hmdfs_put_lower_path(&lower_path); + if (IS_ERR(lower_file)) { + err = PTR_ERR(lower_file); + kfree(gfi); + } else { + gfi->lower_file = lower_file; + file->private_data = gfi; + } +out_err: + return err; +} + +int hmdfs_file_release_local(struct inode *inode, struct file *file) +{ + struct hmdfs_file_info *gfi = hmdfs_f(file); + + file->private_data = NULL; + fput(gfi->lower_file); + kfree(gfi); + return 0; +} + +ssize_t hmdfs_read_local(struct kiocb *iocb, struct iov_iter *iter) +{ + struct file *lower_file = hmdfs_f(iocb->ki_filp)->lower_file; + int err; + + if (iter->type & ITER_KVEC) + err = kernel_read(lower_file, iter->iov->iov_base, + iter->iov->iov_len, &(iocb->ki_pos)); + else + err = vfs_read(lower_file, iter->iov->iov_base, + iter->iov->iov_len, &(iocb->ki_pos)); + + if (err >= 0) + file_inode(iocb->ki_filp)->i_atime = file_inode(lower_file)->i_atime; + return err; +} + +ssize_t hmdfs_write_local(struct kiocb *iocb, struct iov_iter *iter) +{ + struct file *lower_file = hmdfs_f(iocb->ki_filp)->lower_file; + struct inode *inode = file_inode(iocb->ki_filp); + struct inode *lower_inode = file_inode(lower_file); + struct dentry *dentry = file_dentry(iocb->ki_filp); + int err; + + if (iter->type & ITER_KVEC) + err = kernel_write(lower_file, iter->iov->iov_base, + iter->iov->iov_len, &(iocb->ki_pos)); + else + err = vfs_write(lower_file, iter->iov->iov_base, + iter->iov->iov_len, &(iocb->ki_pos)); + + if (err >= 0) { + inode_lock(inode); + 
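/* mirror size and timestamps from the lower inode under i_rwsem */ +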
i_size_write(inode, i_size_read(lower_inode)); + inode->i_atime = lower_inode->i_atime; + inode->i_ctime = lower_inode->i_ctime; + inode->i_mtime = lower_inode->i_mtime; + if (!hmdfs_i_merge(hmdfs_i(inode))) + update_inode_to_dentry(dentry, inode); + inode_unlock(inode); + } + return err; +} + +int hmdfs_fsync_local(struct file *file, loff_t start, loff_t end, int datasync) +{ + int err; + struct file *lower_file = hmdfs_f(file)->lower_file; + + err = __generic_file_fsync(file, start, end, datasync); + if (err) + goto out; + + err = vfs_fsync_range(lower_file, start, end, datasync); +out: + return err; +} + +loff_t hmdfs_file_llseek_local(struct file *file, loff_t offset, int whence) +{ + int err = 0; + struct file *lower_file = NULL; + + err = generic_file_llseek(file, offset, whence); + if (err < 0) + goto out; + lower_file = hmdfs_f(file)->lower_file; + err = generic_file_llseek(lower_file, offset, whence); +out: + return err; +} + +int hmdfs_file_mmap_local(struct file *file, struct vm_area_struct *vma) +{ + struct hmdfs_file_info *private_data = file->private_data; + struct file *realfile = NULL; + int ret; + + if (!private_data) + return -EINVAL; + + realfile = private_data->lower_file; + if (!realfile) + return -EINVAL; + + if (!realfile->f_op->mmap) + return -ENODEV; + + if (WARN_ON(file != vma->vm_file)) + return -EIO; + + vma->vm_file = get_file(realfile); + ret = call_mmap(vma->vm_file, vma); + if (ret) + fput(realfile); + else + fput(file); + + file_accessed(file); + + return ret; +} + +const struct file_operations hmdfs_file_fops_local = { + .owner = THIS_MODULE, + .llseek = hmdfs_file_llseek_local, + .read_iter = hmdfs_read_local, + .write_iter = hmdfs_write_local, + .mmap = hmdfs_file_mmap_local, + .open = hmdfs_file_open_local, + .release = hmdfs_file_release_local, + .fsync = hmdfs_fsync_local, +}; + +static int hmdfs_iterate_local(struct file *file, struct dir_context *ctx) +{ + int err = 0; + loff_t start_pos = ctx->pos; + struct file *lower_file = hmdfs_f(file)->lower_file; + + if (ctx->pos == -1) + return 0; + + lower_file->f_pos = file->f_pos; + err = iterate_dir(lower_file, ctx); + file->f_pos = lower_file->f_pos; + + if (err < 0) + ctx->pos = -1; + + trace_hmdfs_iterate_local(file->f_path.dentry, start_pos, ctx->pos, + err); + return err; +} + +int hmdfs_dir_open_local(struct inode *inode, struct file *file) +{ + int err = 0; + struct file *lower_file = NULL; + struct dentry *dentry = file->f_path.dentry; + struct path lower_path; + struct super_block *sb = inode->i_sb; + const struct cred *cred = hmdfs_sb(sb)->cred; + struct hmdfs_file_info *gfi = kzalloc(sizeof(*gfi), GFP_KERNEL); + + if (!gfi) + return -ENOMEM; + + if (IS_ERR_OR_NULL(cred)) { + err = -EPERM; + goto out_err; + } + hmdfs_get_lower_path(dentry, &lower_path); + lower_file = dentry_open(&lower_path, file->f_flags, cred); + hmdfs_put_lower_path(&lower_path); + if (IS_ERR(lower_file)) { + err = PTR_ERR(lower_file); + goto out_err; + } else { + gfi->lower_file = lower_file; + file->private_data = gfi; + } + return err; + +out_err: + kfree(gfi); + return err; +} + +static int hmdfs_dir_release_local(struct inode *inode, struct file *file) +{ + struct hmdfs_file_info *gfi = hmdfs_f(file); + + file->private_data = NULL; + fput(gfi->lower_file); + kfree(gfi); + return 0; +} + +const struct file_operations hmdfs_dir_ops_local = { + .owner = THIS_MODULE, + .iterate = hmdfs_iterate_local, + .open = hmdfs_dir_open_local, + .release = hmdfs_dir_release_local, + .fsync = hmdfs_fsync_local, +}; diff --git 
a/fs/hmdfs/file_merge.c b/fs/hmdfs/file_merge.c new file mode 100644 index 000000000000..2708f2ba24af --- /dev/null +++ b/fs/hmdfs/file_merge.c @@ -0,0 +1,525 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/file_merge.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include "hmdfs_merge_view.h" + +#include + +#include "hmdfs.h" +#include "hmdfs_trace.h" + +struct hmdfs_iterate_callback_merge { + struct dir_context ctx; + struct dir_context *caller; + /* + * Record the return value of 'caller->actor': + * + * -EINVAL, buffer is exhausted + * -EINTR, current task is pending + * -EFAULT, something is wrong + * 0, success and can do more + */ + int result; + struct rb_root *root; + uint64_t dev_id; +}; + +struct hmdfs_cache_entry { + struct rb_node rb_node; + int name_len; + char *name; + int file_type; +}; + +struct hmdfs_cache_entry *allocate_entry(const char *name, int namelen, + int d_type) +{ + struct hmdfs_cache_entry *data; + + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return ERR_PTR(-ENOMEM); + + data->name = kstrndup(name, namelen, GFP_KERNEL); + if (!data->name) { + kfree(data); + return ERR_PTR(-ENOMEM); + } + + data->name_len = namelen; + data->file_type = d_type; + + return data; +} + +int insert_filename(struct rb_root *root, struct hmdfs_cache_entry **new_entry) +{ + struct rb_node *parent = NULL; + struct rb_node **new_node = &(root->rb_node); + int cmp_res = 0; + struct hmdfs_cache_entry *data = *new_entry; + + while (*new_node) { + struct hmdfs_cache_entry *entry = container_of( + *new_node, struct hmdfs_cache_entry, rb_node); + parent = *new_node; + + if (data->name_len < entry->name_len) + cmp_res = -1; + else if (data->name_len > entry->name_len) + cmp_res = 1; + else + cmp_res = strncmp(data->name, entry->name, + data->name_len); + + if (!cmp_res) { + kfree(data->name); + kfree(data); + *new_entry = entry; + return entry->file_type; + } + + if (cmp_res < 0) + new_node = &((*new_node)->rb_left); + else if (cmp_res > 0) + new_node = &((*new_node)->rb_right); + } + + rb_link_node(&data->rb_node, parent, new_node); + rb_insert_color(&data->rb_node, root); + + return 0; +} + +static void recursive_delete(struct rb_node *node) +{ + struct hmdfs_cache_entry *entry = NULL; + + if (!node) + return; + + recursive_delete(node->rb_left); + recursive_delete(node->rb_right); + + entry = container_of(node, struct hmdfs_cache_entry, rb_node); + kfree(entry->name); + kfree(entry); +} + +static void destroy_tree(struct rb_root *root) +{ + if (!root) + return; + recursive_delete(root->rb_node); + root->rb_node = NULL; +} + +static void delete_filename(struct rb_root *root, + struct hmdfs_cache_entry *data) +{ + struct rb_node **node = &(root->rb_node); + struct hmdfs_cache_entry *entry = NULL; + int cmp_res = 0; + + while (*node) { + entry = container_of(*node, struct hmdfs_cache_entry, rb_node); + if (data->name_len < entry->name_len) + cmp_res = -1; + else if (data->name_len > entry->name_len) + cmp_res = 1; + else + cmp_res = strncmp(data->name, entry->name, + data->name_len); + + if (!cmp_res) + goto found; + + if (cmp_res < 0) + node = &((*node)->rb_left); + else if (cmp_res > 0) + node = &((*node)->rb_right); + } + return; + +found: + rb_erase(*node, root); + kfree(entry->name); + kfree(entry); +} + +static void rename_conflicting_file(char *dentry_name, int *len, + unsigned int dev_id) +{ + int i = *len - 1; + int dot_pos = -1; + char *buffer; + + buffer = kzalloc(DENTRY_NAME_MAX_LEN, GFP_KERNEL); + if (!buffer) + return; + + while (i >= 0) 
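/* walk backwards over the last path component looking for '.' */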
{ + if (dentry_name[i] == '/') + break; + if (dentry_name[i] == '.') { + // TODO: sync this change to CT01 + dot_pos = i; + break; + } + i--; + } + + if (dot_pos == -1) { + snprintf(dentry_name + *len, DENTRY_NAME_MAX_LEN - *len, + CONFLICTING_FILE_SUFFIX, dev_id); + goto done; + } + + for (i = 0; i < *len - dot_pos; i++) + buffer[i] = dentry_name[i + dot_pos]; + + buffer[i] = '\0'; + snprintf(dentry_name + dot_pos, DENTRY_NAME_MAX_LEN - dot_pos, + CONFLICTING_FILE_SUFFIX, dev_id); + strcat(dentry_name, buffer); + +done: + *len = strlen(dentry_name); + kfree(buffer); +} + +static void rename_conflicting_directory(char *dentry_name, int *len) +{ + snprintf(dentry_name + *len, DENTRY_NAME_MAX_LEN - *len, + CONFLICTING_DIR_SUFFIX); + *len += strlen(CONFLICTING_DIR_SUFFIX); +} + +static int hmdfs_actor_merge(struct dir_context *ctx, const char *name, + int namelen, loff_t offset, u64 ino, + unsigned int d_type) +{ + int ret = 0; + int insert_res = 0; + int max_devid_len = 2; + char *dentry_name = NULL; + int dentry_len = namelen; + struct hmdfs_cache_entry *cache_entry = NULL; + struct hmdfs_iterate_callback_merge *iterate_callback_merge = NULL; + struct dir_context *org_ctx = NULL; + + if (hmdfs_file_type(name) != HMDFS_TYPE_COMMON) + return 0; + + if (namelen > NAME_MAX) + return -EINVAL; + dentry_name = kzalloc(NAME_MAX + 1, GFP_KERNEL); + if (!dentry_name) + return -ENOMEM; + + strncpy(dentry_name, name, dentry_len); + + cache_entry = allocate_entry(dentry_name, dentry_len, d_type); + if (IS_ERR(cache_entry)) { + ret = PTR_ERR(cache_entry); + goto done; + } + + iterate_callback_merge = + container_of(ctx, struct hmdfs_iterate_callback_merge, ctx); + insert_res = + insert_filename(iterate_callback_merge->root, &cache_entry); + if (d_type == DT_DIR && insert_res == DT_DIR) { + goto done; + } else if (d_type == DT_DIR && insert_res == DT_REG) { + if (strlen(CONFLICTING_DIR_SUFFIX) > NAME_MAX - dentry_len) { + ret = -ENAMETOOLONG; + goto delete; + } + rename_conflicting_directory(dentry_name, &dentry_len); + cache_entry->file_type = DT_DIR; + } else if (d_type == DT_REG && insert_res > 0) { + if (strlen(CONFLICTING_FILE_SUFFIX) + max_devid_len > + NAME_MAX - dentry_len) { + ret = -ENAMETOOLONG; + goto delete; + } + rename_conflicting_file(dentry_name, &dentry_len, + iterate_callback_merge->dev_id); + } + + org_ctx = iterate_callback_merge->caller; + ret = org_ctx->actor(org_ctx, dentry_name, dentry_len, org_ctx->pos, + ino, d_type); + /* + * Record the original return value so that the caller can tell the + * different situations apart. + */ + iterate_callback_merge->result = ret; + ret = ret == 0 ? 0 : 1; + if (ret && d_type == DT_DIR && insert_res == DT_REG && + cache_entry->file_type == DT_DIR) + cache_entry->file_type = DT_REG; + +delete: + if (ret && !insert_res) + delete_filename(iterate_callback_merge->root, cache_entry); +done: + kfree(dentry_name); + return ret; +} + +struct hmdfs_file_info * +get_next_hmdfs_file_info(struct hmdfs_file_info *fi_head, int device_id) +{ + struct hmdfs_file_info *fi_iter = NULL; + struct hmdfs_file_info *fi_result = NULL; + + mutex_lock(&fi_head->comrade_list_lock); + list_for_each_entry_safe(fi_iter, fi_result, &(fi_head->comrade_list), + comrade_list) { + if (fi_iter->device_id == device_id) + break; + } + mutex_unlock(&fi_head->comrade_list_lock); + + return fi_result != fi_head ? 
fi_result : NULL; +} + +struct hmdfs_file_info *get_hmdfs_file_info(struct hmdfs_file_info *fi_head, + int device_id) +{ + struct hmdfs_file_info *fi_iter = NULL; + + mutex_lock(&fi_head->comrade_list_lock); + list_for_each_entry(fi_iter, &(fi_head->comrade_list), comrade_list) { + if (fi_iter->device_id == device_id) { + mutex_unlock(&fi_head->comrade_list_lock); + return fi_iter; + } + } + mutex_unlock(&fi_head->comrade_list_lock); + + return NULL; +} + +int hmdfs_iterate_merge(struct file *file, struct dir_context *ctx) +{ + int err = 0; + struct hmdfs_file_info *fi_head = hmdfs_f(file); + struct hmdfs_file_info *fi_iter = NULL; + struct file *lower_file_iter = NULL; + loff_t start_pos = ctx->pos; + unsigned long device_id = (unsigned long)((ctx->pos) << 1 >> + (POS_BIT_NUM - DEV_ID_BIT_NUM)); + struct hmdfs_iterate_callback_merge ctx_merge = { + .ctx.actor = hmdfs_actor_merge, + .caller = ctx, + .root = &fi_head->root, + .dev_id = device_id + }; + + /* pos = -1 indicates that all devices have been traversed + * or an error has occurred. + */ + if (ctx->pos == -1) + return 0; + + fi_iter = get_hmdfs_file_info(fi_head, device_id); + if (!fi_iter) { + fi_iter = get_next_hmdfs_file_info(fi_head, device_id); + // dev_id is changed, parameter is set 0 to get next file info + if (fi_iter) + ctx_merge.ctx.pos = + hmdfs_set_pos(fi_iter->device_id, 0, 0); + } + while (fi_iter) { + ctx_merge.dev_id = fi_iter->device_id; + device_id = ctx_merge.dev_id; + lower_file_iter = fi_iter->lower_file; + lower_file_iter->f_pos = file->f_pos; + err = iterate_dir(lower_file_iter, &ctx_merge.ctx); + file->f_pos = lower_file_iter->f_pos; + ctx->pos = file->f_pos; + + if (err) + goto done; + /* + * ctx->actor return nonzero means buffer is exhausted or + * something is wrong, thus we should not continue. + */ + if (ctx_merge.result) + goto done; + fi_iter = get_next_hmdfs_file_info(fi_head, device_id); + if (fi_iter) { + file->f_pos = hmdfs_set_pos(fi_iter->device_id, 0, 0); + ctx->pos = file->f_pos; + } + } +done: + trace_hmdfs_iterate_merge(file->f_path.dentry, start_pos, ctx->pos, + err); + return err; +} + +int do_dir_open_merge(struct file *file, const struct cred *cred, + struct hmdfs_file_info *fi_head) +{ + int ret = -EINVAL; + struct hmdfs_dentry_info_merge *dim = hmdfs_dm(file->f_path.dentry); + struct hmdfs_dentry_comrade *comrade = NULL; + struct hmdfs_file_info *fi = NULL; + struct path lo_p = { .mnt = file->f_path.mnt }; + struct file *lower_file = NULL; + + if (IS_ERR_OR_NULL(cred)) + return ret; + + mutex_lock(&dim->comrade_list_lock); + list_for_each_entry(comrade, &(dim->comrade_list), list) { + fi = kzalloc(sizeof(*fi), GFP_KERNEL); + if (!fi) { + ret = ret ? 
-ENOMEM : 0; + continue; // allow some dir to fail to open + } + lo_p.dentry = comrade->lo_d; + // make sure that dentry will not be dentry_kill before open + dget(lo_p.dentry); + if (unlikely(d_is_negative(lo_p.dentry))) { + hmdfs_info("dentry is negative, try again"); + kfree(fi); + dput(lo_p.dentry); + continue; // skip this device + } + lower_file = dentry_open(&lo_p, file->f_flags, cred); + dput(lo_p.dentry); + if (IS_ERR(lower_file)) { + kfree(fi); + continue; + } + ret = 0; + fi->device_id = comrade->dev_id; + fi->lower_file = lower_file; + mutex_lock(&fi_head->comrade_list_lock); + list_add_tail(&fi->comrade_list, &fi_head->comrade_list); + mutex_unlock(&fi_head->comrade_list_lock); + } + mutex_unlock(&dim->comrade_list_lock); + return ret; +} + +int hmdfs_dir_open_merge(struct inode *inode, struct file *file) +{ + int ret = 0; + struct hmdfs_file_info *fi = NULL; + + fi = kzalloc(sizeof(*fi), GFP_KERNEL); + if (!fi) + return -ENOMEM; + + file->private_data = fi; + fi->root = RB_ROOT; + mutex_init(&fi->comrade_list_lock); + INIT_LIST_HEAD(&fi->comrade_list); + + ret = do_dir_open_merge(file, hmdfs_sb(inode->i_sb)->cred, fi); + if (ret) + kfree(fi); + + return ret; +} + +int hmdfs_dir_release_merge(struct inode *inode, struct file *file) +{ + struct hmdfs_file_info *fi_head = hmdfs_f(file); + struct hmdfs_file_info *fi_iter = NULL; + struct hmdfs_file_info *fi_temp = NULL; + + mutex_lock(&fi_head->comrade_list_lock); + list_for_each_entry_safe(fi_iter, fi_temp, &(fi_head->comrade_list), + comrade_list) { + list_del_init(&(fi_iter->comrade_list)); + fput(fi_iter->lower_file); + kfree(fi_iter); + } + mutex_unlock(&fi_head->comrade_list_lock); + destroy_tree(&fi_head->root); + file->private_data = NULL; + kfree(fi_head); + + return 0; +} + +const struct file_operations hmdfs_dir_fops_merge = { + .owner = THIS_MODULE, + .iterate = hmdfs_iterate_merge, + .open = hmdfs_dir_open_merge, + .release = hmdfs_dir_release_merge, +}; + +int hmdfs_file_open_merge(struct inode *inode, struct file *file) +{ + int err = 0; + struct file *lower_file = NULL; + struct path lo_p = { .mnt = file->f_path.mnt }; + struct super_block *sb = inode->i_sb; + const struct cred *cred = hmdfs_sb(sb)->cred; + struct hmdfs_file_info *gfi = NULL; + struct dentry *parent = NULL; + + lo_p.dentry = hmdfs_get_fst_lo_d(file->f_path.dentry); + if (!lo_p.dentry) { + err = -EINVAL; + goto out_err; + } + + gfi = kzalloc(sizeof(*gfi), GFP_KERNEL); + if (!gfi) { + err = -ENOMEM; + goto out_err; + } + + parent = dget_parent(file->f_path.dentry); + lower_file = dentry_open(&lo_p, file->f_flags, cred); + if (IS_ERR(lower_file)) { + err = PTR_ERR(lower_file); + kfree(gfi); + } else { + gfi->lower_file = lower_file; + file->private_data = gfi; + } + dput(parent); +out_err: + dput(lo_p.dentry); + return err; +} + +int hmdfs_file_flush_merge(struct file *file, fl_owner_t id) +{ + struct hmdfs_file_info *gfi = hmdfs_f(file); + struct file *lower_file = gfi->lower_file; + + if (lower_file->f_op->flush) + return lower_file->f_op->flush(lower_file, id); + + return 0; +} + +/* Transparent transmission of parameters to device_view level, + * so file operations are same as device_view local operations. 
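+ * Only .open and .flush need merge-specific handling here; the other + * handlers are reused from the device_view local implementation, which + * simply forwards to the lower file.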
+ */ +const struct file_operations hmdfs_file_fops_merge = { + .owner = THIS_MODULE, + .llseek = hmdfs_file_llseek_local, + .read_iter = hmdfs_read_local, + .write_iter = hmdfs_write_local, + .mmap = hmdfs_file_mmap_local, + .open = hmdfs_file_open_merge, + .flush = hmdfs_file_flush_merge, + .release = hmdfs_file_release_local, + .fsync = hmdfs_fsync_local, +}; diff --git a/fs/hmdfs/file_remote.c b/fs/hmdfs/file_remote.c new file mode 100644 index 000000000000..4ae87a138999 --- /dev/null +++ b/fs/hmdfs/file_remote.c @@ -0,0 +1,1054 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/file_remote.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "file_remote.h" + +#include "comm/socket_adapter.h" +#include "hmdfs.h" +#include "hmdfs_client.h" +#include "hmdfs_dentryfile.h" +#include "hmdfs_trace.h" + +static inline bool hmdfs_remote_write_cache_expired( + struct hmdfs_inode_info *info) +{ + return time_after(jiffies, info->writecache_expire); +} + +enum expire_reason { + ALL_GOOD = 0, + INO_DISMATCH = 1, + SIZE_OR_CTIME_DISMATCH = 2, + TIMER_EXPIRE = 3, + TIMER_WORKING = 4, + STABLE_CTIME_DISMATCH = 5, + KEEP_CACHE = 6, +}; + +/* + * hmdfs_open_final_remote - Do the final steps of opening a remote file: + * update the local inode cache and decide whether or not to truncate inode + * pages. + * + * @info: hmdfs inode info + * @open_ret: values returned from remote when opening a remote file + * @keep_cache: keep local cache & i_size + */ +static int hmdfs_open_final_remote(struct hmdfs_inode_info *info, + struct hmdfs_open_ret *open_ret, + struct file *file, bool keep_cache) +{ + struct inode *inode = &info->vfs_inode; + bool truncate = false; + enum expire_reason reason = ALL_GOOD; + int ret = 0; + + /* + * If the remote inode number changed, we looked up stale data; return + * -ESTALE and reopen the file with metadata from remote getattr. + */ + if (info->remote_ino != open_ret->ino) { + hmdfs_debug( + "got stale local inode, ino in local %llu, ino from open %llu", + info->remote_ino, open_ret->ino); + hmdfs_send_close(info->conn, &open_ret->fid); + reason = INO_DISMATCH; + ret = -ESTALE; + goto out; + } + + if (keep_cache) { + reason = KEEP_CACHE; + trace_hmdfs_open_final_remote(info, open_ret, file, reason); + goto set_fid_out; + } + + /* + * Truncate if the remote size does not match the local inode, or the + * remote ctime does not match the one recorded when this file was + * last opened. + */ + if (inode->i_size != open_ret->file_size || + hmdfs_time_compare(&info->remote_ctime, &open_ret->remote_ctime)) { + truncate = true; + reason = SIZE_OR_CTIME_DISMATCH; + goto out; + } + + /* + * If 'writecache_expire' is set, check whether it has expired, and + * skip the checking of stable_ctime. + */ + if (info->writecache_expire) { + truncate = hmdfs_remote_write_cache_expired(info); + if (truncate) + reason = TIMER_EXPIRE; + else + reason = TIMER_WORKING; + goto out; + } + + /* the first time, or remote ctime is ahead of remote time */ + if (info->stable_ctime.tv_sec == 0 && info->stable_ctime.tv_nsec == 0) { + truncate = true; + reason = STABLE_CTIME_DISMATCH; + goto out; + } + + /* + * - if last stable_ctime == stable_ctime, we do nothing. + * a. if ctime < stable_ctime, data is ensured to be uptodate, + * b. if ctime == stable_ctime, stale data might be accessed. This is + * acceptable since pagecache will be dropped later. + * c. ctime > stable_ctime is impossible.
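+ * (Illustrative note: in case b, a write that lands within the same + * ctime granule as the recorded stable_ctime cannot be distinguished + * by timestamps alone, which is why the pagecache is dropped on a + * later open instead of being trusted indefinitely.)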
+ * - if last stable_ctime < stable_ctime, we clear the cache. + * d. ctime != last stable_ctime is impossible + * e. ctime == last stable_ctime, this is possible to read again from + * b, thus we need to drop the cache. + * - if last stable_ctime > stable_ctime, we clear the cache. + * stable_ctime must be zero in this case, this is possible because + * system time might be changed. + */ + if (hmdfs_time_compare(&info->stable_ctime, &open_ret->stable_ctime)) { + truncate = true; + reason = STABLE_CTIME_DISMATCH; + goto out; + } + +out: + trace_hmdfs_open_final_remote(info, open_ret, file, reason); + if (ret) + return ret; + + if (reason == SIZE_OR_CTIME_DISMATCH) { + inode->i_ctime = open_ret->remote_ctime; + info->remote_ctime = open_ret->remote_ctime; + } + + if (truncate) { + info->writecache_expire = 0; + truncate_inode_pages(inode->i_mapping, 0); + } + + atomic64_set(&info->write_counter, 0); + info->stable_ctime = open_ret->stable_ctime; + i_size_write(inode, open_ret->file_size); + info->getattr_isize = HMDFS_STALE_REMOTE_ISIZE; +set_fid_out: + spin_lock(&info->fid_lock); + info->fid = open_ret->fid; + spin_unlock(&info->fid_lock); + return 0; +} + +int hmdfs_do_open_remote(struct file *file, bool keep_cache) +{ + struct hmdfs_inode_info *info = hmdfs_i(file_inode(file)); + struct hmdfs_peer *conn = info->conn; + struct hmdfs_open_ret open_ret; + __u8 file_type = hmdfs_d(file->f_path.dentry)->file_type; + char *send_buf; + int err = 0; + + send_buf = hmdfs_get_dentry_relative_path(file->f_path.dentry); + if (!send_buf) { + err = -ENOMEM; + goto out_free; + } + err = hmdfs_send_open(conn, send_buf, file_type, &open_ret); + if (err) { + hmdfs_err("hmdfs_send_open return failed with %d", err); + goto out_free; + } + + err = hmdfs_open_final_remote(info, &open_ret, file, keep_cache); + +out_free: + kfree(send_buf); + return err; +} + +static inline bool hmdfs_remote_need_reopen(struct hmdfs_inode_info *info) +{ + return test_bit(HMDFS_FID_NEED_OPEN, &info->fid_flags); +} + +static inline bool hmdfs_remote_is_opening_file(struct hmdfs_inode_info *info) +{ + return test_bit(HMDFS_FID_OPENING, &info->fid_flags); +} + +static int hmdfs_remote_wait_opening_file(struct hmdfs_inode_info *info) +{ + int err; + + if (!hmdfs_remote_is_opening_file(info)) + return 0; + + err = ___wait_event(info->fid_wq, hmdfs_remote_is_opening_file(info), + TASK_INTERRUPTIBLE, 0, 0, + spin_unlock(&info->fid_lock); + schedule(); + spin_lock(&info->fid_lock)); + if (err) + err = -EINTR; + + return err; +} + +static int hmdfs_remote_file_reopen(struct hmdfs_inode_info *info, + struct file *filp) +{ + int err = 0; + struct hmdfs_peer *conn = info->conn; + struct inode *inode = NULL; + struct hmdfs_fid fid; + + if (conn->status == NODE_STAT_OFFLINE) + return -EAGAIN; + + spin_lock(&info->fid_lock); + err = hmdfs_remote_wait_opening_file(info); + if (err || !hmdfs_remote_need_reopen(info)) { + spin_unlock(&info->fid_lock); + goto out; + } + + set_bit(HMDFS_FID_OPENING, &info->fid_flags); + fid = info->fid; + spin_unlock(&info->fid_lock); + + inode = &info->vfs_inode; + inode_lock(inode); + /* + * Most closing cases are meaningless, except for one: + * read process A read process B + * err = -EBADF err = -EBADF (caused by re-online) + * set_need_reopen + * do reopen + * fid = new fid_1 [server hold fid_1] + * set need_reopen + * do reopen + * send close (fid_1) // In case of leak + * fid = new fid_2 + */ + if (fid.id != HMDFS_INODE_INVALID_FILE_ID) + hmdfs_send_close(conn, &fid); + err = hmdfs_do_open_remote(filp, 
true); + inode_unlock(inode); + + spin_lock(&info->fid_lock); + /* + * May make the bit set in offline handler lost, but server + * will tell us whether or not the newly-opened file id is + * generated before offline, if it is opened before offline, + * the operation on the file id will return -EBADF and + * HMDFS_FID_NEED_OPEN bit will be set again. + */ + if (!err) + clear_bit(HMDFS_FID_NEED_OPEN, &info->fid_flags); + clear_bit(HMDFS_FID_OPENING, &info->fid_flags); + spin_unlock(&info->fid_lock); + + wake_up_interruptible_all(&info->fid_wq); +out: + return err; +} + +static int hmdfs_remote_check_and_reopen(struct hmdfs_inode_info *info, + struct file *filp) +{ + if (!hmdfs_remote_need_reopen(info)) + return 0; + + return hmdfs_remote_file_reopen(info, filp); +} + +void hmdfs_do_close_remote(struct kref *kref) +{ + struct hmdfs_inode_info *info = + container_of(kref, struct hmdfs_inode_info, ref); + struct hmdfs_fid fid; + + hmdfs_remote_fetch_fid(info, &fid); + /* This function can return asynchronously */ + hmdfs_send_close(info->conn, &fid); +} + +static inline bool hmdfs_remote_need_track_file(const struct hmdfs_sb_info *sbi, + fmode_t mode) +{ + return (hmdfs_is_stash_enabled(sbi) && (mode & FMODE_WRITE)); +} + +static void +hmdfs_remote_del_wr_opened_inode_nolock(struct hmdfs_inode_info *info) +{ + WARN_ON(list_empty(&info->wr_opened_node)); + if (atomic_dec_and_test(&info->wr_opened_cnt)) + list_del_init(&info->wr_opened_node); +} + +void hmdfs_remote_del_wr_opened_inode(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + spin_lock(&conn->wr_opened_inode_lock); + hmdfs_remote_del_wr_opened_inode_nolock(info); + spin_unlock(&conn->wr_opened_inode_lock); +} + +void hmdfs_remote_add_wr_opened_inode_nolock(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + if (list_empty(&info->wr_opened_node)) { + atomic_set(&info->wr_opened_cnt, 1); + list_add_tail(&info->wr_opened_node, + &conn->wr_opened_inode_list); + } else { + atomic_inc(&info->wr_opened_cnt); + } +} + +static void hmdfs_remote_add_wr_opened_inode(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + spin_lock(&conn->wr_opened_inode_lock); + hmdfs_remote_add_wr_opened_inode_nolock(conn, info); + spin_unlock(&conn->wr_opened_inode_lock); +} + +int hmdfs_file_open_remote(struct inode *inode, struct file *file) +{ + struct hmdfs_inode_info *info = hmdfs_i(inode); + struct kref *ref = &(info->ref); + int err = 0; + + inode_lock(inode); + if (kref_read(ref) == 0) { + err = hmdfs_do_open_remote(file, false); + if (err == 0) + kref_init(ref); + } else { + kref_get(ref); + } + inode_unlock(inode); + + if (!err && hmdfs_remote_need_track_file(hmdfs_sb(inode->i_sb), + file->f_mode)) + hmdfs_remote_add_wr_opened_inode(info->conn, info); + + return err; +} + +static void hmdfs_set_writecache_expire(struct hmdfs_inode_info *info, + unsigned int seconds) +{ + unsigned long new_expire = jiffies + seconds * HZ; + + /* + * When file has been written before closing, set pagecache expire + * if it has not been set yet. This is necessary because ctime might + * stay the same after overwrite. 
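+ * Illustrative example: with a 30s timeout, a close at jiffy J sets + * the expiry to J + 30 * HZ; a later close while that timer is still + * running keeps the earlier expiry instead of extending it.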
+ */ + if (info->writecache_expire && + time_after(new_expire, info->writecache_expire)) + return; + + info->writecache_expire = new_expire; +} + +static void hmdfs_remote_keep_writecache(struct inode *inode, struct file *file) +{ + struct hmdfs_inode_info *info = NULL; + struct kref *ref = NULL; + struct hmdfs_getattr_ret *getattr_ret = NULL; + unsigned int write_cache_timeout = + hmdfs_sb(inode->i_sb)->write_cache_timeout; + int err; + + if (!write_cache_timeout) + return; + + info = hmdfs_i(inode); + ref = &(info->ref); + /* + * Don't do anything if the file is still open somewhere or hasn't + * been written. + */ + if (kref_read(ref) > 0 || !atomic64_read(&info->write_counter)) + return; + + /* + * If remote getattr fails and we don't update ctime, the pagecache + * will be truncated the next time the file is opened. + */ + err = hmdfs_remote_getattr(info->conn, file_dentry(file), 0, + &getattr_ret); + if (err) { + hmdfs_err("remote getattr failed with err %d", err); + return; + } + + if (!(getattr_ret->stat.result_mask & STATX_CTIME)) { + hmdfs_err("get remote ctime failed with mask 0x%x", + getattr_ret->stat.result_mask); + kfree(getattr_ret); + return; + } + /* + * Update ctime from the remote so that the pagecache will not be + * truncated on the next open. + */ + inode->i_ctime = getattr_ret->stat.ctime; + info->remote_ctime = getattr_ret->stat.ctime; + hmdfs_set_writecache_expire(info, write_cache_timeout); + kfree(getattr_ret); +} + +int hmdfs_file_release_remote(struct inode *inode, struct file *file) +{ + struct hmdfs_inode_info *info = hmdfs_i(inode); + + if (hmdfs_remote_need_track_file(hmdfs_sb(inode->i_sb), file->f_mode)) + hmdfs_remote_del_wr_opened_inode(info->conn, info); + + inode_lock(inode); + kref_put(&info->ref, hmdfs_do_close_remote); + hmdfs_remote_keep_writecache(inode, file); + inode_unlock(inode); + + return 0; +} + +static int hmdfs_file_flush(struct file *file, fl_owner_t id) +{ + int err = 0; + struct inode *inode = file_inode(file); + + if (!(file->f_mode & FMODE_WRITE)) + return 0; + + /* + * Continue regardless of whether the file reopen fails or not, + * because there may be no dirty page. + */ + hmdfs_remote_check_and_reopen(hmdfs_i(inode), file); + + /* + * Waiting for wsem here would greatly impact performance, so we + * overlap that time to issue as many writebacks as we can, expecting + * the async writebacks to be eliminated afterwards. + */ + filemap_fdatawrite(inode->i_mapping); + down_write(&hmdfs_i(inode)->wpage_sem); + err = filemap_write_and_wait(inode->i_mapping); + up_write(&hmdfs_i(inode)->wpage_sem); + return err; +} + +static ssize_t hmdfs_file_read_iter_remote(struct kiocb *iocb, + struct iov_iter *iter) +{ + struct file *filp = iocb->ki_filp; + struct hmdfs_inode_info *info = hmdfs_i(file_inode(filp)); + struct file_ra_state *ra = NULL; + unsigned int rtt; + int err; + bool tried = false; + +retry: + err = hmdfs_remote_check_and_reopen(info, filp); + if (err) + return err; + + ra = &filp->f_ra; + /* rtt is measured in 10 msecs */ + rtt = hmdfs_tcpi_rtt(info->conn) / 10000; + switch (rtt) { + case 0: + break; + case 1: + ra->ra_pages = 256; + break; + case 2: + ra->ra_pages = 512; + break; + default: + ra->ra_pages = 1024; + break; + } + + err = generic_file_read_iter(iocb, iter); + if (err < 0 && !tried && hmdfs_remote_need_reopen(info)) { + /* Read from a stale fid, try read again once.
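The stale fid is flagged by the readpage callback: -EBADF from the remote sets HMDFS_FID_NEED_OPEN, and the retry path reopens the file before reading again.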
*/ + tried = true; + goto retry; + } + + return err; +} + +static inline bool hmdfs_is_file_unwritable(const struct hmdfs_inode_info *info, + bool check_stash) +{ + return (check_stash && hmdfs_inode_is_stashing(info)) || + !hmdfs_is_node_online(info->conn); +} + +static ssize_t __hmdfs_file_write_iter_remote(struct kiocb *iocb, + struct iov_iter *iter, + bool check_stash) +{ + struct file *filp = iocb->ki_filp; + struct inode *inode = file_inode(filp); + struct hmdfs_inode_info *info = hmdfs_i(inode); + ssize_t ret; + + if (hmdfs_is_file_unwritable(info, check_stash)) + return -EAGAIN; + + ret = hmdfs_remote_check_and_reopen(info, filp); + if (ret) + return ret; + + inode_lock(inode); + if (hmdfs_is_file_unwritable(info, check_stash)) { + ret = -EAGAIN; + goto out; + } + ret = generic_write_checks(iocb, iter); + if (ret > 0) + ret = __generic_file_write_iter(iocb, iter); +out: + inode_unlock(inode); + + if (ret > 0) + ret = generic_write_sync(iocb, ret); + return ret; +} + +ssize_t hmdfs_file_write_iter_remote_nocheck(struct kiocb *iocb, + struct iov_iter *iter) +{ + return __hmdfs_file_write_iter_remote(iocb, iter, false); +} + +static ssize_t hmdfs_file_write_iter_remote(struct kiocb *iocb, + struct iov_iter *iter) +{ + return __hmdfs_file_write_iter_remote(iocb, iter, true); +} + +/* hmdfs does not support mmap write to remote files */ +static vm_fault_t hmdfs_page_mkwrite(struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + +static const struct vm_operations_struct hmdfs_file_vm_ops = { + .fault = filemap_fault, + .map_pages = filemap_map_pages, + .page_mkwrite = hmdfs_page_mkwrite, +}; + +static int hmdfs_file_mmap_remote(struct file *file, struct vm_area_struct *vma) +{ + vma->vm_ops = &hmdfs_file_vm_ops; + file_accessed(file); + + return 0; +} + +static int hmdfs_file_fsync_remote(struct file *file, loff_t start, loff_t end, + int datasync) +{ + struct hmdfs_inode_info *info = hmdfs_i(file_inode(file)); + struct hmdfs_peer *conn = info->conn; + struct hmdfs_fid fid; + int err; + + trace_hmdfs_fsync_enter_remote(conn->sbi, conn->device_id, + info->remote_ino, datasync); + /* + * Continue regardless of whether the file reopen fails or not, + * because there may be no dirty page.
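+ * The order below matters: first push local dirty pages to the remote + * (file_write_and_wait_range), then send F_FSYNC so the server flushes + * its lower file; only then has the data reached the remote disk.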
+ */ + hmdfs_remote_check_and_reopen(info, file); + + filemap_fdatawrite(file->f_mapping); + down_write(&info->wpage_sem); + err = file_write_and_wait_range(file, start, end); + up_write(&info->wpage_sem); + if (err) { + hmdfs_err("local fsync fail with %d", err); + goto out; + } + + hmdfs_remote_fetch_fid(info, &fid); + err = hmdfs_send_fsync(conn, &fid, start, end, datasync); + if (err) + hmdfs_err("send fsync fail with %d", err); + +out: + trace_hmdfs_fsync_exit_remote(conn->sbi, conn->device_id, + info->remote_ino, + get_cmd_timeout(conn->sbi, F_FSYNC), err); + + /* Compatible with POSIX retcode */ + if (err == -ETIME) + err = -EIO; + + return err; +} + +const struct file_operations hmdfs_dev_file_fops_remote = { + .owner = THIS_MODULE, + .llseek = generic_file_llseek, + .read_iter = hmdfs_file_read_iter_remote, + .write_iter = hmdfs_file_write_iter_remote, + .mmap = hmdfs_file_mmap_remote, + .open = hmdfs_file_open_remote, + .release = hmdfs_file_release_remote, + .flush = hmdfs_file_flush, + .fsync = hmdfs_file_fsync_remote, +}; + +static void hmdfs_fill_page_zero(struct page *page) +{ + void *addr = NULL; + + addr = kmap(page); + memset(addr, 0, PAGE_SIZE); + kunmap(page); + SetPageUptodate(page); + unlock_page(page); +} + +static int hmdfs_readpage_remote(struct file *file, struct page *page) +{ + struct inode *inode = file_inode(file); + struct hmdfs_inode_info *info = hmdfs_i(inode); + loff_t isize = i_size_read(inode); + pgoff_t end_index = (isize - 1) >> PAGE_SHIFT; + struct hmdfs_fid fid; + + if (!isize || page->index > end_index) { + hmdfs_fill_page_zero(page); + return 0; + } + + hmdfs_remote_fetch_fid(info, &fid); + return hmdfs_client_readpage(info->conn, &fid, page); +} + +uint32_t hmdfs_get_writecount(struct page *page) +{ + uint32_t count = 0; + loff_t pos = (loff_t)page->index << HMDFS_PAGE_OFFSET; + struct inode *inode = page->mapping->host; + loff_t size = i_size_read(inode); + /* + * A page offset beyond i_size is possible when writepage races with + * truncate. In this case we don't need to do a remote writepage since + * the page will be truncated after it is unlocked. + */ + if (pos >= size) + count = 0; + /* + * If the page to be written extends beyond i_size, we must not write + * past i_size or the remote file size would be wrong. + */ + else if (size < pos + HMDFS_PAGE_SIZE) + count = size - pos; + /* It's safe to write the whole page */ + else + count = HMDFS_PAGE_SIZE; + + return count; +} + +static bool allow_cur_thread_wpage(struct hmdfs_inode_info *info, + bool *rsem_held, bool sync_all) +{ + WARN_ON(!rsem_held); + + if (sync_all) { + *rsem_held = false; + return true; + } + *rsem_held = down_read_trylock(&info->wpage_sem); + return *rsem_held; +} + +/** + * hmdfs_writepage_remote - write back a dirty page to the remote + * + * INFO: + * When asked to WB_SYNC_ALL, this function should leave with both the page and + * the radix tree node clean to achieve close-to-open consistency. Moreover, + * this shall never return -EIO, to help filemap iterate over all dirty pages. + * + * INFO: + * When asked to WB_SYNC_NONE, this function should be lenient if faults (OOM + * or a bad pipe) happened, to enable subsequent r/w & wb.
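+ * + * In short: on failure, WB_SYNC_ALL marks the page and mapping with an + * error so the syncer can see it, while WB_SYNC_NONE prefers to redirty + * the page and let a later writeback retry (see out_unlock below).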
+ */ +static int hmdfs_writepage_remote(struct page *page, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct hmdfs_inode_info *info = hmdfs_i(inode); + struct hmdfs_sb_info *sbi = hmdfs_sb(inode->i_sb); + int ret = 0; + bool rsem_held = false; + bool sync = wbc->sync_mode == WB_SYNC_ALL; + struct hmdfs_writepage_context *param = NULL; + + if (!allow_cur_thread_wpage(info, &rsem_held, sync)) + goto out_unlock; + + set_page_writeback(page); + + param = kzalloc(sizeof(*param), GFP_NOFS); + if (!param) { + ret = -ENOMEM; + goto out_endwb; + } + + if (sync && hmdfs_usr_sig_pending(current)) { + ClearPageUptodate(page); + goto out_free; + } + param->count = hmdfs_get_writecount(page); + if (!param->count) + goto out_free; + param->rsem_held = rsem_held; + hmdfs_remote_fetch_fid(info, &param->fid); + param->sync_all = sync; + param->caller = current; + get_task_struct(current); + param->page = page; + param->timeout = jiffies + msecs_to_jiffies(sbi->wb_timeout_ms); + INIT_DELAYED_WORK(&param->retry_dwork, hmdfs_remote_writepage_retry); + ret = hmdfs_remote_do_writepage(info->conn, param); + if (likely(!ret)) + return 0; + + put_task_struct(current); +out_free: + kfree(param); +out_endwb: + end_page_writeback(page); + if (rsem_held) + up_read(&info->wpage_sem); +out_unlock: + if (sync || !hmdfs_need_redirty_page(info, ret)) { + SetPageError(page); + mapping_set_error(page->mapping, ret); + } else { + redirty_page_for_writepage(wbc, page); + } + unlock_page(page); + return ret; +} + +static void hmdfs_account_dirty_pages(struct address_space *mapping) +{ + struct hmdfs_sb_info *sbi = mapping->host->i_sb->s_fs_info; + + if (!sbi->h_wb->dirty_writeback_control) + return; + + this_cpu_inc(*sbi->h_wb->bdp_ratelimits); +} + +static int hmdfs_write_begin_remote(struct file *file, + struct address_space *mapping, loff_t pos, + unsigned int len, unsigned int flags, + struct page **pagep, void **fsdata) +{ + pgoff_t index = ((unsigned long long)pos) >> PAGE_SHIFT; + struct inode *inode = file_inode(file); + struct page *page = NULL; + int ret = 0; + +start: + page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); + if (!page) + return -ENOMEM; + *pagep = page; + wait_on_page_writeback(page); + + // This page will be covered completely. + if (len == HMDFS_PAGE_SIZE || PageUptodate(page)) + return 0; + + /* + * If all the data existing in this page will be overwritten, we just + * need to zero the remainder of the page. + */ + if (!((unsigned long long)pos & (HMDFS_PAGE_SIZE - 1)) && + (pos + len) >= i_size_read(inode)) { + zero_user_segment(page, len, HMDFS_PAGE_SIZE); + return 0; + } + /* + * We need to read the page in before writing data to it.
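+ * This is a standard read-modify-write: e.g. a short write into the + * middle of an existing page must not clobber the surrounding bytes, + * so the page is fetched from the remote first.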
+ */ + ret = hmdfs_readpage_remote(file, page); + if (!ret) { + if (PageLocked(page)) { + ret = __lock_page_killable(page); + if (!ret) + unlock_page(page); + } + + if (!ret && PageUptodate(page)) { + put_page(page); + goto start; + } + if (!ret) + ret = -EIO; + } + put_page(page); + return ret; +} + +static int hmdfs_write_end_remote(struct file *file, + struct address_space *mapping, loff_t pos, + unsigned int len, unsigned int copied, + struct page *page, void *fsdata) +{ + struct inode *inode = page->mapping->host; + + if (!PageUptodate(page)) { + if (unlikely(copied != len)) + copied = 0; + else + SetPageUptodate(page); + } + if (!copied) + goto unlock_out; + + if (!PageDirty(page)) { + hmdfs_account_dirty_pages(mapping); + set_page_dirty(page); + } + + if (pos + copied > i_size_read(inode)) { + i_size_write(inode, pos + copied); + hmdfs_i(inode)->getattr_isize = HMDFS_STALE_REMOTE_ISIZE; + } +unlock_out: + unlock_page(page); + put_page(page); + + /* hmdfs private writeback control */ + hmdfs_balance_dirty_pages_ratelimited(mapping); + return copied; +} + +const struct address_space_operations hmdfs_dev_file_aops_remote = { + .readpage = hmdfs_readpage_remote, + .write_begin = hmdfs_write_begin_remote, + .write_end = hmdfs_write_end_remote, + .writepage = hmdfs_writepage_remote, + .set_page_dirty = __set_page_dirty_nobuffers, +}; + +loff_t hmdfs_set_pos(unsigned long dev_id, unsigned long group_id, + unsigned long offset) +{ + loff_t pos; + + pos = ((loff_t)dev_id << (POS_BIT_NUM - 1 - DEV_ID_BIT_NUM)) + + ((loff_t)group_id << OFFSET_BIT_NUM) + offset; + if (dev_id) + pos |= ((loff_t)1 << (POS_BIT_NUM - 1)); + return pos; +} + +static int analysis_dentry_file_from_con(struct hmdfs_sb_info *sbi, + struct file *file, + struct file *handler, + struct dir_context *ctx) +{ + struct hmdfs_dentry_group *dentry_group = NULL; + loff_t pos = ctx->pos; + unsigned long dev_id = (unsigned long)((pos << 1) >> (POS_BIT_NUM - DEV_ID_BIT_NUM)); + unsigned long group_id = (unsigned long)((pos << (1 + DEV_ID_BIT_NUM)) >> + (POS_BIT_NUM - GROUP_ID_BIT_NUM)); + loff_t offset = pos & OFFSET_BIT_MASK; + int group_num = 0; + char *dentry_name = NULL; + int iterate_result = 0; + int i, j; + + dentry_group = kzalloc(sizeof(*dentry_group), GFP_KERNEL); + + if (!dentry_group) + return -ENOMEM; + + if (IS_ERR_OR_NULL(handler)) { + kfree(dentry_group); + return -ENOENT; + } + + group_num = get_dentry_group_cnt(file_inode(handler)); + dentry_name = kzalloc(DENTRY_NAME_MAX_LEN, GFP_KERNEL); + if (!dentry_name) { + kfree(dentry_group); + return -ENOMEM; + } + + for (i = group_id; i < group_num; i++) { + int ret = hmdfs_metainfo_read(sbi, handler, dentry_group, + sizeof(struct hmdfs_dentry_group), + i); + if (ret != sizeof(struct hmdfs_dentry_group)) { + hmdfs_err("read dentry group failed ret:%d", ret); + goto done; + } + + for (j = offset; j < DENTRY_PER_GROUP; j++) { + int len; + int file_type = DT_UNKNOWN; + bool is_continue; + + len = le16_to_cpu(dentry_group->nsl[j].namelen); + if (!test_bit_le(j, dentry_group->bitmap) || len == 0) + continue; + + memset(dentry_name, 0, DENTRY_NAME_MAX_LEN); + // TODO: Support more file_type + if (S_ISDIR(le16_to_cpu(dentry_group->nsl[j].i_mode))) + file_type = DT_DIR; + else if (S_ISREG(le16_to_cpu( + dentry_group->nsl[j].i_mode))) + file_type = DT_REG; + + strncat(dentry_name, dentry_group->filename[j], len); + pos = hmdfs_set_pos(dev_id, i, j); + is_continue = + dir_emit(ctx, dentry_name, len, + pos + INUNUMBER_START, file_type); + if (!is_continue) { + ctx->pos = pos; + 
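/* buffer exhausted: resume from this pos on the next iterate */ +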
iterate_result = 1; + goto done; + } + } + offset = 0; + } + +done: + kfree(dentry_name); + kfree(dentry_group); + return iterate_result; +} + +int hmdfs_dev_readdir_from_con(struct hmdfs_peer *con, struct file *file, + struct dir_context *ctx) +{ + int iterate_result = 0; + + iterate_result = analysis_dentry_file_from_con( + con->sbi, file, file->private_data, ctx); + return iterate_result; +} + +static int hmdfs_iterate_remote(struct file *file, struct dir_context *ctx) +{ + int err = 0; + loff_t start_pos = ctx->pos; + struct hmdfs_peer *con = NULL; + struct hmdfs_dentry_info *di = hmdfs_d(file->f_path.dentry); + bool is_local = !((ctx->pos) >> (POS_BIT_NUM - 1)); + uint64_t dev_id = di->device_id; + + if (ctx->pos == -1) + return 0; + if (is_local) + ctx->pos = hmdfs_set_pos(dev_id, 0, 0); + + con = hmdfs_lookup_from_devid(file->f_inode->i_sb->s_fs_info, dev_id); + if (con) { + // ctx->pos = 0; + err = con->conn_operations->remote_readdir(con, file, ctx); + if (unlikely(!con)) { + hmdfs_err("con is null"); + goto done; + } + peer_put(con); + if (err) + goto done; + } + +done: + if (err <= 0) + ctx->pos = -1; + + trace_hmdfs_iterate_remote(file->f_path.dentry, start_pos, ctx->pos, + err); + return err; +} + +int hmdfs_dir_open_remote(struct inode *inode, struct file *file) +{ + struct hmdfs_inode_info *info = hmdfs_i(inode); + struct clearcache_item *cache_item = NULL; + + if (info->conn && info->conn->version <= USERSPACE_MAX_VER) { + return 0; + } else if (info->conn) { + if (!hmdfs_cache_revalidate(READ_ONCE(info->conn->conn_time), + info->conn->device_id, + file->f_path.dentry)) + get_remote_dentry_file_sync(file->f_path.dentry, + info->conn); + cache_item = hmdfs_find_cache_item(info->conn->device_id, + file->f_path.dentry); + if (cache_item) { + file->private_data = cache_item->filp; + get_file(file->private_data); + kref_put(&cache_item->ref, release_cache_item); + return 0; + } + return -ENOENT; + } + return -ENOENT; +} + +static int hmdfs_dir_release_remote(struct inode *inode, struct file *file) +{ + if (file->private_data) + fput(file->private_data); + file->private_data = NULL; + return 0; +} + +const struct file_operations hmdfs_dev_dir_ops_remote = { + .owner = THIS_MODULE, + .iterate = hmdfs_iterate_remote, + .open = hmdfs_dir_open_remote, + .release = hmdfs_dir_release_remote, + .fsync = __generic_file_fsync, +}; diff --git a/fs/hmdfs/file_remote.h b/fs/hmdfs/file_remote.h new file mode 100644 index 000000000000..026bd0c944a6 --- /dev/null +++ b/fs/hmdfs/file_remote.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/file_remote.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_FILE_REMOTE_H +#define HMDFS_FILE_REMOTE_H + +#include +#include + +#include "hmdfs.h" +#include "comm/connection.h" + +void hmdfs_remote_del_wr_opened_inode(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info); + +void hmdfs_remote_add_wr_opened_inode_nolock(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info); + +ssize_t hmdfs_file_write_iter_remote_nocheck(struct kiocb *iocb, + struct iov_iter *iter); + +#endif diff --git a/fs/hmdfs/file_root.c b/fs/hmdfs/file_root.c new file mode 100644 index 000000000000..d82ff4d0b04b --- /dev/null +++ b/fs/hmdfs/file_root.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/file_root.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#include +#include +#include + +#include "authority/authentication.h" +#include "comm/socket_adapter.h" +#include "comm/transport.h" +#include "hmdfs.h" +#include "hmdfs_dentryfile.h" +#include "hmdfs_device_view.h" + +#define DEVICE_VIEW_CTX_POS 2 +#define MERGE_VIEW_CTX_POS 3 +#define ROOT_DIR_INO_START 20000000 + +// used by hmdfs_device_iterate functions +#define DEVICE_VIEW_INO_START 20000002 +#define LOCAL_DEVICE_CTX_POS 2 + +struct hmdfs_peer *get_next_con(struct hmdfs_sb_info *sbi, + unsigned long current_dev_id) +{ + struct hmdfs_peer *con = NULL; + struct hmdfs_peer *next_con = NULL; + struct list_head *head, *node; + + mutex_lock(&sbi->connections.node_lock); + head = &sbi->connections.node_list; + if (current_dev_id == 0) { + node = head->next; + if (node == head) + goto done; + next_con = container_of(node, struct hmdfs_peer, list); + if (next_con->status == NODE_STAT_ONLINE) + goto done; + current_dev_id = next_con->device_id; + next_con = NULL; + } + + list_for_each_entry(con, &sbi->connections.node_list, list) { + if ((con->device_id & 0xFFFF) == (current_dev_id & 0xFFFF)) { + node = con->list.next; + if (node == head) + goto done; + next_con = container_of(node, struct hmdfs_peer, list); + if (next_con->status == NODE_STAT_ONLINE) + goto done; + current_dev_id = next_con->device_id; + next_con = NULL; + } + } +done: + if (next_con) + peer_get(next_con); + mutex_unlock(&sbi->connections.node_lock); + return next_con; +} + +int hmdfs_device_iterate(struct file *file, struct dir_context *ctx) +{ + int err = 0; + uint64_t ino_start = DEVICE_VIEW_INO_START; + struct hmdfs_peer *next_con = NULL; + unsigned long dev_id = 0; + struct hmdfs_peer *con = NULL; + char *remote_device_name = NULL; + + if (ctx->pos != 0) + goto out; + dir_emit_dots(file, ctx); + + if (ctx->pos == LOCAL_DEVICE_CTX_POS) { + err = dir_emit(ctx, DEVICE_VIEW_LOCAL, + sizeof(DEVICE_VIEW_LOCAL) - 1, ino_start++, + DT_DIR); + if (!err) + goto out; + (ctx->pos)++; + } + next_con = get_next_con(file->f_inode->i_sb->s_fs_info, 0); + if (!next_con) + goto out; + + dev_id = next_con->device_id; + peer_put(next_con); + con = hmdfs_lookup_from_devid(file->f_inode->i_sb->s_fs_info, dev_id); + remote_device_name = kmalloc(HMDFS_CID_SIZE + 1, GFP_KERNEL); + if (!remote_device_name) { + err = -ENOMEM; + goto out; + } + while (con) { + peer_put(con); + snprintf(remote_device_name, HMDFS_CID_SIZE + 1, "%s", + con->cid); + if (!dir_emit(ctx, remote_device_name, + strlen(remote_device_name), ino_start++, DT_DIR)) + goto done; + + (ctx->pos)++; + con = get_next_con(file->f_inode->i_sb->s_fs_info, dev_id); + if (!con) + goto done; + + dev_id = con->device_id; + } +done: + kfree(remote_device_name); +out: + if (err <= 0) + ctx->pos = -1; + + return err; +} + +int hmdfs_root_iterate(struct file *file, struct dir_context *ctx) +{ + uint64_t ino_start = ROOT_DIR_INO_START; + struct hmdfs_sb_info *sbi = file_inode(file)->i_sb->s_fs_info; + + if (!dir_emit_dots(file, ctx)) + return 0; + if (ctx->pos == DEVICE_VIEW_CTX_POS) { + if (!dir_emit(ctx, DEVICE_VIEW_ROOT, + sizeof(DEVICE_VIEW_ROOT) - 1, ino_start, DT_DIR)) + return 0; + ino_start++; + ctx->pos = MERGE_VIEW_CTX_POS; + } + if (sbi->s_merge_switch && ctx->pos == MERGE_VIEW_CTX_POS) { + if (!dir_emit(ctx, MERGE_VIEW_ROOT, sizeof(MERGE_VIEW_ROOT) - 1, + ino_start, DT_DIR)) + return 0; + (ctx->pos)++; + } + return 0; +} + +const struct file_operations hmdfs_root_fops = { + .owner = THIS_MODULE, + .iterate = hmdfs_root_iterate, +}; + +const struct file_operations 
hmdfs_device_fops = { + .owner = THIS_MODULE, + .iterate = hmdfs_device_iterate, +}; diff --git a/fs/hmdfs/hmdfs.h b/fs/hmdfs/hmdfs.h new file mode 100644 index 000000000000..d0a24db08f62 --- /dev/null +++ b/fs/hmdfs/hmdfs.h @@ -0,0 +1,325 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/hmdfs.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_H +#define HMDFS_H + +#include +#include +#include +#include +#include +#include + +#include "comm/protocol.h" +#include "comm/fault_inject.h" + +#if KERNEL_VERSION(4, 15, 0) < LINUX_VERSION_CODE +#define hmdfs_time_t timespec64 +#define hmdfs_time_compare timespec64_compare +#define hmdfs_time_add timespec64_add +#else +#define hmdfs_time_t timespec +#define hmdfs_time_compare timespec_compare +#define hmdfs_time_add timespec_add +#endif + +#define HMDFS_PAGE_SIZE 4096 +#define HMDFS_PAGE_OFFSET 12 + +/* max xattr value size, not include '\0' */ +#define HMDFS_XATTR_SIZE_MAX 4096 +/* max listxattr response size, include '\0' */ +#define HMDFS_LISTXATTR_SIZE_MAX 4096 + +// 20 digits +'\0', Converted from a u64 integer +#define HMDFS_ACCOUNT_HASH_MAX_LEN 21 +#define CTRL_PATH_MAX_LEN 21 + +#define HMDFS_SUPER_MAGIC 0x20200302 + +#define DEFAULT_WRITE_CACHE_TIMEOUT 30 +#define DEFAULT_SRV_REQ_MAX_ACTIVE 16 + +#define HMDFS_INODE_INVALID_FILE_ID (1U << 31) +#define HMDFS_FID_VER_BOOT_COOKIE_SHIFT 15 + +/* According to task_struct instead of workqueue_struct */ +#define HMDFS_WQ_NAME_LEN 16 + +#define HMDFS_DEF_WB_TIMEOUT_MS 60000 +#define HMDFS_MAX_WB_TIMEOUT_MS 900000 + +#define HMDFS_READPAGES_NR_MAX 32 + +enum { + HMDFS_FEATURE_READPAGES = 1ULL << 0, + HMDFS_FEATURE_READPAGES_OPEN = 1ULL << 1, + HMDFS_ATOMIC_OPEN = 1ULL << 2, +}; + +struct client_statistic; +struct server_statistic; +struct hmdfs_writeback; +struct hmdfs_server_writeback; +struct hmdfs_syncfs_info { + wait_queue_head_t wq; + atomic_t wait_count; + int remote_ret; + unsigned long long version; + + /* Protect version in concurrent operations */ + spinlock_t v_lock; + /* + * Serialize hmdfs_sync_fs() process: + * |<- pending_list ->| exexuting |<- wait_list ->| + * syncfs_1 syncfs_2 (syncfs_3) syncfs_4 syncfs_5 + * + * Abandon syncfs processes in pending_list after syncfs_3 finished; + * Pick the last syncfs process in wait_list after syncfs_3 finished; + */ + bool is_executing; + /* syncfs process arriving after current exexcuting syncfs */ + struct list_head wait_list; + /* syncfs process arriving before current exexcuting syncfs */ + struct list_head pending_list; + spinlock_t list_lock; +}; + +struct hmdfs_sb_info { + /* list for all registered superblocks */ + struct list_head list; + struct mutex umount_mutex; + + struct kobject kobj; + struct completion s_kobj_unregister; + struct super_block *sb; + struct super_block *lower_sb; + /* from mount, which is root */ + const struct cred *cred; + /* from update cmd, expected to be system */ + const struct cred *system_cred; + struct { + struct mutex node_lock; + struct list_head node_list; + atomic_t conn_seq; + unsigned long recent_ol; + } connections; + char *local_dst; + char *real_dst; + char *local_src; + char *cache_dir; + /* seq number for hmdfs super block */ + unsigned int seq; + + /* + * This value indicate how long the pagecache stay valid(in seconds) in + * client if metadate(except iversion) is equal to server. This + * functionality is disabled if this value is 0. 
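+ * (DEFAULT_WRITE_CACHE_TIMEOUT above suggests 30 seconds as the usual + * setting.)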
+ */ + unsigned int write_cache_timeout; + unsigned int dcache_timeout; + unsigned int dcache_precision; + unsigned long dcache_threshold; + struct list_head client_cache; + struct list_head server_cache; + struct list_head to_delete; + struct mutex cache_list_lock; + + /* local operation time statistic */ + struct server_statistic *s_server_statis; + + /* client statistic */ + struct client_statistic *s_client_statis; + + /* TIMEOUT of each command */ + struct kobject s_cmd_timeout_kobj; + struct completion s_timeout_kobj_unregister; + unsigned int s_cmd_timeout[F_SIZE]; + + /* For case sensitive */ + bool s_case_sensitive; + + /* For features supporting */ + u64 s_features; + + /* For merge & device view */ + unsigned int s_merge_switch; + /* For writeback */ + struct hmdfs_writeback *h_wb; + /* For server writeback */ + struct hmdfs_server_writeback *h_swb; + + /* syncfs info */ + struct hmdfs_syncfs_info hsi; + + /* To bridge the userspace utils */ + struct kfifo notify_fifo; + spinlock_t notify_fifo_lock; + struct hmdfs_fault_inject fault_inject; + + /* For reboot detect */ + uint64_t boot_cookie; + /* offline process */ + unsigned int async_cb_delay; + /* For server handle requests */ + unsigned int async_req_max_active; + /* stash dirty pages during offline */ + bool s_offline_stash; + + /* Timeout (ms) to retry writing remote pages */ + unsigned int wb_timeout_ms; + + struct path stash_work_dir; + /* dentry cache */ + bool s_dentry_cache; + + /* msgs that are waiting for remote */ + struct list_head async_readdir_msg_list; + /* protect async_readdir_msg_list */ + spinlock_t async_readdir_msg_lock; + /* async readdir work that are queued but not finished */ + struct list_head async_readdir_work_list; + /* protect async_readdir_work_list */ + spinlock_t async_readdir_work_lock; + /* wait for async_readdir_work_list to be empty in umount */ + wait_queue_head_t async_readdir_wq; + /* don't allow async readdir */ + bool async_readdir_prohibit; +}; + +static inline struct hmdfs_sb_info *hmdfs_sb(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline bool hmdfs_is_stash_enabled(const struct hmdfs_sb_info *sbi) +{ + return sbi->s_offline_stash; +} + +struct setattr_info { + loff_t size; + unsigned int valid; + umode_t mode; + kuid_t uid; + kgid_t gid; + long long atime; + long atime_nsec; + long long mtime; + long mtime_nsec; + long long ctime; + long ctime_nsec; +}; + +struct hmdfs_file_info { + union { + struct { + struct rb_root root; + struct mutex comrade_list_lock; + }; + struct { + struct file *lower_file; + int device_id; + }; + }; + struct list_head comrade_list; +}; + +static inline struct hmdfs_file_info *hmdfs_f(struct file *file) +{ + return file->private_data; +} + +// Almost all the source files want this, so... 
+#include "inode.h" + +/* locking helpers */ +static inline struct dentry *lock_parent(struct dentry *dentry) +{ + struct dentry *dir = dget_parent(dentry); + + inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); + return dir; +} + +static inline void unlock_dir(struct dentry *dir) +{ + inode_unlock(d_inode(dir)); + dput(dir); +} + +extern uint64_t path_hash(const char *path, int len, bool case_sense); +extern int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, + const char *name, unsigned int flags, + struct path *path); +extern ssize_t hmdfs_remote_listxattr(struct dentry *dentry, char *buffer, + size_t size); + +int check_filename(const char *name, int len); + +int hmdfs_permission(struct inode *inode, int mask); + +int hmdfs_parse_options(struct hmdfs_sb_info *sbi, const char *data); + +/* Refer to comments in hmdfs_request_work_fn() */ +#define HMDFS_SERVER_CTX_FLAGS (PF_KTHREAD | PF_WQ_WORKER | PF_NPROC_EXCEEDED) + +static inline bool is_current_hmdfs_server_ctx(void) +{ + return ((current->flags & HMDFS_SERVER_CTX_FLAGS) == + HMDFS_SERVER_CTX_FLAGS); +} + +extern uint64_t hmdfs_gen_boot_cookie(void); + +static inline bool str_n_case_eq(const char *s1, const char *s2, size_t len) +{ + return !strncasecmp(s1, s2, len); +} + +static inline bool qstr_case_eq(const struct qstr *q1, const struct qstr *q2) +{ + return q1->len == q2->len && str_n_case_eq(q1->name, q2->name, q2->len); +} + +/***************************************************************************** + * log print helpers + *****************************************************************************/ +__printf(4, 5) void __hmdfs_log(const char *level, const bool ratelimited, + const char *function, const char *fmt, ...); +#define hmdfs_err(fmt, ...) \ + __hmdfs_log(KERN_ERR, false, __func__, fmt, ##__VA_ARGS__) +#define hmdfs_warning(fmt, ...) \ + __hmdfs_log(KERN_WARNING, false, __func__, fmt, ##__VA_ARGS__) +#define hmdfs_info(fmt, ...) \ + __hmdfs_log(KERN_INFO, false, __func__, fmt, ##__VA_ARGS__) +#define hmdfs_err_ratelimited(fmt, ...) \ + __hmdfs_log(KERN_ERR, true, __func__, fmt, ##__VA_ARGS__) +#define hmdfs_warning_ratelimited(fmt, ...) \ + __hmdfs_log(KERN_WARNING, true, __func__, fmt, ##__VA_ARGS__) +#define hmdfs_info_ratelimited(fmt, ...) \ + __hmdfs_log(KERN_INFO, true, __func__, fmt, ##__VA_ARGS__) +#ifdef CONFIG_HMDFS_FS_DEBUG +#define hmdfs_debug(fmt, ...) \ + __hmdfs_log(KERN_DEBUG, false, __func__, fmt, ##__VA_ARGS__) +#define hmdfs_debug_ratelimited(fmt, ...) \ + __hmdfs_log(KERN_DEBUG, true, __func__, fmt, ##__VA_ARGS__) +#else +#define hmdfs_debug(fmt, ...) ((void)0) +#define hmdfs_debug_ratelimited(fmt, ...) ((void)0) +#endif + +/***************************************************************************** + * inode/file operations declartion + *****************************************************************************/ +extern const struct inode_operations hmdfs_device_ops; +extern const struct inode_operations hmdfs_root_ops; +extern const struct file_operations hmdfs_root_fops; +extern const struct file_operations hmdfs_device_fops; + +#endif // HMDFS_H diff --git a/fs/hmdfs/hmdfs_client.c b/fs/hmdfs/hmdfs_client.c new file mode 100644 index 000000000000..2c381f57f7e0 --- /dev/null +++ b/fs/hmdfs/hmdfs_client.c @@ -0,0 +1,1096 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/hmdfs_client.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#include "hmdfs_client.h" +#include "hmdfs_server.h" + +#include +#include +#include + +#include "comm/socket_adapter.h" +#include "hmdfs_dentryfile.h" +#include "hmdfs_trace.h" +#include "comm/node_cb.h" +#include "stash.h" +#include "authority/authentication.h" + +#define HMDFS_SYNC_WPAGE_RETRY_MS 2000 + +static inline void free_sm_outbuf(struct hmdfs_send_command *sm) +{ + if (sm->out_buf && sm->out_len != 0) + kfree(sm->out_buf); + sm->out_len = 0; + sm->out_buf = NULL; +} + +int hmdfs_send_open(struct hmdfs_peer *con, const char *send_buf, + __u8 file_type, struct hmdfs_open_ret *open_ret) +{ + int ret; + int path_len = strlen(send_buf); + size_t send_len = sizeof(struct open_request) + path_len + 1; + struct open_request *open_req = kzalloc(send_len, GFP_KERNEL); + struct open_response *resp; + struct hmdfs_send_command sm = { + .data = open_req, + .len = send_len, + }; + hmdfs_init_cmd(&sm.operations, F_OPEN); + + if (!open_req) { + ret = -ENOMEM; + goto out; + } + open_req->file_type = file_type; + open_req->path_len = cpu_to_le32(path_len); + strcpy(open_req->buf, send_buf); + ret = hmdfs_sendmessage_request(con, &sm); + kfree(open_req); + + if (!ret && (sm.out_len == 0 || !sm.out_buf)) + ret = -ENOENT; + if (ret) + goto out; + resp = sm.out_buf; + + open_ret->ino = le64_to_cpu(resp->ino); + open_ret->fid.ver = le64_to_cpu(resp->file_ver); + open_ret->fid.id = le32_to_cpu(resp->file_id); + open_ret->file_size = le64_to_cpu(resp->file_size); + open_ret->remote_ctime.tv_sec = le64_to_cpu(resp->ctime); + open_ret->remote_ctime.tv_nsec = le32_to_cpu(resp->ctime_nsec); + open_ret->stable_ctime.tv_sec = le64_to_cpu(resp->stable_ctime); + open_ret->stable_ctime.tv_nsec = le32_to_cpu(resp->stable_ctime_nsec); + +out: + free_sm_outbuf(&sm); + return ret; +} + +void hmdfs_send_close(struct hmdfs_peer *con, const struct hmdfs_fid *fid) +{ + size_t send_len = sizeof(struct release_request); + struct release_request *release_req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = release_req, + .len = send_len, + }; + hmdfs_init_cmd(&sm.operations, F_RELEASE); + + if (!release_req) + return; + + release_req->file_ver = cpu_to_le64(fid->ver); + release_req->file_id = cpu_to_le32(fid->id); + + hmdfs_sendmessage_request(con, &sm); + kfree(release_req); +} + +int hmdfs_send_fsync(struct hmdfs_peer *con, const struct hmdfs_fid *fid, + __s64 start, __s64 end, __s32 datasync) +{ + int ret; + struct fsync_request *fsync_req = + kzalloc(sizeof(struct fsync_request), GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = fsync_req, + .len = sizeof(struct fsync_request), + }; + + hmdfs_init_cmd(&sm.operations, F_FSYNC); + if (!fsync_req) + return -ENOMEM; + + fsync_req->file_ver = cpu_to_le64(fid->ver); + fsync_req->file_id = cpu_to_le32(fid->id); + fsync_req->datasync = cpu_to_le32(datasync); + fsync_req->start = cpu_to_le64(start); + fsync_req->end = cpu_to_le64(end); + + ret = hmdfs_sendmessage_request(con, &sm); + + free_sm_outbuf(&sm); + kfree(fsync_req); + return ret; +} + +int hmdfs_client_readpage(struct hmdfs_peer *con, const struct hmdfs_fid *fid, + struct page *page) +{ + int ret; + size_t send_len = sizeof(struct readpage_request); + struct readpage_request *read_data = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = read_data, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_READPAGE); + if (!read_data) { + unlock_page(page); + return -ENOMEM; + } + + sm.out_buf = page; + read_data->file_ver = cpu_to_le64(fid->ver); 
+ read_data->file_id = cpu_to_le32(fid->id); + read_data->size = cpu_to_le32(HMDFS_PAGE_SIZE); + read_data->index = cpu_to_le64(page->index); + ret = hmdfs_sendpage_request(con, &sm); + kfree(read_data); + return ret; +} + +bool hmdfs_usr_sig_pending(struct task_struct *p) +{ + sigset_t *sig = &p->pending.signal; + + if (likely(!signal_pending(p))) + return false; + return sigismember(sig, SIGINT) || sigismember(sig, SIGTERM) || + sigismember(sig, SIGKILL); +} + +void hmdfs_client_writepage_done(struct hmdfs_inode_info *info, + struct hmdfs_writepage_context *ctx) +{ + struct page *page = ctx->page; + bool unlock = ctx->rsem_held; + + SetPageUptodate(page); + end_page_writeback(page); + if (unlock) + up_read(&info->wpage_sem); + unlock_page(page); +} + +static void hmdfs_client_writepage_err(struct hmdfs_peer *peer, + struct hmdfs_inode_info *info, + struct hmdfs_writepage_context *ctx, + int err) +{ + struct page *page = ctx->page; + bool unlock = ctx->rsem_held; + + if (err == -ENOMEM || err == -EAGAIN || err == -ESHUTDOWN || + err == -ETIME) + SetPageUptodate(page); + else + hmdfs_info("Page %ld of file %u writeback err %d devid %llu", + page->index, ctx->fid.id, err, peer->device_id); + + /* + * Current and subsequent writebacks have been canceled by the + * user, leaving these pages' states in chaos. Read pages in + * the future to update these pages. + */ + if (ctx->sync_all && hmdfs_usr_sig_pending(ctx->caller)) + ClearPageUptodate(page); + + if (ctx->sync_all || !time_is_after_eq_jiffies(ctx->timeout) || + !(err == -ETIME || hmdfs_need_redirty_page(info, err))) { + SetPageError(page); + mapping_set_error(page->mapping, -EIO); + } else { + __set_page_dirty_nobuffers(page); + account_page_redirty(page); + } + + end_page_writeback(page); + if (unlock) + up_read(&info->wpage_sem); + unlock_page(page); +} + +static inline bool +hmdfs_no_timedout_sync_write(struct hmdfs_writepage_context *ctx) +{ + return ctx->sync_all && time_is_after_eq_jiffies(ctx->timeout); +} + +static inline bool +hmdfs_client_rewrite_for_timeout(struct hmdfs_writepage_context *ctx, int err) +{ + return (err == -ETIME && hmdfs_no_timedout_sync_write(ctx) && + !hmdfs_usr_sig_pending(ctx->caller)); +} + +static inline bool +hmdfs_client_rewrite_for_offline(struct hmdfs_sb_info *sbi, + struct hmdfs_writepage_context *ctx, int err) +{ + struct hmdfs_inode_info *info = hmdfs_i(ctx->page->mapping->host); + unsigned int status = READ_ONCE(info->stash_status); + + /* + * No retry if offline occurs during inode restoration. + * + * Do retry if local file cache is ready even it is not + * a WB_SYNC_ALL write, else no-sync_all writeback will + * return -EIO, mapping_set_error(mapping, -EIO) will be + * called and it will make the concurrent calling of + * filemap_write_and_wait() in hmdfs_flush_stash_file_data() + * return -EIO. 
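+ * In short: with stash enabled and an offline/timeout error, the retry + * is skipped while restoring, always taken while stashing, and + * otherwise taken only for WB_SYNC_ALL writes that have not timed out.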
+ */ + return (hmdfs_is_stash_enabled(sbi) && + status != HMDFS_REMOTE_INODE_RESTORING && + (hmdfs_no_timedout_sync_write(ctx) || + status == HMDFS_REMOTE_INODE_STASHING) && + hmdfs_is_offline_or_timeout_err(err)); +} + +static inline bool +hmdfs_client_redo_writepage(struct hmdfs_sb_info *sbi, + struct hmdfs_writepage_context *ctx, int err) +{ + return hmdfs_client_rewrite_for_timeout(ctx, err) || + hmdfs_client_rewrite_for_offline(sbi, ctx, err); +} + +static bool hmdfs_remote_write_to_remote(struct hmdfs_inode_info *info) +{ + unsigned int status = READ_ONCE(info->stash_status); + bool stashing; + + if (status != HMDFS_REMOTE_INODE_STASHING) + return true; + + /* Ensure it's OK to use info->cache afterwards */ + spin_lock(&info->stash_lock); + stashing = (info->stash_status == HMDFS_REMOTE_INODE_STASHING); + spin_unlock(&info->stash_lock); + + return !stashing; +} + +int hmdfs_remote_do_writepage(struct hmdfs_peer *con, + struct hmdfs_writepage_context *ctx) +{ + struct hmdfs_inode_info *info = hmdfs_i(ctx->page->mapping->host); + bool to_remote = false; + int err = 0; + + to_remote = hmdfs_remote_write_to_remote(info); + if (to_remote) + err = hmdfs_client_writepage(info->conn, ctx); + else + err = hmdfs_stash_writepage(info->conn, ctx); + if (!err) + return 0; + + if (!(to_remote && + hmdfs_client_rewrite_for_offline(con->sbi, ctx, err))) + return err; + + queue_delayed_work(con->retry_wb_wq, &ctx->retry_dwork, + msecs_to_jiffies(HMDFS_SYNC_WPAGE_RETRY_MS)); + + return 0; +} + +void hmdfs_remote_writepage_retry(struct work_struct *work) +{ + struct hmdfs_writepage_context *ctx = + container_of(work, struct hmdfs_writepage_context, + retry_dwork.work); + struct hmdfs_inode_info *info = hmdfs_i(ctx->page->mapping->host); + struct hmdfs_peer *peer = info->conn; + const struct cred *old_cred = NULL; + int err; + + old_cred = hmdfs_override_creds(peer->sbi->cred); + err = hmdfs_remote_do_writepage(peer, ctx); + hmdfs_revert_creds(old_cred); + if (err) { + hmdfs_client_writepage_err(peer, info, ctx, err); + put_task_struct(ctx->caller); + kfree(ctx); + } +} + +void hmdfs_writepage_cb(struct hmdfs_peer *peer, const struct hmdfs_req *req, + const struct hmdfs_resp *resp) +{ + struct hmdfs_writepage_context *ctx = req->private; + struct hmdfs_inode_info *info = hmdfs_i(ctx->page->mapping->host); + int ret = resp->ret_code; + unsigned long page_index = ctx->page->index; + + trace_hmdfs_writepage_cb_enter(peer, info->remote_ino, page_index, ret); + + if (!ret) { + hmdfs_client_writepage_done(info, ctx); + atomic64_inc(&info->write_counter); + goto cleanup_all; + } + + if (hmdfs_client_redo_writepage(peer->sbi, ctx, ret)) { + ret = hmdfs_remote_do_writepage(peer, ctx); + if (!ret) + goto cleanup_req; + WARN_ON(ret == -ETIME); + } + + hmdfs_client_writepage_err(peer, info, ctx, ret); + +cleanup_all: + put_task_struct(ctx->caller); + kfree(ctx); +cleanup_req: + kfree(req->data); + + trace_hmdfs_writepage_cb_exit(peer, info->remote_ino, page_index, ret); +} + +int hmdfs_client_writepage(struct hmdfs_peer *con, + struct hmdfs_writepage_context *param) +{ + int ret = 0; + size_t send_len = sizeof(struct writepage_request) + HMDFS_PAGE_SIZE; + struct writepage_request *write_data = kzalloc(send_len, GFP_NOFS); + struct hmdfs_req req; + char *data = NULL; + + if (unlikely(!write_data)) + return -ENOMEM; + + WARN_ON(!PageLocked(param->page)); // VFS + WARN_ON(PageDirty(param->page)); // VFS + WARN_ON(!PageWriteback(param->page)); // hmdfs + + write_data->file_ver = cpu_to_le64(param->fid.ver); + 
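/* the request always carries a full page; count marks the valid bytes */ +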
write_data->file_id = cpu_to_le32(param->fid.id); + write_data->index = cpu_to_le64(param->page->index); + write_data->count = cpu_to_le32(param->count); + data = kmap(param->page); + memcpy((char *)write_data->buf, data, HMDFS_PAGE_SIZE); + kunmap(param->page); + req.data = write_data; + req.data_len = send_len; + + req.private = param; + req.private_len = sizeof(*param); + + req.timeout = TIMEOUT_CONFIG; + hmdfs_init_cmd(&req.operations, F_WRITEPAGE); + ret = hmdfs_send_async_request(con, &req); + if (unlikely(ret)) + kfree(write_data); + return ret; +} + +void hmdfs_client_recv_readpage(struct hmdfs_head_cmd *head, int err, + struct hmdfs_async_work *async_work) +{ + struct page *page = async_work->page; + int ret = le32_to_cpu(head->ret_code); + struct hmdfs_inode_info *info = hmdfs_i(page->mapping->host); + unsigned long page_index = page->index; + + if (!err) + SetPageUptodate(page); + else if (err == -EBADF) + /* There may be a stale fd caused by the fid version; reopen */ + set_bit(HMDFS_FID_NEED_OPEN, &info->fid_flags); + + hmdfs_client_resp_statis(async_work->head.peer->sbi, F_READPAGE, + HMDFS_RESP_NORMAL, async_work->start, jiffies); + + trace_hmdfs_client_recv_readpage(async_work->head.peer, + info->remote_ino, page_index, ret); + + asw_done(async_work); +} + +/* Read the cached dentry file at path and write it into filp */ +int hmdfs_client_start_readdir(struct hmdfs_peer *con, struct file *filp, + const char *path, int path_len, + struct hmdfs_dcache_header *header) +{ + int ret; + size_t send_len = sizeof(struct readdir_request) + path_len + 1; + struct readdir_request *req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = req, + .len = send_len, + .local_filp = filp, + }; + + hmdfs_init_cmd(&sm.operations, F_ITERATE); + if (!req) + return -ENOMEM; + + /* add a ref or it will be released at msg put */ + get_file(sm.local_filp); + req->path_len = cpu_to_le32(path_len); + strncpy(req->path, path, path_len); + + /* + * If we already have a cache file, verify it.
If it is + * uptodate, then we don't have to transfer a new one + */ + if (header) { + req->dcache_crtime = header->dcache_crtime; + req->dcache_crtime_nsec = header->dcache_crtime_nsec; + req->dentry_ctime = header->dentry_ctime; + req->dentry_ctime_nsec = header->dentry_ctime_nsec; + req->num = header->num; + req->verify_cache = cpu_to_le32(1); + } + + ret = hmdfs_sendmessage_request(con, &sm); + kfree(req); + return ret; +} + +int hmdfs_client_start_mkdir(struct hmdfs_peer *con, + const char *path, const char *name, + umode_t mode, struct hmdfs_lookup_ret *mkdir_ret) +{ + int ret = 0; + int path_len = strlen(path); + int name_len = strlen(name); + size_t send_len = sizeof(struct mkdir_request) + path_len + 1 + + name_len + 1; + struct mkdir_request *mkdir_req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_inodeinfo_response *resp = NULL; + struct hmdfs_send_command sm = { + .data = mkdir_req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_MKDIR); + if (!mkdir_req) + return -ENOMEM; + + mkdir_req->path_len = cpu_to_le32(path_len); + mkdir_req->name_len = cpu_to_le32(name_len); + mkdir_req->mode = cpu_to_le16(mode); + strncpy(mkdir_req->path, path, path_len); + strncpy(mkdir_req->path + path_len + 1, name, name_len); + + ret = hmdfs_sendmessage_request(con, &sm); + if (ret == -ENOENT || ret == -ETIME || ret == -EOPNOTSUPP) + goto out; + if (!sm.out_buf) { + ret = -ENOENT; + goto out; + } + resp = sm.out_buf; + mkdir_ret->i_mode = le16_to_cpu(resp->i_mode); + mkdir_ret->i_size = le64_to_cpu(resp->i_size); + mkdir_ret->i_mtime = le64_to_cpu(resp->i_mtime); + mkdir_ret->i_mtime_nsec = le32_to_cpu(resp->i_mtime_nsec); + mkdir_ret->i_ino = le64_to_cpu(resp->i_ino); + +out: + free_sm_outbuf(&sm); + kfree(mkdir_req); + return ret; +} + +int hmdfs_client_start_create(struct hmdfs_peer *con, + const char *path, const char *name, + umode_t mode, bool want_excl, + struct hmdfs_lookup_ret *create_ret) +{ + int ret = 0; + int path_len = strlen(path); + int name_len = strlen(name); + size_t send_len = sizeof(struct create_request) + path_len + 1 + + name_len + 1; + struct create_request *create_req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_inodeinfo_response *resp = NULL; + struct hmdfs_send_command sm = { + .data = create_req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_CREATE); + if (!create_req) + return -ENOMEM; + + create_req->path_len = cpu_to_le32(path_len); + create_req->name_len = cpu_to_le32(name_len); + create_req->mode = cpu_to_le16(mode); + create_req->want_excl = want_excl; + strncpy(create_req->path, path, path_len); + strncpy(create_req->path + path_len + 1, name, name_len); + + ret = hmdfs_sendmessage_request(con, &sm); + if (ret == -ENOENT || ret == -ETIME || ret == -EOPNOTSUPP) + goto out; + if (!sm.out_buf) { + ret = -ENOENT; + goto out; + } + resp = sm.out_buf; + create_ret->i_mode = le16_to_cpu(resp->i_mode); + create_ret->i_size = le64_to_cpu(resp->i_size); + create_ret->i_mtime = le64_to_cpu(resp->i_mtime); + create_ret->i_mtime_nsec = le32_to_cpu(resp->i_mtime_nsec); + create_ret->i_ino = le64_to_cpu(resp->i_ino); + +out: + free_sm_outbuf(&sm); + kfree(create_req); + return ret; +} + +int hmdfs_client_start_rmdir(struct hmdfs_peer *con, const char *path, + const char *name) +{ + int ret; + int path_len = strlen(path); + int name_len = strlen(name); + size_t send_len = sizeof(struct rmdir_request) + path_len + 1 + + name_len + 1; + struct rmdir_request *rmdir_req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = 
rmdir_req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_RMDIR); + if (!rmdir_req) + return -ENOMEM; + + rmdir_req->path_len = cpu_to_le32(path_len); + rmdir_req->name_len = cpu_to_le32(name_len); + strncpy(rmdir_req->path, path, path_len); + strncpy(rmdir_req->path + path_len + 1, name, name_len); + + ret = hmdfs_sendmessage_request(con, &sm); + free_sm_outbuf(&sm); + kfree(rmdir_req); + return ret; +} + +int hmdfs_client_start_unlink(struct hmdfs_peer *con, const char *path, + const char *name) +{ + int ret; + int path_len = strlen(path); + int name_len = strlen(name); + size_t send_len = sizeof(struct unlink_request) + path_len + 1 + + name_len + 1; + struct unlink_request *unlink_req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = unlink_req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_UNLINK); + if (!unlink_req) + return -ENOMEM; + + unlink_req->path_len = cpu_to_le32(path_len); + unlink_req->name_len = cpu_to_le32(name_len); + strncpy(unlink_req->path, path, path_len); + strncpy(unlink_req->path + path_len + 1, name, name_len); + + ret = hmdfs_sendmessage_request(con, &sm); + kfree(unlink_req); + free_sm_outbuf(&sm); + return ret; +} + +int hmdfs_client_start_rename(struct hmdfs_peer *con, const char *old_path, + const char *old_name, const char *new_path, + const char *new_name, unsigned int flags) +{ + int ret; + int old_path_len = strlen(old_path); + int new_path_len = strlen(new_path); + int old_name_len = strlen(old_name); + int new_name_len = strlen(new_name); + + size_t send_len = sizeof(struct rename_request) + old_path_len + 1 + + new_path_len + 1 + old_name_len + 1 + new_name_len + + 1; + struct rename_request *rename_req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = rename_req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_RENAME); + if (!rename_req) + return -ENOMEM; + + rename_req->old_path_len = cpu_to_le32(old_path_len); + rename_req->new_path_len = cpu_to_le32(new_path_len); + rename_req->old_name_len = cpu_to_le32(old_name_len); + rename_req->new_name_len = cpu_to_le32(new_name_len); + rename_req->flags = cpu_to_le32(flags); + + strncpy(rename_req->path, old_path, old_path_len); + strncpy(rename_req->path + old_path_len + 1, new_path, new_path_len); + + strncpy(rename_req->path + old_path_len + 1 + new_path_len + 1, + old_name, old_name_len); + strncpy(rename_req->path + old_path_len + 1 + new_path_len + 1 + + old_name_len + 1, + new_name, new_name_len); + + ret = hmdfs_sendmessage_request(con, &sm); + free_sm_outbuf(&sm); + kfree(rename_req); + return ret; +} + +int hmdfs_send_setattr(struct hmdfs_peer *con, const char *send_buf, + struct setattr_info *attr_info) +{ + int ret; + int path_len = strlen(send_buf); + size_t send_len = path_len + 1 + sizeof(struct setattr_request); + struct setattr_request *setattr_req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = setattr_req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_SETATTR); + if (!setattr_req) + return -ENOMEM; + + strcpy(setattr_req->buf, send_buf); + setattr_req->path_len = cpu_to_le32(path_len); + setattr_req->valid = cpu_to_le32(attr_info->valid); + setattr_req->size = cpu_to_le64(attr_info->size); + setattr_req->mtime = cpu_to_le64(attr_info->mtime); + setattr_req->mtime_nsec = cpu_to_le32(attr_info->mtime_nsec); + ret = hmdfs_sendmessage_request(con, &sm); + kfree(setattr_req); + return ret; +} + +static void hmdfs_update_getattr_ret(struct 
getattr_response *resp, + struct hmdfs_getattr_ret *result) +{ + struct kstat *stat = &result->stat; + + stat->result_mask = le32_to_cpu(resp->result_mask); + if (stat->result_mask == 0) + return; + + stat->ino = le64_to_cpu(resp->ino); + stat->mode = le16_to_cpu(resp->mode); + stat->nlink = le32_to_cpu(resp->nlink); + stat->uid.val = le32_to_cpu(resp->uid); + stat->gid.val = le32_to_cpu(resp->gid); + stat->size = le64_to_cpu(resp->size); + stat->blocks = le64_to_cpu(resp->blocks); + stat->blksize = le32_to_cpu(resp->blksize); + stat->atime.tv_sec = le64_to_cpu(resp->atime); + stat->atime.tv_nsec = le32_to_cpu(resp->atime_nsec); + stat->mtime.tv_sec = le64_to_cpu(resp->mtime); + stat->mtime.tv_nsec = le32_to_cpu(resp->mtime_nsec); + stat->ctime.tv_sec = le64_to_cpu(resp->ctime); + stat->ctime.tv_nsec = le32_to_cpu(resp->ctime_nsec); + stat->btime.tv_sec = le64_to_cpu(resp->crtime); + stat->btime.tv_nsec = le32_to_cpu(resp->crtime_nsec); + result->fsid = le64_to_cpu(resp->fsid); + /* currently not used */ + result->i_flags = 0; +} + +int hmdfs_send_getattr(struct hmdfs_peer *con, const char *send_buf, + unsigned int lookup_flags, + struct hmdfs_getattr_ret *result) +{ + int path_len = strlen(send_buf); + size_t send_len = path_len + 1 + sizeof(struct getattr_request); + int ret = 0; + struct getattr_request *req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_GETATTR); + if (!req) + return -ENOMEM; + + req->path_len = cpu_to_le32(path_len); + req->lookup_flags = cpu_to_le32(lookup_flags); + strncpy(req->buf, send_buf, path_len); + ret = hmdfs_sendmessage_request(con, &sm); + if (!ret && (sm.out_len == 0 || !sm.out_buf)) + ret = -ENOENT; + if (ret) + goto out; + + hmdfs_update_getattr_ret(sm.out_buf, result); + +out: + kfree(req); + free_sm_outbuf(&sm); + return ret; +} + +static void hmdfs_update_statfs_ret(struct statfs_response *resp, + struct kstatfs *buf) +{ + buf->f_type = le64_to_cpu(resp->f_type); + buf->f_bsize = le64_to_cpu(resp->f_bsize); + buf->f_blocks = le64_to_cpu(resp->f_blocks); + buf->f_bfree = le64_to_cpu(resp->f_bfree); + buf->f_bavail = le64_to_cpu(resp->f_bavail); + buf->f_files = le64_to_cpu(resp->f_files); + buf->f_ffree = le64_to_cpu(resp->f_ffree); + buf->f_fsid.val[0] = le32_to_cpu(resp->f_fsid_0); + buf->f_fsid.val[1] = le32_to_cpu(resp->f_fsid_1); + buf->f_namelen = le64_to_cpu(resp->f_namelen); + buf->f_frsize = le64_to_cpu(resp->f_frsize); + buf->f_flags = le64_to_cpu(resp->f_flags); + buf->f_spare[0] = le64_to_cpu(resp->f_spare_0); + buf->f_spare[1] = le64_to_cpu(resp->f_spare_1); + buf->f_spare[2] = le64_to_cpu(resp->f_spare_2); + buf->f_spare[3] = le64_to_cpu(resp->f_spare_3); +} + +int hmdfs_send_statfs(struct hmdfs_peer *con, const char *path, + struct kstatfs *buf) +{ + int ret; + int path_len = strlen(path); + size_t send_len = sizeof(struct statfs_request) + path_len + 1; + struct statfs_request *req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_STATFS); + if (!req) + return -ENOMEM; + + req->path_len = cpu_to_le32(path_len); + strncpy(req->path, path, path_len); + + ret = hmdfs_sendmessage_request(con, &sm); + + if (ret == -ETIME) + ret = -EIO; + if (!ret && (sm.out_len == 0 || !sm.out_buf)) + ret = -ENOENT; + if (ret) + goto out; + + hmdfs_update_statfs_ret(sm.out_buf, buf); +out: + kfree(req); + free_sm_outbuf(&sm); + return ret; +} + +int 
hmdfs_send_syncfs(struct hmdfs_peer *con, int syncfs_timeout) +{ + int ret; + struct hmdfs_req req; + struct hmdfs_sb_info *sbi = con->sbi; + struct syncfs_request *syncfs_req = + kzalloc(sizeof(struct syncfs_request), GFP_KERNEL); + + if (!syncfs_req) { + hmdfs_err("cannot allocate syncfs_request"); + return -ENOMEM; + } + + hmdfs_init_cmd(&req.operations, F_SYNCFS); + req.timeout = syncfs_timeout; + + syncfs_req->version = cpu_to_le64(sbi->hsi.version); + req.data = syncfs_req; + req.data_len = sizeof(*syncfs_req); + + ret = hmdfs_send_async_request(con, &req); + if (ret) { + kfree(syncfs_req); + hmdfs_err("ret fail with %d", ret); + } + + return ret; +} + +static void hmdfs_update_getxattr_ret(struct getxattr_response *resp, + void *value, size_t o_size, int *ret) +{ + ssize_t size = le32_to_cpu(resp->size); + + if (o_size && o_size < size) { + *ret = -ERANGE; + return; + } + + if (o_size) + memcpy(value, resp->value, size); + + *ret = size; +} + +int hmdfs_send_getxattr(struct hmdfs_peer *con, const char *send_buf, + const char *name, void *value, size_t size) +{ + size_t path_len = strlen(send_buf); + size_t name_len = strlen(name); + size_t send_len = path_len + name_len + + sizeof(struct getxattr_request) + 2; + int ret = 0; + struct getxattr_request *req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_GETXATTR); + if (!req) + return -ENOMEM; + + req->path_len = cpu_to_le32(path_len); + req->name_len = cpu_to_le32(name_len); + req->size = cpu_to_le32(size); + strncpy(req->buf, send_buf, path_len); + strncpy(req->buf + path_len + 1, name, name_len); + ret = hmdfs_sendmessage_request(con, &sm); + if (!ret && (sm.out_len == 0 || !sm.out_buf)) + ret = -ENOENT; + if (ret) + goto out; + + hmdfs_update_getxattr_ret(sm.out_buf, value, size, &ret); + +out: + kfree(req); + free_sm_outbuf(&sm); + return ret; +} + +int hmdfs_send_setxattr(struct hmdfs_peer *con, const char *send_buf, + const char *name, const void *value, + size_t size, int flags) +{ + size_t path_len = strlen(send_buf); + size_t name_len = strlen(name); + size_t send_len = path_len + name_len + size + 2 + + sizeof(struct setxattr_request); + int ret = 0; + struct setxattr_request *req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_SETXATTR); + if (!req) + return -ENOMEM; + + req->path_len = cpu_to_le32(path_len); + req->name_len = cpu_to_le32(name_len); + req->size = cpu_to_le32(size); + req->flags = cpu_to_le32(flags); + strncpy(req->buf, send_buf, path_len); + strncpy(req->buf + path_len + 1, name, name_len); + memcpy(req->buf + path_len + name_len + 2, value, size); + if (!value) + req->del = true; + ret = hmdfs_sendmessage_request(con, &sm); + kfree(req); + return ret; +} + +static void hmdfs_update_listxattr_ret(struct listxattr_response *resp, + char *list, size_t o_size, ssize_t *ret) +{ + ssize_t size = le32_to_cpu(resp->size); + + if (o_size && o_size < size) { + *ret = -ERANGE; + return; + } + + /* multi name split with '\0', use memcpy */ + if (o_size) + memcpy(list, resp->list, size); + + *ret = size; +} + +ssize_t hmdfs_send_listxattr(struct hmdfs_peer *con, const char *send_buf, + char *list, size_t size) +{ + size_t path_len = strlen(send_buf); + size_t send_len = path_len + 1 + sizeof(struct listxattr_request); + ssize_t ret = 0; + struct listxattr_request *req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command 
sm = { + .data = req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_LISTXATTR); + if (!req) + return -ENOMEM; + + req->path_len = cpu_to_le32(path_len); + req->size = cpu_to_le32(size); + strncpy(req->buf, send_buf, path_len); + ret = hmdfs_sendmessage_request(con, &sm); + if (!ret && (sm.out_len == 0 || !sm.out_buf)) + ret = -ENOENT; + if (ret) + goto out; + + hmdfs_update_listxattr_ret(sm.out_buf, list, size, &ret); + +out: + kfree(req); + free_sm_outbuf(&sm); + return ret; +} + +void hmdfs_recv_syncfs_cb(struct hmdfs_peer *peer, const struct hmdfs_req *req, + const struct hmdfs_resp *resp) +{ + struct hmdfs_sb_info *sbi = peer->sbi; + struct syncfs_request *syncfs_req = (struct syncfs_request *)req->data; + + WARN_ON(!syncfs_req); + spin_lock(&sbi->hsi.v_lock); + if (le64_to_cpu(syncfs_req->version) != sbi->hsi.version) { + hmdfs_info( + "Recv stale syncfs resp[ver: %llu] from device %llu, current ver %llu", + le64_to_cpu(syncfs_req->version), peer->device_id, + sbi->hsi.version); + spin_unlock(&sbi->hsi.v_lock); + goto out; + } + + if (!sbi->hsi.remote_ret) + sbi->hsi.remote_ret = resp->ret_code; + + if (resp->ret_code) { + hmdfs_err("Recv syncfs error code %d from device %llu", + resp->ret_code, peer->device_id); + } else { + /* + * Set @sb_dirty_count to zero if no one else produces + * dirty data on the remote server during the remote sync. + */ + atomic64_cmpxchg(&peer->sb_dirty_count, + peer->old_sb_dirty_count, 0); + } + + atomic_dec(&sbi->hsi.wait_count); + spin_unlock(&sbi->hsi.v_lock); + wake_up_interruptible(&sbi->hsi.wq); + +out: + kfree(syncfs_req); +} + +void hmdfs_send_drop_push(struct hmdfs_peer *con, const char *path) +{ + int path_len = strlen(path); + size_t send_len = sizeof(struct drop_push_request) + path_len + 1; + struct drop_push_request *dp_req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = dp_req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_DROP_PUSH); + if (!dp_req) + return; + + dp_req->path_len = cpu_to_le32(path_len); + strncpy(dp_req->path, path, path_len); + + hmdfs_sendmessage_request(con, &sm); + kfree(dp_req); +} + +static void *hmdfs_get_msg_next(struct hmdfs_peer *peer, int *id) +{ + struct hmdfs_msg_idr_head *head = NULL; + + spin_lock(&peer->idr_lock); + head = idr_get_next(&peer->msg_idr, id); + if (head && head->type < MSG_IDR_MAX && head->type >= 0) + kref_get(&head->ref); + + spin_unlock(&peer->idr_lock); + + return head; +} + +void hmdfs_client_offline_notify(struct hmdfs_peer *conn, int evt, + unsigned int seq) +{ + int id; + int count = 0; + struct hmdfs_msg_idr_head *head = NULL; + + for (id = 0; (head = hmdfs_get_msg_next(conn, &id)) != NULL; ++id) { + switch (head->type) { + case MSG_IDR_1_0_NONE: + head_put(head); + head_put(head); + break; + case MSG_IDR_MESSAGE_SYNC: + case MSG_IDR_1_0_MESSAGE_SYNC: + hmdfs_response_wakeup((struct sendmsg_wait_queue *)head, + -ETIME, 0, NULL); + hmdfs_debug("wakeup id=%d", head->msg_id); + msg_put((struct sendmsg_wait_queue *)head); + break; + case MSG_IDR_MESSAGE_ASYNC: + hmdfs_wakeup_parasite( + (struct hmdfs_msg_parasite *)head); + hmdfs_debug("wakeup parasite id=%d", head->msg_id); + mp_put((struct hmdfs_msg_parasite *)head); + break; + case MSG_IDR_PAGE: + case MSG_IDR_1_0_PAGE: + hmdfs_wakeup_async_work( + (struct hmdfs_async_work *)head); + hmdfs_debug("wakeup async work id=%d", head->msg_id); + asw_put((struct hmdfs_async_work *)head); + break; + default: + hmdfs_err("Bad type=%d id=%d", head->type, + head->msg_id); + break; + } + +
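+ /* + * At this point the message just taken from the idr has been + * forced to completion: sync waiters were woken with -ETIME via + * hmdfs_response_wakeup(), async parasites via + * hmdfs_wakeup_parasite(), pending page reads via + * hmdfs_wakeup_async_work(), and the matching *_put() dropped + * the extra kref taken in hmdfs_get_msg_next(). + */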
count++; + /* + * If there are too many idr entries to process, resched every + * 512 messages to avoid a soft lockup. + */ + if (count % HMDFS_IDR_RESCHED_COUNT == 0) + cond_resched(); + } +} + +static struct hmdfs_node_cb_desc client_cb[] = { + { + .evt = NODE_EVT_OFFLINE, + .sync = true, + .min_version = DFS_1_0, + .fn = hmdfs_client_offline_notify, + }, +}; + +void __init hmdfs_client_add_node_evt_cb(void) +{ + hmdfs_node_add_evt_cb(client_cb, ARRAY_SIZE(client_cb)); +} diff --git a/fs/hmdfs/hmdfs_client.h b/fs/hmdfs/hmdfs_client.h new file mode 100644 index 000000000000..ab2867dca457 --- /dev/null +++ b/fs/hmdfs/hmdfs_client.h @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/hmdfs_client.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_CLIENT_H +#define HMDFS_CLIENT_H + +#include "comm/transport.h" +#include "hmdfs_dentryfile.h" +#include "hmdfs_device_view.h" + +struct hmdfs_open_ret { + struct hmdfs_fid fid; + __u64 file_size; + __u64 ino; + struct hmdfs_time_t remote_ctime; + struct hmdfs_time_t stable_ctime; +}; + +struct hmdfs_writepage_context { + struct hmdfs_fid fid; + uint32_t count; + bool sync_all; + bool rsem_held; + unsigned long timeout; + struct task_struct *caller; + struct page *page; + struct delayed_work retry_dwork; +}; + +int hmdfs_client_start_readdir(struct hmdfs_peer *con, struct file *filp, + const char *path, int path_len, + struct hmdfs_dcache_header *header); +int hmdfs_client_start_mkdir(struct hmdfs_peer *con, + const char *path, const char *name, + umode_t mode, struct hmdfs_lookup_ret *mkdir_ret); +int hmdfs_client_start_create(struct hmdfs_peer *con, + const char *path, const char *name, + umode_t mode, bool want_excl, + struct hmdfs_lookup_ret *create_ret); +int hmdfs_client_start_rmdir(struct hmdfs_peer *con, const char *path, + const char *name); +int hmdfs_client_start_unlink(struct hmdfs_peer *con, const char *path, + const char *name); +int hmdfs_client_start_rename(struct hmdfs_peer *con, const char *old_path, + const char *old_name, const char *new_path, + const char *new_name, unsigned int flags); + +static inline bool hmdfs_is_offline_err(int err) +{ + /* + * writepage() will get -EBADF if the peer comes online + * again during an offline stash, so -EBADF also + * needs a redo. + */ + return (err == -EAGAIN || err == -ESHUTDOWN || err == -EBADF); +} + +static inline bool hmdfs_is_offline_or_timeout_err(int err) +{ + return hmdfs_is_offline_err(err) || err == -ETIME; +} + +static inline bool hmdfs_need_redirty_page(const struct hmdfs_inode_info *info, + int err) +{ + /* + * 1. stash is enabled + * 2. offline related error + * 3.
no restore + */ + return hmdfs_is_stash_enabled(info->conn->sbi) && + hmdfs_is_offline_err(err) && + READ_ONCE(info->stash_status) != HMDFS_REMOTE_INODE_RESTORING; +} + +bool hmdfs_usr_sig_pending(struct task_struct *p); +void hmdfs_writepage_cb(struct hmdfs_peer *peer, const struct hmdfs_req *req, + const struct hmdfs_resp *resp); +int hmdfs_client_writepage(struct hmdfs_peer *con, + struct hmdfs_writepage_context *param); +int hmdfs_remote_do_writepage(struct hmdfs_peer *con, + struct hmdfs_writepage_context *ctx); +void hmdfs_remote_writepage_retry(struct work_struct *work); + +void hmdfs_client_writepage_done(struct hmdfs_inode_info *info, + struct hmdfs_writepage_context *ctx); + +int hmdfs_send_open(struct hmdfs_peer *con, const char *send_buf, + __u8 file_type, struct hmdfs_open_ret *open_ret); +void hmdfs_send_close(struct hmdfs_peer *con, const struct hmdfs_fid *fid); +int hmdfs_send_fsync(struct hmdfs_peer *con, const struct hmdfs_fid *fid, + __s64 start, __s64 end, __s32 datasync); +int hmdfs_client_readpage(struct hmdfs_peer *con, const struct hmdfs_fid *fid, + struct page *page); + +int hmdfs_send_setattr(struct hmdfs_peer *con, const char *send_buf, + struct setattr_info *attr_info); +int hmdfs_send_getattr(struct hmdfs_peer *con, const char *send_buf, + unsigned int lookup_flags, + struct hmdfs_getattr_ret *getattr_result); +int hmdfs_send_statfs(struct hmdfs_peer *con, const char *path, + struct kstatfs *buf); +void hmdfs_client_recv_readpage(struct hmdfs_head_cmd *head, int err, + struct hmdfs_async_work *async_work); +int hmdfs_send_syncfs(struct hmdfs_peer *con, int syncfs_timeout); +int hmdfs_send_getxattr(struct hmdfs_peer *con, const char *send_buf, + const char *name, void *value, size_t size); +int hmdfs_send_setxattr(struct hmdfs_peer *con, const char *send_buf, + const char *name, const void *val, + size_t size, int flags); +ssize_t hmdfs_send_listxattr(struct hmdfs_peer *con, const char *send_buf, + char *list, size_t size); +void hmdfs_recv_syncfs_cb(struct hmdfs_peer *peer, const struct hmdfs_req *req, + const struct hmdfs_resp *resp); + +void __init hmdfs_client_add_node_evt_cb(void); +#endif diff --git a/fs/hmdfs/hmdfs_dentryfile.c b/fs/hmdfs/hmdfs_dentryfile.c new file mode 100644 index 000000000000..98b215ba2d8e --- /dev/null +++ b/fs/hmdfs/hmdfs_dentryfile.c @@ -0,0 +1,2680 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/hmdfs_dentryfile.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include "hmdfs_dentryfile.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "authority/authentication.h" +#include "comm/transport.h" +#include "hmdfs_client.h" +#include "hmdfs_device_view.h" + +/* Hashing code copied from f2fs */ +#define HMDFS_HASH_COL_BIT ((0x1ULL) << 63) +#define DELTA 0x9E3779B9 + +static bool is_dot_dotdot(const unsigned char *name, __u32 len) +{ + if (len == 1 && name[0] == '.') + return true; + + if (len == 2 && name[0] == '.' 
&& name[1] == '.') + return true; + + return false; +} + +static void str2hashbuf(const unsigned char *msg, size_t len, unsigned int *buf, + int num, bool case_sense) +{ + unsigned int pad, val; + int i; + unsigned char c; + + pad = (__u32)len | ((__u32)len << 8); + pad |= pad << 16; + + val = pad; + if (len > (size_t)num * 4) + len = (size_t)num * 4; + for (i = 0; i < len; i++) { + if ((i % 4) == 0) + val = pad; + c = msg[i]; + if (!case_sense) + c = tolower(c); + val = c + (val << 8); + if ((i % 4) == 3) { + *buf++ = val; + val = pad; + num--; + } + } + if (--num >= 0) + *buf++ = val; + while (--num >= 0) + *buf++ = pad; +} + +static void tea_transform(unsigned int buf[4], unsigned int const in[]) +{ + __u32 sum = 0; + __u32 b0 = buf[0], b1 = buf[1]; + __u32 a = in[0], b = in[1], c = in[2], d = in[3]; + int n = 16; + + do { + sum += DELTA; + b0 += ((b1 << 4) + a) ^ (b1 + sum) ^ ((b1 >> 5) + b); + b1 += ((b0 << 4) + c) ^ (b0 + sum) ^ ((b0 >> 5) + d); + } while (--n); + + buf[0] += b0; + buf[1] += b1; +} + +static __u32 hmdfs_dentry_hash(const struct qstr *qstr, bool case_sense) +{ + __u32 hash; + __u32 hmdfs_hash; + const unsigned char *p = qstr->name; + __u32 len = qstr->len; + __u32 in[8], buf[4]; + + if (is_dot_dotdot(p, len)) + return 0; + + /* Initialize the default seed for the hash checksum functions */ + buf[0] = 0x67452301; + buf[1] = 0xefcdab89; + buf[2] = 0x98badcfe; + buf[3] = 0x10325476; + + while (1) { + str2hashbuf(p, len, in, 4, case_sense); + tea_transform(buf, in); + p += 16; + if (len <= 16) + break; + len -= 16; + } + hash = buf[0]; + hmdfs_hash = hash & ~HMDFS_HASH_COL_BIT; + return hmdfs_hash; +} + +static atomic_t curr_ino = ATOMIC_INIT(INUNUMBER_START); +int get_inonumber(void) +{ + return atomic_inc_return(&curr_ino); +} + +static int hmdfs_get_root_dentry_type(struct dentry *dentry, int *is_root) +{ + struct hmdfs_dentry_info *d_info = hmdfs_d(dentry); + + *is_root = 1; + switch (d_info->dentry_type) { + case HMDFS_LAYER_OTHER_LOCAL: + *is_root = 0; + fallthrough; + case HMDFS_LAYER_SECOND_LOCAL: + return HMDFS_LAYER_SECOND_LOCAL; + case HMDFS_LAYER_OTHER_REMOTE: + *is_root = 0; + fallthrough; + case HMDFS_LAYER_SECOND_REMOTE: + return HMDFS_LAYER_SECOND_REMOTE; + default: + hmdfs_info("Unexpected dentry type %d", d_info->dentry_type); + return -EINVAL; + } +} + +static int prepend(char **buffer, int *buflen, const char *str, int namelen) +{ + *buflen -= namelen; + if (*buflen < 0) + return -ENAMETOOLONG; + *buffer -= namelen; + memcpy(*buffer, str, namelen); + return 0; +} + +static int prepend_name(char **buffer, int *buflen, const struct qstr *name) +{ + const char *dname = name->name; + u32 dlen = name->len; + char *p = NULL; + + *buflen -= dlen + 1; + if (*buflen < 0) + return -ENAMETOOLONG; + p = *buffer -= dlen + 1; + *p++ = '/'; + while (dlen--) { + char c = *dname++; + + if (!c) + break; + *p++ = c; + } + return 0; +} + +static char *hmdfs_dentry_path_raw(struct dentry *d, char *buf, int buflen) +{ + struct dentry *dentry = NULL; + char *end = NULL; + char *retval = NULL; + unsigned int len; + unsigned int seq = 0; + int root_flag = 0; + int error = 0; + struct hmdfs_dentry_info *di = hmdfs_d(d); + int hmdfs_root_dentry_type = 0; + + di->time = jiffies; + hmdfs_root_dentry_type = hmdfs_get_root_dentry_type(d, &root_flag); + if (hmdfs_root_dentry_type < 0) + return NULL; + if (root_flag) { + strcpy(buf, "/"); + return buf; + } + rcu_read_lock(); +restart: + dentry = d; + di = hmdfs_d(dentry); + di->time = jiffies; + end = buf + buflen; + len = buflen; + 
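+ /* + * Build the path right to left: start at the buffer tail and + * prepend "/<name>" for each ancestor until the layer root is + * reached. For a dentry at /a/b the buffer ends up as (a sketch): + * + * [ ...unused... |/|a|/|b|\0] + * ^retval + */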
prepend(&end, &len, "\0", 1); + retval = end - 1; + *retval = '/'; + read_seqbegin_or_lock(&rename_lock, &seq); + while (di->dentry_type != hmdfs_root_dentry_type) { + struct dentry *parent = dentry->d_parent; + + prefetch(parent); + error = prepend_name(&end, &len, &dentry->d_name); + if (error) + break; + retval = end; + dentry = parent; + di = hmdfs_d(dentry); + di->time = jiffies; + } + if (!(seq & 1)) + rcu_read_unlock(); + if (need_seqretry(&rename_lock, seq)) { + seq = 1; + goto restart; + } + done_seqretry(&rename_lock, seq); + if (error) + goto Elong; + return retval; +Elong: + return ERR_PTR(-ENAMETOOLONG); +} + +char *hmdfs_get_dentry_relative_path(struct dentry *dentry) +{ + char *final_buf = NULL; + char *buf = NULL; + char *p = NULL; + + buf = kzalloc(PATH_MAX, GFP_KERNEL); + if (!buf) + return NULL; + + final_buf = kzalloc(PATH_MAX, GFP_KERNEL); + if (!final_buf) { + kfree(buf); + return NULL; + } + + /* a NULL dentry returns the root dir */ + if (!dentry) { + strcpy(final_buf, "/"); + kfree(buf); + return final_buf; + } + p = hmdfs_dentry_path_raw(dentry, buf, PATH_MAX); + if (IS_ERR_OR_NULL(p)) { + kfree(buf); + kfree(final_buf); + return NULL; + } + + if (strlen(p) >= PATH_MAX) { + kfree(buf); + kfree(final_buf); + return NULL; + } + strcpy(final_buf, p); + kfree(buf); + return final_buf; +} + +char *hmdfs_get_dentry_absolute_path(const char *rootdir, + const char *relative_path) +{ + char *buf = NULL; + + if (!rootdir || !relative_path) + return NULL; + if (strlen(rootdir) + strlen(relative_path) >= PATH_MAX) + return NULL; + + buf = kzalloc(PATH_MAX, GFP_KERNEL); + if (!buf) + return NULL; + + strcpy(buf, rootdir); + strcat(buf, relative_path); + return buf; +} + +char *hmdfs_connect_path(const char *path, const char *name) +{ + char *buf = NULL; + + if (!path || !name) + return NULL; + + if (strlen(path) + strlen(name) + 1 >= PATH_MAX) + return NULL; + + buf = kzalloc(PATH_MAX, GFP_KERNEL); + if (!buf) + return NULL; + + strcpy(buf, path); + strcat(buf, "/"); + strcat(buf, name); + return buf; +} + +int hmdfs_metainfo_read(struct hmdfs_sb_info *sbi, struct file *filp, + void *buffer, int size, int bidx) +{ + loff_t pos = get_dentry_group_pos(bidx); + + return cache_file_read(sbi, filp, buffer, (size_t)size, &pos); +} + +int hmdfs_metainfo_write(struct hmdfs_sb_info *sbi, struct file *filp, + const void *buffer, int size, int bidx) +{ + loff_t pos = get_dentry_group_pos(bidx); + + return cache_file_write(sbi, filp, buffer, (size_t)size, &pos); +} + +/* + * Bucket addressing for each level: bucket sequence numbers start at 0, + * for example + * level0 bucket0(0) + * level1 bucket0(1) bucket1(2) + * level2 bucket0(3) bucket1(4) bucket2(5) bucket3(6) + * return the bucket number.
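+ * + * e.g. level = 2, buckoffset = 1: (1 << 2) + 1 - 1 = 4, which is + * bucket1(4) in the table above.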
+ */ +static __u32 get_bucketaddr(int level, int buckoffset) +{ + int all_level_bucketaddr = 0; + __u32 curlevelmaxbucks; + + if (level >= MAX_BUCKET_LEVEL) { + hmdfs_err("level = %d overflow", level); + return all_level_bucketaddr; + } + curlevelmaxbucks = (1 << level); + if (buckoffset >= curlevelmaxbucks) { + hmdfs_err("buckoffset %d overflow, level %d has %d buckets max", + buckoffset, level, curlevelmaxbucks); + return all_level_bucketaddr; + } + all_level_bucketaddr = curlevelmaxbucks + buckoffset - 1; + + return all_level_bucketaddr; +} + +static __u32 get_bucket_by_level(int level) +{ + int buckets = 0; + + if (level >= MAX_BUCKET_LEVEL) { + hmdfs_err("level = %d overflow", level); + return buckets; + } + + buckets = (1 << level); + return buckets; +} + +static __u32 get_overall_bucket(int level) +{ + int buckets = 0; + + if (level >= MAX_BUCKET_LEVEL) { + hmdfs_err("level = %d overflow", level); + return buckets; + } + buckets = (1 << (level + 1)) - 1; + return buckets; +} + +static inline loff_t get_dcache_file_size(int level) +{ + loff_t buckets = get_overall_bucket(level); + + return buckets * DENTRYGROUP_SIZE * BUCKET_BLOCKS + DENTRYGROUP_HEADER; +} + +static char *get_relative_path(struct hmdfs_sb_info *sbi, char *from) +{ + char *relative; + + if (strncmp(from, sbi->local_src, strlen(sbi->local_src))) { + hmdfs_warning("orig path do not start with local_src"); + return NULL; + } + relative = from + strlen(sbi->local_src); + if (*relative == '/') + relative++; + return relative; +} + +struct file *hmdfs_get_or_create_dents(struct hmdfs_sb_info *sbi, char *name) +{ + struct path root_path, path; + struct file *filp = NULL; + char *relative; + int err; + + err = kern_path(sbi->local_src, 0, &root_path); + if (err) { + hmdfs_err("kern_path failed err = %d", err); + return NULL; + } + relative = get_relative_path(sbi, name); + if (!relative) { + hmdfs_err("get relative path failed"); + goto err_root_path; + } + err = vfs_path_lookup(root_path.dentry, root_path.mnt, relative, 0, + &path); + if (err) { + hmdfs_err("lookup failed err = %d", err); + goto err_root_path; + } + + filp = hmdfs_server_cache_revalidate(sbi, relative, &path); + if (IS_ERR_OR_NULL(filp)) { + filp = hmdfs_server_rebuild_dents(sbi, &path, NULL, relative); + if (IS_ERR_OR_NULL(filp)) + goto err_lookup_path; + } + +err_lookup_path: + path_put(&path); +err_root_path: + path_put(&root_path); + return filp; +} + +/* read all dentry in target path directory */ +int read_dentry(struct hmdfs_sb_info *sbi, char *file_name, + struct dir_context *ctx) +{ + unsigned long pos = (unsigned long)(ctx->pos); + unsigned long group_id = (pos << (1 + DEV_ID_BIT_NUM)) >> + (POS_BIT_NUM - GROUP_ID_BIT_NUM); + unsigned long offset = pos & OFFSET_BIT_MASK; + struct hmdfs_dentry_group *dentry_group = NULL; + struct file *handler = NULL; + int group_num = 0; + int iterate_result = 0; + int i, j; + const struct cred *saved_cred; + + saved_cred = hmdfs_override_fsids(false); + if (!saved_cred) { + hmdfs_err("prepare cred failed!"); + return -ENOMEM; + } + + + if (!file_name) + return -EINVAL; + + dentry_group = kzalloc(sizeof(*dentry_group), GFP_KERNEL); + if (!dentry_group) + return -ENOMEM; + + handler = hmdfs_get_or_create_dents(sbi, file_name); + if (IS_ERR_OR_NULL(handler)) { + kfree(dentry_group); + return -ENOENT; + } + + group_num = get_dentry_group_cnt(file_inode(handler)); + + for (i = group_id; i < group_num; i++) { + hmdfs_metainfo_read(sbi, handler, dentry_group, + sizeof(struct hmdfs_dentry_group), i); + for (j = offset; j 
< DENTRY_PER_GROUP; j++) { + int len; + int file_type = 0; + bool is_continue; + + len = le16_to_cpu(dentry_group->nsl[j].namelen); + if (!test_bit_le(j, dentry_group->bitmap) || len == 0) + continue; + + if (S_ISDIR(le16_to_cpu(dentry_group->nsl[j].i_mode))) + file_type = DT_DIR; + else if (S_ISREG(le16_to_cpu( + dentry_group->nsl[j].i_mode))) + file_type = DT_REG; + else if (S_ISLNK(le16_to_cpu( + dentry_group->nsl[j].i_mode))) + file_type = DT_LNK; + + pos = hmdfs_set_pos(0, i, j); + is_continue = dir_emit( + ctx, dentry_group->filename[j], len, + le64_to_cpu(dentry_group->nsl[j].i_ino), + file_type); + if (!is_continue) { + ctx->pos = pos; + iterate_result = 1; + goto done; + } + } + offset = 0; + } + +done: + hmdfs_revert_fsids(saved_cred); + kfree(dentry_group); + fput(handler); + return iterate_result; +} + +unsigned int get_max_depth(struct file *filp) +{ + size_t isize; + + isize = get_dentry_group_cnt(file_inode(filp)) / BUCKET_BLOCKS; + + return get_count_order(isize + 1); +} + +struct hmdfs_dentry_group *find_dentry_page(struct hmdfs_sb_info *sbi, + pgoff_t index, struct file *filp) +{ + int size; + struct hmdfs_dentry_group *dentry_blk = NULL; + loff_t pos = get_dentry_group_pos(index); + int err; + + dentry_blk = kmalloc(sizeof(*dentry_blk), GFP_KERNEL); + if (!dentry_blk) + return NULL; + + err = hmdfs_wlock_file(filp, pos, DENTRYGROUP_SIZE); + if (err) { + hmdfs_err("lock file pos %lld failed", pos); + kfree(dentry_blk); + return NULL; + } + + size = cache_file_read(sbi, filp, dentry_blk, (size_t)DENTRYGROUP_SIZE, + &pos); + if (size != DENTRYGROUP_SIZE) { + kfree(dentry_blk); + dentry_blk = NULL; + } + + return dentry_blk; +} + +static ssize_t write_dentry_page(struct file *filp, const void *buffer, + int buffersize, loff_t position) +{ + ssize_t size; + + size = kernel_write(filp, buffer, (size_t)buffersize, &position); + if (size != buffersize) + hmdfs_err("write failed, ret = %zd", size); + + return size; +} + +static struct hmdfs_dentry *find_in_block(struct hmdfs_dentry_group *dentry_blk, + __u32 namehash, + const struct qstr *qstr, + struct hmdfs_dentry **insense_de, + bool case_sense) +{ + struct hmdfs_dentry *de; + unsigned long bit_pos = 0; + int max_len = 0; + + while (bit_pos < DENTRY_PER_GROUP) { + if (!test_bit_le(bit_pos, dentry_blk->bitmap)) { + bit_pos++; + max_len++; + continue; + } + de = &dentry_blk->nsl[bit_pos]; + if (unlikely(!de->namelen)) { + bit_pos++; + continue; + } + + if (le32_to_cpu(de->hash) == namehash && + le16_to_cpu(de->namelen) == qstr->len && + !memcmp(qstr->name, dentry_blk->filename[bit_pos], + le16_to_cpu(de->namelen))) + goto found; + if (!(*insense_de) && !case_sense && + le32_to_cpu(de->hash) == namehash && + le16_to_cpu(de->namelen) == qstr->len && + str_n_case_eq(qstr->name, dentry_blk->filename[bit_pos], + le16_to_cpu(de->namelen))) + *insense_de = de; + max_len = 0; + bit_pos += get_dentry_slots(le16_to_cpu(de->namelen)); + } + de = NULL; +found: + return de; +} + +static struct hmdfs_dentry *hmdfs_in_level(struct dentry *child_dentry, + unsigned int level, + struct hmdfs_dcache_lookup_ctx *ctx) +{ + unsigned int nbucket; + unsigned int bidx, end_block; + struct hmdfs_dentry *de = NULL; + struct hmdfs_dentry *tmp_insense_de = NULL; + struct hmdfs_dentry_group *dentry_blk; + + nbucket = get_bucket_by_level(level); + if (!nbucket) + return de; + + bidx = get_bucketaddr(level, ctx->hash % nbucket) * BUCKET_BLOCKS; + end_block = bidx + BUCKET_BLOCKS; + + for (; bidx < end_block; bidx++) { + dentry_blk = find_dentry_page(ctx->sbi, bidx,
ctx->filp); + if (!dentry_blk) + break; + + de = find_in_block(dentry_blk, ctx->hash, ctx->name, + &tmp_insense_de, ctx->sbi->s_case_sensitive); + if (!de && !(ctx->insense_de) && tmp_insense_de) { + ctx->insense_de = tmp_insense_de; + ctx->insense_page = dentry_blk; + ctx->insense_bidx = bidx; + } else if (!de) { + hmdfs_unlock_file(ctx->filp, get_dentry_group_pos(bidx), + DENTRYGROUP_SIZE); + kfree(dentry_blk); + } else { + ctx->page = dentry_blk; + break; + } + } + ctx->bidx = bidx; + return de; +} + +struct hmdfs_dentry *hmdfs_find_dentry(struct dentry *child_dentry, + struct hmdfs_dcache_lookup_ctx *ctx) +{ + struct hmdfs_dentry *de = NULL; + unsigned int max_depth; + unsigned int level; + + if (!ctx->filp) + return NULL; + + ctx->hash = hmdfs_dentry_hash(ctx->name, ctx->sbi->s_case_sensitive); + + max_depth = get_max_depth(ctx->filp); + for (level = 0; level < max_depth; level++) { + de = hmdfs_in_level(child_dentry, level, ctx); + if (de) { + if (ctx->insense_page) { + hmdfs_unlock_file(ctx->filp, + get_dentry_group_pos(ctx->insense_bidx), + DENTRYGROUP_SIZE); + kfree(ctx->insense_page); + ctx->insense_page = NULL; + } + return de; + } + } + if (ctx->insense_de) { + ctx->bidx = ctx->insense_bidx; + ctx->page = ctx->insense_page; + ctx->insense_bidx = 0; + ctx->insense_page = NULL; + } + return ctx->insense_de; +} + +void update_dentry(struct hmdfs_dentry_group *d, struct dentry *child_dentry, + struct inode *inode, __u32 name_hash, unsigned int bit_pos) +{ + struct hmdfs_dentry *de; + struct hmdfs_dentry_info *gdi = hmdfs_d(child_dentry); + const struct qstr name = child_dentry->d_name; + int slots = get_dentry_slots(name.len); + int i; + unsigned long ino; + __u32 igen; + + /* + * If the dentry's inode is a symlink, it must be the lower inode, + * and we should use the upper ino and generation to fill + * the dentryfile. + */ + if (!gdi && S_ISLNK(d_inode(child_dentry)->i_mode)) { + ino = d_inode(child_dentry)->i_ino; + igen = d_inode(child_dentry)->i_generation; + } else { + ino = inode->i_ino; + igen = inode->i_generation; + } + + de = &d->nsl[bit_pos]; + de->hash = cpu_to_le32(name_hash); + de->namelen = cpu_to_le16(name.len); + memcpy(d->filename[bit_pos], name.name, name.len); + de->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); + de->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); + de->i_size = cpu_to_le64(inode->i_size); + de->i_ino = cpu_to_le64(generate_u64_ino(ino, igen)); + de->i_flag = 0; + + /* + * If the dentry has fsdata, we just assume it must be + * an hmdfs filesystem dentry. + * Only the client may update its info in the dentryfile when + * renaming the remote file. + * Since the symlink mtime and size are from the server's lower + * inode, we should just use them and only set S_IFLNK in the mode.
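+ * + * The resulting on-disk i_mode, in short: + * client hmdfs symlink (gdi set): cpu_to_le16(S_IFLNK) only + * server lower symlink (no gdi): raw lower inode i_mode + * everything else: cpu_to_le16(inode->i_mode)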
+ */ + if (gdi && hm_islnk(gdi->file_type)) + de->i_mode = cpu_to_le16(S_IFLNK); + else if (!gdi && S_ISLNK(d_inode(child_dentry)->i_mode)) + de->i_mode = d_inode(child_dentry)->i_mode; + else + de->i_mode = cpu_to_le16(inode->i_mode); + + for (i = 0; i < slots; i++) { + __set_bit_le(bit_pos + i, d->bitmap); + /* avoid wrong garbage data for readdir */ + if (i) + (de + i)->namelen = 0; + } +} + +int room_for_filename(const void *bitmap, int slots, int max_slots) +{ + int bit_start = 0; + int zero_start, zero_end; +next: + zero_start = find_next_zero_bit_le(bitmap, max_slots, bit_start); + if (zero_start >= max_slots) + return max_slots; + + zero_end = find_next_bit_le(bitmap, max_slots, zero_start); + if (zero_end - zero_start >= slots) + return zero_start; + + bit_start = zero_end + 1; + + if (zero_end + 1 >= max_slots) + return max_slots; + goto next; +} + +void create_in_cache_file(uint64_t dev_id, struct dentry *dentry) +{ + struct clearcache_item *item = NULL; + + item = hmdfs_find_cache_item(dev_id, dentry->d_parent); + if (item) { + if (d_inode(dentry)) + create_dentry(dentry, d_inode(dentry), item->filp, + hmdfs_sb(dentry->d_sb)); + else + hmdfs_err("inode is null!"); + kref_put(&item->ref, release_cache_item); + } else { + hmdfs_info("find cache item failed, device_id:%llu", dev_id); + } +} + +int create_dentry(struct dentry *child_dentry, struct inode *inode, + struct file *file, struct hmdfs_sb_info *sbi) +{ + unsigned int bit_pos, level; + unsigned long bidx, end_block; + const struct qstr qstr = child_dentry->d_name; + __u32 namehash; + loff_t pos; + ssize_t size; + int ret = 0; + struct hmdfs_dentry_group *dentry_blk = NULL; + + level = 0; + + namehash = hmdfs_dentry_hash(&qstr, sbi->s_case_sensitive); + + dentry_blk = kmalloc(sizeof(*dentry_blk), GFP_KERNEL); + if (!dentry_blk) { + ret = -ENOMEM; + goto out_err; + } +find: + if (level == MAX_BUCKET_LEVEL) { + ret = -ENOSPC; + goto out; + } + bidx = BUCKET_BLOCKS * + get_bucketaddr(level, namehash % get_bucket_by_level(level)); + end_block = bidx + BUCKET_BLOCKS; + if (end_block > get_dentry_group_cnt(file_inode(file))) { + if (cache_file_truncate(sbi, &(file->f_path), + get_dcache_file_size(level))) { + ret = -ENOSPC; + goto out; + } + } + + for (; bidx < end_block; bidx++) { + int size; + + pos = get_dentry_group_pos(bidx); + ret = hmdfs_wlock_file(file, pos, DENTRYGROUP_SIZE); + if (ret) + goto out; + + size = cache_file_read(sbi, file, dentry_blk, + (size_t)DENTRYGROUP_SIZE, &pos); + if (size != DENTRYGROUP_SIZE) { + ret = -ENOSPC; + hmdfs_unlock_file(file, pos, DENTRYGROUP_SIZE); + goto out; + } + + bit_pos = room_for_filename(&dentry_blk->bitmap, + get_dentry_slots(qstr.len), + DENTRY_PER_GROUP); + if (bit_pos < DENTRY_PER_GROUP) + goto add; + hmdfs_unlock_file(file, pos, DENTRYGROUP_SIZE); + } + ++level; + goto find; +add: + pos = get_dentry_group_pos(bidx); + update_dentry(dentry_blk, child_dentry, inode, namehash, bit_pos); + size = cache_file_write(sbi, file, dentry_blk, + sizeof(struct hmdfs_dentry_group), &pos); + if (size != sizeof(struct hmdfs_dentry_group)) + hmdfs_err("cache file write failed!, ret = %zd", size); + hmdfs_unlock_file(file, pos, DENTRYGROUP_SIZE); +out: + kfree(dentry_blk); +out_err: + return ret; +} + +void hmdfs_init_dcache_lookup_ctx(struct hmdfs_dcache_lookup_ctx *ctx, + struct hmdfs_sb_info *sbi, + const struct qstr *qstr, struct file *filp) +{ + ctx->sbi = sbi; + ctx->name = qstr; + ctx->filp = filp; + ctx->bidx = 0; + ctx->page = NULL; + ctx->insense_de = NULL; + ctx->insense_bidx = 0; + 
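+ /* + * The insense_* fields track the best case-insensitive match seen + * during lookup; hmdfs_find_dentry() falls back to it only if no + * exact-case match exists at any level. + */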
ctx->insense_page = NULL; +} + +int update_inode_to_dentry(struct dentry *child_dentry, struct inode *inode) +{ + struct hmdfs_sb_info *sbi = d_inode(child_dentry)->i_sb->s_fs_info; + struct hmdfs_dentry *de = NULL; + loff_t ipos; + struct dentry *parent_dentry; + struct cache_file_node *cfn = NULL; + char *relative_path = NULL; + struct hmdfs_dcache_lookup_ctx ctx; + + parent_dentry = child_dentry->d_parent; + + relative_path = hmdfs_get_dentry_relative_path(parent_dentry); + if (!relative_path) + return -ENOMEM; + + cfn = find_cfn(sbi, HMDFS_SERVER_CID, relative_path, true); + if (!cfn) + goto out; + + hmdfs_init_dcache_lookup_ctx(&ctx, sbi, &child_dentry->d_name, + cfn->filp); + de = hmdfs_find_dentry(child_dentry, &ctx); + if (!de) + goto out_cfn; + + de->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); + de->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); + de->i_size = cpu_to_le64(inode->i_size); + de->i_ino = cpu_to_le64( + generate_u64_ino(inode->i_ino, inode->i_generation)); + de->i_flag = 0; + + ipos = get_dentry_group_pos(ctx.bidx); + write_dentry_page(cfn->filp, ctx.page, + sizeof(struct hmdfs_dentry_group), ipos); + hmdfs_unlock_file(cfn->filp, ipos, DENTRYGROUP_SIZE); + kfree(ctx.page); +out_cfn: + release_cfn(cfn); +out: + kfree(relative_path); + return 0; +} + +void hmdfs_delete_dentry(struct dentry *d, struct file *filp) +{ + struct hmdfs_dentry *de = NULL; + unsigned int bit_pos; + int slots, i; + loff_t ipos; + ssize_t size; + struct hmdfs_dcache_lookup_ctx ctx; + + hmdfs_init_dcache_lookup_ctx(&ctx, hmdfs_sb(d->d_sb), &d->d_name, filp); + + de = hmdfs_find_dentry(d, &ctx); + if (IS_ERR_OR_NULL(de)) { + hmdfs_info("find dentry failed!, err=%ld", PTR_ERR(de)); + return; + } + slots = get_dentry_slots(le16_to_cpu(de->namelen)); + + bit_pos = de - ctx.page->nsl; + for (i = 0; i < slots; i++) + __clear_bit_le(bit_pos + i, &ctx.page->bitmap); + + ipos = get_dentry_group_pos(ctx.bidx); + size = cache_file_write(hmdfs_sb(d->d_sb), filp, ctx.page, + sizeof(struct hmdfs_dentry_group), &ipos); + if (size != sizeof(struct hmdfs_dentry_group)) + hmdfs_err("cache file write failed!, ret = %zd", size); + hmdfs_unlock_file(filp, ipos, DENTRYGROUP_SIZE); + kfree(ctx.page); +} + +static int hmdfs_get_cache_path(struct hmdfs_sb_info *sbi, struct path *dir) +{ + struct hmdfs_dentry_info *di = hmdfs_d(sbi->sb->s_root); + int err; + + if (!sbi->s_dentry_cache) { + *dir = di->lower_path; + return 0; + } + + err = kern_path(sbi->cache_dir, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, dir); + if (err) + hmdfs_err("open failed, errno = %d", err); + + return err; +} + +static void hmdfs_put_cache_path(struct hmdfs_sb_info *sbi, struct path *dir) +{ + if (!sbi->s_dentry_cache) + return; + path_put(dir); +} + +struct file *create_local_dentry_file_cache(struct hmdfs_sb_info *sbi) +{ + struct file *filp = NULL; + const struct cred *old_cred = hmdfs_override_creds(sbi->system_cred); + struct path cache_dir; + int err; + + err = hmdfs_get_cache_path(sbi, &cache_dir); + if (err) { + filp = ERR_PTR(err); + goto out; + } + + filp = file_open_root(&cache_dir, ".", + O_RDWR | O_LARGEFILE | O_TMPFILE, + DENTRY_FILE_PERM); + if (IS_ERR(filp)) + hmdfs_err("dentryfile open failed and exit err=%ld", + PTR_ERR(filp)); + + hmdfs_put_cache_path(sbi, &cache_dir); +out: + hmdfs_revert_creds(old_cred); + return filp; +} + +static int hmdfs_linkat(struct path *old_path, const char *newname) +{ + struct dentry *new_dentry = NULL; + struct path new_path; + int error; + + new_dentry = kern_path_create(AT_FDCWD, newname, &new_path, 
0); + if (IS_ERR(new_dentry)) { + hmdfs_err("create kernel path failed, error: %ld", + PTR_ERR(new_dentry)); + return PTR_ERR(new_dentry); + } + + error = -EXDEV; + if (old_path->mnt != new_path.mnt) + goto out_dput; + + error = vfs_link(old_path->dentry, new_path.dentry->d_inode, new_dentry, + NULL); + +out_dput: + done_path_create(&new_path, new_dentry); + return error; +} + +static int cache_file_mkdir(const char *name, umode_t mode) +{ + struct dentry *dentry; + struct path path; + int err; + + dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + err = vfs_mkdir(d_inode(path.dentry), dentry, mode); + if (err && err != -EEXIST) + hmdfs_err("vfs_mkdir failed, err = %d", err); + + done_path_create(&path, dentry); + return err; +} + +static int cache_file_create_path(const char *fullpath) +{ + char *path; + char *s; + int err = 0; + + path = kstrdup(fullpath, GFP_KERNEL); + if (!path) + return -ENOMEM; + + s = path + 1; + while (true) { + s = strchr(s, '/'); + if (!s) + break; + s[0] = '\0'; + err = cache_file_mkdir(path, 0755); + if (err && err != -EEXIST) + break; + s[0] = '/'; + s++; + } + kfree(path); + return err; +} + +static void hmdfs_cache_path_create(char *s, const char *dir, bool server) +{ + if (server) + snprintf(s, PATH_MAX, "%s/dentry_cache/server/", dir); + else + snprintf(s, PATH_MAX, "%s/dentry_cache/client/", dir); +} + +static void hmdfs_cache_file_create(char *s, uint64_t hash, const char *id, + bool server) +{ + int offset = strlen(s); + + if (server) + snprintf(s + offset, PATH_MAX - offset, "%016llx", hash); + else + snprintf(s + offset, PATH_MAX - offset, "%s_%016llx", id, hash); +} + +int cache_file_name_generate(char *fullname, struct hmdfs_peer *con, + const char *relative_path, bool server) +{ + struct hmdfs_sb_info *sbi = con->sbi; + uint64_t hash; + char cid[HMDFS_CFN_CID_SIZE]; + int err; + + hmdfs_cache_path_create(fullname, sbi->cache_dir, server); + + err = cache_file_create_path(fullname); + if (err && err != -EEXIST) { + hmdfs_err("making dir failed %d", err); + return err; + } + + strncpy(cid, con->cid, HMDFS_CFN_CID_SIZE - 1); + cid[HMDFS_CFN_CID_SIZE - 1] = '\0'; + + hash = path_hash(relative_path, strlen(relative_path), + sbi->s_case_sensitive); + hmdfs_cache_file_create(fullname, hash, cid, server); + + return 0; +} + +static void free_cfn(struct cache_file_node *cfn) +{ + if (!IS_ERR_OR_NULL(cfn->filp)) + filp_close(cfn->filp, NULL); + + kfree(cfn->relative_path); + kfree(cfn); +} + +static bool dentry_file_match(struct cache_file_node *cfn, const char *id, + const char *path) +{ + int ret; + + if (cfn->sbi->s_case_sensitive) + ret = strcmp(cfn->relative_path, path); + else + ret = strcasecmp(cfn->relative_path, path); + + return (!ret && !strncmp((cfn)->cid, id, HMDFS_CFN_CID_SIZE - 1)); +} + +struct cache_file_node *__find_cfn(struct hmdfs_sb_info *sbi, const char *cid, + const char *path, bool server) +{ + struct cache_file_node *cfn = NULL; + struct list_head *head = get_list_head(sbi, server); + + list_for_each_entry(cfn, head, list) { + if (dentry_file_match(cfn, cid, path)) { + refcount_inc(&cfn->ref); + return cfn; + } + } + return NULL; +} + +struct cache_file_node *create_cfn(struct hmdfs_sb_info *sbi, const char *path, + const char *cid, bool server) +{ + struct cache_file_node *cfn = kzalloc(sizeof(*cfn), GFP_KERNEL); + + if (!cfn) + return NULL; + + cfn->relative_path = kstrdup(path, GFP_KERNEL); + if (!cfn->relative_path) + goto out; + + refcount_set(&cfn->ref, 1); + 
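+ /* + * The node starts with a single reference, later owned by the + * cache list; __find_cfn() takes an extra reference per lookup + * and release_cfn() frees the node once the count drops to zero. + */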
strncpy(cfn->cid, cid, HMDFS_CFN_CID_SIZE - 1); + cfn->cid[HMDFS_CFN_CID_SIZE - 1] = '\0'; + cfn->sbi = sbi; + cfn->server = server; + return cfn; +out: + free_cfn(cfn); + return NULL; +} + +static struct file *insert_cfn(struct hmdfs_sb_info *sbi, const char *filename, + const char *path, const char *cid, bool server) +{ + const struct cred *old_cred = NULL; + struct cache_file_node *cfn = NULL; + struct cache_file_node *exist = NULL; + struct list_head *head = NULL; + struct file *filp = NULL; + + cfn = create_cfn(sbi, path, cid, server); + if (!cfn) + return ERR_PTR(-ENOMEM); + + old_cred = hmdfs_override_creds(sbi->system_cred); + filp = filp_open(filename, O_RDWR | O_LARGEFILE, 0); + hmdfs_revert_creds(old_cred); + if (IS_ERR(filp)) { + hmdfs_err("open file failed, err=%ld", PTR_ERR(filp)); + goto out; + } + + head = get_list_head(sbi, server); + + mutex_lock(&sbi->cache_list_lock); + exist = __find_cfn(sbi, cid, path, server); + if (!exist) { + cfn->filp = filp; + list_add_tail(&cfn->list, head); + } else { + mutex_unlock(&sbi->cache_list_lock); + release_cfn(exist); + filp_close(filp, NULL); + filp = ERR_PTR(-EEXIST); + goto out; + } + mutex_unlock(&sbi->cache_list_lock); + return filp; +out: + free_cfn(cfn); + return filp; +} + +int hmdfs_rename_dentry(struct dentry *old_dentry, struct dentry *new_dentry, + struct file *old_filp, struct file *new_filp) +{ + int ret; + struct hmdfs_sb_info *sbi = hmdfs_sb(new_dentry->d_sb); + + /* + * Try to delete first, because a stale dentry might exist after + * an overwrite. + */ + hmdfs_delete_dentry(new_dentry, new_filp); + + ret = create_dentry(new_dentry, d_inode(old_dentry), new_filp, sbi); + if (ret) { + hmdfs_err("create dentry failed!, err=%d", ret); + return ret; + } + + hmdfs_delete_dentry(old_dentry, old_filp); + return 0; +} + +/** + * cache_file_persistent - link the tmpfile to the cache dir + * @con: the connection peer + * @filp: the file handle of the tmpfile + * @relative_path: the relative path to which the tmpfile belongs + * @server: server or client + * + * Return value: the new file handle of the persistent file if the + * persistent operation succeeds. Otherwise the original handle of the + * tmpfile passed in is returned, so that the caller does not have to + * check the returned handle. + * + */ +struct file *cache_file_persistent(struct hmdfs_peer *con, struct file *filp, + const char *relative_path, bool server) +{ + struct cache_file_node *cfn = NULL; + char *fullname = NULL; + char *cid = server ?
HMDFS_SERVER_CID : (char *)con->cid; + struct file *newf = NULL; + int i = 0; + int len; + int err; + + if (!con->sbi->s_dentry_cache) + return filp; + + cfn = find_cfn(con->sbi, cid, relative_path, server); + if (cfn) { + release_cfn(cfn); + return filp; + } + fullname = kzalloc(PATH_MAX, GFP_KERNEL); + if (!fullname) + return filp; + + err = cache_file_name_generate(fullname, con, relative_path, server); + if (err) + goto out; + + err = __vfs_setxattr(file_dentry(filp), file_inode(filp), + DENTRY_FILE_XATTR_NAME, relative_path, + strlen(relative_path), 0); + if (err) { + hmdfs_err("setxattr for file failed, err=%d", err); + goto out; + } + + len = strlen(fullname); + + do { + err = hmdfs_linkat(&filp->f_path, fullname); + if (!err) + break; + + snprintf(fullname + len, PATH_MAX - len, "_%d", i); + } while (i++ < DENTRY_FILE_NAME_RETRY); + + if (err) { + hmdfs_err("link for file failed, err=%d", err); + goto out; + } + + newf = insert_cfn(con->sbi, fullname, relative_path, cid, server); + if (!IS_ERR(newf)) + filp = newf; +out: + kfree(fullname); + return filp; +} + +void __destroy_cfn(struct list_head *head) +{ + struct cache_file_node *cfn = NULL; + struct cache_file_node *n = NULL; + + list_for_each_entry_safe(cfn, n, head, list) { + list_del_init(&cfn->list); + release_cfn(cfn); + } +} + +void hmdfs_cfn_destroy(struct hmdfs_sb_info *sbi) +{ + mutex_lock(&sbi->cache_list_lock); + __destroy_cfn(&sbi->client_cache); + __destroy_cfn(&sbi->server_cache); + mutex_unlock(&sbi->cache_list_lock); +} + +struct cache_file_node *find_cfn(struct hmdfs_sb_info *sbi, const char *cid, + const char *path, bool server) +{ + struct cache_file_node *cfn = NULL; + + mutex_lock(&sbi->cache_list_lock); + cfn = __find_cfn(sbi, cid, path, server); + mutex_unlock(&sbi->cache_list_lock); + return cfn; +} + +void release_cfn(struct cache_file_node *cfn) +{ + if (refcount_dec_and_test(&cfn->ref)) + free_cfn(cfn); +} + +void remove_cfn(struct cache_file_node *cfn) +{ + struct hmdfs_sb_info *sbi = cfn->sbi; + bool deleted; + + mutex_lock(&sbi->cache_list_lock); + deleted = list_empty(&cfn->list); + if (!deleted) + list_del_init(&cfn->list); + mutex_unlock(&sbi->cache_list_lock); + if (!deleted) { + delete_dentry_file(cfn->filp); + release_cfn(cfn); + } +} + +int hmdfs_do_lock_file(struct file *filp, unsigned char fl_type, loff_t start, + loff_t len) +{ + struct file_lock fl; + int err; + + locks_init_lock(&fl); + + fl.fl_type = fl_type; + fl.fl_flags = FL_POSIX | FL_CLOSE | FL_SLEEP; + fl.fl_start = start; + fl.fl_end = start + len - 1; + fl.fl_owner = filp; + fl.fl_pid = current->tgid; + fl.fl_file = filp; + fl.fl_ops = NULL; + fl.fl_lmops = NULL; + + err = locks_lock_file_wait(filp, &fl); + if (err) + hmdfs_err("lock file wait failed: %d", err); + + return err; +} + +int hmdfs_wlock_file(struct file *filp, loff_t start, loff_t len) +{ + return hmdfs_do_lock_file(filp, F_WRLCK, start, len); +} + +int hmdfs_rlock_file(struct file *filp, loff_t start, loff_t len) +{ + return hmdfs_do_lock_file(filp, F_RDLCK, start, len); +} + +int hmdfs_unlock_file(struct file *filp, loff_t start, loff_t len) +{ + return hmdfs_do_lock_file(filp, F_UNLCK, start, len); +} + +long cache_file_truncate(struct hmdfs_sb_info *sbi, const struct path *path, + loff_t length) +{ + const struct cred *old_cred = hmdfs_override_creds(sbi->system_cred); + long ret = vfs_truncate(path, length); + + hmdfs_revert_creds(old_cred); + + return ret; +} + +ssize_t cache_file_read(struct hmdfs_sb_info *sbi, struct file *filp, void *buf, + size_t count, 
loff_t *pos) +{ + const struct cred *old_cred = hmdfs_override_creds(sbi->system_cred); + ssize_t ret = kernel_read(filp, buf, count, pos); + + hmdfs_revert_creds(old_cred); + + return ret; +} + +ssize_t cache_file_write(struct hmdfs_sb_info *sbi, struct file *filp, + const void *buf, size_t count, loff_t *pos) +{ + const struct cred *old_cred = hmdfs_override_creds(sbi->system_cred); + ssize_t ret = kernel_write(filp, buf, count, pos); + + hmdfs_revert_creds(old_cred); + + return ret; +} + + +int read_header(struct hmdfs_sb_info *sbi, struct file *filp, + struct hmdfs_dcache_header *header) +{ + ssize_t bytes; + loff_t pos = 0; + + bytes = cache_file_read(sbi, filp, header, sizeof(*header), &pos); + if (bytes != sizeof(*header)) { + hmdfs_err("read file failed, err:%zd", bytes); + return -EIO; + } + + return 0; +} + +static unsigned long long cache_get_dentry_count(struct hmdfs_sb_info *sbi, + struct file *filp) +{ + struct hmdfs_dcache_header header; + int overallpage; + + overallpage = get_dentry_group_cnt(file_inode(filp)); + if (overallpage == 0) + return 0; + + if (read_header(sbi, filp, &header)) + return 0; + + return le64_to_cpu(header.num); +} + +static int cache_check_case_sensitive(struct hmdfs_sb_info *sbi, + struct file *filp) +{ + struct hmdfs_dcache_header header; + + if (read_header(sbi, filp, &header)) + return 0; + + if (sbi->s_case_sensitive != (bool)header.case_sensitive) { + hmdfs_info("Case sensitive inconsistent, current fs is: %d, cache is %d, will drop cache", + sbi->s_case_sensitive, header.case_sensitive); + return 0; + } + return 1; +} + +int write_header(struct file *filp, struct hmdfs_dcache_header *header) +{ + loff_t pos = 0; + ssize_t size; + + size = kernel_write(filp, header, sizeof(*header), &pos); + if (size != sizeof(*header)) { + hmdfs_err("update dcache header failed %zd", size); + return -EIO; + } + + return 0; +} + +void add_to_delete_list(struct hmdfs_sb_info *sbi, struct cache_file_node *cfn) +{ + mutex_lock(&sbi->cache_list_lock); + list_add_tail(&cfn->list, &sbi->to_delete); + mutex_unlock(&sbi->cache_list_lock); +} + +void load_cfn(struct hmdfs_sb_info *sbi, const char *fullname, const char *path, + const char *cid, bool server) +{ + struct cache_file_node *cfn = NULL; + struct cache_file_node *cfn1 = NULL; + struct list_head *head = NULL; + + cfn = create_cfn(sbi, path, cid, server); + if (!cfn) + return; + + cfn->filp = filp_open(fullname, O_RDWR | O_LARGEFILE, 0); + if (IS_ERR(cfn->filp)) { + hmdfs_err("open fail %ld", PTR_ERR(cfn->filp)); + goto out; + } + + if (cache_get_dentry_count(sbi, cfn->filp) < sbi->dcache_threshold) { + add_to_delete_list(sbi, cfn); + return; + } + + if (!cache_check_case_sensitive(sbi, cfn->filp)) { + add_to_delete_list(sbi, cfn); + return; + } + + head = get_list_head(sbi, server); + + mutex_lock(&sbi->cache_list_lock); + cfn1 = __find_cfn(sbi, cid, path, server); + if (!cfn1) { + list_add_tail(&cfn->list, head); + } else { + release_cfn(cfn1); + mutex_unlock(&sbi->cache_list_lock); + add_to_delete_list(sbi, cfn); + return; + } + mutex_unlock(&sbi->cache_list_lock); + + return; +out: + free_cfn(cfn); +} + +static int get_cid_and_hash(const char *name, uint64_t *hash, char *cid) +{ + int len; + char *p = strstr(name, "_"); + + if (!p) + return -EINVAL; + + len = p - name; + if (len >= HMDFS_CFN_CID_SIZE) + return -EINVAL; + + memcpy(cid, name, len); + cid[len] = '\0'; + + if (sscanf(++p, "%llx", hash) != 1) + return -EINVAL; + return 0; +} + +static void store_one(const char *name, struct cache_file_callback 
*cb) +{ + struct file *file = NULL; + char *fullname = NULL; + char *kvalue = NULL; + char cid[HMDFS_CFN_CID_SIZE]; + uint64_t hash; + ssize_t error; + + if (strlen(name) + strlen(cb->dirname) >= PATH_MAX) + return; + + fullname = kzalloc(PATH_MAX, GFP_KERNEL); + if (!fullname) + return; + + snprintf(fullname, PATH_MAX, "%s%s", cb->dirname, name); + + file = filp_open(fullname, O_RDWR | O_LARGEFILE, 0); + if (IS_ERR(file)) { + hmdfs_err("open fail %ld", PTR_ERR(file)); + goto out; + } + + kvalue = kzalloc(PATH_MAX, GFP_KERNEL); + if (!kvalue) + goto out_file; + + error = __vfs_getxattr(file_dentry(file), file_inode(file), + DENTRY_FILE_XATTR_NAME, kvalue, PATH_MAX); + if (error <= 0 || error >= PATH_MAX) { + hmdfs_err("getxattr return: %zd", error); + goto out_kvalue; + } + kvalue[error] = '\0'; + cid[0] = '\0'; + + if (!cb->server) { + if (get_cid_and_hash(name, &hash, cid)) { + hmdfs_err("get cid and hash fail"); + goto out_kvalue; + } + } + + load_cfn(cb->sbi, fullname, kvalue, cid, cb->server); + +out_kvalue: + kfree(kvalue); +out_file: + filp_close(file, NULL); +out: + kfree(fullname); +} + +static int cache_file_iterate(struct dir_context *ctx, const char *name, + int name_len, loff_t offset, u64 ino, + unsigned int d_type) +{ + struct cache_file_item *cfi = NULL; + struct cache_file_callback *cb = + container_of(ctx, struct cache_file_callback, ctx); + + if (name_len > NAME_MAX) { + hmdfs_err("name_len:%d NAME_MAX:%u", name_len, NAME_MAX); + return 0; + } + + if (d_type != DT_REG) + return 0; + + cfi = kmalloc(sizeof(*cfi), GFP_KERNEL); + if (!cfi) + return -ENOMEM; + + cfi->name = kstrndup(name, name_len, GFP_KERNEL); + if (!cfi->name) { + kfree(cfi); + return -ENOMEM; + } + + list_add_tail(&cfi->list, &cb->list); + + return 0; +} + +void hmdfs_do_load(struct hmdfs_sb_info *sbi, const char *fullname, bool server) +{ + struct file *file = NULL; + struct path dirpath; + int err; + struct cache_file_item *cfi = NULL; + struct cache_file_item *n = NULL; + struct cache_file_callback cb = { + .ctx.actor = cache_file_iterate, + .ctx.pos = 0, + .dirname = fullname, + .sbi = sbi, + .server = server, + }; + INIT_LIST_HEAD(&cb.list); + + + err = kern_path(fullname, LOOKUP_DIRECTORY, &dirpath); + if (err) { + hmdfs_info("No file path"); + return; + } + + file = dentry_open(&dirpath, O_RDONLY, current_cred()); + if (IS_ERR_OR_NULL(file)) { + hmdfs_err("dentry_open failed, error: %ld", PTR_ERR(file)); + path_put(&dirpath); + return; + } + + err = iterate_dir(file, &cb.ctx); + if (err) + hmdfs_err("iterate_dir failed, err: %d", err); + + list_for_each_entry_safe(cfi, n, &cb.list, list) { + store_one(cfi->name, &cb); + list_del_init(&cfi->list); + kfree(cfi->name); + kfree(cfi); + } + + fput(file); + path_put(&dirpath); +} + +/** + * This function just used for delete dentryfile.dat + */ +int delete_dentry_file(struct file *filp) +{ + int err = 0; + struct dentry *dentry = file_dentry(filp); + struct dentry *parent = lock_parent(dentry); + + if (dentry->d_parent == parent) { + dget(dentry); + err = vfs_unlink(d_inode(parent), dentry, NULL); + dput(dentry); + } + unlock_dir(parent); + + return err; +} + +void hmdfs_delete_useless_cfn(struct hmdfs_sb_info *sbi) +{ + struct cache_file_node *cfn = NULL; + struct cache_file_node *n = NULL; + + mutex_lock(&sbi->cache_list_lock); + + list_for_each_entry_safe(cfn, n, &sbi->to_delete, list) { + delete_dentry_file(cfn->filp); + list_del_init(&cfn->list); + release_cfn(cfn); + } + mutex_unlock(&sbi->cache_list_lock); +} + +void hmdfs_cfn_load(struct 
hmdfs_sb_info *sbi) +{ + char *fullname = NULL; + + if (!sbi->s_dentry_cache) + return; + + fullname = kzalloc(PATH_MAX, GFP_KERNEL); + if (!fullname) + return; + + snprintf(fullname, PATH_MAX, "%s/dentry_cache/client/", + sbi->cache_dir); + hmdfs_do_load(sbi, fullname, false); + + snprintf(fullname, PATH_MAX, "%s/dentry_cache/server/", + sbi->cache_dir); + hmdfs_do_load(sbi, fullname, true); + kfree(fullname); + + hmdfs_delete_useless_cfn(sbi); +} + +static void __cache_file_destroy_by_path(struct list_head *head, + const char *path) +{ + struct cache_file_node *cfn = NULL; + struct cache_file_node *n = NULL; + + list_for_each_entry_safe(cfn, n, head, list) { + if (strcmp(path, cfn->relative_path) != 0) + continue; + list_del_init(&cfn->list); + delete_dentry_file(cfn->filp); + release_cfn(cfn); + } +} + +static void cache_file_destroy_by_path(struct hmdfs_sb_info *sbi, + const char *path) +{ + mutex_lock(&sbi->cache_list_lock); + + __cache_file_destroy_by_path(&sbi->server_cache, path); + __cache_file_destroy_by_path(&sbi->client_cache, path); + + mutex_unlock(&sbi->cache_list_lock); +} + +static void cache_file_find_and_delete(struct hmdfs_peer *con, + const char *relative_path) +{ + struct cache_file_node *cfn; + + cfn = find_cfn(con->sbi, con->cid, relative_path, false); + if (!cfn) + return; + + remove_cfn(cfn); + release_cfn(cfn); +} + +void cache_file_delete_by_dentry(struct hmdfs_peer *con, struct dentry *dentry) +{ + char *relative_path = NULL; + + relative_path = hmdfs_get_dentry_relative_path(dentry); + if (unlikely(!relative_path)) { + hmdfs_err("get relative path failed %d", -ENOMEM); + return; + } + cache_file_find_and_delete(con, relative_path); + kfree(relative_path); +} + +struct file *hmdfs_get_new_dentry_file(struct hmdfs_peer *con, + const char *relative_path, + struct hmdfs_dcache_header *header) +{ + struct hmdfs_sb_info *sbi = con->sbi; + int len = strlen(relative_path); + struct file *filp = NULL; + int err; + + filp = create_local_dentry_file_cache(sbi); + if (IS_ERR(filp)) + return filp; + + err = hmdfs_client_start_readdir(con, filp, relative_path, len, header); + if (err) { + if (err != -ENOENT) + hmdfs_err("readdir failed dev: %llu err: %d", + con->device_id, err); + fput(filp); + filp = ERR_PTR(err); + } + + return filp; +} + +void add_cfn_to_item(struct dentry *dentry, struct hmdfs_peer *con, + struct cache_file_node *cfn) +{ + struct file *file = cfn->filp; + int err; + + err = hmdfs_add_cache_list(con->device_id, dentry, file); + if (unlikely(err)) { + hmdfs_err("add cache list failed devid:%llu err:%d", + con->device_id, err); + return; + } +} + +int hmdfs_add_file_to_cache(struct dentry *dentry, struct hmdfs_peer *con, + struct file *file, const char *relative_path) +{ + struct hmdfs_sb_info *sbi = con->sbi; + struct file *newf = file; + + if (cache_get_dentry_count(sbi, file) >= sbi->dcache_threshold) + newf = cache_file_persistent(con, file, relative_path, false); + else + cache_file_find_and_delete(con, relative_path); + + return hmdfs_add_cache_list(con->device_id, dentry, newf); +} + +static struct file *read_header_and_revalidate(struct hmdfs_peer *con, + struct file *filp, + const char *relative_path) +{ + struct hmdfs_dcache_header header; + struct hmdfs_dcache_header *p = NULL; + + if (read_header(con->sbi, filp, &header) == 0) + p = &header; + + return hmdfs_get_new_dentry_file(con, relative_path, p); +} + +void remote_file_revalidate_cfn(struct dentry *dentry, struct hmdfs_peer *con, + struct cache_file_node *cfn, + const char *relative_path) 
+{ + struct file *file = NULL; + int err; + + file = read_header_and_revalidate(con, cfn->filp, relative_path); + if (IS_ERR(file)) + return; + + /* + * If the request returned ok but file length is 0, we assume + * that the server verified the client cache file is uptodate. + */ + if (i_size_read(file->f_inode) == 0) { + hmdfs_info("The cfn cache for dev:%llu is uptodate", + con->device_id); + fput(file); + add_cfn_to_item(dentry, con, cfn); + return; + } + + /* OK, cfn is not uptodate, let's remove it and add the new file */ + remove_cfn(cfn); + + err = hmdfs_add_file_to_cache(dentry, con, file, relative_path); + if (unlikely(err)) + hmdfs_err("add cache list failed devid:%llu err:%d", + con->device_id, err); + fput(file); +} + +void remote_file_revalidate_item(struct dentry *dentry, struct hmdfs_peer *con, + struct clearcache_item *item, + const char *relative_path) +{ + struct file *file = NULL; + int err; + + file = read_header_and_revalidate(con, item->filp, relative_path); + if (IS_ERR(file)) + return; + + /* + * If the request returned ok but file length is 0, we assume + * that the server verified the client cache file is uptodate. + */ + if (i_size_read(file->f_inode) == 0) { + hmdfs_info("The item cache for dev:%llu is uptodate", + con->device_id); + item->time = jiffies; + fput(file); + return; + } + + /* We need to replace the old item */ + remove_cache_item(item); + cache_file_find_and_delete(con, relative_path); + + err = hmdfs_add_file_to_cache(dentry, con, file, relative_path); + if (unlikely(err)) + hmdfs_err("add cache list failed devid:%llu err:%d", + con->device_id, err); + fput(file); +} + +bool get_remote_dentry_file(struct dentry *dentry, struct hmdfs_peer *con) +{ + struct hmdfs_dentry_info *d_info = hmdfs_d(dentry); + struct cache_file_node *cfn = NULL; + struct hmdfs_sb_info *sbi = con->sbi; + char *relative_path = NULL; + int err = 0; + struct file *filp = NULL; + struct clearcache_item *item; + + if (hmdfs_cache_revalidate(READ_ONCE(con->conn_time), con->device_id, + dentry)) + return false; + + relative_path = hmdfs_get_dentry_relative_path(dentry); + if (unlikely(!relative_path)) { + hmdfs_err("get relative path failed %d", -ENOMEM); + return false; + } + mutex_lock(&d_info->cache_pull_lock); + if (hmdfs_cache_revalidate(READ_ONCE(con->conn_time), con->device_id, + dentry)) + goto out_unlock; + + item = hmdfs_find_cache_item(con->device_id, dentry); + if (item) { + remote_file_revalidate_item(dentry, con, item, relative_path); + kref_put(&item->ref, release_cache_item); + goto out_unlock; + } + + cfn = find_cfn(sbi, con->cid, relative_path, false); + if (cfn) { + remote_file_revalidate_cfn(dentry, con, cfn, relative_path); + release_cfn(cfn); + goto out_unlock; + } + + filp = hmdfs_get_new_dentry_file(con, relative_path, NULL); + if (IS_ERR(filp)) { + err = PTR_ERR(filp); + goto out_unlock; + } + + err = hmdfs_add_file_to_cache(dentry, con, filp, relative_path); + if (unlikely(err)) + hmdfs_err("add cache list failed devid:%lu err:%d", + (unsigned long)con->device_id, err); + fput(filp); + +out_unlock: + mutex_unlock(&d_info->cache_pull_lock); + if (err && err != -ENOENT) + hmdfs_err("readdir failed dev:%lu err:%d", + (unsigned long)con->device_id, err); + kfree(relative_path); + return true; +} + +int hmdfs_file_type(const char *name) +{ + if (!name) + return -EINVAL; + + if (!strcmp(name, CURRENT_DIR) || !strcmp(name, PARENT_DIR)) + return HMDFS_TYPE_DOT; + + return HMDFS_TYPE_COMMON; +} + +struct clearcache_item *hmdfs_find_cache_item(uint64_t dev_id, + 
struct dentry *dentry) +{ + struct clearcache_item *item = NULL; + struct hmdfs_dentry_info *d_info = hmdfs_d(dentry); + + if (!d_info) + return NULL; + + spin_lock(&d_info->cache_list_lock); + list_for_each_entry(item, &(d_info->cache_list_head), list) { + if (dev_id == item->dev_id) { + kref_get(&item->ref); + spin_unlock(&d_info->cache_list_lock); + return item; + } + } + spin_unlock(&d_info->cache_list_lock); + return NULL; +} + +bool hmdfs_cache_revalidate(unsigned long conn_time, uint64_t dev_id, + struct dentry *dentry) +{ + bool ret = false; + struct clearcache_item *item = NULL; + struct hmdfs_dentry_info *d_info = hmdfs_d(dentry); + unsigned int timeout; + + if (!d_info) + return ret; + + timeout = hmdfs_sb(dentry->d_sb)->dcache_timeout; + spin_lock(&d_info->cache_list_lock); + list_for_each_entry(item, &(d_info->cache_list_head), list) { + if (dev_id == item->dev_id) { + ret = cache_item_revalidate(conn_time, item->time, + timeout); + break; + } + } + spin_unlock(&d_info->cache_list_lock); + return ret; +} + +void remove_cache_item(struct clearcache_item *item) +{ + bool deleted; + + spin_lock(&item->d_info->cache_list_lock); + deleted = list_empty(&item->list); + if (!deleted) + list_del_init(&item->list); + spin_unlock(&item->d_info->cache_list_lock); + if (!deleted) + kref_put(&item->ref, release_cache_item); +} + +void release_cache_item(struct kref *ref) +{ + struct clearcache_item *item = + container_of(ref, struct clearcache_item, ref); + + if (item->filp) + fput(item->filp); + kfree(item); +} + +void hmdfs_remove_cache_filp(struct hmdfs_peer *con, struct dentry *dentry) +{ + struct clearcache_item *item = NULL; + struct clearcache_item *item_temp = NULL; + struct hmdfs_dentry_info *d_info = hmdfs_d(dentry); + // struct path *lower_path = NULL; + + if (!d_info) + return; + + spin_lock(&d_info->cache_list_lock); + list_for_each_entry_safe(item, item_temp, &(d_info->cache_list_head), + list) { + if (con->device_id == item->dev_id) { + list_del_init(&item->list); + spin_unlock(&d_info->cache_list_lock); + cache_file_delete_by_dentry(con, dentry); + kref_put(&item->ref, release_cache_item); + return; + } + } + spin_unlock(&d_info->cache_list_lock); +} + +int hmdfs_add_cache_list(uint64_t dev_id, struct dentry *dentry, + struct file *filp) +{ + struct clearcache_item *item = NULL; + struct hmdfs_dentry_info *d_info = hmdfs_d(dentry); + + if (!d_info) + return -ENOMEM; + + item = kzalloc(sizeof(*item), GFP_KERNEL); + if (!item) + return -ENOMEM; + + item->dev_id = dev_id; + item->filp = get_file(filp); + item->time = jiffies; + item->d_info = d_info; + kref_init(&item->ref); + spin_lock(&d_info->cache_list_lock); + list_add_tail(&(item->list), &(d_info->cache_list_head)); + spin_unlock(&d_info->cache_list_lock); + return 0; +} + +void hmdfs_add_remote_cache_list(struct hmdfs_peer *con, const char *dir_path) +{ + int err = 0; + struct remotecache_item *item = NULL; + struct remotecache_item *item_temp = NULL; + struct path path, root_path; + struct hmdfs_dentry_info *d_info = NULL; + + err = kern_path(con->sbi->local_dst, 0, &root_path); + if (err) { + hmdfs_err("kern_path failed err = %d", err); + return; + } + + err = vfs_path_lookup(root_path.dentry, root_path.mnt, dir_path, 0, + &path); + if (err) + goto out_put_root; + + d_info = hmdfs_d(path.dentry); + if (!d_info) { + err = -EINVAL; + goto out; + } + + /* find duplicate con */ + mutex_lock(&d_info->remote_cache_list_lock); + list_for_each_entry_safe(item, item_temp, + &(d_info->remote_cache_list_head), list) { + if 
(item->con->device_id == con->device_id) { + mutex_unlock(&d_info->remote_cache_list_lock); + goto out; + } + } + + item = kzalloc(sizeof(*item), GFP_KERNEL); + if (!item) { + err = -ENOMEM; + mutex_unlock(&d_info->remote_cache_list_lock); + goto out; + } + + item->con = con; + item->drop_flag = 0; + list_add(&(item->list), &(d_info->remote_cache_list_head)); + mutex_unlock(&d_info->remote_cache_list_lock); + +out: + path_put(&path); +out_put_root: + path_put(&root_path); +} + +int hmdfs_drop_remote_cache_dents(struct dentry *dentry) +{ + struct path lower_path; + struct inode *lower_inode = NULL; + struct remotecache_item *item = NULL; + struct remotecache_item *item_temp = NULL; + struct hmdfs_dentry_info *d_info = NULL; + char *relative_path = NULL; + + if (!dentry) { + hmdfs_err("dentry null and return"); + return 0; + } + + d_info = hmdfs_d(dentry); + if (!d_info) { + hmdfs_err("d_info null and return"); + return 0; + } + hmdfs_get_lower_path(dentry, &lower_path); + if (IS_ERR_OR_NULL(lower_path.dentry)) { + hmdfs_put_lower_path(&lower_path); + return 0; + } + lower_inode = d_inode(lower_path.dentry); + hmdfs_put_lower_path(&lower_path); + if (IS_ERR_OR_NULL(lower_inode)) + return 0; + /* only for directories */ + if (!S_ISDIR(lower_inode->i_mode)) + return 0; + + relative_path = hmdfs_get_dentry_relative_path(dentry); + if (!relative_path) { + hmdfs_err("get dentry relative path failed"); + return 0; + } + mutex_lock(&d_info->remote_cache_list_lock); + list_for_each_entry_safe(item, item_temp, + &(d_info->remote_cache_list_head), list) { + if (item->drop_flag) { + item->drop_flag = 0; + continue; + } + mutex_unlock(&d_info->remote_cache_list_lock); + hmdfs_send_drop_push(item->con, relative_path); + mutex_lock(&d_info->remote_cache_list_lock); + list_del(&item->list); + kfree(item); + } + mutex_unlock(&d_info->remote_cache_list_lock); + + kfree(relative_path); + return 0; +} + +/* Clear the dentry cache files of the target directory */ +int hmdfs_clear_cache_dents(struct dentry *dentry, bool remove_cache) +{ + struct clearcache_item *item = NULL; + struct clearcache_item *item_temp = NULL; + struct hmdfs_dentry_info *d_info = hmdfs_d(dentry); + char *path = NULL; + + if (!d_info) + return 0; + + spin_lock(&d_info->cache_list_lock); + list_for_each_entry_safe(item, item_temp, &(d_info->cache_list_head), + list) { + list_del_init(&item->list); + kref_put(&item->ref, release_cache_item); + } + spin_unlock(&d_info->cache_list_lock); + + if (!remove_cache) + return 0; + + /* we also need to confirm that there are no dentryfile_dev* + * files left under this dentry + */ + path = hmdfs_get_dentry_relative_path(dentry); + + if (unlikely(!path)) { + hmdfs_err("get relative path failed"); + return 0; + } + + cache_file_destroy_by_path(hmdfs_sb(dentry->d_sb), path); + + kfree(path); + return 0; +} + +void hmdfs_mark_drop_flag(uint64_t device_id, struct dentry *dentry) +{ + struct remotecache_item *item = NULL; + struct hmdfs_dentry_info *d_info = NULL; + + d_info = hmdfs_d(dentry); + if (!d_info) { + hmdfs_err("d_info null and return"); + return; + } + + mutex_lock(&d_info->remote_cache_list_lock); + list_for_each_entry(item, &(d_info->remote_cache_list_head), list) { + if (item->con->device_id == device_id) { + item->drop_flag = 1; + break; + } + } + mutex_unlock(&d_info->remote_cache_list_lock); +} + +void hmdfs_clear_drop_flag(struct dentry *dentry) +{ + struct remotecache_item *item = NULL; + struct hmdfs_dentry_info *d_info = NULL; + + if (!dentry) { + hmdfs_err("dentry null and return"); + return; + } + + 
d_info = hmdfs_d(dentry); + if (!d_info) { + hmdfs_err("d_info null and return"); + return; + } + + mutex_lock(&d_info->remote_cache_list_lock); + list_for_each_entry(item, &(d_info->remote_cache_list_head), list) { + if (item->drop_flag) + item->drop_flag = 0; + } + mutex_unlock(&d_info->remote_cache_list_lock); +} + +#define DUSTBIN_SUFFIX ".hwbk" +static void hmdfs_rename_bak(struct dentry *dentry) +{ + struct path lower_path; + struct dentry *lower_parent = NULL; + struct dentry *lower_dentry = NULL; + struct dentry *new_dentry = NULL; + char *name = NULL; + int len = 0; + int err = 0; + + hmdfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + len = strlen(lower_dentry->d_name.name) + strlen(DUSTBIN_SUFFIX) + 2; + if (len >= NAME_MAX) { + err = -ENAMETOOLONG; + goto put_lower_path; + } + + name = kmalloc(len, GFP_KERNEL); + if (!name) { + err = -ENOMEM; + goto put_lower_path; + } + + snprintf(name, len, ".%s%s", lower_dentry->d_name.name, DUSTBIN_SUFFIX); + err = mnt_want_write(lower_path.mnt); + if (err) { + hmdfs_info("get write access failed, err %d", err); + goto free_name; + } + + lower_parent = lock_parent(lower_dentry); + new_dentry = lookup_one_len(name, lower_parent, strlen(name)); + if (IS_ERR(new_dentry)) { + err = PTR_ERR(new_dentry); + hmdfs_info("lookup new dentry failed, err %d", err); + goto unlock_parent; + } + + err = vfs_rename(d_inode(lower_parent), lower_dentry, + d_inode(lower_parent), new_dentry, NULL, 0); + + dput(new_dentry); +unlock_parent: + unlock_dir(lower_parent); + mnt_drop_write(lower_path.mnt); +free_name: + kfree(name); +put_lower_path: + hmdfs_put_lower_path(&lower_path); + + if (err) + hmdfs_err("failed to rename file, err %d", err); +} + +int hmdfs_root_unlink(uint64_t device_id, struct path *root_path, + const char *unlink_dir, const char *unlink_name) +{ + int err = 0; + struct path path; + struct dentry *child_dentry = NULL; + struct inode *dir = NULL; + struct inode *child_inode = NULL; + kuid_t tmp_uid; + + err = vfs_path_lookup(root_path->dentry, root_path->mnt, + unlink_dir, LOOKUP_DIRECTORY, &path); + if (err) { + hmdfs_err("found path failed err = %d", err); + return err; + } + dir = d_inode(path.dentry); + inode_lock_nested(dir, I_MUTEX_PARENT); + + child_dentry = lookup_one_len(unlink_name, path.dentry, + strlen(unlink_name)); + if (IS_ERR(child_dentry)) { + err = PTR_ERR(child_dentry); + hmdfs_err("lookup_one_len failed, err = %d", err); + goto unlock_out; + } + if (d_is_negative(child_dentry)) { + err = -ENOENT; + dput(child_dentry); + goto unlock_out; + } + child_inode = d_inode(child_dentry); + + tmp_uid = hmdfs_override_inode_uid(dir); + + hmdfs_mark_drop_flag(device_id, path.dentry); + ihold(child_inode); + err = vfs_unlink(dir, child_dentry, NULL); + /* + * -EOWNERDEAD means we want to put the file in a special dir instead of + * deleting it, specifically the dustbin on a phone, so that the user can + * recover the deleted images and videos.
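+ * The lower filesystem reports this by returning -EOWNERDEAD from + * vfs_unlink(); hmdfs_rename_bak() above then renames the file to a + * hidden name carrying the ".hwbk" (DUSTBIN_SUFFIX) suffix instead of + * removing it.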
+ */ + if (err == -EOWNERDEAD) { + hmdfs_rename_bak(child_dentry); + err = 0; + } + if (err) + hmdfs_err("unlink path failed err = %d", err); + hmdfs_revert_inode_uid(dir, tmp_uid); + dput(child_dentry); + +unlock_out: + inode_unlock(dir); + if (child_inode) + iput(child_inode); + path_put(&path); + return err; +} + +struct dentry *hmdfs_root_mkdir(uint64_t device_id, const char *local_dst_path, + const char *mkdir_dir, const char *mkdir_name, + umode_t mode) +{ + int err; + struct path path; + struct dentry *child_dentry = NULL; + struct dentry *ret = NULL; + char *mkdir_path = NULL; + char *mkdir_abs_path = NULL; + + mkdir_path = hmdfs_connect_path(mkdir_dir, mkdir_name); + if (!mkdir_path) + return ERR_PTR(-EACCES); + + mkdir_abs_path = + hmdfs_get_dentry_absolute_path(local_dst_path, mkdir_path); + if (!mkdir_abs_path) { + ret = ERR_PTR(-ENOMEM); + goto out; + } + + child_dentry = kern_path_create(AT_FDCWD, mkdir_abs_path, + &path, LOOKUP_DIRECTORY); + if (IS_ERR(child_dentry)) { + ret = child_dentry; + goto out; + } + + hmdfs_mark_drop_flag(device_id, child_dentry->d_parent); + err = vfs_mkdir(d_inode(path.dentry), child_dentry, mode); + if (err) { + hmdfs_err("mkdir failed! err=%d", err); + ret = ERR_PTR(err); + goto out_put; + } + ret = dget(child_dentry); +out_put: + done_path_create(&path, child_dentry); +out: + kfree(mkdir_path); + kfree(mkdir_abs_path); + return ret; +} + +struct dentry *hmdfs_root_create(uint64_t device_id, const char *local_dst_path, + const char *create_dir, + const char *create_name, + umode_t mode, bool want_excl) +{ + int err; + struct path path; + struct dentry *child_dentry = NULL; + struct dentry *ret = NULL; + char *create_path = NULL; + char *create_abs_path = NULL; + + create_path = hmdfs_connect_path(create_dir, create_name); + if (!create_path) + return ERR_PTR(-EACCES); + + create_abs_path = + hmdfs_get_dentry_absolute_path(local_dst_path, create_path); + if (!create_abs_path) { + ret = ERR_PTR(-ENOMEM); + goto out; + } + + child_dentry = kern_path_create(AT_FDCWD, create_abs_path, &path, 0); + + if (IS_ERR(child_dentry)) { + ret = child_dentry; + goto out; + } + hmdfs_mark_drop_flag(device_id, child_dentry->d_parent); + err = vfs_create(d_inode(path.dentry), child_dentry, mode, want_excl); + if (err) { + hmdfs_err("path create failed! 
err=%d", err); + ret = ERR_PTR(err); + goto out_put; + } + ret = dget(child_dentry); +out_put: + done_path_create(&path, child_dentry); +out: + kfree(create_path); + kfree(create_abs_path); + return ret; +} + +int hmdfs_root_rmdir(uint64_t device_id, struct path *root_path, + const char *rmdir_dir, const char *rmdir_name) +{ + int err = 0; + struct path path; + struct dentry *child_dentry = NULL; + struct inode *dir = NULL; + + err = vfs_path_lookup(root_path->dentry, root_path->mnt, + rmdir_dir, LOOKUP_DIRECTORY, &path); + if (err) { + hmdfs_err("found path failed err = %d", err); + return err; + } + dir = d_inode(path.dentry); + inode_lock_nested(dir, I_MUTEX_PARENT); + + child_dentry = lookup_one_len(rmdir_name, path.dentry, + strlen(rmdir_name)); + if (IS_ERR(child_dentry)) { + err = PTR_ERR(child_dentry); + hmdfs_err("lookup_one_len failed, err = %d", err); + goto unlock_out; + } + if (d_is_negative(child_dentry)) { + err = -ENOENT; + dput(child_dentry); + goto unlock_out; + } + + hmdfs_mark_drop_flag(device_id, path.dentry); + err = vfs_rmdir(dir, child_dentry); + if (err) + hmdfs_err("rmdir failed err = %d", err); + dput(child_dentry); + +unlock_out: + inode_unlock(dir); + path_put(&path); + return err; +} + +int hmdfs_root_rename(struct hmdfs_sb_info *sbi, uint64_t device_id, + const char *oldpath, const char *oldname, + const char *newpath, const char *newname, + unsigned int flags) +{ + int err = 0; + struct path path_dst; + struct path path_old; + struct path path_new; + struct dentry *trap = NULL; + struct dentry *old_dentry = NULL; + struct dentry *new_dentry = NULL; + + err = kern_path(sbi->local_dst, 0, &path_dst); + if (err) { + hmdfs_err("kern_path for local dst failed %d", err); + return err; + } + + err = vfs_path_lookup(path_dst.dentry, path_dst.mnt, oldpath, 0, + &path_old); + if (err) { + hmdfs_info("lookup oldpath from local_dst failed, err %d", err); + goto put_path_dst; + } + + err = vfs_path_lookup(path_dst.dentry, path_dst.mnt, newpath, 0, + &path_new); + if (err) { + hmdfs_info("lookup newpath from local_dst failed, err %d", err); + goto put_path_old; + } + + err = mnt_want_write(path_dst.mnt); + if (err) { + hmdfs_info("get write access failed for local_dst, err %d", + err); + goto put_path_new; + } + + trap = lock_rename(path_new.dentry, path_old.dentry); + + old_dentry = lookup_one_len(oldname, path_old.dentry, strlen(oldname)); + if (IS_ERR(old_dentry)) { + err = PTR_ERR(old_dentry); + hmdfs_info("lookup old dentry failed, err %d", err); + goto unlock; + } + + /* source should not be ancestor of target */ + if (old_dentry == trap) { + err = -EINVAL; + goto put_old_dentry; + } + + new_dentry = lookup_one_len(newname, path_new.dentry, strlen(newname)); + if (IS_ERR(new_dentry)) { + err = PTR_ERR(new_dentry); + hmdfs_info("lookup new dentry failed, err %d", err); + goto put_old_dentry; + } + + /* + * Exchange rename is not supported, thus target should not be an + * ancestor of source. 
+ */ + if (trap == new_dentry) { + err = -ENOTEMPTY; + goto put_new_dentry; + } + + if (d_is_positive(new_dentry) && (flags & RENAME_NOREPLACE)) { + err = -EEXIST; + goto put_new_dentry; + } + + hmdfs_mark_drop_flag(device_id, path_old.dentry); + if (path_old.dentry != path_new.dentry) + hmdfs_mark_drop_flag(device_id, path_new.dentry); + + err = vfs_rename(d_inode(path_old.dentry), old_dentry, + d_inode(path_new.dentry), new_dentry, NULL, 0); + +put_new_dentry: + dput(new_dentry); +put_old_dentry: + dput(old_dentry); +unlock: + unlock_rename(path_new.dentry, path_old.dentry); + mnt_drop_write(path_dst.mnt); +put_path_new: + path_put(&path_new); +put_path_old: + path_put(&path_old); +put_path_dst: + path_put(&path_dst); + + return err; +} + +int hmdfs_get_path_in_sb(struct super_block *sb, const char *name, + unsigned int flags, struct path *path) +{ + int err; + + err = kern_path(name, flags, path); + if (err) { + hmdfs_err("can't get %s %d\n", name, err); + return err; + } + + /* should ensure the path belongs to this sb */ + if (path->dentry->d_sb != sb) { + err = -EINVAL; + hmdfs_err("Wrong sb: %s on %s", name, + path->dentry->d_sb->s_type->name); + path_put(path); + } + + return err; +} diff --git a/fs/hmdfs/hmdfs_dentryfile.h b/fs/hmdfs/hmdfs_dentryfile.h new file mode 100644 index 000000000000..df1463007f15 --- /dev/null +++ b/fs/hmdfs/hmdfs_dentryfile.h @@ -0,0 +1,342 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/hmdfs_dentryfile.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_DENTRYFILE_H +#define HMDFS_DENTRYFILE_H + +#include "hmdfs.h" +#include + +/* used to escape from the hmdfs file system; hmdfs hides the following names */ +#define CURRENT_DIR "." +#define PARENT_DIR ".." + +/* local dentry cache data */ +#define DENTRY_FILE_XATTR_NAME "user.hmdfs_cache" + +#define DENTRY_FILE_NAME_RETRY 10 + +#define MAX_BUCKET_LEVEL 63 +#define BUCKET_BLOCKS 2 +#define MAX_DIR_BUCKETS (1 << ((MAX_BUCKET_LEVEL / 2) - 1)) + +#define CONFLICTING_FILE_CONST_SUFFIX "_conflict_dev" +#define CONFLICTING_FILE_SUFFIX "_conflict_dev%u" +#define CONFLICTING_DIR_SUFFIX "_remote_directory" + +#define POS_BIT_NUM 64 +#define DEV_ID_BIT_NUM 16 +#define GROUP_ID_BIT_NUM 39 +#define OFFSET_BIT_NUM 8 +#define OFFSET_BIT_MASK 0xFF + +#define DEFAULT_DCACHE_TIMEOUT 30 +#define DEFAULT_DCACHE_PRECISION 10 +#define DEFAULT_DCACHE_THRESHOLD 1000 +#define HMDFS_STALE_REMOTE_ISIZE ULLONG_MAX + +/* Seconds per week */ +#define MAX_DCACHE_TIMEOUT 604800 + +struct hmdfs_iterate_callback { + struct dir_context ctx; + struct dir_context *caller; + int result; + struct rb_root *root; +}; + +/* + * 4096 = version(1) + bitmap(10) + reserved(5) + * + nsl(80 * 43) + filename(80 * 8) + */ +#define DENTRYGROUP_SIZE 4096 +#define DENTRY_NAME_LEN 8 +#define DENTRY_RESERVED_LENGTH 3 +#define DENTRY_PER_GROUP 80 +#define DENTRY_BITMAP_LENGTH 10 +#define DENTRY_GROUP_RESERVED 5 +#define DENTRYGROUP_HEADER 4096 + +struct hmdfs_dentry { + __le32 hash; + __le16 i_mode; + __le16 namelen; + __le64 i_size; + /* modification time */ + __le64 i_mtime; + /* modification time in nanoseconds */ + __le32 i_mtime_nsec; + /* combination of inode number and generation */ + __le64 i_ino; + __le32 i_flag; + /* reserved bytes for long-term extension, total 43 bytes */ + __u8 reserved[DENTRY_RESERVED_LENGTH]; +} __packed; + +/* 4K / 51 bytes = 80 dentries per dentry group */ +struct hmdfs_dentry_group { + __u8 dentry_version; /* dentry version starts from 1 */ + __u8 bitmap[DENTRY_BITMAP_LENGTH]; + struct hmdfs_dentry
nsl[DENTRY_PER_GROUP]; + __u8 filename[DENTRY_PER_GROUP][DENTRY_NAME_LEN]; + __u8 reserved[DENTRY_GROUP_RESERVED]; +} __packed; + +/* + * The content of the first 4K block in dentryfile.dat. + * Used to check whether the dcache can be used directly or + * needs to be rebuilt. + * + * Since ctime has a precision of 10ms or less, a dcache rebuilt + * at the same time as the dentry inode's ctime may be + * inconsistent with it. + * e.g.: create 1.jpg 2.jpg 3.jpg + * a dcache rebuilt at that moment may only contain 1.jpg 2.jpg + * So we need these times to verify the dcache. + */ +struct hmdfs_dcache_header { + /* The time of the dcache rebuild */ + __le64 dcache_crtime; + __le64 dcache_crtime_nsec; + + /* The directory inode ctime when the dcache was rebuilt */ + __le64 dentry_ctime; + __le64 dentry_ctime_nsec; + + /* The dentry count */ + __le64 num; + + /* The case sensitivity flag */ + __u8 case_sensitive; +} __packed; + +static inline loff_t get_dentry_group_pos(unsigned int bidx) +{ + return ((loff_t)bidx) * DENTRYGROUP_SIZE + DENTRYGROUP_HEADER; +} + +static inline unsigned int get_dentry_group_cnt(struct inode *inode) +{ + loff_t size = i_size_read(inode); + + return size >= DENTRYGROUP_HEADER ? + (size - DENTRYGROUP_HEADER) / DENTRYGROUP_SIZE : + 0; +} + +#define DENTRY_NAME_MAX_LEN (DENTRY_PER_GROUP * DENTRY_NAME_LEN) +#define BITS_PER_BYTE 8 +#define HMDFS_SLOT_LEN_BITS 3 +#define get_dentry_slots(x) (((x) + BITS_PER_BYTE - 1) >> HMDFS_SLOT_LEN_BITS) + +#define INUNUMBER_START 10000000 + +#ifdef CONFIG_HMDFS_FS_PERMISSION +#define DENTRY_FILE_PERM 0660 +#else +#define DENTRY_FILE_PERM 0666 +#endif + +struct hmdfs_dcache_lookup_ctx { + struct hmdfs_sb_info *sbi; + const struct qstr *name; + struct file *filp; + __u32 hash; + + /* for case sensitive */ + unsigned int bidx; + struct hmdfs_dentry_group *page; + + /* for case insensitive */ + struct hmdfs_dentry *insense_de; + unsigned int insense_bidx; + struct hmdfs_dentry_group *insense_page; +}; + +extern void hmdfs_init_dcache_lookup_ctx(struct hmdfs_dcache_lookup_ctx *ctx, + struct hmdfs_sb_info *sbi, + const struct qstr *qstr, + struct file *filp); + +int create_dentry(struct dentry *child_dentry, struct inode *inode, + struct file *file, struct hmdfs_sb_info *sbi); +int read_dentry(struct hmdfs_sb_info *sbi, char *file_name, + struct dir_context *ctx); +struct hmdfs_dentry *hmdfs_find_dentry(struct dentry *child_dentry, + struct hmdfs_dcache_lookup_ctx *ctx); +void hmdfs_delete_dentry(struct dentry *d, struct file *filp); +int hmdfs_rename_dentry(struct dentry *old_dentry, struct dentry *new_dentry, + struct file *old_filp, struct file *new_filp); +int get_inonumber(void); +struct file *create_local_dentry_file_cache(struct hmdfs_sb_info *sbi); +int update_inode_to_dentry(struct dentry *child_dentry, struct inode *inode); +struct file *cache_file_persistent(struct hmdfs_peer *con, struct file *filp, + const char *relative_path, bool server); + +#define HMDFS_TYPE_COMMON 0 +#define HMDFS_TYPE_DOT 1 +#define HMDFS_TYPE_DENTRY 2 +#define HMDFS_TYPE_DENTRY_CACHE 3 +int hmdfs_file_type(const char *name); + +loff_t hmdfs_set_pos(unsigned long dev_id, unsigned long group_id, + unsigned long offset); + +struct getdents_callback_real { + struct dir_context ctx; + struct path *parent_path; + loff_t num; + struct file *file; + struct hmdfs_sb_info *sbi; + const char *dir; +}; + +struct file *hmdfs_server_rebuild_dents(struct hmdfs_sb_info *sbi, + struct path *path, loff_t *num, + const char *dir); + +#define DCACHE_LIFETIME 30 + +struct clearcache_item { + uint64_t dev_id; + 
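/* dentry cache file for this device, pinned via get_file() */ +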
struct file *filp; + unsigned long time; + struct list_head list; + struct kref ref; + struct hmdfs_dentry_info *d_info; +}; + +void hmdfs_add_remote_cache_list(struct hmdfs_peer *con, const char *dir_path); + +struct remotecache_item { + struct hmdfs_peer *con; + struct list_head list; + __u8 drop_flag; +}; + +#define HMDFS_CFN_CID_SIZE 65 +#define HMDFS_SERVER_CID "" + +struct cache_file_node { + struct list_head list; + struct hmdfs_sb_info *sbi; + char *relative_path; + u8 cid[HMDFS_CFN_CID_SIZE]; + refcount_t ref; + bool server; + struct file *filp; +}; + +struct cache_file_item { + struct list_head list; + const char *name; +}; + +struct cache_file_callback { + struct dir_context ctx; + const char *dirname; + struct hmdfs_sb_info *sbi; + bool server; + struct list_head list; +}; + +int hmdfs_drop_remote_cache_dents(struct dentry *dentry); +void hmdfs_send_drop_push(struct hmdfs_peer *con, const char *path); +void hmdfs_mark_drop_flag(uint64_t device_id, struct dentry *dentry); +void hmdfs_clear_drop_flag(struct dentry *dentry); +void delete_in_cache_file(uint64_t dev_id, struct dentry *dentry); +void create_in_cache_file(uint64_t dev_id, struct dentry *dentry); +struct clearcache_item *hmdfs_find_cache_item(uint64_t dev_id, + struct dentry *dentry); +bool hmdfs_cache_revalidate(unsigned long conn_time, uint64_t dev_id, + struct dentry *dentry); +void hmdfs_remove_cache_filp(struct hmdfs_peer *con, struct dentry *dentry); +int hmdfs_add_cache_list(uint64_t dev_id, struct dentry *dentry, + struct file *filp); +int hmdfs_clear_cache_dents(struct dentry *dentry, bool remove_cache); + +int hmdfs_root_unlink(uint64_t device_id, struct path *root_path, + const char *unlink_dir, const char *unlink_name); +struct dentry *hmdfs_root_mkdir(uint64_t device_id, const char *local_dst_path, + const char *mkdir_dir, const char *mkdir_name, + umode_t mode); +struct dentry *hmdfs_root_create(uint64_t device_id, const char *local_dst_path, + const char *create_dir, + const char *create_name, + umode_t mode, bool want_excl); +int hmdfs_root_rmdir(uint64_t device_id, struct path *root_path, + const char *rmdir_dir, const char *rmdir_name); +int hmdfs_root_rename(struct hmdfs_sb_info *sbi, uint64_t device_id, + const char *oldpath, const char *oldname, + const char *newpath, const char *newname, + unsigned int flags); + +int hmdfs_get_path_in_sb(struct super_block *sb, const char *name, + unsigned int flags, struct path *path); + +int hmdfs_wlock_file(struct file *filp, loff_t start, loff_t len); +int hmdfs_rlock_file(struct file *filp, loff_t start, loff_t len); +int hmdfs_unlock_file(struct file *filp, loff_t start, loff_t len); +long cache_file_truncate(struct hmdfs_sb_info *sbi, const struct path *path, + loff_t length); +ssize_t cache_file_read(struct hmdfs_sb_info *sbi, struct file *filp, void *buf, + size_t count, loff_t *pos); +ssize_t cache_file_write(struct hmdfs_sb_info *sbi, struct file *filp, + const void *buf, size_t count, loff_t *pos); +int hmdfs_metainfo_read(struct hmdfs_sb_info *sbi, struct file *filp, + void *buffer, int buffersize, int bidx); + +bool get_remote_dentry_file(struct dentry *dentry, struct hmdfs_peer *con); +void get_remote_dentry_file_sync(struct dentry *dentry, struct hmdfs_peer *con); + +void release_cache_item(struct kref *ref); +void remove_cache_item(struct clearcache_item *item); + +void hmdfs_cfn_load(struct hmdfs_sb_info *sbi); +void hmdfs_cfn_destroy(struct hmdfs_sb_info *sbi); +struct cache_file_node *find_cfn(struct hmdfs_sb_info *sbi, const char *cid, + const 
char *path, bool server); +void release_cfn(struct cache_file_node *cfn); +void destroy_cfn(struct hmdfs_sb_info *sbi); +void remove_cfn(struct cache_file_node *cfn); +int delete_dentry_file(struct file *filp); +struct file *hmdfs_server_cache_revalidate(struct hmdfs_sb_info *sbi, + const char *recvpath, + struct path *path); +int write_header(struct file *filp, struct hmdfs_dcache_header *header); + +static inline struct list_head *get_list_head(struct hmdfs_sb_info *sbi, + bool server) +{ + return ((server) ? &(sbi)->server_cache : &(sbi)->client_cache); +} + +/* + * generate_u64_ino - generate a new 64 bit inode number + * + * @ino: origin 32 bit inode number + * @generation: origin 32 bit inode generation + * + * We need both the remote inode number and generation to ensure the uniqueness + * of the local inode, thus we store inode->i_ino in the lower 32 bits, and + * inode->i_generation in the higher 32 bits. + */ +static inline uint64_t generate_u64_ino(unsigned long ino, + unsigned int generation) +{ + return (uint64_t)ino | ((uint64_t)generation << 32); +} + +static inline bool cache_item_revalidate(unsigned long conn_time, + unsigned long item_time, + unsigned int timeout) +{ + return time_before_eq(jiffies, item_time + timeout * HZ) && + time_before_eq(conn_time, item_time); +} + +#endif diff --git a/fs/hmdfs/hmdfs_device_view.h b/fs/hmdfs/hmdfs_device_view.h new file mode 100644 index 000000000000..dcc49fb89597 --- /dev/null +++ b/fs/hmdfs/hmdfs_device_view.h @@ -0,0 +1,237 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/hmdfs_device_view.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_DEVICE_VIEW_H +#define HMDFS_DEVICE_VIEW_H + +#include "hmdfs.h" + +/***************************************************************************** + * macro definition + *****************************************************************************/ + +#define DEVICE_VIEW_ROOT "device_view" +#define MERGE_VIEW_ROOT "merge_view" +#define UPDATE_LOCAL_DST "/device_view/local/" + +#define DEVICE_VIEW_LOCAL "local" + +/* + * In order to distinguish them from the vfs flags, we define our own bitmask; + * it must be converted to the vfs bitmask before calling vfs apis. + */ +#define HMDFS_LOOKUP_REVAL 0x1 + +enum HMDFS_FILE_TYPE { + HM_REG = 0, + HM_SYMLINK = 1, + + HM_MAX_FILE_TYPE = 0xFF +}; + +struct bydev_inode_info { + struct inode *lower_inode; + uint64_t ino; +}; + +struct hmdfs_dentry_info { + struct path lower_path; + unsigned long time; + struct list_head cache_list_head; + spinlock_t cache_list_lock; + struct list_head remote_cache_list_head; + struct mutex remote_cache_list_lock; + __u8 file_type; + __u8 dentry_type; + uint64_t device_id; + spinlock_t lock; + struct mutex cache_pull_lock; + bool async_readdir_in_progress; +}; + +struct hmdfs_lookup_ret { + uint64_t i_size; + uint64_t i_mtime; + uint32_t i_mtime_nsec; + uint16_t i_mode; + uint64_t i_ino; +}; + +struct hmdfs_getattr_ret { + /* + * if stat->result_mask is 0, it means this remote getattr failed during + * lookup, see details in hmdfs_server_getattr.
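+ * Callers should therefore check stat.result_mask before trusting + * the kstat fields.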
+ */ + struct kstat stat; + uint32_t i_flags; + uint64_t fsid; +}; + +extern int hmdfs_remote_getattr(struct hmdfs_peer *conn, struct dentry *dentry, + unsigned int lookup_flags, + struct hmdfs_getattr_ret **getattr_result); + +/***************************************************************************** + * local/remote inode/file operations + *****************************************************************************/ + +extern const struct dentry_operations hmdfs_dops; +extern const struct dentry_operations hmdfs_dev_dops; + +/* local device operation */ +extern const struct inode_operations hmdfs_file_iops_local; +extern const struct file_operations hmdfs_file_fops_local; +extern const struct inode_operations hmdfs_dir_inode_ops_local; +extern const struct file_operations hmdfs_dir_ops_local; +extern const struct inode_operations hmdfs_symlink_iops_local; + +/* remote device operation */ +extern const struct inode_operations hmdfs_dev_file_iops_remote; +extern const struct file_operations hmdfs_dev_file_fops_remote; +extern const struct address_space_operations hmdfs_dev_file_aops_remote; +extern const struct inode_operations hmdfs_dev_dir_inode_ops_remote; +extern const struct file_operations hmdfs_dev_dir_ops_remote; +extern int hmdfs_dev_unlink_from_con(struct hmdfs_peer *conn, + struct dentry *dentry); +extern int hmdfs_dev_readdir_from_con(struct hmdfs_peer *con, struct file *file, + struct dir_context *ctx); +int hmdfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); +int hmdfs_rmdir(struct inode *dir, struct dentry *dentry); +int hmdfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, + bool want_excl); +int hmdfs_unlink(struct inode *dir, struct dentry *dentry); +int hmdfs_remote_unlink(struct hmdfs_peer *conn, struct dentry *dentry); +int hmdfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags); +loff_t hmdfs_file_llseek_local(struct file *file, loff_t offset, int whence); +ssize_t hmdfs_read_local(struct kiocb *iocb, struct iov_iter *iter); +ssize_t hmdfs_write_local(struct kiocb *iocb, struct iov_iter *iter); +int hmdfs_file_release_local(struct inode *inode, struct file *file); +int hmdfs_file_mmap_local(struct file *file, struct vm_area_struct *vma); +struct dentry *hmdfs_lookup(struct inode *parent_inode, + struct dentry *child_dentry, unsigned int flags); +struct dentry *hmdfs_lookup_local(struct inode *parent_inode, + struct dentry *child_dentry, + unsigned int flags); +struct dentry *hmdfs_lookup_remote(struct inode *parent_inode, + struct dentry *child_dentry, + unsigned int flags); +int hmdfs_symlink_local(struct inode *dir, struct dentry *dentry, + const char *symname); +int hmdfs_fsync_local(struct file *file, loff_t start, loff_t end, + int datasync); +int hmdfs_symlink(struct inode *dir, struct dentry *dentry, + const char *symname); +int hmdfs_fsync(struct file *file, loff_t start, loff_t end, int datasync); + +/***************************************************************************** + * common functions declaration + *****************************************************************************/ + +static inline struct hmdfs_dentry_info *hmdfs_d(struct dentry *dentry) +{ + return dentry->d_fsdata; +} + +static inline bool hm_isreg(uint8_t file_type) +{ + return (file_type == HM_REG); +} + +static inline bool hm_islnk(uint8_t file_type) +{ + return (file_type == HM_SYMLINK); +} +struct inode *fill_inode_remote(struct super_block *sb, struct 
hmdfs_peer *con, + struct hmdfs_lookup_ret *lookup_result, + struct inode *dir); +struct hmdfs_lookup_ret *get_remote_inode_info(struct hmdfs_peer *con, + struct dentry *dentry, + unsigned int flags); +void hmdfs_set_time(struct dentry *dentry, unsigned long time); +struct inode *fill_inode_local(struct super_block *sb, + struct inode *lower_inode); +struct inode *fill_root_inode(struct super_block *sb, + struct inode *lower_inode); +struct inode *fill_device_inode(struct super_block *sb, + struct inode *lower_inode); +struct hmdfs_lookup_ret *hmdfs_lookup_by_con(struct hmdfs_peer *con, + struct dentry *dentry, + struct qstr *qstr, + unsigned int flags, + const char *relative_path); +char *hmdfs_connect_path(const char *path, const char *name); + +char *hmdfs_get_dentry_relative_path(struct dentry *dentry); +char *hmdfs_get_dentry_absolute_path(const char *rootdir, + const char *relative_path); +int hmdfs_convert_lookup_flags(unsigned int hmdfs_flags, + unsigned int *vfs_flags); +static inline void hmdfs_get_lower_path(struct dentry *dent, struct path *pname) +{ + spin_lock(&hmdfs_d(dent)->lock); + pname->dentry = hmdfs_d(dent)->lower_path.dentry; + pname->mnt = hmdfs_d(dent)->lower_path.mnt; + path_get(pname); + spin_unlock(&hmdfs_d(dent)->lock); +} + +static inline void hmdfs_put_lower_path(struct path *pname) +{ + path_put(pname); +} + +static inline void hmdfs_put_reset_lower_path(struct dentry *dent) +{ + struct path pname; + + spin_lock(&hmdfs_d(dent)->lock); + if (hmdfs_d(dent)->lower_path.dentry) { + pname.dentry = hmdfs_d(dent)->lower_path.dentry; + pname.mnt = hmdfs_d(dent)->lower_path.mnt; + hmdfs_d(dent)->lower_path.dentry = NULL; + hmdfs_d(dent)->lower_path.mnt = NULL; + spin_unlock(&hmdfs_d(dent)->lock); + path_put(&pname); + } else { + spin_unlock(&hmdfs_d(dent)->lock); + } +} + +static inline void hmdfs_set_lower_path(struct dentry *dent, struct path *pname) +{ + spin_lock(&hmdfs_d(dent)->lock); + hmdfs_d(dent)->lower_path.dentry = pname->dentry; + hmdfs_d(dent)->lower_path.mnt = pname->mnt; + spin_unlock(&hmdfs_d(dent)->lock); +} + +/* Only reg files for HMDFS_LAYER_OTHER_* support xattr */ +static inline bool hmdfs_support_xattr(struct dentry *dentry) +{ + struct inode *inode = d_inode(dentry); + struct hmdfs_inode_info *info = hmdfs_i(inode); + struct hmdfs_dentry_info *gdi = hmdfs_d(dentry); + + if (info->inode_type != HMDFS_LAYER_OTHER_LOCAL && + info->inode_type != HMDFS_LAYER_OTHER_REMOTE) + return false; + + if (!S_ISREG(inode->i_mode)) + return false; + + if (hm_islnk(gdi->file_type)) + return false; + + return true; +} + +int init_hmdfs_dentry_info(struct hmdfs_sb_info *sbi, struct dentry *dentry, + int dentry_type); + +#endif diff --git a/fs/hmdfs/hmdfs_merge_view.h b/fs/hmdfs/hmdfs_merge_view.h new file mode 100644 index 000000000000..01064b3d98df --- /dev/null +++ b/fs/hmdfs/hmdfs_merge_view.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/hmdfs_merge_view.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_MERGE_VIEW_H +#define HMDFS_MERGE_VIEW_H + +#include "hmdfs.h" + +#include "comm/connection.h" +#include + +/***************************************************************************** + * Dentries for the merge view and their comrades. + * A dentry's lower dentry is named COMRADE.
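+ * Each comrade couples a lower dentry (lo_d) with the device (dev_id) it + * comes from; same-named dentries from different devices are linked on + * the owning dentry's comrade_list.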
+ *****************************************************************************/ + +struct hmdfs_dentry_info_merge { + unsigned long ctime; + // For the merge view to link dentries with the same names + struct mutex comrade_list_lock; + struct list_head comrade_list; +}; + +struct hmdfs_dentry_comrade { + uint64_t dev_id; + struct dentry *lo_d; + struct list_head list; +}; + +enum FILE_CMD_MERGE { + F_MKDIR_MERGE = 0, + F_CREATE_MERGE = 1, + F_SYMLINK_MERGE = 2, +}; + +struct hmdfs_recursive_para { + bool is_last; + int opcode; + umode_t mode; + bool want_excl; + const char *name; +}; + +static inline struct hmdfs_dentry_info_merge *hmdfs_dm(struct dentry *dentry) +{ + return dentry->d_fsdata; +} + +static inline umode_t hmdfs_cm(struct hmdfs_dentry_comrade *comrade) +{ + return d_inode(comrade->lo_d)->i_mode; +} + +static inline bool comrade_is_local(struct hmdfs_dentry_comrade *comrade) +{ + return comrade->dev_id == HMDFS_DEVID_LOCAL; +} + +struct dentry *hmdfs_lookup_merge(struct inode *parent_inode, + struct dentry *child_dentry, + unsigned int flags); + +struct hmdfs_dentry_comrade *alloc_comrade(struct dentry *lo_d, int dev_id); + +void link_comrade(struct list_head *onstack_comrades_head, + struct hmdfs_dentry_comrade *comrade); + +static inline void destroy_comrade(struct hmdfs_dentry_comrade *comrade) +{ + dput(comrade->lo_d); + kfree(comrade); +} + +void clear_comrades(struct dentry *dentry); + +static inline void link_comrade_unlocked(struct dentry *dentry, + struct hmdfs_dentry_comrade *comrade) +{ + mutex_lock(&hmdfs_dm(dentry)->comrade_list_lock); + link_comrade(&hmdfs_dm(dentry)->comrade_list, comrade); + mutex_unlock(&hmdfs_dm(dentry)->comrade_list_lock); +} + +void clear_comrades_locked(struct list_head *comrade_list); + +#define for_each_comrade_locked(_dentry, _comrade) \ + list_for_each_entry(_comrade, &(hmdfs_dm(_dentry)->comrade_list), list) + +#define hmdfs_trace_merge(_trace_func, _parent_inode, _child_dentry, err) \ + { \ + struct hmdfs_dentry_comrade *comrade; \ + struct hmdfs_dentry_info_merge *dm = hmdfs_dm(_child_dentry); \ + _trace_func(_parent_inode, _child_dentry, err); \ + if (likely(dm)) { \ + mutex_lock(&dm->comrade_list_lock); \ + for_each_comrade_locked(_child_dentry, comrade) \ + trace_hmdfs_show_comrade(_child_dentry, \ + comrade->lo_d, \ + comrade->dev_id); \ + mutex_unlock(&dm->comrade_list_lock); \ + } \ + } + +#define hmdfs_trace_rename_merge(olddir, olddentry, newdir, newdentry, err) \ + { \ + struct hmdfs_dentry_comrade *comrade; \ + trace_hmdfs_rename_merge(olddir, olddentry, newdir, newdentry, \ + err); \ + mutex_lock(&hmdfs_dm(olddentry)->comrade_list_lock); \ + for_each_comrade_locked(olddentry, comrade) \ + trace_hmdfs_show_comrade(olddentry, comrade->lo_d, \ + comrade->dev_id); \ + mutex_unlock(&hmdfs_dm(olddentry)->comrade_list_lock); \ + mutex_lock(&hmdfs_dm(newdentry)->comrade_list_lock); \ + for_each_comrade_locked(newdentry, comrade) \ + trace_hmdfs_show_comrade(newdentry, comrade->lo_d, \ + comrade->dev_id); \ + mutex_unlock(&hmdfs_dm(newdentry)->comrade_list_lock); \ + } + +/***************************************************************************** + * Helper functions abstracting out comrades + *****************************************************************************/ + +static inline bool hmdfs_i_merge(struct hmdfs_inode_info *hii) +{ + __u8 t = hii->inode_type; + return t == HMDFS_LAYER_FIRST_MERGE || t == HMDFS_LAYER_OTHER_MERGE; +} + +struct dentry *hmdfs_get_lo_d(struct dentry *dentry, int dev_id); +struct dentry
*hmdfs_get_fst_lo_d(struct dentry *dentry); + +/***************************************************************************** + * Inode operations for the merge view + *****************************************************************************/ + +extern const struct inode_operations hmdfs_file_iops_merge; +extern const struct file_operations hmdfs_file_fops_merge; +extern const struct inode_operations hmdfs_symlink_iops_merge; +extern const struct inode_operations hmdfs_dir_iops_merge; +extern const struct file_operations hmdfs_dir_fops_merge; +extern const struct dentry_operations hmdfs_dops_merge; + +/***************************************************************************** + * dentry cache for the merge view + *****************************************************************************/ +extern struct kmem_cache *hmdfs_dentry_merge_cachep; + +#endif // HMDFS_MERGE_VIEW_H diff --git a/fs/hmdfs/hmdfs_server.c b/fs/hmdfs/hmdfs_server.c new file mode 100644 index 000000000000..c50e9f9de842 --- /dev/null +++ b/fs/hmdfs/hmdfs_server.c @@ -0,0 +1,2073 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/hmdfs_server.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include "hmdfs_server.h" + +#include +#include +#include +#include +#include + +#include "authority/authentication.h" +#include "comm/fault_inject.h" +#include "hmdfs.h" +#include "hmdfs_dentryfile.h" +#include "hmdfs_trace.h" +#include "server_writeback.h" +#include "comm/node_cb.h" + +#define HMDFS_MAX_HIDDEN_DIR 1 + +struct hmdfs_open_info { + struct file *file; + struct inode *inode; + bool stat_valid; + struct kstat stat; + uint64_t real_ino; + int file_id; +}; + +static int insert_file_into_conn(struct hmdfs_peer *conn, struct file *file) +{ + struct idr *idr = &(conn->file_id_idr); + int ret; + + idr_preload(GFP_KERNEL); + spin_lock(&(conn->file_id_lock)); + ret = idr_alloc_cyclic(idr, file, 0, 0, GFP_NOWAIT); + spin_unlock(&(conn->file_id_lock)); + idr_preload_end(); + return ret; +} + +/* + * get_file_from_conn - get file from conn by file_id. Note that + * an additional reference is acquired for the returned file; the caller should + * put it once the file is no longer used.
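+ * The lookup runs under rcu_read_lock() with get_file_rcu(), so it is + * safe against a concurrent remove_file_from_conn().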
+ */ +static struct file *get_file_from_conn(struct hmdfs_peer *conn, __u32 file_id) +{ + struct file *file; + struct idr *idr = &(conn->file_id_idr); + + rcu_read_lock(); + file = idr_find(idr, file_id); + if (file && !get_file_rcu(file)) + file = NULL; + rcu_read_unlock(); + return file; +} + +void remove_file_from_conn(struct hmdfs_peer *conn, __u32 file_id) +{ + spinlock_t *lock = &(conn->file_id_lock); + struct idr *idr = &(conn->file_id_idr); + + spin_lock(lock); + idr_remove(idr, file_id); + spin_unlock(lock); +} + +struct file *hmdfs_open_photokit_path(struct hmdfs_sb_info *sbi, + const char *path) +{ + struct file *file; + int err; + const char *root_name = sbi->local_dst; + char *real_path; + int path_len; + + path_len = strlen(root_name) + strlen(path) + 2; + if (path_len >= PATH_MAX) { + err = -EINVAL; + return ERR_PTR(err); + } + real_path = kzalloc(path_len, GFP_KERNEL); + if (!real_path) { + err = -ENOMEM; + return ERR_PTR(err); + } + + sprintf(real_path, "%s/%s", root_name, path); + file = filp_open(real_path, O_RDWR | O_LARGEFILE, 0644); + if (IS_ERR(file)) { + hmdfs_info("filp_open failed: %ld", PTR_ERR(file)); + } else { + hmdfs_info("get file with magic %lu", + file->f_inode->i_sb->s_magic); + } + + kfree(real_path); + return file; +} + +struct file *hmdfs_open_path(struct hmdfs_sb_info *sbi, const char *path) +{ + struct path root_path; + struct file *file; + int err; + const char *root_name = sbi->local_dst; + + err = kern_path(root_name, 0, &root_path); + if (err) { + hmdfs_info("kern_path failed: %d", err); + return ERR_PTR(err); + } + file = file_open_root(&root_path, path, + O_RDWR | O_LARGEFILE, 0644); + path_put(&root_path); + if (IS_ERR(file)) { + hmdfs_err( + "GRAPERR sb->s_readonly_remount %d sb_flag %lu", + sbi->sb->s_readonly_remount, sbi->sb->s_flags); + hmdfs_info("file_open_root failed: %ld", PTR_ERR(file)); + } else { + hmdfs_info("get file with magic %lu", + file->f_inode->i_sb->s_magic); + } + return file; +} + +inline void hmdfs_close_path(struct file *file) +{ + fput(file); +} + +/* After going offline, the server closes all files opened by the client */ +void hmdfs_server_offline_notify(struct hmdfs_peer *conn, int evt, + unsigned int seq) +{ + int id; + int count = 0; + unsigned int next; + struct file *filp = NULL; + struct idr *idr = &conn->file_id_idr; + + /* wait for all async work to complete */ + flush_workqueue(conn->req_handle_wq); + flush_workqueue(conn->async_wq); + + /* If there are still open requests being processed, + * we may need to close their files once the peer goes offline. + */ + idr_for_each_entry(idr, filp, id) { + hmdfs_debug("[%d]Server close: id=%d", count, id); + hmdfs_close_path(filp); + count++; + if (count % HMDFS_IDR_RESCHED_COUNT == 0) + cond_resched(); + } + + /* Reinitialize idr */ + next = idr_get_cursor(idr); + idr_destroy(idr); + + idr_init(idr); + idr_set_cursor(idr, next); + + /* Make old file ids stale */ + conn->fid_cookie++; +} + +static struct hmdfs_node_cb_desc server_cb[] = { + { + .evt = NODE_EVT_OFFLINE, + .sync = true, + .min_version = DFS_2_0, + .fn = hmdfs_server_offline_notify + }, +}; + +void __init hmdfs_server_add_node_evt_cb(void) +{ + hmdfs_node_add_evt_cb(server_cb, ARRAY_SIZE(server_cb)); +} + +static int hmdfs_get_inode_by_name(struct hmdfs_peer *con, const char *filename, + uint64_t *ino) +{ + int ret = 0; + struct path root_path; + struct path dst_path; + struct inode *inode = NULL; + + ret = kern_path(con->sbi->local_dst, 0, &root_path); + if (ret) { + hmdfs_err("kern_path failed err = %d", ret); + return ret; + } + + ret
= vfs_path_lookup(root_path.dentry, root_path.mnt, filename, 0, + &dst_path); + if (ret) { + path_put(&root_path); + return ret; + } + + inode = d_inode(dst_path.dentry); + if (con->sbi->sb == inode->i_sb) + inode = hmdfs_i(inode)->lower_inode; + *ino = generate_u64_ino(inode->i_ino, inode->i_generation); + + path_put(&dst_path); + path_put(&root_path); + + return 0; +} + +static struct file *hmdfs_open_file(struct hmdfs_peer *con, + const char *filename, uint8_t file_type, + int *file_id) +{ + struct file *file = NULL; + int id; + + if (!filename) { + hmdfs_err("filename is NULL"); + return ERR_PTR(-EINVAL); + } + + if (hm_islnk(file_type)) + file = hmdfs_open_photokit_path(con->sbi, filename); + else + file = hmdfs_open_path(con->sbi, filename); + if (IS_ERR(file)) + return file; + + id = insert_file_into_conn(con, file); + if (id < 0) { + hmdfs_err("file_id alloc failed! err=%d", id); + hmdfs_close_path(file); + return ERR_PTR(id); + } + *file_id = id; + + return file; +} + +static struct hmdfs_time_t msec_to_timespec(unsigned int msec) +{ + struct hmdfs_time_t timespec = { + .tv_sec = msec / MSEC_PER_SEC, + .tv_nsec = (msec % MSEC_PER_SEC) * NSEC_PER_MSEC, + }; + + return timespec; +} + +static struct hmdfs_time_t hmdfs_current_kernel_time(void) +{ + struct hmdfs_time_t time; + +#if KERNEL_VERSION(4, 18, 0) < LINUX_VERSION_CODE + ktime_get_coarse_real_ts64(&time); +#else + time = current_kernel_time(); +#endif + return time; +} + +/* + * Generate the fid version in the following format: + * + * | boot cookie | con cookie | + * |---------------------|-------------| + * 49 15 (bits) + */ +static uint64_t hmdfs_server_pack_fid_ver(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd) +{ + uint64_t boot_cookie = con->sbi->boot_cookie; + uint16_t con_cookie = con->fid_cookie; + + if (hmdfs_should_fake_fid_ver(&con->sbi->fault_inject, con, + cmd, T_BOOT_COOKIE)) + boot_cookie = hmdfs_gen_boot_cookie(); + + if (hmdfs_should_fake_fid_ver(&con->sbi->fault_inject, con, + cmd, T_CON_COOKIE)) + con_cookie++; + + return (boot_cookie | + (con_cookie & ((1 << HMDFS_FID_VER_BOOT_COOKIE_SHIFT) - 1))); +} + +static struct file *get_file_by_fid_and_ver(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, + __u32 file_id, __u64 file_ver) +{ + struct file *file = NULL; + __u64 cur_file_ver = hmdfs_server_pack_fid_ver(con, cmd); + + if (file_ver != cur_file_ver) { + hmdfs_warning("Stale file version %llu for fid %u (ver %llu)", + file_ver, file_id, cur_file_ver); + return ERR_PTR(-EBADF); + } + + file = get_file_from_conn(con, file_id); + if (!file) + return ERR_PTR(-EBADF); + + return file; +} + +static void hmdfs_update_open_response(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, + struct hmdfs_open_info *info, + struct open_response *resp) +{ + struct hmdfs_time_t current_time = hmdfs_current_kernel_time(); + struct hmdfs_time_t ctime = info->stat_valid ? info->stat.ctime : + info->inode->i_ctime; + struct hmdfs_time_t precision = + msec_to_timespec(con->sbi->dcache_precision); + loff_t size = info->stat_valid ? info->stat.size : + i_size_read(info->inode); + + resp->ino = cpu_to_le64(info->real_ino); + resp->file_ver = cpu_to_le64(hmdfs_server_pack_fid_ver(con, cmd)); + resp->file_id = cpu_to_le32(info->file_id); + resp->file_size = cpu_to_le64(size); + resp->ctime = cpu_to_le64(ctime.tv_sec); + resp->ctime_nsec = cpu_to_le32(ctime.tv_nsec); + + /* + * On the server, ctime might stay the same after an overwrite. We + * introduce a new value stable_ctime to handle the problem.
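+ * For example (see the rules below): with dcache_precision = 10ms and + * ctime = T, an open arriving at T - 1ms gets stable_ctime = 0, one at + * T + 5ms gets stable_ctime = T, and one at T + 20ms gets T + 10ms.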
+ * - if open rpc time < ctime, stable_ctime = 0; + * - if ctime <= open rpc time < ctime + dcache_precision, stable_ctime + * = ctime + * - else, stable_ctime = ctime + dcache_precision; + */ + precision = hmdfs_time_add(ctime, precision); + if (hmdfs_time_compare(¤t_time, &ctime) < 0) { + resp->stable_ctime = cpu_to_le64(0); + resp->stable_ctime_nsec = cpu_to_le32(0); + } else if (hmdfs_time_compare(¤t_time, &ctime) >= 0 && + hmdfs_time_compare(¤t_time, &precision) < 0) { + resp->stable_ctime = resp->ctime; + resp->stable_ctime_nsec = resp->ctime_nsec; + } else { + resp->stable_ctime = cpu_to_le64(precision.tv_sec); + resp->stable_ctime_nsec = cpu_to_le32(precision.tv_nsec); + } +} + +static int hmdfs_get_open_info(struct hmdfs_peer *con, uint8_t file_type, + const char *filename, + struct hmdfs_open_info *info) +{ + int ret = 0; + + info->inode = file_inode(info->file); + info->stat_valid = false; + if (con->sbi->sb == info->inode->i_sb) { + /* if open a regular file */ + info->inode = hmdfs_i(info->inode)->lower_inode; + } else if (con->sbi->lower_sb != info->inode->i_sb) { + /* It's possible that inode is not from lower, for example: + * 1. touch /f2fs/file + * 2. ln -s /sdcard_fs/file /f2fs/link + * 3. cat /hmdfs/link -> generate dentry cache in sdcard_fs + * 4. echo hi >> /hmdfs/file -> append write not through + * sdcard_fs + * 5. cat /hmdfs/link -> got inode in sdcard, which size is + * still 0 + * + * If src file isn't in lower, use getattr to get + * information. + */ + ret = vfs_getattr(&info->file->f_path, &info->stat, STATX_BASIC_STATS | STATX_BTIME, + 0); + if (ret) { + hmdfs_err("call vfs_getattr failed, err %d", ret); + return ret; + } + info->stat_valid = true; + } + + /* if open a link file, get ino from link inode */ + if (hm_islnk(file_type)) { + ret = hmdfs_get_inode_by_name(con, filename, &info->real_ino); + if (ret) + return ret; + } else { + info->real_ino = generate_u64_ino(info->inode->i_ino, + info->inode->i_generation); + } + + return 0; +} + +void hmdfs_server_open(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + struct open_request *recv = data; + int sizeread = sizeof(struct open_response); + struct open_response *resp = NULL; + struct hmdfs_open_info *info = NULL; + int ret = 0; + + trace_hmdfs_server_open_enter(con, recv); + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &ret)) + goto out_err; + + resp = kzalloc(sizeread, GFP_KERNEL); + info = kmalloc(sizeof(*info), GFP_KERNEL); + if (!resp || !info) { + ret = -ENOMEM; + goto err_free; + } + + info->file = hmdfs_open_file(con, recv->buf, recv->file_type, + &info->file_id); + if (IS_ERR(info->file)) { + ret = PTR_ERR(info->file); + goto err_free; + } + + ret = hmdfs_get_open_info(con, recv->file_type, recv->buf, info); + if (ret) + goto err_close; + + hmdfs_update_open_response(con, cmd, info, resp); + + trace_hmdfs_server_open_exit(con, resp, info->file, 0); + ret = hmdfs_sendmessage_response(con, cmd, sizeread, resp, 0); + if (ret) { + hmdfs_err("sending msg response failed, file_id %d, err %d", + info->file_id, ret); + remove_file_from_conn(con, info->file_id); + hmdfs_close_path(info->file); + } + kfree(resp); + kfree(info); + return; + +err_close: + remove_file_from_conn(con, info->file_id); + hmdfs_close_path(info->file); +err_free: + kfree(resp); + kfree(info); +out_err: + trace_hmdfs_server_open_exit(con, NULL, NULL, ret); + hmdfs_send_err_response(con, cmd, ret); +} + +static int hmdfs_check_and_create(struct path *path_parent, + struct dentry *dentry, uint64_t 
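+/*
+ * A minimal self-contained sketch of the fid-version packing used by
+ * hmdfs_server_pack_fid_ver() above. It assumes that
+ * HMDFS_FID_VER_BOOT_COOKIE_SHIFT is 15 (matching the 49/15 split in
+ * the diagram) and that the boot cookie keeps its low 15 bits clear;
+ * both are assumptions, as the constant is defined elsewhere:
+ *
+ * static inline uint64_t example_pack_fid_ver(uint64_t boot_cookie,
+ * uint16_t con_cookie)
+ * {
+ * return boot_cookie | (con_cookie & ((1u << 15) - 1));
+ * }
+ *
+ * Because hmdfs_server_offline_notify() bumps conn->fid_cookie on each
+ * disconnect, every fid handed out before the disconnect fails the
+ * file_ver comparison in get_file_by_fid_and_ver() with -EBADF.
+ */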
+static int hmdfs_check_and_create(struct path *path_parent,
+ struct dentry *dentry, uint64_t device_id,
+ umode_t mode, bool is_excl)
+{
+ int err = 0;
+
+ /* if the inode doesn't exist, create it */
+ if (d_is_negative(dentry)) {
+ hmdfs_mark_drop_flag(device_id, path_parent->dentry);
+ err = vfs_create(d_inode(path_parent->dentry), dentry, mode,
+ is_excl);
+ if (err)
+ hmdfs_err("create failed, err %d", err);
+ } else {
+ if (is_excl)
+ err = -EEXIST;
+ /* if the inode already exists, see if it's a symlink */
+ else if (S_ISREG(d_inode(dentry)->i_mode) &&
+ hm_islnk(hmdfs_d(dentry)->file_type))
+ err = -EINVAL;
+ else if (S_ISDIR(d_inode(dentry)->i_mode))
+ err = -EISDIR;
+ }
+
+ return err;
+}
+
+static int hmdfs_lookup_create(struct hmdfs_peer *con,
+ struct atomic_open_request *recv,
+ struct path *child_path, bool *truncate)
+{
+ int err = 0;
+ struct path path_root;
+ struct path path_parent;
+ uint32_t open_flags = le32_to_cpu(recv->open_flags);
+ char *path = recv->buf;
+ char *filename = recv->buf + le32_to_cpu(recv->path_len) + 1;
+ struct dentry *dentry = NULL;
+
+ err = kern_path(con->sbi->local_dst, LOOKUP_DIRECTORY, &path_root);
+ if (err) {
+ hmdfs_err("no path for %s, err %d", con->sbi->local_dst, err);
+ return err;
+ }
+
+ err = vfs_path_lookup(path_root.dentry, path_root.mnt, path,
+ LOOKUP_DIRECTORY, &path_parent);
+ if (err) {
+ hmdfs_info("no dir in %s, err %d", con->sbi->local_dst, err);
+ goto put_path_root;
+ }
+
+ inode_lock(d_inode(path_parent.dentry));
+ dentry = lookup_one_len(filename, path_parent.dentry, strlen(filename));
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
+ inode_unlock(d_inode(path_parent.dentry));
+ goto put_path_parent;
+ }
+ /* only truncate if the inode already exists */
+ *truncate = ((open_flags & HMDFS_O_TRUNC) && d_is_positive(dentry));
+ err = hmdfs_check_and_create(&path_parent, dentry, con->device_id,
+ le16_to_cpu(recv->mode),
+ open_flags & HMDFS_O_EXCL);
+ inode_unlock(d_inode(path_parent.dentry));
+ if (err) {
+ dput(dentry);
+ } else {
+ child_path->dentry = dentry;
+ child_path->mnt = mntget(path_parent.mnt);
+ }
+
+put_path_parent:
+ path_put(&path_parent);
+put_path_root:
+ path_put(&path_root);
+ return err;
+}
+
+static int hmdfs_dentry_open(struct hmdfs_peer *con,
+ const struct path *path,
+ struct hmdfs_open_info *info)
+{
+ int err = 0;
+
+ info->file = dentry_open(path, O_RDWR | O_LARGEFILE, current_cred());
+ if (IS_ERR(info->file)) {
+ err = PTR_ERR(info->file);
+ hmdfs_err("open file failed, err %d", err);
+ return err;
+ }
+
+ info->file_id = insert_file_into_conn(con, info->file);
+ if (info->file_id < 0) {
+ err = info->file_id;
+ hmdfs_err("file_id alloc failed! 
err %d", err); + hmdfs_close_path(info->file); + return err; + } + + return 0; +} + +static int hmdfs_server_do_atomic_open(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, + struct atomic_open_request *recv, + struct hmdfs_open_info *info, + struct atomic_open_response *resp) +{ + struct path child_path; + bool truncate = false; + int err = 0; + + err = hmdfs_lookup_create(con, recv, &child_path, &truncate); + if (err) + return err; + + err = hmdfs_dentry_open(con, &child_path, info); + if (err) + goto put_child; + + err = hmdfs_get_open_info(con, HM_REG, NULL, info); + if (err) + goto fail_close; + + if (truncate) { + err = vfs_truncate(&child_path, 0); + if (err) { + hmdfs_err("truncate failed, err %d", err); + goto fail_close; + } + } + hmdfs_update_open_response(con, cmd, info, &resp->open_resp); + resp->i_mode = cpu_to_le16(file_inode(info->file)->i_mode); + +fail_close: + if (err) { + remove_file_from_conn(con, info->file_id); + hmdfs_close_path(info->file); + } +put_child: + path_put(&child_path); + return err; +} + +void hmdfs_server_atomic_open(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, void *data) +{ + int err; + struct atomic_open_request *recv = data; + struct atomic_open_response *resp = NULL; + struct hmdfs_open_info *info = NULL; + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err)) + goto out; + + info = kmalloc(sizeof(*info), GFP_KERNEL); + resp = kzalloc(sizeof(*resp), GFP_KERNEL); + if (!resp || !info) { + err = -ENOMEM; + goto out; + } + + err = hmdfs_server_do_atomic_open(con, cmd, recv, info, resp); + +out: + if (err) { + hmdfs_send_err_response(con, cmd, err); + } else { + err = hmdfs_sendmessage_response(con, cmd, sizeof(*resp), resp, + 0); + if (err) { + hmdfs_err("sending msg response failed, file_id %d, err %d", + info->file_id, err); + remove_file_from_conn(con, info->file_id); + hmdfs_close_path(info->file); + } + } + kfree(info); + kfree(resp); +} + +void hmdfs_server_release(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + struct release_request *release_recv = data; + struct file *file = NULL; + __u32 file_id; + __u64 file_ver; + int ret = 0; + + file_id = le32_to_cpu(release_recv->file_id); + file_ver = le64_to_cpu(release_recv->file_ver); + file = get_file_by_fid_and_ver(con, cmd, file_id, file_ver); + if (IS_ERR(file)) { + hmdfs_err("cannot find %u", file_id); + ret = PTR_ERR(file); + goto out; + } + /* put the reference acquired by get_file_by_fid_and_ver() */ + hmdfs_close_path(file); + hmdfs_info("close %u", file_id); + remove_file_from_conn(con, file_id); + + hmdfs_close_path(file); + +out: + trace_hmdfs_server_release(con, file_id, file_ver, ret); + set_conn_sock_quickack(con); +} + +void hmdfs_server_fsync(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + struct fsync_request *fsync_recv = data; + __s32 datasync = le32_to_cpu(fsync_recv->datasync); + __s64 start = le64_to_cpu(fsync_recv->start); + __s64 end = le64_to_cpu(fsync_recv->end); + struct file *file = NULL; + __u32 file_id; + __u64 file_ver; + int ret = 0; + + file_id = le32_to_cpu(fsync_recv->file_id); + file_ver = le64_to_cpu(fsync_recv->file_ver); + file = get_file_by_fid_and_ver(con, cmd, file_id, file_ver); + if (IS_ERR(file)) { + hmdfs_err("cannot find %u", file_id); + ret = PTR_ERR(file); + goto out; + } + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &ret)) + goto out_put_file; + + ret = vfs_fsync_range(file, start, end, datasync); + if (ret) + hmdfs_err("fsync fail, ret %d", ret); + 
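+ /*
+ * For reference, a userspace analogue of the datasync flag that the
+ * client sends here (a sketch, not kernel code): vfs_fsync_range()
+ * with datasync != 0 behaves like fdatasync(), skipping metadata
+ * that is not needed to retrieve the data:
+ *
+ * int example_sync(int fd, int datasync)
+ * {
+ * return datasync ? fdatasync(fd) : fsync(fd);
+ * }
+ */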
+out_put_file: + hmdfs_close_path(file); +out: + hmdfs_send_err_response(con, cmd, ret); +} + +void hmdfs_server_readpage(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + struct readpage_request *readpage_recv = data; + __u64 file_ver; + __u32 file_id; + struct file *file = NULL; + loff_t pos; + struct readpage_response *readpage = NULL; + int ret = 0; + size_t read_len; + + file_id = le32_to_cpu(readpage_recv->file_id); + file_ver = le64_to_cpu(readpage_recv->file_ver); + file = get_file_by_fid_and_ver(con, cmd, file_id, file_ver); + if (IS_ERR(file)) { + hmdfs_info( + "file with id %u does not exist, pgindex %llu, devid %llu", + file_id, le64_to_cpu(readpage_recv->index), + con->device_id); + ret = PTR_ERR(file); + goto fail; + } + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &ret)) + goto fail_put_file; + + read_len = (size_t)le32_to_cpu(readpage_recv->size); + if (read_len == 0) + goto fail_put_file; + + readpage = kmalloc(read_len, GFP_KERNEL); + if (!readpage) { + ret = -ENOMEM; + goto fail_put_file; + } + + pos = (loff_t)le64_to_cpu(readpage_recv->index) << HMDFS_PAGE_OFFSET; + ret = kernel_read(file, readpage->buf, read_len, &pos); + if (ret < 0) { + hmdfs_send_err_response(con, cmd, -EIO); + } else { + if (ret != read_len) + memset(readpage->buf + ret, 0, read_len - ret); + hmdfs_sendmessage_response(con, cmd, read_len, readpage, 0); + } + + hmdfs_close_path(file); + kfree(readpage); + return; + +fail_put_file: + hmdfs_close_path(file); +fail: + hmdfs_send_err_response(con, cmd, ret); +} + +static struct readpages_response *alloc_readpages_resp(unsigned int len) +{ + struct readpages_response *resp = NULL; + + if (len > HMDFS_PAGE_SIZE) + resp = vmalloc(len); + else + resp = kmalloc(len, GFP_KERNEL); + + return resp; +} + +static void free_readpages_resp(struct readpages_response *resp, + unsigned int len) +{ + if (len > HMDFS_PAGE_SIZE) + vfree(resp); + else + kfree(resp); +} + +void hmdfs_server_readpages(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + struct readpages_request *req = data; + __u64 file_ver; + __u32 file_id; + struct file *file = NULL; + loff_t pos; + struct readpages_response *resp = NULL; + ssize_t ret = 0; + size_t read_len; + + file_id = le32_to_cpu(req->file_id); + file_ver = le64_to_cpu(req->file_ver); + file = get_file_by_fid_and_ver(con, cmd, file_id, file_ver); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto fail; + } + + read_len = (size_t)le32_to_cpu(req->size); + if (read_len == 0) + goto fail_put_file; + + resp = alloc_readpages_resp(read_len); + if (!resp) { + ret = -ENOMEM; + goto fail_put_file; + } + + pos = (loff_t)le64_to_cpu(req->index) << HMDFS_PAGE_OFFSET; + ret = kernel_read(file, resp->buf, read_len, &pos); + if (ret < 0) { + ret = -EIO; + goto fail_free_resp; + } + + hmdfs_sendmessage_response(con, cmd, ret, resp, 0); + hmdfs_close_path(file); + free_readpages_resp(resp, read_len); + return; + +fail_free_resp: + free_readpages_resp(resp, read_len); +fail_put_file: + hmdfs_close_path(file); +fail: + hmdfs_send_err_response(con, cmd, ret); +} + +static int hmdfs_do_readpages_open(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, + struct readpages_open_request *recv, + struct hmdfs_open_info *info, + struct readpages_open_response *resp) +{ + int ret = 0; + loff_t pos = 0; + + info->file = hmdfs_open_file(con, recv->buf, recv->file_type, + &info->file_id); + if (IS_ERR(info->file)) + return PTR_ERR(info->file); + + ret = hmdfs_get_open_info(con, recv->file_type, 
recv->buf, info); + if (ret) + goto fail_close; + + pos = (loff_t)le64_to_cpu(recv->index) << HMDFS_PAGE_OFFSET; + ret = kernel_read(info->file, resp->buf, le32_to_cpu(recv->size), &pos); + if (ret < 0) + goto fail_close; + + hmdfs_update_open_response(con, cmd, info, &resp->open_resp); + memset(resp->reserved, 0, sizeof(resp->reserved)); + ret = hmdfs_sendmessage_response(con, cmd, sizeof(*resp) + ret, resp, + 0); + if (ret) { + hmdfs_err("sending msg response failed, file_id %d, err %d", + info->file_id, ret); + ret = 0; + goto fail_close; + } + return 0; + +fail_close: + remove_file_from_conn(con, info->file_id); + hmdfs_close_path(info->file); + return ret; +} + +void hmdfs_server_readpages_open(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, void *data) +{ + struct readpages_open_request *recv = data; + struct readpages_open_response *resp = NULL; + int ret = -EINVAL; + size_t read_len = 0; + size_t resp_len = 0; + struct hmdfs_open_info *info = NULL; + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &ret)) + goto fail; + + info = kmalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + ret = -ENOMEM; + goto fail; + } + + read_len = (size_t)le32_to_cpu(recv->size); + if (read_len == 0) { + ret = -EINVAL; + goto fail_free_info; + } + resp_len = read_len + sizeof(*resp); + resp = vmalloc(resp_len); + if (!resp) { + ret = -ENOMEM; + goto fail_free_info; + } + + ret = hmdfs_do_readpages_open(con, cmd, recv, info, resp); + + vfree(resp); +fail_free_info: + kfree(info); +fail: + if (ret) + hmdfs_send_err_response(con, cmd, ret); +} + +static bool need_rebuild_dcache(struct hmdfs_dcache_header *h, + struct hmdfs_time_t time, + unsigned int precision) +{ + struct hmdfs_time_t crtime = { .tv_sec = le64_to_cpu(h->dcache_crtime), + .tv_nsec = le64_to_cpu( + h->dcache_crtime_nsec) }; + struct hmdfs_time_t ctime = { .tv_sec = le64_to_cpu(h->dentry_ctime), + .tv_nsec = le64_to_cpu( + h->dentry_ctime_nsec) }; + struct hmdfs_time_t pre_time = { .tv_sec = precision / MSEC_PER_SEC, + .tv_nsec = precision % MSEC_PER_SEC * + NSEC_PER_MSEC }; + + if (hmdfs_time_compare(&time, &ctime) != 0) + return true; + + pre_time = hmdfs_time_add(time, pre_time); + if (hmdfs_time_compare(&crtime, &pre_time) < 0) + return true; + + return false; +} + +static bool hmdfs_server_cache_validate(struct file *filp, struct inode *inode, + unsigned long precision) +{ + struct hmdfs_dcache_header header; + int overallpage; + ssize_t bytes; + loff_t pos = 0; + + overallpage = get_dentry_group_cnt(file_inode(filp)); + if (overallpage == 0) { + hmdfs_err("cache file size is 0"); + return false; + } + + bytes = kernel_read(filp, &header, sizeof(header), &pos); + if (bytes != sizeof(header)) { + hmdfs_err("read file failed, err:%zd", bytes); + return false; + } + + return !need_rebuild_dcache(&header, inode->i_ctime, precision); +} + +struct file *hmdfs_server_cache_revalidate(struct hmdfs_sb_info *sbi, + const char *recvpath, + struct path *path) +{ + struct cache_file_node *cfn = NULL; + struct file *file; + + cfn = find_cfn(sbi, HMDFS_SERVER_CID, recvpath, true); + if (!cfn) + return NULL; + + if (!hmdfs_server_cache_validate(cfn->filp, path->dentry->d_inode, + sbi->dcache_precision)) { + remove_cfn(cfn); + release_cfn(cfn); + return NULL; + } + file = cfn->filp; + get_file(cfn->filp); + release_cfn(cfn); + + return file; +} + +bool hmdfs_client_cache_validate(struct hmdfs_sb_info *sbi, + struct readdir_request *readdir_recv, + struct path *path) +{ + struct inode *inode = path->dentry->d_inode; + struct 
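+ /*
+ * The staleness test in need_rebuild_dcache() above boils down to two
+ * comparisons; a condensed sketch (types elided):
+ *
+ * stale = (ctime_now != ctime_cached) ||
+ * (crtime_cached < ctime_now + dcache_precision);
+ *
+ * i.e. the cache is rebuilt when the directory's ctime changed, or
+ * when the cache was created within one precision window of the
+ * current ctime and may therefore have raced with an update.
+ */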
hmdfs_dcache_header header;
+
+ /* always rebuild the dentryfile for a small dir */
+ if (le64_to_cpu(readdir_recv->num) < sbi->dcache_threshold)
+ return false;
+
+ header.dcache_crtime = readdir_recv->dcache_crtime;
+ header.dcache_crtime_nsec = readdir_recv->dcache_crtime_nsec;
+ header.dentry_ctime = readdir_recv->dentry_ctime;
+ header.dentry_ctime_nsec = readdir_recv->dentry_ctime_nsec;
+
+ return !need_rebuild_dcache(&header, inode->i_ctime,
+ sbi->dcache_precision);
+}
+
+static char *server_lower_dentry_path_raw(struct hmdfs_peer *peer,
+ struct dentry *lo_d)
+{
+ struct hmdfs_dentry_info *di = hmdfs_d(peer->sbi->sb->s_root);
+ struct dentry *lo_d_root = di->lower_path.dentry;
+ struct dentry *lo_d_tmp = NULL;
+ char *lo_p_buf = NULL;
+ char *buf_head = NULL;
+ char *buf_tail = NULL;
+ size_t path_len = 0;
+
+ lo_p_buf = kzalloc(PATH_MAX, GFP_KERNEL);
+ if (unlikely(!lo_p_buf))
+ return ERR_PTR(-ENOMEM);
+
+ /* Generate a reversed path string */
+ for (lo_d_tmp = lo_d; lo_d_tmp != lo_d_root && !IS_ROOT(lo_d_tmp);
+ lo_d_tmp = lo_d_tmp->d_parent) {
+ u32 dlen = lo_d_tmp->d_name.len;
+ int reverse_index = dlen - 1;
+
+ /* Account for the appended slash and '\0' */
+ if (unlikely(path_len + dlen + 1 > PATH_MAX - 1)) {
+ kfree(lo_p_buf);
+ return ERR_PTR(-ENAMETOOLONG);
+ }
+ for (; reverse_index >= 0; --reverse_index)
+ lo_p_buf[path_len++] =
+ lo_d_tmp->d_name.name[reverse_index];
+ lo_p_buf[path_len++] = '/';
+ }
+
+ /* Reverse the reversed path string to get the real path string */
+ for (buf_head = lo_p_buf, buf_tail = lo_p_buf + path_len - 1;
+ buf_head < buf_tail; ++buf_head, --buf_tail)
+ swap(*buf_head, *buf_tail);
+
+ if (path_len == 0)
+ lo_p_buf[0] = '/';
+ return lo_p_buf;
+}
+
+static int server_lookup(struct hmdfs_peer *peer, const char *req_path,
+ struct path *path)
+{
+ struct path root_path;
+ int err = 0;
+
+ err = kern_path(peer->sbi->local_dst, 0, &root_path);
+ if (err)
+ goto out_noroot;
+
+ err = vfs_path_lookup(root_path.dentry, root_path.mnt, req_path,
+ LOOKUP_DIRECTORY, path);
+ path_put(&root_path);
+out_noroot:
+ return err;
+}
+
+/**
+ * server_lookup_lower - look up the lower file-system
+ * @peer: target device node
+ * @req_path: abs path (mount point as the root) from the request
+ * @lo_p: the lower path to return
+ *
+ * Return the lower path's name, with character case matched
+ */
+static char *server_lookup_lower(struct hmdfs_peer *peer, const char *req_path,
+ struct path *lo_p)
+{
+ char *lo_p_name = ERR_PTR(-ENOENT);
+ struct path up_p;
+ int err = 0;
+
+ err = server_lookup(peer, req_path, &up_p);
+ if (err)
+ goto out;
+
+ hmdfs_get_lower_path(up_p.dentry, lo_p);
+ path_put(&up_p);
+
+ lo_p_name = server_lower_dentry_path_raw(peer, lo_p->dentry);
+ if (IS_ERR(lo_p_name)) {
+ err = PTR_ERR(lo_p_name);
+ path_put(lo_p);
+ }
+out:
+ return err ? 
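+ /*
+ * (The return below follows the usual kernel ERR_PTR convention; a
+ * sketch of how a caller is expected to consume it:
+ *
+ * char *name = server_lookup_lower(peer, req_path, &lo_p);
+ * if (IS_ERR(name))
+ * return PTR_ERR(name);
+ *
+ * hmdfs_server_readdir() below does exactly this.)
+ */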
ERR_PTR(err) : lo_p_name; +} + +void hmdfs_server_readdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + struct readdir_request *readdir_recv = data; + struct path lo_p; + struct file *filp = NULL; + int err = 0; + unsigned long long num = 0; + char *lo_p_name = NULL; + + trace_hmdfs_server_readdir(readdir_recv); + + lo_p_name = server_lookup_lower(con, readdir_recv->path, &lo_p); + if (IS_ERR(lo_p_name)) { + err = PTR_ERR(lo_p_name); + hmdfs_info("Failed to get lower path: %d", err); + goto send_err; + } + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err)) + goto err_lookup_path; + + if (le32_to_cpu(readdir_recv->verify_cache)) { + if (hmdfs_client_cache_validate(con->sbi, readdir_recv, &lo_p)) + goto out_response; + } + + filp = hmdfs_server_cache_revalidate(con->sbi, lo_p_name, &lo_p); + if (IS_ERR_OR_NULL(filp)) { + filp = hmdfs_server_rebuild_dents(con->sbi, &lo_p, &num, + lo_p_name); + if (IS_ERR_OR_NULL(filp)) { + err = PTR_ERR(filp); + goto err_lookup_path; + } + } + +out_response: + err = hmdfs_readfile_response(con, cmd, filp); + if (!err) + hmdfs_add_remote_cache_list(con, lo_p_name); + if (num >= con->sbi->dcache_threshold) + cache_file_persistent(con, filp, lo_p_name, true); + if (filp) + fput(filp); +err_lookup_path: + path_put(&lo_p); + kfree(lo_p_name); +send_err: + if (err) + hmdfs_send_err_response(con, cmd, err); +} + +void hmdfs_server_mkdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + int err = 0; + struct mkdir_request *mkdir_recv = data; + struct inode *child_inode = NULL; + struct dentry *dent = NULL; + char *mkdir_dir = NULL; + char *mkdir_name = NULL; + struct hmdfs_inodeinfo_response *mkdir_resp = NULL; + int respsize = sizeof(struct hmdfs_inodeinfo_response); + int path_len = le32_to_cpu(mkdir_recv->path_len); + + mkdir_resp = kzalloc(respsize, GFP_KERNEL); + if (!mkdir_resp) { + err = -ENOMEM; + goto mkdir_out; + } + + mkdir_dir = mkdir_recv->path; + mkdir_name = mkdir_recv->path + path_len + 1; + + dent = hmdfs_root_mkdir(con->device_id, con->sbi->local_dst, + mkdir_dir, mkdir_name, + le16_to_cpu(mkdir_recv->mode)); + if (IS_ERR(dent)) { + err = PTR_ERR(dent); + hmdfs_err("hmdfs_root_mkdir failed err = %d", err); + goto mkdir_out; + } + child_inode = d_inode(dent); + mkdir_resp->i_mode = cpu_to_le16(child_inode->i_mode); + mkdir_resp->i_size = cpu_to_le64(child_inode->i_size); + mkdir_resp->i_mtime = cpu_to_le64(child_inode->i_mtime.tv_sec); + mkdir_resp->i_mtime_nsec = cpu_to_le32(child_inode->i_mtime.tv_nsec); + mkdir_resp->i_ino = cpu_to_le64(child_inode->i_ino); + dput(dent); +mkdir_out: + hmdfs_sendmessage_response(con, cmd, respsize, mkdir_resp, err); + kfree(mkdir_resp); +} + +void hmdfs_server_create(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + int err = 0; + struct create_request *create_recv = data; + struct inode *child_inode = NULL; + struct dentry *dent = NULL; + char *create_dir = NULL; + char *create_name = NULL; + struct hmdfs_inodeinfo_response *create_resp = NULL; + int respsize = sizeof(struct hmdfs_inodeinfo_response); + int path_len = le32_to_cpu(create_recv->path_len); + + create_resp = kzalloc(respsize, GFP_KERNEL); + if (!create_resp) { + err = -ENOMEM; + goto create_out; + } + + create_dir = create_recv->path; + create_name = create_recv->path + path_len + 1; + + dent = hmdfs_root_create(con->device_id, con->sbi->local_dst, + create_dir, create_name, + le16_to_cpu(create_recv->mode), + create_recv->want_excl); + if (IS_ERR(dent)) { + err = 
PTR_ERR(dent); + hmdfs_err("hmdfs_root_create failed err = %d", err); + goto create_out; + } + child_inode = d_inode(dent); + create_resp->i_mode = cpu_to_le16(child_inode->i_mode); + create_resp->i_size = cpu_to_le64(child_inode->i_size); + create_resp->i_mtime = cpu_to_le64(child_inode->i_mtime.tv_sec); + create_resp->i_mtime_nsec = cpu_to_le32(child_inode->i_mtime.tv_nsec); + /* + * keep same as hmdfs_server_open, + * to prevent hmdfs_open_final_remote from judging ino errors. + */ + create_resp->i_ino = cpu_to_le64( + generate_u64_ino(hmdfs_i(child_inode)->lower_inode->i_ino, + child_inode->i_generation)); + dput(dent); +create_out: + hmdfs_sendmessage_response(con, cmd, respsize, create_resp, err); + kfree(create_resp); +} + +void hmdfs_server_rmdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + int err = 0; + struct path root_path; + char *path = NULL; + char *name = NULL; + struct rmdir_request *rmdir_recv = data; + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err)) + goto out; + + path = rmdir_recv->path; + name = rmdir_recv->path + le32_to_cpu(rmdir_recv->path_len) + 1; + err = kern_path(con->sbi->local_dst, 0, &root_path); + if (!err) { + err = hmdfs_root_rmdir(con->device_id, &root_path, path, name); + path_put(&root_path); + } +out: + hmdfs_send_err_response(con, cmd, err); +} + +void hmdfs_server_unlink(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + int err = 0; + struct path root_path; + char *path = NULL; + char *name = NULL; + struct unlink_request *unlink_recv = data; + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err)) + goto out; + + path = unlink_recv->path; + name = unlink_recv->path + le32_to_cpu(unlink_recv->path_len) + 1; + err = kern_path(con->sbi->local_dst, 0, &root_path); + if (!err) { + err = hmdfs_root_unlink(con->device_id, &root_path, path, name); + path_put(&root_path); + } +out: + hmdfs_send_err_response(con, cmd, err); +} + +void hmdfs_server_rename(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + int err = 0; + int old_path_len; + int new_path_len; + int old_name_len; + int new_name_len; + unsigned int flags; + char *path_old = NULL; + char *name_old = NULL; + char *path_new = NULL; + char *name_new = NULL; + struct rename_request *recv = data; + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err)) + goto out; + + old_path_len = le32_to_cpu(recv->old_path_len); + new_path_len = le32_to_cpu(recv->new_path_len); + old_name_len = le32_to_cpu(recv->old_name_len); + new_name_len = le32_to_cpu(recv->new_name_len); + flags = le32_to_cpu(recv->flags); + + path_old = recv->path; + path_new = recv->path + old_path_len + 1; + name_old = recv->path + old_path_len + 1 + new_path_len + 1; + name_new = recv->path + old_path_len + 1 + new_path_len + 1 + + old_name_len + 1; + + err = hmdfs_root_rename(con->sbi, con->device_id, path_old, name_old, + path_new, name_new, flags); +out: + hmdfs_send_err_response(con, cmd, err); +} + +static int hmdfs_lookup_symlink(struct path *link_path, const char *path_fmt, + ...) 
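+/*
+ * Usage sketch: callers assemble the link target path from printf-style
+ * pieces; hmdfs_filldir_real() below, for instance, resolves a symlink
+ * entry roughly as:
+ *
+ * struct path link_path;
+ * int res = hmdfs_lookup_symlink(&link_path, "%s/%s/%s",
+ * sbi->local_src, dir, name);
+ *
+ * On success the caller owns a reference on link_path and must drop it
+ * with path_put().
+ */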
+{
+ int ret;
+ va_list args;
+ char *path = kmalloc(PATH_MAX, GFP_KERNEL);
+
+ if (!path)
+ return -ENOMEM;
+
+ va_start(args, path_fmt);
+ ret = vsnprintf(path, PATH_MAX, path_fmt, args);
+ va_end(args);
+
+ if (ret >= PATH_MAX) {
+ ret = -ENAMETOOLONG;
+ goto out;
+ }
+
+ /*
+ * TODO: when rebuilding the dentryfile there may be a deadlock,
+ * because iterate_dir() already holds the parent lock, while at
+ * this point we do not know the symlink source's parent.
+ */
+ ret = kern_path(path, LOOKUP_FOLLOW, link_path);
+ if (ret) {
+ hmdfs_err("kern_path failed err = %d", ret);
+ goto out;
+ }
+
+ if (!S_ISREG(d_inode(link_path->dentry)->i_mode)) {
+ hmdfs_err("path is dir symlink");
+ path_put(link_path);
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+out:
+ kfree(path);
+ return ret;
+}
+
+static int hmdfs_filldir_real(struct dir_context *ctx, const char *name,
+ int name_len, loff_t offset, u64 ino,
+ unsigned int d_type)
+{
+ int res = 0;
+ char namestr[NAME_MAX + 1];
+ struct getdents_callback_real *gc = NULL;
+ struct dentry *child = NULL;
+
+ if (name_len > NAME_MAX) {
+ hmdfs_err("name_len:%d NAME_MAX:%u", name_len, NAME_MAX);
+ goto out;
+ }
+
+ gc = container_of(ctx, struct getdents_callback_real, ctx);
+
+ memcpy(namestr, name, name_len);
+ namestr[name_len] = '\0';
+
+ if (hmdfs_file_type(namestr) != HMDFS_TYPE_COMMON)
+ goto out;
+
+ /* parent lock is already held by iterate_dir */
+ child = lookup_one_len(name, gc->parent_path->dentry, name_len);
+ if (IS_ERR(child)) {
+ res = PTR_ERR(child);
+ hmdfs_err("lookup failed because %d", res);
+ goto out;
+ }
+
+ if (d_really_is_negative(child)) {
+ dput(child);
+ hmdfs_err("lookup failed because negative dentry");
+ /* just skip this entry and continue with the next one */
+ goto out;
+ }
+
+ if (d_type == DT_REG || d_type == DT_DIR) {
+ create_dentry(child, d_inode(child), gc->file, gc->sbi);
+ gc->num++;
+ } else if (d_type == DT_LNK) {
+ struct path link_path;
+
+ res = hmdfs_lookup_symlink(&link_path, "%s/%s/%s",
+ gc->sbi->local_src, gc->dir,
+ namestr);
+ if (!res) {
+ create_dentry(child, d_inode(link_path.dentry),
+ gc->file, gc->sbi);
+ path_put(&link_path);
+ gc->num++;
+ } else if (res == -ENOENT) {
+ /*
+ * If the source file does not exist, use the info
+ * from the link inode.
+ */
+ create_dentry(child, d_inode(child), gc->file, gc->sbi);
+ gc->num++;
+ }
+ }
+
+ dput(child);
+
+out:
+ /*
+ * we always return 0 here, so that the caller can continue to the
+ * next dentry even if this one failed somehow.
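+ *
+ * (For reference, a sketch of the dir_context actor contract this
+ * relies on: returning non-zero from the actor makes iterate_dir()
+ * stop early, e.g.
+ *
+ * if (fatal_error)
+ * return -EINTR; // would abort the whole directory walk
+ * return 0; // keep walking
+ *
+ * so per-entry failures are deliberately swallowed here.)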
+ */ + return 0; +} + +static void hmdfs_server_set_header(struct hmdfs_dcache_header *header, + struct file *file, struct file *dentry_file) +{ + struct inode *inode = NULL; + struct hmdfs_time_t cur_time; + + inode = file_inode(file); + cur_time = current_time(file_inode(dentry_file)); + header->dcache_crtime = cpu_to_le64(cur_time.tv_sec); + header->dcache_crtime_nsec = cpu_to_le64(cur_time.tv_nsec); + header->dentry_ctime = cpu_to_le64(inode->i_ctime.tv_sec); + header->dentry_ctime_nsec = cpu_to_le64(inode->i_ctime.tv_nsec); +} + +// Get the dentries of target directory +struct file *hmdfs_server_rebuild_dents(struct hmdfs_sb_info *sbi, + struct path *path, loff_t *num, + const char *dir) +{ + int err = 0; + struct getdents_callback_real gc = { + .ctx.actor = hmdfs_filldir_real, + .ctx.pos = 0, + .num = 0, + .sbi = sbi, + .dir = dir, + }; + struct file *file = NULL; + struct file *dentry_file = NULL; + struct hmdfs_dcache_header header; + + dentry_file = create_local_dentry_file_cache(sbi); + if (IS_ERR(dentry_file)) { + hmdfs_err("file create failed err=%ld", PTR_ERR(dentry_file)); + return dentry_file; + } + + file = dentry_open(path, O_RDONLY | O_DIRECTORY, current_cred()); + if (IS_ERR(file)) { + err = PTR_ERR(file); + hmdfs_err("dentry_open failed"); + goto out; + } + + hmdfs_server_set_header(&header, file, dentry_file); + + gc.parent_path = path; + gc.file = dentry_file; + + err = iterate_dir(file, &(gc.ctx)); + if (err) { + hmdfs_err("iterate_dir failed"); + goto out; + } + + header.case_sensitive = sbi->s_case_sensitive; + header.num = cpu_to_le64(gc.num); + if (num) + *num = gc.num; + + err = write_header(dentry_file, &header); +out: + if (!IS_ERR_OR_NULL(file)) + fput(file); + + if (err) { + fput(dentry_file); + dentry_file = ERR_PTR(err); + } + + trace_hmdfs_server_rebuild_dents(&header, err); + return dentry_file; +} + +void hmdfs_server_writepage(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + struct writepage_request *writepage_recv = data; + struct hmdfs_server_writeback *hswb = NULL; + __u64 file_ver; + __u32 file_id; + struct file *file = NULL; + loff_t pos; + __u32 count; + ssize_t ret; + int err = 0; + + file_id = le32_to_cpu(writepage_recv->file_id); + file_ver = le64_to_cpu(writepage_recv->file_ver); + file = get_file_by_fid_and_ver(con, cmd, file_id, file_ver); + if (IS_ERR(file)) { + hmdfs_info( + "file with id %u does not exist, pgindex %llu, devid %llu", + file_id, le64_to_cpu(writepage_recv->index), + con->device_id); + err = PTR_ERR(file); + goto out; + } + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err)) + goto out_put_file; + + pos = (loff_t)le64_to_cpu(writepage_recv->index) << HMDFS_PAGE_OFFSET; + count = le32_to_cpu(writepage_recv->count); + ret = kernel_write(file, writepage_recv->buf, count, &pos); + if (ret != count) + err = -EIO; + +out_put_file: + hmdfs_close_path(file); +out: + hmdfs_send_err_response(con, cmd, err); + + hswb = con->sbi->h_swb; + if (!err && hswb->dirty_writeback_control) + hmdfs_server_check_writeback(hswb); +} + +static int hmdfs_lookup_linkpath(struct hmdfs_sb_info *sbi, + const char *path_name, struct path *dst_path) +{ + struct path link_path; + int err; + + err = hmdfs_lookup_symlink(&link_path, "%s/%s", sbi->local_dst, + path_name); + if (err) + return err; + + if (d_inode(link_path.dentry)->i_sb != sbi->sb) { + path_put(dst_path); + *dst_path = link_path; + } else { + path_put(&link_path); + } + + return 0; +} + +static struct inode *hmdfs_verify_path(struct dentry *dentry, char 
*recv_buf, + struct super_block *sb) +{ + struct inode *inode = d_inode(dentry); + struct hmdfs_inode_info *info = NULL; + + /* if we found path from wrong fs */ + if (inode->i_sb != sb) { + hmdfs_err("super block do not match"); + return NULL; + } + + info = hmdfs_i(inode); + /* make sure lower inode is not NULL */ + if (info->lower_inode) + return info->lower_inode; + + /* + * we don't expect lower inode to be NULL in server. However, it's + * possible because dentry cache can contain stale data. + */ + hmdfs_info("lower inode is NULL, is remote file: %d", + info->conn != NULL); + return NULL; +} + +static int hmdfs_notify_change(struct vfsmount *mnt, struct dentry *dentry, + struct iattr *attr, + struct inode **delegated_inode) +{ +#ifdef CONFIG_SDCARD_FS + /* sdcard_fs need to call setattr2, notify_change will call setattr */ + return notify_change2(mnt, dentry, attr, delegated_inode); +#else + return notify_change(dentry, attr, delegated_inode); +#endif +} + +void hmdfs_server_setattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + int err = 0; + struct dentry *dentry = NULL; + struct inode *inode = NULL; + struct setattr_request *recv = data; + struct path root_path, dst_path; + struct iattr attr; + __u32 valid = le32_to_cpu(recv->valid); + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err)) + goto out; + + err = kern_path(con->sbi->local_dst, 0, &root_path); + if (err) { + hmdfs_err("kern_path failed err = %d", err); + goto out; + } + + err = vfs_path_lookup(root_path.dentry, root_path.mnt, recv->buf, 0, + &dst_path); + if (err) + goto out_put_root; + + inode = hmdfs_verify_path(dst_path.dentry, recv->buf, con->sbi->sb); + if (!inode) { + err = -ENOENT; + goto out_put_dst; + } + + /* We need to follow if symlink was found */ + if (S_ISLNK(inode->i_mode)) { + err = hmdfs_lookup_linkpath(con->sbi, recv->buf, &dst_path); + /* if source file doesn't exist, use link inode */ + if (err == -ENOENT) + err = 0; + else if (err) + goto out_put_dst; + } + + dentry = dst_path.dentry; + memset(&attr, 0, sizeof(attr)); + /* only support size and mtime */ + if (valid & (ATTR_SIZE | ATTR_MTIME)) + attr.ia_valid = + (valid & (ATTR_MTIME | ATTR_MTIME_SET | ATTR_SIZE)); + attr.ia_size = le64_to_cpu(recv->size); + attr.ia_mtime.tv_sec = le64_to_cpu(recv->mtime); + attr.ia_mtime.tv_nsec = le32_to_cpu(recv->mtime_nsec); + + inode_lock(dentry->d_inode); + err = hmdfs_notify_change(dst_path.mnt, dentry, &attr, NULL); + inode_unlock(dentry->d_inode); + +out_put_dst: + path_put(&dst_path); +out_put_root: + path_put(&root_path); +out: + hmdfs_send_err_response(con, cmd, err); +} + +static void update_getattr_response(struct hmdfs_peer *con, struct inode *inode, + struct kstat *ks, + struct getattr_response *resp) +{ + /* if getattr for link, get ino and mode from actual lower inode */ + resp->ino = cpu_to_le64( + generate_u64_ino(inode->i_ino, inode->i_generation)); + resp->mode = cpu_to_le16(inode->i_mode); + + /* get other information from vfs_getattr() */ + resp->result_mask = cpu_to_le32(STATX_BASIC_STATS | STATX_BTIME); + resp->fsid = cpu_to_le64(ks->dev); + resp->nlink = cpu_to_le32(ks->nlink); + resp->uid = cpu_to_le32(ks->uid.val); + resp->gid = cpu_to_le32(ks->gid.val); + resp->size = cpu_to_le64(ks->size); + resp->blocks = cpu_to_le64(ks->blocks); + resp->blksize = cpu_to_le32(ks->blksize); + resp->atime = cpu_to_le64(ks->atime.tv_sec); + resp->atime_nsec = cpu_to_le32(ks->atime.tv_nsec); + resp->mtime = cpu_to_le64(ks->mtime.tv_sec); + resp->mtime_nsec = 
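+ /* every field crosses the wire little-endian; a sketch of the idea:
+ * a big-endian server stores cpu_to_le64(x) so that the client's
+ * le64_to_cpu() round-trips x unchanged, e.g.
+ *
+ * uint64_t wire = cpu_to_le64(ks->size);
+ * assert(le64_to_cpu(wire) == ks->size);
+ */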
cpu_to_le32(ks->mtime.tv_nsec); + resp->ctime = cpu_to_le64(ks->ctime.tv_sec); + resp->ctime_nsec = cpu_to_le32(ks->ctime.tv_nsec); + resp->crtime = cpu_to_le64(ks->btime.tv_sec); + resp->crtime_nsec = cpu_to_le32(ks->btime.tv_nsec); +} + +void hmdfs_server_getattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + int err = 0; + struct getattr_request *recv = data; + int size_read = sizeof(struct getattr_response); + struct getattr_response *resp = NULL; + struct kstat ks; + struct path root_path, dst_path; + struct inode *inode = NULL; + unsigned int recv_flags = le32_to_cpu(recv->lookup_flags); + unsigned int lookup_flags = 0; + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err)) + goto err; + + err = hmdfs_convert_lookup_flags(recv_flags, &lookup_flags); + if (err) + goto err; + + resp = kzalloc(size_read, GFP_KERNEL); + if (!resp) { + err = -ENOMEM; + goto err; + } + err = kern_path(con->sbi->local_dst, 0, &root_path); + if (err) { + hmdfs_err("kern_path failed err = %d", err); + goto err_free_resp; + } + //TODO: local_dst -->local_src + err = vfs_path_lookup(root_path.dentry, root_path.mnt, recv->buf, + lookup_flags, &dst_path); + if (err) + goto out_put_root; + + inode = hmdfs_verify_path(dst_path.dentry, recv->buf, con->sbi->sb); + if (!inode) { + err = -ENOENT; + goto out_put_dst; + } + /* We need to follow if symlink was found */ + if (S_ISLNK(inode->i_mode)) { + err = hmdfs_lookup_linkpath(con->sbi, recv->buf, &dst_path); + /* if source file doesn't exist, use link inode */ + if (err && err != -ENOENT) + goto out_put_dst; + } + + err = vfs_getattr(&dst_path, &ks, STATX_BASIC_STATS | STATX_BTIME, 0); + if (err) + goto err_put_dst; + update_getattr_response(con, inode, &ks, resp); + +out_put_dst: + path_put(&dst_path); +out_put_root: + /* + * if path lookup failed, we return with result_mask setting to + * zero. So we can be aware of such situation in caller. 
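+ *
+ * A sketch of the client-side check this enables (hypothetical
+ * caller code, not part of this hunk):
+ *
+ * if (le32_to_cpu(resp->result_mask) == 0)
+ * return -ENOENT; // lookup failed on the server side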
+ */ + if (err) + resp->result_mask = cpu_to_le32(0); + path_put(&root_path); + hmdfs_sendmessage_response(con, cmd, size_read, resp, err); + kfree(resp); + return; + +err_put_dst: + path_put(&dst_path); + path_put(&root_path); +err_free_resp: + kfree(resp); +err: + hmdfs_send_err_response(con, cmd, err); +} + +static void init_statfs_response(struct statfs_response *resp, + struct kstatfs *st) +{ + resp->f_type = cpu_to_le64(HMDFS_SUPER_MAGIC); + resp->f_bsize = cpu_to_le64(st->f_bsize); + resp->f_blocks = cpu_to_le64(st->f_blocks); + resp->f_bfree = cpu_to_le64(st->f_bfree); + resp->f_bavail = cpu_to_le64(st->f_bavail); + resp->f_files = cpu_to_le64(st->f_files); + resp->f_ffree = cpu_to_le64(st->f_ffree); + resp->f_fsid_0 = cpu_to_le32(st->f_fsid.val[0]); + resp->f_fsid_1 = cpu_to_le32(st->f_fsid.val[1]); + resp->f_namelen = cpu_to_le64(st->f_namelen); + resp->f_frsize = cpu_to_le64(st->f_frsize); + resp->f_flags = cpu_to_le64(st->f_flags); + /* f_spare is not used in f2fs or ext4 */ + resp->f_spare_0 = cpu_to_le64(st->f_spare[0]); + resp->f_spare_1 = cpu_to_le64(st->f_spare[1]); + resp->f_spare_2 = cpu_to_le64(st->f_spare[2]); + resp->f_spare_3 = cpu_to_le64(st->f_spare[3]); +} + +void hmdfs_server_statfs(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + struct statfs_request *recv = data; + struct statfs_response *resp = NULL; + struct path root_path, path; + struct kstatfs *st = NULL; + int err = 0; + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err)) + goto out; + + st = kzalloc(sizeof(*st), GFP_KERNEL); + if (!st) { + err = -ENOMEM; + goto out; + } + + resp = kmalloc(sizeof(*resp), GFP_KERNEL); + if (!resp) { + err = -ENOMEM; + goto free_st; + } + + err = kern_path(con->sbi->local_src, 0, &root_path); + if (err) { + hmdfs_info("kern_path failed err = %d", err); + goto free_st; + } + + err = vfs_path_lookup(root_path.dentry, root_path.mnt, recv->path, 0, + &path); + if (err) { + hmdfs_info("recv->path found failed err = %d", err); + goto put_root; + } + + err = vfs_statfs(&path, st); + if (err) + hmdfs_info("statfs local dentry failed, err = %d", err); + init_statfs_response(resp, st); + path_put(&path); + +put_root: + path_put(&root_path); +free_st: + kfree(st); +out: + if (err) + hmdfs_send_err_response(con, cmd, err); + else + hmdfs_sendmessage_response(con, cmd, sizeof(*resp), resp, 0); + + kfree(resp); +} + +void hmdfs_server_syncfs(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + /* + * Reserved interface. There is a difference compared with traditional + * syncfs process. Remote syncfs process in client: + * 1. Remote writepages by async call + * 2. Remote syncfs calling + * 3. 
Wait all remote async calls(writepages) return in step 1 + */ + int ret = 0; + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &ret)) { + hmdfs_send_err_response(con, cmd, ret); + return; + } + + hmdfs_send_err_response(con, cmd, ret); +} + +void hmdfs_server_getxattr(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, void *data) +{ + struct getxattr_request *recv = data; + size_t size = le32_to_cpu(recv->size); + size_t size_read = sizeof(struct getxattr_response) + size; + struct getxattr_response *resp = NULL; + struct path root_path; + struct path path; + char *file_path = recv->buf; + char *name = recv->buf + recv->path_len + 1; + int err = -ENOMEM; + + resp = kzalloc(size_read, GFP_KERNEL); + if (!resp) + goto err; + + err = kern_path(con->sbi->local_dst, LOOKUP_DIRECTORY, &root_path); + if (err) { + hmdfs_info("kern_path failed err = %d", err); + goto err_free_resp; + } + + err = vfs_path_lookup(root_path.dentry, root_path.mnt, + file_path, 0, &path); + if (err) { + hmdfs_info("path found failed err = %d", err); + goto err_put_root; + } + + if (!size) + err = vfs_getxattr(path.dentry, name, NULL, size); + else + err = vfs_getxattr(path.dentry, name, resp->value, size); + if (err < 0) { + hmdfs_info("getxattr failed err %d", err); + goto err_put_path; + } + + resp->size = cpu_to_le32(err); + hmdfs_sendmessage_response(con, cmd, size_read, resp, 0); + path_put(&path); + path_put(&root_path); + kfree(resp); + return; + +err_put_path: + path_put(&path); +err_put_root: + path_put(&root_path); +err_free_resp: + kfree(resp); +err: + hmdfs_send_err_response(con, cmd, err); +} + +void hmdfs_server_setxattr(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, void *data) +{ + struct setxattr_request *recv = data; + size_t size = le32_to_cpu(recv->size); + int flags = le32_to_cpu(recv->flags); + bool del = recv->del; + struct path root_path; + struct path path; + const char *file_path = NULL; + const char *name = NULL; + const void *value = NULL; + int err; + + err = kern_path(con->sbi->local_dst, LOOKUP_DIRECTORY, &root_path); + if (err) { + hmdfs_info("kern_path failed err = %d", err); + goto err; + } + + file_path = recv->buf; + name = recv->buf + recv->path_len + 1; + value = name + recv->name_len + 1; + err = vfs_path_lookup(root_path.dentry, root_path.mnt, + file_path, 0, &path); + if (err) { + hmdfs_info("path found failed err = %d", err); + goto err_put_root; + } + + if (del) { + WARN_ON(flags != XATTR_REPLACE); + err = vfs_removexattr(path.dentry, name); + } else { + err = vfs_setxattr(path.dentry, name, value, size, flags); + } + + path_put(&path); +err_put_root: + path_put(&root_path); +err: + hmdfs_send_err_response(con, cmd, err); +} + +void hmdfs_server_listxattr(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, void *data) +{ + struct listxattr_request *recv = data; + size_t size = le32_to_cpu(recv->size); + int size_read = sizeof(struct listxattr_response) + size; + struct listxattr_response *resp = NULL; + const char *file_path = NULL; + struct path root_path; + struct path path; + int err = 0; + + resp = kzalloc(size_read, GFP_KERNEL); + if (!resp) { + err = -ENOMEM; + goto err; + } + + err = kern_path(con->sbi->local_dst, LOOKUP_DIRECTORY, &root_path); + if (err) { + hmdfs_info("kern_path failed err = %d", err); + goto err_free_resp; + } + + file_path = recv->buf; + err = vfs_path_lookup(root_path.dentry, root_path.mnt, + file_path, 0, &path); + if (err) { + hmdfs_info("path found failed err = %d", err); + goto err_put_root; + } + + if (!size) + err = 
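+ /* size == 0 is the standard probe convention: with a NULL buffer
+ * the call returns the required size instead of the data. Userspace
+ * does the same two-step dance, e.g.:
+ *
+ * ssize_t need = listxattr(path, NULL, 0);
+ * char *buf = malloc(need);
+ * listxattr(path, buf, need);
+ */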
vfs_listxattr(path.dentry, NULL, size); + else + err = vfs_listxattr(path.dentry, resp->list, size); + if (err < 0) { + hmdfs_info("listxattr failed err = %d", err); + goto err_put_path; + } + + resp->size = cpu_to_le32(err); + hmdfs_sendmessage_response(con, cmd, size_read, resp, 0); + path_put(&root_path); + path_put(&path); + kfree(resp); + return; + +err_put_path: + path_put(&path); +err_put_root: + path_put(&root_path); +err_free_resp: + kfree(resp); +err: + hmdfs_send_err_response(con, cmd, err); +} + +void hmdfs_server_get_drop_push(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, void *data) +{ + struct drop_push_request *dp_recv = data; + struct path root_path, path; + int err; + char *tmp_path = NULL; + + err = kern_path(con->sbi->real_dst, 0, &root_path); + if (err) { + hmdfs_err("kern_path failed err = %d", err); + goto quickack; + } + tmp_path = kzalloc(PATH_MAX, GFP_KERNEL); + if (!tmp_path) + goto out; + snprintf(tmp_path, PATH_MAX, "/" DEVICE_VIEW_ROOT "/%s%s", + con->cid, dp_recv->path); + + err = vfs_path_lookup(root_path.dentry, root_path.mnt, tmp_path, 0, + &path); + if (err) { + hmdfs_info("path found failed err = %d", err); + goto free; + } + hmdfs_remove_cache_filp(con, path.dentry); + + path_put(&path); +free: + kfree(tmp_path); +out: + path_put(&root_path); +quickack: + set_conn_sock_quickack(con); +} diff --git a/fs/hmdfs/hmdfs_server.h b/fs/hmdfs/hmdfs_server.h new file mode 100644 index 000000000000..844f3a9ee82c --- /dev/null +++ b/fs/hmdfs/hmdfs_server.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/hmdfs_server.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_SERVER_H +#define HMDFS_SERVER_H + +#include "hmdfs.h" +#include "comm/transport.h" +#include "comm/socket_adapter.h" + +static inline void hmdfs_send_err_response(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, int err) +{ + if (hmdfs_sendmessage_response(con, cmd, 0, NULL, (__u32)err)) + hmdfs_warning("send err failed"); +} + +void hmdfs_server_open(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); +void hmdfs_server_atomic_open(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, void *data); +void hmdfs_server_fsync(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); +void hmdfs_server_release(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); +void hmdfs_server_readpage(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); +void hmdfs_server_readpages(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); +void hmdfs_server_readpages_open(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, void *data); +void hmdfs_server_writepage(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); + +void hmdfs_server_readdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); + +void hmdfs_server_mkdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); + +void hmdfs_server_create(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); + +void hmdfs_server_rmdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); + +void hmdfs_server_unlink(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); + +void hmdfs_server_rename(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); + +void hmdfs_server_setattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); +void hmdfs_server_getattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); +void 
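+/*
+ * All handlers below share one shape: (peer, request header, payload).
+ * A sketch of how the receive path can dispatch them, assuming a table
+ * keyed by cmd->operations.command (illustrative only; the real
+ * dispatch presumably lives in the socket adapter):
+ *
+ * typedef void (*hmdfs_handler)(struct hmdfs_peer *,
+ * struct hmdfs_head_cmd *, void *);
+ * handlers[cmd->operations.command](con, cmd, data);
+ */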
hmdfs_server_statfs(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data);
+void hmdfs_server_syncfs(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data);
+void hmdfs_server_getxattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data);
+void hmdfs_server_setxattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data);
+void hmdfs_server_listxattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data);
+void hmdfs_server_get_drop_push(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *cmd, void *data);
+
+void __init hmdfs_server_add_node_evt_cb(void);
+#endif
diff --git a/fs/hmdfs/hmdfs_trace.h b/fs/hmdfs/hmdfs_trace.h
new file mode 100644
index 000000000000..205bf697c357
--- /dev/null
+++ b/fs/hmdfs/hmdfs_trace.h
@@ -0,0 +1,800 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/hmdfs_trace.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hmdfs
+
+#if !defined(__HMDFS_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ)
+
+#define __HMDFS_TRACE_H__
+
+#include <linux/tracepoint.h>
+#include "comm/protocol.h"
+#include "hmdfs_dentryfile.h"
+#include "hmdfs_client.h"
+#include "hmdfs_device_view.h"
+#include "client_writeback.h"
+
+TRACE_EVENT(hmdfs_permission,
+
+ TP_PROTO(unsigned long ino),
+
+ TP_ARGS(ino),
+
+ TP_STRUCT__entry(__field(unsigned long, ino)),
+
+ TP_fast_assign(__entry->ino = ino;),
+
+ TP_printk("permission check for ino %lu failed", __entry->ino));
+
+/* communication */
+TRACE_EVENT(hmdfs_recv_mesg_callback,
+
+ TP_PROTO(struct hmdfs_head_cmd *cmd),
+
+ TP_ARGS(cmd),
+
+ TP_STRUCT__entry(
+ __field(__u32, msg_id)
+ __field(__u32, magic)
+ __field(__u16, command)
+ __field(__u16, cmd_flag)
+ __field(__u32, data_len)
+ __field(__u32, ret_code)
+ ),
+
+ TP_fast_assign(
+ __entry->msg_id = le32_to_cpu(cmd->msg_id);
+ __entry->magic = cmd->magic;
+ __entry->command = cmd->operations.command;
+ __entry->cmd_flag = cmd->operations.cmd_flag;
+ __entry->data_len = cmd->data_len;
+ __entry->ret_code = cmd->ret_code;
+ ),
+
+ TP_printk("msg_id:%u magic:%u command:%hu, cmd_flag:%hu, data_len:%u, ret_code:%u",
+ __entry->msg_id, __entry->magic, __entry->command,
+ __entry->cmd_flag, __entry->data_len, __entry->ret_code)
+);
+
+TRACE_EVENT(hmdfs_tcp_send_message,
+
+ TP_PROTO(struct hmdfs_head_cmd *cmd),
+
+ TP_ARGS(cmd),
+
+ TP_STRUCT__entry(
+ __field(__u32, msg_id)
+ __field(__u32, magic)
+ __field(__u16, command)
+ __field(__u16, cmd_flag)
+ __field(__u32, data_len)
+ __field(__u32, ret_code)
+ ),
+
+ TP_fast_assign(
+ __entry->msg_id = le32_to_cpu(cmd->msg_id);
+ __entry->magic = cmd->magic;
+ __entry->command = cmd->operations.command;
+ __entry->cmd_flag = cmd->operations.cmd_flag;
+ __entry->data_len = cmd->data_len;
+ __entry->ret_code = cmd->ret_code;
+ ),
+
+ TP_printk("msg_id:%u magic:%u command:%hu, cmd_flag:%hu, data_len:%u, ret_code:%u",
+ __entry->msg_id, __entry->magic, __entry->command,
+ __entry->cmd_flag, __entry->data_len, __entry->ret_code)
+);
+
+/* file system interface */
+DECLARE_EVENT_CLASS(hmdfs_iterate_op_end,
+
+ TP_PROTO(struct dentry *__d, loff_t start_pos, loff_t end_pos, int err),
+
+ TP_ARGS(__d, start_pos, end_pos, err),
+
+ TP_STRUCT__entry(
+ __string(name_str, __d->d_name.name)
+ __field(loff_t, start)
+ __field(loff_t, end)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ __assign_str(name_str, __d->d_name.name);
+ __entry->start = start_pos;
+ __entry->end = end_pos;
+ __entry->err = err;
+ ),
+
+ TP_printk("dentry[%s] start_pos:%llx, 
end_pos:%llx, err:%d", + __get_str(name_str), __entry->start, + __entry->end, __entry->err) +); + +#define define_hmdfs_iterate_op_end_event(event_name) \ + DEFINE_EVENT(hmdfs_iterate_op_end, event_name, \ + TP_PROTO(struct dentry *__d, loff_t start_pos, \ + loff_t end_pos, int err), \ + TP_ARGS(__d, start_pos, end_pos, err)) + +define_hmdfs_iterate_op_end_event(hmdfs_iterate_local); +define_hmdfs_iterate_op_end_event(hmdfs_iterate_remote); +define_hmdfs_iterate_op_end_event(hmdfs_iterate_merge); + + +TRACE_EVENT(hmdfs_lookup, + + TP_PROTO(struct inode *dir, struct dentry *dentry, unsigned int flags), + + TP_ARGS(dir, dentry, flags), + + TP_STRUCT__entry( + __field(ino_t, ino) + __string(name_str, dentry->d_name.name) + __field(unsigned int, flags) + ), + + TP_fast_assign( + __entry->ino = dir->i_ino; + __assign_str(name_str, dentry->d_name.name); + __entry->flags = flags; + ), + + TP_printk("parent_ino = %lu, name:%s, flags:%u", + __entry->ino, __get_str(name_str), __entry->flags) +); + +DECLARE_EVENT_CLASS(hmdfs_lookup_op_end, + + TP_PROTO(struct inode *dir, struct dentry *dentry, int err), + + TP_ARGS(dir, dentry, err), + + TP_STRUCT__entry( + __field(ino_t, ino) + __string(name_str, dentry->d_name.name) + __field(int, err) + ), + + TP_fast_assign( + __entry->ino = dir->i_ino; + __assign_str(name_str, dentry->d_name.name); + __entry->err = err; + ), + + TP_printk("parent_ino = %lu, name:%s, err:%d", + __entry->ino, __get_str(name_str), __entry->err) +); + +#define define_hmdfs_lookup_op_end_event(event_name) \ + DEFINE_EVENT(hmdfs_lookup_op_end, event_name, \ + TP_PROTO(struct inode *dir, struct dentry *dentry, \ + int err), \ + TP_ARGS(dir, dentry, err)) + + +define_hmdfs_lookup_op_end_event(hmdfs_root_lookup); +define_hmdfs_lookup_op_end_event(hmdfs_root_lookup_end); + +define_hmdfs_lookup_op_end_event(hmdfs_device_lookup); +define_hmdfs_lookup_op_end_event(hmdfs_device_lookup_end); + +define_hmdfs_lookup_op_end_event(hmdfs_lookup_local); +define_hmdfs_lookup_op_end_event(hmdfs_lookup_local_end); +define_hmdfs_lookup_op_end_event(hmdfs_mkdir_local); +define_hmdfs_lookup_op_end_event(hmdfs_rmdir_local); +define_hmdfs_lookup_op_end_event(hmdfs_create_local); + +define_hmdfs_lookup_op_end_event(hmdfs_lookup_remote); +define_hmdfs_lookup_op_end_event(hmdfs_lookup_remote_end); +define_hmdfs_lookup_op_end_event(hmdfs_mkdir_remote); +define_hmdfs_lookup_op_end_event(hmdfs_rmdir_remote); +define_hmdfs_lookup_op_end_event(hmdfs_create_remote); + +define_hmdfs_lookup_op_end_event(hmdfs_lookup_merge); +define_hmdfs_lookup_op_end_event(hmdfs_lookup_merge_end); +define_hmdfs_lookup_op_end_event(hmdfs_mkdir_merge); +define_hmdfs_lookup_op_end_event(hmdfs_rmdir_merge); +define_hmdfs_lookup_op_end_event(hmdfs_create_merge); + + +define_hmdfs_lookup_op_end_event(hmdfs_symlink_merge); +define_hmdfs_lookup_op_end_event(hmdfs_symlink_local); + +define_hmdfs_lookup_op_end_event(hmdfs_get_link_merge); +define_hmdfs_lookup_op_end_event(hmdfs_get_link_local); + +TRACE_EVENT(hmdfs_show_comrade, + + TP_PROTO(struct dentry *d, struct dentry *lo_d, uint64_t devid), + + TP_ARGS(d, lo_d, devid), + + TP_STRUCT__entry( + __string(name, d->d_name.name) + __string(lo_name, lo_d->d_name.name) + __field(uint64_t, devid) + ), + + TP_fast_assign( + __assign_str(name, d->d_name.name) + __assign_str(lo_name, lo_d->d_name.name) + __entry->devid = devid; + ), + + TP_printk("parent_name:%s -> lo_d_name:%s, lo_d_devid:%llu", + __get_str(name), __get_str(lo_name), __entry->devid) +); + +DECLARE_EVENT_CLASS(hmdfs_rename_op_end, + 
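+/*
+ * Same class/instance pattern as hmdfs_lookup_op_end above: the class
+ * declares the record layout once, and each DEFINE_EVENT below stamps
+ * out a named tracepoint (hmdfs_rename_local/remote/merge) that reuses
+ * it. A sketch of how one instance is enabled and read at runtime via
+ * the standard tracefs interface:
+ *
+ * echo 1 > /sys/kernel/debug/tracing/events/hmdfs/hmdfs_rename_local/enable
+ * cat /sys/kernel/debug/tracing/trace_pipe
+ */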
+ TP_PROTO(struct inode *olddir, struct dentry *olddentry, + struct inode *newdir, struct dentry *newdentry, + unsigned int flags), + + TP_ARGS(olddir, olddentry, newdir, newdentry, flags), + + TP_STRUCT__entry( + __field(ino_t, oldino) + __string(oldname_str, olddentry->d_name.name) + __field(ino_t, newino) + __string(newname_str, newdentry->d_name.name) + __field(unsigned int, flags) + ), + + TP_fast_assign( + __entry->oldino = olddir->i_ino; + __assign_str(oldname_str, olddentry->d_name.name); + __entry->newino = newdir->i_ino; + __assign_str(newname_str, newdentry->d_name.name); + __entry->flags = flags; + ), + + TP_printk("old_pino = %lu, oldname:%s; new_pino = %lu, newname:%s, flags:%u", + __entry->oldino, __get_str(oldname_str), + __entry->newino, __get_str(newname_str), __entry->flags) +); + +#define define_hmdfs_rename_op_end_event(event_name) \ + DEFINE_EVENT(hmdfs_rename_op_end, event_name, \ + TP_PROTO(struct inode *olddir, struct dentry *olddentry, \ + struct inode *newdir, struct dentry *newdentry, \ + unsigned int flags), \ + TP_ARGS(olddir, olddentry, newdir, newdentry, flags)) + +define_hmdfs_rename_op_end_event(hmdfs_rename_local); +define_hmdfs_rename_op_end_event(hmdfs_rename_remote); +define_hmdfs_rename_op_end_event(hmdfs_rename_merge); + +TRACE_EVENT(hmdfs_statfs, + + TP_PROTO(struct dentry *d, uint8_t type), + + TP_ARGS(d, type), + + TP_STRUCT__entry( + __string(name, d->d_name.name) + __field(uint8_t, type) + ), + + TP_fast_assign( + __assign_str(name, d->d_name.name) + __entry->type = type; + ), + + TP_printk("dentry_name:%s, lo_d_devid:%u", + __get_str(name), __entry->type) +); + + + +TRACE_EVENT(hmdfs_balance_dirty_pages_ratelimited, + + TP_PROTO(struct hmdfs_sb_info *sbi, + struct hmdfs_writeback *hwb, + int bdp_ratelimits), + + TP_ARGS(sbi, hwb, bdp_ratelimits), + + TP_STRUCT__entry( + __array(char, dst, 128) + __field(int, nr_dirtied) + __field(int, nr_dirtied_pause) + __field(int, dirty_exceeded) + __field(long long, bdp_ratelimits) + __field(long, ratelimit_pages) + ), + + TP_fast_assign( + strlcpy(__entry->dst, sbi->local_dst, 128); + + __entry->nr_dirtied = current->nr_dirtied; + __entry->nr_dirtied_pause = current->nr_dirtied_pause; + __entry->dirty_exceeded = hwb->dirty_exceeded; + __entry->bdp_ratelimits = bdp_ratelimits; + __entry->ratelimit_pages = hwb->ratelimit_pages; + ), + + TP_printk("hmdfs dst:%s nr_dirtied=%d nr_dirtied_pause=%d dirty_exceeded=%d bdp_ratelimits=%lld ratelimit_pages=%ld", + __entry->dst, __entry->nr_dirtied, __entry->nr_dirtied_pause, + __entry->dirty_exceeded, __entry->bdp_ratelimits, + __entry->ratelimit_pages) +); + +TRACE_EVENT(hmdfs_balance_dirty_pages, + + TP_PROTO(struct hmdfs_sb_info *sbi, + struct bdi_writeback *wb, + struct hmdfs_dirty_throttle_control *hdtc, + unsigned long pause, + unsigned long start_time), + + TP_ARGS(sbi, wb, hdtc, pause, start_time), + + TP_STRUCT__entry( + __array(char, dst, 128) + __field(unsigned long, write_bw) + __field(unsigned long, avg_write_bw) + __field(unsigned long, file_bg_thresh) + __field(unsigned long, fs_bg_thresh) + __field(unsigned long, file_thresh) + __field(unsigned long, fs_thresh) + __field(unsigned long, file_nr_dirty) + __field(unsigned long, fs_nr_dirty) + __field(unsigned long, file_nr_rec) + __field(unsigned long, fs_nr_rec) + __field(unsigned long, pause) + __field(unsigned long, paused) + ), + + TP_fast_assign( + strlcpy(__entry->dst, sbi->local_dst, 128); + + __entry->write_bw = wb->write_bandwidth; + __entry->avg_write_bw = wb->avg_write_bandwidth; + 
__entry->file_bg_thresh = hdtc->file_bg_thresh; + __entry->fs_bg_thresh = hdtc->fs_bg_thresh; + __entry->file_thresh = hdtc->file_thresh; + __entry->fs_thresh = hdtc->fs_thresh; + __entry->file_nr_dirty = hdtc->file_nr_dirty; + __entry->fs_nr_dirty = hdtc->fs_nr_dirty; + __entry->file_nr_rec = hdtc->file_nr_reclaimable; + __entry->fs_nr_rec = hdtc->fs_nr_reclaimable; + __entry->pause = pause * 1000 / HZ; + __entry->paused = (jiffies - start_time) * + 1000 / HZ; + ), + + TP_printk("hmdfs dst:%s write_bw=%lu, awrite_bw=%lu, bg_thresh=%lu,%lu thresh=%lu,%lu dirty=%lu,%lu reclaimable=%lu,%lu pause=%lu paused=%lu", + __entry->dst, __entry->write_bw, __entry->avg_write_bw, + __entry->file_bg_thresh, __entry->fs_bg_thresh, + __entry->file_thresh, __entry->fs_thresh, + __entry->file_nr_dirty, __entry->fs_nr_dirty, + __entry->file_nr_rec, __entry->fs_nr_rec, + __entry->pause, __entry->paused + ) +); + +TRACE_EVENT(hmdfs_start_srv_wb, + + TP_PROTO(struct hmdfs_sb_info *sbi, int dirty_pages, + unsigned int dirty_thresh_pg), + + TP_ARGS(sbi, dirty_pages, dirty_thresh_pg), + + TP_STRUCT__entry( + __array(char, src, 128) + __field(int, dirty_pages) + __field(unsigned int, dirty_thresh_pg) + ), + + TP_fast_assign( + strlcpy(__entry->src, sbi->local_src, 128); + __entry->dirty_pages = dirty_pages; + __entry->dirty_thresh_pg = dirty_thresh_pg; + ), + + TP_printk("hmdfs src: %s, start writeback dirty pages. writeback %d pages dirty_thresh is %d pages", + __entry->src, __entry->dirty_pages, __entry->dirty_thresh_pg) +); + +TRACE_EVENT(hmdfs_fsync_enter_remote, + + TP_PROTO(struct hmdfs_sb_info *sbi, unsigned long long device_id, + unsigned long long remote_ino, int datasync), + + TP_ARGS(sbi, device_id, remote_ino, datasync), + + TP_STRUCT__entry( + __array(char, src, 128) + __field(uint64_t, device_id) + __field(uint64_t, remote_ino) + __field(int, datasync) + ), + + TP_fast_assign( + strlcpy(__entry->src, sbi->local_src, 128); + __entry->device_id = device_id; + __entry->remote_ino = remote_ino; + __entry->datasync = datasync; + ), + + TP_printk("hmdfs: src %s, start remote fsync file(remote dev_id=%llu,ino=%llu), datasync=%d", + __entry->src, __entry->device_id, + __entry->remote_ino, __entry->datasync) +); + +TRACE_EVENT(hmdfs_fsync_exit_remote, + + TP_PROTO(struct hmdfs_sb_info *sbi, unsigned long long device_id, + unsigned long long remote_ino, unsigned int timeout, int err), + + TP_ARGS(sbi, device_id, remote_ino, timeout, err), + + TP_STRUCT__entry( + __array(char, src, 128) + __field(uint64_t, device_id) + __field(uint64_t, remote_ino) + __field(uint32_t, timeout) + __field(int, err) + ), + + TP_fast_assign( + strlcpy(__entry->src, sbi->local_src, 128); + __entry->device_id = device_id; + __entry->remote_ino = remote_ino; + __entry->timeout = timeout; + __entry->err = err; + ), + + TP_printk("hmdfs: src %s, finish remote fsync file(remote dev_id=%llu,ino=%llu), timeout=%u, err=%d", + __entry->src, __entry->device_id, __entry->remote_ino, + __entry->timeout, __entry->err) +); + +TRACE_EVENT(hmdfs_syncfs_enter, + + TP_PROTO(struct hmdfs_sb_info *sbi), + + TP_ARGS(sbi), + + TP_STRUCT__entry( + __array(char, src, 128) + ), + + TP_fast_assign( + strlcpy(__entry->src, sbi->local_src, 128); + ), + + TP_printk("hmdfs: src %s, start syncfs", __entry->src) +); + +TRACE_EVENT(hmdfs_syncfs_exit, + + TP_PROTO(struct hmdfs_sb_info *sbi, int remain_count, + unsigned int timeout, int err), + + TP_ARGS(sbi, remain_count, timeout, err), + + TP_STRUCT__entry( + __array(char, src, 128) + __field(int, remain_count) + 
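+		/* remote devices that have not yet acknowledged the syncfs */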
__field(uint32_t, timeout) + __field(int, err) + ), + + TP_fast_assign( + strlcpy(__entry->src, sbi->local_src, 128); + __entry->remain_count = remain_count; + __entry->timeout = timeout; + __entry->err = err; + ), + + TP_printk("hmdfs: src %s, finish syncfs(timeout=%u), remain %d remote devices to response, err=%d", + __entry->src, __entry->timeout, + __entry->remain_count, __entry->err) +); + +TRACE_EVENT(hmdfs_server_release, + + TP_PROTO(struct hmdfs_peer *con, uint32_t file_id, + uint64_t file_ver, int err), + + TP_ARGS(con, file_id, file_ver, err), + + TP_STRUCT__entry( + __array(char, src, 128) + __field(uint32_t, file_id) + __field(uint64_t, file_ver) + __field(uint64_t, device_id) + __field(int, err) + ), + + TP_fast_assign( + strlcpy(__entry->src, con->sbi->local_src, 128); + __entry->file_id = file_id; + __entry->file_ver = file_ver; + __entry->device_id = con->device_id; + __entry->err = err; + ), + + TP_printk("hmdfs: src %s, server release file, fid=%u, fid_ver=%llu, remote_dev=%llu, err=%d", + __entry->src, __entry->file_id, __entry->file_ver, + __entry->device_id, __entry->err) +); + +TRACE_EVENT(hmdfs_client_recv_readpage, + + TP_PROTO(struct hmdfs_peer *con, unsigned long long remote_ino, + unsigned long page_index, int err), + + TP_ARGS(con, remote_ino, page_index, err), + + TP_STRUCT__entry( + __array(char, src, 128) + __field(uint64_t, remote_ino) + __field(unsigned long, page_index) + __field(uint64_t, device_id) + __field(int, err) + ), + + TP_fast_assign( + strlcpy(__entry->src, con->sbi->local_src, 128); + __entry->remote_ino = remote_ino; + __entry->page_index = page_index; + __entry->device_id = con->device_id; + __entry->err = err; + ), + + TP_printk("hmdfs: src %s, client readpage callback from remote device %llu, remote_ino=%llu, page_idx=%lu, err=%d", + __entry->src, __entry->device_id, + __entry->remote_ino, __entry->page_index, __entry->err) +); + +TRACE_EVENT(hmdfs_writepage_cb_enter, + + TP_PROTO(struct hmdfs_peer *con, unsigned long long remote_ino, + unsigned long page_index, int err), + + TP_ARGS(con, remote_ino, page_index, err), + + TP_STRUCT__entry( + __array(char, src, 128) + __field(uint64_t, remote_ino) + __field(unsigned long, page_index) + __field(uint64_t, device_id) + __field(int, err) + ), + + TP_fast_assign( + strlcpy(__entry->src, con->sbi->local_src, 128); + __entry->remote_ino = remote_ino; + __entry->page_index = page_index; + __entry->device_id = con->device_id; + __entry->err = err; + ), + + TP_printk("hmdfs: src %s, writepage_cb start, return from remote device %llu, remote_ino=%llu, page_idx=%lu, err=%d", + __entry->src, __entry->device_id, + __entry->remote_ino, __entry->page_index, __entry->err) +); + +TRACE_EVENT(hmdfs_writepage_cb_exit, + + TP_PROTO(struct hmdfs_peer *con, unsigned long long remote_ino, + unsigned long page_index, int err), + + TP_ARGS(con, remote_ino, page_index, err), + + TP_STRUCT__entry( + __array(char, src, 128) + __field(uint64_t, remote_ino) + __field(unsigned long, page_index) + __field(uint64_t, device_id) + __field(int, err) + ), + + TP_fast_assign( + strlcpy(__entry->src, con->sbi->local_src, 128); + __entry->remote_ino = remote_ino; + __entry->page_index = page_index; + __entry->device_id = con->device_id; + __entry->err = err; + ), + + TP_printk("hmdfs: src %s, writepage_cb exit, return from remote device %llu, remote_ino=%llu, page_index=%lu, err=%d", + __entry->src, __entry->device_id, + __entry->remote_ino, __entry->page_index, __entry->err) +); + +TRACE_EVENT(hmdfs_server_rebuild_dents, + + 
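+	/* logs the header stamps and entry count of a rebuilt dcache file */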
TP_PROTO(struct hmdfs_dcache_header *__h, int err), + + TP_ARGS(__h, err), + + TP_STRUCT__entry( + __field(uint64_t, crtime) + __field(uint64_t, crtime_nsec) + __field(uint64_t, ctime) + __field(uint64_t, ctime_nsec) + __field(uint64_t, num) + __field(int, err) + ), + + TP_fast_assign( + __entry->crtime = le64_to_cpu(__h->dcache_crtime); + __entry->crtime_nsec = le64_to_cpu(__h->dcache_crtime_nsec); + __entry->ctime = le64_to_cpu(__h->dentry_ctime); + __entry->ctime_nsec = le64_to_cpu(__h->dentry_ctime_nsec); + __entry->num = le64_to_cpu(__h->num); + __entry->err = err; + ), + + TP_printk("dcache crtime %llu:%llu ctime %llu:%llu has %llu dentry err %d", + __entry->crtime, __entry->crtime_nsec, __entry->ctime, + __entry->ctime_nsec, __entry->num, __entry->err) +); + +TRACE_EVENT(hmdfs_server_readdir, + + TP_PROTO(struct readdir_request *req), + + TP_ARGS(req), + + TP_STRUCT__entry( + __string(path, req->path) + ), + + TP_fast_assign( + __assign_str(path, req->path); + ), + + TP_printk("hmdfs_server_readdir %s", __get_str(path)) +); + +TRACE_EVENT(hmdfs_open_final_remote, + + TP_PROTO(struct hmdfs_inode_info *info, + struct hmdfs_open_ret *open_ret, + struct file *file, + int reason), + + TP_ARGS(info, open_ret, file, reason), + + TP_STRUCT__entry( + __array(char, file_path, MAX_FILTER_STR_VAL) + __field(uint32_t, reason) + __field(uint32_t, file_id) + __field(uint64_t, file_ver) + __field(uint64_t, remote_file_size) + __field(uint64_t, remote_ino) + __field(uint64_t, remote_ctime) + __field(uint64_t, remote_ctime_nsec) + __field(uint64_t, remote_stable_ctime) + __field(uint64_t, remote_stable_ctime_nsec) + __field(uint64_t, local_file_size) + __field(uint64_t, local_ino) + __field(uint64_t, local_ctime) + __field(uint64_t, local_ctime_nsec) + __field(uint64_t, local_stable_ctime) + __field(uint64_t, local_stable_ctime_nsec) + ), + + TP_fast_assign( + strlcpy(__entry->file_path, file->f_path.dentry->d_name.name, + MAX_FILTER_STR_VAL); + __entry->reason = reason; + __entry->file_id = open_ret->fid.id; + __entry->file_ver = open_ret->fid.ver; + __entry->remote_file_size = open_ret->file_size; + __entry->remote_ino = open_ret->ino; + __entry->remote_ctime = open_ret->remote_ctime.tv_sec; + __entry->remote_ctime_nsec = open_ret->remote_ctime.tv_nsec; + __entry->remote_stable_ctime = open_ret->stable_ctime.tv_sec; + __entry->remote_stable_ctime_nsec = + open_ret->stable_ctime.tv_nsec; + __entry->local_file_size = info->vfs_inode.i_size; + __entry->local_ino = info->remote_ino; + __entry->local_ctime = info->remote_ctime.tv_sec; + __entry->local_ctime_nsec = info->remote_ctime.tv_nsec; + __entry->local_stable_ctime = info->stable_ctime.tv_sec; + __entry->local_stable_ctime_nsec = info->stable_ctime.tv_nsec; + ), + + TP_printk("file path: %s, file id: %u, file ver: %llu, reason: %d, file size: %llu/%llu, ino: %llu/%llu, ctime: %llu.%llu/%llu.%llu, stable_ctime: %llu.%llu/%llu.%llu from remote/local", + __entry->file_path, __entry->file_id, __entry->file_ver, + __entry->reason, __entry->remote_file_size, + __entry->local_file_size, __entry->remote_ino, + __entry->local_ino, __entry->remote_ctime, + __entry->remote_ctime_nsec, __entry->local_ctime, + __entry->local_ctime_nsec, __entry->remote_stable_ctime, + __entry->remote_stable_ctime_nsec, + __entry->local_stable_ctime, __entry->local_stable_ctime_nsec) +); + +TRACE_EVENT(hmdfs_server_open_enter, + + TP_PROTO(struct hmdfs_peer *con, + struct open_request *recv), + + TP_ARGS(con, recv), + + TP_STRUCT__entry( + __array(char, open_path, 
MAX_FILTER_STR_VAL)
+		__array(char, dst_path, MAX_FILTER_STR_VAL)
+		__field(uint32_t, file_type)
+	),
+
+	TP_fast_assign(
+		strlcpy(__entry->open_path, recv->buf, MAX_FILTER_STR_VAL);
+		strlcpy(__entry->dst_path, con->sbi->local_dst,
+			MAX_FILTER_STR_VAL);
+		__entry->file_type = recv->file_type;
+	),
+
+	TP_printk("server open file %s from %s, file_type is %u",
+		  __entry->open_path, __entry->dst_path,
+		  __entry->file_type)
+);
+
+TRACE_EVENT(hmdfs_server_open_exit,
+
+	TP_PROTO(struct hmdfs_peer *con,
+		 struct open_response *resp,
+		 struct file *file,
+		 int ret),
+
+	TP_ARGS(con, resp, file, ret),
+
+	TP_STRUCT__entry(
+		__array(char, file_path, MAX_FILTER_STR_VAL)
+		__array(char, src_path, MAX_FILTER_STR_VAL)
+		__field(uint32_t, file_id)
+		__field(uint64_t, file_size)
+		__field(uint64_t, ino)
+		__field(uint64_t, ctime)
+		__field(uint64_t, ctime_nsec)
+		__field(uint64_t, stable_ctime)
+		__field(uint64_t, stable_ctime_nsec)
+		__field(int, retval)
+	),
+
+	TP_fast_assign(
+		if (file)
+			strlcpy(__entry->file_path,
+				file->f_path.dentry->d_name.name,
+				MAX_FILTER_STR_VAL);
+		else
+			strlcpy(__entry->file_path, "null", MAX_FILTER_STR_VAL);
+		strlcpy(__entry->src_path, con->sbi->local_src,
+			MAX_FILTER_STR_VAL);
+		__entry->file_id = resp ? resp->file_id : UINT_MAX;
+		__entry->file_size = resp ? resp->file_size : ULLONG_MAX;
+		__entry->ino = resp ? resp->ino : 0;
+		__entry->ctime = resp ? resp->ctime : 0;
+		__entry->ctime_nsec = resp ? resp->ctime_nsec : 0;
+		__entry->stable_ctime = resp ? resp->stable_ctime : 0;
+		__entry->stable_ctime_nsec = resp ? resp->stable_ctime_nsec : 0;
+		__entry->retval = ret;
+	),
+
+	TP_printk("server file %s is opened from %s, open result: %d, file id: %u, file size: %llu, ino: %llu, ctime: %llu.%llu, stable ctime: %llu.%llu",
+		  __entry->file_path, __entry->src_path,
+		  __entry->retval, __entry->file_id,
+		  __entry->file_size, __entry->ino, __entry->ctime,
+		  __entry->ctime_nsec, __entry->stable_ctime,
+		  __entry->stable_ctime_nsec)
+);
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE hmdfs_trace
+#include <trace/define_trace.h>
diff --git a/fs/hmdfs/inode.c b/fs/hmdfs/inode.c
new file mode 100644
index 000000000000..8cdedf42dc95
--- /dev/null
+++ b/fs/hmdfs/inode.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/inode.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include "hmdfs_device_view.h"
+#include "inode.h"
+#include "comm/connection.h"
+
+/**
+ * Rules to generate inode numbers:
+ *
+ * "/", "/device_view", "/merge_view", "/device_view/local", "/device_view/cid"
+ * = DOMAIN {3} : dev_id {29} : HMDFS_ROOT {32}
+ *
+ * "/device_view/cid/xxx"
+ * = DOMAIN {3} : dev_id {29} : hash(remote_ino){32}
+ *
+ * "/merge_view/xxx"
+ * = DOMAIN {3} : lower's dev_id {29} : lower's ino_raw {32}
+ */
+
+#define BIT_WIDE_TOTAL 64
+
+#define BIT_WIDE_DOMAIN 3
+#define BIT_WIDE_DEVID 29
+#define BIT_WIDE_INO_RAW 32
+
+enum DOMAIN {
+	DOMAIN_ROOT,
+	DOMAIN_DEVICE_LOCAL,
+	DOMAIN_DEVICE_REMOTE,
+	DOMAIN_MERGE_VIEW,
+	DOMAIN_INVALID,
+};
+
+union hmdfs_ino {
+	const uint64_t ino_output;
+	struct {
+		uint64_t ino_raw : BIT_WIDE_INO_RAW;
+		uint64_t dev_id : BIT_WIDE_DEVID;
+		uint8_t domain : BIT_WIDE_DOMAIN;
+	};
+};
+
+static uint8_t read_ino_domain(uint64_t ino)
+{
+	union hmdfs_ino _ino = {
+		.ino_output = ino,
+	};
+
+	return _ino.domain;
+}
+
+struct iget_args {
+	/* The lower inode of a local/merge/root(partly) inode */
+	struct inode *lo_i;
+	/* The peer of a remote inode */
+	struct hmdfs_peer *peer;
+	/* The ino of a remote inode */
+	uint64_t remote_ino;
+
+	/* Returned inode's ino */
+	union hmdfs_ino ino;
+};
+
+/**
+ * iget_test - check whether the inode with matched hashval is the one we
+ * are looking for
+ *
+ * @inode: the local inode we found in the inode cache with matched hashval
+ * @data: struct iget_args
+ */
+static int iget_test(struct inode *inode, void *data)
+{
+	struct hmdfs_inode_info *hii = hmdfs_i(inode);
+	struct iget_args *ia = data;
+	int res = 0;
+
+	/* domain is an unsigned bitfield, so only the upper bound matters */
+	WARN_ON(ia->ino.domain >= DOMAIN_INVALID);
+
+	if (read_ino_domain(inode->i_ino) == DOMAIN_ROOT)
+		return 0;
+
+	switch (ia->ino.domain) {
+	case DOMAIN_MERGE_VIEW:
+		res = (ia->lo_i == hii->lower_inode);
+		break;
+	case DOMAIN_DEVICE_LOCAL:
+		res = (ia->lo_i == hii->lower_inode);
+		break;
+	case DOMAIN_DEVICE_REMOTE:
+		res = (ia->peer == hii->conn &&
+		       ia->remote_ino == hii->remote_ino);
+		break;
+	}
+
+	return res;
+}
+
+/**
+ * iget_set - initialize an inode with iget_args
+ *
+ * @inode: the newly allocated inode
+ * @data: struct iget_args
+ */
+static int iget_set(struct inode *inode, void *data)
+{
+	struct hmdfs_inode_info *hii = hmdfs_i(inode);
+	struct iget_args *ia = (struct iget_args *)data;
+
+	inode->i_ino = ia->ino.ino_output;
+	inode_inc_iversion(inode);
+
+	hii->conn = ia->peer;
+	hii->remote_ino = ia->remote_ino;
+	hii->lower_inode = ia->lo_i;
+
+	return 0;
+}
+
+static uint64_t make_ino_raw_dev_local(uint64_t lo_ino)
+{
+	if (!(lo_ino >> BIT_WIDE_INO_RAW))
+		return lo_ino;
+
+	return lo_ino * GOLDEN_RATIO_64 >> BIT_WIDE_INO_RAW;
+}
+
+static uint64_t make_ino_raw_dev_remote(uint64_t remote_ino)
+{
+	return hash_long(remote_ino, BIT_WIDE_INO_RAW);
+}
+
+/**
+ * hmdfs_iget5_locked_merge - obtain an inode for the merge-view
+ *
+ * @sb: superblock of current instance
+ * @fst_lo_d: the lower dentry of its first comrade
+ *
+ * Simply replace the lower's domain to build the new ino.
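+ *
+ * An illustrative example (assuming a little-endian build, where ino_raw
+ * occupies the low bits of the bitfield): a comrade on device 2 whose
+ * lower inode number is 0x1234 yields
+ *
+ *   ino_output = DOMAIN_MERGE_VIEW << 61 | 2 << 32 | 0x1234
+ *              = 0x6000000200001234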
+ */
+struct inode *hmdfs_iget5_locked_merge(struct super_block *sb,
+				       struct dentry *fst_lo_d)
+{
+	struct iget_args ia = {
+		.lo_i = d_inode(fst_lo_d),
+		.peer = NULL,
+		.remote_ino = 0,
+		.ino.ino_output = 0,
+	};
+
+	if (unlikely(!d_inode(fst_lo_d))) {
+		hmdfs_err("Received an invalid lower inode");
+		return NULL;
+	}
+
+	ia.ino.ino_raw = d_inode(fst_lo_d)->i_ino;
+	ia.ino.dev_id = hmdfs_d(fst_lo_d)->device_id;
+	ia.ino.domain = DOMAIN_MERGE_VIEW;
+	return iget5_locked(sb, ia.ino.ino_output, iget_test, iget_set, &ia);
+}
+
+/**
+ * hmdfs_iget5_locked_local - obtain an inode for the local-dev-view
+ *
+ * @sb: superblock of current instance
+ * @lo_i: the lower inode from the local filesystem
+ *
+ * Hash the local inode's ino to generate our ino. We continue to compare
+ * the address of the lower_inode for uniqueness when collisions occur.
+ */
+struct inode *hmdfs_iget5_locked_local(struct super_block *sb,
+				       struct inode *lo_i)
+{
+	struct iget_args ia = {
+		.lo_i = lo_i,
+		.peer = NULL,
+		.remote_ino = 0,
+		.ino.ino_output = 0,
+	};
+
+	if (unlikely(!lo_i)) {
+		hmdfs_err("Received an invalid lower inode");
+		return NULL;
+	}
+	ia.ino.ino_raw = make_ino_raw_dev_local(lo_i->i_ino);
+	ia.ino.dev_id = 0;
+	ia.ino.domain = DOMAIN_DEVICE_LOCAL;
+	return iget5_locked(sb, ia.ino.ino_output, iget_test, iget_set, &ia);
+}
+
+/**
+ * hmdfs_iget5_locked_remote - obtain an inode for the remote-dev-view
+ *
+ * @sb: superblock of current instance
+ * @peer: corresponding device node
+ * @remote_ino: remote inode's ino
+ *
+ * Hash the remote ino into the low 32 bits of our ino.
+ *
+ * Note that the current implementation assumes that each remote inode has
+ * a unique ino. Thus the combination of the peer's unique dev_id and the
+ * remote_ino is enough to determine a unique remote inode.
+ */
+struct inode *hmdfs_iget5_locked_remote(struct super_block *sb,
+					struct hmdfs_peer *peer,
+					uint64_t remote_ino)
+{
+	struct iget_args ia = {
+		.lo_i = NULL,
+		.peer = peer,
+		.remote_ino = remote_ino,
+		.ino.ino_output = 0,
+	};
+
+	if (unlikely(!peer)) {
+		hmdfs_err("Received an invalid peer");
+		return NULL;
+	}
+
+	ia.ino.ino_raw = make_ino_raw_dev_remote(remote_ino);
+	ia.ino.dev_id = peer->device_id;
+	ia.ino.domain = DOMAIN_DEVICE_REMOTE;
+	return iget5_locked(sb, ia.ino.ino_output, iget_test, iget_set, &ia);
+}
+
+struct inode *hmdfs_iget_locked_root(struct super_block *sb, uint64_t root_ino,
+				     struct inode *lo_i,
+				     struct hmdfs_peer *peer)
+{
+	struct iget_args ia = {
+		.lo_i = lo_i,
+		.peer = peer,
+		.remote_ino = 0,
+		.ino.ino_raw = root_ino,
+		.ino.dev_id = peer ? peer->device_id : 0,
+		.ino.domain = DOMAIN_ROOT,
+	};
+
+	/* root_ino is unsigned, so only the upper bound needs checking */
+	if (unlikely(root_ino >= HMDFS_ROOT_INVALID)) {
+		hmdfs_err("Root %llu is invalid", root_ino);
+		return NULL;
+	}
+	if (unlikely(root_ino == HMDFS_ROOT_DEV_REMOTE && !peer)) {
+		hmdfs_err("Root %llu received an invalid peer", root_ino);
+		return NULL;
+	}
+
+	return iget5_locked(sb, ia.ino.ino_output, iget_test, iget_set, &ia);
+}
diff --git a/fs/hmdfs/inode.h b/fs/hmdfs/inode.h
new file mode 100644
index 000000000000..47f189f3cf82
--- /dev/null
+++ b/fs/hmdfs/inode.h
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/inode.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef INODE_H
+#define INODE_H
+
+#include "hmdfs.h"
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0)
+#include <linux/iversion.h>
+#endif
+
+enum {
+	HMDFS_REMOTE_INODE_NONE = 0,
+	HMDFS_REMOTE_INODE_STASHING,
+	HMDFS_REMOTE_INODE_RESTORING,
+};
+
+/*****************************************************************************
+ * fid
+ *****************************************************************************/
+
+/* Bits for fid_flags */
+enum {
+	HMDFS_FID_NEED_OPEN = 0,
+	HMDFS_FID_OPENING,
+};
+
+struct hmdfs_fid {
+	__u64 ver;
+	__u32 id;
+};
+
+/*
+ * A cache file is stored in the following format:
+ * ________________________________________________________________
+ * |meta file info| remote file(s) path |       file content       |
+ * |     head     |        path         |          data            |
+ *                ↑                     ↑
+ *            path_offs             data_offs
+ */
+struct hmdfs_cache_info {
+	/* Path start offset in file (HMDFS_STASH_BLK_SIZE aligned) */
+	__u32 path_offs;
+	__u32 path_len;
+	__u32 path_cnt;
+	char *path_buf;
+	/* Remote (hardlinked) file paths, separated by '\0' */
+	char *path;
+	/* Data start offset in file (HMDFS_STASH_BLK_SIZE aligned) */
+	__u32 data_offs;
+	/* # of pages that need to be written to the remote file during offline */
+	atomic64_t to_write_pgs;
+	/* # of pages written to the remote file during offline */
+	atomic64_t written_pgs;
+	/* Stash file handler */
+	struct file *cache_file;
+};
+
+/*****************************************************************************
+ * inode info and its inline helpers
+ *****************************************************************************/
+
+struct hmdfs_inode_info {
+	struct inode *lower_inode; // for local/merge inode
+	struct hmdfs_peer *conn; // for remote inode
+	struct kref ref;
+	spinlock_t fid_lock;
+	struct hmdfs_fid fid;
+	unsigned long fid_flags;
+	wait_queue_head_t fid_wq;
+	__u8 inode_type; // deprecated: use ino system instead
+
+	/* writeback list */
+	struct list_head wb_list;
+
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+	__u16 perm;
+#endif
+	/*
+	 * Looking up a remote file generates a local inode. This stores the
+	 * combination of the remote inode number and generation in such a
+	 * situation, so the uniqueness of the local inode can be determined.
+	 */
+	__u64 remote_ino;
+	/*
+	 * If this value is not ULLONG_MAX, the remote getattr syscall should
+	 * return it as the inode size.
+	 */
+	__u64 getattr_isize;
+	/*
+	 * This value stores the remote ctime, captured when the remote file
+	 * is opened.
+	 */
+	struct hmdfs_time_t remote_ctime;
+	/*
+	 * This value stores the last time, aligned to dcache_precision, that
+	 * the remote file was modified. It should be noted that this value
+	 * won't be effective if writecache_expire is set.
+	 */
+	struct hmdfs_time_t stable_ctime;
+	/*
+	 * If this value is set nonzero, the pagecache should be truncated if
+	 * the time that the file is opened is beyond the value. Furthermore,
+	 * the functionality of stable_ctime won't be effective.
+	 */
+	unsigned long writecache_expire;
+	/*
+	 * This value records how many times the file has been written while
+	 * it is opened. 'writecache_expire' will be set on close if this
+	 * value is nonzero.
+	 */
+	atomic64_t write_counter;
+	/*
+	 * Linked into hmdfs_peer::wr_opened_inode_list when the remote inode
+	 * is writable-opened, with wr_opened_cnt tracking possibly multiple
+	 * writable opens.
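+	 * e.g. two overlapping writable opens raise wr_opened_cnt to 2, and
+	 * the inode leaves the list only after both have been released.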
+	 */
+	struct list_head wr_opened_node;
+	atomic_t wr_opened_cnt;
+	spinlock_t stash_lock;
+	unsigned int stash_status;
+	struct hmdfs_cache_info *cache;
+	/* link to hmdfs_peer::stashed_inode_list when stashing completes */
+	struct list_head stash_node;
+	/*
+	 * The flush/fsync thread will hold the write lock while threads
+	 * calling writepage will hold the read lock. We use an rwsem to
+	 * eliminate the cases where flush/fsync operations complete while
+	 * re-dirtied pages remain dirty.
+	 *
+	 * Here is the explanation in detail:
+	 *
+	 * During `writepage()`, the state of a re-dirtied page will switch
+	 * to the following states in sequence:
+	 * s1: page dirty + tree dirty
+	 * s2: page dirty + tree dirty
+	 * s3: page clean + tree dirty
+	 * s4: page clean + tree clean + write back
+	 * s5: page dirty + tree dirty + write back
+	 * s6: page dirty + tree dirty
+	 *
+	 * A page in s4 will thus be ignored by the concurrent
+	 * `do_writepages()` contained in `close()` or `fsync()`, making its
+	 * state inconsistent.
+	 *
+	 * To avoid such a situation, we use per-file rwsems to prevent
+	 * concurrent in-flight `writepage` during `close()` or `fsync()`.
+	 *
+	 * Minimal overhead is brought in since the read side allows
+	 * concurrent `writepage`, while it is natural for `close()` or
+	 * `fsync()` to wait for in-flight `writepage()`s to complete.
+	 *
+	 * NOTE that in the worst case, a process may wait on the write side
+	 * for TIMEOUT even if a signal is pending. But we have to wait there
+	 * to iterate all pages and make sure that no dirty page remains.
+	 */
+	struct rw_semaphore wpage_sem;
+
+	// The real inode shared with vfs. ALWAYS PUT IT AT THE BOTTOM.
+	struct inode vfs_inode;
+};
+
+struct hmdfs_readdir_work {
+	struct list_head head;
+	struct dentry *dentry;
+	struct hmdfs_peer *con;
+	struct delayed_work dwork;
+};
+
+static inline struct hmdfs_inode_info *hmdfs_i(struct inode *inode)
+{
+	return container_of(inode, struct hmdfs_inode_info, vfs_inode);
+}
+
+static inline bool hmdfs_inode_is_stashing(const struct hmdfs_inode_info *info)
+{
+	const struct hmdfs_sb_info *sbi = hmdfs_sb(info->vfs_inode.i_sb);
+
+	/* Refer to comments in hmdfs_stash_remote_inode() */
+	return (hmdfs_is_stash_enabled(sbi) &&
+		smp_load_acquire(&info->stash_status)); // protect
+}
+
+static inline void hmdfs_remote_fetch_fid(struct hmdfs_inode_info *info,
+					  struct hmdfs_fid *fid)
+{
+	spin_lock(&info->fid_lock);
+	*fid = info->fid;
+	spin_unlock(&info->fid_lock);
+}
+
+/*****************************************************************************
+ * ino allocator
+ *****************************************************************************/
+
+enum HMDFS_ROOT {
+	HMDFS_ROOT_ANCESTOR = 1, // /
+	HMDFS_ROOT_DEV, // /device_view
+	HMDFS_ROOT_DEV_LOCAL, // /device_view/local
+	HMDFS_ROOT_DEV_REMOTE, // /device_view/remote
+	HMDFS_ROOT_MERGE, // /merge_view
+
+	HMDFS_ROOT_INVALID,
+};
+
+// directory-tree layers (view hierarchy), not overlay stacking layers
+enum HMDFS_LAYER_TYPE {
+	HMDFS_LAYER_ZERO = 0, // /
+	HMDFS_LAYER_FIRST_DEVICE, // /device_view
+	HMDFS_LAYER_SECOND_LOCAL, // /device_view/local
+	HMDFS_LAYER_SECOND_REMOTE, // /device_view/remote
+	HMDFS_LAYER_OTHER_LOCAL, // /device_view/local/xx
+	HMDFS_LAYER_OTHER_REMOTE, // /device_view/remote/xx
+
+	HMDFS_LAYER_FIRST_MERGE, // /merge_view
+	HMDFS_LAYER_OTHER_MERGE, // /merge_view/xxx
+	HMDFS_LAYER_INVALID,
+};
+
+struct inode *hmdfs_iget_locked_root(struct super_block *sb, uint64_t root_ino,
+				     struct inode *lo_i,
+				     struct hmdfs_peer *peer);
+struct inode *hmdfs_iget5_locked_merge(struct super_block
*sb, + struct dentry *fst_lo_d); + +struct inode *hmdfs_iget5_locked_local(struct super_block *sb, + struct inode *lo_i); +struct hmdfs_peer; +struct inode *hmdfs_iget5_locked_remote(struct super_block *sb, + struct hmdfs_peer *peer, + uint64_t remote_ino); + +#endif // INODE_H diff --git a/fs/hmdfs/inode_local.c b/fs/hmdfs/inode_local.c new file mode 100644 index 000000000000..d34b765ab65d --- /dev/null +++ b/fs/hmdfs/inode_local.c @@ -0,0 +1,963 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/inode_local.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include +#include +#include +#include +#include +#include + +#include "authority/authentication.h" +#include "comm/socket_adapter.h" +#include "comm/transport.h" +#include "hmdfs_client.h" +#include "hmdfs_dentryfile.h" +#include "hmdfs_device_view.h" +#include "hmdfs_trace.h" + +extern struct kmem_cache *hmdfs_dentry_cachep; + +static const char *const symlink_tgt_white_list[] = { + "/storage/", + "/sdcard/", +}; + +struct hmdfs_name_data { + struct dir_context ctx; + const struct qstr *to_find; + char *name; + bool found; +}; + +int init_hmdfs_dentry_info(struct hmdfs_sb_info *sbi, struct dentry *dentry, + int dentry_type) +{ + struct hmdfs_dentry_info *info = + kmem_cache_zalloc(hmdfs_dentry_cachep, GFP_ATOMIC); + + if (!info) + return -ENOMEM; + dentry->d_fsdata = info; + INIT_LIST_HEAD(&info->cache_list_head); + INIT_LIST_HEAD(&info->remote_cache_list_head); + spin_lock_init(&info->cache_list_lock); + mutex_init(&info->remote_cache_list_lock); + mutex_init(&info->cache_pull_lock); + spin_lock_init(&info->lock); + info->dentry_type = dentry_type; + info->device_id = 0; + if (dentry_type == HMDFS_LAYER_ZERO || + dentry_type == HMDFS_LAYER_FIRST_DEVICE || + dentry_type == HMDFS_LAYER_SECOND_LOCAL || + dentry_type == HMDFS_LAYER_SECOND_REMOTE) + d_set_d_op(dentry, &hmdfs_dev_dops); + else + d_set_d_op(dentry, &hmdfs_dops); + return 0; +} + +static inline void set_symlink_flag(struct hmdfs_dentry_info *gdi) +{ + gdi->file_type = HM_SYMLINK; +} + +struct inode *fill_inode_local(struct super_block *sb, + struct inode *lower_inode) +{ + struct inode *inode; + struct hmdfs_inode_info *info; + + if (!igrab(lower_inode)) + return ERR_PTR(-ESTALE); + + inode = hmdfs_iget5_locked_local(sb, lower_inode); + if (!inode) { + hmdfs_err("iget5_locked get inode NULL"); + iput(lower_inode); + return ERR_PTR(-ENOMEM); + } + if (!(inode->i_state & I_NEW)) { + iput(lower_inode); + return inode; + } + + info = hmdfs_i(inode); +#ifdef CONFIG_HMDFS_FS_PERMISSION + info->perm = hmdfs_read_perm(lower_inode); +#endif + if (S_ISDIR(lower_inode->i_mode)) + inode->i_mode = (lower_inode->i_mode & S_IFMT) | S_IRWXU | + S_IRWXG | S_IXOTH; + else if (S_ISREG(lower_inode->i_mode)) + inode->i_mode = (lower_inode->i_mode & S_IFMT) | S_IRUSR | + S_IWUSR | S_IRGRP | S_IWGRP; + else if (S_ISLNK(lower_inode->i_mode)) + inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; + +#ifdef CONFIG_HMDFS_FS_PERMISSION + inode->i_uid = lower_inode->i_uid; + inode->i_gid = lower_inode->i_gid; +#else + inode->i_uid = KUIDT_INIT((uid_t)1000); + inode->i_gid = KGIDT_INIT((gid_t)1000); +#endif + inode->i_atime = lower_inode->i_atime; + inode->i_ctime = lower_inode->i_ctime; + inode->i_mtime = lower_inode->i_mtime; + inode->i_generation = lower_inode->i_generation; + + info->inode_type = HMDFS_LAYER_OTHER_LOCAL; + if (S_ISDIR(lower_inode->i_mode)) { + inode->i_op = &hmdfs_dir_inode_ops_local; + inode->i_fop = &hmdfs_dir_ops_local; + inode->i_mode |= 
S_IXUGO;
+	} else if (S_ISREG(lower_inode->i_mode)) {
+		inode->i_op = &hmdfs_file_iops_local;
+		inode->i_fop = &hmdfs_file_fops_local;
+	} else if (S_ISLNK(lower_inode->i_mode)) {
+		inode->i_op = &hmdfs_symlink_iops_local;
+		inode->i_fop = &hmdfs_file_fops_local;
+	}
+
+	fsstack_copy_inode_size(inode, lower_inode);
+	unlock_new_inode(inode);
+	return inode;
+}
+
+/* hmdfs_convert_lookup_flags - convert hmdfs lookup flags to vfs lookup flags
+ *
+ * @hmdfs_flags: hmdfs lookup flags
+ * @vfs_flags: pointer to converted flags
+ *
+ * return 0 on success, or err code on failure.
+ */
+int hmdfs_convert_lookup_flags(unsigned int hmdfs_flags,
+			       unsigned int *vfs_flags)
+{
+	*vfs_flags = 0;
+
+	/* currently only support HMDFS_LOOKUP_REVAL */
+	if (hmdfs_flags & ~HMDFS_LOOKUP_REVAL)
+		return -EINVAL;
+
+	if (hmdfs_flags & HMDFS_LOOKUP_REVAL)
+		*vfs_flags |= LOOKUP_REVAL;
+
+	return 0;
+}
+
+static int hmdfs_name_match(struct dir_context *ctx, const char *name,
+			    int namelen, loff_t offset, u64 ino,
+			    unsigned int d_type)
+{
+	struct hmdfs_name_data *buf =
+		container_of(ctx, struct hmdfs_name_data, ctx);
+	struct qstr candidate = QSTR_INIT(name, namelen);
+
+	if (qstr_case_eq(buf->to_find, &candidate)) {
+		memcpy(buf->name, name, namelen);
+		buf->name[namelen] = 0;
+		buf->found = true;
+		return 1;
+	}
+	return 0;
+}
+
+static int __lookup_nosensitive(struct path *lower_parent_path,
+				struct dentry *child_dentry, unsigned int flags,
+				struct path *lower_path)
+{
+	struct file *file;
+	const struct cred *cred = current_cred();
+	const struct qstr *name = &child_dentry->d_name;
+	int err;
+	struct hmdfs_name_data buffer = {
+		.ctx.actor = hmdfs_name_match,
+		.to_find = name,
+		.name = __getname(),
+		.found = false,
+	};
+
+	if (!buffer.name) {
+		err = -ENOMEM;
+		goto out;
+	}
+	file = dentry_open(lower_parent_path, O_RDONLY, cred);
+	if (IS_ERR(file)) {
+		err = PTR_ERR(file);
+		goto put_name;
+	}
+	err = iterate_dir(file, &buffer.ctx);
+	fput(file);
+	if (err)
+		goto put_name;
+	if (buffer.found)
+		err = vfs_path_lookup(lower_parent_path->dentry,
+				      lower_parent_path->mnt, buffer.name,
+				      flags, lower_path);
+	else
+		err = -ENOENT;
+put_name:
+	__putname(buffer.name);
+out:
+	return err;
+}
+
+struct dentry *hmdfs_lookup_local(struct inode *parent_inode,
+				  struct dentry *child_dentry,
+				  unsigned int flags)
+{
+	const char *d_name = child_dentry->d_name.name;
+	int err = 0;
+	struct path lower_path, lower_parent_path;
+	struct dentry *lower_dentry = NULL, *parent_dentry = NULL, *ret = NULL;
+	struct hmdfs_dentry_info *gdi = NULL;
+	struct inode *child_inode = NULL;
+	struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb);
+
+	trace_hmdfs_lookup_local(parent_inode, child_dentry, flags);
+	if (child_dentry->d_name.len > NAME_MAX) {
+		ret = ERR_PTR(-ENAMETOOLONG);
+		goto out;
+	}
+
+	/* local device */
+	parent_dentry = dget_parent(child_dentry);
+	hmdfs_get_lower_path(parent_dentry, &lower_parent_path);
+	err = init_hmdfs_dentry_info(sbi, child_dentry,
+				     HMDFS_LAYER_OTHER_LOCAL);
+	if (err) {
+		ret = ERR_PTR(err);
+		goto out_err;
+	}
+
+	gdi = hmdfs_d(child_dentry);
+
+	flags &= ~LOOKUP_FOLLOW;
+	err = vfs_path_lookup(lower_parent_path.dentry, lower_parent_path.mnt,
+			      (child_dentry->d_name.name), 0, &lower_path);
+	if (err == -ENOENT && !sbi->s_case_sensitive)
+		err = __lookup_nosensitive(&lower_parent_path, child_dentry, 0,
+					   &lower_path);
+	if (err && err != -ENOENT) {
+		ret = ERR_PTR(err);
+		goto out_err;
+	} else if (!err) {
+		hmdfs_set_lower_path(child_dentry, &lower_path);
+		child_inode =
fill_inode_local(parent_inode->i_sb, + d_inode(lower_path.dentry)); + if (S_ISLNK(d_inode(lower_path.dentry)->i_mode)) + set_symlink_flag(gdi); + if (IS_ERR(child_inode)) { + err = PTR_ERR(child_inode); + ret = ERR_PTR(err); + hmdfs_put_reset_lower_path(child_dentry); + goto out_err; + } + ret = d_splice_alias(child_inode, child_dentry); + if (IS_ERR(ret)) { + err = PTR_ERR(ret); + hmdfs_put_reset_lower_path(child_dentry); + goto out_err; + } + + check_and_fixup_ownership(parent_inode, child_inode, + lower_path.dentry, + child_dentry->d_name.name); + goto out_err; + } + /* + * return 0 here, so that vfs can continue the process of making this + * negative dentry to a positive one while creating a new file. + */ + err = 0; + ret = 0; + + lower_dentry = lookup_one_len_unlocked(d_name, lower_parent_path.dentry, + child_dentry->d_name.len); + if (IS_ERR(lower_dentry)) { + err = PTR_ERR(lower_dentry); + ret = lower_dentry; + goto out_err; + } + lower_path.dentry = lower_dentry; + lower_path.mnt = mntget(lower_parent_path.mnt); + hmdfs_set_lower_path(child_dentry, &lower_path); + +out_err: + if (!err) + hmdfs_set_time(child_dentry, jiffies); + hmdfs_put_lower_path(&lower_parent_path); + dput(parent_dentry); +out: + trace_hmdfs_lookup_local_end(parent_inode, child_dentry, err); + return ret; +} + +int hmdfs_mkdir_local_dentry(struct inode *dir, struct dentry *dentry, + umode_t mode) +{ + struct inode *lower_dir = hmdfs_i(dir)->lower_inode; + struct dentry *lower_dir_dentry = NULL; + struct super_block *sb = dir->i_sb; + struct path lower_path; + struct dentry *lower_dentry = NULL; + int error = 0; + struct inode *lower_inode = NULL; + struct inode *child_inode = NULL; + bool local_res = false; + struct cache_fs_override or; + __u16 child_perm; + kuid_t tmp_uid; + + error = hmdfs_override_dir_id_fs(&or, dir, dentry, &child_perm); + if (error) + goto cleanup; + + hmdfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_dir_dentry = lock_parent(lower_dentry); + + tmp_uid = hmdfs_override_inode_uid(lower_dir); + mode = (mode & S_IFMT) | 00771; + + error = vfs_mkdir(lower_dir, lower_dentry, mode); + hmdfs_revert_inode_uid(lower_dir, tmp_uid); + if (error) { + hmdfs_err("vfs_mkdir() error:%d", error); + goto out; + } + local_res = true; + lower_inode = d_inode(lower_dentry); +#ifdef CONFIG_HMDFS_FS_PERMISSION + error = hmdfs_persist_perm(lower_dentry, &child_perm); +#endif + child_inode = fill_inode_local(sb, lower_inode); + if (IS_ERR(child_inode)) { + error = PTR_ERR(child_inode); + goto out; + } + d_add(dentry, child_inode); + set_nlink(dir, hmdfs_i(dir)->lower_inode->i_nlink); +out: + unlock_dir(lower_dir_dentry); + if (local_res) + hmdfs_drop_remote_cache_dents(dentry->d_parent); + + if (error) { + hmdfs_clear_drop_flag(dentry->d_parent); + d_drop(dentry); + } + hmdfs_put_lower_path(&lower_path); + hmdfs_revert_dir_id_fs(&or); +cleanup: + return error; +} + +int hmdfs_mkdir_local(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + int err = 0; + + if (check_filename(dentry->d_name.name, dentry->d_name.len)) { + err = -EINVAL; + return err; + } + + if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) { + err = -EACCES; + return err; + } + err = hmdfs_mkdir_local_dentry(dir, dentry, mode); + trace_hmdfs_mkdir_local(dir, dentry, err); + return err; +} + +int hmdfs_create_local_dentry(struct inode *dir, struct dentry *dentry, + umode_t mode, bool want_excl) +{ + struct inode *lower_dir = NULL; + struct dentry *lower_dir_dentry = NULL; + struct 
super_block *sb = dir->i_sb;
+	struct path lower_path;
+	struct dentry *lower_dentry = NULL;
+	int error = 0;
+	struct inode *lower_inode = NULL;
+	struct inode *child_inode = NULL;
+	kuid_t tmp_uid;
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+	const struct cred *saved_cred = NULL;
+	struct fs_struct *saved_fs = NULL, *copied_fs = NULL;
+	__u16 child_perm;
+#endif
+
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+	saved_cred = hmdfs_override_file_fsids(dir, &child_perm);
+	if (!saved_cred) {
+		error = -ENOMEM;
+		goto path_err;
+	}
+
+	saved_fs = current->fs;
+	copied_fs = hmdfs_override_fsstruct(saved_fs);
+	if (!copied_fs) {
+		error = -ENOMEM;
+		goto revert_fsids;
+	}
+#endif
+	hmdfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	mode = (mode & S_IFMT) | 00660;
+	lower_dir_dentry = lock_parent(lower_dentry);
+	lower_dir = d_inode(lower_dir_dentry);
+	tmp_uid = hmdfs_override_inode_uid(lower_dir);
+	error = vfs_create(lower_dir, lower_dentry, mode, want_excl);
+	hmdfs_revert_inode_uid(lower_dir, tmp_uid);
+	unlock_dir(lower_dir_dentry);
+	if (error)
+		goto out;
+
+	lower_inode = d_inode(lower_dentry);
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+	error = hmdfs_persist_perm(lower_dentry, &child_perm);
+#endif
+	child_inode = fill_inode_local(sb, lower_inode);
+	if (IS_ERR(child_inode)) {
+		error = PTR_ERR(child_inode);
+		goto out_created;
+	}
+	d_add(dentry, child_inode);
+
+out_created:
+	hmdfs_drop_remote_cache_dents(dentry->d_parent);
+out:
+	if (error) {
+		hmdfs_clear_drop_flag(dentry->d_parent);
+		d_drop(dentry);
+	}
+	hmdfs_put_lower_path(&lower_path);
+
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+	hmdfs_revert_fsstruct(saved_fs, copied_fs);
+revert_fsids:
+	hmdfs_revert_fsids(saved_cred);
+#endif
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+path_err:
+#endif
+	return error;
+}
+
+int hmdfs_create_local(struct inode *dir, struct dentry *child_dentry,
+		       umode_t mode, bool want_excl)
+{
+	int err = 0;
+
+	if (check_filename(child_dentry->d_name.name,
+			   child_dentry->d_name.len)) {
+		err = -EINVAL;
+		return err;
+	}
+
+	if (hmdfs_file_type(child_dentry->d_name.name) != HMDFS_TYPE_COMMON) {
+		err = -EACCES;
+		return err;
+	}
+
+	err = hmdfs_create_local_dentry(dir, child_dentry, mode, want_excl);
+	trace_hmdfs_create_local(dir, child_dentry, err);
+	return err;
+}
+
+int hmdfs_rmdir_local_dentry(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *lower_dir = NULL;
+	struct dentry *lower_dir_dentry = NULL;
+	kuid_t tmp_uid;
+	struct path lower_path;
+	struct dentry *lower_dentry = NULL;
+	int error = 0;
+
+	hmdfs_clear_cache_dents(dentry, true);
+	hmdfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	lower_dir_dentry = lock_parent(lower_dentry);
+	lower_dir = d_inode(lower_dir_dentry);
+	tmp_uid = hmdfs_override_inode_uid(lower_dir);
+
+	error = vfs_rmdir(lower_dir, lower_dentry);
+	hmdfs_revert_inode_uid(lower_dir, tmp_uid);
+	unlock_dir(lower_dir_dentry);
+	hmdfs_put_lower_path(&lower_path);
+	if (error)
+		goto path_err;
+	hmdfs_drop_remote_cache_dents(dentry->d_parent);
+path_err:
+	if (error)
+		hmdfs_clear_drop_flag(dentry->d_parent);
+	return error;
+}
+
+int hmdfs_rmdir_local(struct inode *dir, struct dentry *dentry)
+{
+	int err = 0;
+
+	if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) {
+		err = -EACCES;
+		goto out;
+	}
+
+	err = hmdfs_rmdir_local_dentry(dir, dentry);
+	if (err != 0) {
+		hmdfs_err("rm dir failed:%d", err);
+		goto out;
+	}
+
+	/*
+	 * Drop the dentry even if the remote operation failed: a remote
+	 * device may have disconnected while the remote rmdir was in
+	 * progress.
+	 */
+	d_drop(dentry);
+out:
+	/* return the connected device's error code */
+	trace_hmdfs_rmdir_local(dir, dentry, err);
+	return err;
+}
+
+int hmdfs_unlink_local_dentry(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *lower_dir = hmdfs_i(dir)->lower_inode;
+	struct dentry *lower_dir_dentry = NULL;
+	struct path lower_path;
+	struct dentry *lower_dentry = NULL;
+	int error;
+	kuid_t tmp_uid;
+
+	hmdfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	dget(lower_dentry);
+	lower_dir_dentry = lock_parent(lower_dentry);
+	tmp_uid = hmdfs_override_inode_uid(lower_dir);
+	error = vfs_unlink(lower_dir, lower_dentry, NULL);
+	hmdfs_revert_inode_uid(lower_dir, tmp_uid);
+	set_nlink(d_inode(dentry),
+		  hmdfs_i(d_inode(dentry))->lower_inode->i_nlink);
+	unlock_dir(lower_dir_dentry);
+	dput(lower_dentry);
+	if (error)
+		goto path_err;
+
+	hmdfs_drop_remote_cache_dents(dentry->d_parent);
+	d_drop(dentry);
+
+path_err:
+	/* the lower path must be put on the error path as well */
+	hmdfs_put_lower_path(&lower_path);
+	if (error)
+		hmdfs_clear_drop_flag(dentry->d_parent);
+	return error;
+}
+
+int hmdfs_unlink_local(struct inode *dir, struct dentry *dentry)
+{
+	if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON)
+		return -EACCES;
+
+	return hmdfs_unlink_local_dentry(dir, dentry);
+}
+
+int hmdfs_rename_local_dentry(struct inode *old_dir, struct dentry *old_dentry,
+			      struct inode *new_dir, struct dentry *new_dentry,
+			      unsigned int flags)
+{
+	struct path lower_old_path;
+	struct path lower_new_path;
+	struct dentry *lower_old_dentry = NULL;
+	struct dentry *lower_new_dentry = NULL;
+	struct dentry *lower_old_dir_dentry = NULL;
+	struct dentry *lower_new_dir_dentry = NULL;
+	struct dentry *trap = NULL;
+	int rc = 0;
+	kuid_t old_dir_uid, new_dir_uid;
+
+	if (flags)
+		return -EINVAL;
+
+	hmdfs_get_lower_path(old_dentry, &lower_old_path);
+	lower_old_dentry = lower_old_path.dentry;
+	if (!lower_old_dentry) {
+		hmdfs_err("lower_old_dentry as NULL");
+		rc = -EACCES;
+		goto out_put_old_path;
+	}
+
+	hmdfs_get_lower_path(new_dentry, &lower_new_path);
+	lower_new_dentry = lower_new_path.dentry;
+	if (!lower_new_dentry) {
+		hmdfs_err("lower_new_dentry as NULL");
+		rc = -EACCES;
+		goto out_put_new_path;
+	}
+
+	lower_old_dir_dentry = dget_parent(lower_old_dentry);
+	lower_new_dir_dentry = dget_parent(lower_new_dentry);
+	trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+	old_dir_uid = hmdfs_override_inode_uid(d_inode(lower_old_dir_dentry));
+	new_dir_uid = hmdfs_override_inode_uid(d_inode(lower_new_dir_dentry));
+
+	/* source should not be ancestor of target */
+	if (trap == lower_old_dentry) {
+		rc = -EINVAL;
+		goto out_lock;
+	}
+	/* target should not be ancestor of source */
+	if (trap == lower_new_dentry) {
+		rc = -ENOTEMPTY;
+		goto out_lock;
+	}
+
+	rc = vfs_rename(d_inode(lower_old_dir_dentry), lower_old_dentry,
+			d_inode(lower_new_dir_dentry), lower_new_dentry, NULL,
+			flags);
+out_lock:
+	dget(old_dentry);
+
+	hmdfs_revert_inode_uid(d_inode(lower_old_dir_dentry), old_dir_uid);
+	hmdfs_revert_inode_uid(d_inode(lower_new_dir_dentry), new_dir_uid);
+
+	unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+	if (rc == 0) {
+		hmdfs_drop_remote_cache_dents(old_dentry->d_parent);
+		if (old_dentry->d_parent != new_dentry->d_parent)
+			hmdfs_drop_remote_cache_dents(new_dentry->d_parent);
+	} else {
+		hmdfs_clear_drop_flag(old_dentry->d_parent);
+		if (old_dentry->d_parent != new_dentry->d_parent)
+			hmdfs_clear_drop_flag(new_dentry->d_parent);
+		d_drop(new_dentry);
+	}
+
+	dput(old_dentry);
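+	/* release the parent dentry refs taken via dget_parent() above */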
dput(lower_old_dir_dentry);
+	dput(lower_new_dir_dentry);
+
+out_put_new_path:
+	hmdfs_put_lower_path(&lower_new_path);
+out_put_old_path:
+	hmdfs_put_lower_path(&lower_old_path);
+	return rc;
+}
+
+int hmdfs_rename_local(struct inode *old_dir, struct dentry *old_dentry,
+		       struct inode *new_dir, struct dentry *new_dentry,
+		       unsigned int flags)
+{
+	int err = 0;
+	int ret = 0;
+
+	trace_hmdfs_rename_local(old_dir, old_dentry, new_dir, new_dentry,
+				 flags);
+	if (hmdfs_file_type(old_dentry->d_name.name) != HMDFS_TYPE_COMMON ||
+	    hmdfs_file_type(new_dentry->d_name.name) != HMDFS_TYPE_COMMON) {
+		err = -EACCES;
+		goto rename_out;
+	}
+
+	if (S_ISREG(old_dentry->d_inode->i_mode)) {
+		err = hmdfs_rename_local_dentry(old_dir, old_dentry, new_dir,
+						new_dentry, flags);
+	} else if (S_ISDIR(old_dentry->d_inode->i_mode)) {
+		ret = hmdfs_rename_local_dentry(old_dir, old_dentry, new_dir,
+						new_dentry, flags);
+		if (ret != 0) {
+			err = ret;
+			goto rename_out;
+		}
+	}
+
+	if (!err)
+		d_invalidate(old_dentry);
+
+rename_out:
+	return err;
+}
+
+static bool symname_is_allowed(const char *symname)
+{
+	size_t symname_len = strlen(symname);
+	const char *prefix = NULL;
+	int i, total;
+
+	/*
+	 * Adjacent dots are prohibited. Note that the VFS has already dealt
+	 * with escaped backslashes by this point. Using i + 1 as the bound
+	 * avoids size_t underflow when symname is empty.
+	 */
+	for (i = 0; i + 1 < symname_len; ++i)
+		if (symname[i] == '.' && symname[i + 1] == '.')
+			goto out_fail;
+
+	/*
+	 * Check if the symname is covered by the whitelist. Note that we
+	 * skip comparing strlen because symname is NUL-terminated.
+	 */
+	total = sizeof(symlink_tgt_white_list) /
+		sizeof(*symlink_tgt_white_list);
+	for (i = 0; i < total; ++i) {
+		prefix = symlink_tgt_white_list[i];
+		if (!strncmp(symname, prefix, strlen(prefix)))
+			goto out_succ;
+	}
+
+out_fail:
+	hmdfs_err("Prohibited link path");
+	return false;
+out_succ:
+	return true;
+}
+
+int hmdfs_symlink_local(struct inode *dir, struct dentry *dentry,
+			const char *symname)
+{
+	int err;
+	struct dentry *lower_dentry = NULL;
+	struct dentry *lower_parent_dentry = NULL;
+	struct path lower_path;
+	struct inode *child_inode = NULL;
+	struct inode *lower_dir_inode = hmdfs_i(dir)->lower_inode;
+	struct hmdfs_dentry_info *gdi = hmdfs_d(dentry);
+	kuid_t tmp_uid;
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+	const struct cred *saved_cred = NULL;
+	struct fs_struct *saved_fs = NULL, *copied_fs = NULL;
+	__u16 child_perm;
+#endif
+
+	if (unlikely(!symname_is_allowed(symname))) {
+		err = -EPERM;
+		goto path_err;
+	}
+
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+	saved_cred = hmdfs_override_file_fsids(dir, &child_perm);
+	if (!saved_cred) {
+		err = -ENOMEM;
+		goto path_err;
+	}
+
+	saved_fs = current->fs;
+	copied_fs = hmdfs_override_fsstruct(saved_fs);
+	if (!copied_fs) {
+		err = -ENOMEM;
+		goto revert_fsids;
+	}
+#endif
+	hmdfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	lower_parent_dentry = lock_parent(lower_dentry);
+	tmp_uid = hmdfs_override_inode_uid(lower_dir_inode);
+	err = vfs_symlink(lower_dir_inode, lower_dentry, symname);
+	hmdfs_revert_inode_uid(lower_dir_inode, tmp_uid);
+	unlock_dir(lower_parent_dentry);
+	if (err)
+		goto out_err;
+	set_symlink_flag(gdi);
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+	err = hmdfs_persist_perm(lower_dentry, &child_perm);
+#endif
+	child_inode = fill_inode_local(dir->i_sb, d_inode(lower_dentry));
+	if (IS_ERR(child_inode)) {
+		err = PTR_ERR(child_inode);
+		goto out_err;
+	}
+	d_add(dentry, child_inode);
+	fsstack_copy_attr_times(dir, lower_dir_inode);
+	fsstack_copy_inode_size(dir, lower_dir_inode);
+
+out_err:
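+	/* reached on both success and failure: drop the lower path ref */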
hmdfs_put_lower_path(&lower_path);
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+	hmdfs_revert_fsstruct(saved_fs, copied_fs);
+revert_fsids:
+	hmdfs_revert_fsids(saved_cred);
+#endif
+path_err:
+	trace_hmdfs_symlink_local(dir, dentry, err);
+	return err;
+}
+
+static const char *hmdfs_get_link_local(struct dentry *dentry,
+					struct inode *inode,
+					struct delayed_call *done)
+{
+	const char *link = NULL;
+	struct dentry *lower_dentry = NULL;
+	struct inode *lower_inode = NULL;
+	struct path lower_path;
+
+	if (!dentry) {
+		hmdfs_err("dentry NULL");
+		link = ERR_PTR(-ECHILD);
+		goto link_out;
+	}
+
+	hmdfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	lower_inode = d_inode(lower_dentry);
+	if (!lower_inode->i_op || !lower_inode->i_op->get_link) {
+		hmdfs_err("The lower inode doesn't support the get_link i_op");
+		link = ERR_PTR(-EINVAL);
+		goto out;
+	}
+
+	link = lower_inode->i_op->get_link(lower_dentry, lower_inode, done);
+	if (IS_ERR_OR_NULL(link))
+		goto out;
+	fsstack_copy_attr_atime(inode, lower_inode);
+out:
+	hmdfs_put_lower_path(&lower_path);
+	trace_hmdfs_get_link_local(inode, dentry, PTR_ERR_OR_ZERO(link));
+link_out:
+	return link;
+}
+
+static int hmdfs_setattr_local(struct dentry *dentry, struct iattr *ia)
+{
+	struct inode *inode = d_inode(dentry);
+	struct inode *lower_inode = hmdfs_i(inode)->lower_inode;
+	struct path lower_path;
+	struct dentry *lower_dentry = NULL;
+	struct iattr lower_ia;
+	unsigned int ia_valid = ia->ia_valid;
+	int err = 0;
+	kuid_t tmp_uid;
+
+	hmdfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	memcpy(&lower_ia, ia, sizeof(lower_ia));
+	if (ia_valid & ATTR_FILE)
+		lower_ia.ia_file = hmdfs_f(ia->ia_file)->lower_file;
+	lower_ia.ia_valid &= ~(ATTR_UID | ATTR_GID | ATTR_MODE);
+	if (ia_valid & ATTR_SIZE) {
+		err = inode_newsize_ok(inode, ia->ia_size);
+		if (err)
+			goto out;
+		truncate_setsize(inode, ia->ia_size);
+	}
+	inode_lock(lower_inode);
+	tmp_uid = hmdfs_override_inode_uid(lower_inode);
+
+	err = notify_change(lower_dentry, &lower_ia, NULL);
+	if (!err) {
+		/* only sync attributes back when the lower change succeeded */
+		i_size_write(inode, i_size_read(lower_inode));
+		inode->i_atime = lower_inode->i_atime;
+		inode->i_mtime = lower_inode->i_mtime;
+		inode->i_ctime = lower_inode->i_ctime;
+		err = update_inode_to_dentry(dentry, inode);
+	}
+	hmdfs_revert_inode_uid(lower_inode, tmp_uid);
+
+	inode_unlock(lower_inode);
+out:
+	hmdfs_put_lower_path(&lower_path);
+	return err;
+}
+
+static int hmdfs_getattr_local(const struct path *path, struct kstat *stat,
+			       u32 request_mask, unsigned int flags)
+{
+	struct path lower_path;
+	int ret;
+
+	hmdfs_get_lower_path(path->dentry, &lower_path);
+	ret = vfs_getattr(&lower_path, stat, request_mask, flags);
+	stat->ino = d_inode(path->dentry)->i_ino;
+	hmdfs_put_lower_path(&lower_path);
+
+	return ret;
+}
+
+int hmdfs_permission(struct inode *inode, int mask)
+{
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+	unsigned int mode = inode->i_mode;
+	struct hmdfs_inode_info *hii = hmdfs_i(inode);
+	kuid_t cur_uid = current_fsuid();
+
+	if (uid_eq(cur_uid, ROOT_UID) || uid_eq(cur_uid, SYSTEM_UID))
+		return 0;
+
+	if (uid_eq(cur_uid, inode->i_uid)) {
+		mode >>= 6;
+	} else if (in_group_p(inode->i_gid)) {
+		mode >>= 3;
+	} else if (is_pkg_auth(hii->perm)) {
+		if (uid_eq(cur_uid, inode->i_uid))
+			return 0;
+	} else if (is_system_auth(hii->perm)) {
+		if (in_group_p(MEDIA_RW_GID))
+			return 0;
+	}
+
+	if ((mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
+		return 0;
+
+	trace_hmdfs_permission(inode->i_ino);
+	return -EACCES;
+#else
+
+	return 0;
+#endif
+}
+
+static
ssize_t hmdfs_local_listxattr(struct dentry *dentry, char *list, + size_t size) +{ + struct path lower_path; + ssize_t res = 0; + size_t r_size = size; + + if (!hmdfs_support_xattr(dentry)) + return -EOPNOTSUPP; + + if (size > HMDFS_LISTXATTR_SIZE_MAX) + r_size = HMDFS_LISTXATTR_SIZE_MAX; + + hmdfs_get_lower_path(dentry, &lower_path); + res = vfs_listxattr(lower_path.dentry, list, r_size); + hmdfs_put_lower_path(&lower_path); + + if (res == -ERANGE && r_size != size) { + hmdfs_info("no support listxattr size over than %d", + HMDFS_LISTXATTR_SIZE_MAX); + res = -E2BIG; + } + + return res; +} + +const struct inode_operations hmdfs_symlink_iops_local = { + .get_link = hmdfs_get_link_local, + .permission = hmdfs_permission, + .setattr = hmdfs_setattr_local, +}; + +const struct inode_operations hmdfs_dir_inode_ops_local = { + .lookup = hmdfs_lookup_local, + .mkdir = hmdfs_mkdir_local, + .create = hmdfs_create_local, + .rmdir = hmdfs_rmdir_local, + .unlink = hmdfs_unlink_local, + .symlink = hmdfs_symlink_local, + .rename = hmdfs_rename_local, + .permission = hmdfs_permission, + .setattr = hmdfs_setattr_local, + .getattr = hmdfs_getattr_local, +}; + +const struct inode_operations hmdfs_file_iops_local = { + .setattr = hmdfs_setattr_local, + .getattr = hmdfs_getattr_local, + .permission = hmdfs_permission, + .listxattr = hmdfs_local_listxattr, +}; diff --git a/fs/hmdfs/inode_merge.c b/fs/hmdfs/inode_merge.c new file mode 100644 index 000000000000..f84f57d5e85c --- /dev/null +++ b/fs/hmdfs/inode_merge.c @@ -0,0 +1,1357 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/inode_merge.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include "hmdfs_merge_view.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "authority/authentication.h" +#include "hmdfs_trace.h" + +struct kmem_cache *hmdfs_dentry_merge_cachep; + +struct dentry *hmdfs_get_fst_lo_d(struct dentry *dentry) +{ + struct hmdfs_dentry_info_merge *dim = hmdfs_dm(dentry); + struct hmdfs_dentry_comrade *comrade = NULL; + struct dentry *d = NULL; + + mutex_lock(&dim->comrade_list_lock); + comrade = list_first_entry_or_null(&dim->comrade_list, + struct hmdfs_dentry_comrade, list); + if (comrade) + d = dget(comrade->lo_d); + mutex_unlock(&dim->comrade_list_lock); + return d; +} + +struct dentry *hmdfs_get_lo_d(struct dentry *dentry, int dev_id) +{ + struct hmdfs_dentry_info_merge *dim = hmdfs_dm(dentry); + struct hmdfs_dentry_comrade *comrade = NULL; + struct dentry *d = NULL; + + mutex_lock(&dim->comrade_list_lock); + list_for_each_entry(comrade, &dim->comrade_list, list) { + if (comrade->dev_id == dev_id) { + d = dget(comrade->lo_d); + break; + } + } + mutex_unlock(&dim->comrade_list_lock); + return d; +} + +static void update_inode_attr(struct inode *inode, struct dentry *child_dentry) +{ + struct inode *li = NULL; + struct hmdfs_dentry_info_merge *cdi = hmdfs_dm(child_dentry); + struct hmdfs_dentry_comrade *comrade = NULL; + struct hmdfs_dentry_comrade *fst_comrade = NULL; + + mutex_lock(&cdi->comrade_list_lock); + fst_comrade = list_first_entry(&cdi->comrade_list, + struct hmdfs_dentry_comrade, list); + list_for_each_entry(comrade, &cdi->comrade_list, list) { + li = d_inode(comrade->lo_d); + if (!li) + continue; + + if (comrade == fst_comrade) { + inode->i_atime = li->i_atime; + inode->i_ctime = li->i_ctime; + inode->i_mtime = li->i_mtime; + inode->i_size = li->i_size; + continue; + } + + if (hmdfs_time_compare(&inode->i_mtime, &li->i_mtime) < 0) + 
inode->i_mtime = li->i_mtime;
+	}
+	mutex_unlock(&cdi->comrade_list_lock);
+}
+
+static int get_num_comrades(struct dentry *dentry)
+{
+	struct list_head *pos;
+	struct hmdfs_dentry_info_merge *dim = hmdfs_dm(dentry);
+	int count = 0;
+
+	mutex_lock(&dim->comrade_list_lock);
+	list_for_each(pos, &dim->comrade_list)
+		count++;
+	mutex_unlock(&dim->comrade_list_lock);
+	return count;
+}
+
+static struct inode *fill_inode_merge(struct super_block *sb,
+				      struct inode *parent_inode,
+				      struct dentry *child_dentry,
+				      struct dentry *lo_d_dentry)
+{
+	struct dentry *fst_lo_d = NULL;
+	struct hmdfs_inode_info *info = NULL;
+	struct inode *inode = NULL;
+	umode_t mode;
+
+	if (lo_d_dentry) {
+		fst_lo_d = lo_d_dentry;
+		dget(fst_lo_d);
+	} else {
+		fst_lo_d = hmdfs_get_fst_lo_d(child_dentry);
+	}
+	if (!fst_lo_d) {
+		inode = ERR_PTR(-EINVAL);
+		goto out;
+	}
+	if (hmdfs_i(parent_inode)->inode_type == HMDFS_LAYER_ZERO)
+		inode = hmdfs_iget_locked_root(sb, HMDFS_ROOT_MERGE, NULL,
+					       NULL);
+	else
+		inode = hmdfs_iget5_locked_merge(sb, fst_lo_d);
+	if (!inode) {
+		hmdfs_err("iget5_locked get inode NULL");
+		inode = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+	if (!(inode->i_state & I_NEW))
+		goto out;
+	info = hmdfs_i(inode);
+	if (hmdfs_i(parent_inode)->inode_type == HMDFS_LAYER_ZERO)
+		info->inode_type = HMDFS_LAYER_FIRST_MERGE;
+	else
+		info->inode_type = HMDFS_LAYER_OTHER_MERGE;
+
+	inode->i_uid = KUIDT_INIT((uid_t)1000);
+	inode->i_gid = KGIDT_INIT((gid_t)1000);
+
+	update_inode_attr(inode, child_dentry);
+	mode = d_inode(fst_lo_d)->i_mode;
+	/*
+	 * A remote symlink needs to be treated as a regular file; the
+	 * specific operation is performed by the device_view. A local
+	 * symlink is managed by the merge_view.
+	 */
+	if (hm_islnk(hmdfs_d(fst_lo_d)->file_type) &&
+	    hmdfs_d(fst_lo_d)->device_id == 0) {
+		inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
+		inode->i_op = &hmdfs_symlink_iops_merge;
+		inode->i_fop = &hmdfs_file_fops_merge;
+		set_nlink(inode, 1);
+	} else if (S_ISREG(mode)) { // Regular file 0660
+		inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
+		inode->i_op = &hmdfs_file_iops_merge;
+		inode->i_fop = &hmdfs_file_fops_merge;
+		set_nlink(inode, 1);
+	} else if (S_ISDIR(mode)) { // Directory 0771
+		inode->i_mode = S_IFDIR | S_IRWXU | S_IRWXG | S_IXOTH;
+		inode->i_op = &hmdfs_dir_iops_merge;
+		inode->i_fop = &hmdfs_dir_fops_merge;
+		set_nlink(inode, get_num_comrades(child_dentry) + 2);
+	}
+
+	unlock_new_inode(inode);
+out:
+	dput(fst_lo_d);
+	return inode;
+}
+
+struct hmdfs_dentry_comrade *alloc_comrade(struct dentry *lo_d, int dev_id)
+{
+	struct hmdfs_dentry_comrade *comrade = NULL;
+
+	/*
+	 * A regular file has only one comrade; consider replacing the
+	 * {comrade list + list lock} pair with a single comrade.
+	 */
+	comrade = kzalloc(sizeof(*comrade), GFP_KERNEL);
+	if (unlikely(!comrade))
+		return ERR_PTR(-ENOMEM);
+
+	comrade->lo_d = lo_d;
+	comrade->dev_id = dev_id;
+	dget(lo_d);
+	return comrade;
+}
+
+void link_comrade(struct list_head *onstack_comrades_head,
+		  struct hmdfs_dentry_comrade *comrade)
+{
+	struct hmdfs_dentry_comrade *c = NULL;
+
+	list_for_each_entry(c, onstack_comrades_head, list) {
+		if (likely(c->dev_id != comrade->dev_id))
+			continue;
+		hmdfs_err("Redundant comrade of device %llu", c->dev_id);
+		dput(comrade->lo_d);
+		kfree(comrade);
+		WARN_ON(1);
+		return;
+	}
+
+	if (comrade_is_local(comrade))
+		list_add(&comrade->list, onstack_comrades_head);
+	else
+		list_add_tail(&comrade->list, onstack_comrades_head);
+}
+
+/**
+ * assign_comrades_unlocked - assign a child dentry with comrades
+ *
+ * We set up a local list of all the comrades we found
+/**
+ * assign_comrades_unlocked - assign a child dentry with comrades
+ *
+ * We tend to set up a local list of all the comrades we found and place the
+ * list onto the dentry_info to achieve atomicity.
+ */
+static void assign_comrades_unlocked(struct dentry *child_dentry,
+				     struct list_head *onstack_comrades_head)
+{
+	struct hmdfs_dentry_info_merge *cdi = hmdfs_dm(child_dentry);
+
+	mutex_lock(&cdi->comrade_list_lock);
+	WARN_ON(!list_empty(&cdi->comrade_list));
+	list_splice_init(onstack_comrades_head, &cdi->comrade_list);
+	mutex_unlock(&cdi->comrade_list_lock);
+}
+
+static struct hmdfs_dentry_comrade *lookup_comrade(struct path lower_path,
+						   const char *d_name,
+						   int dev_id,
+						   unsigned int flags)
+{
+	struct path path;
+	struct hmdfs_dentry_comrade *comrade = NULL;
+	int err;
+
+	err = vfs_path_lookup(lower_path.dentry, lower_path.mnt, d_name, flags,
+			      &path);
+	if (err)
+		return ERR_PTR(err);
+
+	comrade = alloc_comrade(path.dentry, dev_id);
+	path_put(&path);
+	return comrade;
+}
+
+/**
+ * conf_name_trans_nop - do nothing but copy
+ *
+ * WARNING: always check before translation
+ */
+static char *conf_name_trans_nop(struct dentry *d)
+{
+	return kstrndup(d->d_name.name, d->d_name.len, GFP_KERNEL);
+}
+
+/**
+ * conf_name_trans_dir - conflicted name translation for a directory
+ *
+ * WARNING: always check before translation
+ */
+static char *conf_name_trans_dir(struct dentry *d)
+{
+	int len = d->d_name.len - strlen(CONFLICTING_DIR_SUFFIX);
+
+	return kstrndup(d->d_name.name, len, GFP_KERNEL);
+}
+
+/**
+ * conf_name_trans_reg - conflicted name translation for a regular file
+ *
+ * WARNING: always check before translation
+ */
+static char *conf_name_trans_reg(struct dentry *d, int *dev_id)
+{
+	int dot_pos, start_cpy_pos, num_len, i;
+	int len = d->d_name.len;
+	char *name = kstrndup(d->d_name.name, d->d_name.len, GFP_KERNEL);
+
+	if (unlikely(!name))
+		return NULL;
+
+	// find the last dot if possible
+	for (dot_pos = len - 1; dot_pos >= 0; dot_pos--) {
+		if (name[dot_pos] == '.')
+			break;
+	}
+	if (dot_pos == -1)
+		dot_pos = len;
+
+	// retrieve the conflict serial number (i.e. dev_id)
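+	/*
+	 * Worked example (sketch; the literal suffix string is defined
+	 * elsewhere and only assumed here): with a conflict suffix of
+	 * "_conflict_dev" and the name "a_conflict_dev12.txt", dot_pos
+	 * points at the '.', num_len becomes 2, *dev_id parses to 12, and
+	 * the memmove below yields "a.txt".
+	 */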
+	num_len = 0;
+	for (i = dot_pos - 1; i >= 0; i--) {
+		if (name[i] >= '0' && name[i] <= '9')
+			num_len++;
+		else
+			break;
+	}
+
+	*dev_id = 0;
+	for (i = 0; i < num_len; i++)
+		*dev_id = *dev_id * 10 + name[dot_pos - num_len + i] - '0';
+
+	// move the file suffix ('\0' included) right after the file name
+	start_cpy_pos =
+		dot_pos - num_len - strlen(CONFLICTING_FILE_CONST_SUFFIX);
+	memmove(name + start_cpy_pos, name + dot_pos, len - dot_pos + 1);
+	return name;
+}
+
+int check_filename(const char *name, int len)
+{
+	int cmp_res = 0;
+
+	if (len >= strlen(CONFLICTING_DIR_SUFFIX)) {
+		cmp_res = strncmp(name + len - strlen(CONFLICTING_DIR_SUFFIX),
+				  CONFLICTING_DIR_SUFFIX,
+				  strlen(CONFLICTING_DIR_SUFFIX));
+		if (cmp_res == 0)
+			return DT_DIR;
+	}
+
+	if (len >= strlen(CONFLICTING_FILE_CONST_SUFFIX)) {
+		int dot_pos, start_cmp_pos, num_len, i;
+
+		for (dot_pos = len - 1; dot_pos >= 0; dot_pos--) {
+			if (name[dot_pos] == '.')
+				break;
+		}
+		if (dot_pos == -1)
+			dot_pos = len;
+
+		num_len = 0;
+		for (i = dot_pos - 1; i >= 0; i--) {
+			if (name[i] >= '0' && name[i] <= '9')
+				num_len++;
+			else
+				break;
+		}
+
+		start_cmp_pos = dot_pos - num_len -
+				strlen(CONFLICTING_FILE_CONST_SUFFIX);
+		cmp_res = strncmp(name + start_cmp_pos,
+				  CONFLICTING_FILE_CONST_SUFFIX,
+				  strlen(CONFLICTING_FILE_CONST_SUFFIX));
+		if (cmp_res == 0)
+			return DT_REG;
+	}
+
+	return 0;
+}
+
+static int lookup_merge_normal(struct dentry *child_dentry, unsigned int flags)
+{
+	struct dentry *parent_dentry = dget_parent(child_dentry);
+	struct hmdfs_dentry_info_merge *pdi = hmdfs_dm(parent_dentry);
+	struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb);
+	struct hmdfs_dentry_comrade *comrade, *cc;
+	struct path lo_p, path;
+	LIST_HEAD(head);
+	int ret = -ENOENT;
+	int dev_id = -1;
+	int ftype;
+	char *lo_name;
+	umode_t mode;
+
+	ftype = check_filename(child_dentry->d_name.name,
+			       child_dentry->d_name.len);
+	if (ftype == DT_REG)
+		lo_name = conf_name_trans_reg(child_dentry, &dev_id);
+	else if (ftype == DT_DIR)
+		lo_name = conf_name_trans_dir(child_dentry);
+	else
+		lo_name = conf_name_trans_nop(child_dentry);
+	if (unlikely(!lo_name)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = hmdfs_get_path_in_sb(child_dentry->d_sb, sbi->real_dst,
+				   LOOKUP_DIRECTORY, &path);
+	if (ret) {
+		if (ret == -ENOENT)
+			ret = -EINVAL;
+		goto free;
+	}
+	lo_p.mnt = path.mnt;
+
+	ret = -ENOENT;
+	mutex_lock(&pdi->comrade_list_lock);
+	list_for_each_entry(cc, &pdi->comrade_list, list) {
+		if (ftype == DT_REG && cc->dev_id != dev_id)
+			continue;
+
+		lo_p.dentry = cc->lo_d;
+		comrade = lookup_comrade(lo_p, lo_name, cc->dev_id, flags);
+		if (IS_ERR(comrade)) {
+			ret = ret ? PTR_ERR(comrade) : 0;
+			continue;
+		}
+
+		mode = hmdfs_cm(comrade);
+		if ((ftype == DT_DIR && !S_ISDIR(mode)) ||
+		    (ftype == DT_REG && S_ISDIR(mode))) {
+			destroy_comrade(comrade);
+			ret = ret ? PTR_ERR(comrade) : 0;
+			continue;
+		}
+
+		ret = 0;
+		link_comrade(&head, comrade);
+
+		if (!S_ISDIR(mode))
+			break;
+	}
+	mutex_unlock(&pdi->comrade_list_lock);
+
+	assign_comrades_unlocked(child_dentry, &head);
+	path_put(&path);
+free:
+	kfree(lo_name);
+out:
+	dput(parent_dentry);
+	return ret;
+}
+
+/**
+ * do_lookup_merge_root - look up the root of the merge view (root/merge_view)
+ *
+ * It's common for a network filesystem to run into various faults, so we
+ * intend to tolerate faults here, except for faults reported by the local
+ * device.
+ */ +static int do_lookup_merge_root(struct path path_dev, + struct dentry *child_dentry, unsigned int flags) +{ + struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb); + struct hmdfs_dentry_comrade *comrade; + const int buf_len = + max((int)HMDFS_CID_SIZE + 1, (int)sizeof(DEVICE_VIEW_LOCAL)); + char *buf = kzalloc(buf_len, GFP_KERNEL); + struct hmdfs_peer *peer; + LIST_HEAD(head); + int ret; + + if (!buf) + return -ENOMEM; + + // lookup real_dst/device_view/local + memcpy(buf, DEVICE_VIEW_LOCAL, sizeof(DEVICE_VIEW_LOCAL)); + comrade = lookup_comrade(path_dev, buf, HMDFS_DEVID_LOCAL, flags); + if (IS_ERR(comrade)) { + ret = PTR_ERR(comrade); + goto out; + } + link_comrade(&head, comrade); + + // lookup real_dst/device_view/cidxx + mutex_lock(&sbi->connections.node_lock); + list_for_each_entry(peer, &sbi->connections.node_list, list) { + mutex_unlock(&sbi->connections.node_lock); + memcpy(buf, peer->cid, HMDFS_CID_SIZE); + comrade = lookup_comrade(path_dev, buf, peer->device_id, flags); + if (IS_ERR(comrade)) + continue; + + link_comrade(&head, comrade); + mutex_lock(&sbi->connections.node_lock); + } + mutex_unlock(&sbi->connections.node_lock); + + assign_comrades_unlocked(child_dentry, &head); + ret = 0; + +out: + kfree(buf); + return ret; +} + +// mkdir -p +static void lock_root_inode_shared(struct inode *root, bool *locked, bool *down) +{ + struct rw_semaphore *sem = &root->i_rwsem; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 3, 0) +#define RWSEM_READER_OWNED (1UL << 0) +#define RWSEM_RD_NONSPINNABLE (1UL << 1) +#define RWSEM_WR_NONSPINNABLE (1UL << 2) +#define RWSEM_NONSPINNABLE (RWSEM_RD_NONSPINNABLE | RWSEM_WR_NONSPINNABLE) +#define RWSEM_OWNER_FLAGS_MASK (RWSEM_READER_OWNED | RWSEM_NONSPINNABLE) + struct task_struct *sem_owner = + (struct task_struct *)(atomic_long_read(&sem->owner) & + ~RWSEM_OWNER_FLAGS_MASK); +#else + struct task_struct *sem_owner = sem->owner; +#endif + + *locked = false; + *down = false; + + if (sem_owner != current) + return; + + // It's us that takes the wsem + if (!inode_trylock_shared(root)) { + downgrade_write(sem); + *down = true; + } + *locked = true; +} + +static void restore_root_inode_sem(struct inode *root, bool locked, bool down) +{ + if (!locked) + return; + + inode_unlock_shared(root); + if (down) + inode_lock(root); +} + +static int lookup_merge_root(struct inode *root_inode, + struct dentry *child_dentry, unsigned int flags) +{ + struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb); + struct path path_dev; + int ret = -ENOENT; + int buf_len; + char *buf = NULL; + bool locked, down; + + // consider additional one slash and one '\0' + buf_len = strlen(sbi->real_dst) + 1 + sizeof(DEVICE_VIEW_ROOT); + if (buf_len > PATH_MAX) + return -ENAMETOOLONG; + + buf = kmalloc(buf_len, GFP_KERNEL); + if (unlikely(!buf)) + return -ENOMEM; + + sprintf(buf, "%s/%s", sbi->real_dst, DEVICE_VIEW_ROOT); + lock_root_inode_shared(root_inode, &locked, &down); + ret = hmdfs_get_path_in_sb(child_dentry->d_sb, buf, LOOKUP_DIRECTORY, + &path_dev); + if (ret) + goto free_buf; + + ret = do_lookup_merge_root(path_dev, child_dentry, flags); + path_put(&path_dev); + +free_buf: + kfree(buf); + restore_root_inode_sem(root_inode, locked, down); + return ret; +} + +int init_hmdfs_dentry_info_merge(struct hmdfs_sb_info *sbi, + struct dentry *dentry) +{ + struct hmdfs_dentry_info_merge *info = NULL; + + info = kmem_cache_zalloc(hmdfs_dentry_merge_cachep, GFP_NOFS); + if (!info) + return -ENOMEM; + + info->ctime = jiffies; + INIT_LIST_HEAD(&info->comrade_list); + 
mutex_init(&info->comrade_list_lock);
+	d_set_d_op(dentry, &hmdfs_dops_merge);
+	dentry->d_fsdata = info;
+	return 0;
+}
+
+static void update_dm(struct dentry *dst, struct dentry *src)
+{
+	struct hmdfs_dentry_info_merge *dmi_dst = hmdfs_dm(dst);
+	struct hmdfs_dentry_info_merge *dmi_src = hmdfs_dm(src);
+	LIST_HEAD(tmp_dst);
+	LIST_HEAD(tmp_src);
+
+	/* Swap all the comrades between the two dentries */
+	mutex_lock(&dmi_dst->comrade_list_lock);
+	mutex_lock(&dmi_src->comrade_list_lock);
+	list_splice_init(&dmi_dst->comrade_list, &tmp_dst);
+	list_splice_init(&dmi_src->comrade_list, &tmp_src);
+	list_splice(&tmp_dst, &dmi_src->comrade_list);
+	list_splice(&tmp_src, &dmi_dst->comrade_list);
+	mutex_unlock(&dmi_src->comrade_list_lock);
+	mutex_unlock(&dmi_dst->comrade_list_lock);
+}
+
+// do this in a map-reduce manner
+struct dentry *hmdfs_lookup_merge(struct inode *parent_inode,
+				  struct dentry *child_dentry,
+				  unsigned int flags)
+{
+	bool create = flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET);
+	struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb);
+	struct hmdfs_inode_info *pii = hmdfs_i(parent_inode);
+	struct inode *child_inode = NULL;
+	struct dentry *ret_dentry = NULL;
+	int err = 0;
+
+	/*
+	 * Internal flags like LOOKUP_CREATE should not be passed to the
+	 * device view. LOOKUP_REVAL is needed because a dentry cached in
+	 * hmdfs might be stale after a rename in the lower fs. LOOKUP_FOLLOW
+	 * is not needed because get_link is defined for symlink inodes in
+	 * merge_view. LOOKUP_DIRECTORY is not needed because merge_view can
+	 * judge on its own whether the result is a directory or not.
+	 */
+	flags = flags & LOOKUP_REVAL;
+
+	child_dentry->d_fsdata = NULL;
+
+	if (child_dentry->d_name.len > NAME_MAX) {
+		err = -ENAMETOOLONG;
+		goto out;
+	}
+
+	err = init_hmdfs_dentry_info_merge(sbi, child_dentry);
+	if (unlikely(err))
+		goto out;
+
+	if (pii->inode_type == HMDFS_LAYER_ZERO)
+		err = lookup_merge_root(parent_inode, child_dentry, flags);
+	else
+		err = lookup_merge_normal(child_dentry, flags);
+
+	if (!err) {
+		struct hmdfs_inode_info *info = NULL;
+
+		child_inode = fill_inode_merge(parent_inode->i_sb, parent_inode,
+					       child_dentry, NULL);
+		ret_dentry = d_splice_alias(child_inode, child_dentry);
+		if (IS_ERR(ret_dentry)) {
+			clear_comrades(child_dentry);
+			err = PTR_ERR(ret_dentry);
+			goto out;
+		}
+		if (ret_dentry) {
+			update_dm(ret_dentry, child_dentry);
+			child_dentry = ret_dentry;
+		}
+		info = hmdfs_i(child_inode);
+		if (info->inode_type == HMDFS_LAYER_FIRST_MERGE)
+			hmdfs_root_inode_perm_init(child_inode);
+		else
+			check_and_fixup_ownership_remote(parent_inode,
+							 child_dentry);
+
+		goto out;
+	}
+
+	if ((err == -ENOENT) && create)
+		err = 0;
+
+out:
+	hmdfs_trace_merge(trace_hmdfs_lookup_merge_end, parent_inode,
+			  child_dentry, err);
+	return err ? ERR_PTR(err) : ret_dentry;
+}
+
+static int hmdfs_getattr_merge(const struct path *path, struct kstat *stat,
+			       u32 request_mask, unsigned int flags)
+{
+	int ret;
+	struct path lower_path = {
+		.dentry = hmdfs_get_fst_lo_d(path->dentry),
+		.mnt = path->mnt,
+	};
+
+	if (unlikely(!lower_path.dentry)) {
+		hmdfs_err("Fatal!
No comrades"); + ret = -EINVAL; + goto out; + } + + ret = vfs_getattr(&lower_path, stat, request_mask, flags); +out: + dput(lower_path.dentry); + return ret; +} + +static int hmdfs_setattr_merge(struct dentry *dentry, struct iattr *ia) +{ + struct inode *inode = d_inode(dentry); + struct dentry *lower_dentry = hmdfs_get_fst_lo_d(dentry); + struct inode *lower_inode = NULL; + struct iattr lower_ia; + unsigned int ia_valid = ia->ia_valid; + int err = 0; + kuid_t tmp_uid; + + if (!lower_dentry) { + WARN_ON(1); + err = -EINVAL; + goto out; + } + + lower_inode = d_inode(lower_dentry); + memcpy(&lower_ia, ia, sizeof(lower_ia)); + if (ia_valid & ATTR_FILE) + lower_ia.ia_file = hmdfs_f(ia->ia_file)->lower_file; + lower_ia.ia_valid &= ~(ATTR_UID | ATTR_GID | ATTR_MODE); + + inode_lock(lower_inode); + tmp_uid = hmdfs_override_inode_uid(lower_inode); + + err = notify_change(lower_dentry, &lower_ia, NULL); + i_size_write(inode, i_size_read(lower_inode)); + inode->i_atime = lower_inode->i_atime; + inode->i_mtime = lower_inode->i_mtime; + inode->i_ctime = lower_inode->i_ctime; + hmdfs_revert_inode_uid(lower_inode, tmp_uid); + + inode_unlock(lower_inode); + +out: + dput(lower_dentry); + return err; +} + +const struct inode_operations hmdfs_file_iops_merge = { + .getattr = hmdfs_getattr_merge, + .setattr = hmdfs_setattr_merge, + .permission = hmdfs_permission, +}; + +int do_mkdir_merge(struct inode *parent_inode, struct dentry *child_dentry, + umode_t mode, struct inode *lo_i_parent, + struct dentry *lo_d_child) +{ + int ret = 0; + struct super_block *sb = parent_inode->i_sb; + struct inode *child_inode = NULL; + + ret = vfs_mkdir(lo_i_parent, lo_d_child, mode); + if (ret) + goto out; + + child_inode = + fill_inode_merge(sb, parent_inode, child_dentry, lo_d_child); + if (IS_ERR(child_inode)) { + ret = PTR_ERR(child_inode); + goto out; + } + + d_add(child_dentry, child_inode); + /* nlink should be increased with the joining of children */ + set_nlink(parent_inode, 2); +out: + return ret; +} + +int do_create_merge(struct inode *parent_inode, struct dentry *child_dentry, + umode_t mode, bool want_excl, struct inode *lo_i_parent, + struct dentry *lo_d_child) +{ + int ret = 0; + struct super_block *sb = parent_inode->i_sb; + struct inode *child_inode = NULL; + + ret = vfs_create(lo_i_parent, lo_d_child, mode, want_excl); + if (ret) + goto out; + + child_inode = + fill_inode_merge(sb, parent_inode, child_dentry, lo_d_child); + if (IS_ERR(child_inode)) { + ret = PTR_ERR(child_inode); + goto out; + } + + d_add(child_dentry, child_inode); + /* nlink should be increased with the joining of children */ + set_nlink(parent_inode, 2); +out: + return ret; +} + +int do_symlink_merge(struct inode *parent_inode, struct dentry *child_dentry, + const char *symname, struct inode *lower_parent_inode, + struct dentry *lo_d_child) +{ + int ret = 0; + struct super_block *sb = parent_inode->i_sb; + struct inode *child_inode = NULL; + + ret = vfs_symlink(lower_parent_inode, lo_d_child, symname); + if (ret) + goto out; + + child_inode = + fill_inode_merge(sb, parent_inode, child_dentry, lo_d_child); + if (IS_ERR(child_inode)) { + ret = PTR_ERR(child_inode); + goto out; + } + + d_add(child_dentry, child_inode); + fsstack_copy_attr_times(parent_inode, lower_parent_inode); + fsstack_copy_inode_size(parent_inode, lower_parent_inode); +out: + return ret; +} + +int hmdfs_do_ops_merge(struct inode *i_parent, struct dentry *d_child, + struct dentry *lo_d_child, struct path path, + struct hmdfs_recursive_para *rec_op_para) +{ + int ret = 0; 
+
+	if (rec_op_para->is_last) {
+		switch (rec_op_para->opcode) {
+		case F_MKDIR_MERGE:
+			ret = do_mkdir_merge(i_parent, d_child,
+					     rec_op_para->mode,
+					     d_inode(path.dentry), lo_d_child);
+			break;
+		case F_CREATE_MERGE:
+			ret = do_create_merge(i_parent, d_child,
+					      rec_op_para->mode,
+					      rec_op_para->want_excl,
+					      d_inode(path.dentry), lo_d_child);
+			break;
+		case F_SYMLINK_MERGE:
+			ret = do_symlink_merge(i_parent, d_child,
+					       rec_op_para->name,
+					       d_inode(path.dentry),
+					       lo_d_child);
+			break;
+		default:
+			ret = -EINVAL;
+			break;
+		}
+	} else {
+		ret = vfs_mkdir(d_inode(path.dentry), lo_d_child,
+				rec_op_para->mode);
+	}
+	if (ret)
+		hmdfs_err("vfs_ops failed, ops %d, err = %d",
+			  rec_op_para->opcode, ret);
+	return ret;
+}
+
+int hmdfs_create_lower_dentry(struct inode *i_parent, struct dentry *d_child,
+			      struct dentry *lo_d_parent, bool is_dir,
+			      struct hmdfs_recursive_para *rec_op_para)
+{
+	struct hmdfs_sb_info *sbi = i_parent->i_sb->s_fs_info;
+	struct hmdfs_dentry_comrade *new_comrade = NULL;
+	struct dentry *lo_d_child = NULL;
+	char *path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
+	char *absolute_path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
+	char *path_name = NULL;
+	struct path path = { .mnt = NULL, .dentry = NULL };
+	int ret = 0;
+
+	if (unlikely(!path_buf || !absolute_path_buf)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	path_name = dentry_path_raw(lo_d_parent, path_buf, PATH_MAX);
+	if (IS_ERR(path_name)) {
+		ret = PTR_ERR(path_name);
+		goto out;
+	}
+	if ((strlen(sbi->real_dst) + strlen(path_name) +
+	     strlen(d_child->d_name.name) + 2) > PATH_MAX) {
+		ret = -ENAMETOOLONG;
+		goto out;
+	}
+
+	sprintf(absolute_path_buf, "%s%s/%s", sbi->real_dst, path_name,
+		d_child->d_name.name);
+
+	if (is_dir)
+		lo_d_child = kern_path_create(AT_FDCWD, absolute_path_buf,
+					      &path, LOOKUP_DIRECTORY);
+	else
+		lo_d_child = kern_path_create(AT_FDCWD, absolute_path_buf,
+					      &path, 0);
+	if (IS_ERR(lo_d_child)) {
+		ret = PTR_ERR(lo_d_child);
+		goto out;
+	}
+	// to ensure link_comrade happens only after vfs_mkdir succeeds
+	ret = hmdfs_do_ops_merge(i_parent, d_child, lo_d_child, path,
+				 rec_op_para);
+	if (ret)
+		goto out_put;
+	new_comrade = alloc_comrade(lo_d_child, HMDFS_DEVID_LOCAL);
+	if (IS_ERR(new_comrade)) {
+		ret = PTR_ERR(new_comrade);
+		goto out_put;
+	} else {
+		link_comrade_unlocked(d_child, new_comrade);
+	}
+
+out_put:
+	done_path_create(&path, lo_d_child);
+out:
+	kfree(absolute_path_buf);
+	kfree(path_buf);
+	return ret;
+}
+
+static int create_lo_d_parent_recur(struct dentry *d_parent,
+				    struct dentry *d_child, umode_t mode,
+				    struct hmdfs_recursive_para *rec_op_para)
+{
+	struct dentry *lo_d_parent, *d_pparent;
+	int ret = 0;
+
+	lo_d_parent = hmdfs_get_lo_d(d_parent, HMDFS_DEVID_LOCAL);
+	if (!lo_d_parent) {
+		d_pparent = dget_parent(d_parent);
+		ret = create_lo_d_parent_recur(d_pparent, d_parent,
+					       d_inode(d_parent)->i_mode,
+					       rec_op_para);
+		dput(d_pparent);
+		if (ret)
+			goto out;
+		lo_d_parent = hmdfs_get_lo_d(d_parent, HMDFS_DEVID_LOCAL);
+		if (!lo_d_parent) {
+			ret = -ENOENT;
+			goto out;
+		}
+	}
+	rec_op_para->is_last = false;
+	rec_op_para->mode = mode;
+	ret = hmdfs_create_lower_dentry(d_inode(d_parent), d_child, lo_d_parent,
+					true, rec_op_para);
+out:
+	dput(lo_d_parent);
+	return ret;
+}
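+
+/*
+ * Sketch of the flow (for illustration only): hmdfs_mkdir_merge() and its
+ * siblings call create_lo_d_child() below; when the local lower parent is
+ * missing, create_lo_d_parent_recur() above walks upwards and re-creates
+ * the ancestor chain first, in the spirit of "mkdir -p":
+ *
+ *	hmdfs_init_recursive_para(rec, F_MKDIR_MERGE, mode, false, NULL);
+ *	err = create_lo_d_child(dir, dentry, true, rec);
+ */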
+
+int create_lo_d_child(struct inode *i_parent, struct dentry *d_child,
+		      bool is_dir, struct hmdfs_recursive_para *rec_op_para)
+{
+	struct dentry *d_pparent, *lo_d_parent, *lo_d_child;
+	struct dentry *d_parent = dget_parent(d_child);
+	int ret = 0;
+	mode_t d_child_mode = rec_op_para->mode;
+
+	lo_d_parent = hmdfs_get_lo_d(d_parent, HMDFS_DEVID_LOCAL);
+	if (!lo_d_parent) {
+		d_pparent = dget_parent(d_parent);
+		ret = create_lo_d_parent_recur(d_pparent, d_parent,
+					       d_inode(d_parent)->i_mode,
+					       rec_op_para);
+		dput(d_pparent);
+		if (unlikely(ret)) {
+			lo_d_child = ERR_PTR(ret);
+			goto out;
+		}
+		lo_d_parent = hmdfs_get_lo_d(d_parent, HMDFS_DEVID_LOCAL);
+		if (!lo_d_parent) {
+			lo_d_child = ERR_PTR(-ENOENT);
+			goto out;
+		}
+	}
+	rec_op_para->is_last = true;
+	rec_op_para->mode = d_child_mode;
+	ret = hmdfs_create_lower_dentry(i_parent, d_child, lo_d_parent, is_dir,
+					rec_op_para);
+
+out:
+	dput(d_parent);
+	dput(lo_d_parent);
+	return ret;
+}
+
+void hmdfs_init_recursive_para(struct hmdfs_recursive_para *rec_op_para,
+			       int opcode, mode_t mode, bool want_excl,
+			       const char *name)
+{
+	rec_op_para->is_last = true;
+	rec_op_para->opcode = opcode;
+	rec_op_para->mode = mode;
+	rec_op_para->want_excl = want_excl;
+	rec_op_para->name = name;
+}
+
+int hmdfs_mkdir_merge(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	int ret = 0;
+	struct hmdfs_recursive_para *rec_op_para = NULL;
+
+	// conflict_name & file_type are checked by hmdfs_mkdir_local
+	if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) {
+		ret = -EACCES;
+		goto out;
+	}
+	rec_op_para = kmalloc(sizeof(*rec_op_para), GFP_KERNEL);
+	if (!rec_op_para) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	hmdfs_init_recursive_para(rec_op_para, F_MKDIR_MERGE, mode, false,
+				  NULL);
+	ret = create_lo_d_child(dir, dentry, true, rec_op_para);
+out:
+	hmdfs_trace_merge(trace_hmdfs_mkdir_merge, dir, dentry, ret);
+	if (ret)
+		d_drop(dentry);
+	kfree(rec_op_para);
+	return ret;
+}
+
+int hmdfs_create_merge(struct inode *dir, struct dentry *dentry, umode_t mode,
+		       bool want_excl)
+{
+	struct hmdfs_recursive_para *rec_op_para = NULL;
+	int ret = 0;
+
+	rec_op_para = kmalloc(sizeof(*rec_op_para), GFP_KERNEL);
+	if (!rec_op_para) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	hmdfs_init_recursive_para(rec_op_para, F_CREATE_MERGE, mode, want_excl,
+				  NULL);
+	// conflict_name & file_type are checked by hmdfs_create_local
+	ret = create_lo_d_child(dir, dentry, false, rec_op_para);
+out:
+	hmdfs_trace_merge(trace_hmdfs_create_merge, dir, dentry, ret);
+	if (ret)
+		d_drop(dentry);
+	kfree(rec_op_para);
+	return ret;
+}
+
+int do_rmdir_merge(struct inode *dir, struct dentry *dentry)
+{
+	int ret = 0;
+	struct hmdfs_dentry_info_merge *dim = hmdfs_dm(dentry);
+	struct hmdfs_dentry_comrade *comrade = NULL;
+	struct dentry *lo_d = NULL;
+	struct dentry *lo_d_dir = NULL;
+	struct inode *lo_i_dir = NULL;
+
+	//TODO: Currently only the local copy is removed, as this does not
+	//      affect the gallery scenario.
+	//TODO: Should the gallery clear symlinks on restart? And in which
+	//      scenarios do deletions happen?
+	//TODO: remove deletes both empty and non-empty directories, so the
+	//      results can be inconsistent.
+	//TODO: Could verification race with concurrent access? Even with a
+	//      lock, we can only lock our own side.
+	mutex_lock(&dim->comrade_list_lock);
+	list_for_each_entry(comrade, &(dim->comrade_list), list) {
+		lo_d = comrade->lo_d;
+		lo_d_dir = lock_parent(lo_d);
+		lo_i_dir = d_inode(lo_d_dir);
+		//TODO: partial success; confirm lo_d
+		ret = vfs_rmdir(lo_i_dir, lo_d);
+		unlock_dir(lo_d_dir);
+		if (ret)
+			break;
+	}
+	mutex_unlock(&dim->comrade_list_lock);
+	hmdfs_trace_merge(trace_hmdfs_rmdir_merge, dir, dentry, ret);
+	return ret;
+}
+
+int hmdfs_rmdir_merge(struct inode *dir, struct dentry *dentry)
+{
+	int ret = 0;
+
+	if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) {
+		ret = -EACCES;
+		goto out;
+	}
+
+	ret = do_rmdir_merge(dir, dentry);
+	if (ret) {
+		hmdfs_err("rm dir failed:%d", ret);
+		goto out;
+	}
+
+	d_drop(dentry);
+out:
+	hmdfs_trace_merge(trace_hmdfs_rmdir_merge, dir, dentry, ret);
+	return ret;
+}
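+
+/*
+ * Like do_rmdir_merge() above, the unlink path below stops at the first
+ * failing lower-fs call, so earlier comrades may already have been removed
+ * when an error is returned (partial success).
+ */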
+int do_unlink_merge(struct inode *dir, struct dentry *dentry)
+{
+	int ret = 0;
+	struct hmdfs_dentry_info_merge *dim = hmdfs_dm(dentry);
+	struct hmdfs_dentry_comrade *comrade = NULL;
+	struct dentry *lo_d = NULL;
+	struct dentry *lo_d_dir = NULL;
+	struct inode *lo_i_dir = NULL;
+
+	// TODO: for the regular-file case, use list_first_entry
+	mutex_lock(&dim->comrade_list_lock);
+	list_for_each_entry(comrade, &(dim->comrade_list), list) {
+		lo_d = comrade->lo_d;
+		lo_d_dir = lock_parent(lo_d);
+		lo_i_dir = d_inode(lo_d_dir);
+		ret = vfs_unlink(lo_i_dir, lo_d, NULL); // lo_d GET
+		unlock_dir(lo_d_dir);
+		if (ret)
+			break;
+	}
+	mutex_unlock(&dim->comrade_list_lock);
+
+	return ret;
+}
+
+int hmdfs_unlink_merge(struct inode *dir, struct dentry *dentry)
+{
+	int ret = 0;
+
+	if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) {
+		ret = -EACCES;
+		goto out;
+	}
+
+	ret = do_unlink_merge(dir, dentry);
+	if (ret) {
+		hmdfs_err("unlink failed:%d", ret);
+		goto out;
+	}
+
+	d_drop(dentry);
+out:
+	return ret;
+}
+
+int hmdfs_symlink_merge(struct inode *dir, struct dentry *dentry,
+			const char *symname)
+{
+	int ret = 0;
+	struct hmdfs_recursive_para *rec_op_para = NULL;
+
+	if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) {
+		ret = -EACCES;
+		goto out;
+	}
+
+	rec_op_para = kmalloc(sizeof(*rec_op_para), GFP_KERNEL);
+	if (!rec_op_para) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	hmdfs_init_recursive_para(rec_op_para, F_SYMLINK_MERGE, 0, false,
+				  symname);
+	ret = create_lo_d_child(dir, dentry, false, rec_op_para);
+
+out:
+	trace_hmdfs_symlink_merge(dir, dentry, ret);
+	if (ret)
+		d_drop(dentry);
+	kfree(rec_op_para);
+	return ret;
+}
+
+int do_rename_merge(struct inode *old_dir, struct dentry *old_dentry,
+		    struct inode *new_dir, struct dentry *new_dentry,
+		    unsigned int flags)
+{
+	int ret = 0;
+	struct hmdfs_sb_info *sbi = (old_dir->i_sb)->s_fs_info;
+	struct hmdfs_dentry_info_merge *dim = hmdfs_dm(old_dentry);
+	struct hmdfs_dentry_comrade *comrade = NULL, *new_comrade = NULL;
+	struct path lo_p_new = { .mnt = NULL, .dentry = NULL };
+	struct inode *lo_i_old_dir = NULL, *lo_i_new_dir = NULL;
+	struct dentry *lo_d_old_dir = NULL, *lo_d_old = NULL,
+		      *lo_d_new_dir = NULL, *lo_d_new = NULL;
+	struct dentry *d_new_dir = NULL;
+	char *path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
+	char *abs_path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
+	char *path_name = NULL;
+
+	/* TODO: Will WPS rename a temporary file to another directory?
+	 * Could flags with the replace bit result in rename ops across
+	 * devices? Replace flags are currently not supported.
+ */ + if (flags & ~RENAME_NOREPLACE) { + ret = -EINVAL; + goto out; + } + + if (unlikely(!path_buf || !abs_path_buf)) { + ret = -ENOMEM; + goto out; + } + + list_for_each_entry(comrade, &dim->comrade_list, list) { + lo_d_old = comrade->lo_d; + d_new_dir = d_find_alias(new_dir); + lo_d_new_dir = hmdfs_get_lo_d(d_new_dir, comrade->dev_id); + dput(d_new_dir); + + if (!lo_d_new_dir) + continue; + path_name = dentry_path_raw(lo_d_new_dir, path_buf, PATH_MAX); + dput(lo_d_new_dir); + if (IS_ERR(path_name)) { + ret = PTR_ERR(path_name); + continue; + } + + if (strlen(sbi->real_dst) + strlen(path_name) + + strlen(new_dentry->d_name.name) + 2 > PATH_MAX) { + ret = -ENAMETOOLONG; + goto out; + } + + snprintf(abs_path_buf, PATH_MAX, "%s%s/%s", sbi->real_dst, + path_name, new_dentry->d_name.name); + if (S_ISDIR(d_inode(old_dentry)->i_mode)) + lo_d_new = kern_path_create(AT_FDCWD, abs_path_buf, + &lo_p_new, + LOOKUP_DIRECTORY); + else + lo_d_new = kern_path_create(AT_FDCWD, abs_path_buf, + &lo_p_new, 0); + if (IS_ERR(lo_d_new)) + continue; + + lo_d_new_dir = dget_parent(lo_d_new); + lo_i_new_dir = d_inode(lo_d_new_dir); + lo_d_old_dir = dget_parent(lo_d_old); + lo_i_old_dir = d_inode(lo_d_old_dir); + + ret = vfs_rename(lo_i_old_dir, lo_d_old, lo_i_new_dir, lo_d_new, + NULL, flags); + new_comrade = alloc_comrade(lo_p_new.dentry, comrade->dev_id); + if (IS_ERR(new_comrade)) { + ret = PTR_ERR(new_comrade); + goto no_comrade; + } + + link_comrade_unlocked(new_dentry, new_comrade); +no_comrade: + done_path_create(&lo_p_new, lo_d_new); + dput(lo_d_old_dir); + dput(lo_d_new_dir); + } +out: + kfree(abs_path_buf); + kfree(path_buf); + return ret; +} + +int hmdfs_rename_merge(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + char *old_dir_buf = NULL; + char *new_dir_buf = NULL; + char *old_dir_path = NULL; + char *new_dir_path = NULL; + struct dentry *old_dir_dentry = NULL; + struct dentry *new_dir_dentry = NULL; + int ret = 0; + + if (hmdfs_file_type(old_dentry->d_name.name) != HMDFS_TYPE_COMMON || + hmdfs_file_type(new_dentry->d_name.name) != HMDFS_TYPE_COMMON) { + ret = -EACCES; + goto rename_out; + } + old_dir_buf = kmalloc(PATH_MAX, GFP_KERNEL); + new_dir_buf = kmalloc(PATH_MAX, GFP_KERNEL); + if (!old_dir_buf || !new_dir_buf) { + ret = -ENOMEM; + goto rename_out; + } + + new_dir_dentry = d_find_alias(new_dir); + if (!new_dir_dentry) { + ret = -EINVAL; + goto rename_out; + } + + old_dir_dentry = d_find_alias(old_dir); + if (!old_dir_dentry) { + ret = -EINVAL; + dput(new_dir_dentry); + goto rename_out; + } + + old_dir_path = dentry_path_raw(old_dir_dentry, old_dir_buf, PATH_MAX); + new_dir_path = dentry_path_raw(new_dir_dentry, new_dir_buf, PATH_MAX); + dput(new_dir_dentry); + dput(old_dir_dentry); + if (strcmp(old_dir_path, new_dir_path)) { + ret = -EPERM; + goto rename_out; + } + + trace_hmdfs_rename_merge(old_dir, old_dentry, new_dir, new_dentry, + flags); + ret = do_rename_merge(old_dir, old_dentry, new_dir, new_dentry, flags); + + if (ret != 0) + d_drop(new_dentry); + + if (S_ISREG(old_dentry->d_inode->i_mode) && !ret) + d_invalidate(old_dentry); + +rename_out: + hmdfs_trace_rename_merge(old_dir, old_dentry, new_dir, new_dentry, ret); + kfree(old_dir_buf); + kfree(new_dir_buf); + return ret; +} + +static const char *hmdfs_get_link_merge(struct dentry *dentry, + struct inode *inode, + struct delayed_call *done) +{ + const char *link = NULL; + struct dentry *lower_dentry = NULL; + struct inode *lower_inode = NULL; + + if 
(!dentry) {
+		hmdfs_err("dentry NULL");
+		link = ERR_PTR(-ECHILD);
+		goto link_out;
+	}
+
+	lower_dentry = hmdfs_get_fst_lo_d(dentry);
+	if (!lower_dentry) {
+		WARN_ON(1);
+		link = ERR_PTR(-EINVAL);
+		goto out;
+	}
+	lower_inode = d_inode(lower_dentry);
+	if (!lower_inode->i_op || !lower_inode->i_op->get_link) {
+		hmdfs_err("lower inode holds no operations");
+		link = ERR_PTR(-EINVAL);
+		goto out;
+	}
+
+	link = lower_inode->i_op->get_link(lower_dentry, lower_inode, done);
+	if (IS_ERR_OR_NULL(link))
+		goto out;
+	fsstack_copy_attr_atime(inode, lower_inode);
+out:
+	dput(lower_dentry);
+	trace_hmdfs_get_link_merge(inode, dentry, PTR_ERR_OR_ZERO(link));
+link_out:
+	return link;
+}
+
+const struct inode_operations hmdfs_symlink_iops_merge = {
+	.get_link = hmdfs_get_link_merge,
+	.permission = hmdfs_permission,
+};
+
+const struct inode_operations hmdfs_dir_iops_merge = {
+	.lookup = hmdfs_lookup_merge,
+	.mkdir = hmdfs_mkdir_merge,
+	.create = hmdfs_create_merge,
+	.rmdir = hmdfs_rmdir_merge,
+	.unlink = hmdfs_unlink_merge,
+	.symlink = hmdfs_symlink_merge,
+	.rename = hmdfs_rename_merge,
+	.permission = hmdfs_permission,
+};
diff --git a/fs/hmdfs/inode_remote.c b/fs/hmdfs/inode_remote.c
new file mode 100644
index 000000000000..98a0e34c2253
--- /dev/null
+++ b/fs/hmdfs/inode_remote.c
@@ -0,0 +1,989 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/inode_remote.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include
+#include
+#include
+#include
+
+#include "comm/socket_adapter.h"
+#include "hmdfs.h"
+#include "hmdfs_client.h"
+#include "hmdfs_dentryfile.h"
+#include "hmdfs_trace.h"
+#include "authority/authentication.h"
+#include "stash.h"
+
+struct hmdfs_lookup_ret *lookup_remote_dentry(struct dentry *child_dentry,
+					      const struct qstr *qstr,
+					      uint64_t dev_id)
+{
+	struct hmdfs_lookup_ret *lookup_ret;
+	struct hmdfs_dentry *dentry = NULL;
+	struct clearcache_item *cache_item = NULL;
+	struct hmdfs_dcache_lookup_ctx ctx;
+	struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb);
+
+	cache_item = hmdfs_find_cache_item(dev_id, child_dentry->d_parent);
+	if (!cache_item)
+		return NULL;
+
+	lookup_ret = kmalloc(sizeof(*lookup_ret), GFP_KERNEL);
+	if (!lookup_ret)
+		goto out;
+
+	hmdfs_init_dcache_lookup_ctx(&ctx, sbi, qstr, cache_item->filp);
+	dentry = hmdfs_find_dentry(child_dentry, &ctx);
+	if (!dentry) {
+		kfree(lookup_ret);
+		lookup_ret = NULL;
+		goto out;
+	}
+
+	lookup_ret->i_mode = le16_to_cpu(dentry->i_mode);
+	lookup_ret->i_size = le64_to_cpu(dentry->i_size);
+	lookup_ret->i_mtime = le64_to_cpu(dentry->i_mtime);
+	lookup_ret->i_mtime_nsec = le32_to_cpu(dentry->i_mtime_nsec);
+	lookup_ret->i_ino = le64_to_cpu(dentry->i_ino);
+
+	hmdfs_unlock_file(ctx.filp, get_dentry_group_pos(ctx.bidx),
+			  DENTRYGROUP_SIZE);
+	kfree(ctx.page);
+out:
+	kref_put(&cache_item->ref, release_cache_item);
+	return lookup_ret;
+}
+
+/* get_remote_inode_info - fill hmdfs_lookup_ret by info from remote getattr
+ *
+ * @dentry: local dentry
+ * @hmdfs_peer: which remote device
+ * @flags: lookup flags
+ *
+ * return an allocated and initialized hmdfs_lookup_ret on success, and NULL on
+ * failure.
+ */ +struct hmdfs_lookup_ret *get_remote_inode_info(struct hmdfs_peer *con, + struct dentry *dentry, + unsigned int flags) +{ + int err = 0; + struct hmdfs_lookup_ret *lookup_ret = NULL; + struct hmdfs_getattr_ret *getattr_ret = NULL; + unsigned int expected_flags = 0; + + lookup_ret = kmalloc(sizeof(*lookup_ret), GFP_KERNEL); + if (!lookup_ret) + return NULL; + + err = hmdfs_remote_getattr(con, dentry, flags, &getattr_ret); + if (err) { + hmdfs_debug("inode info get failed with err %d", err); + kfree(lookup_ret); + return NULL; + } + /* make sure we got everything we need */ + expected_flags = STATX_INO | STATX_SIZE | STATX_MODE | STATX_MTIME; + if ((getattr_ret->stat.result_mask & expected_flags) != + expected_flags) { + hmdfs_debug("remote getattr failed with flag %x", + getattr_ret->stat.result_mask); + kfree(lookup_ret); + kfree(getattr_ret); + return NULL; + } + + lookup_ret->i_mode = getattr_ret->stat.mode; + lookup_ret->i_size = getattr_ret->stat.size; + lookup_ret->i_mtime = getattr_ret->stat.mtime.tv_sec; + lookup_ret->i_mtime_nsec = getattr_ret->stat.mtime.tv_nsec; + lookup_ret->i_ino = getattr_ret->stat.ino; + kfree(getattr_ret); + return lookup_ret; +} + +static void hmdfs_remote_readdir_work(struct work_struct *work) +{ + struct hmdfs_readdir_work *rw = + container_of(to_delayed_work(work), struct hmdfs_readdir_work, + dwork); + struct dentry *dentry = rw->dentry; + struct hmdfs_peer *con = rw->con; + const struct cred *old_cred = hmdfs_override_creds(con->sbi->cred); + bool empty = false; + + get_remote_dentry_file(dentry, con); + hmdfs_d(dentry)->async_readdir_in_progress = false; + hmdfs_revert_creds(old_cred); + + dput(dentry); + peer_put(con); + spin_lock(&con->sbi->async_readdir_work_lock); + list_del(&rw->head); + empty = list_empty(&con->sbi->async_readdir_work_list); + spin_unlock(&con->sbi->async_readdir_work_lock); + kfree(rw); + + if (empty) + wake_up_interruptible(&con->sbi->async_readdir_wq); +} + +static void get_remote_dentry_file_in_wq(struct dentry *dentry, + struct hmdfs_peer *con) +{ + struct hmdfs_readdir_work *rw = NULL; + + /* do nothing if async readdir is already in progress */ + if (cmpxchg_relaxed(&hmdfs_d(dentry)->async_readdir_in_progress, false, + true)) + return; + + rw = kmalloc(sizeof(*rw), GFP_KERNEL); + if (!rw) { + hmdfs_d(dentry)->async_readdir_in_progress = false; + return; + } + + dget(dentry); + peer_get(con); + rw->dentry = dentry; + rw->con = con; + spin_lock(&con->sbi->async_readdir_work_lock); + INIT_DELAYED_WORK(&rw->dwork, hmdfs_remote_readdir_work); + list_add(&rw->head, &con->sbi->async_readdir_work_list); + spin_unlock(&con->sbi->async_readdir_work_lock); + queue_delayed_work(con->dentry_wq, &rw->dwork, 0); +} + +void get_remote_dentry_file_sync(struct dentry *dentry, struct hmdfs_peer *con) +{ + get_remote_dentry_file_in_wq(dentry, con); + flush_workqueue(con->dentry_wq); +} + +struct hmdfs_lookup_ret *hmdfs_lookup_by_con(struct hmdfs_peer *con, + struct dentry *dentry, + struct qstr *qstr, + unsigned int flags, + const char *relative_path) +{ + struct hmdfs_lookup_ret *result = NULL; + + if (con->version > USERSPACE_MAX_VER) { + /* + * LOOKUP_REVAL means we found stale info from dentry file, thus + * we need to use remote getattr. + */ + if (flags & LOOKUP_REVAL) { + /* + * HMDFS_LOOKUP_REVAL means we need to skip dentry cache + * in lookup, because dentry cache in server might have + * stale data. 
+			 */
+			result = get_remote_inode_info(con, dentry,
+						       HMDFS_LOOKUP_REVAL);
+			get_remote_dentry_file_in_wq(dentry->d_parent, con);
+			return result;
+		}
+
+		/* If the cache file is still valid */
+		if (hmdfs_cache_revalidate(READ_ONCE(con->conn_time),
+					   con->device_id, dentry->d_parent)) {
+			result = lookup_remote_dentry(dentry, qstr,
+						      con->device_id);
+			/*
+			 * If lookup from the cache file failed, use getattr to
+			 * see if the remote has created the file.
+			 */
+			if (!(flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) &&
+			    !result)
+				result = get_remote_inode_info(con, dentry, 0);
+		/* If the cache file expired, use getattr directly,
+		 * except for create and rename operations.
+		 */
+		} else {
+			result = get_remote_inode_info(con, dentry, 0);
+			get_remote_dentry_file_in_wq(dentry->d_parent, con);
+		}
+	} else {
+		if (!relative_path)
+			return NULL;
+
+		result = con->conn_operations->remote_lookup(
+			con, relative_path, dentry->d_name.name);
+	}
+
+	return result;
+}
+
+/*
+ * hmdfs_update_inode_size - update the inode size when finding an already
+ * existing inode.
+ *
+ * First of all, if the file is opened for writing, we don't update the inode
+ * size here, because the inode size is about to be changed after writing.
+ *
+ * If the file is not opened, simply update getattr_isize (not the actual inode
+ * size, just a value shown to the user). This is safe because the inode size
+ * will be up-to-date after open.
+ *
+ * If the file is opened for read:
+ * a. getattr_isize == HMDFS_STALE_REMOTE_ISIZE
+ *   1) i_size == new_size, nothing needs to be done.
+ *   2) i_size > new_size, we keep the i_size and set getattr_isize to
+ *      new_size; stale data might be read in this case, which is fine because
+ *      the file was opened before the remote truncated it.
+ *   3) i_size < new_size, we drop the last page of the file if i_size is not
+ *      aligned to PAGE_SIZE, clear getattr_isize, and update i_size to
+ *      new_size.
+ * b. getattr_isize != HMDFS_STALE_REMOTE_ISIZE, getattr_isize will only be set
+ *    after 2).
+ *   4) getattr_isize > i_size, this situation is impossible.
+ *   5) i_size >= new_size, this case is the same as 2).
+ *   6) i_size < new_size, this case is the same as 3).
+ */
+static void hmdfs_update_inode_size(struct inode *inode, uint64_t new_size)
+{
+	struct hmdfs_inode_info *info = hmdfs_i(inode);
+	int writecount;
+	uint64_t size;
+
+	inode_lock(inode);
+	size = info->getattr_isize;
+	if (size == HMDFS_STALE_REMOTE_ISIZE)
+		size = i_size_read(inode);
+	if (size == new_size) {
+		inode_unlock(inode);
+		return;
+	}
+
+	writecount = atomic_read(&inode->i_writecount);
+	/* check if writing is in progress */
+	if (writecount > 0) {
+		info->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
+		inode_unlock(inode);
+		return;
+	}
+
+	/* check if there is no one who opens the file */
+	if (kref_read(&info->ref) == 0)
+		goto update_info;
+
+	/* check if there is someone who opens the file for read */
+	if (writecount == 0) {
+		uint64_t aligned_size;
+
+		/* use inode size here instead of getattr_isize */
+		size = i_size_read(inode);
+		if (new_size <= size)
+			goto update_info;
+		/*
+		 * if the old inode size is not aligned to HMDFS_PAGE_SIZE, we
+		 * need to drop the last page of the inode, otherwise zeroes
+		 * would be returned when reading the new range in that page
+		 * after changing the inode size.
+		 */
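+		/*
+		 * Worked example (illustrative; assumes HMDFS_PAGE_SIZE is
+		 * 4096): with size == 10000 and new_size == 20000,
+		 * aligned_size is 8192, so the partial page at [8192, 10000)
+		 * is dropped before i_size grows, forcing a fresh read of
+		 * that page instead of serving its stale zero-filled tail.
+		 */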
+		aligned_size = round_down(size, HMDFS_PAGE_SIZE);
+		if (aligned_size != size)
+			truncate_inode_pages(inode->i_mapping, aligned_size);
+		i_size_write(inode, new_size);
+		info->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
+		inode_unlock(inode);
+		return;
+	}
+
+update_info:
+	info->getattr_isize = new_size;
+	inode_unlock(inode);
+}
+
+static void hmdfs_update_inode(struct inode *inode,
+			       struct hmdfs_lookup_ret *lookup_result)
+{
+	struct hmdfs_time_t remote_mtime = {
+		.tv_sec = lookup_result->i_mtime,
+		.tv_nsec = lookup_result->i_mtime_nsec,
+	};
+
+	/*
+	 * We only update mtime if the file is not opened for writing. If we
+	 * did update it right before writing starts, the user might see the
+	 * mtime bouncing up and down if the system times of the server and
+	 * the client do not match. However, the mtime in the client will
+	 * eventually match the server after a timeout without writing.
+	 */
+	if (!inode_is_open_for_write(inode))
+		inode->i_mtime = remote_mtime;
+
+	/*
+	 * We don't care about the i_size of a dir, and locking the inode of a
+	 * dir might cause a deadlock.
+	 */
+	if (S_ISREG(inode->i_mode))
+		hmdfs_update_inode_size(inode, lookup_result->i_size);
+}
+
+static void hmdfs_fill_inode_android(struct inode *inode, struct inode *dir,
+				     umode_t mode)
+{
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+	inode->i_uid = dir->i_uid;
+	inode->i_gid = dir->i_gid;
+#endif
+}
+
+struct inode *fill_inode_remote(struct super_block *sb, struct hmdfs_peer *con,
+				struct hmdfs_lookup_ret *res, struct inode *dir)
+{
+	struct inode *inode = NULL;
+	struct hmdfs_inode_info *info;
+	umode_t mode = res->i_mode;
+
+	inode = hmdfs_iget5_locked_remote(sb, con, res->i_ino);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	info = hmdfs_i(inode);
+	info->inode_type = HMDFS_LAYER_OTHER_REMOTE;
+	if (con->version > USERSPACE_MAX_VER) {
+		/* the inode was found in cache */
+		if (!(inode->i_state & I_NEW)) {
+			hmdfs_fill_inode_android(inode, dir, mode);
+			hmdfs_update_inode(inode, res);
+			return inode;
+		}
+
+		hmdfs_remote_init_stash_status(con, inode, mode);
+	}
+
+	inode->i_ctime.tv_sec = 0;
+	inode->i_ctime.tv_nsec = 0;
+	inode->i_mtime.tv_sec = res->i_mtime;
+	inode->i_mtime.tv_nsec = res->i_mtime_nsec;
+
+	inode->i_uid = KUIDT_INIT((uid_t)1000);
+	inode->i_gid = KGIDT_INIT((gid_t)1000);
+
+	if (S_ISDIR(mode))
+		inode->i_mode = S_IFDIR | S_IRWXU | S_IRWXG | S_IXOTH;
+	else if (S_ISREG(mode))
+		inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
+	else if (S_ISLNK(mode))
+		inode->i_mode = S_IFREG | S_IRWXU | S_IRWXG;
+
+	if (S_ISREG(mode) || S_ISLNK(mode)) { // Regular file
+		inode->i_op = con->conn_operations->remote_file_iops;
+		inode->i_fop = con->conn_operations->remote_file_fops;
+		inode->i_size = res->i_size;
+		set_nlink(inode, 1);
+	} else if (S_ISDIR(mode)) { // Directory
+		inode->i_op = &hmdfs_dev_dir_inode_ops_remote;
+		inode->i_fop = &hmdfs_dev_dir_ops_remote;
+		set_nlink(inode, 2);
+	}
+	inode->i_mapping->a_ops = con->conn_operations->remote_file_aops;
+
+	hmdfs_fill_inode_android(inode, dir, mode);
+	unlock_new_inode(inode);
+	return inode;
+}
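+
+/*
+ * Note on the mode mapping above: remote regular files are presented as
+ * 0660 regular files, remote symlinks as 0770 regular files (the link is
+ * resolved through device_view rather than followed locally), and remote
+ * directories as 0771, so that others may traverse but not list them.
+ */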
+
+static struct dentry *hmdfs_lookup_remote_dentry(struct inode *parent_inode,
+						 struct dentry *child_dentry,
+						 int flags)
+{
+	struct dentry *ret = NULL;
+	struct inode *inode = NULL;
+	struct super_block *sb = parent_inode->i_sb;
+	struct hmdfs_sb_info *sbi = sb->s_fs_info;
+	struct hmdfs_lookup_ret *lookup_result = NULL;
+	struct hmdfs_peer *con = NULL;
+	char *file_name = NULL;
+	int file_name_len = child_dentry->d_name.len;
+	struct qstr qstr;
+	struct hmdfs_dentry_info *gdi = hmdfs_d(child_dentry);
+	uint64_t device_id = 0;
+	char *relative_path = NULL;
+
+	file_name = kzalloc(NAME_MAX + 1, GFP_KERNEL);
+	if (!file_name)
+		return ERR_PTR(-ENOMEM);
+	strncpy(file_name, child_dentry->d_name.name, file_name_len);
+
+	qstr.name = file_name;
+	qstr.len = strlen(file_name);
+
+	device_id = gdi->device_id;
+	con = hmdfs_lookup_from_devid(sbi, device_id);
+	if (!con) {
+		ret = ERR_PTR(-ESHUTDOWN);
+		goto done;
+	}
+
+	relative_path = hmdfs_get_dentry_relative_path(child_dentry->d_parent);
+	if (unlikely(!relative_path)) {
+		ret = ERR_PTR(-ENOMEM);
+		hmdfs_err("get relative path failed %d", -ENOMEM);
+		goto done;
+	}
+
+	lookup_result = hmdfs_lookup_by_con(con, child_dentry, &qstr, flags,
+					    relative_path);
+	if (lookup_result != NULL) {
+		if (S_ISLNK(lookup_result->i_mode))
+			gdi->file_type = HM_SYMLINK;
+		inode = fill_inode_remote(sb, con, lookup_result, parent_inode);
+		ret = d_splice_alias(inode, child_dentry);
+		if (!IS_ERR_OR_NULL(ret))
+			child_dentry = ret;
+		if (!IS_ERR(ret))
+			check_and_fixup_ownership_remote(parent_inode,
+							 child_dentry);
+	} else {
+		ret = ERR_PTR(-ENOENT);
+	}
+
+done:
+	if (con)
+		peer_put(con);
+	kfree(relative_path);
+	kfree(lookup_result);
+	kfree(file_name);
+	return ret;
+}
+
+struct dentry *hmdfs_lookup_remote(struct inode *parent_inode,
+				   struct dentry *child_dentry,
+				   unsigned int flags)
+{
+	int err = 0;
+	struct dentry *ret = NULL;
+	struct hmdfs_dentry_info *gdi = NULL;
+	struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb);
+
+	trace_hmdfs_lookup_remote(parent_inode, child_dentry, flags);
+	if (child_dentry->d_name.len > NAME_MAX) {
+		err = -ENAMETOOLONG;
+		ret = ERR_PTR(-ENAMETOOLONG);
+		goto out;
+	}
+
+	err = init_hmdfs_dentry_info(sbi, child_dentry,
+				     HMDFS_LAYER_OTHER_REMOTE);
+	if (err) {
+		ret = ERR_PTR(err);
+		goto out;
+	}
+	gdi = hmdfs_d(child_dentry);
+	gdi->device_id = hmdfs_d(child_dentry->d_parent)->device_id;
+
+	if (is_current_hmdfs_server_ctx())
+		goto out;
+
+	ret = hmdfs_lookup_remote_dentry(parent_inode, child_dentry, flags);
+	/*
+	 * don't return an error if the inode does not exist, so that vfs can
+	 * continue to create it.
+ */ + if (IS_ERR_OR_NULL(ret)) { + err = PTR_ERR(ret); + if (err == -ENOENT) + ret = NULL; + } else { + child_dentry = ret; + } + +out: + if (!err) + hmdfs_set_time(child_dentry, jiffies); + trace_hmdfs_lookup_remote_end(parent_inode, child_dentry, err); + return ret; +} + +/* delete dentry in cache file */ +void delete_in_cache_file(uint64_t dev_id, struct dentry *dentry) +{ + struct clearcache_item *item = NULL; + + item = hmdfs_find_cache_item(dev_id, dentry->d_parent); + if (item) { + hmdfs_delete_dentry(dentry, item->filp); + kref_put(&item->ref, release_cache_item); + } else { + hmdfs_info("find cache item failed, con:%llu", dev_id); + } +} + +int hmdfs_mkdir_remote_dentry(struct hmdfs_peer *conn, struct dentry *dentry, + umode_t mode) +{ + int err = 0; + char *dir_path = NULL; + struct dentry *parent_dentry = dentry->d_parent; + struct inode *parent_inode = d_inode(parent_dentry); + struct super_block *sb = parent_inode->i_sb; + const unsigned char *d_name = dentry->d_name.name; + struct hmdfs_lookup_ret *mkdir_ret = NULL; + struct inode *inode = NULL; + + mkdir_ret = kmalloc(sizeof(*mkdir_ret), GFP_KERNEL); + if (!mkdir_ret) { + err = -ENOMEM; + return err; + } + dir_path = hmdfs_get_dentry_relative_path(parent_dentry); + if (!dir_path) { + err = -EACCES; + goto mkdir_out; + } + err = hmdfs_client_start_mkdir(conn, dir_path, d_name, mode, mkdir_ret); + if (err) { + hmdfs_err("hmdfs_client_start_mkdir failed err = %d", err); + goto mkdir_out; + } + if (mkdir_ret) { + inode = fill_inode_remote(sb, conn, mkdir_ret, parent_inode); + if (!IS_ERR(inode)) + d_add(dentry, inode); + else + err = PTR_ERR(inode); + check_and_fixup_ownership_remote(parent_inode, dentry); + } else { + err = -ENOENT; + } + +mkdir_out: + kfree(dir_path); + kfree(mkdir_ret); + return err; +} + +int hmdfs_mkdir_remote(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + int err = 0; + struct hmdfs_inode_info *info = hmdfs_i(dir); + struct hmdfs_peer *con = info->conn; + + if (!con) { + hmdfs_warning("qpb_debug: con is null!"); + goto out; + } + if (con->version <= USERSPACE_MAX_VER) { + err = -EPERM; + goto out; + } + err = hmdfs_mkdir_remote_dentry(con, dentry, mode); + if (!err) + create_in_cache_file(con->device_id, dentry); + else + hmdfs_err("remote mkdir failed err = %d", err); + +out: + trace_hmdfs_mkdir_remote(dir, dentry, err); + return err; +} + +int hmdfs_create_remote_dentry(struct hmdfs_peer *conn, struct dentry *dentry, + umode_t mode, bool want_excl) +{ + int err = 0; + char *dir_path = NULL; + struct dentry *parent_dentry = dentry->d_parent; + struct inode *parent_inode = d_inode(parent_dentry); + struct super_block *sb = parent_inode->i_sb; + const unsigned char *d_name = dentry->d_name.name; + struct hmdfs_lookup_ret *create_ret = NULL; + struct inode *inode = NULL; + + create_ret = kmalloc(sizeof(*create_ret), GFP_KERNEL); + if (!create_ret) { + err = -ENOMEM; + return err; + } + dir_path = hmdfs_get_dentry_relative_path(parent_dentry); + if (!dir_path) { + err = -EACCES; + goto create_out; + } + err = hmdfs_client_start_create(conn, dir_path, d_name, mode, + want_excl, create_ret); + if (err) { + hmdfs_err("hmdfs_client_start_create failed err = %d", err); + goto create_out; + } + if (create_ret) { + inode = fill_inode_remote(sb, conn, create_ret, parent_inode); + if (!IS_ERR(inode)) + d_add(dentry, inode); + else + err = PTR_ERR(inode); + check_and_fixup_ownership_remote(parent_inode, dentry); + } else { + err = -ENOENT; + hmdfs_err("get remote inode info failed err = %d", err); + } + 
+create_out:
+	kfree(dir_path);
+	kfree(create_ret);
+	return err;
+}
+
+int hmdfs_create_remote(struct inode *dir, struct dentry *dentry, umode_t mode,
+			bool want_excl)
+{
+	int err = 0;
+	struct hmdfs_inode_info *info = hmdfs_i(dir);
+	struct hmdfs_peer *con = info->conn;
+
+	if (!con) {
+		hmdfs_warning("qpb_debug: con is null!");
+		goto out;
+	}
+	if (con->version <= USERSPACE_MAX_VER) {
+		err = -EPERM;
+		goto out;
+	}
+	err = hmdfs_create_remote_dentry(con, dentry, mode, want_excl);
+	if (!err)
+		create_in_cache_file(con->device_id, dentry);
+	else
+		hmdfs_err("remote create failed err = %d", err);
+
+out:
+	trace_hmdfs_create_remote(dir, dentry, err);
+	return err;
+}
+
+int hmdfs_rmdir_remote_dentry(struct hmdfs_peer *conn, struct dentry *dentry)
+{
+	int error = 0;
+	char *dir_path = NULL;
+	const char *dentry_name = dentry->d_name.name;
+
+	dir_path = hmdfs_get_dentry_relative_path(dentry->d_parent);
+	if (!dir_path) {
+		error = -EACCES;
+		goto rmdir_out;
+	}
+
+	error = hmdfs_client_start_rmdir(conn, dir_path, dentry_name);
+	if (!error)
+		delete_in_cache_file(conn->device_id, dentry);
+
+rmdir_out:
+	kfree(dir_path);
+	return error;
+}
+
+int hmdfs_rmdir_remote(struct inode *dir, struct dentry *dentry)
+{
+	int err = 0;
+	struct hmdfs_inode_info *info = hmdfs_i(dentry->d_inode);
+	struct hmdfs_peer *con = info->conn;
+
+	if (!con)
+		goto out;
+
+	if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) {
+		err = -EACCES;
+		goto out;
+	}
+	if (con->version <= USERSPACE_MAX_VER) {
+		err = -EPERM;
+		goto out;
+	}
+	err = hmdfs_rmdir_remote_dentry(con, dentry);
+	/* Drop the dentry even if the remote operation failed:
+	 * the remote device may have disconnected while the
+	 * remote rmdir was in flight.
+	 */
+	d_drop(dentry);
+out:
+	/* return the connected device's error code */
+	trace_hmdfs_rmdir_remote(dir, dentry, err);
+	return err;
+}
+
+int hmdfs_dev_unlink_from_con(struct hmdfs_peer *conn, struct dentry *dentry)
+{
+	int error = 0;
+	char *dir_path = NULL;
+	const char *dentry_name = dentry->d_name.name;
+
+	dir_path = hmdfs_get_dentry_relative_path(dentry->d_parent);
+	if (!dir_path) {
+		error = -EACCES;
+		goto unlink_out;
+	}
+	error = hmdfs_client_start_unlink(conn, dir_path, dentry_name);
+	if (!error) {
+		delete_in_cache_file(conn->device_id, dentry);
+		drop_nlink(d_inode(dentry));
+		d_drop(dentry);
+	}
+unlink_out:
+	kfree(dir_path);
+	return error;
+}
+
+int hmdfs_unlink_remote(struct inode *dir, struct dentry *dentry)
+{
+	struct hmdfs_inode_info *info = hmdfs_i(dentry->d_inode);
+	struct hmdfs_peer *conn = info->conn;
+
+	if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON)
+		return -EACCES;
+
+	if (!conn)
+		return 0;
+
+	if (conn->status != NODE_STAT_ONLINE)
+		return 0;
+
+	return conn->conn_operations->remote_unlink(conn, dentry);
+}
+
+/* rename dentry in cache file */
+static void rename_in_cache_file(uint64_t dev_id, struct dentry *old_dentry,
+				 struct dentry *new_dentry)
+{
+	struct clearcache_item *old_item = NULL;
+	struct clearcache_item *new_item = NULL;
+
+	old_item = hmdfs_find_cache_item(dev_id, old_dentry->d_parent);
+	new_item = hmdfs_find_cache_item(dev_id, new_dentry->d_parent);
+	if (old_item != NULL && new_item != NULL) {
+		hmdfs_rename_dentry(old_dentry, new_dentry, old_item->filp,
+				    new_item->filp);
+	} else if (old_item != NULL) {
+		hmdfs_err("new cache item find failed!");
+	} else if (new_item != NULL) {
+		hmdfs_err("old cache item find failed!");
+	} else {
+		hmdfs_err("both cache item find failed!");
+	}
+
+	if (old_item)
+		kref_put(&old_item->ref,
release_cache_item); + if (new_item) + kref_put(&new_item->ref, release_cache_item); +} + +int hmdfs_rename_remote(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + int err = 0; + int ret = 0; + const char *old_dentry_d_name = old_dentry->d_name.name; + char *relative_old_dir_path = 0; + const char *new_dentry_d_name = new_dentry->d_name.name; + char *relative_new_dir_path = 0; + struct hmdfs_inode_info *info = hmdfs_i(old_dentry->d_inode); + struct hmdfs_peer *con = info->conn; + + trace_hmdfs_rename_remote(old_dir, old_dentry, new_dir, new_dentry, + flags); + + if (flags & ~RENAME_NOREPLACE) + return -EINVAL; + + if (hmdfs_file_type(old_dentry->d_name.name) != HMDFS_TYPE_COMMON || + hmdfs_file_type(new_dentry->d_name.name) != HMDFS_TYPE_COMMON) { + return -EACCES; + } + + relative_old_dir_path = + hmdfs_get_dentry_relative_path(old_dentry->d_parent); + relative_new_dir_path = + hmdfs_get_dentry_relative_path(new_dentry->d_parent); + if (!relative_old_dir_path || !relative_new_dir_path) { + err = -EACCES; + goto rename_out; + } + if (S_ISREG(old_dentry->d_inode->i_mode)) { + if (con->version > USERSPACE_MAX_VER) { + hmdfs_debug("send MSG to remote devID %llu", + con->device_id); + err = hmdfs_client_start_rename( + con, relative_old_dir_path, old_dentry_d_name, + relative_new_dir_path, new_dentry_d_name, + flags); + if (!err) + rename_in_cache_file(con->device_id, old_dentry, + new_dentry); + } + } else if (S_ISDIR(old_dentry->d_inode->i_mode)) { + if ((con->status == NODE_STAT_ONLINE) && + (con->version > USERSPACE_MAX_VER)) { + ret = hmdfs_client_start_rename( + con, relative_old_dir_path, old_dentry_d_name, + relative_new_dir_path, new_dentry_d_name, + flags); + if (!ret) + rename_in_cache_file(con->device_id, old_dentry, + new_dentry); + else + err = ret; + } + } + if (!err) + d_invalidate(old_dentry); +rename_out: + kfree(relative_old_dir_path); + kfree(relative_new_dir_path); + return err; +} + +static int hmdfs_dir_setattr_remote(struct dentry *dentry, struct iattr *ia) +{ + // Do not support dir setattr + return 0; +} + +const struct inode_operations hmdfs_dev_dir_inode_ops_remote = { + .lookup = hmdfs_lookup_remote, + .mkdir = hmdfs_mkdir_remote, + .create = hmdfs_create_remote, + .rmdir = hmdfs_rmdir_remote, + .unlink = hmdfs_unlink_remote, + .rename = hmdfs_rename_remote, + .setattr = hmdfs_dir_setattr_remote, + .permission = hmdfs_permission, +}; + +static int hmdfs_setattr_remote(struct dentry *dentry, struct iattr *ia) +{ + struct hmdfs_inode_info *info = hmdfs_i(d_inode(dentry)); + struct hmdfs_peer *conn = info->conn; + struct inode *inode = d_inode(dentry); + char *send_buf = NULL; + int err = 0; + + if (hmdfs_inode_is_stashing(info)) + return -EAGAIN; + + send_buf = hmdfs_get_dentry_relative_path(dentry); + if (!send_buf) { + err = -ENOMEM; + goto out_free; + } + if (ia->ia_valid & ATTR_SIZE) { + err = inode_newsize_ok(inode, ia->ia_size); + if (err) + goto out_free; + truncate_setsize(inode, ia->ia_size); + info->getattr_isize = HMDFS_STALE_REMOTE_ISIZE; + } + if (ia->ia_valid & ATTR_MTIME) + inode->i_mtime = ia->ia_mtime; + + if ((ia->ia_valid & ATTR_SIZE) || (ia->ia_valid & ATTR_MTIME)) { + struct setattr_info send_setattr_info = { + .size = cpu_to_le64(ia->ia_size), + .valid = cpu_to_le32(ia->ia_valid), + .mtime = cpu_to_le64(ia->ia_mtime.tv_sec), + .mtime_nsec = cpu_to_le32(ia->ia_mtime.tv_nsec), + }; + err = hmdfs_send_setattr(conn, send_buf, &send_setattr_info); + } +out_free: + 
kfree(send_buf); + return err; +} + +int hmdfs_remote_getattr(struct hmdfs_peer *conn, struct dentry *dentry, + unsigned int lookup_flags, + struct hmdfs_getattr_ret **result) +{ + char *send_buf = NULL; + struct hmdfs_getattr_ret *attr = NULL; + int err = 0; + + if (dentry->d_sb != conn->sbi->sb || !result) + return -EINVAL; + + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) + return -ENOMEM; + + send_buf = hmdfs_get_dentry_relative_path(dentry); + if (!send_buf) { + kfree(attr); + return -ENOMEM; + } + + err = hmdfs_send_getattr(conn, send_buf, lookup_flags, attr); + kfree(send_buf); + + if (err) { + kfree(attr); + return err; + } + + *result = attr; + return 0; +} + +static int hmdfs_get_cached_attr_remote(const struct path *path, + struct kstat *stat, u32 request_mask, + unsigned int flags) +{ + struct inode *inode = d_inode(path->dentry); + struct hmdfs_inode_info *info = hmdfs_i(inode); + uint64_t size = info->getattr_isize; + + stat->ino = inode->i_ino; + stat->mtime = inode->i_mtime; + stat->mode = inode->i_mode; + stat->uid.val = inode->i_uid.val; + stat->gid.val = inode->i_gid.val; + if (size == HMDFS_STALE_REMOTE_ISIZE) + size = i_size_read(inode); + + stat->size = size; + return 0; +} + +ssize_t hmdfs_remote_listxattr(struct dentry *dentry, char *list, size_t size) +{ + struct inode *inode = d_inode(dentry); + struct hmdfs_inode_info *info = hmdfs_i(inode); + struct hmdfs_peer *conn = info->conn; + char *send_buf = NULL; + ssize_t res = 0; + size_t r_size = size; + + if (!hmdfs_support_xattr(dentry)) + return -EOPNOTSUPP; + + if (size > HMDFS_LISTXATTR_SIZE_MAX) + r_size = HMDFS_LISTXATTR_SIZE_MAX; + + send_buf = hmdfs_get_dentry_relative_path(dentry); + if (!send_buf) + return -ENOMEM; + + res = hmdfs_send_listxattr(conn, send_buf, list, r_size); + kfree(send_buf); + + if (res == -ERANGE && r_size != size) { + hmdfs_info("no support listxattr size over than %d", + HMDFS_LISTXATTR_SIZE_MAX); + res = -E2BIG; + } + + return res; +} + +const struct inode_operations hmdfs_dev_file_iops_remote = { + .setattr = hmdfs_setattr_remote, + .permission = hmdfs_permission, + .getattr = hmdfs_get_cached_attr_remote, + .listxattr = hmdfs_remote_listxattr, +}; diff --git a/fs/hmdfs/inode_root.c b/fs/hmdfs/inode_root.c new file mode 100644 index 000000000000..30d0ca6a2264 --- /dev/null +++ b/fs/hmdfs/inode_root.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/inode_root.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#include +#include +#include + +#include "authority/authentication.h" +#include "comm/socket_adapter.h" +#include "comm/transport.h" +#include "hmdfs_dentryfile.h" +#include "hmdfs_device_view.h" +#include "hmdfs_merge_view.h" +#include "hmdfs_trace.h" + +static struct inode *fill_device_local_inode(struct super_block *sb, + struct inode *lower_inode) +{ + struct inode *inode = NULL; + struct hmdfs_inode_info *info = NULL; + + if (!igrab(lower_inode)) + return ERR_PTR(-ESTALE); + + inode = hmdfs_iget_locked_root(sb, HMDFS_ROOT_DEV_LOCAL, lower_inode, + NULL); + if (!inode) { + hmdfs_err("iget5_locked get inode NULL"); + iput(lower_inode); + return ERR_PTR(-ENOMEM); + } + if (!(inode->i_state & I_NEW)) { + iput(lower_inode); + return inode; + } + + info = hmdfs_i(inode); + info->inode_type = HMDFS_LAYER_SECOND_LOCAL; + + inode->i_mode = + (lower_inode->i_mode & S_IFMT) | S_IRWXU | S_IRWXG | S_IXOTH; + + inode->i_uid = KUIDT_INIT((uid_t)1000); + inode->i_gid = KGIDT_INIT((gid_t)1000); + + inode->i_atime = lower_inode->i_atime; + inode->i_ctime = lower_inode->i_ctime; + inode->i_mtime = lower_inode->i_mtime; + + inode->i_op = &hmdfs_dir_inode_ops_local; + inode->i_fop = &hmdfs_dir_ops_local; + + fsstack_copy_inode_size(inode, lower_inode); + unlock_new_inode(inode); + return inode; +} + +static struct inode *fill_device_inode_remote(struct super_block *sb, + uint64_t dev_id) +{ + struct inode *inode = NULL; + struct hmdfs_inode_info *info = NULL; + struct hmdfs_peer *con = NULL; + + con = hmdfs_lookup_from_devid(sb->s_fs_info, dev_id); + if (!con) + return ERR_PTR(-ENOENT); + + inode = hmdfs_iget_locked_root(sb, HMDFS_ROOT_DEV_REMOTE, NULL, con); + if (!inode) { + hmdfs_err("get inode NULL"); + inode = ERR_PTR(-ENOMEM); + goto out; + } + if (!(inode->i_state & I_NEW)) + goto out; + + info = hmdfs_i(inode); + info->inode_type = HMDFS_LAYER_SECOND_REMOTE; + + inode->i_mode = S_IFDIR | S_IRWXU | S_IRWXG | S_IXOTH; + + inode->i_uid = KUIDT_INIT((uid_t)1000); + inode->i_gid = KGIDT_INIT((gid_t)1000); + inode->i_op = &hmdfs_dev_dir_inode_ops_remote; + inode->i_fop = &hmdfs_dev_dir_ops_remote; + + unlock_new_inode(inode); + +out: + peer_put(con); + return inode; +} + +struct dentry *hmdfs_device_lookup(struct inode *parent_inode, + struct dentry *child_dentry, + unsigned int flags) +{ + const char *d_name = child_dentry->d_name.name; + struct inode *root_inode = NULL; + struct super_block *sb = parent_inode->i_sb; + struct hmdfs_sb_info *sbi = sb->s_fs_info; + struct dentry *ret_dentry = NULL; + int err = 0; + struct hmdfs_peer *con = NULL; + struct hmdfs_dentry_info *di = NULL; + uint8_t *cid = NULL; + struct path *root_lower_path = NULL; + + trace_hmdfs_device_lookup(parent_inode, child_dentry, flags); + if (!strncmp(d_name, DEVICE_VIEW_LOCAL, + sizeof(DEVICE_VIEW_LOCAL) - 1)) { + err = init_hmdfs_dentry_info(sbi, child_dentry, + HMDFS_LAYER_SECOND_LOCAL); + if (err) { + ret_dentry = ERR_PTR(err); + goto out; + } + di = hmdfs_d(sb->s_root); + root_lower_path = &(di->lower_path); + hmdfs_set_lower_path(child_dentry, root_lower_path); + path_get(root_lower_path); + root_inode = fill_device_local_inode( + sb, d_inode(root_lower_path->dentry)); + if (IS_ERR(root_inode)) { + err = PTR_ERR(root_inode); + ret_dentry = ERR_PTR(err); + hmdfs_put_reset_lower_path(child_dentry); + goto out; + } + ret_dentry = d_splice_alias(root_inode, child_dentry); + if (IS_ERR(ret_dentry)) { + err = PTR_ERR(ret_dentry); + ret_dentry = ERR_PTR(err); + hmdfs_put_reset_lower_path(child_dentry); + goto out; + } + } 
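+	/*
+	 * Any other name under the device view is taken as a remote
+	 * device CID: copy it out, look up the matching peer and build
+	 * the second-level remote inode for that device.
+	 */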
else { + err = init_hmdfs_dentry_info(sbi, child_dentry, + HMDFS_LAYER_SECOND_REMOTE); + di = hmdfs_d(child_dentry); + if (err) { + ret_dentry = ERR_PTR(err); + goto out; + } + cid = kzalloc(HMDFS_CID_SIZE + 1, GFP_KERNEL); + if (!cid) { + err = -ENOMEM; + ret_dentry = ERR_PTR(err); + goto out; + } + memcpy(cid, d_name, HMDFS_CID_SIZE); + cid[HMDFS_CID_SIZE] = '\0'; + con = hmdfs_lookup_from_cid(sbi, cid); + if (!con) { + kfree(cid); + err = -ENOENT; + ret_dentry = ERR_PTR(err); + goto out; + } + di->device_id = con->device_id; + root_inode = fill_device_inode_remote(sb, di->device_id); + if (IS_ERR(root_inode)) { + kfree(cid); + err = PTR_ERR(root_inode); + ret_dentry = ERR_PTR(err); + goto out; + } + ret_dentry = d_splice_alias(root_inode, child_dentry); + kfree(cid); + } + if (root_inode) + hmdfs_root_inode_perm_init(root_inode); + if (!err) + hmdfs_set_time(child_dentry, jiffies); +out: + if (con) + peer_put(con); + trace_hmdfs_device_lookup_end(parent_inode, child_dentry, err); + return ret_dentry; +} + +struct dentry *hmdfs_root_lookup(struct inode *parent_inode, + struct dentry *child_dentry, + unsigned int flags) +{ + const char *d_name = child_dentry->d_name.name; + struct inode *root_inode = NULL; + struct super_block *sb = parent_inode->i_sb; + struct hmdfs_sb_info *sbi = sb->s_fs_info; + struct dentry *ret = ERR_PTR(-ENOENT); + struct path root_path; + + trace_hmdfs_root_lookup(parent_inode, child_dentry, flags); + if (sbi->s_merge_switch && !strcmp(d_name, MERGE_VIEW_ROOT)) { + ret = hmdfs_lookup_merge(parent_inode, child_dentry, flags); + if (ret && !IS_ERR(ret)) + child_dentry = ret; + root_inode = d_inode(child_dentry); + } else if (!strcmp(d_name, DEVICE_VIEW_ROOT)) { + ret = ERR_PTR(init_hmdfs_dentry_info( + sbi, child_dentry, HMDFS_LAYER_FIRST_DEVICE)); + if (IS_ERR(ret)) + goto out; + ret = ERR_PTR(kern_path(sbi->local_src, 0, &root_path)); + if (IS_ERR(ret)) + goto out; + root_inode = fill_device_inode(sb, d_inode(root_path.dentry)); + ret = d_splice_alias(root_inode, child_dentry); + path_put(&root_path); + } + if (!IS_ERR(ret) && root_inode) + hmdfs_root_inode_perm_init(root_inode); + +out: + trace_hmdfs_root_lookup_end(parent_inode, child_dentry, + PTR_ERR_OR_ZERO(ret)); + return ret; +} + +const struct inode_operations hmdfs_device_ops = { + .lookup = hmdfs_device_lookup, +}; + +const struct inode_operations hmdfs_root_ops = { + .lookup = hmdfs_root_lookup, +}; + +struct inode *fill_device_inode(struct super_block *sb, + struct inode *lower_inode) +{ + struct inode *inode = NULL; + struct hmdfs_inode_info *info = NULL; + + inode = hmdfs_iget_locked_root(sb, HMDFS_ROOT_DEV, NULL, NULL); + if (!inode) { + hmdfs_err("iget5_locked get inode NULL"); + return ERR_PTR(-ENOMEM); + } + if (!(inode->i_state & I_NEW)) + return inode; + + info = hmdfs_i(inode); + info->inode_type = HMDFS_LAYER_FIRST_DEVICE; + + inode->i_atime = lower_inode->i_atime; + inode->i_ctime = lower_inode->i_ctime; + inode->i_mtime = lower_inode->i_mtime; + + inode->i_mode = (lower_inode->i_mode & S_IFMT) | S_IRUSR | S_IXUSR | + S_IRGRP | S_IXGRP | S_IXOTH; + inode->i_uid = KUIDT_INIT((uid_t)1000); + inode->i_gid = KGIDT_INIT((gid_t)1000); + inode->i_op = &hmdfs_device_ops; + inode->i_fop = &hmdfs_device_fops; + + fsstack_copy_inode_size(inode, lower_inode); + unlock_new_inode(inode); + return inode; +} + +struct inode *fill_root_inode(struct super_block *sb, struct inode *lower_inode) +{ + struct inode *inode = NULL; + struct hmdfs_inode_info *info = NULL; + + if (!igrab(lower_inode)) + return 
ERR_PTR(-ESTALE); + + inode = hmdfs_iget_locked_root(sb, HMDFS_ROOT_ANCESTOR, lower_inode, + NULL); + if (!inode) { + hmdfs_err("iget5_locked get inode NULL"); + iput(lower_inode); + return ERR_PTR(-ENOMEM); + } + if (!(inode->i_state & I_NEW)) { + iput(lower_inode); + return inode; + } + + info = hmdfs_i(inode); + info->inode_type = HMDFS_LAYER_ZERO; + inode->i_mode = (lower_inode->i_mode & S_IFMT) | S_IRUSR | S_IXUSR | + S_IRGRP | S_IXGRP | S_IXOTH; + +#ifdef CONFIG_HMDFS_FS_PERMISSION + inode->i_uid = lower_inode->i_uid; + inode->i_gid = lower_inode->i_gid; +#else + inode->i_uid = KUIDT_INIT((uid_t)1000); + inode->i_gid = KGIDT_INIT((gid_t)1000); +#endif + inode->i_atime = lower_inode->i_atime; + inode->i_ctime = lower_inode->i_ctime; + inode->i_mtime = lower_inode->i_mtime; + + inode->i_op = &hmdfs_root_ops; + inode->i_fop = &hmdfs_root_fops; + fsstack_copy_inode_size(inode, lower_inode); + unlock_new_inode(inode); + return inode; +} diff --git a/fs/hmdfs/main.c b/fs/hmdfs/main.c new file mode 100644 index 000000000000..c9b28e8cb9f1 --- /dev/null +++ b/fs/hmdfs/main.c @@ -0,0 +1,1069 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/main.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + + +#include "hmdfs.h" + +#include +#include +#include +#include +#include +#if KERNEL_VERSION(5, 9, 0) < LINUX_VERSION_CODE +#include +#else +#include +#endif + +#include "authority/authentication.h" +#include "hmdfs_server.h" +#include "comm/device_node.h" +#include "comm/message_verify.h" +#include "comm/protocol.h" +#include "comm/socket_adapter.h" +#include "hmdfs_merge_view.h" +#include "server_writeback.h" + +#include "comm/node_cb.h" +#include "stash.h" + +#define CREATE_TRACE_POINTS +#include "hmdfs_trace.h" + +#define HMDFS_BOOT_COOKIE_RAND_SHIFT 33 + +#define HMDFS_SB_SEQ_FROM 1 + +struct hmdfs_mount_priv { + const char *dev_name; + const char *raw_data; +}; + +struct syncfs_item { + struct list_head list; + struct completion done; + bool need_abort; +}; + +static DEFINE_IDA(hmdfs_sb_seq); + +static inline int hmdfs_alloc_sb_seq(void) +{ + return ida_simple_get(&hmdfs_sb_seq, HMDFS_SB_SEQ_FROM, 0, GFP_KERNEL); +} + +static inline void hmdfs_free_sb_seq(unsigned int seq) +{ + if (!seq) + return; + ida_simple_remove(&hmdfs_sb_seq, seq); +} + +static int hmdfs_xattr_local_get(struct dentry *dentry, const char *name, + void *value, size_t size) +{ + struct path lower_path; + ssize_t res = 0; + + hmdfs_get_lower_path(dentry, &lower_path); + res = vfs_getxattr(lower_path.dentry, name, value, size); + hmdfs_put_lower_path(&lower_path); + return res; +} + +static int hmdfs_xattr_remote_get(struct dentry *dentry, const char *name, + void *value, size_t size) +{ + struct inode *inode = d_inode(dentry); + struct hmdfs_inode_info *info = hmdfs_i(inode); + struct hmdfs_peer *conn = info->conn; + char *send_buf = NULL; + ssize_t res = 0; + + send_buf = hmdfs_get_dentry_relative_path(dentry); + if (!send_buf) + return -ENOMEM; + + res = hmdfs_send_getxattr(conn, send_buf, name, value, size); + kfree(send_buf); + return res; +} + +static int hmdfs_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, void *value, size_t size) +{ + int res = 0; + struct hmdfs_inode_info *info = hmdfs_i(inode); + size_t r_size = size; + + if (!hmdfs_support_xattr(dentry)) + return -EOPNOTSUPP; + + if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) + return -EOPNOTSUPP; + + if (size > HMDFS_XATTR_SIZE_MAX) + r_size = HMDFS_XATTR_SIZE_MAX; 
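+	/*
+	 * Clamp oversized requests to HMDFS_XATTR_SIZE_MAX; -ERANGE on
+	 * a clamped request is turned into -E2BIG below.
+	 */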
+
+	if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL)
+		res = hmdfs_xattr_local_get(dentry, name, value, r_size);
+	else
+		res = hmdfs_xattr_remote_get(dentry, name, value, r_size);
+
+	if (res == -ERANGE && r_size != size) {
+		hmdfs_info("xattr value size over %d is not supported",
+			   HMDFS_XATTR_SIZE_MAX);
+		res = -E2BIG;
+	}
+
+	return res;
+}
+
+static int hmdfs_xattr_local_set(struct dentry *dentry, const char *name,
+				 const void *value, size_t size, int flags)
+{
+	struct path lower_path;
+	int res = 0;
+
+	hmdfs_get_lower_path(dentry, &lower_path);
+	if (value) {
+		res = vfs_setxattr(lower_path.dentry, name, value, size, flags);
+	} else {
+		WARN_ON(flags != XATTR_REPLACE);
+		res = vfs_removexattr(lower_path.dentry, name);
+	}
+
+	hmdfs_put_lower_path(&lower_path);
+	return res;
+}
+
+static int hmdfs_xattr_remote_set(struct dentry *dentry, const char *name,
+				  const void *value, size_t size, int flags)
+{
+	struct inode *inode = d_inode(dentry);
+	struct hmdfs_inode_info *info = hmdfs_i(inode);
+	struct hmdfs_peer *conn = info->conn;
+	char *send_buf = NULL;
+	int res = 0;
+
+	send_buf = hmdfs_get_dentry_relative_path(dentry);
+	if (!send_buf)
+		return -ENOMEM;
+
+	res = hmdfs_send_setxattr(conn, send_buf, name, value, size, flags);
+	kfree(send_buf);
+	return res;
+}
+
+static int hmdfs_xattr_set(const struct xattr_handler *handler,
+			   struct dentry *dentry, struct inode *inode,
+			   const char *name, const void *value,
+			   size_t size, int flags)
+{
+	struct hmdfs_inode_info *info = hmdfs_i(inode);
+
+	if (!hmdfs_support_xattr(dentry))
+		return -EOPNOTSUPP;
+
+	if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
+		return -EOPNOTSUPP;
+
+	if (size > HMDFS_XATTR_SIZE_MAX) {
+		hmdfs_info("xattr value too long: %zu", size);
+		return -E2BIG;
+	}
+
+	if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL)
+		return hmdfs_xattr_local_set(dentry, name, value, size, flags);
+
+	return hmdfs_xattr_remote_set(dentry, name, value, size, flags);
+}
+
+const struct xattr_handler hmdfs_xattr_handler = {
+	.prefix = "", /* catch all */
+	.get = hmdfs_xattr_get,
+	.set = hmdfs_xattr_set,
+};
+
+static const struct xattr_handler *hmdfs_xattr_handlers[] = {
+	&hmdfs_xattr_handler,
+};
+
+#define HMDFS_NODE_EVT_CB_DELAY 2
+
+struct kmem_cache *hmdfs_inode_cachep;
+struct kmem_cache *hmdfs_dentry_cachep;
+
+static void i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+
+	kmem_cache_free(hmdfs_inode_cachep,
+			container_of(inode, struct hmdfs_inode_info,
+				     vfs_inode));
+}
+
+static void hmdfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, i_callback);
+}
+
+static void hmdfs_evict_inode(struct inode *inode)
+{
+	struct hmdfs_inode_info *info = hmdfs_i(inode);
+
+	truncate_inode_pages(&inode->i_data, 0);
+	clear_inode(inode);
+	if (info->inode_type == HMDFS_LAYER_FIRST_DEVICE ||
+	    info->inode_type == HMDFS_LAYER_SECOND_REMOTE)
+		return;
+	if (info->inode_type == HMDFS_LAYER_ZERO ||
+	    info->inode_type == HMDFS_LAYER_OTHER_LOCAL ||
+	    info->inode_type == HMDFS_LAYER_SECOND_LOCAL) {
+		iput(info->lower_inode);
+		info->lower_inode = NULL;
+	}
+}
+
+void hmdfs_put_super(struct super_block *sb)
+{
+	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
+	struct super_block *lower_sb = sbi->lower_sb;
+
+	hmdfs_info("local_dst is %s, local_src is %s", sbi->local_dst,
+		   sbi->local_src);
+
+	hmdfs_fault_inject_fini(&sbi->fault_inject);
+	hmdfs_cfn_destroy(sbi);
+	hmdfs_unregister_sysfs(sbi);
+	hmdfs_connections_stop(sbi);
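+	/* Connections are fully stopped; no peer can queue new work below. */
+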
hmdfs_destroy_server_writeback(sbi); + hmdfs_exit_stash(sbi); + atomic_dec(&lower_sb->s_active); + put_cred(sbi->cred); + if (sbi->system_cred) + put_cred(sbi->system_cred); + hmdfs_destroy_writeback(sbi); + kfree(sbi->local_src); + kfree(sbi->local_dst); + kfree(sbi->real_dst); + kfree(sbi->cache_dir); + kfifo_free(&sbi->notify_fifo); + sb->s_fs_info = NULL; + sbi->lower_sb = NULL; + hmdfs_release_sysfs(sbi); + /* After all access are completed */ + hmdfs_free_sb_seq(sbi->seq); + kfree(sbi->s_server_statis); + kfree(sbi->s_client_statis); + kfree(sbi); +} + +static struct inode *hmdfs_alloc_inode(struct super_block *sb) +{ + struct hmdfs_inode_info *gi = + kmem_cache_alloc(hmdfs_inode_cachep, GFP_KERNEL); + if (!gi) + return NULL; + memset(gi, 0, offsetof(struct hmdfs_inode_info, vfs_inode)); + INIT_LIST_HEAD(&gi->wb_list); + init_rwsem(&gi->wpage_sem); + gi->getattr_isize = HMDFS_STALE_REMOTE_ISIZE; + atomic64_set(&gi->write_counter, 0); + gi->fid.id = HMDFS_INODE_INVALID_FILE_ID; + spin_lock_init(&gi->fid_lock); + INIT_LIST_HEAD(&gi->wr_opened_node); + atomic_set(&gi->wr_opened_cnt, 0); + init_waitqueue_head(&gi->fid_wq); + INIT_LIST_HEAD(&gi->stash_node); + spin_lock_init(&gi->stash_lock); + return &gi->vfs_inode; +} + +static int hmdfs_remote_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + int error = 0; + int ret = 0; + char *dir_path = NULL; + char *name_path = NULL; + struct hmdfs_peer *con = NULL; + struct hmdfs_sb_info *sbi = hmdfs_sb(dentry->d_inode->i_sb); + + dir_path = hmdfs_get_dentry_relative_path(dentry->d_parent); + if (!dir_path) { + error = -EACCES; + goto rmdir_out; + } + + name_path = hmdfs_connect_path(dir_path, dentry->d_name.name); + if (!name_path) { + error = -EACCES; + goto rmdir_out; + } + mutex_lock(&sbi->connections.node_lock); + list_for_each_entry(con, &sbi->connections.node_list, list) { + if (con->status == NODE_STAT_ONLINE && + con->version > USERSPACE_MAX_VER) { + peer_get(con); + mutex_unlock(&sbi->connections.node_lock); + hmdfs_debug("send MSG to remote devID %llu", + con->device_id); + ret = hmdfs_send_statfs(con, name_path, buf); + if (ret != 0) + error = ret; + peer_put(con); + mutex_lock(&sbi->connections.node_lock); + } + } + mutex_unlock(&sbi->connections.node_lock); + +rmdir_out: + kfree(dir_path); + kfree(name_path); + return error; +} + +static int hmdfs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + int err = 0; + struct path lower_path; + struct hmdfs_inode_info *info = hmdfs_i(dentry->d_inode); + struct super_block *sb = d_inode(dentry)->i_sb; + struct hmdfs_sb_info *sbi = sb->s_fs_info; + + trace_hmdfs_statfs(dentry, info->inode_type); + // merge_view & merge_view/xxx & device_view assigned src_inode info + if (hmdfs_i_merge(info) || + (info->inode_type == HMDFS_LAYER_SECOND_REMOTE)) { + err = kern_path(sbi->local_src, 0, &lower_path); + if (err) + goto out; + err = vfs_statfs(&lower_path, buf); + path_put(&lower_path); + } else if (!IS_ERR_OR_NULL(info->lower_inode)) { + hmdfs_get_lower_path(dentry, &lower_path); + err = vfs_statfs(&lower_path, buf); + hmdfs_put_lower_path(&lower_path); + } else { + err = hmdfs_remote_statfs(dentry, buf); + } + + buf->f_type = HMDFS_SUPER_MAGIC; +out: + return err; +} + +static int hmdfs_show_options(struct seq_file *m, struct dentry *root) +{ + struct hmdfs_sb_info *sbi = hmdfs_sb(root->d_sb); + + if (sbi->s_case_sensitive) + seq_puts(m, ",sensitive"); + else + seq_puts(m, ",insensitive"); + + if (sbi->s_merge_switch) + seq_puts(m, ",merge_enable"); + else + seq_puts(m, 
",merge_disable"); + + seq_printf(m, ",ra_pages=%lu", root->d_sb->s_bdi->ra_pages); + + if (sbi->cache_dir) + seq_printf(m, ",cache_dir=%s", sbi->cache_dir); + if (sbi->real_dst) + seq_printf(m, ",real_dst=%s", sbi->real_dst); + + seq_printf(m, ",%soffline_stash", sbi->s_offline_stash ? "" : "no_"); + seq_printf(m, ",%sdentry_cache", sbi->s_dentry_cache ? "" : "no_"); + + return 0; +} + +static int hmdfs_sync_fs(struct super_block *sb, int wait) +{ + int time_left; + int err = 0; + struct hmdfs_peer *con = NULL; + struct hmdfs_sb_info *sbi = hmdfs_sb(sb); + int syncfs_timeout = get_cmd_timeout(sbi, F_SYNCFS); + struct syncfs_item item, *entry = NULL, *tmp = NULL; + + if (!wait) + return 0; + + trace_hmdfs_syncfs_enter(sbi); + + spin_lock(&sbi->hsi.list_lock); + if (!sbi->hsi.is_executing) { + sbi->hsi.is_executing = true; + item.need_abort = false; + spin_unlock(&sbi->hsi.list_lock); + } else { + init_completion(&item.done); + list_add_tail(&item.list, &sbi->hsi.wait_list); + spin_unlock(&sbi->hsi.list_lock); + wait_for_completion(&item.done); + } + + if (item.need_abort) + goto out; + + /* + * Syncfs can not concurrent in hmdfs_sync_fs. Because we should make + * sure all remote syncfs calls return back or timeout by waiting, + * during the waiting period we must protect @sbi->remote_syncfs_count + * and @sbi->remote_syncfs_ret from concurrent executing. + */ + + spin_lock(&sbi->hsi.v_lock); + sbi->hsi.version++; + /* + * Attention: We put @sbi->hsi.remote_ret and @sbi->hsi.wait_count + * into spinlock protection area to avoid following scenario caused + * by out-of-order execution: + * + * synfs syncfs_cb + * sbi->hsi.remote_ret = 0; + * atomic_set(&sbi->hsi.wait_count, 0); + * lock + * version == old_version + * sbi->hsi.remote_ret = resp->ret_code + * atomic_dec(&sbi->hsi.wait_count); + * unlock + * lock + * version = old_version + 1 + * unlock + * + * @sbi->hsi.remote_ret and @sbi->hsi.wait_count can be assigned + * before spin lock which may compete with syncfs_cb(), making + * these two values' assignment protected by spinlock can fix this. + */ + sbi->hsi.remote_ret = 0; + atomic_set(&sbi->hsi.wait_count, 0); + spin_unlock(&sbi->hsi.v_lock); + + mutex_lock(&sbi->connections.node_lock); + list_for_each_entry(con, &sbi->connections.node_list, list) { + /* + * Dirty data does not need to be synchronized to remote + * devices that go offline normally. It's okay to drop + * them. + */ + if (con->status != NODE_STAT_ONLINE) + continue; + + peer_get(con); + mutex_unlock(&sbi->connections.node_lock); + + /* + * There exists a gap between sync_inodes_sb() and sync_fs() + * which may race with remote writing, leading error count + * on @sb_dirty_count. The dirty data produced during the + * gap period won't be synced in next syncfs operation. + * To avoid this, we have to invoke sync_inodes_sb() again + * after getting @con->sb_dirty_count. 
+ */ + con->old_sb_dirty_count = atomic64_read(&con->sb_dirty_count); + sync_inodes_sb(sb); + + if (!con->old_sb_dirty_count) { + peer_put(con); + mutex_lock(&sbi->connections.node_lock); + continue; + } + + err = hmdfs_send_syncfs(con, syncfs_timeout); + if (err) { + hmdfs_warning("send syncfs failed with %d on node %llu", + err, con->device_id); + sbi->hsi.remote_ret = err; + peer_put(con); + mutex_lock(&sbi->connections.node_lock); + continue; + } + + atomic_inc(&sbi->hsi.wait_count); + + peer_put(con); + mutex_lock(&sbi->connections.node_lock); + } + mutex_unlock(&sbi->connections.node_lock); + + /* + * Async work in background will make sure @sbi->remote_syncfs_count + * decreased to zero finally whether syncfs success or fail. + */ + time_left = wait_event_interruptible( + sbi->hsi.wq, atomic_read(&sbi->hsi.wait_count) == 0); + if (time_left < 0) { + hmdfs_warning("syncfs is interrupted by external signal"); + err = -EINTR; + } + + if (!err && sbi->hsi.remote_ret) + err = sbi->hsi.remote_ret; + + /* Abandon syncfs processes in pending_list */ + list_for_each_entry_safe(entry, tmp, &sbi->hsi.pending_list, list) { + entry->need_abort = true; + complete(&entry->done); + } + INIT_LIST_HEAD(&sbi->hsi.pending_list); + + /* Pick the last syncfs process in wait_list */ + spin_lock(&sbi->hsi.list_lock); + if (list_empty(&sbi->hsi.wait_list)) { + sbi->hsi.is_executing = false; + } else { + entry = list_last_entry(&sbi->hsi.wait_list, struct syncfs_item, + list); + list_del_init(&entry->list); + list_splice_init(&sbi->hsi.wait_list, &sbi->hsi.pending_list); + entry->need_abort = false; + complete(&entry->done); + } + spin_unlock(&sbi->hsi.list_lock); + +out: + trace_hmdfs_syncfs_exit(sbi, atomic_read(&sbi->hsi.wait_count), + get_cmd_timeout(sbi, F_SYNCFS), err); + + /* TODO: Return synfs err back to syscall */ + + return err; +} + +struct super_operations hmdfs_sops = { + .alloc_inode = hmdfs_alloc_inode, + .destroy_inode = hmdfs_destroy_inode, + .evict_inode = hmdfs_evict_inode, + .put_super = hmdfs_put_super, + .statfs = hmdfs_statfs, + .show_options = hmdfs_show_options, + .sync_fs = hmdfs_sync_fs, +}; + +static void init_once(void *obj) +{ + struct hmdfs_inode_info *i = obj; + + inode_init_once(&i->vfs_inode); +} + +static int __init hmdfs_init_caches(void) +{ + int err = -ENOMEM; + + hmdfs_inode_cachep = + kmem_cache_create("hmdfs_inode_cache", + sizeof(struct hmdfs_inode_info), 0, + SLAB_RECLAIM_ACCOUNT, init_once); + if (unlikely(!hmdfs_inode_cachep)) + goto out; + hmdfs_dentry_cachep = + kmem_cache_create("hmdfs_dentry_cache", + sizeof(struct hmdfs_dentry_info), 0, + SLAB_RECLAIM_ACCOUNT, NULL); + if (unlikely(!hmdfs_dentry_cachep)) + goto out_des_ino; + hmdfs_dentry_merge_cachep = + kmem_cache_create("hmdfs_dentry_merge_cache", + sizeof(struct hmdfs_dentry_info_merge), 0, + SLAB_RECLAIM_ACCOUNT, NULL); + if (unlikely(!hmdfs_dentry_merge_cachep)) + goto out_des_dc; + return 0; + +out_des_dc: + kmem_cache_destroy(hmdfs_dentry_cachep); +out_des_ino: + kmem_cache_destroy(hmdfs_inode_cachep); +out: + return err; +} + +static void hmdfs_destroy_caches(void) +{ + rcu_barrier(); + kmem_cache_destroy(hmdfs_inode_cachep); + hmdfs_inode_cachep = NULL; + kmem_cache_destroy(hmdfs_dentry_cachep); + hmdfs_dentry_cachep = NULL; + kmem_cache_destroy(hmdfs_dentry_merge_cachep); + hmdfs_dentry_merge_cachep = NULL; +} + +uint64_t path_hash(const char *path, int len, bool case_sense) +{ + uint64_t res = 0; + const char *kp = path; + char c; + /* Mocklisp hash function. 
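+	 * Each step computes res = res * 31 + c (written as
+	 * (res << 5) - res + c), the classic polynomial string hash,
+	 * with optional case folding.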
*/ + while (*kp) { + c = *kp; + if (!case_sense) + c = tolower(c); + res = (res << 5) - res + (uint64_t)(c); + kp++; + } + return res; +} + +static char *get_full_path(struct path *path) +{ + char *buf, *tmp; + char *ret = NULL; + + buf = kmalloc(PATH_MAX, GFP_KERNEL); + if (!buf) + goto out; + + tmp = d_path(path, buf, PATH_MAX); + if (IS_ERR(tmp)) + goto out; + + ret = kstrdup(tmp, GFP_KERNEL); +out: + kfree(buf); + return ret; +} + +static void hmdfs_init_cmd_timeout(struct hmdfs_sb_info *sbi) +{ + memset(sbi->s_cmd_timeout, 0xff, sizeof(sbi->s_cmd_timeout)); + + set_cmd_timeout(sbi, F_OPEN, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_RELEASE, TIMEOUT_NONE); + set_cmd_timeout(sbi, F_READPAGE, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_WRITEPAGE, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_ITERATE, TIMEOUT_30S); + set_cmd_timeout(sbi, F_CREATE, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_MKDIR, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_RMDIR, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_UNLINK, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_RENAME, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_SETATTR, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_STATFS, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_CONNECT_REKEY, TIMEOUT_NONE); + set_cmd_timeout(sbi, F_DROP_PUSH, TIMEOUT_NONE); + set_cmd_timeout(sbi, F_GETATTR, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_FSYNC, TIMEOUT_90S); + set_cmd_timeout(sbi, F_SYNCFS, TIMEOUT_30S); + set_cmd_timeout(sbi, F_GETXATTR, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_SETXATTR, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_LISTXATTR, TIMEOUT_COMMON); +} + +static int hmdfs_init_sbi(struct hmdfs_sb_info *sbi) +{ + int ret; + + ret = kfifo_alloc(&sbi->notify_fifo, PAGE_SIZE, GFP_KERNEL); + if (ret) + goto out; + + /* + * We have to use dynamic memory since struct server/client_statistic + * are DECLARED in hmdfs.h but DEFINED in socket_adapter.h. 
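+	 * Embedding fixed-size arrays in struct hmdfs_sb_info would
+	 * require the complete types to be visible in hmdfs.h.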
+ */ + sbi->s_server_statis = + kzalloc(sizeof(*sbi->s_server_statis) * F_SIZE, GFP_KERNEL); + sbi->s_client_statis = + kzalloc(sizeof(*sbi->s_client_statis) * F_SIZE, GFP_KERNEL); + if (!sbi->s_server_statis || !sbi->s_client_statis) { + ret = -ENOMEM; + goto out; + } + + ret = hmdfs_alloc_sb_seq(); + if (ret < 0) { + hmdfs_err("no sb seq available err %d", ret); + goto out; + } + sbi->seq = ret; + ret = 0; + + spin_lock_init(&sbi->notify_fifo_lock); + sbi->s_case_sensitive = false; + sbi->s_features = HMDFS_FEATURE_READPAGES | + HMDFS_FEATURE_READPAGES_OPEN | + HMDFS_ATOMIC_OPEN; + sbi->s_merge_switch = false; + sbi->dcache_threshold = DEFAULT_DCACHE_THRESHOLD; + sbi->dcache_precision = DEFAULT_DCACHE_PRECISION; + sbi->dcache_timeout = DEFAULT_DCACHE_TIMEOUT; + sbi->write_cache_timeout = DEFAULT_WRITE_CACHE_TIMEOUT; + hmdfs_init_cmd_timeout(sbi); + sbi->async_cb_delay = HMDFS_NODE_EVT_CB_DELAY; + sbi->async_req_max_active = DEFAULT_SRV_REQ_MAX_ACTIVE; + sbi->s_offline_stash = true; + sbi->s_dentry_cache = true; + sbi->wb_timeout_ms = HMDFS_DEF_WB_TIMEOUT_MS; + /* Initialize before hmdfs_register_sysfs() */ + atomic_set(&sbi->connections.conn_seq, 0); + mutex_init(&sbi->connections.node_lock); + INIT_LIST_HEAD(&sbi->connections.node_list); + + init_waitqueue_head(&sbi->async_readdir_wq); + INIT_LIST_HEAD(&sbi->async_readdir_msg_list); + INIT_LIST_HEAD(&sbi->async_readdir_work_list); + spin_lock_init(&sbi->async_readdir_msg_lock); + spin_lock_init(&sbi->async_readdir_work_lock); + + return 0; + +out: + return ret; +} + +void hmdfs_client_resp_statis(struct hmdfs_sb_info *sbi, u8 cmd, + enum hmdfs_resp_type type, unsigned long start, + unsigned long end) +{ + unsigned long duration; + + switch (type) { + case HMDFS_RESP_DELAY: + sbi->s_client_statis[cmd].delay_resp_cnt++; + break; + case HMDFS_RESP_TIMEOUT: + sbi->s_client_statis[cmd].timeout_cnt++; + break; + case HMDFS_RESP_NORMAL: + duration = end - start; + sbi->s_client_statis[cmd].total += duration; + sbi->s_client_statis[cmd].resp_cnt++; + if (sbi->s_client_statis[cmd].max < duration) + sbi->s_client_statis[cmd].max = duration; + break; + default: + hmdfs_err("Wrong cmd %d with resp type %d", cmd, type); + } +} + +static int hmdfs_update_dst(struct hmdfs_sb_info *sbi) +{ + int err = 0; + const char *path_local = UPDATE_LOCAL_DST; + int len = 0; + + sbi->real_dst = kstrdup(sbi->local_dst, GFP_KERNEL); + if (!sbi->real_dst) { + err = -ENOMEM; + goto out_err; + } + kfree(sbi->local_dst); + + len = strlen(sbi->real_dst) + strlen(path_local) + 1; + if (len > PATH_MAX) { + err = -EINVAL; + goto out_err; + } + sbi->local_dst = kmalloc(len, GFP_KERNEL); + if (!sbi->local_dst) { + err = -ENOMEM; + goto out_err; + } + snprintf(sbi->local_dst, strlen(sbi->real_dst) + strlen(path_local) + 1, + "%s%s", sbi->real_dst, path_local); +out_err: + return err; +} + +/* + * Generate boot cookie like following format: + * + * | random | boot time(ms) | 0x00 | + * |--------|-----------------|-------| + * 16 33 15 (bits) + * + * This will make sure boot cookie is unique in a period + * 2^33 / 1000 / 3600 / 24 = 99.4(days). 
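+ * (2^33 ms = 8589934592 ms; divided by 1000, 3600 and 24 this is
+ * roughly 99.4 days before the boot-time field wraps.)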
+ */ +uint64_t hmdfs_gen_boot_cookie(void) +{ + uint64_t now; + uint16_t rand; + + now = ktime_to_ms(ktime_get()); + prandom_bytes(&rand, sizeof(rand)); + + now &= (1ULL << HMDFS_BOOT_COOKIE_RAND_SHIFT) - 1; + now |= ((uint64_t)rand << HMDFS_BOOT_COOKIE_RAND_SHIFT); + + return now << HMDFS_FID_VER_BOOT_COOKIE_SHIFT; +} + +static int hmdfs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct hmdfs_mount_priv *priv = (struct hmdfs_mount_priv *)data; + const char *dev_name = priv->dev_name; + const char *raw_data = priv->raw_data; + struct hmdfs_sb_info *sbi; + int err = 0; + struct inode *root_inode; + struct path lower_path; + struct super_block *lower_sb; + struct dentry *root_dentry; + char ctrl_path[CTRL_PATH_MAX_LEN]; + uint64_t ctrl_hash; + + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) { + err = -ENOMEM; + goto out_err; + } + err = hmdfs_init_sbi(sbi); + if (err) + goto out_freesbi; + sbi->sb = sb; + err = hmdfs_parse_options(sbi, raw_data); + if (err) + goto out_freesbi; + + sb->s_fs_info = sbi; + sb->s_magic = HMDFS_SUPER_MAGIC; + sb->s_xattr = hmdfs_xattr_handlers; + sb->s_op = &hmdfs_sops; + + sbi->boot_cookie = hmdfs_gen_boot_cookie(); + + err = hmdfs_init_writeback(sbi); + if (err) + goto out_freesbi; + err = hmdfs_init_server_writeback(sbi); + if (err) + goto out_freesbi; + + err = hmdfs_init_stash(sbi); + if (err) + goto out_freesbi; + + // add ctrl sysfs node + ctrl_hash = path_hash(sbi->local_dst, strlen(sbi->local_dst), true); + scnprintf(ctrl_path, CTRL_PATH_MAX_LEN, "%llu", ctrl_hash); + hmdfs_debug("hash %llu", ctrl_hash); + err = hmdfs_register_sysfs(ctrl_path, sbi); + if (err) + goto out_freesbi; + + err = hmdfs_update_dst(sbi); + if (err) + goto out_unreg_sysfs; + + err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, + &lower_path); + if (err) { + hmdfs_err("open dev failed, errno = %d", err); + goto out_unreg_sysfs; + } + + lower_sb = lower_path.dentry->d_sb; + atomic_inc(&lower_sb->s_active); + sbi->lower_sb = lower_sb; + sbi->local_src = get_full_path(&lower_path); + if (!sbi->local_src) { + hmdfs_err("get local_src failed!"); + goto out_sput; + } + + sb->s_time_gran = lower_sb->s_time_gran; + sb->s_maxbytes = lower_sb->s_maxbytes; + sb->s_stack_depth = lower_sb->s_stack_depth + 1; + if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { + hmdfs_err("maximum fs stacking depth exceeded"); + err = -EINVAL; + goto out_sput; + } + root_inode = fill_root_inode(sb, d_inode(lower_path.dentry)); + if (IS_ERR(root_inode)) { + err = PTR_ERR(root_inode); + goto out_sput; + } + hmdfs_root_inode_perm_init(root_inode); + sb->s_root = root_dentry = d_make_root(root_inode); + if (!root_dentry) { + err = -ENOMEM; + goto out_sput; + } + + err = init_hmdfs_dentry_info(sbi, root_dentry, HMDFS_LAYER_ZERO); + if (err) + goto out_freeroot; + hmdfs_set_lower_path(root_dentry, &lower_path); + d_rehash(sb->s_root); + sbi->cred = get_cred(current_cred()); + INIT_LIST_HEAD(&sbi->client_cache); + INIT_LIST_HEAD(&sbi->server_cache); + INIT_LIST_HEAD(&sbi->to_delete); + mutex_init(&sbi->cache_list_lock); + hmdfs_cfn_load(sbi); + + /* Initialize syncfs info */ + spin_lock_init(&sbi->hsi.v_lock); + init_waitqueue_head(&sbi->hsi.wq); + sbi->hsi.version = 0; + sbi->hsi.is_executing = false; + INIT_LIST_HEAD(&sbi->hsi.wait_list); + INIT_LIST_HEAD(&sbi->hsi.pending_list); + spin_lock_init(&sbi->hsi.list_lock); + hmdfs_fault_inject_init(&sbi->fault_inject, ctrl_path); + + return err; +out_freeroot: + dput(sb->s_root); + sb->s_root = NULL; +out_sput: + 
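+	/* Drop the s_active reference taken after kern_path() above. */
+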
atomic_dec(&lower_sb->s_active); + path_put(&lower_path); +out_unreg_sysfs: + hmdfs_unregister_sysfs(sbi); + hmdfs_release_sysfs(sbi); +out_freesbi: + if (sbi) { + sb->s_fs_info = NULL; + hmdfs_exit_stash(sbi); + hmdfs_destroy_writeback(sbi); + hmdfs_destroy_server_writeback(sbi); + kfifo_free(&sbi->notify_fifo); + hmdfs_free_sb_seq(sbi->seq); + kfree(sbi->local_src); + kfree(sbi->local_dst); + kfree(sbi->real_dst); + kfree(sbi->cache_dir); + kfree(sbi->s_server_statis); + kfree(sbi->s_client_statis); + kfree(sbi); + } +out_err: + return err; +} + +static struct dentry *hmdfs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) +{ + struct hmdfs_mount_priv priv = { + .dev_name = dev_name, + .raw_data = raw_data, + }; + return mount_nodev(fs_type, flags, &priv, hmdfs_fill_super); +} + + +static void hmdfs_cancel_async_readdir(struct hmdfs_sb_info *sbi) +{ + struct sendmsg_wait_queue *msg_wq = NULL; + struct hmdfs_readdir_work *rw = NULL; + struct hmdfs_readdir_work *tmp = NULL; + struct list_head del_work; + + /* cancel work that are not running */ + + INIT_LIST_HEAD(&del_work); + spin_lock(&sbi->async_readdir_work_lock); + list_for_each_entry_safe(rw, tmp, &sbi->async_readdir_work_list, head) { + if (cancel_delayed_work(&rw->dwork)) + list_move(&rw->head, &del_work); + } + spin_unlock(&sbi->async_readdir_work_lock); + + list_for_each_entry_safe(rw, tmp, &del_work, head) { + dput(rw->dentry); + peer_put(rw->con); + kfree(rw); + } + + /* wake up async readdir that are waiting for remote */ + spin_lock(&sbi->async_readdir_msg_lock); + sbi->async_readdir_prohibit = true; + list_for_each_entry(msg_wq, &sbi->async_readdir_msg_list, async_msg) + hmdfs_response_wakeup(msg_wq, -EINTR, 0, NULL); + spin_unlock(&sbi->async_readdir_msg_lock); + + /* wait for all async readdir to finish */ + if (!list_empty(&sbi->async_readdir_work_list)) + wait_event_interruptible_timeout(sbi->async_readdir_wq, + (list_empty(&sbi->async_readdir_work_list)), HZ); + + WARN_ON(!(list_empty(&sbi->async_readdir_work_list))); +} + +static void hmdfs_kill_super(struct super_block *sb) +{ + struct hmdfs_sb_info *sbi = hmdfs_sb(sb); + + /* + * async readdir is holding ref for dentry, not for vfsmount. Thus + * shrink_dcache_for_umount() will warn about dentry still in use + * if async readdir is not done. 
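+	 * Cancel pending readdir work and wait for in-flight requests
+	 * to drain before the generic teardown runs.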
+ */
+	if (sbi)
+		hmdfs_cancel_async_readdir(sbi);
+	kill_anon_super(sb);
+}
+
+static struct file_system_type hmdfs_fs_type = {
+	.owner = THIS_MODULE,
+	.name = "hmdfs",
+	.mount = hmdfs_mount,
+	.kill_sb = hmdfs_kill_super,
+};
+
+static int __init hmdfs_init(void)
+{
+	int err = 0;
+
+	err = hmdfs_init_caches();
+	if (err)
+		goto out_err;
+
+	hmdfs_node_evt_cb_init();
+
+	hmdfs_stash_add_node_evt_cb();
+	hmdfs_client_add_node_evt_cb();
+	hmdfs_server_add_node_evt_cb();
+
+	err = register_filesystem(&hmdfs_fs_type);
+	if (err) {
+		hmdfs_err("hmdfs register failed!");
+		goto out_err;
+	}
+	err = hmdfs_sysfs_init();
+	if (err)
+		goto out_err;
+
+	hmdfs_message_verify_init();
+	hmdfs_create_debugfs_root();
+	return 0;
+out_err:
+	hmdfs_sysfs_exit();
+	unregister_filesystem(&hmdfs_fs_type);
+	hmdfs_destroy_caches();
+	hmdfs_err("hmdfs init failed!");
+	return err;
+}
+
+static void __exit hmdfs_exit(void)
+{
+	hmdfs_destroy_debugfs_root();
+	hmdfs_sysfs_exit();
+	unregister_filesystem(&hmdfs_fs_type);
+	ida_destroy(&hmdfs_sb_seq);
+	hmdfs_destroy_caches();
+	hmdfs_info("hmdfs exited!");
+}
+
+module_init(hmdfs_init);
+module_exit(hmdfs_exit);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(hmdfs_recv_mesg_callback);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("LongPing.WEI, Jingjing.Mao");
+MODULE_DESCRIPTION("Harmony distributed file system");
diff --git a/fs/hmdfs/server_writeback.c b/fs/hmdfs/server_writeback.c
new file mode 100644
index 000000000000..b3a18ff67691
--- /dev/null
+++ b/fs/hmdfs/server_writeback.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/server_writeback.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include
+#include
+#include
+
+#include "hmdfs.h"
+#include "hmdfs_trace.h"
+#include "server_writeback.h"
+
+#define HMDFS_SRV_WB_DEF_DIRTY_THRESH 50UL
+
+static void hmdfs_srv_wb_handler(struct work_struct *work)
+{
+	struct hmdfs_server_writeback *hswb = container_of(work,
+			struct hmdfs_server_writeback,
+			dirty_sb_writeback_work);
+	struct super_block *lower_sb = hswb->sbi->lower_sb;
+	int dirty_pages;
+
+	if (writeback_in_progress(&lower_sb->s_bdi->wb) ||
+	    !down_read_trylock(&lower_sb->s_umount))
+		return;
+
+	dirty_pages = hswb->dirty_nr_pages_to_wb;
+	writeback_inodes_sb_nr(lower_sb, dirty_pages, WB_REASON_FS_FREE_SPACE);
+	up_read(&lower_sb->s_umount);
+
+	trace_hmdfs_start_srv_wb(hswb->sbi, dirty_pages, hswb->dirty_thresh_pg);
+}
+
+void hmdfs_server_check_writeback(struct hmdfs_server_writeback *hswb)
+{
+	unsigned long old_time, now;
+	int dirty_nr_pages;
+
+	old_time = hswb->last_reset_time;
+	now = jiffies;
+	dirty_nr_pages = atomic_inc_return(&hswb->dirty_nr_pages);
+	if (time_after(now, old_time + HZ) &&
+	    cmpxchg(&hswb->last_reset_time, old_time, now) == old_time) {
+		/*
+		 * We calculate the page-dirtying speed to handle the
+		 * following situations:
+		 *
+		 * 1. Dense writing, average page writing speed
+		 *    exceeds @hswb->dirty_thresh_pg:
+		 *    0-1s 100MB
+		 * 2. Sporadic writing, average page writing speed
+		 *    is below @hswb->dirty_thresh_pg:
+		 *    0-0.1s 40MB
+		 *    3.1-3.2s 20MB
+		 */
+		unsigned int writepage_speed;
+
+		writepage_speed = dirty_nr_pages / ((now - old_time) / HZ);
+		if (writepage_speed >= hswb->dirty_thresh_pg) {
+			/*
+			 * Writeback @hswb->dirty_nr_pages_to_wb pages in
+			 * server-writeback work. If the work is delayed past
+			 * 1s, @hswb->dirty_nr_pages_to_wb could be assigned
+			 * another new value (e.g. 60MB); the old value (e.g.
+			 * 80MB) will be overwritten, which means 80MB of data
+			 * will be omitted from writeback. We can tolerate
+			 * this: if the previous work has not completed, the
+			 * writeback pressure is already too high and queuing
+			 * more work is meaningless.
+			 */
+			hswb->dirty_nr_pages_to_wb = dirty_nr_pages;
+			/*
+			 * There are 3 conditions to trigger queuing work:
+			 *
+			 * A. Server successfully handles writepage for client
+			 * B. Every 1 second interval
+			 * C. The page-dirtying speed exceeds @dirty_thresh_pg
+			 */
+			queue_work(hswb->dirty_writeback_wq,
+				   &hswb->dirty_sb_writeback_work);
+		}
+
+		/*
+		 * The number of dirty pages from remote clients need not
+		 * be accounted precisely; a count bumped by another
+		 * process in the gap between the increment and the
+		 * zeroing may simply be lost.
+		 */
+		atomic_set(&hswb->dirty_nr_pages, 0);
+	}
+}
+
+void hmdfs_destroy_server_writeback(struct hmdfs_sb_info *sbi)
+{
+	if (!sbi->h_swb)
+		return;
+
+	flush_work(&sbi->h_swb->dirty_sb_writeback_work);
+	destroy_workqueue(sbi->h_swb->dirty_writeback_wq);
+	kfree(sbi->h_swb);
+	sbi->h_swb = NULL;
+}
+
+int hmdfs_init_server_writeback(struct hmdfs_sb_info *sbi)
+{
+	struct hmdfs_server_writeback *hswb;
+	char name[HMDFS_WQ_NAME_LEN];
+
+	hswb = kzalloc(sizeof(struct hmdfs_server_writeback), GFP_KERNEL);
+	if (!hswb)
+		return -ENOMEM;
+
+	hswb->sbi = sbi;
+	hswb->dirty_writeback_control = true;
+	hswb->dirty_thresh_pg = HMDFS_SRV_WB_DEF_DIRTY_THRESH <<
+				HMDFS_MB_TO_PAGE_SHIFT;
+	atomic_set(&hswb->dirty_nr_pages, 0);
+	hswb->last_reset_time = jiffies;
+
+	snprintf(name, sizeof(name), "dfs_srv_wb%u", sbi->seq);
+	hswb->dirty_writeback_wq = create_singlethread_workqueue(name);
+	if (!hswb->dirty_writeback_wq) {
+		hmdfs_err("Failed to create server writeback workqueue!");
+		kfree(hswb);
+		return -ENOMEM;
+	}
+	INIT_WORK(&hswb->dirty_sb_writeback_work, hmdfs_srv_wb_handler);
+	sbi->h_swb = hswb;
+
+	return 0;
+}
+
diff --git a/fs/hmdfs/server_writeback.h b/fs/hmdfs/server_writeback.h
new file mode 100644
index 000000000000..eb645e6391e9
--- /dev/null
+++ b/fs/hmdfs/server_writeback.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/server_writeback.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef SERVER_WRITEBACK_H
+#define SERVER_WRITEBACK_H
+
+#include "hmdfs.h"
+
+#define HMDFS_MB_TO_PAGE_SHIFT (20 - HMDFS_PAGE_OFFSET)
+
+struct hmdfs_server_writeback {
+	struct hmdfs_sb_info *sbi;
+	/* Enable hmdfs server dirty writeback control */
+	bool dirty_writeback_control;
+
+	/* Current # of dirty pages from remote client in recent 1s */
+	atomic_t dirty_nr_pages;
+	/* Current # of dirty pages to writeback */
+	int dirty_nr_pages_to_wb;
+	/* Dirty thresh(Dirty data pages in 1s) to trigger wb */
+	unsigned int dirty_thresh_pg;
+	/* Last reset timestamp(in jiffies) for @dirty_nr_pages */
+	unsigned long last_reset_time;
+
+	struct workqueue_struct *dirty_writeback_wq;
+	/* Per-fs pages from client writeback work */
+	struct work_struct dirty_sb_writeback_work;
+};
+
+void hmdfs_server_check_writeback(struct hmdfs_server_writeback *hswb);
+
+void hmdfs_destroy_server_writeback(struct hmdfs_sb_info *sbi);
+
+int hmdfs_init_server_writeback(struct hmdfs_sb_info *sbi);
+
+#endif
diff --git a/fs/hmdfs/stash.c b/fs/hmdfs/stash.c
new file mode 100644
index 000000000000..c320af7f60e0
--- /dev/null
+++ b/fs/hmdfs/stash.c
@@ -0,0 +1,2247 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/stash.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
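+ *
+ * Stash dirty pages of write-opened remote files into local cache
+ * files when a peer goes offline, and restore them when the peer
+ * comes back.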
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "stash.h" +#include "comm/node_cb.h" +#include "comm/protocol.h" +#include "comm/connection.h" +#include "file_remote.h" +#include "hmdfs_dentryfile.h" +#include "authority/authentication.h" + +/* Head magic used to identify a stash file */ +#define HMDFS_STASH_FILE_HEAD_MAGIC 0xF7AB06C3 +/* Head and path in stash file are aligned with HMDFS_STASH_BLK_SIZE */ +#define HMDFS_STASH_BLK_SIZE 4096 +#define HMDFS_STASH_BLK_SHIFT 12 +#define HMDFS_STASH_PAGE_TO_SECTOR_SHIFT 3 +#define HMDFS_STASH_DIR_NAME "stash" +#define HMDFS_STASH_FMT_DIR_NAME "v1" +#define HMDFS_STASH_WORK_DIR_NAME \ + (HMDFS_STASH_DIR_NAME "/" HMDFS_STASH_FMT_DIR_NAME) + +#define HMDFS_STASH_FILE_NAME_LEN 20 + +#define HMDFS_STASH_FLUSH_CNT 2 + +#define HMDFS_STASH_PATH_LEN (HMDFS_CID_SIZE + HMDFS_STASH_FILE_NAME_LEN + 1) + +struct hmdfs_cache_file_head { + __le32 magic; + __le32 crc_offset; + __le64 ino; + __le64 size; + __le64 blocks; + __le64 last_write_pos; + __le64 ctime; + __le32 ctime_nsec; + __le32 change_detect_cap; + __le64 ichange_count; + __le32 path_offs; + __le32 path_len; + __le32 path_cnt; + __le32 data_offs; + /* Attention: expand new fields in here to compatible with old ver */ + __le32 crc32; +} __packed; + +struct hmdfs_stash_work { + struct hmdfs_peer *conn; + struct list_head *list; + struct work_struct work; + struct completion done; +}; + +struct hmdfs_inode_tbl { + unsigned int cnt; + unsigned int max; + uint64_t inodes[0]; +}; + +struct hmdfs_stash_dir_context { + struct dir_context dctx; + char name[NAME_MAX + 1]; + struct hmdfs_inode_tbl *tbl; +}; + +struct hmdfs_restore_stats { + unsigned int succeed; + unsigned int fail; + unsigned int keep; + unsigned long long ok_pages; + unsigned long long fail_pages; +}; + +struct hmdfs_stash_stats { + unsigned int succeed; + unsigned int donothing; + unsigned int fail; + unsigned long long ok_pages; + unsigned long long fail_pages; +}; + +struct hmdfs_file_restore_ctx { + struct hmdfs_peer *conn; + struct path src_dir_path; + struct path dst_root_path; + char *dst; + char *page; + struct file *src_filp; + uint64_t inum; + uint64_t pages; + unsigned int seq; + unsigned int data_offs; + /* output */ + bool keep; +}; + +struct hmdfs_copy_args { + struct file *src; + struct file *dst; + void *buf; + size_t buf_len; + unsigned int seq; + unsigned int data_offs; + uint64_t inum; +}; + +struct hmdfs_copy_ctx { + struct hmdfs_copy_args args; + loff_t src_pos; + loff_t dst_pos; + /* output */ + size_t copied; + bool eof; +}; + +struct hmdfs_rebuild_stats { + unsigned int succeed; + unsigned int total; + unsigned int fail; + unsigned int invalid; +}; + +struct hmdfs_check_work { + struct hmdfs_peer *conn; + struct work_struct work; + struct completion done; +}; + +typedef int (*stash_operation_func)(struct hmdfs_peer *, + unsigned int, + struct path *, + const struct hmdfs_inode_tbl *, + void *); + +static struct dentry *hmdfs_do_vfs_mkdir(struct dentry *parent, + const char *name, int namelen, + umode_t mode) +{ + struct inode *dir = d_inode(parent); + struct dentry *child = NULL; + int err; + + inode_lock_nested(dir, I_MUTEX_PARENT); + + child = lookup_one_len(name, parent, namelen); + if (IS_ERR(child)) + goto out; + + if (d_is_positive(child)) { + if (d_can_lookup(child)) + goto out; + + dput(child); + child = ERR_PTR(-EINVAL); + goto out; + } + + err = vfs_mkdir(dir, child, mode); + if (err) { + dput(child); + 
child = ERR_PTR(err); + goto out; + } + +out: + inode_unlock(dir); + return child; +} + +struct dentry *hmdfs_stash_new_work_dir(struct dentry *parent) +{ + struct dentry *base = NULL; + struct dentry *work = NULL; + + base = hmdfs_do_vfs_mkdir(parent, HMDFS_STASH_DIR_NAME, + strlen(HMDFS_STASH_DIR_NAME), 0700); + if (IS_ERR(base)) + return base; + + work = hmdfs_do_vfs_mkdir(base, HMDFS_STASH_FMT_DIR_NAME, + strlen(HMDFS_STASH_FMT_DIR_NAME), 0700); + dput(base); + + return work; +} + +static struct file *hmdfs_new_stash_file(struct path *d_path, const char *cid) +{ + struct dentry *parent = NULL; + struct dentry *child = NULL; + struct file *filp = NULL; + struct path stash; + int err; + + parent = hmdfs_do_vfs_mkdir(d_path->dentry, cid, strlen(cid), 0700); + if (IS_ERR(parent)) { + err = PTR_ERR(parent); + hmdfs_err("mkdir error %d", err); + goto mkdir_err; + } + + child = vfs_tmpfile(parent, S_IFREG | 0600, 0); + if (IS_ERR(child)) { + err = PTR_ERR(child); + hmdfs_err("new stash file error %d", err); + goto tmpfile_err; + } + + stash.mnt = d_path->mnt; + stash.dentry = child; + filp = dentry_open(&stash, O_LARGEFILE | O_WRONLY, current_cred()); + if (IS_ERR(filp)) { + err = PTR_ERR(filp); + hmdfs_err("open stash file error %d", err); + goto open_err; + } + + dput(child); + dput(parent); + + return filp; + +open_err: + dput(child); +tmpfile_err: + dput(parent); +mkdir_err: + return ERR_PTR(err); +} + +static inline bool hmdfs_is_dir(struct dentry *child) +{ + return d_is_positive(child) && d_can_lookup(child); +} + +static inline bool hmdfs_is_reg(struct dentry *child) +{ + return d_is_positive(child) && d_is_reg(child); +} + +static void hmdfs_set_stash_file_head(const struct hmdfs_cache_info *cache, + uint64_t ino, + struct hmdfs_cache_file_head *head) +{ + long long blocks; + unsigned int crc_offset; + + memset(head, 0, sizeof(*head)); + head->magic = cpu_to_le32(HMDFS_STASH_FILE_HEAD_MAGIC); + head->ino = cpu_to_le64(ino); + head->size = cpu_to_le64(i_size_read(file_inode(cache->cache_file))); + blocks = atomic64_read(&cache->written_pgs) << + HMDFS_STASH_PAGE_TO_SECTOR_SHIFT; + head->blocks = cpu_to_le64(blocks); + head->path_offs = cpu_to_le32(cache->path_offs); + head->path_len = cpu_to_le32(cache->path_len); + head->path_cnt = cpu_to_le32(cache->path_cnt); + head->data_offs = cpu_to_le32(cache->data_offs); + crc_offset = offsetof(struct hmdfs_cache_file_head, crc32); + head->crc_offset = cpu_to_le32(crc_offset); + head->crc32 = cpu_to_le32(crc32(0, head, crc_offset)); +} + +static int hmdfs_flush_stash_file_metadata(struct hmdfs_inode_info *info) +{ + struct hmdfs_cache_info *cache = NULL; + struct hmdfs_peer *conn = info->conn; + struct hmdfs_cache_file_head cache_head; + size_t written; + loff_t pos; + unsigned int head_size; + + /* No metadata if no cache file info */ + cache = info->cache; + if (!cache) + return -EINVAL; + + if (strlen(cache->path) == 0) { + long long to_write_pgs = atomic64_read(&cache->to_write_pgs); + + /* Nothing to stash. No need to flush meta data. 
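+		 * The backing tmpfile was never linked into the stash
+		 * dir, so it is released together with the cache info.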
*/ + if (to_write_pgs == 0) + return 0; + + hmdfs_err("peer 0x%x:0x%llx inode 0x%llx lost %lld pages due to no path", + conn->owner, conn->device_id, + info->remote_ino, to_write_pgs); + return -EINVAL; + } + + hmdfs_set_stash_file_head(cache, info->remote_ino, &cache_head); + + /* Write head */ + pos = 0; + head_size = sizeof(cache_head); + written = kernel_write(cache->cache_file, &cache_head, head_size, &pos); + if (written != head_size) { + hmdfs_err("stash peer 0x%x:0x%llx ino 0x%llx write head len %u err %zd", + conn->owner, conn->device_id, info->remote_ino, + head_size, written); + return -EIO; + } + /* Write path */ + pos = (loff_t)cache->path_offs << HMDFS_STASH_BLK_SHIFT; + written = kernel_write(cache->cache_file, cache->path, cache->path_len, + &pos); + if (written != cache->path_len) { + hmdfs_err("stash peer 0x%x:0x%llx ino 0x%llx write path len %u err %zd", + conn->owner, conn->device_id, info->remote_ino, + cache->path_len, written); + return -EIO; + } + + return 0; +} + +/* Mainly from inode_wait_for_writeback() */ +static void hmdfs_wait_remote_writeback_once(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + struct inode *inode = &info->vfs_inode; + DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); + wait_queue_head_t *wq_head = NULL; + bool in_sync = false; + + spin_lock(&inode->i_lock); + in_sync = inode->i_state & I_SYNC; + spin_unlock(&inode->i_lock); + + if (!in_sync) + return; + + hmdfs_info("peer 0x%x:0x%llx ino 0x%llx wait for wb once", + conn->owner, conn->device_id, info->remote_ino); + + wq_head = bit_waitqueue(&inode->i_state, __I_SYNC); + __wait_on_bit(wq_head, &wq, bit_wait, TASK_UNINTERRUPTIBLE); +} + +static void hmdfs_reset_remote_write_err(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + struct address_space *mapping = info->vfs_inode.i_mapping; + int flags_err; + errseq_t old; + int wb_err; + + flags_err = filemap_check_errors(mapping); + + old = errseq_sample(&mapping->wb_err); + wb_err = errseq_check_and_advance(&mapping->wb_err, &old); + if (flags_err || wb_err) + hmdfs_warning("peer 0x%x:0x%llx inode 0x%llx wb error %d %d before stash", + conn->owner, conn->device_id, info->remote_ino, + flags_err, wb_err); +} + +static bool hmdfs_is_mapping_clean(struct address_space *mapping) +{ + bool clean = false; + + /* b93b016313b3b ("page cache: use xa_lock") introduces i_pages */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + xa_lock_irq(&mapping->i_pages); +#else + spin_lock_irq(&mapping->tree_lock); +#endif + clean = !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && + !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + xa_unlock_irq(&mapping->i_pages); +#else + spin_unlock_irq(&mapping->tree_lock); +#endif + return clean; +} + +static int hmdfs_flush_stash_file_data(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + struct inode *inode = &info->vfs_inode; + struct address_space *mapping = inode->i_mapping; + bool all_clean = true; + int err = 0; + int i; + + /* Wait for the completion of write syscall */ + inode_lock(inode); + inode_unlock(inode); + + all_clean = hmdfs_is_mapping_clean(mapping); + if (all_clean) { + hmdfs_reset_remote_write_err(conn, info); + return 0; + } + + /* + * No-sync_all writeback during offline may have not seen + * the setting of stash_status as HMDFS_REMOTE_INODE_STASHING + * and will call mapping_set_error() after we just reset + * the previous error. 
So waiting for these writeback once, + * and the following writeback will do local write. + */ + hmdfs_wait_remote_writeback_once(conn, info); + + /* Need to clear previous error ? */ + hmdfs_reset_remote_write_err(conn, info); + + /* + * 1. dirty page: do write back + * 2. writeback page: wait for its completion + * 3. writeback -> redirty page: do filemap_write_and_wait() + * twice, so 2th writeback should not allow + * writeback -> redirty transition + */ + for (i = 0; i < HMDFS_STASH_FLUSH_CNT; i++) { + err = filemap_write_and_wait(mapping); + if (err) { + hmdfs_err("peer 0x%x:0x%llx inode 0x%llx #%d stash flush error %d", + conn->owner, conn->device_id, + info->remote_ino, i, err); + return err; + } + } + + if (!hmdfs_is_mapping_clean(mapping)) + hmdfs_err("peer 0x%x:0x%llx inode 0x%llx is still dirty dt %d wb %d", + conn->owner, conn->device_id, info->remote_ino, + !!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY), + !!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)); + + return 0; +} + +static int hmdfs_flush_stash_file(struct hmdfs_inode_info *info) +{ + int err; + + err = hmdfs_flush_stash_file_data(info->conn, info); + if (!err) + err = hmdfs_flush_stash_file_metadata(info); + + return err; +} + +static int hmdfs_enable_stash_file(struct hmdfs_inode_info *info, + struct dentry *stash) +{ + char name[HMDFS_STASH_FILE_NAME_LEN]; + struct dentry *parent = NULL; + struct inode *dir = NULL; + struct dentry *child = NULL; + int err = 0; + bool retried = false; + + snprintf(name, sizeof(name), "0x%llx", info->remote_ino); + + parent = lock_parent(stash); + dir = d_inode(parent); + +lookup_again: + child = lookup_one_len(name, parent, strlen(name)); + if (IS_ERR(child)) { + err = PTR_ERR(child); + child = NULL; + hmdfs_err("lookup %s err %d", name, err); + goto out; + } + + if (d_is_positive(child)) { + hmdfs_warning("%s exists (mode 0%o)", + name, d_inode(child)->i_mode); + + err = vfs_unlink(dir, child, NULL); + if (err) { + hmdfs_err("unlink %s err %d", name, err); + goto out; + } + if (retried) { + err = -EEXIST; + goto out; + } + + retried = true; + dput(child); + goto lookup_again; + } + + err = vfs_link(stash, dir, child, NULL); + if (err) { + hmdfs_err("link stash file to %s err %d", name, err); + goto out; + } + +out: + unlock_dir(parent); + if (child) + dput(child); + + return err; +} + +/* Return 1 if stash is done, 0 if nothing is stashed */ +static int hmdfs_close_stash_file(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + struct file *cache_file = info->cache->cache_file; + struct dentry *c_dentry = file_dentry(cache_file); + struct inode *c_inode = d_inode(c_dentry); + long long to_write_pgs = atomic64_read(&info->cache->to_write_pgs); + int err; + + hmdfs_info("peer 0x%x:0x%llx inode 0x%llx stashed bytes %lld pages %lld", + conn->owner, conn->device_id, info->remote_ino, + i_size_read(c_inode), to_write_pgs); + + if (to_write_pgs == 0) + return 0; + + err = vfs_fsync(cache_file, 0); + if (!err) + err = hmdfs_enable_stash_file(info, c_dentry); + else + hmdfs_err("fsync stash file err %d", err); + + return err < 0 ? 
err : 1; +} + +static void hmdfs_del_file_cache(struct hmdfs_cache_info *cache) +{ + if (!cache) + return; + + fput(cache->cache_file); + kfree(cache->path_buf); + kfree(cache); +} + +static struct hmdfs_cache_info * +hmdfs_new_file_cache(struct hmdfs_peer *conn, struct hmdfs_inode_info *info) +{ + struct hmdfs_cache_info *cache = NULL; + struct dentry *stash_dentry = NULL; + int err; + + cache = kzalloc(sizeof(*cache), GFP_KERNEL); + if (!cache) + return ERR_PTR(-ENOMEM); + + atomic64_set(&cache->to_write_pgs, 0); + atomic64_set(&cache->written_pgs, 0); + cache->path_buf = kmalloc(PATH_MAX, GFP_KERNEL); + if (!cache->path_buf) { + err = -ENOMEM; + goto free_cache; + } + + /* Need to handle "hardlink" ? */ + stash_dentry = d_find_any_alias(&info->vfs_inode); + if (stash_dentry) { + /* Needs full path in hmdfs, will be a device-view path */ + cache->path = dentry_path_raw(stash_dentry, cache->path_buf, + PATH_MAX); + dput(stash_dentry); + if (IS_ERR(cache->path)) { + err = PTR_ERR(cache->path); + hmdfs_err("peer 0x%x:0x%llx inode 0x%llx gen path err %d", + conn->owner, conn->device_id, + info->remote_ino, err); + goto free_path; + } + } else { + /* Write-opened file was closed before finding dentry */ + hmdfs_info("peer 0x%x:0x%llx inode 0x%llx no dentry found", + conn->owner, conn->device_id, info->remote_ino); + cache->path_buf[0] = '\0'; + cache->path = cache->path_buf; + } + + cache->path_cnt = 1; + cache->path_len = strlen(cache->path) + 1; + cache->path_offs = DIV_ROUND_UP(sizeof(struct hmdfs_cache_file_head), + HMDFS_STASH_BLK_SIZE); + cache->data_offs = cache->path_offs + DIV_ROUND_UP(cache->path_len, + HMDFS_STASH_BLK_SIZE); + cache->cache_file = hmdfs_new_stash_file(&conn->sbi->stash_work_dir, + conn->cid); + if (IS_ERR(cache->cache_file)) { + err = PTR_ERR(cache->cache_file); + goto free_path; + } + + return cache; + +free_path: + kfree(cache->path_buf); +free_cache: + kfree(cache); + return ERR_PTR(err); +} + +static void hmdfs_init_stash_file_cache(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + struct hmdfs_cache_info *cache = NULL; + + cache = hmdfs_new_file_cache(conn, info); + if (IS_ERR(cache)) + /* + * Continue even creating stash info failed. + * We need to ensure there is no dirty pages + * after stash completes + */ + cache = NULL; + + /* Make write() returns */ + spin_lock(&info->stash_lock); + info->cache = cache; + info->stash_status = HMDFS_REMOTE_INODE_STASHING; + spin_unlock(&info->stash_lock); +} + +static void hmdfs_update_stash_stats(struct hmdfs_stash_stats *stats, + const struct hmdfs_cache_info *cache, + int err) +{ + unsigned long long ok_pages, fail_pages; + + if (cache) { + ok_pages = err > 0 ? atomic64_read(&cache->written_pgs) : 0; + fail_pages = atomic64_read(&cache->to_write_pgs) - ok_pages; + stats->ok_pages += ok_pages; + stats->fail_pages += fail_pages; + } + + if (err > 0) + stats->succeed++; + else if (!err) + stats->donothing++; + else + stats->fail++; +} + +/* Return 1 if stash is done, 0 if nothing is stashed */ +static int hmdfs_stash_remote_inode(struct hmdfs_inode_info *info, + struct hmdfs_stash_stats *stats) +{ + struct hmdfs_cache_info *cache = info->cache; + struct hmdfs_peer *conn = info->conn; + unsigned int status; + int err = 0; + + hmdfs_info("stash peer 0x%x:0x%llx ino 0x%llx", + conn->owner, conn->device_id, info->remote_ino); + + err = hmdfs_flush_stash_file(info); + if (!err) + err = hmdfs_close_stash_file(conn, info); + + if (err <= 0) + set_bit(HMDFS_FID_NEED_OPEN, &info->fid_flags); + status = err > 0 ? 
HMDFS_REMOTE_INODE_RESTORING : + HMDFS_REMOTE_INODE_NONE; + spin_lock(&info->stash_lock); + info->cache = NULL; + /* + * Use smp_store_release() to ensure order between HMDFS_FID_NEED_OPEN + * and HMDFS_REMOTE_INODE_NONE. + */ + smp_store_release(&info->stash_status, status); + spin_unlock(&info->stash_lock); + + hmdfs_update_stash_stats(stats, cache, err); + hmdfs_del_file_cache(cache); + + return err; +} + +static void hmdfs_init_cache_for_stash_files(struct hmdfs_peer *conn, + struct list_head *list) +{ + const struct cred *old_cred = NULL; + struct hmdfs_inode_info *info = NULL; + + /* For file creation under stash_work_dir */ + old_cred = hmdfs_override_creds(conn->sbi->cred); + list_for_each_entry(info, list, stash_node) + hmdfs_init_stash_file_cache(conn, info); + hmdfs_revert_creds(old_cred); +} + +static void hmdfs_init_stash_cache_work_fn(struct work_struct *base) +{ + struct hmdfs_stash_work *work = + container_of(base, struct hmdfs_stash_work, work); + + hmdfs_init_cache_for_stash_files(work->conn, work->list); + complete(&work->done); +} + +static void hmdfs_init_cache_for_stash_files_by_work(struct hmdfs_peer *conn, + struct list_head *list) +{ + struct hmdfs_stash_work work = { + .conn = conn, + .list = list, + .done = COMPLETION_INITIALIZER_ONSTACK(work.done), + }; + + INIT_WORK_ONSTACK(&work.work, hmdfs_init_stash_cache_work_fn); + schedule_work(&work.work); + wait_for_completion(&work.done); +} + +static void hmdfs_stash_fetch_ready_files(struct hmdfs_peer *conn, + bool check, struct list_head *list) +{ + struct hmdfs_inode_info *info = NULL; + + spin_lock(&conn->wr_opened_inode_lock); + list_for_each_entry(info, &conn->wr_opened_inode_list, wr_opened_node) { + int status; + + /* Paired with *_release() in hmdfs_reset_stashed_inode() */ + status = smp_load_acquire(&info->stash_status); + if (status == HMDFS_REMOTE_INODE_NONE) { + list_add_tail(&info->stash_node, list); + /* + * Prevent close() removing the inode from + * writeable-opened inode list + */ + hmdfs_remote_add_wr_opened_inode_nolock(conn, info); + /* Prevent the inode from eviction */ + ihold(&info->vfs_inode); + } else if (check && status == HMDFS_REMOTE_INODE_STASHING) { + hmdfs_warning("peer 0x%x:0x%llx inode 0x%llx unexpected stash status %d", + conn->owner, conn->device_id, + info->remote_ino, status); + } + } + spin_unlock(&conn->wr_opened_inode_lock); +} + +static void hmdfs_stash_offline_prepare(struct hmdfs_peer *conn, int evt, + unsigned int seq) +{ + LIST_HEAD(preparing); + + if (!hmdfs_is_stash_enabled(conn->sbi)) + return; + + mutex_lock(&conn->offline_cb_lock); + + hmdfs_stash_fetch_ready_files(conn, true, &preparing); + + if (list_empty(&preparing)) + goto out; + + hmdfs_init_cache_for_stash_files_by_work(conn, &preparing); +out: + mutex_unlock(&conn->offline_cb_lock); +} + +static void hmdfs_track_inode_locked(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + spin_lock(&conn->stashed_inode_lock); + list_add_tail(&info->stash_node, &conn->stashed_inode_list); + conn->stashed_inode_nr++; + spin_unlock(&conn->stashed_inode_lock); +} + +static void +hmdfs_update_peer_stash_stats(struct hmdfs_stash_statistics *stash_stats, + const struct hmdfs_stash_stats *stats) +{ + stash_stats->cur_ok = stats->succeed; + stash_stats->cur_nothing = stats->donothing; + stash_stats->cur_fail = stats->fail; + stash_stats->total_ok += stats->succeed; + stash_stats->total_nothing += stats->donothing; + stash_stats->total_fail += stats->fail; + stash_stats->ok_pages += stats->ok_pages; + 
stash_stats->fail_pages += stats->fail_pages; +} + +static void hmdfs_stash_remote_inodes(struct hmdfs_peer *conn, + struct list_head *list) +{ + const struct cred *old_cred = NULL; + struct hmdfs_inode_info *info = NULL; + struct hmdfs_inode_info *next = NULL; + struct hmdfs_stash_stats stats; + + /* For file creation, write and relink under stash_work_dir */ + old_cred = hmdfs_override_creds(conn->sbi->cred); + + memset(&stats, 0, sizeof(stats)); + list_for_each_entry_safe(info, next, list, stash_node) { + int err; + + list_del_init(&info->stash_node); + + err = hmdfs_stash_remote_inode(info, &stats); + if (err > 0) + hmdfs_track_inode_locked(conn, info); + + hmdfs_remote_del_wr_opened_inode(conn, info); + if (err <= 0) + iput(&info->vfs_inode); + } + hmdfs_revert_creds(old_cred); + + hmdfs_update_peer_stash_stats(&conn->stats.stash, &stats); + hmdfs_info("peer 0x%x:0x%llx total stashed %u cur ok %u none %u fail %u", + conn->owner, conn->device_id, conn->stashed_inode_nr, + stats.succeed, stats.donothing, stats.fail); +} + +static void hmdfs_stash_offline_do_stash(struct hmdfs_peer *conn, int evt, + unsigned int seq) +{ + struct hmdfs_inode_info *info = NULL; + LIST_HEAD(preparing); + LIST_HEAD(stashing); + + if (!hmdfs_is_stash_enabled(conn->sbi)) + return; + + /* release seq_lock to prevent blocking non-offline sync cbs */ + mutex_unlock(&conn->seq_lock); + /* acquire offline_cb_lock to serialize with the offline sync cb */ + mutex_lock(&conn->offline_cb_lock); + + hmdfs_stash_fetch_ready_files(conn, false, &preparing); + if (!list_empty(&preparing)) + hmdfs_init_cache_for_stash_files(conn, &preparing); + + spin_lock(&conn->wr_opened_inode_lock); + list_for_each_entry(info, &conn->wr_opened_inode_list, wr_opened_node) { + int status = READ_ONCE(info->stash_status); + + if (status == HMDFS_REMOTE_INODE_STASHING) + list_add_tail(&info->stash_node, &stashing); + } + spin_unlock(&conn->wr_opened_inode_lock); + + if (list_empty(&stashing)) + goto unlock; + + hmdfs_stash_remote_inodes(conn, &stashing); + +unlock: + mutex_unlock(&conn->offline_cb_lock); + mutex_lock(&conn->seq_lock); +} + +static struct hmdfs_inode_info * +hmdfs_lookup_stash_inode(struct hmdfs_peer *conn, uint64_t inum) +{ + struct hmdfs_inode_info *info = NULL; + + list_for_each_entry(info, &conn->stashed_inode_list, stash_node) { + if (info->remote_ino == inum) + return info; + } + + return NULL; +} + +static void hmdfs_untrack_stashed_inode(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + list_del_init(&info->stash_node); + iput(&info->vfs_inode); + + conn->stashed_inode_nr--; +} + +static void hmdfs_reset_stashed_inode(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + struct inode *ino = &info->vfs_inode; + + /* + * For updating stash_status after iput() + * in hmdfs_untrack_stashed_inode() + */ + ihold(ino); + hmdfs_untrack_stashed_inode(conn, info); + /* + * Ensure the order of stash_node and stash_status: + * only update stash_status to NONE after the removal of + * stash_node has completed. 
+ */ + smp_store_release(&info->stash_status, + HMDFS_REMOTE_INODE_NONE); + iput(ino); +} + +static void hmdfs_drop_stashed_inodes(struct hmdfs_peer *conn) +{ + struct hmdfs_inode_info *info = NULL; + struct hmdfs_inode_info *next = NULL; + + if (list_empty(&conn->stashed_inode_list)) + return; + + hmdfs_warning("peer 0x%x:0x%llx drop unrestorable file %u", + conn->owner, conn->device_id, conn->stashed_inode_nr); + + list_for_each_entry_safe(info, next, + &conn->stashed_inode_list, stash_node) { + hmdfs_warning("peer 0x%x:0x%llx inode 0x%llx unrestorable status %u", + conn->owner, conn->device_id, info->remote_ino, + READ_ONCE(info->stash_status)); + + hmdfs_reset_stashed_inode(conn, info); + } +} + +static struct file *hmdfs_open_stash_dir(struct path *d_path, const char *cid) +{ + int err = 0; + struct dentry *parent = d_path->dentry; + struct inode *dir = d_inode(parent); + struct dentry *child = NULL; + struct path peer_path; + struct file *filp = NULL; + + inode_lock_nested(dir, I_MUTEX_PARENT); + child = lookup_one_len(cid, parent, strlen(cid)); + if (!IS_ERR(child)) { + if (!hmdfs_is_dir(child)) { + if (d_is_positive(child)) { + hmdfs_err("invalid stash dir mode 0%o", d_inode(child)->i_mode); + err = -EINVAL; + } else { + err = -ENOENT; + } + dput(child); + } + } else { + err = PTR_ERR(child); + hmdfs_err("lookup stash dir err %d", err); + } + inode_unlock(dir); + + if (err) + return ERR_PTR(err); + + peer_path.mnt = d_path->mnt; + peer_path.dentry = child; + filp = dentry_open(&peer_path, O_RDONLY | O_DIRECTORY, current_cred()); + if (IS_ERR(filp)) + hmdfs_err("open err %d", (int)PTR_ERR(filp)); + + dput(child); + + return filp; +} + +static int hmdfs_new_inode_tbl(struct hmdfs_inode_tbl **tbl) +{ + struct hmdfs_inode_tbl *new = NULL; + + new = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!new) + return -ENOMEM; + + new->cnt = 0; + new->max = (PAGE_SIZE - offsetof(struct hmdfs_inode_tbl, inodes)) / + sizeof(new->inodes[0]); + *tbl = new; + + return 0; +} + +static int hmdfs_parse_stash_file_name(struct dir_context *dctx, + const char *name, + int namelen, + unsigned int d_type, + uint64_t *stash_inum) +{ + struct hmdfs_stash_dir_context *ctx = NULL; + int err; + + if (d_type != DT_UNKNOWN && d_type != DT_REG) + return 0; + if (namelen > NAME_MAX) + return 0; + + ctx = container_of(dctx, struct hmdfs_stash_dir_context, dctx); + memcpy(ctx->name, name, namelen); + ctx->name[namelen] = '\0'; + err = kstrtoull(ctx->name, 16, stash_inum); + if (err) { + hmdfs_err("unexpected stash file err %d", err); + return 0; + } + return 1; +} + +static int hmdfs_has_stash_file(struct dir_context *dctx, const char *name, + int namelen, loff_t offset, + u64 inum, unsigned int d_type) +{ + struct hmdfs_stash_dir_context *ctx = NULL; + uint64_t stash_inum; + int err; + + ctx = container_of(dctx, struct hmdfs_stash_dir_context, dctx); + err = hmdfs_parse_stash_file_name(dctx, name, namelen, + d_type, &stash_inum); + if (!err) + return 0; + + ctx->tbl->cnt++; + return 1; +} + +static int hmdfs_fill_stash_file(struct dir_context *dctx, const char *name, + int namelen, loff_t offset, + u64 inum, unsigned int d_type) +{ + struct hmdfs_stash_dir_context *ctx = NULL; + uint64_t stash_inum; + int err; + + ctx = container_of(dctx, struct hmdfs_stash_dir_context, dctx); + err = hmdfs_parse_stash_file_name(dctx, name, namelen, + d_type, &stash_inum); + if (!err) + return 0; + if (ctx->tbl->cnt >= ctx->tbl->max) + return 1; + + ctx->tbl->inodes[ctx->tbl->cnt++] = stash_inum; + + return 0; +} + +static int 
hmdfs_del_stash_file(struct dentry *parent, struct dentry *child) +{ + struct inode *dir = d_inode(parent); + int err = 0; + + /* Prevent d_delete() from calling dentry_unlink_inode() */ + dget(child); + + inode_lock_nested(dir, I_MUTEX_PARENT); + err = vfs_unlink(dir, child, NULL); + if (err) + hmdfs_err("remove stash file err %d", err); + inode_unlock(dir); + + dput(child); + + return err; +} + +static inline bool hmdfs_is_node_offlined(const struct hmdfs_peer *conn, + unsigned int seq) +{ + /* + * open()/fsync() may fail due to "status = NODE_STAT_OFFLINE" + * in hmdfs_disconnect_node(). + * Pair with smp_mb() in hmdfs_disconnect_node() to ensure + * getting the newest event sequence. + */ + smp_mb__before_atomic(); + return hmdfs_node_evt_seq(conn) != seq; +} + +static int hmdfs_verify_restore_file_head(struct hmdfs_file_restore_ctx *ctx, + const struct hmdfs_cache_file_head *head) +{ + struct inode *inode = file_inode(ctx->src_filp); + struct hmdfs_peer *conn = ctx->conn; + unsigned int crc, read_crc, crc_offset; + loff_t path_offs, data_offs, isize; + int err = 0; + + if (le32_to_cpu(head->magic) != HMDFS_STASH_FILE_HEAD_MAGIC) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid magic: got 0x%x, exp 0x%x", + conn->owner, conn->device_id, ctx->inum, + le32_to_cpu(head->magic), + HMDFS_STASH_FILE_HEAD_MAGIC); + goto out; + } + + crc_offset = le32_to_cpu(head->crc_offset); + read_crc = le32_to_cpu(*((__le32 *)((char *)head + crc_offset))); + crc = crc32(0, head, crc_offset); + if (read_crc != crc) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid crc: got 0x%x, exp 0x%x", + conn->owner, conn->device_id, ctx->inum, + read_crc, crc); + goto out; + } + + if (le64_to_cpu(head->ino) != ctx->inum) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid ino: got %llu, exp %llu", + conn->owner, conn->device_id, ctx->inum, + le64_to_cpu(head->ino), ctx->inum); + goto out; + } + + path_offs = (loff_t)le32_to_cpu(head->path_offs) << + HMDFS_STASH_BLK_SHIFT; + if (path_offs <= 0 || path_offs >= i_size_read(inode)) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid path_offs %d, stash file size %llu", + conn->owner, conn->device_id, ctx->inum, + le32_to_cpu(head->path_offs), i_size_read(inode)); + goto out; + } + + data_offs = (loff_t)le32_to_cpu(head->data_offs) << + HMDFS_STASH_BLK_SHIFT; + if (path_offs >= data_offs) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid data_offs %d, path_offs %d", + conn->owner, conn->device_id, ctx->inum, + le32_to_cpu(head->data_offs), + le32_to_cpu(head->path_offs)); + goto out; + } + if (data_offs <= 0 || data_offs >= i_size_read(inode)) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid data_offs %d, stash file size %llu", + conn->owner, conn->device_id, ctx->inum, + le32_to_cpu(head->data_offs), i_size_read(inode)); + goto out; + } + + isize = le64_to_cpu(head->size); + if (isize != i_size_read(inode)) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid isize: got %llu, exp %llu", + conn->owner, conn->device_id, ctx->inum, + le64_to_cpu(head->size), i_size_read(inode)); + goto out; + } + + if (le32_to_cpu(head->path_cnt) < 1) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid path_cnt %d", + conn->owner, conn->device_id, ctx->inum, + le32_to_cpu(head->path_cnt)); + goto out; + } + +out: + return err; +} + +static int hmdfs_get_restore_file_metadata(struct hmdfs_file_restore_ctx *ctx) +{ + struct hmdfs_cache_file_head head; 
+ struct hmdfs_peer *conn = ctx->conn; + unsigned int head_size, read_size, head_crc_offset; + loff_t pos; + ssize_t rd; + int err = 0; + + head_size = sizeof(struct hmdfs_cache_file_head); + memset(&head, 0, head_size); + /* Read the partial head */ + pos = 0; + read_size = offsetof(struct hmdfs_cache_file_head, crc_offset) + + sizeof(head.crc_offset); + rd = kernel_read(ctx->src_filp, &head, read_size, &pos); + if (rd != read_size) { + err = rd < 0 ? rd : -ENODATA; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx read part head err %d", + conn->owner, conn->device_id, ctx->inum, err); + goto out; + } + head_crc_offset = le32_to_cpu(head.crc_offset); + if (head_crc_offset + sizeof(head.crc32) < head_crc_offset || + head_crc_offset + sizeof(head.crc32) > head_size) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx got bad head: crc_offset %u exceeds head size %u", + conn->owner, conn->device_id, ctx->inum, + head_crc_offset, head_size); + goto out; + } + + /* Read full head */ + pos = 0; + read_size = le32_to_cpu(head.crc_offset) + sizeof(head.crc32); + rd = kernel_read(ctx->src_filp, &head, read_size, &pos); + if (rd != read_size) { + err = rd < 0 ? rd : -ENODATA; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx read full head err %d", + conn->owner, conn->device_id, ctx->inum, err); + goto out; + } + + err = hmdfs_verify_restore_file_head(ctx, &head); + if (err) + goto out; + + ctx->pages = le64_to_cpu(head.blocks) >> + HMDFS_STASH_PAGE_TO_SECTOR_SHIFT; + ctx->data_offs = le32_to_cpu(head.data_offs); + /* Read path */ + read_size = min_t(unsigned int, le32_to_cpu(head.path_len), PATH_MAX); + pos = (loff_t)le32_to_cpu(head.path_offs) << HMDFS_STASH_BLK_SHIFT; + rd = kernel_read(ctx->src_filp, ctx->dst, read_size, &pos); + if (rd != read_size) { + err = rd < 0 ? rd : -ENODATA; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx read path err %d", + conn->owner, conn->device_id, ctx->inum, err); + goto out; + } + if (strnlen(ctx->dst, read_size) >= read_size) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx read path does not end with \\0", + conn->owner, conn->device_id, ctx->inum); + goto out; + } + /* TODO: Pick a valid path from all paths */ + +out: + return err; +} + +static int hmdfs_open_restore_dst_file(struct hmdfs_file_restore_ctx *ctx, + unsigned int rw_flag, struct file **filp) +{ + struct hmdfs_peer *conn = ctx->conn; + struct file *dst = NULL; + int err = 0; + + err = hmdfs_get_restore_file_metadata(ctx); + if (err) + goto out; + + /* Does the error come from the connection or the server? */ + dst = file_open_root(&ctx->dst_root_path, + ctx->dst, O_LARGEFILE | rw_flag, 0); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + hmdfs_err("open remote file ino 0x%llx err %d", ctx->inum, err); + if (hmdfs_is_node_offlined(conn, ctx->seq)) + err = -ESHUTDOWN; + goto out; + } + + *filp = dst; +out: + return err; +} + +static bool hmdfs_need_abort_restore(struct hmdfs_file_restore_ctx *ctx, + struct hmdfs_inode_info *pinned, + struct file *opened_file) +{ + struct hmdfs_inode_info *opened = hmdfs_i(file_inode(opened_file)); + + if (opened->inode_type != HMDFS_LAYER_OTHER_REMOTE) + goto abort; + + if (opened == pinned) + return false; + +abort: + hmdfs_warning("peer 0x%x:0x%llx inode 0x%llx invalid remote file", + ctx->conn->owner, ctx->conn->device_id, ctx->inum); + hmdfs_warning("got: peer 0x%x:0x%llx inode 0x%llx type %d status %d", + opened->conn ? opened->conn->owner : 0, + opened->conn ? 
opened->conn->device_id : 0, + opened->remote_ino, opened->inode_type, + opened->stash_status); + hmdfs_warning("pinned: peer 0x%x:0x%llx inode 0x%llx type %d status %d", + pinned->conn->owner, pinned->conn->device_id, + pinned->remote_ino, pinned->inode_type, + pinned->stash_status); + return true; +} + +static void hmdfs_init_copy_args(const struct hmdfs_file_restore_ctx *ctx, + struct file *dst, struct hmdfs_copy_args *args) +{ + args->src = ctx->src_filp; + args->dst = dst; + args->buf = ctx->page; + args->buf_len = PAGE_SIZE; + args->seq = ctx->seq; + args->data_offs = ctx->data_offs; + args->inum = ctx->inum; +} + +static ssize_t hmdfs_write_dst(struct hmdfs_peer *conn, struct file *filp, + void *buf, size_t len, loff_t pos) +{ + mm_segment_t old_fs; + struct kiocb kiocb; + struct iovec iov; + struct iov_iter iter; + ssize_t wr; + int err = 0; + + file_start_write(filp); + + old_fs = force_uaccess_begin(); + + init_sync_kiocb(&kiocb, filp); + kiocb.ki_pos = pos; + + iov.iov_base = buf; + iov.iov_len = len; + iov_iter_init(&iter, WRITE, &iov, 1, len); + + wr = hmdfs_file_write_iter_remote_nocheck(&kiocb, &iter); + + force_uaccess_end(old_fs); + + file_end_write(filp); + + if (wr != len) { + struct hmdfs_inode_info *info = hmdfs_i(file_inode(filp)); + + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx short write ret %zd exp %zu", + conn->owner, conn->device_id, info->remote_ino, + wr, len); + err = wr < 0 ? (int)wr : -EFAULT; + } + + return err; +} + +static int hmdfs_rd_src_wr_dst(struct hmdfs_peer *conn, + struct hmdfs_copy_ctx *ctx) +{ + const struct hmdfs_copy_args *args = NULL; + int err = 0; + loff_t rd_pos; + ssize_t rd; + + ctx->eof = false; + ctx->copied = 0; + + args = &ctx->args; + rd_pos = ctx->src_pos; + rd = kernel_read(args->src, args->buf, args->buf_len, &rd_pos); + if (rd < 0) { + err = (int)rd; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx short read err %d", + conn->owner, conn->device_id, args->inum, err); + goto out; + } else if (rd == 0) { + ctx->eof = true; + goto out; + } + + err = hmdfs_write_dst(conn, args->dst, args->buf, rd, ctx->dst_pos); + if (!err) + ctx->copied = rd; + else if (hmdfs_is_node_offlined(conn, args->seq)) + err = -ESHUTDOWN; +out: + return err; +} + +static int hmdfs_copy_src_to_dst(struct hmdfs_peer *conn, + const struct hmdfs_copy_args *args) +{ + int err = 0; + struct file *src = NULL; + struct hmdfs_copy_ctx ctx; + loff_t seek_pos, data_init_pos; + loff_t src_size; + + ctx.args = *args; + + src = ctx.args.src; + data_init_pos = (loff_t)ctx.args.data_offs << HMDFS_STASH_BLK_SHIFT; + seek_pos = data_init_pos; + src_size = i_size_read(file_inode(src)); + while (true) { + loff_t data_pos; + + data_pos = vfs_llseek(src, seek_pos, SEEK_DATA); + if (data_pos > seek_pos) { + seek_pos = data_pos; + continue; + } else if (data_pos < 0) { + if (data_pos == -ENXIO) { + loff_t src_blks = file_inode(src)->i_blocks; + + hmdfs_info("peer 0x%x:0x%llx ino 0x%llx end at 0x%llx (sz 0x%llx blk 0x%llx)", + conn->owner, conn->device_id, + args->inum, seek_pos, + src_size, src_blks); + } else { + err = (int)data_pos; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx seek pos 0x%llx err %d", + conn->owner, conn->device_id, + args->inum, seek_pos, err); + } + break; + } + + hmdfs_debug("peer 0x%x:0x%llx ino 0x%llx seek to 0x%llx", + conn->owner, conn->device_id, args->inum, data_pos); + + ctx.src_pos = data_pos; + ctx.dst_pos = data_pos - data_init_pos; + err = hmdfs_rd_src_wr_dst(conn, &ctx); + if (err || ctx.eof) + break; + + seek_pos += ctx.copied; + if (seek_pos >= src_size) + 
break; + } + + return err; +} + +static int hmdfs_restore_src_to_dst(struct hmdfs_file_restore_ctx *ctx, + struct file *dst) +{ + struct file *src = ctx->src_filp; + struct hmdfs_copy_args args; + int err; + + hmdfs_init_copy_args(ctx, dst, &args); + err = hmdfs_copy_src_to_dst(ctx->conn, &args); + if (err) + goto out; + + err = vfs_fsync(dst, 0); + if (err) { + hmdfs_err("fsync remote file ino 0x%llx err %d", ctx->inum, err); + if (hmdfs_is_node_offlined(ctx->conn, ctx->seq)) + err = -ESHUTDOWN; + } + +out: + if (err) + truncate_inode_pages(file_inode(dst)->i_mapping, 0); + + /* Remove the unnecessary cache */ + invalidate_mapping_pages(file_inode(src)->i_mapping, 0, -1); + + return err; +} + +static int hmdfs_restore_file(struct hmdfs_file_restore_ctx *ctx) +{ + struct hmdfs_peer *conn = ctx->conn; + uint64_t inum = ctx->inum; + struct hmdfs_inode_info *pinned_info = NULL; + struct file *dst_filp = NULL; + int err = 0; + bool keep = false; + + hmdfs_info("peer 0x%x:0x%llx ino 0x%llx do restore", + conn->owner, conn->device_id, inum); + + pinned_info = hmdfs_lookup_stash_inode(conn, inum); + if (pinned_info) { + unsigned int status = READ_ONCE(pinned_info->stash_status); + + if (status != HMDFS_REMOTE_INODE_RESTORING) { + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid status %u", + conn->owner, conn->device_id, inum, status); + err = -EINVAL; + goto clean; + } + } else { + hmdfs_warning("peer 0x%x:0x%llx ino 0x%llx is not pinned", + conn->owner, conn->device_id, inum); + err = -EINVAL; + goto clean; + } + + set_bit(HMDFS_FID_NEED_OPEN, &pinned_info->fid_flags); + err = hmdfs_open_restore_dst_file(ctx, O_RDWR, &dst_filp); + if (err) { + if (err == -ESHUTDOWN) + keep = true; + goto clean; + } + + if (hmdfs_need_abort_restore(ctx, pinned_info, dst_filp)) + goto abort; + + err = hmdfs_restore_src_to_dst(ctx, dst_filp); + if (err == -ESHUTDOWN) + keep = true; +abort: + fput(dst_filp); +clean: + if (pinned_info && !keep) + hmdfs_reset_stashed_inode(conn, pinned_info); + ctx->keep = keep; + + hmdfs_info("peer 0x%x:0x%llx ino 0x%llx restore err %d keep %d", + conn->owner, conn->device_id, inum, err, ctx->keep); + + return err; +} + +static int hmdfs_init_file_restore_ctx(struct hmdfs_peer *conn, + unsigned int seq, struct path *src_dir, + struct hmdfs_file_restore_ctx *ctx) +{ + struct hmdfs_sb_info *sbi = conn->sbi; + struct path dst_root; + char *dst = NULL; + char *page = NULL; + int err = 0; + + err = hmdfs_get_path_in_sb(sbi->sb, sbi->real_dst, LOOKUP_DIRECTORY, + &dst_root); + if (err) + return err; + + dst = kmalloc(PATH_MAX, GFP_KERNEL); + if (!dst) { + err = -ENOMEM; + goto put_path; + } + + page = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!page) { + err = -ENOMEM; + goto free_dst; + } + + ctx->conn = conn; + ctx->src_dir_path = *src_dir; + ctx->dst_root_path = dst_root; + ctx->dst = dst; + ctx->page = page; + ctx->seq = seq; + + return 0; +free_dst: + kfree(dst); +put_path: + path_put(&dst_root); + return err; +} + +static void hmdfs_exit_file_restore_ctx(struct hmdfs_file_restore_ctx *ctx) +{ + path_put(&ctx->dst_root_path); + kfree(ctx->dst); + kfree(ctx->page); +} + +static struct file *hmdfs_open_stash_file(struct path *p_path, char *name) +{ + struct dentry *parent = NULL; + struct inode *dir = NULL; + struct dentry *child = NULL; + struct file *filp = NULL; + struct path c_path; + int err = 0; + + parent = p_path->dentry; + dir = d_inode(parent); + inode_lock_nested(dir, I_MUTEX_PARENT); + child = lookup_one_len(name, parent, strlen(name)); + if (!IS_ERR(child) && 
!hmdfs_is_reg(child)) { + if (d_is_positive(child)) { + hmdfs_err("invalid stash file (mode 0%o)", + d_inode(child)->i_mode); + err = -EINVAL; + } else { + hmdfs_err("missing stash file"); + err = -ENOENT; + } + dput(child); + } else if (IS_ERR(child)) { + err = PTR_ERR(child); + hmdfs_err("lookup stash file err %d", err); + } + inode_unlock(dir); + + if (err) + return ERR_PTR(err); + + c_path.mnt = p_path->mnt; + c_path.dentry = child; + filp = dentry_open(&c_path, O_RDONLY | O_LARGEFILE, current_cred()); + if (IS_ERR(filp)) + hmdfs_err("open stash file err %d", (int)PTR_ERR(filp)); + + dput(child); + + return filp; +} + +static void hmdfs_update_restore_stats(struct hmdfs_restore_stats *stats, + bool keep, uint64_t pages, int err) +{ + if (!err) { + stats->succeed++; + stats->ok_pages += pages; + } else if (keep) { + stats->keep++; + } else { + stats->fail++; + stats->fail_pages += pages; + } +} + +static int hmdfs_restore_files(struct hmdfs_peer *conn, + unsigned int seq, struct path *dir, + const struct hmdfs_inode_tbl *tbl, + void *priv) +{ + unsigned int i; + struct hmdfs_file_restore_ctx ctx; + int err = 0; + struct hmdfs_restore_stats *stats = priv; + + err = hmdfs_init_file_restore_ctx(conn, seq, dir, &ctx); + if (err) + return err; + + for (i = 0; i < tbl->cnt; i++) { + char name[HMDFS_STASH_FILE_NAME_LEN]; + struct file *filp = NULL; + + snprintf(name, sizeof(name), "0x%llx", tbl->inodes[i]); + filp = hmdfs_open_stash_file(dir, name); + /* Keep restoring the other files if this one fails */ + if (IS_ERR(filp)) { + stats->fail++; + continue; + } + + ctx.inum = tbl->inodes[i]; + ctx.src_filp = filp; + ctx.keep = false; + ctx.pages = 0; + err = hmdfs_restore_file(&ctx); + hmdfs_update_restore_stats(stats, ctx.keep, ctx.pages, err); + + if (!ctx.keep) + hmdfs_del_stash_file(dir->dentry, + file_dentry(ctx.src_filp)); + fput(ctx.src_filp); + + /* Keep restoring unless the peer went offline again */ + if (err == -ESHUTDOWN) + break; + err = 0; + } + + hmdfs_exit_file_restore_ctx(&ctx); + + return err; +} + +static bool hmdfs_is_valid_stash_status(struct hmdfs_inode_info *inode_info, + uint64_t ino) +{ + return (inode_info->inode_type == HMDFS_LAYER_OTHER_REMOTE && + inode_info->stash_status == HMDFS_REMOTE_INODE_RESTORING && + inode_info->remote_ino == ino); +} + +static int hmdfs_rebuild_stash_list(struct hmdfs_peer *conn, + unsigned int seq, + struct path *dir, + const struct hmdfs_inode_tbl *tbl, + void *priv) +{ + struct hmdfs_file_restore_ctx ctx; + unsigned int i; + int err; + struct hmdfs_rebuild_stats *stats = priv; + + err = hmdfs_init_file_restore_ctx(conn, seq, dir, &ctx); + if (err) + return err; + + stats->total += tbl->cnt; + + for (i = 0; i < tbl->cnt; i++) { + char name[HMDFS_STASH_FILE_NAME_LEN]; + struct file *src_filp = NULL; + struct file *dst_filp = NULL; + struct hmdfs_inode_info *inode_info = NULL; + bool is_valid = true; + + snprintf(name, sizeof(name), "0x%llx", tbl->inodes[i]); + src_filp = hmdfs_open_stash_file(dir, name); + if (IS_ERR(src_filp)) { + stats->fail++; + continue; + } + ctx.inum = tbl->inodes[i]; + ctx.src_filp = src_filp; + + /* No need to track this open; it only needs the meta info */ + err = hmdfs_open_restore_dst_file(&ctx, O_RDONLY, &dst_filp); + if (err) { + fput(src_filp); + if (err == -ESHUTDOWN) + break; + stats->fail++; + err = 0; + continue; + } + + inode_info = hmdfs_i(file_inode(dst_filp)); + is_valid = hmdfs_is_valid_stash_status(inode_info, + ctx.inum); + if (is_valid) { + stats->succeed++; + } else { + hmdfs_err("peer 0x%x:0x%llx inode 0x%llx invalid state: type: %d, status: 
%u, inode: %llu", + conn->owner, conn->device_id, ctx.inum, + inode_info->inode_type, + READ_ONCE(inode_info->stash_status), + inode_info->remote_ino); + stats->invalid++; + } + + fput(ctx.src_filp); + fput(dst_filp); + } + + hmdfs_exit_file_restore_ctx(&ctx); + return err; +} + +static int hmdfs_iter_stash_file(struct hmdfs_peer *conn, + unsigned int seq, + struct file *filp, + stash_operation_func op, + void *priv) +{ + int err = 0; + struct hmdfs_stash_dir_context ctx = { + .dctx.actor = hmdfs_fill_stash_file, + }; + struct hmdfs_inode_tbl *tbl = NULL; + struct path dir; + + err = hmdfs_new_inode_tbl(&tbl); + if (err) + goto out; + + dir.mnt = filp->f_path.mnt; + dir.dentry = file_dentry(filp); + + ctx.tbl = tbl; + ctx.dctx.pos = 0; + do { + tbl->cnt = 0; + err = iterate_dir(filp, &ctx.dctx); + if (err || !tbl->cnt) { + if (err) + hmdfs_err("iterate stash dir err %d", err); + break; + } + err = op(conn, seq, &dir, tbl, priv); + } while (!err); + +out: + kfree(tbl); + return err; +} + +static void hmdfs_rebuild_check_work_fn(struct work_struct *base) +{ + struct hmdfs_check_work *work = + container_of(base, struct hmdfs_check_work, work); + struct hmdfs_peer *conn = work->conn; + struct hmdfs_sb_info *sbi = conn->sbi; + struct file *filp = NULL; + const struct cred *old_cred = NULL; + struct hmdfs_stash_dir_context ctx = { + .dctx.actor = hmdfs_has_stash_file, + }; + struct hmdfs_inode_tbl tbl; + int err; + + old_cred = hmdfs_override_creds(sbi->cred); + filp = hmdfs_open_stash_dir(&sbi->stash_work_dir, conn->cid); + if (IS_ERR(filp)) + goto out; + + memset(&tbl, 0, sizeof(tbl)); + ctx.tbl = &tbl; + err = iterate_dir(filp, &ctx.dctx); + if (!err && ctx.tbl->cnt > 0) + conn->need_rebuild_stash_list = true; + + fput(filp); +out: + hmdfs_revert_creds(old_cred); + hmdfs_info("peer 0x%x:0x%llx %sneed to rebuild stash list", + conn->owner, conn->device_id, + conn->need_rebuild_stash_list ? "" : "don't "); + complete(&work->done); +} + +static void hmdfs_stash_add_do_check(struct hmdfs_peer *conn, int evt, + unsigned int seq) +{ + struct hmdfs_sb_info *sbi = conn->sbi; + struct hmdfs_check_work work = { + .conn = conn, + .done = COMPLETION_INITIALIZER_ONSTACK(work.done), + }; + + if (!hmdfs_is_stash_enabled(sbi)) + return; + + INIT_WORK_ONSTACK(&work.work, hmdfs_rebuild_check_work_fn); + schedule_work(&work.work); + wait_for_completion(&work.done); +} + +static void +hmdfs_update_peer_rebuild_stats(struct hmdfs_rebuild_statistics *rebuild_stats, + const struct hmdfs_rebuild_stats *stats) +{ + rebuild_stats->cur_ok = stats->succeed; + rebuild_stats->cur_fail = stats->fail; + rebuild_stats->cur_invalid = stats->invalid; + rebuild_stats->total_ok += stats->succeed; + rebuild_stats->total_fail += stats->fail; + rebuild_stats->total_invalid += stats->invalid; +} + +/* Rebuild the stash inode list */ +static void hmdfs_stash_online_prepare(struct hmdfs_peer *conn, int evt, + unsigned int seq) +{ + struct hmdfs_sb_info *sbi = conn->sbi; + struct file *filp = NULL; + const struct cred *old_cred = NULL; + int err; + struct hmdfs_rebuild_stats stats; + + if (!hmdfs_is_stash_enabled(sbi) || + !conn->need_rebuild_stash_list) + return; + + /* release seq_lock to prevent blocking non-online sync cbs */ + mutex_unlock(&conn->seq_lock); + old_cred = hmdfs_override_creds(sbi->cred); + filp = hmdfs_open_stash_dir(&sbi->stash_work_dir, conn->cid); + if (IS_ERR(filp)) + goto out; + + memset(&stats, 0, sizeof(stats)); + err = hmdfs_iter_stash_file(conn, seq, filp, + hmdfs_rebuild_stash_list, &stats); + if (err == -ESHUTDOWN) { + hmdfs_info("peer 0x%x:0x%llx offline again during rebuild", + conn->owner, conn->device_id); + } else { + WRITE_ONCE(conn->need_rebuild_stash_list, false); + if (err) + hmdfs_warning("partial rebuild failed, err %d", err); + } + + hmdfs_update_peer_rebuild_stats(&conn->stats.rebuild, &stats); + hmdfs_info("peer 0x%x:0x%llx rebuild stashed-file total %u succeed %u fail %u invalid %u", + conn->owner, conn->device_id, stats.total, stats.succeed, + stats.fail, stats.invalid); + fput(filp); +out: + conn->stats.rebuild.time++; + hmdfs_revert_creds(old_cred); + if (!READ_ONCE(conn->need_rebuild_stash_list)) { + /* + * Use smp_mb__before_atomic() to ensure order between + * writing @conn->need_rebuild_stash_list and + * reading conn->rebuild_inode_status_nr. + */ + smp_mb__before_atomic(); + /* + * Wait until all inodes finish rebuilding stash status before + * accessing @conn->stashed_inode_list in restoring. 
+ */ + wait_event(conn->rebuild_inode_status_wq, + !atomic_read(&conn->rebuild_inode_status_nr)); + } + mutex_lock(&conn->seq_lock); +} + +static void +hmdfs_update_peer_restore_stats(struct hmdfs_restore_statistics *restore_stats, + const struct hmdfs_restore_stats *stats) +{ + restore_stats->cur_ok = stats->succeed; + restore_stats->cur_fail = stats->fail; + restore_stats->cur_keep = stats->keep; + restore_stats->total_ok += stats->succeed; + restore_stats->total_fail += stats->fail; + restore_stats->total_keep += stats->keep; + restore_stats->ok_pages += stats->ok_pages; + restore_stats->fail_pages += stats->fail_pages; +} + +static void hmdfs_stash_online_do_restore(struct hmdfs_peer *conn, int evt, + unsigned int seq) +{ + struct hmdfs_sb_info *sbi = conn->sbi; + struct file *filp = NULL; + const struct cred *old_cred = NULL; + struct hmdfs_restore_stats stats; + int err = 0; + + if (!hmdfs_is_stash_enabled(sbi) || conn->need_rebuild_stash_list) { + if (conn->need_rebuild_stash_list) + hmdfs_info("peer 0x%x:0x%llx skip restoring because a rebuild is still needed", + conn->owner, conn->device_id); + return; + } + + /* release seq_lock to prevent blocking non-online sync cbs */ + mutex_unlock(&conn->seq_lock); + /* For dir iteration, file read and unlink */ + old_cred = hmdfs_override_creds(conn->sbi->cred); + + memset(&stats, 0, sizeof(stats)); + filp = hmdfs_open_stash_dir(&sbi->stash_work_dir, conn->cid); + if (IS_ERR(filp)) { + err = PTR_ERR(filp); + goto out; + } + + err = hmdfs_iter_stash_file(conn, seq, filp, + hmdfs_restore_files, &stats); + + fput(filp); +out: + hmdfs_revert_creds(old_cred); + + /* Did the peer go offline again? */ + if (err != -ESHUTDOWN) + hmdfs_drop_stashed_inodes(conn); + + hmdfs_update_peer_restore_stats(&conn->stats.restore, &stats); + hmdfs_info("peer 0x%x:0x%llx restore stashed-file ok %u fail %u keep %u", + conn->owner, conn->device_id, + stats.succeed, stats.fail, stats.keep); + + mutex_lock(&conn->seq_lock); +} + +static void hmdfs_stash_del_do_cleanup(struct hmdfs_peer *conn, int evt, + unsigned int seq) +{ + struct hmdfs_inode_info *info = NULL; + struct hmdfs_inode_info *next = NULL; + unsigned int preparing; + + if (!hmdfs_is_stash_enabled(conn->sbi)) + return; + + /* The async cb has been cancelled */ + preparing = 0; + list_for_each_entry_safe(info, next, &conn->wr_opened_inode_list, + wr_opened_node) { + int status = READ_ONCE(info->stash_status); + + if (status == HMDFS_REMOTE_INODE_STASHING) { + struct hmdfs_cache_info *cache = NULL; + + spin_lock(&info->stash_lock); + cache = info->cache; + info->cache = NULL; + info->stash_status = HMDFS_REMOTE_INODE_NONE; + spin_unlock(&info->stash_lock); + + hmdfs_remote_del_wr_opened_inode(conn, info); + hmdfs_del_file_cache(cache); + /* put the inode after all accesses have completed */ + iput(&info->vfs_inode); + preparing++; + } + } + hmdfs_info("release %u preparing inodes", preparing); + + hmdfs_info("release %u pinned inodes", conn->stashed_inode_nr); + if (list_empty(&conn->stashed_inode_list)) + return; + + list_for_each_entry_safe(info, next, + &conn->stashed_inode_list, stash_node) + hmdfs_untrack_stashed_inode(conn, info); +} + +void hmdfs_exit_stash(struct hmdfs_sb_info *sbi) +{ + if (!sbi->s_offline_stash) + return; + + if (sbi->stash_work_dir.dentry) { + path_put(&sbi->stash_work_dir); + sbi->stash_work_dir.dentry = NULL; + } +} + +int hmdfs_init_stash(struct hmdfs_sb_info *sbi) +{ + int err = 0; + struct path parent; + struct dentry *child = NULL; + + if (!sbi->s_offline_stash) + return 0; + + err = kern_path(sbi->cache_dir, 
LOOKUP_FOLLOW | LOOKUP_DIRECTORY, + &parent); + if (err) { + hmdfs_err("invalid cache dir err %d", err); + goto out; + } + + child = hmdfs_stash_new_work_dir(parent.dentry); + if (!IS_ERR(child)) { + sbi->stash_work_dir.mnt = mntget(parent.mnt); + sbi->stash_work_dir.dentry = child; + } else { + err = PTR_ERR(child); + hmdfs_err("create stash work dir err %d", err); + } + + path_put(&parent); +out: + return err; +} + +static int hmdfs_stash_write_local_file(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info, + struct hmdfs_writepage_context *ctx, + struct hmdfs_cache_info *cache) +{ + struct page *page = ctx->page; + const struct cred *old_cred = NULL; + void *buf = NULL; + loff_t pos; + unsigned int flags; + ssize_t written; + int err = 0; + + buf = kmap(page); + pos = (loff_t)page->index << PAGE_SHIFT; + /* enable NOFS for memory allocation */ + flags = memalloc_nofs_save(); + old_cred = hmdfs_override_creds(conn->sbi->cred); + pos += cache->data_offs << HMDFS_STASH_BLK_SHIFT; + written = kernel_write(cache->cache_file, buf, ctx->count, &pos); + hmdfs_revert_creds(old_cred); + memalloc_nofs_restore(flags); + kunmap(page); + + if (written != ctx->count) { + hmdfs_err("stash peer 0x%x:0x%llx ino 0x%llx page 0x%lx data_offs 0x%x len %u err %zd", + conn->owner, conn->device_id, info->remote_ino, + page->index, cache->data_offs, ctx->count, written); + err = -EIO; + } + + return err; +} + +int hmdfs_stash_writepage(struct hmdfs_peer *conn, + struct hmdfs_writepage_context *ctx) +{ + struct inode *inode = ctx->page->mapping->host; + struct hmdfs_inode_info *info = hmdfs_i(inode); + struct hmdfs_cache_info *cache = NULL; + int err; + + /* e.g. fail to create stash file */ + cache = info->cache; + if (!cache) + return -EIO; + + err = hmdfs_stash_write_local_file(conn, info, ctx, cache); + if (!err) { + hmdfs_client_writepage_done(info, ctx); + atomic64_inc(&cache->written_pgs); + put_task_struct(ctx->caller); + kfree(ctx); + } + atomic64_inc(&cache->to_write_pgs); + + return err; +} + +static void hmdfs_stash_rebuild_status(struct hmdfs_peer *conn, + struct inode *inode) +{ + char *path_str = NULL; + struct hmdfs_inode_info *info = NULL; + const struct cred *old_cred = NULL; + struct path path; + struct path *stash_path = NULL; + int err = 0; + + path_str = kmalloc(HMDFS_STASH_PATH_LEN, GFP_KERNEL); + if (!path_str) { + err = -ENOMEM; + return; + } + + info = hmdfs_i(inode); + err = snprintf(path_str, HMDFS_STASH_PATH_LEN, "%s/0x%llx", + conn->cid, info->remote_ino); + if (err >= HMDFS_STASH_PATH_LEN) { + kfree(path_str); + hmdfs_err("peer 0x%x:0x%llx inode 0x%llx too long name len", + conn->owner, conn->device_id, info->remote_ino); + return; + } + old_cred = hmdfs_override_creds(conn->sbi->cred); + stash_path = &conn->sbi->stash_work_dir; + err = vfs_path_lookup(stash_path->dentry, stash_path->mnt, + path_str, 0, &path); + hmdfs_revert_creds(old_cred); + if (!err) { + if (hmdfs_is_reg(path.dentry)) { + WRITE_ONCE(info->stash_status, + HMDFS_REMOTE_INODE_RESTORING); + ihold(&info->vfs_inode); + hmdfs_track_inode_locked(conn, info); + } else { + hmdfs_info("peer 0x%x:0x%llx inode 0x%llx unexpected stashed file mode 0%o", + conn->owner, conn->device_id, + info->remote_ino, + d_inode(path.dentry)->i_mode); + } + + path_put(&path); + } else if (err && err != -ENOENT) { + hmdfs_err("peer 0x%x:0x%llx inode 0x%llx find %s err %d", + conn->owner, conn->device_id, info->remote_ino, + path_str, err); + } + + kfree(path_str); +} + +static inline bool +hmdfs_need_rebuild_inode_stash_status(struct 
hmdfs_peer *conn, umode_t mode) +{ + return hmdfs_is_stash_enabled(conn->sbi) && + READ_ONCE(conn->need_rebuild_stash_list) && + (S_ISREG(mode) || S_ISLNK(mode)); +} + +void hmdfs_remote_init_stash_status(struct hmdfs_peer *conn, + struct inode *inode, umode_t mode) +{ + if (!hmdfs_need_rebuild_inode_stash_status(conn, mode)) + return; + + atomic_inc(&conn->rebuild_inode_status_nr); + /* + * Use smp_mb__after_atomic() to ensure order between writing + * @conn->rebuild_inode_status_nr and reading + * @conn->need_rebuild_stash_list. + */ + smp_mb__after_atomic(); + if (READ_ONCE(conn->need_rebuild_stash_list)) + hmdfs_stash_rebuild_status(conn, inode); + if (atomic_dec_and_test(&conn->rebuild_inode_status_nr)) + wake_up(&conn->rebuild_inode_status_wq); +} + +static struct hmdfs_node_cb_desc stash_cb[] = { + { + .evt = NODE_EVT_OFFLINE, + .sync = true, + .min_version = DFS_2_0, + .fn = hmdfs_stash_offline_prepare, + }, + { + .evt = NODE_EVT_OFFLINE, + .sync = false, + .min_version = DFS_2_0, + .fn = hmdfs_stash_offline_do_stash, + }, + /* Don't know the peer version yet, so min_version is 0 */ + { + .evt = NODE_EVT_ADD, + .sync = true, + .fn = hmdfs_stash_add_do_check, + }, + { + .evt = NODE_EVT_ONLINE, + .sync = false, + .min_version = DFS_2_0, + .fn = hmdfs_stash_online_prepare, + }, + { + .evt = NODE_EVT_ONLINE, + .sync = false, + .min_version = DFS_2_0, + .fn = hmdfs_stash_online_do_restore, + }, + { + .evt = NODE_EVT_DEL, + .sync = true, + .min_version = DFS_2_0, + .fn = hmdfs_stash_del_do_cleanup, + }, +}; + +void __init hmdfs_stash_add_node_evt_cb(void) +{ + hmdfs_node_add_evt_cb(stash_cb, ARRAY_SIZE(stash_cb)); +} + diff --git a/fs/hmdfs/stash.h b/fs/hmdfs/stash.h new file mode 100644 index 000000000000..f38e737f9472 --- /dev/null +++ b/fs/hmdfs/stash.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/stash.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_STASH_H +#define HMDFS_STASH_H + +#include "hmdfs.h" +#include "hmdfs_client.h" + +extern void hmdfs_stash_add_node_evt_cb(void); + +extern void hmdfs_exit_stash(struct hmdfs_sb_info *sbi); +extern int hmdfs_init_stash(struct hmdfs_sb_info *sbi); + +extern int hmdfs_stash_writepage(struct hmdfs_peer *conn, + struct hmdfs_writepage_context *ctx); + +extern void hmdfs_remote_init_stash_status(struct hmdfs_peer *conn, + struct inode *inode, umode_t mode); + +#endif diff --git a/fs/hmdfs/super.c b/fs/hmdfs/super.c new file mode 100644 index 000000000000..92012f80ab37 --- /dev/null +++ b/fs/hmdfs/super.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/super.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include <linux/backing-dev.h> +#include <linux/kernel.h> +#include <linux/parser.h> +#include <linux/slab.h> + +#include "hmdfs.h" + +enum { + OPT_RA_PAGES, + OPT_LOCAL_DST, + OPT_CACHE_DIR, + OPT_S_CASE, + OPT_VIEW_TYPE, + OPT_NO_OFFLINE_STASH, + OPT_NO_DENTRY_CACHE, + OPT_ERR, +}; + +static match_table_t hmdfs_tokens = { + { OPT_RA_PAGES, "ra_pages=%s" }, + { OPT_LOCAL_DST, "local_dst=%s" }, + { OPT_CACHE_DIR, "cache_dir=%s" }, + { OPT_S_CASE, "sensitive" }, + { OPT_VIEW_TYPE, "merge" }, + { OPT_NO_OFFLINE_STASH, "no_offline_stash" }, + { OPT_NO_DENTRY_CACHE, "no_dentry_cache" }, + { OPT_ERR, NULL }, +}; + +#define DEFAULT_RA_PAGES 128 + +void __hmdfs_log(const char *level, const bool ratelimited, + const char *function, const char *fmt, ...) 
+{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + if (ratelimited) + printk_ratelimited("%s hmdfs: %s() %pV\n", level, + function, &vaf); + else + printk("%s hmdfs: %s() %pV\n", level, function, &vaf); + va_end(args); +} + +static int hmdfs_match_strdup(const substring_t *s, char **dst) +{ + char *dup = NULL; + + dup = match_strdup(s); + if (!dup) + return -ENOMEM; + + *dst = dup; + + return 0; +} + +int hmdfs_parse_options(struct hmdfs_sb_info *sbi, const char *data) +{ + char *p = NULL; + char *name = NULL; + char *options = NULL; + char *options_src = NULL; + substring_t args[MAX_OPT_ARGS]; + unsigned long value = DEFAULT_RA_PAGES; + struct super_block *sb = sbi->sb; + int err = 0; + + options = kstrdup(data, GFP_KERNEL); + if (data && !options) { + err = -ENOMEM; + goto out; + } + options_src = options; + err = super_setup_bdi(sb); + if (err) + goto out; + + while ((p = strsep(&options_src, ",")) != NULL) { + int token; + + if (!*p) + continue; + args[0].to = args[0].from = NULL; + token = match_token(p, hmdfs_tokens, args); + + switch (token) { + case OPT_RA_PAGES: + name = match_strdup(&args[0]); + if (name) { + err = kstrtoul(name, 10, &value); + kfree(name); + name = NULL; + if (err) + goto out; + } + break; + case OPT_LOCAL_DST: + err = hmdfs_match_strdup(&args[0], &sbi->local_dst); + if (err) + goto out; + break; + case OPT_CACHE_DIR: + err = hmdfs_match_strdup(&args[0], &sbi->cache_dir); + if (err) + goto out; + break; + case OPT_S_CASE: + sbi->s_case_sensitive = true; + break; + case OPT_VIEW_TYPE: + sbi->s_merge_switch = true; + break; + case OPT_NO_OFFLINE_STASH: + sbi->s_offline_stash = false; + break; + case OPT_NO_DENTRY_CACHE: + sbi->s_dentry_cache = false; + break; + default: + err = -EINVAL; + goto out; + } + } +out: + kfree(options); + sb->s_bdi->ra_pages = value; + if (sbi->local_dst == NULL) + err = -EINVAL; + + if (sbi->s_offline_stash && !sbi->cache_dir) { + hmdfs_warning("no cache_dir for offline stash"); + sbi->s_offline_stash = false; + } + + if (sbi->s_dentry_cache && !sbi->cache_dir) { + hmdfs_warning("no cache_dir for dentry cache"); + sbi->s_dentry_cache = false; + } + + return err; +} -- Gitee
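A rough usage sketch for the option parser above: hmdfs_parse_options() walks a comma-separated option string against hmdfs_tokens; local_dst is mandatory (the function returns -EINVAL without it), and cache_dir must be supplied to keep offline stash and the dentry cache enabled. A mount invocation would therefore look something like the line below, where only the option names come from the patch and the source, destination and cache paths are hypothetical:

    mount -t hmdfs -o merge,sensitive,ra_pages=128,local_dst=/mnt/hmdfs/dst,cache_dir=/data/hmdfs_cache /data/hmdfs_src /mnt/hmdfs

Leaving out no_offline_stash and no_dentry_cache keeps both features at their defaults; the two warnings at the end of hmdfs_parse_options() fire only when cache_dir is absent.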