diff --git a/OAT.xml b/OAT.xml new file mode 100644 index 0000000000000000000000000000000000000000..4f2fcc9b122539fbde514e32f28d90fe0bfad8d0 --- /dev/null +++ b/OAT.xml @@ -0,0 +1,448 @@ + + + + + + COPYING + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/README.OpenSource b/README.OpenSource new file mode 100644 index 0000000000000000000000000000000000000000..8313cf171abe785223d6a57cd172c5f900b7e8e5 --- /dev/null +++ b/README.OpenSource @@ -0,0 +1,11 @@ +[ + { + "Name": "linux-5.10", + "License": "GPL-2.0+", + "License File": "COPYING", + "Version Number": "5.10.79", + "Owner": "liuyu82@huawei.com", + "Upstream URL": "https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/log/?h=linux-5.10.y", + "Description": "linux kernel 5.10" + } +] \ No newline at end of file diff --git a/fs/Kconfig b/fs/Kconfig index da524c4d7b7e03f7d9a218f9e78ef0849b16de2d..b95f212be39e31501f342b42bdada3d3b1f4b1df 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -22,6 +22,7 @@ config FS_IOMAP source "fs/ext2/Kconfig" source "fs/ext4/Kconfig" +source "fs/hmdfs/Kconfig" source "fs/jbd2/Kconfig" config FS_MBCACHE diff --git a/fs/Makefile b/fs/Makefile index 999d1a23f036c9f96a06e056d333e2e3832cdc37..d71954aaba20e3adf2e640c5f91549605d71af69 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -71,6 +71,7 @@ obj-$(CONFIG_DLM) += dlm/ obj-$(CONFIG_FSCACHE) += fscache/ obj-$(CONFIG_REISERFS_FS) += reiserfs/ obj-$(CONFIG_EXT4_FS) += ext4/ +obj-$(CONFIG_HMDFS_FS) += hmdfs/ # We place ext4 before ext2 so that clean ext3 root fs's do NOT mount using the # ext2 driver, which doesn't know about journalling! Explicitly request ext2 # by giving the rootfstype= parameter. diff --git a/fs/hmdfs/Kconfig b/fs/hmdfs/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..379606a6f46630df6aa76b36c30993f682c8f353 --- /dev/null +++ b/fs/hmdfs/Kconfig @@ -0,0 +1,48 @@ +config HMDFS_FS + tristate "HMDFS filesystem support" + help + HMDFS is an overlay file system. Relying on the underlying file system, + under the premise of networking, file exchanges across devices can be + realized. Device view and merge view are provided. In the device view, + the shared directories of the corresponding devices are provided under + different device directories; in the merge view, acollection of shared + files of all devices is provided. + +config HMDFS_FS_PERMISSION + bool "HMDFS application permission management" + depends on HMDFS_FS + help + HMDFS provides cross-device file and directory sharing. Only the same + application can access the files and directories under the corresponding + package directory. it provides management and control of access + permissions. + + If unsure, say N. 
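+
+# A hypothetical defconfig fragment (illustration only, not part of the
+# original change) that enables HMDFS together with permission management:
+#   CONFIG_HMDFS_FS=y
+#   CONFIG_HMDFS_FS_PERMISSION=y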
+
+config HMDFS_FS_ENCRYPTION
+	bool "HMDFS message encryption"
+	depends on HMDFS_FS && TLS
+	help
+	  HMDFS provides cross-device file and directory sharing by sending and
+	  receiving network messages. To ensure data security, TLS encryption is
+	  provided.
+
+	  If you want to improve performance, say N.
+
+config HMDFS_FS_DEBUG
+	bool "HMDFS debug log"
+	depends on HMDFS_FS
+	help
+	  HMDFS prints a lot of logs, many of which are debug messages that
+	  are unnecessary during normal operation but helpful when analyzing
+	  problems.
+
+	  If unsure, say N.
+
+config HMDFS_FS_FAULT_INJECT
+	bool "HMDFS fault inject"
+	depends on HMDFS_FS
+	help
+	  HMDFS provides fault injection for testing.
+
+	  If unsure, say N.
diff --git a/fs/hmdfs/Makefile b/fs/hmdfs/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..25c3eef3dd9d6cdcb55f9da348bb075fbaeae948
--- /dev/null
+++ b/fs/hmdfs/Makefile
@@ -0,0 +1,15 @@
+obj-$(CONFIG_HMDFS_FS) += hmdfs.o
+ccflags-y += -I$(src)
+
+hmdfs-y := main.o super.o inode.o dentry.o inode_root.o file_merge.o
+hmdfs-y += hmdfs_client.o hmdfs_server.o inode_local.o inode_remote.o
+hmdfs-y += inode_merge.o hmdfs_dentryfile.o file_root.o file_remote.o
+hmdfs-y += file_local.o client_writeback.o server_writeback.o stash.o
+
+hmdfs-y += comm/device_node.o comm/message_verify.o comm/node_cb.o
+hmdfs-y += comm/connection.o comm/socket_adapter.o comm/transport.o
+
+hmdfs-$(CONFIG_HMDFS_FS_ENCRYPTION) += comm/crypto.o
+hmdfs-$(CONFIG_HMDFS_FS_PERMISSION) += authority/authentication.o
+
+hmdfs-$(CONFIG_HMDFS_FS_FAULT_INJECT) += comm/fault_inject.o
diff --git a/fs/hmdfs/authority/authentication.c b/fs/hmdfs/authority/authentication.c
new file mode 100644
index 0000000000000000000000000000000000000000..97d842147050e0fdc8db47824d5912be573e5ef9
--- /dev/null
+++ b/fs/hmdfs/authority/authentication.c
@@ -0,0 +1,486 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/authority/authentication.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include "authentication.h"
+#include <linux/fs_struct.h>
+#include <linux/sched/task.h>
+
+#include "hmdfs.h"
+
+struct fs_struct *hmdfs_override_fsstruct(struct fs_struct *saved_fs)
+{
+#if (defined CONFIG_HMDFS_FS_PERMISSION) && (defined CONFIG_SDCARD_FS)
+	struct fs_struct *copied_fs = copy_fs_struct(saved_fs);
+
+	if (!copied_fs)
+		return NULL;
+	copied_fs->umask = 0;
+	task_lock(current);
+	current->fs = copied_fs;
+	task_unlock(current);
+	return copied_fs;
+#else
+	return saved_fs;
+#endif
+}
+
+void hmdfs_revert_fsstruct(struct fs_struct *saved_fs,
+			   struct fs_struct *copied_fs)
+{
+#if (defined CONFIG_HMDFS_FS_PERMISSION) && (defined CONFIG_SDCARD_FS)
+	task_lock(current);
+	current->fs = saved_fs;
+	task_unlock(current);
+	free_fs_struct(copied_fs);
+#endif
+}
+
+const struct cred *hmdfs_override_fsids(bool is_recv_thread)
+{
+	struct cred *cred = NULL;
+	const struct cred *old_cred = NULL;
+
+	cred = prepare_creds();
+	if (!cred)
+		return NULL;
+
+	cred->fsuid = MEDIA_RW_UID;
+	cred->fsgid = is_recv_thread ?
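+		     /* recv threads act as "everybody"; others keep media_rw */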
+ KGIDT_INIT((gid_t)AID_EVERYBODY) : MEDIA_RW_GID; + + old_cred = override_creds(cred); + + return old_cred; +} + +const struct cred *hmdfs_override_dir_fsids(struct inode *dir, + struct dentry *dentry, __u16 *_perm) +{ + struct hmdfs_inode_info *hii = hmdfs_i(dir); + struct cred *cred = NULL; + const struct cred *old_cred = NULL; + __u16 level = hmdfs_perm_get_next_level(hii->perm); + __u16 perm = 0; + + cred = prepare_creds(); + if (!cred) + return NULL; + + switch (level) { + case HMDFS_PERM_MNT: + /* system : media_rw */ + cred->fsuid = SYSTEM_UID; + perm = (hii->perm & HMDFS_DIR_TYPE_MASK) | level; + break; + case HMDFS_PERM_DFS: + /* + * data : system : media_rw + * system: system : media_rw, need authority + * other : media_rw : media_rw + **/ + if (!strcmp(dentry->d_name.name, PKG_ROOT_NAME)) { + cred->fsuid = SYSTEM_UID; + perm = HMDFS_DIR_DATA | level; + } else if (!strcmp(dentry->d_name.name, SYSTEM_NAME)) { + cred->fsuid = SYSTEM_UID; + perm = AUTH_SYSTEM | HMDFS_DIR_SYSTEM | level; + } else { + cred->fsuid = MEDIA_RW_UID; + perm = HMDFS_DIR_PUBLIC | level; + } + break; + case HMDFS_PERM_PKG: + if (is_data_dir(hii->perm)) { + /* + * Mkdir for app pkg. + * Get the appid by passing pkgname to configfs. + * Set ROOT + media_rw for remote install, + * local uninstall. + * Set appid + media_rw for local install. + */ + uid_t app_id = 0; + + if (app_id != 0) + cred->fsuid = KUIDT_INIT(app_id); + else + cred->fsuid = ROOT_UID; + perm = AUTH_PKG | HMDFS_DIR_PKG | level; + } else { + cred->fsuid = dir->i_uid; + perm = (hii->perm & AUTH_MASK) | HMDFS_DIR_DEFAULT | level; + } + break; + case HMDFS_PERM_OTHER: + cred->fsuid = dir->i_uid; + if (is_pkg_auth(hii->perm)) + perm = AUTH_PKG | HMDFS_DIR_PKG_SUB | level; + else + perm = (hii->perm & AUTH_MASK) | HMDFS_DIR_DEFAULT | level; + break; + default: + /* ! 
it should not get to here */ + hmdfs_err("hmdfs perm incorrect got default case, level:%u", level); + break; + } + + cred->fsgid = MEDIA_RW_GID; + *_perm = perm; + old_cred = override_creds(cred); + + return old_cred; +} + +int hmdfs_override_dir_id_fs(struct cache_fs_override *or, + struct inode *dir, + struct dentry *dentry, + __u16 *perm) +{ + or->saved_cred = hmdfs_override_dir_fsids(dir, dentry, perm); + if (!or->saved_cred) + return -ENOMEM; + + or->saved_fs = current->fs; + or->copied_fs = hmdfs_override_fsstruct(or->saved_fs); + if (!or->copied_fs) { + hmdfs_revert_fsids(or->saved_cred); + return -ENOMEM; + } + + return 0; +} + +void hmdfs_revert_dir_id_fs(struct cache_fs_override *or) +{ + hmdfs_revert_fsstruct(or->saved_fs, or->copied_fs); + hmdfs_revert_fsids(or->saved_cred); +} + +const struct cred *hmdfs_override_file_fsids(struct inode *dir, __u16 *_perm) +{ + struct hmdfs_inode_info *hii = hmdfs_i(dir); + struct cred *cred = NULL; + const struct cred *old_cred = NULL; + __u16 level = hmdfs_perm_get_next_level(hii->perm); + uint16_t perm; + + perm = HMDFS_FILE_DEFAULT | level; + + cred = prepare_creds(); + if (!cred) + return NULL; + + cred->fsuid = dir->i_uid; + cred->fsgid = dir->i_gid; + if (is_pkg_auth(hii->perm)) + perm = AUTH_PKG | HMDFS_FILE_PKG_SUB | level; + else + perm = (hii->perm & AUTH_MASK) | HMDFS_FILE_DEFAULT | level; + + *_perm = perm; + old_cred = override_creds(cred); + + return old_cred; +} + +void hmdfs_revert_fsids(const struct cred *old_cred) +{ + const struct cred *cur_cred; + + cur_cred = current->cred; + revert_creds(old_cred); + put_cred(cur_cred); +} + +int hmdfs_persist_perm(struct dentry *dentry, __u16 *perm) +{ + int err; + struct inode *minode = d_inode(dentry); + + if (!minode) + return -EINVAL; + + inode_lock(minode); + err = __vfs_setxattr(dentry, minode, HMDFS_PERM_XATTR, perm, + sizeof(*perm), XATTR_CREATE); + if (!err) + fsnotify_xattr(dentry); + else if (err && err != -EEXIST) + hmdfs_err("failed to setxattr, err=%d", err); + inode_unlock(minode); + return err; +} + +__u16 hmdfs_read_perm(struct inode *inode) +{ + __u16 ret = 0; + int size = 0; + struct dentry *dentry = d_find_alias(inode); + + if (!dentry) + return ret; + + size = __vfs_getxattr(dentry, inode, HMDFS_PERM_XATTR, &ret, + sizeof(ret)); + /* + * some file may not set setxattr with perm + * eg. files created in sdcard dir by other user + **/ + if (size < 0 || size != sizeof(ret)) + ret = HMDFS_ALL_MASK; + + dput(dentry); + return ret; +} + +static __u16 __inherit_perm_dir(struct inode *parent, struct inode *inode) +{ + __u16 perm = 0; + struct hmdfs_inode_info *info = hmdfs_i(parent); + __u16 level = hmdfs_perm_get_next_level(info->perm); + struct dentry *dentry = d_find_alias(inode); + + if (!dentry) + return perm; + + switch (level) { + case HMDFS_PERM_MNT: + /* system : media_rw */ + perm = (info->perm & HMDFS_DIR_TYPE_MASK) | level; + break; + case HMDFS_PERM_DFS: + /* + * data : system : media_rw + * system: system : media_rw, need authority + * other : media_rw : media_rw + **/ + if (!strcmp(dentry->d_name.name, PKG_ROOT_NAME)) { + // "data" + perm = HMDFS_DIR_DATA | level; + } else if (!strcmp(dentry->d_name.name, SYSTEM_NAME)) { + // "system" + perm = AUTH_SYSTEM | HMDFS_DIR_SYSTEM | level; + } else { + perm = HMDFS_DIR_PUBLIC | level; + } + break; + case HMDFS_PERM_PKG: + if (is_data_dir(info->perm)) { + /* + * Mkdir for app pkg. + * Get the appid by passing pkgname to configfs. + * Set ROOT + media_rw for remote install, + * local uninstall. 
+ * Set appid + media_rw for local install. + */ + perm = AUTH_PKG | HMDFS_DIR_PKG | level; + } else { + perm = (info->perm & AUTH_MASK) | HMDFS_DIR_DEFAULT | level; + } + break; + case HMDFS_PERM_OTHER: + if (is_pkg_auth(info->perm)) + perm = AUTH_PKG | HMDFS_DIR_PKG_SUB | level; + else + perm = (info->perm & AUTH_MASK) | HMDFS_DIR_DEFAULT | level; + break; + default: + /* ! it should not get to here */ + hmdfs_err("hmdfs perm incorrect got default case, level:%u", level); + break; + } + dput(dentry); + return perm; +} + +static __u16 __inherit_perm_file(struct inode *parent) +{ + struct hmdfs_inode_info *hii = hmdfs_i(parent); + __u16 level = hmdfs_perm_get_next_level(hii->perm); + uint16_t perm; + + perm = HMDFS_FILE_DEFAULT | level; + + if (is_pkg_auth(hii->perm)) + perm = AUTH_PKG | HMDFS_FILE_PKG_SUB | level; + else + perm = (hii->perm & AUTH_MASK) | HMDFS_FILE_DEFAULT | level; + + return perm; +} + +static void fixup_ownership(struct inode *child, struct dentry *lower_dentry, + uid_t uid) +{ + int err; + struct iattr newattrs; + + newattrs.ia_valid = ATTR_UID | ATTR_FORCE; + newattrs.ia_uid = KUIDT_INIT(uid); + if (!S_ISDIR(d_inode(lower_dentry)->i_mode)) + newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV; + + inode_lock(d_inode(lower_dentry)); + err = notify_change(lower_dentry, &newattrs, NULL); + inode_unlock(d_inode(lower_dentry)); + + if (!err) + child->i_uid = KUIDT_INIT(uid); + else + hmdfs_err("update PKG uid failed, err = %d", err); +} + +static void fixup_ownership_user_group(struct inode *child, struct dentry *lower_dentry, + uid_t uid, gid_t gid) +{ + int err; + struct iattr newattrs; + + newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_FORCE; + newattrs.ia_uid = KUIDT_INIT(uid); + newattrs.ia_gid = KGIDT_INIT(gid); + if (!S_ISDIR(d_inode(lower_dentry)->i_mode)) + newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; + + inode_lock(d_inode(lower_dentry)); + err = notify_change(lower_dentry, &newattrs, NULL); + inode_unlock(d_inode(lower_dentry)); + + if (!err) { + child->i_uid = KUIDT_INIT(uid); + child->i_gid = KGIDT_INIT(gid); + } else { + hmdfs_err("update PKG uid failed, err = %d", err); + } +} + +__u16 hmdfs_perm_inherit(struct inode *parent_inode, struct inode *child) +{ + __u16 perm; + + if (S_ISDIR(child->i_mode)) + perm = __inherit_perm_dir(parent_inode, child); + else + perm = __inherit_perm_file(parent_inode); + return perm; +} + +void check_and_fixup_ownership(struct inode *parent_inode, struct inode *child, + struct dentry *lower_dentry, const char *name) +{ + uid_t appid; + struct hmdfs_inode_info *info = hmdfs_i(child); + + if (info->perm == HMDFS_ALL_MASK) + info->perm = hmdfs_perm_inherit(parent_inode, child); + + switch (info->perm & HMDFS_DIR_TYPE_MASK) { + case HMDFS_DIR_PKG: + appid = 0; + if (appid != child->i_uid.val) + fixup_ownership(child, lower_dentry, appid); + + break; + case HMDFS_DIR_DATA: + case HMDFS_FILE_PKG_SUB: + case HMDFS_DIR_DEFAULT: + case HMDFS_FILE_DEFAULT: + if (parent_inode->i_uid.val != child->i_uid.val || + parent_inode->i_gid.val != child->i_gid.val) + fixup_ownership_user_group(child, lower_dentry, + parent_inode->i_uid.val, + parent_inode->i_gid.val); + break; + case HMDFS_DIR_PUBLIC: + fixup_ownership(child, lower_dentry, (uid_t)AID_MEDIA_RW); + + break; + default: + break; + } +} + +void check_and_fixup_ownership_remote(struct inode *dir, + struct dentry *dentry) +{ + struct hmdfs_inode_info *hii = hmdfs_i(dir); + struct inode *dinode = d_inode(dentry); + struct hmdfs_inode_info *dinfo = 
hmdfs_i(dinode); + __u16 level = hmdfs_perm_get_next_level(hii->perm); + __u16 perm = 0; + + hmdfs_debug("level:0x%X", level); + switch (level) { + case HMDFS_PERM_MNT: + /* system : media_rw */ + dinode->i_uid = SYSTEM_UID; + perm = (hii->perm & HMDFS_DIR_TYPE_MASK) | level; + break; + case HMDFS_PERM_DFS: + /* + * data : system : media_rw + * system: system : media_rw, need authority + * other : media_rw : media_rw + **/ + if (!strcmp(dentry->d_name.name, PKG_ROOT_NAME)) { + // "data" + dinode->i_uid = SYSTEM_UID; + perm = HMDFS_DIR_DATA | level; + } else if (!strcmp(dentry->d_name.name, SYSTEM_NAME)) { + // "system" + dinode->i_uid = SYSTEM_UID; + perm = AUTH_SYSTEM | HMDFS_DIR_SYSTEM | level; + } else { + dinode->i_uid = MEDIA_RW_UID; + perm = HMDFS_DIR_PUBLIC | level; + } + break; + case HMDFS_PERM_PKG: + if (is_data_dir(hii->perm)) { + /* + * Mkdir for app pkg. + * Get the appid by passing pkgname to configfs. + * Set ROOT + media_rw for remote install, + * local uninstall. + * Set appid + media_rw for local install. + */ + uid_t app_id = 0; + + if (app_id != 0) + dinode->i_uid = KUIDT_INIT(app_id); + else + dinode->i_uid = ROOT_UID; + perm = AUTH_PKG | HMDFS_DIR_PKG | level; + } else { + dinode->i_uid = dir->i_uid; + perm = (hii->perm & AUTH_MASK) | HMDFS_DIR_DEFAULT | level; + } + break; + case HMDFS_PERM_OTHER: + dinode->i_uid = dir->i_uid; + if (is_pkg_auth(hii->perm)) + perm = AUTH_PKG | HMDFS_DIR_PKG_SUB | level; + else + perm = (hii->perm & AUTH_MASK) | HMDFS_DIR_DEFAULT | level; + break; + default: + /* ! it should not get to here */ + hmdfs_err("hmdfs perm incorrect got default case, level:%u", level); + break; + } + + dinode->i_gid = MEDIA_RW_GID; + dinfo->perm = perm; +} + +void hmdfs_root_inode_perm_init(struct inode *root_inode) +{ + struct hmdfs_inode_info *hii = hmdfs_i(root_inode); + + hii->perm = HMDFS_DIR_ROOT | HMDFS_PERM_MNT; + set_inode_uid(root_inode, SYSTEM_UID); + set_inode_gid(root_inode, MEDIA_RW_GID); +} diff --git a/fs/hmdfs/authority/authentication.h b/fs/hmdfs/authority/authentication.h new file mode 100644 index 0000000000000000000000000000000000000000..e8b7bed53fb9197b0456f3d8e446a8a43cb57fad --- /dev/null +++ b/fs/hmdfs/authority/authentication.h @@ -0,0 +1,328 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/authority/authentication.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef AUTHENTICATION_H +#define AUTHENTICATION_H + +#include +#include +#include +#include +#include +#include "hmdfs.h" + +struct cache_fs_override { + struct fs_struct *saved_fs; + struct fs_struct *copied_fs; + const struct cred *saved_cred; +}; + +#ifdef CONFIG_HMDFS_FS_PERMISSION + +#define AID_ROOT 0 +#define AID_SYSTEM 1000 +#define AID_SDCARD_RW 1015 +#define AID_MEDIA_RW 1023 +#define AID_EVERYBODY 9997 + +/* copied from sdcardfs/multiuser.h */ +#define AID_USER_OFFSET 100000 /* offset for uid ranges for each user */ + +#define HMDFS_PERM_XATTR "user.hmdfs.perm" + +#define ROOT_UID KUIDT_INIT(AID_ROOT) +#define SYSTEM_UID KUIDT_INIT(AID_SYSTEM) +#define MEDIA_RW_UID KUIDT_INIT(AID_MEDIA_RW) + +#define SYSTEM_GID KGIDT_INIT((gid_t) AID_SYSTEM) +#define MEDIA_RW_GID KGIDT_INIT(AID_MEDIA_RW) +#define SDCARD_RW_GID KGIDT_INIT(AID_SDCARD_RW) + +#define PKG_ROOT_NAME "data" +#define SYSTEM_NAME "system" + +/* + * | perm fix | permmnt | permdfs | permpkg | perm other + * /mnt/mdfs/ accoundID / device view / local / DATA / packageName /... + * / system /... + * / documents /... + * / devid /....... 
+ * / merge view / + * / sdcard / + **/ +#define HMDFS_PERM_MASK 0x000F + +#define HMDFS_PERM_FIX 0 +#define HMDFS_PERM_MNT 1 +#define HMDFS_PERM_DFS 2 +#define HMDFS_PERM_PKG 3 +#define HMDFS_PERM_OTHER 4 + +static inline bool is_perm_fix(__u16 perm) +{ + return (perm & HMDFS_PERM_MASK) == HMDFS_PERM_FIX; +} + +static inline bool is_perm_mnt(__u16 perm) +{ + return (perm & HMDFS_PERM_MASK) == HMDFS_PERM_MNT; +} + +static inline bool is_perm_dfs(__u16 perm) +{ + return (perm & HMDFS_PERM_MASK) == HMDFS_PERM_DFS; +} + +static inline bool is_perm_pkg(__u16 perm) +{ + return (perm & HMDFS_PERM_MASK) == HMDFS_PERM_PKG; +} + +static inline bool is_perm_other(__u16 perm) +{ + return (perm & HMDFS_PERM_MASK) == HMDFS_PERM_OTHER; +} + +static inline void hmdfs_check_cred(const struct cred *cred) +{ + if (cred->fsuid.val != AID_SYSTEM || cred->fsgid.val != AID_SYSTEM) + hmdfs_warning("uid is %u, gid is %u", cred->fsuid.val, + cred->fsgid.val); +} + +/* dir and file type mask for hmdfs */ +#define HMDFS_DIR_TYPE_MASK 0x00F0 + +/* LEVEL 0 perm fix - permmnt , only root dir */ +#define HMDFS_DIR_ROOT 0x0010 + +/* LEVEL 1 perm dfs */ +#define HMDFS_DIR_PUBLIC 0x0020 +#define HMDFS_DIR_DATA 0x0030 +#define HMDFS_DIR_SYSTEM 0x0040 + +/* LEVEL 2 HMDFS_PERM_PKG */ +#define HMDFS_DIR_PKG 0x0050 + +/* LEVEL 2~n HMDFS_PERM_OTHER */ +#define PUBLIC_FILE 0x0060 +#define PUBLIC_SUB_DIR 0x0070 +#define SYSTEM_SUB_DIR 0x0080 +#define SYSTEM_SUB_FILE 0x0090 + +#define HMDFS_DIR_PKG_SUB 0x00A0 +#define HMDFS_FILE_PKG_SUB 0x00B0 + +/* access right is derived + * PUBLIC_SUB_DIR SYSTEM_SUB_DIR HMDFS_DIR_PKG_SUB + * PUBLIC_FILE SYSTEM_SUB_FILE HMDFS_FILE_PKG_SUB + */ +#define HMDFS_DIR_DEFAULT 0x00C0 +#define HMDFS_FILE_DEFAULT 0x00D0 +#define HMDFS_TYPE_DEFAULT 0x0000 + +static inline bool is_data_dir(__u16 perm) +{ + return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_DIR_DATA; +} + +static inline bool is_pkg_dir(__u16 perm) +{ + return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_DIR_PKG; +} + +static inline bool is_pkg_sub_dir(__u16 perm) +{ + return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_DIR_PKG_SUB; +} + +static inline bool is_pkg_sub_file(__u16 perm) +{ + return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_FILE_PKG_SUB; +} + +static inline bool is_default_dir(__u16 perm) +{ + return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_DIR_DEFAULT; +} + +static inline bool is_default_file(__u16 perm) +{ + return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_FILE_DEFAULT; +} + +#define AUTH_MASK 0x0F00 +#define AUTH_PKG 0x0100 +#define AUTH_SYSTEM 0x0200 + +static inline bool is_pkg_auth(__u16 perm) +{ + return (perm & AUTH_MASK) == AUTH_PKG; +} + +static inline bool is_system_auth(__u16 perm) +{ + return (perm & AUTH_MASK) == AUTH_SYSTEM; +} + +#define HMDFS_MOUNT_POINT_MASK 0xF000 +#define HMDFS_MNT_COMMON 0x0000 // sdcard +#define HMDFS_MNT_SDCARD 0x1000 // sdcard +#define HMDFS_MNT_ACNTID 0x2000 // accound id + +#define HMDFS_ALL_MASK (HMDFS_MOUNT_POINT_MASK | AUTH_MASK | HMDFS_DIR_TYPE_MASK | HMDFS_PERM_MASK) + + +static inline void set_inode_gid(struct inode *inode, kgid_t gid) +{ + inode->i_gid = gid; +} + +static inline kuid_t get_inode_uid(struct inode *inode) +{ + kuid_t uid = inode->i_uid; + return uid; +} + +static inline void set_inode_uid(struct inode *inode, kuid_t uid) +{ + inode->i_uid = uid; +} + +static inline kuid_t hmdfs_override_inode_uid(struct inode *inode) +{ + kuid_t uid = get_inode_uid(inode); + + set_inode_uid(inode, current_fsuid()); + return uid; +} + +static inline void hmdfs_revert_inode_uid(struct inode *inode, kuid_t uid) +{ 
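+	/* put back the owner saved by hmdfs_override_inode_uid() */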
+ set_inode_uid(inode, uid); +} + +static inline const struct cred *hmdfs_override_creds(const struct cred *new) +{ + if (!new) + return NULL; + + return override_creds(new); +} + +static inline void hmdfs_revert_creds(const struct cred *old) +{ + if (old) + revert_creds(old); +} + +static inline __u16 hmdfs_perm_get_next_level(__u16 perm) +{ + __u16 level = (perm & HMDFS_PERM_MASK) + 1; + + if (level <= HMDFS_PERM_OTHER) + return level; + else + return HMDFS_PERM_OTHER; +} + +struct fs_struct *hmdfs_override_fsstruct(struct fs_struct *saved_fs); +void hmdfs_revert_fsstruct(struct fs_struct *saved_fs, + struct fs_struct *copied_fs); +const struct cred *hmdfs_override_fsids(bool is_recv_thread); +const struct cred *hmdfs_override_dir_fsids(struct inode *dir, + struct dentry *dentry, __u16 *perm); +const struct cred *hmdfs_override_file_fsids(struct inode *dir, __u16 *perm); +void hmdfs_revert_fsids(const struct cred *old_cred); +int hmdfs_persist_perm(struct dentry *dentry, __u16 *perm); +__u16 hmdfs_read_perm(struct inode *inode); +void hmdfs_root_inode_perm_init(struct inode *root_inode); +void check_and_fixup_ownership(struct inode *parent_inode, struct inode *child, + struct dentry *lower_dentry, const char *name); +int hmdfs_override_dir_id_fs(struct cache_fs_override *or, + struct inode *dir, + struct dentry *dentry, + __u16 *perm); +void hmdfs_revert_dir_id_fs(struct cache_fs_override *or); +void check_and_fixup_ownership_remote(struct inode *dir, + struct dentry *dentry); + +#else + +static inline +void hmdfs_root_inode_perm_init(struct inode *root_inode) +{ +} + +static inline +void hmdfs_revert_fsids(const struct cred *old_cred) +{ +} + +static inline +int hmdfs_override_dir_id_fs(struct cache_fs_override *or, + struct inode *dir, + struct dentry *dentry, + __u16 *perm) +{ + return 0; +} + +static inline +void hmdfs_revert_dir_id_fs(struct cache_fs_override *or) +{ +} + +static inline +void check_and_fixup_ownership(struct inode *parent_inode, struct inode *child, + struct dentry *lower_dentry, const char *name) +{ +} + +static inline +const struct cred *hmdfs_override_fsids(bool is_recv_thread) +{ + return ERR_PTR(-ENOTTY); +} + +static inline +const struct cred *hmdfs_override_creds(const struct cred *new) +{ + return ERR_PTR(-ENOTTY); +} + +static inline +void hmdfs_revert_creds(const struct cred *old) +{ + +} + +static inline +void check_and_fixup_ownership_remote(struct inode *dir, + struct dentry *dentry) +{ +} + +static inline +kuid_t hmdfs_override_inode_uid(struct inode *inode) +{ + return KUIDT_INIT((uid_t)0); +} + +static inline +void hmdfs_revert_inode_uid(struct inode *inode, kuid_t uid) +{ +} + +static inline +void hmdfs_check_cred(const struct cred *cred) +{ +} + +#endif /* CONFIG_HMDFS_FS_PERMISSION */ + +#endif diff --git a/fs/hmdfs/client_writeback.c b/fs/hmdfs/client_writeback.c new file mode 100644 index 0000000000000000000000000000000000000000..d4da7ec482a5b5bea34b21e2793e4120da16d090 --- /dev/null +++ b/fs/hmdfs/client_writeback.c @@ -0,0 +1,519 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/client_writeback.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hmdfs.h" +#include "hmdfs_trace.h" + +/* 200ms */ +#define HMDFS_MAX_PAUSE max((HZ / 5), 1) +#define HMDFS_BANDWIDTH_INTERVAL max((HZ / 5), 1) +/* Dirty type */ +#define HMDFS_DIRTY_FS 0 +#define HMDFS_DIRTY_FILE 1 +/* Exceed flags */ +#define HMDFS_FS_EXCEED (1 << HMDFS_DIRTY_FS) +#define HMDFS_FILE_EXCEED (1 << HMDFS_DIRTY_FILE) +/* Ratelimit calculate shift */ +#define HMDFS_LIMIT_SHIFT 10 + +void hmdfs_writeback_inodes_sb_handler(struct work_struct *work) +{ + struct hmdfs_writeback *hwb = container_of( + work, struct hmdfs_writeback, dirty_sb_writeback_work.work); + + try_to_writeback_inodes_sb(hwb->sbi->sb, WB_REASON_FS_FREE_SPACE); +} + +void hmdfs_writeback_inode_handler(struct work_struct *work) +{ + struct hmdfs_inode_info *info = NULL; + struct inode *inode = NULL; + struct hmdfs_writeback *hwb = container_of( + work, struct hmdfs_writeback, dirty_inode_writeback_work.work); + + spin_lock(&hwb->inode_list_lock); + while (likely(!list_empty(&hwb->inode_list_head))) { + info = list_first_entry(&hwb->inode_list_head, + struct hmdfs_inode_info, wb_list); + list_del_init(&info->wb_list); + spin_unlock(&hwb->inode_list_lock); + + inode = &info->vfs_inode; + write_inode_now(inode, 0); + iput(inode); + spin_lock(&hwb->inode_list_lock); + } + spin_unlock(&hwb->inode_list_lock); +} + +static void hmdfs_writeback_inodes_sb_delayed(struct super_block *sb, + unsigned int delay) +{ + struct hmdfs_sb_info *sbi = sb->s_fs_info; + unsigned long timeout; + + timeout = msecs_to_jiffies(delay); + if (!timeout || !work_busy(&sbi->h_wb->dirty_sb_writeback_work.work)) + mod_delayed_work(sbi->h_wb->dirty_sb_writeback_wq, + &sbi->h_wb->dirty_sb_writeback_work, timeout); +} + +static inline void hmdfs_writeback_inodes_sb(struct super_block *sb) +{ + hmdfs_writeback_inodes_sb_delayed(sb, 0); +} + +static void hmdfs_writeback_inode(struct super_block *sb, struct inode *inode) +{ + struct hmdfs_sb_info *sbi = sb->s_fs_info; + struct hmdfs_writeback *hwb = sbi->h_wb; + struct hmdfs_inode_info *info = hmdfs_i(inode); + + spin_lock(&hwb->inode_list_lock); + if (list_empty(&info->wb_list)) { + ihold(inode); + list_add_tail(&info->wb_list, &hwb->inode_list_head); + queue_delayed_work(hwb->dirty_inode_writeback_wq, + &hwb->dirty_inode_writeback_work, 0); + } + spin_unlock(&hwb->inode_list_lock); +} + +static unsigned long hmdfs_idirty_pages(struct inode *inode, int tag) +{ + struct pagevec pvec; + unsigned long nr_dirty_pages = 0; + pgoff_t index = 0; + +#if KERNEL_VERSION(4, 15, 0) <= LINUX_VERSION_CODE + pagevec_init(&pvec); +#else + pagevec_init(&pvec, 0); +#endif + while (pagevec_lookup_tag(&pvec, inode->i_mapping, &index, tag)) { + nr_dirty_pages += pagevec_count(&pvec); + pagevec_release(&pvec); + cond_resched(); + } + return nr_dirty_pages; +} + +static inline unsigned long hmdfs_ratio_thresh(unsigned long ratio, + unsigned long thresh) +{ + unsigned long ret = (ratio * thresh) >> HMDFS_LIMIT_SHIFT; + + return (ret == 0) ? 1 : ret; +} + +static inline unsigned long hmdfs_thresh_ratio(unsigned long base, + unsigned long thresh) +{ + unsigned long ratio = (base << HMDFS_LIMIT_SHIFT) / thresh; + + return (ratio == 0) ? 
1 : ratio;
+}
+
+void hmdfs_calculate_dirty_thresh(struct hmdfs_writeback *hwb)
+{
+	hwb->dirty_fs_thresh = DIV_ROUND_UP(hwb->dirty_fs_bytes, PAGE_SIZE);
+	hwb->dirty_file_thresh = DIV_ROUND_UP(hwb->dirty_file_bytes, PAGE_SIZE);
+	hwb->dirty_fs_bg_thresh =
+		DIV_ROUND_UP(hwb->dirty_fs_bg_bytes, PAGE_SIZE);
+	hwb->dirty_file_bg_thresh =
+		DIV_ROUND_UP(hwb->dirty_file_bg_bytes, PAGE_SIZE);
+
+	hwb->fs_bg_ratio = hmdfs_thresh_ratio(hwb->dirty_fs_bg_thresh,
+					      hwb->dirty_fs_thresh);
+	hwb->file_bg_ratio = hmdfs_thresh_ratio(hwb->dirty_file_bg_thresh,
+						hwb->dirty_file_thresh);
+	hwb->fs_file_ratio = hmdfs_thresh_ratio(hwb->dirty_file_thresh,
+						hwb->dirty_fs_thresh);
+}
+
+static void hmdfs_init_dirty_limit(struct hmdfs_dirty_throttle_control *hdtc)
+{
+	struct hmdfs_writeback *hwb = hdtc->hwb;
+
+	hdtc->fs_thresh = hdtc->hwb->dirty_fs_thresh;
+	hdtc->file_thresh = hdtc->hwb->dirty_file_thresh;
+	hdtc->fs_bg_thresh = hdtc->hwb->dirty_fs_bg_thresh;
+	hdtc->file_bg_thresh = hdtc->hwb->dirty_file_bg_thresh;
+
+	if (!hwb->dirty_auto_threshold)
+		return;
+
+	/*
+	 * Init thresh according to the previous bandwidth-adjusted thresh;
+	 * thresh should be no more than the configured thresh.
+	 */
+	if (hwb->bw_fs_thresh < hdtc->fs_thresh) {
+		hdtc->fs_thresh = hwb->bw_fs_thresh;
+		hdtc->fs_bg_thresh = hmdfs_ratio_thresh(hwb->fs_bg_ratio,
+							hdtc->fs_thresh);
+	}
+	if (hwb->bw_file_thresh < hdtc->file_thresh) {
+		hdtc->file_thresh = hwb->bw_file_thresh;
+		hdtc->file_bg_thresh = hmdfs_ratio_thresh(hwb->file_bg_ratio,
+							  hdtc->file_thresh);
+	}
+	/*
+	 * The thresh should be updated the first time dirty pages exceed
+	 * the freerun ceiling.
+	 */
+	hdtc->thresh_time_stamp = jiffies - HMDFS_BANDWIDTH_INTERVAL - 1;
+}
+
+static void hmdfs_update_dirty_limit(struct hmdfs_dirty_throttle_control *hdtc)
+{
+	struct hmdfs_writeback *hwb = hdtc->hwb;
+	struct bdi_writeback *wb = hwb->wb;
+	unsigned int time_limit = hwb->writeback_timelimit;
+	unsigned long bw = wb->avg_write_bandwidth;
+	unsigned long thresh;
+
+	if (!hwb->dirty_auto_threshold)
+		return;
+
+	spin_lock(&hwb->write_bandwidth_lock);
+	if (bw > hwb->max_write_bandwidth)
+		hwb->max_write_bandwidth = bw;
+
+	if (bw < hwb->min_write_bandwidth)
+		hwb->min_write_bandwidth = bw;
+	hwb->avg_write_bandwidth = bw;
+	spin_unlock(&hwb->write_bandwidth_lock);
+
+	/*
+	 * If the bandwidth is lower than the lower limit, the peer has
+	 * probably gone offline, and it is meaningless to set such a low
+	 * thresh.
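+	 *
+	 * The adjusted thresh below is roughly
+	 *   bw (pages/s) * writeback_timelimit (jiffies) / roundup_pow_of_two(HZ),
+	 * i.e. about one time limit's worth of dirty pages at the current
+	 * bandwidth. Illustrative numbers only (assuming 4K pages, HZ=100
+	 * and the default 5s limit): bw = 2560 pages/s (10 MB/s) gives
+	 * 2560 * 500 / 128 = 10000 pages (~39 MB), below the 100 MB
+	 * dirty_fs_thresh, so the thresholds are scaled down.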
+ */ + bw = max(bw, hwb->bw_thresh_lowerlimit); + thresh = bw * time_limit / roundup_pow_of_two(HZ); + if (thresh >= hwb->dirty_fs_thresh) { + hdtc->fs_thresh = hwb->dirty_fs_thresh; + hdtc->file_thresh = hwb->dirty_file_thresh; + hdtc->fs_bg_thresh = hwb->dirty_fs_bg_thresh; + hdtc->file_bg_thresh = hwb->dirty_file_bg_thresh; + } else { + /* Adjust thresh according to current bandwidth */ + hdtc->fs_thresh = thresh; + hdtc->fs_bg_thresh = hmdfs_ratio_thresh(hwb->fs_bg_ratio, + hdtc->fs_thresh); + hdtc->file_thresh = hmdfs_ratio_thresh(hwb->fs_file_ratio, + hdtc->fs_thresh); + hdtc->file_bg_thresh = hmdfs_ratio_thresh(hwb->file_bg_ratio, + hdtc->file_thresh); + } + /* Save bandwidth adjusted thresh */ + hwb->bw_fs_thresh = hdtc->fs_thresh; + hwb->bw_file_thresh = hdtc->file_thresh; + /* Update time stamp */ + hdtc->thresh_time_stamp = jiffies; +} + +void hmdfs_update_ratelimit(struct hmdfs_writeback *hwb) +{ + struct hmdfs_dirty_throttle_control hdtc = {.hwb = hwb}; + + hmdfs_init_dirty_limit(&hdtc); + + /* hdtc.file_bg_thresh should be the lowest thresh */ + hwb->ratelimit_pages = hdtc.file_bg_thresh / + (num_online_cpus() * HMDFS_RATELIMIT_PAGES_GAP); + if (hwb->ratelimit_pages < HMDFS_MIN_RATELIMIT_PAGES) + hwb->ratelimit_pages = HMDFS_MIN_RATELIMIT_PAGES; +} + +/* This is a copy of wb_max_pause() */ +static unsigned long hmdfs_wb_pause(struct bdi_writeback *wb, + unsigned long wb_dirty) +{ + unsigned long bw = wb->avg_write_bandwidth; + unsigned long t; + + /* + * Limit pause time for small memory systems. If sleeping for too long + * time, a small pool of dirty/writeback pages may go empty and disk go + * idle. + * + * 8 serves as the safety ratio. + */ + t = wb_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8)); + t++; + + return min_t(unsigned long, t, HMDFS_MAX_PAUSE); +} + +static unsigned long +hmdfs_dirty_freerun_ceiling(struct hmdfs_dirty_throttle_control *hdtc, + unsigned int type) +{ + if (type == HMDFS_DIRTY_FS) + return (hdtc->fs_thresh + hdtc->fs_bg_thresh) / 2; + else /* HMDFS_DIRTY_FILE_TYPE */ + return (hdtc->file_thresh + hdtc->file_bg_thresh) / 2; +} + +/* This is a copy of dirty_poll_interval() */ +static inline unsigned long hmdfs_dirty_intv(unsigned long dirty, + unsigned long thresh) +{ + if (thresh > dirty) + return 1UL << (ilog2(thresh - dirty) >> 1); + return 1; +} + +static void hmdfs_balance_dirty_pages(struct address_space *mapping) +{ + struct inode *inode = mapping->host; + struct super_block *sb = inode->i_sb; + struct hmdfs_sb_info *sbi = sb->s_fs_info; + struct hmdfs_writeback *hwb = sbi->h_wb; + struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; + struct hmdfs_dirty_throttle_control hdtc = {.hwb = hwb}; + unsigned int dirty_exceeded = 0; + unsigned long start_time = jiffies; + unsigned long pause = 0; + + /* Add delay work to trigger timeout writeback */ + if (hwb->dirty_writeback_interval != 0) + hmdfs_writeback_inodes_sb_delayed( + sb, hwb->dirty_writeback_interval * 10); + + hmdfs_init_dirty_limit(&hdtc); + + while (1) { + unsigned long exceed = 0; + unsigned long diff; + + /* Per-filesystem overbalance writeback */ + hdtc.fs_nr_dirty = wb_stat_sum(wb, WB_RECLAIMABLE); + hdtc.fs_nr_reclaimable = + hdtc.fs_nr_dirty + wb_stat_sum(wb, WB_WRITEBACK); + if (hdtc.fs_nr_reclaimable < hdtc.file_bg_thresh) { + diff = hmdfs_dirty_intv(hdtc.fs_nr_reclaimable, + hdtc.file_thresh); + goto free_running; + } + + /* Per-file overbalance writeback */ + hdtc.file_nr_dirty = + hmdfs_idirty_pages(inode, PAGECACHE_TAG_DIRTY); + hdtc.file_nr_reclaimable = + 
hmdfs_idirty_pages(inode, PAGECACHE_TAG_WRITEBACK) + + hdtc.file_nr_dirty; + if ((hdtc.fs_nr_reclaimable < + hmdfs_dirty_freerun_ceiling(&hdtc, HMDFS_DIRTY_FS)) && + (hdtc.file_nr_reclaimable < + hmdfs_dirty_freerun_ceiling(&hdtc, HMDFS_DIRTY_FILE))) { + unsigned long fs_intv, file_intv; + + fs_intv = hmdfs_dirty_intv(hdtc.fs_nr_reclaimable, + hdtc.fs_thresh); + file_intv = hmdfs_dirty_intv(hdtc.file_nr_reclaimable, + hdtc.file_thresh); + diff = min(fs_intv, file_intv); +free_running: + current->nr_dirtied_pause = diff; + current->nr_dirtied = 0; + break; + } + + if (hdtc.fs_nr_reclaimable >= + hmdfs_dirty_freerun_ceiling(&hdtc, HMDFS_DIRTY_FS)) { + if (unlikely(!writeback_in_progress(wb))) + hmdfs_writeback_inodes_sb(sb); + } else { + hmdfs_writeback_inode(sb, inode); + } + + /* + * If dirty_auto_threshold is enabled, recalculate writeback + * thresh according to current bandwidth. Update bandwidth + * could be better if possible, but wb_update_bandwidth() is + * not exported, so we cannot update bandwidth here, so the + * bandwidth' update will be delayed if writing a lot to a + * single file. + */ + if (hwb->dirty_auto_threshold && + time_is_before_jiffies(hdtc.thresh_time_stamp + + HMDFS_BANDWIDTH_INTERVAL)) + hmdfs_update_dirty_limit(&hdtc); + + if (unlikely(hdtc.fs_nr_reclaimable >= hdtc.fs_thresh)) + exceed |= HMDFS_FS_EXCEED; + if (unlikely(hdtc.file_nr_reclaimable >= hdtc.file_thresh)) + exceed |= HMDFS_FILE_EXCEED; + + if (!exceed) { + trace_hmdfs_balance_dirty_pages(sbi, wb, &hdtc, + 0UL, start_time); + current->nr_dirtied = 0; + break; + } + /* + * Per-file or per-fs reclaimable pages exceed throttle limit, + * sleep pause time and check again. + */ + dirty_exceeded |= exceed; + if (dirty_exceeded && !hwb->dirty_exceeded) + hwb->dirty_exceeded = true; + + /* Pause */ + pause = hmdfs_wb_pause(wb, hdtc.fs_nr_reclaimable); + + trace_hmdfs_balance_dirty_pages(sbi, wb, &hdtc, pause, + start_time); + + __set_current_state(TASK_KILLABLE); + io_schedule_timeout(pause); + + if (fatal_signal_pending(current)) + break; + } + + if (!dirty_exceeded && hwb->dirty_exceeded) + hwb->dirty_exceeded = false; + + if (hdtc.fs_nr_reclaimable >= hdtc.fs_bg_thresh) { + if (unlikely(!writeback_in_progress(wb))) + hmdfs_writeback_inodes_sb(sb); + } else if (hdtc.file_nr_reclaimable >= hdtc.file_bg_thresh) { + hmdfs_writeback_inode(sb, inode); + } +} + +void hmdfs_balance_dirty_pages_ratelimited(struct address_space *mapping) +{ + struct hmdfs_sb_info *sbi = mapping->host->i_sb->s_fs_info; + struct hmdfs_writeback *hwb = sbi->h_wb; + int *bdp_ratelimits = NULL; + int ratelimit; + + if (!hwb->dirty_writeback_control) + return; + + /* Add delay work to trigger timeout writeback */ + if (hwb->dirty_writeback_interval != 0) + hmdfs_writeback_inodes_sb_delayed( + mapping->host->i_sb, + hwb->dirty_writeback_interval * 10); + + ratelimit = current->nr_dirtied_pause; + if (hwb->dirty_exceeded) + ratelimit = min(ratelimit, HMDFS_DIRTY_EXCEED_RATELIMIT); + + /* + * This prevents one CPU to accumulate too many dirtied pages + * without calling into hmdfs_balance_dirty_pages(), which can + * happen when there are 1000+ tasks, all of them start dirtying + * pages at exactly the same time, hence all honoured too large + * initial task->nr_dirtied_pause. 
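+	 *
+	 * Illustrative example: with the default ratelimit_pages = 32, a
+	 * task that has dirtied only a few pages itself can still find the
+	 * per-cpu counter above 32; the "else if" below then resets the
+	 * counter and forces ratelimit to 0, so hmdfs_balance_dirty_pages()
+	 * is entered even though current->nr_dirtied alone would not have
+	 * triggered it.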
+ */ + preempt_disable(); + bdp_ratelimits = this_cpu_ptr(hwb->bdp_ratelimits); + + trace_hmdfs_balance_dirty_pages_ratelimited(sbi, hwb, *bdp_ratelimits); + + if (unlikely(current->nr_dirtied >= ratelimit)) { + *bdp_ratelimits = 0; + } else if (unlikely(*bdp_ratelimits >= hwb->ratelimit_pages)) { + *bdp_ratelimits = 0; + ratelimit = 0; + } + preempt_enable(); + + if (unlikely(current->nr_dirtied >= ratelimit)) + hmdfs_balance_dirty_pages(mapping); +} + +void hmdfs_destroy_writeback(struct hmdfs_sb_info *sbi) +{ + if (!sbi->h_wb) + return; + + flush_delayed_work(&sbi->h_wb->dirty_sb_writeback_work); + flush_delayed_work(&sbi->h_wb->dirty_inode_writeback_work); + destroy_workqueue(sbi->h_wb->dirty_sb_writeback_wq); + destroy_workqueue(sbi->h_wb->dirty_inode_writeback_wq); + free_percpu(sbi->h_wb->bdp_ratelimits); + kfree(sbi->h_wb); + sbi->h_wb = NULL; +} + +int hmdfs_init_writeback(struct hmdfs_sb_info *sbi) +{ + struct hmdfs_writeback *hwb; + char name[HMDFS_WQ_NAME_LEN]; + int ret = -ENOMEM; + + hwb = kzalloc(sizeof(struct hmdfs_writeback), GFP_KERNEL); + if (!hwb) + return ret; + + hwb->sbi = sbi; + hwb->wb = &sbi->sb->s_bdi->wb; + hwb->dirty_writeback_control = true; + hwb->dirty_writeback_interval = HM_DEFAULT_WRITEBACK_INTERVAL; + hwb->dirty_file_bg_bytes = HMDFS_FILE_BG_WB_BYTES; + hwb->dirty_fs_bg_bytes = HMDFS_FS_BG_WB_BYTES; + hwb->dirty_file_bytes = HMDFS_FILE_WB_BYTES; + hwb->dirty_fs_bytes = HMDFS_FS_WB_BYTES; + hmdfs_calculate_dirty_thresh(hwb); + hwb->bw_file_thresh = hwb->dirty_file_thresh; + hwb->bw_fs_thresh = hwb->dirty_fs_thresh; + spin_lock_init(&hwb->inode_list_lock); + INIT_LIST_HEAD(&hwb->inode_list_head); + hwb->dirty_exceeded = false; + hwb->ratelimit_pages = HMDFS_DEF_RATELIMIT_PAGES; + hwb->dirty_auto_threshold = true; + hwb->writeback_timelimit = HMDFS_DEF_WB_TIMELIMIT; + hwb->bw_thresh_lowerlimit = HMDFS_BW_THRESH_DEF_LIMIT; + spin_lock_init(&hwb->write_bandwidth_lock); + hwb->avg_write_bandwidth = 0; + hwb->max_write_bandwidth = 0; + hwb->min_write_bandwidth = ULONG_MAX; + hwb->bdp_ratelimits = alloc_percpu(int); + if (!hwb->bdp_ratelimits) + goto free_hwb; + + snprintf(name, sizeof(name), "dfs_ino_wb%u", sbi->seq); + hwb->dirty_inode_writeback_wq = create_singlethread_workqueue(name); + if (!hwb->dirty_inode_writeback_wq) { + hmdfs_err("Failed to create inode writeback workqueue!"); + goto free_bdp; + } + snprintf(name, sizeof(name), "dfs_sb_wb%u", sbi->seq); + hwb->dirty_sb_writeback_wq = create_singlethread_workqueue(name); + if (!hwb->dirty_sb_writeback_wq) { + hmdfs_err("Failed to create filesystem writeback workqueue!"); + goto free_i_wq; + } + INIT_DELAYED_WORK(&hwb->dirty_sb_writeback_work, + hmdfs_writeback_inodes_sb_handler); + INIT_DELAYED_WORK(&hwb->dirty_inode_writeback_work, + hmdfs_writeback_inode_handler); + sbi->h_wb = hwb; + return 0; +free_i_wq: + destroy_workqueue(hwb->dirty_inode_writeback_wq); +free_bdp: + free_percpu(sbi->h_wb->bdp_ratelimits); +free_hwb: + kfree(hwb); + return ret; +} diff --git a/fs/hmdfs/client_writeback.h b/fs/hmdfs/client_writeback.h new file mode 100644 index 0000000000000000000000000000000000000000..689a5e733ece47a4894a10ce9eac099a5b9047f1 --- /dev/null +++ b/fs/hmdfs/client_writeback.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/client_writeback.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */
+
+#ifndef CLIENT_WRITEBACK_H
+#define CLIENT_WRITEBACK_H
+
+#include "hmdfs.h"
+
+/*
+ * HM_DEFAULT_WRITEBACK_INTERVAL - centiseconds
+ * HMDFS_FILE_BG_WB_BYTES - background per-file threshold 10M
+ * HMDFS_FS_BG_WB_BYTES - background per-fs threshold 50M
+ * HMDFS_FILE_WB_BYTES - per-file throttle threshold
+ * HMDFS_FS_WB_BYTES - per-fs throttle threshold
+ */
+#define HM_DEFAULT_WRITEBACK_INTERVAL 500
+#define HMDFS_FILE_BG_WB_BYTES (10 * 1024 * 1024)
+#define HMDFS_FS_BG_WB_BYTES (50 * 1024 * 1024)
+#define HMDFS_FILE_WB_BYTES (HMDFS_FILE_BG_WB_BYTES << 1)
+#define HMDFS_FS_WB_BYTES (HMDFS_FS_BG_WB_BYTES << 1)
+
+/* writeback time limit (default 5s) */
+#define HMDFS_DEF_WB_TIMELIMIT (5 * HZ)
+#define HMDFS_MAX_WB_TIMELIMIT (30 * HZ)
+
+/* bandwidth adjusted lower limit (default 1MB/s) */
+#define HMDFS_BW_THRESH_MIN_LIMIT (1 << (20 - PAGE_SHIFT))
+#define HMDFS_BW_THRESH_MAX_LIMIT (100 << (20 - PAGE_SHIFT))
+#define HMDFS_BW_THRESH_DEF_LIMIT HMDFS_BW_THRESH_MIN_LIMIT
+
+#define HMDFS_DIRTY_EXCEED_RATELIMIT (32 >> (PAGE_SHIFT - 10))
+#define HMDFS_RATELIMIT_PAGES_GAP 16
+#define HMDFS_DEF_RATELIMIT_PAGES 32
+#define HMDFS_MIN_RATELIMIT_PAGES 1
+
+struct hmdfs_dirty_throttle_control {
+	struct hmdfs_writeback *hwb;
+	/* last time threshes are updated */
+	unsigned long thresh_time_stamp;
+
+	unsigned long file_bg_thresh;
+	unsigned long fs_bg_thresh;
+	unsigned long file_thresh;
+	unsigned long fs_thresh;
+
+	unsigned long file_nr_dirty;
+	unsigned long fs_nr_dirty;
+	unsigned long file_nr_reclaimable;
+	unsigned long fs_nr_reclaimable;
+};
+
+struct hmdfs_writeback {
+	struct hmdfs_sb_info *sbi;
+	struct bdi_writeback *wb;
+	/* enable hmdfs dirty writeback control */
+	bool dirty_writeback_control;
+
+	/* writeback per-file inode list */
+	struct list_head inode_list_head;
+	spinlock_t inode_list_lock;
+
+	/* centiseconds */
+	unsigned int dirty_writeback_interval;
+	/* per-file background threshold */
+	unsigned long dirty_file_bg_bytes;
+	unsigned long dirty_file_bg_thresh;
+	/* per-fs background threshold */
+	unsigned long dirty_fs_bg_bytes;
+	unsigned long dirty_fs_bg_thresh;
+	/* per-file throttle threshold */
+	unsigned long dirty_file_bytes;
+	unsigned long dirty_file_thresh;
+	/* per-fs throttle threshold */
+	unsigned long dirty_fs_bytes;
+	unsigned long dirty_fs_thresh;
+	/* ratio between background thresh and throttle thresh */
+	unsigned long fs_bg_ratio;
+	unsigned long file_bg_ratio;
+	/* ratio between file and fs throttle thresh */
+	unsigned long fs_file_ratio;
+
+	/*
+	 * Enable auto-thresh. If enabled, the background and throttle
+	 * thresh are no longer fixed values stored in dirty_*_bytes;
+	 * they are determined by the bandwidth of the network and the
+	 * writeback timelimit.
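+	 *
+	 * In that mode (see hmdfs_update_dirty_limit() in
+	 * client_writeback.c) the throttle thresh tracks roughly
+	 * avg_write_bandwidth * writeback_timelimit, clamped below by
+	 * bw_thresh_lowerlimit and capped at the dirty_*_bytes values.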
+ */ + bool dirty_auto_threshold; + unsigned int writeback_timelimit; + /* bandwitdh adjusted filesystem throttle thresh */ + unsigned long bw_fs_thresh; + /* bandwidth adjusted per-file throttle thresh */ + unsigned long bw_file_thresh; + /* bandwidth adjusted thresh lower limit */ + unsigned long bw_thresh_lowerlimit; + + /* reclaimable pages exceed throttle thresh */ + bool dirty_exceeded; + /* percpu dirty pages ratelimit */ + long ratelimit_pages; + /* count percpu dirty pages */ + int __percpu *bdp_ratelimits; + + /* per-fs writeback work */ + struct workqueue_struct *dirty_sb_writeback_wq; + struct delayed_work dirty_sb_writeback_work; + /* per-file writeback work */ + struct workqueue_struct *dirty_inode_writeback_wq; + struct delayed_work dirty_inode_writeback_work; + + /* per-fs writeback bandwidth */ + spinlock_t write_bandwidth_lock; + unsigned long max_write_bandwidth; + unsigned long min_write_bandwidth; + unsigned long avg_write_bandwidth; +}; + +void hmdfs_writeback_inodes_sb_handler(struct work_struct *work); + +void hmdfs_writeback_inode_handler(struct work_struct *work); + +void hmdfs_calculate_dirty_thresh(struct hmdfs_writeback *hwb); + +void hmdfs_update_ratelimit(struct hmdfs_writeback *hwb); + +void hmdfs_balance_dirty_pages_ratelimited(struct address_space *mapping); + +void hmdfs_destroy_writeback(struct hmdfs_sb_info *sbi); + +int hmdfs_init_writeback(struct hmdfs_sb_info *sbi); + +#endif diff --git a/fs/hmdfs/comm/connection.c b/fs/hmdfs/comm/connection.c new file mode 100644 index 0000000000000000000000000000000000000000..51e6f829eb343b7bc929b899d5d2243cfe31ba49 --- /dev/null +++ b/fs/hmdfs/comm/connection.c @@ -0,0 +1,1311 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/comm/connection.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include "connection.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "device_node.h" +#include "hmdfs.h" +#include "message_verify.h" +#include "node_cb.h" +#include "protocol.h" +#include "socket_adapter.h" + +#ifdef CONFIG_HMDFS_FS_ENCRYPTION +#include "crypto.h" +#endif + +#define HMDFS_WAIT_REQUEST_END_MIN 20 +#define HMDFS_WAIT_REQUEST_END_MAX 30 + +#define HMDFS_WAIT_CONN_RELEASE (3 * HZ) + +#define HMDFS_RETRY_WB_WQ_MAX_ACTIVE 16 + +static void hs_fill_crypto_data(struct connection *conn_impl, __u8 ops, + void *data, __u32 len) +{ + struct crypto_body *body = NULL; + + if (len < sizeof(struct crypto_body)) { + hmdfs_info("crpto body len %u is err", len); + return; + } + body = (struct crypto_body *)data; + + /* this is only test, later need to fill right algorithm. */ + body->crypto |= HMDFS_HS_CRYPTO_KTLS_AES128; + body->crypto = cpu_to_le32(body->crypto); + + hmdfs_info("fill crypto. 
ccrtypto=0x%08x", body->crypto); +} + +static int hs_parse_crypto_data(struct connection *conn_impl, __u8 ops, + void *data, __u32 len) +{ + struct crypto_body *hs_crypto = NULL; + uint32_t crypto; + + if (len < sizeof(struct crypto_body)) { + hmdfs_info("handshake msg len error, len=%u", len); + return -1; + } + hs_crypto = (struct crypto_body *)data; + crypto = le16_to_cpu(hs_crypto->crypto); + conn_impl->crypto = crypto; + hmdfs_info("ops=%u, len=%u, crypto=0x%08x", ops, len, crypto); + return 0; +} + +static void hs_fill_case_sense_data(struct connection *conn_impl, __u8 ops, + void *data, __u32 len) +{ + struct case_sense_body *body = (struct case_sense_body *)data; + + if (len < sizeof(struct case_sense_body)) { + hmdfs_err("case sensitive len %u is err", len); + return; + } + body->case_sensitive = conn_impl->node->sbi->s_case_sensitive; +} + +static int hs_parse_case_sense_data(struct connection *conn_impl, __u8 ops, + void *data, __u32 len) +{ + struct case_sense_body *body = (struct case_sense_body *)data; + __u8 sensitive = conn_impl->node->sbi->s_case_sensitive ? 1 : 0; + + if (len < sizeof(struct case_sense_body)) { + hmdfs_info("case sensitive len %u is err", len); + return -1; + } + if (body->case_sensitive != sensitive) { + hmdfs_err("case sensitive inconsistent, server: %u,client: %u, ops: %u", + body->case_sensitive, sensitive, ops); + return -1; + } + return 0; +} + +static void hs_fill_feature_data(struct connection *conn_impl, __u8 ops, + void *data, __u32 len) +{ + struct feature_body *body = (struct feature_body *)data; + + if (len < sizeof(struct feature_body)) { + hmdfs_err("feature len %u is err", len); + return; + } + body->features = cpu_to_le64(conn_impl->node->sbi->s_features); + body->reserved = cpu_to_le64(0); +} + +static int hs_parse_feature_data(struct connection *conn_impl, __u8 ops, + void *data, __u32 len) +{ + struct feature_body *body = (struct feature_body *)data; + + if (len < sizeof(struct feature_body)) { + hmdfs_err("feature len %u is err", len); + return -1; + } + + conn_impl->node->features = le64_to_cpu(body->features); + return 0; +} + +/* should ensure len is small than 0xffff. */ +static const struct conn_hs_extend_reg s_hs_extend_reg[HS_EXTEND_CODE_COUNT] = { + [HS_EXTEND_CODE_CRYPTO] = { + .len = sizeof(struct crypto_body), + .resv = 0, + .filler = hs_fill_crypto_data, + .parser = hs_parse_crypto_data + }, + [HS_EXTEND_CODE_CASE_SENSE] = { + .len = sizeof(struct case_sense_body), + .resv = 0, + .filler = hs_fill_case_sense_data, + .parser = hs_parse_case_sense_data, + }, + [HS_EXTEND_CODE_FEATURE_SUPPORT] = { + .len = sizeof(struct feature_body), + .resv = 0, + .filler = hs_fill_feature_data, + .parser = hs_parse_feature_data, + }, + [HS_EXTEND_CODE_FEATURE_SUPPORT] = { + .len = sizeof(struct feature_body), + .resv = 0, + .filler = hs_fill_feature_data, + .parser = hs_parse_feature_data, + }, +}; + +static __u32 hs_get_extend_data_len(void) +{ + __u32 len; + int i; + + len = sizeof(struct conn_hs_extend_head); + + for (i = 0; i < HS_EXTEND_CODE_COUNT; i++) { + len += sizeof(struct extend_field_head); + len += s_hs_extend_reg[i].len; + } + + hmdfs_info("extend data total len is %u", len); + return len; +} + +static void hs_fill_extend_data(struct connection *conn_impl, __u8 ops, + void *extend_data, __u32 len) +{ + struct conn_hs_extend_head *extend_head = NULL; + struct extend_field_head *field = NULL; + uint8_t *body = NULL; + __u32 offset; + __u16 i; + + if (sizeof(struct conn_hs_extend_head) > len) { + hmdfs_info("len error. 
len=%u", len); + return; + } + extend_head = (struct conn_hs_extend_head *)extend_data; + extend_head->field_cn = 0; + offset = sizeof(struct conn_hs_extend_head); + + for (i = 0; i < HS_EXTEND_CODE_COUNT; i++) { + if (sizeof(struct extend_field_head) > (len - offset)) + break; + field = (struct extend_field_head *)((uint8_t *)extend_data + + offset); + offset += sizeof(struct extend_field_head); + + if (s_hs_extend_reg[i].len > (len - offset)) + break; + body = (uint8_t *)extend_data + offset; + offset += s_hs_extend_reg[i].len; + + field->code = cpu_to_le16(i); + field->len = cpu_to_le16(s_hs_extend_reg[i].len); + + if (s_hs_extend_reg[i].filler) + s_hs_extend_reg[i].filler(conn_impl, ops, + body, s_hs_extend_reg[i].len); + + extend_head->field_cn += 1; + } + + extend_head->field_cn = cpu_to_le32(extend_head->field_cn); +} + +static int hs_parse_extend_data(struct connection *conn_impl, __u8 ops, + void *extend_data, __u32 extend_len) +{ + struct conn_hs_extend_head *extend_head = NULL; + struct extend_field_head *field = NULL; + uint8_t *body = NULL; + __u32 offset; + __u32 field_cnt; + __u16 code; + __u16 len; + int i; + int ret; + + if (sizeof(struct conn_hs_extend_head) > extend_len) { + hmdfs_err("ops=%u,extend_len=%u", ops, extend_len); + return -1; + } + extend_head = (struct conn_hs_extend_head *)extend_data; + field_cnt = le32_to_cpu(extend_head->field_cn); + hmdfs_info("extend_len=%u,field_cnt=%u", extend_len, field_cnt); + + offset = sizeof(struct conn_hs_extend_head); + + for (i = 0; i < field_cnt; i++) { + if (sizeof(struct extend_field_head) > (extend_len - offset)) { + hmdfs_err("cnt err, op=%u, extend_len=%u, cnt=%u, i=%u", + ops, extend_len, field_cnt, i); + return -1; + } + field = (struct extend_field_head *)((uint8_t *)extend_data + + offset); + offset += sizeof(struct extend_field_head); + code = le16_to_cpu(field->code); + len = le16_to_cpu(field->len); + if (len > (extend_len - offset)) { + hmdfs_err("len err, op=%u, extend_len=%u, cnt=%u, i=%u", + ops, extend_len, field_cnt, i); + hmdfs_err("len err, code=%u, len=%u, offset=%u", code, + len, offset); + return -1; + } + + body = (uint8_t *)extend_data + offset; + offset += len; + if ((code < HS_EXTEND_CODE_COUNT) && + (s_hs_extend_reg[code].parser)) { + ret = s_hs_extend_reg[code].parser(conn_impl, ops, + body, len); + if (ret) + return ret; + } + } + return 0; +} + +static int hs_proc_msg_data(struct connection *conn_impl, __u8 ops, void *data, + __u32 data_len) +{ + struct connection_handshake_req *hs_req = NULL; + uint8_t *extend_data = NULL; + __u32 extend_len; + __u32 req_len; + int ret; + + if (!data) { + hmdfs_err("err, msg data is null"); + return -1; + } + + if (data_len < sizeof(struct connection_handshake_req)) { + hmdfs_err("ack msg data len error. data_len=%u, device_id=%llu", + data_len, conn_impl->node->device_id); + return -1; + } + + hs_req = (struct connection_handshake_req *)data; + req_len = le32_to_cpu(hs_req->len); + if (req_len > (data_len - sizeof(struct connection_handshake_req))) { + hmdfs_info( + "ack msg hs_req len(%u) error. 
data_len=%u, device_id=%llu", + req_len, data_len, conn_impl->node->device_id); + return -1; + } + extend_len = + data_len - sizeof(struct connection_handshake_req) - req_len; + extend_data = (uint8_t *)data + + sizeof(struct connection_handshake_req) + req_len; + ret = hs_parse_extend_data(conn_impl, ops, extend_data, extend_len); + if (!ret) + hmdfs_info( + "hs msg rcv, ops=%u, data_len=%u, device_id=%llu, req_len=%u", + ops, data_len, conn_impl->node->device_id, hs_req->len); + return ret; +} +#ifdef CONFIG_HMDFS_FS_ENCRYPTION +static int connection_handshake_init_tls(struct connection *conn_impl, __u8 ops) +{ + // init ktls config, use key1/key2 as init write-key of each direction + __u8 key1[HMDFS_KEY_SIZE]; + __u8 key2[HMDFS_KEY_SIZE]; + int ret; + + if ((ops != CONNECT_MESG_HANDSHAKE_RESPONSE) && + (ops != CONNECT_MESG_HANDSHAKE_ACK)) { + hmdfs_err("ops %u is err", ops); + return -EINVAL; + } + + update_key(conn_impl->master_key, key1, HKDF_TYPE_KEY_INITIATOR); + update_key(conn_impl->master_key, key2, HKDF_TYPE_KEY_ACCEPTER); + + if (ops == CONNECT_MESG_HANDSHAKE_ACK) { + memcpy(conn_impl->send_key, key1, HMDFS_KEY_SIZE); + memcpy(conn_impl->recv_key, key2, HMDFS_KEY_SIZE); + } else { + memcpy(conn_impl->send_key, key2, HMDFS_KEY_SIZE); + memcpy(conn_impl->recv_key, key1, HMDFS_KEY_SIZE); + } + + memset(key1, 0, HMDFS_KEY_SIZE); + memset(key2, 0, HMDFS_KEY_SIZE); + + hmdfs_info("hs: ops=%u start set crypto tls", ops); + ret = tls_crypto_info_init(conn_impl); + if (ret) + hmdfs_err("setting tls fail. ops is %u", ops); + + return ret; +} +#endif + +static int do_send_handshake(struct connection *conn_impl, __u8 ops, + __le16 request_id) +{ + int err; + struct connection_msg_head *hs_head = NULL; + struct connection_handshake_req *hs_data = NULL; + uint8_t *hs_extend_data = NULL; + struct hmdfs_send_data msg; + __u32 send_len; + __u32 len; + __u32 extend_len; + char buf[HMDFS_CID_SIZE] = { 0 }; + + len = scnprintf(buf, HMDFS_CID_SIZE, "%llu", 0ULL); + send_len = sizeof(struct connection_msg_head) + + sizeof(struct connection_handshake_req) + len; + + if (((ops == CONNECT_MESG_HANDSHAKE_RESPONSE) || + (ops == CONNECT_MESG_HANDSHAKE_ACK)) && + (conn_impl->node->version >= DFS_2_0)) { + extend_len = hs_get_extend_data_len(); + send_len += extend_len; + } + + hs_head = kzalloc(send_len, GFP_KERNEL); + if (!hs_head) + return -ENOMEM; + + hs_data = (struct connection_handshake_req + *)((uint8_t *)hs_head + + sizeof(struct connection_msg_head)); + + hs_data->len = cpu_to_le32(len); + memcpy(hs_data->dev_id, buf, len); + + if (((ops == CONNECT_MESG_HANDSHAKE_RESPONSE) || + ops == CONNECT_MESG_HANDSHAKE_ACK) && + (conn_impl->node->version >= DFS_2_0)) { + hs_extend_data = (uint8_t *)hs_data + + sizeof(struct connection_handshake_req) + len; + hs_fill_extend_data(conn_impl, ops, hs_extend_data, extend_len); + } + + hs_head->magic = HMDFS_MSG_MAGIC; + hs_head->version = DFS_2_0; + hs_head->flags |= 0x1; + hmdfs_info("Send handshake message: ops = %d, fd = %d", ops, + ((struct tcp_handle *)(conn_impl->connect_handle))->fd); + hs_head->operations = ops; + hs_head->request_id = request_id; + hs_head->datasize = cpu_to_le32(send_len); + hs_head->source = 0; + hs_head->msg_id = 0; + + msg.head = hs_head; + msg.head_len = sizeof(struct connection_msg_head); + msg.data = hs_data; + msg.len = send_len - msg.head_len; + msg.sdesc = NULL; + msg.sdesc_len = 0; + err = conn_impl->send_message(conn_impl, &msg); + kfree(hs_head); + return err; +} + +static int hmdfs_node_waiting_evt_sum(const struct 
hmdfs_peer *node) +{ + int sum = 0; + int i; + + for (i = 0; i < RAW_NODE_EVT_NR; i++) + sum += node->waiting_evt[i]; + + return sum; +} + +static int hmdfs_update_node_waiting_evt(struct hmdfs_peer *node, int evt, + unsigned int *seq) +{ + int last; + int sum; + unsigned int next; + + sum = hmdfs_node_waiting_evt_sum(node); + if (sum % RAW_NODE_EVT_NR) + last = !node->pending_evt; + else + last = node->pending_evt; + + /* duplicated event */ + if (evt == last) { + node->dup_evt[evt]++; + return 0; + } + + node->waiting_evt[evt]++; + hmdfs_debug("add node->waiting_evt[%d]=%d", evt, + node->waiting_evt[evt]); + + /* offline wait + online wait + offline wait = offline wait + * online wait + offline wait + online wait != online wait + * As the first online related resource (e.g. fd) must be invalidated + */ + if (node->waiting_evt[RAW_NODE_EVT_OFF] >= 2 && + node->waiting_evt[RAW_NODE_EVT_ON] >= 1) { + node->waiting_evt[RAW_NODE_EVT_OFF] -= 1; + node->waiting_evt[RAW_NODE_EVT_ON] -= 1; + node->seq_wr_idx -= 2; + node->merged_evt += 2; + } + + next = hmdfs_node_inc_evt_seq(node); + node->seq_tbl[(node->seq_wr_idx++) % RAW_NODE_EVT_MAX_NR] = next; + *seq = next; + + return 1; +} + +static void hmdfs_run_evt_cb_verbosely(struct hmdfs_peer *node, int raw_evt, + bool sync, unsigned int seq) +{ + int evt = (raw_evt == RAW_NODE_EVT_OFF) ? NODE_EVT_OFFLINE : + NODE_EVT_ONLINE; + int cur_evt_idx = sync ? 1 : 0; + + node->cur_evt[cur_evt_idx] = raw_evt; + node->cur_evt_seq[cur_evt_idx] = seq; + hmdfs_node_call_evt_cb(node, evt, sync, seq); + node->cur_evt[cur_evt_idx] = RAW_NODE_EVT_NR; +} + +static void hmdfs_node_evt_work(struct work_struct *work) +{ + struct hmdfs_peer *node = + container_of(work, struct hmdfs_peer, evt_dwork.work); + unsigned int seq; + + /* + * N-th sync cb completes before N-th async cb, + * so use seq_lock as a barrier in read & write path + * to ensure we can read the required seq. + */ + mutex_lock(&node->seq_lock); + seq = node->seq_tbl[(node->seq_rd_idx++) % RAW_NODE_EVT_MAX_NR]; + hmdfs_run_evt_cb_verbosely(node, node->pending_evt, false, seq); + mutex_unlock(&node->seq_lock); + + mutex_lock(&node->evt_lock); + if (hmdfs_node_waiting_evt_sum(node)) { + node->pending_evt = !node->pending_evt; + node->pending_evt_seq = + node->seq_tbl[node->seq_rd_idx % RAW_NODE_EVT_MAX_NR]; + node->waiting_evt[node->pending_evt]--; + /* sync cb has been done */ + schedule_delayed_work(&node->evt_dwork, + node->sbi->async_cb_delay * HZ); + } else { + node->last_evt = node->pending_evt; + node->pending_evt = RAW_NODE_EVT_NR; + } + mutex_unlock(&node->evt_lock); +} + +/* + * The running orders of cb are: + * + * (1) sync callbacks are invoked according to the queue order of raw events: + * ensured by seq_lock. + * (2) async callbacks are invoked according to the queue order of raw events: + * ensured by evt_lock & evt_dwork + * (3) async callback is invoked after sync callback of the same raw event: + * ensured by seq_lock. + * (4) async callback of N-th raw event and sync callback of (N+x)-th raw + * event can run concurrently. 
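+ *
+ * For this reason cur_evt[] and cur_evt_seq[] keep one slot per callback
+ * context (async at index 0, sync at index 1), as both may be live at once.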
+ */ +static void hmdfs_queue_raw_node_evt(struct hmdfs_peer *node, int evt) +{ + unsigned int seq = 0; + + mutex_lock(&node->evt_lock); + if (node->pending_evt == RAW_NODE_EVT_NR) { + if (evt == node->last_evt) { + node->dup_evt[evt]++; + mutex_unlock(&node->evt_lock); + return; + } + node->pending_evt = evt; + seq = hmdfs_node_inc_evt_seq(node); + node->seq_tbl[(node->seq_wr_idx++) % RAW_NODE_EVT_MAX_NR] = seq; + node->pending_evt_seq = seq; + mutex_lock(&node->seq_lock); + mutex_unlock(&node->evt_lock); + /* call sync cb, then async cb */ + hmdfs_run_evt_cb_verbosely(node, evt, true, seq); + mutex_unlock(&node->seq_lock); + schedule_delayed_work(&node->evt_dwork, + node->sbi->async_cb_delay * HZ); + } else if (hmdfs_update_node_waiting_evt(node, evt, &seq) > 0) { + /* + * Take seq_lock firstly to ensure N-th sync cb + * is called before N-th async cb. + */ + mutex_lock(&node->seq_lock); + mutex_unlock(&node->evt_lock); + hmdfs_run_evt_cb_verbosely(node, evt, true, seq); + mutex_unlock(&node->seq_lock); + } else { + mutex_unlock(&node->evt_lock); + } +} + +void connection_send_handshake(struct connection *conn_impl, __u8 ops, + __le16 request_id) +{ + struct tcp_handle *tcp = NULL; + int err = do_send_handshake(conn_impl, ops, request_id); + + if (likely(err >= 0)) + return; + + tcp = conn_impl->connect_handle; + hmdfs_err("Failed to send handshake: err = %d, fd = %d", err, tcp->fd); + hmdfs_reget_connection(conn_impl); +} + +void connection_handshake_notify(struct hmdfs_peer *node, int notify_type) +{ + struct notify_param param; + + param.notify = notify_type; + param.fd = INVALID_SOCKET_FD; + memcpy(param.remote_cid, node->cid, HMDFS_CID_SIZE); + notify(node, ¶m); +} + + +void peer_online(struct hmdfs_peer *peer) +{ + // To evaluate if someone else has made the peer online + u8 prev_stat = xchg(&peer->status, NODE_STAT_ONLINE); + unsigned long jif_tmp = jiffies; + + if (prev_stat == NODE_STAT_ONLINE) + return; + WRITE_ONCE(peer->conn_time, jif_tmp); + WRITE_ONCE(peer->sbi->connections.recent_ol, jif_tmp); + hmdfs_queue_raw_node_evt(peer, RAW_NODE_EVT_ON); +} + +void connection_to_working(struct hmdfs_peer *node) +{ + struct connection *conn_impl = NULL; + struct tcp_handle *tcp = NULL; + + if (!node) + return; + mutex_lock(&node->conn_impl_list_lock); + list_for_each_entry(conn_impl, &node->conn_impl_list, list) { + if (conn_impl->type == CONNECT_TYPE_TCP && + conn_impl->status == CONNECT_STAT_WAIT_RESPONSE) { + tcp = conn_impl->connect_handle; + hmdfs_info("fd %d to working", tcp->fd); + conn_impl->status = CONNECT_STAT_WORKING; + } + } + mutex_unlock(&node->conn_impl_list_lock); + peer_online(node); +} + +static int connection_check_version(__u8 version) +{ + __u8 min_ver = USERSPACE_MAX_VER; + + if (version <= min_ver || version >= MAX_VERSION) { + hmdfs_info("version err. 
version %u", version); + return -1; + } + return 0; +} + +void connection_handshake_recv_handler(struct connection *conn_impl, void *buf, + void *data, __u32 data_len) +{ + __u8 version; + __u8 ops; + __u8 status; + int fd = ((struct tcp_handle *)(conn_impl->connect_handle))->fd; + struct connection_msg_head *head = (struct connection_msg_head *)buf; + int ret; + + version = head->version; + conn_impl->node->version = version; + if (connection_check_version(version) != 0) + goto out; + conn_impl->node->conn_operations = hmdfs_get_peer_operation(version); + ops = head->operations; + status = conn_impl->status; + switch (ops) { + case CONNECT_MESG_HANDSHAKE_REQUEST: + hmdfs_info( + "Recved handshake request: device_id = %llu, version = %d, head->len = %d, tcp->fd = %d", + conn_impl->node->device_id, version, head->datasize, fd); + connection_send_handshake(conn_impl, + CONNECT_MESG_HANDSHAKE_RESPONSE, + head->msg_id); + if (conn_impl->node->version >= DFS_2_0) { + conn_impl->status = CONNECT_STAT_WAIT_ACK; + conn_impl->node->status = NODE_STAT_SHAKING; + } else { + conn_impl->status = CONNECT_STAT_WORKING; + } + break; + case CONNECT_MESG_HANDSHAKE_RESPONSE: + hmdfs_info( + "Recved handshake response: device_id = %llu, cmd->status = %hhu, tcp->fd = %d", + conn_impl->node->device_id, status, fd); + if (status == CONNECT_STAT_WAIT_REQUEST) { + // must be 10.1 device, no need to set ktls + connection_to_working(conn_impl->node); + goto out; + } + + if (conn_impl->node->version >= DFS_2_0) { + ret = hs_proc_msg_data(conn_impl, ops, data, data_len); + if (ret) + goto nego_err; + connection_send_handshake(conn_impl, + CONNECT_MESG_HANDSHAKE_ACK, + head->msg_id); + hmdfs_info("respon rcv handle,conn_impl->crypto=0x%0x", + conn_impl->crypto); +#ifdef CONFIG_HMDFS_FS_ENCRYPTION + ret = connection_handshake_init_tls(conn_impl, ops); + if (ret) { + hmdfs_err("init_tls_key fail, ops %u", ops); + goto out; + } +#endif + } + + conn_impl->status = CONNECT_STAT_WORKING; + peer_online(conn_impl->node); + break; + case CONNECT_MESG_HANDSHAKE_ACK: + if (conn_impl->node->version >= DFS_2_0) { + ret = hs_proc_msg_data(conn_impl, ops, data, data_len); + if (ret) + goto nego_err; + hmdfs_info("ack rcv handle, conn_impl->crypto=0x%0x", + conn_impl->crypto); +#ifdef CONFIG_HMDFS_FS_ENCRYPTION + ret = connection_handshake_init_tls(conn_impl, ops); + if (ret) { + hmdfs_err("init_tls_key fail, ops %u", ops); + goto out; + } +#endif + conn_impl->status = CONNECT_STAT_WORKING; + peer_online(conn_impl->node); + break; + } + fallthrough; + default: + return; + } +out: + kfree(data); + return; +nego_err: + conn_impl->status = CONNECT_STAT_NEGO_FAIL; + connection_handshake_notify(conn_impl->node, + NOTIFY_OFFLINE); + hmdfs_err("protocol negotiation failed, remote device_id = %llu, tcp->fd = %d", + conn_impl->node->device_id, fd); + goto out; +} + +#ifdef CONFIG_HMDFS_FS_ENCRYPTION +static void update_tls_crypto_key(struct connection *conn, + struct hmdfs_head_cmd *head, void *data, + __u32 data_len) +{ + // rekey message handler + struct connection_rekey_request *rekey_req = NULL; + int ret = 0; + + if (hmdfs_message_verify(conn->node, head, data) < 0) { + hmdfs_err("Rekey msg %d has been abandoned", head->msg_id); + goto out_err; + } + + hmdfs_info("recv REKEY request"); + set_crypto_info(conn, SET_CRYPTO_RECV); + // update send key if requested + rekey_req = data; + if (le32_to_cpu(rekey_req->update_request) == UPDATE_REQUESTED) { + ret = tcp_send_rekey_request(conn); + if (ret == 0) + set_crypto_info(conn, 
+#ifdef CONFIG_HMDFS_FS_ENCRYPTION
+static void update_tls_crypto_key(struct connection *conn,
+				  struct hmdfs_head_cmd *head, void *data,
+				  __u32 data_len)
+{
+	// rekey message handler
+	struct connection_rekey_request *rekey_req = NULL;
+	int ret = 0;
+
+	if (hmdfs_message_verify(conn->node, head, data) < 0) {
+		hmdfs_err("Rekey msg %d has been abandoned", head->msg_id);
+		goto out_err;
+	}
+
+	hmdfs_info("recv REKEY request");
+	set_crypto_info(conn, SET_CRYPTO_RECV);
+	// update send key if requested
+	rekey_req = data;
+	if (le32_to_cpu(rekey_req->update_request) == UPDATE_REQUESTED) {
+		ret = tcp_send_rekey_request(conn);
+		if (ret == 0)
+			set_crypto_info(conn, SET_CRYPTO_SEND);
+	}
+out_err:
+	kfree(data);
+}
+
+static bool cmd_update_tls_crypto_key(struct connection *conn,
+				      struct hmdfs_head_cmd *head)
+{
+	__u8 version = conn->node->version;
+	struct tcp_handle *tcp = conn->connect_handle;
+
+	if (version < DFS_2_0 || conn->type != CONNECT_TYPE_TCP || !tcp)
+		return false;
+	return head->operations.command == F_CONNECT_REKEY;
+}
+#endif
+
+void connection_working_recv_handler(struct connection *conn_impl, void *buf,
+				     void *data, __u32 data_len)
+{
+#ifdef CONFIG_HMDFS_FS_ENCRYPTION
+	if (cmd_update_tls_crypto_key(conn_impl, buf)) {
+		update_tls_crypto_key(conn_impl, buf, data, data_len);
+		return;
+	}
+#endif
+	conn_impl->node->conn_operations->recvmsg(conn_impl->node, buf, data);
+}
+
+static void connection_release(struct kref *ref)
+{
+	struct tcp_handle *tcp = NULL;
+	struct connection *conn = container_of(ref, struct connection, ref_cnt);
+
+	hmdfs_info("connection release");
+	memset(conn->master_key, 0, HMDFS_KEY_SIZE);
+	memset(conn->send_key, 0, HMDFS_KEY_SIZE);
+	memset(conn->recv_key, 0, HMDFS_KEY_SIZE);
+	if (conn->close)
+		conn->close(conn);
+	tcp = conn->connect_handle;
+	crypto_free_aead(conn->tfm);
+	// need to check and test: fput(tcp->sock->file);
+	if (tcp && tcp->sock) {
+		hmdfs_info("connection release: fd = %d, refcount %ld", tcp->fd,
+			   file_count(tcp->sock->file));
+		sockfd_put(tcp->sock);
+	}
+	if (tcp && tcp->recv_cache)
+		kmem_cache_destroy(tcp->recv_cache);
+
+	if (!list_empty(&conn->list)) {
+		mutex_lock(&conn->node->conn_impl_list_lock);
+		list_del(&conn->list);
+		mutex_unlock(&conn->node->conn_impl_list_lock);
+		/*
+		 * wake up hmdfs_disconnect_node to check whether
+		 * conn_deleting_list is empty.
+		 */
+		wake_up_interruptible(&conn->node->deleting_list_wq);
+	}
+
+	kfree(tcp);
+	kfree(conn);
+}
+
+static void hmdfs_peer_release(struct kref *ref)
+{
+	struct hmdfs_peer *peer = container_of(ref, struct hmdfs_peer, ref_cnt);
+	struct mutex *lock = &peer->sbi->connections.node_lock;
+
+	if (!list_empty(&peer->list))
+		hmdfs_info("releasing an on-sbi peer: device_id %llu ",
+			   peer->device_id);
+	else
+		hmdfs_info("releasing a redundant peer: device_id %llu ",
+			   peer->device_id);
+
+	cancel_delayed_work_sync(&peer->evt_dwork);
+	list_del(&peer->list);
+	idr_destroy(&peer->msg_idr);
+	idr_destroy(&peer->file_id_idr);
+	flush_workqueue(peer->req_handle_wq);
+	flush_workqueue(peer->async_wq);
+	flush_workqueue(peer->retry_wb_wq);
+	destroy_workqueue(peer->dentry_wq);
+	destroy_workqueue(peer->req_handle_wq);
+	destroy_workqueue(peer->async_wq);
+	destroy_workqueue(peer->retry_wb_wq);
+	destroy_workqueue(peer->reget_conn_wq);
+	kfree(peer);
+	mutex_unlock(lock);
+}
+
+void connection_put(struct connection *conn)
+{
+	struct mutex *lock = &conn->ref_lock;
+
+	kref_put_mutex(&conn->ref_cnt, connection_release, lock);
+}
+
+void peer_put(struct hmdfs_peer *peer)
+{
+	struct mutex *lock = &peer->sbi->connections.node_lock;
+
+	kref_put_mutex(&peer->ref_cnt, hmdfs_peer_release, lock);
+}
+static void hmdfs_dump_deleting_list(struct hmdfs_peer *node)
+{
+	struct connection *con = NULL;
+	struct tcp_handle *tcp = NULL;
+	int count = 0;
+
+	mutex_lock(&node->conn_impl_list_lock);
+	list_for_each_entry(con, &node->conn_deleting_list, list) {
+		tcp = con->connect_handle;
+		hmdfs_info("deleting list %d: device_id %llu tcp_fd %d refcnt %d",
+			   count, node->device_id, tcp ? tcp->fd : -1,
+			   kref_read(&con->ref_cnt));
+		count++;
+	}
+	mutex_unlock(&node->conn_impl_list_lock);
+}
+
+static bool hmdfs_conn_deleting_list_empty(struct hmdfs_peer *node)
+{
+	bool empty = false;
+
+	mutex_lock(&node->conn_impl_list_lock);
+	empty = list_empty(&node->conn_deleting_list);
+	mutex_unlock(&node->conn_impl_list_lock);
+
+	return empty;
+}
+
+void hmdfs_disconnect_node(struct hmdfs_peer *node)
+{
+	LIST_HEAD(local_conns);
+	struct connection *conn_impl = NULL;
+	struct connection *next = NULL;
+	struct tcp_handle *tcp = NULL;
+
+	if (unlikely(!node))
+		return;
+
+	hmdfs_node_inc_evt_seq(node);
+	/* Refer to comments in hmdfs_is_node_offlined() */
+	smp_mb__after_atomic();
+	node->status = NODE_STAT_OFFLINE;
+	hmdfs_info("Try to disconnect peer: device_id %llu", node->device_id);
+
+	mutex_lock(&node->conn_impl_list_lock);
+	if (!list_empty(&node->conn_impl_list))
+		list_replace_init(&node->conn_impl_list, &local_conns);
+	mutex_unlock(&node->conn_impl_list_lock);
+
+	list_for_each_entry_safe(conn_impl, next, &local_conns, list) {
+		tcp = conn_impl->connect_handle;
+		if (tcp && tcp->sock) {
+			kernel_sock_shutdown(tcp->sock, SHUT_RDWR);
+			hmdfs_info("shutdown sock: fd = %d, refcount %ld",
+				   tcp->fd, file_count(tcp->sock->file));
+		}
+		if (tcp)
+			tcp->fd = INVALID_SOCKET_FD;
+
+		tcp_close_socket(tcp);
+		list_del_init(&conn_impl->list);
+
+		connection_put(conn_impl);
+	}
+
+	if (wait_event_interruptible_timeout(node->deleting_list_wq,
+					     hmdfs_conn_deleting_list_empty(node),
+					     HMDFS_WAIT_CONN_RELEASE) <= 0)
+		hmdfs_dump_deleting_list(node);
+
+	/* wait for all request processing to end */
+	spin_lock(&node->idr_lock);
+	while (node->msg_idr_process) {
+		spin_unlock(&node->idr_lock);
+		usleep_range(HMDFS_WAIT_REQUEST_END_MIN,
+			     HMDFS_WAIT_REQUEST_END_MAX);
+		spin_lock(&node->idr_lock);
+	}
+	spin_unlock(&node->idr_lock);
+
+	hmdfs_queue_raw_node_evt(node, RAW_NODE_EVT_OFF);
+}
+
+static void hmdfs_run_simple_evt_cb(struct hmdfs_peer *node, int evt)
+{
+	unsigned int seq = hmdfs_node_inc_evt_seq(node);
+
+	mutex_lock(&node->seq_lock);
+	hmdfs_node_call_evt_cb(node, evt, true, seq);
+	mutex_unlock(&node->seq_lock);
+}
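+
+/*
+ * Final teardown of a peer. Callers (see hmdfs_connections_stop()) run
+ * hmdfs_disconnect_node() first, so only the NODE_EVT_DEL callback, the
+ * sysfs entry and the remaining references are left to drop here.
+ */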
+static void hmdfs_del_peer(struct hmdfs_peer *node)
+{
+	/*
+	 * No need for offline evt cb, because all files must
+	 * have been flushed and closed, else the filesystem
+	 * will be un-mountable.
+	 */
+	cancel_delayed_work_sync(&node->evt_dwork);
+
+	hmdfs_run_simple_evt_cb(node, NODE_EVT_DEL);
+
+	hmdfs_release_peer_sysfs(node);
+
+	flush_workqueue(node->reget_conn_wq);
+	peer_put(node);
+}
+
+void hmdfs_connections_stop(struct hmdfs_sb_info *sbi)
+{
+	struct hmdfs_peer *node = NULL;
+	struct hmdfs_peer *con_tmp = NULL;
+
+	mutex_lock(&sbi->connections.node_lock);
+	list_for_each_entry_safe(node, con_tmp, &sbi->connections.node_list,
+				 list) {
+		mutex_unlock(&sbi->connections.node_lock);
+		hmdfs_disconnect_node(node);
+		hmdfs_del_peer(node);
+		mutex_lock(&sbi->connections.node_lock);
+	}
+	mutex_unlock(&sbi->connections.node_lock);
+}
+
+struct connection *get_conn_impl(struct hmdfs_peer *node, int connect_type)
+{
+	struct connection *conn_impl = NULL;
+
+	if (!node)
+		return NULL;
+	mutex_lock(&node->conn_impl_list_lock);
+	list_for_each_entry(conn_impl, &node->conn_impl_list, list) {
+		if (conn_impl->type == connect_type &&
+		    conn_impl->status == CONNECT_STAT_WORKING) {
+			connection_get(conn_impl);
+			mutex_unlock(&node->conn_impl_list_lock);
+			return conn_impl;
+		}
+	}
+	mutex_unlock(&node->conn_impl_list_lock);
+	hmdfs_err_ratelimited("device %llu cannot find a working connection, type %d",
+			      node->device_id, connect_type);
+	return NULL;
+}
+
+void set_conn_sock_quickack(struct hmdfs_peer *node)
+{
+	struct connection *conn_impl = NULL;
+	struct tcp_handle *tcp = NULL;
+	int option = 1;
+
+	if (!node)
+		return;
+	mutex_lock(&node->conn_impl_list_lock);
+	list_for_each_entry(conn_impl, &node->conn_impl_list, list) {
+		if (conn_impl->type == CONNECT_TYPE_TCP &&
+		    conn_impl->status == CONNECT_STAT_WORKING &&
+		    conn_impl->connect_handle) {
+			tcp = (struct tcp_handle *)(conn_impl->connect_handle);
+			tcp_sock_set_quickack(tcp->sock->sk, option);
+		}
+	}
+	mutex_unlock(&node->conn_impl_list_lock);
+}
+
+struct hmdfs_peer *hmdfs_lookup_from_devid(struct hmdfs_sb_info *sbi,
+					   uint64_t device_id)
+{
+	struct hmdfs_peer *con = NULL;
+	struct hmdfs_peer *lookup = NULL;
+
+	if (!sbi)
+		return NULL;
+	mutex_lock(&sbi->connections.node_lock);
+	list_for_each_entry(con, &sbi->connections.node_list, list) {
+		if (con->status != NODE_STAT_ONLINE ||
+		    con->device_id != device_id)
+			continue;
+		lookup = con;
+		peer_get(lookup);
+		break;
+	}
+	mutex_unlock(&sbi->connections.node_lock);
+	return lookup;
+}
+
+struct hmdfs_peer *hmdfs_lookup_from_cid(struct hmdfs_sb_info *sbi,
+					 uint8_t *cid)
+{
+	struct hmdfs_peer *con = NULL;
+	struct hmdfs_peer *lookup = NULL;
+
+	if (!sbi)
+		return NULL;
+	mutex_lock(&sbi->connections.node_lock);
+	list_for_each_entry(con, &sbi->connections.node_list, list) {
+		if (strncmp(con->cid, cid, HMDFS_CID_SIZE) != 0)
+			continue;
+		lookup = con;
+		peer_get(lookup);
+		break;
+	}
+	mutex_unlock(&sbi->connections.node_lock);
+	return lookup;
+}
+
+static struct hmdfs_peer *lookup_peer_by_cid_unsafe(struct hmdfs_sb_info *sbi,
+						    uint8_t *cid)
+{
+	struct hmdfs_peer *node = NULL;
+
+	list_for_each_entry(node, &sbi->connections.node_list, list)
+		if (!strncmp(node->cid, cid, HMDFS_CID_SIZE)) {
+			peer_get(node);
+			return node;
+		}
+	return NULL;
+}
+
+static struct hmdfs_peer *add_peer_unsafe(struct hmdfs_sb_info *sbi,
+					  struct hmdfs_peer *peer2add)
+{
+	struct hmdfs_peer *peer;
+	int err;
+
+	peer = lookup_peer_by_cid_unsafe(sbi, peer2add->cid);
+	if (peer)
+		return peer;
+
+	err = hmdfs_register_peer_sysfs(sbi, peer2add);
+	if (err) {
+		hmdfs_err("register peer %llu sysfs err %d",
+			  peer2add->device_id, err);
+		return ERR_PTR(err);
+	}
+	list_add_tail(&peer2add->list,
&sbi->connections.node_list); + peer_get(peer2add); + hmdfs_run_simple_evt_cb(peer2add, NODE_EVT_ADD); + return peer2add; +} + +static struct hmdfs_peer * +alloc_peer(struct hmdfs_sb_info *sbi, uint8_t *cid, + const struct connection_operations *conn_operations) +{ + struct hmdfs_peer *node = kzalloc(sizeof(*node), GFP_KERNEL); + + if (!node) + return NULL; + + node->device_id = (u32)atomic_inc_return(&sbi->connections.conn_seq); + + node->async_wq = alloc_workqueue("dfs_async%u_%llu", WQ_MEM_RECLAIM, 0, + sbi->seq, node->device_id); + if (!node->async_wq) { + hmdfs_err("Failed to alloc async wq"); + goto out_err; + } + node->req_handle_wq = alloc_workqueue("dfs_req%u_%llu", + WQ_UNBOUND | WQ_MEM_RECLAIM, + sbi->async_req_max_active, + sbi->seq, node->device_id); + if (!node->req_handle_wq) { + hmdfs_err("Failed to alloc req wq"); + goto out_err; + } + node->dentry_wq = alloc_workqueue("dfs_dentry%u_%llu", + WQ_UNBOUND | WQ_MEM_RECLAIM, + 0, sbi->seq, node->device_id); + if (!node->dentry_wq) { + hmdfs_err("Failed to alloc dentry wq"); + goto out_err; + } + node->retry_wb_wq = alloc_workqueue("dfs_rwb%u_%llu", + WQ_UNBOUND | WQ_MEM_RECLAIM, + HMDFS_RETRY_WB_WQ_MAX_ACTIVE, + sbi->seq, node->device_id); + if (!node->retry_wb_wq) { + hmdfs_err("Failed to alloc retry writeback wq"); + goto out_err; + } + node->reget_conn_wq = alloc_workqueue("dfs_regetcon%u_%llu", + WQ_UNBOUND, 0, + sbi->seq, node->device_id); + if (!node->reget_conn_wq) { + hmdfs_err("Failed to alloc reget conn wq"); + goto out_err; + } + INIT_LIST_HEAD(&node->conn_impl_list); + mutex_init(&node->conn_impl_list_lock); + INIT_LIST_HEAD(&node->conn_deleting_list); + init_waitqueue_head(&node->deleting_list_wq); + idr_init(&node->msg_idr); + spin_lock_init(&node->idr_lock); + idr_init(&node->file_id_idr); + spin_lock_init(&node->file_id_lock); + INIT_LIST_HEAD(&node->list); + kref_init(&node->ref_cnt); + node->owner = sbi->seq; + node->conn_operations = conn_operations; + node->sbi = sbi; + node->status = NODE_STAT_SHAKING; + node->conn_time = jiffies; + memcpy(node->cid, cid, HMDFS_CID_SIZE); + atomic64_set(&node->sb_dirty_count, 0); + node->fid_cookie = 0; + atomic_set(&node->evt_seq, 0); + mutex_init(&node->seq_lock); + mutex_init(&node->offline_cb_lock); + mutex_init(&node->evt_lock); + node->pending_evt = RAW_NODE_EVT_NR; + node->last_evt = RAW_NODE_EVT_NR; + node->cur_evt[0] = RAW_NODE_EVT_NR; + node->cur_evt[1] = RAW_NODE_EVT_NR; + node->seq_wr_idx = (unsigned char)UINT_MAX; + node->seq_rd_idx = node->seq_wr_idx; + INIT_DELAYED_WORK(&node->evt_dwork, hmdfs_node_evt_work); + node->msg_idr_process = 0; + node->offline_start = false; + spin_lock_init(&node->wr_opened_inode_lock); + INIT_LIST_HEAD(&node->wr_opened_inode_list); + spin_lock_init(&node->stashed_inode_lock); + node->stashed_inode_nr = 0; + atomic_set(&node->rebuild_inode_status_nr, 0); + init_waitqueue_head(&node->rebuild_inode_status_wq); + INIT_LIST_HEAD(&node->stashed_inode_list); + node->need_rebuild_stash_list = false; + + return node; + +out_err: + if (node->async_wq) { + destroy_workqueue(node->async_wq); + node->async_wq = NULL; + } + if (node->req_handle_wq) { + destroy_workqueue(node->req_handle_wq); + node->req_handle_wq = NULL; + } + if (node->dentry_wq) { + destroy_workqueue(node->dentry_wq); + node->dentry_wq = NULL; + } + if (node->retry_wb_wq) { + destroy_workqueue(node->retry_wb_wq); + node->retry_wb_wq = NULL; + } + if (node->reget_conn_wq) { + destroy_workqueue(node->reget_conn_wq); + node->reget_conn_wq = NULL; + } + kfree(node); + return 
NULL;
+}
+
+struct hmdfs_peer *hmdfs_get_peer(struct hmdfs_sb_info *sbi, uint8_t *cid)
+{
+	struct hmdfs_peer *peer = NULL, *on_sbi_peer = NULL;
+	const struct connection_operations *conn_opr_ptr = NULL;
+
+	mutex_lock(&sbi->connections.node_lock);
+	peer = lookup_peer_by_cid_unsafe(sbi, cid);
+	mutex_unlock(&sbi->connections.node_lock);
+	if (peer) {
+		hmdfs_info("Got an existing peer: device_id = %llu",
+			   peer->device_id);
+		goto out;
+	}
+
+	conn_opr_ptr = hmdfs_get_peer_operation(DFS_2_0);
+	if (unlikely(!conn_opr_ptr)) {
+		hmdfs_info("Fatal! Cannot get peer operation");
+		goto out;
+	}
+	peer = alloc_peer(sbi, cid, conn_opr_ptr);
+	if (unlikely(!peer)) {
+		hmdfs_info("Failed to alloc a peer");
+		goto out;
+	}
+
+	mutex_lock(&sbi->connections.node_lock);
+	on_sbi_peer = add_peer_unsafe(sbi, peer);
+	mutex_unlock(&sbi->connections.node_lock);
+	if (IS_ERR(on_sbi_peer)) {
+		peer_put(peer);
+		peer = NULL;
+		goto out;
+	} else if (unlikely(on_sbi_peer != peer)) {
+		hmdfs_info("Got an existing peer: device_id = %llu",
+			   on_sbi_peer->device_id);
+		peer_put(peer);
+		peer = on_sbi_peer;
+	} else {
+		hmdfs_info("Got a newly allocated peer: device_id = %llu",
+			   peer->device_id);
+	}
+
+out:
+	return peer;
+}
+
+static void head_release(struct kref *kref)
+{
+	struct hmdfs_msg_idr_head *head;
+	struct hmdfs_peer *con;
+
+	head = (struct hmdfs_msg_idr_head *)container_of(kref,
+			struct hmdfs_msg_idr_head, ref);
+	con = head->peer;
+	idr_remove(&con->msg_idr, head->msg_id);
+	spin_unlock(&con->idr_lock);
+
+	kfree(head);
+}
+
+void head_put(struct hmdfs_msg_idr_head *head)
+{
+	kref_put_lock(&head->ref, head_release, &head->peer->idr_lock);
+}
+
+struct hmdfs_msg_idr_head *hmdfs_find_msg_head(struct hmdfs_peer *peer, int id)
+{
+	struct hmdfs_msg_idr_head *head = NULL;
+
+	spin_lock(&peer->idr_lock);
+	head = idr_find(&peer->msg_idr, id);
+	if (head)
+		kref_get(&head->ref);
+	spin_unlock(&peer->idr_lock);
+
+	return head;
+}
+
+int hmdfs_alloc_msg_idr(struct hmdfs_peer *peer, enum MSG_IDR_TYPE type,
+			void *ptr)
+{
+	int ret = -EAGAIN;
+	struct hmdfs_msg_idr_head *head = ptr;
+	int end = peer->version < DFS_2_0 ? (USHRT_MAX + 1) : 0;
+
+	idr_preload(GFP_KERNEL);
+	spin_lock(&peer->idr_lock);
+	if (!peer->offline_start)
+		ret = idr_alloc_cyclic(&peer->msg_idr, ptr,
+				       1, end, GFP_NOWAIT);
+	if (ret >= 0) {
+		kref_init(&head->ref);
+		head->msg_id = ret;
+		head->type = type;
+		head->peer = peer;
+		peer->msg_idr_process++;
+		ret = 0;
+	}
+	spin_unlock(&peer->idr_lock);
+	idr_preload_end();
+
+	return ret;
+}
diff --git a/fs/hmdfs/comm/connection.h b/fs/hmdfs/comm/connection.h
new file mode 100644
index 0000000000000000000000000000000000000000..6f3ee1baddf2177a5d9714a5ed1a8b419c73ae11
--- /dev/null
+++ b/fs/hmdfs/comm/connection.h
@@ -0,0 +1,356 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/comm/connection.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */ + +#ifndef HMDFS_CONNECTION_H +#define HMDFS_CONNECTION_H + +#ifdef CONFIG_HMDFS_FS_ENCRYPTION +#include +#endif + +#include +#include +#include "protocol.h" +#include "node_cb.h" + +#define HMDFS_KEY_SIZE 32 +#define HMDFS_IV_SIZE 12 +#define HMDFS_TAG_SIZE 16 +#define HMDFS_CID_SIZE 64 + +enum { + CONNECT_MESG_HANDSHAKE_REQUEST = 1, + CONNECT_MESG_HANDSHAKE_RESPONSE = 2, + CONNECT_MESG_HANDSHAKE_ACK = 3, +}; + +enum { + CONNECT_STAT_WAIT_REQUEST = 0, + CONNECT_STAT_WAIT_RESPONSE, + CONNECT_STAT_WORKING, + CONNECT_STAT_STOP, + CONNECT_STAT_WAIT_ACK, + CONNECT_STAT_NEGO_FAIL, + CONNECT_STAT_COUNT +}; + +enum { + CONNECT_TYPE_TCP = 0, + CONNECT_TYPE_UNSUPPORT, +}; + +struct connection_stat { + int64_t send_bytes; + int64_t recv_bytes; + int send_message_count; + int recv_message_count; + unsigned long rekey_time; +}; + +struct connection { + struct list_head list; + struct kref ref_cnt; + struct mutex ref_lock; + struct hmdfs_peer *node; + int type; + int status; + void *connect_handle; + struct crypto_aead *tfm; + u8 master_key[HMDFS_KEY_SIZE]; + u8 send_key[HMDFS_KEY_SIZE]; + u8 recv_key[HMDFS_KEY_SIZE]; + struct connection_stat stat; + struct work_struct reget_work; +#ifdef CONFIG_HMDFS_FS_ENCRYPTION + struct tls12_crypto_info_aes_gcm_128 send_crypto_info; + struct tls12_crypto_info_aes_gcm_128 recv_crypto_info; +#endif + void (*close)(struct connection *connect); + int (*send_message)(struct connection *connect, + struct hmdfs_send_data *msg); + uint32_t crypto; +}; + +enum { + NODE_STAT_SHAKING = 0, + NODE_STAT_ONLINE, + NODE_STAT_OFFLINE, +}; + +struct hmdfs_async_work { + struct hmdfs_msg_idr_head head; + struct page *page; + struct delayed_work d_work; + unsigned long start; +}; + +enum { + RAW_NODE_EVT_OFF = 0, + RAW_NODE_EVT_ON, + RAW_NODE_EVT_NR, +}; + +#define RAW_NODE_EVT_MAX_NR 4 + +struct hmdfs_stash_statistics { + unsigned int cur_ok; + unsigned int cur_nothing; + unsigned int cur_fail; + unsigned int total_ok; + unsigned int total_nothing; + unsigned int total_fail; + unsigned long long ok_pages; + unsigned long long fail_pages; +}; + +struct hmdfs_restore_statistics { + unsigned int cur_ok; + unsigned int cur_fail; + unsigned int cur_keep; + unsigned int total_ok; + unsigned int total_fail; + unsigned int total_keep; + unsigned long long ok_pages; + unsigned long long fail_pages; +}; + +struct hmdfs_rebuild_statistics { + unsigned int cur_ok; + unsigned int cur_fail; + unsigned int cur_invalid; + unsigned int total_ok; + unsigned int total_fail; + unsigned int total_invalid; + unsigned int time; +}; + +struct hmdfs_peer_statistics { + /* stash statistics */ + struct hmdfs_stash_statistics stash; + /* restore statistics */ + struct hmdfs_restore_statistics restore; + /* rebuild statistics */ + struct hmdfs_rebuild_statistics rebuild; +}; + +struct hmdfs_peer { + struct list_head list; + struct kref ref_cnt; + unsigned int owner; + uint64_t device_id; + unsigned long conn_time; + uint8_t version; + u8 status; + u64 features; + long long old_sb_dirty_count; + atomic64_t sb_dirty_count; + /* + * cookie for opened file id. 
+ * It will be increased after the peer goes offline.
+	 */
+	uint16_t fid_cookie;
+	struct mutex conn_impl_list_lock;
+	struct list_head conn_impl_list;
+	/*
+	 * when an async message processing context calls
+	 * hmdfs_reget_connection, the conn node is added to
+	 * conn_deleting_list, so hmdfs_disconnect_node can wait for
+	 * all receive threads to exit
+	 */
+	struct list_head conn_deleting_list;
+	wait_queue_head_t deleting_list_wq;
+	struct idr msg_idr;
+	spinlock_t idr_lock;
+	struct idr file_id_idr;
+	spinlock_t file_id_lock;
+	int recvbuf_maxsize;
+	struct crypto_aead *tfm;
+	char cid[HMDFS_CID_SIZE + 1];
+	const struct connection_operations *conn_operations;
+	struct hmdfs_sb_info *sbi;
+	struct workqueue_struct *async_wq;
+	struct workqueue_struct *req_handle_wq;
+	struct workqueue_struct *dentry_wq;
+	struct workqueue_struct *retry_wb_wq;
+	struct workqueue_struct *reget_conn_wq;
+	atomic_t evt_seq;
+	/* sync cb may be blocking */
+	struct mutex seq_lock;
+	struct mutex offline_cb_lock;
+	struct mutex evt_lock;
+	unsigned char pending_evt;
+	unsigned char last_evt;
+	unsigned char waiting_evt[RAW_NODE_EVT_NR];
+	unsigned char seq_rd_idx;
+	unsigned char seq_wr_idx;
+	unsigned int seq_tbl[RAW_NODE_EVT_MAX_NR];
+	unsigned int pending_evt_seq;
+	unsigned char cur_evt[NODE_EVT_TYPE_NR];
+	unsigned int cur_evt_seq[NODE_EVT_TYPE_NR];
+	unsigned int merged_evt;
+	unsigned int dup_evt[RAW_NODE_EVT_NR];
+	struct delayed_work evt_dwork;
+	/* protected by idr_lock */
+	uint64_t msg_idr_process;
+	bool offline_start;
+	spinlock_t wr_opened_inode_lock;
+	struct list_head wr_opened_inode_list;
+	/*
+	 * protect @stashed_inode_list and @stashed_inode_nr in the stash
+	 * process and the fill_inode_remote->hmdfs_remote_init_stash_status
+	 * process
+	 */
+	spinlock_t stashed_inode_lock;
+	unsigned int stashed_inode_nr;
+	struct list_head stashed_inode_list;
+	bool need_rebuild_stash_list;
+	/* how many inodes are rebuilding stash status */
+	atomic_t rebuild_inode_status_nr;
+	wait_queue_head_t rebuild_inode_status_wq;
+	struct hmdfs_peer_statistics stats;
+	/* sysfs */
+	struct kobject kobj;
+	struct completion kobj_unregister;
+};
+
+#define HMDFS_DEVID_LOCAL 0
+
+/* Be compatible with DFS1.0, don't add packed attribute so far */
+struct connection_msg_head {
+	__u8 magic;
+	__u8 version;
+	__u8 operations;
+	__u8 flags;
+	__le32 datasize;
+	__le64 source;
+	__le16 msg_id;
+	__le16 request_id;
+	__le32 reserved1;
+} __packed;
+
+struct connection_handshake_req {
+	__le32 len;
+	char dev_id[0];
+} __packed;
+
+enum {
+	HS_EXTEND_CODE_CRYPTO = 0,
+	HS_EXTEND_CODE_CASE_SENSE,
+	HS_EXTEND_CODE_FEATURE_SUPPORT,
+	HS_EXTEND_CODE_COUNT
+};
+
+struct conn_hs_extend_reg {
+	__u16 len;
+	__u16 resv;
+	void (*filler)(struct connection *conn_impl, __u8 ops,
+		       void *data, __u32 len);
+	int (*parser)(struct connection *conn_impl, __u8 ops,
+		      void *data, __u32 len);
+};
+
+struct conn_hs_extend_head {
+	__le32 field_cn;
+	char data[0];
+};
+
+struct extend_field_head {
+	__le16 code;
+	__le16 len;
+} __packed;
+
+struct crypto_body {
+	__le32 crypto;
+} __packed;
+
+struct case_sense_body {
+	__u8 case_sensitive;
+} __packed;
+
+struct feature_body {
+	__u64 features;
+	__u64 reserved;
+} __packed;
+
+#define HMDFS_HS_CRYPTO_KTLS_AES128 0x00000001
+#define HMDFS_HS_CRYPTO_KTLS_AES256 0x00000002
+
+static inline bool hmdfs_is_node_online(const struct hmdfs_peer *node)
+{
+	return READ_ONCE(node->status) == NODE_STAT_ONLINE;
+}
+
+static inline unsigned int hmdfs_node_inc_evt_seq(struct hmdfs_peer *node)
+{
+	/* Use the atomic as an unsigned integer */
+
return atomic_inc_return(&node->evt_seq); +} + +static inline unsigned int hmdfs_node_evt_seq(const struct hmdfs_peer *node) +{ + return atomic_read(&node->evt_seq); +} + +struct connection *get_conn_impl(struct hmdfs_peer *node, int connect_type); + +void set_conn_sock_quickack(struct hmdfs_peer *node); + +struct hmdfs_peer *hmdfs_get_peer(struct hmdfs_sb_info *sbi, uint8_t *cid); + +struct hmdfs_peer *hmdfs_lookup_from_devid(struct hmdfs_sb_info *sbi, + uint64_t device_id); +struct hmdfs_peer *hmdfs_lookup_from_cid(struct hmdfs_sb_info *sbi, + uint8_t *cid); +void connection_send_handshake(struct connection *conn_impl, __u8 operations, + __le16 request_id); +void connection_handshake_recv_handler(struct connection *conn_impl, void *buf, + void *data, __u32 data_len); +void connection_working_recv_handler(struct connection *conn_impl, void *head, + void *data, __u32 data_len); +static inline void connection_get(struct connection *conn) +{ + kref_get(&conn->ref_cnt); +} + +void connection_put(struct connection *conn); +static inline void peer_get(struct hmdfs_peer *peer) +{ + kref_get(&peer->ref_cnt); +} + +void peer_put(struct hmdfs_peer *peer); + +int hmdfs_sendmessage(struct hmdfs_peer *node, struct hmdfs_send_data *msg); +void hmdfs_connections_stop(struct hmdfs_sb_info *sbi); + +void hmdfs_disconnect_node(struct hmdfs_peer *node); + +void connection_to_working(struct hmdfs_peer *node); + +int hmdfs_alloc_msg_idr(struct hmdfs_peer *peer, enum MSG_IDR_TYPE type, + void *ptr); +struct hmdfs_msg_idr_head *hmdfs_find_msg_head(struct hmdfs_peer *peer, int id); + +static inline void hmdfs_start_process_offline(struct hmdfs_peer *peer) +{ + spin_lock(&peer->idr_lock); + peer->offline_start = true; + spin_unlock(&peer->idr_lock); +} + +static inline void hmdfs_stop_process_offline(struct hmdfs_peer *peer) +{ + spin_lock(&peer->idr_lock); + peer->offline_start = false; + spin_unlock(&peer->idr_lock); +} + +static inline void hmdfs_dec_msg_idr_process(struct hmdfs_peer *peer) +{ + spin_lock(&peer->idr_lock); + peer->msg_idr_process--; + spin_unlock(&peer->idr_lock); +} +#endif diff --git a/fs/hmdfs/comm/crypto.c b/fs/hmdfs/comm/crypto.c new file mode 100644 index 0000000000000000000000000000000000000000..60bb08f1697f90d72439b8ad64046bdfeb5558df --- /dev/null +++ b/fs/hmdfs/comm/crypto.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/comm/crypto.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#include "crypto.h" + +#include +#include +#include +#include +#include +#include + +#include "hmdfs.h" + +static void tls_crypto_set_key(struct connection *conn_impl, int tx) +{ + int rc = 0; + struct tcp_handle *tcp = conn_impl->connect_handle; + struct tls_context *ctx = tls_get_ctx(tcp->sock->sk); + struct cipher_context *cctx = NULL; + struct tls_sw_context_tx *sw_ctx_tx = NULL; + struct tls_sw_context_rx *sw_ctx_rx = NULL; + struct crypto_aead **aead = NULL; + struct tls12_crypto_info_aes_gcm_128 *crypto_info = NULL; + + if (tx) { + crypto_info = &conn_impl->send_crypto_info; + cctx = &ctx->tx; + sw_ctx_tx = tls_sw_ctx_tx(ctx); + aead = &sw_ctx_tx->aead_send; + } else { + crypto_info = &conn_impl->recv_crypto_info; + cctx = &ctx->rx; + sw_ctx_rx = tls_sw_ctx_rx(ctx); + aead = &sw_ctx_rx->aead_recv; + } + + memcpy(cctx->iv, crypto_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, crypto_info->iv, + TLS_CIPHER_AES_GCM_128_IV_SIZE); + memcpy(cctx->rec_seq, crypto_info->rec_seq, + TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); + rc = crypto_aead_setkey(*aead, crypto_info->key, + TLS_CIPHER_AES_GCM_128_KEY_SIZE); + if (rc) + hmdfs_err("crypto set key error"); +} + +int tls_crypto_info_init(struct connection *conn_impl) +{ + int ret = 0; + u8 key_meterial[HMDFS_KEY_SIZE]; + struct tcp_handle *tcp = + (struct tcp_handle *)(conn_impl->connect_handle); + if (conn_impl->node->version < DFS_2_0 || !tcp) + return -EINVAL; + // send + update_key(conn_impl->send_key, key_meterial, HKDF_TYPE_IV); + ret = tcp->sock->ops->setsockopt(tcp->sock, SOL_TCP, TCP_ULP, + KERNEL_SOCKPTR("tls"), sizeof("tls")); + if (ret) + hmdfs_err("set tls error %d", ret); + tcp->connect->send_crypto_info.info.version = TLS_1_2_VERSION; + tcp->connect->send_crypto_info.info.cipher_type = + TLS_CIPHER_AES_GCM_128; + + memcpy(tcp->connect->send_crypto_info.key, tcp->connect->send_key, + TLS_CIPHER_AES_GCM_128_KEY_SIZE); + memcpy(tcp->connect->send_crypto_info.iv, + key_meterial + CRYPTO_IV_OFFSET, TLS_CIPHER_AES_GCM_128_IV_SIZE); + memcpy(tcp->connect->send_crypto_info.salt, + key_meterial + CRYPTO_SALT_OFFSET, + TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(tcp->connect->send_crypto_info.rec_seq, + key_meterial + CRYPTO_SEQ_OFFSET, + TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); + + ret = tcp->sock->ops->setsockopt(tcp->sock, SOL_TLS, TLS_TX, + KERNEL_SOCKPTR(&(tcp->connect->send_crypto_info)), + sizeof(tcp->connect->send_crypto_info)); + if (ret) + hmdfs_err("set tls send_crypto_info error %d", ret); + + // recv + update_key(tcp->connect->recv_key, key_meterial, HKDF_TYPE_IV); + tcp->connect->recv_crypto_info.info.version = TLS_1_2_VERSION; + tcp->connect->recv_crypto_info.info.cipher_type = + TLS_CIPHER_AES_GCM_128; + + memcpy(tcp->connect->recv_crypto_info.key, tcp->connect->recv_key, + TLS_CIPHER_AES_GCM_128_KEY_SIZE); + memcpy(tcp->connect->recv_crypto_info.iv, + key_meterial + CRYPTO_IV_OFFSET, TLS_CIPHER_AES_GCM_128_IV_SIZE); + memcpy(tcp->connect->recv_crypto_info.salt, + key_meterial + CRYPTO_SALT_OFFSET, + TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(tcp->connect->recv_crypto_info.rec_seq, + key_meterial + CRYPTO_SEQ_OFFSET, + TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); + memset(key_meterial, 0, HMDFS_KEY_SIZE); + + ret = tcp->sock->ops->setsockopt(tcp->sock, SOL_TLS, TLS_RX, + KERNEL_SOCKPTR(&(tcp->connect->recv_crypto_info)), + sizeof(tcp->connect->recv_crypto_info)); + if (ret) + hmdfs_err("set tls recv_crypto_info error %d", ret); + return ret; +} + +static int tls_set_tx(struct 
tcp_handle *tcp) +{ + int ret = 0; + u8 new_key[HMDFS_KEY_SIZE]; + u8 key_meterial[HMDFS_KEY_SIZE]; + + ret = update_key(tcp->connect->send_key, new_key, HKDF_TYPE_REKEY); + if (ret < 0) + return ret; + memcpy(tcp->connect->send_key, new_key, HMDFS_KEY_SIZE); + ret = update_key(tcp->connect->send_key, key_meterial, HKDF_TYPE_IV); + if (ret < 0) + return ret; + + memcpy(tcp->connect->send_crypto_info.key, tcp->connect->send_key, + TLS_CIPHER_AES_GCM_128_KEY_SIZE); + memcpy(tcp->connect->send_crypto_info.iv, + key_meterial + CRYPTO_IV_OFFSET, TLS_CIPHER_AES_GCM_128_IV_SIZE); + memcpy(tcp->connect->send_crypto_info.salt, + key_meterial + CRYPTO_SALT_OFFSET, + TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(tcp->connect->send_crypto_info.rec_seq, + key_meterial + CRYPTO_SEQ_OFFSET, + TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); + memset(new_key, 0, HMDFS_KEY_SIZE); + memset(key_meterial, 0, HMDFS_KEY_SIZE); + + tls_crypto_set_key(tcp->connect, 1); + return 0; +} + +static int tls_set_rx(struct tcp_handle *tcp) +{ + int ret = 0; + u8 new_key[HMDFS_KEY_SIZE]; + u8 key_meterial[HMDFS_KEY_SIZE]; + + ret = update_key(tcp->connect->recv_key, new_key, HKDF_TYPE_REKEY); + if (ret < 0) + return ret; + memcpy(tcp->connect->recv_key, new_key, HMDFS_KEY_SIZE); + ret = update_key(tcp->connect->recv_key, key_meterial, HKDF_TYPE_IV); + if (ret < 0) + return ret; + + memcpy(tcp->connect->recv_crypto_info.key, tcp->connect->recv_key, + TLS_CIPHER_AES_GCM_128_KEY_SIZE); + memcpy(tcp->connect->recv_crypto_info.iv, + key_meterial + CRYPTO_IV_OFFSET, TLS_CIPHER_AES_GCM_128_IV_SIZE); + memcpy(tcp->connect->recv_crypto_info.salt, + key_meterial + CRYPTO_SALT_OFFSET, + TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(tcp->connect->recv_crypto_info.rec_seq, + key_meterial + CRYPTO_SEQ_OFFSET, + TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); + memset(new_key, 0, HMDFS_KEY_SIZE); + memset(key_meterial, 0, HMDFS_KEY_SIZE); + tls_crypto_set_key(tcp->connect, 0); + return 0; +} + +int set_crypto_info(struct connection *conn_impl, int set_type) +{ + int ret = 0; + __u8 version = conn_impl->node->version; + struct tcp_handle *tcp = + (struct tcp_handle *)(conn_impl->connect_handle); + if (version < DFS_2_0 || !tcp) + return -EINVAL; + + if (set_type == SET_CRYPTO_SEND) { + ret = tls_set_tx(tcp); + if (ret) { + hmdfs_err("tls set tx fail"); + return ret; + } + } + if (set_type == SET_CRYPTO_RECV) { + ret = tls_set_rx(tcp); + if (ret) { + hmdfs_err("tls set rx fail"); + return ret; + } + } + hmdfs_info("KTLS setting success"); + return ret; +} + +static int hmac_sha256(u8 *key, u8 key_len, char *info, u8 info_len, u8 *output) +{ + struct crypto_shash *tfm = NULL; + struct shash_desc *shash = NULL; + int ret = 0; + + if (!key) + return -EINVAL; + + tfm = crypto_alloc_shash("hmac(sha256)", 0, 0); + if (IS_ERR(tfm)) { + hmdfs_err("crypto_alloc_ahash failed: err %ld", PTR_ERR(tfm)); + return PTR_ERR(tfm); + } + + ret = crypto_shash_setkey(tfm, key, key_len); + if (ret) { + hmdfs_err("crypto_ahash_setkey failed: err %d", ret); + goto failed; + } + + shash = kzalloc(sizeof(*shash) + crypto_shash_descsize(tfm), + GFP_KERNEL); + if (!shash) { + ret = -ENOMEM; + goto failed; + } + + shash->tfm = tfm; + + ret = crypto_shash_digest(shash, info, info_len, output); + + kfree(shash); + +failed: + crypto_free_shash(tfm); + return ret; +} + +static const char *const g_key_lable[] = { "ktls key initiator", + "ktls key accepter", + "ktls key update", "ktls iv&salt" }; +static const int g_key_lable_len[] = { 18, 17, 15, 12 }; + +int update_key(__u8 *old_key, __u8 
*new_key, int type) +{ + int ret = 0; + char lable[MAX_LABLE_SIZE]; + u8 lable_size; + + lable_size = g_key_lable_len[type] + sizeof(u16) + sizeof(char); + *((u16 *)lable) = HMDFS_KEY_SIZE; + memcpy(lable + sizeof(u16), g_key_lable[type], g_key_lable_len[type]); + *(lable + sizeof(u16) + g_key_lable_len[type]) = 0x01; + ret = hmac_sha256(old_key, HMDFS_KEY_SIZE, lable, lable_size, new_key); + if (ret < 0) + hmdfs_err("hmac sha256 error"); + return ret; +} diff --git a/fs/hmdfs/comm/crypto.h b/fs/hmdfs/comm/crypto.h new file mode 100644 index 0000000000000000000000000000000000000000..7549f3897336b0358b2a5cea76f8d45391c4f489 --- /dev/null +++ b/fs/hmdfs/comm/crypto.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/crypto.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_FS_ENCRYPTION_H +#define HMDFS_FS_ENCRYPTION_H + +#include "transport.h" + +#define MAX_LABLE_SIZE 30 +#define CRYPTO_IV_OFFSET 0 +#define CRYPTO_SALT_OFFSET (CRYPTO_IV_OFFSET + TLS_CIPHER_AES_GCM_128_IV_SIZE) +#define CRYPTO_SEQ_OFFSET \ + (CRYPTO_SALT_OFFSET + TLS_CIPHER_AES_GCM_128_SALT_SIZE) +#define REKEY_LIFETIME (60 * 60 * HZ) + +enum HKDF_TYPE { + HKDF_TYPE_KEY_INITIATOR = 0, + HKDF_TYPE_KEY_ACCEPTER = 1, + HKDF_TYPE_REKEY = 2, + HKDF_TYPE_IV = 3, +}; + +enum SET_CRYPTO_TYPE { + SET_CRYPTO_SEND = 0, + SET_CRYPTO_RECV = 1, +}; + +int tls_crypto_info_init(struct connection *conn_impl); +int set_crypto_info(struct connection *conn_impl, int set_type); +int update_key(__u8 *old_key, __u8 *new_key, int type); + +#endif diff --git a/fs/hmdfs/comm/device_node.c b/fs/hmdfs/comm/device_node.c new file mode 100644 index 0000000000000000000000000000000000000000..54eaaf06f22373c23570b1fd91ca72a2c27a2a7d --- /dev/null +++ b/fs/hmdfs/comm/device_node.c @@ -0,0 +1,1665 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/comm/device_node.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */
+
+#include "device_node.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "client_writeback.h"
+#include "server_writeback.h"
+#include "connection.h"
+#include "hmdfs_client.h"
+#include "socket_adapter.h"
+#include "authority/authentication.h"
+
+DEFINE_MUTEX(hmdfs_sysfs_mutex);
+static struct kset *hmdfs_kset;
+
+struct hmdfs_disconnect_node_work {
+	struct hmdfs_peer *conn;
+	struct work_struct work;
+	atomic_t *cnt;
+	struct wait_queue_head *waitq;
+};
+
+static void ctrl_cmd_update_socket_handler(const char *buf, size_t len,
+					   struct hmdfs_sb_info *sbi)
+{
+	struct update_socket_param cmd;
+	struct hmdfs_peer *node = NULL;
+	struct connection *conn = NULL;
+
+	if (unlikely(!buf || len != sizeof(cmd))) {
+		hmdfs_err("len/buf error");
+		goto out;
+	}
+	memcpy(&cmd, buf, sizeof(cmd));
+
+	node = hmdfs_get_peer(sbi, cmd.cid);
+	if (unlikely(!node)) {
+		hmdfs_err("failed to update ctrl node: cannot get peer");
+		goto out;
+	}
+
+	conn = hmdfs_get_conn_tcp(node, cmd.newfd, cmd.masterkey, cmd.status);
+	if (unlikely(!conn)) {
+		hmdfs_err("failed to update ctrl node: cannot get conn");
+	} else if (!sbi->system_cred) {
+		const struct cred *system_cred = get_cred(current_cred());
+
+		if (cmpxchg_relaxed(&sbi->system_cred, NULL, system_cred))
+			put_cred(system_cred);
+		else
+			hmdfs_check_cred(system_cred);
+	}
+out:
+	if (conn)
+		connection_put(conn);
+	if (node)
+		peer_put(node);
+}
+
+static inline void hmdfs_disconnect_node_marked(struct hmdfs_peer *conn)
+{
+	hmdfs_start_process_offline(conn);
+	hmdfs_disconnect_node(conn);
+	hmdfs_stop_process_offline(conn);
+}
+
+static void ctrl_cmd_off_line_handler(const char *buf, size_t len,
+				      struct hmdfs_sb_info *sbi)
+{
+	struct offline_param cmd;
+	struct hmdfs_peer *node = NULL;
+
+	if (unlikely(!buf || len != sizeof(cmd))) {
+		hmdfs_err("Received an invalid userbuf");
+		return;
+	}
+	memcpy(&cmd, buf, sizeof(cmd));
+	node = hmdfs_lookup_from_cid(sbi, cmd.remote_cid);
+	if (unlikely(!node)) {
+		hmdfs_err("Cannot find node by device");
+		return;
+	}
+	hmdfs_info("Found peer: device_id = %llu", node->device_id);
+	hmdfs_disconnect_node_marked(node);
+	peer_put(node);
+}
+
+static void hmdfs_disconnect_node_work_fn(struct work_struct *base)
+{
+	struct hmdfs_disconnect_node_work *work =
+		container_of(base, struct hmdfs_disconnect_node_work, work);
+
+	hmdfs_disconnect_node_marked(work->conn);
+	if (atomic_dec_and_test(work->cnt))
+		wake_up(work->waitq);
+	kfree(work);
+}
+
+static void ctrl_cmd_off_line_all_handler(const char *buf, size_t len,
+					  struct hmdfs_sb_info *sbi)
+{
+	struct hmdfs_peer *node = NULL;
+	struct hmdfs_disconnect_node_work *work = NULL;
+	atomic_t cnt = ATOMIC_INIT(0);
+	wait_queue_head_t waitq;
+
+	if (unlikely(len != sizeof(struct offline_all_param))) {
+		hmdfs_err("Received an invalid userbuf, len %zu, expect %zu\n",
+			  len, sizeof(struct offline_all_param));
+		return;
+	}
+
+	init_waitqueue_head(&waitq);
+	mutex_lock(&sbi->connections.node_lock);
+	list_for_each_entry(node, &sbi->connections.node_list, list) {
+		mutex_unlock(&sbi->connections.node_lock);
+		work = kmalloc(sizeof(*work), GFP_KERNEL);
+		if (work) {
+			atomic_inc(&cnt);
+			work->conn = node;
+			work->cnt = &cnt;
+			work->waitq = &waitq;
+			INIT_WORK(&work->work, hmdfs_disconnect_node_work_fn);
+			schedule_work(&work->work);
+		} else {
+			hmdfs_disconnect_node_marked(node);
+		}
+		mutex_lock(&sbi->connections.node_lock);
+	}
+	mutex_unlock(&sbi->connections.node_lock);
+
+	wait_event(waitq, !atomic_read(&cnt));
+}
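+
+/*
+ * Control commands reach hmdfs through the per-sbi "cmd" sysfs attribute:
+ * userspace writes the packed parameter struct of one of the handlers
+ * above, whose leading int selects the entry in cmd_handler[]; reading
+ * "cmd" pops the next notify_param from the notify fifo. A rough
+ * userspace sketch (illustrative only: the sysfs path depends on how
+ * hmdfs_kset is registered, and the field names follow the param structs
+ * above):
+ *
+ *	int fd = open("/sys/fs/hmdfs/<sb>/cmd", O_RDWR);
+ *	struct offline_param p = { .cmd = CMD_OFF_LINE, .remote_cid = ... };
+ *	write(fd, &p, sizeof(p));	// dispatched via sbi_cmd_store()
+ */
+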
+typedef void (*ctrl_cmd_handler)(const char *buf, size_t len,
+				 struct hmdfs_sb_info *sbi);
+
+static const ctrl_cmd_handler cmd_handler[CMD_CNT] = {
+	[CMD_UPDATE_SOCKET] = ctrl_cmd_update_socket_handler,
+	[CMD_OFF_LINE] = ctrl_cmd_off_line_handler,
+	[CMD_OFF_LINE_ALL] = ctrl_cmd_off_line_all_handler,
+};
+
+static ssize_t sbi_cmd_show(struct kobject *kobj, struct sbi_attribute *attr,
+			    char *buf)
+{
+	struct notify_param param;
+	int out_len;
+	struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	memset(&param, 0, sizeof(param));
+	spin_lock(&sbi->notify_fifo_lock);
+	out_len = kfifo_out(&sbi->notify_fifo, &param, sizeof(param));
+	spin_unlock(&sbi->notify_fifo_lock);
+	if (out_len != sizeof(param))
+		param.notify = NOTIFY_NONE;
+	memcpy(buf, &param, sizeof(param));
+	return sizeof(param);
+}
+
+static const char *cmd2str(int cmd)
+{
+	switch (cmd) {
+	case 0:
+		return "CMD_UPDATE_SOCKET";
+	case 1:
+		return "CMD_OFF_LINE";
+	case 2:
+		return "CMD_OFF_LINE_ALL";
+	default:
+		return "illegal cmd";
+	}
+}
+
+static ssize_t sbi_cmd_store(struct kobject *kobj, struct sbi_attribute *attr,
+			     const char *buf, size_t len)
+{
+	int cmd;
+	struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	if (!sbi) {
+		hmdfs_info("Fatal! Empty sbi. Mount fs first");
+		return len;
+	}
+	if (len < sizeof(int)) {
+		hmdfs_err("Illegal cmd: cmd len = %zu", len);
+		return len;
+	}
+	cmd = *(int *)buf;
+	if (cmd < 0 || cmd >= CMD_CNT) {
+		hmdfs_err("Illegal cmd: cmd = %d", cmd);
+		return len;
+	}
+	hmdfs_info("Received cmd: %s", cmd2str(cmd));
+	if (cmd_handler[cmd])
+		cmd_handler[cmd](buf, len, sbi);
+	return len;
+}
+
+static struct sbi_attribute sbi_cmd_attr =
+	__ATTR(cmd, 0664, sbi_cmd_show, sbi_cmd_store);
+
+static ssize_t sbi_status_show(struct kobject *kobj, struct sbi_attribute *attr,
+			       char *buf)
+{
+	ssize_t size = 0;
+	struct hmdfs_sb_info *sbi = NULL;
+	struct hmdfs_peer *peer = NULL;
+	struct connection *conn_impl = NULL;
+	struct tcp_handle *tcp = NULL;
+
+	sbi = to_sbi(kobj);
+	size += sprintf(buf + size, "peers version status\n");
+
+	mutex_lock(&sbi->connections.node_lock);
+	list_for_each_entry(peer, &sbi->connections.node_list, list) {
+		size += sprintf(buf + size, "%llu %d %d\n", peer->device_id,
+				peer->version, peer->status);
+		// connection information
+		size += sprintf(
+			buf + size,
+			"\t socket_fd connection_status tcp_status ... 
refcnt\n"); + mutex_lock(&peer->conn_impl_list_lock); + list_for_each_entry(conn_impl, &peer->conn_impl_list, list) { + tcp = conn_impl->connect_handle; + size += sprintf(buf + size, "\t %d \t%d \t%d \t%p \t%ld\n", + tcp->fd, conn_impl->status, + tcp->sock->state, tcp->sock, file_count(tcp->sock->file)); + } + mutex_unlock(&peer->conn_impl_list_lock); + } + mutex_unlock(&sbi->connections.node_lock); + return size; +} + +static ssize_t sbi_status_store(struct kobject *kobj, + struct sbi_attribute *attr, const char *buf, + size_t len) +{ + return len; +} + +static struct sbi_attribute sbi_status_attr = + __ATTR(status, 0664, sbi_status_show, sbi_status_store); + +static ssize_t sbi_stat_show(struct kobject *kobj, struct sbi_attribute *attr, + char *buf) +{ + ssize_t size = 0; + struct hmdfs_sb_info *sbi = NULL; + struct hmdfs_peer *peer = NULL; + struct connection *conn_impl = NULL; + struct tcp_handle *tcp = NULL; + + sbi = to_sbi(kobj); + mutex_lock(&sbi->connections.node_lock); + list_for_each_entry(peer, &sbi->connections.node_list, list) { + // connection information + mutex_lock(&peer->conn_impl_list_lock); + list_for_each_entry(conn_impl, &peer->conn_impl_list, list) { + tcp = conn_impl->connect_handle; + size += sprintf(buf + size, "socket_fd: %d\n", tcp->fd); + size += sprintf(buf + size, + "\tsend_msg %d \tsend_bytes %llu\n", + conn_impl->stat.send_message_count, + conn_impl->stat.send_bytes); + size += sprintf(buf + size, + "\trecv_msg %d \trecv_bytes %llu\n", + conn_impl->stat.recv_message_count, + conn_impl->stat.recv_bytes); + } + mutex_unlock(&peer->conn_impl_list_lock); + } + mutex_unlock(&sbi->connections.node_lock); + return size; +} + +static ssize_t sbi_stat_store(struct kobject *kobj, struct sbi_attribute *attr, + const char *buf, size_t len) +{ + struct hmdfs_sb_info *sbi = NULL; + struct hmdfs_peer *peer = NULL; + struct connection *conn_impl = NULL; + + sbi = to_sbi(kobj); + mutex_lock(&sbi->connections.node_lock); + list_for_each_entry(peer, &sbi->connections.node_list, list) { + // connection information + mutex_lock(&peer->conn_impl_list_lock); + list_for_each_entry(conn_impl, &peer->conn_impl_list, list) { + conn_impl->stat.send_message_count = 0; + conn_impl->stat.send_bytes = 0; + conn_impl->stat.recv_message_count = 0; + conn_impl->stat.recv_bytes = 0; + } + mutex_unlock(&peer->conn_impl_list_lock); + } + mutex_unlock(&sbi->connections.node_lock); + return len; +} + +static struct sbi_attribute sbi_statistic_attr = + __ATTR(statistic, 0664, sbi_stat_show, sbi_stat_store); + +static ssize_t sbi_dcache_precision_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", to_sbi(kobj)->dcache_precision); +} + +#define PRECISION_MAX 3600000 + +static ssize_t sbi_dcache_precision_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, size_t len) +{ + int ret; + unsigned int precision; + struct hmdfs_sb_info *sbi = to_sbi(kobj); + + ret = kstrtouint(skip_spaces(buf), 0, &precision); + if (!ret) { + if (precision <= PRECISION_MAX) + sbi->dcache_precision = precision; + else + ret = -EINVAL; + } + + return ret ? 
ret : len; +} + +static struct sbi_attribute sbi_dcache_precision_attr = + __ATTR(dcache_precision, 0664, sbi_dcache_precision_show, + sbi_dcache_precision_store); + +static ssize_t sbi_dcache_threshold_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%lu\n", + to_sbi(kobj)->dcache_threshold); +} + +static ssize_t sbi_dcache_threshold_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, size_t len) +{ + int ret; + unsigned long threshold; + struct hmdfs_sb_info *sbi = to_sbi(kobj); + + ret = kstrtoul(skip_spaces(buf), 0, &threshold); + if (!ret) + sbi->dcache_threshold = threshold; + + return ret ? ret : len; +} + +static struct sbi_attribute sbi_dcache_threshold_attr = + __ATTR(dcache_threshold, 0664, sbi_dcache_threshold_show, + sbi_dcache_threshold_store); + +static ssize_t server_statistic_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + int i, ret; + const size_t size = PAGE_SIZE - 1; + ssize_t pos = 0; + struct server_statistic *stat = to_sbi(kobj)->s_server_statis; + + for (i = 0; i < F_SIZE; i++) { + + ret = snprintf(buf + pos, size - pos, + "%llu %u %llu %llu\n", + stat[i].cnt, + jiffies_to_msecs(stat[i].max), + stat[i].snd_cnt, stat[i].snd_fail_cnt); + if (ret > size - pos) + break; + pos += ret; + } + + /* If break, we should add a new line */ + if (i < F_SIZE) { + ret = snprintf(buf + pos, size + 1 - pos, "\n"); + pos += ret; + } + return pos; +} + +static struct sbi_attribute sbi_local_op_attr = __ATTR_RO(server_statistic); + +static ssize_t client_statistic_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + int i, ret; + const size_t size = PAGE_SIZE - 1; + ssize_t pos = 0; + struct client_statistic *stat = to_sbi(kobj)->s_client_statis; + + for (i = 0; i < F_SIZE; i++) { + + ret = snprintf(buf + pos, size - pos, + "%llu %llu %llu %llu %llu %u\n", + stat[i].snd_cnt, + stat[i].snd_fail_cnt, + stat[i].resp_cnt, + stat[i].timeout_cnt, + stat[i].delay_resp_cnt, + jiffies_to_msecs(stat[i].max)); + if (ret > size - pos) + break; + pos += ret; + } + + /* If break, we should add a new line */ + if (i < F_SIZE) { + ret = snprintf(buf + pos, size + 1 - pos, "\n"); + pos += ret; + } + + return pos; +} + +static struct sbi_attribute sbi_delay_resp_attr = __ATTR_RO(client_statistic); + +static inline unsigned long pages_to_kbytes(unsigned long page) +{ + return page << (PAGE_SHIFT - 10); +} + +static ssize_t dirty_writeback_stats_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + struct hmdfs_writeback *hwb = sbi->h_wb; + unsigned long avg; + unsigned long max; + unsigned long min; + + spin_lock(&hwb->write_bandwidth_lock); + avg = hwb->avg_write_bandwidth; + max = hwb->max_write_bandwidth; + min = hwb->min_write_bandwidth; + spin_unlock(&hwb->write_bandwidth_lock); + + if (min == ULONG_MAX) + min = 0; + + return snprintf(buf, PAGE_SIZE, + "%10lu\n" + "%10lu\n" + "%10lu\n", + pages_to_kbytes(avg), + pages_to_kbytes(max), + pages_to_kbytes(min)); +} + +static struct sbi_attribute sbi_dirty_writeback_stats_attr = + __ATTR_RO(dirty_writeback_stats); + +static ssize_t sbi_wb_timeout_ms_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%u\n", sbi->wb_timeout_ms); +} + +static ssize_t sbi_wb_timeout_ms_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, size_t len) 
+{ + struct hmdfs_sb_info *sbi = to_sbi(kobj); + unsigned int val; + int err; + + err = kstrtouint(buf, 10, &val); + if (err) + return err; + + if (!val || val > HMDFS_MAX_WB_TIMEOUT_MS) + return -EINVAL; + + sbi->wb_timeout_ms = val; + + return len; +} + +static struct sbi_attribute sbi_wb_timeout_ms_attr = + __ATTR(wb_timeout_ms, 0664, sbi_wb_timeout_ms_show, + sbi_wb_timeout_ms_store); + +static ssize_t sbi_dirty_writeback_centisecs_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%u\n", + sbi->h_wb->dirty_writeback_interval); +} + +static ssize_t sbi_dirty_writeback_centisecs_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, size_t len) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + int err; + + err = kstrtouint(buf, 10, &sbi->h_wb->dirty_writeback_interval); + if (err) + return err; + return len; +} + +static struct sbi_attribute sbi_dirty_writeback_centisecs_attr = + __ATTR(dirty_writeback_centisecs, 0664, + sbi_dirty_writeback_centisecs_show, + sbi_dirty_writeback_centisecs_store); + +static ssize_t sbi_dirty_file_background_bytes_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%lu\n", + sbi->h_wb->dirty_file_bg_bytes); +} + +static ssize_t sbi_dirty_file_background_bytes_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, + size_t len) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + unsigned long file_background_bytes = 0; + int err; + + err = kstrtoul(buf, 10, &file_background_bytes); + if (err) + return err; + if (file_background_bytes == 0) + return -EINVAL; + + sbi->h_wb->dirty_fs_bytes = + max(sbi->h_wb->dirty_fs_bytes, file_background_bytes); + sbi->h_wb->dirty_fs_bg_bytes = + max(sbi->h_wb->dirty_fs_bg_bytes, file_background_bytes); + sbi->h_wb->dirty_file_bytes = + max(sbi->h_wb->dirty_file_bytes, file_background_bytes); + + sbi->h_wb->dirty_file_bg_bytes = file_background_bytes; + hmdfs_calculate_dirty_thresh(sbi->h_wb); + hmdfs_update_ratelimit(sbi->h_wb); + return len; +} + +static ssize_t sbi_dirty_fs_background_bytes_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->h_wb->dirty_fs_bg_bytes); +} + +static ssize_t sbi_dirty_fs_background_bytes_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, size_t len) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + unsigned long fs_background_bytes = 0; + int err; + + err = kstrtoul(buf, 10, &fs_background_bytes); + if (err) + return err; + if (fs_background_bytes == 0) + return -EINVAL; + + sbi->h_wb->dirty_file_bg_bytes = + min(sbi->h_wb->dirty_file_bg_bytes, fs_background_bytes); + sbi->h_wb->dirty_fs_bytes = + max(sbi->h_wb->dirty_fs_bytes, fs_background_bytes); + + sbi->h_wb->dirty_fs_bg_bytes = fs_background_bytes; + hmdfs_calculate_dirty_thresh(sbi->h_wb); + hmdfs_update_ratelimit(sbi->h_wb); + return len; +} + +static struct sbi_attribute sbi_dirty_file_background_bytes_attr = + __ATTR(dirty_file_background_bytes, 0644, + sbi_dirty_file_background_bytes_show, + sbi_dirty_file_background_bytes_store); +static struct sbi_attribute sbi_dirty_fs_background_bytes_attr = + __ATTR(dirty_fs_background_bytes, 0644, + sbi_dirty_fs_background_bytes_show, + sbi_dirty_fs_background_bytes_store); + 
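+/*
+ * The dirty thresholds are kept ordered: each of these store handlers
+ * clamps its neighbours so that dirty_file_background_bytes <=
+ * dirty_file_bytes <= dirty_fs_bytes and dirty_file_background_bytes <=
+ * dirty_fs_background_bytes <= dirty_fs_bytes hold after every update,
+ * then re-runs hmdfs_calculate_dirty_thresh() and
+ * hmdfs_update_ratelimit(). For example (illustrative path; the sysfs
+ * layout depends on how the kset is registered):
+ *
+ *	echo 1048576 > /sys/fs/hmdfs/<sb>/dirty_file_background_bytes
+ *
+ * raises dirty_file_bytes and both fs thresholds to at least 1 MiB.
+ */
+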
+static ssize_t sbi_dirty_file_bytes_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->h_wb->dirty_file_bytes); +} + +static ssize_t sbi_dirty_file_bytes_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, size_t len) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + unsigned long file_bytes = 0; + int err; + + err = kstrtoul(buf, 10, &file_bytes); + if (err) + return err; + if (file_bytes == 0) + return -EINVAL; + + sbi->h_wb->dirty_file_bg_bytes = + min(sbi->h_wb->dirty_file_bg_bytes, file_bytes); + sbi->h_wb->dirty_fs_bytes = max(sbi->h_wb->dirty_fs_bytes, file_bytes); + + sbi->h_wb->dirty_file_bytes = file_bytes; + hmdfs_calculate_dirty_thresh(sbi->h_wb); + hmdfs_update_ratelimit(sbi->h_wb); + return len; +} + +static ssize_t sbi_dirty_fs_bytes_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->h_wb->dirty_fs_bytes); +} + +static ssize_t sbi_dirty_fs_bytes_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, size_t len) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + unsigned long fs_bytes = 0; + int err; + + err = kstrtoul(buf, 10, &fs_bytes); + if (err) + return err; + if (fs_bytes == 0) + return -EINVAL; + + sbi->h_wb->dirty_file_bg_bytes = + min(sbi->h_wb->dirty_file_bg_bytes, fs_bytes); + sbi->h_wb->dirty_file_bytes = + min(sbi->h_wb->dirty_file_bytes, fs_bytes); + sbi->h_wb->dirty_fs_bg_bytes = + min(sbi->h_wb->dirty_fs_bg_bytes, fs_bytes); + + sbi->h_wb->dirty_fs_bytes = fs_bytes; + hmdfs_calculate_dirty_thresh(sbi->h_wb); + hmdfs_update_ratelimit(sbi->h_wb); + return len; +} + +static struct sbi_attribute sbi_dirty_file_bytes_attr = + __ATTR(dirty_file_bytes, 0644, sbi_dirty_file_bytes_show, + sbi_dirty_file_bytes_store); +static struct sbi_attribute sbi_dirty_fs_bytes_attr = + __ATTR(dirty_fs_bytes, 0644, sbi_dirty_fs_bytes_show, + sbi_dirty_fs_bytes_store); + +static ssize_t sbi_dirty_writeback_timelimit_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%u\n", + sbi->h_wb->writeback_timelimit / HZ); +} + +static ssize_t sbi_dirty_writeback_timelimit_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, + size_t len) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + unsigned int time_limit = 0; + int err; + + err = kstrtouint(buf, 10, &time_limit); + if (err) + return err; + if (time_limit == 0 || time_limit > (HMDFS_MAX_WB_TIMELIMIT / HZ)) + return -EINVAL; + + sbi->h_wb->writeback_timelimit = time_limit * HZ; + return len; +} + +static struct sbi_attribute sbi_dirty_writeback_timelimit_attr = +__ATTR(dirty_writeback_timelimit, 0644, sbi_dirty_writeback_timelimit_show, + sbi_dirty_writeback_timelimit_store); + +static ssize_t sbi_dirty_thresh_lowerlimit_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%lu\n", + sbi->h_wb->bw_thresh_lowerlimit << PAGE_SHIFT); +} + +static ssize_t sbi_dirty_thresh_lowerlimit_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, + size_t len) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + unsigned long bw_thresh_lowerbytes = 0; + unsigned long bw_thresh_lowerlimit; + int 
err; + + err = kstrtoul(buf, 10, &bw_thresh_lowerbytes); + if (err) + return err; + + bw_thresh_lowerlimit = DIV_ROUND_UP(bw_thresh_lowerbytes, PAGE_SIZE); + if (bw_thresh_lowerlimit < HMDFS_BW_THRESH_MIN_LIMIT || + bw_thresh_lowerlimit > HMDFS_BW_THRESH_MAX_LIMIT) + return -EINVAL; + + sbi->h_wb->bw_thresh_lowerlimit = bw_thresh_lowerlimit; + return len; +} + +static struct sbi_attribute sbi_dirty_thresh_lowerlimit_attr = +__ATTR(dirty_thresh_lowerlimit, 0644, sbi_dirty_thresh_lowerlimit_show, + sbi_dirty_thresh_lowerlimit_store); + +static ssize_t sbi_dirty_writeback_autothresh_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%d\n", + sbi->h_wb->dirty_auto_threshold); +} + +static ssize_t sbi_dirty_writeback_autothresh_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, + size_t len) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + bool dirty_auto_threshold = false; + int err; + + err = kstrtobool(buf, &dirty_auto_threshold); + if (err) + return err; + + sbi->h_wb->dirty_auto_threshold = dirty_auto_threshold; + return len; +} + +static struct sbi_attribute sbi_dirty_writeback_autothresh_attr = +__ATTR(dirty_writeback_autothresh, 0644, sbi_dirty_writeback_autothresh_show, + sbi_dirty_writeback_autothresh_store); + +static ssize_t sbi_dirty_writeback_control_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%d\n", + sbi->h_wb->dirty_writeback_control); +} + +static ssize_t sbi_dirty_writeback_control_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, size_t len) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + unsigned int dirty_writeback_control = 0; + int err; + + err = kstrtouint(buf, 10, &dirty_writeback_control); + if (err) + return err; + + sbi->h_wb->dirty_writeback_control = (bool)dirty_writeback_control; + return len; +} + +static struct sbi_attribute sbi_dirty_writeback_control_attr = + __ATTR(dirty_writeback_control, 0644, sbi_dirty_writeback_control_show, + sbi_dirty_writeback_control_store); + +static ssize_t sbi_srv_dirty_thresh_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%d\n", + sbi->h_swb->dirty_thresh_pg >> HMDFS_MB_TO_PAGE_SHIFT); +} + +static ssize_t sbi_srv_dirty_thresh_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, + size_t len) +{ + struct hmdfs_server_writeback *hswb = to_sbi(kobj)->h_swb; + int dirty_thresh_mb; + unsigned long long pages; + int err; + + err = kstrtoint(buf, 10, &dirty_thresh_mb); + if (err) + return err; + + if (dirty_thresh_mb <= 0) + return -EINVAL; + + pages = dirty_thresh_mb; + pages <<= HMDFS_MB_TO_PAGE_SHIFT; + if (pages > INT_MAX) { + hmdfs_err("Illegal dirty_thresh_mb %d, its page count beyonds max int", + dirty_thresh_mb); + return -EINVAL; + } + + hswb->dirty_thresh_pg = (unsigned int)pages; + return len; +} + +static struct sbi_attribute sbi_srv_dirty_thresh_attr = +__ATTR(srv_dirty_thresh, 0644, sbi_srv_dirty_thresh_show, + sbi_srv_dirty_thresh_store); + + +static ssize_t sbi_srv_dirty_wb_control_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%d\n", + sbi->h_swb->dirty_writeback_control); +} + +static 
ssize_t sbi_srv_dirty_wb_control_store(struct kobject *kobj,
+				       struct sbi_attribute *attr,
+				       const char *buf,
+				       size_t len)
+{
+	struct hmdfs_server_writeback *hswb = to_sbi(kobj)->h_swb;
+	bool dirty_writeback_control = true;
+	int err;
+
+	err = kstrtobool(buf, &dirty_writeback_control);
+	if (err)
+		return err;
+
+	hswb->dirty_writeback_control = dirty_writeback_control;
+
+	return len;
+}
+
+static struct sbi_attribute sbi_srv_dirty_wb_control_attr =
+__ATTR(srv_dirty_writeback_control, 0644, sbi_srv_dirty_wb_control_show,
+       sbi_srv_dirty_wb_control_store);
+
+static ssize_t sbi_dcache_timeout_show(struct kobject *kobj,
+				       struct sbi_attribute *attr, char *buf)
+{
+	const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", sbi->dcache_timeout);
+}
+
+static ssize_t sbi_dcache_timeout_store(struct kobject *kobj,
+					struct sbi_attribute *attr,
+					const char *buf, size_t len)
+{
+	struct hmdfs_sb_info *sbi = to_sbi(kobj);
+	unsigned int timeout;
+	int err;
+
+	err = kstrtouint(buf, 0, &timeout);
+	if (err)
+		return err;
+
+	/* zero is invalid, and it does not mean "no cache" */
+	if (timeout == 0 || timeout > MAX_DCACHE_TIMEOUT)
+		return -EINVAL;
+
+	sbi->dcache_timeout = timeout;
+
+	return len;
+}
+
+static struct sbi_attribute sbi_dcache_timeout_attr =
+	__ATTR(dcache_timeout, 0644, sbi_dcache_timeout_show,
+	       sbi_dcache_timeout_store);
+
+static ssize_t sbi_write_cache_timeout_sec_show(struct kobject *kobj,
+		struct sbi_attribute *attr, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%u\n",
+			to_sbi(kobj)->write_cache_timeout);
+}
+
+static ssize_t sbi_write_cache_timeout_sec_store(struct kobject *kobj,
+		struct sbi_attribute *attr, const char *buf, size_t len)
+{
+	int ret;
+	unsigned int timeout;
+	struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	ret = kstrtouint(buf, 0, &timeout);
+	if (ret)
+		return ret;
+
+	/* setting write_cache_timeout to 0 disables this functionality */
+	sbi->write_cache_timeout = timeout;
+
+	return len;
+}
+
+static struct sbi_attribute sbi_write_cache_timeout_sec_attr =
+	__ATTR(write_cache_timeout_sec, 0664, sbi_write_cache_timeout_sec_show,
+	       sbi_write_cache_timeout_sec_store);
+
+static ssize_t sbi_node_evt_cb_delay_show(struct kobject *kobj,
+					  struct sbi_attribute *attr,
+					  char *buf)
+{
+	const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", sbi->async_cb_delay);
+}
+
+static ssize_t sbi_node_evt_cb_delay_store(struct kobject *kobj,
+					   struct sbi_attribute *attr,
+					   const char *buf,
+					   size_t len)
+{
+	struct hmdfs_sb_info *sbi = to_sbi(kobj);
+	unsigned int delay = 0;
+	int err;
+
+	err = kstrtouint(buf, 10, &delay);
+	if (err)
+		return err;
+
+	sbi->async_cb_delay = delay;
+
+	return len;
+}
+
+static struct sbi_attribute sbi_node_evt_cb_delay_attr =
+__ATTR(node_event_delay, 0644, sbi_node_evt_cb_delay_show,
+       sbi_node_evt_cb_delay_store);
+
+static int calc_idr_number(struct idr *idr)
+{
+	void *entry = NULL;
+	int id;
+	int number = 0;
+
+	idr_for_each_entry(idr, entry, id) {
+		number++;
+		if (number % HMDFS_IDR_RESCHED_COUNT == 0)
+			cond_resched();
+	}
+
+	return number;
+}
+
+static ssize_t sbi_show_idr_stats(struct kobject *kobj,
+				  struct sbi_attribute *attr,
+				  char *buf, bool showmsg)
+{
+	ssize_t size = 0;
+	int count;
+	struct hmdfs_sb_info *sbi = NULL;
+	struct hmdfs_peer *peer = NULL;
+	struct idr *idr = NULL;
+
+	sbi = to_sbi(kobj);
+
+	mutex_lock(&sbi->connections.node_lock);
+	list_for_each_entry(peer, &sbi->connections.node_list, list) {
+		idr = showmsg ?
&peer->msg_idr : &peer->file_id_idr; + count = calc_idr_number(idr); + size += snprintf(buf + size, PAGE_SIZE - size, + "device-id\tcount\tnext-id\n\t%llu\t\t%d\t%u\n", + peer->device_id, count, idr_get_cursor(idr)); + if (size >= PAGE_SIZE) { + size = PAGE_SIZE; + break; + } + } + mutex_unlock(&sbi->connections.node_lock); + + return size; +} + +static ssize_t pending_message_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + return sbi_show_idr_stats(kobj, attr, buf, true); +} + +static struct sbi_attribute sbi_pending_message_attr = + __ATTR_RO(pending_message); + +static ssize_t peer_opened_fd_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + return sbi_show_idr_stats(kobj, attr, buf, false); +} + +static struct sbi_attribute sbi_peer_opened_fd_attr = __ATTR_RO(peer_opened_fd); + +static ssize_t sbi_srv_req_max_active_attr_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%u\n", sbi->async_req_max_active); +} + +static ssize_t sbi_srv_req_max_active_attr_store(struct kobject *kobj, + struct sbi_attribute *attr, const char *buf, size_t len) +{ + int ret; + unsigned int max_active; + struct hmdfs_sb_info *sbi = to_sbi(kobj); + + ret = kstrtouint(buf, 0, &max_active); + if (ret) + return ret; + + sbi->async_req_max_active = max_active; + + return len; +} + +static struct sbi_attribute sbi_srv_req_max_active_attr = +__ATTR(srv_req_handle_max_active, 0644, sbi_srv_req_max_active_attr_show, + sbi_srv_req_max_active_attr_store); + + +static ssize_t cache_file_show(struct hmdfs_sb_info *sbi, + struct list_head *head, char *buf) +{ + struct cache_file_node *cfn = NULL; + ssize_t pos = 0; + + mutex_lock(&sbi->cache_list_lock); + list_for_each_entry(cfn, head, list) { + pos += snprintf(buf + pos, PAGE_SIZE - pos, + "dev_id: %s relative_path: %s\n", + cfn->cid, cfn->relative_path); + if (pos >= PAGE_SIZE) { + pos = PAGE_SIZE; + break; + } + } + mutex_unlock(&sbi->cache_list_lock); + + return pos; +} + +static ssize_t client_cache_file_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + return cache_file_show(to_sbi(kobj), &to_sbi(kobj)->client_cache, buf); +} +static ssize_t server_cache_file_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + return cache_file_show(to_sbi(kobj), &to_sbi(kobj)->server_cache, buf); +} + +static struct sbi_attribute sbi_server_cache_file_attr = + __ATTR_RO(server_cache_file); +static struct sbi_attribute sbi_client_cache_file_attr = + __ATTR_RO(client_cache_file); + +static ssize_t sb_seq_show(struct kobject *kobj, struct sbi_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", to_sbi(kobj)->seq); +} + +static struct sbi_attribute sbi_seq_attr = __ATTR_RO(sb_seq); + +static ssize_t peers_sum_attr_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + struct hmdfs_sb_info *sbi = to_sbi(kobj); + struct hmdfs_peer *node = NULL; + unsigned int stash_ok = 0, stash_fail = 0, restore_ok = 0, + restore_fail = 0, rebuild_ok = 0, rebuild_fail = 0, rebuild_invalid = 0, + rebuild_time = 0; + unsigned long long stash_ok_pages = 0, stash_fail_pages = 0, + restore_ok_pages = 0, restore_fail_pages = 0; + + mutex_lock(&sbi->connections.node_lock); + list_for_each_entry(node, &sbi->connections.node_list, list) { + peer_get(node); + mutex_unlock(&sbi->connections.node_lock); + stash_ok += node->stats.stash.total_ok; + stash_fail += 
node->stats.stash.total_fail;
+		stash_ok_pages += node->stats.stash.ok_pages;
+		stash_fail_pages += node->stats.stash.fail_pages;
+		restore_ok += node->stats.restore.total_ok;
+		restore_fail += node->stats.restore.total_fail;
+		restore_ok_pages += node->stats.restore.ok_pages;
+		restore_fail_pages += node->stats.restore.fail_pages;
+		rebuild_ok += node->stats.rebuild.total_ok;
+		rebuild_fail += node->stats.rebuild.total_fail;
+		rebuild_invalid += node->stats.rebuild.total_invalid;
+		rebuild_time += node->stats.rebuild.time;
+		peer_put(node);
+		mutex_lock(&sbi->connections.node_lock);
+	}
+	mutex_unlock(&sbi->connections.node_lock);
+
+	return snprintf(buf, PAGE_SIZE,
+			"%u %u %llu %llu\n"
+			"%u %u %llu %llu\n"
+			"%u %u %u %u\n",
+			stash_ok, stash_fail, stash_ok_pages, stash_fail_pages,
+			restore_ok, restore_fail, restore_ok_pages,
+			restore_fail_pages, rebuild_ok, rebuild_fail,
+			rebuild_invalid, rebuild_time);
+}
+
+static struct sbi_attribute sbi_peers_attr = __ATTR_RO(peers_sum_attr);
+
+static const char * const flag_name[] = {
+	"READPAGES",
+	"READPAGES_OPEN",
+	"ATOMIC_OPEN",
+};
+
+static ssize_t fill_features(char *buf, unsigned long long flag)
+{
+	int i;
+	ssize_t pos = 0;
+	bool sep = false;
+	int flag_name_count = ARRAY_SIZE(flag_name);
+
+	for (i = 0; i < sizeof(flag) * BITS_PER_BYTE; ++i) {
+		if (!(flag & BIT_ULL(i)))
+			continue;
+
+		if (sep)
+			pos += snprintf(buf + pos, PAGE_SIZE - pos, "|");
+		sep = true;
+
+		if (pos >= PAGE_SIZE) {
+			pos = PAGE_SIZE;
+			break;
+		}
+
+		if (i < flag_name_count && flag_name[i])
+			pos += snprintf(buf + pos, PAGE_SIZE - pos, "%s",
+					flag_name[i]);
+		else
+			pos += snprintf(buf + pos, PAGE_SIZE - pos, "%d", i);
+
+		if (pos >= PAGE_SIZE) {
+			pos = PAGE_SIZE;
+			break;
+		}
+	}
+	pos += snprintf(buf + pos, PAGE_SIZE - pos, "\n");
+	if (pos >= PAGE_SIZE)
+		pos = PAGE_SIZE;
+
+	return pos;
+}
+
+static ssize_t sbi_features_show(struct kobject *kobj,
+				 struct sbi_attribute *attr, char *buf)
+{
+	struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	return fill_features(buf, sbi->s_features);
+}
+
+static struct sbi_attribute sbi_features_attr = __ATTR(features, 0444,
+						       sbi_features_show, NULL);
+
+static struct attribute *sbi_attrs[] = {
+	&sbi_cmd_attr.attr,
+	&sbi_status_attr.attr,
+	&sbi_statistic_attr.attr,
+	&sbi_dcache_precision_attr.attr,
+	&sbi_dcache_threshold_attr.attr,
+	&sbi_dcache_timeout_attr.attr,
+	&sbi_write_cache_timeout_sec_attr.attr,
+	&sbi_local_op_attr.attr,
+	&sbi_delay_resp_attr.attr,
+	&sbi_wb_timeout_ms_attr.attr,
+	&sbi_dirty_writeback_centisecs_attr.attr,
+	&sbi_dirty_file_background_bytes_attr.attr,
+	&sbi_dirty_fs_background_bytes_attr.attr,
+	&sbi_dirty_file_bytes_attr.attr,
+	&sbi_dirty_fs_bytes_attr.attr,
+	&sbi_dirty_writeback_autothresh_attr.attr,
+	&sbi_dirty_writeback_timelimit_attr.attr,
+	&sbi_dirty_thresh_lowerlimit_attr.attr,
+	&sbi_dirty_writeback_control_attr.attr,
+	&sbi_dirty_writeback_stats_attr.attr,
+	&sbi_srv_dirty_thresh_attr.attr,
+	&sbi_srv_dirty_wb_control_attr.attr,
+	&sbi_node_evt_cb_delay_attr.attr,
+	&sbi_srv_req_max_active_attr.attr,
+	&sbi_pending_message_attr.attr,
+	&sbi_peer_opened_fd_attr.attr,
+	&sbi_server_cache_file_attr.attr,
+	&sbi_client_cache_file_attr.attr,
+	&sbi_seq_attr.attr,
+	&sbi_peers_attr.attr,
+	&sbi_features_attr.attr,
+	NULL,
+};
+
+static ssize_t sbi_attr_show(struct kobject *kobj, struct attribute *attr,
+			     char *buf)
+{
+	struct sbi_attribute *sbi_attr = to_sbi_attr(attr);
+
+	if (!sbi_attr->show)
+		return -EIO;
+	return sbi_attr->show(kobj, sbi_attr, buf);
+}
+
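+/*
+ * Illustrative usage (not part of the module): every entry in sbi_attrs[]
+ * is exposed as a file under /sys/fs/hmdfs/<mount-name>/ once
+ * hmdfs_register_sysfs() has added the superblock kobject to hmdfs_kset.
+ * Reads and writes on those files are dispatched through sbi_attr_show()
+ * above and sbi_attr_store() below, e.g. (the value is an example only):
+ *
+ *	cat /sys/fs/hmdfs/<mount-name>/dcache_timeout
+ *	echo 30 > /sys/fs/hmdfs/<mount-name>/dcache_timeout
+ */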
+static ssize_t sbi_attr_store(struct kobject *kobj, struct attribute *attr,
+			      const char *buf, size_t len)
+{
+	struct sbi_attribute *sbi_attr = to_sbi_attr(attr);
+
+	if (!sbi_attr->store)
+		return -EIO;
+	return sbi_attr->store(kobj, sbi_attr, buf, len);
+}
+
+static const struct sysfs_ops sbi_sysfs_ops = {
+	.show = sbi_attr_show,
+	.store = sbi_attr_store,
+};
+
+static void sbi_release(struct kobject *kobj)
+{
+	struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	complete(&sbi->s_kobj_unregister);
+}
+
+static struct kobj_type sbi_ktype = {
+	.sysfs_ops = &sbi_sysfs_ops,
+	.default_attrs = sbi_attrs,
+	.release = sbi_release,
+};
+
+static inline struct sbi_cmd_attribute *to_sbi_cmd_attr(struct attribute *x)
+{
+	return container_of(x, struct sbi_cmd_attribute, attr);
+}
+
+static inline struct hmdfs_sb_info *cmd_kobj_to_sbi(struct kobject *x)
+{
+	return container_of(x, struct hmdfs_sb_info, s_cmd_timeout_kobj);
+}
+
+static ssize_t cmd_timeout_show(struct kobject *kobj, struct attribute *attr,
+				char *buf)
+{
+	int cmd = to_sbi_cmd_attr(attr)->command;
+	struct hmdfs_sb_info *sbi = cmd_kobj_to_sbi(kobj);
+
+	if (cmd < 0 || cmd >= F_SIZE)
+		return 0;
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", get_cmd_timeout(sbi, cmd));
+}
+
+static ssize_t cmd_timeout_store(struct kobject *kobj, struct attribute *attr,
+				 const char *buf, size_t len)
+{
+	unsigned int value;
+	int cmd = to_sbi_cmd_attr(attr)->command;
+	int ret = kstrtouint(skip_spaces(buf), 0, &value);
+	struct hmdfs_sb_info *sbi = cmd_kobj_to_sbi(kobj);
+
+	if (cmd < 0 || cmd >= F_SIZE)
+		return -EINVAL;
+
+	if (!ret)
+		set_cmd_timeout(sbi, cmd, value);
+
+	return ret ? ret : len;
+}
+
+#define HMDFS_CMD_ATTR(_name, _cmd) \
+	static struct sbi_cmd_attribute hmdfs_attr_##_name = { \
+		.attr = { .name = __stringify(_name), .mode = 0664 }, \
+		.command = (_cmd), \
+	}
+
+HMDFS_CMD_ATTR(open, F_OPEN);
+HMDFS_CMD_ATTR(release, F_RELEASE);
+HMDFS_CMD_ATTR(readpage, F_READPAGE);
+HMDFS_CMD_ATTR(writepage, F_WRITEPAGE);
+HMDFS_CMD_ATTR(iterate, F_ITERATE);
+HMDFS_CMD_ATTR(rmdir, F_RMDIR);
+HMDFS_CMD_ATTR(unlink, F_UNLINK);
+HMDFS_CMD_ATTR(rename, F_RENAME);
+HMDFS_CMD_ATTR(setattr, F_SETATTR);
+HMDFS_CMD_ATTR(statfs, F_STATFS);
+HMDFS_CMD_ATTR(drop_push, F_DROP_PUSH);
+HMDFS_CMD_ATTR(getattr, F_GETATTR);
+HMDFS_CMD_ATTR(fsync, F_FSYNC);
+HMDFS_CMD_ATTR(syncfs, F_SYNCFS);
+HMDFS_CMD_ATTR(getxattr, F_GETXATTR);
+HMDFS_CMD_ATTR(setxattr, F_SETXATTR);
+HMDFS_CMD_ATTR(listxattr, F_LISTXATTR);
+
+#define ATTR_LIST(_name) (&hmdfs_attr_##_name.attr)
+
+static struct attribute *sbi_timeout_attrs[] = {
+	ATTR_LIST(open),	ATTR_LIST(release),
+	ATTR_LIST(readpage),	ATTR_LIST(writepage),
+	ATTR_LIST(iterate),	ATTR_LIST(rmdir),
+	ATTR_LIST(unlink),	ATTR_LIST(rename),
+	ATTR_LIST(setattr),
+	ATTR_LIST(statfs),	ATTR_LIST(drop_push),
+	ATTR_LIST(getattr),	ATTR_LIST(fsync),
+	ATTR_LIST(syncfs),	ATTR_LIST(getxattr),
+	ATTR_LIST(setxattr),	ATTR_LIST(listxattr),
+	NULL
+};
+
+static const struct sysfs_ops sbi_cmd_sysfs_ops = {
+	.show = cmd_timeout_show,
+	.store = cmd_timeout_store,
+};
+
+static void sbi_timeout_release(struct kobject *kobj)
+{
+	struct hmdfs_sb_info *sbi = container_of(kobj, struct hmdfs_sb_info,
+						 s_cmd_timeout_kobj);
+
+	complete(&sbi->s_timeout_kobj_unregister);
+}
+
+static struct kobj_type sbi_timeout_ktype = {
+	.sysfs_ops = &sbi_cmd_sysfs_ops,
+	.default_attrs = sbi_timeout_attrs,
+	.release = sbi_timeout_release,
+};
+
+void hmdfs_release_sysfs(struct hmdfs_sb_info *sbi)
+{
+	kobject_put(&sbi->s_cmd_timeout_kobj);
+
wait_for_completion(&sbi->s_timeout_kobj_unregister); + kobject_put(&sbi->kobj); + wait_for_completion(&sbi->s_kobj_unregister); +} + +int hmdfs_register_sysfs(const char *name, struct hmdfs_sb_info *sbi) +{ + int ret; + struct kobject *kobj = NULL; + + mutex_lock(&hmdfs_sysfs_mutex); + kobj = kset_find_obj(hmdfs_kset, name); + if (kobj) { + hmdfs_err("mount failed, already exist"); + kobject_put(kobj); + mutex_unlock(&hmdfs_sysfs_mutex); + return -EEXIST; + } + + sbi->kobj.kset = hmdfs_kset; + init_completion(&sbi->s_kobj_unregister); + ret = kobject_init_and_add(&sbi->kobj, &sbi_ktype, + &hmdfs_kset->kobj, "%s", name); + mutex_unlock(&hmdfs_sysfs_mutex); + + if (ret) { + kobject_put(&sbi->kobj); + wait_for_completion(&sbi->s_kobj_unregister); + return ret; + } + + init_completion(&sbi->s_timeout_kobj_unregister); + ret = kobject_init_and_add(&sbi->s_cmd_timeout_kobj, &sbi_timeout_ktype, + &sbi->kobj, "cmd_timeout"); + if (ret) { + hmdfs_release_sysfs(sbi); + return ret; + } + + kobject_uevent(&sbi->kobj, KOBJ_ADD); + return 0; +} + +void hmdfs_unregister_sysfs(struct hmdfs_sb_info *sbi) +{ + kobject_del(&sbi->s_cmd_timeout_kobj); + kobject_del(&sbi->kobj); +} + +static inline int to_sysfs_fmt_evt(unsigned int evt) +{ + return evt == RAW_NODE_EVT_NR ? -1 : evt; +} + +static ssize_t features_show(struct kobject *kobj, struct peer_attribute *attr, + char *buf) +{ + struct hmdfs_peer *peer = to_peer(kobj); + + return fill_features(buf, peer->features); +} + +static ssize_t event_show(struct kobject *kobj, struct peer_attribute *attr, + char *buf) +{ + struct hmdfs_peer *peer = to_peer(kobj); + + return snprintf(buf, PAGE_SIZE, + "cur_async evt %d seq %u\n" + "cur_sync evt %d seq %u\n" + "pending evt %d seq %u\n" + "merged evt %u\n" + "dup_drop evt %u %u\n" + "waiting evt %u %u\n" + "seq_tbl %u %u %u %u\n" + "seq_rd_idx %u\n" + "seq_wr_idx %u\n", + to_sysfs_fmt_evt(peer->cur_evt[0]), + peer->cur_evt_seq[0], + to_sysfs_fmt_evt(peer->cur_evt[1]), + peer->cur_evt_seq[1], + to_sysfs_fmt_evt(peer->pending_evt), + peer->pending_evt_seq, + peer->merged_evt, + peer->dup_evt[RAW_NODE_EVT_OFF], + peer->dup_evt[RAW_NODE_EVT_ON], + peer->waiting_evt[RAW_NODE_EVT_OFF], + peer->waiting_evt[RAW_NODE_EVT_ON], + peer->seq_tbl[0], peer->seq_tbl[1], peer->seq_tbl[2], + peer->seq_tbl[3], + peer->seq_rd_idx % RAW_NODE_EVT_MAX_NR, + peer->seq_wr_idx % RAW_NODE_EVT_MAX_NR); +} + +static ssize_t stash_show(struct kobject *kobj, struct peer_attribute *attr, + char *buf) +{ + struct hmdfs_peer *peer = to_peer(kobj); + + return snprintf(buf, PAGE_SIZE, + "cur_ok %u\n" + "cur_nothing %u\n" + "cur_fail %u\n" + "total_ok %u\n" + "total_nothing %u\n" + "total_fail %u\n" + "ok_pages %llu\n" + "fail_pages %llu\n", + peer->stats.stash.cur_ok, + peer->stats.stash.cur_nothing, + peer->stats.stash.cur_fail, + peer->stats.stash.total_ok, + peer->stats.stash.total_nothing, + peer->stats.stash.total_fail, + peer->stats.stash.ok_pages, + peer->stats.stash.fail_pages); +} + +static ssize_t restore_show(struct kobject *kobj, struct peer_attribute *attr, + char *buf) +{ + struct hmdfs_peer *peer = to_peer(kobj); + + return snprintf(buf, PAGE_SIZE, + "cur_ok %u\n" + "cur_fail %u\n" + "cur_keep %u\n" + "total_ok %u\n" + "total_fail %u\n" + "total_keep %u\n" + "ok_pages %llu\n" + "fail_pages %llu\n", + peer->stats.restore.cur_ok, + peer->stats.restore.cur_fail, + peer->stats.restore.cur_keep, + peer->stats.restore.total_ok, + peer->stats.restore.total_fail, + peer->stats.restore.total_keep, + peer->stats.restore.ok_pages, + 
peer->stats.restore.fail_pages); +} + +static ssize_t rebuild_show(struct kobject *kobj, struct peer_attribute *attr, + char *buf) +{ + struct hmdfs_peer *peer = to_peer(kobj); + + return snprintf(buf, PAGE_SIZE, + "cur_ok %u\n" + "cur_fail %u\n" + "cur_invalid %u\n" + "total_ok %u\n" + "total_fail %u\n" + "total_invalid %u\n" + "time %u\n", + peer->stats.rebuild.cur_ok, + peer->stats.rebuild.cur_fail, + peer->stats.rebuild.cur_invalid, + peer->stats.rebuild.total_ok, + peer->stats.rebuild.total_fail, + peer->stats.rebuild.total_invalid, + peer->stats.rebuild.time); +} + +static struct peer_attribute peer_features_attr = __ATTR_RO(features); +static struct peer_attribute peer_event_attr = __ATTR_RO(event); +static struct peer_attribute peer_stash_attr = __ATTR_RO(stash); +static struct peer_attribute peer_restore_attr = __ATTR_RO(restore); +static struct peer_attribute peer_rebuild_attr = __ATTR_RO(rebuild); + +static struct attribute *peer_attrs[] = { + &peer_features_attr.attr, + &peer_event_attr.attr, + &peer_stash_attr.attr, + &peer_restore_attr.attr, + &peer_rebuild_attr.attr, + NULL, +}; + +static ssize_t peer_attr_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct peer_attribute *peer_attr = to_peer_attr(attr); + + if (!peer_attr->show) + return -EIO; + return peer_attr->show(kobj, peer_attr, buf); +} + +static ssize_t peer_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct peer_attribute *peer_attr = to_peer_attr(attr); + + if (!peer_attr->store) + return -EIO; + return peer_attr->store(kobj, peer_attr, buf, len); +} + +static const struct sysfs_ops peer_sysfs_ops = { + .show = peer_attr_show, + .store = peer_attr_store, +}; + +static void peer_sysfs_release(struct kobject *kobj) +{ + struct hmdfs_peer *peer = to_peer(kobj); + + complete(&peer->kobj_unregister); +} + +static struct kobj_type peer_ktype = { + .sysfs_ops = &peer_sysfs_ops, + .default_attrs = peer_attrs, + .release = peer_sysfs_release, +}; + +int hmdfs_register_peer_sysfs(struct hmdfs_sb_info *sbi, + struct hmdfs_peer *peer) +{ + int err = 0; + + init_completion(&peer->kobj_unregister); + err = kobject_init_and_add(&peer->kobj, &peer_ktype, &sbi->kobj, + "peer_%llu", peer->device_id); + return err; +} + +void hmdfs_release_peer_sysfs(struct hmdfs_peer *peer) +{ + kobject_del(&peer->kobj); + kobject_put(&peer->kobj); + wait_for_completion(&peer->kobj_unregister); +} + +void notify(struct hmdfs_peer *node, struct notify_param *param) +{ + struct hmdfs_sb_info *sbi = node->sbi; + int in_len; + + if (!param) + return; + spin_lock(&sbi->notify_fifo_lock); + in_len = + kfifo_in(&sbi->notify_fifo, param, sizeof(struct notify_param)); + spin_unlock(&sbi->notify_fifo_lock); + if (in_len != sizeof(struct notify_param)) + return; + sysfs_notify(&sbi->kobj, NULL, "cmd"); +} + +int hmdfs_sysfs_init(void) +{ + hmdfs_kset = kset_create_and_add("hmdfs", NULL, fs_kobj); + if (!hmdfs_kset) + return -ENOMEM; + + return 0; +} + +void hmdfs_sysfs_exit(void) +{ + kset_unregister(hmdfs_kset); + hmdfs_kset = NULL; +} diff --git a/fs/hmdfs/comm/device_node.h b/fs/hmdfs/comm/device_node.h new file mode 100644 index 0000000000000000000000000000000000000000..3c99c7fb679fbe723deb4c56e77a52d115992969 --- /dev/null +++ b/fs/hmdfs/comm/device_node.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/device_node.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#ifndef HMDFS_DEVICE_NODE_H +#define HMDFS_DEVICE_NODE_H + +#include "hmdfs.h" +#include "transport.h" + +enum CTRL_NODE_CMD { + CMD_UPDATE_SOCKET = 0, + CMD_OFF_LINE, + CMD_OFF_LINE_ALL, + CMD_CNT, +}; + +struct update_socket_param { + int32_t cmd; + int32_t newfd; + uint8_t status; + uint8_t masterkey[HMDFS_KEY_SIZE]; + uint8_t cid[HMDFS_CID_SIZE]; +} __packed; + +struct offline_param { + int32_t cmd; + uint8_t remote_cid[HMDFS_CID_SIZE]; +} __packed; + +struct offline_all_param { + int32_t cmd; +} __packed; + +enum NOTIFY { + NOTIFY_GET_SESSION, + NOTIFY_OFFLINE, + NOTIFY_NONE, + NOTIFY_CNT, +}; + +struct notify_param { + int32_t notify; + int32_t fd; + uint8_t remote_cid[HMDFS_CID_SIZE]; +} __packed; + +struct sbi_attribute { + struct attribute attr; + ssize_t (*show)(struct kobject *kobj, struct sbi_attribute *attr, + char *buf); + ssize_t (*store)(struct kobject *kobj, struct sbi_attribute *attr, + const char *buf, size_t len); +}; + +struct peer_attribute { + struct attribute attr; + ssize_t (*show)(struct kobject *kobj, struct peer_attribute *attr, + char *buf); + ssize_t (*store)(struct kobject *kobj, struct peer_attribute *attr, + const char *buf, size_t len); +}; + +struct sbi_cmd_attribute { + struct attribute attr; + int command; +}; + +void notify(struct hmdfs_peer *node, struct notify_param *param); +int hmdfs_register_sysfs(const char *name, struct hmdfs_sb_info *sbi); +void hmdfs_unregister_sysfs(struct hmdfs_sb_info *sbi); +void hmdfs_release_sysfs(struct hmdfs_sb_info *sbi); +int hmdfs_register_peer_sysfs(struct hmdfs_sb_info *sbi, + struct hmdfs_peer *peer); +void hmdfs_release_peer_sysfs(struct hmdfs_peer *peer); +int hmdfs_sysfs_init(void); +void hmdfs_sysfs_exit(void); + +static inline struct sbi_attribute *to_sbi_attr(struct attribute *x) +{ + return container_of(x, struct sbi_attribute, attr); +} + +static inline struct hmdfs_sb_info *to_sbi(struct kobject *x) +{ + return container_of(x, struct hmdfs_sb_info, kobj); +} + +static inline struct peer_attribute *to_peer_attr(struct attribute *x) +{ + return container_of(x, struct peer_attribute, attr); +} + +static inline struct hmdfs_peer *to_peer(struct kobject *x) +{ + return container_of(x, struct hmdfs_peer, kobj); +} +#endif diff --git a/fs/hmdfs/comm/fault_inject.c b/fs/hmdfs/comm/fault_inject.c new file mode 100644 index 0000000000000000000000000000000000000000..11779b53b0ea38e6daebcf03d4a27edc07b46a47 --- /dev/null +++ b/fs/hmdfs/comm/fault_inject.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/comm/fault_inject.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#include "hmdfs.h" +#include "fault_inject.h" +#include "connection.h" + +static DECLARE_FAULT_ATTR(fail_default_attr); +static struct dentry *hmdfs_debugfs_root; + +void __init hmdfs_create_debugfs_root(void) +{ + hmdfs_debugfs_root = debugfs_create_dir("hmdfs", NULL); + if (!hmdfs_debugfs_root) + hmdfs_warning("failed to create debugfs directory"); +} + +void hmdfs_destroy_debugfs_root(void) +{ + debugfs_remove_recursive(hmdfs_debugfs_root); + hmdfs_debugfs_root = NULL; +} + +void hmdfs_fault_inject_init(struct hmdfs_fault_inject *fault_inject, + const char *name) +{ + struct dentry *dir = NULL; + struct dentry *parent = NULL; + struct fault_attr *attr = &fault_inject->attr; + + if (!hmdfs_debugfs_root) + return; + + parent = debugfs_create_dir(name, hmdfs_debugfs_root); + if (!parent) { + hmdfs_warning("failed to create %s debugfs directory", name); + return; + } + + *attr = fail_default_attr; + dir = fault_create_debugfs_attr("fault_inject", parent, attr); + if (IS_ERR(dir)) { + hmdfs_warning("hmdfs: failed to create debugfs attr"); + debugfs_remove_recursive(parent); + return; + } + fault_inject->parent = parent; + debugfs_create_ulong("op_mask", 0600, dir, &fault_inject->op_mask); + debugfs_create_ulong("fail_send_message", 0600, dir, + &fault_inject->fail_send_message); + debugfs_create_ulong("fake_fid_ver", 0600, dir, + &fault_inject->fake_fid_ver); + debugfs_create_bool("fail_req", 0600, dir, &fault_inject->fail_req); +} + +void hmdfs_fault_inject_fini(struct hmdfs_fault_inject *fault_inject) +{ + debugfs_remove_recursive(fault_inject->parent); +} + +bool hmdfs_should_fail_sendmsg(struct hmdfs_fault_inject *fault_inject, + struct hmdfs_peer *con, + struct hmdfs_send_data *msg, int *err) +{ + struct hmdfs_head_cmd *head = (struct hmdfs_head_cmd *)msg->head; + unsigned long type = fault_inject->fail_send_message; + + if (!test_bit(head->operations.command, &fault_inject->op_mask)) + return false; + + if (type != T_MSG_FAIL && type != T_MSG_DISCARD) + return false; + + if (!should_fail(&fault_inject->attr, 1)) + return false; + + if (type == T_MSG_FAIL) + *err = -EINVAL; + else if (type == T_MSG_DISCARD) + *err = 0; + + hmdfs_err( + "fault injection err %d, %s message, device_id %llu, msg_id %u, cmd %d", + *err, (type == T_MSG_FAIL) ? "fail" : "discard", con->device_id, + le32_to_cpu(head->msg_id), head->operations.command); + return true; +} + +bool hmdfs_should_fail_req(struct hmdfs_fault_inject *fault_inject, + struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + int *err) +{ + if (!test_bit(cmd->operations.command, &fault_inject->op_mask)) + return false; + + if (!fault_inject->fail_req) + return false; + + if (!should_fail(&fault_inject->attr, 1)) + return false; + + *err = -EIO; + hmdfs_err("fault injection err %d, device_id %llu, msg_id %u, cmd %d", + *err, con->device_id, le32_to_cpu(cmd->msg_id), + cmd->operations.command); + return true; +} + +bool hmdfs_should_fake_fid_ver(struct hmdfs_fault_inject *fault_inject, + struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, + enum CHANGE_FID_VER_TYPE fake_type) +{ + unsigned long type = fault_inject->fake_fid_ver; + + if (!test_bit(cmd->operations.command, &fault_inject->op_mask)) + return false; + + if (type != fake_type) + return false; + + if (!should_fail(&fault_inject->attr, 1)) + return false; + + hmdfs_err( + "fault injection to change fid ver by %s cookie, device_id %llu, msg_id %u, cmd %d", + (type == T_BOOT_COOKIE) ? 
"boot" : "con", con->device_id, + le32_to_cpu(cmd->msg_id), cmd->operations.command); + return true; +} diff --git a/fs/hmdfs/comm/fault_inject.h b/fs/hmdfs/comm/fault_inject.h new file mode 100644 index 0000000000000000000000000000000000000000..be8876ab0328e4a1aa1d71b5aa1f2b9946db348b --- /dev/null +++ b/fs/hmdfs/comm/fault_inject.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/fault_inject.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_FAULT_INJECT_H +#define HMDFS_FAULT_INJECT_H + +#include +#include "protocol.h" + +struct hmdfs_fault_inject { +#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS + struct fault_attr attr; + struct dentry *parent; + unsigned long op_mask; + unsigned long fail_send_message; + unsigned long fake_fid_ver; + bool fail_req; +#endif +}; + +enum FAIL_MESSAGE_TYPE { + T_MSG_FAIL = 1, + T_MSG_DISCARD = 2, +}; + +enum CHANGE_FID_VER_TYPE { + T_BOOT_COOKIE = 1, + T_CON_COOKIE = 2, +}; + +#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS +void __init hmdfs_create_debugfs_root(void); +void hmdfs_destroy_debugfs_root(void); + +void hmdfs_fault_inject_init(struct hmdfs_fault_inject *fault_inject, + const char *name); +void hmdfs_fault_inject_fini(struct hmdfs_fault_inject *fault_inject); +bool hmdfs_should_fail_sendmsg(struct hmdfs_fault_inject *fault_inject, + struct hmdfs_peer *con, + struct hmdfs_send_data *msg, int *err); +bool hmdfs_should_fail_req(struct hmdfs_fault_inject *fault_inject, + struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + int *err); +bool hmdfs_should_fake_fid_ver(struct hmdfs_fault_inject *fault_inject, + struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, + enum CHANGE_FID_VER_TYPE fake_type); +#else +static inline void __init hmdfs_create_debugfs_root(void) {} +static inline void hmdfs_destroy_debugfs_root(void) {} + +static inline void +hmdfs_fault_inject_init(struct hmdfs_fault_inject *fault_inject, + const char *name) +{ +} +static inline void +hmdfs_fault_inject_fini(struct hmdfs_fault_inject *fault_inject) +{ +} +static inline bool +hmdfs_should_fail_sendmsg(struct hmdfs_fault_inject *fault_inject, + struct hmdfs_peer *con, struct hmdfs_send_data *msg, + int *err) +{ + return false; +} +static inline bool +hmdfs_should_fail_req(struct hmdfs_fault_inject *fault_inject, + struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + int *err) +{ + return false; +} +static inline bool +hmdfs_should_fake_fid_ver(struct hmdfs_fault_inject *fault_inject, + struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + enum CHANGE_FID_VER_TYPE fake_type) +{ + return false; +} +#endif + +#endif // HMDFS_FAULT_INJECT_H diff --git a/fs/hmdfs/comm/message_verify.c b/fs/hmdfs/comm/message_verify.c new file mode 100644 index 0000000000000000000000000000000000000000..c9eb94d8b615eaf4c0675b5a6756147f2ab6a3a5 --- /dev/null +++ b/fs/hmdfs/comm/message_verify.c @@ -0,0 +1,985 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/comm/message_verify.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#include "message_verify.h" + +#include +#include +#include + +#include "connection.h" +#include "hmdfs.h" +#include "hmdfs_server.h" + +size_t message_length[C_FLAG_SIZE][F_SIZE][HMDFS_MESSAGE_MIN_MAX]; +bool need_response[F_SIZE]; + +void hmdfs_message_verify_init(void) +{ + int flag, cmd; + + for (cmd = 0; cmd < F_SIZE; cmd++) + need_response[cmd] = true; + need_response[F_RELEASE] = false; + need_response[F_CONNECT_REKEY] = false; + need_response[F_DROP_PUSH] = false; + + for (flag = 0; flag < C_FLAG_SIZE; flag++) { + for (cmd = 0; cmd < F_SIZE; cmd++) { + message_length[flag][cmd][HMDFS_MESSAGE_MIN_INDEX] = 1; + message_length[flag][cmd][HMDFS_MESSAGE_MAX_INDEX] = 0; + message_length[flag][cmd][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + } + } + + message_length[C_REQUEST][F_OPEN][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct open_request); + message_length[C_REQUEST][F_OPEN][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct open_request) + PATH_MAX + 1; + message_length[C_REQUEST][F_OPEN][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_OPEN][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_OPEN][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct open_response); + message_length[C_RESPONSE][F_OPEN][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_ATOMIC_OPEN][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct atomic_open_request); + message_length[C_REQUEST][F_ATOMIC_OPEN][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct atomic_open_request) + PATH_MAX + NAME_MAX + 1; + message_length[C_REQUEST][F_ATOMIC_OPEN][HMDFS_MESSAGE_LEN_JUDGE_INDEX] + = MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_ATOMIC_OPEN][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_ATOMIC_OPEN][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct atomic_open_response); + message_length[C_RESPONSE][F_ATOMIC_OPEN][HMDFS_MESSAGE_LEN_JUDGE_INDEX] + = MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_RELEASE][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct release_request); + message_length[C_REQUEST][F_RELEASE][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct release_request); + message_length[C_REQUEST][F_RELEASE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_FSYNC][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct fsync_request); + message_length[C_REQUEST][F_FSYNC][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct fsync_request); + message_length[C_REQUEST][F_FSYNC][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + message_length[C_RESPONSE][F_FSYNC][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_FSYNC][HMDFS_MESSAGE_MAX_INDEX] = 0; + message_length[C_RESPONSE][F_FSYNC][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_READPAGE][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct readpage_request); + message_length[C_REQUEST][F_READPAGE][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct readpage_request); + message_length[C_REQUEST][F_READPAGE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + message_length[C_RESPONSE][F_READPAGE][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_READPAGE][HMDFS_MESSAGE_MAX_INDEX] = + HMDFS_PAGE_SIZE; + message_length[C_RESPONSE][F_READPAGE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + + message_length[C_REQUEST][F_READPAGES][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct readpages_request); + message_length[C_REQUEST][F_READPAGES][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct 
readpages_request); + message_length[C_REQUEST][F_READPAGES][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + message_length[C_RESPONSE][F_READPAGES][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_READPAGES][HMDFS_MESSAGE_MAX_INDEX] = + HMDFS_READPAGES_NR_MAX * HMDFS_PAGE_SIZE; + message_length[C_RESPONSE][F_READPAGES][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + + message_length[C_REQUEST][F_READPAGES_OPEN][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct readpages_open_request); + message_length[C_REQUEST][F_READPAGES_OPEN][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct readpages_open_request) + PATH_MAX + 1; + message_length[C_REQUEST][F_READPAGES_OPEN][ + HMDFS_MESSAGE_LEN_JUDGE_INDEX] = MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_READPAGES_OPEN][HMDFS_MESSAGE_MIN_INDEX] = + 0; + message_length[C_RESPONSE][F_READPAGES_OPEN][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct readpages_open_response) + + HMDFS_READPAGES_NR_MAX * HMDFS_PAGE_SIZE; + message_length[C_RESPONSE][F_READPAGES_OPEN][ + HMDFS_MESSAGE_LEN_JUDGE_INDEX] = MESSAGE_LEN_JUDGE_RANGE; + + message_length[C_REQUEST][F_WRITEPAGE][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct writepage_request) + HMDFS_PAGE_SIZE; + message_length[C_REQUEST][F_WRITEPAGE][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct writepage_request) + HMDFS_PAGE_SIZE; + message_length[C_REQUEST][F_WRITEPAGE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + message_length[C_RESPONSE][F_WRITEPAGE][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_WRITEPAGE][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct writepage_response); + message_length[C_RESPONSE][F_WRITEPAGE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_ITERATE][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct readdir_request); + message_length[C_REQUEST][F_ITERATE][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct readdir_request) + PATH_MAX + 1; + message_length[C_REQUEST][F_ITERATE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_ITERATE][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_ITERATE][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(__le64) + HMDFS_MAX_MESSAGE_LEN; + message_length[C_RESPONSE][F_ITERATE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + + message_length[C_REQUEST][F_MKDIR][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct mkdir_request); + message_length[C_REQUEST][F_MKDIR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct mkdir_request) + PATH_MAX + NAME_MAX + 2; + message_length[C_REQUEST][F_MKDIR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_MKDIR][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct hmdfs_inodeinfo_response); + message_length[C_RESPONSE][F_MKDIR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct hmdfs_inodeinfo_response); + message_length[C_RESPONSE][F_MKDIR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_CREATE][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct create_request); + message_length[C_REQUEST][F_CREATE][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct create_request) + PATH_MAX + NAME_MAX + 2; + message_length[C_REQUEST][F_CREATE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_CREATE][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct hmdfs_inodeinfo_response); + message_length[C_RESPONSE][F_CREATE][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct hmdfs_inodeinfo_response); + 
message_length[C_RESPONSE][F_CREATE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_RMDIR][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct rmdir_request); + message_length[C_REQUEST][F_RMDIR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct rmdir_request) + PATH_MAX + NAME_MAX + 2; + message_length[C_REQUEST][F_RMDIR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_RMDIR][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_RMDIR][HMDFS_MESSAGE_MAX_INDEX] = 0; + message_length[C_RESPONSE][F_RMDIR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_UNLINK][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct unlink_request); + message_length[C_REQUEST][F_UNLINK][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct unlink_request) + PATH_MAX + NAME_MAX + 2; + message_length[C_REQUEST][F_UNLINK][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_UNLINK][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_UNLINK][HMDFS_MESSAGE_MAX_INDEX] = 0; + message_length[C_RESPONSE][F_UNLINK][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_RENAME][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct rename_request); + message_length[C_REQUEST][F_RENAME][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct rename_request) + 4 + 4 * PATH_MAX; + message_length[C_REQUEST][F_RENAME][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_RENAME][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_RENAME][HMDFS_MESSAGE_MAX_INDEX] = 0; + message_length[C_RESPONSE][F_RENAME][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_SETATTR][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct setattr_request); + message_length[C_REQUEST][F_SETATTR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct setattr_request) + PATH_MAX + 1; + message_length[C_REQUEST][F_SETATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_SETATTR][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_SETATTR][HMDFS_MESSAGE_MAX_INDEX] = 0; + message_length[C_RESPONSE][F_SETATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_GETATTR][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct getattr_request); + message_length[C_REQUEST][F_GETATTR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct getattr_request) + PATH_MAX + 1; + message_length[C_REQUEST][F_GETATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_GETATTR][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_GETATTR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct getattr_response); + message_length[C_RESPONSE][F_GETATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_STATFS][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct statfs_request); + message_length[C_REQUEST][F_STATFS][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct statfs_request) + PATH_MAX + 1; + message_length[C_REQUEST][F_STATFS][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_STATFS][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_STATFS][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct statfs_response); + message_length[C_RESPONSE][F_STATFS][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_SYNCFS][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct 
syncfs_request); + message_length[C_REQUEST][F_SYNCFS][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct syncfs_request); + message_length[C_REQUEST][F_SYNCFS][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + message_length[C_RESPONSE][F_SYNCFS][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_SYNCFS][HMDFS_MESSAGE_MAX_INDEX] = 0; + message_length[C_RESPONSE][F_SYNCFS][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_GETXATTR][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct getxattr_request); + message_length[C_REQUEST][F_GETXATTR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct getxattr_request) + PATH_MAX + XATTR_NAME_MAX + 2; + message_length[C_REQUEST][F_GETXATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_GETXATTR][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_GETXATTR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct getxattr_response) + HMDFS_XATTR_SIZE_MAX; + message_length[C_RESPONSE][F_GETXATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + + message_length[C_REQUEST][F_SETXATTR][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct setxattr_request); + message_length[C_REQUEST][F_SETXATTR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct setxattr_request) + PATH_MAX + XATTR_NAME_MAX + + HMDFS_XATTR_SIZE_MAX + 2; + message_length[C_REQUEST][F_SETXATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_SETXATTR][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_SETXATTR][HMDFS_MESSAGE_MAX_INDEX] = 0; + message_length[C_RESPONSE][F_SETXATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_LISTXATTR][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct listxattr_request); + message_length[C_REQUEST][F_LISTXATTR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct listxattr_request) + PATH_MAX + 1; + message_length[C_REQUEST][F_LISTXATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_LISTXATTR][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_LISTXATTR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct listxattr_response) + HMDFS_LISTXATTR_SIZE_MAX; + message_length[C_RESPONSE][F_LISTXATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + + message_length[C_REQUEST][F_CONNECT_REKEY][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct connection_rekey_request); + message_length[C_REQUEST][F_CONNECT_REKEY][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct connection_rekey_request); + message_length[C_REQUEST][F_CONNECT_REKEY] + [HMDFS_MESSAGE_LEN_JUDGE_INDEX] = MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_DROP_PUSH][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct drop_push_request); + message_length[C_REQUEST][F_DROP_PUSH][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct drop_push_request) + PATH_MAX + 1; + message_length[C_REQUEST][F_DROP_PUSH][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; +} + +static void find_first_no_slash(const char **name, int *len) +{ + const char *s = *name; + int l = *len; + + while (*s == '/' && l > 0) { + s++; + l--; + } + + *name = s; + *len = l; +} + +static void find_first_slash(const char **name, int *len) +{ + const char *s = *name; + int l = *len; + + while (*s != '/' && l > 0) { + s++; + l--; + } + + *name = s; + *len = l; +} + +static bool path_contain_dotdot(const char *name, int len) +{ + while (true) { + find_first_no_slash(&name, &len); + + if (len == 0) + return false; + + if (len >= 2 && name[0] == '.' 
&& name[1] == '.' &&
+		    (len == 2 || name[2] == '/'))
+			return true;
+
+		find_first_slash(&name, &len);
+	}
+}
+
+static int hmdfs_open_message_verify(int flag, size_t len, void *data)
+{
+	struct open_request *req = NULL;
+	size_t tmp_len = 0;
+	int path_len;
+
+	if (flag != C_REQUEST || !data)
+		return 0;
+
+	req = data;
+	path_len = le32_to_cpu(req->path_len);
+	tmp_len = strnlen(req->buf, PATH_MAX);
+	if (tmp_len == PATH_MAX ||
+	    tmp_len != len - sizeof(struct open_request) - 1 ||
+	    path_len != tmp_len) {
+		hmdfs_err("verify fail");
+		return -EINVAL;
+	}
+
+	/*
+	 * We only allow the server to open files inside hmdfs, so we need
+	 * to make sure the path does not contain "..".
+	 */
+	if (path_contain_dotdot(req->buf, path_len)) {
+		hmdfs_err("verify fail, path contain dotdot");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int hmdfs_atomic_open_verify(int flag, size_t len, void *data)
+{
+	struct atomic_open_request *req = NULL;
+	size_t total_len;
+	size_t path_len;
+	size_t max_path_size;
+	size_t file_len;
+	size_t max_file_size;
+
+	if (flag != C_REQUEST || !data)
+		return 0;
+
+	req = data;
+	total_len = len - sizeof(*req);
+	max_path_size = min_t(size_t, PATH_MAX, total_len);
+	path_len = strnlen(req->buf, max_path_size);
+	/* the file name needs at least 2 bytes */
+	if (path_len == max_path_size || path_len + 3 > total_len) {
+		hmdfs_err("verify fail, len %zu, path_len %zu", len, path_len);
+		return -EINVAL;
+	}
+
+	max_file_size = min_t(size_t, NAME_MAX + 1, total_len - path_len - 1);
+	file_len = strnlen(req->buf + path_len + 1, max_file_size);
+
+	if (file_len == max_file_size ||
+	    total_len != path_len + 1 + file_len + 1 ||
+	    le32_to_cpu(req->path_len) != path_len ||
+	    le32_to_cpu(req->file_len) != file_len) {
+		hmdfs_err("verify fail total len %zu path_len %zu, declared path_len %u, file_len %zu, declared file_len %u",
+			  total_len, path_len, le32_to_cpu(req->path_len),
+			  file_len, le32_to_cpu(req->file_len));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int hmdfs_iterate_verify(int flag, size_t len, void *data)
+{
+	int err = 0;
+	struct readdir_request *tmp_request = NULL;
+	char *tmp_char = NULL;
+	size_t tmp_len = 0;
+
+	if (flag == C_REQUEST) {
+		if (data) {
+			tmp_request = data;
+			tmp_char = tmp_request->path;
+			tmp_len = strnlen(tmp_char, PATH_MAX);
+		} else {
+			return err;
+		}
+
+		if (le32_to_cpu(tmp_request->path_len) != tmp_len ||
+		    len - sizeof(struct readdir_request) - 1 != tmp_len) {
+			err = -EINVAL;
+			hmdfs_err("verify fail");
+			return err;
+		}
+	}
+
+	return err;
+}
+
+static int hmdfs_mkdir_verify(int flag, size_t len, void *data)
+{
+	int err = 0;
+	struct mkdir_request *tmp_request = NULL;
+	char *tmp_char = NULL;
+	size_t tmp_path_len = 0;
+	size_t tmp_name_len = 0;
+	size_t tmp_char_path_len = 0;
+	size_t tmp_char_name_len = 0;
+
+	if (flag == C_REQUEST) {
+		if (data) {
+			tmp_request = data;
+			tmp_char = tmp_request->path;
+			tmp_path_len = le32_to_cpu(tmp_request->path_len);
+			tmp_name_len = le32_to_cpu(tmp_request->name_len);
+			tmp_char_path_len = strnlen(tmp_char, PATH_MAX);
+			tmp_char_name_len = strnlen(
+				tmp_char + tmp_char_path_len + 1, NAME_MAX);
+		} else {
+			return err;
+		}
+
+		if (tmp_path_len != tmp_char_path_len ||
+		    tmp_name_len != tmp_char_name_len ||
+		    len - sizeof(struct mkdir_request) !=
+			    tmp_path_len + 1 + tmp_name_len + 1) {
+			err = -EINVAL;
+			hmdfs_err("verify fail");
+			return err;
+		}
+	}
+	return err;
+}
+
+static int hmdfs_create_verify(int flag, size_t len, void *data)
+{
+	int err = 0;
+	struct create_request *tmp_request = NULL;
+ char *tmp_char = NULL; + size_t tmp_path_len = 0; + size_t tmp_name_len = 0; + size_t tmp_char_path_len = 0; + size_t tmp_char_name_len = 0; + + if (flag == C_REQUEST) { + if (data) { + tmp_request = data; + tmp_char = tmp_request->path; + tmp_path_len = le32_to_cpu(tmp_request->path_len); + tmp_name_len = le32_to_cpu(tmp_request->name_len); + tmp_char_path_len = strnlen(tmp_char, PATH_MAX); + tmp_char_name_len = strnlen( + tmp_char + tmp_char_path_len + 1, NAME_MAX); + } else { + return err; + } + + if (tmp_path_len != tmp_char_path_len || + tmp_name_len != tmp_char_name_len || + len - sizeof(struct create_request) != + tmp_path_len + 1 + tmp_name_len + 1) { + err = -EINVAL; + hmdfs_err("verify fail"); + return err; + } + } + return err; +} + +static int hmdfs_rmdir_verify(int flag, size_t len, void *data) +{ + int err = 0; + struct rmdir_request *tmp_request = NULL; + char *tmp_char = NULL; + size_t tmp_path_len = 0; + size_t tmp_name_len = 0; + size_t tmp_char_path_len = 0; + size_t tmp_char_name_len = 0; + + if (flag == C_REQUEST) { + if (data) { + tmp_request = data; + tmp_char = tmp_request->path; + tmp_path_len = le32_to_cpu(tmp_request->path_len); + tmp_name_len = le32_to_cpu(tmp_request->name_len); + tmp_char_path_len = strnlen(tmp_char, PATH_MAX); + tmp_char_name_len = strnlen( + tmp_char + tmp_char_path_len + 1, NAME_MAX); + } else { + return err; + } + + if (tmp_path_len != tmp_char_path_len || + tmp_name_len != tmp_char_name_len || + len - sizeof(struct rmdir_request) != + tmp_path_len + 1 + tmp_name_len + 1) { + err = -EINVAL; + hmdfs_err("verify fail"); + return err; + } + } + + return err; +} + +static int hmdfs_unlink_verify(int flag, size_t len, void *data) +{ + int err = 0; + struct unlink_request *tmp_request = NULL; + char *tmp_char = NULL; + size_t tmp_path_len = 0; + size_t tmp_name_len = 0; + size_t tmp_char_path_len = 0; + size_t tmp_char_name_len = 0; + + if (flag == C_REQUEST) { + if (data) { + tmp_request = data; + tmp_char = tmp_request->path; + tmp_path_len = le32_to_cpu(tmp_request->path_len); + tmp_name_len = le32_to_cpu(tmp_request->name_len); + tmp_char_path_len = strnlen(tmp_char, PATH_MAX); + tmp_char_name_len = strnlen( + tmp_char + tmp_char_path_len + 1, NAME_MAX); + } else { + return err; + } + + if (tmp_path_len != tmp_char_path_len || + tmp_name_len != tmp_char_name_len || + len - sizeof(struct unlink_request) != + tmp_path_len + 1 + tmp_name_len + 1) { + err = -EINVAL; + hmdfs_err("verify fail"); + return err; + } + } + + return err; +} + +static int hmdfs_rename_verify(int flag, size_t len, void *data) +{ + int err = 0; + struct rename_request *tmp_request = NULL; + char *tmp_char = NULL; + size_t tmp_old_path_len = 0; + size_t tmp_new_path_len = 0; + size_t tmp_old_name_len = 0; + size_t tmp_new_name_len = 0; + size_t tmp_char_old_path_len = 0; + size_t tmp_char_new_path_len = 0; + size_t tmp_char_old_name_len = 0; + size_t tmp_char_new_name_len = 0; + + if (flag == C_REQUEST) { + if (data) { + tmp_request = data; + tmp_char = tmp_request->path; + + tmp_old_path_len = + le32_to_cpu(tmp_request->old_path_len); + tmp_new_path_len = + le32_to_cpu(tmp_request->new_path_len); + tmp_old_name_len = + le32_to_cpu(tmp_request->old_name_len); + tmp_new_name_len = + le32_to_cpu(tmp_request->new_name_len); + + tmp_char_old_path_len = strnlen(tmp_char, PATH_MAX); + tmp_char_new_path_len = strnlen( + tmp_char + tmp_char_old_path_len + 1, PATH_MAX); + + tmp_char_old_name_len = + strnlen(tmp_char + tmp_char_old_path_len + 1 + + tmp_char_new_path_len + 1, + 
PATH_MAX); + tmp_char_new_name_len = + strnlen(tmp_char + tmp_char_old_path_len + 1 + + tmp_char_new_path_len + 1 + + tmp_char_old_name_len + 1, + PATH_MAX); + } else { + return err; + } + + if (tmp_new_name_len != tmp_char_new_name_len || + tmp_old_name_len != tmp_char_old_name_len || + tmp_new_path_len != tmp_char_new_path_len || + tmp_old_path_len != tmp_char_old_path_len || + len - sizeof(struct rename_request) != + tmp_new_name_len + 1 + tmp_old_name_len + 1 + + tmp_new_path_len + 1 + tmp_old_path_len + + 1) { + err = -EINVAL; + hmdfs_err("verify fail"); + return err; + } + } + + return err; +} + +static int hmdfs_setattr_verify(int flag, size_t len, void *data) +{ + int err = 0; + struct setattr_request *tmp_request = NULL; + char *tmp_char = NULL; + size_t tmp_len = 0; + + if (flag == C_REQUEST) { + if (data) { + tmp_request = data; + tmp_char = tmp_request->buf; + tmp_len = strnlen(tmp_char, PATH_MAX); + } else { + return err; + } + + if (tmp_len != len - sizeof(struct setattr_request) - 1 || + le32_to_cpu(tmp_request->path_len) != tmp_len) { + err = -EINVAL; + hmdfs_err("verify fail"); + return err; + } + } + + return err; +} + +static int hmdfs_getattr_verify(int flag, size_t len, void *data) +{ + struct getattr_request *req = NULL; + size_t tmp_len; + + if (flag != C_REQUEST || !data) + return 0; + + req = data; + tmp_len = strnlen(req->buf, PATH_MAX); + if (tmp_len != len - sizeof(struct getattr_request) - 1 || + le32_to_cpu(req->path_len) != tmp_len) { + hmdfs_err("verify fail"); + return -EINVAL; + } + + return 0; +} + +static int hmdfs_getxattr_verify(int flag, size_t len, void *data) +{ + struct getxattr_request *req = NULL; + struct getxattr_response *resp = NULL; + size_t path_len = 0; + size_t name_len = 0; + size_t size = 0; + + if (!data) + return 0; + + if (flag == C_REQUEST) { + req = data; + path_len = le32_to_cpu(req->path_len); + name_len = le32_to_cpu(req->name_len); + size = le32_to_cpu(req->size); + if (path_len >= PATH_MAX || + path_len != strnlen(req->buf, PATH_MAX) || + name_len != + strnlen(req->buf + path_len + 1, XATTR_NAME_MAX) || + size > HMDFS_XATTR_SIZE_MAX) + return -EINVAL; + } else { + resp = data; + size = le32_to_cpu(resp->size); + if (len != sizeof(struct getxattr_response) && + len < sizeof(struct getxattr_response) + size) + return -EINVAL; + } + + return 0; +} + +static int hmdfs_setxattr_verify(int flag, size_t len, void *data) +{ + struct setxattr_request *req = NULL; + size_t path_len = 0; + size_t name_len = 0; + size_t size = 0; + + /* No need to verify response */ + if (flag != C_REQUEST || !data) + return 0; + + req = data; + path_len = le32_to_cpu(req->path_len); + name_len = le32_to_cpu(req->name_len); + size = le32_to_cpu(req->size); + if (path_len >= PATH_MAX || path_len != strnlen(req->buf, PATH_MAX) || + name_len != strnlen(req->buf + path_len + 1, XATTR_NAME_MAX) || + len != path_len + name_len + size + 2 + + sizeof(struct setxattr_request) || + size > HMDFS_XATTR_SIZE_MAX) + return -EINVAL; + + return 0; +} + +static int hmdfs_listxattr_verify(int flag, size_t len, void *data) +{ + struct listxattr_request *req = NULL; + struct listxattr_response *resp = NULL; + size_t path_len = 0; + size_t size = 0; + + if (!data) + return 0; + + if (flag == C_REQUEST) { + req = data; + path_len = le32_to_cpu(req->path_len); + size = le32_to_cpu(req->size); + if (path_len >= PATH_MAX || + path_len != strnlen(req->buf, PATH_MAX) || + size > HMDFS_LISTXATTR_SIZE_MAX) + return -EINVAL; + } else { + resp = data; + size = le32_to_cpu(resp->size); 
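+		/*
+		 * For responses only the server-declared size can be
+		 * validated: unless the message carries no list at all,
+		 * the declared payload must fit into the bytes that were
+		 * actually received.
+		 */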
+ if (len != sizeof(struct listxattr_response) && + len < sizeof(struct listxattr_response) + size) + return -EINVAL; + } + + return 0; +} + +static int hmdfs_writepage_verify(int flag, size_t len, void *data) +{ + struct writepage_request *req = NULL; + __u32 count; + + if (flag != C_REQUEST || !data) + return 0; + + req = data; + count = le32_to_cpu(req->count); + if (count == 0 || count > HMDFS_PAGE_SIZE || + len - sizeof(struct writepage_request) != HMDFS_PAGE_SIZE) { + hmdfs_err("verify fail, count is %d", count); + return -EINVAL; + } + + return 0; +} + +static int hmdfs_statfs_verify(int flag, size_t len, void *data) +{ + int err = 0; + struct statfs_request *tmp_request = NULL; + char *tmp_char = NULL; + size_t tmp_len = 0; + + if (flag == C_REQUEST) { + if (data) { + tmp_request = data; + tmp_char = tmp_request->path; + tmp_len = strnlen(tmp_char, PATH_MAX); + } else { + return err; + } + + if (le32_to_cpu(tmp_request->path_len) != tmp_len || + tmp_len != len - sizeof(struct statfs_request) - 1) { + err = -EINVAL; + hmdfs_err("verify fail"); + return err; + } + } + + return err; +} + +static int hmdfs_readpages_verify(int flag, size_t len, void *data) +{ + struct readpages_request *req = NULL; + unsigned int size; + + if (flag != C_REQUEST || !data) + return 0; + + req = data; + size = le32_to_cpu(req->size); + if (size > HMDFS_READPAGES_NR_MAX * HMDFS_PAGE_SIZE) { + hmdfs_err("verify fail, invalid req->size %u", size); + return -EINVAL; + } + + return 0; +} + +static int hmdfs_readpages_open_verify(int flag, size_t len, void *data) +{ + struct readpages_open_request *req = NULL; + unsigned int size; + size_t tmp_len; + + if (flag != C_REQUEST || !data) + return 0; + + req = data; + size = le32_to_cpu(req->size); + tmp_len = strnlen(req->buf, PATH_MAX); + if (tmp_len + 1 != len - sizeof(*req) || + le32_to_cpu(req->path_len) != tmp_len || + size > HMDFS_READPAGES_NR_MAX * HMDFS_PAGE_SIZE) { + hmdfs_err("verify fail, req->size %u", size); + return -EINVAL; + } + + return 0; +} + +typedef int (*hmdfs_message_verify_func)(int, size_t, void *); + +static const hmdfs_message_verify_func message_verify[F_SIZE] = { + [F_OPEN] = hmdfs_open_message_verify, + [F_WRITEPAGE] = hmdfs_writepage_verify, + [F_ITERATE] = hmdfs_iterate_verify, + [F_MKDIR] = hmdfs_mkdir_verify, + [F_CREATE] = hmdfs_create_verify, + [F_RMDIR] = hmdfs_rmdir_verify, + [F_UNLINK] = hmdfs_unlink_verify, + [F_RENAME] = hmdfs_rename_verify, + [F_SETATTR] = hmdfs_setattr_verify, + [F_STATFS] = hmdfs_statfs_verify, + [F_GETATTR] = hmdfs_getattr_verify, + [F_GETXATTR] = hmdfs_getxattr_verify, + [F_SETXATTR] = hmdfs_setxattr_verify, + [F_LISTXATTR] = hmdfs_listxattr_verify, + [F_READPAGES] = hmdfs_readpages_verify, + [F_READPAGES_OPEN] = hmdfs_readpages_open_verify, + [F_ATOMIC_OPEN] = hmdfs_atomic_open_verify, +}; + +static void handle_bad_message(struct hmdfs_peer *con, + struct hmdfs_head_cmd *head, int *err) +{ + /* + * Bad message won't be awared by upper layer, so ETIME is + * always given to upper layer. It is prefer to pass EOPNOTSUPP + * to upper layer when bad message (eg. caused by wrong len) + * received. + */ + if (head->operations.cmd_flag == C_RESPONSE) { + /* + * Change msg ret code. To let upper layer handle + * EOPNOTSUPP, hmdfs_message_verify() should return + * 0, so err code is modified either. + */ + head->ret_code = cpu_to_le32(-EOPNOTSUPP); + *err = 0; + } else { + if (head->operations.command >= F_SIZE) + return; + /* + * Some request messages do not need to be responded. 
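+	 * (The need_response[] table consulted below records which
+	 * commands actually expect one.)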
+ * Even if a response is returned, the response msg + * is automatically ignored in hmdfs_response_recv(). + * Therefore, it is normal to directly return a response. + */ + if (need_response[head->operations.command]) + hmdfs_send_err_response(con, head, -EOPNOTSUPP); + } +} + +int hmdfs_message_verify(struct hmdfs_peer *con, struct hmdfs_head_cmd *head, + void *data) +{ + int err = 0; + int flag, cmd, len_type; + size_t len, min, max; + + if (!head) + return -EINVAL; + + flag = head->operations.cmd_flag; + if (flag != C_REQUEST && flag != C_RESPONSE) + return -EINVAL; + + cmd = head->operations.command; + if (cmd >= F_SIZE || cmd < F_OPEN || cmd == F_RESERVED_0 || + (cmd >= F_RESERVED_1 && cmd <= F_RESERVED_4) || cmd == F_RESERVED_5) { + err = -EINVAL; + goto handle_bad_msg; + } + + if (head->version == DFS_2_0) { + len = le32_to_cpu(head->data_len) - + sizeof(struct hmdfs_head_cmd); + min = message_length[flag][cmd][HMDFS_MESSAGE_MIN_INDEX]; + if (head->operations.command == F_ITERATE && flag == C_RESPONSE) + max = sizeof(struct slice_descriptor) + PAGE_SIZE; + else + max = message_length[flag][cmd][HMDFS_MESSAGE_MAX_INDEX]; + len_type = + message_length[flag][cmd][HMDFS_MESSAGE_LEN_JUDGE_INDEX]; + + if (len_type == MESSAGE_LEN_JUDGE_RANGE) { + if (len < min || len > max) { + hmdfs_err( + "cmd %d -> %d message verify fail, len = %zu", + cmd, flag, len); + err = -EINVAL; + goto handle_bad_msg; + } + } else { + if (len != min && len != max) { + hmdfs_err( + "cmd %d -> %d message verify fail, len = %zu", + cmd, flag, len); + err = -EINVAL; + goto handle_bad_msg; + } + } + + if (message_verify[cmd]) + err = message_verify[cmd](flag, len, data); + + if (err) + goto handle_bad_msg; + + return err; + } + +handle_bad_msg: + if (err) { + handle_bad_message(con, head, &err); + return err; + } + + if (head->version == DFS_1_0) + return err; // now, DFS_1_0 version do not verify + + return -EINVAL; +} diff --git a/fs/hmdfs/comm/message_verify.h b/fs/hmdfs/comm/message_verify.h new file mode 100644 index 0000000000000000000000000000000000000000..99e696a448f122735707ed03d86572d280773e70 --- /dev/null +++ b/fs/hmdfs/comm/message_verify.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/message_verify.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_MESSAGE_VERIFY_H +#define HMDFS_MESSAGE_VERIFY_H + +#include "protocol.h" + +enum MESSAGE_LEN_JUDGE_TYPE { + MESSAGE_LEN_JUDGE_RANGE = 0, + MESSAGE_LEN_JUDGE_BIN = 1, +}; + +#define HMDFS_MESSAGE_MIN_INDEX 0 +#define HMDFS_MESSAGE_MAX_INDEX 1 +#define HMDFS_MESSAGE_LEN_JUDGE_INDEX 2 +#define HMDFS_MESSAGE_MIN_MAX 3 + +void hmdfs_message_verify_init(void); +int hmdfs_message_verify(struct hmdfs_peer *con, struct hmdfs_head_cmd *head, + void *data); + +#endif diff --git a/fs/hmdfs/comm/node_cb.c b/fs/hmdfs/comm/node_cb.c new file mode 100644 index 0000000000000000000000000000000000000000..21b84d2fff82af4217b88876aceb20605625c854 --- /dev/null +++ b/fs/hmdfs/comm/node_cb.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/comm/node_cb.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
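+ *
+ * Registry for per-peer lifecycle callbacks: users register a
+ * hmdfs_node_cb_desc per event (add/online/offline/del) plus a
+ * sync/async flag, and connection code fires them in registration
+ * order, skipping peers older than the descriptor's min_version.
+ * A caller would typically register once at module init, e.g.
+ * (illustrative sketch only; my_online_cb is a made-up handler):
+ *
+ *	static struct hmdfs_node_cb_desc cbs[] = {
+ *		{ .evt = NODE_EVT_ONLINE, .sync = true, .fn = my_online_cb },
+ *	};
+ *	hmdfs_node_add_evt_cb(cbs, ARRAY_SIZE(cbs));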
+ */ + +#include + +#include "node_cb.h" +#include "connection.h" + +static struct list_head cb_head[NODE_EVT_NR][NODE_EVT_TYPE_NR]; + +static const char *evt_str_tbl[NODE_EVT_NR] = { + "add", "online", "offline", "del", +}; + +static inline bool hmdfs_is_valid_node_evt(int evt) +{ + return (evt >= 0 && evt < NODE_EVT_NR); +} + +static const char *hmdfs_evt_str(int evt) +{ + if (!hmdfs_is_valid_node_evt(evt)) + return "unknown"; + return evt_str_tbl[evt]; +} + +void hmdfs_node_evt_cb_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cb_head); i++) { + int j; + + for (j = 0; j < ARRAY_SIZE(cb_head[0]); j++) + INIT_LIST_HEAD(&cb_head[i][j]); + } +} + +void hmdfs_node_add_evt_cb(struct hmdfs_node_cb_desc *desc, int nr) +{ + int i; + + for (i = 0; i < nr; i++) { + int evt = desc[i].evt; + bool sync = desc[i].sync; + + if (!hmdfs_is_valid_node_evt(evt)) + continue; + + list_add_tail(&desc[i].list, &cb_head[evt][sync]); + } +} + +void hmdfs_node_call_evt_cb(struct hmdfs_peer *conn, int evt, bool sync, + unsigned int seq) +{ + struct hmdfs_node_cb_desc *desc = NULL; + + hmdfs_info("node 0x%x:0x%llx call %s %s cb seq %u", + conn->owner, conn->device_id, hmdfs_evt_str(evt), + sync ? "sync" : "async", seq); + + if (!hmdfs_is_valid_node_evt(evt)) + return; + + list_for_each_entry(desc, &cb_head[evt][sync], list) { + if (conn->version < desc->min_version) + continue; + + desc->fn(conn, evt, seq); + } +} diff --git a/fs/hmdfs/comm/node_cb.h b/fs/hmdfs/comm/node_cb.h new file mode 100644 index 0000000000000000000000000000000000000000..fe53b946f66846909d9e328a6093b496c380e0cf --- /dev/null +++ b/fs/hmdfs/comm/node_cb.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/node_cb.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_NODE_CB_H +#define HMDFS_NODE_CB_H + +#include "hmdfs.h" + +/* async & sync */ +#define NODE_EVT_TYPE_NR 2 + +enum { + NODE_EVT_ADD = 0, + NODE_EVT_ONLINE, + NODE_EVT_OFFLINE, + NODE_EVT_DEL, + NODE_EVT_NR, +}; + +struct hmdfs_peer; + +typedef void (*hmdfs_node_evt_cb)(struct hmdfs_peer *conn, + int evt, unsigned int seq); + +struct hmdfs_node_cb_desc { + int evt; + bool sync; + unsigned char min_version; + hmdfs_node_evt_cb fn; + struct list_head list; +}; + +extern void hmdfs_node_evt_cb_init(void); + +/* Only initialize during module init */ +extern void hmdfs_node_add_evt_cb(struct hmdfs_node_cb_desc *desc, int nr); +extern void hmdfs_node_call_evt_cb(struct hmdfs_peer *node, int evt, bool sync, + unsigned int seq); + +#endif /* HMDFS_NODE_CB_H */ diff --git a/fs/hmdfs/comm/protocol.h b/fs/hmdfs/comm/protocol.h new file mode 100644 index 0000000000000000000000000000000000000000..a873143f20d7989c45879ae54f9de30c8e977409 --- /dev/null +++ b/fs/hmdfs/comm/protocol.h @@ -0,0 +1,489 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/protocol.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
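+ *
+ * Wire format: every message starts with a fixed struct hmdfs_head_cmd
+ * (magic 0xF7, protocol version, total data_len including the head,
+ * the hmdfs_cmd triple and a message id), followed by the per-command
+ * payload defined below. All multi-byte on-wire fields are
+ * little-endian (__le16/__le32/__le64).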
+ */ + +#ifndef HMDFS_PROTOCOL_H +#define HMDFS_PROTOCOL_H + +#include +#include +#include +#include + +struct hmdfs_cmd { + __u8 reserved; + __u8 cmd_flag; + __u8 command; + __u8 reserved2; +} __packed; + +#define HMDFS_MSG_MAGIC 0xF7 +#define HMDFS_MAX_MESSAGE_LEN (8 * 1024 * 1024) + +struct hmdfs_head_cmd { + __u8 magic; + __u8 version; + __le16 reserved; + __le32 data_len; + struct hmdfs_cmd operations; + __le32 ret_code; + __le32 msg_id; + __le32 reserved1; +} __packed; + +enum FILE_RECV_STATE { + FILE_RECV_PROCESS = 0, + FILE_RECV_SUCC, + FILE_RECV_ERR_NET, + FILE_RECV_ERR_SPC, +}; + +struct file_recv_info { + void *local_filp; + atomic_t local_fslices; + atomic_t state; +}; + +enum MSG_IDR_TYPE { + MSG_IDR_1_0_NONE = 0, + MSG_IDR_1_0_MESSAGE_SYNC, + MSG_IDR_1_0_PAGE, + MSG_IDR_MESSAGE_SYNC, + MSG_IDR_MESSAGE_ASYNC, + MSG_IDR_PAGE, + MSG_IDR_MAX, +}; + +struct hmdfs_msg_idr_head { + __u32 type; + __u32 msg_id; + struct kref ref; + struct hmdfs_peer *peer; +}; + +struct sendmsg_wait_queue { + struct hmdfs_msg_idr_head head; + wait_queue_head_t response_q; + struct list_head async_msg; + atomic_t valid; + __u32 size; + void *buf; + __u32 ret; + unsigned long start; + struct file_recv_info recv_info; +}; + +struct hmdfs_send_command { + struct hmdfs_cmd operations; + void *data; + size_t len; + void *local_filp; + void *out_buf; + size_t out_len; + __u32 ret_code; +}; + +struct hmdfs_req { + struct hmdfs_cmd operations; + /* + * Normally, the caller ought set timeout to TIMEOUT_CONFIG, so that + * hmdfs_send_async_request will search s_cmd_timeout for the user- + * configured timeout values. + * + * However, consider the given scenery: + * The caller may want to issue multiple requests sharing the same + * timeout value, but the users may update the value during the gap. + * To ensure the "atomicty" of timeout-using for these requests, we + * provide the timeout field for hacking. 
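+ *
+ * An illustrative (not normative) use of the field:
+ *
+ *	struct hmdfs_req req;
+ *
+ *	req.timeout = TIMEOUT_CONFIG;	// use the per-command config
+ *	// or pin one value across a batch of related requests:
+ *	req.timeout = get_cmd_timeout(peer->sbi, F_SYNCFS);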
+ */ + unsigned int timeout; + void *data; + size_t data_len; + + void *private; // optional + size_t private_len; // optional +}; + +struct hmdfs_resp { + void *out_buf; + size_t out_len; + __u32 ret_code; +}; + +struct hmdfs_msg_parasite { + struct hmdfs_msg_idr_head head; + struct delayed_work d_work; + bool wfired; + struct hmdfs_req req; + struct hmdfs_resp resp; + unsigned long start; +}; + +struct hmdfs_send_data { + // sect1: head + void *head; + size_t head_len; + + // sect2: slice descriptor + void *sdesc; + size_t sdesc_len; + + // sect3: request / response / file slice + void *data; + size_t len; +}; + +struct slice_descriptor { + __le32 num_slices; + __le32 slice_size; + __le32 slice_sn; + __le32 content_size; +} __packed; + +enum DFS_VERSION { + INVALID_VERSION = 0, + DFS_1_0, + + USERSPACE_MAX_VER = 0x3F, + DFS_2_0, + + MAX_VERSION = 0xFF +}; + +enum CONN_OPERATIONS_VERSION { USERDFS_VERSION, PROTOCOL_VERSION }; + +enum CMD_FLAG { C_REQUEST = 0, C_RESPONSE = 1, C_FLAG_SIZE }; + +enum FILE_CMD { + F_OPEN = 0, + F_RELEASE = 1, + F_READPAGE = 2, + F_WRITEPAGE = 3, + F_ITERATE = 4, + F_RESERVED_1 = 5, + F_RESERVED_2 = 6, + F_RESERVED_3 = 7, + F_RESERVED_4 = 8, + F_MKDIR = 9, + F_RMDIR = 10, + F_CREATE = 11, + F_UNLINK = 12, + F_RENAME = 13, + F_SETATTR = 14, + F_RESERVED_5 = 15, + F_STATFS = 16, + F_CONNECT_REKEY = 17, + F_DROP_PUSH = 18, + F_RESERVED_0 = 19, + F_GETATTR = 20, + F_FSYNC = 21, + F_SYNCFS = 22, + F_GETXATTR = 23, + F_SETXATTR = 24, + F_LISTXATTR = 25, + F_READPAGES = 26, + F_READPAGES_OPEN = 27, + F_ATOMIC_OPEN = 28, + F_SIZE, +}; + +struct open_request { + __u8 file_type; + __le32 flags; + __le32 path_len; + char buf[0]; +} __packed; + +struct open_response { + __le32 change_detect_cap; + __le64 file_ver; + __le32 file_id; + __le64 file_size; + __le64 ino; + __le64 ctime; + __le32 ctime_nsec; + __le64 mtime; + __le32 mtime_nsec; + __le64 stable_ctime; + __le32 stable_ctime_nsec; + __le64 ichange_count; +} __packed; + +enum hmdfs_open_flags { + HMDFS_O_TRUNC = O_TRUNC, + HMDFS_O_EXCL = O_EXCL, +}; + +struct atomic_open_request { + __le32 open_flags; + __le16 mode; + __le16 reserved1; + __le32 path_len; + __le32 file_len; + __le64 reserved2[4]; + char buf[0]; +} __packed; + +struct atomic_open_response { + __le32 fno; + __le16 i_mode; + __le16 reserved1; + __le32 i_flags; + __le32 reserved2; + __le64 reserved3[4]; + struct open_response open_resp; +} __packed; + +struct release_request { + __le64 file_ver; + __le32 file_id; +} __packed; + +struct fsync_request { + __le64 file_ver; + __le32 file_id; + __le32 datasync; + __le64 start; + __le64 end; +} __packed; + +struct readpage_request { + __le64 file_ver; + __le32 file_id; + __le32 size; + __le64 index; +} __packed; + +struct readpage_response { + char buf[0]; +} __packed; + +struct readpages_request { + __le64 file_ver; + __le32 file_id; + __le32 size; + __le64 index; + __le64 reserved; +} __packed; + +struct readpages_response { + char buf[0]; +} __packed; + +struct readpages_open_request { + __u8 file_type; + __u8 reserved1[3]; + __le32 flags; + __le32 path_len; + __le32 size; + __le64 index; + __le64 reserved2; + char buf[0]; +} __packed; + +struct readpages_open_response { + struct open_response open_resp; + __le64 reserved[4]; + char buf[0]; +} __packed; + +struct writepage_request { + __le64 file_ver; + __le32 file_id; + __le64 index; + __le32 count; + char buf[0]; +} __packed; + +struct writepage_response { + __le64 ichange_count; + __le64 ctime; + __le32 ctime_nsec; +} __packed; + +struct readdir_request 
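+/*
+ * Like most request structs in this file, readdir_request ends in a
+ * zero-length array: the path is carried inline after the fixed
+ * fields, so the sender allocates sizeof(struct) + path_len + 1 bytes
+ * and the receiver re-derives path_len to verify the message.
+ */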
{ + __le64 dcache_crtime; + __le64 dcache_crtime_nsec; + __le64 dentry_ctime; + __le64 dentry_ctime_nsec; + __le64 num; + __le32 verify_cache; + __le32 path_len; + char path[0]; +} __packed; + +struct hmdfs_inodeinfo_response { + __le64 i_size; + __le64 i_mtime; + __le32 i_mtime_nsec; + __le32 fno; + __le16 i_mode; + __le64 i_ino; + __le32 i_flags; + __le32 i_reserved; +} __packed; + +struct mkdir_request { + __le32 path_len; + __le32 name_len; + __le16 mode; + char path[0]; +} __packed; + +struct create_request { + __le32 path_len; + __le32 name_len; + __le16 mode; + __u8 want_excl; + char path[0]; +} __packed; + +struct rmdir_request { + __le32 path_len; + __le32 name_len; + char path[0]; +} __packed; + +struct unlink_request { + __le32 path_len; + __le32 name_len; + char path[0]; +} __packed; + +struct rename_request { + __le32 old_path_len; + __le32 new_path_len; + __le32 old_name_len; + __le32 new_name_len; + __le32 flags; + char path[0]; +} __packed; + +struct drop_push_request { + __le32 path_len; + char path[0]; +} __packed; + +struct setattr_request { + __le64 size; + __le32 valid; + __le16 mode; + __le32 uid; + __le32 gid; + __le64 atime; + __le32 atime_nsec; + __le64 mtime; + __le32 mtime_nsec; + __le32 path_len; + char buf[0]; +} __packed; + +struct getattr_request { + __le32 lookup_flags; + __le32 path_len; + char buf[0]; +} __packed; + +struct getattr_response { + __le32 change_detect_cap; + __le32 result_mask; + __le32 flags; + __le64 fsid; + __le16 mode; + __le32 nlink; + __le32 uid; + __le32 gid; + __le32 rdev; + __le64 ino; + __le64 size; + __le64 blocks; + __le32 blksize; + __le64 atime; + __le32 atime_nsec; + __le64 mtime; + __le32 mtime_nsec; + __le64 ctime; + __le32 ctime_nsec; + __le64 crtime; + __le32 crtime_nsec; + __le64 ichange_count; +} __packed; + +struct statfs_request { + __le32 path_len; + char path[0]; +} __packed; + +struct statfs_response { + __le64 f_type; + __le64 f_bsize; + __le64 f_blocks; + __le64 f_bfree; + __le64 f_bavail; + __le64 f_files; + __le64 f_ffree; + __le32 f_fsid_0; + __le32 f_fsid_1; + __le64 f_namelen; + __le64 f_frsize; + __le64 f_flags; + __le64 f_spare_0; + __le64 f_spare_1; + __le64 f_spare_2; + __le64 f_spare_3; +} __packed; + +struct syncfs_request { + __le64 version; + __le32 flags; +} __packed; + +struct getxattr_request { + __le32 path_len; + __le32 name_len; + __le32 size; + char buf[0]; +} __packed; + +struct getxattr_response { + __le32 size; + char value[0]; /* xattr value may non-printable */ +} __packed; + +struct setxattr_request { + __le32 path_len; + __le32 name_len; + __le32 size; + __le32 flags; + __u8 del; /* remove xattr */ + char buf[0]; +} __packed; + +struct listxattr_request { + __le32 path_len; + __le32 size; + char buf[0]; +} __packed; + +struct listxattr_response { + __le32 size; + char list[0]; +} __packed; + +struct connection_rekey_request { + __le32 update_request; +} __packed; + +enum CONNECTION_KEY_UPDATE_REQUEST { + UPDATE_NOT_REQUESTED = 0, + UPDATE_REQUESTED = 1 +}; + +enum MSG_QUEUE_STATUS { + MSG_Q_SEND = 0, + MSG_Q_END_RECV, +}; +#endif diff --git a/fs/hmdfs/comm/socket_adapter.c b/fs/hmdfs/comm/socket_adapter.c new file mode 100644 index 0000000000000000000000000000000000000000..769b6d28ebcef214973e36d9d19bb9bbe254f4f5 --- /dev/null +++ b/fs/hmdfs/comm/socket_adapter.c @@ -0,0 +1,1151 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/comm/socket_adapter.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
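+ *
+ * Message pump for the hmdfs protocol: dispatches received requests
+ * to the s_recv_callbacks server handlers and matches responses back
+ * to whichever msg_idr entry (sync wait queue, async "parasite" or
+ * page work) issued them, accounting per-command statistics on the
+ * way.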
+ */ + +#include "socket_adapter.h" + +#include +#include +#include +#include +#include +#include + +#include "authority/authentication.h" +#include "comm/device_node.h" +#include "hmdfs_client.h" +#include "hmdfs_server.h" +#include "hmdfs_trace.h" +#include "message_verify.h" + +#define ACQUIRE_WFIRED_INTVAL_USEC_MIN 10 +#define ACQUIRE_WFIRED_INTVAL_USEC_MAX 30 + +typedef void (*request_callback)(struct hmdfs_peer *, struct hmdfs_head_cmd *, + void *); +typedef void (*response_callback)(struct hmdfs_peer *, + struct sendmsg_wait_queue *, void *, size_t); + +static const request_callback s_recv_callbacks[F_SIZE] = { + [F_OPEN] = hmdfs_server_open, + [F_READPAGE] = hmdfs_server_readpage, + [F_RELEASE] = hmdfs_server_release, + [F_WRITEPAGE] = hmdfs_server_writepage, + [F_ITERATE] = hmdfs_server_readdir, + [F_MKDIR] = hmdfs_server_mkdir, + [F_CREATE] = hmdfs_server_create, + [F_RMDIR] = hmdfs_server_rmdir, + [F_UNLINK] = hmdfs_server_unlink, + [F_RENAME] = hmdfs_server_rename, + [F_SETATTR] = hmdfs_server_setattr, + [F_STATFS] = hmdfs_server_statfs, + [F_DROP_PUSH] = hmdfs_server_get_drop_push, + [F_GETATTR] = hmdfs_server_getattr, + [F_FSYNC] = hmdfs_server_fsync, + [F_SYNCFS] = hmdfs_server_syncfs, + [F_GETXATTR] = hmdfs_server_getxattr, + [F_SETXATTR] = hmdfs_server_setxattr, + [F_LISTXATTR] = hmdfs_server_listxattr, + [F_READPAGES] = hmdfs_server_readpages, + [F_READPAGES_OPEN] = hmdfs_server_readpages_open, + [F_ATOMIC_OPEN] = hmdfs_server_atomic_open, +}; + +typedef void (*file_request_callback)(struct hmdfs_peer *, + struct hmdfs_send_command *); + +struct async_req_callbacks { + void (*on_wakeup)(struct hmdfs_peer *peer, const struct hmdfs_req *req, + const struct hmdfs_resp *resp); +}; + +static const struct async_req_callbacks g_async_req_callbacks[F_SIZE] = { + [F_SYNCFS] = { .on_wakeup = hmdfs_recv_syncfs_cb }, + [F_WRITEPAGE] = { .on_wakeup = hmdfs_writepage_cb }, +}; + +static void msg_release(struct kref *kref) +{ + struct sendmsg_wait_queue *msg_wq; + struct hmdfs_peer *con; + + msg_wq = (struct sendmsg_wait_queue *)container_of(kref, + struct hmdfs_msg_idr_head, ref); + con = msg_wq->head.peer; + idr_remove(&con->msg_idr, msg_wq->head.msg_id); + spin_unlock(&con->idr_lock); + + kfree(msg_wq->buf); + if (msg_wq->recv_info.local_filp) + fput(msg_wq->recv_info.local_filp); + kfree(msg_wq); +} + +// Always remember to find before put, and make sure con is avilable +void msg_put(struct sendmsg_wait_queue *msg_wq) +{ + kref_put_lock(&msg_wq->head.ref, msg_release, + &msg_wq->head.peer->idr_lock); +} + +static void recv_info_init(struct file_recv_info *recv_info) +{ + memset(recv_info, 0, sizeof(struct file_recv_info)); + atomic_set(&recv_info->local_fslices, 0); + atomic_set(&recv_info->state, FILE_RECV_PROCESS); +} + +static int msg_init(struct hmdfs_peer *con, struct sendmsg_wait_queue *msg_wq) +{ + int ret = 0; + struct file_recv_info *recv_info = &msg_wq->recv_info; + + ret = hmdfs_alloc_msg_idr(con, MSG_IDR_MESSAGE_SYNC, msg_wq); + if (unlikely(ret)) + return ret; + + atomic_set(&msg_wq->valid, MSG_Q_SEND); + init_waitqueue_head(&msg_wq->response_q); + recv_info_init(recv_info); + msg_wq->start = jiffies; + return 0; +} + +static inline void statistic_con_sb_dirty(struct hmdfs_peer *con, + const struct hmdfs_cmd *op) +{ + if (op->command == F_WRITEPAGE && op->cmd_flag == C_REQUEST) + atomic64_inc(&con->sb_dirty_count); +} + +int hmdfs_sendmessage(struct hmdfs_peer *node, struct hmdfs_send_data *msg) +{ + int ret = 0; + struct connection *connect = NULL; + struct tcp_handle 
*tcp = NULL; + struct hmdfs_head_cmd *head = msg->head; + const struct cred *old_cred; + + if (!node) { + hmdfs_err("node NULL when send cmd %d", + head->operations.command); + ret = -EAGAIN; + goto out_err; + } else if (node->status != NODE_STAT_ONLINE) { + hmdfs_err("device %llu OFFLINE %d when send cmd %d", + node->device_id, node->status, + head->operations.command); + ret = -EAGAIN; + goto out; + } + + if (hmdfs_should_fail_sendmsg(&node->sbi->fault_inject, node, msg, + &ret)) + goto out; + + old_cred = hmdfs_override_creds(node->sbi->system_cred); + + do { + connect = get_conn_impl(node, CONNECT_TYPE_TCP); + if (!connect) { + hmdfs_info_ratelimited( + "device %llu no connection available when send cmd %d, get new session", + node->device_id, head->operations.command); + if (node->status != NODE_STAT_OFFLINE) { + struct notify_param param; + + memcpy(param.remote_cid, node->cid, + HMDFS_CID_SIZE); + param.notify = NOTIFY_OFFLINE; + param.fd = INVALID_SOCKET_FD; + notify(node, ¶m); + } + ret = -EAGAIN; + goto revert_cred; + } + + ret = connect->send_message(connect, msg); + if (ret == -ESHUTDOWN) { + hmdfs_info("device %llu send cmd %d message fail, connection stop", + node->device_id, head->operations.command); + connect->status = CONNECT_STAT_STOP; + tcp = connect->connect_handle; + if (node->status != NODE_STAT_OFFLINE) { + connection_get(connect); + if (!queue_work(node->reget_conn_wq, + &connect->reget_work)) + connection_put(connect); + } + connection_put(connect); + /* + * node->status is OFFLINE can not ensure + * node_seq will be increased before + * hmdfs_sendmessage() returns. + */ + hmdfs_node_inc_evt_seq(node); + } else { + connection_put(connect); + goto revert_cred; + } + } while (node->status != NODE_STAT_OFFLINE); +revert_cred: + hmdfs_revert_creds(old_cred); + + if (!ret) + statistic_con_sb_dirty(node, &head->operations); +out: + if (node->version == DFS_2_0 && + head->operations.cmd_flag == C_REQUEST) + hmdfs_client_snd_statis(node->sbi, + head->operations.command, ret); + else if (node->version == DFS_2_0 && + head->operations.cmd_flag == C_RESPONSE) + hmdfs_server_snd_statis(node->sbi, + head->operations.command, ret); +out_err: + return ret; +} + +int hmdfs_sendmessage_response(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, __u32 data_len, + void *buf, __u32 ret_code) +{ + int ret; + struct hmdfs_send_data msg; + struct hmdfs_head_cmd head; + + head.magic = HMDFS_MSG_MAGIC; + head.version = DFS_2_0; + head.operations = cmd->operations; + head.operations.cmd_flag = C_RESPONSE; + head.data_len = cpu_to_le32(data_len + sizeof(struct hmdfs_head_cmd)); + head.ret_code = cpu_to_le32(ret_code); + head.msg_id = cmd->msg_id; + head.reserved = cmd->reserved; + head.reserved1 = cmd->reserved1; + msg.head = &head; + msg.head_len = sizeof(struct hmdfs_head_cmd); + msg.data = buf; + msg.len = data_len; + msg.sdesc = NULL; + msg.sdesc_len = 0; + + ret = hmdfs_sendmessage(con, &msg); + return ret; +} + +static void mp_release(struct kref *kref) +{ + struct hmdfs_msg_parasite *mp = NULL; + struct hmdfs_peer *peer = NULL; + + mp = (struct hmdfs_msg_parasite *)container_of(kref, + struct hmdfs_msg_idr_head, ref); + peer = mp->head.peer; + idr_remove(&peer->msg_idr, mp->head.msg_id); + spin_unlock(&peer->idr_lock); + + peer_put(peer); + kfree(mp->resp.out_buf); + kfree(mp); +} + +void mp_put(struct hmdfs_msg_parasite *mp) +{ + kref_put_lock(&mp->head.ref, mp_release, &mp->head.peer->idr_lock); +} + +static void async_request_cb_on_wakeup_fn(struct work_struct *w) +{ + struct 
hmdfs_msg_parasite *mp =
+		container_of(w, struct hmdfs_msg_parasite, d_work.work);
+	struct async_req_callbacks cbs;
+	const struct cred *old_cred =
+		hmdfs_override_creds(mp->head.peer->sbi->cred);
+
+	if (mp->resp.ret_code == -ETIME)
+		hmdfs_client_resp_statis(mp->head.peer->sbi,
+					 mp->req.operations.command,
+					 HMDFS_RESP_TIMEOUT, 0, 0);
+
+	cbs = g_async_req_callbacks[mp->req.operations.command];
+	if (cbs.on_wakeup)
+		(*cbs.on_wakeup)(mp->head.peer, &mp->req, &mp->resp);
+	mp_put(mp);
+	hmdfs_revert_creds(old_cred);
+}
+
+static struct hmdfs_msg_parasite *mp_alloc(struct hmdfs_peer *peer,
+					   const struct hmdfs_req *req)
+{
+	struct hmdfs_msg_parasite *mp = kzalloc(sizeof(*mp), GFP_KERNEL);
+	int ret;
+
+	if (unlikely(!mp))
+		return ERR_PTR(-ENOMEM);
+
+	ret = hmdfs_alloc_msg_idr(peer, MSG_IDR_MESSAGE_ASYNC, mp);
+	if (unlikely(ret)) {
+		kfree(mp);
+		return ERR_PTR(ret);
+	}
+
+	mp->start = jiffies;
+	peer_get(mp->head.peer);
+	mp->resp.ret_code = -ETIME;
+	INIT_DELAYED_WORK(&mp->d_work, async_request_cb_on_wakeup_fn);
+	mp->wfired = false;
+	mp->req = *req;
+	return mp;
+}
+
+/**
+ * hmdfs_send_async_request - send out an asynchronous request
+ * @peer: target device node
+ * @req: request descriptor + necessary contexts
+ *
+ * Send out a request synchronously and wait for its response asynchronously.
+ * Return -ESHUTDOWN when the device node is unreachable
+ * Return -EAGAIN if the network is recovering
+ * Return -ENOMEM if out of memory
+ *
+ * Register g_async_req_callbacks to receive the response
+ */
+int hmdfs_send_async_request(struct hmdfs_peer *peer,
+			     const struct hmdfs_req *req)
+{
+	int ret = 0;
+	struct hmdfs_send_data msg;
+	struct hmdfs_head_cmd head;
+	struct hmdfs_msg_parasite *mp = NULL;
+	size_t msg_len = req->data_len + sizeof(struct hmdfs_head_cmd);
+	unsigned int timeout;
+
+	if (req->timeout == TIMEOUT_CONFIG)
+		timeout = get_cmd_timeout(peer->sbi, req->operations.command);
+	else
+		timeout = req->timeout;
+	if (timeout == TIMEOUT_UNINIT || timeout == TIMEOUT_NONE) {
+		hmdfs_err("send msg %d with uninitialized/invalid timeout",
+			  req->operations.command);
+		return -EINVAL;
+	}
+
+	if (!hmdfs_is_node_online(peer))
+		return -EAGAIN;
+
+	mp = mp_alloc(peer, req);
+	if (IS_ERR(mp))
+		return PTR_ERR(mp);
+	head.magic = HMDFS_MSG_MAGIC;
+	head.version = DFS_2_0;
+	head.data_len = cpu_to_le32(msg_len);
+	head.operations = mp->req.operations;
+	head.msg_id = cpu_to_le32(mp->head.msg_id);
+	head.reserved = 0;
+	head.reserved1 = 0;
+
+	msg.head = &head;
+	msg.head_len = sizeof(head);
+	msg.data = mp->req.data;
+	msg.len = mp->req.data_len;
+	msg.sdesc_len = 0;
+	msg.sdesc = NULL;
+
+	ret = hmdfs_sendmessage(peer, &msg);
+	if (unlikely(ret)) {
+		mp_put(mp);
+		goto out;
+	}
+
+	queue_delayed_work(peer->async_wq, &mp->d_work, timeout * HZ);
+	/*
+	 * The work may not have been queued by the time its response
+	 * arrives, resulting in meaningless waiting.
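+	 * (The smp_store_release() below pairs with the
+	 * smp_load_acquire() loop in hmdfs_wait_mp_wfired(), so a
+	 * thread that observes wfired == true also observes the
+	 * queued work.)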
So we use the membar to tell the + * recv thread if the work has been queued + */ + smp_store_release(&mp->wfired, true); +out: + hmdfs_dec_msg_idr_process(peer); + return ret; +} + +static int hmdfs_record_async_readdir(struct hmdfs_peer *con, + struct sendmsg_wait_queue *msg_wq) +{ + struct hmdfs_sb_info *sbi = con->sbi; + + spin_lock(&sbi->async_readdir_msg_lock); + if (sbi->async_readdir_prohibit) { + spin_unlock(&sbi->async_readdir_msg_lock); + return -EINTR; + } + + list_add(&msg_wq->async_msg, &sbi->async_readdir_msg_list); + spin_unlock(&sbi->async_readdir_msg_lock); + + return 0; +} + +static void hmdfs_untrack_async_readdir(struct hmdfs_peer *con, + struct sendmsg_wait_queue *msg_wq) +{ + struct hmdfs_sb_info *sbi = con->sbi; + + spin_lock(&sbi->async_readdir_msg_lock); + list_del(&msg_wq->async_msg); + spin_unlock(&sbi->async_readdir_msg_lock); +} + +int hmdfs_sendmessage_request(struct hmdfs_peer *con, + struct hmdfs_send_command *sm) +{ + int time_left; + int ret = 0; + struct sendmsg_wait_queue *msg_wq = NULL; + struct hmdfs_send_data msg; + size_t outlen = sm->len + sizeof(struct hmdfs_head_cmd); + unsigned int timeout = + get_cmd_timeout(con->sbi, sm->operations.command); + struct hmdfs_head_cmd *head = NULL; + bool dec = false; + + if (!hmdfs_is_node_online(con)) + return -EAGAIN; + + if (timeout == TIMEOUT_UNINIT) { + hmdfs_err_ratelimited("send msg %d with uninitialized timeout", + sm->operations.command); + return -EINVAL; + } + + head = kzalloc(sizeof(struct hmdfs_head_cmd), GFP_KERNEL); + if (!head) + return -ENOMEM; + + sm->out_buf = NULL; + head->magic = HMDFS_MSG_MAGIC; + head->version = DFS_2_0; + head->operations = sm->operations; + head->data_len = cpu_to_le32(outlen); + head->ret_code = cpu_to_le32(sm->ret_code); + head->reserved = 0; + head->reserved1 = 0; + if (timeout != TIMEOUT_NONE) { + msg_wq = kzalloc(sizeof(*msg_wq), GFP_KERNEL); + if (!msg_wq) { + ret = -ENOMEM; + goto free; + } + ret = msg_init(con, msg_wq); + if (ret) { + kfree(msg_wq); + msg_wq = NULL; + goto free; + } + dec = true; + head->msg_id = cpu_to_le32(msg_wq->head.msg_id); + if (sm->operations.command == F_ITERATE) + msg_wq->recv_info.local_filp = sm->local_filp; + } + msg.head = head; + msg.head_len = sizeof(struct hmdfs_head_cmd); + msg.data = sm->data; + msg.len = sm->len; + msg.sdesc_len = 0; + msg.sdesc = NULL; + ret = hmdfs_sendmessage(con, &msg); + if (ret) { + hmdfs_err_ratelimited("send err sm->device_id, %lld, msg_id %u", + con->device_id, head->msg_id); + goto free; + } + + if (timeout == TIMEOUT_NONE) + goto free; + + hmdfs_dec_msg_idr_process(con); + dec = false; + + if (sm->operations.command == F_ITERATE) { + ret = hmdfs_record_async_readdir(con, msg_wq); + if (ret) { + atomic_set(&msg_wq->recv_info.state, FILE_RECV_ERR_SPC); + goto free; + } + } + + time_left = wait_event_interruptible_timeout( + msg_wq->response_q, + (atomic_read(&msg_wq->valid) == MSG_Q_END_RECV), timeout * HZ); + + if (sm->operations.command == F_ITERATE) + hmdfs_untrack_async_readdir(con, msg_wq); + + if (time_left == -ERESTARTSYS || time_left == 0) { + hmdfs_err("timeout err sm->device_id %lld, msg_id %d cmd %d", + con->device_id, head->msg_id, + head->operations.command); + if (sm->operations.command == F_ITERATE) + atomic_set(&msg_wq->recv_info.state, FILE_RECV_ERR_NET); + ret = -ETIME; + hmdfs_client_resp_statis(con->sbi, sm->operations.command, + HMDFS_RESP_TIMEOUT, 0, 0); + goto free; + } + sm->out_buf = msg_wq->buf; + msg_wq->buf = NULL; + sm->out_len = msg_wq->size - sizeof(struct 
hmdfs_head_cmd); + ret = msg_wq->ret; + +free: + if (msg_wq) + msg_put(msg_wq); + if (dec) + hmdfs_dec_msg_idr_process(con); + kfree(head); + return ret; +} + +static int hmdfs_send_slice(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + struct slice_descriptor *sdesc, void *slice_buf) +{ + int ret; + struct hmdfs_send_data msg; + struct hmdfs_head_cmd head; + int content_size = le32_to_cpu(sdesc->content_size); + int msg_len = sizeof(struct hmdfs_head_cmd) + content_size + + sizeof(struct slice_descriptor); + + head.magic = HMDFS_MSG_MAGIC; + head.version = DFS_2_0; + head.operations = cmd->operations; + head.operations.cmd_flag = C_RESPONSE; + head.data_len = cpu_to_le32(msg_len); + head.ret_code = cpu_to_le32(0); + head.msg_id = cmd->msg_id; + head.reserved = cmd->reserved; + head.reserved1 = cmd->reserved1; + + msg.head = &head; + msg.head_len = sizeof(struct hmdfs_head_cmd); + msg.sdesc = sdesc; + msg.sdesc_len = le32_to_cpu(sizeof(struct slice_descriptor)); + msg.data = slice_buf; + msg.len = content_size; + + ret = hmdfs_sendmessage(con, &msg); + + return ret; +} + +int hmdfs_readfile_response(struct hmdfs_peer *con, struct hmdfs_head_cmd *head, + struct file *filp) +{ + int ret; + const unsigned int slice_size = PAGE_SIZE; + char *slice_buf = NULL; + loff_t file_offset = 0, file_size; + ssize_t size; + struct slice_descriptor sdesc; + unsigned int slice_sn = 0; + + if (!filp) + return hmdfs_sendmessage_response(con, head, 0, NULL, 0); + + sdesc.slice_size = cpu_to_le32(slice_size); + file_size = i_size_read(file_inode(filp)); + file_size = round_up(file_size, slice_size); + sdesc.num_slices = cpu_to_le32(file_size / slice_size); + + slice_buf = kmalloc(slice_size, GFP_KERNEL); + if (!slice_buf) { + ret = -ENOMEM; + goto out; + } + + while (1) { + sdesc.slice_sn = cpu_to_le32(slice_sn++); + size = kernel_read(filp, slice_buf, (size_t)slice_size, + &file_offset); + if (IS_ERR_VALUE(size)) { + ret = (int)size; + goto out; + } + sdesc.content_size = cpu_to_le32(size); + ret = hmdfs_send_slice(con, head, &sdesc, slice_buf); + if (ret) { + hmdfs_info("Cannot send file slice %d ", + le32_to_cpu(sdesc.slice_sn)); + break; + } + if (file_offset >= i_size_read(file_inode(filp))) + break; + } + +out: + kfree(slice_buf); + if (ret) + hmdfs_sendmessage_response(con, head, 0, NULL, ret); + return ret; +} + +static void asw_release(struct kref *kref) +{ + struct hmdfs_async_work *asw = NULL; + struct hmdfs_peer *peer = NULL; + + asw = (struct hmdfs_async_work *)container_of(kref, + struct hmdfs_msg_idr_head, ref); + peer = asw->head.peer; + idr_remove(&peer->msg_idr, asw->head.msg_id); + spin_unlock(&peer->idr_lock); + kfree(asw); +} + +void asw_put(struct hmdfs_async_work *asw) +{ + kref_put_lock(&asw->head.ref, asw_release, &asw->head.peer->idr_lock); +} + +void hmdfs_recv_page_work_fn(struct work_struct *ptr) +{ + struct hmdfs_async_work *async_work = + container_of(ptr, struct hmdfs_async_work, d_work.work); + + if (async_work->head.peer->version >= DFS_2_0) + hmdfs_client_resp_statis(async_work->head.peer->sbi, + F_READPAGE, HMDFS_RESP_TIMEOUT, 0, 0); + hmdfs_err_ratelimited("timeout and release page, msg_id:%u", + async_work->head.msg_id); + asw_done(async_work); +} + +int hmdfs_sendpage_request(struct hmdfs_peer *con, + struct hmdfs_send_command *sm) +{ + int ret = 0; + struct hmdfs_send_data msg; + struct hmdfs_async_work *async_work = NULL; + size_t outlen = sm->len + sizeof(struct hmdfs_head_cmd); + struct hmdfs_head_cmd head; + unsigned int timeout; + unsigned long start = jiffies; 
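+
+	/*
+	 * Readpage is fully asynchronous: the caller's locked page is
+	 * published through the msg_idr, the delayed work armed below
+	 * acts as the timeout, and whichever side wins the
+	 * cancel_delayed_work() race - response arrival or timer
+	 * expiry - is responsible for unlocking the page via asw_done().
+	 */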
+ + WARN_ON(!sm->out_buf); + + timeout = get_cmd_timeout(con->sbi, sm->operations.command); + if (timeout == TIMEOUT_UNINIT) { + hmdfs_err("send msg %d with uninitialized timeout", + sm->operations.command); + ret = -EINVAL; + goto unlock; + } + + if (!hmdfs_is_node_online(con)) { + ret = -EAGAIN; + goto unlock; + } + + memset(&head, 0, sizeof(head)); + head.magic = HMDFS_MSG_MAGIC; + head.version = DFS_2_0; + head.operations = sm->operations; + head.data_len = cpu_to_le32(outlen); + head.ret_code = cpu_to_le32(sm->ret_code); + head.reserved = 0; + head.reserved1 = 0; + + msg.head = &head; + msg.head_len = sizeof(struct hmdfs_head_cmd); + msg.data = sm->data; + msg.len = sm->len; + msg.sdesc_len = 0; + msg.sdesc = NULL; + + async_work = kzalloc(sizeof(*async_work), GFP_KERNEL); + if (!async_work) { + ret = -ENOMEM; + goto unlock; + } + async_work->start = start; + ret = hmdfs_alloc_msg_idr(con, MSG_IDR_PAGE, async_work); + if (ret) { + hmdfs_err("alloc msg_id failed, err %d", ret); + goto unlock; + } + head.msg_id = cpu_to_le32(async_work->head.msg_id); + async_work->page = sm->out_buf; + asw_get(async_work); + INIT_DELAYED_WORK(&async_work->d_work, hmdfs_recv_page_work_fn); + ret = queue_delayed_work(con->async_wq, &async_work->d_work, + timeout * HZ); + if (!ret) { + hmdfs_err("queue_delayed_work failed, msg_id %u", head.msg_id); + goto fail_and_unlock_page; + } + ret = hmdfs_sendmessage(con, &msg); + if (ret) { + hmdfs_err("send err sm->device_id, %lld, msg_id %u", + con->device_id, head.msg_id); + if (!cancel_delayed_work(&async_work->d_work)) { + hmdfs_err("cancel async work err"); + asw_put(async_work); + hmdfs_dec_msg_idr_process(con); + goto out; + } + goto fail_and_unlock_page; + } + + asw_put(async_work); + hmdfs_dec_msg_idr_process(con); + return 0; + +fail_and_unlock_page: + asw_put(async_work); + asw_done(async_work); + hmdfs_dec_msg_idr_process(con); + return ret; +unlock: + kfree(async_work); + unlock_page(sm->out_buf); +out: + return ret; +} + +static void hmdfs_request_handle_sync(struct hmdfs_peer *con, + struct hmdfs_head_cmd *head, void *buf) +{ + unsigned long start = jiffies; + const struct cred *saved_cred = hmdfs_override_fsids(true); + + if (!saved_cred) { + hmdfs_err("prepare cred failed!"); + kfree(buf); + return; + } + + s_recv_callbacks[head->operations.command](con, head, buf); + hmdfs_statistic(con->sbi, head->operations.command, jiffies - start); + + kfree(buf); + + hmdfs_revert_fsids(saved_cred); +} + +static void hmdfs_msg_handle_sync(struct hmdfs_peer *con, + struct hmdfs_head_cmd *head, void *buf) +{ + const struct cred *old_cred = hmdfs_override_creds(con->sbi->cred); + + /* + * Reuse PF_NPROC_EXCEEDED as an indication of hmdfs server context: + * 1. PF_NPROC_EXCEEDED will set by setreuid()/setuid()/setresuid(), + * we assume kwork will not call theses syscalls. + * 2. PF_NPROC_EXCEEDED will be cleared by execv(), and kworker + * will not call it. 
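+	 *
+	 * In effect the flag serves as a cheap per-task marker: code
+	 * that runs with PF_NPROC_EXCEEDED set can assume it is
+	 * serving a request from a remote hmdfs client.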
+ */ + current->flags |= PF_NPROC_EXCEEDED; + hmdfs_request_handle_sync(con, head, buf); + current->flags &= ~PF_NPROC_EXCEEDED; + + hmdfs_revert_creds(old_cred); +} + + +static void hmdfs_request_work_fn(struct work_struct *ptr) +{ + struct work_handler_desp *desp = + container_of(ptr, struct work_handler_desp, work); + + hmdfs_msg_handle_sync(desp->peer, desp->head, desp->buf); + peer_put(desp->peer); + kfree(desp->head); + kfree(desp); +} + +static int hmdfs_msg_handle_async(struct hmdfs_peer *con, + struct hmdfs_head_cmd *head, void *buf, + struct workqueue_struct *wq, + void (*work_fn)(struct work_struct *ptr)) +{ + struct work_handler_desp *desp = NULL; + struct hmdfs_head_cmd *dup_head = NULL; + int ret; + + desp = kzalloc(sizeof(*desp), GFP_KERNEL); + if (!desp) { + ret = -ENOMEM; + goto exit_desp; + } + + dup_head = kzalloc(sizeof(*dup_head), GFP_KERNEL); + if (!dup_head) { + ret = -ENOMEM; + goto exit_desp; + } + + *dup_head = *head; + desp->peer = con; + desp->head = dup_head; + desp->buf = buf; + INIT_WORK(&desp->work, work_fn); + + peer_get(con); + queue_work(wq, &desp->work); + + ret = 0; + return ret; + +exit_desp: + kfree(desp); + return ret; +} + +static int hmdfs_request_recv(struct hmdfs_peer *con, + struct hmdfs_head_cmd *head, void *buf) +{ + int ret; + + if (head->operations.command >= F_SIZE || + !s_recv_callbacks[head->operations.command]) { + ret = -EINVAL; + hmdfs_err("NULL callback, command %d", + head->operations.command); + goto out; + } + + switch (head->operations.command) { + case F_OPEN: + case F_RELEASE: + case F_ITERATE: + case F_MKDIR: + case F_RMDIR: + case F_CREATE: + case F_UNLINK: + case F_RENAME: + case F_SETATTR: + case F_STATFS: + case F_CONNECT_REKEY: + case F_DROP_PUSH: + case F_GETATTR: + case F_FSYNC: + case F_SYNCFS: + case F_GETXATTR: + case F_SETXATTR: + case F_LISTXATTR: + case F_READPAGES_OPEN: + case F_ATOMIC_OPEN: + ret = hmdfs_msg_handle_async(con, head, buf, con->req_handle_wq, + hmdfs_request_work_fn); + break; + case F_WRITEPAGE: + case F_READPAGE: + case F_READPAGES: + hmdfs_msg_handle_sync(con, head, buf); + ret = 0; + break; + default: + hmdfs_err("Fatal! Unexpected request command %d", + head->operations.command); + ret = -EINVAL; + } + +out: + return ret; +} + +void hmdfs_response_wakeup(struct sendmsg_wait_queue *msg_info, + __u32 ret_code, __u32 data_len, void *buf) +{ + msg_info->ret = ret_code; + msg_info->size = data_len; + msg_info->buf = buf; + atomic_set(&msg_info->valid, MSG_Q_END_RECV); + wake_up_interruptible(&msg_info->response_q); +} + +static int hmdfs_readfile_slice(struct sendmsg_wait_queue *msg_info, + struct work_handler_desp *desp) +{ + struct slice_descriptor *sdesc = desp->buf; + void *slice_buf = sdesc + 1; + struct file_recv_info *recv_info = &msg_info->recv_info; + struct file *filp = recv_info->local_filp; + loff_t offset; + ssize_t written_size; + + if (atomic_read(&recv_info->state) != FILE_RECV_PROCESS) + return -EBUSY; + + offset = le32_to_cpu(sdesc->slice_size) * le32_to_cpu(sdesc->slice_sn); + + written_size = kernel_write(filp, slice_buf, + le32_to_cpu(sdesc->content_size), &offset); + if (IS_ERR_VALUE(written_size)) { + atomic_set(&recv_info->state, FILE_RECV_ERR_SPC); + hmdfs_info("Fatal! 
Cannot store a file slice %d/%d, ret = %d", + le32_to_cpu(sdesc->slice_sn), + le32_to_cpu(sdesc->num_slices), (int)written_size); + return (int)written_size; + } + + if (atomic_inc_return(&recv_info->local_fslices) >= + le32_to_cpu(sdesc->num_slices)) + atomic_set(&recv_info->state, FILE_RECV_SUCC); + return 0; +} + +static void hmdfs_file_response_work_fn(struct work_struct *ptr) +{ + struct work_handler_desp *desp = + container_of(ptr, struct work_handler_desp, work); + struct sendmsg_wait_queue *msg_info = NULL; + int ret; + atomic_t *pstate = NULL; + u8 cmd = desp->head->operations.command; + const struct cred *old_cred = + hmdfs_override_creds(desp->peer->sbi->cred); + + msg_info = (struct sendmsg_wait_queue *)hmdfs_find_msg_head(desp->peer, + le32_to_cpu(desp->head->msg_id)); + if (!msg_info || atomic_read(&msg_info->valid) != MSG_Q_SEND) { + hmdfs_client_resp_statis(desp->peer->sbi, cmd, HMDFS_RESP_DELAY, + 0, 0); + hmdfs_info("cannot find msg(id %d)", + le32_to_cpu(desp->head->msg_id)); + goto free; + } + + ret = le32_to_cpu(desp->head->ret_code); + if (ret || le32_to_cpu(desp->head->data_len) == sizeof(*desp->head)) + goto wakeup; + ret = hmdfs_readfile_slice(msg_info, desp); + pstate = &msg_info->recv_info.state; + if (ret || atomic_read(pstate) != FILE_RECV_PROCESS) + goto wakeup; + goto free; + +wakeup: + hmdfs_response_wakeup(msg_info, ret, sizeof(struct hmdfs_head_cmd), + NULL); + hmdfs_client_resp_statis(desp->peer->sbi, cmd, HMDFS_RESP_NORMAL, + msg_info->start, jiffies); +free: + if (msg_info) + msg_put(msg_info); + peer_put(desp->peer); + hmdfs_revert_creds(old_cred); + + kfree(desp->buf); + kfree(desp->head); + kfree(desp); +} + +static void hmdfs_wait_mp_wfired(struct hmdfs_msg_parasite *mp) +{ + /* We just cancel queued works */ + while (unlikely(!smp_load_acquire(&mp->wfired))) + usleep_range(ACQUIRE_WFIRED_INTVAL_USEC_MIN, + ACQUIRE_WFIRED_INTVAL_USEC_MAX); +} + +int hmdfs_response_handle_sync(struct hmdfs_peer *con, + struct hmdfs_head_cmd *head, void *buf) +{ + struct sendmsg_wait_queue *msg_info = NULL; + struct hmdfs_msg_parasite *mp = NULL; + struct hmdfs_msg_idr_head *msg_head = NULL; + u32 msg_id = le32_to_cpu(head->msg_id); + bool woke = false; + u8 cmd = head->operations.command; + + msg_head = hmdfs_find_msg_head(con, msg_id); + if (!msg_head) + goto out; + + switch (msg_head->type) { + case MSG_IDR_MESSAGE_SYNC: + msg_info = (struct sendmsg_wait_queue *)msg_head; + if (atomic_read(&msg_info->valid) == MSG_Q_SEND) { + hmdfs_response_wakeup(msg_info, + le32_to_cpu(head->ret_code), + le32_to_cpu(head->data_len), buf); + hmdfs_client_resp_statis(con->sbi, cmd, + HMDFS_RESP_NORMAL, + msg_info->start, jiffies); + woke = true; + } + + msg_put(msg_info); + break; + case MSG_IDR_MESSAGE_ASYNC: + mp = (struct hmdfs_msg_parasite *)msg_head; + + hmdfs_wait_mp_wfired(mp); + if (cancel_delayed_work(&mp->d_work)) { + mp->resp.out_buf = buf; + mp->resp.out_len = + le32_to_cpu(head->data_len) - sizeof(*head); + mp->resp.ret_code = le32_to_cpu(head->ret_code); + queue_delayed_work(con->async_wq, &mp->d_work, 0); + hmdfs_client_resp_statis(con->sbi, cmd, + HMDFS_RESP_NORMAL, mp->start, + jiffies); + woke = true; + } + mp_put(mp); + break; + default: + hmdfs_err("receive incorrect msg type %d msg_id %d cmd %d", + msg_head->type, msg_id, cmd); + break; + } + + if (likely(woke)) + return 0; +out: + hmdfs_client_resp_statis(con->sbi, cmd, HMDFS_RESP_DELAY, 0, 0); + hmdfs_info("cannot find msg_id %d cmd %d", msg_id, cmd); + return -EINVAL; +} + +static int 
hmdfs_response_recv(struct hmdfs_peer *con, + struct hmdfs_head_cmd *head, void *buf) +{ + __u16 command = head->operations.command; + int ret; + + if (command >= F_SIZE) { + ret = -EINVAL; + return ret; + } + + switch (head->operations.command) { + case F_OPEN: + case F_RELEASE: + case F_READPAGE: + case F_WRITEPAGE: + case F_MKDIR: + case F_RMDIR: + case F_CREATE: + case F_UNLINK: + case F_RENAME: + case F_SETATTR: + case F_STATFS: + case F_CONNECT_REKEY: + case F_DROP_PUSH: + case F_GETATTR: + case F_FSYNC: + case F_SYNCFS: + case F_GETXATTR: + case F_SETXATTR: + case F_LISTXATTR: + ret = hmdfs_response_handle_sync(con, head, buf); + return ret; + + case F_ITERATE: + ret = hmdfs_msg_handle_async(con, head, buf, con->async_wq, + hmdfs_file_response_work_fn); + return ret; + + default: + hmdfs_err("Fatal! Unexpected response command %d", + head->operations.command); + ret = -EINVAL; + return ret; + } +} + +static void hmdfs_recv_mesg_callback(struct hmdfs_peer *con, void *head, + void *buf) +{ + struct hmdfs_head_cmd *hmdfs_head = (struct hmdfs_head_cmd *)head; + + trace_hmdfs_recv_mesg_callback(hmdfs_head); + + if (hmdfs_message_verify(con, hmdfs_head, buf) < 0) { + hmdfs_info("Message %d has been abandoned", hmdfs_head->msg_id); + goto out_err; + } + + switch (hmdfs_head->operations.cmd_flag) { + case C_REQUEST: + if (hmdfs_request_recv(con, hmdfs_head, buf) < 0) + goto out_err; + break; + + case C_RESPONSE: + if (hmdfs_response_recv(con, hmdfs_head, buf) < 0) + goto out_err; + break; + + default: + hmdfs_err("Fatal! Unexpected msg cmd %d", + hmdfs_head->operations.cmd_flag); + break; + } + return; + +out_err: + kfree(buf); +} + +static inline void hmdfs_recv_page_callback(struct hmdfs_peer *con, + struct hmdfs_head_cmd *head, + int err, void *data) +{ + if (head->operations.command == F_READPAGE) + hmdfs_client_recv_readpage(head, err, data); +} + +static const struct connection_operations conn_operations[] = { + [PROTOCOL_VERSION] = { + .recvmsg = hmdfs_recv_mesg_callback, + .recvpage = hmdfs_recv_page_callback, + /* remote device operations */ + .remote_file_fops = + &hmdfs_dev_file_fops_remote, + .remote_file_iops = + &hmdfs_dev_file_iops_remote, + .remote_file_aops = + &hmdfs_dev_file_aops_remote, + .remote_unlink = + hmdfs_dev_unlink_from_con, + .remote_readdir = + hmdfs_dev_readdir_from_con, + } +}; + +const struct connection_operations *hmdfs_get_peer_operation(__u8 version) +{ + if (version <= INVALID_VERSION || version >= MAX_VERSION) + return NULL; + + if (version <= USERSPACE_MAX_VER) + return &(conn_operations[USERDFS_VERSION]); + else + return &(conn_operations[PROTOCOL_VERSION]); +} + +void hmdfs_wakeup_parasite(struct hmdfs_msg_parasite *mp) +{ + hmdfs_wait_mp_wfired(mp); + if (!cancel_delayed_work(&mp->d_work)) + hmdfs_err("cancel parasite work err msg_id=%d cmd=%d", + mp->head.msg_id, mp->req.operations.command); + else + async_request_cb_on_wakeup_fn(&mp->d_work.work); +} + +void hmdfs_wakeup_async_work(struct hmdfs_async_work *async_work) +{ + if (!cancel_delayed_work(&async_work->d_work)) + hmdfs_err("cancel async work err msg_id=%d", + async_work->head.msg_id); + else + hmdfs_recv_page_work_fn(&async_work->d_work.work); +} diff --git a/fs/hmdfs/comm/socket_adapter.h b/fs/hmdfs/comm/socket_adapter.h new file mode 100644 index 0000000000000000000000000000000000000000..ba4c672d7bcc998fd99b7ce9974003007fd1f49b --- /dev/null +++ b/fs/hmdfs/comm/socket_adapter.h @@ -0,0 +1,193 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/socket_adapter.h + * + * 
Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef SOCKET_ADAPTER_H +#define SOCKET_ADAPTER_H + +#include +#include + +#include "connection.h" +#include "hmdfs.h" +#include "protocol.h" + +#define HMDFS_KEY_SIZE 32 +#define HMDFS_IV_SIZE 12 +#define HMDFS_TAG_SIZE 16 +#define HMDFS_CID_SIZE 64 +#define INVALID_SOCKET_FD (-1) + +#define HMDFS_IDR_RESCHED_COUNT 512 + +struct connection_operations { + void (*recvmsg)(struct hmdfs_peer *con, void *head, void *buf); + void (*recvpage)(struct hmdfs_peer *con, struct hmdfs_head_cmd *head, + int err, void *data); + const struct file_operations *remote_file_fops; + const struct inode_operations *remote_file_iops; + const struct address_space_operations *remote_file_aops; + int (*remote_unlink)(struct hmdfs_peer *con, struct dentry *dentry); + int (*remote_readdir)(struct hmdfs_peer *con, struct file *file, + struct dir_context *ctx); + struct hmdfs_lookup_ret *(*remote_lookup)(struct hmdfs_peer *con, + const char *relative_path, + const char *d_name); +}; + +/***************************************************************************** + * connections(TCP, UDP, .etc) adapter for RPC + *****************************************************************************/ + +struct work_handler_desp { + struct work_struct work; + struct hmdfs_peer *peer; + struct hmdfs_head_cmd *head; + void *buf; +}; + +struct work_readfile_request_async { + struct work_struct work; + struct hmdfs_peer *con; + struct hmdfs_send_command sm; +}; + +static inline void hmdfs_init_cmd(struct hmdfs_cmd *op, u8 cmd) +{ + op->reserved = 0; + op->cmd_flag = C_REQUEST; + op->command = cmd; + op->reserved2 = 0; +} + +int hmdfs_send_async_request(struct hmdfs_peer *peer, + const struct hmdfs_req *req); +int hmdfs_sendmessage_request(struct hmdfs_peer *con, + struct hmdfs_send_command *msg); +int hmdfs_sendpage_request(struct hmdfs_peer *con, + struct hmdfs_send_command *msg); + +int hmdfs_sendmessage_response(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, __u32 data_len, + void *buf, __u32 ret_code); +int hmdfs_readfile_response(struct hmdfs_peer *con, struct hmdfs_head_cmd *head, + struct file *filp); +const struct connection_operations *hmdfs_get_peer_operation(__u8 version); + +void hmdfs_recv_page_work_fn(struct work_struct *ptr); + +/***************************************************************************** + * statistics info for RPC + *****************************************************************************/ + +enum hmdfs_resp_type { + HMDFS_RESP_NORMAL, + HMDFS_RESP_DELAY, + HMDFS_RESP_TIMEOUT +}; + +struct server_statistic { + unsigned long long cnt; /* request received */ + unsigned long long max; /* max processing time */ + unsigned long long total; /* total processing time */ + unsigned long long snd_cnt; /* resp send to client */ + unsigned long long snd_fail_cnt; /* send resp to client failed cnt */ +}; + +struct client_statistic { + unsigned long long snd_cnt; /* request send to server */ + unsigned long long resp_cnt; /* response receive from server */ + unsigned long long timeout_cnt; /* no respone from server */ + unsigned long long delay_resp_cnt; /* delay response from server */ + unsigned long long max; /* max waiting time */ + unsigned long long total; /* total waiting time */ + unsigned long long snd_fail_cnt; /* request send failed to server */ +}; + + +static inline void hmdfs_statistic(struct hmdfs_sb_info *sbi, u8 cmd, + unsigned long jiff) +{ + if (cmd >= F_SIZE) + return; + + sbi->s_server_statis[cmd].cnt++; + 
sbi->s_server_statis[cmd].total += jiff; + if (jiff > sbi->s_server_statis[cmd].max) + sbi->s_server_statis[cmd].max = jiff; +} + +static inline void hmdfs_server_snd_statis(struct hmdfs_sb_info *sbi, + u8 cmd, int ret) +{ + if (cmd >= F_SIZE) + return; + ret ? sbi->s_server_statis[cmd].snd_fail_cnt++ : + sbi->s_server_statis[cmd].snd_cnt++; +} + +static inline void hmdfs_client_snd_statis(struct hmdfs_sb_info *sbi, + u8 cmd, int ret) +{ + if (cmd >= F_SIZE) + return; + ret ? sbi->s_client_statis[cmd].snd_fail_cnt++ : + sbi->s_client_statis[cmd].snd_cnt++; +} + +extern void hmdfs_client_resp_statis(struct hmdfs_sb_info *sbi, u8 cmd, + enum hmdfs_resp_type type, + unsigned long start, unsigned long end); + +/***************************************************************************** + * timeout configuration for RPC + *****************************************************************************/ + +enum HMDFS_TIME_OUT { + TIMEOUT_NONE = 0, + TIMEOUT_COMMON = 4, + TIMEOUT_6S = 6, + TIMEOUT_30S = 30, + TIMEOUT_1M = 60, + TIMEOUT_90S = 90, + TIMEOUT_CONFIG = UINT_MAX - 1, // for hmdfs_req to read from config + TIMEOUT_UNINIT = UINT_MAX, +}; + +static inline int get_cmd_timeout(struct hmdfs_sb_info *sbi, enum FILE_CMD cmd) +{ + return sbi->s_cmd_timeout[cmd]; +} + +static inline void set_cmd_timeout(struct hmdfs_sb_info *sbi, enum FILE_CMD cmd, + unsigned int value) +{ + sbi->s_cmd_timeout[cmd] = value; +} + +void hmdfs_response_wakeup(struct sendmsg_wait_queue *msg_info, + __u32 ret_code, __u32 data_len, void *buf); + +void hmdfs_wakeup_parasite(struct hmdfs_msg_parasite *mp); + +void hmdfs_wakeup_async_work(struct hmdfs_async_work *async_work); + +void msg_put(struct sendmsg_wait_queue *msg_wq); +void head_put(struct hmdfs_msg_idr_head *head); +void mp_put(struct hmdfs_msg_parasite *mp); +void asw_put(struct hmdfs_async_work *asw); +static inline void asw_done(struct hmdfs_async_work *asw) +{ + if (asw->page) + unlock_page(asw->page); + asw_put(asw); +} + +static inline void asw_get(struct hmdfs_async_work *asw) +{ + kref_get(&asw->head.ref); +} +#endif diff --git a/fs/hmdfs/comm/transport.c b/fs/hmdfs/comm/transport.c new file mode 100644 index 0000000000000000000000000000000000000000..cb57da2c53f806ea61a9684bfd4c7d4150adc86b --- /dev/null +++ b/fs/hmdfs/comm/transport.c @@ -0,0 +1,1220 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/comm/transport.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
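+ *
+ * TCP transport for hmdfs: reads and writes hmdfs_head_cmd-framed
+ * messages on a kernel socket, with all socket I/O wrapped in
+ * memalloc_nofs_save()/memalloc_nofs_restore() so allocations inside
+ * kernel_sendmsg()/kernel_recvmsg() cannot recurse into the
+ * filesystem.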
+ */ + +#include "transport.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "device_node.h" +#include "hmdfs_trace.h" +#include "socket_adapter.h" +#include "authority/authentication.h" + +#ifdef CONFIG_HMDFS_FS_ENCRYPTION +#include +#include "crypto.h" +#endif + +typedef void (*connect_recv_handler)(struct connection *, void *, void *, + __u32); + +static connect_recv_handler connect_recv_callback[CONNECT_STAT_COUNT] = { + [CONNECT_STAT_WAIT_REQUEST] = connection_handshake_recv_handler, + [CONNECT_STAT_WAIT_RESPONSE] = connection_handshake_recv_handler, + [CONNECT_STAT_WORKING] = connection_working_recv_handler, + [CONNECT_STAT_STOP] = NULL, + [CONNECT_STAT_WAIT_ACK] = connection_handshake_recv_handler, + [CONNECT_STAT_NEGO_FAIL] = NULL, +}; + +static int recvmsg_nofs(struct socket *sock, struct msghdr *msg, + struct kvec *vec, size_t num, size_t size, int flags) +{ + unsigned int nofs_flags; + int ret; + + /* enable NOFS for memory allocation */ + nofs_flags = memalloc_nofs_save(); + ret = kernel_recvmsg(sock, msg, vec, num, size, flags); + memalloc_nofs_restore(nofs_flags); + + return ret; +} + +static int sendmsg_nofs(struct socket *sock, struct msghdr *msg, + struct kvec *vec, size_t num, size_t size) +{ + unsigned int nofs_flags; + int ret; + + /* enable NOFS for memory allocation */ + nofs_flags = memalloc_nofs_save(); + ret = kernel_sendmsg(sock, msg, vec, num, size); + memalloc_nofs_restore(nofs_flags); + + return ret; +} + +static int tcp_set_recvtimeo(struct socket *sock, int timeout) +{ + long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC); + + tcp_sock_set_nodelay(sock->sk); + tcp_sock_set_user_timeout(sock->sk, jiffies_left); + return 0; +} + +uint32_t hmdfs_tcpi_rtt(struct hmdfs_peer *con) +{ + uint32_t rtt_us = 0; + struct connection *conn_impl = NULL; + struct tcp_handle *tcp = NULL; + + conn_impl = get_conn_impl(con, CONNECT_TYPE_TCP); + if (!conn_impl) + return rtt_us; + tcp = (struct tcp_handle *)(conn_impl->connect_handle); + if (tcp->sock) + rtt_us = tcp_sk(tcp->sock->sk)->srtt_us >> 3; + connection_put(conn_impl); + return rtt_us; +} + +static int tcp_read_head_from_socket(struct socket *sock, void *buf, + unsigned int to_read) +{ + int rc = 0; + struct msghdr hmdfs_msg; + struct kvec iov; + + iov.iov_base = buf; + iov.iov_len = to_read; + memset(&hmdfs_msg, 0, sizeof(hmdfs_msg)); + hmdfs_msg.msg_flags = MSG_WAITALL; + hmdfs_msg.msg_control = NULL; + hmdfs_msg.msg_controllen = 0; + rc = recvmsg_nofs(sock, &hmdfs_msg, &iov, 1, to_read, + hmdfs_msg.msg_flags); + if (rc == -EAGAIN || rc == -ETIMEDOUT || rc == -EINTR || + rc == -EBADMSG) { + usleep_range(1000, 2000); + return -EAGAIN; + } + // error occurred + if (rc != to_read) { + hmdfs_err("tcp recv error %d", rc); + return -ESHUTDOWN; + } + return 0; +} + +static int tcp_read_buffer_from_socket(struct socket *sock, void *buf, + unsigned int to_read) +{ + int read_cnt = 0; + int retry_time = 0; + int rc = 0; + struct msghdr hmdfs_msg; + struct kvec iov; + + do { + iov.iov_base = (char *)buf + read_cnt; + iov.iov_len = to_read - read_cnt; + memset(&hmdfs_msg, 0, sizeof(hmdfs_msg)); + hmdfs_msg.msg_flags = MSG_WAITALL; + hmdfs_msg.msg_control = NULL; + hmdfs_msg.msg_controllen = 0; + rc = recvmsg_nofs(sock, &hmdfs_msg, &iov, 1, + to_read - read_cnt, hmdfs_msg.msg_flags); + if (rc == -EBADMSG) { + usleep_range(1000, 2000); + continue; + } + if (rc == -EAGAIN || rc == -ETIMEDOUT || rc == -EINTR) { + retry_time++; + hmdfs_info("read again %d", rc); + 
usleep_range(1000, 2000);
+			continue;
+		}
+		// error occurred
+		if (rc <= 0) {
+			hmdfs_err("tcp recv error %d", rc);
+			return -ESHUTDOWN;
+		}
+		read_cnt += rc;
+		if (read_cnt != to_read)
+			hmdfs_info("read again %d/%d", read_cnt, to_read);
+	} while (read_cnt < to_read && retry_time < MAX_RECV_RETRY_TIMES);
+	if (read_cnt == to_read)
+		return 0;
+	return -ESHUTDOWN;
+}
+
+static int hmdfs_drop_readpage_buffer(struct socket *sock,
+				      struct hmdfs_head_cmd *recv)
+{
+	unsigned int len;
+	void *buf = NULL;
+	int err;
+
+	len = le32_to_cpu(recv->data_len) - sizeof(struct hmdfs_head_cmd);
+	if (len > HMDFS_PAGE_SIZE || !len) {
+		hmdfs_err("recv invalid readpage length %u", len);
+		return -EINVAL;
+	}
+
+	/* Abort the connection if no memory */
+	buf = kmalloc(len, GFP_KERNEL);
+	if (!buf)
+		return -ESHUTDOWN;
+
+	err = tcp_read_buffer_from_socket(sock, buf, len);
+	kfree(buf);
+
+	return err;
+}
+
+static int hmdfs_get_readpage_buffer(struct socket *sock,
+				     struct hmdfs_head_cmd *recv,
+				     struct page *page)
+{
+	char *page_buf = NULL;
+	unsigned int out_len;
+	int err;
+
+	out_len = le32_to_cpu(recv->data_len) - sizeof(struct hmdfs_head_cmd);
+	if (out_len > HMDFS_PAGE_SIZE || !out_len) {
+		hmdfs_err("recv invalid readpage length %u", out_len);
+		return -EINVAL;
+	}
+
+	page_buf = kmap(page);
+	err = tcp_read_buffer_from_socket(sock, page_buf, out_len);
+	if (err)
+		goto out_unmap;
+	if (out_len != HMDFS_PAGE_SIZE)
+		memset(page_buf + out_len, 0, HMDFS_PAGE_SIZE - out_len);
+
+out_unmap:
+	kunmap(page);
+	return err;
+}
+
+static int tcp_recvpage_tls(struct connection *connect,
+			    struct hmdfs_head_cmd *recv)
+{
+	int ret = 0;
+	struct tcp_handle *tcp = NULL;
+	struct hmdfs_peer *node = NULL;
+	struct page *page = NULL;
+	struct hmdfs_async_work *async_work = NULL;
+	int rd_err;
+
+	if (!connect) {
+		hmdfs_err("tcp connect == NULL");
+		return -ESHUTDOWN;
+	}
+	node = connect->node;
+	tcp = (struct tcp_handle *)(connect->connect_handle);
+
+	rd_err = le32_to_cpu(recv->ret_code);
+	if (rd_err)
+		hmdfs_warning("tcp: readpage from peer %llu ret err %d",
+			      node->device_id, rd_err);
+
+	async_work = (struct hmdfs_async_work *)hmdfs_find_msg_head(node,
+					le32_to_cpu(recv->msg_id));
+	if (!async_work || !cancel_delayed_work(&async_work->d_work))
+		goto out;
+
+	page = async_work->page;
+	if (!page) {
+		hmdfs_err("page not found");
+		goto out;
+	}
+
+	if (!rd_err) {
+		ret = hmdfs_get_readpage_buffer(tcp->sock, recv, page);
+		if (ret)
+			rd_err = ret;
+	}
+	node->conn_operations->recvpage(node, recv, rd_err, async_work);
+	asw_put(async_work);
+	return ret;
+
+out:
+	/* async_work will be released by recvpage in normal procedure */
+	if (async_work)
+		asw_put(async_work);
+	hmdfs_err_ratelimited("timeout, drop page data");
+	hmdfs_client_resp_statis(node->sbi, F_READPAGE, HMDFS_RESP_DELAY, 0, 0);
+	if (!rd_err)
+		ret = hmdfs_drop_readpage_buffer(tcp->sock, recv);
+	return ret;
+}
+
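+/*
+ * AEAD requests may complete asynchronously (crypto_aead_encrypt/decrypt
+ * returning -EINPROGRESS or -EBUSY): the crypto layer then invokes
+ * aeadcipher_cb(), which records the result and completes the waiter
+ * blocked in aeadcipher_en_de().
+ */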
+static void aeadcipher_cb(struct crypto_async_request *req, int error)
+{
+	struct aeadcrypt_result *result = req->data;
+
+	if (error == -EINPROGRESS)
+		return;
+	result->err = error;
+	complete(&result->completion);
+}
+
+static int aeadcipher_en_de(struct aead_request *req,
+			    struct aeadcrypt_result result, int flag)
+{
+	int rc = 0;
+
+	if (flag)
+		rc = crypto_aead_encrypt(req);
+	else
+		rc = crypto_aead_decrypt(req);
+	switch (rc) {
+	case 0:
+		break;
+	case -EINPROGRESS:
+	case -EBUSY:
+		rc = wait_for_completion_interruptible(&result.completion);
+		if (!rc && !result.err)
+			reinit_completion(&result.completion);
+		break;
+	default:
+		hmdfs_err("returned rc %d result %d", rc, result.err);
+		break;
+	}
+	return rc;
+}
+
+static int set_aeadcipher(struct crypto_aead *tfm, struct aead_request *req,
+			  struct aeadcrypt_result *result)
+{
+	init_completion(&result->completion);
+	aead_request_set_callback(
+		req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
+		aeadcipher_cb, result);
+	return 0;
+}
+
+int aeadcipher_encrypt_buffer(struct connection *con, __u8 *src_buf,
+			      size_t src_len, __u8 *dst_buf, size_t dst_len)
+{
+	int ret = 0;
+	struct scatterlist src, dst;
+	struct aead_request *req = NULL;
+	struct aeadcrypt_result result;
+	__u8 cipher_iv[HMDFS_IV_SIZE];
+
+	if (src_len <= 0)
+		return -EINVAL;
+	if (!virt_addr_valid(src_buf) || !virt_addr_valid(dst_buf)) {
+		WARN_ON(1);
+		hmdfs_err("encrypt address is invalid");
+		return -EPERM;
+	}
+
+	get_random_bytes(cipher_iv, HMDFS_IV_SIZE);
+	memcpy(dst_buf, cipher_iv, HMDFS_IV_SIZE);
+	req = aead_request_alloc(con->tfm, GFP_KERNEL);
+	if (!req) {
+		hmdfs_err("aead_request_alloc() failed");
+		return -ENOMEM;
+	}
+	ret = set_aeadcipher(con->tfm, req, &result);
+	if (ret) {
+		hmdfs_err("set_aeadcipher for encrypt failed");
+		goto out;
+	}
+
+	sg_init_one(&src, src_buf, src_len);
+	sg_init_one(&dst, dst_buf + HMDFS_IV_SIZE, dst_len - HMDFS_IV_SIZE);
+	aead_request_set_crypt(req, &src, &dst, src_len, cipher_iv);
+	aead_request_set_ad(req, 0);
+	ret = aeadcipher_en_de(req, result, ENCRYPT_FLAG);
+out:
+	aead_request_free(req);
+	return ret;
+}
+
+int aeadcipher_decrypt_buffer(struct connection *con, __u8 *src_buf,
+			      size_t src_len, __u8 *dst_buf, size_t dst_len)
+{
+	int ret = 0;
+	struct scatterlist src, dst;
+	struct aead_request *req = NULL;
+	struct aeadcrypt_result result;
+	__u8 cipher_iv[HMDFS_IV_SIZE];
+
+	if (src_len <= HMDFS_IV_SIZE + HMDFS_TAG_SIZE)
+		return -EINVAL;
+	if (!virt_addr_valid(src_buf) || !virt_addr_valid(dst_buf)) {
+		WARN_ON(1);
+		hmdfs_err("decrypt address is invalid");
+		return -EPERM;
+	}
+
+	memcpy(cipher_iv, src_buf, HMDFS_IV_SIZE);
+	req = aead_request_alloc(con->tfm, GFP_KERNEL);
+	if (!req) {
+		hmdfs_err("aead_request_alloc() failed");
+		return -ENOMEM;
+	}
+	ret = set_aeadcipher(con->tfm, req, &result);
+	if (ret) {
+		hmdfs_err("set_aeadcipher for decrypt failed");
+		goto out;
+	}
+
+	sg_init_one(&src, src_buf + HMDFS_IV_SIZE, src_len - HMDFS_IV_SIZE);
+	sg_init_one(&dst, dst_buf, dst_len);
+	aead_request_set_crypt(req, &src, &dst, src_len - HMDFS_IV_SIZE,
+			       cipher_iv);
+	aead_request_set_ad(req, 0);
+	ret = aeadcipher_en_de(req, result, DECRYPT_FLAG);
+out:
+	aead_request_free(req);
+	return ret;
+}
+
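+/*
+ * Non-TLS (cipher) wire format of one message body:
+ * [hmdfs_head_cmd][IV (HMDFS_IV_SIZE) | ciphertext | GCM tag (HMDFS_TAG_SIZE)]
+ * so the on-wire payload is IV + TAG bytes longer than the plaintext that
+ * connect_recv_callback finally sees.
+ */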
+static int tcp_recvbuffer_cipher(struct connection *connect,
+				 struct hmdfs_head_cmd *recv)
+{
+	int ret = 0;
+	struct tcp_handle *tcp = NULL;
+	size_t cipherbuffer_len;
+	__u8 *cipherbuffer = NULL;
+	size_t outlen = 0;
+	__u8 *outdata = NULL;
+	__u32 recv_len = le32_to_cpu(recv->data_len);
+
+	tcp = (struct tcp_handle *)(connect->connect_handle);
+	if (recv_len == sizeof(struct hmdfs_head_cmd))
+		goto out_recv_head;
+	else if (recv_len > sizeof(struct hmdfs_head_cmd) &&
+		 recv_len <= ADAPTER_MESSAGE_LENGTH)
+		cipherbuffer_len = recv_len - sizeof(struct hmdfs_head_cmd) +
+				   HMDFS_IV_SIZE + HMDFS_TAG_SIZE;
+	else
+		return -ENOMSG;
+	cipherbuffer = kzalloc(cipherbuffer_len, GFP_KERNEL);
+	if (!cipherbuffer) {
+		hmdfs_err("zalloc cipherbuffer error");
+		return -ESHUTDOWN;
+	}
+	outlen = cipherbuffer_len - HMDFS_IV_SIZE - HMDFS_TAG_SIZE;
+	outdata = kzalloc(outlen, GFP_KERNEL);
+	if (!outdata) {
+		hmdfs_err("encrypt zalloc outdata error");
+		kfree(cipherbuffer);
+		return -ESHUTDOWN;
+	}
+
+	ret = tcp_read_buffer_from_socket(tcp->sock, cipherbuffer,
+					  cipherbuffer_len);
+	if (ret)
+		goto out_recv;
+	ret = aeadcipher_decrypt_buffer(connect, cipherbuffer, cipherbuffer_len,
+					outdata, outlen);
+	if (ret) {
+		hmdfs_err("decrypt_buf fail");
+		goto out_recv;
+	}
+out_recv_head:
+	if (connect_recv_callback[connect->status]) {
+		connect_recv_callback[connect->status](connect, recv, outdata,
+						       outlen);
+	} else {
+		kfree(outdata);
+		hmdfs_err("encrypt callback NULL status %d", connect->status);
+	}
+	kfree(cipherbuffer);
+	return ret;
+out_recv:
+	kfree(cipherbuffer);
+	kfree(outdata);
+	return ret;
+}
+
+static int tcp_recvbuffer_tls(struct connection *connect,
+			      struct hmdfs_head_cmd *recv)
+{
+	int ret = 0;
+	struct tcp_handle *tcp = NULL;
+	size_t outlen;
+	__u8 *outdata = NULL;
+	__u32 recv_len = le32_to_cpu(recv->data_len);
+
+	tcp = (struct tcp_handle *)(connect->connect_handle);
+	outlen = recv_len - sizeof(struct hmdfs_head_cmd);
+	if (outlen == 0)
+		goto out_recv_head;
+
+	/*
+	 * NOTE: Up to half of the allocated memory may be wasted due to
+	 * internal fragmentation, however the memory allocation times
+	 * can be reduced and we don't have to adjust existing message
+	 * transporting mechanism
+	 */
+	outdata = kmalloc(outlen, GFP_KERNEL);
+	if (!outdata)
+		return -ESHUTDOWN;
+
+	ret = tcp_read_buffer_from_socket(tcp->sock, outdata, outlen);
+	if (ret) {
+		kfree(outdata);
+		return ret;
+	}
+	tcp->connect->stat.recv_bytes += outlen;
+out_recv_head:
+	if (connect_recv_callback[connect->status]) {
+		connect_recv_callback[connect->status](connect, recv, outdata,
+						       outlen);
+	} else {
+		kfree(outdata);
+		hmdfs_err("callback NULL status %d", connect->status);
+	}
+	return 0;
+}
+
+static int tcp_receive_from_sock(struct tcp_handle *tcp)
+{
+	struct hmdfs_head_cmd *recv = NULL;
+	int ret = 0;
+
+	if (!tcp) {
+		hmdfs_info("tcp recv thread !tcp");
+		return -ESHUTDOWN;
+	}
+
+	if (!tcp->sock) {
+		hmdfs_info("tcp recv thread !sock");
+		return -ESHUTDOWN;
+	}
+
+	recv = kmem_cache_alloc(tcp->recv_cache, GFP_KERNEL);
+	if (!recv) {
+		hmdfs_info("tcp recv thread !cache");
+		return -ESHUTDOWN;
+	}
+
+	ret = tcp_read_head_from_socket(tcp->sock, recv,
+					sizeof(struct hmdfs_head_cmd));
+	if (ret)
+		goto out;
+
+	tcp->connect->stat.recv_bytes += sizeof(struct hmdfs_head_cmd);
+	tcp->connect->stat.recv_message_count++;
+
+	if (recv->magic != HMDFS_MSG_MAGIC) {
+		hmdfs_info_ratelimited("tcp recv fd %d wrong magic. drop message",
+				       tcp->fd);
+		goto out;
+	}
+
+	if ((le32_to_cpu(recv->data_len) >
+	     HMDFS_MAX_MESSAGE_LEN + sizeof(struct hmdfs_head_cmd)) ||
+	    (le32_to_cpu(recv->data_len) < sizeof(struct hmdfs_head_cmd))) {
+		hmdfs_info("tcp recv fd %d length error. 
drop message", + tcp->fd); + goto out; + } + + if (recv->version > USERSPACE_MAX_VER && + tcp->connect->status == CONNECT_STAT_WORKING && + recv->operations.command == F_READPAGE && + recv->operations.cmd_flag == C_RESPONSE) { + ret = tcp_recvpage_tls(tcp->connect, recv); + goto out; + } + + if (tcp->connect->status == CONNECT_STAT_WORKING && + recv->version > USERSPACE_MAX_VER) + ret = tcp_recvbuffer_tls(tcp->connect, recv); + else + ret = tcp_recvbuffer_cipher(tcp->connect, recv); + +out: + kmem_cache_free(tcp->recv_cache, recv); + return ret; +} + +static bool tcp_handle_is_available(struct tcp_handle *tcp) +{ +#ifdef CONFIG_HMDFS_FS_ENCRYPTION + struct tls_context *tls_ctx = NULL; + struct tls_sw_context_rx *ctx = NULL; + +#endif + if (!tcp || !tcp->sock || !tcp->sock->sk) { + hmdfs_err("Invalid tcp connection"); + return false; + } + + if (tcp->sock->sk->sk_state != TCP_ESTABLISHED) { + hmdfs_err("TCP conn %d is broken, current sk_state is %d", + tcp->fd, tcp->sock->sk->sk_state); + return false; + } + + if (tcp->sock->state != SS_CONNECTING && + tcp->sock->state != SS_CONNECTED) { + hmdfs_err("TCP conn %d is broken, current sock state is %d", + tcp->fd, tcp->sock->state); + return false; + } + +#ifdef CONFIG_HMDFS_FS_ENCRYPTION + tls_ctx = tls_get_ctx(tcp->sock->sk); + if (tls_ctx) { + ctx = tls_sw_ctx_rx(tls_ctx); + if (ctx && ctx->strp.stopped) { + hmdfs_err( + "TCP conn %d is broken, the strparser has stopped", + tcp->fd); + return false; + } + } +#endif + return true; +} + +static int tcp_recv_thread(void *arg) +{ + int ret = 0; + struct tcp_handle *tcp = (struct tcp_handle *)arg; + const struct cred *old_cred; + + WARN_ON(!tcp); + WARN_ON(!tcp->sock); + set_freezable(); + + old_cred = hmdfs_override_creds(tcp->connect->node->sbi->system_cred); + + while (!kthread_should_stop()) { + /* + * 1. In case the redundant connection has not been mounted on + * a peer + * 2. Lock is unnecessary since a transient state is acceptable + */ + if (tcp_handle_is_available(tcp) && + list_empty(&tcp->connect->list)) + goto freeze; + if (!mutex_trylock(&tcp->close_mutex)) + continue; + if (tcp_handle_is_available(tcp)) + ret = tcp_receive_from_sock(tcp); + else + ret = -ESHUTDOWN; + /* + * This kthread will exit if ret is -ESHUTDOWN, thus we need to + * set recv_task to NULL to avoid calling kthread_stop() from + * tcp_close_socket(). + */ + if (ret == -ESHUTDOWN) + tcp->recv_task = NULL; + mutex_unlock(&tcp->close_mutex); + if (ret == -ESHUTDOWN) { + hmdfs_node_inc_evt_seq(tcp->connect->node); + tcp->connect->status = CONNECT_STAT_STOP; + if (tcp->connect->node->status != NODE_STAT_OFFLINE) + hmdfs_reget_connection(tcp->connect); + break; + } +freeze: + schedule(); + try_to_freeze(); + } + + hmdfs_info("Exiting. 
Now, sock state = %d", tcp->sock->state); + hmdfs_revert_creds(old_cred); + connection_put(tcp->connect); + return 0; +} + +static int tcp_send_message_sock_cipher(struct tcp_handle *tcp, + struct hmdfs_send_data *msg) +{ + int ret = 0; + __u8 *outdata = NULL; + size_t outlen = 0; + int send_len = 0; + int send_vec_cnt = 0; + struct msghdr tcp_msg; + struct kvec iov[TCP_KVEC_ELE_DOUBLE]; + + memset(&tcp_msg, 0, sizeof(tcp_msg)); + if (!tcp || !tcp->sock) { + hmdfs_err("encrypt tcp socket = NULL"); + return -ESHUTDOWN; + } + iov[0].iov_base = msg->head; + iov[0].iov_len = msg->head_len; + send_vec_cnt = TCP_KVEC_HEAD; + if (msg->len == 0) + goto send; + + outlen = msg->len + HMDFS_IV_SIZE + HMDFS_TAG_SIZE; + outdata = kzalloc(outlen, GFP_KERNEL); + if (!outdata) { + hmdfs_err("tcp send message encrypt fail to alloc outdata"); + return -ENOMEM; + } + ret = aeadcipher_encrypt_buffer(tcp->connect, msg->data, msg->len, + outdata, outlen); + if (ret) { + hmdfs_err("encrypt_buf fail"); + goto out; + } + iov[1].iov_base = outdata; + iov[1].iov_len = outlen; + send_vec_cnt = TCP_KVEC_ELE_DOUBLE; +send: + mutex_lock(&tcp->send_mutex); + send_len = sendmsg_nofs(tcp->sock, &tcp_msg, iov, send_vec_cnt, + msg->head_len + outlen); + mutex_unlock(&tcp->send_mutex); + if (send_len <= 0) { + hmdfs_err("error %d", send_len); + ret = -ESHUTDOWN; + } else if (send_len != msg->head_len + outlen) { + hmdfs_err("send part of message. %d/%zu", send_len, + msg->head_len + outlen); + ret = -EAGAIN; + } else { + ret = 0; + } +out: + kfree(outdata); + return ret; +} + +static int tcp_send_message_sock_tls(struct tcp_handle *tcp, + struct hmdfs_send_data *msg) +{ + int send_len = 0; + int send_vec_cnt = 0; + struct msghdr tcp_msg; + struct kvec iov[TCP_KVEC_ELE_TRIPLE]; + + memset(&tcp_msg, 0, sizeof(tcp_msg)); + if (!tcp || !tcp->sock) { + hmdfs_err("tcp socket = NULL"); + return -ESHUTDOWN; + } + iov[TCP_KVEC_HEAD].iov_base = msg->head; + iov[TCP_KVEC_HEAD].iov_len = msg->head_len; + if (msg->len == 0 && msg->sdesc_len == 0) { + send_vec_cnt = TCP_KVEC_ELE_SINGLE; + } else if (msg->sdesc_len == 0) { + iov[TCP_KVEC_DATA].iov_base = msg->data; + iov[TCP_KVEC_DATA].iov_len = msg->len; + send_vec_cnt = TCP_KVEC_ELE_DOUBLE; + } else { + iov[TCP_KVEC_FILE_PARA].iov_base = msg->sdesc; + iov[TCP_KVEC_FILE_PARA].iov_len = msg->sdesc_len; + iov[TCP_KVEC_FILE_CONTENT].iov_base = msg->data; + iov[TCP_KVEC_FILE_CONTENT].iov_len = msg->len; + send_vec_cnt = TCP_KVEC_ELE_TRIPLE; + } + mutex_lock(&tcp->send_mutex); + send_len = sendmsg_nofs(tcp->sock, &tcp_msg, iov, send_vec_cnt, + msg->head_len + msg->len + msg->sdesc_len); + mutex_unlock(&tcp->send_mutex); + if (send_len == -EBADMSG) { + return -EBADMSG; + } else if (send_len <= 0) { + hmdfs_err("error %d", send_len); + return -ESHUTDOWN; + } else if (send_len != msg->head_len + msg->len + msg->sdesc_len) { + hmdfs_err("send part of message. 
%d/%zu", send_len, + msg->head_len + msg->len); + tcp->connect->stat.send_bytes += send_len; + return -EAGAIN; + } + tcp->connect->stat.send_bytes += send_len; + tcp->connect->stat.send_message_count++; + return 0; +} + +#ifdef CONFIG_HMDFS_FS_ENCRYPTION +int tcp_send_rekey_request(struct connection *connect) +{ + int ret = 0; + struct hmdfs_send_data msg; + struct tcp_handle *tcp = connect->connect_handle; + struct hmdfs_head_cmd *head = NULL; + struct connection_rekey_request *rekey_request_param = NULL; + struct hmdfs_cmd operations; + + hmdfs_init_cmd(&operations, F_CONNECT_REKEY); + head = kzalloc(sizeof(struct hmdfs_head_cmd) + + sizeof(struct connection_rekey_request), + GFP_KERNEL); + if (!head) + return -ENOMEM; + rekey_request_param = + (struct connection_rekey_request + *)((uint8_t *)head + sizeof(struct hmdfs_head_cmd)); + + rekey_request_param->update_request = cpu_to_le32(UPDATE_NOT_REQUESTED); + + head->magic = HMDFS_MSG_MAGIC; + head->version = DFS_2_0; + head->operations = operations; + head->data_len = + cpu_to_le32(sizeof(*head) + sizeof(*rekey_request_param)); + head->reserved = 0; + head->reserved1 = 0; + head->ret_code = 0; + + msg.head = head; + msg.head_len = sizeof(*head); + msg.data = rekey_request_param; + msg.len = sizeof(*rekey_request_param); + msg.sdesc = NULL; + msg.sdesc_len = 0; + ret = tcp_send_message_sock_tls(tcp, &msg); + if (ret != 0) + hmdfs_err("return error %d", ret); + kfree(head); + return ret; +} +#endif + +static int tcp_send_message(struct connection *connect, + struct hmdfs_send_data *msg) +{ + int ret = 0; +#ifdef CONFIG_HMDFS_FS_ENCRYPTION + unsigned long nowtime = jiffies; +#endif + struct tcp_handle *tcp = NULL; + + if (!connect) { + hmdfs_err("tcp connection = NULL "); + return -ESHUTDOWN; + } + if (!msg) { + hmdfs_err("msg = NULL"); + return -EINVAL; + } + if (msg->len > HMDFS_MAX_MESSAGE_LEN) { + hmdfs_err("message->len error: %zu", msg->len); + return -EINVAL; + } + tcp = (struct tcp_handle *)(connect->connect_handle); + if (connect->status == CONNECT_STAT_STOP) + return -EAGAIN; + + trace_hmdfs_tcp_send_message(msg->head); + + if (connect->status == CONNECT_STAT_WORKING && + connect->node->version > USERSPACE_MAX_VER) + ret = tcp_send_message_sock_tls(tcp, msg); + else + // Handshake status or version HMDFS1.0 + ret = tcp_send_message_sock_cipher(tcp, msg); + + if (ret != 0) { + hmdfs_err("return error %d", ret); + return ret; + } +#ifdef CONFIG_HMDFS_FS_ENCRYPTION + if (nowtime - connect->stat.rekey_time >= REKEY_LIFETIME && + connect->status == CONNECT_STAT_WORKING && + connect->node->version >= DFS_2_0) { + hmdfs_info("send rekey message to devid %llu", + connect->node->device_id); + ret = tcp_send_rekey_request(connect); + if (ret == 0) + set_crypto_info(connect, SET_CRYPTO_SEND); + connect->stat.rekey_time = nowtime; + } +#endif + return ret; +} + +void tcp_close_socket(struct tcp_handle *tcp) +{ + if (!tcp) + return; + mutex_lock(&tcp->close_mutex); + if (tcp->recv_task) { + kthread_stop(tcp->recv_task); + tcp->recv_task = NULL; + } + mutex_unlock(&tcp->close_mutex); +} + +static int set_tfm(__u8 *master_key, struct crypto_aead *tfm) +{ + int ret = 0; + int iv_len; + __u8 *sec_key = NULL; + + sec_key = master_key; + crypto_aead_clear_flags(tfm, ~0); + ret = crypto_aead_setkey(tfm, sec_key, HMDFS_KEY_SIZE); + if (ret) { + hmdfs_err("failed to set the key"); + goto out; + } + ret = crypto_aead_setauthsize(tfm, HMDFS_TAG_SIZE); + if (ret) { + hmdfs_err("authsize length is error"); + goto out; + } + + iv_len = 
crypto_aead_ivsize(tfm);
+	if (iv_len != HMDFS_IV_SIZE) {
+		hmdfs_err("unexpected aead ivsize %d", iv_len);
+		ret = -ENODATA;
+	}
+out:
+	return ret;
+}
+
+static int tcp_update_socket(struct tcp_handle *tcp, int fd,
+			     uint8_t *master_key, struct socket *socket)
+{
+	int err = 0;
+	struct hmdfs_peer *node = NULL;
+
+	if (!master_key || fd == 0)
+		return -EAGAIN;
+
+	tcp->sock = socket;
+	tcp->fd = fd;
+	if (!tcp_handle_is_available(tcp)) {
+		err = -EPIPE;
+		goto put_sock;
+	}
+
+	hmdfs_info("socket fd %d, state %d, refcount %ld",
+		   fd, socket->state, file_count(socket->file));
+
+	tcp->recv_cache = kmem_cache_create("hmdfs_socket",
+					    tcp->recvbuf_maxsize,
+					    0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!tcp->recv_cache) {
+		err = -ENOMEM;
+		goto put_sock;
+	}
+
+	socket->sk->sk_user_data = tcp;
+	err = tcp_set_recvtimeo(socket, TCP_RECV_TIMEOUT);
+	if (err) {
+		hmdfs_err("tcp set timeout error");
+		goto free_mem_cache;
+	}
+
+	/* send key and recv key, default MASTER KEY */
+	memcpy(tcp->connect->master_key, master_key, HMDFS_KEY_SIZE);
+	memcpy(tcp->connect->send_key, master_key, HMDFS_KEY_SIZE);
+	memcpy(tcp->connect->recv_key, master_key, HMDFS_KEY_SIZE);
+	tcp->connect->tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
+	if (IS_ERR(tcp->connect->tfm)) {
+		err = PTR_ERR(tcp->connect->tfm);
+		tcp->connect->tfm = NULL;
+		hmdfs_err("failed to load transform for gcm(aes):%d", err);
+		goto free_mem_cache;
+	}
+
+	err = set_tfm(master_key, tcp->connect->tfm);
+	if (err) {
+		hmdfs_err("tfm setting failed");
+		goto free_crypto;
+	}
+
+	connection_get(tcp->connect);
+
+	node = tcp->connect->node;
+	tcp->recv_task = kthread_create(tcp_recv_thread, (void *)tcp,
+					"dfs_rcv%u_%llu_%d",
+					node->owner, node->device_id, fd);
+	if (IS_ERR(tcp->recv_task)) {
+		err = PTR_ERR(tcp->recv_task);
+		hmdfs_err("tcp->recv_task err %d", err);
+		goto put_conn;
+	}
+
+	return 0;
+
+put_conn:
+	tcp->recv_task = NULL;
+	connection_put(tcp->connect);
+free_crypto:
+	crypto_free_aead(tcp->connect->tfm);
+	tcp->connect->tfm = NULL;
+free_mem_cache:
+	kmem_cache_destroy(tcp->recv_cache);
+	tcp->recv_cache = NULL;
+put_sock:
+	tcp->sock = NULL;
+	tcp->fd = 0;
+
+	return err;
+}
+
+static struct tcp_handle *tcp_alloc_handle(struct connection *connect,
+		int socket_fd, uint8_t *master_key, struct socket *socket)
+{
+	int ret = 0;
+	struct tcp_handle *tcp = kzalloc(sizeof(*tcp), GFP_KERNEL);
+
+	if (!tcp)
+		return NULL;
+	tcp->connect = connect;
+	tcp->connect->connect_handle = (void *)tcp;
+	tcp->recvbuf_maxsize = MAX_RECV_SIZE;
+	tcp->recv_task = NULL;
+	tcp->recv_cache = NULL;
+	tcp->sock = NULL;
+	mutex_init(&tcp->close_mutex);
+	mutex_init(&tcp->send_mutex);
+	ret = tcp_update_socket(tcp, socket_fd, master_key, socket);
+	if (ret) {
+		kfree(tcp);
+		return NULL;
+	}
+	return tcp;
+}
+
+void hmdfs_get_connection(struct hmdfs_peer *peer)
+{
+	struct notify_param param;
+
+	if (!peer)
+		return;
+	param.notify = NOTIFY_GET_SESSION;
+	param.fd = INVALID_SOCKET_FD;
+	memcpy(param.remote_cid, peer->cid, HMDFS_CID_SIZE);
+	notify(peer, &param);
+}
+
+static void connection_notify_to_close(struct connection *conn)
+{
+	struct notify_param param;
+	struct hmdfs_peer *peer = NULL;
+	struct tcp_handle *tcp = NULL;
+
+	tcp = conn->connect_handle;
+	peer = conn->node;
+
+	// libdistbus/src/TcpSession.cpp will close the socket
+	param.notify = NOTIFY_GET_SESSION;
+	param.fd = tcp->fd;
+	memcpy(param.remote_cid, peer->cid, HMDFS_CID_SIZE);
+	notify(peer, &param);
+}
+
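+/*
+ * Tear down one connection of a peer: unlink it from conn_impl_list,
+ * shut the socket down, stop the receive thread (unless we are running
+ * on that very thread), ask userspace to close the fd, and drop the
+ * reference that the list used to hold.
+ */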
+void hmdfs_reget_connection(struct connection *conn)
+{
+	struct tcp_handle *tcp = NULL;
+	struct connection *conn_impl = NULL;
+	struct connection *next = NULL;
+	struct task_struct *recv_task = NULL;
+	bool should_put = false;
+	bool stop_thread = true;
+
+	if (!conn)
+		return;
+
+	// One may put a connection if and only if he took it out of the list
+	mutex_lock(&conn->node->conn_impl_list_lock);
+	list_for_each_entry_safe(conn_impl, next, &conn->node->conn_impl_list,
+				 list) {
+		if (conn_impl == conn) {
+			should_put = true;
+			list_move(&conn->list, &conn->node->conn_deleting_list);
+			break;
+		}
+	}
+	if (!should_put) {
+		mutex_unlock(&conn->node->conn_impl_list_lock);
+		return;
+	}
+
+	tcp = conn->connect_handle;
+	if (tcp) {
+		recv_task = tcp->recv_task;
+		/*
+		 * Prevent the receive thread from stopping itself: ensure
+		 * it has exited before the offline event is processed
+		 */
+		if (!recv_task || recv_task->pid == current->pid)
+			stop_thread = false;
+	}
+	mutex_unlock(&conn->node->conn_impl_list_lock);
+
+	if (tcp) {
+		if (tcp->sock) {
+			hmdfs_info("shutdown sock: fd = %d, sockref = %ld, connref = %u stop_thread = %d",
+				   tcp->fd, file_count(tcp->sock->file),
+				   kref_read(&conn->ref_cnt), stop_thread);
+			kernel_sock_shutdown(tcp->sock, SHUT_RDWR);
+		}
+
+		if (stop_thread)
+			tcp_close_socket(tcp);
+
+		if (tcp->fd != INVALID_SOCKET_FD)
+			connection_notify_to_close(conn);
+	}
+	connection_put(conn);
+}
+
+static struct connection *
+lookup_conn_by_socketfd_unsafe(struct hmdfs_peer *node, struct socket *socket)
+{
+	struct connection *tcp_conn = NULL;
+	struct tcp_handle *tcp = NULL;
+
+	list_for_each_entry(tcp_conn, &node->conn_impl_list, list) {
+		if (tcp_conn->connect_handle) {
+			tcp = (struct tcp_handle *)(tcp_conn->connect_handle);
+			if (tcp->sock == socket) {
+				connection_get(tcp_conn);
+				return tcp_conn;
+			}
+		}
+	}
+	return NULL;
+}
+
+static void hmdfs_reget_connection_work_fn(struct work_struct *work)
+{
+	struct connection *conn =
+		container_of(work, struct connection, reget_work);
+
+	hmdfs_reget_connection(conn);
+	connection_put(conn);
+}
+
+struct connection *alloc_conn_tcp(struct hmdfs_peer *node, int socket_fd,
+		uint8_t *master_key, uint8_t status, struct socket *socket)
+{
+	struct connection *tcp_conn = NULL;
+	unsigned long nowtime = jiffies;
+
+	tcp_conn = kzalloc(sizeof(*tcp_conn), GFP_KERNEL);
+	if (!tcp_conn)
+		goto out_err;
+
+	kref_init(&tcp_conn->ref_cnt);
+	mutex_init(&tcp_conn->ref_lock);
+	INIT_LIST_HEAD(&tcp_conn->list);
+	tcp_conn->node = node;
+	tcp_conn->close = tcp_stop_connect;
+	tcp_conn->send_message = tcp_send_message;
+	tcp_conn->type = CONNECT_TYPE_TCP;
+	tcp_conn->status = status;
+	tcp_conn->stat.rekey_time = nowtime;
+	tcp_conn->connect_handle =
+		(void *)tcp_alloc_handle(tcp_conn, socket_fd, master_key, socket);
+	INIT_WORK(&tcp_conn->reget_work, hmdfs_reget_connection_work_fn);
+	if (!tcp_conn->connect_handle) {
+		hmdfs_err("Failed to alloc tcp_handle for struct connection");
+		goto out_err;
+	}
+	return tcp_conn;
+
+out_err:
+	kfree(tcp_conn);
+	return NULL;
+}
+
+static struct connection *add_conn_tcp_unsafe(struct hmdfs_peer *node,
+					      struct socket *socket,
+					      struct connection *conn2add)
+{
+	struct connection *conn;
+
+	conn = lookup_conn_by_socketfd_unsafe(node, socket);
+	if (conn) {
+		hmdfs_info("socket already in list");
+		return conn;
+	}
+
+	/* Prefer to use socket opened by local device */
+	if (conn2add->status == CONNECT_STAT_WAIT_REQUEST)
+		list_add(&conn2add->list, &node->conn_impl_list);
+	else
+		list_add_tail(&conn2add->list, &node->conn_impl_list);
+	connection_get(conn2add);
+	return conn2add;
+}
+
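+/*
+ * Look up the connection bound to @fd on @node, or create one: take a
+ * reference on the socket, allocate connection + tcp_handle, insert it
+ * into conn_impl_list (unless the same socket raced in first), and wake
+ * the receive thread / start the handshake for a newly added connection.
+ */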
+struct connection *hmdfs_get_conn_tcp(struct hmdfs_peer *node, int fd,
+				      uint8_t *master_key, uint8_t status)
+{
+	struct connection *tcp_conn = NULL, *on_peer_conn = NULL;
+	struct tcp_handle *tcp = NULL;
+	struct socket *socket = NULL;
+	int err = 0;
+
+	socket = sockfd_lookup(fd, &err);
+	if (!socket) {
+		hmdfs_err("lookup socket fail, socket_fd %d, err %d", fd, err);
+		return NULL;
+	}
+	mutex_lock(&node->conn_impl_list_lock);
+	tcp_conn = lookup_conn_by_socketfd_unsafe(node, socket);
+	mutex_unlock(&node->conn_impl_list_lock);
+	if (tcp_conn) {
+		hmdfs_info("Got an existing tcp conn: socket_fd = %d", fd);
+		sockfd_put(socket);
+		goto out;
+	}
+
+	tcp_conn = alloc_conn_tcp(node, fd, master_key, status, socket);
+	if (!tcp_conn) {
+		hmdfs_info("Failed to alloc a tcp conn, socket_fd %d", fd);
+		sockfd_put(socket);
+		goto out;
+	}
+
+	mutex_lock(&node->conn_impl_list_lock);
+	on_peer_conn = add_conn_tcp_unsafe(node, socket, tcp_conn);
+	mutex_unlock(&node->conn_impl_list_lock);
+	tcp = tcp_conn->connect_handle;
+	if (on_peer_conn == tcp_conn) {
+		hmdfs_info("Got a newly allocated tcp conn: socket_fd = %d", fd);
+		wake_up_process(tcp->recv_task);
+		if (status == CONNECT_STAT_WAIT_RESPONSE)
+			connection_send_handshake(
+				on_peer_conn, CONNECT_MESG_HANDSHAKE_REQUEST,
+				0);
+	} else {
+		hmdfs_info("Got an existing tcp conn: socket_fd = %d", fd);
+		tcp->fd = INVALID_SOCKET_FD;
+		tcp_close_socket(tcp);
+		connection_put(tcp_conn);
+
+		tcp_conn = on_peer_conn;
+	}
+
+out:
+	return tcp_conn;
+}
+
+void tcp_stop_connect(struct connection *connect)
+{
+	hmdfs_info("now nothing to do");
+}
diff --git a/fs/hmdfs/comm/transport.h b/fs/hmdfs/comm/transport.h
new file mode 100644
index 0000000000000000000000000000000000000000..bce882cb6997753ac8c9e7df2ed858aeaf20e896
--- /dev/null
+++ b/fs/hmdfs/comm/transport.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/comm/transport.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_TRANSPORT_H
+#define HMDFS_TRANSPORT_H
+
+#include "connection.h"
+
+#define ENCRYPT_FLAG 1
+#define DECRYPT_FLAG 0
+
+struct aeadcrypt_result {
+	struct completion completion;
+	int err;
+};
+
+#define ADAPTER_MESSAGE_LENGTH (1024 * 1024 + 1024) // 1M + 1K
+#define MAX_RECV_SIZE sizeof(struct hmdfs_head_cmd)
+
+#define TCP_KVEC_HEAD 0
+#define TCP_KVEC_DATA 1
+
+enum TCP_KVEC_FILE_ELE_INDEX {
+	TCP_KVEC_FILE_PARA = 1,
+	TCP_KVEC_FILE_CONTENT = 2,
+};
+
+enum TCP_KVEC_TYPE {
+	TCP_KVEC_ELE_SINGLE = 1,
+	TCP_KVEC_ELE_DOUBLE = 2,
+	TCP_KVEC_ELE_TRIPLE = 3,
+};
+
+#define TCP_RECV_TIMEOUT 2
+#define MAX_RECV_RETRY_TIMES 2
+
+#ifndef SO_RCVTIMEO
+#define SO_RCVTIMEO SO_RCVTIMEO_OLD
+#endif
+
+struct tcp_handle {
+	struct connection *connect;
+	int recvbuf_maxsize;
+	struct mutex close_mutex;
+	/*
+	 * To achieve atomicity.
+	 *
+	 * The sock lock held at the tcp layer may be temporarily released at
+	 * `sk_wait_event()` when waiting for sock buffer. From this point on,
+	 * threads serialized at the initial call to `lock_sock()` contained
+	 * in `tcp_sendmsg()` can proceed, resulting in intermixed messages.
+ */ + struct mutex send_mutex; + struct socket *sock; + int fd; + struct kmem_cache *recv_cache; + struct task_struct *recv_task; +}; + +void hmdfs_get_connection(struct hmdfs_peer *peer); +void hmdfs_reget_connection(struct connection *conn); +struct connection *hmdfs_get_conn_tcp(struct hmdfs_peer *node, int socket_fd, + uint8_t *master_key, uint8_t status); +void tcp_stop_connect(struct connection *connect); +uint32_t hmdfs_tcpi_rtt(struct hmdfs_peer *node); +void tcp_close_socket(struct tcp_handle *tcp); + +#ifdef CONFIG_HMDFS_FS_ENCRYPTION +int tcp_send_rekey_request(struct connection *connect); +#endif + +#endif diff --git a/fs/hmdfs/dentry.c b/fs/hmdfs/dentry.c new file mode 100644 index 0000000000000000000000000000000000000000..ac590df0982a6cf290b402467bafb2e6b8a7b601 --- /dev/null +++ b/fs/hmdfs/dentry.c @@ -0,0 +1,303 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/dentry.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include +#include + +#include "comm/connection.h" +#include "hmdfs_dentryfile.h" +#include "hmdfs_device_view.h" +#include "hmdfs_merge_view.h" + +extern struct kmem_cache *hmdfs_dentry_cachep; + +void hmdfs_set_time(struct dentry *dentry, unsigned long time) +{ + struct hmdfs_dentry_info *d_info = dentry->d_fsdata; + + if (d_info) + d_info->time = time; +} + +unsigned long hmdfs_get_time(struct dentry *dentry) +{ + struct hmdfs_dentry_info *d_info = dentry->d_fsdata; + + if (d_info) + return (unsigned long)d_info->time; + return 0; +} + +static int hmdfs_d_remote_revalidate(struct hmdfs_peer *conn, + struct dentry *target, + struct dentry *parent) +{ + unsigned int timeout = hmdfs_sb(target->d_sb)->dcache_timeout; + unsigned long dentry_time = hmdfs_get_time(target); + struct clearcache_item *item; + + item = hmdfs_find_cache_item(conn->device_id, parent); + if (!item) + return 0; + kref_put(&item->ref, release_cache_item); + + if (cache_item_revalidate(READ_ONCE(conn->conn_time), + dentry_time, timeout)) + return 1; + + return 0; +} + +static inline void lock_for_dname_cmp(struct dentry *dentry, + struct dentry *lower_dentry) +{ + if (dentry < lower_dentry) { + spin_lock(&dentry->d_lock); + spin_lock_nested(&lower_dentry->d_lock, DENTRY_D_LOCK_NESTED); + } else { + spin_lock(&lower_dentry->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + } +} + +static inline void unlock_for_dname_cmp(struct dentry *dentry, + struct dentry *lower_dentry) +{ + spin_unlock(&dentry->d_lock); + spin_unlock(&lower_dentry->d_lock); +} + +static int hmdfs_dev_d_revalidate(struct dentry *direntry, unsigned int flags) +{ + struct inode *dinode = NULL; + struct hmdfs_inode_info *info = NULL; + + spin_lock(&direntry->d_lock); + if (IS_ROOT(direntry)) { + spin_unlock(&direntry->d_lock); + return 1; + } + spin_unlock(&direntry->d_lock); + + dinode = d_inode(direntry); + if (!dinode) + return 0; + + info = hmdfs_i(dinode); + if (info->inode_type == HMDFS_LAYER_SECOND_LOCAL || + info->inode_type == HMDFS_LAYER_FIRST_DEVICE) { + return 1; + } + if (info->conn && info->conn->status == NODE_STAT_ONLINE) + return 1; + + return 0; +} + +static int hmdfs_d_revalidate(struct dentry *direntry, unsigned int flags) +{ + struct inode *dinode = NULL; + struct hmdfs_inode_info *info = NULL; + struct path lower_path, parent_lower_path; + struct dentry *parent_dentry = NULL; + struct dentry *parent_lower_dentry = NULL; + struct dentry *lower_cur_parent_dentry = NULL; + struct dentry *lower_dentry = NULL; + int ret; + + if (flags & LOOKUP_RCU) + return -ECHILD; 
+ + if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET | LOOKUP_REVAL)) + return 0; + + dinode = d_inode(direntry); + if (!dinode) + return 0; + + /* remote dentry timeout */ + info = hmdfs_i(dinode); + parent_dentry = dget_parent(direntry); + if (info->conn) { + ret = hmdfs_d_remote_revalidate(info->conn, direntry, + parent_dentry); + dput(parent_dentry); + return ret; + } + + hmdfs_get_lower_path(direntry, &lower_path); + lower_dentry = lower_path.dentry; + lower_cur_parent_dentry = dget_parent(lower_dentry); + hmdfs_get_lower_path(parent_dentry, &parent_lower_path); + parent_lower_dentry = parent_lower_path.dentry; + if ((lower_dentry->d_flags & DCACHE_OP_REVALIDATE)) { + ret = lower_dentry->d_op->d_revalidate(lower_dentry, flags); + if (ret == 0) + goto out; + } + + spin_lock(&lower_dentry->d_lock); + if (d_unhashed(lower_dentry)) { + spin_unlock(&lower_dentry->d_lock); + ret = 0; + goto out; + } + spin_unlock(&lower_dentry->d_lock); + + if (parent_lower_dentry != lower_cur_parent_dentry) { + ret = 0; + goto out; + } + + ret = 1; + lock_for_dname_cmp(direntry, lower_dentry); + if (!qstr_case_eq(&direntry->d_name, &lower_dentry->d_name)) + ret = 0; + unlock_for_dname_cmp(direntry, lower_dentry); + +out: + hmdfs_put_lower_path(&parent_lower_path); + dput(lower_cur_parent_dentry); + hmdfs_put_lower_path(&lower_path); + dput(parent_dentry); + return ret; +} + +static void hmdfs_dev_d_release(struct dentry *dentry) +{ + if (!dentry || !dentry->d_fsdata) + return; + + switch (hmdfs_d(dentry)->dentry_type) { + case HMDFS_LAYER_SECOND_LOCAL: + hmdfs_clear_cache_dents(dentry, false); + hmdfs_drop_remote_cache_dents(dentry); + path_put(&(hmdfs_d(dentry)->lower_path)); + break; + case HMDFS_LAYER_ZERO: + hmdfs_put_reset_lower_path(dentry); + break; + case HMDFS_LAYER_FIRST_DEVICE: + break; + case HMDFS_LAYER_SECOND_REMOTE: + hmdfs_clear_cache_dents(dentry, false); + break; + default: + hmdfs_err("Unexpected dentry type %d", + hmdfs_d(dentry)->dentry_type); + return; + } + + kmem_cache_free(hmdfs_dentry_cachep, dentry->d_fsdata); + dentry->d_fsdata = NULL; +} + +static void hmdfs_d_release(struct dentry *dentry) +{ + if (!dentry || !dentry->d_fsdata) + return; + + hmdfs_clear_cache_dents(dentry, false); + hmdfs_drop_remote_cache_dents(dentry); + hmdfs_put_reset_lower_path(dentry); + kmem_cache_free(hmdfs_dentry_cachep, dentry->d_fsdata); + dentry->d_fsdata = NULL; +} + +static int hmdfs_cmp_ci(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name) +{ + struct hmdfs_sb_info *sbi = hmdfs_sb(dentry->d_sb); + + if (name->len != len) + return 1; + + if (!sbi->s_case_sensitive) { + if (str_n_case_eq(name->name, str, len)) + return 0; + } else { + if (!strncmp(name->name, str, len)) + return 0; + } + return 1; +} + +static int hmdfs_hash_ci(const struct dentry *dentry, struct qstr *qstr) +{ + const unsigned char *name = qstr->name; + unsigned int len = qstr->len; + unsigned long hash; + struct hmdfs_sb_info *sbi = hmdfs_sb(dentry->d_sb); + + if (sbi->s_case_sensitive) + return 0; + + hash = init_name_hash(dentry); + while (len--) + hash = partial_name_hash(tolower(*name++), hash); + qstr->hash = end_name_hash(hash); + return 0; +} + +void clear_comrades_locked(struct list_head *comrade_list) +{ + struct hmdfs_dentry_comrade *cc, *nc; + + WARN_ON(!comrade_list); + list_for_each_entry_safe(cc, nc, comrade_list, list) { + dput(cc->lo_d); + kfree(cc); + } + INIT_LIST_HEAD(comrade_list); +} + +void clear_comrades(struct dentry *dentry) +{ + struct hmdfs_dentry_info_merge 
*cdi = hmdfs_dm(dentry);
+
+	mutex_lock(&cdi->comrade_list_lock);
+	clear_comrades_locked(&cdi->comrade_list);
+	mutex_unlock(&cdi->comrade_list_lock);
+}
+
+/**
+ * d_revalidate_merge - revalidate a merge dentry
+ *
+ * Always return 0 to invalidate a dentry for fault-tolerance.
+ * The cost is acceptable for an overlay filesystem.
+ */
+static int d_revalidate_merge(struct dentry *direntry, unsigned int flags)
+{
+	return 0;
+}
+
+static void d_release_merge(struct dentry *dentry)
+{
+	if (!dentry || !dentry->d_fsdata)
+		return;
+
+	clear_comrades(dentry);
+	kmem_cache_free(hmdfs_dentry_merge_cachep, dentry->d_fsdata);
+	dentry->d_fsdata = NULL;
+}
+
+const struct dentry_operations hmdfs_dops_merge = {
+	.d_revalidate = d_revalidate_merge,
+	.d_release = d_release_merge,
+};
+
+const struct dentry_operations hmdfs_dev_dops = {
+	.d_revalidate = hmdfs_dev_d_revalidate,
+	.d_release = hmdfs_dev_d_release,
+};
+
+const struct dentry_operations hmdfs_dops = {
+	.d_revalidate = hmdfs_d_revalidate,
+	.d_release = hmdfs_d_release,
+	.d_compare = hmdfs_cmp_ci,
+	.d_hash = hmdfs_hash_ci,
+};
diff --git a/fs/hmdfs/file_local.c b/fs/hmdfs/file_local.c
new file mode 100644
index 0000000000000000000000000000000000000000..893c6edbc93b40002add2edfcb136afc1cbce61e
--- /dev/null
+++ b/fs/hmdfs/file_local.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/file_local.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "hmdfs_client.h"
+#include "hmdfs_dentryfile.h"
+#include "hmdfs_device_view.h"
+#include "hmdfs_merge_view.h"
+#include "hmdfs_trace.h"
+
+int hmdfs_file_open_local(struct inode *inode, struct file *file)
+{
+	int err = 0;
+	struct file *lower_file = NULL;
+	struct path lower_path;
+	struct super_block *sb = inode->i_sb;
+	const struct cred *cred = hmdfs_sb(sb)->cred;
+	struct hmdfs_file_info *gfi = kzalloc(sizeof(*gfi), GFP_KERNEL);
+
+	if (!gfi) {
+		err = -ENOMEM;
+		goto out_err;
+	}
+
+	hmdfs_get_lower_path(file->f_path.dentry, &lower_path);
+	lower_file = dentry_open(&lower_path, file->f_flags, cred);
+	hmdfs_put_lower_path(&lower_path);
+	if (IS_ERR(lower_file)) {
+		err = PTR_ERR(lower_file);
+		kfree(gfi);
+	} else {
+		gfi->lower_file = lower_file;
+		file->private_data = gfi;
+	}
+out_err:
+	return err;
+}
+
+int hmdfs_file_release_local(struct inode *inode, struct file *file)
+{
+	struct hmdfs_file_info *gfi = hmdfs_f(file);
+
+	file->private_data = NULL;
+	fput(gfi->lower_file);
+	kfree(gfi);
+	return 0;
+}
+
+ssize_t hmdfs_read_local(struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct file *lower_file = hmdfs_f(iocb->ki_filp)->lower_file;
+	ssize_t err;
+
+	if (iter->type & ITER_KVEC)
+		err = kernel_read(lower_file, iter->iov->iov_base,
+				  iter->iov->iov_len, &(iocb->ki_pos));
+	else
+		err = vfs_read(lower_file, iter->iov->iov_base,
+			       iter->iov->iov_len, &(iocb->ki_pos));
+
+	if (err >= 0)
+		file_inode(iocb->ki_filp)->i_atime = file_inode(lower_file)->i_atime;
+	return err;
+}
+
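+/*
+ * Writes go straight to the lower file; on success the hmdfs inode is
+ * refreshed from the lower inode (size and timestamps) so the upper
+ * layer never serves stale attributes.
+ */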
+ssize_t hmdfs_write_local(struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct file *lower_file = hmdfs_f(iocb->ki_filp)->lower_file;
+	struct inode *inode = file_inode(iocb->ki_filp);
+	struct inode *lower_inode = file_inode(lower_file);
+	struct dentry *dentry = file_dentry(iocb->ki_filp);
+	ssize_t err;
+
+	if (iter->type & ITER_KVEC)
+		err = kernel_write(lower_file, iter->iov->iov_base,
+				   iter->iov->iov_len, &(iocb->ki_pos));
+	else
+		err = vfs_write(lower_file, iter->iov->iov_base,
+				iter->iov->iov_len, &(iocb->ki_pos));
+
+	if (err >= 0) {
+		inode_lock(inode);
+		i_size_write(inode, i_size_read(lower_inode));
+		inode->i_atime = lower_inode->i_atime;
+		inode->i_ctime = lower_inode->i_ctime;
+		inode->i_mtime = lower_inode->i_mtime;
+		if (!hmdfs_i_merge(hmdfs_i(inode)))
+			update_inode_to_dentry(dentry, inode);
+		inode_unlock(inode);
+	}
+	return err;
+}
+
+int hmdfs_fsync_local(struct file *file, loff_t start, loff_t end, int datasync)
+{
+	int err;
+	struct file *lower_file = hmdfs_f(file)->lower_file;
+
+	err = __generic_file_fsync(file, start, end, datasync);
+	if (err)
+		goto out;
+
+	err = vfs_fsync_range(lower_file, start, end, datasync);
+out:
+	return err;
+}
+
+loff_t hmdfs_file_llseek_local(struct file *file, loff_t offset, int whence)
+{
+	loff_t err = 0;
+	struct file *lower_file = NULL;
+
+	err = generic_file_llseek(file, offset, whence);
+	if (err < 0)
+		goto out;
+	lower_file = hmdfs_f(file)->lower_file;
+	err = generic_file_llseek(lower_file, offset, whence);
+out:
+	return err;
+}
+
+int hmdfs_file_mmap_local(struct file *file, struct vm_area_struct *vma)
+{
+	struct hmdfs_file_info *private_data = file->private_data;
+	struct file *realfile = NULL;
+	int ret;
+
+	if (!private_data)
+		return -EINVAL;
+
+	realfile = private_data->lower_file;
+	if (!realfile)
+		return -EINVAL;
+
+	if (!realfile->f_op->mmap)
+		return -ENODEV;
+
+	if (WARN_ON(file != vma->vm_file))
+		return -EIO;
+
+	vma->vm_file = get_file(realfile);
+	ret = call_mmap(vma->vm_file, vma);
+	if (ret)
+		fput(realfile);
+	else
+		fput(file);
+
+	file_accessed(file);
+
+	return ret;
+}
+
+const struct file_operations hmdfs_file_fops_local = {
+	.owner = THIS_MODULE,
+	.llseek = hmdfs_file_llseek_local,
+	.read_iter = hmdfs_read_local,
+	.write_iter = hmdfs_write_local,
+	.mmap = hmdfs_file_mmap_local,
+	.open = hmdfs_file_open_local,
+	.release = hmdfs_file_release_local,
+	.fsync = hmdfs_fsync_local,
+};
+
+static int hmdfs_iterate_local(struct file *file, struct dir_context *ctx)
+{
+	int err = 0;
+	loff_t start_pos = ctx->pos;
+	struct file *lower_file = hmdfs_f(file)->lower_file;
+
+	if (ctx->pos == -1)
+		return 0;
+
+	lower_file->f_pos = file->f_pos;
+	err = iterate_dir(lower_file, ctx);
+	file->f_pos = lower_file->f_pos;
+
+	if (err < 0)
+		ctx->pos = -1;
+
+	trace_hmdfs_iterate_local(file->f_path.dentry, start_pos, ctx->pos,
+				  err);
+	return err;
+}
+
+int hmdfs_dir_open_local(struct inode *inode, struct file *file)
+{
+	int err = 0;
+	struct file *lower_file = NULL;
+	struct dentry *dentry = file->f_path.dentry;
+	struct path lower_path;
+	struct super_block *sb = inode->i_sb;
+	const struct cred *cred = hmdfs_sb(sb)->cred;
+	struct hmdfs_file_info *gfi = kzalloc(sizeof(*gfi), GFP_KERNEL);
+
+	if (!gfi)
+		return -ENOMEM;
+
+	if (IS_ERR_OR_NULL(cred)) {
+		err = -EPERM;
+		goto out_err;
+	}
+	hmdfs_get_lower_path(dentry, &lower_path);
+	lower_file = dentry_open(&lower_path, file->f_flags, cred);
+	hmdfs_put_lower_path(&lower_path);
+	if (IS_ERR(lower_file)) {
+		err = PTR_ERR(lower_file);
+		goto out_err;
+	} else {
+		gfi->lower_file = lower_file;
+		file->private_data = gfi;
+	}
+	return err;
+
+out_err:
+	kfree(gfi);
+	return err;
+}
+
+static int hmdfs_dir_release_local(struct inode *inode, struct file *file)
+{
+	struct hmdfs_file_info *gfi = hmdfs_f(file);
+
+	file->private_data = NULL;
+	fput(gfi->lower_file);
+	kfree(gfi);
+	return 0;
+}
+
+const struct file_operations hmdfs_dir_ops_local = {
+	.owner = THIS_MODULE,
+	.iterate = hmdfs_iterate_local,
+	.open = hmdfs_dir_open_local,
+	.release 
= hmdfs_dir_release_local, + .fsync = hmdfs_fsync_local, +}; diff --git a/fs/hmdfs/file_merge.c b/fs/hmdfs/file_merge.c new file mode 100644 index 0000000000000000000000000000000000000000..2708f2ba24affe375973660b782dfebb06fd29d3 --- /dev/null +++ b/fs/hmdfs/file_merge.c @@ -0,0 +1,525 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/file_merge.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include "hmdfs_merge_view.h" + +#include + +#include "hmdfs.h" +#include "hmdfs_trace.h" + +struct hmdfs_iterate_callback_merge { + struct dir_context ctx; + struct dir_context *caller; + /* + * Record the return value of 'caller->actor': + * + * -EINVAL, buffer is exhausted + * -EINTR, current task is pending + * -EFAULT, something is wrong + * 0, success and can do more + */ + int result; + struct rb_root *root; + uint64_t dev_id; +}; + +struct hmdfs_cache_entry { + struct rb_node rb_node; + int name_len; + char *name; + int file_type; +}; + +struct hmdfs_cache_entry *allocate_entry(const char *name, int namelen, + int d_type) +{ + struct hmdfs_cache_entry *data; + + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return ERR_PTR(-ENOMEM); + + data->name = kstrndup(name, namelen, GFP_KERNEL); + if (!data->name) { + kfree(data); + return ERR_PTR(-ENOMEM); + } + + data->name_len = namelen; + data->file_type = d_type; + + return data; +} + +int insert_filename(struct rb_root *root, struct hmdfs_cache_entry **new_entry) +{ + struct rb_node *parent = NULL; + struct rb_node **new_node = &(root->rb_node); + int cmp_res = 0; + struct hmdfs_cache_entry *data = *new_entry; + + while (*new_node) { + struct hmdfs_cache_entry *entry = container_of( + *new_node, struct hmdfs_cache_entry, rb_node); + parent = *new_node; + + if (data->name_len < entry->name_len) + cmp_res = -1; + else if (data->name_len > entry->name_len) + cmp_res = 1; + else + cmp_res = strncmp(data->name, entry->name, + data->name_len); + + if (!cmp_res) { + kfree(data->name); + kfree(data); + *new_entry = entry; + return entry->file_type; + } + + if (cmp_res < 0) + new_node = &((*new_node)->rb_left); + else if (cmp_res > 0) + new_node = &((*new_node)->rb_right); + } + + rb_link_node(&data->rb_node, parent, new_node); + rb_insert_color(&data->rb_node, root); + + return 0; +} + +static void recursive_delete(struct rb_node *node) +{ + struct hmdfs_cache_entry *entry = NULL; + + if (!node) + return; + + recursive_delete(node->rb_left); + recursive_delete(node->rb_right); + + entry = container_of(node, struct hmdfs_cache_entry, rb_node); + kfree(entry->name); + kfree(entry); +} + +static void destroy_tree(struct rb_root *root) +{ + if (!root) + return; + recursive_delete(root->rb_node); + root->rb_node = NULL; +} + +static void delete_filename(struct rb_root *root, + struct hmdfs_cache_entry *data) +{ + struct rb_node **node = &(root->rb_node); + struct hmdfs_cache_entry *entry = NULL; + int cmp_res = 0; + + while (*node) { + entry = container_of(*node, struct hmdfs_cache_entry, rb_node); + if (data->name_len < entry->name_len) + cmp_res = -1; + else if (data->name_len > entry->name_len) + cmp_res = 1; + else + cmp_res = strncmp(data->name, entry->name, + data->name_len); + + if (!cmp_res) + goto found; + + if (cmp_res < 0) + node = &((*node)->rb_left); + else if (cmp_res > 0) + node = &((*node)->rb_right); + } + return; + +found: + rb_erase(*node, root); + kfree(entry->name); + kfree(entry); +} + +static void rename_conflicting_file(char *dentry_name, int *len, + unsigned int dev_id) +{ + int i = *len - 1; + int 
dot_pos = -1;
+	char *buffer;
+
+	buffer = kzalloc(DENTRY_NAME_MAX_LEN, GFP_KERNEL);
+	if (!buffer)
+		return;
+
+	while (i >= 0) {
+		if (dentry_name[i] == '/')
+			break;
+		if (dentry_name[i] == '.') {
+			// TODO: sync this change to CT01
+			dot_pos = i;
+			break;
+		}
+		i--;
+	}
+
+	if (dot_pos == -1) {
+		snprintf(dentry_name + *len, DENTRY_NAME_MAX_LEN - *len,
+			 CONFLICTING_FILE_SUFFIX, dev_id);
+		goto done;
+	}
+
+	for (i = 0; i < *len - dot_pos; i++)
+		buffer[i] = dentry_name[i + dot_pos];
+
+	buffer[i] = '\0';
+	snprintf(dentry_name + dot_pos, DENTRY_NAME_MAX_LEN - dot_pos,
+		 CONFLICTING_FILE_SUFFIX, dev_id);
+	strcat(dentry_name, buffer);
+
+done:
+	*len = strlen(dentry_name);
+	kfree(buffer);
+}
+
+static void rename_conflicting_directory(char *dentry_name, int *len)
+{
+	snprintf(dentry_name + *len, DENTRY_NAME_MAX_LEN - *len,
+		 CONFLICTING_DIR_SUFFIX);
+	*len += strlen(CONFLICTING_DIR_SUFFIX);
+}
+
+static int hmdfs_actor_merge(struct dir_context *ctx, const char *name,
+			     int namelen, loff_t offset, u64 ino,
+			     unsigned int d_type)
+{
+	int ret = 0;
+	int insert_res = 0;
+	int max_devid_len = 2;
+	char *dentry_name = NULL;
+	int dentry_len = namelen;
+	struct hmdfs_cache_entry *cache_entry = NULL;
+	struct hmdfs_iterate_callback_merge *iterate_callback_merge = NULL;
+	struct dir_context *org_ctx = NULL;
+
+	if (hmdfs_file_type(name) != HMDFS_TYPE_COMMON)
+		return 0;
+
+	if (namelen > NAME_MAX)
+		return -EINVAL;
+	dentry_name = kzalloc(NAME_MAX + 1, GFP_KERNEL);
+	if (!dentry_name)
+		return -ENOMEM;
+
+	strncpy(dentry_name, name, dentry_len);
+
+	cache_entry = allocate_entry(dentry_name, dentry_len, d_type);
+	if (IS_ERR(cache_entry)) {
+		ret = PTR_ERR(cache_entry);
+		goto done;
+	}
+
+	iterate_callback_merge =
+		container_of(ctx, struct hmdfs_iterate_callback_merge, ctx);
+	insert_res =
+		insert_filename(iterate_callback_merge->root, &cache_entry);
+	if (d_type == DT_DIR && insert_res == DT_DIR) {
+		goto done;
+	} else if (d_type == DT_DIR && insert_res == DT_REG) {
+		if (strlen(CONFLICTING_DIR_SUFFIX) > NAME_MAX - dentry_len) {
+			ret = -ENAMETOOLONG;
+			goto delete;
+		}
+		rename_conflicting_directory(dentry_name, &dentry_len);
+		cache_entry->file_type = DT_DIR;
+	} else if (d_type == DT_REG && insert_res > 0) {
+		if (strlen(CONFLICTING_FILE_SUFFIX) + max_devid_len >
+		    NAME_MAX - dentry_len) {
+			ret = -ENAMETOOLONG;
+			goto delete;
+		}
+		rename_conflicting_file(dentry_name, &dentry_len,
+					iterate_callback_merge->dev_id);
+	}
+
+	org_ctx = iterate_callback_merge->caller;
+	ret = org_ctx->actor(org_ctx, dentry_name, dentry_len, org_ctx->pos,
+			     ino, d_type);
+	/*
+	 * Record original return value, so that the caller can be aware of
+	 * different situations.
+	 */
+	iterate_callback_merge->result = ret;
+	ret = ret == 0 ? 0 : 1;
+	if (ret && d_type == DT_DIR && insert_res == DT_REG &&
+	    cache_entry->file_type == DT_DIR)
+		cache_entry->file_type = DT_REG;
+
+delete:
+	if (ret && !insert_res)
+		delete_filename(iterate_callback_merge->root, cache_entry);
+done:
+	kfree(dentry_name);
+	return ret;
+}
+
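+/*
+ * Return the comrade that follows the entry for @device_id in
+ * fi_head->comrade_list, or NULL when that entry is the last one.
+ * hmdfs_iterate_merge() uses it to advance readdir to the next device.
+ */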
+struct hmdfs_file_info *
+get_next_hmdfs_file_info(struct hmdfs_file_info *fi_head, int device_id)
+{
+	struct hmdfs_file_info *fi_iter = NULL;
+	struct hmdfs_file_info *fi_result = NULL;
+
+	mutex_lock(&fi_head->comrade_list_lock);
+	list_for_each_entry_safe(fi_iter, fi_result, &(fi_head->comrade_list),
+				 comrade_list) {
+		if (fi_iter->device_id == device_id)
+			break;
+	}
+	mutex_unlock(&fi_head->comrade_list_lock);
+
+	return fi_result != fi_head ? fi_result : NULL;
+}
+
+struct hmdfs_file_info *get_hmdfs_file_info(struct hmdfs_file_info *fi_head,
+					    int device_id)
+{
+	struct hmdfs_file_info *fi_iter = NULL;
+
+	mutex_lock(&fi_head->comrade_list_lock);
+	list_for_each_entry(fi_iter, &(fi_head->comrade_list), comrade_list) {
+		if (fi_iter->device_id == device_id) {
+			mutex_unlock(&fi_head->comrade_list_lock);
+			return fi_iter;
+		}
+	}
+	mutex_unlock(&fi_head->comrade_list_lock);
+
+	return NULL;
+}
+
+int hmdfs_iterate_merge(struct file *file, struct dir_context *ctx)
+{
+	int err = 0;
+	struct hmdfs_file_info *fi_head = hmdfs_f(file);
+	struct hmdfs_file_info *fi_iter = NULL;
+	struct file *lower_file_iter = NULL;
+	loff_t start_pos = ctx->pos;
+	unsigned long device_id = (unsigned long)((ctx->pos) << 1 >>
+						  (POS_BIT_NUM - DEV_ID_BIT_NUM));
+	struct hmdfs_iterate_callback_merge ctx_merge = {
+		.ctx.actor = hmdfs_actor_merge,
+		.caller = ctx,
+		.root = &fi_head->root,
+		.dev_id = device_id
+	};
+
+	/* pos = -1 indicates that all devices have been traversed
+	 * or an error has occurred.
+	 */
+	if (ctx->pos == -1)
+		return 0;
+
+	fi_iter = get_hmdfs_file_info(fi_head, device_id);
+	if (!fi_iter) {
+		fi_iter = get_next_hmdfs_file_info(fi_head, device_id);
+		// dev_id changed: continue from offset 0 on the next device
+		if (fi_iter)
+			ctx_merge.ctx.pos =
+				hmdfs_set_pos(fi_iter->device_id, 0, 0);
+	}
+	while (fi_iter) {
+		ctx_merge.dev_id = fi_iter->device_id;
+		device_id = ctx_merge.dev_id;
+		lower_file_iter = fi_iter->lower_file;
+		lower_file_iter->f_pos = file->f_pos;
+		err = iterate_dir(lower_file_iter, &ctx_merge.ctx);
+		file->f_pos = lower_file_iter->f_pos;
+		ctx->pos = file->f_pos;
+
+		if (err)
+			goto done;
+		/*
+		 * ctx->actor return nonzero means buffer is exhausted or
+		 * something is wrong, thus we should not continue.
+		 */
+		if (ctx_merge.result)
+			goto done;
+		fi_iter = get_next_hmdfs_file_info(fi_head, device_id);
+		if (fi_iter) {
+			file->f_pos = hmdfs_set_pos(fi_iter->device_id, 0, 0);
+			ctx->pos = file->f_pos;
+		}
+	}
+done:
+	trace_hmdfs_iterate_merge(file->f_path.dentry, start_pos, ctx->pos,
+				  err);
+	return err;
+}
+
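+/*
+ * Open the lower directory on every comrade of the merge dentry. A failure
+ * on one device only skips that device; the open succeeds as long as at
+ * least one lower directory could be opened.
+ */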
+int do_dir_open_merge(struct file *file, const struct cred *cred,
+		      struct hmdfs_file_info *fi_head)
+{
+	int ret = -EINVAL;
+	struct hmdfs_dentry_info_merge *dim = hmdfs_dm(file->f_path.dentry);
+	struct hmdfs_dentry_comrade *comrade = NULL;
+	struct hmdfs_file_info *fi = NULL;
+	struct path lo_p = { .mnt = file->f_path.mnt };
+	struct file *lower_file = NULL;
+
+	if (IS_ERR_OR_NULL(cred))
+		return ret;
+
+	mutex_lock(&dim->comrade_list_lock);
+	list_for_each_entry(comrade, &(dim->comrade_list), list) {
+		fi = kzalloc(sizeof(*fi), GFP_KERNEL);
+		if (!fi) {
+			ret = ret ? -ENOMEM : 0;
+			continue; // allow some dirs to fail to open
+		}
+		lo_p.dentry = comrade->lo_d;
+		// hold a reference so the dentry cannot be killed before open
+		dget(lo_p.dentry);
+		if (unlikely(d_is_negative(lo_p.dentry))) {
+			hmdfs_info("dentry is negative, try again");
+			kfree(fi);
+			dput(lo_p.dentry);
+			continue; // skip this device
+		}
+		lower_file = dentry_open(&lo_p, file->f_flags, cred);
+		dput(lo_p.dentry);
+		if (IS_ERR(lower_file)) {
+			kfree(fi);
+			continue;
+		}
+		ret = 0;
+		fi->device_id = comrade->dev_id;
+		fi->lower_file = lower_file;
+		mutex_lock(&fi_head->comrade_list_lock);
+		list_add_tail(&fi->comrade_list, &fi_head->comrade_list);
+		mutex_unlock(&fi_head->comrade_list_lock);
+	}
+	mutex_unlock(&dim->comrade_list_lock);
+	return ret;
+}
+
+int hmdfs_dir_open_merge(struct inode *inode, struct file *file)
+{
+	int ret = 0;
+	struct hmdfs_file_info *fi = NULL;
+
+	fi = kzalloc(sizeof(*fi), GFP_KERNEL);
+	if (!fi)
+		return -ENOMEM;
+
+	file->private_data = fi;
+	fi->root = RB_ROOT;
+	mutex_init(&fi->comrade_list_lock);
+	INIT_LIST_HEAD(&fi->comrade_list);
+
+	ret = do_dir_open_merge(file, hmdfs_sb(inode->i_sb)->cred, fi);
+	if (ret)
+		kfree(fi);
+
+	return ret;
+}
+
+int hmdfs_dir_release_merge(struct inode *inode, struct file *file)
+{
+	struct hmdfs_file_info *fi_head = hmdfs_f(file);
+	struct hmdfs_file_info *fi_iter = NULL;
+	struct hmdfs_file_info *fi_temp = NULL;
+
+	mutex_lock(&fi_head->comrade_list_lock);
+	list_for_each_entry_safe(fi_iter, fi_temp, &(fi_head->comrade_list),
+				 comrade_list) {
+		list_del_init(&(fi_iter->comrade_list));
+		fput(fi_iter->lower_file);
+		kfree(fi_iter);
+	}
+	mutex_unlock(&fi_head->comrade_list_lock);
+	destroy_tree(&fi_head->root);
+	file->private_data = NULL;
+	kfree(fi_head);
+
+	return 0;
+}
+
+const struct file_operations hmdfs_dir_fops_merge = {
+	.owner = THIS_MODULE,
+	.iterate = hmdfs_iterate_merge,
+	.open = hmdfs_dir_open_merge,
+	.release = hmdfs_dir_release_merge,
+};
+
+int hmdfs_file_open_merge(struct inode *inode, struct file *file)
+{
+	int err = 0;
+	struct file *lower_file = NULL;
+	struct path lo_p = { .mnt = file->f_path.mnt };
+	struct super_block *sb = inode->i_sb;
+	const struct cred *cred = hmdfs_sb(sb)->cred;
+	struct hmdfs_file_info *gfi = NULL;
+	struct dentry *parent = NULL;
+
+	lo_p.dentry = hmdfs_get_fst_lo_d(file->f_path.dentry);
+	if (!lo_p.dentry) {
+		err = -EINVAL;
+		goto out_err;
+	}
+
+	gfi = kzalloc(sizeof(*gfi), GFP_KERNEL);
+	if (!gfi) {
+		err = -ENOMEM;
+		goto out_err;
+	}
+
+	parent = dget_parent(file->f_path.dentry);
+	lower_file = dentry_open(&lo_p, file->f_flags, cred);
+	if (IS_ERR(lower_file)) {
+		err = PTR_ERR(lower_file);
+		kfree(gfi);
+	} else {
+		gfi->lower_file = lower_file;
+		file->private_data = gfi;
+	}
+	dput(parent);
+out_err:
+	dput(lo_p.dentry);
+	return err;
+}
+
+int hmdfs_file_flush_merge(struct file *file, fl_owner_t id)
+{
+	struct hmdfs_file_info *gfi = hmdfs_f(file);
+	struct file *lower_file = gfi->lower_file;
+
+	if (lower_file->f_op->flush)
+		return lower_file->f_op->flush(lower_file, id);
+
+	return 0;
+}
+
+/* Transparent transmission of parameters to device_view level,
+ * so file operations are the same as device_view local operations.
+ */
+const struct file_operations hmdfs_file_fops_merge = {
+	.owner = THIS_MODULE,
+	.llseek = hmdfs_file_llseek_local,
+	.read_iter = hmdfs_read_local,
+	.write_iter = hmdfs_write_local,
+	.mmap = hmdfs_file_mmap_local,
+	.open = hmdfs_file_open_merge,
+	.flush = hmdfs_file_flush_merge,
+	.release = hmdfs_file_release_local,
+	.fsync = hmdfs_fsync_local,
+};
diff --git a/fs/hmdfs/file_remote.c b/fs/hmdfs/file_remote.c
new file mode 100644
index 0000000000000000000000000000000000000000..4ae87a138999359b9faed509d1dee978ad5f419a
--- /dev/null
+++ b/fs/hmdfs/file_remote.c
@@ -0,0 +1,1054 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/file_remote.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "file_remote.h"
+
+#include "comm/socket_adapter.h"
+#include "hmdfs.h"
+#include "hmdfs_client.h"
+#include "hmdfs_dentryfile.h"
+#include "hmdfs_trace.h"
+
+static inline bool hmdfs_remote_write_cache_expired(
+	struct hmdfs_inode_info *info)
+{
+	return time_after(jiffies, info->writecache_expire);
+}
+
+enum expire_reason {
+	ALL_GOOD = 0,
+	INO_DISMATCH = 1,
+	SIZE_OR_CTIME_DISMATCH = 2,
+	TIMER_EXPIRE = 3,
+	TIMER_WORKING = 4,
+	STABLE_CTIME_DISMATCH = 5,
+	KEEP_CACHE = 6,
+};
+
+/*
+ * hmdfs_open_final_remote - Do final steps of opening a remote file, update
+ * local inode cache and decide whether or not to truncate inode pages.
+ *
+ * @info: hmdfs inode info
+ * @open_ret: values returned from remote when opening a remote file
+ * @keep_cache: keep local cache & i_size
+ */
+static int hmdfs_open_final_remote(struct hmdfs_inode_info *info,
+				   struct hmdfs_open_ret *open_ret,
+				   struct file *file, bool keep_cache)
+{
+	struct inode *inode = &info->vfs_inode;
+	bool truncate = false;
+	enum expire_reason reason = ALL_GOOD;
+	int ret = 0;
+
+	/*
+	 * if the remote inode number changed and we looked up stale data,
+	 * we'll return -ESTALE, and reopen the file with metadata from
+	 * remote getattr.
+	 */
+	if (info->remote_ino != open_ret->ino) {
+		hmdfs_debug(
+			"got stale local inode, ino in local %llu, ino from open %llu",
+			info->remote_ino, open_ret->ino);
+		hmdfs_send_close(info->conn, &open_ret->fid);
+		reason = INO_DISMATCH;
+		ret = -ESTALE;
+		goto out;
+	}
+
+	if (keep_cache) {
+		reason = KEEP_CACHE;
+		trace_hmdfs_open_final_remote(info, open_ret, file, reason);
+		goto set_fid_out;
+	}
+
+	/*
+	 * if the remote size does not match the local inode, or the remote
+	 * ctime does not match the one recorded the last time this file was
+	 * opened.
+	 */
+	if (inode->i_size != open_ret->file_size ||
+	    hmdfs_time_compare(&info->remote_ctime, &open_ret->remote_ctime)) {
+		truncate = true;
+		reason = SIZE_OR_CTIME_DISMATCH;
+		goto out;
+	}
+
+	/*
+	 * If 'writecache_expire' is set, check if it expires. And skip the
+	 * checking of stable_ctime.
+	 */
+	if (info->writecache_expire) {
+		truncate = hmdfs_remote_write_cache_expired(info);
+		if (truncate)
+			reason = TIMER_EXPIRE;
+		else
+			reason = TIMER_WORKING;
+		goto out;
+	}
+
+	/* the first time, or remote ctime is ahead of remote time */
+	if (info->stable_ctime.tv_sec == 0 && info->stable_ctime.tv_nsec == 0) {
+		truncate = true;
+		reason = STABLE_CTIME_DISMATCH;
+		goto out;
+	}
+
+	/*
+	 * - if last stable_ctime == stable_ctime, we do nothing.
+	 *   a. if ctime < stable_ctime, data is ensured to be uptodate,
+	 *   b. if ctime == stable_ctime, stale data might be accessed. This is
+	 *      acceptable since pagecache will be dropped later.
+	 *   c.
ctime > stable_ctime is impossible. + * - if last stable_ctime < stable_ctime, we clear the cache. + * d. ctime != last stable_ctime is impossible + * e. ctime == last stable_ctime, this is possible to read again from + * b, thus we need to drop the cache. + * - if last stable_ctime > stable_ctime, we clear the cache. + * stable_ctime must be zero in this case, this is possible because + * system time might be changed. + */ + if (hmdfs_time_compare(&info->stable_ctime, &open_ret->stable_ctime)) { + truncate = true; + reason = STABLE_CTIME_DISMATCH; + goto out; + } + +out: + trace_hmdfs_open_final_remote(info, open_ret, file, reason); + if (ret) + return ret; + + if (reason == SIZE_OR_CTIME_DISMATCH) { + inode->i_ctime = open_ret->remote_ctime; + info->remote_ctime = open_ret->remote_ctime; + } + + if (truncate) { + info->writecache_expire = 0; + truncate_inode_pages(inode->i_mapping, 0); + } + + atomic64_set(&info->write_counter, 0); + info->stable_ctime = open_ret->stable_ctime; + i_size_write(inode, open_ret->file_size); + info->getattr_isize = HMDFS_STALE_REMOTE_ISIZE; +set_fid_out: + spin_lock(&info->fid_lock); + info->fid = open_ret->fid; + spin_unlock(&info->fid_lock); + return 0; +} + +int hmdfs_do_open_remote(struct file *file, bool keep_cache) +{ + struct hmdfs_inode_info *info = hmdfs_i(file_inode(file)); + struct hmdfs_peer *conn = info->conn; + struct hmdfs_open_ret open_ret; + __u8 file_type = hmdfs_d(file->f_path.dentry)->file_type; + char *send_buf; + int err = 0; + + send_buf = hmdfs_get_dentry_relative_path(file->f_path.dentry); + if (!send_buf) { + err = -ENOMEM; + goto out_free; + } + err = hmdfs_send_open(conn, send_buf, file_type, &open_ret); + if (err) { + hmdfs_err("hmdfs_send_open return failed with %d", err); + goto out_free; + } + + err = hmdfs_open_final_remote(info, &open_ret, file, keep_cache); + +out_free: + kfree(send_buf); + return err; +} + +static inline bool hmdfs_remote_need_reopen(struct hmdfs_inode_info *info) +{ + return test_bit(HMDFS_FID_NEED_OPEN, &info->fid_flags); +} + +static inline bool hmdfs_remote_is_opening_file(struct hmdfs_inode_info *info) +{ + return test_bit(HMDFS_FID_OPENING, &info->fid_flags); +} + +static int hmdfs_remote_wait_opening_file(struct hmdfs_inode_info *info) +{ + int err; + + if (!hmdfs_remote_is_opening_file(info)) + return 0; + + err = ___wait_event(info->fid_wq, hmdfs_remote_is_opening_file(info), + TASK_INTERRUPTIBLE, 0, 0, + spin_unlock(&info->fid_lock); + schedule(); + spin_lock(&info->fid_lock)); + if (err) + err = -EINTR; + + return err; +} + +static int hmdfs_remote_file_reopen(struct hmdfs_inode_info *info, + struct file *filp) +{ + int err = 0; + struct hmdfs_peer *conn = info->conn; + struct inode *inode = NULL; + struct hmdfs_fid fid; + + if (conn->status == NODE_STAT_OFFLINE) + return -EAGAIN; + + spin_lock(&info->fid_lock); + err = hmdfs_remote_wait_opening_file(info); + if (err || !hmdfs_remote_need_reopen(info)) { + spin_unlock(&info->fid_lock); + goto out; + } + + set_bit(HMDFS_FID_OPENING, &info->fid_flags); + fid = info->fid; + spin_unlock(&info->fid_lock); + + inode = &info->vfs_inode; + inode_lock(inode); + /* + * Most closing cases are meaningless, except for one: + * read process A read process B + * err = -EBADF err = -EBADF (caused by re-online) + * set_need_reopen + * do reopen + * fid = new fid_1 [server hold fid_1] + * set need_reopen + * do reopen + * send close (fid_1) // In case of leak + * fid = new fid_2 + */ + if (fid.id != HMDFS_INODE_INVALID_FILE_ID) + hmdfs_send_close(conn, &fid); + 
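+	/*
+	 * Reopen with keep_cache set: the pagecache filled under the
+	 * stale fid stays valid, only the fid itself is refreshed.
+	 */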
err = hmdfs_do_open_remote(filp, true); + inode_unlock(inode); + + spin_lock(&info->fid_lock); + /* + * May make the bit set in offline handler lost, but server + * will tell us whether or not the newly-opened file id is + * generated before offline, if it is opened before offline, + * the operation on the file id will return -EBADF and + * HMDFS_FID_NEED_OPEN bit will be set again. + */ + if (!err) + clear_bit(HMDFS_FID_NEED_OPEN, &info->fid_flags); + clear_bit(HMDFS_FID_OPENING, &info->fid_flags); + spin_unlock(&info->fid_lock); + + wake_up_interruptible_all(&info->fid_wq); +out: + return err; +} + +static int hmdfs_remote_check_and_reopen(struct hmdfs_inode_info *info, + struct file *filp) +{ + if (!hmdfs_remote_need_reopen(info)) + return 0; + + return hmdfs_remote_file_reopen(info, filp); +} + +void hmdfs_do_close_remote(struct kref *kref) +{ + struct hmdfs_inode_info *info = + container_of(kref, struct hmdfs_inode_info, ref); + struct hmdfs_fid fid; + + hmdfs_remote_fetch_fid(info, &fid); + /* This function can return asynchronously */ + hmdfs_send_close(info->conn, &fid); +} + +static inline bool hmdfs_remote_need_track_file(const struct hmdfs_sb_info *sbi, + fmode_t mode) +{ + return (hmdfs_is_stash_enabled(sbi) && (mode & FMODE_WRITE)); +} + +static void +hmdfs_remote_del_wr_opened_inode_nolock(struct hmdfs_inode_info *info) +{ + WARN_ON(list_empty(&info->wr_opened_node)); + if (atomic_dec_and_test(&info->wr_opened_cnt)) + list_del_init(&info->wr_opened_node); +} + +void hmdfs_remote_del_wr_opened_inode(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + spin_lock(&conn->wr_opened_inode_lock); + hmdfs_remote_del_wr_opened_inode_nolock(info); + spin_unlock(&conn->wr_opened_inode_lock); +} + +void hmdfs_remote_add_wr_opened_inode_nolock(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + if (list_empty(&info->wr_opened_node)) { + atomic_set(&info->wr_opened_cnt, 1); + list_add_tail(&info->wr_opened_node, + &conn->wr_opened_inode_list); + } else { + atomic_inc(&info->wr_opened_cnt); + } +} + +static void hmdfs_remote_add_wr_opened_inode(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + spin_lock(&conn->wr_opened_inode_lock); + hmdfs_remote_add_wr_opened_inode_nolock(conn, info); + spin_unlock(&conn->wr_opened_inode_lock); +} + +int hmdfs_file_open_remote(struct inode *inode, struct file *file) +{ + struct hmdfs_inode_info *info = hmdfs_i(inode); + struct kref *ref = &(info->ref); + int err = 0; + + inode_lock(inode); + if (kref_read(ref) == 0) { + err = hmdfs_do_open_remote(file, false); + if (err == 0) + kref_init(ref); + } else { + kref_get(ref); + } + inode_unlock(inode); + + if (!err && hmdfs_remote_need_track_file(hmdfs_sb(inode->i_sb), + file->f_mode)) + hmdfs_remote_add_wr_opened_inode(info->conn, info); + + return err; +} + +static void hmdfs_set_writecache_expire(struct hmdfs_inode_info *info, + unsigned int seconds) +{ + unsigned long new_expire = jiffies + seconds * HZ; + + /* + * When file has been written before closing, set pagecache expire + * if it has not been set yet. This is necessary because ctime might + * stay the same after overwrite. 
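+	 * An already armed deadline is never pushed further out: if the
+	 * new expiry (jiffies + seconds * HZ) would land after the
+	 * recorded one, the recorded one is kept.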
+ */
+	if (info->writecache_expire &&
+	    time_after(new_expire, info->writecache_expire))
+		return;
+
+	info->writecache_expire = new_expire;
+}
+
+static void hmdfs_remote_keep_writecache(struct inode *inode, struct file *file)
+{
+	struct hmdfs_inode_info *info = NULL;
+	struct kref *ref = NULL;
+	struct hmdfs_getattr_ret *getattr_ret = NULL;
+	unsigned int write_cache_timeout =
+		hmdfs_sb(inode->i_sb)->write_cache_timeout;
+	int err;
+
+	if (!write_cache_timeout)
+		return;
+
+	info = hmdfs_i(inode);
+	ref = &(info->ref);
+	/*
+	 * don't do anything if the file is still open elsewhere or hasn't
+	 * been written.
+	 */
+	if (kref_read(ref) > 0 || !atomic64_read(&info->write_counter))
+		return;
+
+	/*
+	 * If remote getattr failed, and we don't update ctime,
+	 * pagecache will be truncated the next time file is opened.
+	 */
+	err = hmdfs_remote_getattr(info->conn, file_dentry(file), 0,
+				   &getattr_ret);
+	if (err) {
+		hmdfs_err("remote getattr failed with err %d", err);
+		return;
+	}
+
+	if (!(getattr_ret->stat.result_mask & STATX_CTIME)) {
+		hmdfs_err("get remote ctime failed with mask 0x%x",
+			  getattr_ret->stat.result_mask);
+		kfree(getattr_ret);
+		return;
+	}
+	/*
+	 * update ctime from remote, in case the pagecache would otherwise
+	 * be truncated on the next open.
+	 */
+	inode->i_ctime = getattr_ret->stat.ctime;
+	info->remote_ctime = getattr_ret->stat.ctime;
+	hmdfs_set_writecache_expire(info, write_cache_timeout);
+	kfree(getattr_ret);
+}
+
+int hmdfs_file_release_remote(struct inode *inode, struct file *file)
+{
+	struct hmdfs_inode_info *info = hmdfs_i(inode);
+
+	if (hmdfs_remote_need_track_file(hmdfs_sb(inode->i_sb), file->f_mode))
+		hmdfs_remote_del_wr_opened_inode(info->conn, info);
+
+	inode_lock(inode);
+	kref_put(&info->ref, hmdfs_do_close_remote);
+	hmdfs_remote_keep_writecache(inode, file);
+	inode_unlock(inode);
+
+	return 0;
+}
+
+static int hmdfs_file_flush(struct file *file, fl_owner_t id)
+{
+	int err = 0;
+	struct inode *inode = file_inode(file);
+
+	if (!(file->f_mode & FMODE_WRITE))
+		return 0;
+
+	/*
+	 * Continue regardless of whether file reopen fails or not,
+	 * because there may be no dirty page.
+	 */
+	hmdfs_remote_check_and_reopen(hmdfs_i(inode), file);
+
+	/*
+	 * Waiting for wsem here would impact the performance greatly, so we
+	 * overlap the time to issue as many wbs as we can, expecting async
+	 * wbs are eliminated afterwards.
+	 */
+	filemap_fdatawrite(inode->i_mapping);
+	down_write(&hmdfs_i(inode)->wpage_sem);
+	err = filemap_write_and_wait(inode->i_mapping);
+	up_write(&hmdfs_i(inode)->wpage_sem);
+	return err;
+}
+
+static ssize_t hmdfs_file_read_iter_remote(struct kiocb *iocb,
+					   struct iov_iter *iter)
+{
+	struct file *filp = iocb->ki_filp;
+	struct hmdfs_inode_info *info = hmdfs_i(file_inode(filp));
+	struct file_ra_state *ra = NULL;
+	unsigned int rtt;
+	int err;
+	bool tried = false;
+
+retry:
+	err = hmdfs_remote_check_and_reopen(info, filp);
+	if (err)
+		return err;
+
+	ra = &filp->f_ra;
+	/* rtt is measured in 10 msecs */
+	rtt = hmdfs_tcpi_rtt(info->conn) / 10000;
+	switch (rtt) {
+	case 0:
+		break;
+	case 1:
+		ra->ra_pages = 256;
+		break;
+	case 2:
+		ra->ra_pages = 512;
+		break;
+	default:
+		ra->ra_pages = 1024;
+		break;
+	}
+
+	err = generic_file_read_iter(iocb, iter);
+	if (err < 0 && !tried && hmdfs_remote_need_reopen(info)) {
+		/* Read from a stale fid, try read again once.
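+		 * The failed read has set HMDFS_FID_NEED_OPEN, so the
+		 * retry path reopens the file before reading again.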
*/ + tried = true; + goto retry; + } + + return err; +} + +static inline bool hmdfs_is_file_unwritable(const struct hmdfs_inode_info *info, + bool check_stash) +{ + return (check_stash && hmdfs_inode_is_stashing(info)) || + !hmdfs_is_node_online(info->conn); +} + +static ssize_t __hmdfs_file_write_iter_remote(struct kiocb *iocb, + struct iov_iter *iter, + bool check_stash) +{ + struct file *filp = iocb->ki_filp; + struct inode *inode = file_inode(filp); + struct hmdfs_inode_info *info = hmdfs_i(inode); + ssize_t ret; + + if (hmdfs_is_file_unwritable(info, check_stash)) + return -EAGAIN; + + ret = hmdfs_remote_check_and_reopen(info, filp); + if (ret) + return ret; + + inode_lock(inode); + if (hmdfs_is_file_unwritable(info, check_stash)) { + ret = -EAGAIN; + goto out; + } + ret = generic_write_checks(iocb, iter); + if (ret > 0) + ret = __generic_file_write_iter(iocb, iter); +out: + inode_unlock(inode); + + if (ret > 0) + ret = generic_write_sync(iocb, ret); + return ret; +} + +ssize_t hmdfs_file_write_iter_remote_nocheck(struct kiocb *iocb, + struct iov_iter *iter) +{ + return __hmdfs_file_write_iter_remote(iocb, iter, false); +} + +static ssize_t hmdfs_file_write_iter_remote(struct kiocb *iocb, + struct iov_iter *iter) +{ + return __hmdfs_file_write_iter_remote(iocb, iter, true); +} + +/* hmdfs not support mmap write remote file */ +static vm_fault_t hmdfs_page_mkwrite(struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + +static const struct vm_operations_struct hmdfs_file_vm_ops = { + .fault = filemap_fault, + .map_pages = filemap_map_pages, + .page_mkwrite = hmdfs_page_mkwrite, +}; + +static int hmdfs_file_mmap_remote(struct file *file, struct vm_area_struct *vma) +{ + vma->vm_ops = &hmdfs_file_vm_ops; + file_accessed(file); + + return 0; +} + +static int hmdfs_file_fsync_remote(struct file *file, loff_t start, loff_t end, + int datasync) +{ + struct hmdfs_inode_info *info = hmdfs_i(file_inode(file)); + struct hmdfs_peer *conn = info->conn; + struct hmdfs_fid fid; + int err; + + trace_hmdfs_fsync_enter_remote(conn->sbi, conn->device_id, + info->remote_ino, datasync); + /* + * Continue regardless of whether file reopen fails or not, + * because there may be no dirty page. 
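+	 * If dirty pages do exist and the fid is in fact stale, the
+	 * writeback below will surface the error instead.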
+ */
+	hmdfs_remote_check_and_reopen(info, file);
+
+	filemap_fdatawrite(file->f_mapping);
+	down_write(&info->wpage_sem);
+	err = file_write_and_wait_range(file, start, end);
+	up_write(&info->wpage_sem);
+	if (err) {
+		hmdfs_err("local fsync fail with %d", err);
+		goto out;
+	}
+
+	hmdfs_remote_fetch_fid(info, &fid);
+	err = hmdfs_send_fsync(conn, &fid, start, end, datasync);
+	if (err)
+		hmdfs_err("send fsync fail with %d", err);
+
+out:
+	trace_hmdfs_fsync_exit_remote(conn->sbi, conn->device_id,
+				      info->remote_ino,
+				      get_cmd_timeout(conn->sbi, F_FSYNC), err);
+
+	/* Compatible with POSIX retcode */
+	if (err == -ETIME)
+		err = -EIO;
+
+	return err;
+}
+
+const struct file_operations hmdfs_dev_file_fops_remote = {
+	.owner = THIS_MODULE,
+	.llseek = generic_file_llseek,
+	.read_iter = hmdfs_file_read_iter_remote,
+	.write_iter = hmdfs_file_write_iter_remote,
+	.mmap = hmdfs_file_mmap_remote,
+	.open = hmdfs_file_open_remote,
+	.release = hmdfs_file_release_remote,
+	.flush = hmdfs_file_flush,
+	.fsync = hmdfs_file_fsync_remote,
+};
+
+static void hmdfs_fill_page_zero(struct page *page)
+{
+	void *addr = NULL;
+
+	addr = kmap(page);
+	memset(addr, 0, PAGE_SIZE);
+	kunmap(page);
+	SetPageUptodate(page);
+	unlock_page(page);
+}
+
+static int hmdfs_readpage_remote(struct file *file, struct page *page)
+{
+	struct inode *inode = file_inode(file);
+	struct hmdfs_inode_info *info = hmdfs_i(inode);
+	loff_t isize = i_size_read(inode);
+	pgoff_t end_index = (isize - 1) >> PAGE_SHIFT;
+	struct hmdfs_fid fid;
+
+	if (!isize || page->index > end_index) {
+		hmdfs_fill_page_zero(page);
+		return 0;
+	}
+
+	hmdfs_remote_fetch_fid(info, &fid);
+	return hmdfs_client_readpage(info->conn, &fid, page);
+}
+
+uint32_t hmdfs_get_writecount(struct page *page)
+{
+	uint32_t count = 0;
+	loff_t pos = (loff_t)page->index << HMDFS_PAGE_OFFSET;
+	struct inode *inode = page->mapping->host;
+	loff_t size = i_size_read(inode);
+	/*
+	 * If the page offset is greater than i_size, which is possible when
+	 * writepage races with truncate, we don't need to do the remote
+	 * writepage since the page will be truncated after it is unlocked.
+	 */
+	if (pos >= size)
+		count = 0;
+	/*
+	 * If the page about to write is beyond i_size, we can't write beyond
+	 * i_size because remote file size will be wrong.
+	 */
+	else if (size < pos + HMDFS_PAGE_SIZE)
+		count = size - pos;
+	/* It's safe to write the whole page */
+	else
+		count = HMDFS_PAGE_SIZE;
+
+	return count;
+}
+
+static bool allow_cur_thread_wpage(struct hmdfs_inode_info *info,
+				   bool *rsem_held, bool sync_all)
+{
+	WARN_ON(!rsem_held);
+
+	if (sync_all) {
+		*rsem_held = false;
+		return true;
+	}
+	*rsem_held = down_read_trylock(&info->wpage_sem);
+	return *rsem_held;
+}
+
+/**
+ * hmdfs_writepage_remote - writeback a dirty page to remote
+ *
+ * INFO:
+ * When asked to WB_SYNC_ALL, this function should leave with both the page and
+ * the radix tree node clean to achieve close-to-open consistency. Moreover,
+ * this shall never return -EIO to help filemap to iterate all dirty pages.
+ *
+ * INFO:
+ * When asked to WB_SYNC_NONE, this function should be merciful if faults (oom
+ * or a bad pipe) happened, to enable subsequent r/w & wb.
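+ *
+ * INFO:
+ * wpage_sem is only taken (as reader) for WB_SYNC_NONE: the flush and
+ * fsync paths already hold it as writer around their WB_SYNC_ALL pass,
+ * so a sync_all writepage must not take it again.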
+ */
+static int hmdfs_writepage_remote(struct page *page,
+				  struct writeback_control *wbc)
+{
+	struct inode *inode = page->mapping->host;
+	struct hmdfs_inode_info *info = hmdfs_i(inode);
+	struct hmdfs_sb_info *sbi = hmdfs_sb(inode->i_sb);
+	int ret = 0;
+	bool rsem_held = false;
+	bool sync = wbc->sync_mode == WB_SYNC_ALL;
+	struct hmdfs_writepage_context *param = NULL;
+
+	if (!allow_cur_thread_wpage(info, &rsem_held, sync))
+		goto out_unlock;
+
+	set_page_writeback(page);
+
+	param = kzalloc(sizeof(*param), GFP_NOFS);
+	if (!param) {
+		ret = -ENOMEM;
+		goto out_endwb;
+	}
+
+	if (sync && hmdfs_usr_sig_pending(current)) {
+		ClearPageUptodate(page);
+		goto out_free;
+	}
+	param->count = hmdfs_get_writecount(page);
+	if (!param->count)
+		goto out_free;
+	param->rsem_held = rsem_held;
+	hmdfs_remote_fetch_fid(info, &param->fid);
+	param->sync_all = sync;
+	param->caller = current;
+	get_task_struct(current);
+	param->page = page;
+	param->timeout = jiffies + msecs_to_jiffies(sbi->wb_timeout_ms);
+	INIT_DELAYED_WORK(&param->retry_dwork, hmdfs_remote_writepage_retry);
+	ret = hmdfs_remote_do_writepage(info->conn, param);
+	if (likely(!ret))
+		return 0;
+
+	put_task_struct(current);
+out_free:
+	kfree(param);
+out_endwb:
+	end_page_writeback(page);
+	if (rsem_held)
+		up_read(&info->wpage_sem);
+out_unlock:
+	if (sync || !hmdfs_need_redirty_page(info, ret)) {
+		SetPageError(page);
+		mapping_set_error(page->mapping, ret);
+	} else {
+		redirty_page_for_writepage(wbc, page);
+	}
+	unlock_page(page);
+	return ret;
+}
+
+static void hmdfs_account_dirty_pages(struct address_space *mapping)
+{
+	struct hmdfs_sb_info *sbi = mapping->host->i_sb->s_fs_info;
+
+	if (!sbi->h_wb->dirty_writeback_control)
+		return;
+
+	this_cpu_inc(*sbi->h_wb->bdp_ratelimits);
+}
+
+static int hmdfs_write_begin_remote(struct file *file,
+				    struct address_space *mapping, loff_t pos,
+				    unsigned int len, unsigned int flags,
+				    struct page **pagep, void **fsdata)
+{
+	pgoff_t index = ((unsigned long long)pos) >> PAGE_SHIFT;
+	struct inode *inode = file_inode(file);
+	struct page *page = NULL;
+	int ret = 0;
+
+start:
+	page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
+	if (!page)
+		return -ENOMEM;
+	*pagep = page;
+	wait_on_page_writeback(page);
+
+	// If this page will be covered completely.
+	if (len == HMDFS_PAGE_SIZE || PageUptodate(page))
+		return 0;
+
+	/*
+	 * If the data already in this page is about to be fully
+	 * overwritten, we just need to clear this page.
+	 */
+	if (!((unsigned long long)pos & (HMDFS_PAGE_SIZE - 1)) &&
+	    (pos + len) >= i_size_read(inode)) {
+		zero_user_segment(page, len, HMDFS_PAGE_SIZE);
+		return 0;
+	}
+	/*
+	 * We need readpage before writing data to this page.
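+	 * (read-modify-write: a partial write into a page that is not
+	 * uptodate must first fetch the remote content, otherwise the
+	 * untouched bytes of the page would be lost).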
+ */ + ret = hmdfs_readpage_remote(file, page); + if (!ret) { + if (PageLocked(page)) { + ret = __lock_page_killable(page); + if (!ret) + unlock_page(page); + } + + if (!ret && PageUptodate(page)) { + put_page(page); + goto start; + } + if (!ret) + ret = -EIO; + } + put_page(page); + return ret; +} + +static int hmdfs_write_end_remote(struct file *file, + struct address_space *mapping, loff_t pos, + unsigned int len, unsigned int copied, + struct page *page, void *fsdata) +{ + struct inode *inode = page->mapping->host; + + if (!PageUptodate(page)) { + if (unlikely(copied != len)) + copied = 0; + else + SetPageUptodate(page); + } + if (!copied) + goto unlock_out; + + if (!PageDirty(page)) { + hmdfs_account_dirty_pages(mapping); + set_page_dirty(page); + } + + if (pos + copied > i_size_read(inode)) { + i_size_write(inode, pos + copied); + hmdfs_i(inode)->getattr_isize = HMDFS_STALE_REMOTE_ISIZE; + } +unlock_out: + unlock_page(page); + put_page(page); + + /* hmdfs private writeback control */ + hmdfs_balance_dirty_pages_ratelimited(mapping); + return copied; +} + +const struct address_space_operations hmdfs_dev_file_aops_remote = { + .readpage = hmdfs_readpage_remote, + .write_begin = hmdfs_write_begin_remote, + .write_end = hmdfs_write_end_remote, + .writepage = hmdfs_writepage_remote, + .set_page_dirty = __set_page_dirty_nobuffers, +}; + +loff_t hmdfs_set_pos(unsigned long dev_id, unsigned long group_id, + unsigned long offset) +{ + loff_t pos; + + pos = ((loff_t)dev_id << (POS_BIT_NUM - 1 - DEV_ID_BIT_NUM)) + + ((loff_t)group_id << OFFSET_BIT_NUM) + offset; + if (dev_id) + pos |= ((loff_t)1 << (POS_BIT_NUM - 1)); + return pos; +} + +static int analysis_dentry_file_from_con(struct hmdfs_sb_info *sbi, + struct file *file, + struct file *handler, + struct dir_context *ctx) +{ + struct hmdfs_dentry_group *dentry_group = NULL; + loff_t pos = ctx->pos; + unsigned long dev_id = (unsigned long)((pos << 1) >> (POS_BIT_NUM - DEV_ID_BIT_NUM)); + unsigned long group_id = (unsigned long)((pos << (1 + DEV_ID_BIT_NUM)) >> + (POS_BIT_NUM - GROUP_ID_BIT_NUM)); + loff_t offset = pos & OFFSET_BIT_MASK; + int group_num = 0; + char *dentry_name = NULL; + int iterate_result = 0; + int i, j; + + dentry_group = kzalloc(sizeof(*dentry_group), GFP_KERNEL); + + if (!dentry_group) + return -ENOMEM; + + if (IS_ERR_OR_NULL(handler)) { + kfree(dentry_group); + return -ENOENT; + } + + group_num = get_dentry_group_cnt(file_inode(handler)); + dentry_name = kzalloc(DENTRY_NAME_MAX_LEN, GFP_KERNEL); + if (!dentry_name) { + kfree(dentry_group); + return -ENOMEM; + } + + for (i = group_id; i < group_num; i++) { + int ret = hmdfs_metainfo_read(sbi, handler, dentry_group, + sizeof(struct hmdfs_dentry_group), + i); + if (ret != sizeof(struct hmdfs_dentry_group)) { + hmdfs_err("read dentry group failed ret:%d", ret); + goto done; + } + + for (j = offset; j < DENTRY_PER_GROUP; j++) { + int len; + int file_type = DT_UNKNOWN; + bool is_continue; + + len = le16_to_cpu(dentry_group->nsl[j].namelen); + if (!test_bit_le(j, dentry_group->bitmap) || len == 0) + continue; + + memset(dentry_name, 0, DENTRY_NAME_MAX_LEN); + // TODO: Support more file_type + if (S_ISDIR(le16_to_cpu(dentry_group->nsl[j].i_mode))) + file_type = DT_DIR; + else if (S_ISREG(le16_to_cpu( + dentry_group->nsl[j].i_mode))) + file_type = DT_REG; + + strncat(dentry_name, dentry_group->filename[j], len); + pos = hmdfs_set_pos(dev_id, i, j); + is_continue = + dir_emit(ctx, dentry_name, len, + pos + INUNUMBER_START, file_type); + if (!is_continue) { + ctx->pos = pos; + 
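+				/*
+				 * Caller's buffer is exhausted: remember
+				 * this position so the next iterate call
+				 * resumes right here.
+				 */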
iterate_result = 1; + goto done; + } + } + offset = 0; + } + +done: + kfree(dentry_name); + kfree(dentry_group); + return iterate_result; +} + +int hmdfs_dev_readdir_from_con(struct hmdfs_peer *con, struct file *file, + struct dir_context *ctx) +{ + int iterate_result = 0; + + iterate_result = analysis_dentry_file_from_con( + con->sbi, file, file->private_data, ctx); + return iterate_result; +} + +static int hmdfs_iterate_remote(struct file *file, struct dir_context *ctx) +{ + int err = 0; + loff_t start_pos = ctx->pos; + struct hmdfs_peer *con = NULL; + struct hmdfs_dentry_info *di = hmdfs_d(file->f_path.dentry); + bool is_local = !((ctx->pos) >> (POS_BIT_NUM - 1)); + uint64_t dev_id = di->device_id; + + if (ctx->pos == -1) + return 0; + if (is_local) + ctx->pos = hmdfs_set_pos(dev_id, 0, 0); + + con = hmdfs_lookup_from_devid(file->f_inode->i_sb->s_fs_info, dev_id); + if (con) { + // ctx->pos = 0; + err = con->conn_operations->remote_readdir(con, file, ctx); + if (unlikely(!con)) { + hmdfs_err("con is null"); + goto done; + } + peer_put(con); + if (err) + goto done; + } + +done: + if (err <= 0) + ctx->pos = -1; + + trace_hmdfs_iterate_remote(file->f_path.dentry, start_pos, ctx->pos, + err); + return err; +} + +int hmdfs_dir_open_remote(struct inode *inode, struct file *file) +{ + struct hmdfs_inode_info *info = hmdfs_i(inode); + struct clearcache_item *cache_item = NULL; + + if (info->conn && info->conn->version <= USERSPACE_MAX_VER) { + return 0; + } else if (info->conn) { + if (!hmdfs_cache_revalidate(READ_ONCE(info->conn->conn_time), + info->conn->device_id, + file->f_path.dentry)) + get_remote_dentry_file_sync(file->f_path.dentry, + info->conn); + cache_item = hmdfs_find_cache_item(info->conn->device_id, + file->f_path.dentry); + if (cache_item) { + file->private_data = cache_item->filp; + get_file(file->private_data); + kref_put(&cache_item->ref, release_cache_item); + return 0; + } + return -ENOENT; + } + return -ENOENT; +} + +static int hmdfs_dir_release_remote(struct inode *inode, struct file *file) +{ + if (file->private_data) + fput(file->private_data); + file->private_data = NULL; + return 0; +} + +const struct file_operations hmdfs_dev_dir_ops_remote = { + .owner = THIS_MODULE, + .iterate = hmdfs_iterate_remote, + .open = hmdfs_dir_open_remote, + .release = hmdfs_dir_release_remote, + .fsync = __generic_file_fsync, +}; diff --git a/fs/hmdfs/file_remote.h b/fs/hmdfs/file_remote.h new file mode 100644 index 0000000000000000000000000000000000000000..026bd0c944a60c6f01d5cadf5cb671b2be9b355d --- /dev/null +++ b/fs/hmdfs/file_remote.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/file_remote.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_FILE_REMOTE_H +#define HMDFS_FILE_REMOTE_H + +#include +#include + +#include "hmdfs.h" +#include "comm/connection.h" + +void hmdfs_remote_del_wr_opened_inode(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info); + +void hmdfs_remote_add_wr_opened_inode_nolock(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info); + +ssize_t hmdfs_file_write_iter_remote_nocheck(struct kiocb *iocb, + struct iov_iter *iter); + +#endif diff --git a/fs/hmdfs/file_root.c b/fs/hmdfs/file_root.c new file mode 100644 index 0000000000000000000000000000000000000000..d82ff4d0b04b0958fb3e34022b0937a0b9d0294e --- /dev/null +++ b/fs/hmdfs/file_root.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/file_root.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#include +#include +#include + +#include "authority/authentication.h" +#include "comm/socket_adapter.h" +#include "comm/transport.h" +#include "hmdfs.h" +#include "hmdfs_dentryfile.h" +#include "hmdfs_device_view.h" + +#define DEVICE_VIEW_CTX_POS 2 +#define MERGE_VIEW_CTX_POS 3 +#define ROOT_DIR_INO_START 20000000 + +// used by hmdfs_device_iterate functions +#define DEVICE_VIEW_INO_START 20000002 +#define LOCAL_DEVICE_CTX_POS 2 + +struct hmdfs_peer *get_next_con(struct hmdfs_sb_info *sbi, + unsigned long current_dev_id) +{ + struct hmdfs_peer *con = NULL; + struct hmdfs_peer *next_con = NULL; + struct list_head *head, *node; + + mutex_lock(&sbi->connections.node_lock); + head = &sbi->connections.node_list; + if (current_dev_id == 0) { + node = head->next; + if (node == head) + goto done; + next_con = container_of(node, struct hmdfs_peer, list); + if (next_con->status == NODE_STAT_ONLINE) + goto done; + current_dev_id = next_con->device_id; + next_con = NULL; + } + + list_for_each_entry(con, &sbi->connections.node_list, list) { + if ((con->device_id & 0xFFFF) == (current_dev_id & 0xFFFF)) { + node = con->list.next; + if (node == head) + goto done; + next_con = container_of(node, struct hmdfs_peer, list); + if (next_con->status == NODE_STAT_ONLINE) + goto done; + current_dev_id = next_con->device_id; + next_con = NULL; + } + } +done: + if (next_con) + peer_get(next_con); + mutex_unlock(&sbi->connections.node_lock); + return next_con; +} + +int hmdfs_device_iterate(struct file *file, struct dir_context *ctx) +{ + int err = 0; + uint64_t ino_start = DEVICE_VIEW_INO_START; + struct hmdfs_peer *next_con = NULL; + unsigned long dev_id = 0; + struct hmdfs_peer *con = NULL; + char *remote_device_name = NULL; + + if (ctx->pos != 0) + goto out; + dir_emit_dots(file, ctx); + + if (ctx->pos == LOCAL_DEVICE_CTX_POS) { + err = dir_emit(ctx, DEVICE_VIEW_LOCAL, + sizeof(DEVICE_VIEW_LOCAL) - 1, ino_start++, + DT_DIR); + if (!err) + goto out; + (ctx->pos)++; + } + next_con = get_next_con(file->f_inode->i_sb->s_fs_info, 0); + if (!next_con) + goto out; + + dev_id = next_con->device_id; + peer_put(next_con); + con = hmdfs_lookup_from_devid(file->f_inode->i_sb->s_fs_info, dev_id); + remote_device_name = kmalloc(HMDFS_CID_SIZE + 1, GFP_KERNEL); + if (!remote_device_name) { + err = -ENOMEM; + goto out; + } + while (con) { + peer_put(con); + snprintf(remote_device_name, HMDFS_CID_SIZE + 1, "%s", + con->cid); + if (!dir_emit(ctx, remote_device_name, + strlen(remote_device_name), ino_start++, DT_DIR)) + goto done; + + (ctx->pos)++; + con = get_next_con(file->f_inode->i_sb->s_fs_info, dev_id); + if (!con) + goto done; + + dev_id = con->device_id; + } +done: + kfree(remote_device_name); +out: + if (err <= 0) + ctx->pos = -1; + + return err; +} + +int hmdfs_root_iterate(struct file *file, struct dir_context *ctx) +{ + uint64_t ino_start = ROOT_DIR_INO_START; + struct hmdfs_sb_info *sbi = file_inode(file)->i_sb->s_fs_info; + + if (!dir_emit_dots(file, ctx)) + return 0; + if (ctx->pos == DEVICE_VIEW_CTX_POS) { + if (!dir_emit(ctx, DEVICE_VIEW_ROOT, + sizeof(DEVICE_VIEW_ROOT) - 1, ino_start, DT_DIR)) + return 0; + ino_start++; + ctx->pos = MERGE_VIEW_CTX_POS; + } + if (sbi->s_merge_switch && ctx->pos == MERGE_VIEW_CTX_POS) { + if (!dir_emit(ctx, MERGE_VIEW_ROOT, sizeof(MERGE_VIEW_ROOT) - 1, + ino_start, DT_DIR)) + return 0; + (ctx->pos)++; + } + return 0; +} + +const struct file_operations hmdfs_root_fops = { + .owner = THIS_MODULE, + .iterate = hmdfs_root_iterate, +}; + +const struct file_operations 
hmdfs_device_fops = {
+	.owner = THIS_MODULE,
+	.iterate = hmdfs_device_iterate,
+};
diff --git a/fs/hmdfs/hmdfs.h b/fs/hmdfs/hmdfs.h
new file mode 100644
index 0000000000000000000000000000000000000000..d0a24db08f62144d53a8912198956105d68d7259
--- /dev/null
+++ b/fs/hmdfs/hmdfs.h
@@ -0,0 +1,325 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/hmdfs.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_H
+#define HMDFS_H
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "comm/protocol.h"
+#include "comm/fault_inject.h"
+
+#if KERNEL_VERSION(4, 15, 0) < LINUX_VERSION_CODE
+#define hmdfs_time_t timespec64
+#define hmdfs_time_compare timespec64_compare
+#define hmdfs_time_add timespec64_add
+#else
+#define hmdfs_time_t timespec
+#define hmdfs_time_compare timespec_compare
+#define hmdfs_time_add timespec_add
+#endif
+
+#define HMDFS_PAGE_SIZE 4096
+#define HMDFS_PAGE_OFFSET 12
+
+/* max xattr value size, not include '\0' */
+#define HMDFS_XATTR_SIZE_MAX 4096
+/* max listxattr response size, include '\0' */
+#define HMDFS_LISTXATTR_SIZE_MAX 4096
+
+// 20 digits + '\0', converted from a u64 integer
+#define HMDFS_ACCOUNT_HASH_MAX_LEN 21
+#define CTRL_PATH_MAX_LEN 21
+
+#define HMDFS_SUPER_MAGIC 0x20200302
+
+#define DEFAULT_WRITE_CACHE_TIMEOUT 30
+#define DEFAULT_SRV_REQ_MAX_ACTIVE 16
+
+#define HMDFS_INODE_INVALID_FILE_ID (1U << 31)
+#define HMDFS_FID_VER_BOOT_COOKIE_SHIFT 15
+
+/* According to task_struct instead of workqueue_struct */
+#define HMDFS_WQ_NAME_LEN 16
+
+#define HMDFS_DEF_WB_TIMEOUT_MS 60000
+#define HMDFS_MAX_WB_TIMEOUT_MS 900000
+
+#define HMDFS_READPAGES_NR_MAX 32
+
+enum {
+	HMDFS_FEATURE_READPAGES = 1ULL << 0,
+	HMDFS_FEATURE_READPAGES_OPEN = 1ULL << 1,
+	HMDFS_ATOMIC_OPEN = 1ULL << 2,
+};
+
+struct client_statistic;
+struct server_statistic;
+struct hmdfs_writeback;
+struct hmdfs_server_writeback;
+struct hmdfs_syncfs_info {
+	wait_queue_head_t wq;
+	atomic_t wait_count;
+	int remote_ret;
+	unsigned long long version;
+
+	/* Protect version in concurrent operations */
+	spinlock_t v_lock;
+	/*
+	 * Serialize hmdfs_sync_fs() process:
+	 * |<- pending_list ->| executing |<- wait_list ->|
+	 *  syncfs_1 syncfs_2  (syncfs_3)  syncfs_4 syncfs_5
+	 *
+	 * Abandon syncfs processes in pending_list after syncfs_3 finished;
+	 * Pick the last syncfs process in wait_list after syncfs_3 finished;
+	 */
+	bool is_executing;
+	/* syncfs processes arriving after the currently executing syncfs */
+	struct list_head wait_list;
+	/* syncfs processes arriving before the currently executing syncfs */
+	struct list_head pending_list;
+	spinlock_t list_lock;
+};
+
+struct hmdfs_sb_info {
+	/* list for all registered superblocks */
+	struct list_head list;
+	struct mutex umount_mutex;
+
+	struct kobject kobj;
+	struct completion s_kobj_unregister;
+	struct super_block *sb;
+	struct super_block *lower_sb;
+	/* from mount, which is root */
+	const struct cred *cred;
+	/* from update cmd, expected to be system */
+	const struct cred *system_cred;
+	struct {
+		struct mutex node_lock;
+		struct list_head node_list;
+		atomic_t conn_seq;
+		unsigned long recent_ol;
+	} connections;
+	char *local_dst;
+	char *real_dst;
+	char *local_src;
+	char *cache_dir;
+	/* seq number for hmdfs super block */
+	unsigned int seq;
+
+	/*
+	 * This value indicates how long (in seconds) the pagecache stays
+	 * valid on the client if the metadata (except iversion) equals the
+	 * server's. This functionality is disabled if this value is 0.
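+	 * E.g. with the default of 30 (DEFAULT_WRITE_CACHE_TIMEOUT above),
+	 * a client may keep serving reads from its pagecache for up to
+	 * ~30 seconds after the last write before revalidating.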
+ */ + unsigned int write_cache_timeout; + unsigned int dcache_timeout; + unsigned int dcache_precision; + unsigned long dcache_threshold; + struct list_head client_cache; + struct list_head server_cache; + struct list_head to_delete; + struct mutex cache_list_lock; + + /* local operation time statistic */ + struct server_statistic *s_server_statis; + + /* client statistic */ + struct client_statistic *s_client_statis; + + /* TIMEOUT of each command */ + struct kobject s_cmd_timeout_kobj; + struct completion s_timeout_kobj_unregister; + unsigned int s_cmd_timeout[F_SIZE]; + + /* For case sensitive */ + bool s_case_sensitive; + + /* For features supporting */ + u64 s_features; + + /* For merge & device view */ + unsigned int s_merge_switch; + /* For writeback */ + struct hmdfs_writeback *h_wb; + /* For server writeback */ + struct hmdfs_server_writeback *h_swb; + + /* syncfs info */ + struct hmdfs_syncfs_info hsi; + + /* To bridge the userspace utils */ + struct kfifo notify_fifo; + spinlock_t notify_fifo_lock; + struct hmdfs_fault_inject fault_inject; + + /* For reboot detect */ + uint64_t boot_cookie; + /* offline process */ + unsigned int async_cb_delay; + /* For server handle requests */ + unsigned int async_req_max_active; + /* stash dirty pages during offline */ + bool s_offline_stash; + + /* Timeout (ms) to retry writing remote pages */ + unsigned int wb_timeout_ms; + + struct path stash_work_dir; + /* dentry cache */ + bool s_dentry_cache; + + /* msgs that are waiting for remote */ + struct list_head async_readdir_msg_list; + /* protect async_readdir_msg_list */ + spinlock_t async_readdir_msg_lock; + /* async readdir work that are queued but not finished */ + struct list_head async_readdir_work_list; + /* protect async_readdir_work_list */ + spinlock_t async_readdir_work_lock; + /* wait for async_readdir_work_list to be empty in umount */ + wait_queue_head_t async_readdir_wq; + /* don't allow async readdir */ + bool async_readdir_prohibit; +}; + +static inline struct hmdfs_sb_info *hmdfs_sb(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline bool hmdfs_is_stash_enabled(const struct hmdfs_sb_info *sbi) +{ + return sbi->s_offline_stash; +} + +struct setattr_info { + loff_t size; + unsigned int valid; + umode_t mode; + kuid_t uid; + kgid_t gid; + long long atime; + long atime_nsec; + long long mtime; + long mtime_nsec; + long long ctime; + long ctime_nsec; +}; + +struct hmdfs_file_info { + union { + struct { + struct rb_root root; + struct mutex comrade_list_lock; + }; + struct { + struct file *lower_file; + int device_id; + }; + }; + struct list_head comrade_list; +}; + +static inline struct hmdfs_file_info *hmdfs_f(struct file *file) +{ + return file->private_data; +} + +// Almost all the source files want this, so... 
+#include "inode.h"
+
+/* locking helpers */
+static inline struct dentry *lock_parent(struct dentry *dentry)
+{
+	struct dentry *dir = dget_parent(dentry);
+
+	inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
+	return dir;
+}
+
+static inline void unlock_dir(struct dentry *dir)
+{
+	inode_unlock(d_inode(dir));
+	dput(dir);
+}
+
+extern uint64_t path_hash(const char *path, int len, bool case_sense);
+extern int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
+			   const char *name, unsigned int flags,
+			   struct path *path);
+extern ssize_t hmdfs_remote_listxattr(struct dentry *dentry, char *buffer,
+				      size_t size);
+
+int check_filename(const char *name, int len);
+
+int hmdfs_permission(struct inode *inode, int mask);
+
+int hmdfs_parse_options(struct hmdfs_sb_info *sbi, const char *data);
+
+/* Refer to comments in hmdfs_request_work_fn() */
+#define HMDFS_SERVER_CTX_FLAGS (PF_KTHREAD | PF_WQ_WORKER | PF_NPROC_EXCEEDED)
+
+static inline bool is_current_hmdfs_server_ctx(void)
+{
+	return ((current->flags & HMDFS_SERVER_CTX_FLAGS) ==
+		HMDFS_SERVER_CTX_FLAGS);
+}
+
+extern uint64_t hmdfs_gen_boot_cookie(void);
+
+static inline bool str_n_case_eq(const char *s1, const char *s2, size_t len)
+{
+	return !strncasecmp(s1, s2, len);
+}
+
+static inline bool qstr_case_eq(const struct qstr *q1, const struct qstr *q2)
+{
+	return q1->len == q2->len && str_n_case_eq(q1->name, q2->name, q2->len);
+}
+
+/*****************************************************************************
+ * log print helpers
+ *****************************************************************************/
+__printf(4, 5) void __hmdfs_log(const char *level, const bool ratelimited,
+				const char *function, const char *fmt, ...);
+#define hmdfs_err(fmt, ...) \
+	__hmdfs_log(KERN_ERR, false, __func__, fmt, ##__VA_ARGS__)
+#define hmdfs_warning(fmt, ...) \
+	__hmdfs_log(KERN_WARNING, false, __func__, fmt, ##__VA_ARGS__)
+#define hmdfs_info(fmt, ...) \
+	__hmdfs_log(KERN_INFO, false, __func__, fmt, ##__VA_ARGS__)
+#define hmdfs_err_ratelimited(fmt, ...) \
+	__hmdfs_log(KERN_ERR, true, __func__, fmt, ##__VA_ARGS__)
+#define hmdfs_warning_ratelimited(fmt, ...) \
+	__hmdfs_log(KERN_WARNING, true, __func__, fmt, ##__VA_ARGS__)
+#define hmdfs_info_ratelimited(fmt, ...) \
+	__hmdfs_log(KERN_INFO, true, __func__, fmt, ##__VA_ARGS__)
+#ifdef CONFIG_HMDFS_FS_DEBUG
+#define hmdfs_debug(fmt, ...) \
+	__hmdfs_log(KERN_DEBUG, false, __func__, fmt, ##__VA_ARGS__)
+#define hmdfs_debug_ratelimited(fmt, ...) \
+	__hmdfs_log(KERN_DEBUG, true, __func__, fmt, ##__VA_ARGS__)
+#else
+#define hmdfs_debug(fmt, ...) ((void)0)
+#define hmdfs_debug_ratelimited(fmt, ...) ((void)0)
+#endif
+
+/*****************************************************************************
+ * inode/file operations declaration
+ *****************************************************************************/
+extern const struct inode_operations hmdfs_device_ops;
+extern const struct inode_operations hmdfs_root_ops;
+extern const struct file_operations hmdfs_root_fops;
+extern const struct file_operations hmdfs_device_fops;
+
+#endif // HMDFS_H
diff --git a/fs/hmdfs/hmdfs_client.c b/fs/hmdfs/hmdfs_client.c
new file mode 100644
index 0000000000000000000000000000000000000000..2c381f57f7e01352749244550c535fa51a4e5b9b
--- /dev/null
+++ b/fs/hmdfs/hmdfs_client.c
@@ -0,0 +1,1096 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/hmdfs_client.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */ + +#include "hmdfs_client.h" +#include "hmdfs_server.h" + +#include +#include +#include + +#include "comm/socket_adapter.h" +#include "hmdfs_dentryfile.h" +#include "hmdfs_trace.h" +#include "comm/node_cb.h" +#include "stash.h" +#include "authority/authentication.h" + +#define HMDFS_SYNC_WPAGE_RETRY_MS 2000 + +static inline void free_sm_outbuf(struct hmdfs_send_command *sm) +{ + if (sm->out_buf && sm->out_len != 0) + kfree(sm->out_buf); + sm->out_len = 0; + sm->out_buf = NULL; +} + +int hmdfs_send_open(struct hmdfs_peer *con, const char *send_buf, + __u8 file_type, struct hmdfs_open_ret *open_ret) +{ + int ret; + int path_len = strlen(send_buf); + size_t send_len = sizeof(struct open_request) + path_len + 1; + struct open_request *open_req = kzalloc(send_len, GFP_KERNEL); + struct open_response *resp; + struct hmdfs_send_command sm = { + .data = open_req, + .len = send_len, + }; + hmdfs_init_cmd(&sm.operations, F_OPEN); + + if (!open_req) { + ret = -ENOMEM; + goto out; + } + open_req->file_type = file_type; + open_req->path_len = cpu_to_le32(path_len); + strcpy(open_req->buf, send_buf); + ret = hmdfs_sendmessage_request(con, &sm); + kfree(open_req); + + if (!ret && (sm.out_len == 0 || !sm.out_buf)) + ret = -ENOENT; + if (ret) + goto out; + resp = sm.out_buf; + + open_ret->ino = le64_to_cpu(resp->ino); + open_ret->fid.ver = le64_to_cpu(resp->file_ver); + open_ret->fid.id = le32_to_cpu(resp->file_id); + open_ret->file_size = le64_to_cpu(resp->file_size); + open_ret->remote_ctime.tv_sec = le64_to_cpu(resp->ctime); + open_ret->remote_ctime.tv_nsec = le32_to_cpu(resp->ctime_nsec); + open_ret->stable_ctime.tv_sec = le64_to_cpu(resp->stable_ctime); + open_ret->stable_ctime.tv_nsec = le32_to_cpu(resp->stable_ctime_nsec); + +out: + free_sm_outbuf(&sm); + return ret; +} + +void hmdfs_send_close(struct hmdfs_peer *con, const struct hmdfs_fid *fid) +{ + size_t send_len = sizeof(struct release_request); + struct release_request *release_req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = release_req, + .len = send_len, + }; + hmdfs_init_cmd(&sm.operations, F_RELEASE); + + if (!release_req) + return; + + release_req->file_ver = cpu_to_le64(fid->ver); + release_req->file_id = cpu_to_le32(fid->id); + + hmdfs_sendmessage_request(con, &sm); + kfree(release_req); +} + +int hmdfs_send_fsync(struct hmdfs_peer *con, const struct hmdfs_fid *fid, + __s64 start, __s64 end, __s32 datasync) +{ + int ret; + struct fsync_request *fsync_req = + kzalloc(sizeof(struct fsync_request), GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = fsync_req, + .len = sizeof(struct fsync_request), + }; + + hmdfs_init_cmd(&sm.operations, F_FSYNC); + if (!fsync_req) + return -ENOMEM; + + fsync_req->file_ver = cpu_to_le64(fid->ver); + fsync_req->file_id = cpu_to_le32(fid->id); + fsync_req->datasync = cpu_to_le32(datasync); + fsync_req->start = cpu_to_le64(start); + fsync_req->end = cpu_to_le64(end); + + ret = hmdfs_sendmessage_request(con, &sm); + + free_sm_outbuf(&sm); + kfree(fsync_req); + return ret; +} + +int hmdfs_client_readpage(struct hmdfs_peer *con, const struct hmdfs_fid *fid, + struct page *page) +{ + int ret; + size_t send_len = sizeof(struct readpage_request); + struct readpage_request *read_data = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = read_data, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_READPAGE); + if (!read_data) { + unlock_page(page); + return -ENOMEM; + } + + sm.out_buf = page; + read_data->file_ver = cpu_to_le64(fid->ver); 
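+	/* all on-wire fields are little-endian, mirroring the write path */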
+ read_data->file_id = cpu_to_le32(fid->id); + read_data->size = cpu_to_le32(HMDFS_PAGE_SIZE); + read_data->index = cpu_to_le64(page->index); + ret = hmdfs_sendpage_request(con, &sm); + kfree(read_data); + return ret; +} + +bool hmdfs_usr_sig_pending(struct task_struct *p) +{ + sigset_t *sig = &p->pending.signal; + + if (likely(!signal_pending(p))) + return false; + return sigismember(sig, SIGINT) || sigismember(sig, SIGTERM) || + sigismember(sig, SIGKILL); +} + +void hmdfs_client_writepage_done(struct hmdfs_inode_info *info, + struct hmdfs_writepage_context *ctx) +{ + struct page *page = ctx->page; + bool unlock = ctx->rsem_held; + + SetPageUptodate(page); + end_page_writeback(page); + if (unlock) + up_read(&info->wpage_sem); + unlock_page(page); +} + +static void hmdfs_client_writepage_err(struct hmdfs_peer *peer, + struct hmdfs_inode_info *info, + struct hmdfs_writepage_context *ctx, + int err) +{ + struct page *page = ctx->page; + bool unlock = ctx->rsem_held; + + if (err == -ENOMEM || err == -EAGAIN || err == -ESHUTDOWN || + err == -ETIME) + SetPageUptodate(page); + else + hmdfs_info("Page %ld of file %u writeback err %d devid %llu", + page->index, ctx->fid.id, err, peer->device_id); + + /* + * Current and subsequent writebacks have been canceled by the + * user, leaving these pages' states in chaos. Read pages in + * the future to update these pages. + */ + if (ctx->sync_all && hmdfs_usr_sig_pending(ctx->caller)) + ClearPageUptodate(page); + + if (ctx->sync_all || !time_is_after_eq_jiffies(ctx->timeout) || + !(err == -ETIME || hmdfs_need_redirty_page(info, err))) { + SetPageError(page); + mapping_set_error(page->mapping, -EIO); + } else { + __set_page_dirty_nobuffers(page); + account_page_redirty(page); + } + + end_page_writeback(page); + if (unlock) + up_read(&info->wpage_sem); + unlock_page(page); +} + +static inline bool +hmdfs_no_timedout_sync_write(struct hmdfs_writepage_context *ctx) +{ + return ctx->sync_all && time_is_after_eq_jiffies(ctx->timeout); +} + +static inline bool +hmdfs_client_rewrite_for_timeout(struct hmdfs_writepage_context *ctx, int err) +{ + return (err == -ETIME && hmdfs_no_timedout_sync_write(ctx) && + !hmdfs_usr_sig_pending(ctx->caller)); +} + +static inline bool +hmdfs_client_rewrite_for_offline(struct hmdfs_sb_info *sbi, + struct hmdfs_writepage_context *ctx, int err) +{ + struct hmdfs_inode_info *info = hmdfs_i(ctx->page->mapping->host); + unsigned int status = READ_ONCE(info->stash_status); + + /* + * No retry if offline occurs during inode restoration. + * + * Do retry if local file cache is ready even it is not + * a WB_SYNC_ALL write, else no-sync_all writeback will + * return -EIO, mapping_set_error(mapping, -EIO) will be + * called and it will make the concurrent calling of + * filemap_write_and_wait() in hmdfs_flush_stash_file_data() + * return -EIO. 
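+	 *
+	 * In short: retry iff stash is enabled, the inode is not being
+	 * restored, the error means offline/timeout, and either this is a
+	 * sync_all write that has not timed out or the inode is already
+	 * stashing.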
+ */ + return (hmdfs_is_stash_enabled(sbi) && + status != HMDFS_REMOTE_INODE_RESTORING && + (hmdfs_no_timedout_sync_write(ctx) || + status == HMDFS_REMOTE_INODE_STASHING) && + hmdfs_is_offline_or_timeout_err(err)); +} + +static inline bool +hmdfs_client_redo_writepage(struct hmdfs_sb_info *sbi, + struct hmdfs_writepage_context *ctx, int err) +{ + return hmdfs_client_rewrite_for_timeout(ctx, err) || + hmdfs_client_rewrite_for_offline(sbi, ctx, err); +} + +static bool hmdfs_remote_write_to_remote(struct hmdfs_inode_info *info) +{ + unsigned int status = READ_ONCE(info->stash_status); + bool stashing; + + if (status != HMDFS_REMOTE_INODE_STASHING) + return true; + + /* Ensure it's OK to use info->cache afterwards */ + spin_lock(&info->stash_lock); + stashing = (info->stash_status == HMDFS_REMOTE_INODE_STASHING); + spin_unlock(&info->stash_lock); + + return !stashing; +} + +int hmdfs_remote_do_writepage(struct hmdfs_peer *con, + struct hmdfs_writepage_context *ctx) +{ + struct hmdfs_inode_info *info = hmdfs_i(ctx->page->mapping->host); + bool to_remote = false; + int err = 0; + + to_remote = hmdfs_remote_write_to_remote(info); + if (to_remote) + err = hmdfs_client_writepage(info->conn, ctx); + else + err = hmdfs_stash_writepage(info->conn, ctx); + if (!err) + return 0; + + if (!(to_remote && + hmdfs_client_rewrite_for_offline(con->sbi, ctx, err))) + return err; + + queue_delayed_work(con->retry_wb_wq, &ctx->retry_dwork, + msecs_to_jiffies(HMDFS_SYNC_WPAGE_RETRY_MS)); + + return 0; +} + +void hmdfs_remote_writepage_retry(struct work_struct *work) +{ + struct hmdfs_writepage_context *ctx = + container_of(work, struct hmdfs_writepage_context, + retry_dwork.work); + struct hmdfs_inode_info *info = hmdfs_i(ctx->page->mapping->host); + struct hmdfs_peer *peer = info->conn; + const struct cred *old_cred = NULL; + int err; + + old_cred = hmdfs_override_creds(peer->sbi->cred); + err = hmdfs_remote_do_writepage(peer, ctx); + hmdfs_revert_creds(old_cred); + if (err) { + hmdfs_client_writepage_err(peer, info, ctx, err); + put_task_struct(ctx->caller); + kfree(ctx); + } +} + +void hmdfs_writepage_cb(struct hmdfs_peer *peer, const struct hmdfs_req *req, + const struct hmdfs_resp *resp) +{ + struct hmdfs_writepage_context *ctx = req->private; + struct hmdfs_inode_info *info = hmdfs_i(ctx->page->mapping->host); + int ret = resp->ret_code; + unsigned long page_index = ctx->page->index; + + trace_hmdfs_writepage_cb_enter(peer, info->remote_ino, page_index, ret); + + if (!ret) { + hmdfs_client_writepage_done(info, ctx); + atomic64_inc(&info->write_counter); + goto cleanup_all; + } + + if (hmdfs_client_redo_writepage(peer->sbi, ctx, ret)) { + ret = hmdfs_remote_do_writepage(peer, ctx); + if (!ret) + goto cleanup_req; + WARN_ON(ret == -ETIME); + } + + hmdfs_client_writepage_err(peer, info, ctx, ret); + +cleanup_all: + put_task_struct(ctx->caller); + kfree(ctx); +cleanup_req: + kfree(req->data); + + trace_hmdfs_writepage_cb_exit(peer, info->remote_ino, page_index, ret); +} + +int hmdfs_client_writepage(struct hmdfs_peer *con, + struct hmdfs_writepage_context *param) +{ + int ret = 0; + size_t send_len = sizeof(struct writepage_request) + HMDFS_PAGE_SIZE; + struct writepage_request *write_data = kzalloc(send_len, GFP_NOFS); + struct hmdfs_req req; + char *data = NULL; + + if (unlikely(!write_data)) + return -ENOMEM; + + WARN_ON(!PageLocked(param->page)); // VFS + WARN_ON(PageDirty(param->page)); // VFS + WARN_ON(!PageWriteback(param->page)); // hmdfs + + write_data->file_ver = cpu_to_le64(param->fid.ver); + 
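+	/* fid + page index tell the server where to place the 4K payload */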
write_data->file_id = cpu_to_le32(param->fid.id);
+	write_data->index = cpu_to_le64(param->page->index);
+	write_data->count = cpu_to_le32(param->count);
+	data = kmap(param->page);
+	memcpy((char *)write_data->buf, data, HMDFS_PAGE_SIZE);
+	kunmap(param->page);
+	req.data = write_data;
+	req.data_len = send_len;
+
+	req.private = param;
+	req.private_len = sizeof(*param);
+
+	req.timeout = TIMEOUT_CONFIG;
+	hmdfs_init_cmd(&req.operations, F_WRITEPAGE);
+	ret = hmdfs_send_async_request(con, &req);
+	if (unlikely(ret))
+		kfree(write_data);
+	return ret;
+}
+
+void hmdfs_client_recv_readpage(struct hmdfs_head_cmd *head, int err,
+				struct hmdfs_async_work *async_work)
+{
+	struct page *page = async_work->page;
+	int ret = le32_to_cpu(head->ret_code);
+	struct hmdfs_inode_info *info = hmdfs_i(page->mapping->host);
+	unsigned long page_index = page->index;
+
+	if (!err)
+		SetPageUptodate(page);
+	else if (err == -EBADF)
+		/* There may be a stale fd caused by fid version, need reopen */
+		set_bit(HMDFS_FID_NEED_OPEN, &info->fid_flags);
+
+	hmdfs_client_resp_statis(async_work->head.peer->sbi, F_READPAGE,
+				 HMDFS_RESP_NORMAL, async_work->start, jiffies);
+
+	trace_hmdfs_client_recv_readpage(async_work->head.peer,
+					 info->remote_ino, page_index, ret);
+
+	asw_done(async_work);
+}
+
+/* read the cached dentry file at path and write it into filp */
+int hmdfs_client_start_readdir(struct hmdfs_peer *con, struct file *filp,
+			       const char *path, int path_len,
+			       struct hmdfs_dcache_header *header)
+{
+	int ret;
+	size_t send_len = sizeof(struct readdir_request) + path_len + 1;
+	struct readdir_request *req = kzalloc(send_len, GFP_KERNEL);
+	struct hmdfs_send_command sm = {
+		.data = req,
+		.len = send_len,
+		.local_filp = filp,
+	};
+
+	hmdfs_init_cmd(&sm.operations, F_ITERATE);
+	if (!req)
+		return -ENOMEM;
+
+	/* add a ref or it will be released at msg put */
+	get_file(sm.local_filp);
+	req->path_len = cpu_to_le32(path_len);
+	strncpy(req->path, path, path_len);
+
+	/*
+	 * If we already have a cache file, verify it.
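+	 * (the dcache_crtime/dentry_ctime fields below carry the local
+	 * cache's timestamps for the server to compare).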
If it is + * uptodate, then we don't have to transfer a new one + */ + if (header) { + req->dcache_crtime = header->dcache_crtime; + req->dcache_crtime_nsec = header->dcache_crtime_nsec; + req->dentry_ctime = header->dentry_ctime; + req->dentry_ctime_nsec = header->dentry_ctime_nsec; + req->num = header->num; + req->verify_cache = cpu_to_le32(1); + } + + ret = hmdfs_sendmessage_request(con, &sm); + kfree(req); + return ret; +} + +int hmdfs_client_start_mkdir(struct hmdfs_peer *con, + const char *path, const char *name, + umode_t mode, struct hmdfs_lookup_ret *mkdir_ret) +{ + int ret = 0; + int path_len = strlen(path); + int name_len = strlen(name); + size_t send_len = sizeof(struct mkdir_request) + path_len + 1 + + name_len + 1; + struct mkdir_request *mkdir_req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_inodeinfo_response *resp = NULL; + struct hmdfs_send_command sm = { + .data = mkdir_req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_MKDIR); + if (!mkdir_req) + return -ENOMEM; + + mkdir_req->path_len = cpu_to_le32(path_len); + mkdir_req->name_len = cpu_to_le32(name_len); + mkdir_req->mode = cpu_to_le16(mode); + strncpy(mkdir_req->path, path, path_len); + strncpy(mkdir_req->path + path_len + 1, name, name_len); + + ret = hmdfs_sendmessage_request(con, &sm); + if (ret == -ENOENT || ret == -ETIME || ret == -EOPNOTSUPP) + goto out; + if (!sm.out_buf) { + ret = -ENOENT; + goto out; + } + resp = sm.out_buf; + mkdir_ret->i_mode = le16_to_cpu(resp->i_mode); + mkdir_ret->i_size = le64_to_cpu(resp->i_size); + mkdir_ret->i_mtime = le64_to_cpu(resp->i_mtime); + mkdir_ret->i_mtime_nsec = le32_to_cpu(resp->i_mtime_nsec); + mkdir_ret->i_ino = le64_to_cpu(resp->i_ino); + +out: + free_sm_outbuf(&sm); + kfree(mkdir_req); + return ret; +} + +int hmdfs_client_start_create(struct hmdfs_peer *con, + const char *path, const char *name, + umode_t mode, bool want_excl, + struct hmdfs_lookup_ret *create_ret) +{ + int ret = 0; + int path_len = strlen(path); + int name_len = strlen(name); + size_t send_len = sizeof(struct create_request) + path_len + 1 + + name_len + 1; + struct create_request *create_req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_inodeinfo_response *resp = NULL; + struct hmdfs_send_command sm = { + .data = create_req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_CREATE); + if (!create_req) + return -ENOMEM; + + create_req->path_len = cpu_to_le32(path_len); + create_req->name_len = cpu_to_le32(name_len); + create_req->mode = cpu_to_le16(mode); + create_req->want_excl = want_excl; + strncpy(create_req->path, path, path_len); + strncpy(create_req->path + path_len + 1, name, name_len); + + ret = hmdfs_sendmessage_request(con, &sm); + if (ret == -ENOENT || ret == -ETIME || ret == -EOPNOTSUPP) + goto out; + if (!sm.out_buf) { + ret = -ENOENT; + goto out; + } + resp = sm.out_buf; + create_ret->i_mode = le16_to_cpu(resp->i_mode); + create_ret->i_size = le64_to_cpu(resp->i_size); + create_ret->i_mtime = le64_to_cpu(resp->i_mtime); + create_ret->i_mtime_nsec = le32_to_cpu(resp->i_mtime_nsec); + create_ret->i_ino = le64_to_cpu(resp->i_ino); + +out: + free_sm_outbuf(&sm); + kfree(create_req); + return ret; +} + +int hmdfs_client_start_rmdir(struct hmdfs_peer *con, const char *path, + const char *name) +{ + int ret; + int path_len = strlen(path); + int name_len = strlen(name); + size_t send_len = sizeof(struct rmdir_request) + path_len + 1 + + name_len + 1; + struct rmdir_request *rmdir_req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = 
rmdir_req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_RMDIR); + if (!rmdir_req) + return -ENOMEM; + + rmdir_req->path_len = cpu_to_le32(path_len); + rmdir_req->name_len = cpu_to_le32(name_len); + strncpy(rmdir_req->path, path, path_len); + strncpy(rmdir_req->path + path_len + 1, name, name_len); + + ret = hmdfs_sendmessage_request(con, &sm); + free_sm_outbuf(&sm); + kfree(rmdir_req); + return ret; +} + +int hmdfs_client_start_unlink(struct hmdfs_peer *con, const char *path, + const char *name) +{ + int ret; + int path_len = strlen(path); + int name_len = strlen(name); + size_t send_len = sizeof(struct unlink_request) + path_len + 1 + + name_len + 1; + struct unlink_request *unlink_req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = unlink_req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_UNLINK); + if (!unlink_req) + return -ENOMEM; + + unlink_req->path_len = cpu_to_le32(path_len); + unlink_req->name_len = cpu_to_le32(name_len); + strncpy(unlink_req->path, path, path_len); + strncpy(unlink_req->path + path_len + 1, name, name_len); + + ret = hmdfs_sendmessage_request(con, &sm); + kfree(unlink_req); + free_sm_outbuf(&sm); + return ret; +} + +int hmdfs_client_start_rename(struct hmdfs_peer *con, const char *old_path, + const char *old_name, const char *new_path, + const char *new_name, unsigned int flags) +{ + int ret; + int old_path_len = strlen(old_path); + int new_path_len = strlen(new_path); + int old_name_len = strlen(old_name); + int new_name_len = strlen(new_name); + + size_t send_len = sizeof(struct rename_request) + old_path_len + 1 + + new_path_len + 1 + old_name_len + 1 + new_name_len + + 1; + struct rename_request *rename_req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = rename_req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_RENAME); + if (!rename_req) + return -ENOMEM; + + rename_req->old_path_len = cpu_to_le32(old_path_len); + rename_req->new_path_len = cpu_to_le32(new_path_len); + rename_req->old_name_len = cpu_to_le32(old_name_len); + rename_req->new_name_len = cpu_to_le32(new_name_len); + rename_req->flags = cpu_to_le32(flags); + + strncpy(rename_req->path, old_path, old_path_len); + strncpy(rename_req->path + old_path_len + 1, new_path, new_path_len); + + strncpy(rename_req->path + old_path_len + 1 + new_path_len + 1, + old_name, old_name_len); + strncpy(rename_req->path + old_path_len + 1 + new_path_len + 1 + + old_name_len + 1, + new_name, new_name_len); + + ret = hmdfs_sendmessage_request(con, &sm); + free_sm_outbuf(&sm); + kfree(rename_req); + return ret; +} + +int hmdfs_send_setattr(struct hmdfs_peer *con, const char *send_buf, + struct setattr_info *attr_info) +{ + int ret; + int path_len = strlen(send_buf); + size_t send_len = path_len + 1 + sizeof(struct setattr_request); + struct setattr_request *setattr_req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = setattr_req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_SETATTR); + if (!setattr_req) + return -ENOMEM; + + strcpy(setattr_req->buf, send_buf); + setattr_req->path_len = cpu_to_le32(path_len); + setattr_req->valid = cpu_to_le32(attr_info->valid); + setattr_req->size = cpu_to_le64(attr_info->size); + setattr_req->mtime = cpu_to_le64(attr_info->mtime); + setattr_req->mtime_nsec = cpu_to_le32(attr_info->mtime_nsec); + ret = hmdfs_sendmessage_request(con, &sm); + kfree(setattr_req); + return ret; +} + +static void hmdfs_update_getattr_ret(struct 
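/*
 * Editor's note: the mkdir/create/rmdir/unlink/rename senders above all
 * pack their strings into one flexible buffer as consecutive
 * NUL-terminated fields. A minimal sketch of the same layout, using a
 * hypothetical helper that is not part of hmdfs:
 *
 *	size_t pack_two(char *buf, const char *path, const char *name)
 *	{
 *		size_t path_len = strlen(path);
 *		size_t name_len = strlen(name);
 *
 *		memcpy(buf, path, path_len + 1);
 *		memcpy(buf + path_len + 1, name, name_len + 1);
 *		return path_len + 1 + name_len + 1;
 *	}
 *
 * This matches the strncpy(req->path + path_len + 1, name, name_len)
 * offsets used above; the kzalloc() of the request provides the
 * terminating NUL bytes.
 */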
getattr_response *resp, + struct hmdfs_getattr_ret *result) +{ + struct kstat *stat = &result->stat; + + stat->result_mask = le32_to_cpu(resp->result_mask); + if (stat->result_mask == 0) + return; + + stat->ino = le64_to_cpu(resp->ino); + stat->mode = le16_to_cpu(resp->mode); + stat->nlink = le32_to_cpu(resp->nlink); + stat->uid.val = le32_to_cpu(resp->uid); + stat->gid.val = le32_to_cpu(resp->gid); + stat->size = le64_to_cpu(resp->size); + stat->blocks = le64_to_cpu(resp->blocks); + stat->blksize = le32_to_cpu(resp->blksize); + stat->atime.tv_sec = le64_to_cpu(resp->atime); + stat->atime.tv_nsec = le32_to_cpu(resp->atime_nsec); + stat->mtime.tv_sec = le64_to_cpu(resp->mtime); + stat->mtime.tv_nsec = le32_to_cpu(resp->mtime_nsec); + stat->ctime.tv_sec = le64_to_cpu(resp->ctime); + stat->ctime.tv_nsec = le32_to_cpu(resp->ctime_nsec); + stat->btime.tv_sec = le64_to_cpu(resp->crtime); + stat->btime.tv_nsec = le32_to_cpu(resp->crtime_nsec); + result->fsid = le64_to_cpu(resp->fsid); + /* currently not used */ + result->i_flags = 0; +} + +int hmdfs_send_getattr(struct hmdfs_peer *con, const char *send_buf, + unsigned int lookup_flags, + struct hmdfs_getattr_ret *result) +{ + int path_len = strlen(send_buf); + size_t send_len = path_len + 1 + sizeof(struct getattr_request); + int ret = 0; + struct getattr_request *req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_GETATTR); + if (!req) + return -ENOMEM; + + req->path_len = cpu_to_le32(path_len); + req->lookup_flags = cpu_to_le32(lookup_flags); + strncpy(req->buf, send_buf, path_len); + ret = hmdfs_sendmessage_request(con, &sm); + if (!ret && (sm.out_len == 0 || !sm.out_buf)) + ret = -ENOENT; + if (ret) + goto out; + + hmdfs_update_getattr_ret(sm.out_buf, result); + +out: + kfree(req); + free_sm_outbuf(&sm); + return ret; +} + +static void hmdfs_update_statfs_ret(struct statfs_response *resp, + struct kstatfs *buf) +{ + buf->f_type = le64_to_cpu(resp->f_type); + buf->f_bsize = le64_to_cpu(resp->f_bsize); + buf->f_blocks = le64_to_cpu(resp->f_blocks); + buf->f_bfree = le64_to_cpu(resp->f_bfree); + buf->f_bavail = le64_to_cpu(resp->f_bavail); + buf->f_files = le64_to_cpu(resp->f_files); + buf->f_ffree = le64_to_cpu(resp->f_ffree); + buf->f_fsid.val[0] = le32_to_cpu(resp->f_fsid_0); + buf->f_fsid.val[1] = le32_to_cpu(resp->f_fsid_1); + buf->f_namelen = le64_to_cpu(resp->f_namelen); + buf->f_frsize = le64_to_cpu(resp->f_frsize); + buf->f_flags = le64_to_cpu(resp->f_flags); + buf->f_spare[0] = le64_to_cpu(resp->f_spare_0); + buf->f_spare[1] = le64_to_cpu(resp->f_spare_1); + buf->f_spare[2] = le64_to_cpu(resp->f_spare_2); + buf->f_spare[3] = le64_to_cpu(resp->f_spare_3); +} + +int hmdfs_send_statfs(struct hmdfs_peer *con, const char *path, + struct kstatfs *buf) +{ + int ret; + int path_len = strlen(path); + size_t send_len = sizeof(struct statfs_request) + path_len + 1; + struct statfs_request *req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_STATFS); + if (!req) + return -ENOMEM; + + req->path_len = cpu_to_le32(path_len); + strncpy(req->path, path, path_len); + + ret = hmdfs_sendmessage_request(con, &sm); + + if (ret == -ETIME) + ret = -EIO; + if (!ret && (sm.out_len == 0 || !sm.out_buf)) + ret = -ENOENT; + if (ret) + goto out; + + hmdfs_update_statfs_ret(sm.out_buf, buf); +out: + kfree(req); + free_sm_outbuf(&sm); + return ret; +} + +int 
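/*
 * Editor's note: hmdfs_send_getattr() and hmdfs_send_statfs() above
 * follow the synchronous request pattern used throughout this file; a
 * condensed sketch of that lifecycle (parse() is a hypothetical
 * consumer, the rest are names from the surrounding code):
 *
 *	ret = hmdfs_sendmessage_request(con, &sm);
 *	if (!ret && (sm.out_len == 0 || !sm.out_buf))
 *		ret = -ENOENT;		// reply carried no payload
 *	if (!ret)
 *		parse(sm.out_buf);
 *	kfree(req);			// request buffer
 *	free_sm_outbuf(&sm);		// reply buffer, if any
 */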
hmdfs_send_syncfs(struct hmdfs_peer *con, int syncfs_timeout) +{ + int ret; + struct hmdfs_req req; + struct hmdfs_sb_info *sbi = con->sbi; + struct syncfs_request *syncfs_req = + kzalloc(sizeof(struct syncfs_request), GFP_KERNEL); + + if (!syncfs_req) { + hmdfs_err("cannot allocate syncfs_request"); + return -ENOMEM; + } + + hmdfs_init_cmd(&req.operations, F_SYNCFS); + req.timeout = syncfs_timeout; + + syncfs_req->version = cpu_to_le64(sbi->hsi.version); + req.data = syncfs_req; + req.data_len = sizeof(*syncfs_req); + + ret = hmdfs_send_async_request(con, &req); + if (ret) { + kfree(syncfs_req); + hmdfs_err("ret fail with %d", ret); + } + + return ret; +} + +static void hmdfs_update_getxattr_ret(struct getxattr_response *resp, + void *value, size_t o_size, int *ret) +{ + ssize_t size = le32_to_cpu(resp->size); + + if (o_size && o_size < size) { + *ret = -ERANGE; + return; + } + + if (o_size) + memcpy(value, resp->value, size); + + *ret = size; +} + +int hmdfs_send_getxattr(struct hmdfs_peer *con, const char *send_buf, + const char *name, void *value, size_t size) +{ + size_t path_len = strlen(send_buf); + size_t name_len = strlen(name); + size_t send_len = path_len + name_len + + sizeof(struct getxattr_request) + 2; + int ret = 0; + struct getxattr_request *req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_GETXATTR); + if (!req) + return -ENOMEM; + + req->path_len = cpu_to_le32(path_len); + req->name_len = cpu_to_le32(name_len); + req->size = cpu_to_le32(size); + strncpy(req->buf, send_buf, path_len); + strncpy(req->buf + path_len + 1, name, name_len); + ret = hmdfs_sendmessage_request(con, &sm); + if (!ret && (sm.out_len == 0 || !sm.out_buf)) + ret = -ENOENT; + if (ret) + goto out; + + hmdfs_update_getxattr_ret(sm.out_buf, value, size, &ret); + +out: + kfree(req); + free_sm_outbuf(&sm); + return ret; +} + +int hmdfs_send_setxattr(struct hmdfs_peer *con, const char *send_buf, + const char *name, const void *value, + size_t size, int flags) +{ + size_t path_len = strlen(send_buf); + size_t name_len = strlen(name); + size_t send_len = path_len + name_len + size + 2 + + sizeof(struct setxattr_request); + int ret = 0; + struct setxattr_request *req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_SETXATTR); + if (!req) + return -ENOMEM; + + req->path_len = cpu_to_le32(path_len); + req->name_len = cpu_to_le32(name_len); + req->size = cpu_to_le32(size); + req->flags = cpu_to_le32(flags); + strncpy(req->buf, send_buf, path_len); + strncpy(req->buf + path_len + 1, name, name_len); + memcpy(req->buf + path_len + name_len + 2, value, size); + if (!value) + req->del = true; + ret = hmdfs_sendmessage_request(con, &sm); + kfree(req); + return ret; +} + +static void hmdfs_update_listxattr_ret(struct listxattr_response *resp, + char *list, size_t o_size, ssize_t *ret) +{ + ssize_t size = le32_to_cpu(resp->size); + + if (o_size && o_size < size) { + *ret = -ERANGE; + return; + } + + /* multi name split with '\0', use memcpy */ + if (o_size) + memcpy(list, resp->list, size); + + *ret = size; +} + +ssize_t hmdfs_send_listxattr(struct hmdfs_peer *con, const char *send_buf, + char *list, size_t size) +{ + size_t path_len = strlen(send_buf); + size_t send_len = path_len + 1 + sizeof(struct listxattr_request); + ssize_t ret = 0; + struct listxattr_request *req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command 
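/*
 * Editor's note: hmdfs_send_getxattr() above keeps the getxattr(2)
 * probing convention: size 0 only queries the value length, and a
 * too-small buffer yields -ERANGE. An illustrative call sequence
 * (path is a placeholder):
 *
 *	int len = hmdfs_send_getxattr(con, path, "user.key", NULL, 0);
 *
 *	if (len > 0) {
 *		void *value = kmalloc(len, GFP_KERNEL);
 *
 *		if (value)
 *			len = hmdfs_send_getxattr(con, path, "user.key",
 *						  value, len);
 *	}
 */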
sm = { + .data = req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_LISTXATTR); + if (!req) + return -ENOMEM; + + req->path_len = cpu_to_le32(path_len); + req->size = cpu_to_le32(size); + strncpy(req->buf, send_buf, path_len); + ret = hmdfs_sendmessage_request(con, &sm); + if (!ret && (sm.out_len == 0 || !sm.out_buf)) + ret = -ENOENT; + if (ret) + goto out; + + hmdfs_update_listxattr_ret(sm.out_buf, list, size, &ret); + +out: + kfree(req); + free_sm_outbuf(&sm); + return ret; +} + +void hmdfs_recv_syncfs_cb(struct hmdfs_peer *peer, const struct hmdfs_req *req, + const struct hmdfs_resp *resp) +{ + struct hmdfs_sb_info *sbi = peer->sbi; + struct syncfs_request *syncfs_req = (struct syncfs_request *)req->data; + + WARN_ON(!syncfs_req); + spin_lock(&sbi->hsi.v_lock); + if (le64_to_cpu(syncfs_req->version) != sbi->hsi.version) { + hmdfs_info( + "Recv stale syncfs resp[ver: %llu] from device %llu, current ver %llu", + le64_to_cpu(syncfs_req->version), peer->device_id, + sbi->hsi.version); + spin_unlock(&sbi->hsi.v_lock); + goto out; + } + + if (!sbi->hsi.remote_ret) + sbi->hsi.remote_ret = resp->ret_code; + + if (resp->ret_code) { + hmdfs_err("Recv syncfs error code %d from device %llu", + resp->ret_code, peer->device_id); + } else { + /* + * Set @sb_dirty_count to zero if no one else produce + * dirty data on remote server during remote sync. + */ + atomic64_cmpxchg(&peer->sb_dirty_count, + peer->old_sb_dirty_count, 0); + } + + atomic_dec(&sbi->hsi.wait_count); + spin_unlock(&sbi->hsi.v_lock); + wake_up_interruptible(&sbi->hsi.wq); + +out: + kfree(syncfs_req); +} + +void hmdfs_send_drop_push(struct hmdfs_peer *con, const char *path) +{ + int path_len = strlen(path); + size_t send_len = sizeof(struct drop_push_request) + path_len + 1; + struct drop_push_request *dp_req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = dp_req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_DROP_PUSH); + if (!dp_req) + return; + + dp_req->path_len = cpu_to_le32(path_len); + strncpy(dp_req->path, path, path_len); + + hmdfs_sendmessage_request(con, &sm); + kfree(dp_req); +} + +static void *hmdfs_get_msg_next(struct hmdfs_peer *peer, int *id) +{ + struct hmdfs_msg_idr_head *head = NULL; + + spin_lock(&peer->idr_lock); + head = idr_get_next(&peer->msg_idr, id); + if (head && head->type < MSG_IDR_MAX && head->type >= 0) + kref_get(&head->ref); + + spin_unlock(&peer->idr_lock); + + return head; +} + +void hmdfs_client_offline_notify(struct hmdfs_peer *conn, int evt, + unsigned int seq) +{ + int id; + int count = 0; + struct hmdfs_msg_idr_head *head = NULL; + + for (id = 0; (head = hmdfs_get_msg_next(conn, &id)) != NULL; ++id) { + switch (head->type) { + case MSG_IDR_1_0_NONE: + head_put(head); + head_put(head); + break; + case MSG_IDR_MESSAGE_SYNC: + case MSG_IDR_1_0_MESSAGE_SYNC: + hmdfs_response_wakeup((struct sendmsg_wait_queue *)head, + -ETIME, 0, NULL); + hmdfs_debug("wakeup id=%d", head->msg_id); + msg_put((struct sendmsg_wait_queue *)head); + break; + case MSG_IDR_MESSAGE_ASYNC: + hmdfs_wakeup_parasite( + (struct hmdfs_msg_parasite *)head); + hmdfs_debug("wakeup parasite id=%d", head->msg_id); + mp_put((struct hmdfs_msg_parasite *)head); + break; + case MSG_IDR_PAGE: + case MSG_IDR_1_0_PAGE: + hmdfs_wakeup_async_work( + (struct hmdfs_async_work *)head); + hmdfs_debug("wakeup async work id=%d", head->msg_id); + asw_put((struct hmdfs_async_work *)head); + break; + default: + hmdfs_err("Bad type=%d id=%d", head->type, + head->msg_id); + break; + } + + 
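/*
 * Editor's note: hmdfs_get_msg_next() above pairs idr_get_next() with
 * kref_get() under idr_lock, so every pending message the offline
 * walker visits is pinned before the lock is dropped; the bare pattern
 * looks like:
 *
 *	spin_lock(&peer->idr_lock);
 *	head = idr_get_next(&peer->msg_idr, &id);
 *	if (head)
 *		kref_get(&head->ref);	// pin before unlocking
 *	spin_unlock(&peer->idr_lock);
 *	// ... wake the waiter, then drop the reference ...
 */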
+		count++;
+		/*
+		 * If there are too many idr entries to process, resched
+		 * every HMDFS_IDR_RESCHED_COUNT (512) messages to avoid
+		 * a soft lockup.
+		 */
+		if (count % HMDFS_IDR_RESCHED_COUNT == 0)
+			cond_resched();
+	}
+}
+
+static struct hmdfs_node_cb_desc client_cb[] = {
+	{
+		.evt = NODE_EVT_OFFLINE,
+		.sync = true,
+		.min_version = DFS_1_0,
+		.fn = hmdfs_client_offline_notify,
+	},
+};
+
+void __init hmdfs_client_add_node_evt_cb(void)
+{
+	hmdfs_node_add_evt_cb(client_cb, ARRAY_SIZE(client_cb));
+}
diff --git a/fs/hmdfs/hmdfs_client.h b/fs/hmdfs/hmdfs_client.h
new file mode 100644
index 0000000000000000000000000000000000000000..ab2867dca4579fd15047c54f790c6ab61985fb90
--- /dev/null
+++ b/fs/hmdfs/hmdfs_client.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/hmdfs_client.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_CLIENT_H
+#define HMDFS_CLIENT_H
+
+#include "comm/transport.h"
+#include "hmdfs_dentryfile.h"
+#include "hmdfs_device_view.h"
+
+struct hmdfs_open_ret {
+	struct hmdfs_fid fid;
+	__u64 file_size;
+	__u64 ino;
+	struct hmdfs_time_t remote_ctime;
+	struct hmdfs_time_t stable_ctime;
+};
+
+struct hmdfs_writepage_context {
+	struct hmdfs_fid fid;
+	uint32_t count;
+	bool sync_all;
+	bool rsem_held;
+	unsigned long timeout;
+	struct task_struct *caller;
+	struct page *page;
+	struct delayed_work retry_dwork;
+};
+
+int hmdfs_client_start_readdir(struct hmdfs_peer *con, struct file *filp,
+			       const char *path, int path_len,
+			       struct hmdfs_dcache_header *header);
+int hmdfs_client_start_mkdir(struct hmdfs_peer *con,
+			     const char *path, const char *name,
+			     umode_t mode, struct hmdfs_lookup_ret *mkdir_ret);
+int hmdfs_client_start_create(struct hmdfs_peer *con,
+			      const char *path, const char *name,
+			      umode_t mode, bool want_excl,
+			      struct hmdfs_lookup_ret *create_ret);
+int hmdfs_client_start_rmdir(struct hmdfs_peer *con, const char *path,
+			     const char *name);
+int hmdfs_client_start_unlink(struct hmdfs_peer *con, const char *path,
+			      const char *name);
+int hmdfs_client_start_rename(struct hmdfs_peer *con, const char *old_path,
+			      const char *old_name, const char *new_path,
+			      const char *new_name, unsigned int flags);
+
+static inline bool hmdfs_is_offline_err(int err)
+{
+	/*
+	 * writepage() will get -EBADF if the peer comes online
+	 * again during an offline stash, and -EBADF also
+	 * needs a redo.
+	 */
+	return (err == -EAGAIN || err == -ESHUTDOWN || err == -EBADF);
+}
+
+static inline bool hmdfs_is_offline_or_timeout_err(int err)
+{
+	return hmdfs_is_offline_err(err) || err == -ETIME;
+}
+
+static inline bool hmdfs_need_redirty_page(const struct hmdfs_inode_info *info,
+					   int err)
+{
+	/*
+	 * Redirty the page only when:
+	 * 1. stash is enabled
+	 * 2. the error is offline related
+	 * 3.
no restore + */ + return hmdfs_is_stash_enabled(info->conn->sbi) && + hmdfs_is_offline_err(err) && + READ_ONCE(info->stash_status) != HMDFS_REMOTE_INODE_RESTORING; +} + +bool hmdfs_usr_sig_pending(struct task_struct *p); +void hmdfs_writepage_cb(struct hmdfs_peer *peer, const struct hmdfs_req *req, + const struct hmdfs_resp *resp); +int hmdfs_client_writepage(struct hmdfs_peer *con, + struct hmdfs_writepage_context *param); +int hmdfs_remote_do_writepage(struct hmdfs_peer *con, + struct hmdfs_writepage_context *ctx); +void hmdfs_remote_writepage_retry(struct work_struct *work); + +void hmdfs_client_writepage_done(struct hmdfs_inode_info *info, + struct hmdfs_writepage_context *ctx); + +int hmdfs_send_open(struct hmdfs_peer *con, const char *send_buf, + __u8 file_type, struct hmdfs_open_ret *open_ret); +void hmdfs_send_close(struct hmdfs_peer *con, const struct hmdfs_fid *fid); +int hmdfs_send_fsync(struct hmdfs_peer *con, const struct hmdfs_fid *fid, + __s64 start, __s64 end, __s32 datasync); +int hmdfs_client_readpage(struct hmdfs_peer *con, const struct hmdfs_fid *fid, + struct page *page); + +int hmdfs_send_setattr(struct hmdfs_peer *con, const char *send_buf, + struct setattr_info *attr_info); +int hmdfs_send_getattr(struct hmdfs_peer *con, const char *send_buf, + unsigned int lookup_flags, + struct hmdfs_getattr_ret *getattr_result); +int hmdfs_send_statfs(struct hmdfs_peer *con, const char *path, + struct kstatfs *buf); +void hmdfs_client_recv_readpage(struct hmdfs_head_cmd *head, int err, + struct hmdfs_async_work *async_work); +int hmdfs_send_syncfs(struct hmdfs_peer *con, int syncfs_timeout); +int hmdfs_send_getxattr(struct hmdfs_peer *con, const char *send_buf, + const char *name, void *value, size_t size); +int hmdfs_send_setxattr(struct hmdfs_peer *con, const char *send_buf, + const char *name, const void *val, + size_t size, int flags); +ssize_t hmdfs_send_listxattr(struct hmdfs_peer *con, const char *send_buf, + char *list, size_t size); +void hmdfs_recv_syncfs_cb(struct hmdfs_peer *peer, const struct hmdfs_req *req, + const struct hmdfs_resp *resp); + +void __init hmdfs_client_add_node_evt_cb(void); +#endif diff --git a/fs/hmdfs/hmdfs_dentryfile.c b/fs/hmdfs/hmdfs_dentryfile.c new file mode 100644 index 0000000000000000000000000000000000000000..98b215ba2d8edc8c877776c555284caade7071ea --- /dev/null +++ b/fs/hmdfs/hmdfs_dentryfile.c @@ -0,0 +1,2680 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/hmdfs_dentryfile.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include "hmdfs_dentryfile.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "authority/authentication.h" +#include "comm/transport.h" +#include "hmdfs_client.h" +#include "hmdfs_device_view.h" + +/* Hashing code copied from f2fs */ +#define HMDFS_HASH_COL_BIT ((0x1ULL) << 63) +#define DELTA 0x9E3779B9 + +static bool is_dot_dotdot(const unsigned char *name, __u32 len) +{ + if (len == 1 && name[0] == '.') + return true; + + if (len == 2 && name[0] == '.' 
&& name[1] == '.') + return true; + + return false; +} + +static void str2hashbuf(const unsigned char *msg, size_t len, unsigned int *buf, + int num, bool case_sense) +{ + unsigned int pad, val; + int i; + unsigned char c; + + pad = (__u32)len | ((__u32)len << 8); + pad |= pad << 16; + + val = pad; + if (len > (size_t)num * 4) + len = (size_t)num * 4; + for (i = 0; i < len; i++) { + if ((i % 4) == 0) + val = pad; + c = msg[i]; + if (!case_sense) + c = tolower(c); + val = c + (val << 8); + if ((i % 4) == 3) { + *buf++ = val; + val = pad; + num--; + } + } + if (--num >= 0) + *buf++ = val; + while (--num >= 0) + *buf++ = pad; +} + +static void tea_transform(unsigned int buf[4], unsigned int const in[]) +{ + __u32 sum = 0; + __u32 b0 = buf[0], b1 = buf[1]; + __u32 a = in[0], b = in[1], c = in[2], d = in[3]; + int n = 16; + + do { + sum += DELTA; + b0 += ((b1 << 4) + a) ^ (b1 + sum) ^ ((b1 >> 5) + b); + b1 += ((b0 << 4) + c) ^ (b0 + sum) ^ ((b0 >> 5) + d); + } while (--n); + + buf[0] += b0; + buf[1] += b1; +} + +static __u32 hmdfs_dentry_hash(const struct qstr *qstr, bool case_sense) +{ + __u32 hash; + __u32 hmdfs_hash; + const unsigned char *p = qstr->name; + __u32 len = qstr->len; + __u32 in[8], buf[4]; + + if (is_dot_dotdot(p, len)) + return 0; + + /* Initialize the default seed for the hash checksum functions */ + buf[0] = 0x67452301; + buf[1] = 0xefcdab89; + buf[2] = 0x98badcfe; + buf[3] = 0x10325476; + + while (1) { + str2hashbuf(p, len, in, 4, case_sense); + tea_transform(buf, in); + p += 16; + if (len <= 16) + break; + len -= 16; + } + hash = buf[0]; + hmdfs_hash = hash & ~HMDFS_HASH_COL_BIT; + return hmdfs_hash; +} + +static atomic_t curr_ino = ATOMIC_INIT(INUNUMBER_START); +int get_inonumber(void) +{ + return atomic_inc_return(&curr_ino); +} + +static int hmdfs_get_root_dentry_type(struct dentry *dentry, int *is_root) +{ + struct hmdfs_dentry_info *d_info = hmdfs_d(dentry); + + *is_root = 1; + switch (d_info->dentry_type) { + case HMDFS_LAYER_OTHER_LOCAL: + *is_root = 0; + fallthrough; + case HMDFS_LAYER_SECOND_LOCAL: + return HMDFS_LAYER_SECOND_LOCAL; + case HMDFS_LAYER_OTHER_REMOTE: + *is_root = 0; + fallthrough; + case HMDFS_LAYER_SECOND_REMOTE: + return HMDFS_LAYER_SECOND_REMOTE; + default: + hmdfs_info("Unexpected dentry type %d", d_info->dentry_type); + return -EINVAL; + } +} + +static int prepend(char **buffer, int *buflen, const char *str, int namelen) +{ + *buflen -= namelen; + if (*buflen < 0) + return -ENAMETOOLONG; + *buffer -= namelen; + memcpy(*buffer, str, namelen); + return 0; +} + +static int prepend_name(char **buffer, int *buflen, const struct qstr *name) +{ + const char *dname = name->name; + u32 dlen = name->len; + char *p = NULL; + + *buflen -= dlen + 1; + if (*buflen < 0) + return -ENAMETOOLONG; + p = *buffer -= dlen + 1; + *p++ = '/'; + while (dlen--) { + char c = *dname++; + + if (!c) + break; + *p++ = c; + } + return 0; +} + +static char *hmdfs_dentry_path_raw(struct dentry *d, char *buf, int buflen) +{ + struct dentry *dentry = NULL; + char *end = NULL; + char *retval = NULL; + unsigned int len; + unsigned int seq = 0; + int root_flag = 0; + int error = 0; + struct hmdfs_dentry_info *di = hmdfs_d(d); + int hmdfs_root_dentry_type = 0; + + di->time = jiffies; + hmdfs_root_dentry_type = hmdfs_get_root_dentry_type(d, &root_flag); + if (hmdfs_root_dentry_type < 0) + return NULL; + if (root_flag) { + strcpy(buf, "/"); + return buf; + } + rcu_read_lock(); +restart: + dentry = d; + di = hmdfs_d(dentry); + di->time = jiffies; + end = buf + buflen; + len = buflen; + 
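/*
 * Editor's note: hmdfs_dentry_hash() above digests the name in 16-byte
 * chunks: str2hashbuf() packs the (optionally lowercased) bytes into
 * four u32 words padded with the length, tea_transform() runs sixteen
 * TEA-style rounds over the running state, and the collision bit is
 * masked off at the end. Schematically:
 *
 *	buf = { 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476 };
 *	for each 16-byte chunk of the name:
 *		str2hashbuf(chunk, len, in, 4, case_sense);
 *		tea_transform(buf, in);
 *	hash = buf[0] & ~HMDFS_HASH_COL_BIT;
 */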
prepend(&end, &len, "\0", 1); + retval = end - 1; + *retval = '/'; + read_seqbegin_or_lock(&rename_lock, &seq); + while (di->dentry_type != hmdfs_root_dentry_type) { + struct dentry *parent = dentry->d_parent; + + prefetch(parent); + error = prepend_name(&end, &len, &dentry->d_name); + if (error) + break; + retval = end; + dentry = parent; + di = hmdfs_d(dentry); + di->time = jiffies; + } + if (!(seq & 1)) + rcu_read_unlock(); + if (need_seqretry(&rename_lock, seq)) { + seq = 1; + goto restart; + } + done_seqretry(&rename_lock, seq); + if (error) + goto Elong; + return retval; +Elong: + return ERR_PTR(-ENAMETOOLONG); +} + +char *hmdfs_get_dentry_relative_path(struct dentry *dentry) +{ + char *final_buf = NULL; + char *buf = NULL; + char *p = NULL; + + buf = kzalloc(PATH_MAX, GFP_KERNEL); + if (!buf) + return NULL; + + final_buf = kzalloc(PATH_MAX, GFP_KERNEL); + if (!final_buf) { + kfree(buf); + return NULL; + } + + /* NULL dentry return root dir */ + if (!dentry) { + strcpy(final_buf, "/"); + kfree(buf); + return final_buf; + } + p = hmdfs_dentry_path_raw(dentry, buf, PATH_MAX); + if (IS_ERR_OR_NULL(p)) { + kfree(buf); + kfree(final_buf); + return NULL; + } + + if (strlen(p) >= PATH_MAX) { + kfree(buf); + kfree(final_buf); + return NULL; + } + strcpy(final_buf, p); + kfree(buf); + return final_buf; +} + +char *hmdfs_get_dentry_absolute_path(const char *rootdir, + const char *relative_path) +{ + char *buf = 0; + + if (!rootdir || !relative_path) + return NULL; + if (strlen(rootdir) + strlen(relative_path) >= PATH_MAX) + return NULL; + + buf = kzalloc(PATH_MAX, GFP_KERNEL); + if (!buf) + return NULL; + + strcpy(buf, rootdir); + strcat(buf, relative_path); + return buf; +} + +char *hmdfs_connect_path(const char *path, const char *name) +{ + char *buf = 0; + + if (!path || !name) + return NULL; + + if (strlen(path) + strlen(name) + 1 >= PATH_MAX) + return NULL; + + buf = kzalloc(PATH_MAX, GFP_KERNEL); + if (!buf) + return NULL; + + strcpy(buf, path); + strcat(buf, "/"); + strcat(buf, name); + return buf; +} + +int hmdfs_metainfo_read(struct hmdfs_sb_info *sbi, struct file *filp, + void *buffer, int size, int bidx) +{ + loff_t pos = get_dentry_group_pos(bidx); + + return cache_file_read(sbi, filp, buffer, (size_t)size, &pos); +} + +int hmdfs_metainfo_write(struct hmdfs_sb_info *sbi, struct file *filp, + const void *buffer, int size, int bidx) +{ + loff_t pos = get_dentry_group_pos(bidx); + + return cache_file_write(sbi, filp, buffer, (size_t)size, &pos); +} + +/* for each level */ +/* bucketseq start offset by 0,for example + * level0 bucket0(0) + * level1 bucket0(1) bucket1(2) + * level2 bucket0(3) bucket1(4) bucket2(5) bucket3(6) + * return bucket number. 
+ */
+static __u32 get_bucketaddr(int level, int buckoffset)
+{
+	int all_level_bucketaddr = 0;
+	__u32 curlevelmaxbucks;
+
+	if (level >= MAX_BUCKET_LEVEL) {
+		hmdfs_err("level = %d overflow", level);
+		return all_level_bucketaddr;
+	}
+	curlevelmaxbucks = (1 << level);
+	if (buckoffset >= curlevelmaxbucks) {
+		hmdfs_err("buckoffset %d overflow, level %d has %d buckets max",
+			  buckoffset, level, curlevelmaxbucks);
+		return all_level_bucketaddr;
+	}
+	all_level_bucketaddr = curlevelmaxbucks + buckoffset - 1;
+
+	return all_level_bucketaddr;
+}
+
+static __u32 get_bucket_by_level(int level)
+{
+	int buckets = 0;
+
+	if (level >= MAX_BUCKET_LEVEL) {
+		hmdfs_err("level = %d overflow", level);
+		return buckets;
+	}
+
+	buckets = (1 << level);
+	return buckets;
+}
+
+static __u32 get_overall_bucket(int level)
+{
+	int buckets = 0;
+
+	if (level >= MAX_BUCKET_LEVEL) {
+		hmdfs_err("level = %d overflow", level);
+		return buckets;
+	}
+	buckets = (1 << (level + 1)) - 1;
+	return buckets;
+}
+
+static inline loff_t get_dcache_file_size(int level)
+{
+	loff_t buckets = get_overall_bucket(level);
+
+	return buckets * DENTRYGROUP_SIZE * BUCKET_BLOCKS + DENTRYGROUP_HEADER;
+}
+
+static char *get_relative_path(struct hmdfs_sb_info *sbi, char *from)
+{
+	char *relative;
+
+	if (strncmp(from, sbi->local_src, strlen(sbi->local_src))) {
+		hmdfs_warning("orig path does not start with local_src");
+		return NULL;
+	}
+	relative = from + strlen(sbi->local_src);
+	if (*relative == '/')
+		relative++;
+	return relative;
+}
+
+struct file *hmdfs_get_or_create_dents(struct hmdfs_sb_info *sbi, char *name)
+{
+	struct path root_path, path;
+	struct file *filp = NULL;
+	char *relative;
+	int err;
+
+	err = kern_path(sbi->local_src, 0, &root_path);
+	if (err) {
+		hmdfs_err("kern_path failed err = %d", err);
+		return NULL;
+	}
+	relative = get_relative_path(sbi, name);
+	if (!relative) {
+		hmdfs_err("get relative path failed");
+		goto err_root_path;
+	}
+	err = vfs_path_lookup(root_path.dentry, root_path.mnt, relative, 0,
+			      &path);
+	if (err) {
+		hmdfs_err("lookup failed err = %d", err);
+		goto err_root_path;
+	}
+
+	filp = hmdfs_server_cache_revalidate(sbi, relative, &path);
+	if (IS_ERR_OR_NULL(filp)) {
+		filp = hmdfs_server_rebuild_dents(sbi, &path, NULL, relative);
+		if (IS_ERR_OR_NULL(filp))
+			goto err_lookup_path;
+	}
+
+err_lookup_path:
+	path_put(&path);
+err_root_path:
+	path_put(&root_path);
+	return filp;
+}
+
+/* read all dentries in the target path directory */
+int read_dentry(struct hmdfs_sb_info *sbi, char *file_name,
+		struct dir_context *ctx)
+{
+	unsigned long pos = (unsigned long)(ctx->pos);
+	unsigned long group_id = (pos << (1 + DEV_ID_BIT_NUM)) >>
+				 (POS_BIT_NUM - GROUP_ID_BIT_NUM);
+	unsigned long offset = pos & OFFSET_BIT_MASK;
+	struct hmdfs_dentry_group *dentry_group = NULL;
+	struct file *handler = NULL;
+	int group_num = 0;
+	int iterate_result = 0;
+	int i, j;
+	const struct cred *saved_cred;
+
+	if (!file_name)
+		return -EINVAL;
+
+	saved_cred = hmdfs_override_fsids(false);
+	if (!saved_cred) {
+		hmdfs_err("prepare cred failed!");
+		return -ENOMEM;
+	}
+
+	dentry_group = kzalloc(sizeof(*dentry_group), GFP_KERNEL);
+	if (!dentry_group) {
+		/* drop the overridden creds on every error path */
+		hmdfs_revert_fsids(saved_cred);
+		return -ENOMEM;
+	}
+
+	handler = hmdfs_get_or_create_dents(sbi, file_name);
+	if (IS_ERR_OR_NULL(handler)) {
+		hmdfs_revert_fsids(saved_cred);
+		kfree(dentry_group);
+		return -ENOENT;
+	}
+
+	group_num = get_dentry_group_cnt(file_inode(handler));
+
+	for (i = group_id; i < group_num; i++) {
+		hmdfs_metainfo_read(sbi, handler, dentry_group,
+				    sizeof(struct hmdfs_dentry_group), i);
+		for (j = offset; j < DENTRY_PER_GROUP; j++) {
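/*
 * Editor's note: the f_pos values emitted below encode (device id,
 * dentry group, in-group slot) in a single 64-bit cookie; the shifts
 * above unpack it and hmdfs_set_pos(0, i, j) re-packs it before
 * dir_emit(). Assuming hmdfs_set_pos() packs exactly these fields, the
 * round trip is:
 *
 *	pos   = hmdfs_set_pos(dev_id, group, slot);
 *	slot  = pos & OFFSET_BIT_MASK;
 *	group = (pos << (1 + DEV_ID_BIT_NUM)) >>
 *		(POS_BIT_NUM - GROUP_ID_BIT_NUM);
 */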
+			int len;
+			int file_type = 0;
+			bool is_continue;
+
+			len = le16_to_cpu(dentry_group->nsl[j].namelen);
+			if (!test_bit_le(j, dentry_group->bitmap) || len == 0)
+				continue;
+
+			if (S_ISDIR(le16_to_cpu(dentry_group->nsl[j].i_mode)))
+				file_type = DT_DIR;
+			else if (S_ISREG(le16_to_cpu(
+					 dentry_group->nsl[j].i_mode)))
+				file_type = DT_REG;
+			else if (S_ISLNK(le16_to_cpu(
+					 dentry_group->nsl[j].i_mode)))
+				file_type = DT_LNK;
+
+			pos = hmdfs_set_pos(0, i, j);
+			is_continue = dir_emit(
+				ctx, dentry_group->filename[j], len,
+				le64_to_cpu(dentry_group->nsl[j].i_ino),
+				file_type);
+			if (!is_continue) {
+				ctx->pos = pos;
+				iterate_result = 1;
+				goto done;
+			}
+		}
+		offset = 0;
+	}
+
+done:
+	hmdfs_revert_fsids(saved_cred);
+	kfree(dentry_group);
+	fput(handler);
+	return iterate_result;
+}
+
+unsigned int get_max_depth(struct file *filp)
+{
+	size_t isize;
+
+	isize = get_dentry_group_cnt(file_inode(filp)) / BUCKET_BLOCKS;
+
+	return get_count_order(isize + 1);
+}
+
+struct hmdfs_dentry_group *find_dentry_page(struct hmdfs_sb_info *sbi,
+					    pgoff_t index, struct file *filp)
+{
+	int size;
+	struct hmdfs_dentry_group *dentry_blk = NULL;
+	loff_t pos = get_dentry_group_pos(index);
+	int err;
+
+	dentry_blk = kmalloc(sizeof(*dentry_blk), GFP_KERNEL);
+	if (!dentry_blk)
+		return NULL;
+
+	err = hmdfs_wlock_file(filp, pos, DENTRYGROUP_SIZE);
+	if (err) {
+		hmdfs_err("lock file pos %lld failed", pos);
+		kfree(dentry_blk);
+		return NULL;
+	}
+
+	size = cache_file_read(sbi, filp, dentry_blk, (size_t)DENTRYGROUP_SIZE,
+			       &pos);
+	if (size != DENTRYGROUP_SIZE) {
+		kfree(dentry_blk);
+		dentry_blk = NULL;
+	}
+
+	return dentry_blk;
+}
+
+static ssize_t write_dentry_page(struct file *filp, const void *buffer,
+				 int buffersize, loff_t position)
+{
+	ssize_t size;
+
+	size = kernel_write(filp, buffer, (size_t)buffersize, &position);
+	if (size != buffersize)
+		hmdfs_err("write failed, ret = %zd", size);
+
+	return size;
+}
+
+static struct hmdfs_dentry *find_in_block(struct hmdfs_dentry_group *dentry_blk,
+					  __u32 namehash,
+					  const struct qstr *qstr,
+					  struct hmdfs_dentry **insense_de,
+					  bool case_sense)
+{
+	struct hmdfs_dentry *de;
+	unsigned long bit_pos = 0;
+	int max_len = 0;
+
+	while (bit_pos < DENTRY_PER_GROUP) {
+		if (!test_bit_le(bit_pos, dentry_blk->bitmap)) {
+			bit_pos++;
+			max_len++;
+			/* skip free slots without touching their nsl entry */
+			continue;
+		}
+		de = &dentry_blk->nsl[bit_pos];
+		if (unlikely(!de->namelen)) {
+			bit_pos++;
+			continue;
+		}
+
+		if (le32_to_cpu(de->hash) == namehash &&
+		    le16_to_cpu(de->namelen) == qstr->len &&
+		    !memcmp(qstr->name, dentry_blk->filename[bit_pos],
+			    le16_to_cpu(de->namelen)))
+			goto found;
+		if (!(*insense_de) && !case_sense &&
+		    le32_to_cpu(de->hash) == namehash &&
+		    le16_to_cpu(de->namelen) == qstr->len &&
+		    str_n_case_eq(qstr->name, dentry_blk->filename[bit_pos],
+				  le16_to_cpu(de->namelen)))
+			*insense_de = de;
+		max_len = 0;
+		bit_pos += get_dentry_slots(le16_to_cpu(de->namelen));
+	}
+	de = NULL;
+found:
+	return de;
+}
+
+static struct hmdfs_dentry *hmdfs_in_level(struct dentry *child_dentry,
+					   unsigned int level,
+					   struct hmdfs_dcache_lookup_ctx *ctx)
+{
+	unsigned int nbucket;
+	unsigned int bidx, end_block;
+	struct hmdfs_dentry *de = NULL;
+	struct hmdfs_dentry *tmp_insense_de = NULL;
+	struct hmdfs_dentry_group *dentry_blk;
+
+	nbucket = get_bucket_by_level(level);
+	if (!nbucket)
+		return de;
+
+	bidx = get_bucketaddr(level, ctx->hash % nbucket) * BUCKET_BLOCKS;
+	end_block = bidx + BUCKET_BLOCKS;
+
+	for (; bidx < end_block; bidx++) {
+		dentry_blk = find_dentry_page(ctx->sbi, bidx,
ctx->filp); + if (!dentry_blk) + break; + + de = find_in_block(dentry_blk, ctx->hash, ctx->name, + &tmp_insense_de, ctx->sbi->s_case_sensitive); + if (!de && !(ctx->insense_de) && tmp_insense_de) { + ctx->insense_de = tmp_insense_de; + ctx->insense_page = dentry_blk; + ctx->insense_bidx = bidx; + } else if (!de) { + hmdfs_unlock_file(ctx->filp, get_dentry_group_pos(bidx), + DENTRYGROUP_SIZE); + kfree(dentry_blk); + } else { + ctx->page = dentry_blk; + break; + } + } + ctx->bidx = bidx; + return de; +} + +struct hmdfs_dentry *hmdfs_find_dentry(struct dentry *child_dentry, + struct hmdfs_dcache_lookup_ctx *ctx) +{ + struct hmdfs_dentry *de = NULL; + unsigned int max_depth; + unsigned int level; + + if (!ctx->filp) + return NULL; + + ctx->hash = hmdfs_dentry_hash(ctx->name, ctx->sbi->s_case_sensitive); + + max_depth = get_max_depth(ctx->filp); + for (level = 0; level < max_depth; level++) { + de = hmdfs_in_level(child_dentry, level, ctx); + if (de) { + if (ctx->insense_page) { + hmdfs_unlock_file(ctx->filp, + get_dentry_group_pos(ctx->insense_bidx), + DENTRYGROUP_SIZE); + kfree(ctx->insense_page); + ctx->insense_page = NULL; + } + return de; + } + } + if (ctx->insense_de) { + ctx->bidx = ctx->insense_bidx; + ctx->page = ctx->insense_page; + ctx->insense_bidx = 0; + ctx->insense_page = NULL; + } + return ctx->insense_de; +} + +void update_dentry(struct hmdfs_dentry_group *d, struct dentry *child_dentry, + struct inode *inode, __u32 name_hash, unsigned int bit_pos) +{ + struct hmdfs_dentry *de; + struct hmdfs_dentry_info *gdi = hmdfs_d(child_dentry); + const struct qstr name = child_dentry->d_name; + int slots = get_dentry_slots(name.len); + int i; + unsigned long ino; + __u32 igen; + + /* + * If the dentry's inode is symlink, it must be lower inode, + * and we should use the upper ino and generation to fill + * the dentryfile. + */ + if (!gdi && S_ISLNK(d_inode(child_dentry)->i_mode)) { + ino = d_inode(child_dentry)->i_ino; + igen = d_inode(child_dentry)->i_generation; + } else { + ino = inode->i_ino; + igen = inode->i_generation; + } + + de = &d->nsl[bit_pos]; + de->hash = cpu_to_le32(name_hash); + de->namelen = cpu_to_le16(name.len); + memcpy(d->filename[bit_pos], name.name, name.len); + de->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); + de->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); + de->i_size = cpu_to_le64(inode->i_size); + de->i_ino = cpu_to_le64(generate_u64_ino(ino, igen)); + de->i_flag = 0; + + /* + * If the dentry has fsdata, we just assume it must be + * hmdfs filesystem's dentry. + * Only client may update it's info in dentryfile when rename + * the remote file. + * Since the symlink mtime and size is from server's lower + * inode, we should just use it and only set S_IFLNK in mode. 
+ */ + if (gdi && hm_islnk(gdi->file_type)) + de->i_mode = cpu_to_le16(S_IFLNK); + else if (!gdi && S_ISLNK(d_inode(child_dentry)->i_mode)) + de->i_mode = d_inode(child_dentry)->i_mode; + else + de->i_mode = cpu_to_le16(inode->i_mode); + + for (i = 0; i < slots; i++) { + __set_bit_le(bit_pos + i, d->bitmap); + /* avoid wrong garbage data for readdir */ + if (i) + (de + i)->namelen = 0; + } +} + +int room_for_filename(const void *bitmap, int slots, int max_slots) +{ + int bit_start = 0; + int zero_start, zero_end; +next: + zero_start = find_next_zero_bit_le(bitmap, max_slots, bit_start); + if (zero_start >= max_slots) + return max_slots; + + zero_end = find_next_bit_le(bitmap, max_slots, zero_start); + if (zero_end - zero_start >= slots) + return zero_start; + + bit_start = zero_end + 1; + + if (zero_end + 1 >= max_slots) + return max_slots; + goto next; +} + +void create_in_cache_file(uint64_t dev_id, struct dentry *dentry) +{ + struct clearcache_item *item = NULL; + + item = hmdfs_find_cache_item(dev_id, dentry->d_parent); + if (item) { + if (d_inode(dentry)) + create_dentry(dentry, d_inode(dentry), item->filp, + hmdfs_sb(dentry->d_sb)); + else + hmdfs_err("inode is null!"); + kref_put(&item->ref, release_cache_item); + } else { + hmdfs_info("find cache item failed, device_id:%llu", dev_id); + } +} + +int create_dentry(struct dentry *child_dentry, struct inode *inode, + struct file *file, struct hmdfs_sb_info *sbi) +{ + unsigned int bit_pos, level; + unsigned long bidx, end_block; + const struct qstr qstr = child_dentry->d_name; + __u32 namehash; + loff_t pos; + ssize_t size; + int ret = 0; + struct hmdfs_dentry_group *dentry_blk = NULL; + + level = 0; + + namehash = hmdfs_dentry_hash(&qstr, sbi->s_case_sensitive); + + dentry_blk = kmalloc(sizeof(*dentry_blk), GFP_KERNEL); + if (!dentry_blk) { + ret = -ENOMEM; + goto out_err; + } +find: + if (level == MAX_BUCKET_LEVEL) { + ret = -ENOSPC; + goto out; + } + bidx = BUCKET_BLOCKS * + get_bucketaddr(level, namehash % get_bucket_by_level(level)); + end_block = bidx + BUCKET_BLOCKS; + if (end_block > get_dentry_group_cnt(file_inode(file))) { + if (cache_file_truncate(sbi, &(file->f_path), + get_dcache_file_size(level))) { + ret = -ENOSPC; + goto out; + } + } + + for (; bidx < end_block; bidx++) { + int size; + + pos = get_dentry_group_pos(bidx); + ret = hmdfs_wlock_file(file, pos, DENTRYGROUP_SIZE); + if (ret) + goto out; + + size = cache_file_read(sbi, file, dentry_blk, + (size_t)DENTRYGROUP_SIZE, &pos); + if (size != DENTRYGROUP_SIZE) { + ret = -ENOSPC; + hmdfs_unlock_file(file, pos, DENTRYGROUP_SIZE); + goto out; + } + + bit_pos = room_for_filename(&dentry_blk->bitmap, + get_dentry_slots(qstr.len), + DENTRY_PER_GROUP); + if (bit_pos < DENTRY_PER_GROUP) + goto add; + hmdfs_unlock_file(file, pos, DENTRYGROUP_SIZE); + } + ++level; + goto find; +add: + pos = get_dentry_group_pos(bidx); + update_dentry(dentry_blk, child_dentry, inode, namehash, bit_pos); + size = cache_file_write(sbi, file, dentry_blk, + sizeof(struct hmdfs_dentry_group), &pos); + if (size != sizeof(struct hmdfs_dentry_group)) + hmdfs_err("cache file write failed!, ret = %zd", size); + hmdfs_unlock_file(file, pos, DENTRYGROUP_SIZE); +out: + kfree(dentry_blk); +out_err: + return ret; +} + +void hmdfs_init_dcache_lookup_ctx(struct hmdfs_dcache_lookup_ctx *ctx, + struct hmdfs_sb_info *sbi, + const struct qstr *qstr, struct file *filp) +{ + ctx->sbi = sbi; + ctx->name = qstr; + ctx->filp = filp; + ctx->bidx = 0; + ctx->page = NULL; + ctx->insense_de = NULL; + ctx->insense_bidx = 0; + 
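/*
 * Editor's note: room_for_filename() above is the f2fs-style bitmap
 * scan for a run of free slots; get_dentry_slots() rounds the name
 * length up to whole slots. For example, with slots == 2 and a bitmap
 * whose first bits are 1 1 1 0 0 1 (bit 0 first), the first
 * long-enough zero run starts at bit 3, so the function returns 3;
 * when no run fits, it returns max_slots (DENTRY_PER_GROUP for the
 * callers here).
 */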
ctx->insense_page = NULL; +} + +int update_inode_to_dentry(struct dentry *child_dentry, struct inode *inode) +{ + struct hmdfs_sb_info *sbi = d_inode(child_dentry)->i_sb->s_fs_info; + struct hmdfs_dentry *de = NULL; + loff_t ipos; + struct dentry *parent_dentry; + struct cache_file_node *cfn = NULL; + char *relative_path = NULL; + struct hmdfs_dcache_lookup_ctx ctx; + + parent_dentry = child_dentry->d_parent; + + relative_path = hmdfs_get_dentry_relative_path(parent_dentry); + if (!relative_path) + return -ENOMEM; + + cfn = find_cfn(sbi, HMDFS_SERVER_CID, relative_path, true); + if (!cfn) + goto out; + + hmdfs_init_dcache_lookup_ctx(&ctx, sbi, &child_dentry->d_name, + cfn->filp); + de = hmdfs_find_dentry(child_dentry, &ctx); + if (!de) + goto out_cfn; + + de->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); + de->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); + de->i_size = cpu_to_le64(inode->i_size); + de->i_ino = cpu_to_le64( + generate_u64_ino(inode->i_ino, inode->i_generation)); + de->i_flag = 0; + + ipos = get_dentry_group_pos(ctx.bidx); + write_dentry_page(cfn->filp, ctx.page, + sizeof(struct hmdfs_dentry_group), ipos); + hmdfs_unlock_file(cfn->filp, ipos, DENTRYGROUP_SIZE); + kfree(ctx.page); +out_cfn: + release_cfn(cfn); +out: + kfree(relative_path); + return 0; +} + +void hmdfs_delete_dentry(struct dentry *d, struct file *filp) +{ + struct hmdfs_dentry *de = NULL; + unsigned int bit_pos; + int slots, i; + loff_t ipos; + ssize_t size; + struct hmdfs_dcache_lookup_ctx ctx; + + hmdfs_init_dcache_lookup_ctx(&ctx, hmdfs_sb(d->d_sb), &d->d_name, filp); + + de = hmdfs_find_dentry(d, &ctx); + if (IS_ERR_OR_NULL(de)) { + hmdfs_info("find dentry failed!, err=%ld", PTR_ERR(de)); + return; + } + slots = get_dentry_slots(le16_to_cpu(de->namelen)); + + bit_pos = de - ctx.page->nsl; + for (i = 0; i < slots; i++) + __clear_bit_le(bit_pos + i, &ctx.page->bitmap); + + ipos = get_dentry_group_pos(ctx.bidx); + size = cache_file_write(hmdfs_sb(d->d_sb), filp, ctx.page, + sizeof(struct hmdfs_dentry_group), &ipos); + if (size != sizeof(struct hmdfs_dentry_group)) + hmdfs_err("cache file write failed!, ret = %zd", size); + hmdfs_unlock_file(filp, ipos, DENTRYGROUP_SIZE); + kfree(ctx.page); +} + +static int hmdfs_get_cache_path(struct hmdfs_sb_info *sbi, struct path *dir) +{ + struct hmdfs_dentry_info *di = hmdfs_d(sbi->sb->s_root); + int err; + + if (!sbi->s_dentry_cache) { + *dir = di->lower_path; + return 0; + } + + err = kern_path(sbi->cache_dir, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, dir); + if (err) + hmdfs_err("open failed, errno = %d", err); + + return err; +} + +static void hmdfs_put_cache_path(struct hmdfs_sb_info *sbi, struct path *dir) +{ + if (!sbi->s_dentry_cache) + return; + path_put(dir); +} + +struct file *create_local_dentry_file_cache(struct hmdfs_sb_info *sbi) +{ + struct file *filp = NULL; + const struct cred *old_cred = hmdfs_override_creds(sbi->system_cred); + struct path cache_dir; + int err; + + err = hmdfs_get_cache_path(sbi, &cache_dir); + if (err) { + filp = ERR_PTR(err); + goto out; + } + + filp = file_open_root(&cache_dir, ".", + O_RDWR | O_LARGEFILE | O_TMPFILE, + DENTRY_FILE_PERM); + if (IS_ERR(filp)) + hmdfs_err("dentryfile open failed and exit err=%ld", + PTR_ERR(filp)); + + hmdfs_put_cache_path(sbi, &cache_dir); +out: + hmdfs_revert_creds(old_cred); + return filp; +} + +static int hmdfs_linkat(struct path *old_path, const char *newname) +{ + struct dentry *new_dentry = NULL; + struct path new_path; + int error; + + new_dentry = kern_path_create(AT_FDCWD, newname, &new_path, 
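/*
 * Editor's note: cache files are created as O_TMPFILE temporaries (see
 * create_local_dentry_file_cache() above) and only become visible once
 * hmdfs_linkat() links them into the cache directory. The userspace
 * analogue of this create-then-link idiom, for illustration only
 * (paths are placeholders):
 *
 *	int fd = open("/cache", O_TMPFILE | O_RDWR, 0600);
 *	char proc[64];
 *
 *	// ... write the dentry cache contents through fd ...
 *	snprintf(proc, sizeof(proc), "/proc/self/fd/%d", fd);
 *	linkat(AT_FDCWD, proc, AT_FDCWD, "/cache/dentryfile",
 *	       AT_SYMLINK_FOLLOW);
 */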
0); + if (IS_ERR(new_dentry)) { + hmdfs_err("create kernel path failed, error: %ld", + PTR_ERR(new_dentry)); + return PTR_ERR(new_dentry); + } + + error = -EXDEV; + if (old_path->mnt != new_path.mnt) + goto out_dput; + + error = vfs_link(old_path->dentry, new_path.dentry->d_inode, new_dentry, + NULL); + +out_dput: + done_path_create(&new_path, new_dentry); + return error; +} + +static int cache_file_mkdir(const char *name, umode_t mode) +{ + struct dentry *dentry; + struct path path; + int err; + + dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + err = vfs_mkdir(d_inode(path.dentry), dentry, mode); + if (err && err != -EEXIST) + hmdfs_err("vfs_mkdir failed, err = %d", err); + + done_path_create(&path, dentry); + return err; +} + +static int cache_file_create_path(const char *fullpath) +{ + char *path; + char *s; + int err = 0; + + path = kstrdup(fullpath, GFP_KERNEL); + if (!path) + return -ENOMEM; + + s = path + 1; + while (true) { + s = strchr(s, '/'); + if (!s) + break; + s[0] = '\0'; + err = cache_file_mkdir(path, 0755); + if (err && err != -EEXIST) + break; + s[0] = '/'; + s++; + } + kfree(path); + return err; +} + +static void hmdfs_cache_path_create(char *s, const char *dir, bool server) +{ + if (server) + snprintf(s, PATH_MAX, "%s/dentry_cache/server/", dir); + else + snprintf(s, PATH_MAX, "%s/dentry_cache/client/", dir); +} + +static void hmdfs_cache_file_create(char *s, uint64_t hash, const char *id, + bool server) +{ + int offset = strlen(s); + + if (server) + snprintf(s + offset, PATH_MAX - offset, "%016llx", hash); + else + snprintf(s + offset, PATH_MAX - offset, "%s_%016llx", id, hash); +} + +int cache_file_name_generate(char *fullname, struct hmdfs_peer *con, + const char *relative_path, bool server) +{ + struct hmdfs_sb_info *sbi = con->sbi; + uint64_t hash; + char cid[HMDFS_CFN_CID_SIZE]; + int err; + + hmdfs_cache_path_create(fullname, sbi->cache_dir, server); + + err = cache_file_create_path(fullname); + if (err && err != -EEXIST) { + hmdfs_err("making dir failed %d", err); + return err; + } + + strncpy(cid, con->cid, HMDFS_CFN_CID_SIZE - 1); + cid[HMDFS_CFN_CID_SIZE - 1] = '\0'; + + hash = path_hash(relative_path, strlen(relative_path), + sbi->s_case_sensitive); + hmdfs_cache_file_create(fullname, hash, cid, server); + + return 0; +} + +static void free_cfn(struct cache_file_node *cfn) +{ + if (!IS_ERR_OR_NULL(cfn->filp)) + filp_close(cfn->filp, NULL); + + kfree(cfn->relative_path); + kfree(cfn); +} + +static bool dentry_file_match(struct cache_file_node *cfn, const char *id, + const char *path) +{ + int ret; + + if (cfn->sbi->s_case_sensitive) + ret = strcmp(cfn->relative_path, path); + else + ret = strcasecmp(cfn->relative_path, path); + + return (!ret && !strncmp((cfn)->cid, id, HMDFS_CFN_CID_SIZE - 1)); +} + +struct cache_file_node *__find_cfn(struct hmdfs_sb_info *sbi, const char *cid, + const char *path, bool server) +{ + struct cache_file_node *cfn = NULL; + struct list_head *head = get_list_head(sbi, server); + + list_for_each_entry(cfn, head, list) { + if (dentry_file_match(cfn, cid, path)) { + refcount_inc(&cfn->ref); + return cfn; + } + } + return NULL; +} + +struct cache_file_node *create_cfn(struct hmdfs_sb_info *sbi, const char *path, + const char *cid, bool server) +{ + struct cache_file_node *cfn = kzalloc(sizeof(*cfn), GFP_KERNEL); + + if (!cfn) + return NULL; + + cfn->relative_path = kstrdup(path, GFP_KERNEL); + if (!cfn->relative_path) + goto out; + + refcount_set(&cfn->ref, 1); + 
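/*
 * Editor's note: per hmdfs_cache_path_create() and
 * hmdfs_cache_file_create() above, a client cache file lands at
 *
 *	<cache_dir>/dentry_cache/client/<cid>_<%016llx path hash>
 *
 * and a server one at
 *
 *	<cache_dir>/dentry_cache/server/<%016llx path hash>
 *
 * get_cid_and_hash() later splits the client form back apart at the
 * '_' separator when the cache directory is reloaded at mount time.
 */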
+	strncpy(cfn->cid, cid, HMDFS_CFN_CID_SIZE - 1);
+	cfn->cid[HMDFS_CFN_CID_SIZE - 1] = '\0';
+	cfn->sbi = sbi;
+	cfn->server = server;
+	return cfn;
+out:
+	free_cfn(cfn);
+	return NULL;
+}
+
+static struct file *insert_cfn(struct hmdfs_sb_info *sbi, const char *filename,
+			       const char *path, const char *cid, bool server)
+{
+	const struct cred *old_cred = NULL;
+	struct cache_file_node *cfn = NULL;
+	struct cache_file_node *exist = NULL;
+	struct list_head *head = NULL;
+	struct file *filp = NULL;
+
+	cfn = create_cfn(sbi, path, cid, server);
+	if (!cfn)
+		return ERR_PTR(-ENOMEM);
+
+	old_cred = hmdfs_override_creds(sbi->system_cred);
+	filp = filp_open(filename, O_RDWR | O_LARGEFILE, 0);
+	hmdfs_revert_creds(old_cred);
+	if (IS_ERR(filp)) {
+		hmdfs_err("open file failed, err=%ld", PTR_ERR(filp));
+		goto out;
+	}
+
+	head = get_list_head(sbi, server);
+
+	mutex_lock(&sbi->cache_list_lock);
+	exist = __find_cfn(sbi, cid, path, server);
+	if (!exist) {
+		cfn->filp = filp;
+		list_add_tail(&cfn->list, head);
+	} else {
+		mutex_unlock(&sbi->cache_list_lock);
+		release_cfn(exist);
+		filp_close(filp, NULL);
+		filp = ERR_PTR(-EEXIST);
+		goto out;
+	}
+	mutex_unlock(&sbi->cache_list_lock);
+	return filp;
+out:
+	free_cfn(cfn);
+	return filp;
+}
+
+int hmdfs_rename_dentry(struct dentry *old_dentry, struct dentry *new_dentry,
+			struct file *old_filp, struct file *new_filp)
+{
+	int ret;
+	struct hmdfs_sb_info *sbi = hmdfs_sb(new_dentry->d_sb);
+
+	/*
+	 * Try to delete first, because a stale dentry might exist after
+	 * an overwrite.
+	 */
+	hmdfs_delete_dentry(new_dentry, new_filp);
+
+	ret = create_dentry(new_dentry, d_inode(old_dentry), new_filp, sbi);
+	if (ret) {
+		hmdfs_err("create dentry failed, err=%d", ret);
+		return ret;
+	}
+
+	hmdfs_delete_dentry(old_dentry, old_filp);
+	return 0;
+}
+
+/**
+ * cache_file_persistent - link the tmpfile to the cache dir
+ * @con: the connection peer
+ * @filp: the file handler of the tmpfile
+ * @relative_path: the relative path to which the tmpfile belongs
+ * @server: server or client
+ *
+ * Return value: the new file handler of the persistent file if the
+ * persist operation succeeds; otherwise the original handler of the
+ * tmpfile passed in, so that the caller does not have to check the
+ * returned handler.
+ */
+struct file *cache_file_persistent(struct hmdfs_peer *con, struct file *filp,
+				   const char *relative_path, bool server)
+{
+	struct cache_file_node *cfn = NULL;
+	char *fullname = NULL;
+	char *cid = server ?
HMDFS_SERVER_CID : (char *)con->cid; + struct file *newf = NULL; + int i = 0; + int len; + int err; + + if (!con->sbi->s_dentry_cache) + return filp; + + cfn = find_cfn(con->sbi, cid, relative_path, server); + if (cfn) { + release_cfn(cfn); + return filp; + } + fullname = kzalloc(PATH_MAX, GFP_KERNEL); + if (!fullname) + return filp; + + err = cache_file_name_generate(fullname, con, relative_path, server); + if (err) + goto out; + + err = __vfs_setxattr(file_dentry(filp), file_inode(filp), + DENTRY_FILE_XATTR_NAME, relative_path, + strlen(relative_path), 0); + if (err) { + hmdfs_err("setxattr for file failed, err=%d", err); + goto out; + } + + len = strlen(fullname); + + do { + err = hmdfs_linkat(&filp->f_path, fullname); + if (!err) + break; + + snprintf(fullname + len, PATH_MAX - len, "_%d", i); + } while (i++ < DENTRY_FILE_NAME_RETRY); + + if (err) { + hmdfs_err("link for file failed, err=%d", err); + goto out; + } + + newf = insert_cfn(con->sbi, fullname, relative_path, cid, server); + if (!IS_ERR(newf)) + filp = newf; +out: + kfree(fullname); + return filp; +} + +void __destroy_cfn(struct list_head *head) +{ + struct cache_file_node *cfn = NULL; + struct cache_file_node *n = NULL; + + list_for_each_entry_safe(cfn, n, head, list) { + list_del_init(&cfn->list); + release_cfn(cfn); + } +} + +void hmdfs_cfn_destroy(struct hmdfs_sb_info *sbi) +{ + mutex_lock(&sbi->cache_list_lock); + __destroy_cfn(&sbi->client_cache); + __destroy_cfn(&sbi->server_cache); + mutex_unlock(&sbi->cache_list_lock); +} + +struct cache_file_node *find_cfn(struct hmdfs_sb_info *sbi, const char *cid, + const char *path, bool server) +{ + struct cache_file_node *cfn = NULL; + + mutex_lock(&sbi->cache_list_lock); + cfn = __find_cfn(sbi, cid, path, server); + mutex_unlock(&sbi->cache_list_lock); + return cfn; +} + +void release_cfn(struct cache_file_node *cfn) +{ + if (refcount_dec_and_test(&cfn->ref)) + free_cfn(cfn); +} + +void remove_cfn(struct cache_file_node *cfn) +{ + struct hmdfs_sb_info *sbi = cfn->sbi; + bool deleted; + + mutex_lock(&sbi->cache_list_lock); + deleted = list_empty(&cfn->list); + if (!deleted) + list_del_init(&cfn->list); + mutex_unlock(&sbi->cache_list_lock); + if (!deleted) { + delete_dentry_file(cfn->filp); + release_cfn(cfn); + } +} + +int hmdfs_do_lock_file(struct file *filp, unsigned char fl_type, loff_t start, + loff_t len) +{ + struct file_lock fl; + int err; + + locks_init_lock(&fl); + + fl.fl_type = fl_type; + fl.fl_flags = FL_POSIX | FL_CLOSE | FL_SLEEP; + fl.fl_start = start; + fl.fl_end = start + len - 1; + fl.fl_owner = filp; + fl.fl_pid = current->tgid; + fl.fl_file = filp; + fl.fl_ops = NULL; + fl.fl_lmops = NULL; + + err = locks_lock_file_wait(filp, &fl); + if (err) + hmdfs_err("lock file wait failed: %d", err); + + return err; +} + +int hmdfs_wlock_file(struct file *filp, loff_t start, loff_t len) +{ + return hmdfs_do_lock_file(filp, F_WRLCK, start, len); +} + +int hmdfs_rlock_file(struct file *filp, loff_t start, loff_t len) +{ + return hmdfs_do_lock_file(filp, F_RDLCK, start, len); +} + +int hmdfs_unlock_file(struct file *filp, loff_t start, loff_t len) +{ + return hmdfs_do_lock_file(filp, F_UNLCK, start, len); +} + +long cache_file_truncate(struct hmdfs_sb_info *sbi, const struct path *path, + loff_t length) +{ + const struct cred *old_cred = hmdfs_override_creds(sbi->system_cred); + long ret = vfs_truncate(path, length); + + hmdfs_revert_creds(old_cred); + + return ret; +} + +ssize_t cache_file_read(struct hmdfs_sb_info *sbi, struct file *filp, void *buf, + size_t count, 
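/*
 * Editor's note: hmdfs_wlock_file()/hmdfs_unlock_file() above take
 * POSIX-style byte-range locks (locks_lock_file_wait()) over a single
 * dentry group, which is how find_dentry_page() keeps a group stable
 * while it is read and possibly rewritten. Sketch of the usual
 * lock/read/unlock sequence (kernel_read() advances the position, so a
 * copy is kept for the unlock):
 *
 *	loff_t pos = get_dentry_group_pos(bidx);
 *	loff_t rpos = pos;
 *
 *	if (!hmdfs_wlock_file(filp, pos, DENTRYGROUP_SIZE)) {
 *		cache_file_read(sbi, filp, blk, DENTRYGROUP_SIZE, &rpos);
 *		// ... inspect or modify blk, write it back ...
 *		hmdfs_unlock_file(filp, pos, DENTRYGROUP_SIZE);
 *	}
 */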
loff_t *pos) +{ + const struct cred *old_cred = hmdfs_override_creds(sbi->system_cred); + ssize_t ret = kernel_read(filp, buf, count, pos); + + hmdfs_revert_creds(old_cred); + + return ret; +} + +ssize_t cache_file_write(struct hmdfs_sb_info *sbi, struct file *filp, + const void *buf, size_t count, loff_t *pos) +{ + const struct cred *old_cred = hmdfs_override_creds(sbi->system_cred); + ssize_t ret = kernel_write(filp, buf, count, pos); + + hmdfs_revert_creds(old_cred); + + return ret; +} + + +int read_header(struct hmdfs_sb_info *sbi, struct file *filp, + struct hmdfs_dcache_header *header) +{ + ssize_t bytes; + loff_t pos = 0; + + bytes = cache_file_read(sbi, filp, header, sizeof(*header), &pos); + if (bytes != sizeof(*header)) { + hmdfs_err("read file failed, err:%zd", bytes); + return -EIO; + } + + return 0; +} + +static unsigned long long cache_get_dentry_count(struct hmdfs_sb_info *sbi, + struct file *filp) +{ + struct hmdfs_dcache_header header; + int overallpage; + + overallpage = get_dentry_group_cnt(file_inode(filp)); + if (overallpage == 0) + return 0; + + if (read_header(sbi, filp, &header)) + return 0; + + return le64_to_cpu(header.num); +} + +static int cache_check_case_sensitive(struct hmdfs_sb_info *sbi, + struct file *filp) +{ + struct hmdfs_dcache_header header; + + if (read_header(sbi, filp, &header)) + return 0; + + if (sbi->s_case_sensitive != (bool)header.case_sensitive) { + hmdfs_info("Case sensitive inconsistent, current fs is: %d, cache is %d, will drop cache", + sbi->s_case_sensitive, header.case_sensitive); + return 0; + } + return 1; +} + +int write_header(struct file *filp, struct hmdfs_dcache_header *header) +{ + loff_t pos = 0; + ssize_t size; + + size = kernel_write(filp, header, sizeof(*header), &pos); + if (size != sizeof(*header)) { + hmdfs_err("update dcache header failed %zd", size); + return -EIO; + } + + return 0; +} + +void add_to_delete_list(struct hmdfs_sb_info *sbi, struct cache_file_node *cfn) +{ + mutex_lock(&sbi->cache_list_lock); + list_add_tail(&cfn->list, &sbi->to_delete); + mutex_unlock(&sbi->cache_list_lock); +} + +void load_cfn(struct hmdfs_sb_info *sbi, const char *fullname, const char *path, + const char *cid, bool server) +{ + struct cache_file_node *cfn = NULL; + struct cache_file_node *cfn1 = NULL; + struct list_head *head = NULL; + + cfn = create_cfn(sbi, path, cid, server); + if (!cfn) + return; + + cfn->filp = filp_open(fullname, O_RDWR | O_LARGEFILE, 0); + if (IS_ERR(cfn->filp)) { + hmdfs_err("open fail %ld", PTR_ERR(cfn->filp)); + goto out; + } + + if (cache_get_dentry_count(sbi, cfn->filp) < sbi->dcache_threshold) { + add_to_delete_list(sbi, cfn); + return; + } + + if (!cache_check_case_sensitive(sbi, cfn->filp)) { + add_to_delete_list(sbi, cfn); + return; + } + + head = get_list_head(sbi, server); + + mutex_lock(&sbi->cache_list_lock); + cfn1 = __find_cfn(sbi, cid, path, server); + if (!cfn1) { + list_add_tail(&cfn->list, head); + } else { + release_cfn(cfn1); + mutex_unlock(&sbi->cache_list_lock); + add_to_delete_list(sbi, cfn); + return; + } + mutex_unlock(&sbi->cache_list_lock); + + return; +out: + free_cfn(cfn); +} + +static int get_cid_and_hash(const char *name, uint64_t *hash, char *cid) +{ + int len; + char *p = strstr(name, "_"); + + if (!p) + return -EINVAL; + + len = p - name; + if (len >= HMDFS_CFN_CID_SIZE) + return -EINVAL; + + memcpy(cid, name, len); + cid[len] = '\0'; + + if (sscanf(++p, "%llx", hash) != 1) + return -EINVAL; + return 0; +} + +static void store_one(const char *name, struct cache_file_callback 
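/*
 * Editor's note: get_cid_and_hash() above inverts the client cache
 * file naming by splitting "<cid>_<hex hash>" at the first '_'. With
 * illustrative values:
 *
 *	uint64_t hash;
 *	char cid[HMDFS_CFN_CID_SIZE];
 *
 *	if (!get_cid_and_hash("0123abcd_00000000deadbeef", &hash, cid)) {
 *		// cid is "0123abcd", hash is 0xdeadbeef
 *	}
 */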
*cb) +{ + struct file *file = NULL; + char *fullname = NULL; + char *kvalue = NULL; + char cid[HMDFS_CFN_CID_SIZE]; + uint64_t hash; + ssize_t error; + + if (strlen(name) + strlen(cb->dirname) >= PATH_MAX) + return; + + fullname = kzalloc(PATH_MAX, GFP_KERNEL); + if (!fullname) + return; + + snprintf(fullname, PATH_MAX, "%s%s", cb->dirname, name); + + file = filp_open(fullname, O_RDWR | O_LARGEFILE, 0); + if (IS_ERR(file)) { + hmdfs_err("open fail %ld", PTR_ERR(file)); + goto out; + } + + kvalue = kzalloc(PATH_MAX, GFP_KERNEL); + if (!kvalue) + goto out_file; + + error = __vfs_getxattr(file_dentry(file), file_inode(file), + DENTRY_FILE_XATTR_NAME, kvalue, PATH_MAX); + if (error <= 0 || error >= PATH_MAX) { + hmdfs_err("getxattr return: %zd", error); + goto out_kvalue; + } + kvalue[error] = '\0'; + cid[0] = '\0'; + + if (!cb->server) { + if (get_cid_and_hash(name, &hash, cid)) { + hmdfs_err("get cid and hash fail"); + goto out_kvalue; + } + } + + load_cfn(cb->sbi, fullname, kvalue, cid, cb->server); + +out_kvalue: + kfree(kvalue); +out_file: + filp_close(file, NULL); +out: + kfree(fullname); +} + +static int cache_file_iterate(struct dir_context *ctx, const char *name, + int name_len, loff_t offset, u64 ino, + unsigned int d_type) +{ + struct cache_file_item *cfi = NULL; + struct cache_file_callback *cb = + container_of(ctx, struct cache_file_callback, ctx); + + if (name_len > NAME_MAX) { + hmdfs_err("name_len:%d NAME_MAX:%u", name_len, NAME_MAX); + return 0; + } + + if (d_type != DT_REG) + return 0; + + cfi = kmalloc(sizeof(*cfi), GFP_KERNEL); + if (!cfi) + return -ENOMEM; + + cfi->name = kstrndup(name, name_len, GFP_KERNEL); + if (!cfi->name) { + kfree(cfi); + return -ENOMEM; + } + + list_add_tail(&cfi->list, &cb->list); + + return 0; +} + +void hmdfs_do_load(struct hmdfs_sb_info *sbi, const char *fullname, bool server) +{ + struct file *file = NULL; + struct path dirpath; + int err; + struct cache_file_item *cfi = NULL; + struct cache_file_item *n = NULL; + struct cache_file_callback cb = { + .ctx.actor = cache_file_iterate, + .ctx.pos = 0, + .dirname = fullname, + .sbi = sbi, + .server = server, + }; + INIT_LIST_HEAD(&cb.list); + + + err = kern_path(fullname, LOOKUP_DIRECTORY, &dirpath); + if (err) { + hmdfs_info("No file path"); + return; + } + + file = dentry_open(&dirpath, O_RDONLY, current_cred()); + if (IS_ERR_OR_NULL(file)) { + hmdfs_err("dentry_open failed, error: %ld", PTR_ERR(file)); + path_put(&dirpath); + return; + } + + err = iterate_dir(file, &cb.ctx); + if (err) + hmdfs_err("iterate_dir failed, err: %d", err); + + list_for_each_entry_safe(cfi, n, &cb.list, list) { + store_one(cfi->name, &cb); + list_del_init(&cfi->list); + kfree(cfi->name); + kfree(cfi); + } + + fput(file); + path_put(&dirpath); +} + +/** + * This function just used for delete dentryfile.dat + */ +int delete_dentry_file(struct file *filp) +{ + int err = 0; + struct dentry *dentry = file_dentry(filp); + struct dentry *parent = lock_parent(dentry); + + if (dentry->d_parent == parent) { + dget(dentry); + err = vfs_unlink(d_inode(parent), dentry, NULL); + dput(dentry); + } + unlock_dir(parent); + + return err; +} + +void hmdfs_delete_useless_cfn(struct hmdfs_sb_info *sbi) +{ + struct cache_file_node *cfn = NULL; + struct cache_file_node *n = NULL; + + mutex_lock(&sbi->cache_list_lock); + + list_for_each_entry_safe(cfn, n, &sbi->to_delete, list) { + delete_dentry_file(cfn->filp); + list_del_init(&cfn->list); + release_cfn(cfn); + } + mutex_unlock(&sbi->cache_list_lock); +} + +void hmdfs_cfn_load(struct 
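/*
 * Editor's note: at mount time, hmdfs_cfn_load() below scans both
 * halves of the cache directory and then prunes whatever load_cfn()
 * queued on the to_delete list (files below dcache_threshold or with a
 * mismatched case-sensitivity flag):
 *
 *	hmdfs_do_load(sbi, "<cache_dir>/dentry_cache/client/", false);
 *	hmdfs_do_load(sbi, "<cache_dir>/dentry_cache/server/", true);
 *	hmdfs_delete_useless_cfn(sbi);
 */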
hmdfs_sb_info *sbi) +{ + char *fullname = NULL; + + if (!sbi->s_dentry_cache) + return; + + fullname = kzalloc(PATH_MAX, GFP_KERNEL); + if (!fullname) + return; + + snprintf(fullname, PATH_MAX, "%s/dentry_cache/client/", + sbi->cache_dir); + hmdfs_do_load(sbi, fullname, false); + + snprintf(fullname, PATH_MAX, "%s/dentry_cache/server/", + sbi->cache_dir); + hmdfs_do_load(sbi, fullname, true); + kfree(fullname); + + hmdfs_delete_useless_cfn(sbi); +} + +static void __cache_file_destroy_by_path(struct list_head *head, + const char *path) +{ + struct cache_file_node *cfn = NULL; + struct cache_file_node *n = NULL; + + list_for_each_entry_safe(cfn, n, head, list) { + if (strcmp(path, cfn->relative_path) != 0) + continue; + list_del_init(&cfn->list); + delete_dentry_file(cfn->filp); + release_cfn(cfn); + } +} + +static void cache_file_destroy_by_path(struct hmdfs_sb_info *sbi, + const char *path) +{ + mutex_lock(&sbi->cache_list_lock); + + __cache_file_destroy_by_path(&sbi->server_cache, path); + __cache_file_destroy_by_path(&sbi->client_cache, path); + + mutex_unlock(&sbi->cache_list_lock); +} + +static void cache_file_find_and_delete(struct hmdfs_peer *con, + const char *relative_path) +{ + struct cache_file_node *cfn; + + cfn = find_cfn(con->sbi, con->cid, relative_path, false); + if (!cfn) + return; + + remove_cfn(cfn); + release_cfn(cfn); +} + +void cache_file_delete_by_dentry(struct hmdfs_peer *con, struct dentry *dentry) +{ + char *relative_path = NULL; + + relative_path = hmdfs_get_dentry_relative_path(dentry); + if (unlikely(!relative_path)) { + hmdfs_err("get relative path failed %d", -ENOMEM); + return; + } + cache_file_find_and_delete(con, relative_path); + kfree(relative_path); +} + +struct file *hmdfs_get_new_dentry_file(struct hmdfs_peer *con, + const char *relative_path, + struct hmdfs_dcache_header *header) +{ + struct hmdfs_sb_info *sbi = con->sbi; + int len = strlen(relative_path); + struct file *filp = NULL; + int err; + + filp = create_local_dentry_file_cache(sbi); + if (IS_ERR(filp)) + return filp; + + err = hmdfs_client_start_readdir(con, filp, relative_path, len, header); + if (err) { + if (err != -ENOENT) + hmdfs_err("readdir failed dev: %llu err: %d", + con->device_id, err); + fput(filp); + filp = ERR_PTR(err); + } + + return filp; +} + +void add_cfn_to_item(struct dentry *dentry, struct hmdfs_peer *con, + struct cache_file_node *cfn) +{ + struct file *file = cfn->filp; + int err; + + err = hmdfs_add_cache_list(con->device_id, dentry, file); + if (unlikely(err)) { + hmdfs_err("add cache list failed devid:%llu err:%d", + con->device_id, err); + return; + } +} + +int hmdfs_add_file_to_cache(struct dentry *dentry, struct hmdfs_peer *con, + struct file *file, const char *relative_path) +{ + struct hmdfs_sb_info *sbi = con->sbi; + struct file *newf = file; + + if (cache_get_dentry_count(sbi, file) >= sbi->dcache_threshold) + newf = cache_file_persistent(con, file, relative_path, false); + else + cache_file_find_and_delete(con, relative_path); + + return hmdfs_add_cache_list(con->device_id, dentry, newf); +} + +static struct file *read_header_and_revalidate(struct hmdfs_peer *con, + struct file *filp, + const char *relative_path) +{ + struct hmdfs_dcache_header header; + struct hmdfs_dcache_header *p = NULL; + + if (read_header(con->sbi, filp, &header) == 0) + p = &header; + + return hmdfs_get_new_dentry_file(con, relative_path, p); +} + +void remote_file_revalidate_cfn(struct dentry *dentry, struct hmdfs_peer *con, + struct cache_file_node *cfn, + const char *relative_path) 
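+/*
+ * (Descriptive note, inferred from the body below: re-request the dcache for
+ * relative_path from the peer; a zero-length reply means the persisted cfn
+ * copy is still up to date, otherwise the stale cfn is removed and the fresh
+ * dentry file is added to the dentry's cache list.)
+ */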
+{ + struct file *file = NULL; + int err; + + file = read_header_and_revalidate(con, cfn->filp, relative_path); + if (IS_ERR(file)) + return; + + /* + * If the request returned ok but file length is 0, we assume + * that the server verified the client cache file is uptodate. + */ + if (i_size_read(file->f_inode) == 0) { + hmdfs_info("The cfn cache for dev:%llu is uptodate", + con->device_id); + fput(file); + add_cfn_to_item(dentry, con, cfn); + return; + } + + /* OK, cfn is not uptodate, let's remove it and add the new file */ + remove_cfn(cfn); + + err = hmdfs_add_file_to_cache(dentry, con, file, relative_path); + if (unlikely(err)) + hmdfs_err("add cache list failed devid:%llu err:%d", + con->device_id, err); + fput(file); +} + +void remote_file_revalidate_item(struct dentry *dentry, struct hmdfs_peer *con, + struct clearcache_item *item, + const char *relative_path) +{ + struct file *file = NULL; + int err; + + file = read_header_and_revalidate(con, item->filp, relative_path); + if (IS_ERR(file)) + return; + + /* + * If the request returned ok but file length is 0, we assume + * that the server verified the client cache file is uptodate. + */ + if (i_size_read(file->f_inode) == 0) { + hmdfs_info("The item cache for dev:%llu is uptodate", + con->device_id); + item->time = jiffies; + fput(file); + return; + } + + /* We need to replace the old item */ + remove_cache_item(item); + cache_file_find_and_delete(con, relative_path); + + err = hmdfs_add_file_to_cache(dentry, con, file, relative_path); + if (unlikely(err)) + hmdfs_err("add cache list failed devid:%llu err:%d", + con->device_id, err); + fput(file); +} + +bool get_remote_dentry_file(struct dentry *dentry, struct hmdfs_peer *con) +{ + struct hmdfs_dentry_info *d_info = hmdfs_d(dentry); + struct cache_file_node *cfn = NULL; + struct hmdfs_sb_info *sbi = con->sbi; + char *relative_path = NULL; + int err = 0; + struct file *filp = NULL; + struct clearcache_item *item; + + if (hmdfs_cache_revalidate(READ_ONCE(con->conn_time), con->device_id, + dentry)) + return false; + + relative_path = hmdfs_get_dentry_relative_path(dentry); + if (unlikely(!relative_path)) { + hmdfs_err("get relative path failed %d", -ENOMEM); + return false; + } + mutex_lock(&d_info->cache_pull_lock); + if (hmdfs_cache_revalidate(READ_ONCE(con->conn_time), con->device_id, + dentry)) + goto out_unlock; + + item = hmdfs_find_cache_item(con->device_id, dentry); + if (item) { + remote_file_revalidate_item(dentry, con, item, relative_path); + kref_put(&item->ref, release_cache_item); + goto out_unlock; + } + + cfn = find_cfn(sbi, con->cid, relative_path, false); + if (cfn) { + remote_file_revalidate_cfn(dentry, con, cfn, relative_path); + release_cfn(cfn); + goto out_unlock; + } + + filp = hmdfs_get_new_dentry_file(con, relative_path, NULL); + if (IS_ERR(filp)) { + err = PTR_ERR(filp); + goto out_unlock; + } + + err = hmdfs_add_file_to_cache(dentry, con, filp, relative_path); + if (unlikely(err)) + hmdfs_err("add cache list failed devid:%lu err:%d", + (unsigned long)con->device_id, err); + fput(filp); + +out_unlock: + mutex_unlock(&d_info->cache_pull_lock); + if (err && err != -ENOENT) + hmdfs_err("readdir failed dev:%lu err:%d", + (unsigned long)con->device_id, err); + kfree(relative_path); + return true; +} + +int hmdfs_file_type(const char *name) +{ + if (!name) + return -EINVAL; + + if (!strcmp(name, CURRENT_DIR) || !strcmp(name, PARENT_DIR)) + return HMDFS_TYPE_DOT; + + return HMDFS_TYPE_COMMON; +} + +struct clearcache_item *hmdfs_find_cache_item(uint64_t dev_id, + 
struct dentry *dentry) +{ + struct clearcache_item *item = NULL; + struct hmdfs_dentry_info *d_info = hmdfs_d(dentry); + + if (!d_info) + return NULL; + + spin_lock(&d_info->cache_list_lock); + list_for_each_entry(item, &(d_info->cache_list_head), list) { + if (dev_id == item->dev_id) { + kref_get(&item->ref); + spin_unlock(&d_info->cache_list_lock); + return item; + } + } + spin_unlock(&d_info->cache_list_lock); + return NULL; +} + +bool hmdfs_cache_revalidate(unsigned long conn_time, uint64_t dev_id, + struct dentry *dentry) +{ + bool ret = false; + struct clearcache_item *item = NULL; + struct hmdfs_dentry_info *d_info = hmdfs_d(dentry); + unsigned int timeout; + + if (!d_info) + return ret; + + timeout = hmdfs_sb(dentry->d_sb)->dcache_timeout; + spin_lock(&d_info->cache_list_lock); + list_for_each_entry(item, &(d_info->cache_list_head), list) { + if (dev_id == item->dev_id) { + ret = cache_item_revalidate(conn_time, item->time, + timeout); + break; + } + } + spin_unlock(&d_info->cache_list_lock); + return ret; +} + +void remove_cache_item(struct clearcache_item *item) +{ + bool deleted; + + spin_lock(&item->d_info->cache_list_lock); + deleted = list_empty(&item->list); + if (!deleted) + list_del_init(&item->list); + spin_unlock(&item->d_info->cache_list_lock); + if (!deleted) + kref_put(&item->ref, release_cache_item); +} + +void release_cache_item(struct kref *ref) +{ + struct clearcache_item *item = + container_of(ref, struct clearcache_item, ref); + + if (item->filp) + fput(item->filp); + kfree(item); +} + +void hmdfs_remove_cache_filp(struct hmdfs_peer *con, struct dentry *dentry) +{ + struct clearcache_item *item = NULL; + struct clearcache_item *item_temp = NULL; + struct hmdfs_dentry_info *d_info = hmdfs_d(dentry); + // struct path *lower_path = NULL; + + if (!d_info) + return; + + spin_lock(&d_info->cache_list_lock); + list_for_each_entry_safe(item, item_temp, &(d_info->cache_list_head), + list) { + if (con->device_id == item->dev_id) { + list_del_init(&item->list); + spin_unlock(&d_info->cache_list_lock); + cache_file_delete_by_dentry(con, dentry); + kref_put(&item->ref, release_cache_item); + return; + } + } + spin_unlock(&d_info->cache_list_lock); +} + +int hmdfs_add_cache_list(uint64_t dev_id, struct dentry *dentry, + struct file *filp) +{ + struct clearcache_item *item = NULL; + struct hmdfs_dentry_info *d_info = hmdfs_d(dentry); + + if (!d_info) + return -ENOMEM; + + item = kzalloc(sizeof(*item), GFP_KERNEL); + if (!item) + return -ENOMEM; + + item->dev_id = dev_id; + item->filp = get_file(filp); + item->time = jiffies; + item->d_info = d_info; + kref_init(&item->ref); + spin_lock(&d_info->cache_list_lock); + list_add_tail(&(item->list), &(d_info->cache_list_head)); + spin_unlock(&d_info->cache_list_lock); + return 0; +} + +void hmdfs_add_remote_cache_list(struct hmdfs_peer *con, const char *dir_path) +{ + int err = 0; + struct remotecache_item *item = NULL; + struct remotecache_item *item_temp = NULL; + struct path path, root_path; + struct hmdfs_dentry_info *d_info = NULL; + + err = kern_path(con->sbi->local_dst, 0, &root_path); + if (err) { + hmdfs_err("kern_path failed err = %d", err); + return; + } + + err = vfs_path_lookup(root_path.dentry, root_path.mnt, dir_path, 0, + &path); + if (err) + goto out_put_root; + + d_info = hmdfs_d(path.dentry); + if (!d_info) { + err = -EINVAL; + goto out; + } + + /* find duplicate con */ + mutex_lock(&d_info->remote_cache_list_lock); + list_for_each_entry_safe(item, item_temp, + &(d_info->remote_cache_list_head), list) { + if 
(item->con->device_id == con->device_id) {
+			mutex_unlock(&d_info->remote_cache_list_lock);
+			goto out;
+		}
+	}
+
+	item = kzalloc(sizeof(*item), GFP_KERNEL);
+	if (!item) {
+		err = -ENOMEM;
+		mutex_unlock(&d_info->remote_cache_list_lock);
+		goto out;
+	}
+
+	item->con = con;
+	item->drop_flag = 0;
+	list_add(&(item->list), &(d_info->remote_cache_list_head));
+	mutex_unlock(&d_info->remote_cache_list_lock);
+
+out:
+	path_put(&path);
+out_put_root:
+	path_put(&root_path);
+}
+
+int hmdfs_drop_remote_cache_dents(struct dentry *dentry)
+{
+	struct path lower_path;
+	struct inode *lower_inode = NULL;
+	struct remotecache_item *item = NULL;
+	struct remotecache_item *item_temp = NULL;
+	struct hmdfs_dentry_info *d_info = NULL;
+	char *relative_path = NULL;
+
+	if (!dentry) {
+		hmdfs_err("dentry null and return");
+		return 0;
+	}
+
+	d_info = hmdfs_d(dentry);
+	if (!d_info) {
+		hmdfs_err("d_info null and return");
+		return 0;
+	}
+	hmdfs_get_lower_path(dentry, &lower_path);
+	if (IS_ERR_OR_NULL(lower_path.dentry)) {
+		hmdfs_put_lower_path(&lower_path);
+		return 0;
+	}
+	lower_inode = d_inode(lower_path.dentry);
+	hmdfs_put_lower_path(&lower_path);
+	if (IS_ERR_OR_NULL(lower_inode))
+		return 0;
+	/* only for directories */
+	if (!S_ISDIR(lower_inode->i_mode))
+		return 0;
+
+	relative_path = hmdfs_get_dentry_relative_path(dentry);
+	if (!relative_path) {
+		hmdfs_err("get dentry relative path failed");
+		return 0;
+	}
+	mutex_lock(&d_info->remote_cache_list_lock);
+	list_for_each_entry_safe(item, item_temp,
+				 &(d_info->remote_cache_list_head), list) {
+		if (item->drop_flag) {
+			item->drop_flag = 0;
+			continue;
+		}
+		mutex_unlock(&d_info->remote_cache_list_lock);
+		hmdfs_send_drop_push(item->con, relative_path);
+		mutex_lock(&d_info->remote_cache_list_lock);
+		list_del(&item->list);
+		kfree(item);
+	}
+	mutex_unlock(&d_info->remote_cache_list_lock);
+
+	kfree(relative_path);
+	return 0;
+}
+
+/* Clear the dentry cache files of the target directory */
+int hmdfs_clear_cache_dents(struct dentry *dentry, bool remove_cache)
+{
+	struct clearcache_item *item = NULL;
+	struct clearcache_item *item_temp = NULL;
+	struct hmdfs_dentry_info *d_info = hmdfs_d(dentry);
+	char *path = NULL;
+
+	if (!d_info)
+		return 0;
+
+	spin_lock(&d_info->cache_list_lock);
+	list_for_each_entry_safe(item, item_temp, &(d_info->cache_list_head),
+				 list) {
+		list_del_init(&item->list);
+		kref_put(&item->ref, release_cache_item);
+	}
+	spin_unlock(&d_info->cache_list_lock);
+
+	if (!remove_cache)
+		return 0;
+
+	/* we also need to confirm that there are no dentryfile_dev*
+	 * files under this dentry
+	 */
+	path = hmdfs_get_dentry_relative_path(dentry);
+
+	if (unlikely(!path)) {
+		hmdfs_err("get relative path failed");
+		return 0;
+	}
+
+	cache_file_destroy_by_path(hmdfs_sb(dentry->d_sb), path);
+
+	kfree(path);
+	return 0;
+}
+
+void hmdfs_mark_drop_flag(uint64_t device_id, struct dentry *dentry)
+{
+	struct remotecache_item *item = NULL;
+	struct hmdfs_dentry_info *d_info = NULL;
+
+	d_info = hmdfs_d(dentry);
+	if (!d_info) {
+		hmdfs_err("d_info null and return");
+		return;
+	}
+
+	mutex_lock(&d_info->remote_cache_list_lock);
+	list_for_each_entry(item, &(d_info->remote_cache_list_head), list) {
+		if (item->con->device_id == device_id) {
+			item->drop_flag = 1;
+			break;
+		}
+	}
+	mutex_unlock(&d_info->remote_cache_list_lock);
+}
+
+void hmdfs_clear_drop_flag(struct dentry *dentry)
+{
+	struct remotecache_item *item = NULL;
+	struct hmdfs_dentry_info *d_info = NULL;
+
+	if (!dentry) {
+		hmdfs_err("dentry null and return");
+		return;
+	}
+
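+	/* reset any drop flags left behind by hmdfs_mark_drop_flag() */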
d_info = hmdfs_d(dentry);
+	if (!d_info) {
+		hmdfs_err("d_info null and return");
+		return;
+	}
+
+	mutex_lock(&d_info->remote_cache_list_lock);
+	list_for_each_entry(item, &(d_info->remote_cache_list_head), list) {
+		if (item->drop_flag)
+			item->drop_flag = 0;
+	}
+	mutex_unlock(&d_info->remote_cache_list_lock);
+}
+
+#define DUSTBIN_SUFFIX ".hwbk"
+static void hmdfs_rename_bak(struct dentry *dentry)
+{
+	struct path lower_path;
+	struct dentry *lower_parent = NULL;
+	struct dentry *lower_dentry = NULL;
+	struct dentry *new_dentry = NULL;
+	char *name = NULL;
+	int len = 0;
+	int err = 0;
+
+	hmdfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	len = strlen(lower_dentry->d_name.name) + strlen(DUSTBIN_SUFFIX) + 2;
+	if (len >= NAME_MAX) {
+		err = -ENAMETOOLONG;
+		goto put_lower_path;
+	}
+
+	name = kmalloc(len, GFP_KERNEL);
+	if (!name) {
+		err = -ENOMEM;
+		goto put_lower_path;
+	}
+
+	snprintf(name, len, ".%s%s", lower_dentry->d_name.name, DUSTBIN_SUFFIX);
+	err = mnt_want_write(lower_path.mnt);
+	if (err) {
+		hmdfs_info("get write access failed, err %d", err);
+		goto free_name;
+	}
+
+	lower_parent = lock_parent(lower_dentry);
+	new_dentry = lookup_one_len(name, lower_parent, strlen(name));
+	if (IS_ERR(new_dentry)) {
+		err = PTR_ERR(new_dentry);
+		hmdfs_info("lookup new dentry failed, err %d", err);
+		goto unlock_parent;
+	}
+
+	err = vfs_rename(d_inode(lower_parent), lower_dentry,
+			 d_inode(lower_parent), new_dentry, NULL, 0);
+
+	dput(new_dentry);
+unlock_parent:
+	unlock_dir(lower_parent);
+	mnt_drop_write(lower_path.mnt);
+free_name:
+	kfree(name);
+put_lower_path:
+	hmdfs_put_lower_path(&lower_path);
+
+	if (err)
+		hmdfs_err("failed to rename file, err %d", err);
+}
+
+int hmdfs_root_unlink(uint64_t device_id, struct path *root_path,
+		      const char *unlink_dir, const char *unlink_name)
+{
+	int err = 0;
+	struct path path;
+	struct dentry *child_dentry = NULL;
+	struct inode *dir = NULL;
+	struct inode *child_inode = NULL;
+	kuid_t tmp_uid;
+
+	err = vfs_path_lookup(root_path->dentry, root_path->mnt,
+			      unlink_dir, LOOKUP_DIRECTORY, &path);
+	if (err) {
+		hmdfs_err("found path failed err = %d", err);
+		return err;
+	}
+	dir = d_inode(path.dentry);
+	inode_lock_nested(dir, I_MUTEX_PARENT);
+
+	child_dentry = lookup_one_len(unlink_name, path.dentry,
+				      strlen(unlink_name));
+	if (IS_ERR(child_dentry)) {
+		err = PTR_ERR(child_dentry);
+		hmdfs_err("lookup_one_len failed, err = %d", err);
+		goto unlock_out;
+	}
+	if (d_is_negative(child_dentry)) {
+		err = -ENOENT;
+		dput(child_dentry);
+		goto unlock_out;
+	}
+	child_inode = d_inode(child_dentry);
+
+	tmp_uid = hmdfs_override_inode_uid(dir);
+
+	hmdfs_mark_drop_flag(device_id, path.dentry);
+	ihold(child_inode);
+	err = vfs_unlink(dir, child_dentry, NULL);
+	/*
+	 * -EOWNERDEAD means we want to put the file in a special dir instead
+	 * of deleting it, specifically the dustbin on the phone, so that the
+	 * user can recover deleted images and videos.
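+	 * (hmdfs_rename_bak() implements the move: the victim is renamed to a
+	 * hidden ".<name>.hwbk" entry in the same lower directory.)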
+ */ + if (err == -EOWNERDEAD) { + hmdfs_rename_bak(child_dentry); + err = 0; + } + if (err) + hmdfs_err("unlink path failed err = %d", err); + hmdfs_revert_inode_uid(dir, tmp_uid); + dput(child_dentry); + +unlock_out: + inode_unlock(dir); + if (child_inode) + iput(child_inode); + path_put(&path); + return err; +} + +struct dentry *hmdfs_root_mkdir(uint64_t device_id, const char *local_dst_path, + const char *mkdir_dir, const char *mkdir_name, + umode_t mode) +{ + int err; + struct path path; + struct dentry *child_dentry = NULL; + struct dentry *ret = NULL; + char *mkdir_path = NULL; + char *mkdir_abs_path = NULL; + + mkdir_path = hmdfs_connect_path(mkdir_dir, mkdir_name); + if (!mkdir_path) + return ERR_PTR(-EACCES); + + mkdir_abs_path = + hmdfs_get_dentry_absolute_path(local_dst_path, mkdir_path); + if (!mkdir_abs_path) { + ret = ERR_PTR(-ENOMEM); + goto out; + } + + child_dentry = kern_path_create(AT_FDCWD, mkdir_abs_path, + &path, LOOKUP_DIRECTORY); + if (IS_ERR(child_dentry)) { + ret = child_dentry; + goto out; + } + + hmdfs_mark_drop_flag(device_id, child_dentry->d_parent); + err = vfs_mkdir(d_inode(path.dentry), child_dentry, mode); + if (err) { + hmdfs_err("mkdir failed! err=%d", err); + ret = ERR_PTR(err); + goto out_put; + } + ret = dget(child_dentry); +out_put: + done_path_create(&path, child_dentry); +out: + kfree(mkdir_path); + kfree(mkdir_abs_path); + return ret; +} + +struct dentry *hmdfs_root_create(uint64_t device_id, const char *local_dst_path, + const char *create_dir, + const char *create_name, + umode_t mode, bool want_excl) +{ + int err; + struct path path; + struct dentry *child_dentry = NULL; + struct dentry *ret = NULL; + char *create_path = NULL; + char *create_abs_path = NULL; + + create_path = hmdfs_connect_path(create_dir, create_name); + if (!create_path) + return ERR_PTR(-EACCES); + + create_abs_path = + hmdfs_get_dentry_absolute_path(local_dst_path, create_path); + if (!create_abs_path) { + ret = ERR_PTR(-ENOMEM); + goto out; + } + + child_dentry = kern_path_create(AT_FDCWD, create_abs_path, &path, 0); + + if (IS_ERR(child_dentry)) { + ret = child_dentry; + goto out; + } + hmdfs_mark_drop_flag(device_id, child_dentry->d_parent); + err = vfs_create(d_inode(path.dentry), child_dentry, mode, want_excl); + if (err) { + hmdfs_err("path create failed! 
err=%d", err); + ret = ERR_PTR(err); + goto out_put; + } + ret = dget(child_dentry); +out_put: + done_path_create(&path, child_dentry); +out: + kfree(create_path); + kfree(create_abs_path); + return ret; +} + +int hmdfs_root_rmdir(uint64_t device_id, struct path *root_path, + const char *rmdir_dir, const char *rmdir_name) +{ + int err = 0; + struct path path; + struct dentry *child_dentry = NULL; + struct inode *dir = NULL; + + err = vfs_path_lookup(root_path->dentry, root_path->mnt, + rmdir_dir, LOOKUP_DIRECTORY, &path); + if (err) { + hmdfs_err("found path failed err = %d", err); + return err; + } + dir = d_inode(path.dentry); + inode_lock_nested(dir, I_MUTEX_PARENT); + + child_dentry = lookup_one_len(rmdir_name, path.dentry, + strlen(rmdir_name)); + if (IS_ERR(child_dentry)) { + err = PTR_ERR(child_dentry); + hmdfs_err("lookup_one_len failed, err = %d", err); + goto unlock_out; + } + if (d_is_negative(child_dentry)) { + err = -ENOENT; + dput(child_dentry); + goto unlock_out; + } + + hmdfs_mark_drop_flag(device_id, path.dentry); + err = vfs_rmdir(dir, child_dentry); + if (err) + hmdfs_err("rmdir failed err = %d", err); + dput(child_dentry); + +unlock_out: + inode_unlock(dir); + path_put(&path); + return err; +} + +int hmdfs_root_rename(struct hmdfs_sb_info *sbi, uint64_t device_id, + const char *oldpath, const char *oldname, + const char *newpath, const char *newname, + unsigned int flags) +{ + int err = 0; + struct path path_dst; + struct path path_old; + struct path path_new; + struct dentry *trap = NULL; + struct dentry *old_dentry = NULL; + struct dentry *new_dentry = NULL; + + err = kern_path(sbi->local_dst, 0, &path_dst); + if (err) { + hmdfs_err("kern_path for local dst failed %d", err); + return err; + } + + err = vfs_path_lookup(path_dst.dentry, path_dst.mnt, oldpath, 0, + &path_old); + if (err) { + hmdfs_info("lookup oldpath from local_dst failed, err %d", err); + goto put_path_dst; + } + + err = vfs_path_lookup(path_dst.dentry, path_dst.mnt, newpath, 0, + &path_new); + if (err) { + hmdfs_info("lookup newpath from local_dst failed, err %d", err); + goto put_path_old; + } + + err = mnt_want_write(path_dst.mnt); + if (err) { + hmdfs_info("get write access failed for local_dst, err %d", + err); + goto put_path_new; + } + + trap = lock_rename(path_new.dentry, path_old.dentry); + + old_dentry = lookup_one_len(oldname, path_old.dentry, strlen(oldname)); + if (IS_ERR(old_dentry)) { + err = PTR_ERR(old_dentry); + hmdfs_info("lookup old dentry failed, err %d", err); + goto unlock; + } + + /* source should not be ancestor of target */ + if (old_dentry == trap) { + err = -EINVAL; + goto put_old_dentry; + } + + new_dentry = lookup_one_len(newname, path_new.dentry, strlen(newname)); + if (IS_ERR(new_dentry)) { + err = PTR_ERR(new_dentry); + hmdfs_info("lookup new dentry failed, err %d", err); + goto put_old_dentry; + } + + /* + * Exchange rename is not supported, thus target should not be an + * ancestor of source. 
+	 */
+	if (trap == new_dentry) {
+		err = -ENOTEMPTY;
+		goto put_new_dentry;
+	}
+
+	if (d_is_positive(new_dentry) && (flags & RENAME_NOREPLACE)) {
+		err = -EEXIST;
+		goto put_new_dentry;
+	}
+
+	hmdfs_mark_drop_flag(device_id, path_old.dentry);
+	if (path_old.dentry != path_new.dentry)
+		hmdfs_mark_drop_flag(device_id, path_new.dentry);
+
+	err = vfs_rename(d_inode(path_old.dentry), old_dentry,
+			 d_inode(path_new.dentry), new_dentry, NULL, 0);
+
+put_new_dentry:
+	dput(new_dentry);
+put_old_dentry:
+	dput(old_dentry);
+unlock:
+	unlock_rename(path_new.dentry, path_old.dentry);
+	mnt_drop_write(path_dst.mnt);
+put_path_new:
+	path_put(&path_new);
+put_path_old:
+	path_put(&path_old);
+put_path_dst:
+	path_put(&path_dst);
+
+	return err;
+}
+
+int hmdfs_get_path_in_sb(struct super_block *sb, const char *name,
+			 unsigned int flags, struct path *path)
+{
+	int err;
+
+	err = kern_path(name, flags, path);
+	if (err) {
+		hmdfs_err("can't get %s %d\n", name, err);
+		return err;
+	}
+
+	/* should ensure the path belongs to this sb */
+	if (path->dentry->d_sb != sb) {
+		err = -EINVAL;
+		hmdfs_err("Wrong sb: %s on %s", name,
+			  path->dentry->d_sb->s_type->name);
+		path_put(path);
+	}
+
+	return err;
+}
diff --git a/fs/hmdfs/hmdfs_dentryfile.h b/fs/hmdfs/hmdfs_dentryfile.h
new file mode 100644
index 0000000000000000000000000000000000000000..df1463007f15be5f3ba3180b4559a7a65a47eedf
--- /dev/null
+++ b/fs/hmdfs/hmdfs_dentryfile.h
@@ -0,0 +1,342 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/hmdfs_dentryfile.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_DENTRYFILE_H
+#define HMDFS_DENTRYFILE_H
+
+#include "hmdfs.h"
+#include
+
+/* used to escape from the hmdfs file system; hmdfs hides the following names */
+#define CURRENT_DIR "."
+#define PARENT_DIR ".."
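+
+/*
+ * (Note: hmdfs_file_type() classifies these two names as HMDFS_TYPE_DOT so
+ * that directory iteration can skip them.)
+ */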
+
+/* local dentry cache data */
+#define DENTRY_FILE_XATTR_NAME "user.hmdfs_cache"
+
+#define DENTRY_FILE_NAME_RETRY 10
+
+#define MAX_BUCKET_LEVEL 63
+#define BUCKET_BLOCKS 2
+#define MAX_DIR_BUCKETS (1 << ((MAX_BUCKET_LEVEL / 2) - 1))
+
+#define CONFLICTING_FILE_CONST_SUFFIX "_conflict_dev"
+#define CONFLICTING_FILE_SUFFIX "_conflict_dev%u"
+#define CONFLICTING_DIR_SUFFIX "_remote_directory"
+
+#define POS_BIT_NUM 64
+#define DEV_ID_BIT_NUM 16
+#define GROUP_ID_BIT_NUM 39
+#define OFFSET_BIT_NUM 8
+#define OFFSET_BIT_MASK 0xFF
+
+#define DEFAULT_DCACHE_TIMEOUT 30
+#define DEFAULT_DCACHE_PRECISION 10
+#define DEFAULT_DCACHE_THRESHOLD 1000
+#define HMDFS_STALE_REMOTE_ISIZE ULLONG_MAX
+
+/* Seconds per week */
+#define MAX_DCACHE_TIMEOUT 604800
+
+struct hmdfs_iterate_callback {
+	struct dir_context ctx;
+	struct dir_context *caller;
+	int result;
+	struct rb_root *root;
+};
+
+/*
+ * 4096 = version(1) + bitmap(10) + reserved(5)
+ *	  + nsl(80 * 43) + filename(80 * 8)
+ */
+#define DENTRYGROUP_SIZE 4096
+#define DENTRY_NAME_LEN 8
+#define DENTRY_RESERVED_LENGTH 3
+#define DENTRY_PER_GROUP 80
+#define DENTRY_BITMAP_LENGTH 10
+#define DENTRY_GROUP_RESERVED 5
+#define DENTRYGROUP_HEADER 4096
+
+struct hmdfs_dentry {
+	__le32 hash;
+	__le16 i_mode;
+	__le16 namelen;
+	__le64 i_size;
+	/* modification time */
+	__le64 i_mtime;
+	/* modification time in nano scale */
+	__le32 i_mtime_nsec;
+	/* combination of inode number and generation */
+	__le64 i_ino;
+	__le32 i_flag;
+	/* reserved bytes for long term extension, total 43 bytes */
+	__u8 reserved[DENTRY_RESERVED_LENGTH];
+} __packed;
+
+/* 4K / 51 bytes = 80 dentries per dentry group */
+struct hmdfs_dentry_group {
+	__u8 dentry_version; /* dentry version, starts from 1 */
+	__u8 bitmap[DENTRY_BITMAP_LENGTH];
+	struct hmdfs_dentry nsl[DENTRY_PER_GROUP];
+	__u8 filename[DENTRY_PER_GROUP][DENTRY_NAME_LEN];
+	__u8 reserved[DENTRY_GROUP_RESERVED];
+} __packed;
+
+/*
+ * The content of the first 4K block in dentryfile.dat.
+ * Used to check whether the dcache can be used directly or
+ * needs to be rebuilt.
+ *
+ * Since ctime has a precision of 10ms or less, a dcache that is
+ * rebuilt at the same time as the dentry inode's ctime may be
+ * inconsistent.
+ * e.g. create 1.jpg 2.jpg 3.jpg
+ *      the rebuilt dcache may only contain 1.jpg 2.jpg
+ * So we need these times to verify the dcache.
+ */
+struct hmdfs_dcache_header {
+	/* The time of dcache rebuild */
+	__le64 dcache_crtime;
+	__le64 dcache_crtime_nsec;
+
+	/* The directory inode ctime when the dcache was rebuilt */
+	__le64 dentry_ctime;
+	__le64 dentry_ctime_nsec;
+
+	/* The dentry count */
+	__le64 num;
+
+	/* The case sensitivity */
+	__u8 case_sensitive;
+} __packed;
+
+static inline loff_t get_dentry_group_pos(unsigned int bidx)
+{
+	return ((loff_t)bidx) * DENTRYGROUP_SIZE + DENTRYGROUP_HEADER;
+}
+
+static inline unsigned int get_dentry_group_cnt(struct inode *inode)
+{
+	loff_t size = i_size_read(inode);
+
+	return size >= DENTRYGROUP_HEADER ?
+ (size - DENTRYGROUP_HEADER) / DENTRYGROUP_SIZE : + 0; +} + +#define DENTRY_NAME_MAX_LEN (DENTRY_PER_GROUP * DENTRY_NAME_LEN) +#define BITS_PER_BYTE 8 +#define HMDFS_SLOT_LEN_BITS 3 +#define get_dentry_slots(x) (((x) + BITS_PER_BYTE - 1) >> HMDFS_SLOT_LEN_BITS) + +#define INUNUMBER_START 10000000 + +#ifdef CONFIG_HMDFS_FS_PERMISSION +#define DENTRY_FILE_PERM 0660 +#else +#define DENTRY_FILE_PERM 0666 +#endif + +struct hmdfs_dcache_lookup_ctx { + struct hmdfs_sb_info *sbi; + const struct qstr *name; + struct file *filp; + __u32 hash; + + /* for case sensitive */ + unsigned int bidx; + struct hmdfs_dentry_group *page; + + /* for case insensitive */ + struct hmdfs_dentry *insense_de; + unsigned int insense_bidx; + struct hmdfs_dentry_group *insense_page; +}; + +extern void hmdfs_init_dcache_lookup_ctx(struct hmdfs_dcache_lookup_ctx *ctx, + struct hmdfs_sb_info *sbi, + const struct qstr *qstr, + struct file *filp); + +int create_dentry(struct dentry *child_dentry, struct inode *inode, + struct file *file, struct hmdfs_sb_info *sbi); +int read_dentry(struct hmdfs_sb_info *sbi, char *file_name, + struct dir_context *ctx); +struct hmdfs_dentry *hmdfs_find_dentry(struct dentry *child_dentry, + struct hmdfs_dcache_lookup_ctx *ctx); +void hmdfs_delete_dentry(struct dentry *d, struct file *filp); +int hmdfs_rename_dentry(struct dentry *old_dentry, struct dentry *new_dentry, + struct file *old_filp, struct file *new_filp); +int get_inonumber(void); +struct file *create_local_dentry_file_cache(struct hmdfs_sb_info *sbi); +int update_inode_to_dentry(struct dentry *child_dentry, struct inode *inode); +struct file *cache_file_persistent(struct hmdfs_peer *con, struct file *filp, + const char *relative_path, bool server); + +#define HMDFS_TYPE_COMMON 0 +#define HMDFS_TYPE_DOT 1 +#define HMDFS_TYPE_DENTRY 2 +#define HMDFS_TYPE_DENTRY_CACHE 3 +int hmdfs_file_type(const char *name); + +loff_t hmdfs_set_pos(unsigned long dev_id, unsigned long group_id, + unsigned long offset); + +struct getdents_callback_real { + struct dir_context ctx; + struct path *parent_path; + loff_t num; + struct file *file; + struct hmdfs_sb_info *sbi; + const char *dir; +}; + +struct file *hmdfs_server_rebuild_dents(struct hmdfs_sb_info *sbi, + struct path *path, loff_t *num, + const char *dir); + +#define DCACHE_LIFETIME 30 + +struct clearcache_item { + uint64_t dev_id; + struct file *filp; + unsigned long time; + struct list_head list; + struct kref ref; + struct hmdfs_dentry_info *d_info; +}; + +void hmdfs_add_remote_cache_list(struct hmdfs_peer *con, const char *dir_path); + +struct remotecache_item { + struct hmdfs_peer *con; + struct list_head list; + __u8 drop_flag; +}; + +#define HMDFS_CFN_CID_SIZE 65 +#define HMDFS_SERVER_CID "" + +struct cache_file_node { + struct list_head list; + struct hmdfs_sb_info *sbi; + char *relative_path; + u8 cid[HMDFS_CFN_CID_SIZE]; + refcount_t ref; + bool server; + struct file *filp; +}; + +struct cache_file_item { + struct list_head list; + const char *name; +}; + +struct cache_file_callback { + struct dir_context ctx; + const char *dirname; + struct hmdfs_sb_info *sbi; + bool server; + struct list_head list; +}; + +int hmdfs_drop_remote_cache_dents(struct dentry *dentry); +void hmdfs_send_drop_push(struct hmdfs_peer *con, const char *path); +void hmdfs_mark_drop_flag(uint64_t device_id, struct dentry *dentry); +void hmdfs_clear_drop_flag(struct dentry *dentry); +void delete_in_cache_file(uint64_t dev_id, struct dentry *dentry); +void create_in_cache_file(uint64_t dev_id, struct dentry 
*dentry); +struct clearcache_item *hmdfs_find_cache_item(uint64_t dev_id, + struct dentry *dentry); +bool hmdfs_cache_revalidate(unsigned long conn_time, uint64_t dev_id, + struct dentry *dentry); +void hmdfs_remove_cache_filp(struct hmdfs_peer *con, struct dentry *dentry); +int hmdfs_add_cache_list(uint64_t dev_id, struct dentry *dentry, + struct file *filp); +int hmdfs_clear_cache_dents(struct dentry *dentry, bool remove_cache); + +int hmdfs_root_unlink(uint64_t device_id, struct path *root_path, + const char *unlink_dir, const char *unlink_name); +struct dentry *hmdfs_root_mkdir(uint64_t device_id, const char *local_dst_path, + const char *mkdir_dir, const char *mkdir_name, + umode_t mode); +struct dentry *hmdfs_root_create(uint64_t device_id, const char *local_dst_path, + const char *create_dir, + const char *create_name, + umode_t mode, bool want_excl); +int hmdfs_root_rmdir(uint64_t device_id, struct path *root_path, + const char *rmdir_dir, const char *rmdir_name); +int hmdfs_root_rename(struct hmdfs_sb_info *sbi, uint64_t device_id, + const char *oldpath, const char *oldname, + const char *newpath, const char *newname, + unsigned int flags); + +int hmdfs_get_path_in_sb(struct super_block *sb, const char *name, + unsigned int flags, struct path *path); + +int hmdfs_wlock_file(struct file *filp, loff_t start, loff_t len); +int hmdfs_rlock_file(struct file *filp, loff_t start, loff_t len); +int hmdfs_unlock_file(struct file *filp, loff_t start, loff_t len); +long cache_file_truncate(struct hmdfs_sb_info *sbi, const struct path *path, + loff_t length); +ssize_t cache_file_read(struct hmdfs_sb_info *sbi, struct file *filp, void *buf, + size_t count, loff_t *pos); +ssize_t cache_file_write(struct hmdfs_sb_info *sbi, struct file *filp, + const void *buf, size_t count, loff_t *pos); +int hmdfs_metainfo_read(struct hmdfs_sb_info *sbi, struct file *filp, + void *buffer, int buffersize, int bidx); + +bool get_remote_dentry_file(struct dentry *dentry, struct hmdfs_peer *con); +void get_remote_dentry_file_sync(struct dentry *dentry, struct hmdfs_peer *con); + +void release_cache_item(struct kref *ref); +void remove_cache_item(struct clearcache_item *item); + +void hmdfs_cfn_load(struct hmdfs_sb_info *sbi); +void hmdfs_cfn_destroy(struct hmdfs_sb_info *sbi); +struct cache_file_node *find_cfn(struct hmdfs_sb_info *sbi, const char *cid, + const char *path, bool server); +void release_cfn(struct cache_file_node *cfn); +void destroy_cfn(struct hmdfs_sb_info *sbi); +void remove_cfn(struct cache_file_node *cfn); +int delete_dentry_file(struct file *filp); +struct file *hmdfs_server_cache_revalidate(struct hmdfs_sb_info *sbi, + const char *recvpath, + struct path *path); +int write_header(struct file *filp, struct hmdfs_dcache_header *header); + +static inline struct list_head *get_list_head(struct hmdfs_sb_info *sbi, + bool server) +{ + return ((server) ? &(sbi)->server_cache : &(sbi)->client_cache); +} + +/* + * generate_u64_ino - generate a new 64 bit inode number + * + * @ino: origin 32 bit inode number + * @generation: origin 32 bit inode generation + * + * We need both remote inode number and generation to ensure the uniqueness of + * the local inode, thus we store inode->i_ino in lower 32 bits, and + * inode->i_generation in higher 32 bits. 
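+ *
+ * Illustrative example: ino 0x2a with generation 0x3 combines into
+ * 0x000000030000002a.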
+ */
+static inline uint64_t generate_u64_ino(unsigned long ino,
+					unsigned int generation)
+{
+	return (uint64_t)ino | ((uint64_t)generation << 32);
+}
+
+static inline bool cache_item_revalidate(unsigned long conn_time,
+					 unsigned long item_time,
+					 unsigned int timeout)
+{
+	return time_before_eq(jiffies, item_time + timeout * HZ) &&
+	       time_before_eq(conn_time, item_time);
+}
+
+#endif
diff --git a/fs/hmdfs/hmdfs_device_view.h b/fs/hmdfs/hmdfs_device_view.h
new file mode 100644
index 0000000000000000000000000000000000000000..dcc49fb89597e0d62504aa47ffd5d45766f0f65b
--- /dev/null
+++ b/fs/hmdfs/hmdfs_device_view.h
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/hmdfs_device_view.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_DEVICE_VIEW_H
+#define HMDFS_DEVICE_VIEW_H
+
+#include "hmdfs.h"
+
+/*****************************************************************************
+ * macro definitions
+ *****************************************************************************/
+
+#define DEVICE_VIEW_ROOT "device_view"
+#define MERGE_VIEW_ROOT "merge_view"
+#define UPDATE_LOCAL_DST "/device_view/local/"
+
+#define DEVICE_VIEW_LOCAL "local"
+
+/*
+ * in order to distinguish from vfs, we define our own bitmask; it must be
+ * converted to the vfs bitmask when calling vfs apis
+ */
+#define HMDFS_LOOKUP_REVAL 0x1
+
+enum HMDFS_FILE_TYPE {
+	HM_REG = 0,
+	HM_SYMLINK = 1,
+
+	HM_MAX_FILE_TYPE = 0XFF
+};
+
+struct bydev_inode_info {
+	struct inode *lower_inode;
+	uint64_t ino;
+};
+
+struct hmdfs_dentry_info {
+	struct path lower_path;
+	unsigned long time;
+	struct list_head cache_list_head;
+	spinlock_t cache_list_lock;
+	struct list_head remote_cache_list_head;
+	struct mutex remote_cache_list_lock;
+	__u8 file_type;
+	__u8 dentry_type;
+	uint64_t device_id;
+	spinlock_t lock;
+	struct mutex cache_pull_lock;
+	bool async_readdir_in_progress;
+};
+
+struct hmdfs_lookup_ret {
+	uint64_t i_size;
+	uint64_t i_mtime;
+	uint32_t i_mtime_nsec;
+	uint16_t i_mode;
+	uint64_t i_ino;
+};
+
+struct hmdfs_getattr_ret {
+	/*
+	 * if stat->result_mask is 0, it means this remote getattr failed
+	 * during lookup, see details in hmdfs_server_getattr.
+ */ + struct kstat stat; + uint32_t i_flags; + uint64_t fsid; +}; + +extern int hmdfs_remote_getattr(struct hmdfs_peer *conn, struct dentry *dentry, + unsigned int lookup_flags, + struct hmdfs_getattr_ret **getattr_result); + +/***************************************************************************** + * local/remote inode/file operations + *****************************************************************************/ + +extern const struct dentry_operations hmdfs_dops; +extern const struct dentry_operations hmdfs_dev_dops; + +/* local device operation */ +extern const struct inode_operations hmdfs_file_iops_local; +extern const struct file_operations hmdfs_file_fops_local; +extern const struct inode_operations hmdfs_dir_inode_ops_local; +extern const struct file_operations hmdfs_dir_ops_local; +extern const struct inode_operations hmdfs_symlink_iops_local; + +/* remote device operation */ +extern const struct inode_operations hmdfs_dev_file_iops_remote; +extern const struct file_operations hmdfs_dev_file_fops_remote; +extern const struct address_space_operations hmdfs_dev_file_aops_remote; +extern const struct inode_operations hmdfs_dev_dir_inode_ops_remote; +extern const struct file_operations hmdfs_dev_dir_ops_remote; +extern int hmdfs_dev_unlink_from_con(struct hmdfs_peer *conn, + struct dentry *dentry); +extern int hmdfs_dev_readdir_from_con(struct hmdfs_peer *con, struct file *file, + struct dir_context *ctx); +int hmdfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); +int hmdfs_rmdir(struct inode *dir, struct dentry *dentry); +int hmdfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, + bool want_excl); +int hmdfs_unlink(struct inode *dir, struct dentry *dentry); +int hmdfs_remote_unlink(struct hmdfs_peer *conn, struct dentry *dentry); +int hmdfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags); +loff_t hmdfs_file_llseek_local(struct file *file, loff_t offset, int whence); +ssize_t hmdfs_read_local(struct kiocb *iocb, struct iov_iter *iter); +ssize_t hmdfs_write_local(struct kiocb *iocb, struct iov_iter *iter); +int hmdfs_file_release_local(struct inode *inode, struct file *file); +int hmdfs_file_mmap_local(struct file *file, struct vm_area_struct *vma); +struct dentry *hmdfs_lookup(struct inode *parent_inode, + struct dentry *child_dentry, unsigned int flags); +struct dentry *hmdfs_lookup_local(struct inode *parent_inode, + struct dentry *child_dentry, + unsigned int flags); +struct dentry *hmdfs_lookup_remote(struct inode *parent_inode, + struct dentry *child_dentry, + unsigned int flags); +int hmdfs_symlink_local(struct inode *dir, struct dentry *dentry, + const char *symname); +int hmdfs_fsync_local(struct file *file, loff_t start, loff_t end, + int datasync); +int hmdfs_symlink(struct inode *dir, struct dentry *dentry, + const char *symname); +int hmdfs_fsync(struct file *file, loff_t start, loff_t end, int datasync); + +/***************************************************************************** + * common functions declaration + *****************************************************************************/ + +static inline struct hmdfs_dentry_info *hmdfs_d(struct dentry *dentry) +{ + return dentry->d_fsdata; +} + +static inline bool hm_isreg(uint8_t file_type) +{ + return (file_type == HM_REG); +} + +static inline bool hm_islnk(uint8_t file_type) +{ + return (file_type == HM_SYMLINK); +} +struct inode *fill_inode_remote(struct super_block *sb, struct 
hmdfs_peer *con,
+				struct hmdfs_lookup_ret *lookup_result,
+				struct inode *dir);
+struct hmdfs_lookup_ret *get_remote_inode_info(struct hmdfs_peer *con,
+					       struct dentry *dentry,
+					       unsigned int flags);
+void hmdfs_set_time(struct dentry *dentry, unsigned long time);
+struct inode *fill_inode_local(struct super_block *sb,
+			       struct inode *lower_inode);
+struct inode *fill_root_inode(struct super_block *sb,
+			      struct inode *lower_inode);
+struct inode *fill_device_inode(struct super_block *sb,
+				struct inode *lower_inode);
+struct hmdfs_lookup_ret *hmdfs_lookup_by_con(struct hmdfs_peer *con,
+					     struct dentry *dentry,
+					     struct qstr *qstr,
+					     unsigned int flags,
+					     const char *relative_path);
+char *hmdfs_connect_path(const char *path, const char *name);
+
+char *hmdfs_get_dentry_relative_path(struct dentry *dentry);
+char *hmdfs_get_dentry_absolute_path(const char *rootdir,
+				     const char *relative_path);
+int hmdfs_convert_lookup_flags(unsigned int hmdfs_flags,
+			       unsigned int *vfs_flags);
+static inline void hmdfs_get_lower_path(struct dentry *dent, struct path *pname)
+{
+	spin_lock(&hmdfs_d(dent)->lock);
+	pname->dentry = hmdfs_d(dent)->lower_path.dentry;
+	pname->mnt = hmdfs_d(dent)->lower_path.mnt;
+	path_get(pname);
+	spin_unlock(&hmdfs_d(dent)->lock);
+}
+
+static inline void hmdfs_put_lower_path(struct path *pname)
+{
+	path_put(pname);
+}
+
+static inline void hmdfs_put_reset_lower_path(struct dentry *dent)
+{
+	struct path pname;
+
+	spin_lock(&hmdfs_d(dent)->lock);
+	if (hmdfs_d(dent)->lower_path.dentry) {
+		pname.dentry = hmdfs_d(dent)->lower_path.dentry;
+		pname.mnt = hmdfs_d(dent)->lower_path.mnt;
+		hmdfs_d(dent)->lower_path.dentry = NULL;
+		hmdfs_d(dent)->lower_path.mnt = NULL;
+		spin_unlock(&hmdfs_d(dent)->lock);
+		path_put(&pname);
+	} else {
+		spin_unlock(&hmdfs_d(dent)->lock);
+	}
+}
+
+static inline void hmdfs_set_lower_path(struct dentry *dent, struct path *pname)
+{
+	spin_lock(&hmdfs_d(dent)->lock);
+	hmdfs_d(dent)->lower_path.dentry = pname->dentry;
+	hmdfs_d(dent)->lower_path.mnt = pname->mnt;
+	spin_unlock(&hmdfs_d(dent)->lock);
+}
+
+/* Only reg files under HMDFS_LAYER_OTHER_* support xattr */
+static inline bool hmdfs_support_xattr(struct dentry *dentry)
+{
+	struct inode *inode = d_inode(dentry);
+	struct hmdfs_inode_info *info = hmdfs_i(inode);
+	struct hmdfs_dentry_info *gdi = hmdfs_d(dentry);
+
+	if (info->inode_type != HMDFS_LAYER_OTHER_LOCAL &&
+	    info->inode_type != HMDFS_LAYER_OTHER_REMOTE)
+		return false;
+
+	if (!S_ISREG(inode->i_mode))
+		return false;
+
+	if (hm_islnk(gdi->file_type))
+		return false;
+
+	return true;
+}
+
+int init_hmdfs_dentry_info(struct hmdfs_sb_info *sbi, struct dentry *dentry,
+			   int dentry_type);
+
+#endif
diff --git a/fs/hmdfs/hmdfs_merge_view.h b/fs/hmdfs/hmdfs_merge_view.h
new file mode 100644
index 0000000000000000000000000000000000000000..01064b3d98dfb2e092b7d83268628631e25ba2c3
--- /dev/null
+++ b/fs/hmdfs/hmdfs_merge_view.h
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/hmdfs_merge_view.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_MERGE_VIEW_H
+#define HMDFS_MERGE_VIEW_H
+
+#include "hmdfs.h"
+
+#include "comm/connection.h"
+#include
+
+/*****************************************************************************
+ * Dentries for the merge view and their comrades.
+ * A dentry's lower dentry is named COMRADE.
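+ *
+ * (Each merge-view dentry keeps one comrade per device on its comrade_list,
+ * and every comrade holds a reference (lo_d) to that device's lower dentry.)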
+ *****************************************************************************/
+
+struct hmdfs_dentry_info_merge {
+	unsigned long ctime;
+	// For the merge view to link dentries with the same names
+	struct mutex comrade_list_lock;
+	struct list_head comrade_list;
+};
+
+struct hmdfs_dentry_comrade {
+	uint64_t dev_id;
+	struct dentry *lo_d;
+	struct list_head list;
+};
+
+enum FILE_CMD_MERGE {
+	F_MKDIR_MERGE = 0,
+	F_CREATE_MERGE = 1,
+	F_SYMLINK_MERGE = 2,
+};
+
+struct hmdfs_recursive_para {
+	bool is_last;
+	int opcode;
+	umode_t mode;
+	bool want_excl;
+	const char *name;
+};
+
+static inline struct hmdfs_dentry_info_merge *hmdfs_dm(struct dentry *dentry)
+{
+	return dentry->d_fsdata;
+}
+
+static inline umode_t hmdfs_cm(struct hmdfs_dentry_comrade *comrade)
+{
+	return d_inode(comrade->lo_d)->i_mode;
+}
+
+static inline bool comrade_is_local(struct hmdfs_dentry_comrade *comrade)
+{
+	return comrade->dev_id == HMDFS_DEVID_LOCAL;
+}
+
+struct dentry *hmdfs_lookup_merge(struct inode *parent_inode,
+				  struct dentry *child_dentry,
+				  unsigned int flags);
+
+struct hmdfs_dentry_comrade *alloc_comrade(struct dentry *lo_d, int dev_id);
+
+void link_comrade(struct list_head *onstack_comrades_head,
+		  struct hmdfs_dentry_comrade *comrade);
+
+static inline void destroy_comrade(struct hmdfs_dentry_comrade *comrade)
+{
+	dput(comrade->lo_d);
+	kfree(comrade);
+}
+
+void clear_comrades(struct dentry *dentry);
+
+static inline void link_comrade_unlocked(struct dentry *dentry,
+					 struct hmdfs_dentry_comrade *comrade)
+{
+	mutex_lock(&hmdfs_dm(dentry)->comrade_list_lock);
+	link_comrade(&hmdfs_dm(dentry)->comrade_list, comrade);
+	mutex_unlock(&hmdfs_dm(dentry)->comrade_list_lock);
+}
+
+void clear_comrades_locked(struct list_head *comrade_list);
+
+#define for_each_comrade_locked(_dentry, _comrade) \
+	list_for_each_entry(_comrade, &(hmdfs_dm(_dentry)->comrade_list), list)
+
+#define hmdfs_trace_merge(_trace_func, _parent_inode, _child_dentry, err) \
+	{ \
+		struct hmdfs_dentry_comrade *comrade; \
+		struct hmdfs_dentry_info_merge *dm = hmdfs_dm(_child_dentry); \
+		_trace_func(_parent_inode, _child_dentry, err); \
+		if (likely(dm)) { \
+			mutex_lock(&dm->comrade_list_lock); \
+			for_each_comrade_locked(_child_dentry, comrade) \
+				trace_hmdfs_show_comrade(_child_dentry, \
+							 comrade->lo_d, \
+							 comrade->dev_id); \
+			mutex_unlock(&dm->comrade_list_lock); \
+		} \
+	}
+
+#define hmdfs_trace_rename_merge(olddir, olddentry, newdir, newdentry, err) \
+	{ \
+		struct hmdfs_dentry_comrade *comrade; \
+		trace_hmdfs_rename_merge(olddir, olddentry, newdir, newdentry, \
+					 err); \
+		mutex_lock(&hmdfs_dm(olddentry)->comrade_list_lock); \
+		for_each_comrade_locked(olddentry, comrade) \
+			trace_hmdfs_show_comrade(olddentry, comrade->lo_d, \
+						 comrade->dev_id); \
+		mutex_unlock(&hmdfs_dm(olddentry)->comrade_list_lock); \
+		mutex_lock(&hmdfs_dm(newdentry)->comrade_list_lock); \
+		for_each_comrade_locked(newdentry, comrade) \
+			trace_hmdfs_show_comrade(newdentry, comrade->lo_d, \
+						 comrade->dev_id); \
+		mutex_unlock(&hmdfs_dm(newdentry)->comrade_list_lock); \
+	}
+
+/*****************************************************************************
+ * Helper functions abstracting out comrades
+ *****************************************************************************/
+
+static inline bool hmdfs_i_merge(struct hmdfs_inode_info *hii)
+{
+	__u8 t = hii->inode_type;
+
+	return t == HMDFS_LAYER_FIRST_MERGE || t == HMDFS_LAYER_OTHER_MERGE;
+}
+
+struct dentry *hmdfs_get_lo_d(struct dentry *dentry, int dev_id);
+struct dentry
*hmdfs_get_fst_lo_d(struct dentry *dentry);
+
+/*****************************************************************************
+ * Inode operations for the merge view
+ *****************************************************************************/
+
+extern const struct inode_operations hmdfs_file_iops_merge;
+extern const struct file_operations hmdfs_file_fops_merge;
+extern const struct inode_operations hmdfs_symlink_iops_merge;
+extern const struct inode_operations hmdfs_dir_iops_merge;
+extern const struct file_operations hmdfs_dir_fops_merge;
+extern const struct dentry_operations hmdfs_dops_merge;
+
+/*****************************************************************************
+ * dentry cache for the merge view
+ *****************************************************************************/
+extern struct kmem_cache *hmdfs_dentry_merge_cachep;
+
+#endif // HMDFS_MERGE_H
diff --git a/fs/hmdfs/hmdfs_server.c b/fs/hmdfs/hmdfs_server.c
new file mode 100644
index 0000000000000000000000000000000000000000..c50e9f9de8429aac4ba3a9944e2f7be486cd83ef
--- /dev/null
+++ b/fs/hmdfs/hmdfs_server.c
@@ -0,0 +1,2073 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/hmdfs_server.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include "hmdfs_server.h"
+
+#include
+#include
+#include
+#include
+#include
+
+#include "authority/authentication.h"
+#include "comm/fault_inject.h"
+#include "hmdfs.h"
+#include "hmdfs_dentryfile.h"
+#include "hmdfs_trace.h"
+#include "server_writeback.h"
+#include "comm/node_cb.h"
+
+#define HMDFS_MAX_HIDDEN_DIR 1
+
+struct hmdfs_open_info {
+	struct file *file;
+	struct inode *inode;
+	bool stat_valid;
+	struct kstat stat;
+	uint64_t real_ino;
+	int file_id;
+};
+
+static int insert_file_into_conn(struct hmdfs_peer *conn, struct file *file)
+{
+	struct idr *idr = &(conn->file_id_idr);
+	int ret;
+
+	idr_preload(GFP_KERNEL);
+	spin_lock(&(conn->file_id_lock));
+	ret = idr_alloc_cyclic(idr, file, 0, 0, GFP_NOWAIT);
+	spin_unlock(&(conn->file_id_lock));
+	idr_preload_end();
+	return ret;
+}
+
+/*
+ * get_file_from_conn - get file from conn by file_id. It should be noted that
+ * an additional reference will be acquired for the returned file; the caller
+ * should put it after the file is not used anymore.
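+ * (i.e. drop the reference with fput(), or hmdfs_close_path() below, which
+ * simply wraps fput().)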
+ */
+static struct file *get_file_from_conn(struct hmdfs_peer *conn, __u32 file_id)
+{
+	struct file *file;
+	struct idr *idr = &(conn->file_id_idr);
+
+	rcu_read_lock();
+	file = idr_find(idr, file_id);
+	if (file && !get_file_rcu(file))
+		file = NULL;
+	rcu_read_unlock();
+	return file;
+}
+
+void remove_file_from_conn(struct hmdfs_peer *conn, __u32 file_id)
+{
+	spinlock_t *lock = &(conn->file_id_lock);
+	struct idr *idr = &(conn->file_id_idr);
+
+	spin_lock(lock);
+	idr_remove(idr, file_id);
+	spin_unlock(lock);
+}
+
+struct file *hmdfs_open_photokit_path(struct hmdfs_sb_info *sbi,
+				      const char *path)
+{
+	struct file *file;
+	int err;
+	const char *root_name = sbi->local_dst;
+	char *real_path;
+	int path_len;
+
+	path_len = strlen(root_name) + strlen(path) + 2;
+	if (path_len >= PATH_MAX) {
+		err = -EINVAL;
+		return ERR_PTR(err);
+	}
+	real_path = kzalloc(path_len, GFP_KERNEL);
+	if (!real_path) {
+		err = -ENOMEM;
+		return ERR_PTR(err);
+	}
+
+	sprintf(real_path, "%s/%s", root_name, path);
+	file = filp_open(real_path, O_RDWR | O_LARGEFILE, 0644);
+	if (IS_ERR(file)) {
+		hmdfs_info("filp_open failed: %ld", PTR_ERR(file));
+	} else {
+		hmdfs_info("get file with magic %lu",
+			   file->f_inode->i_sb->s_magic);
+	}
+
+	kfree(real_path);
+	return file;
+}
+
+struct file *hmdfs_open_path(struct hmdfs_sb_info *sbi, const char *path)
+{
+	struct path root_path;
+	struct file *file;
+	int err;
+	const char *root_name = sbi->local_dst;
+
+	err = kern_path(root_name, 0, &root_path);
+	if (err) {
+		hmdfs_info("kern_path failed: %d", err);
+		return ERR_PTR(err);
+	}
+	file = file_open_root(&root_path, path,
+			      O_RDWR | O_LARGEFILE, 0644);
+	path_put(&root_path);
+	if (IS_ERR(file)) {
+		hmdfs_err(
+			"GRAPERR sb->s_readonly_remount %d sb_flag %lu",
+			sbi->sb->s_readonly_remount, sbi->sb->s_flags);
+		hmdfs_info("file_open_root failed: %ld", PTR_ERR(file));
+	} else {
+		hmdfs_info("get file with magic %lu",
+			   file->f_inode->i_sb->s_magic);
+	}
+	return file;
+}
+
+inline void hmdfs_close_path(struct file *file)
+{
+	fput(file);
+}
+
+/* After going offline, the server closes all files opened by the client */
+void hmdfs_server_offline_notify(struct hmdfs_peer *conn, int evt,
+				 unsigned int seq)
+{
+	int id;
+	int count = 0;
+	unsigned int next;
+	struct file *filp = NULL;
+	struct idr *idr = &conn->file_id_idr;
+
+	/* wait for all async work to complete */
+	flush_workqueue(conn->req_handle_wq);
+	flush_workqueue(conn->async_wq);
+
+	/* If there are still open requests in processing, we may
+	 * need to close their files when the peer goes offline
+	 */
+	idr_for_each_entry(idr, filp, id) {
+		hmdfs_debug("[%d]Server close: id=%d", count, id);
+		hmdfs_close_path(filp);
+		count++;
+		if (count % HMDFS_IDR_RESCHED_COUNT == 0)
+			cond_resched();
+	}
+
+	/* Reinitialize idr */
+	next = idr_get_cursor(idr);
+	idr_destroy(idr);
+
+	idr_init(idr);
+	idr_set_cursor(idr, next);
+
+	/* Make old file ids stale */
+	conn->fid_cookie++;
+}
+
+static struct hmdfs_node_cb_desc server_cb[] = {
+	{
+		.evt = NODE_EVT_OFFLINE,
+		.sync = true,
+		.min_version = DFS_2_0,
+		.fn = hmdfs_server_offline_notify
+	},
+};
+
+void __init hmdfs_server_add_node_evt_cb(void)
+{
+	hmdfs_node_add_evt_cb(server_cb, ARRAY_SIZE(server_cb));
+}
+
+static int hmdfs_get_inode_by_name(struct hmdfs_peer *con, const char *filename,
+				   uint64_t *ino)
+{
+	int ret = 0;
+	struct path root_path;
+	struct path dst_path;
+	struct inode *inode = NULL;
+
+	ret = kern_path(con->sbi->local_dst, 0, &root_path);
+	if (ret) {
+		hmdfs_err("kern_path failed err = %d", ret);
+		return ret;
+	}
+
+	ret
= vfs_path_lookup(root_path.dentry, root_path.mnt, filename, 0,
+			      &dst_path);
+	if (ret) {
+		path_put(&root_path);
+		return ret;
+	}
+
+	inode = d_inode(dst_path.dentry);
+	if (con->sbi->sb == inode->i_sb)
+		inode = hmdfs_i(inode)->lower_inode;
+	*ino = generate_u64_ino(inode->i_ino, inode->i_generation);
+
+	path_put(&dst_path);
+	path_put(&root_path);
+
+	return 0;
+}
+
+static struct file *hmdfs_open_file(struct hmdfs_peer *con,
+				    const char *filename, uint8_t file_type,
+				    int *file_id)
+{
+	struct file *file = NULL;
+	int id;
+
+	if (!filename) {
+		hmdfs_err("filename is NULL");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (hm_islnk(file_type))
+		file = hmdfs_open_photokit_path(con->sbi, filename);
+	else
+		file = hmdfs_open_path(con->sbi, filename);
+	if (IS_ERR(file))
+		return file;
+
+	id = insert_file_into_conn(con, file);
+	if (id < 0) {
+		hmdfs_err("file_id alloc failed! err=%d", id);
+		hmdfs_close_path(file);
+		return ERR_PTR(id);
+	}
+	*file_id = id;
+
+	return file;
+}
+
+static struct hmdfs_time_t msec_to_timespec(unsigned int msec)
+{
+	struct hmdfs_time_t timespec = {
+		.tv_sec = msec / MSEC_PER_SEC,
+		.tv_nsec = (msec % MSEC_PER_SEC) * NSEC_PER_MSEC,
+	};
+
+	return timespec;
+}
+
+static struct hmdfs_time_t hmdfs_current_kernel_time(void)
+{
+	struct hmdfs_time_t time;
+
+#if KERNEL_VERSION(4, 18, 0) < LINUX_VERSION_CODE
+	ktime_get_coarse_real_ts64(&time);
+#else
+	time = current_kernel_time();
+#endif
+	return time;
+}
+
+/*
+ * Generate the fid version in the following format:
+ *
+ * |     boot cookie     | con cookie  |
+ * |---------------------|-------------|
+ *          49                 15        (bits)
+ */
+static uint64_t hmdfs_server_pack_fid_ver(struct hmdfs_peer *con,
+					  struct hmdfs_head_cmd *cmd)
+{
+	uint64_t boot_cookie = con->sbi->boot_cookie;
+	uint16_t con_cookie = con->fid_cookie;
+
+	if (hmdfs_should_fake_fid_ver(&con->sbi->fault_inject, con,
+				      cmd, T_BOOT_COOKIE))
+		boot_cookie = hmdfs_gen_boot_cookie();
+
+	if (hmdfs_should_fake_fid_ver(&con->sbi->fault_inject, con,
+				      cmd, T_CON_COOKIE))
+		con_cookie++;
+
+	return (boot_cookie |
+		(con_cookie & ((1 << HMDFS_FID_VER_BOOT_COOKIE_SHIFT) - 1)));
+}
+
+static struct file *get_file_by_fid_and_ver(struct hmdfs_peer *con,
+					    struct hmdfs_head_cmd *cmd,
+					    __u32 file_id, __u64 file_ver)
+{
+	struct file *file = NULL;
+	__u64 cur_file_ver = hmdfs_server_pack_fid_ver(con, cmd);
+
+	if (file_ver != cur_file_ver) {
+		hmdfs_warning("Stale file version %llu for fid %u (ver %llu)",
+			      file_ver, file_id, cur_file_ver);
+		return ERR_PTR(-EBADF);
+	}
+
+	file = get_file_from_conn(con, file_id);
+	if (!file)
+		return ERR_PTR(-EBADF);
+
+	return file;
+}
+
+static void hmdfs_update_open_response(struct hmdfs_peer *con,
+				       struct hmdfs_head_cmd *cmd,
+				       struct hmdfs_open_info *info,
+				       struct open_response *resp)
+{
+	struct hmdfs_time_t current_time = hmdfs_current_kernel_time();
+	struct hmdfs_time_t ctime = info->stat_valid ? info->stat.ctime :
+						       info->inode->i_ctime;
+	struct hmdfs_time_t precision =
+		msec_to_timespec(con->sbi->dcache_precision);
+	loff_t size = info->stat_valid ? info->stat.size :
+					 i_size_read(info->inode);
+
+	resp->ino = cpu_to_le64(info->real_ino);
+	resp->file_ver = cpu_to_le64(hmdfs_server_pack_fid_ver(con, cmd));
+	resp->file_id = cpu_to_le32(info->file_id);
+	resp->file_size = cpu_to_le64(size);
+	resp->ctime = cpu_to_le64(ctime.tv_sec);
+	resp->ctime_nsec = cpu_to_le32(ctime.tv_nsec);
+
+	/*
+	 * On the server, ctime might stay the same after an overwrite. We
+	 * introduce a new value stable_ctime to handle the problem.
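+	 * ("open rpc time" below refers to the server-side time at which
+	 * this open request is handled, i.e. current_time above.)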
+	 * - if open rpc time < ctime, stable_ctime = 0;
+	 * - if ctime <= open rpc time < ctime + dcache_precision,
+	 *   stable_ctime = ctime;
+	 * - else, stable_ctime = ctime + dcache_precision;
+	 */
+	precision = hmdfs_time_add(ctime, precision);
+	if (hmdfs_time_compare(&current_time, &ctime) < 0) {
+		resp->stable_ctime = cpu_to_le64(0);
+		resp->stable_ctime_nsec = cpu_to_le32(0);
+	} else if (hmdfs_time_compare(&current_time, &ctime) >= 0 &&
+		   hmdfs_time_compare(&current_time, &precision) < 0) {
+		resp->stable_ctime = resp->ctime;
+		resp->stable_ctime_nsec = resp->ctime_nsec;
+	} else {
+		resp->stable_ctime = cpu_to_le64(precision.tv_sec);
+		resp->stable_ctime_nsec = cpu_to_le32(precision.tv_nsec);
+	}
+}
+
+static int hmdfs_get_open_info(struct hmdfs_peer *con, uint8_t file_type,
+			       const char *filename,
+			       struct hmdfs_open_info *info)
+{
+	int ret = 0;
+
+	info->inode = file_inode(info->file);
+	info->stat_valid = false;
+	if (con->sbi->sb == info->inode->i_sb) {
+		/* if open a regular file */
+		info->inode = hmdfs_i(info->inode)->lower_inode;
+	} else if (con->sbi->lower_sb != info->inode->i_sb) {
+		/* It's possible that inode is not from lower, for example:
+		 * 1. touch /f2fs/file
+		 * 2. ln -s /sdcard_fs/file /f2fs/link
+		 * 3. cat /hmdfs/link -> generate dentry cache in sdcard_fs
+		 * 4. echo hi >> /hmdfs/file -> append write not through
+		 *    sdcard_fs
+		 * 5. cat /hmdfs/link -> got inode in sdcard_fs, whose size
+		 *    is still 0
+		 *
+		 * If the source file isn't in lower, use getattr to get
+		 * its information.
+		 */
+		ret = vfs_getattr(&info->file->f_path, &info->stat,
+				  STATX_BASIC_STATS | STATX_BTIME, 0);
+		if (ret) {
+			hmdfs_err("call vfs_getattr failed, err %d", ret);
+			return ret;
+		}
+		info->stat_valid = true;
+	}
+
+	/* if open a link file, get ino from link inode */
+	if (hm_islnk(file_type)) {
+		ret = hmdfs_get_inode_by_name(con, filename, &info->real_ino);
+		if (ret)
+			return ret;
+	} else {
+		info->real_ino = generate_u64_ino(info->inode->i_ino,
+						  info->inode->i_generation);
+	}
+
+	return 0;
+}
+
+void hmdfs_server_open(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+		       void *data)
+{
+	struct open_request *recv = data;
+	int sizeread = sizeof(struct open_response);
+	struct open_response *resp = NULL;
+	struct hmdfs_open_info *info = NULL;
+	int ret = 0;
+
+	trace_hmdfs_server_open_enter(con, recv);
+	if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &ret))
+		goto out_err;
+
+	resp = kzalloc(sizeread, GFP_KERNEL);
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (!resp || !info) {
+		ret = -ENOMEM;
+		goto err_free;
+	}
+
+	info->file = hmdfs_open_file(con, recv->buf, recv->file_type,
+				     &info->file_id);
+	if (IS_ERR(info->file)) {
+		ret = PTR_ERR(info->file);
+		goto err_free;
+	}
+
+	ret = hmdfs_get_open_info(con, recv->file_type, recv->buf, info);
+	if (ret)
+		goto err_close;
+
+	hmdfs_update_open_response(con, cmd, info, resp);
+
+	trace_hmdfs_server_open_exit(con, resp, info->file, 0);
+	ret = hmdfs_sendmessage_response(con, cmd, sizeread, resp, 0);
+	if (ret) {
+		hmdfs_err("sending msg response failed, file_id %d, err %d",
+			  info->file_id, ret);
+		remove_file_from_conn(con, info->file_id);
+		hmdfs_close_path(info->file);
+	}
+	kfree(resp);
+	kfree(info);
+	return;
+
+err_close:
+	remove_file_from_conn(con, info->file_id);
+	hmdfs_close_path(info->file);
+err_free:
+	kfree(resp);
+	kfree(info);
+out_err:
+	trace_hmdfs_server_open_exit(con, NULL, NULL, ret);
+	hmdfs_send_err_response(con, cmd, ret);
+}
+
+static int hmdfs_check_and_create(struct path *path_parent,
+				  struct dentry *dentry, uint64_t device_id,
+				  umode_t mode, bool is_excl)
+{
+	int err = 0;
+
+	/* if the inode doesn't exist, create it */
+	if (d_is_negative(dentry)) {
+		hmdfs_mark_drop_flag(device_id, path_parent->dentry);
+		err = vfs_create(d_inode(path_parent->dentry), dentry, mode,
+				 is_excl);
+		if (err)
+			hmdfs_err("create failed, err %d", err);
+	} else {
+		if (is_excl)
+			err = -EEXIST;
+		/* if the inode already exists, check whether it is a symlink */
+		else if (S_ISREG(d_inode(dentry)->i_mode) &&
+			 hm_islnk(hmdfs_d(dentry)->file_type))
+			err = -EINVAL;
+		else if (S_ISDIR(d_inode(dentry)->i_mode))
+			err = -EISDIR;
+	}
+
+	return err;
+}
+
+static int hmdfs_lookup_create(struct hmdfs_peer *con,
+			       struct atomic_open_request *recv,
+			       struct path *child_path, bool *truncate)
+{
+	int err = 0;
+	struct path path_root;
+	struct path path_parent;
+	uint32_t open_flags = le32_to_cpu(recv->open_flags);
+	char *path = recv->buf;
+	char *filename = recv->buf + le32_to_cpu(recv->path_len) + 1;
+	struct dentry *dentry = NULL;
+
+	err = kern_path(con->sbi->local_dst, LOOKUP_DIRECTORY, &path_root);
+	if (err) {
+		hmdfs_err("no path for %s, err %d", con->sbi->local_dst, err);
+		return err;
+	}
+
+	err = vfs_path_lookup(path_root.dentry, path_root.mnt, path,
+			      LOOKUP_DIRECTORY, &path_parent);
+	if (err) {
+		hmdfs_info("no dir in %s, err %d", con->sbi->local_dst, err);
+		goto put_path_root;
+	}
+
+	inode_lock(d_inode(path_parent.dentry));
+	dentry = lookup_one_len(filename, path_parent.dentry, strlen(filename));
+	if (IS_ERR(dentry)) {
+		err = PTR_ERR(dentry);
+		inode_unlock(d_inode(path_parent.dentry));
+		goto put_path_parent;
+	}
+	/* only truncate if the inode already exists */
+	*truncate = ((open_flags & HMDFS_O_TRUNC) && d_is_positive(dentry));
+	err = hmdfs_check_and_create(&path_parent, dentry, con->device_id,
+				     le16_to_cpu(recv->mode),
+				     open_flags & HMDFS_O_EXCL);
+	inode_unlock(d_inode(path_parent.dentry));
+	if (err) {
+		dput(dentry);
+	} else {
+		child_path->dentry = dentry;
+		child_path->mnt = mntget(path_parent.mnt);
+	}
+
+put_path_parent:
+	path_put(&path_parent);
+put_path_root:
+	path_put(&path_root);
+	return err;
+}
+
+static int hmdfs_dentry_open(struct hmdfs_peer *con,
+			     const struct path *path,
+			     struct hmdfs_open_info *info)
+{
+	int err = 0;
+
+	info->file = dentry_open(path, O_RDWR | O_LARGEFILE, current_cred());
+	if (IS_ERR(info->file)) {
+		err = PTR_ERR(info->file);
+		hmdfs_err("open file failed, err %d", err);
+		return err;
+	}
+
+	info->file_id = insert_file_into_conn(con, info->file);
+	if (info->file_id < 0) {
+		err = info->file_id;
+		hmdfs_err("file_id alloc failed! 
err %d", err); + hmdfs_close_path(info->file); + return err; + } + + return 0; +} + +static int hmdfs_server_do_atomic_open(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, + struct atomic_open_request *recv, + struct hmdfs_open_info *info, + struct atomic_open_response *resp) +{ + struct path child_path; + bool truncate = false; + int err = 0; + + err = hmdfs_lookup_create(con, recv, &child_path, &truncate); + if (err) + return err; + + err = hmdfs_dentry_open(con, &child_path, info); + if (err) + goto put_child; + + err = hmdfs_get_open_info(con, HM_REG, NULL, info); + if (err) + goto fail_close; + + if (truncate) { + err = vfs_truncate(&child_path, 0); + if (err) { + hmdfs_err("truncate failed, err %d", err); + goto fail_close; + } + } + hmdfs_update_open_response(con, cmd, info, &resp->open_resp); + resp->i_mode = cpu_to_le16(file_inode(info->file)->i_mode); + +fail_close: + if (err) { + remove_file_from_conn(con, info->file_id); + hmdfs_close_path(info->file); + } +put_child: + path_put(&child_path); + return err; +} + +void hmdfs_server_atomic_open(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, void *data) +{ + int err; + struct atomic_open_request *recv = data; + struct atomic_open_response *resp = NULL; + struct hmdfs_open_info *info = NULL; + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err)) + goto out; + + info = kmalloc(sizeof(*info), GFP_KERNEL); + resp = kzalloc(sizeof(*resp), GFP_KERNEL); + if (!resp || !info) { + err = -ENOMEM; + goto out; + } + + err = hmdfs_server_do_atomic_open(con, cmd, recv, info, resp); + +out: + if (err) { + hmdfs_send_err_response(con, cmd, err); + } else { + err = hmdfs_sendmessage_response(con, cmd, sizeof(*resp), resp, + 0); + if (err) { + hmdfs_err("sending msg response failed, file_id %d, err %d", + info->file_id, err); + remove_file_from_conn(con, info->file_id); + hmdfs_close_path(info->file); + } + } + kfree(info); + kfree(resp); +} + +void hmdfs_server_release(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + struct release_request *release_recv = data; + struct file *file = NULL; + __u32 file_id; + __u64 file_ver; + int ret = 0; + + file_id = le32_to_cpu(release_recv->file_id); + file_ver = le64_to_cpu(release_recv->file_ver); + file = get_file_by_fid_and_ver(con, cmd, file_id, file_ver); + if (IS_ERR(file)) { + hmdfs_err("cannot find %u", file_id); + ret = PTR_ERR(file); + goto out; + } + /* put the reference acquired by get_file_by_fid_and_ver() */ + hmdfs_close_path(file); + hmdfs_info("close %u", file_id); + remove_file_from_conn(con, file_id); + + hmdfs_close_path(file); + +out: + trace_hmdfs_server_release(con, file_id, file_ver, ret); + set_conn_sock_quickack(con); +} + +void hmdfs_server_fsync(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + struct fsync_request *fsync_recv = data; + __s32 datasync = le32_to_cpu(fsync_recv->datasync); + __s64 start = le64_to_cpu(fsync_recv->start); + __s64 end = le64_to_cpu(fsync_recv->end); + struct file *file = NULL; + __u32 file_id; + __u64 file_ver; + int ret = 0; + + file_id = le32_to_cpu(fsync_recv->file_id); + file_ver = le64_to_cpu(fsync_recv->file_ver); + file = get_file_by_fid_and_ver(con, cmd, file_id, file_ver); + if (IS_ERR(file)) { + hmdfs_err("cannot find %u", file_id); + ret = PTR_ERR(file); + goto out; + } + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &ret)) + goto out_put_file; + + ret = vfs_fsync_range(file, start, end, datasync); + if (ret) + hmdfs_err("fsync fail, ret %d", ret); + 
+out_put_file: + hmdfs_close_path(file); +out: + hmdfs_send_err_response(con, cmd, ret); +} + +void hmdfs_server_readpage(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + struct readpage_request *readpage_recv = data; + __u64 file_ver; + __u32 file_id; + struct file *file = NULL; + loff_t pos; + struct readpage_response *readpage = NULL; + int ret = 0; + size_t read_len; + + file_id = le32_to_cpu(readpage_recv->file_id); + file_ver = le64_to_cpu(readpage_recv->file_ver); + file = get_file_by_fid_and_ver(con, cmd, file_id, file_ver); + if (IS_ERR(file)) { + hmdfs_info( + "file with id %u does not exist, pgindex %llu, devid %llu", + file_id, le64_to_cpu(readpage_recv->index), + con->device_id); + ret = PTR_ERR(file); + goto fail; + } + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &ret)) + goto fail_put_file; + + read_len = (size_t)le32_to_cpu(readpage_recv->size); + if (read_len == 0) + goto fail_put_file; + + readpage = kmalloc(read_len, GFP_KERNEL); + if (!readpage) { + ret = -ENOMEM; + goto fail_put_file; + } + + pos = (loff_t)le64_to_cpu(readpage_recv->index) << HMDFS_PAGE_OFFSET; + ret = kernel_read(file, readpage->buf, read_len, &pos); + if (ret < 0) { + hmdfs_send_err_response(con, cmd, -EIO); + } else { + if (ret != read_len) + memset(readpage->buf + ret, 0, read_len - ret); + hmdfs_sendmessage_response(con, cmd, read_len, readpage, 0); + } + + hmdfs_close_path(file); + kfree(readpage); + return; + +fail_put_file: + hmdfs_close_path(file); +fail: + hmdfs_send_err_response(con, cmd, ret); +} + +static struct readpages_response *alloc_readpages_resp(unsigned int len) +{ + struct readpages_response *resp = NULL; + + if (len > HMDFS_PAGE_SIZE) + resp = vmalloc(len); + else + resp = kmalloc(len, GFP_KERNEL); + + return resp; +} + +static void free_readpages_resp(struct readpages_response *resp, + unsigned int len) +{ + if (len > HMDFS_PAGE_SIZE) + vfree(resp); + else + kfree(resp); +} + +void hmdfs_server_readpages(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + struct readpages_request *req = data; + __u64 file_ver; + __u32 file_id; + struct file *file = NULL; + loff_t pos; + struct readpages_response *resp = NULL; + ssize_t ret = 0; + size_t read_len; + + file_id = le32_to_cpu(req->file_id); + file_ver = le64_to_cpu(req->file_ver); + file = get_file_by_fid_and_ver(con, cmd, file_id, file_ver); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto fail; + } + + read_len = (size_t)le32_to_cpu(req->size); + if (read_len == 0) + goto fail_put_file; + + resp = alloc_readpages_resp(read_len); + if (!resp) { + ret = -ENOMEM; + goto fail_put_file; + } + + pos = (loff_t)le64_to_cpu(req->index) << HMDFS_PAGE_OFFSET; + ret = kernel_read(file, resp->buf, read_len, &pos); + if (ret < 0) { + ret = -EIO; + goto fail_free_resp; + } + + hmdfs_sendmessage_response(con, cmd, ret, resp, 0); + hmdfs_close_path(file); + free_readpages_resp(resp, read_len); + return; + +fail_free_resp: + free_readpages_resp(resp, read_len); +fail_put_file: + hmdfs_close_path(file); +fail: + hmdfs_send_err_response(con, cmd, ret); +} + +static int hmdfs_do_readpages_open(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, + struct readpages_open_request *recv, + struct hmdfs_open_info *info, + struct readpages_open_response *resp) +{ + int ret = 0; + loff_t pos = 0; + + info->file = hmdfs_open_file(con, recv->buf, recv->file_type, + &info->file_id); + if (IS_ERR(info->file)) + return PTR_ERR(info->file); + + ret = hmdfs_get_open_info(con, recv->file_type, 
recv->buf, info); + if (ret) + goto fail_close; + + pos = (loff_t)le64_to_cpu(recv->index) << HMDFS_PAGE_OFFSET; + ret = kernel_read(info->file, resp->buf, le32_to_cpu(recv->size), &pos); + if (ret < 0) + goto fail_close; + + hmdfs_update_open_response(con, cmd, info, &resp->open_resp); + memset(resp->reserved, 0, sizeof(resp->reserved)); + ret = hmdfs_sendmessage_response(con, cmd, sizeof(*resp) + ret, resp, + 0); + if (ret) { + hmdfs_err("sending msg response failed, file_id %d, err %d", + info->file_id, ret); + ret = 0; + goto fail_close; + } + return 0; + +fail_close: + remove_file_from_conn(con, info->file_id); + hmdfs_close_path(info->file); + return ret; +} + +void hmdfs_server_readpages_open(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, void *data) +{ + struct readpages_open_request *recv = data; + struct readpages_open_response *resp = NULL; + int ret = -EINVAL; + size_t read_len = 0; + size_t resp_len = 0; + struct hmdfs_open_info *info = NULL; + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &ret)) + goto fail; + + info = kmalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + ret = -ENOMEM; + goto fail; + } + + read_len = (size_t)le32_to_cpu(recv->size); + if (read_len == 0) { + ret = -EINVAL; + goto fail_free_info; + } + resp_len = read_len + sizeof(*resp); + resp = vmalloc(resp_len); + if (!resp) { + ret = -ENOMEM; + goto fail_free_info; + } + + ret = hmdfs_do_readpages_open(con, cmd, recv, info, resp); + + vfree(resp); +fail_free_info: + kfree(info); +fail: + if (ret) + hmdfs_send_err_response(con, cmd, ret); +} + +static bool need_rebuild_dcache(struct hmdfs_dcache_header *h, + struct hmdfs_time_t time, + unsigned int precision) +{ + struct hmdfs_time_t crtime = { .tv_sec = le64_to_cpu(h->dcache_crtime), + .tv_nsec = le64_to_cpu( + h->dcache_crtime_nsec) }; + struct hmdfs_time_t ctime = { .tv_sec = le64_to_cpu(h->dentry_ctime), + .tv_nsec = le64_to_cpu( + h->dentry_ctime_nsec) }; + struct hmdfs_time_t pre_time = { .tv_sec = precision / MSEC_PER_SEC, + .tv_nsec = precision % MSEC_PER_SEC * + NSEC_PER_MSEC }; + + if (hmdfs_time_compare(&time, &ctime) != 0) + return true; + + pre_time = hmdfs_time_add(time, pre_time); + if (hmdfs_time_compare(&crtime, &pre_time) < 0) + return true; + + return false; +} + +static bool hmdfs_server_cache_validate(struct file *filp, struct inode *inode, + unsigned long precision) +{ + struct hmdfs_dcache_header header; + int overallpage; + ssize_t bytes; + loff_t pos = 0; + + overallpage = get_dentry_group_cnt(file_inode(filp)); + if (overallpage == 0) { + hmdfs_err("cache file size is 0"); + return false; + } + + bytes = kernel_read(filp, &header, sizeof(header), &pos); + if (bytes != sizeof(header)) { + hmdfs_err("read file failed, err:%zd", bytes); + return false; + } + + return !need_rebuild_dcache(&header, inode->i_ctime, precision); +} + +struct file *hmdfs_server_cache_revalidate(struct hmdfs_sb_info *sbi, + const char *recvpath, + struct path *path) +{ + struct cache_file_node *cfn = NULL; + struct file *file; + + cfn = find_cfn(sbi, HMDFS_SERVER_CID, recvpath, true); + if (!cfn) + return NULL; + + if (!hmdfs_server_cache_validate(cfn->filp, path->dentry->d_inode, + sbi->dcache_precision)) { + remove_cfn(cfn); + release_cfn(cfn); + return NULL; + } + file = cfn->filp; + get_file(cfn->filp); + release_cfn(cfn); + + return file; +} + +bool hmdfs_client_cache_validate(struct hmdfs_sb_info *sbi, + struct readdir_request *readdir_recv, + struct path *path) +{ + struct inode *inode = path->dentry->d_inode; + struct 
hmdfs_dcache_header header; + + /* always rebuild dentryfile for small dir */ + if (le64_to_cpu(readdir_recv->num) < sbi->dcache_threshold) + return false; + + header.dcache_crtime = readdir_recv->dcache_crtime; + header.dcache_crtime_nsec = readdir_recv->dcache_crtime_nsec; + header.dentry_ctime = readdir_recv->dentry_ctime; + header.dentry_ctime_nsec = readdir_recv->dentry_ctime_nsec; + + return !need_rebuild_dcache(&header, inode->i_ctime, + sbi->dcache_precision); +} + +static char *server_lower_dentry_path_raw(struct hmdfs_peer *peer, + struct dentry *lo_d) +{ + struct hmdfs_dentry_info *di = hmdfs_d(peer->sbi->sb->s_root); + struct dentry *lo_d_root = di->lower_path.dentry; + struct dentry *lo_d_tmp = NULL; + char *lo_p_buf = NULL; + char *buf_head = NULL; + char *buf_tail = NULL; + size_t path_len = 0; + + lo_p_buf = kzalloc(PATH_MAX, GFP_KERNEL); + if (unlikely(!lo_p_buf)) + return ERR_PTR(-ENOMEM); + + /* To generate a reversed path str */ + for (lo_d_tmp = lo_d; lo_d_tmp != lo_d_root && !IS_ROOT(lo_d_tmp); + lo_d_tmp = lo_d_tmp->d_parent) { + u32 dlen = lo_d_tmp->d_name.len; + int reverse_index = dlen - 1; + + /* Considering the appended slash and '\0' */ + if (unlikely(path_len + dlen + 1 > PATH_MAX - 1)) { + kfree(lo_p_buf); + return ERR_PTR(-ENAMETOOLONG); + } + for (; reverse_index >= 0; --reverse_index) + lo_p_buf[path_len++] = + lo_d_tmp->d_name.name[reverse_index]; + lo_p_buf[path_len++] = '/'; + } + + /* Reverse the reversed path str to get the real path str */ + for (buf_head = lo_p_buf, buf_tail = lo_p_buf + path_len - 1; + buf_head < buf_tail; ++buf_head, --buf_tail) + swap(*buf_head, *buf_tail); + + if (path_len == 0) + lo_p_buf[0] = '/'; + return lo_p_buf; +} + +static int server_lookup(struct hmdfs_peer *peer, const char *req_path, + struct path *path) +{ + struct path root_path; + int err = 0; + + err = kern_path(peer->sbi->local_dst, 0, &root_path); + if (err) + goto out_noroot; + + err = vfs_path_lookup(root_path.dentry, root_path.mnt, req_path, + LOOKUP_DIRECTORY, path); + path_put(&root_path); +out_noroot: + return err; +} + +/** + * server_lookup_lower - lookup lower file-system + * @peer: target device node + * @req_path: abs path (mount point as the root) from the request + * @lo_o: the lower path to return + * + * return the lower path's name, with characters' cases matched + */ +static char *server_lookup_lower(struct hmdfs_peer *peer, const char *req_path, + struct path *lo_p) +{ + char *lo_p_name = ERR_PTR(-ENOENT); + struct path up_p; + int err = 0; + + err = server_lookup(peer, req_path, &up_p); + if (err) + goto out; + + hmdfs_get_lower_path(up_p.dentry, lo_p); + path_put(&up_p); + + lo_p_name = server_lower_dentry_path_raw(peer, lo_p->dentry); + if (IS_ERR(lo_p_name)) { + err = PTR_ERR(lo_p_name); + path_put(lo_p); + } +out: + return err ? 
ERR_PTR(err) : lo_p_name; +} + +void hmdfs_server_readdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + struct readdir_request *readdir_recv = data; + struct path lo_p; + struct file *filp = NULL; + int err = 0; + unsigned long long num = 0; + char *lo_p_name = NULL; + + trace_hmdfs_server_readdir(readdir_recv); + + lo_p_name = server_lookup_lower(con, readdir_recv->path, &lo_p); + if (IS_ERR(lo_p_name)) { + err = PTR_ERR(lo_p_name); + hmdfs_info("Failed to get lower path: %d", err); + goto send_err; + } + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err)) + goto err_lookup_path; + + if (le32_to_cpu(readdir_recv->verify_cache)) { + if (hmdfs_client_cache_validate(con->sbi, readdir_recv, &lo_p)) + goto out_response; + } + + filp = hmdfs_server_cache_revalidate(con->sbi, lo_p_name, &lo_p); + if (IS_ERR_OR_NULL(filp)) { + filp = hmdfs_server_rebuild_dents(con->sbi, &lo_p, &num, + lo_p_name); + if (IS_ERR_OR_NULL(filp)) { + err = PTR_ERR(filp); + goto err_lookup_path; + } + } + +out_response: + err = hmdfs_readfile_response(con, cmd, filp); + if (!err) + hmdfs_add_remote_cache_list(con, lo_p_name); + if (num >= con->sbi->dcache_threshold) + cache_file_persistent(con, filp, lo_p_name, true); + if (filp) + fput(filp); +err_lookup_path: + path_put(&lo_p); + kfree(lo_p_name); +send_err: + if (err) + hmdfs_send_err_response(con, cmd, err); +} + +void hmdfs_server_mkdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + int err = 0; + struct mkdir_request *mkdir_recv = data; + struct inode *child_inode = NULL; + struct dentry *dent = NULL; + char *mkdir_dir = NULL; + char *mkdir_name = NULL; + struct hmdfs_inodeinfo_response *mkdir_resp = NULL; + int respsize = sizeof(struct hmdfs_inodeinfo_response); + int path_len = le32_to_cpu(mkdir_recv->path_len); + + mkdir_resp = kzalloc(respsize, GFP_KERNEL); + if (!mkdir_resp) { + err = -ENOMEM; + goto mkdir_out; + } + + mkdir_dir = mkdir_recv->path; + mkdir_name = mkdir_recv->path + path_len + 1; + + dent = hmdfs_root_mkdir(con->device_id, con->sbi->local_dst, + mkdir_dir, mkdir_name, + le16_to_cpu(mkdir_recv->mode)); + if (IS_ERR(dent)) { + err = PTR_ERR(dent); + hmdfs_err("hmdfs_root_mkdir failed err = %d", err); + goto mkdir_out; + } + child_inode = d_inode(dent); + mkdir_resp->i_mode = cpu_to_le16(child_inode->i_mode); + mkdir_resp->i_size = cpu_to_le64(child_inode->i_size); + mkdir_resp->i_mtime = cpu_to_le64(child_inode->i_mtime.tv_sec); + mkdir_resp->i_mtime_nsec = cpu_to_le32(child_inode->i_mtime.tv_nsec); + mkdir_resp->i_ino = cpu_to_le64(child_inode->i_ino); + dput(dent); +mkdir_out: + hmdfs_sendmessage_response(con, cmd, respsize, mkdir_resp, err); + kfree(mkdir_resp); +} + +void hmdfs_server_create(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + int err = 0; + struct create_request *create_recv = data; + struct inode *child_inode = NULL; + struct dentry *dent = NULL; + char *create_dir = NULL; + char *create_name = NULL; + struct hmdfs_inodeinfo_response *create_resp = NULL; + int respsize = sizeof(struct hmdfs_inodeinfo_response); + int path_len = le32_to_cpu(create_recv->path_len); + + create_resp = kzalloc(respsize, GFP_KERNEL); + if (!create_resp) { + err = -ENOMEM; + goto create_out; + } + + create_dir = create_recv->path; + create_name = create_recv->path + path_len + 1; + + dent = hmdfs_root_create(con->device_id, con->sbi->local_dst, + create_dir, create_name, + le16_to_cpu(create_recv->mode), + create_recv->want_excl); + if (IS_ERR(dent)) { + err = 
PTR_ERR(dent); + hmdfs_err("hmdfs_root_create failed err = %d", err); + goto create_out; + } + child_inode = d_inode(dent); + create_resp->i_mode = cpu_to_le16(child_inode->i_mode); + create_resp->i_size = cpu_to_le64(child_inode->i_size); + create_resp->i_mtime = cpu_to_le64(child_inode->i_mtime.tv_sec); + create_resp->i_mtime_nsec = cpu_to_le32(child_inode->i_mtime.tv_nsec); + /* + * keep same as hmdfs_server_open, + * to prevent hmdfs_open_final_remote from judging ino errors. + */ + create_resp->i_ino = cpu_to_le64( + generate_u64_ino(hmdfs_i(child_inode)->lower_inode->i_ino, + child_inode->i_generation)); + dput(dent); +create_out: + hmdfs_sendmessage_response(con, cmd, respsize, create_resp, err); + kfree(create_resp); +} + +void hmdfs_server_rmdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + int err = 0; + struct path root_path; + char *path = NULL; + char *name = NULL; + struct rmdir_request *rmdir_recv = data; + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err)) + goto out; + + path = rmdir_recv->path; + name = rmdir_recv->path + le32_to_cpu(rmdir_recv->path_len) + 1; + err = kern_path(con->sbi->local_dst, 0, &root_path); + if (!err) { + err = hmdfs_root_rmdir(con->device_id, &root_path, path, name); + path_put(&root_path); + } +out: + hmdfs_send_err_response(con, cmd, err); +} + +void hmdfs_server_unlink(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + int err = 0; + struct path root_path; + char *path = NULL; + char *name = NULL; + struct unlink_request *unlink_recv = data; + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err)) + goto out; + + path = unlink_recv->path; + name = unlink_recv->path + le32_to_cpu(unlink_recv->path_len) + 1; + err = kern_path(con->sbi->local_dst, 0, &root_path); + if (!err) { + err = hmdfs_root_unlink(con->device_id, &root_path, path, name); + path_put(&root_path); + } +out: + hmdfs_send_err_response(con, cmd, err); +} + +void hmdfs_server_rename(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + int err = 0; + int old_path_len; + int new_path_len; + int old_name_len; + int new_name_len; + unsigned int flags; + char *path_old = NULL; + char *name_old = NULL; + char *path_new = NULL; + char *name_new = NULL; + struct rename_request *recv = data; + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err)) + goto out; + + old_path_len = le32_to_cpu(recv->old_path_len); + new_path_len = le32_to_cpu(recv->new_path_len); + old_name_len = le32_to_cpu(recv->old_name_len); + new_name_len = le32_to_cpu(recv->new_name_len); + flags = le32_to_cpu(recv->flags); + + path_old = recv->path; + path_new = recv->path + old_path_len + 1; + name_old = recv->path + old_path_len + 1 + new_path_len + 1; + name_new = recv->path + old_path_len + 1 + new_path_len + 1 + + old_name_len + 1; + + err = hmdfs_root_rename(con->sbi, con->device_id, path_old, name_old, + path_new, name_new, flags); +out: + hmdfs_send_err_response(con, cmd, err); +} + +static int hmdfs_lookup_symlink(struct path *link_path, const char *path_fmt, + ...) 
+{
+	int ret;
+	va_list args;
+	char *path = kmalloc(PATH_MAX, GFP_KERNEL);
+
+	if (!path)
+		return -ENOMEM;
+
+	va_start(args, path_fmt);
+	ret = vsnprintf(path, PATH_MAX, path_fmt, args);
+	va_end(args);
+
+	if (ret >= PATH_MAX) {
+		ret = -ENAMETOOLONG;
+		goto out;
+	}
+
+	/*
+	 * TODO: rebuilding a dentryfile here may deadlock, because
+	 * iterate_dir() already holds the parent lock, while at this
+	 * point the parent of the symlink source is unknown.
+	 */
+	ret = kern_path(path, LOOKUP_FOLLOW, link_path);
+	if (ret) {
+		hmdfs_err("kern_path failed err = %d", ret);
+		goto out;
+	}
+
+	if (!S_ISREG(d_inode(link_path->dentry)->i_mode)) {
+		hmdfs_err("path is dir symlink");
+		path_put(link_path);
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+out:
+	kfree(path);
+	return ret;
+}
+
+static int hmdfs_filldir_real(struct dir_context *ctx, const char *name,
+			      int name_len, loff_t offset, u64 ino,
+			      unsigned int d_type)
+{
+	int res = 0;
+	char namestr[NAME_MAX + 1];
+	struct getdents_callback_real *gc = NULL;
+	struct dentry *child = NULL;
+
+	if (name_len > NAME_MAX) {
+		hmdfs_err("name_len:%d NAME_MAX:%u", name_len, NAME_MAX);
+		goto out;
+	}
+
+	gc = container_of(ctx, struct getdents_callback_real, ctx);
+
+	memcpy(namestr, name, name_len);
+	namestr[name_len] = '\0';
+
+	if (hmdfs_file_type(namestr) != HMDFS_TYPE_COMMON)
+		goto out;
+
+	/* the parent lock is already held by iterate_dir() */
+	child = lookup_one_len(name, gc->parent_path->dentry, name_len);
+	if (IS_ERR(child)) {
+		res = PTR_ERR(child);
+		hmdfs_err("lookup failed because %d", res);
+		goto out;
+	}
+
+	if (d_really_is_negative(child)) {
+		dput(child);
+		hmdfs_err("lookup failed because negative dentry");
+		/* just do not fill this entry and continue with the next one */
+		goto out;
+	}
+
+	if (d_type == DT_REG || d_type == DT_DIR) {
+		create_dentry(child, d_inode(child), gc->file, gc->sbi);
+		gc->num++;
+	} else if (d_type == DT_LNK) {
+		struct path link_path;
+
+		res = hmdfs_lookup_symlink(&link_path, "%s/%s/%s",
+					   gc->sbi->local_src, gc->dir,
+					   namestr);
+		if (!res) {
+			create_dentry(child, d_inode(link_path.dentry),
+				      gc->file, gc->sbi);
+			path_put(&link_path);
+			gc->num++;
+		} else if (res == -ENOENT) {
+			/*
+			 * If the source file does not exist, use the info
+			 * from the link inode itself.
+			 */
+			create_dentry(child, d_inode(child), gc->file, gc->sbi);
+			gc->num++;
+		}
+	}
+
+	dput(child);
+
+out:
+	/*
+	 * We always return 0 here, so that the caller can continue to the
+	 * next entry even if this one failed somehow.
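+	 * Returning non-zero from a dir_context actor would make
+	 * iterate_dir() stop early and drop every remaining entry.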
+ */ + return 0; +} + +static void hmdfs_server_set_header(struct hmdfs_dcache_header *header, + struct file *file, struct file *dentry_file) +{ + struct inode *inode = NULL; + struct hmdfs_time_t cur_time; + + inode = file_inode(file); + cur_time = current_time(file_inode(dentry_file)); + header->dcache_crtime = cpu_to_le64(cur_time.tv_sec); + header->dcache_crtime_nsec = cpu_to_le64(cur_time.tv_nsec); + header->dentry_ctime = cpu_to_le64(inode->i_ctime.tv_sec); + header->dentry_ctime_nsec = cpu_to_le64(inode->i_ctime.tv_nsec); +} + +// Get the dentries of target directory +struct file *hmdfs_server_rebuild_dents(struct hmdfs_sb_info *sbi, + struct path *path, loff_t *num, + const char *dir) +{ + int err = 0; + struct getdents_callback_real gc = { + .ctx.actor = hmdfs_filldir_real, + .ctx.pos = 0, + .num = 0, + .sbi = sbi, + .dir = dir, + }; + struct file *file = NULL; + struct file *dentry_file = NULL; + struct hmdfs_dcache_header header; + + dentry_file = create_local_dentry_file_cache(sbi); + if (IS_ERR(dentry_file)) { + hmdfs_err("file create failed err=%ld", PTR_ERR(dentry_file)); + return dentry_file; + } + + file = dentry_open(path, O_RDONLY | O_DIRECTORY, current_cred()); + if (IS_ERR(file)) { + err = PTR_ERR(file); + hmdfs_err("dentry_open failed"); + goto out; + } + + hmdfs_server_set_header(&header, file, dentry_file); + + gc.parent_path = path; + gc.file = dentry_file; + + err = iterate_dir(file, &(gc.ctx)); + if (err) { + hmdfs_err("iterate_dir failed"); + goto out; + } + + header.case_sensitive = sbi->s_case_sensitive; + header.num = cpu_to_le64(gc.num); + if (num) + *num = gc.num; + + err = write_header(dentry_file, &header); +out: + if (!IS_ERR_OR_NULL(file)) + fput(file); + + if (err) { + fput(dentry_file); + dentry_file = ERR_PTR(err); + } + + trace_hmdfs_server_rebuild_dents(&header, err); + return dentry_file; +} + +void hmdfs_server_writepage(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + struct writepage_request *writepage_recv = data; + struct hmdfs_server_writeback *hswb = NULL; + __u64 file_ver; + __u32 file_id; + struct file *file = NULL; + loff_t pos; + __u32 count; + ssize_t ret; + int err = 0; + + file_id = le32_to_cpu(writepage_recv->file_id); + file_ver = le64_to_cpu(writepage_recv->file_ver); + file = get_file_by_fid_and_ver(con, cmd, file_id, file_ver); + if (IS_ERR(file)) { + hmdfs_info( + "file with id %u does not exist, pgindex %llu, devid %llu", + file_id, le64_to_cpu(writepage_recv->index), + con->device_id); + err = PTR_ERR(file); + goto out; + } + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err)) + goto out_put_file; + + pos = (loff_t)le64_to_cpu(writepage_recv->index) << HMDFS_PAGE_OFFSET; + count = le32_to_cpu(writepage_recv->count); + ret = kernel_write(file, writepage_recv->buf, count, &pos); + if (ret != count) + err = -EIO; + +out_put_file: + hmdfs_close_path(file); +out: + hmdfs_send_err_response(con, cmd, err); + + hswb = con->sbi->h_swb; + if (!err && hswb->dirty_writeback_control) + hmdfs_server_check_writeback(hswb); +} + +static int hmdfs_lookup_linkpath(struct hmdfs_sb_info *sbi, + const char *path_name, struct path *dst_path) +{ + struct path link_path; + int err; + + err = hmdfs_lookup_symlink(&link_path, "%s/%s", sbi->local_dst, + path_name); + if (err) + return err; + + if (d_inode(link_path.dentry)->i_sb != sbi->sb) { + path_put(dst_path); + *dst_path = link_path; + } else { + path_put(&link_path); + } + + return 0; +} + +static struct inode *hmdfs_verify_path(struct dentry *dentry, char 
*recv_buf, + struct super_block *sb) +{ + struct inode *inode = d_inode(dentry); + struct hmdfs_inode_info *info = NULL; + + /* if we found path from wrong fs */ + if (inode->i_sb != sb) { + hmdfs_err("super block do not match"); + return NULL; + } + + info = hmdfs_i(inode); + /* make sure lower inode is not NULL */ + if (info->lower_inode) + return info->lower_inode; + + /* + * we don't expect lower inode to be NULL in server. However, it's + * possible because dentry cache can contain stale data. + */ + hmdfs_info("lower inode is NULL, is remote file: %d", + info->conn != NULL); + return NULL; +} + +static int hmdfs_notify_change(struct vfsmount *mnt, struct dentry *dentry, + struct iattr *attr, + struct inode **delegated_inode) +{ +#ifdef CONFIG_SDCARD_FS + /* sdcard_fs need to call setattr2, notify_change will call setattr */ + return notify_change2(mnt, dentry, attr, delegated_inode); +#else + return notify_change(dentry, attr, delegated_inode); +#endif +} + +void hmdfs_server_setattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + int err = 0; + struct dentry *dentry = NULL; + struct inode *inode = NULL; + struct setattr_request *recv = data; + struct path root_path, dst_path; + struct iattr attr; + __u32 valid = le32_to_cpu(recv->valid); + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err)) + goto out; + + err = kern_path(con->sbi->local_dst, 0, &root_path); + if (err) { + hmdfs_err("kern_path failed err = %d", err); + goto out; + } + + err = vfs_path_lookup(root_path.dentry, root_path.mnt, recv->buf, 0, + &dst_path); + if (err) + goto out_put_root; + + inode = hmdfs_verify_path(dst_path.dentry, recv->buf, con->sbi->sb); + if (!inode) { + err = -ENOENT; + goto out_put_dst; + } + + /* We need to follow if symlink was found */ + if (S_ISLNK(inode->i_mode)) { + err = hmdfs_lookup_linkpath(con->sbi, recv->buf, &dst_path); + /* if source file doesn't exist, use link inode */ + if (err == -ENOENT) + err = 0; + else if (err) + goto out_put_dst; + } + + dentry = dst_path.dentry; + memset(&attr, 0, sizeof(attr)); + /* only support size and mtime */ + if (valid & (ATTR_SIZE | ATTR_MTIME)) + attr.ia_valid = + (valid & (ATTR_MTIME | ATTR_MTIME_SET | ATTR_SIZE)); + attr.ia_size = le64_to_cpu(recv->size); + attr.ia_mtime.tv_sec = le64_to_cpu(recv->mtime); + attr.ia_mtime.tv_nsec = le32_to_cpu(recv->mtime_nsec); + + inode_lock(dentry->d_inode); + err = hmdfs_notify_change(dst_path.mnt, dentry, &attr, NULL); + inode_unlock(dentry->d_inode); + +out_put_dst: + path_put(&dst_path); +out_put_root: + path_put(&root_path); +out: + hmdfs_send_err_response(con, cmd, err); +} + +static void update_getattr_response(struct hmdfs_peer *con, struct inode *inode, + struct kstat *ks, + struct getattr_response *resp) +{ + /* if getattr for link, get ino and mode from actual lower inode */ + resp->ino = cpu_to_le64( + generate_u64_ino(inode->i_ino, inode->i_generation)); + resp->mode = cpu_to_le16(inode->i_mode); + + /* get other information from vfs_getattr() */ + resp->result_mask = cpu_to_le32(STATX_BASIC_STATS | STATX_BTIME); + resp->fsid = cpu_to_le64(ks->dev); + resp->nlink = cpu_to_le32(ks->nlink); + resp->uid = cpu_to_le32(ks->uid.val); + resp->gid = cpu_to_le32(ks->gid.val); + resp->size = cpu_to_le64(ks->size); + resp->blocks = cpu_to_le64(ks->blocks); + resp->blksize = cpu_to_le32(ks->blksize); + resp->atime = cpu_to_le64(ks->atime.tv_sec); + resp->atime_nsec = cpu_to_le32(ks->atime.tv_nsec); + resp->mtime = cpu_to_le64(ks->mtime.tv_sec); + resp->mtime_nsec = 
cpu_to_le32(ks->mtime.tv_nsec); + resp->ctime = cpu_to_le64(ks->ctime.tv_sec); + resp->ctime_nsec = cpu_to_le32(ks->ctime.tv_nsec); + resp->crtime = cpu_to_le64(ks->btime.tv_sec); + resp->crtime_nsec = cpu_to_le32(ks->btime.tv_nsec); +} + +void hmdfs_server_getattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + int err = 0; + struct getattr_request *recv = data; + int size_read = sizeof(struct getattr_response); + struct getattr_response *resp = NULL; + struct kstat ks; + struct path root_path, dst_path; + struct inode *inode = NULL; + unsigned int recv_flags = le32_to_cpu(recv->lookup_flags); + unsigned int lookup_flags = 0; + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err)) + goto err; + + err = hmdfs_convert_lookup_flags(recv_flags, &lookup_flags); + if (err) + goto err; + + resp = kzalloc(size_read, GFP_KERNEL); + if (!resp) { + err = -ENOMEM; + goto err; + } + err = kern_path(con->sbi->local_dst, 0, &root_path); + if (err) { + hmdfs_err("kern_path failed err = %d", err); + goto err_free_resp; + } + //TODO: local_dst -->local_src + err = vfs_path_lookup(root_path.dentry, root_path.mnt, recv->buf, + lookup_flags, &dst_path); + if (err) + goto out_put_root; + + inode = hmdfs_verify_path(dst_path.dentry, recv->buf, con->sbi->sb); + if (!inode) { + err = -ENOENT; + goto out_put_dst; + } + /* We need to follow if symlink was found */ + if (S_ISLNK(inode->i_mode)) { + err = hmdfs_lookup_linkpath(con->sbi, recv->buf, &dst_path); + /* if source file doesn't exist, use link inode */ + if (err && err != -ENOENT) + goto out_put_dst; + } + + err = vfs_getattr(&dst_path, &ks, STATX_BASIC_STATS | STATX_BTIME, 0); + if (err) + goto err_put_dst; + update_getattr_response(con, inode, &ks, resp); + +out_put_dst: + path_put(&dst_path); +out_put_root: + /* + * if path lookup failed, we return with result_mask setting to + * zero. So we can be aware of such situation in caller. 
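+	 * A full getattr response (carrying the error code) is still sent
+	 * rather than a bare error packet, so the client can always inspect
+	 * result_mask.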
+ */ + if (err) + resp->result_mask = cpu_to_le32(0); + path_put(&root_path); + hmdfs_sendmessage_response(con, cmd, size_read, resp, err); + kfree(resp); + return; + +err_put_dst: + path_put(&dst_path); + path_put(&root_path); +err_free_resp: + kfree(resp); +err: + hmdfs_send_err_response(con, cmd, err); +} + +static void init_statfs_response(struct statfs_response *resp, + struct kstatfs *st) +{ + resp->f_type = cpu_to_le64(HMDFS_SUPER_MAGIC); + resp->f_bsize = cpu_to_le64(st->f_bsize); + resp->f_blocks = cpu_to_le64(st->f_blocks); + resp->f_bfree = cpu_to_le64(st->f_bfree); + resp->f_bavail = cpu_to_le64(st->f_bavail); + resp->f_files = cpu_to_le64(st->f_files); + resp->f_ffree = cpu_to_le64(st->f_ffree); + resp->f_fsid_0 = cpu_to_le32(st->f_fsid.val[0]); + resp->f_fsid_1 = cpu_to_le32(st->f_fsid.val[1]); + resp->f_namelen = cpu_to_le64(st->f_namelen); + resp->f_frsize = cpu_to_le64(st->f_frsize); + resp->f_flags = cpu_to_le64(st->f_flags); + /* f_spare is not used in f2fs or ext4 */ + resp->f_spare_0 = cpu_to_le64(st->f_spare[0]); + resp->f_spare_1 = cpu_to_le64(st->f_spare[1]); + resp->f_spare_2 = cpu_to_le64(st->f_spare[2]); + resp->f_spare_3 = cpu_to_le64(st->f_spare[3]); +} + +void hmdfs_server_statfs(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + struct statfs_request *recv = data; + struct statfs_response *resp = NULL; + struct path root_path, path; + struct kstatfs *st = NULL; + int err = 0; + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err)) + goto out; + + st = kzalloc(sizeof(*st), GFP_KERNEL); + if (!st) { + err = -ENOMEM; + goto out; + } + + resp = kmalloc(sizeof(*resp), GFP_KERNEL); + if (!resp) { + err = -ENOMEM; + goto free_st; + } + + err = kern_path(con->sbi->local_src, 0, &root_path); + if (err) { + hmdfs_info("kern_path failed err = %d", err); + goto free_st; + } + + err = vfs_path_lookup(root_path.dentry, root_path.mnt, recv->path, 0, + &path); + if (err) { + hmdfs_info("recv->path found failed err = %d", err); + goto put_root; + } + + err = vfs_statfs(&path, st); + if (err) + hmdfs_info("statfs local dentry failed, err = %d", err); + init_statfs_response(resp, st); + path_put(&path); + +put_root: + path_put(&root_path); +free_st: + kfree(st); +out: + if (err) + hmdfs_send_err_response(con, cmd, err); + else + hmdfs_sendmessage_response(con, cmd, sizeof(*resp), resp, 0); + + kfree(resp); +} + +void hmdfs_server_syncfs(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + /* + * Reserved interface. There is a difference compared with traditional + * syncfs process. Remote syncfs process in client: + * 1. Remote writepages by async call + * 2. Remote syncfs calling + * 3. 
Wait all remote async calls(writepages) return in step 1 + */ + int ret = 0; + + if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &ret)) { + hmdfs_send_err_response(con, cmd, ret); + return; + } + + hmdfs_send_err_response(con, cmd, ret); +} + +void hmdfs_server_getxattr(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, void *data) +{ + struct getxattr_request *recv = data; + size_t size = le32_to_cpu(recv->size); + size_t size_read = sizeof(struct getxattr_response) + size; + struct getxattr_response *resp = NULL; + struct path root_path; + struct path path; + char *file_path = recv->buf; + char *name = recv->buf + recv->path_len + 1; + int err = -ENOMEM; + + resp = kzalloc(size_read, GFP_KERNEL); + if (!resp) + goto err; + + err = kern_path(con->sbi->local_dst, LOOKUP_DIRECTORY, &root_path); + if (err) { + hmdfs_info("kern_path failed err = %d", err); + goto err_free_resp; + } + + err = vfs_path_lookup(root_path.dentry, root_path.mnt, + file_path, 0, &path); + if (err) { + hmdfs_info("path found failed err = %d", err); + goto err_put_root; + } + + if (!size) + err = vfs_getxattr(path.dentry, name, NULL, size); + else + err = vfs_getxattr(path.dentry, name, resp->value, size); + if (err < 0) { + hmdfs_info("getxattr failed err %d", err); + goto err_put_path; + } + + resp->size = cpu_to_le32(err); + hmdfs_sendmessage_response(con, cmd, size_read, resp, 0); + path_put(&path); + path_put(&root_path); + kfree(resp); + return; + +err_put_path: + path_put(&path); +err_put_root: + path_put(&root_path); +err_free_resp: + kfree(resp); +err: + hmdfs_send_err_response(con, cmd, err); +} + +void hmdfs_server_setxattr(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, void *data) +{ + struct setxattr_request *recv = data; + size_t size = le32_to_cpu(recv->size); + int flags = le32_to_cpu(recv->flags); + bool del = recv->del; + struct path root_path; + struct path path; + const char *file_path = NULL; + const char *name = NULL; + const void *value = NULL; + int err; + + err = kern_path(con->sbi->local_dst, LOOKUP_DIRECTORY, &root_path); + if (err) { + hmdfs_info("kern_path failed err = %d", err); + goto err; + } + + file_path = recv->buf; + name = recv->buf + recv->path_len + 1; + value = name + recv->name_len + 1; + err = vfs_path_lookup(root_path.dentry, root_path.mnt, + file_path, 0, &path); + if (err) { + hmdfs_info("path found failed err = %d", err); + goto err_put_root; + } + + if (del) { + WARN_ON(flags != XATTR_REPLACE); + err = vfs_removexattr(path.dentry, name); + } else { + err = vfs_setxattr(path.dentry, name, value, size, flags); + } + + path_put(&path); +err_put_root: + path_put(&root_path); +err: + hmdfs_send_err_response(con, cmd, err); +} + +void hmdfs_server_listxattr(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, void *data) +{ + struct listxattr_request *recv = data; + size_t size = le32_to_cpu(recv->size); + int size_read = sizeof(struct listxattr_response) + size; + struct listxattr_response *resp = NULL; + const char *file_path = NULL; + struct path root_path; + struct path path; + int err = 0; + + resp = kzalloc(size_read, GFP_KERNEL); + if (!resp) { + err = -ENOMEM; + goto err; + } + + err = kern_path(con->sbi->local_dst, LOOKUP_DIRECTORY, &root_path); + if (err) { + hmdfs_info("kern_path failed err = %d", err); + goto err_free_resp; + } + + file_path = recv->buf; + err = vfs_path_lookup(root_path.dentry, root_path.mnt, + file_path, 0, &path); + if (err) { + hmdfs_info("path found failed err = %d", err); + goto err_put_root; + } + + if (!size) + err = 
vfs_listxattr(path.dentry, NULL, size); + else + err = vfs_listxattr(path.dentry, resp->list, size); + if (err < 0) { + hmdfs_info("listxattr failed err = %d", err); + goto err_put_path; + } + + resp->size = cpu_to_le32(err); + hmdfs_sendmessage_response(con, cmd, size_read, resp, 0); + path_put(&root_path); + path_put(&path); + kfree(resp); + return; + +err_put_path: + path_put(&path); +err_put_root: + path_put(&root_path); +err_free_resp: + kfree(resp); +err: + hmdfs_send_err_response(con, cmd, err); +} + +void hmdfs_server_get_drop_push(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, void *data) +{ + struct drop_push_request *dp_recv = data; + struct path root_path, path; + int err; + char *tmp_path = NULL; + + err = kern_path(con->sbi->real_dst, 0, &root_path); + if (err) { + hmdfs_err("kern_path failed err = %d", err); + goto quickack; + } + tmp_path = kzalloc(PATH_MAX, GFP_KERNEL); + if (!tmp_path) + goto out; + snprintf(tmp_path, PATH_MAX, "/" DEVICE_VIEW_ROOT "/%s%s", + con->cid, dp_recv->path); + + err = vfs_path_lookup(root_path.dentry, root_path.mnt, tmp_path, 0, + &path); + if (err) { + hmdfs_info("path found failed err = %d", err); + goto free; + } + hmdfs_remove_cache_filp(con, path.dentry); + + path_put(&path); +free: + kfree(tmp_path); +out: + path_put(&root_path); +quickack: + set_conn_sock_quickack(con); +} diff --git a/fs/hmdfs/hmdfs_server.h b/fs/hmdfs/hmdfs_server.h new file mode 100644 index 0000000000000000000000000000000000000000..844f3a9ee82c41ad1ab0b7e3c5f01905006cf85d --- /dev/null +++ b/fs/hmdfs/hmdfs_server.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/hmdfs_server.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_SERVER_H +#define HMDFS_SERVER_H + +#include "hmdfs.h" +#include "comm/transport.h" +#include "comm/socket_adapter.h" + +static inline void hmdfs_send_err_response(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, int err) +{ + if (hmdfs_sendmessage_response(con, cmd, 0, NULL, (__u32)err)) + hmdfs_warning("send err failed"); +} + +void hmdfs_server_open(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); +void hmdfs_server_atomic_open(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, void *data); +void hmdfs_server_fsync(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); +void hmdfs_server_release(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); +void hmdfs_server_readpage(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); +void hmdfs_server_readpages(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); +void hmdfs_server_readpages_open(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, void *data); +void hmdfs_server_writepage(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); + +void hmdfs_server_readdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); + +void hmdfs_server_mkdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); + +void hmdfs_server_create(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); + +void hmdfs_server_rmdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); + +void hmdfs_server_unlink(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); + +void hmdfs_server_rename(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); + +void hmdfs_server_setattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data); +void hmdfs_server_getattr(struct hmdfs_peer *con, 
struct hmdfs_head_cmd *cmd,
+			  void *data);
+void hmdfs_server_statfs(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+			 void *data);
+void hmdfs_server_syncfs(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+			 void *data);
+void hmdfs_server_getxattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+			   void *data);
+void hmdfs_server_setxattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+			   void *data);
+void hmdfs_server_listxattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+			    void *data);
+void hmdfs_server_get_drop_push(struct hmdfs_peer *con,
+				struct hmdfs_head_cmd *cmd, void *data);
+
+void __init hmdfs_server_add_node_evt_cb(void);
+#endif
diff --git a/fs/hmdfs/hmdfs_trace.h b/fs/hmdfs/hmdfs_trace.h
new file mode 100644
index 0000000000000000000000000000000000000000..205bf697c35741590e0df9cc17b5df995358f8a9
--- /dev/null
+++ b/fs/hmdfs/hmdfs_trace.h
@@ -0,0 +1,800 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/hmdfs_trace.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hmdfs
+
+#if !defined(__HMDFS_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ)
+
+#define __HMDFS_TRACE_H__
+
+#include <linux/tracepoint.h>
+#include "comm/protocol.h"
+#include "hmdfs_dentryfile.h"
+#include "hmdfs_client.h"
+#include "hmdfs_device_view.h"
+#include "client_writeback.h"
+
+TRACE_EVENT(hmdfs_permission,
+
+	TP_PROTO(unsigned long ino),
+
+	TP_ARGS(ino),
+
+	TP_STRUCT__entry(__field(unsigned long, ino)),
+
+	TP_fast_assign(__entry->ino = ino;),
+
+	TP_printk("permission check for ino %lu failed", __entry->ino));
+
+/* communication */
+TRACE_EVENT(hmdfs_recv_mesg_callback,
+
+	TP_PROTO(struct hmdfs_head_cmd *cmd),
+
+	TP_ARGS(cmd),
+
+	TP_STRUCT__entry(
+		__field(__u32, msg_id)
+		__field(__u32, magic)
+		__field(__u16, command)
+		__field(__u16, cmd_flag)
+		__field(__u32, data_len)
+		__field(__u32, ret_code)
+	),
+
+	TP_fast_assign(
+		__entry->msg_id = le32_to_cpu(cmd->msg_id);
+		__entry->magic = cmd->magic;
+		__entry->command = cmd->operations.command;
+		__entry->cmd_flag = cmd->operations.cmd_flag;
+		__entry->data_len = cmd->data_len;
+		__entry->ret_code = cmd->ret_code;
+	),
+
+	TP_printk("msg_id:%u magic:%u command:%hu, cmd_flag:%hu, data_len:%u, ret_code:%u",
+		  __entry->msg_id, __entry->magic, __entry->command,
+		  __entry->cmd_flag, __entry->data_len, __entry->ret_code)
+);
+
+TRACE_EVENT(hmdfs_tcp_send_message,
+
+	TP_PROTO(struct hmdfs_head_cmd *cmd),
+
+	TP_ARGS(cmd),
+
+	TP_STRUCT__entry(
+		__field(__u32, msg_id)
+		__field(__u32, magic)
+		__field(__u16, command)
+		__field(__u16, cmd_flag)
+		__field(__u32, data_len)
+		__field(__u32, ret_code)
+	),
+
+	TP_fast_assign(
+		__entry->msg_id = le32_to_cpu(cmd->msg_id);
+		__entry->magic = cmd->magic;
+		__entry->command = cmd->operations.command;
+		__entry->cmd_flag = cmd->operations.cmd_flag;
+		__entry->data_len = cmd->data_len;
+		__entry->ret_code = cmd->ret_code;
+	),
+
+	TP_printk("msg_id:%u magic:%u command:%hu, cmd_flag:%hu, data_len:%u, ret_code:%u",
+		  __entry->msg_id, __entry->magic, __entry->command,
+		  __entry->cmd_flag, __entry->data_len, __entry->ret_code)
+);
+
+/* file system interface */
+DECLARE_EVENT_CLASS(hmdfs_iterate_op_end,
+
+	TP_PROTO(struct dentry *__d, loff_t start_pos, loff_t end_pos, int err),
+
+	TP_ARGS(__d, start_pos, end_pos, err),
+
+	TP_STRUCT__entry(
+		__string(name_str, __d->d_name.name)
+		__field(loff_t, start)
+		__field(loff_t, end)
+		__field(int, err)
+	),
+
+	TP_fast_assign(
+		__assign_str(name_str, __d->d_name.name);
+		__entry->start = start_pos;
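+		/* start/end record the f_pos window this iterate call covered */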
__entry->end = end_pos; + __entry->err = err; + ), + + TP_printk("dentry[%s] start_pos:%llx, end_pos:%llx, err:%d", + __get_str(name_str), __entry->start, + __entry->end, __entry->err) +); + +#define define_hmdfs_iterate_op_end_event(event_name) \ + DEFINE_EVENT(hmdfs_iterate_op_end, event_name, \ + TP_PROTO(struct dentry *__d, loff_t start_pos, \ + loff_t end_pos, int err), \ + TP_ARGS(__d, start_pos, end_pos, err)) + +define_hmdfs_iterate_op_end_event(hmdfs_iterate_local); +define_hmdfs_iterate_op_end_event(hmdfs_iterate_remote); +define_hmdfs_iterate_op_end_event(hmdfs_iterate_merge); + + +TRACE_EVENT(hmdfs_lookup, + + TP_PROTO(struct inode *dir, struct dentry *dentry, unsigned int flags), + + TP_ARGS(dir, dentry, flags), + + TP_STRUCT__entry( + __field(ino_t, ino) + __string(name_str, dentry->d_name.name) + __field(unsigned int, flags) + ), + + TP_fast_assign( + __entry->ino = dir->i_ino; + __assign_str(name_str, dentry->d_name.name); + __entry->flags = flags; + ), + + TP_printk("parent_ino = %lu, name:%s, flags:%u", + __entry->ino, __get_str(name_str), __entry->flags) +); + +DECLARE_EVENT_CLASS(hmdfs_lookup_op_end, + + TP_PROTO(struct inode *dir, struct dentry *dentry, int err), + + TP_ARGS(dir, dentry, err), + + TP_STRUCT__entry( + __field(ino_t, ino) + __string(name_str, dentry->d_name.name) + __field(int, err) + ), + + TP_fast_assign( + __entry->ino = dir->i_ino; + __assign_str(name_str, dentry->d_name.name); + __entry->err = err; + ), + + TP_printk("parent_ino = %lu, name:%s, err:%d", + __entry->ino, __get_str(name_str), __entry->err) +); + +#define define_hmdfs_lookup_op_end_event(event_name) \ + DEFINE_EVENT(hmdfs_lookup_op_end, event_name, \ + TP_PROTO(struct inode *dir, struct dentry *dentry, \ + int err), \ + TP_ARGS(dir, dentry, err)) + + +define_hmdfs_lookup_op_end_event(hmdfs_root_lookup); +define_hmdfs_lookup_op_end_event(hmdfs_root_lookup_end); + +define_hmdfs_lookup_op_end_event(hmdfs_device_lookup); +define_hmdfs_lookup_op_end_event(hmdfs_device_lookup_end); + +define_hmdfs_lookup_op_end_event(hmdfs_lookup_local); +define_hmdfs_lookup_op_end_event(hmdfs_lookup_local_end); +define_hmdfs_lookup_op_end_event(hmdfs_mkdir_local); +define_hmdfs_lookup_op_end_event(hmdfs_rmdir_local); +define_hmdfs_lookup_op_end_event(hmdfs_create_local); + +define_hmdfs_lookup_op_end_event(hmdfs_lookup_remote); +define_hmdfs_lookup_op_end_event(hmdfs_lookup_remote_end); +define_hmdfs_lookup_op_end_event(hmdfs_mkdir_remote); +define_hmdfs_lookup_op_end_event(hmdfs_rmdir_remote); +define_hmdfs_lookup_op_end_event(hmdfs_create_remote); + +define_hmdfs_lookup_op_end_event(hmdfs_lookup_merge); +define_hmdfs_lookup_op_end_event(hmdfs_lookup_merge_end); +define_hmdfs_lookup_op_end_event(hmdfs_mkdir_merge); +define_hmdfs_lookup_op_end_event(hmdfs_rmdir_merge); +define_hmdfs_lookup_op_end_event(hmdfs_create_merge); + + +define_hmdfs_lookup_op_end_event(hmdfs_symlink_merge); +define_hmdfs_lookup_op_end_event(hmdfs_symlink_local); + +define_hmdfs_lookup_op_end_event(hmdfs_get_link_merge); +define_hmdfs_lookup_op_end_event(hmdfs_get_link_local); + +TRACE_EVENT(hmdfs_show_comrade, + + TP_PROTO(struct dentry *d, struct dentry *lo_d, uint64_t devid), + + TP_ARGS(d, lo_d, devid), + + TP_STRUCT__entry( + __string(name, d->d_name.name) + __string(lo_name, lo_d->d_name.name) + __field(uint64_t, devid) + ), + + TP_fast_assign( + __assign_str(name, d->d_name.name) + __assign_str(lo_name, lo_d->d_name.name) + __entry->devid = devid; + ), + + TP_printk("parent_name:%s -> lo_d_name:%s, lo_d_devid:%llu", + 
__get_str(name), __get_str(lo_name), __entry->devid) +); + +DECLARE_EVENT_CLASS(hmdfs_rename_op_end, + + TP_PROTO(struct inode *olddir, struct dentry *olddentry, + struct inode *newdir, struct dentry *newdentry, + unsigned int flags), + + TP_ARGS(olddir, olddentry, newdir, newdentry, flags), + + TP_STRUCT__entry( + __field(ino_t, oldino) + __string(oldname_str, olddentry->d_name.name) + __field(ino_t, newino) + __string(newname_str, newdentry->d_name.name) + __field(unsigned int, flags) + ), + + TP_fast_assign( + __entry->oldino = olddir->i_ino; + __assign_str(oldname_str, olddentry->d_name.name); + __entry->newino = newdir->i_ino; + __assign_str(newname_str, newdentry->d_name.name); + __entry->flags = flags; + ), + + TP_printk("old_pino = %lu, oldname:%s; new_pino = %lu, newname:%s, flags:%u", + __entry->oldino, __get_str(oldname_str), + __entry->newino, __get_str(newname_str), __entry->flags) +); + +#define define_hmdfs_rename_op_end_event(event_name) \ + DEFINE_EVENT(hmdfs_rename_op_end, event_name, \ + TP_PROTO(struct inode *olddir, struct dentry *olddentry, \ + struct inode *newdir, struct dentry *newdentry, \ + unsigned int flags), \ + TP_ARGS(olddir, olddentry, newdir, newdentry, flags)) + +define_hmdfs_rename_op_end_event(hmdfs_rename_local); +define_hmdfs_rename_op_end_event(hmdfs_rename_remote); +define_hmdfs_rename_op_end_event(hmdfs_rename_merge); + +TRACE_EVENT(hmdfs_statfs, + + TP_PROTO(struct dentry *d, uint8_t type), + + TP_ARGS(d, type), + + TP_STRUCT__entry( + __string(name, d->d_name.name) + __field(uint8_t, type) + ), + + TP_fast_assign( + __assign_str(name, d->d_name.name) + __entry->type = type; + ), + + TP_printk("dentry_name:%s, lo_d_devid:%u", + __get_str(name), __entry->type) +); + + + +TRACE_EVENT(hmdfs_balance_dirty_pages_ratelimited, + + TP_PROTO(struct hmdfs_sb_info *sbi, + struct hmdfs_writeback *hwb, + int bdp_ratelimits), + + TP_ARGS(sbi, hwb, bdp_ratelimits), + + TP_STRUCT__entry( + __array(char, dst, 128) + __field(int, nr_dirtied) + __field(int, nr_dirtied_pause) + __field(int, dirty_exceeded) + __field(long long, bdp_ratelimits) + __field(long, ratelimit_pages) + ), + + TP_fast_assign( + strlcpy(__entry->dst, sbi->local_dst, 128); + + __entry->nr_dirtied = current->nr_dirtied; + __entry->nr_dirtied_pause = current->nr_dirtied_pause; + __entry->dirty_exceeded = hwb->dirty_exceeded; + __entry->bdp_ratelimits = bdp_ratelimits; + __entry->ratelimit_pages = hwb->ratelimit_pages; + ), + + TP_printk("hmdfs dst:%s nr_dirtied=%d nr_dirtied_pause=%d dirty_exceeded=%d bdp_ratelimits=%lld ratelimit_pages=%ld", + __entry->dst, __entry->nr_dirtied, __entry->nr_dirtied_pause, + __entry->dirty_exceeded, __entry->bdp_ratelimits, + __entry->ratelimit_pages) +); + +TRACE_EVENT(hmdfs_balance_dirty_pages, + + TP_PROTO(struct hmdfs_sb_info *sbi, + struct bdi_writeback *wb, + struct hmdfs_dirty_throttle_control *hdtc, + unsigned long pause, + unsigned long start_time), + + TP_ARGS(sbi, wb, hdtc, pause, start_time), + + TP_STRUCT__entry( + __array(char, dst, 128) + __field(unsigned long, write_bw) + __field(unsigned long, avg_write_bw) + __field(unsigned long, file_bg_thresh) + __field(unsigned long, fs_bg_thresh) + __field(unsigned long, file_thresh) + __field(unsigned long, fs_thresh) + __field(unsigned long, file_nr_dirty) + __field(unsigned long, fs_nr_dirty) + __field(unsigned long, file_nr_rec) + __field(unsigned long, fs_nr_rec) + __field(unsigned long, pause) + __field(unsigned long, paused) + ), + + TP_fast_assign( + strlcpy(__entry->dst, sbi->local_dst, 128); + + 
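+		/* snapshot bandwidth figures and per-file/per-fs throttle state */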
__entry->write_bw = wb->write_bandwidth; + __entry->avg_write_bw = wb->avg_write_bandwidth; + __entry->file_bg_thresh = hdtc->file_bg_thresh; + __entry->fs_bg_thresh = hdtc->fs_bg_thresh; + __entry->file_thresh = hdtc->file_thresh; + __entry->fs_thresh = hdtc->fs_thresh; + __entry->file_nr_dirty = hdtc->file_nr_dirty; + __entry->fs_nr_dirty = hdtc->fs_nr_dirty; + __entry->file_nr_rec = hdtc->file_nr_reclaimable; + __entry->fs_nr_rec = hdtc->fs_nr_reclaimable; + __entry->pause = pause * 1000 / HZ; + __entry->paused = (jiffies - start_time) * + 1000 / HZ; + ), + + TP_printk("hmdfs dst:%s write_bw=%lu, awrite_bw=%lu, bg_thresh=%lu,%lu thresh=%lu,%lu dirty=%lu,%lu reclaimable=%lu,%lu pause=%lu paused=%lu", + __entry->dst, __entry->write_bw, __entry->avg_write_bw, + __entry->file_bg_thresh, __entry->fs_bg_thresh, + __entry->file_thresh, __entry->fs_thresh, + __entry->file_nr_dirty, __entry->fs_nr_dirty, + __entry->file_nr_rec, __entry->fs_nr_rec, + __entry->pause, __entry->paused + ) +); + +TRACE_EVENT(hmdfs_start_srv_wb, + + TP_PROTO(struct hmdfs_sb_info *sbi, int dirty_pages, + unsigned int dirty_thresh_pg), + + TP_ARGS(sbi, dirty_pages, dirty_thresh_pg), + + TP_STRUCT__entry( + __array(char, src, 128) + __field(int, dirty_pages) + __field(unsigned int, dirty_thresh_pg) + ), + + TP_fast_assign( + strlcpy(__entry->src, sbi->local_src, 128); + __entry->dirty_pages = dirty_pages; + __entry->dirty_thresh_pg = dirty_thresh_pg; + ), + + TP_printk("hmdfs src: %s, start writeback dirty pages. writeback %d pages dirty_thresh is %d pages", + __entry->src, __entry->dirty_pages, __entry->dirty_thresh_pg) +); + +TRACE_EVENT(hmdfs_fsync_enter_remote, + + TP_PROTO(struct hmdfs_sb_info *sbi, unsigned long long device_id, + unsigned long long remote_ino, int datasync), + + TP_ARGS(sbi, device_id, remote_ino, datasync), + + TP_STRUCT__entry( + __array(char, src, 128) + __field(uint64_t, device_id) + __field(uint64_t, remote_ino) + __field(int, datasync) + ), + + TP_fast_assign( + strlcpy(__entry->src, sbi->local_src, 128); + __entry->device_id = device_id; + __entry->remote_ino = remote_ino; + __entry->datasync = datasync; + ), + + TP_printk("hmdfs: src %s, start remote fsync file(remote dev_id=%llu,ino=%llu), datasync=%d", + __entry->src, __entry->device_id, + __entry->remote_ino, __entry->datasync) +); + +TRACE_EVENT(hmdfs_fsync_exit_remote, + + TP_PROTO(struct hmdfs_sb_info *sbi, unsigned long long device_id, + unsigned long long remote_ino, unsigned int timeout, int err), + + TP_ARGS(sbi, device_id, remote_ino, timeout, err), + + TP_STRUCT__entry( + __array(char, src, 128) + __field(uint64_t, device_id) + __field(uint64_t, remote_ino) + __field(uint32_t, timeout) + __field(int, err) + ), + + TP_fast_assign( + strlcpy(__entry->src, sbi->local_src, 128); + __entry->device_id = device_id; + __entry->remote_ino = remote_ino; + __entry->timeout = timeout; + __entry->err = err; + ), + + TP_printk("hmdfs: src %s, finish remote fsync file(remote dev_id=%llu,ino=%llu), timeout=%u, err=%d", + __entry->src, __entry->device_id, __entry->remote_ino, + __entry->timeout, __entry->err) +); + +TRACE_EVENT(hmdfs_syncfs_enter, + + TP_PROTO(struct hmdfs_sb_info *sbi), + + TP_ARGS(sbi), + + TP_STRUCT__entry( + __array(char, src, 128) + ), + + TP_fast_assign( + strlcpy(__entry->src, sbi->local_src, 128); + ), + + TP_printk("hmdfs: src %s, start syncfs", __entry->src) +); + +TRACE_EVENT(hmdfs_syncfs_exit, + + TP_PROTO(struct hmdfs_sb_info *sbi, int remain_count, + unsigned int timeout, int err), + + TP_ARGS(sbi, remain_count, 
timeout, err), + + TP_STRUCT__entry( + __array(char, src, 128) + __field(int, remain_count) + __field(uint32_t, timeout) + __field(int, err) + ), + + TP_fast_assign( + strlcpy(__entry->src, sbi->local_src, 128); + __entry->remain_count = remain_count; + __entry->timeout = timeout; + __entry->err = err; + ), + + TP_printk("hmdfs: src %s, finish syncfs(timeout=%u), remain %d remote devices to response, err=%d", + __entry->src, __entry->timeout, + __entry->remain_count, __entry->err) +); + +TRACE_EVENT(hmdfs_server_release, + + TP_PROTO(struct hmdfs_peer *con, uint32_t file_id, + uint64_t file_ver, int err), + + TP_ARGS(con, file_id, file_ver, err), + + TP_STRUCT__entry( + __array(char, src, 128) + __field(uint32_t, file_id) + __field(uint64_t, file_ver) + __field(uint64_t, device_id) + __field(int, err) + ), + + TP_fast_assign( + strlcpy(__entry->src, con->sbi->local_src, 128); + __entry->file_id = file_id; + __entry->file_ver = file_ver; + __entry->device_id = con->device_id; + __entry->err = err; + ), + + TP_printk("hmdfs: src %s, server release file, fid=%u, fid_ver=%llu, remote_dev=%llu, err=%d", + __entry->src, __entry->file_id, __entry->file_ver, + __entry->device_id, __entry->err) +); + +TRACE_EVENT(hmdfs_client_recv_readpage, + + TP_PROTO(struct hmdfs_peer *con, unsigned long long remote_ino, + unsigned long page_index, int err), + + TP_ARGS(con, remote_ino, page_index, err), + + TP_STRUCT__entry( + __array(char, src, 128) + __field(uint64_t, remote_ino) + __field(unsigned long, page_index) + __field(uint64_t, device_id) + __field(int, err) + ), + + TP_fast_assign( + strlcpy(__entry->src, con->sbi->local_src, 128); + __entry->remote_ino = remote_ino; + __entry->page_index = page_index; + __entry->device_id = con->device_id; + __entry->err = err; + ), + + TP_printk("hmdfs: src %s, client readpage callback from remote device %llu, remote_ino=%llu, page_idx=%lu, err=%d", + __entry->src, __entry->device_id, + __entry->remote_ino, __entry->page_index, __entry->err) +); + +TRACE_EVENT(hmdfs_writepage_cb_enter, + + TP_PROTO(struct hmdfs_peer *con, unsigned long long remote_ino, + unsigned long page_index, int err), + + TP_ARGS(con, remote_ino, page_index, err), + + TP_STRUCT__entry( + __array(char, src, 128) + __field(uint64_t, remote_ino) + __field(unsigned long, page_index) + __field(uint64_t, device_id) + __field(int, err) + ), + + TP_fast_assign( + strlcpy(__entry->src, con->sbi->local_src, 128); + __entry->remote_ino = remote_ino; + __entry->page_index = page_index; + __entry->device_id = con->device_id; + __entry->err = err; + ), + + TP_printk("hmdfs: src %s, writepage_cb start, return from remote device %llu, remote_ino=%llu, page_idx=%lu, err=%d", + __entry->src, __entry->device_id, + __entry->remote_ino, __entry->page_index, __entry->err) +); + +TRACE_EVENT(hmdfs_writepage_cb_exit, + + TP_PROTO(struct hmdfs_peer *con, unsigned long long remote_ino, + unsigned long page_index, int err), + + TP_ARGS(con, remote_ino, page_index, err), + + TP_STRUCT__entry( + __array(char, src, 128) + __field(uint64_t, remote_ino) + __field(unsigned long, page_index) + __field(uint64_t, device_id) + __field(int, err) + ), + + TP_fast_assign( + strlcpy(__entry->src, con->sbi->local_src, 128); + __entry->remote_ino = remote_ino; + __entry->page_index = page_index; + __entry->device_id = con->device_id; + __entry->err = err; + ), + + TP_printk("hmdfs: src %s, writepage_cb exit, return from remote device %llu, remote_ino=%llu, page_index=%lu, err=%d", + __entry->src, __entry->device_id, + 
__entry->remote_ino, __entry->page_index, __entry->err) +); + +TRACE_EVENT(hmdfs_server_rebuild_dents, + + TP_PROTO(struct hmdfs_dcache_header *__h, int err), + + TP_ARGS(__h, err), + + TP_STRUCT__entry( + __field(uint64_t, crtime) + __field(uint64_t, crtime_nsec) + __field(uint64_t, ctime) + __field(uint64_t, ctime_nsec) + __field(uint64_t, num) + __field(int, err) + ), + + TP_fast_assign( + __entry->crtime = le64_to_cpu(__h->dcache_crtime); + __entry->crtime_nsec = le64_to_cpu(__h->dcache_crtime_nsec); + __entry->ctime = le64_to_cpu(__h->dentry_ctime); + __entry->ctime_nsec = le64_to_cpu(__h->dentry_ctime_nsec); + __entry->num = le64_to_cpu(__h->num); + __entry->err = err; + ), + + TP_printk("dcache crtime %llu:%llu ctime %llu:%llu has %llu dentry err %d", + __entry->crtime, __entry->crtime_nsec, __entry->ctime, + __entry->ctime_nsec, __entry->num, __entry->err) +); + +TRACE_EVENT(hmdfs_server_readdir, + + TP_PROTO(struct readdir_request *req), + + TP_ARGS(req), + + TP_STRUCT__entry( + __string(path, req->path) + ), + + TP_fast_assign( + __assign_str(path, req->path); + ), + + TP_printk("hmdfs_server_readdir %s", __get_str(path)) +); + +TRACE_EVENT(hmdfs_open_final_remote, + + TP_PROTO(struct hmdfs_inode_info *info, + struct hmdfs_open_ret *open_ret, + struct file *file, + int reason), + + TP_ARGS(info, open_ret, file, reason), + + TP_STRUCT__entry( + __array(char, file_path, MAX_FILTER_STR_VAL) + __field(uint32_t, reason) + __field(uint32_t, file_id) + __field(uint64_t, file_ver) + __field(uint64_t, remote_file_size) + __field(uint64_t, remote_ino) + __field(uint64_t, remote_ctime) + __field(uint64_t, remote_ctime_nsec) + __field(uint64_t, remote_stable_ctime) + __field(uint64_t, remote_stable_ctime_nsec) + __field(uint64_t, local_file_size) + __field(uint64_t, local_ino) + __field(uint64_t, local_ctime) + __field(uint64_t, local_ctime_nsec) + __field(uint64_t, local_stable_ctime) + __field(uint64_t, local_stable_ctime_nsec) + ), + + TP_fast_assign( + strlcpy(__entry->file_path, file->f_path.dentry->d_name.name, + MAX_FILTER_STR_VAL); + __entry->reason = reason; + __entry->file_id = open_ret->fid.id; + __entry->file_ver = open_ret->fid.ver; + __entry->remote_file_size = open_ret->file_size; + __entry->remote_ino = open_ret->ino; + __entry->remote_ctime = open_ret->remote_ctime.tv_sec; + __entry->remote_ctime_nsec = open_ret->remote_ctime.tv_nsec; + __entry->remote_stable_ctime = open_ret->stable_ctime.tv_sec; + __entry->remote_stable_ctime_nsec = + open_ret->stable_ctime.tv_nsec; + __entry->local_file_size = info->vfs_inode.i_size; + __entry->local_ino = info->remote_ino; + __entry->local_ctime = info->remote_ctime.tv_sec; + __entry->local_ctime_nsec = info->remote_ctime.tv_nsec; + __entry->local_stable_ctime = info->stable_ctime.tv_sec; + __entry->local_stable_ctime_nsec = info->stable_ctime.tv_nsec; + ), + + TP_printk("file path: %s, file id: %u, file ver: %llu, reason: %d, file size: %llu/%llu, ino: %llu/%llu, ctime: %llu.%llu/%llu.%llu, stable_ctime: %llu.%llu/%llu.%llu from remote/local", + __entry->file_path, __entry->file_id, __entry->file_ver, + __entry->reason, __entry->remote_file_size, + __entry->local_file_size, __entry->remote_ino, + __entry->local_ino, __entry->remote_ctime, + __entry->remote_ctime_nsec, __entry->local_ctime, + __entry->local_ctime_nsec, __entry->remote_stable_ctime, + __entry->remote_stable_ctime_nsec, + __entry->local_stable_ctime, __entry->local_stable_ctime_nsec) +); + +TRACE_EVENT(hmdfs_server_open_enter, + + TP_PROTO(struct hmdfs_peer *con, + struct 
open_request *recv),
+
+	TP_ARGS(con, recv),
+
+	TP_STRUCT__entry(
+		__array(char, open_path, MAX_FILTER_STR_VAL)
+		__array(char, dst_path, MAX_FILTER_STR_VAL)
+		__field(uint32_t, file_type)
+	),
+
+	TP_fast_assign(
+		strlcpy(__entry->open_path, recv->buf, MAX_FILTER_STR_VAL);
+		strlcpy(__entry->dst_path, con->sbi->local_dst,
+			MAX_FILTER_STR_VAL);
+		__entry->file_type = recv->file_type;
+	),
+
+	TP_printk("server open file %s from %s, file_type is %u",
+		  __entry->open_path, __entry->dst_path,
+		  __entry->file_type)
+);
+
+TRACE_EVENT(hmdfs_server_open_exit,
+
+	TP_PROTO(struct hmdfs_peer *con,
+		 struct open_response *resp,
+		 struct file *file,
+		 int ret),
+
+	TP_ARGS(con, resp, file, ret),
+
+	TP_STRUCT__entry(
+		__array(char, file_path, MAX_FILTER_STR_VAL)
+		__array(char, src_path, MAX_FILTER_STR_VAL)
+		__field(uint32_t, file_id)
+		__field(uint64_t, file_size)
+		__field(uint64_t, ino)
+		__field(uint64_t, ctime)
+		__field(uint64_t, ctime_nsec)
+		__field(uint64_t, stable_ctime)
+		__field(uint64_t, stable_ctime_nsec)
+		__field(int, retval)
+	),
+
+	TP_fast_assign(
+		if (file)
+			strlcpy(__entry->file_path,
+				file->f_path.dentry->d_name.name,
+				MAX_FILTER_STR_VAL);
+		else
+			strlcpy(__entry->file_path, "null", MAX_FILTER_STR_VAL);
+		strlcpy(__entry->src_path, con->sbi->local_src,
+			MAX_FILTER_STR_VAL);
+		__entry->file_id = resp ? resp->file_id : UINT_MAX;
+		__entry->file_size = resp ? resp->file_size : ULLONG_MAX;
+		__entry->ino = resp ? resp->ino : 0;
+		__entry->ctime = resp ? resp->ctime : 0;
+		__entry->ctime_nsec = resp ? resp->ctime_nsec : 0;
+		__entry->stable_ctime = resp ? resp->stable_ctime : 0;
+		__entry->stable_ctime_nsec = resp ? resp->stable_ctime_nsec : 0;
+		__entry->retval = ret;
+	),
+
+	TP_printk("server file %s is opened from %s, open result: %d, file id: %u, file size: %llu, ino: %llu, ctime: %llu.%llu, stable ctime: %llu.%llu",
+		  __entry->file_path, __entry->src_path,
+		  __entry->retval, __entry->file_id,
+		  __entry->file_size, __entry->ino, __entry->ctime,
+		  __entry->ctime_nsec, __entry->stable_ctime,
+		  __entry->stable_ctime_nsec)
+);
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE hmdfs_trace
+#include <trace/define_trace.h>
diff --git a/fs/hmdfs/inode.c b/fs/hmdfs/inode.c
new file mode 100644
index 0000000000000000000000000000000000000000..8cdedf42dc952a571a5185ef6acd654797216fe8
--- /dev/null
+++ b/fs/hmdfs/inode.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/inode.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include "hmdfs_device_view.h"
+#include "inode.h"
+#include "comm/connection.h"
+
+/**
+ * Rules to generate inode numbers:
+ *
+ * "/", "/device_view", "/merge_view", "/device_view/local", "/device_view/cid"
+ * = DOMAIN {3} : dev_id {29} : HMDFS_ROOT {32}
+ *
+ * "/device_view/cid/xxx"
+ * = DOMAIN {3} : dev_id {29} : hash(remote_ino){32}
+ *
+ * "/merge_view/xxx"
+ * = DOMAIN {3} : lower's dev_id {29} : lower's ino_raw {32}
+ */
+
+#define BIT_WIDE_TOTAL 64
+
+#define BIT_WIDE_DOMAIN 3
+#define BIT_WIDE_DEVID 29
+#define BIT_WIDE_INO_RAW 32
+
+enum DOMAIN {
+	DOMAIN_ROOT,
+	DOMAIN_DEVICE_LOCAL,
+	DOMAIN_DEVICE_REMOTE,
+	DOMAIN_MERGE_VIEW,
+	DOMAIN_INVALID,
+};
+
+union hmdfs_ino {
+	const uint64_t ino_output;
+	struct {
+		uint64_t ino_raw : BIT_WIDE_INO_RAW;
+		uint64_t dev_id : BIT_WIDE_DEVID;
+		uint8_t domain : BIT_WIDE_DOMAIN;
+	};
+};
+
+static uint8_t read_ino_domain(uint64_t ino)
+{
+	union hmdfs_ino _ino = {
+		.ino_output = ino,
+	};
+
+	return _ino.domain;
+}
+
+struct iget_args {
+	/* The lower inode of local/merge/root(part) inode */
+	struct inode *lo_i;
+	/* The peer of remote inode */
+	struct hmdfs_peer *peer;
+	/* The ino of remote inode */
+	uint64_t remote_ino;
+
+	/* Returned inode's ino */
+	union hmdfs_ino ino;
+};
+
+/**
+ * iget_test - whether or not the inode with matched hashval is the one we are
+ * looking for
+ *
+ * @inode: the local inode we found in inode cache with matched hashval
+ * @data: struct iget_args
+ */
+static int iget_test(struct inode *inode, void *data)
+{
+	struct hmdfs_inode_info *hii = hmdfs_i(inode);
+	struct iget_args *ia = data;
+	int res = 0;
+
+	/* domain is a 3-bit unsigned field, so only the upper bound matters */
+	WARN_ON(ia->ino.domain >= DOMAIN_INVALID);
+
+	if (read_ino_domain(inode->i_ino) == DOMAIN_ROOT)
+		return 0;
+
+	switch (ia->ino.domain) {
+	case DOMAIN_MERGE_VIEW:
+		res = (ia->lo_i == hii->lower_inode);
+		break;
+	case DOMAIN_DEVICE_LOCAL:
+		res = (ia->lo_i == hii->lower_inode);
+		break;
+	case DOMAIN_DEVICE_REMOTE:
+		res = (ia->peer == hii->conn &&
+		       ia->remote_ino == hii->remote_ino);
+		break;
+	}
+
+	return res;
+}
+
+/**
+ * iget_set - initialize an inode with iget_args
+ *
+ * @inode: the newly allocated inode to set up
+ * @data: struct iget_args
+ */
+static int iget_set(struct inode *inode, void *data)
+{
+	struct hmdfs_inode_info *hii = hmdfs_i(inode);
+	struct iget_args *ia = (struct iget_args *)data;
+
+	inode->i_ino = ia->ino.ino_output;
+	inode_inc_iversion(inode);
+
+	hii->conn = ia->peer;
+	hii->remote_ino = ia->remote_ino;
+	hii->lower_inode = ia->lo_i;
+
+	return 0;
+}
+
+static uint64_t make_ino_raw_dev_local(uint64_t lo_ino)
+{
+	if (!(lo_ino >> BIT_WIDE_INO_RAW))
+		return lo_ino;
+
+	return lo_ino * GOLDEN_RATIO_64 >> BIT_WIDE_INO_RAW;
+}
+
+static uint64_t make_ino_raw_dev_remote(uint64_t remote_ino)
+{
+	return hash_long(remote_ino, BIT_WIDE_INO_RAW);
+}
+
+/**
+ * hmdfs_iget5_locked_merge - obtain an inode for the merge-view
+ *
+ * @sb: superblock of current instance
+ * @fst_lo_d: the lower dentry of its first comrade
+ *
+ * Simply replace the lower's domain for a new ino.
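+ *
+ * For illustration only (no extra logic, just the layout above): a
+ * comrade on the device with dev_id 2 whose lower inode number is 100
+ * ends up with
+ *
+ *	ino_output = ((u64)DOMAIN_MERGE_VIEW << 61) | (2ULL << 32) | 100
+ *
+ * assembled via the bitfields of union hmdfs_ino.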
+ */
+struct inode *hmdfs_iget5_locked_merge(struct super_block *sb,
+				       struct dentry *fst_lo_d)
+{
+	struct iget_args ia = {
+		.lo_i = d_inode(fst_lo_d),
+		.peer = NULL,
+		.remote_ino = 0,
+		.ino.ino_output = 0,
+	};
+
+	if (unlikely(!d_inode(fst_lo_d))) {
+		hmdfs_err("Received an invalid lower inode");
+		return NULL;
+	}
+
+	ia.ino.ino_raw = d_inode(fst_lo_d)->i_ino;
+	ia.ino.dev_id = hmdfs_d(fst_lo_d)->device_id;
+	ia.ino.domain = DOMAIN_MERGE_VIEW;
+	return iget5_locked(sb, ia.ino.ino_output, iget_test, iget_set, &ia);
+}
+
+/**
+ * hmdfs_iget5_locked_local - obtain an inode for the local-dev-view
+ *
+ * @sb: superblock of current instance
+ * @lo_i: the lower inode from local filesystem
+ *
+ * Hashing local inode's ino to generate our ino. We continue to compare the
+ * address of the lower_inode for uniqueness when collisions occur.
+ */
+struct inode *hmdfs_iget5_locked_local(struct super_block *sb,
+				       struct inode *lo_i)
+{
+	struct iget_args ia = {
+		.lo_i = lo_i,
+		.peer = NULL,
+		.remote_ino = 0,
+		.ino.ino_output = 0,
+	};
+
+	if (unlikely(!lo_i)) {
+		hmdfs_err("Received an invalid lower inode");
+		return NULL;
+	}
+	ia.ino.ino_raw = make_ino_raw_dev_local(lo_i->i_ino);
+	ia.ino.dev_id = 0;
+	ia.ino.domain = DOMAIN_DEVICE_LOCAL;
+	return iget5_locked(sb, ia.ino.ino_output, iget_test, iget_set, &ia);
+}
+
+/**
+ * hmdfs_iget5_locked_remote - obtain an inode for the remote-dev-view
+ *
+ * @sb: superblock of current instance
+ * @peer: corresponding device node
+ * @remote_ino: remote inode's ino
+ *
+ * Hash the remote ino to fill the low 32 bits of our ino.
+ *
+ * Note that the current implementation assumes each remote inode has a
+ * unique ino. Thus the combination of the peer's unique dev_id and the
+ * remote_ino is enough to determine a unique remote inode.
+ */
+struct inode *hmdfs_iget5_locked_remote(struct super_block *sb,
+					struct hmdfs_peer *peer,
+					uint64_t remote_ino)
+{
+	struct iget_args ia = {
+		.lo_i = NULL,
+		.peer = peer,
+		.remote_ino = remote_ino,
+		.ino.ino_output = 0,
+	};
+
+	if (unlikely(!peer)) {
+		hmdfs_err("Received an invalid peer");
+		return NULL;
+	}
+
+	ia.ino.ino_raw = make_ino_raw_dev_remote(remote_ino);
+	ia.ino.dev_id = peer->device_id;
+	ia.ino.domain = DOMAIN_DEVICE_REMOTE;
+	return iget5_locked(sb, ia.ino.ino_output, iget_test, iget_set, &ia);
+}
+
+struct inode *hmdfs_iget_locked_root(struct super_block *sb, uint64_t root_ino,
+				     struct inode *lo_i,
+				     struct hmdfs_peer *peer)
+{
+	struct iget_args ia = {
+		.lo_i = lo_i,
+		.peer = peer,
+		.remote_ino = 0,
+		.ino.ino_raw = root_ino,
+		.ino.dev_id = peer ? peer->device_id : 0,
+		.ino.domain = DOMAIN_ROOT,
+	};
+
+	/* root_ino is unsigned, so only the upper bound needs checking */
+	if (unlikely(root_ino >= HMDFS_ROOT_INVALID)) {
+		hmdfs_err("Root %llu is invalid", root_ino);
+		return NULL;
+	}
+	if (unlikely(root_ino == HMDFS_ROOT_DEV_REMOTE && !peer)) {
+		hmdfs_err("Root %llu received an invalid peer", root_ino);
+		return NULL;
+	}
+
+	return iget5_locked(sb, ia.ino.ino_output, iget_test, iget_set, &ia);
+}
diff --git a/fs/hmdfs/inode.h b/fs/hmdfs/inode.h
new file mode 100644
index 0000000000000000000000000000000000000000..47f189f3cf828444036eebe1acc2a51b14fe25c4
--- /dev/null
+++ b/fs/hmdfs/inode.h
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/inode.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef INODE_H
+#define INODE_H
+
+#include "hmdfs.h"
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0)
+#include <linux/iversion.h>
+#endif
+
+enum {
+	HMDFS_REMOTE_INODE_NONE = 0,
+	HMDFS_REMOTE_INODE_STASHING,
+	HMDFS_REMOTE_INODE_RESTORING,
+};
+
+/*****************************************************************************
+ * fid
+ *****************************************************************************/
+
+/* Bits for fid_flags */
+enum {
+	HMDFS_FID_NEED_OPEN = 0,
+	HMDFS_FID_OPENING,
+};
+
+struct hmdfs_fid {
+	__u64 ver;
+	__u32 id;
+};
+
+/*
+ * The cache file is stored in the following format:
+ * ________________________________________________________________
+ * |meta file info|  remote file(s) path  |      file content      |
+ * |     head     |         path          |          data          |
+ *                ↑                       ↑
+ *            path_offs               data_offs
+ */
+struct hmdfs_cache_info {
+	/* Path start offset in file (HMDFS_STASH_BLK_SIZE aligned) */
+	__u32 path_offs;
+	__u32 path_len;
+	__u32 path_cnt;
+	char *path_buf;
+	/* Path(s) of the remote file (a hardlink may have several), split by '\0' */
+	char *path;
+	/* Data start offset in file (HMDFS_STASH_BLK_SIZE aligned) */
+	__u32 data_offs;
+	/* # of pages that need to be written to the remote file during offline */
+	atomic64_t to_write_pgs;
+	/* # of pages written to the remote file during offline */
+	atomic64_t written_pgs;
+	/* Stash file handler */
+	struct file *cache_file;
+};
+
+/*****************************************************************************
+ * inode info and its inline helpers
+ *****************************************************************************/
+
+struct hmdfs_inode_info {
+	struct inode *lower_inode; // for local/merge inode
+	struct hmdfs_peer *conn;   // for remote inode
+	struct kref ref;
+	spinlock_t fid_lock;
+	struct hmdfs_fid fid;
+	unsigned long fid_flags;
+	wait_queue_head_t fid_wq;
+	__u8 inode_type; // deprecated: use ino system instead
+
+	/* writeback list */
+	struct list_head wb_list;
+
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+	__u16 perm;
+#endif
+	/*
+	 * Looking up a remote file generates a local inode; this stores the
+	 * combination of remote inode number and generation in that situation,
+	 * so the uniqueness of the local inode can be determined.
+	 */
+	__u64 remote_ino;
+	/*
+	 * If this value is not ULLONG_MAX, the remote getattr syscall should
+	 * return this value as the inode size.
+	 */
+	__u64 getattr_isize;
+	/*
+	 * This value stores the remote ctime captured when the remote file is
+	 * opened.
+	 */
+	struct hmdfs_time_t remote_ctime;
+	/*
+	 * This value stores the last time, aligned to dcache_precision, that
+	 * the remote file was modified. It should be noted that this value
+	 * won't take effect if writecache_expire is set.
+	 */
+	struct hmdfs_time_t stable_ctime;
+	/*
+	 * If this value is set nonzero, the pagecache should be truncated
+	 * when the file is opened after this expiry time. Furthermore,
+	 * stable_ctime is then ignored.
+	 */
+	unsigned long writecache_expire;
+	/*
+	 * This value records how many times the file has been written while
+	 * it is open. 'writecache_expire' will be set on close if this value
+	 * is nonzero.
+	 */
+	atomic64_t write_counter;
+	/*
+	 * will be linked into hmdfs_peer::wr_opened_inode_list
+	 * if the remote inode is opened for write, using
+	 * wr_opened_cnt to track possibly multiple write opens.
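+	 * (Assuming the naming here: two concurrent write opens push
+	 * wr_opened_cnt to 2, and the inode is expected to leave the
+	 * list only after both are released.)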
+ */ + struct list_head wr_opened_node; + atomic_t wr_opened_cnt; + spinlock_t stash_lock; + unsigned int stash_status; + struct hmdfs_cache_info *cache; + /* link to hmdfs_peer::stashed_inode_list when stashing completes */ + struct list_head stash_node; + /* + * The flush/fsync thread will hold the write lock while threads + * calling writepage will hold the read lock. We use rwlock to + * eliminate the cases that flush/fsync operations are done with + * re-dirtied pages remain dirty. + * + * Here is the explanation in detail: + * + * During `writepage()`, the state of a re-dirtied page will switch + * to the following states in sequence: + * s1: page dirty + tree dirty + * s2: page dirty + tree dirty + * s3: page clean + tree dirty + * s4: page clean + tree clean + write back + * s5: page dirty + tree dirty + write back + * s6: page dirty + tree dirty + * + * A page upon s4 will thus be ignored by the concurrent + * `do_writepages()` contained by `close()`, `fsync()`, making it's + * state inconsistent. + * + * To avoid such situation, we use per-file rwsems to prevent + * concurrent in-flight `writepage` during `close()` or `fsync()`. + * + * Minimal overhead is brought in since rsems allow concurrent + * `writepage` while `close()` or `fsync()` is natural to wait for + * in-flight `writepage()`s to complete. + * + * NOTE that in the worst case, a process may wait for wsem for TIMEOUT + * even if a signal is pending. But we've to wait there to iterate all + * pages and make sure that no dirty page should remain. + */ + struct rw_semaphore wpage_sem; + + // The real inode shared with vfs. ALWAYS PUT IT AT THE BOTTOM. + struct inode vfs_inode; +}; + +struct hmdfs_readdir_work { + struct list_head head; + struct dentry *dentry; + struct hmdfs_peer *con; + struct delayed_work dwork; +}; + +static inline struct hmdfs_inode_info *hmdfs_i(struct inode *inode) +{ + return container_of(inode, struct hmdfs_inode_info, vfs_inode); +} + +static inline bool hmdfs_inode_is_stashing(const struct hmdfs_inode_info *info) +{ + const struct hmdfs_sb_info *sbi = hmdfs_sb(info->vfs_inode.i_sb); + + /* Refer to comments in hmdfs_stash_remote_inode() */ + return (hmdfs_is_stash_enabled(sbi) && + smp_load_acquire(&info->stash_status)); // protect +} + +static inline void hmdfs_remote_fetch_fid(struct hmdfs_inode_info *info, + struct hmdfs_fid *fid) +{ + spin_lock(&info->fid_lock); + *fid = info->fid; + spin_unlock(&info->fid_lock); +} + +/***************************************************************************** + * ino allocator + *****************************************************************************/ + +enum HMDFS_ROOT { + HMDFS_ROOT_ANCESTOR = 1, // / + HMDFS_ROOT_DEV, // /device_view + HMDFS_ROOT_DEV_LOCAL, // /device_view/local + HMDFS_ROOT_DEV_REMOTE, // /device_view/remote + HMDFS_ROOT_MERGE, // /merge_view + + HMDFS_ROOT_INVALID, +}; + +// delete layer, directory layer, not overlay layer +enum HMDFS_LAYER_TYPE { + HMDFS_LAYER_ZERO = 0, // / + HMDFS_LAYER_FIRST_DEVICE, // /device_view + HMDFS_LAYER_SECOND_LOCAL, // /device_view/local + HMDFS_LAYER_SECOND_REMOTE, // /device_view/remote + HMDFS_LAYER_OTHER_LOCAL, // /device_view/local/xx + HMDFS_LAYER_OTHER_REMOTE, // /device_view/remote/xx + + HMDFS_LAYER_FIRST_MERGE, // /merge_view + HMDFS_LAYER_OTHER_MERGE, // /merge_view/xxx + HMDFS_LAYER_INVALID, +}; + +struct inode *hmdfs_iget_locked_root(struct super_block *sb, uint64_t root_ino, + struct inode *lo_i, + struct hmdfs_peer *peer); +struct inode *hmdfs_iget5_locked_merge(struct super_block 
*sb, + struct dentry *fst_lo_d); + +struct inode *hmdfs_iget5_locked_local(struct super_block *sb, + struct inode *lo_i); +struct hmdfs_peer; +struct inode *hmdfs_iget5_locked_remote(struct super_block *sb, + struct hmdfs_peer *peer, + uint64_t remote_ino); + +#endif // INODE_H diff --git a/fs/hmdfs/inode_local.c b/fs/hmdfs/inode_local.c new file mode 100644 index 0000000000000000000000000000000000000000..d34b765ab65daab8c1ca48a334ef92c9debc404a --- /dev/null +++ b/fs/hmdfs/inode_local.c @@ -0,0 +1,963 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/inode_local.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include +#include +#include +#include +#include +#include + +#include "authority/authentication.h" +#include "comm/socket_adapter.h" +#include "comm/transport.h" +#include "hmdfs_client.h" +#include "hmdfs_dentryfile.h" +#include "hmdfs_device_view.h" +#include "hmdfs_trace.h" + +extern struct kmem_cache *hmdfs_dentry_cachep; + +static const char *const symlink_tgt_white_list[] = { + "/storage/", + "/sdcard/", +}; + +struct hmdfs_name_data { + struct dir_context ctx; + const struct qstr *to_find; + char *name; + bool found; +}; + +int init_hmdfs_dentry_info(struct hmdfs_sb_info *sbi, struct dentry *dentry, + int dentry_type) +{ + struct hmdfs_dentry_info *info = + kmem_cache_zalloc(hmdfs_dentry_cachep, GFP_ATOMIC); + + if (!info) + return -ENOMEM; + dentry->d_fsdata = info; + INIT_LIST_HEAD(&info->cache_list_head); + INIT_LIST_HEAD(&info->remote_cache_list_head); + spin_lock_init(&info->cache_list_lock); + mutex_init(&info->remote_cache_list_lock); + mutex_init(&info->cache_pull_lock); + spin_lock_init(&info->lock); + info->dentry_type = dentry_type; + info->device_id = 0; + if (dentry_type == HMDFS_LAYER_ZERO || + dentry_type == HMDFS_LAYER_FIRST_DEVICE || + dentry_type == HMDFS_LAYER_SECOND_LOCAL || + dentry_type == HMDFS_LAYER_SECOND_REMOTE) + d_set_d_op(dentry, &hmdfs_dev_dops); + else + d_set_d_op(dentry, &hmdfs_dops); + return 0; +} + +static inline void set_symlink_flag(struct hmdfs_dentry_info *gdi) +{ + gdi->file_type = HM_SYMLINK; +} + +struct inode *fill_inode_local(struct super_block *sb, + struct inode *lower_inode) +{ + struct inode *inode; + struct hmdfs_inode_info *info; + + if (!igrab(lower_inode)) + return ERR_PTR(-ESTALE); + + inode = hmdfs_iget5_locked_local(sb, lower_inode); + if (!inode) { + hmdfs_err("iget5_locked get inode NULL"); + iput(lower_inode); + return ERR_PTR(-ENOMEM); + } + if (!(inode->i_state & I_NEW)) { + iput(lower_inode); + return inode; + } + + info = hmdfs_i(inode); +#ifdef CONFIG_HMDFS_FS_PERMISSION + info->perm = hmdfs_read_perm(lower_inode); +#endif + if (S_ISDIR(lower_inode->i_mode)) + inode->i_mode = (lower_inode->i_mode & S_IFMT) | S_IRWXU | + S_IRWXG | S_IXOTH; + else if (S_ISREG(lower_inode->i_mode)) + inode->i_mode = (lower_inode->i_mode & S_IFMT) | S_IRUSR | + S_IWUSR | S_IRGRP | S_IWGRP; + else if (S_ISLNK(lower_inode->i_mode)) + inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; + +#ifdef CONFIG_HMDFS_FS_PERMISSION + inode->i_uid = lower_inode->i_uid; + inode->i_gid = lower_inode->i_gid; +#else + inode->i_uid = KUIDT_INIT((uid_t)1000); + inode->i_gid = KGIDT_INIT((gid_t)1000); +#endif + inode->i_atime = lower_inode->i_atime; + inode->i_ctime = lower_inode->i_ctime; + inode->i_mtime = lower_inode->i_mtime; + inode->i_generation = lower_inode->i_generation; + + info->inode_type = HMDFS_LAYER_OTHER_LOCAL; + if (S_ISDIR(lower_inode->i_mode)) { + inode->i_op = &hmdfs_dir_inode_ops_local; + 
inode->i_fop = &hmdfs_dir_ops_local; + inode->i_mode |= S_IXUGO; + } else if (S_ISREG(lower_inode->i_mode)) { + inode->i_op = &hmdfs_file_iops_local; + inode->i_fop = &hmdfs_file_fops_local; + } else if (S_ISLNK(lower_inode->i_mode)) { + inode->i_op = &hmdfs_symlink_iops_local; + inode->i_fop = &hmdfs_file_fops_local; + } + + fsstack_copy_inode_size(inode, lower_inode); + unlock_new_inode(inode); + return inode; +} + +/* hmdfs_convert_lookup_flags - covert hmdfs lookup flags to vfs lookup flags + * + * @hmdfs_flags: hmdfs lookup flags + * @vfs_flags: pointer to converted flags + * + * return 0 on success, or err code on failure. + */ +int hmdfs_convert_lookup_flags(unsigned int hmdfs_flags, + unsigned int *vfs_flags) +{ + *vfs_flags = 0; + + /* currently only support HMDFS_LOOKUP_REVAL */ + if (hmdfs_flags & ~HMDFS_LOOKUP_REVAL) + return -EINVAL; + + if (hmdfs_flags & HMDFS_LOOKUP_REVAL) + *vfs_flags |= LOOKUP_REVAL; + + return 0; +} + +static int hmdfs_name_match(struct dir_context *ctx, const char *name, + int namelen, loff_t offset, u64 ino, + unsigned int d_type) +{ + struct hmdfs_name_data *buf = + container_of(ctx, struct hmdfs_name_data, ctx); + struct qstr candidate = QSTR_INIT(name, namelen); + + if (qstr_case_eq(buf->to_find, &candidate)) { + memcpy(buf->name, name, namelen); + buf->name[namelen] = 0; + buf->found = true; + return 1; + } + return 0; +} + +static int __lookup_nosensitive(struct path *lower_parent_path, + struct dentry *child_dentry, unsigned int flags, + struct path *lower_path) +{ + struct file *file; + const struct cred *cred = current_cred(); + const struct qstr *name = &child_dentry->d_name; + int err; + struct hmdfs_name_data buffer = { + .ctx.actor = hmdfs_name_match, + .to_find = name, + .name = __getname(), + .found = false, + }; + + if (!buffer.name) { + err = -ENOMEM; + goto out; + } + file = dentry_open(lower_parent_path, O_RDONLY, cred); + if (IS_ERR(file)) { + err = PTR_ERR(file); + goto put_name; + } + err = iterate_dir(file, &buffer.ctx); + fput(file); + if (err) + goto put_name; + if (buffer.found) + err = vfs_path_lookup(lower_parent_path->dentry, + lower_parent_path->mnt, buffer.name, + flags, lower_path); + else + err = -ENOENT; +put_name: + __putname(buffer.name); +out: + return err; +} + +struct dentry *hmdfs_lookup_local(struct inode *parent_inode, + struct dentry *child_dentry, + unsigned int flags) +{ + const char *d_name = child_dentry->d_name.name; + int err = 0; + struct path lower_path, lower_parent_path; + struct dentry *lower_dentry = NULL, *parent_dentry = NULL, *ret = NULL; + struct hmdfs_dentry_info *gdi = NULL; + struct inode *child_inode = NULL; + struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb); + + trace_hmdfs_lookup_local(parent_inode, child_dentry, flags); + if (child_dentry->d_name.len > NAME_MAX) { + ret = ERR_PTR(-ENAMETOOLONG); + goto out; + } + + /* local device */ + parent_dentry = dget_parent(child_dentry); + hmdfs_get_lower_path(parent_dentry, &lower_parent_path); + err = init_hmdfs_dentry_info(sbi, child_dentry, + HMDFS_LAYER_OTHER_LOCAL); + if (err) { + ret = ERR_PTR(err); + goto out_err; + } + + gdi = hmdfs_d(child_dentry); + + flags &= ~LOOKUP_FOLLOW; + err = vfs_path_lookup(lower_parent_path.dentry, lower_parent_path.mnt, + (child_dentry->d_name.name), 0, &lower_path); + if (err == -ENOENT && !sbi->s_case_sensitive) + err = __lookup_nosensitive(&lower_parent_path, child_dentry, 0, + &lower_path); + if (err && err != -ENOENT) { + ret = ERR_PTR(err); + goto out_err; + } else if (!err) { + 
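/* lower lookup hit: attach the lower path and build a positive dentry */
+		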
hmdfs_set_lower_path(child_dentry, &lower_path); + child_inode = fill_inode_local(parent_inode->i_sb, + d_inode(lower_path.dentry)); + if (S_ISLNK(d_inode(lower_path.dentry)->i_mode)) + set_symlink_flag(gdi); + if (IS_ERR(child_inode)) { + err = PTR_ERR(child_inode); + ret = ERR_PTR(err); + hmdfs_put_reset_lower_path(child_dentry); + goto out_err; + } + ret = d_splice_alias(child_inode, child_dentry); + if (IS_ERR(ret)) { + err = PTR_ERR(ret); + hmdfs_put_reset_lower_path(child_dentry); + goto out_err; + } + + check_and_fixup_ownership(parent_inode, child_inode, + lower_path.dentry, + child_dentry->d_name.name); + goto out_err; + } + /* + * return 0 here, so that vfs can continue the process of making this + * negative dentry to a positive one while creating a new file. + */ + err = 0; + ret = 0; + + lower_dentry = lookup_one_len_unlocked(d_name, lower_parent_path.dentry, + child_dentry->d_name.len); + if (IS_ERR(lower_dentry)) { + err = PTR_ERR(lower_dentry); + ret = lower_dentry; + goto out_err; + } + lower_path.dentry = lower_dentry; + lower_path.mnt = mntget(lower_parent_path.mnt); + hmdfs_set_lower_path(child_dentry, &lower_path); + +out_err: + if (!err) + hmdfs_set_time(child_dentry, jiffies); + hmdfs_put_lower_path(&lower_parent_path); + dput(parent_dentry); +out: + trace_hmdfs_lookup_local_end(parent_inode, child_dentry, err); + return ret; +} + +int hmdfs_mkdir_local_dentry(struct inode *dir, struct dentry *dentry, + umode_t mode) +{ + struct inode *lower_dir = hmdfs_i(dir)->lower_inode; + struct dentry *lower_dir_dentry = NULL; + struct super_block *sb = dir->i_sb; + struct path lower_path; + struct dentry *lower_dentry = NULL; + int error = 0; + struct inode *lower_inode = NULL; + struct inode *child_inode = NULL; + bool local_res = false; + struct cache_fs_override or; + __u16 child_perm; + kuid_t tmp_uid; + + error = hmdfs_override_dir_id_fs(&or, dir, dentry, &child_perm); + if (error) + goto cleanup; + + hmdfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_dir_dentry = lock_parent(lower_dentry); + + tmp_uid = hmdfs_override_inode_uid(lower_dir); + mode = (mode & S_IFMT) | 00771; + + error = vfs_mkdir(lower_dir, lower_dentry, mode); + hmdfs_revert_inode_uid(lower_dir, tmp_uid); + if (error) { + hmdfs_err("vfs_mkdir() error:%d", error); + goto out; + } + local_res = true; + lower_inode = d_inode(lower_dentry); +#ifdef CONFIG_HMDFS_FS_PERMISSION + error = hmdfs_persist_perm(lower_dentry, &child_perm); +#endif + child_inode = fill_inode_local(sb, lower_inode); + if (IS_ERR(child_inode)) { + error = PTR_ERR(child_inode); + goto out; + } + d_add(dentry, child_inode); + set_nlink(dir, hmdfs_i(dir)->lower_inode->i_nlink); +out: + unlock_dir(lower_dir_dentry); + if (local_res) + hmdfs_drop_remote_cache_dents(dentry->d_parent); + + if (error) { + hmdfs_clear_drop_flag(dentry->d_parent); + d_drop(dentry); + } + hmdfs_put_lower_path(&lower_path); + hmdfs_revert_dir_id_fs(&or); +cleanup: + return error; +} + +int hmdfs_mkdir_local(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + int err = 0; + + if (check_filename(dentry->d_name.name, dentry->d_name.len)) { + err = -EINVAL; + return err; + } + + if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) { + err = -EACCES; + return err; + } + err = hmdfs_mkdir_local_dentry(dir, dentry, mode); + trace_hmdfs_mkdir_local(dir, dentry, err); + return err; +} + +int hmdfs_create_local_dentry(struct inode *dir, struct dentry *dentry, + umode_t mode, bool want_excl) +{ + struct inode *lower_dir = 
NULL; + struct dentry *lower_dir_dentry = NULL; + struct super_block *sb = dir->i_sb; + struct path lower_path; + struct dentry *lower_dentry = NULL; + int error = 0; + struct inode *lower_inode = NULL; + struct inode *child_inode = NULL; + kuid_t tmp_uid; +#ifdef CONFIG_HMDFS_FS_PERMISSION + const struct cred *saved_cred = NULL; + struct fs_struct *saved_fs = NULL, *copied_fs = NULL; + __u16 child_perm; +#endif + +#ifdef CONFIG_HMDFS_FS_PERMISSION + saved_cred = hmdfs_override_file_fsids(dir, &child_perm); + if (!saved_cred) { + error = -ENOMEM; + goto path_err; + } + + saved_fs = current->fs; + copied_fs = hmdfs_override_fsstruct(saved_fs); + if (!copied_fs) { + error = -ENOMEM; + goto revert_fsids; + } +#endif + hmdfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + mode = (mode & S_IFMT) | 00660; + lower_dir_dentry = lock_parent(lower_dentry); + lower_dir = d_inode(lower_dir_dentry); + tmp_uid = hmdfs_override_inode_uid(lower_dir); + error = vfs_create(lower_dir, lower_dentry, mode, want_excl); + hmdfs_revert_inode_uid(lower_dir, tmp_uid); + unlock_dir(lower_dir_dentry); + if (error) + goto out; + + lower_inode = d_inode(lower_dentry); +#ifdef CONFIG_HMDFS_FS_PERMISSION + error = hmdfs_persist_perm(lower_dentry, &child_perm); +#endif + child_inode = fill_inode_local(sb, lower_inode); + if (IS_ERR(child_inode)) { + error = PTR_ERR(child_inode); + goto out_created; + } + d_add(dentry, child_inode); + +out_created: + hmdfs_drop_remote_cache_dents(dentry->d_parent); +out: + if (error) { + hmdfs_clear_drop_flag(dentry->d_parent); + d_drop(dentry); + } + hmdfs_put_lower_path(&lower_path); + +#ifdef CONFIG_HMDFS_FS_PERMISSION + hmdfs_revert_fsstruct(saved_fs, copied_fs); +revert_fsids: + hmdfs_revert_fsids(saved_cred); +#endif +#ifdef CONFIG_HMDFS_FS_PERMISSION +path_err: +#endif + return error; +} + +int hmdfs_create_local(struct inode *dir, struct dentry *child_dentry, + umode_t mode, bool want_excl) +{ + int err = 0; + + if (check_filename(child_dentry->d_name.name, + child_dentry->d_name.len)) { + err = -EINVAL; + return err; + } + + if (hmdfs_file_type(child_dentry->d_name.name) != HMDFS_TYPE_COMMON) { + err = -EACCES; + return err; + } + + err = hmdfs_create_local_dentry(dir, child_dentry, mode, want_excl); + trace_hmdfs_create_local(dir, child_dentry, err); + return err; +} + +int hmdfs_rmdir_local_dentry(struct inode *dir, struct dentry *dentry) +{ + struct inode *lower_dir = NULL; + struct dentry *lower_dir_dentry = NULL; + kuid_t tmp_uid; + struct path lower_path; + struct dentry *lower_dentry = NULL; + int error = 0; + + hmdfs_clear_cache_dents(dentry, true); + hmdfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_dir_dentry = lock_parent(lower_dentry); + lower_dir = d_inode(lower_dir_dentry); + tmp_uid = hmdfs_override_inode_uid(lower_dir); + + error = vfs_rmdir(lower_dir, lower_dentry); + hmdfs_revert_inode_uid(lower_dir, tmp_uid); + unlock_dir(lower_dir_dentry); + hmdfs_put_lower_path(&lower_path); + if (error) + goto path_err; + hmdfs_drop_remote_cache_dents(dentry->d_parent); +path_err: + if (error) + hmdfs_clear_drop_flag(dentry->d_parent); + return error; +} + +int hmdfs_rmdir_local(struct inode *dir, struct dentry *dentry) +{ + int err = 0; + + if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) { + err = -EACCES; + goto out; + } + + err = hmdfs_rmdir_local_dentry(dir, dentry); + if (err != 0) { + hmdfs_err("rm dir failed:%d", err); + goto out; + } + + /* drop dentry even remote failed + * it maybe cause 
that some remote device disconnected
+	 * while doing the remote rmdir
+	 */
+	d_drop(dentry);
+out:
+	/* return connect device's errcode */
+	trace_hmdfs_rmdir_local(dir, dentry, err);
+	return err;
+}
+
+int hmdfs_unlink_local_dentry(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *lower_dir = hmdfs_i(dir)->lower_inode;
+	struct dentry *lower_dir_dentry = NULL;
+	struct path lower_path;
+	struct dentry *lower_dentry = NULL;
+	int error;
+	kuid_t tmp_uid;
+
+	hmdfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	dget(lower_dentry);
+	lower_dir_dentry = lock_parent(lower_dentry);
+	tmp_uid = hmdfs_override_inode_uid(lower_dir);
+	error = vfs_unlink(lower_dir, lower_dentry, NULL);
+	hmdfs_revert_inode_uid(lower_dir, tmp_uid);
+	set_nlink(d_inode(dentry),
+		  hmdfs_i(d_inode(dentry))->lower_inode->i_nlink);
+	unlock_dir(lower_dir_dentry);
+	dput(lower_dentry);
+	if (error)
+		goto path_err;
+
+	hmdfs_drop_remote_cache_dents(dentry->d_parent);
+	d_drop(dentry);
+
+path_err:
+	/* put the lower path on both paths to avoid leaking it on error */
+	hmdfs_put_lower_path(&lower_path);
+	if (error)
+		hmdfs_clear_drop_flag(dentry->d_parent);
+	return error;
+}
+
+int hmdfs_unlink_local(struct inode *dir, struct dentry *dentry)
+{
+	if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON)
+		return -EACCES;
+
+	return hmdfs_unlink_local_dentry(dir, dentry);
+}
+
+int hmdfs_rename_local_dentry(struct inode *old_dir, struct dentry *old_dentry,
+			      struct inode *new_dir, struct dentry *new_dentry,
+			      unsigned int flags)
+{
+	struct path lower_old_path;
+	struct path lower_new_path;
+	struct dentry *lower_old_dentry = NULL;
+	struct dentry *lower_new_dentry = NULL;
+	struct dentry *lower_old_dir_dentry = NULL;
+	struct dentry *lower_new_dir_dentry = NULL;
+	struct dentry *trap = NULL;
+	int rc = 0;
+	kuid_t old_dir_uid, new_dir_uid;
+
+	if (flags)
+		return -EINVAL;
+
+	hmdfs_get_lower_path(old_dentry, &lower_old_path);
+	lower_old_dentry = lower_old_path.dentry;
+	if (!lower_old_dentry) {
+		hmdfs_err("lower_old_dentry is NULL");
+		rc = -EACCES;
+		goto out_put_old_path;
+	}
+
+	hmdfs_get_lower_path(new_dentry, &lower_new_path);
+	lower_new_dentry = lower_new_path.dentry;
+	if (!lower_new_dentry) {
+		hmdfs_err("lower_new_dentry is NULL");
+		rc = -EACCES;
+		goto out_put_new_path;
+	}
+
+	lower_old_dir_dentry = dget_parent(lower_old_dentry);
+	lower_new_dir_dentry = dget_parent(lower_new_dentry);
+	trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+	old_dir_uid = hmdfs_override_inode_uid(d_inode(lower_old_dir_dentry));
+	new_dir_uid = hmdfs_override_inode_uid(d_inode(lower_new_dir_dentry));
+
+	/* source should not be ancestor of target */
+	if (trap == lower_old_dentry) {
+		rc = -EINVAL;
+		goto out_lock;
+	}
+	/* target should not be ancestor of source */
+	if (trap == lower_new_dentry) {
+		rc = -ENOTEMPTY;
+		goto out_lock;
+	}
+
+	rc = vfs_rename(d_inode(lower_old_dir_dentry), lower_old_dentry,
+			d_inode(lower_new_dir_dentry), lower_new_dentry, NULL,
+			flags);
+out_lock:
+	dget(old_dentry);
+
+	hmdfs_revert_inode_uid(d_inode(lower_old_dir_dentry), old_dir_uid);
+	hmdfs_revert_inode_uid(d_inode(lower_new_dir_dentry), new_dir_uid);
+
+	unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+	if (rc == 0) {
+		hmdfs_drop_remote_cache_dents(old_dentry->d_parent);
+		if (old_dentry->d_parent != new_dentry->d_parent)
+			hmdfs_drop_remote_cache_dents(new_dentry->d_parent);
+	} else {
+		hmdfs_clear_drop_flag(old_dentry->d_parent);
+		if (old_dentry->d_parent != new_dentry->d_parent)
+			hmdfs_clear_drop_flag(new_dentry->d_parent);
+		
d_drop(new_dentry); + } + + dput(old_dentry); + dput(lower_old_dir_dentry); + dput(lower_new_dir_dentry); + +out_put_new_path: + hmdfs_put_lower_path(&lower_new_path); +out_put_old_path: + hmdfs_put_lower_path(&lower_old_path); + return rc; +} + +int hmdfs_rename_local(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + int err = 0; + int ret = 0; + + trace_hmdfs_rename_local(old_dir, old_dentry, new_dir, new_dentry, + flags); + if (hmdfs_file_type(old_dentry->d_name.name) != HMDFS_TYPE_COMMON || + hmdfs_file_type(new_dentry->d_name.name) != HMDFS_TYPE_COMMON) { + err = -EACCES; + goto rename_out; + } + + if (S_ISREG(old_dentry->d_inode->i_mode)) { + err = hmdfs_rename_local_dentry(old_dir, old_dentry, new_dir, + new_dentry, flags); + } else if (S_ISDIR(old_dentry->d_inode->i_mode)) { + ret = hmdfs_rename_local_dentry(old_dir, old_dentry, new_dir, + new_dentry, flags); + if (ret != 0) { + err = ret; + goto rename_out; + } + } + + if (!err) + d_invalidate(old_dentry); + +rename_out: + return err; +} + +static bool symname_is_allowed(const char *symname) +{ + size_t symname_len = strlen(symname); + const char *prefix = NULL; + int i, total; + + /** + * Adjacent dots are prohibited. + * Note that vfs has escaped back slashes yet. + */ + for (i = 0; i < symname_len - 1; ++i) + if (symname[i] == '.' && symname[i + 1] == '.') + goto out_fail; + + /** + * Check if the symname is included in the whitelist + * Note that we skipped cmping strlen because symname is end with '\0' + */ + total = sizeof(symlink_tgt_white_list) / + sizeof(*symlink_tgt_white_list); + for (i = 0; i < total; ++i) { + prefix = symlink_tgt_white_list[i]; + if (!strncmp(symname, prefix, strlen(prefix))) + goto out_succ; + } + +out_fail: + hmdfs_err("Prohibited link path"); + return false; +out_succ: + return true; +} + +int hmdfs_symlink_local(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + int err; + struct dentry *lower_dentry = NULL; + struct dentry *lower_parent_dentry = NULL; + struct path lower_path; + struct inode *child_inode = NULL; + struct inode *lower_dir_inode = hmdfs_i(dir)->lower_inode; + struct hmdfs_dentry_info *gdi = hmdfs_d(dentry); + kuid_t tmp_uid; +#ifdef CONFIG_HMDFS_FS_PERMISSION + const struct cred *saved_cred = NULL; + struct fs_struct *saved_fs = NULL, *copied_fs = NULL; + __u16 child_perm; +#endif + + if (unlikely(!symname_is_allowed(symname))) { + err = -EPERM; + goto path_err; + } + +#ifdef CONFIG_HMDFS_FS_PERMISSION + saved_cred = hmdfs_override_file_fsids(dir, &child_perm); + if (!saved_cred) { + err = -ENOMEM; + goto path_err; + } + + saved_fs = current->fs; + copied_fs = hmdfs_override_fsstruct(saved_fs); + if (!copied_fs) { + err = -ENOMEM; + goto revert_fsids; + } +#endif + hmdfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_parent_dentry = lock_parent(lower_dentry); + tmp_uid = hmdfs_override_inode_uid(lower_dir_inode); + err = vfs_symlink(lower_dir_inode, lower_dentry, symname); + hmdfs_revert_inode_uid(lower_dir_inode, tmp_uid); + unlock_dir(lower_parent_dentry); + if (err) + goto out_err; + set_symlink_flag(gdi); +#ifdef CONFIG_HMDFS_FS_PERMISSION + err = hmdfs_persist_perm(lower_dentry, &child_perm); +#endif + child_inode = fill_inode_local(dir->i_sb, d_inode(lower_dentry)); + if (IS_ERR(child_inode)) { + err = PTR_ERR(child_inode); + goto out_err; + } + d_add(dentry, child_inode); + fsstack_copy_attr_times(dir, lower_dir_inode); + 
fsstack_copy_inode_size(dir, lower_dir_inode); + +out_err: + hmdfs_put_lower_path(&lower_path); +#ifdef CONFIG_HMDFS_FS_PERMISSION + hmdfs_revert_fsstruct(saved_fs, copied_fs); +revert_fsids: + hmdfs_revert_fsids(saved_cred); +#endif +path_err: + trace_hmdfs_symlink_local(dir, dentry, err); + return err; +} + +static const char *hmdfs_get_link_local(struct dentry *dentry, + struct inode *inode, + struct delayed_call *done) +{ + const char *link = NULL; + struct dentry *lower_dentry = NULL; + struct inode *lower_inode = NULL; + struct path lower_path; + + if (!dentry) { + hmdfs_err("dentry NULL"); + link = ERR_PTR(-ECHILD); + goto link_out; + } + + hmdfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_inode = d_inode(lower_dentry); + if (!lower_inode->i_op || !lower_inode->i_op->get_link) { + hmdfs_err("The lower inode doesn't support get_link i_op"); + link = ERR_PTR(-EINVAL); + goto out; + } + + link = lower_inode->i_op->get_link(lower_dentry, lower_inode, done); + if (IS_ERR_OR_NULL(link)) + goto out; + fsstack_copy_attr_atime(inode, lower_inode); +out: + hmdfs_put_lower_path(&lower_path); + trace_hmdfs_get_link_local(inode, dentry, PTR_ERR_OR_ZERO(link)); +link_out: + return link; +} + +static int hmdfs_setattr_local(struct dentry *dentry, struct iattr *ia) +{ + struct inode *inode = d_inode(dentry); + struct inode *lower_inode = hmdfs_i(inode)->lower_inode; + struct path lower_path; + struct dentry *lower_dentry = NULL; + struct iattr lower_ia; + unsigned int ia_valid = ia->ia_valid; + int err = 0; + kuid_t tmp_uid; + + hmdfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + memcpy(&lower_ia, ia, sizeof(lower_ia)); + if (ia_valid & ATTR_FILE) + lower_ia.ia_file = hmdfs_f(ia->ia_file)->lower_file; + lower_ia.ia_valid &= ~(ATTR_UID | ATTR_GID | ATTR_MODE); + if (ia_valid & ATTR_SIZE) { + err = inode_newsize_ok(inode, ia->ia_size); + if (err) + goto out; + truncate_setsize(inode, ia->ia_size); + } + inode_lock(lower_inode); + tmp_uid = hmdfs_override_inode_uid(lower_inode); + + err = notify_change(lower_dentry, &lower_ia, NULL); + i_size_write(inode, i_size_read(lower_inode)); + inode->i_atime = lower_inode->i_atime; + inode->i_mtime = lower_inode->i_mtime; + inode->i_ctime = lower_inode->i_ctime; + err = update_inode_to_dentry(dentry, inode); + hmdfs_revert_inode_uid(lower_inode, tmp_uid); + + inode_unlock(lower_inode); +out: + hmdfs_put_lower_path(&lower_path); + return err; +} + +static int hmdfs_getattr_local(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) +{ + struct path lower_path; + int ret; + + hmdfs_get_lower_path(path->dentry, &lower_path); + ret = vfs_getattr(&lower_path, stat, request_mask, flags); + stat->ino = d_inode(path->dentry)->i_ino; + hmdfs_put_lower_path(&lower_path); + + return ret; +} + +int hmdfs_permission(struct inode *inode, int mask) +{ +#ifdef CONFIG_HMDFS_FS_PERMISSION + unsigned int mode = inode->i_mode; + struct hmdfs_inode_info *hii = hmdfs_i(inode); + kuid_t cur_uid = current_fsuid(); + + if (uid_eq(cur_uid, ROOT_UID) || uid_eq(cur_uid, SYSTEM_UID)) + return 0; + + if (uid_eq(cur_uid, inode->i_uid)) { + mode >>= 6; + } else if (in_group_p(inode->i_gid)) { + mode >>= 3; + } else if (is_pkg_auth(hii->perm)) { + if (uid_eq(cur_uid, inode->i_uid)) + return 0; + } else if (is_system_auth(hii->perm)) { + if (in_group_p(MEDIA_RW_GID)) + return 0; + } + + if ((mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) + return 0; + + trace_hmdfs_permission(inode->i_ino); 
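+	/*
+	 * Classic mode-bit check: e.g. for mode 0660, an owner requesting
+	 * MAY_READ|MAY_WRITE already passed the mask test above; whatever
+	 * falls through to here is denied.
+	 */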
+ return -EACCES; +#else + + return 0; +#endif +} + +static ssize_t hmdfs_local_listxattr(struct dentry *dentry, char *list, + size_t size) +{ + struct path lower_path; + ssize_t res = 0; + size_t r_size = size; + + if (!hmdfs_support_xattr(dentry)) + return -EOPNOTSUPP; + + if (size > HMDFS_LISTXATTR_SIZE_MAX) + r_size = HMDFS_LISTXATTR_SIZE_MAX; + + hmdfs_get_lower_path(dentry, &lower_path); + res = vfs_listxattr(lower_path.dentry, list, r_size); + hmdfs_put_lower_path(&lower_path); + + if (res == -ERANGE && r_size != size) { + hmdfs_info("no support listxattr size over than %d", + HMDFS_LISTXATTR_SIZE_MAX); + res = -E2BIG; + } + + return res; +} + +const struct inode_operations hmdfs_symlink_iops_local = { + .get_link = hmdfs_get_link_local, + .permission = hmdfs_permission, + .setattr = hmdfs_setattr_local, +}; + +const struct inode_operations hmdfs_dir_inode_ops_local = { + .lookup = hmdfs_lookup_local, + .mkdir = hmdfs_mkdir_local, + .create = hmdfs_create_local, + .rmdir = hmdfs_rmdir_local, + .unlink = hmdfs_unlink_local, + .symlink = hmdfs_symlink_local, + .rename = hmdfs_rename_local, + .permission = hmdfs_permission, + .setattr = hmdfs_setattr_local, + .getattr = hmdfs_getattr_local, +}; + +const struct inode_operations hmdfs_file_iops_local = { + .setattr = hmdfs_setattr_local, + .getattr = hmdfs_getattr_local, + .permission = hmdfs_permission, + .listxattr = hmdfs_local_listxattr, +}; diff --git a/fs/hmdfs/inode_merge.c b/fs/hmdfs/inode_merge.c new file mode 100644 index 0000000000000000000000000000000000000000..f84f57d5e85c3664768b5c732f257fd765059ade --- /dev/null +++ b/fs/hmdfs/inode_merge.c @@ -0,0 +1,1357 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/inode_merge.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include "hmdfs_merge_view.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "authority/authentication.h" +#include "hmdfs_trace.h" + +struct kmem_cache *hmdfs_dentry_merge_cachep; + +struct dentry *hmdfs_get_fst_lo_d(struct dentry *dentry) +{ + struct hmdfs_dentry_info_merge *dim = hmdfs_dm(dentry); + struct hmdfs_dentry_comrade *comrade = NULL; + struct dentry *d = NULL; + + mutex_lock(&dim->comrade_list_lock); + comrade = list_first_entry_or_null(&dim->comrade_list, + struct hmdfs_dentry_comrade, list); + if (comrade) + d = dget(comrade->lo_d); + mutex_unlock(&dim->comrade_list_lock); + return d; +} + +struct dentry *hmdfs_get_lo_d(struct dentry *dentry, int dev_id) +{ + struct hmdfs_dentry_info_merge *dim = hmdfs_dm(dentry); + struct hmdfs_dentry_comrade *comrade = NULL; + struct dentry *d = NULL; + + mutex_lock(&dim->comrade_list_lock); + list_for_each_entry(comrade, &dim->comrade_list, list) { + if (comrade->dev_id == dev_id) { + d = dget(comrade->lo_d); + break; + } + } + mutex_unlock(&dim->comrade_list_lock); + return d; +} + +static void update_inode_attr(struct inode *inode, struct dentry *child_dentry) +{ + struct inode *li = NULL; + struct hmdfs_dentry_info_merge *cdi = hmdfs_dm(child_dentry); + struct hmdfs_dentry_comrade *comrade = NULL; + struct hmdfs_dentry_comrade *fst_comrade = NULL; + + mutex_lock(&cdi->comrade_list_lock); + fst_comrade = list_first_entry(&cdi->comrade_list, + struct hmdfs_dentry_comrade, list); + list_for_each_entry(comrade, &cdi->comrade_list, list) { + li = d_inode(comrade->lo_d); + if (!li) + continue; + + if (comrade == fst_comrade) { + inode->i_atime = li->i_atime; + inode->i_ctime = li->i_ctime; + inode->i_mtime = li->i_mtime; + 
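/* the head comrade also dictates i_size; later ones only bump i_mtime */
+			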
inode->i_size = li->i_size; + continue; + } + + if (hmdfs_time_compare(&inode->i_mtime, &li->i_mtime) < 0) + inode->i_mtime = li->i_mtime; + } + mutex_unlock(&cdi->comrade_list_lock); +} + +static int get_num_comrades(struct dentry *dentry) +{ + struct list_head *pos; + struct hmdfs_dentry_info_merge *dim = hmdfs_dm(dentry); + int count = 0; + + mutex_lock(&dim->comrade_list_lock); + list_for_each(pos, &dim->comrade_list) + count++; + mutex_unlock(&dim->comrade_list_lock); + return count; +} + +static struct inode *fill_inode_merge(struct super_block *sb, + struct inode *parent_inode, + struct dentry *child_dentry, + struct dentry *lo_d_dentry) +{ + struct dentry *fst_lo_d = NULL; + struct hmdfs_inode_info *info = NULL; + struct inode *inode = NULL; + umode_t mode; + + if (lo_d_dentry) { + fst_lo_d = lo_d_dentry; + dget(fst_lo_d); + } else { + fst_lo_d = hmdfs_get_fst_lo_d(child_dentry); + } + if (!fst_lo_d) { + inode = ERR_PTR(-EINVAL); + goto out; + } + if (hmdfs_i(parent_inode)->inode_type == HMDFS_LAYER_ZERO) + inode = hmdfs_iget_locked_root(sb, HMDFS_ROOT_MERGE, NULL, + NULL); + else + inode = hmdfs_iget5_locked_merge(sb, fst_lo_d); + if (!inode) { + hmdfs_err("iget5_locked get inode NULL"); + inode = ERR_PTR(-ENOMEM); + goto out; + } + if (!(inode->i_state & I_NEW)) + goto out; + info = hmdfs_i(inode); + if (hmdfs_i(parent_inode)->inode_type == HMDFS_LAYER_ZERO) + info->inode_type = HMDFS_LAYER_FIRST_MERGE; + else + info->inode_type = HMDFS_LAYER_OTHER_MERGE; + + inode->i_uid = KUIDT_INIT((uid_t)1000); + inode->i_gid = KGIDT_INIT((gid_t)1000); + + update_inode_attr(inode, child_dentry); + mode = d_inode(fst_lo_d)->i_mode; + /* remote symlink need to treat as regfile, + * the specific operation is performed by device_view. + * local symlink is managed by merge_view. 
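+	 * (A symlink created on a peer thus surfaces here with a regular
+	 * file mode; only a local symlink, device_id == 0, is given
+	 * hmdfs_symlink_iops_merge below.)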
+ */
+	if (hm_islnk(hmdfs_d(fst_lo_d)->file_type) &&
+	    hmdfs_d(fst_lo_d)->device_id == 0) {
+		inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
+		inode->i_op = &hmdfs_symlink_iops_merge;
+		inode->i_fop = &hmdfs_file_fops_merge;
+		set_nlink(inode, 1);
+	} else if (S_ISREG(mode)) { // Regular file 0660
+		inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
+		inode->i_op = &hmdfs_file_iops_merge;
+		inode->i_fop = &hmdfs_file_fops_merge;
+		set_nlink(inode, 1);
+	} else if (S_ISDIR(mode)) { // Directory 0771
+		inode->i_mode = S_IFDIR | S_IRWXU | S_IRWXG | S_IXOTH;
+		inode->i_op = &hmdfs_dir_iops_merge;
+		inode->i_fop = &hmdfs_dir_fops_merge;
+		set_nlink(inode, get_num_comrades(child_dentry) + 2);
+	}
+
+	unlock_new_inode(inode);
+out:
+	dput(fst_lo_d);
+	return inode;
+}
+
+struct hmdfs_dentry_comrade *alloc_comrade(struct dentry *lo_d, int dev_id)
+{
+	struct hmdfs_dentry_comrade *comrade = NULL;
+
+	// A file has only one comrade; consider folding {comrade, list + list lock}
+	comrade = kzalloc(sizeof(*comrade), GFP_KERNEL);
+	if (unlikely(!comrade))
+		return ERR_PTR(-ENOMEM);
+
+	comrade->lo_d = lo_d;
+	comrade->dev_id = dev_id;
+	dget(lo_d);
+	return comrade;
+}
+
+void link_comrade(struct list_head *onstack_comrades_head,
+		  struct hmdfs_dentry_comrade *comrade)
+{
+	struct hmdfs_dentry_comrade *c = NULL;
+
+	list_for_each_entry(c, onstack_comrades_head, list) {
+		if (likely(c->dev_id != comrade->dev_id))
+			continue;
+		hmdfs_err("Redundant comrade of device %llu", c->dev_id);
+		dput(comrade->lo_d);
+		kfree(comrade);
+		WARN_ON(1);
+		return;
+	}
+
+	if (comrade_is_local(comrade))
+		list_add(&comrade->list, onstack_comrades_head);
+	else
+		list_add_tail(&comrade->list, onstack_comrades_head);
+}
+
+/**
+ * assign_comrades_unlocked - assign a child dentry with comrades
+ *
+ * We set up a local list of all the comrades we found and place the whole
+ * list onto the dentry_info at once to achieve atomicity.
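+ *
+ * Callers build the list on the stack (see the LIST_HEAD(head) in
+ * lookup_merge_normal and do_lookup_merge_root) and splice it in under
+ * comrade_list_lock in one shot.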
+ */ +static void assign_comrades_unlocked(struct dentry *child_dentry, + struct list_head *onstack_comrades_head) +{ + struct hmdfs_dentry_info_merge *cdi = hmdfs_dm(child_dentry); + + mutex_lock(&cdi->comrade_list_lock); + WARN_ON(!list_empty(&cdi->comrade_list)); + list_splice_init(onstack_comrades_head, &cdi->comrade_list); + mutex_unlock(&cdi->comrade_list_lock); +} + +static struct hmdfs_dentry_comrade *lookup_comrade(struct path lower_path, + const char *d_name, + int dev_id, + unsigned int flags) +{ + struct path path; + struct hmdfs_dentry_comrade *comrade = NULL; + int err; + + err = vfs_path_lookup(lower_path.dentry, lower_path.mnt, d_name, flags, + &path); + if (err) + return ERR_PTR(err); + + comrade = alloc_comrade(path.dentry, dev_id); + path_put(&path); + return comrade; +} + +/** + * conf_name_trans_nop - do nothing but copy + * + * WARNING: always check before translation + */ +static char *conf_name_trans_nop(struct dentry *d) +{ + return kstrndup(d->d_name.name, d->d_name.len, GFP_KERNEL); +} + +/** + * conf_name_trans_dir - conflicted name translation for directory + * + * WARNING: always check before translation + */ +static char *conf_name_trans_dir(struct dentry *d) +{ + int len = d->d_name.len - strlen(CONFLICTING_DIR_SUFFIX); + + return kstrndup(d->d_name.name, len, GFP_KERNEL); +} + +/** + * conf_name_trans_reg - conflicted name translation for regular file + * + * WARNING: always check before translation + */ +static char *conf_name_trans_reg(struct dentry *d, int *dev_id) +{ + int dot_pos, start_cpy_pos, num_len, i; + int len = d->d_name.len; + char *name = kstrndup(d->d_name.name, d->d_name.len, GFP_KERNEL); + + if (unlikely(!name)) + return NULL; + + // find the last dot if possible + for (dot_pos = len - 1; dot_pos >= 0; dot_pos--) { + if (name[dot_pos] == '.') + break; + } + if (dot_pos == -1) + dot_pos = len; + + // retrieve the conf sn (i.e. 
dev_id)
+	num_len = 0;
+	for (i = dot_pos - 1; i >= 0; i--) {
+		if (name[i] >= '0' && name[i] <= '9')
+			num_len++;
+		else
+			break;
+	}
+
+	*dev_id = 0;
+	for (i = 0; i < num_len; i++)
+		*dev_id = *dev_id * 10 + name[dot_pos - num_len + i] - '0';
+
+	// move the file suffix ('\0' included) right after the file name
+	start_cpy_pos =
+		dot_pos - num_len - strlen(CONFLICTING_FILE_CONST_SUFFIX);
+	memmove(name + start_cpy_pos, name + dot_pos, len - dot_pos + 1);
+	return name;
+}
+
+int check_filename(const char *name, int len)
+{
+	int cmp_res = 0;
+
+	if (len >= strlen(CONFLICTING_DIR_SUFFIX)) {
+		cmp_res = strncmp(name + len - strlen(CONFLICTING_DIR_SUFFIX),
+				  CONFLICTING_DIR_SUFFIX,
+				  strlen(CONFLICTING_DIR_SUFFIX));
+		if (cmp_res == 0)
+			return DT_DIR;
+	}
+
+	if (len >= strlen(CONFLICTING_FILE_CONST_SUFFIX)) {
+		int dot_pos, start_cmp_pos, num_len, i;
+
+		for (dot_pos = len - 1; dot_pos >= 0; dot_pos--) {
+			if (name[dot_pos] == '.')
+				break;
+		}
+		if (dot_pos == -1)
+			dot_pos = len;
+
+		num_len = 0;
+		for (i = dot_pos - 1; i >= 0; i--) {
+			if (name[i] >= '0' && name[i] <= '9')
+				num_len++;
+			else
+				break;
+		}
+
+		start_cmp_pos = dot_pos - num_len -
+				strlen(CONFLICTING_FILE_CONST_SUFFIX);
+		cmp_res = strncmp(name + start_cmp_pos,
+				  CONFLICTING_FILE_CONST_SUFFIX,
+				  strlen(CONFLICTING_FILE_CONST_SUFFIX));
+		if (cmp_res == 0)
+			return DT_REG;
+	}
+
+	return 0;
+}
+
+static int lookup_merge_normal(struct dentry *child_dentry, unsigned int flags)
+{
+	struct dentry *parent_dentry = dget_parent(child_dentry);
+	struct hmdfs_dentry_info_merge *pdi = hmdfs_dm(parent_dentry);
+	struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb);
+	struct hmdfs_dentry_comrade *comrade, *cc;
+	struct path lo_p, path;
+	LIST_HEAD(head);
+	int ret = -ENOENT;
+	int dev_id = -1;
+	int ftype;
+	char *lo_name;
+	umode_t mode;
+
+	ftype = check_filename(child_dentry->d_name.name,
+			       child_dentry->d_name.len);
+	if (ftype == DT_REG)
+		lo_name = conf_name_trans_reg(child_dentry, &dev_id);
+	else if (ftype == DT_DIR)
+		lo_name = conf_name_trans_dir(child_dentry);
+	else
+		lo_name = conf_name_trans_nop(child_dentry);
+	if (unlikely(!lo_name)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = hmdfs_get_path_in_sb(child_dentry->d_sb, sbi->real_dst,
+				   LOOKUP_DIRECTORY, &path);
+	if (ret) {
+		if (ret == -ENOENT)
+			ret = -EINVAL;
+		goto free;
+	}
+	lo_p.mnt = path.mnt;
+
+	ret = -ENOENT;
+	mutex_lock(&pdi->comrade_list_lock);
+	list_for_each_entry(cc, &pdi->comrade_list, list) {
+		if (ftype == DT_REG && cc->dev_id != dev_id)
+			continue;
+
+		lo_p.dentry = cc->lo_d;
+		comrade = lookup_comrade(lo_p, lo_name, cc->dev_id, flags);
+		if (IS_ERR(comrade)) {
+			ret = ret ? PTR_ERR(comrade) : 0;
+			continue;
+		}
+
+		mode = hmdfs_cm(comrade);
+		if ((ftype == DT_DIR && !S_ISDIR(mode)) ||
+		    (ftype == DT_REG && S_ISDIR(mode))) {
+			destroy_comrade(comrade);
+			ret = ret ? PTR_ERR(comrade) : 0;
+			continue;
+		}
+
+		ret = 0;
+		link_comrade(&head, comrade);
+
+		if (!S_ISDIR(mode))
+			break;
+	}
+	mutex_unlock(&pdi->comrade_list_lock);
+
+	assign_comrades_unlocked(child_dentry, &head);
+	path_put(&path);
+free:
+	kfree(lo_name);
+out:
+	dput(parent_dentry);
+	return ret;
+}
+
+/**
+ * do_lookup_merge_root - look up the root of the merge view (root/merge_view)
+ *
+ * It's common for a network filesystem to incur various faults, so we
+ * intend to show mercy for faults here, except for faults reported by the
+ * local device.
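+ *
+ * Concretely, in the loop below, failing to look up a remote device's
+ * comrade (e.g. because that peer just went offline) only skips that peer,
+ * while failing to look up the local device view aborts the whole lookup
+ * with the error.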
+ */ +static int do_lookup_merge_root(struct path path_dev, + struct dentry *child_dentry, unsigned int flags) +{ + struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb); + struct hmdfs_dentry_comrade *comrade; + const int buf_len = + max((int)HMDFS_CID_SIZE + 1, (int)sizeof(DEVICE_VIEW_LOCAL)); + char *buf = kzalloc(buf_len, GFP_KERNEL); + struct hmdfs_peer *peer; + LIST_HEAD(head); + int ret; + + if (!buf) + return -ENOMEM; + + // lookup real_dst/device_view/local + memcpy(buf, DEVICE_VIEW_LOCAL, sizeof(DEVICE_VIEW_LOCAL)); + comrade = lookup_comrade(path_dev, buf, HMDFS_DEVID_LOCAL, flags); + if (IS_ERR(comrade)) { + ret = PTR_ERR(comrade); + goto out; + } + link_comrade(&head, comrade); + + // lookup real_dst/device_view/cidxx + mutex_lock(&sbi->connections.node_lock); + list_for_each_entry(peer, &sbi->connections.node_list, list) { + mutex_unlock(&sbi->connections.node_lock); + memcpy(buf, peer->cid, HMDFS_CID_SIZE); + comrade = lookup_comrade(path_dev, buf, peer->device_id, flags); + if (IS_ERR(comrade)) + continue; + + link_comrade(&head, comrade); + mutex_lock(&sbi->connections.node_lock); + } + mutex_unlock(&sbi->connections.node_lock); + + assign_comrades_unlocked(child_dentry, &head); + ret = 0; + +out: + kfree(buf); + return ret; +} + +// mkdir -p +static void lock_root_inode_shared(struct inode *root, bool *locked, bool *down) +{ + struct rw_semaphore *sem = &root->i_rwsem; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 3, 0) +#define RWSEM_READER_OWNED (1UL << 0) +#define RWSEM_RD_NONSPINNABLE (1UL << 1) +#define RWSEM_WR_NONSPINNABLE (1UL << 2) +#define RWSEM_NONSPINNABLE (RWSEM_RD_NONSPINNABLE | RWSEM_WR_NONSPINNABLE) +#define RWSEM_OWNER_FLAGS_MASK (RWSEM_READER_OWNED | RWSEM_NONSPINNABLE) + struct task_struct *sem_owner = + (struct task_struct *)(atomic_long_read(&sem->owner) & + ~RWSEM_OWNER_FLAGS_MASK); +#else + struct task_struct *sem_owner = sem->owner; +#endif + + *locked = false; + *down = false; + + if (sem_owner != current) + return; + + // It's us that takes the wsem + if (!inode_trylock_shared(root)) { + downgrade_write(sem); + *down = true; + } + *locked = true; +} + +static void restore_root_inode_sem(struct inode *root, bool locked, bool down) +{ + if (!locked) + return; + + inode_unlock_shared(root); + if (down) + inode_lock(root); +} + +static int lookup_merge_root(struct inode *root_inode, + struct dentry *child_dentry, unsigned int flags) +{ + struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb); + struct path path_dev; + int ret = -ENOENT; + int buf_len; + char *buf = NULL; + bool locked, down; + + // consider additional one slash and one '\0' + buf_len = strlen(sbi->real_dst) + 1 + sizeof(DEVICE_VIEW_ROOT); + if (buf_len > PATH_MAX) + return -ENAMETOOLONG; + + buf = kmalloc(buf_len, GFP_KERNEL); + if (unlikely(!buf)) + return -ENOMEM; + + sprintf(buf, "%s/%s", sbi->real_dst, DEVICE_VIEW_ROOT); + lock_root_inode_shared(root_inode, &locked, &down); + ret = hmdfs_get_path_in_sb(child_dentry->d_sb, buf, LOOKUP_DIRECTORY, + &path_dev); + if (ret) + goto free_buf; + + ret = do_lookup_merge_root(path_dev, child_dentry, flags); + path_put(&path_dev); + +free_buf: + kfree(buf); + restore_root_inode_sem(root_inode, locked, down); + return ret; +} + +int init_hmdfs_dentry_info_merge(struct hmdfs_sb_info *sbi, + struct dentry *dentry) +{ + struct hmdfs_dentry_info_merge *info = NULL; + + info = kmem_cache_zalloc(hmdfs_dentry_merge_cachep, GFP_NOFS); + if (!info) + return -ENOMEM; + + info->ctime = jiffies; + INIT_LIST_HEAD(&info->comrade_list); + 
mutex_init(&info->comrade_list_lock);
+	d_set_d_op(dentry, &hmdfs_dops_merge);
+	dentry->d_fsdata = info;
+	return 0;
+}
+
+static void update_dm(struct dentry *dst, struct dentry *src)
+{
+	struct hmdfs_dentry_info_merge *dmi_dst = hmdfs_dm(dst);
+	struct hmdfs_dentry_info_merge *dmi_src = hmdfs_dm(src);
+	LIST_HEAD(tmp_dst);
+	LIST_HEAD(tmp_src);
+
+	/* Mobilize all the comrades */
+	mutex_lock(&dmi_dst->comrade_list_lock);
+	mutex_lock(&dmi_src->comrade_list_lock);
+	list_splice_init(&dmi_dst->comrade_list, &tmp_dst);
+	list_splice_init(&dmi_src->comrade_list, &tmp_src);
+	list_splice(&tmp_dst, &dmi_src->comrade_list);
+	list_splice(&tmp_src, &dmi_dst->comrade_list);
+	mutex_unlock(&dmi_src->comrade_list_lock);
+	mutex_unlock(&dmi_dst->comrade_list_lock);
+}
+
+// do this in a map-reduce manner
+struct dentry *hmdfs_lookup_merge(struct inode *parent_inode,
+				  struct dentry *child_dentry,
+				  unsigned int flags)
+{
+	bool create = flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET);
+	struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb);
+	struct hmdfs_inode_info *pii = hmdfs_i(parent_inode);
+	struct inode *child_inode = NULL;
+	struct dentry *ret_dentry = NULL;
+	int err = 0;
+
+	/*
+	 * Internal flags like LOOKUP_CREATE should not be passed down to the
+	 * device view. LOOKUP_REVAL is needed because the dentry cache in
+	 * hmdfs might be stale after a rename in the lower fs. LOOKUP_FOLLOW
+	 * is not needed because get_link is defined for symlink inodes in the
+	 * merge_view. LOOKUP_DIRECTORY is not needed because the merge_view
+	 * can judge by itself whether the result is a directory.
+	 */
+	flags = flags & LOOKUP_REVAL;
+
+	child_dentry->d_fsdata = NULL;
+
+	if (child_dentry->d_name.len > NAME_MAX) {
+		err = -ENAMETOOLONG;
+		goto out;
+	}
+
+	err = init_hmdfs_dentry_info_merge(sbi, child_dentry);
+	if (unlikely(err))
+		goto out;
+
+	if (pii->inode_type == HMDFS_LAYER_ZERO)
+		err = lookup_merge_root(parent_inode, child_dentry, flags);
+	else
+		err = lookup_merge_normal(child_dentry, flags);
+
+	if (!err) {
+		struct hmdfs_inode_info *info = NULL;
+
+		child_inode = fill_inode_merge(parent_inode->i_sb, parent_inode,
+					       child_dentry, NULL);
+		ret_dentry = d_splice_alias(child_inode, child_dentry);
+		if (IS_ERR(ret_dentry)) {
+			clear_comrades(child_dentry);
+			err = PTR_ERR(ret_dentry);
+			goto out;
+		}
+		if (ret_dentry) {
+			update_dm(ret_dentry, child_dentry);
+			child_dentry = ret_dentry;
+		}
+		info = hmdfs_i(child_inode);
+		if (info->inode_type == HMDFS_LAYER_FIRST_MERGE)
+			hmdfs_root_inode_perm_init(child_inode);
+		else
+			check_and_fixup_ownership_remote(parent_inode,
+							 child_dentry);
+
+		goto out;
+	}
+
+	if ((err == -ENOENT) && create)
+		err = 0;
+
+out:
+	hmdfs_trace_merge(trace_hmdfs_lookup_merge_end, parent_inode,
+			  child_dentry, err);
+	return err ? ERR_PTR(err) : ret_dentry;
+}
+
+static int hmdfs_getattr_merge(const struct path *path, struct kstat *stat,
+			       u32 request_mask, unsigned int flags)
+{
+	int ret;
+	struct path lower_path = {
+		.dentry = hmdfs_get_fst_lo_d(path->dentry),
+		.mnt = path->mnt,
+	};
+
+	if (unlikely(!lower_path.dentry)) {
+		hmdfs_err("Fatal!
No comrades"); + ret = -EINVAL; + goto out; + } + + ret = vfs_getattr(&lower_path, stat, request_mask, flags); +out: + dput(lower_path.dentry); + return ret; +} + +static int hmdfs_setattr_merge(struct dentry *dentry, struct iattr *ia) +{ + struct inode *inode = d_inode(dentry); + struct dentry *lower_dentry = hmdfs_get_fst_lo_d(dentry); + struct inode *lower_inode = NULL; + struct iattr lower_ia; + unsigned int ia_valid = ia->ia_valid; + int err = 0; + kuid_t tmp_uid; + + if (!lower_dentry) { + WARN_ON(1); + err = -EINVAL; + goto out; + } + + lower_inode = d_inode(lower_dentry); + memcpy(&lower_ia, ia, sizeof(lower_ia)); + if (ia_valid & ATTR_FILE) + lower_ia.ia_file = hmdfs_f(ia->ia_file)->lower_file; + lower_ia.ia_valid &= ~(ATTR_UID | ATTR_GID | ATTR_MODE); + + inode_lock(lower_inode); + tmp_uid = hmdfs_override_inode_uid(lower_inode); + + err = notify_change(lower_dentry, &lower_ia, NULL); + i_size_write(inode, i_size_read(lower_inode)); + inode->i_atime = lower_inode->i_atime; + inode->i_mtime = lower_inode->i_mtime; + inode->i_ctime = lower_inode->i_ctime; + hmdfs_revert_inode_uid(lower_inode, tmp_uid); + + inode_unlock(lower_inode); + +out: + dput(lower_dentry); + return err; +} + +const struct inode_operations hmdfs_file_iops_merge = { + .getattr = hmdfs_getattr_merge, + .setattr = hmdfs_setattr_merge, + .permission = hmdfs_permission, +}; + +int do_mkdir_merge(struct inode *parent_inode, struct dentry *child_dentry, + umode_t mode, struct inode *lo_i_parent, + struct dentry *lo_d_child) +{ + int ret = 0; + struct super_block *sb = parent_inode->i_sb; + struct inode *child_inode = NULL; + + ret = vfs_mkdir(lo_i_parent, lo_d_child, mode); + if (ret) + goto out; + + child_inode = + fill_inode_merge(sb, parent_inode, child_dentry, lo_d_child); + if (IS_ERR(child_inode)) { + ret = PTR_ERR(child_inode); + goto out; + } + + d_add(child_dentry, child_inode); + /* nlink should be increased with the joining of children */ + set_nlink(parent_inode, 2); +out: + return ret; +} + +int do_create_merge(struct inode *parent_inode, struct dentry *child_dentry, + umode_t mode, bool want_excl, struct inode *lo_i_parent, + struct dentry *lo_d_child) +{ + int ret = 0; + struct super_block *sb = parent_inode->i_sb; + struct inode *child_inode = NULL; + + ret = vfs_create(lo_i_parent, lo_d_child, mode, want_excl); + if (ret) + goto out; + + child_inode = + fill_inode_merge(sb, parent_inode, child_dentry, lo_d_child); + if (IS_ERR(child_inode)) { + ret = PTR_ERR(child_inode); + goto out; + } + + d_add(child_dentry, child_inode); + /* nlink should be increased with the joining of children */ + set_nlink(parent_inode, 2); +out: + return ret; +} + +int do_symlink_merge(struct inode *parent_inode, struct dentry *child_dentry, + const char *symname, struct inode *lower_parent_inode, + struct dentry *lo_d_child) +{ + int ret = 0; + struct super_block *sb = parent_inode->i_sb; + struct inode *child_inode = NULL; + + ret = vfs_symlink(lower_parent_inode, lo_d_child, symname); + if (ret) + goto out; + + child_inode = + fill_inode_merge(sb, parent_inode, child_dentry, lo_d_child); + if (IS_ERR(child_inode)) { + ret = PTR_ERR(child_inode); + goto out; + } + + d_add(child_dentry, child_inode); + fsstack_copy_attr_times(parent_inode, lower_parent_inode); + fsstack_copy_inode_size(parent_inode, lower_parent_inode); +out: + return ret; +} + +int hmdfs_do_ops_merge(struct inode *i_parent, struct dentry *d_child, + struct dentry *lo_d_child, struct path path, + struct hmdfs_recursive_para *rec_op_para) +{ + int ret = 0; 
+ + if (rec_op_para->is_last) { + switch (rec_op_para->opcode) { + case F_MKDIR_MERGE: + ret = do_mkdir_merge(i_parent, d_child, + rec_op_para->mode, + d_inode(path.dentry), lo_d_child); + break; + case F_CREATE_MERGE: + ret = do_create_merge(i_parent, d_child, + rec_op_para->mode, + rec_op_para->want_excl, + d_inode(path.dentry), lo_d_child); + break; + case F_SYMLINK_MERGE: + ret = do_symlink_merge(i_parent, d_child, + rec_op_para->name, + d_inode(path.dentry), + lo_d_child); + break; + default: + ret = -EINVAL; + break; + } + } else { + ret = vfs_mkdir(d_inode(path.dentry), lo_d_child, + rec_op_para->mode); + } + if (ret) + hmdfs_err("vfs_ops failed, ops %d, err = %d", + rec_op_para->opcode, ret); + return ret; +} + +int hmdfs_create_lower_dentry(struct inode *i_parent, struct dentry *d_child, + struct dentry *lo_d_parent, bool is_dir, + struct hmdfs_recursive_para *rec_op_para) +{ + struct hmdfs_sb_info *sbi = i_parent->i_sb->s_fs_info; + struct hmdfs_dentry_comrade *new_comrade = NULL; + struct dentry *lo_d_child = NULL; + char *path_buf = kmalloc(PATH_MAX, GFP_KERNEL); + char *absolute_path_buf = kmalloc(PATH_MAX, GFP_KERNEL); + char *path_name = NULL; + struct path path = { .mnt = NULL, .dentry = NULL }; + int ret = 0; + + if (unlikely(!path_buf || !absolute_path_buf)) { + ret = -ENOMEM; + goto out; + } + + path_name = dentry_path_raw(lo_d_parent, path_buf, PATH_MAX); + if (IS_ERR(path_name)) { + ret = PTR_ERR(path_name); + goto out; + } + if ((strlen(sbi->real_dst) + strlen(path_name) + + strlen(d_child->d_name.name) + 2) > PATH_MAX) { + ret = -ENAMETOOLONG; + goto out; + } + + sprintf(absolute_path_buf, "%s%s/%s", sbi->real_dst, path_name, + d_child->d_name.name); + + if (is_dir) + lo_d_child = kern_path_create(AT_FDCWD, absolute_path_buf, + &path, LOOKUP_DIRECTORY); + else + lo_d_child = kern_path_create(AT_FDCWD, absolute_path_buf, + &path, 0); + if (IS_ERR(lo_d_child)) { + ret = PTR_ERR(lo_d_child); + goto out; + } + // to ensure link_comrade after vfs_mkdir succeed + ret = hmdfs_do_ops_merge(i_parent, d_child, lo_d_child, path, + rec_op_para); + if (ret) + goto out_put; + new_comrade = alloc_comrade(lo_d_child, HMDFS_DEVID_LOCAL); + if (IS_ERR(new_comrade)) { + ret = PTR_ERR(new_comrade); + goto out_put; + } else { + link_comrade_unlocked(d_child, new_comrade); + } + +out_put: + done_path_create(&path, lo_d_child); +out: + kfree(absolute_path_buf); + kfree(path_buf); + return ret; +} + +static int create_lo_d_parent_recur(struct dentry *d_parent, + struct dentry *d_child, umode_t mode, + struct hmdfs_recursive_para *rec_op_para) +{ + struct dentry *lo_d_parent, *d_pparent; + int ret = 0; + + lo_d_parent = hmdfs_get_lo_d(d_parent, HMDFS_DEVID_LOCAL); + if (!lo_d_parent) { + d_pparent = dget_parent(d_parent); + ret = create_lo_d_parent_recur(d_pparent, d_parent, + d_inode(d_parent)->i_mode, + rec_op_para); + dput(d_pparent); + if (ret) + goto out; + lo_d_parent = hmdfs_get_lo_d(d_parent, HMDFS_DEVID_LOCAL); + if (!lo_d_parent) { + ret = -ENOENT; + goto out; + } + } + rec_op_para->is_last = false; + rec_op_para->mode = mode; + ret = hmdfs_create_lower_dentry(d_inode(d_parent), d_child, lo_d_parent, + true, rec_op_para); +out: + dput(lo_d_parent); + return ret; +} + +int create_lo_d_child(struct inode *i_parent, struct dentry *d_child, + bool is_dir, struct hmdfs_recursive_para *rec_op_para) +{ + struct dentry *d_pparent, *lo_d_parent, *lo_d_child; + struct dentry *d_parent = dget_parent(d_child); + int ret = 0; + mode_t d_child_mode = rec_op_para->mode; + + lo_d_parent = 
hmdfs_get_lo_d(d_parent, HMDFS_DEVID_LOCAL);
+	if (!lo_d_parent) {
+		d_pparent = dget_parent(d_parent);
+		ret = create_lo_d_parent_recur(d_pparent, d_parent,
+					       d_inode(d_parent)->i_mode,
+					       rec_op_para);
+		dput(d_pparent);
+		if (unlikely(ret)) {
+			lo_d_child = ERR_PTR(ret);
+			goto out;
+		}
+		lo_d_parent = hmdfs_get_lo_d(d_parent, HMDFS_DEVID_LOCAL);
+		if (!lo_d_parent) {
+			lo_d_child = ERR_PTR(-ENOENT);
+			goto out;
+		}
+	}
+	rec_op_para->is_last = true;
+	rec_op_para->mode = d_child_mode;
+	ret = hmdfs_create_lower_dentry(i_parent, d_child, lo_d_parent, is_dir,
+					rec_op_para);
+
+out:
+	dput(d_parent);
+	dput(lo_d_parent);
+	return ret;
+}
+
+void hmdfs_init_recursive_para(struct hmdfs_recursive_para *rec_op_para,
+			       int opcode, mode_t mode, bool want_excl,
+			       const char *name)
+{
+	rec_op_para->is_last = true;
+	rec_op_para->opcode = opcode;
+	rec_op_para->mode = mode;
+	rec_op_para->want_excl = want_excl;
+	rec_op_para->name = name;
+}
+
+int hmdfs_mkdir_merge(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	int ret = 0;
+	struct hmdfs_recursive_para *rec_op_para = NULL;
+
+	// conflict_name & file_type is checked by hmdfs_mkdir_local
+	if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) {
+		ret = -EACCES;
+		goto out;
+	}
+	rec_op_para = kmalloc(sizeof(*rec_op_para), GFP_KERNEL);
+	if (!rec_op_para) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	hmdfs_init_recursive_para(rec_op_para, F_MKDIR_MERGE, mode, false,
+				  NULL);
+	ret = create_lo_d_child(dir, dentry, true, rec_op_para);
+out:
+	hmdfs_trace_merge(trace_hmdfs_mkdir_merge, dir, dentry, ret);
+	if (ret)
+		d_drop(dentry);
+	kfree(rec_op_para);
+	return ret;
+}
+
+int hmdfs_create_merge(struct inode *dir, struct dentry *dentry, umode_t mode,
+		       bool want_excl)
+{
+	struct hmdfs_recursive_para *rec_op_para = NULL;
+	int ret = 0;
+
+	rec_op_para = kmalloc(sizeof(*rec_op_para), GFP_KERNEL);
+	if (!rec_op_para) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	hmdfs_init_recursive_para(rec_op_para, F_CREATE_MERGE, mode, want_excl,
+				  NULL);
+	// conflict_name & file_type is checked by hmdfs_create_local
+	ret = create_lo_d_child(dir, dentry, false, rec_op_para);
+out:
+	hmdfs_trace_merge(trace_hmdfs_create_merge, dir, dentry, ret);
+	if (ret)
+		d_drop(dentry);
+	kfree(rec_op_para);
+	return ret;
+}
+
+int do_rmdir_merge(struct inode *dir, struct dentry *dentry)
+{
+	int ret = 0;
+	struct hmdfs_dentry_info_merge *dim = hmdfs_dm(dentry);
+	struct hmdfs_dentry_comrade *comrade = NULL;
+	struct dentry *lo_d = NULL;
+	struct dentry *lo_d_dir = NULL;
+	struct inode *lo_i_dir = NULL;
+
+	//TODO: Currently only the local copy is deleted, since this does not
+	//      affect the gallery scenario.
+	//TODO: Does the gallery clear symlinks on restart? In which scenarios
+	//      are they deleted at all?
+	//TODO: A remove call deletes empty and non-empty directories alike,
+	//      so the results across devices may be inconsistent.
+	//TODO: Would a verification step race? Even with a lock, we can only
+	//      lock our own device.
+	mutex_lock(&dim->comrade_list_lock);
+	list_for_each_entry(comrade, &(dim->comrade_list), list) {
+		lo_d = comrade->lo_d;
+		lo_d_dir = lock_parent(lo_d);
+		lo_i_dir = d_inode(lo_d_dir);
+		//TODO: handle partial success; re-check lo_d
+		ret = vfs_rmdir(lo_i_dir, lo_d);
+		unlock_dir(lo_d_dir);
+		if (ret)
+			break;
+	}
+	mutex_unlock(&dim->comrade_list_lock);
+	hmdfs_trace_merge(trace_hmdfs_rmdir_merge, dir, dentry, ret);
+	return ret;
+}
+
+int hmdfs_rmdir_merge(struct inode *dir, struct dentry *dentry)
+{
+	int ret = 0;
+
+	if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) {
+		ret = -EACCES;
+		goto out;
+	}
+
+	ret = do_rmdir_merge(dir, dentry);
+	if (ret) {
+		hmdfs_err("rm dir failed:%d", ret);
+		goto out;
+	}
+
+	d_drop(dentry);
+out:
+	hmdfs_trace_merge(trace_hmdfs_rmdir_merge, dir, dentry, ret);
+	return ret;
+}
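+
+/*
+ * Example flow for the create path above (a sketch; the mount point and
+ * path names are illustrative): `mkdir /mnt/hmdfs/merge_view/a/b/c` when
+ * only `a` already exists in the local device view:
+ *
+ *	hmdfs_mkdir_merge(c)
+ *	  create_lo_d_child(c, is_dir=true)
+ *	    create_lo_d_parent_recur(b)	  // is_last=false -> plain vfs_mkdir
+ *	    hmdfs_create_lower_dentry(c)  // is_last=true
+ *	      kern_path_create(real_dst/.../c)
+ *	      hmdfs_do_ops_merge() -> do_mkdir_merge()	// F_MKDIR_MERGE
+ *	      alloc_comrade() + link_comrade_unlocked()
+ *
+ * i.e. missing ancestors are created with "mkdir -p" semantics before the
+ * final component dispatches on rec_op_para->opcode.
+ */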
+
+int do_unlink_merge(struct inode *dir, struct dentry *dentry)
+{
+	int ret = 0;
+	struct hmdfs_dentry_info_merge *dim = hmdfs_dm(dentry);
+	struct hmdfs_dentry_comrade *comrade = NULL;
+	struct dentry *lo_d = NULL;
+	struct dentry *lo_d_dir = NULL;
+	struct inode *lo_i_dir = NULL;
+
+	// TODO: for the regular-file case, use list_first_entry
+	mutex_lock(&dim->comrade_list_lock);
+	list_for_each_entry(comrade, &(dim->comrade_list), list) {
+		lo_d = comrade->lo_d;
+		lo_d_dir = lock_parent(lo_d);
+		lo_i_dir = d_inode(lo_d_dir);
+		ret = vfs_unlink(lo_i_dir, lo_d, NULL); // lo_d GET
+		unlock_dir(lo_d_dir);
+		if (ret)
+			break;
+	}
+	mutex_unlock(&dim->comrade_list_lock);
+
+	return ret;
+}
+
+int hmdfs_unlink_merge(struct inode *dir, struct dentry *dentry)
+{
+	int ret = 0;
+
+	if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) {
+		ret = -EACCES;
+		goto out;
+	}
+
+	ret = do_unlink_merge(dir, dentry);
+	if (ret) {
+		hmdfs_err("unlink failed:%d", ret);
+		goto out;
+	}
+
+	d_drop(dentry);
+out:
+	return ret;
+}
+
+int hmdfs_symlink_merge(struct inode *dir, struct dentry *dentry,
+			const char *symname)
+{
+	int ret = 0;
+	struct hmdfs_recursive_para *rec_op_para = NULL;
+
+	if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) {
+		ret = -EACCES;
+		goto out;
+	}
+
+	rec_op_para = kmalloc(sizeof(*rec_op_para), GFP_KERNEL);
+	if (!rec_op_para) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	hmdfs_init_recursive_para(rec_op_para, F_SYMLINK_MERGE, 0, false,
+				  symname);
+	ret = create_lo_d_child(dir, dentry, false, rec_op_para);
+
+out:
+	trace_hmdfs_symlink_merge(dir, dentry, ret);
+	if (ret)
+		d_drop(dentry);
+	kfree(rec_op_para);
+	return ret;
+}
+
+int do_rename_merge(struct inode *old_dir, struct dentry *old_dentry,
+		    struct inode *new_dir, struct dentry *new_dentry,
+		    unsigned int flags)
+{
+	int ret = 0;
+	struct hmdfs_sb_info *sbi = (old_dir->i_sb)->s_fs_info;
+	struct hmdfs_dentry_info_merge *dim = hmdfs_dm(old_dentry);
+	struct hmdfs_dentry_comrade *comrade = NULL, *new_comrade = NULL;
+	struct path lo_p_new = { .mnt = NULL, .dentry = NULL };
+	struct inode *lo_i_old_dir = NULL, *lo_i_new_dir = NULL;
+	struct dentry *lo_d_old_dir = NULL, *lo_d_old = NULL,
+		      *lo_d_new_dir = NULL, *lo_d_new = NULL;
+	struct dentry *d_new_dir = NULL;
+	char *path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
+	char *abs_path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
+	char *path_name = NULL;
+
+	/* TODO: Will WPS rename a temporary file into another directory?
+	 * Could flags carrying the replace bit result in a cross-device
+	 * rename? Replace flags are currently unsupported.
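+	 *
+	 * Since only RENAME_NOREPLACE may be set, requests carrying e.g.
+	 * RENAME_EXCHANGE or RENAME_WHITEOUT are rejected with -EINVAL by
+	 * the flag check below.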
+ */ + if (flags & ~RENAME_NOREPLACE) { + ret = -EINVAL; + goto out; + } + + if (unlikely(!path_buf || !abs_path_buf)) { + ret = -ENOMEM; + goto out; + } + + list_for_each_entry(comrade, &dim->comrade_list, list) { + lo_d_old = comrade->lo_d; + d_new_dir = d_find_alias(new_dir); + lo_d_new_dir = hmdfs_get_lo_d(d_new_dir, comrade->dev_id); + dput(d_new_dir); + + if (!lo_d_new_dir) + continue; + path_name = dentry_path_raw(lo_d_new_dir, path_buf, PATH_MAX); + dput(lo_d_new_dir); + if (IS_ERR(path_name)) { + ret = PTR_ERR(path_name); + continue; + } + + if (strlen(sbi->real_dst) + strlen(path_name) + + strlen(new_dentry->d_name.name) + 2 > PATH_MAX) { + ret = -ENAMETOOLONG; + goto out; + } + + snprintf(abs_path_buf, PATH_MAX, "%s%s/%s", sbi->real_dst, + path_name, new_dentry->d_name.name); + if (S_ISDIR(d_inode(old_dentry)->i_mode)) + lo_d_new = kern_path_create(AT_FDCWD, abs_path_buf, + &lo_p_new, + LOOKUP_DIRECTORY); + else + lo_d_new = kern_path_create(AT_FDCWD, abs_path_buf, + &lo_p_new, 0); + if (IS_ERR(lo_d_new)) + continue; + + lo_d_new_dir = dget_parent(lo_d_new); + lo_i_new_dir = d_inode(lo_d_new_dir); + lo_d_old_dir = dget_parent(lo_d_old); + lo_i_old_dir = d_inode(lo_d_old_dir); + + ret = vfs_rename(lo_i_old_dir, lo_d_old, lo_i_new_dir, lo_d_new, + NULL, flags); + new_comrade = alloc_comrade(lo_p_new.dentry, comrade->dev_id); + if (IS_ERR(new_comrade)) { + ret = PTR_ERR(new_comrade); + goto no_comrade; + } + + link_comrade_unlocked(new_dentry, new_comrade); +no_comrade: + done_path_create(&lo_p_new, lo_d_new); + dput(lo_d_old_dir); + dput(lo_d_new_dir); + } +out: + kfree(abs_path_buf); + kfree(path_buf); + return ret; +} + +int hmdfs_rename_merge(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + char *old_dir_buf = NULL; + char *new_dir_buf = NULL; + char *old_dir_path = NULL; + char *new_dir_path = NULL; + struct dentry *old_dir_dentry = NULL; + struct dentry *new_dir_dentry = NULL; + int ret = 0; + + if (hmdfs_file_type(old_dentry->d_name.name) != HMDFS_TYPE_COMMON || + hmdfs_file_type(new_dentry->d_name.name) != HMDFS_TYPE_COMMON) { + ret = -EACCES; + goto rename_out; + } + old_dir_buf = kmalloc(PATH_MAX, GFP_KERNEL); + new_dir_buf = kmalloc(PATH_MAX, GFP_KERNEL); + if (!old_dir_buf || !new_dir_buf) { + ret = -ENOMEM; + goto rename_out; + } + + new_dir_dentry = d_find_alias(new_dir); + if (!new_dir_dentry) { + ret = -EINVAL; + goto rename_out; + } + + old_dir_dentry = d_find_alias(old_dir); + if (!old_dir_dentry) { + ret = -EINVAL; + dput(new_dir_dentry); + goto rename_out; + } + + old_dir_path = dentry_path_raw(old_dir_dentry, old_dir_buf, PATH_MAX); + new_dir_path = dentry_path_raw(new_dir_dentry, new_dir_buf, PATH_MAX); + dput(new_dir_dentry); + dput(old_dir_dentry); + if (strcmp(old_dir_path, new_dir_path)) { + ret = -EPERM; + goto rename_out; + } + + trace_hmdfs_rename_merge(old_dir, old_dentry, new_dir, new_dentry, + flags); + ret = do_rename_merge(old_dir, old_dentry, new_dir, new_dentry, flags); + + if (ret != 0) + d_drop(new_dentry); + + if (S_ISREG(old_dentry->d_inode->i_mode) && !ret) + d_invalidate(old_dentry); + +rename_out: + hmdfs_trace_rename_merge(old_dir, old_dentry, new_dir, new_dentry, ret); + kfree(old_dir_buf); + kfree(new_dir_buf); + return ret; +} + +static const char *hmdfs_get_link_merge(struct dentry *dentry, + struct inode *inode, + struct delayed_call *done) +{ + const char *link = NULL; + struct dentry *lower_dentry = NULL; + struct inode *lower_inode = NULL; + + if 
(!dentry) {
+		hmdfs_err("dentry NULL");
+		link = ERR_PTR(-ECHILD);
+		goto link_out;
+	}
+
+	lower_dentry = hmdfs_get_fst_lo_d(dentry);
+	if (!lower_dentry) {
+		WARN_ON(1);
+		link = ERR_PTR(-EINVAL);
+		goto out;
+	}
+	lower_inode = d_inode(lower_dentry);
+	if (!lower_inode->i_op || !lower_inode->i_op->get_link) {
+		hmdfs_err("lower inode holds no operations");
+		link = ERR_PTR(-EINVAL);
+		goto out;
+	}
+
+	link = lower_inode->i_op->get_link(lower_dentry, lower_inode, done);
+	if (IS_ERR_OR_NULL(link))
+		goto out;
+	fsstack_copy_attr_atime(inode, lower_inode);
+out:
+	dput(lower_dentry);
+	trace_hmdfs_get_link_merge(inode, dentry, PTR_ERR_OR_ZERO(link));
+link_out:
+	return link;
+}
+
+const struct inode_operations hmdfs_symlink_iops_merge = {
+	.get_link = hmdfs_get_link_merge,
+	.permission = hmdfs_permission,
+};
+
+const struct inode_operations hmdfs_dir_iops_merge = {
+	.lookup = hmdfs_lookup_merge,
+	.mkdir = hmdfs_mkdir_merge,
+	.create = hmdfs_create_merge,
+	.rmdir = hmdfs_rmdir_merge,
+	.unlink = hmdfs_unlink_merge,
+	.symlink = hmdfs_symlink_merge,
+	.rename = hmdfs_rename_merge,
+	.permission = hmdfs_permission,
+};
diff --git a/fs/hmdfs/inode_remote.c b/fs/hmdfs/inode_remote.c
new file mode 100644
index 0000000000000000000000000000000000000000..98a0e34c2253cee0b09eaa0207b15ef2725d15ba
--- /dev/null
+++ b/fs/hmdfs/inode_remote.c
@@ -0,0 +1,989 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/inode_remote.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include
+#include
+#include
+#include
+
+#include "comm/socket_adapter.h"
+#include "hmdfs.h"
+#include "hmdfs_client.h"
+#include "hmdfs_dentryfile.h"
+#include "hmdfs_trace.h"
+#include "authority/authentication.h"
+#include "stash.h"
+
+struct hmdfs_lookup_ret *lookup_remote_dentry(struct dentry *child_dentry,
+					      const struct qstr *qstr,
+					      uint64_t dev_id)
+{
+	struct hmdfs_lookup_ret *lookup_ret;
+	struct hmdfs_dentry *dentry = NULL;
+	struct clearcache_item *cache_item = NULL;
+	struct hmdfs_dcache_lookup_ctx ctx;
+	struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb);
+
+	cache_item = hmdfs_find_cache_item(dev_id, child_dentry->d_parent);
+	if (!cache_item)
+		return NULL;
+
+	lookup_ret = kmalloc(sizeof(*lookup_ret), GFP_KERNEL);
+	if (!lookup_ret)
+		goto out;
+
+	hmdfs_init_dcache_lookup_ctx(&ctx, sbi, qstr, cache_item->filp);
+	dentry = hmdfs_find_dentry(child_dentry, &ctx);
+	if (!dentry) {
+		kfree(lookup_ret);
+		lookup_ret = NULL;
+		goto out;
+	}
+
+	lookup_ret->i_mode = le16_to_cpu(dentry->i_mode);
+	lookup_ret->i_size = le64_to_cpu(dentry->i_size);
+	lookup_ret->i_mtime = le64_to_cpu(dentry->i_mtime);
+	lookup_ret->i_mtime_nsec = le32_to_cpu(dentry->i_mtime_nsec);
+	lookup_ret->i_ino = le64_to_cpu(dentry->i_ino);
+
+	hmdfs_unlock_file(ctx.filp, get_dentry_group_pos(ctx.bidx),
+			  DENTRYGROUP_SIZE);
+	kfree(ctx.page);
+out:
+	kref_put(&cache_item->ref, release_cache_item);
+	return lookup_ret;
+}
+
+/* get_remote_inode_info - fill hmdfs_lookup_ret with info from remote getattr
+ *
+ * @con: the remote device the getattr is sent to
+ * @dentry: local dentry
+ * @flags: lookup flags
+ *
+ * return an allocated and initialized hmdfs_lookup_ret on success, and NULL
+ * on failure.
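+ *
+ * Note that the contract is all-or-nothing: a reply missing any of
+ * STATX_INO, STATX_SIZE, STATX_MODE or STATX_MTIME is treated as a failure
+ * below, since every field of the result is filled from those attributes.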
+ */ +struct hmdfs_lookup_ret *get_remote_inode_info(struct hmdfs_peer *con, + struct dentry *dentry, + unsigned int flags) +{ + int err = 0; + struct hmdfs_lookup_ret *lookup_ret = NULL; + struct hmdfs_getattr_ret *getattr_ret = NULL; + unsigned int expected_flags = 0; + + lookup_ret = kmalloc(sizeof(*lookup_ret), GFP_KERNEL); + if (!lookup_ret) + return NULL; + + err = hmdfs_remote_getattr(con, dentry, flags, &getattr_ret); + if (err) { + hmdfs_debug("inode info get failed with err %d", err); + kfree(lookup_ret); + return NULL; + } + /* make sure we got everything we need */ + expected_flags = STATX_INO | STATX_SIZE | STATX_MODE | STATX_MTIME; + if ((getattr_ret->stat.result_mask & expected_flags) != + expected_flags) { + hmdfs_debug("remote getattr failed with flag %x", + getattr_ret->stat.result_mask); + kfree(lookup_ret); + kfree(getattr_ret); + return NULL; + } + + lookup_ret->i_mode = getattr_ret->stat.mode; + lookup_ret->i_size = getattr_ret->stat.size; + lookup_ret->i_mtime = getattr_ret->stat.mtime.tv_sec; + lookup_ret->i_mtime_nsec = getattr_ret->stat.mtime.tv_nsec; + lookup_ret->i_ino = getattr_ret->stat.ino; + kfree(getattr_ret); + return lookup_ret; +} + +static void hmdfs_remote_readdir_work(struct work_struct *work) +{ + struct hmdfs_readdir_work *rw = + container_of(to_delayed_work(work), struct hmdfs_readdir_work, + dwork); + struct dentry *dentry = rw->dentry; + struct hmdfs_peer *con = rw->con; + const struct cred *old_cred = hmdfs_override_creds(con->sbi->cred); + bool empty = false; + + get_remote_dentry_file(dentry, con); + hmdfs_d(dentry)->async_readdir_in_progress = false; + hmdfs_revert_creds(old_cred); + + dput(dentry); + peer_put(con); + spin_lock(&con->sbi->async_readdir_work_lock); + list_del(&rw->head); + empty = list_empty(&con->sbi->async_readdir_work_list); + spin_unlock(&con->sbi->async_readdir_work_lock); + kfree(rw); + + if (empty) + wake_up_interruptible(&con->sbi->async_readdir_wq); +} + +static void get_remote_dentry_file_in_wq(struct dentry *dentry, + struct hmdfs_peer *con) +{ + struct hmdfs_readdir_work *rw = NULL; + + /* do nothing if async readdir is already in progress */ + if (cmpxchg_relaxed(&hmdfs_d(dentry)->async_readdir_in_progress, false, + true)) + return; + + rw = kmalloc(sizeof(*rw), GFP_KERNEL); + if (!rw) { + hmdfs_d(dentry)->async_readdir_in_progress = false; + return; + } + + dget(dentry); + peer_get(con); + rw->dentry = dentry; + rw->con = con; + spin_lock(&con->sbi->async_readdir_work_lock); + INIT_DELAYED_WORK(&rw->dwork, hmdfs_remote_readdir_work); + list_add(&rw->head, &con->sbi->async_readdir_work_list); + spin_unlock(&con->sbi->async_readdir_work_lock); + queue_delayed_work(con->dentry_wq, &rw->dwork, 0); +} + +void get_remote_dentry_file_sync(struct dentry *dentry, struct hmdfs_peer *con) +{ + get_remote_dentry_file_in_wq(dentry, con); + flush_workqueue(con->dentry_wq); +} + +struct hmdfs_lookup_ret *hmdfs_lookup_by_con(struct hmdfs_peer *con, + struct dentry *dentry, + struct qstr *qstr, + unsigned int flags, + const char *relative_path) +{ + struct hmdfs_lookup_ret *result = NULL; + + if (con->version > USERSPACE_MAX_VER) { + /* + * LOOKUP_REVAL means we found stale info from dentry file, thus + * we need to use remote getattr. + */ + if (flags & LOOKUP_REVAL) { + /* + * HMDFS_LOOKUP_REVAL means we need to skip dentry cache + * in lookup, because dentry cache in server might have + * stale data. 
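+			 *
+			 * Passing HMDFS_LOOKUP_REVAL therefore bypasses the
+			 * dentry file and forces a fresh remote getattr; the
+			 * parent's dentry file is then refreshed
+			 * asynchronously via the workqueue.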
+			 */
+			result = get_remote_inode_info(con, dentry,
+						       HMDFS_LOOKUP_REVAL);
+			get_remote_dentry_file_in_wq(dentry->d_parent, con);
+			return result;
+		}
+
+		/* If cache file is still valid */
+		if (hmdfs_cache_revalidate(READ_ONCE(con->conn_time),
+					   con->device_id, dentry->d_parent)) {
+			result = lookup_remote_dentry(dentry, qstr,
+						      con->device_id);
+			/*
+			 * If lookup from the cache file failed, use getattr to
+			 * see if the remote has created the file.
+			 */
+			if (!(flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) &&
+			    !result)
+				result = get_remote_inode_info(con, dentry, 0);
+			/* If the cache file has expired, use getattr directly,
+			 * except for create and rename operations.
+			 */
+		} else {
+			result = get_remote_inode_info(con, dentry, 0);
+			get_remote_dentry_file_in_wq(dentry->d_parent, con);
+		}
+	} else {
+		if (!relative_path)
+			return NULL;
+
+		result = con->conn_operations->remote_lookup(
+			con, relative_path, dentry->d_name.name);
+	}
+
+	return result;
+}
+
+/*
+ * hmdfs_update_inode_size - update the inode size when finding an already
+ * existing inode.
+ *
+ * First of all, if the file is opened for writing, we don't update the inode
+ * size here, because the inode size is about to be changed by the write.
+ *
+ * If the file is not opened, simply update getattr_isize (not the actual
+ * inode size, just a value shown to the user). This is safe because the
+ * inode size will be up-to-date after open.
+ *
+ * If the file is opened for read:
+ * a. getattr_isize == HMDFS_STALE_REMOTE_ISIZE
+ *   1) i_size == new_size, nothing needs to be done.
+ *   2) i_size > new_size, we keep the i_size and set getattr_isize to new_size,
+ *      stale data might be read in this case, which is fine because the file
+ *      was opened before the remote truncated it.
+ *   3) i_size < new_size, we drop the last page of the file if i_size is not
+ *      aligned to PAGE_SIZE, clear getattr_isize, and update i_size to
+ *      new_size.
+ * b. getattr_isize != HMDFS_STALE_REMOTE_ISIZE, getattr_isize will only be set
+ *    after 2).
+ *   4) getattr_isize > i_size, this situation is impossible.
+ *   5) i_size >= new_size, this case is the same as 2).
+ *   6) i_size < new_size, this case is the same as 3).
+ */
+static void hmdfs_update_inode_size(struct inode *inode, uint64_t new_size)
+{
+	struct hmdfs_inode_info *info = hmdfs_i(inode);
+	int writecount;
+	uint64_t size;
+
+	inode_lock(inode);
+	size = info->getattr_isize;
+	if (size == HMDFS_STALE_REMOTE_ISIZE)
+		size = i_size_read(inode);
+	if (size == new_size) {
+		inode_unlock(inode);
+		return;
+	}
+
+	writecount = atomic_read(&inode->i_writecount);
+	/* check if writing is in progress */
+	if (writecount > 0) {
+		info->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
+		inode_unlock(inode);
+		return;
+	}
+
+	/* check if there is no one who opens the file */
+	if (kref_read(&info->ref) == 0)
+		goto update_info;
+
+	/* check if there is someone who opens the file for read */
+	if (writecount == 0) {
+		uint64_t aligned_size;
+
+		/* use inode size here instead of getattr_isize */
+		size = i_size_read(inode);
+		if (new_size <= size)
+			goto update_info;
+		/*
+		 * if the old inode size is not aligned to HMDFS_PAGE_SIZE, we
+		 * need to drop the last page of the inode, otherwise zeroes
+		 * will be returned while reading the new range in the page
+		 * after changing the inode size.
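+		 *
+		 * Worked example (assuming HMDFS_PAGE_SIZE is 4096; the
+		 * constant's real value may differ): with i_size == 5000 and
+		 * new_size == 9000, the cached page covering [4096, 8192) was
+		 * zero-filled beyond offset 5000 when it was read, so it is
+		 * truncated away before i_size is raised to 9000; otherwise a
+		 * later read of [5000, 8192) could be served those stale
+		 * zeroes from the page cache.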
+		 */
+		aligned_size = round_down(size, HMDFS_PAGE_SIZE);
+		if (aligned_size != size)
+			truncate_inode_pages(inode->i_mapping, aligned_size);
+		i_size_write(inode, new_size);
+		info->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
+		inode_unlock(inode);
+		return;
+	}
+
+update_info:
+	info->getattr_isize = new_size;
+	inode_unlock(inode);
+}
+
+static void hmdfs_update_inode(struct inode *inode,
+			       struct hmdfs_lookup_ret *lookup_result)
+{
+	struct hmdfs_time_t remote_mtime = {
+		.tv_sec = lookup_result->i_mtime,
+		.tv_nsec = lookup_result->i_mtime_nsec,
+	};
+
+	/*
+	 * We only update mtime if the file is not opened for writing. If we
+	 * updated it just before a write starts, the user might see the mtime
+	 * bounce up and down if the system times on the server and client do
+	 * not match. The client's mtime will eventually match the server's
+	 * after a timeout without writing.
+	 */
+	if (!inode_is_open_for_write(inode))
+		inode->i_mtime = remote_mtime;
+
+	/*
+	 * We don't care about the i_size of a dir, and locking the inode of
+	 * a dir might cause a deadlock.
+	 */
+	if (S_ISREG(inode->i_mode))
+		hmdfs_update_inode_size(inode, lookup_result->i_size);
+}
+
+static void hmdfs_fill_inode_android(struct inode *inode, struct inode *dir,
+				     umode_t mode)
+{
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+	inode->i_uid = dir->i_uid;
+	inode->i_gid = dir->i_gid;
+#endif
+}
+
+struct inode *fill_inode_remote(struct super_block *sb, struct hmdfs_peer *con,
+				struct hmdfs_lookup_ret *res, struct inode *dir)
+{
+	struct inode *inode = NULL;
+	struct hmdfs_inode_info *info;
+	umode_t mode = res->i_mode;
+
+	inode = hmdfs_iget5_locked_remote(sb, con, res->i_ino);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	info = hmdfs_i(inode);
+	info->inode_type = HMDFS_LAYER_OTHER_REMOTE;
+	if (con->version > USERSPACE_MAX_VER) {
+		/* the inode was found in cache */
+		if (!(inode->i_state & I_NEW)) {
+			hmdfs_fill_inode_android(inode, dir, mode);
+			hmdfs_update_inode(inode, res);
+			return inode;
+		}
+
+		hmdfs_remote_init_stash_status(con, inode, mode);
+	}
+
+	inode->i_ctime.tv_sec = 0;
+	inode->i_ctime.tv_nsec = 0;
+	inode->i_mtime.tv_sec = res->i_mtime;
+	inode->i_mtime.tv_nsec = res->i_mtime_nsec;
+
+	inode->i_uid = KUIDT_INIT((uid_t)1000);
+	inode->i_gid = KGIDT_INIT((gid_t)1000);
+
+	if (S_ISDIR(mode))
+		inode->i_mode = S_IFDIR | S_IRWXU | S_IRWXG | S_IXOTH;
+	else if (S_ISREG(mode))
+		inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
+	else if (S_ISLNK(mode))
+		inode->i_mode = S_IFREG | S_IRWXU | S_IRWXG;
+
+	if (S_ISREG(mode) || S_ISLNK(mode)) { // Regular file
+		inode->i_op = con->conn_operations->remote_file_iops;
+		inode->i_fop = con->conn_operations->remote_file_fops;
+		inode->i_size = res->i_size;
+		set_nlink(inode, 1);
+	} else if (S_ISDIR(mode)) { // Directory
+		inode->i_op = &hmdfs_dev_dir_inode_ops_remote;
+		inode->i_fop = &hmdfs_dev_dir_ops_remote;
+		set_nlink(inode, 2);
+	}
+	inode->i_mapping->a_ops = con->conn_operations->remote_file_aops;
+
+	hmdfs_fill_inode_android(inode, dir, mode);
+	unlock_new_inode(inode);
+	return inode;
+}
+
+static struct dentry *hmdfs_lookup_remote_dentry(struct inode *parent_inode,
+						 struct dentry *child_dentry,
+						 int flags)
+{
+	struct dentry *ret = NULL;
+	struct inode *inode = NULL;
+	struct super_block *sb = parent_inode->i_sb;
+	struct hmdfs_sb_info *sbi = sb->s_fs_info;
+	struct hmdfs_lookup_ret *lookup_result = NULL;
+	struct hmdfs_peer *con = NULL;
+	char *file_name = NULL;
+	int file_name_len = child_dentry->d_name.len;
+	struct qstr qstr;
+	struct hmdfs_dentry_info *gdi = hmdfs_d(child_dentry);
uint64_t device_id = 0; + char *relative_path = NULL; + + file_name = kzalloc(NAME_MAX + 1, GFP_KERNEL); + if (!file_name) + return ERR_PTR(-ENOMEM); + strncpy(file_name, child_dentry->d_name.name, file_name_len); + + qstr.name = file_name; + qstr.len = strlen(file_name); + + device_id = gdi->device_id; + con = hmdfs_lookup_from_devid(sbi, device_id); + if (!con) { + ret = ERR_PTR(-ESHUTDOWN); + goto done; + } + + relative_path = hmdfs_get_dentry_relative_path(child_dentry->d_parent); + if (unlikely(!relative_path)) { + ret = ERR_PTR(-ENOMEM); + hmdfs_err("get relative path failed %d", -ENOMEM); + goto done; + } + + lookup_result = hmdfs_lookup_by_con(con, child_dentry, &qstr, flags, + relative_path); + if (lookup_result != NULL) { + if (S_ISLNK(lookup_result->i_mode)) + gdi->file_type = HM_SYMLINK; + inode = fill_inode_remote(sb, con, lookup_result, parent_inode); + ret = d_splice_alias(inode, child_dentry); + if (!IS_ERR_OR_NULL(ret)) + child_dentry = ret; + if (!IS_ERR(ret)) + check_and_fixup_ownership_remote(parent_inode, + child_dentry); + } else { + ret = ERR_PTR(-ENOENT); + } + +done: + if (con) + peer_put(con); + kfree(relative_path); + kfree(lookup_result); + kfree(file_name); + return ret; +} + +struct dentry *hmdfs_lookup_remote(struct inode *parent_inode, + struct dentry *child_dentry, + unsigned int flags) +{ + int err = 0; + struct dentry *ret = NULL; + struct hmdfs_dentry_info *gdi = NULL; + struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb); + + trace_hmdfs_lookup_remote(parent_inode, child_dentry, flags); + if (child_dentry->d_name.len > NAME_MAX) { + err = -ENAMETOOLONG; + ret = ERR_PTR(-ENAMETOOLONG); + goto out; + } + + err = init_hmdfs_dentry_info(sbi, child_dentry, + HMDFS_LAYER_OTHER_REMOTE); + if (err) { + ret = ERR_PTR(err); + goto out; + } + gdi = hmdfs_d(child_dentry); + gdi->device_id = hmdfs_d(child_dentry->d_parent)->device_id; + + if (is_current_hmdfs_server_ctx()) + goto out; + + ret = hmdfs_lookup_remote_dentry(parent_inode, child_dentry, flags); + /* + * don't return error if inode do not exist, so that vfs can continue + * to create it. 
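+	 *
+	 * (Returning NULL from ->lookup here leaves the negative dentry in
+	 * place, which lets the VFS fall through to ->create or ->mkdir.)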
+ */ + if (IS_ERR_OR_NULL(ret)) { + err = PTR_ERR(ret); + if (err == -ENOENT) + ret = NULL; + } else { + child_dentry = ret; + } + +out: + if (!err) + hmdfs_set_time(child_dentry, jiffies); + trace_hmdfs_lookup_remote_end(parent_inode, child_dentry, err); + return ret; +} + +/* delete dentry in cache file */ +void delete_in_cache_file(uint64_t dev_id, struct dentry *dentry) +{ + struct clearcache_item *item = NULL; + + item = hmdfs_find_cache_item(dev_id, dentry->d_parent); + if (item) { + hmdfs_delete_dentry(dentry, item->filp); + kref_put(&item->ref, release_cache_item); + } else { + hmdfs_info("find cache item failed, con:%llu", dev_id); + } +} + +int hmdfs_mkdir_remote_dentry(struct hmdfs_peer *conn, struct dentry *dentry, + umode_t mode) +{ + int err = 0; + char *dir_path = NULL; + struct dentry *parent_dentry = dentry->d_parent; + struct inode *parent_inode = d_inode(parent_dentry); + struct super_block *sb = parent_inode->i_sb; + const unsigned char *d_name = dentry->d_name.name; + struct hmdfs_lookup_ret *mkdir_ret = NULL; + struct inode *inode = NULL; + + mkdir_ret = kmalloc(sizeof(*mkdir_ret), GFP_KERNEL); + if (!mkdir_ret) { + err = -ENOMEM; + return err; + } + dir_path = hmdfs_get_dentry_relative_path(parent_dentry); + if (!dir_path) { + err = -EACCES; + goto mkdir_out; + } + err = hmdfs_client_start_mkdir(conn, dir_path, d_name, mode, mkdir_ret); + if (err) { + hmdfs_err("hmdfs_client_start_mkdir failed err = %d", err); + goto mkdir_out; + } + if (mkdir_ret) { + inode = fill_inode_remote(sb, conn, mkdir_ret, parent_inode); + if (!IS_ERR(inode)) + d_add(dentry, inode); + else + err = PTR_ERR(inode); + check_and_fixup_ownership_remote(parent_inode, dentry); + } else { + err = -ENOENT; + } + +mkdir_out: + kfree(dir_path); + kfree(mkdir_ret); + return err; +} + +int hmdfs_mkdir_remote(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + int err = 0; + struct hmdfs_inode_info *info = hmdfs_i(dir); + struct hmdfs_peer *con = info->conn; + + if (!con) { + hmdfs_warning("qpb_debug: con is null!"); + goto out; + } + if (con->version <= USERSPACE_MAX_VER) { + err = -EPERM; + goto out; + } + err = hmdfs_mkdir_remote_dentry(con, dentry, mode); + if (!err) + create_in_cache_file(con->device_id, dentry); + else + hmdfs_err("remote mkdir failed err = %d", err); + +out: + trace_hmdfs_mkdir_remote(dir, dentry, err); + return err; +} + +int hmdfs_create_remote_dentry(struct hmdfs_peer *conn, struct dentry *dentry, + umode_t mode, bool want_excl) +{ + int err = 0; + char *dir_path = NULL; + struct dentry *parent_dentry = dentry->d_parent; + struct inode *parent_inode = d_inode(parent_dentry); + struct super_block *sb = parent_inode->i_sb; + const unsigned char *d_name = dentry->d_name.name; + struct hmdfs_lookup_ret *create_ret = NULL; + struct inode *inode = NULL; + + create_ret = kmalloc(sizeof(*create_ret), GFP_KERNEL); + if (!create_ret) { + err = -ENOMEM; + return err; + } + dir_path = hmdfs_get_dentry_relative_path(parent_dentry); + if (!dir_path) { + err = -EACCES; + goto create_out; + } + err = hmdfs_client_start_create(conn, dir_path, d_name, mode, + want_excl, create_ret); + if (err) { + hmdfs_err("hmdfs_client_start_create failed err = %d", err); + goto create_out; + } + if (create_ret) { + inode = fill_inode_remote(sb, conn, create_ret, parent_inode); + if (!IS_ERR(inode)) + d_add(dentry, inode); + else + err = PTR_ERR(inode); + check_and_fixup_ownership_remote(parent_inode, dentry); + } else { + err = -ENOENT; + hmdfs_err("get remote inode info failed err = %d", err); + } + 
+create_out:
+	kfree(dir_path);
+	kfree(create_ret);
+	return err;
+}
+
+int hmdfs_create_remote(struct inode *dir, struct dentry *dentry, umode_t mode,
+			bool want_excl)
+{
+	int err = 0;
+	struct hmdfs_inode_info *info = hmdfs_i(dir);
+	struct hmdfs_peer *con = info->conn;
+
+	if (!con) {
+		hmdfs_warning("qpb_debug: con is null!");
+		goto out;
+	}
+	if (con->version <= USERSPACE_MAX_VER) {
+		err = -EPERM;
+		goto out;
+	}
+	err = hmdfs_create_remote_dentry(con, dentry, mode, want_excl);
+	if (!err)
+		create_in_cache_file(con->device_id, dentry);
+	else
+		hmdfs_err("remote create failed err = %d", err);
+
+out:
+	trace_hmdfs_create_remote(dir, dentry, err);
+	return err;
+}
+
+int hmdfs_rmdir_remote_dentry(struct hmdfs_peer *conn, struct dentry *dentry)
+{
+	int error = 0;
+	char *dir_path = NULL;
+	const char *dentry_name = dentry->d_name.name;
+
+	dir_path = hmdfs_get_dentry_relative_path(dentry->d_parent);
+	if (!dir_path) {
+		error = -EACCES;
+		goto rmdir_out;
+	}
+
+	error = hmdfs_client_start_rmdir(conn, dir_path, dentry_name);
+	if (!error)
+		delete_in_cache_file(conn->device_id, dentry);
+
+rmdir_out:
+	kfree(dir_path);
+	return error;
+}
+
+int hmdfs_rmdir_remote(struct inode *dir, struct dentry *dentry)
+{
+	int err = 0;
+	struct hmdfs_inode_info *info = hmdfs_i(dentry->d_inode);
+	struct hmdfs_peer *con = info->conn;
+
+	if (!con)
+		goto out;
+
+	if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) {
+		err = -EACCES;
+		goto out;
+	}
+	if (con->version <= USERSPACE_MAX_VER) {
+		err = -EPERM;
+		goto out;
+	}
+	err = hmdfs_rmdir_remote_dentry(con, dentry);
+	/* Drop the dentry even if the remote call failed: the failure may
+	 * simply mean that a remote device disconnected while the rmdir
+	 * was in progress.
+	 */
+	d_drop(dentry);
+out:
+	/* return the connected device's error code */
+	trace_hmdfs_rmdir_remote(dir, dentry, err);
+	return err;
+}
+
+int hmdfs_dev_unlink_from_con(struct hmdfs_peer *conn, struct dentry *dentry)
+{
+	int error = 0;
+	char *dir_path = NULL;
+	const char *dentry_name = dentry->d_name.name;
+
+	dir_path = hmdfs_get_dentry_relative_path(dentry->d_parent);
+	if (!dir_path) {
+		error = -EACCES;
+		goto unlink_out;
+	}
+	error = hmdfs_client_start_unlink(conn, dir_path, dentry_name);
+	if (!error) {
+		delete_in_cache_file(conn->device_id, dentry);
+		drop_nlink(d_inode(dentry));
+		d_drop(dentry);
+	}
+unlink_out:
+	kfree(dir_path);
+	return error;
+}
+
+int hmdfs_unlink_remote(struct inode *dir, struct dentry *dentry)
+{
+	struct hmdfs_inode_info *info = hmdfs_i(dentry->d_inode);
+	struct hmdfs_peer *conn = info->conn;
+
+	if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON)
+		return -EACCES;
+
+	if (!conn)
+		return 0;
+
+	if (conn->status != NODE_STAT_ONLINE)
+		return 0;
+
+	return conn->conn_operations->remote_unlink(conn, dentry);
+}
+
+/* rename dentry in cache file */
+static void rename_in_cache_file(uint64_t dev_id, struct dentry *old_dentry,
+				 struct dentry *new_dentry)
+{
+	struct clearcache_item *old_item = NULL;
+	struct clearcache_item *new_item = NULL;
+
+	old_item = hmdfs_find_cache_item(dev_id, old_dentry->d_parent);
+	new_item = hmdfs_find_cache_item(dev_id, new_dentry->d_parent);
+	if (old_item != NULL && new_item != NULL) {
+		hmdfs_rename_dentry(old_dentry, new_dentry, old_item->filp,
+				    new_item->filp);
+	} else if (old_item != NULL) {
+		hmdfs_err("new cache item find failed!");
+	} else if (new_item != NULL) {
+		hmdfs_err("old cache item find failed!");
+	} else {
+		hmdfs_err("both cache item find failed!");
+	}
+
+	if (old_item)
+		kref_put(&old_item->ref,
release_cache_item); + if (new_item) + kref_put(&new_item->ref, release_cache_item); +} + +int hmdfs_rename_remote(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + int err = 0; + int ret = 0; + const char *old_dentry_d_name = old_dentry->d_name.name; + char *relative_old_dir_path = 0; + const char *new_dentry_d_name = new_dentry->d_name.name; + char *relative_new_dir_path = 0; + struct hmdfs_inode_info *info = hmdfs_i(old_dentry->d_inode); + struct hmdfs_peer *con = info->conn; + + trace_hmdfs_rename_remote(old_dir, old_dentry, new_dir, new_dentry, + flags); + + if (flags & ~RENAME_NOREPLACE) + return -EINVAL; + + if (hmdfs_file_type(old_dentry->d_name.name) != HMDFS_TYPE_COMMON || + hmdfs_file_type(new_dentry->d_name.name) != HMDFS_TYPE_COMMON) { + return -EACCES; + } + + relative_old_dir_path = + hmdfs_get_dentry_relative_path(old_dentry->d_parent); + relative_new_dir_path = + hmdfs_get_dentry_relative_path(new_dentry->d_parent); + if (!relative_old_dir_path || !relative_new_dir_path) { + err = -EACCES; + goto rename_out; + } + if (S_ISREG(old_dentry->d_inode->i_mode)) { + if (con->version > USERSPACE_MAX_VER) { + hmdfs_debug("send MSG to remote devID %llu", + con->device_id); + err = hmdfs_client_start_rename( + con, relative_old_dir_path, old_dentry_d_name, + relative_new_dir_path, new_dentry_d_name, + flags); + if (!err) + rename_in_cache_file(con->device_id, old_dentry, + new_dentry); + } + } else if (S_ISDIR(old_dentry->d_inode->i_mode)) { + if ((con->status == NODE_STAT_ONLINE) && + (con->version > USERSPACE_MAX_VER)) { + ret = hmdfs_client_start_rename( + con, relative_old_dir_path, old_dentry_d_name, + relative_new_dir_path, new_dentry_d_name, + flags); + if (!ret) + rename_in_cache_file(con->device_id, old_dentry, + new_dentry); + else + err = ret; + } + } + if (!err) + d_invalidate(old_dentry); +rename_out: + kfree(relative_old_dir_path); + kfree(relative_new_dir_path); + return err; +} + +static int hmdfs_dir_setattr_remote(struct dentry *dentry, struct iattr *ia) +{ + // Do not support dir setattr + return 0; +} + +const struct inode_operations hmdfs_dev_dir_inode_ops_remote = { + .lookup = hmdfs_lookup_remote, + .mkdir = hmdfs_mkdir_remote, + .create = hmdfs_create_remote, + .rmdir = hmdfs_rmdir_remote, + .unlink = hmdfs_unlink_remote, + .rename = hmdfs_rename_remote, + .setattr = hmdfs_dir_setattr_remote, + .permission = hmdfs_permission, +}; + +static int hmdfs_setattr_remote(struct dentry *dentry, struct iattr *ia) +{ + struct hmdfs_inode_info *info = hmdfs_i(d_inode(dentry)); + struct hmdfs_peer *conn = info->conn; + struct inode *inode = d_inode(dentry); + char *send_buf = NULL; + int err = 0; + + if (hmdfs_inode_is_stashing(info)) + return -EAGAIN; + + send_buf = hmdfs_get_dentry_relative_path(dentry); + if (!send_buf) { + err = -ENOMEM; + goto out_free; + } + if (ia->ia_valid & ATTR_SIZE) { + err = inode_newsize_ok(inode, ia->ia_size); + if (err) + goto out_free; + truncate_setsize(inode, ia->ia_size); + info->getattr_isize = HMDFS_STALE_REMOTE_ISIZE; + } + if (ia->ia_valid & ATTR_MTIME) + inode->i_mtime = ia->ia_mtime; + + if ((ia->ia_valid & ATTR_SIZE) || (ia->ia_valid & ATTR_MTIME)) { + struct setattr_info send_setattr_info = { + .size = cpu_to_le64(ia->ia_size), + .valid = cpu_to_le32(ia->ia_valid), + .mtime = cpu_to_le64(ia->ia_mtime.tv_sec), + .mtime_nsec = cpu_to_le32(ia->ia_mtime.tv_nsec), + }; + err = hmdfs_send_setattr(conn, send_buf, &send_setattr_info); + } +out_free: + 
kfree(send_buf); + return err; +} + +int hmdfs_remote_getattr(struct hmdfs_peer *conn, struct dentry *dentry, + unsigned int lookup_flags, + struct hmdfs_getattr_ret **result) +{ + char *send_buf = NULL; + struct hmdfs_getattr_ret *attr = NULL; + int err = 0; + + if (dentry->d_sb != conn->sbi->sb || !result) + return -EINVAL; + + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) + return -ENOMEM; + + send_buf = hmdfs_get_dentry_relative_path(dentry); + if (!send_buf) { + kfree(attr); + return -ENOMEM; + } + + err = hmdfs_send_getattr(conn, send_buf, lookup_flags, attr); + kfree(send_buf); + + if (err) { + kfree(attr); + return err; + } + + *result = attr; + return 0; +} + +static int hmdfs_get_cached_attr_remote(const struct path *path, + struct kstat *stat, u32 request_mask, + unsigned int flags) +{ + struct inode *inode = d_inode(path->dentry); + struct hmdfs_inode_info *info = hmdfs_i(inode); + uint64_t size = info->getattr_isize; + + stat->ino = inode->i_ino; + stat->mtime = inode->i_mtime; + stat->mode = inode->i_mode; + stat->uid.val = inode->i_uid.val; + stat->gid.val = inode->i_gid.val; + if (size == HMDFS_STALE_REMOTE_ISIZE) + size = i_size_read(inode); + + stat->size = size; + return 0; +} + +ssize_t hmdfs_remote_listxattr(struct dentry *dentry, char *list, size_t size) +{ + struct inode *inode = d_inode(dentry); + struct hmdfs_inode_info *info = hmdfs_i(inode); + struct hmdfs_peer *conn = info->conn; + char *send_buf = NULL; + ssize_t res = 0; + size_t r_size = size; + + if (!hmdfs_support_xattr(dentry)) + return -EOPNOTSUPP; + + if (size > HMDFS_LISTXATTR_SIZE_MAX) + r_size = HMDFS_LISTXATTR_SIZE_MAX; + + send_buf = hmdfs_get_dentry_relative_path(dentry); + if (!send_buf) + return -ENOMEM; + + res = hmdfs_send_listxattr(conn, send_buf, list, r_size); + kfree(send_buf); + + if (res == -ERANGE && r_size != size) { + hmdfs_info("no support listxattr size over than %d", + HMDFS_LISTXATTR_SIZE_MAX); + res = -E2BIG; + } + + return res; +} + +const struct inode_operations hmdfs_dev_file_iops_remote = { + .setattr = hmdfs_setattr_remote, + .permission = hmdfs_permission, + .getattr = hmdfs_get_cached_attr_remote, + .listxattr = hmdfs_remote_listxattr, +}; diff --git a/fs/hmdfs/inode_root.c b/fs/hmdfs/inode_root.c new file mode 100644 index 0000000000000000000000000000000000000000..30d0ca6a2264020fd54c9e856a2dc5497214a787 --- /dev/null +++ b/fs/hmdfs/inode_root.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/inode_root.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#include +#include +#include + +#include "authority/authentication.h" +#include "comm/socket_adapter.h" +#include "comm/transport.h" +#include "hmdfs_dentryfile.h" +#include "hmdfs_device_view.h" +#include "hmdfs_merge_view.h" +#include "hmdfs_trace.h" + +static struct inode *fill_device_local_inode(struct super_block *sb, + struct inode *lower_inode) +{ + struct inode *inode = NULL; + struct hmdfs_inode_info *info = NULL; + + if (!igrab(lower_inode)) + return ERR_PTR(-ESTALE); + + inode = hmdfs_iget_locked_root(sb, HMDFS_ROOT_DEV_LOCAL, lower_inode, + NULL); + if (!inode) { + hmdfs_err("iget5_locked get inode NULL"); + iput(lower_inode); + return ERR_PTR(-ENOMEM); + } + if (!(inode->i_state & I_NEW)) { + iput(lower_inode); + return inode; + } + + info = hmdfs_i(inode); + info->inode_type = HMDFS_LAYER_SECOND_LOCAL; + + inode->i_mode = + (lower_inode->i_mode & S_IFMT) | S_IRWXU | S_IRWXG | S_IXOTH; + + inode->i_uid = KUIDT_INIT((uid_t)1000); + inode->i_gid = KGIDT_INIT((gid_t)1000); + + inode->i_atime = lower_inode->i_atime; + inode->i_ctime = lower_inode->i_ctime; + inode->i_mtime = lower_inode->i_mtime; + + inode->i_op = &hmdfs_dir_inode_ops_local; + inode->i_fop = &hmdfs_dir_ops_local; + + fsstack_copy_inode_size(inode, lower_inode); + unlock_new_inode(inode); + return inode; +} + +static struct inode *fill_device_inode_remote(struct super_block *sb, + uint64_t dev_id) +{ + struct inode *inode = NULL; + struct hmdfs_inode_info *info = NULL; + struct hmdfs_peer *con = NULL; + + con = hmdfs_lookup_from_devid(sb->s_fs_info, dev_id); + if (!con) + return ERR_PTR(-ENOENT); + + inode = hmdfs_iget_locked_root(sb, HMDFS_ROOT_DEV_REMOTE, NULL, con); + if (!inode) { + hmdfs_err("get inode NULL"); + inode = ERR_PTR(-ENOMEM); + goto out; + } + if (!(inode->i_state & I_NEW)) + goto out; + + info = hmdfs_i(inode); + info->inode_type = HMDFS_LAYER_SECOND_REMOTE; + + inode->i_mode = S_IFDIR | S_IRWXU | S_IRWXG | S_IXOTH; + + inode->i_uid = KUIDT_INIT((uid_t)1000); + inode->i_gid = KGIDT_INIT((gid_t)1000); + inode->i_op = &hmdfs_dev_dir_inode_ops_remote; + inode->i_fop = &hmdfs_dev_dir_ops_remote; + + unlock_new_inode(inode); + +out: + peer_put(con); + return inode; +} + +struct dentry *hmdfs_device_lookup(struct inode *parent_inode, + struct dentry *child_dentry, + unsigned int flags) +{ + const char *d_name = child_dentry->d_name.name; + struct inode *root_inode = NULL; + struct super_block *sb = parent_inode->i_sb; + struct hmdfs_sb_info *sbi = sb->s_fs_info; + struct dentry *ret_dentry = NULL; + int err = 0; + struct hmdfs_peer *con = NULL; + struct hmdfs_dentry_info *di = NULL; + uint8_t *cid = NULL; + struct path *root_lower_path = NULL; + + trace_hmdfs_device_lookup(parent_inode, child_dentry, flags); + if (!strncmp(d_name, DEVICE_VIEW_LOCAL, + sizeof(DEVICE_VIEW_LOCAL) - 1)) { + err = init_hmdfs_dentry_info(sbi, child_dentry, + HMDFS_LAYER_SECOND_LOCAL); + if (err) { + ret_dentry = ERR_PTR(err); + goto out; + } + di = hmdfs_d(sb->s_root); + root_lower_path = &(di->lower_path); + hmdfs_set_lower_path(child_dentry, root_lower_path); + path_get(root_lower_path); + root_inode = fill_device_local_inode( + sb, d_inode(root_lower_path->dentry)); + if (IS_ERR(root_inode)) { + err = PTR_ERR(root_inode); + ret_dentry = ERR_PTR(err); + hmdfs_put_reset_lower_path(child_dentry); + goto out; + } + ret_dentry = d_splice_alias(root_inode, child_dentry); + if (IS_ERR(ret_dentry)) { + err = PTR_ERR(ret_dentry); + ret_dentry = ERR_PTR(err); + hmdfs_put_reset_lower_path(child_dentry); + goto out; + } + } 
else { + err = init_hmdfs_dentry_info(sbi, child_dentry, + HMDFS_LAYER_SECOND_REMOTE); + di = hmdfs_d(child_dentry); + if (err) { + ret_dentry = ERR_PTR(err); + goto out; + } + cid = kzalloc(HMDFS_CID_SIZE + 1, GFP_KERNEL); + if (!cid) { + err = -ENOMEM; + ret_dentry = ERR_PTR(err); + goto out; + } + memcpy(cid, d_name, HMDFS_CID_SIZE); + cid[HMDFS_CID_SIZE] = '\0'; + con = hmdfs_lookup_from_cid(sbi, cid); + if (!con) { + kfree(cid); + err = -ENOENT; + ret_dentry = ERR_PTR(err); + goto out; + } + di->device_id = con->device_id; + root_inode = fill_device_inode_remote(sb, di->device_id); + if (IS_ERR(root_inode)) { + kfree(cid); + err = PTR_ERR(root_inode); + ret_dentry = ERR_PTR(err); + goto out; + } + ret_dentry = d_splice_alias(root_inode, child_dentry); + kfree(cid); + } + if (root_inode) + hmdfs_root_inode_perm_init(root_inode); + if (!err) + hmdfs_set_time(child_dentry, jiffies); +out: + if (con) + peer_put(con); + trace_hmdfs_device_lookup_end(parent_inode, child_dentry, err); + return ret_dentry; +} + +struct dentry *hmdfs_root_lookup(struct inode *parent_inode, + struct dentry *child_dentry, + unsigned int flags) +{ + const char *d_name = child_dentry->d_name.name; + struct inode *root_inode = NULL; + struct super_block *sb = parent_inode->i_sb; + struct hmdfs_sb_info *sbi = sb->s_fs_info; + struct dentry *ret = ERR_PTR(-ENOENT); + struct path root_path; + + trace_hmdfs_root_lookup(parent_inode, child_dentry, flags); + if (sbi->s_merge_switch && !strcmp(d_name, MERGE_VIEW_ROOT)) { + ret = hmdfs_lookup_merge(parent_inode, child_dentry, flags); + if (ret && !IS_ERR(ret)) + child_dentry = ret; + root_inode = d_inode(child_dentry); + } else if (!strcmp(d_name, DEVICE_VIEW_ROOT)) { + ret = ERR_PTR(init_hmdfs_dentry_info( + sbi, child_dentry, HMDFS_LAYER_FIRST_DEVICE)); + if (IS_ERR(ret)) + goto out; + ret = ERR_PTR(kern_path(sbi->local_src, 0, &root_path)); + if (IS_ERR(ret)) + goto out; + root_inode = fill_device_inode(sb, d_inode(root_path.dentry)); + ret = d_splice_alias(root_inode, child_dentry); + path_put(&root_path); + } + if (!IS_ERR(ret) && root_inode) + hmdfs_root_inode_perm_init(root_inode); + +out: + trace_hmdfs_root_lookup_end(parent_inode, child_dentry, + PTR_ERR_OR_ZERO(ret)); + return ret; +} + +const struct inode_operations hmdfs_device_ops = { + .lookup = hmdfs_device_lookup, +}; + +const struct inode_operations hmdfs_root_ops = { + .lookup = hmdfs_root_lookup, +}; + +struct inode *fill_device_inode(struct super_block *sb, + struct inode *lower_inode) +{ + struct inode *inode = NULL; + struct hmdfs_inode_info *info = NULL; + + inode = hmdfs_iget_locked_root(sb, HMDFS_ROOT_DEV, NULL, NULL); + if (!inode) { + hmdfs_err("iget5_locked get inode NULL"); + return ERR_PTR(-ENOMEM); + } + if (!(inode->i_state & I_NEW)) + return inode; + + info = hmdfs_i(inode); + info->inode_type = HMDFS_LAYER_FIRST_DEVICE; + + inode->i_atime = lower_inode->i_atime; + inode->i_ctime = lower_inode->i_ctime; + inode->i_mtime = lower_inode->i_mtime; + + inode->i_mode = (lower_inode->i_mode & S_IFMT) | S_IRUSR | S_IXUSR | + S_IRGRP | S_IXGRP | S_IXOTH; + inode->i_uid = KUIDT_INIT((uid_t)1000); + inode->i_gid = KGIDT_INIT((gid_t)1000); + inode->i_op = &hmdfs_device_ops; + inode->i_fop = &hmdfs_device_fops; + + fsstack_copy_inode_size(inode, lower_inode); + unlock_new_inode(inode); + return inode; +} + +struct inode *fill_root_inode(struct super_block *sb, struct inode *lower_inode) +{ + struct inode *inode = NULL; + struct hmdfs_inode_info *info = NULL; + + if (!igrab(lower_inode)) + return 
ERR_PTR(-ESTALE); + + inode = hmdfs_iget_locked_root(sb, HMDFS_ROOT_ANCESTOR, lower_inode, + NULL); + if (!inode) { + hmdfs_err("iget5_locked get inode NULL"); + iput(lower_inode); + return ERR_PTR(-ENOMEM); + } + if (!(inode->i_state & I_NEW)) { + iput(lower_inode); + return inode; + } + + info = hmdfs_i(inode); + info->inode_type = HMDFS_LAYER_ZERO; + inode->i_mode = (lower_inode->i_mode & S_IFMT) | S_IRUSR | S_IXUSR | + S_IRGRP | S_IXGRP | S_IXOTH; + +#ifdef CONFIG_HMDFS_FS_PERMISSION + inode->i_uid = lower_inode->i_uid; + inode->i_gid = lower_inode->i_gid; +#else + inode->i_uid = KUIDT_INIT((uid_t)1000); + inode->i_gid = KGIDT_INIT((gid_t)1000); +#endif + inode->i_atime = lower_inode->i_atime; + inode->i_ctime = lower_inode->i_ctime; + inode->i_mtime = lower_inode->i_mtime; + + inode->i_op = &hmdfs_root_ops; + inode->i_fop = &hmdfs_root_fops; + fsstack_copy_inode_size(inode, lower_inode); + unlock_new_inode(inode); + return inode; +} diff --git a/fs/hmdfs/main.c b/fs/hmdfs/main.c new file mode 100644 index 0000000000000000000000000000000000000000..c9b28e8cb9f13232967b76bd598e38faec6a434e --- /dev/null +++ b/fs/hmdfs/main.c @@ -0,0 +1,1069 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/main.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + + +#include "hmdfs.h" + +#include +#include +#include +#include +#include +#if KERNEL_VERSION(5, 9, 0) < LINUX_VERSION_CODE +#include +#else +#include +#endif + +#include "authority/authentication.h" +#include "hmdfs_server.h" +#include "comm/device_node.h" +#include "comm/message_verify.h" +#include "comm/protocol.h" +#include "comm/socket_adapter.h" +#include "hmdfs_merge_view.h" +#include "server_writeback.h" + +#include "comm/node_cb.h" +#include "stash.h" + +#define CREATE_TRACE_POINTS +#include "hmdfs_trace.h" + +#define HMDFS_BOOT_COOKIE_RAND_SHIFT 33 + +#define HMDFS_SB_SEQ_FROM 1 + +struct hmdfs_mount_priv { + const char *dev_name; + const char *raw_data; +}; + +struct syncfs_item { + struct list_head list; + struct completion done; + bool need_abort; +}; + +static DEFINE_IDA(hmdfs_sb_seq); + +static inline int hmdfs_alloc_sb_seq(void) +{ + return ida_simple_get(&hmdfs_sb_seq, HMDFS_SB_SEQ_FROM, 0, GFP_KERNEL); +} + +static inline void hmdfs_free_sb_seq(unsigned int seq) +{ + if (!seq) + return; + ida_simple_remove(&hmdfs_sb_seq, seq); +} + +static int hmdfs_xattr_local_get(struct dentry *dentry, const char *name, + void *value, size_t size) +{ + struct path lower_path; + ssize_t res = 0; + + hmdfs_get_lower_path(dentry, &lower_path); + res = vfs_getxattr(lower_path.dentry, name, value, size); + hmdfs_put_lower_path(&lower_path); + return res; +} + +static int hmdfs_xattr_remote_get(struct dentry *dentry, const char *name, + void *value, size_t size) +{ + struct inode *inode = d_inode(dentry); + struct hmdfs_inode_info *info = hmdfs_i(inode); + struct hmdfs_peer *conn = info->conn; + char *send_buf = NULL; + ssize_t res = 0; + + send_buf = hmdfs_get_dentry_relative_path(dentry); + if (!send_buf) + return -ENOMEM; + + res = hmdfs_send_getxattr(conn, send_buf, name, value, size); + kfree(send_buf); + return res; +} + +static int hmdfs_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, void *value, size_t size) +{ + int res = 0; + struct hmdfs_inode_info *info = hmdfs_i(inode); + size_t r_size = size; + + if (!hmdfs_support_xattr(dentry)) + return -EOPNOTSUPP; + + if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) + return -EOPNOTSUPP; + + if (size > 
HMDFS_XATTR_SIZE_MAX)
+		r_size = HMDFS_XATTR_SIZE_MAX;
+
+	if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL)
+		res = hmdfs_xattr_local_get(dentry, name, value, r_size);
+	else
+		res = hmdfs_xattr_remote_get(dentry, name, value, r_size);
+
+	if (res == -ERANGE && r_size != size) {
+		hmdfs_info("xattr value size over %d is not supported",
+			   HMDFS_XATTR_SIZE_MAX);
+		res = -E2BIG;
+	}
+
+	return res;
+}
+
+static int hmdfs_xattr_local_set(struct dentry *dentry, const char *name,
+				 const void *value, size_t size, int flags)
+{
+	struct path lower_path;
+	int res = 0;
+
+	hmdfs_get_lower_path(dentry, &lower_path);
+	if (value) {
+		res = vfs_setxattr(lower_path.dentry, name, value, size, flags);
+	} else {
+		WARN_ON(flags != XATTR_REPLACE);
+		res = vfs_removexattr(lower_path.dentry, name);
+	}
+
+	hmdfs_put_lower_path(&lower_path);
+	return res;
+}
+
+static int hmdfs_xattr_remote_set(struct dentry *dentry, const char *name,
+				  const void *value, size_t size, int flags)
+{
+	struct inode *inode = d_inode(dentry);
+	struct hmdfs_inode_info *info = hmdfs_i(inode);
+	struct hmdfs_peer *conn = info->conn;
+	char *send_buf = NULL;
+	int res = 0;
+
+	send_buf = hmdfs_get_dentry_relative_path(dentry);
+	if (!send_buf)
+		return -ENOMEM;
+
+	res = hmdfs_send_setxattr(conn, send_buf, name, value, size, flags);
+	kfree(send_buf);
+	return res;
+}
+
+static int hmdfs_xattr_set(const struct xattr_handler *handler,
+			   struct dentry *dentry, struct inode *inode,
+			   const char *name, const void *value,
+			   size_t size, int flags)
+{
+	struct hmdfs_inode_info *info = hmdfs_i(inode);
+
+	if (!hmdfs_support_xattr(dentry))
+		return -EOPNOTSUPP;
+
+	if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
+		return -EOPNOTSUPP;
+
+	if (size > HMDFS_XATTR_SIZE_MAX) {
+		hmdfs_info("xattr value too long: %zu", size);
+		return -E2BIG;
+	}
+
+	if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL)
+		return hmdfs_xattr_local_set(dentry, name, value, size, flags);
+
+	return hmdfs_xattr_remote_set(dentry, name, value, size, flags);
+}
+
+const struct xattr_handler hmdfs_xattr_handler = {
+	.prefix = "", /* catch all */
+	.get = hmdfs_xattr_get,
+	.set = hmdfs_xattr_set,
+};
+
+static const struct xattr_handler *hmdfs_xattr_handlers[] = {
+	&hmdfs_xattr_handler,
+};
+
+#define HMDFS_NODE_EVT_CB_DELAY 2
+
+struct kmem_cache *hmdfs_inode_cachep;
+struct kmem_cache *hmdfs_dentry_cachep;
+
+static void i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+
+	kmem_cache_free(hmdfs_inode_cachep,
+			container_of(inode, struct hmdfs_inode_info,
+				     vfs_inode));
+}
+
+static void hmdfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, i_callback);
+}
+
+static void hmdfs_evict_inode(struct inode *inode)
+{
+	struct hmdfs_inode_info *info = hmdfs_i(inode);
+
+	truncate_inode_pages(&inode->i_data, 0);
+	clear_inode(inode);
+	if (info->inode_type == HMDFS_LAYER_FIRST_DEVICE ||
+	    info->inode_type == HMDFS_LAYER_SECOND_REMOTE)
+		return;
+	if (info->inode_type == HMDFS_LAYER_ZERO ||
+	    info->inode_type == HMDFS_LAYER_OTHER_LOCAL ||
+	    info->inode_type == HMDFS_LAYER_SECOND_LOCAL) {
+		iput(info->lower_inode);
+		info->lower_inode = NULL;
+	}
+}
+
+void hmdfs_put_super(struct super_block *sb)
+{
+	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
+	struct super_block *lower_sb = sbi->lower_sb;
+
+	hmdfs_info("local_dst is %s, local_src is %s", sbi->local_dst,
+		   sbi->local_src);
+
+	hmdfs_fault_inject_fini(&sbi->fault_inject);
+	hmdfs_cfn_destroy(sbi);
+	hmdfs_unregister_sysfs(sbi);
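+	/* Stop peer connections before tearing down writeback and stash state */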
hmdfs_connections_stop(sbi); + hmdfs_destroy_server_writeback(sbi); + hmdfs_exit_stash(sbi); + atomic_dec(&lower_sb->s_active); + put_cred(sbi->cred); + if (sbi->system_cred) + put_cred(sbi->system_cred); + hmdfs_destroy_writeback(sbi); + kfree(sbi->local_src); + kfree(sbi->local_dst); + kfree(sbi->real_dst); + kfree(sbi->cache_dir); + kfifo_free(&sbi->notify_fifo); + sb->s_fs_info = NULL; + sbi->lower_sb = NULL; + hmdfs_release_sysfs(sbi); + /* After all access are completed */ + hmdfs_free_sb_seq(sbi->seq); + kfree(sbi->s_server_statis); + kfree(sbi->s_client_statis); + kfree(sbi); +} + +static struct inode *hmdfs_alloc_inode(struct super_block *sb) +{ + struct hmdfs_inode_info *gi = + kmem_cache_alloc(hmdfs_inode_cachep, GFP_KERNEL); + if (!gi) + return NULL; + memset(gi, 0, offsetof(struct hmdfs_inode_info, vfs_inode)); + INIT_LIST_HEAD(&gi->wb_list); + init_rwsem(&gi->wpage_sem); + gi->getattr_isize = HMDFS_STALE_REMOTE_ISIZE; + atomic64_set(&gi->write_counter, 0); + gi->fid.id = HMDFS_INODE_INVALID_FILE_ID; + spin_lock_init(&gi->fid_lock); + INIT_LIST_HEAD(&gi->wr_opened_node); + atomic_set(&gi->wr_opened_cnt, 0); + init_waitqueue_head(&gi->fid_wq); + INIT_LIST_HEAD(&gi->stash_node); + spin_lock_init(&gi->stash_lock); + return &gi->vfs_inode; +} + +static int hmdfs_remote_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + int error = 0; + int ret = 0; + char *dir_path = NULL; + char *name_path = NULL; + struct hmdfs_peer *con = NULL; + struct hmdfs_sb_info *sbi = hmdfs_sb(dentry->d_inode->i_sb); + + dir_path = hmdfs_get_dentry_relative_path(dentry->d_parent); + if (!dir_path) { + error = -EACCES; + goto rmdir_out; + } + + name_path = hmdfs_connect_path(dir_path, dentry->d_name.name); + if (!name_path) { + error = -EACCES; + goto rmdir_out; + } + mutex_lock(&sbi->connections.node_lock); + list_for_each_entry(con, &sbi->connections.node_list, list) { + if (con->status == NODE_STAT_ONLINE && + con->version > USERSPACE_MAX_VER) { + peer_get(con); + mutex_unlock(&sbi->connections.node_lock); + hmdfs_debug("send MSG to remote devID %llu", + con->device_id); + ret = hmdfs_send_statfs(con, name_path, buf); + if (ret != 0) + error = ret; + peer_put(con); + mutex_lock(&sbi->connections.node_lock); + } + } + mutex_unlock(&sbi->connections.node_lock); + +rmdir_out: + kfree(dir_path); + kfree(name_path); + return error; +} + +static int hmdfs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + int err = 0; + struct path lower_path; + struct hmdfs_inode_info *info = hmdfs_i(dentry->d_inode); + struct super_block *sb = d_inode(dentry)->i_sb; + struct hmdfs_sb_info *sbi = sb->s_fs_info; + + trace_hmdfs_statfs(dentry, info->inode_type); + // merge_view & merge_view/xxx & device_view assigned src_inode info + if (hmdfs_i_merge(info) || + (info->inode_type == HMDFS_LAYER_SECOND_REMOTE)) { + err = kern_path(sbi->local_src, 0, &lower_path); + if (err) + goto out; + err = vfs_statfs(&lower_path, buf); + path_put(&lower_path); + } else if (!IS_ERR_OR_NULL(info->lower_inode)) { + hmdfs_get_lower_path(dentry, &lower_path); + err = vfs_statfs(&lower_path, buf); + hmdfs_put_lower_path(&lower_path); + } else { + err = hmdfs_remote_statfs(dentry, buf); + } + + buf->f_type = HMDFS_SUPER_MAGIC; +out: + return err; +} + +static int hmdfs_show_options(struct seq_file *m, struct dentry *root) +{ + struct hmdfs_sb_info *sbi = hmdfs_sb(root->d_sb); + + if (sbi->s_case_sensitive) + seq_puts(m, ",sensitive"); + else + seq_puts(m, ",insensitive"); + + if (sbi->s_merge_switch) + seq_puts(m, ",merge_enable"); + 
else
+		seq_puts(m, ",merge_disable");
+
+	seq_printf(m, ",ra_pages=%lu", root->d_sb->s_bdi->ra_pages);
+
+	if (sbi->cache_dir)
+		seq_printf(m, ",cache_dir=%s", sbi->cache_dir);
+	if (sbi->real_dst)
+		seq_printf(m, ",real_dst=%s", sbi->real_dst);
+
+	seq_printf(m, ",%soffline_stash", sbi->s_offline_stash ? "" : "no_");
+	seq_printf(m, ",%sdentry_cache", sbi->s_dentry_cache ? "" : "no_");
+
+	return 0;
+}
+
+static int hmdfs_sync_fs(struct super_block *sb, int wait)
+{
+	int time_left;
+	int err = 0;
+	struct hmdfs_peer *con = NULL;
+	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
+	int syncfs_timeout = get_cmd_timeout(sbi, F_SYNCFS);
+	struct syncfs_item item, *entry = NULL, *tmp = NULL;
+
+	if (!wait)
+		return 0;
+
+	trace_hmdfs_syncfs_enter(sbi);
+
+	spin_lock(&sbi->hsi.list_lock);
+	if (!sbi->hsi.is_executing) {
+		sbi->hsi.is_executing = true;
+		item.need_abort = false;
+		spin_unlock(&sbi->hsi.list_lock);
+	} else {
+		init_completion(&item.done);
+		list_add_tail(&item.list, &sbi->hsi.wait_list);
+		spin_unlock(&sbi->hsi.list_lock);
+		wait_for_completion(&item.done);
+	}
+
+	if (item.need_abort)
+		goto out;
+
+	/*
+	 * Syncfs calls cannot run concurrently in hmdfs_sync_fs, because we
+	 * must wait until all remote syncfs calls return or time out, and
+	 * during that wait @sbi->hsi.wait_count and @sbi->hsi.remote_ret
+	 * must be protected from concurrent access.
+	 */
+
+	spin_lock(&sbi->hsi.v_lock);
+	sbi->hsi.version++;
+	/*
+	 * Attention: We put @sbi->hsi.remote_ret and @sbi->hsi.wait_count
+	 * into the spinlock protection area to avoid the following scenario
+	 * caused by out-of-order execution:
+	 *
+	 *	syncfs					syncfs_cb
+	 *	sbi->hsi.remote_ret = 0;
+	 *	atomic_set(&sbi->hsi.wait_count, 0);
+	 *						lock
+	 *						version == old_version
+	 *						sbi->hsi.remote_ret = resp->ret_code
+	 *						atomic_dec(&sbi->hsi.wait_count);
+	 *						unlock
+	 *	lock
+	 *	version = old_version + 1
+	 *	unlock
+	 *
+	 * @sbi->hsi.remote_ret and @sbi->hsi.wait_count could be assigned
+	 * before taking the spinlock, racing with syncfs_cb(); protecting
+	 * both assignments with the spinlock fixes this.
+	 */
+	sbi->hsi.remote_ret = 0;
+	atomic_set(&sbi->hsi.wait_count, 0);
+	spin_unlock(&sbi->hsi.v_lock);
+
+	mutex_lock(&sbi->connections.node_lock);
+	list_for_each_entry(con, &sbi->connections.node_list, list) {
+		/*
+		 * Dirty data does not need to be synchronized to remote
+		 * devices that go offline normally. It's okay to drop
+		 * them.
+		 */
+		if (con->status != NODE_STAT_ONLINE)
+			continue;
+
+		peer_get(con);
+		mutex_unlock(&sbi->connections.node_lock);
+
+		/*
+		 * There exists a gap between sync_inodes_sb() and sync_fs()
+		 * which may race with remote writing, leading to an incorrect
+		 * count in @sb_dirty_count. The dirty data produced during
+		 * the gap period won't be synced in the next syncfs
+		 * operation. To avoid this, we have to invoke
+		 * sync_inodes_sb() again after getting @con->sb_dirty_count.
+		 */
+		con->old_sb_dirty_count = atomic64_read(&con->sb_dirty_count);
+		sync_inodes_sb(sb);
+
+		if (!con->old_sb_dirty_count) {
+			peer_put(con);
+			mutex_lock(&sbi->connections.node_lock);
+			continue;
+		}
+
+		err = hmdfs_send_syncfs(con, syncfs_timeout);
+		if (err) {
+			hmdfs_warning("send syncfs failed with %d on node %llu",
+				      err, con->device_id);
+			sbi->hsi.remote_ret = err;
+			peer_put(con);
+			mutex_lock(&sbi->connections.node_lock);
+			continue;
+		}
+
+		atomic_inc(&sbi->hsi.wait_count);
+
+		peer_put(con);
+		mutex_lock(&sbi->connections.node_lock);
+	}
+	mutex_unlock(&sbi->connections.node_lock);
+
+	/*
+	 * Background async work will make sure @sbi->hsi.wait_count finally
+	 * drops to zero, whether syncfs succeeds or fails.
+	 */
+	time_left = wait_event_interruptible(
+		sbi->hsi.wq, atomic_read(&sbi->hsi.wait_count) == 0);
+	if (time_left < 0) {
+		hmdfs_warning("syncfs is interrupted by external signal");
+		err = -EINTR;
+	}
+
+	if (!err && sbi->hsi.remote_ret)
+		err = sbi->hsi.remote_ret;
+
+	/* Abandon syncfs processes in pending_list */
+	list_for_each_entry_safe(entry, tmp, &sbi->hsi.pending_list, list) {
+		entry->need_abort = true;
+		complete(&entry->done);
+	}
+	INIT_LIST_HEAD(&sbi->hsi.pending_list);
+
+	/* Pick the last syncfs process in wait_list */
+	spin_lock(&sbi->hsi.list_lock);
+	if (list_empty(&sbi->hsi.wait_list)) {
+		sbi->hsi.is_executing = false;
+	} else {
+		entry = list_last_entry(&sbi->hsi.wait_list, struct syncfs_item,
+					list);
+		list_del_init(&entry->list);
+		list_splice_init(&sbi->hsi.wait_list, &sbi->hsi.pending_list);
+		entry->need_abort = false;
+		complete(&entry->done);
+	}
+	spin_unlock(&sbi->hsi.list_lock);
+
+out:
+	trace_hmdfs_syncfs_exit(sbi, atomic_read(&sbi->hsi.wait_count),
+				get_cmd_timeout(sbi, F_SYNCFS), err);
+
+	/* TODO: Return syncfs err back to syscall */
+
+	return err;
+}
+
+struct super_operations hmdfs_sops = {
+	.alloc_inode = hmdfs_alloc_inode,
+	.destroy_inode = hmdfs_destroy_inode,
+	.evict_inode = hmdfs_evict_inode,
+	.put_super = hmdfs_put_super,
+	.statfs = hmdfs_statfs,
+	.show_options = hmdfs_show_options,
+	.sync_fs = hmdfs_sync_fs,
+};
+
+static void init_once(void *obj)
+{
+	struct hmdfs_inode_info *i = obj;
+
+	inode_init_once(&i->vfs_inode);
+}
+
+static int __init hmdfs_init_caches(void)
+{
+	int err = -ENOMEM;
+
+	hmdfs_inode_cachep =
+		kmem_cache_create("hmdfs_inode_cache",
+				  sizeof(struct hmdfs_inode_info), 0,
+				  SLAB_RECLAIM_ACCOUNT, init_once);
+	if (unlikely(!hmdfs_inode_cachep))
+		goto out;
+	hmdfs_dentry_cachep =
+		kmem_cache_create("hmdfs_dentry_cache",
+				  sizeof(struct hmdfs_dentry_info), 0,
+				  SLAB_RECLAIM_ACCOUNT, NULL);
+	if (unlikely(!hmdfs_dentry_cachep))
+		goto out_des_ino;
+	hmdfs_dentry_merge_cachep =
+		kmem_cache_create("hmdfs_dentry_merge_cache",
+				  sizeof(struct hmdfs_dentry_info_merge), 0,
+				  SLAB_RECLAIM_ACCOUNT, NULL);
+	if (unlikely(!hmdfs_dentry_merge_cachep))
+		goto out_des_dc;
+	return 0;
+
+out_des_dc:
+	kmem_cache_destroy(hmdfs_dentry_cachep);
+out_des_ino:
+	kmem_cache_destroy(hmdfs_inode_cachep);
+out:
+	return err;
+}
+
+static void hmdfs_destroy_caches(void)
+{
+	rcu_barrier();
+	kmem_cache_destroy(hmdfs_inode_cachep);
+	hmdfs_inode_cachep = NULL;
+	kmem_cache_destroy(hmdfs_dentry_cachep);
+	hmdfs_dentry_cachep = NULL;
+	kmem_cache_destroy(hmdfs_dentry_merge_cachep);
+	hmdfs_dentry_merge_cachep = NULL;
+}
+
+uint64_t path_hash(const char *path, int len, bool case_sense)
+{
+	uint64_t res = 0;
+	const char *kp = path;
+	char c;
+	/* Mocklisp hash function.
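+	 * Each iteration computes res = res * 31 + c,
+	 * since (res << 5) - res == res * 31.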
*/ + while (*kp) { + c = *kp; + if (!case_sense) + c = tolower(c); + res = (res << 5) - res + (uint64_t)(c); + kp++; + } + return res; +} + +static char *get_full_path(struct path *path) +{ + char *buf, *tmp; + char *ret = NULL; + + buf = kmalloc(PATH_MAX, GFP_KERNEL); + if (!buf) + goto out; + + tmp = d_path(path, buf, PATH_MAX); + if (IS_ERR(tmp)) + goto out; + + ret = kstrdup(tmp, GFP_KERNEL); +out: + kfree(buf); + return ret; +} + +static void hmdfs_init_cmd_timeout(struct hmdfs_sb_info *sbi) +{ + memset(sbi->s_cmd_timeout, 0xff, sizeof(sbi->s_cmd_timeout)); + + set_cmd_timeout(sbi, F_OPEN, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_RELEASE, TIMEOUT_NONE); + set_cmd_timeout(sbi, F_READPAGE, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_WRITEPAGE, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_ITERATE, TIMEOUT_30S); + set_cmd_timeout(sbi, F_CREATE, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_MKDIR, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_RMDIR, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_UNLINK, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_RENAME, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_SETATTR, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_STATFS, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_CONNECT_REKEY, TIMEOUT_NONE); + set_cmd_timeout(sbi, F_DROP_PUSH, TIMEOUT_NONE); + set_cmd_timeout(sbi, F_GETATTR, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_FSYNC, TIMEOUT_90S); + set_cmd_timeout(sbi, F_SYNCFS, TIMEOUT_30S); + set_cmd_timeout(sbi, F_GETXATTR, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_SETXATTR, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_LISTXATTR, TIMEOUT_COMMON); +} + +static int hmdfs_init_sbi(struct hmdfs_sb_info *sbi) +{ + int ret; + + ret = kfifo_alloc(&sbi->notify_fifo, PAGE_SIZE, GFP_KERNEL); + if (ret) + goto out; + + /* + * We have to use dynamic memory since struct server/client_statistic + * are DECLARED in hmdfs.h but DEFINED in socket_adapter.h. 
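+	 * (hmdfs.h can only hold pointers to the incomplete types, so the
+	 * per-command statistics arrays must be allocated at run time.)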
+ */ + sbi->s_server_statis = + kzalloc(sizeof(*sbi->s_server_statis) * F_SIZE, GFP_KERNEL); + sbi->s_client_statis = + kzalloc(sizeof(*sbi->s_client_statis) * F_SIZE, GFP_KERNEL); + if (!sbi->s_server_statis || !sbi->s_client_statis) { + ret = -ENOMEM; + goto out; + } + + ret = hmdfs_alloc_sb_seq(); + if (ret < 0) { + hmdfs_err("no sb seq available err %d", ret); + goto out; + } + sbi->seq = ret; + ret = 0; + + spin_lock_init(&sbi->notify_fifo_lock); + sbi->s_case_sensitive = false; + sbi->s_features = HMDFS_FEATURE_READPAGES | + HMDFS_FEATURE_READPAGES_OPEN | + HMDFS_ATOMIC_OPEN; + sbi->s_merge_switch = false; + sbi->dcache_threshold = DEFAULT_DCACHE_THRESHOLD; + sbi->dcache_precision = DEFAULT_DCACHE_PRECISION; + sbi->dcache_timeout = DEFAULT_DCACHE_TIMEOUT; + sbi->write_cache_timeout = DEFAULT_WRITE_CACHE_TIMEOUT; + hmdfs_init_cmd_timeout(sbi); + sbi->async_cb_delay = HMDFS_NODE_EVT_CB_DELAY; + sbi->async_req_max_active = DEFAULT_SRV_REQ_MAX_ACTIVE; + sbi->s_offline_stash = true; + sbi->s_dentry_cache = true; + sbi->wb_timeout_ms = HMDFS_DEF_WB_TIMEOUT_MS; + /* Initialize before hmdfs_register_sysfs() */ + atomic_set(&sbi->connections.conn_seq, 0); + mutex_init(&sbi->connections.node_lock); + INIT_LIST_HEAD(&sbi->connections.node_list); + + init_waitqueue_head(&sbi->async_readdir_wq); + INIT_LIST_HEAD(&sbi->async_readdir_msg_list); + INIT_LIST_HEAD(&sbi->async_readdir_work_list); + spin_lock_init(&sbi->async_readdir_msg_lock); + spin_lock_init(&sbi->async_readdir_work_lock); + + return 0; + +out: + return ret; +} + +void hmdfs_client_resp_statis(struct hmdfs_sb_info *sbi, u8 cmd, + enum hmdfs_resp_type type, unsigned long start, + unsigned long end) +{ + unsigned long duration; + + switch (type) { + case HMDFS_RESP_DELAY: + sbi->s_client_statis[cmd].delay_resp_cnt++; + break; + case HMDFS_RESP_TIMEOUT: + sbi->s_client_statis[cmd].timeout_cnt++; + break; + case HMDFS_RESP_NORMAL: + duration = end - start; + sbi->s_client_statis[cmd].total += duration; + sbi->s_client_statis[cmd].resp_cnt++; + if (sbi->s_client_statis[cmd].max < duration) + sbi->s_client_statis[cmd].max = duration; + break; + default: + hmdfs_err("Wrong cmd %d with resp type %d", cmd, type); + } +} + +static int hmdfs_update_dst(struct hmdfs_sb_info *sbi) +{ + int err = 0; + const char *path_local = UPDATE_LOCAL_DST; + int len = 0; + + sbi->real_dst = kstrdup(sbi->local_dst, GFP_KERNEL); + if (!sbi->real_dst) { + err = -ENOMEM; + goto out_err; + } + kfree(sbi->local_dst); + + len = strlen(sbi->real_dst) + strlen(path_local) + 1; + if (len > PATH_MAX) { + err = -EINVAL; + goto out_err; + } + sbi->local_dst = kmalloc(len, GFP_KERNEL); + if (!sbi->local_dst) { + err = -ENOMEM; + goto out_err; + } + snprintf(sbi->local_dst, strlen(sbi->real_dst) + strlen(path_local) + 1, + "%s%s", sbi->real_dst, path_local); +out_err: + return err; +} + +/* + * Generate boot cookie like following format: + * + * | random | boot time(ms) | 0x00 | + * |--------|-----------------|-------| + * 16 33 15 (bits) + * + * This will make sure boot cookie is unique in a period + * 2^33 / 1000 / 3600 / 24 = 99.4(days). 
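+ *
+ * For example, rand = 0x1234 with a boot time of 5000 ms yields
+ * cookie = ((0x1234ULL << 33) | 5000) << HMDFS_FID_VER_BOOT_COOKIE_SHIFT,
+ * leaving the low bits zero as shown in the layout above.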
+ */ +uint64_t hmdfs_gen_boot_cookie(void) +{ + uint64_t now; + uint16_t rand; + + now = ktime_to_ms(ktime_get()); + prandom_bytes(&rand, sizeof(rand)); + + now &= (1ULL << HMDFS_BOOT_COOKIE_RAND_SHIFT) - 1; + now |= ((uint64_t)rand << HMDFS_BOOT_COOKIE_RAND_SHIFT); + + return now << HMDFS_FID_VER_BOOT_COOKIE_SHIFT; +} + +static int hmdfs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct hmdfs_mount_priv *priv = (struct hmdfs_mount_priv *)data; + const char *dev_name = priv->dev_name; + const char *raw_data = priv->raw_data; + struct hmdfs_sb_info *sbi; + int err = 0; + struct inode *root_inode; + struct path lower_path; + struct super_block *lower_sb; + struct dentry *root_dentry; + char ctrl_path[CTRL_PATH_MAX_LEN]; + uint64_t ctrl_hash; + + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) { + err = -ENOMEM; + goto out_err; + } + err = hmdfs_init_sbi(sbi); + if (err) + goto out_freesbi; + sbi->sb = sb; + err = hmdfs_parse_options(sbi, raw_data); + if (err) + goto out_freesbi; + + sb->s_fs_info = sbi; + sb->s_magic = HMDFS_SUPER_MAGIC; + sb->s_xattr = hmdfs_xattr_handlers; + sb->s_op = &hmdfs_sops; + + sbi->boot_cookie = hmdfs_gen_boot_cookie(); + + err = hmdfs_init_writeback(sbi); + if (err) + goto out_freesbi; + err = hmdfs_init_server_writeback(sbi); + if (err) + goto out_freesbi; + + err = hmdfs_init_stash(sbi); + if (err) + goto out_freesbi; + + // add ctrl sysfs node + ctrl_hash = path_hash(sbi->local_dst, strlen(sbi->local_dst), true); + scnprintf(ctrl_path, CTRL_PATH_MAX_LEN, "%llu", ctrl_hash); + hmdfs_debug("hash %llu", ctrl_hash); + err = hmdfs_register_sysfs(ctrl_path, sbi); + if (err) + goto out_freesbi; + + err = hmdfs_update_dst(sbi); + if (err) + goto out_unreg_sysfs; + + err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, + &lower_path); + if (err) { + hmdfs_err("open dev failed, errno = %d", err); + goto out_unreg_sysfs; + } + + lower_sb = lower_path.dentry->d_sb; + atomic_inc(&lower_sb->s_active); + sbi->lower_sb = lower_sb; + sbi->local_src = get_full_path(&lower_path); + if (!sbi->local_src) { + hmdfs_err("get local_src failed!"); + goto out_sput; + } + + sb->s_time_gran = lower_sb->s_time_gran; + sb->s_maxbytes = lower_sb->s_maxbytes; + sb->s_stack_depth = lower_sb->s_stack_depth + 1; + if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { + hmdfs_err("maximum fs stacking depth exceeded"); + err = -EINVAL; + goto out_sput; + } + root_inode = fill_root_inode(sb, d_inode(lower_path.dentry)); + if (IS_ERR(root_inode)) { + err = PTR_ERR(root_inode); + goto out_sput; + } + hmdfs_root_inode_perm_init(root_inode); + sb->s_root = root_dentry = d_make_root(root_inode); + if (!root_dentry) { + err = -ENOMEM; + goto out_sput; + } + + err = init_hmdfs_dentry_info(sbi, root_dentry, HMDFS_LAYER_ZERO); + if (err) + goto out_freeroot; + hmdfs_set_lower_path(root_dentry, &lower_path); + d_rehash(sb->s_root); + sbi->cred = get_cred(current_cred()); + INIT_LIST_HEAD(&sbi->client_cache); + INIT_LIST_HEAD(&sbi->server_cache); + INIT_LIST_HEAD(&sbi->to_delete); + mutex_init(&sbi->cache_list_lock); + hmdfs_cfn_load(sbi); + + /* Initialize syncfs info */ + spin_lock_init(&sbi->hsi.v_lock); + init_waitqueue_head(&sbi->hsi.wq); + sbi->hsi.version = 0; + sbi->hsi.is_executing = false; + INIT_LIST_HEAD(&sbi->hsi.wait_list); + INIT_LIST_HEAD(&sbi->hsi.pending_list); + spin_lock_init(&sbi->hsi.list_lock); + hmdfs_fault_inject_init(&sbi->fault_inject, ctrl_path); + + return err; +out_freeroot: + dput(sb->s_root); + sb->s_root = NULL; +out_sput: + 
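+	/* Drop the lower sb reference taken by atomic_inc() above */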
atomic_dec(&lower_sb->s_active); + path_put(&lower_path); +out_unreg_sysfs: + hmdfs_unregister_sysfs(sbi); + hmdfs_release_sysfs(sbi); +out_freesbi: + if (sbi) { + sb->s_fs_info = NULL; + hmdfs_exit_stash(sbi); + hmdfs_destroy_writeback(sbi); + hmdfs_destroy_server_writeback(sbi); + kfifo_free(&sbi->notify_fifo); + hmdfs_free_sb_seq(sbi->seq); + kfree(sbi->local_src); + kfree(sbi->local_dst); + kfree(sbi->real_dst); + kfree(sbi->cache_dir); + kfree(sbi->s_server_statis); + kfree(sbi->s_client_statis); + kfree(sbi); + } +out_err: + return err; +} + +static struct dentry *hmdfs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) +{ + struct hmdfs_mount_priv priv = { + .dev_name = dev_name, + .raw_data = raw_data, + }; + return mount_nodev(fs_type, flags, &priv, hmdfs_fill_super); +} + + +static void hmdfs_cancel_async_readdir(struct hmdfs_sb_info *sbi) +{ + struct sendmsg_wait_queue *msg_wq = NULL; + struct hmdfs_readdir_work *rw = NULL; + struct hmdfs_readdir_work *tmp = NULL; + struct list_head del_work; + + /* cancel work that are not running */ + + INIT_LIST_HEAD(&del_work); + spin_lock(&sbi->async_readdir_work_lock); + list_for_each_entry_safe(rw, tmp, &sbi->async_readdir_work_list, head) { + if (cancel_delayed_work(&rw->dwork)) + list_move(&rw->head, &del_work); + } + spin_unlock(&sbi->async_readdir_work_lock); + + list_for_each_entry_safe(rw, tmp, &del_work, head) { + dput(rw->dentry); + peer_put(rw->con); + kfree(rw); + } + + /* wake up async readdir that are waiting for remote */ + spin_lock(&sbi->async_readdir_msg_lock); + sbi->async_readdir_prohibit = true; + list_for_each_entry(msg_wq, &sbi->async_readdir_msg_list, async_msg) + hmdfs_response_wakeup(msg_wq, -EINTR, 0, NULL); + spin_unlock(&sbi->async_readdir_msg_lock); + + /* wait for all async readdir to finish */ + if (!list_empty(&sbi->async_readdir_work_list)) + wait_event_interruptible_timeout(sbi->async_readdir_wq, + (list_empty(&sbi->async_readdir_work_list)), HZ); + + WARN_ON(!(list_empty(&sbi->async_readdir_work_list))); +} + +static void hmdfs_kill_super(struct super_block *sb) +{ + struct hmdfs_sb_info *sbi = hmdfs_sb(sb); + + /* + * async readdir is holding ref for dentry, not for vfsmount. Thus + * shrink_dcache_for_umount() will warn about dentry still in use + * if async readdir is not done. 
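+ * Hence cancel and drain all async readdir work before calling
+ * kill_anon_super().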
+ */
+	if (sbi)
+		hmdfs_cancel_async_readdir(sbi);
+	kill_anon_super(sb);
+}
+
+static struct file_system_type hmdfs_fs_type = {
+	.owner = THIS_MODULE,
+	.name = "hmdfs",
+	.mount = hmdfs_mount,
+	.kill_sb = hmdfs_kill_super,
+};
+
+static int __init hmdfs_init(void)
+{
+	int err = 0;
+
+	err = hmdfs_init_caches();
+	if (err)
+		goto out_err;
+
+	hmdfs_node_evt_cb_init();
+
+	hmdfs_stash_add_node_evt_cb();
+	hmdfs_client_add_node_evt_cb();
+	hmdfs_server_add_node_evt_cb();
+
+	err = register_filesystem(&hmdfs_fs_type);
+	if (err) {
+		hmdfs_err("hmdfs register failed!");
+		goto out_err;
+	}
+	err = hmdfs_sysfs_init();
+	if (err)
+		goto out_err;
+
+	hmdfs_message_verify_init();
+	hmdfs_create_debugfs_root();
+	return 0;
+out_err:
+	hmdfs_sysfs_exit();
+	unregister_filesystem(&hmdfs_fs_type);
+	hmdfs_destroy_caches();
+	hmdfs_err("hmdfs init failed!");
+	return err;
+}
+
+static void __exit hmdfs_exit(void)
+{
+	hmdfs_destroy_debugfs_root();
+	hmdfs_sysfs_exit();
+	unregister_filesystem(&hmdfs_fs_type);
+	ida_destroy(&hmdfs_sb_seq);
+	hmdfs_destroy_caches();
+	hmdfs_info("hmdfs exited!");
+}
+
+module_init(hmdfs_init);
+module_exit(hmdfs_exit);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(hmdfs_recv_mesg_callback);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("LongPing.WEI, Jingjing.Mao");
+MODULE_DESCRIPTION("Harmony distributed file system");
diff --git a/fs/hmdfs/server_writeback.c b/fs/hmdfs/server_writeback.c
new file mode 100644
index 0000000000000000000000000000000000000000..b3a18ff67691e879d93b756f5dd48c66e6cb5937
--- /dev/null
+++ b/fs/hmdfs/server_writeback.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/server_writeback.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include <linux/backing-dev.h>
+#include <linux/workqueue.h>
+#include <linux/writeback.h>
+
+#include "hmdfs.h"
+#include "hmdfs_trace.h"
+#include "server_writeback.h"
+
+#define HMDFS_SRV_WB_DEF_DIRTY_THRESH 50UL
+
+static void hmdfs_srv_wb_handler(struct work_struct *work)
+{
+	struct hmdfs_server_writeback *hswb = container_of(work,
+				struct hmdfs_server_writeback,
+				dirty_sb_writeback_work);
+	struct super_block *lower_sb = hswb->sbi->lower_sb;
+	int dirty_pages;
+
+	if (writeback_in_progress(&lower_sb->s_bdi->wb) ||
+	    !down_read_trylock(&lower_sb->s_umount))
+		return;
+
+	dirty_pages = hswb->dirty_nr_pages_to_wb;
+	writeback_inodes_sb_nr(lower_sb, dirty_pages, WB_REASON_FS_FREE_SPACE);
+	up_read(&lower_sb->s_umount);
+
+	trace_hmdfs_start_srv_wb(hswb->sbi, dirty_pages, hswb->dirty_thresh_pg);
+}
+
+void hmdfs_server_check_writeback(struct hmdfs_server_writeback *hswb)
+{
+	unsigned long old_time, now;
+	int dirty_nr_pages;
+
+	old_time = hswb->last_reset_time;
+	now = jiffies;
+	dirty_nr_pages = atomic_inc_return(&hswb->dirty_nr_pages);
+	if (time_after(now, old_time + HZ) &&
+	    cmpxchg(&hswb->last_reset_time, old_time, now) == old_time) {
+		/*
+		 * We calculate the speed of page dirtying to handle
+		 * the following situations:
+		 *
+		 * 1. Dense writing, average page writing speed
+		 *    exceeds @hswb->dirty_thresh_pg:
+		 *    0-1s 100MB
+		 * 2. Sporadic writing, average page writing speed
+		 *    stays below @hswb->dirty_thresh_pg:
+		 *    0-0.1s 40MB
+		 *    3.1-3.2 20MB
+		 */
+		unsigned int writepage_speed;
+
+		writepage_speed = dirty_nr_pages / ((now - old_time) / HZ);
+		if (writepage_speed >= hswb->dirty_thresh_pg) {
+			/*
+			 * Writeback @hswb->dirty_nr_pages_to_wb pages in
+			 * server-writeback work. If the work is delayed for
+			 * more than 1s, @hswb->dirty_nr_pages_to_wb could be
+			 * assigned another new value (e.g. 60MB) and the old
+			 * value (e.g. 80MB) will be overwritten, which means
+			 * 80MB of data will never be written back. We can
+			 * tolerate this situation: if the previous work has
+			 * not completed, the writeback pressure is already
+			 * too high, so it is meaningless to continue
+			 * subsequent work.
+			 */
+			hswb->dirty_nr_pages_to_wb = dirty_nr_pages;
+			/*
+			 * There are 3 conditions to trigger queuing work:
+			 *
+			 * A. Server successfully handles writepage for client
+			 * B. Every 1 second interval
+			 * C. Speed of page dirtying exceeds @dirty_thresh_pg
+			 */
+			queue_work(hswb->dirty_writeback_wq,
+				   &hswb->dirty_sb_writeback_work);
+		}
+
+		/*
+		 * There is no need to count dirty pages from the remote
+		 * client very accurately. Other processes may bump the
+		 * count in the gap between the increment and the zeroing;
+		 * that inaccuracy is acceptable.
+		 */
+		atomic_set(&hswb->dirty_nr_pages, 0);
+	}
+}
+
+void hmdfs_destroy_server_writeback(struct hmdfs_sb_info *sbi)
+{
+	if (!sbi->h_swb)
+		return;
+
+	flush_work(&sbi->h_swb->dirty_sb_writeback_work);
+	destroy_workqueue(sbi->h_swb->dirty_writeback_wq);
+	kfree(sbi->h_swb);
+	sbi->h_swb = NULL;
+}
+
+int hmdfs_init_server_writeback(struct hmdfs_sb_info *sbi)
+{
+	struct hmdfs_server_writeback *hswb;
+	char name[HMDFS_WQ_NAME_LEN];
+
+	hswb = kzalloc(sizeof(struct hmdfs_server_writeback), GFP_KERNEL);
+	if (!hswb)
+		return -ENOMEM;
+
+	hswb->sbi = sbi;
+	hswb->dirty_writeback_control = true;
+	hswb->dirty_thresh_pg = HMDFS_SRV_WB_DEF_DIRTY_THRESH <<
+				HMDFS_MB_TO_PAGE_SHIFT;
+	atomic_set(&hswb->dirty_nr_pages, 0);
+	hswb->last_reset_time = jiffies;
+
+	snprintf(name, sizeof(name), "dfs_srv_wb%u", sbi->seq);
+	hswb->dirty_writeback_wq = create_singlethread_workqueue(name);
+	if (!hswb->dirty_writeback_wq) {
+		hmdfs_err("Failed to create server writeback workqueue!");
+		kfree(hswb);
+		return -ENOMEM;
+	}
+	INIT_WORK(&hswb->dirty_sb_writeback_work, hmdfs_srv_wb_handler);
+	sbi->h_swb = hswb;
+
+	return 0;
+}
+
diff --git a/fs/hmdfs/server_writeback.h b/fs/hmdfs/server_writeback.h
new file mode 100644
index 0000000000000000000000000000000000000000..eb645e6391e9dd4c46deacf48a41711fc3191e0b
--- /dev/null
+++ b/fs/hmdfs/server_writeback.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/server_writeback.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */ + +#ifndef SERVER_WRITEBACK_H +#define SERVER_WRITEBACK_H + +#include "hmdfs.h" + +#define HMDFS_MB_TO_PAGE_SHIFT (20 - HMDFS_PAGE_OFFSET) + +struct hmdfs_server_writeback { + struct hmdfs_sb_info *sbi; + /* Enable hmdfs server dirty writeback control */ + bool dirty_writeback_control; + + /* Current # of dirty pages from remote client in recent 1s */ + atomic_t dirty_nr_pages; + /* Current # of dirty pages to writeback */ + int dirty_nr_pages_to_wb; + /* Dirty thresh(Dirty data pages in 1s) to trigger wb */ + unsigned int dirty_thresh_pg; + /* Last reset timestamp(in jiffies) for @dirty_nr_pages */ + unsigned long last_reset_time; + + struct workqueue_struct *dirty_writeback_wq; + /* Per-fs pages from client writeback work */ + struct work_struct dirty_sb_writeback_work; +}; + +void hmdfs_server_check_writeback(struct hmdfs_server_writeback *hswb); + +void hmdfs_destroy_server_writeback(struct hmdfs_sb_info *sbi); + +int hmdfs_init_server_writeback(struct hmdfs_sb_info *sbi); + +#endif diff --git a/fs/hmdfs/stash.c b/fs/hmdfs/stash.c new file mode 100644 index 0000000000000000000000000000000000000000..c320af7f60e0d42372c39a74709e1cdff7f36c74 --- /dev/null +++ b/fs/hmdfs/stash.c @@ -0,0 +1,2247 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/stash.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "stash.h" +#include "comm/node_cb.h" +#include "comm/protocol.h" +#include "comm/connection.h" +#include "file_remote.h" +#include "hmdfs_dentryfile.h" +#include "authority/authentication.h" + +/* Head magic used to identify a stash file */ +#define HMDFS_STASH_FILE_HEAD_MAGIC 0xF7AB06C3 +/* Head and path in stash file are aligned with HMDFS_STASH_BLK_SIZE */ +#define HMDFS_STASH_BLK_SIZE 4096 +#define HMDFS_STASH_BLK_SHIFT 12 +#define HMDFS_STASH_PAGE_TO_SECTOR_SHIFT 3 +#define HMDFS_STASH_DIR_NAME "stash" +#define HMDFS_STASH_FMT_DIR_NAME "v1" +#define HMDFS_STASH_WORK_DIR_NAME \ + (HMDFS_STASH_DIR_NAME "/" HMDFS_STASH_FMT_DIR_NAME) + +#define HMDFS_STASH_FILE_NAME_LEN 20 + +#define HMDFS_STASH_FLUSH_CNT 2 + +#define HMDFS_STASH_PATH_LEN (HMDFS_CID_SIZE + HMDFS_STASH_FILE_NAME_LEN + 1) + +struct hmdfs_cache_file_head { + __le32 magic; + __le32 crc_offset; + __le64 ino; + __le64 size; + __le64 blocks; + __le64 last_write_pos; + __le64 ctime; + __le32 ctime_nsec; + __le32 change_detect_cap; + __le64 ichange_count; + __le32 path_offs; + __le32 path_len; + __le32 path_cnt; + __le32 data_offs; + /* Attention: expand new fields in here to compatible with old ver */ + __le32 crc32; +} __packed; + +struct hmdfs_stash_work { + struct hmdfs_peer *conn; + struct list_head *list; + struct work_struct work; + struct completion done; +}; + +struct hmdfs_inode_tbl { + unsigned int cnt; + unsigned int max; + uint64_t inodes[0]; +}; + +struct hmdfs_stash_dir_context { + struct dir_context dctx; + char name[NAME_MAX + 1]; + struct hmdfs_inode_tbl *tbl; +}; + +struct hmdfs_restore_stats { + unsigned int succeed; + unsigned int fail; + unsigned int keep; + unsigned long long ok_pages; + unsigned long long fail_pages; +}; + +struct hmdfs_stash_stats { + unsigned int succeed; + unsigned int donothing; + unsigned int fail; + unsigned long long ok_pages; + unsigned long long fail_pages; +}; + +struct hmdfs_file_restore_ctx { + struct hmdfs_peer *conn; + struct path src_dir_path; + struct path dst_root_path; + char *dst; + char *page; + struct file 
*src_filp; + uint64_t inum; + uint64_t pages; + unsigned int seq; + unsigned int data_offs; + /* output */ + bool keep; +}; + +struct hmdfs_copy_args { + struct file *src; + struct file *dst; + void *buf; + size_t buf_len; + unsigned int seq; + unsigned int data_offs; + uint64_t inum; +}; + +struct hmdfs_copy_ctx { + struct hmdfs_copy_args args; + loff_t src_pos; + loff_t dst_pos; + /* output */ + size_t copied; + bool eof; +}; + +struct hmdfs_rebuild_stats { + unsigned int succeed; + unsigned int total; + unsigned int fail; + unsigned int invalid; +}; + +struct hmdfs_check_work { + struct hmdfs_peer *conn; + struct work_struct work; + struct completion done; +}; + +typedef int (*stash_operation_func)(struct hmdfs_peer *, + unsigned int, + struct path *, + const struct hmdfs_inode_tbl *, + void *); + +static struct dentry *hmdfs_do_vfs_mkdir(struct dentry *parent, + const char *name, int namelen, + umode_t mode) +{ + struct inode *dir = d_inode(parent); + struct dentry *child = NULL; + int err; + + inode_lock_nested(dir, I_MUTEX_PARENT); + + child = lookup_one_len(name, parent, namelen); + if (IS_ERR(child)) + goto out; + + if (d_is_positive(child)) { + if (d_can_lookup(child)) + goto out; + + dput(child); + child = ERR_PTR(-EINVAL); + goto out; + } + + err = vfs_mkdir(dir, child, mode); + if (err) { + dput(child); + child = ERR_PTR(err); + goto out; + } + +out: + inode_unlock(dir); + return child; +} + +struct dentry *hmdfs_stash_new_work_dir(struct dentry *parent) +{ + struct dentry *base = NULL; + struct dentry *work = NULL; + + base = hmdfs_do_vfs_mkdir(parent, HMDFS_STASH_DIR_NAME, + strlen(HMDFS_STASH_DIR_NAME), 0700); + if (IS_ERR(base)) + return base; + + work = hmdfs_do_vfs_mkdir(base, HMDFS_STASH_FMT_DIR_NAME, + strlen(HMDFS_STASH_FMT_DIR_NAME), 0700); + dput(base); + + return work; +} + +static struct file *hmdfs_new_stash_file(struct path *d_path, const char *cid) +{ + struct dentry *parent = NULL; + struct dentry *child = NULL; + struct file *filp = NULL; + struct path stash; + int err; + + parent = hmdfs_do_vfs_mkdir(d_path->dentry, cid, strlen(cid), 0700); + if (IS_ERR(parent)) { + err = PTR_ERR(parent); + hmdfs_err("mkdir error %d", err); + goto mkdir_err; + } + + child = vfs_tmpfile(parent, S_IFREG | 0600, 0); + if (IS_ERR(child)) { + err = PTR_ERR(child); + hmdfs_err("new stash file error %d", err); + goto tmpfile_err; + } + + stash.mnt = d_path->mnt; + stash.dentry = child; + filp = dentry_open(&stash, O_LARGEFILE | O_WRONLY, current_cred()); + if (IS_ERR(filp)) { + err = PTR_ERR(filp); + hmdfs_err("open stash file error %d", err); + goto open_err; + } + + dput(child); + dput(parent); + + return filp; + +open_err: + dput(child); +tmpfile_err: + dput(parent); +mkdir_err: + return ERR_PTR(err); +} + +static inline bool hmdfs_is_dir(struct dentry *child) +{ + return d_is_positive(child) && d_can_lookup(child); +} + +static inline bool hmdfs_is_reg(struct dentry *child) +{ + return d_is_positive(child) && d_is_reg(child); +} + +static void hmdfs_set_stash_file_head(const struct hmdfs_cache_info *cache, + uint64_t ino, + struct hmdfs_cache_file_head *head) +{ + long long blocks; + unsigned int crc_offset; + + memset(head, 0, sizeof(*head)); + head->magic = cpu_to_le32(HMDFS_STASH_FILE_HEAD_MAGIC); + head->ino = cpu_to_le64(ino); + head->size = cpu_to_le64(i_size_read(file_inode(cache->cache_file))); + blocks = atomic64_read(&cache->written_pgs) << + HMDFS_STASH_PAGE_TO_SECTOR_SHIFT; + head->blocks = cpu_to_le64(blocks); + head->path_offs = cpu_to_le32(cache->path_offs); + 
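+	/* path_offs and data_offs are counted in HMDFS_STASH_BLK_SIZE blocks */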
head->path_len = cpu_to_le32(cache->path_len); + head->path_cnt = cpu_to_le32(cache->path_cnt); + head->data_offs = cpu_to_le32(cache->data_offs); + crc_offset = offsetof(struct hmdfs_cache_file_head, crc32); + head->crc_offset = cpu_to_le32(crc_offset); + head->crc32 = cpu_to_le32(crc32(0, head, crc_offset)); +} + +static int hmdfs_flush_stash_file_metadata(struct hmdfs_inode_info *info) +{ + struct hmdfs_cache_info *cache = NULL; + struct hmdfs_peer *conn = info->conn; + struct hmdfs_cache_file_head cache_head; + size_t written; + loff_t pos; + unsigned int head_size; + + /* No metadata if no cache file info */ + cache = info->cache; + if (!cache) + return -EINVAL; + + if (strlen(cache->path) == 0) { + long long to_write_pgs = atomic64_read(&cache->to_write_pgs); + + /* Nothing to stash. No need to flush meta data. */ + if (to_write_pgs == 0) + return 0; + + hmdfs_err("peer 0x%x:0x%llx inode 0x%llx lost %lld pages due to no path", + conn->owner, conn->device_id, + info->remote_ino, to_write_pgs); + return -EINVAL; + } + + hmdfs_set_stash_file_head(cache, info->remote_ino, &cache_head); + + /* Write head */ + pos = 0; + head_size = sizeof(cache_head); + written = kernel_write(cache->cache_file, &cache_head, head_size, &pos); + if (written != head_size) { + hmdfs_err("stash peer 0x%x:0x%llx ino 0x%llx write head len %u err %zd", + conn->owner, conn->device_id, info->remote_ino, + head_size, written); + return -EIO; + } + /* Write path */ + pos = (loff_t)cache->path_offs << HMDFS_STASH_BLK_SHIFT; + written = kernel_write(cache->cache_file, cache->path, cache->path_len, + &pos); + if (written != cache->path_len) { + hmdfs_err("stash peer 0x%x:0x%llx ino 0x%llx write path len %u err %zd", + conn->owner, conn->device_id, info->remote_ino, + cache->path_len, written); + return -EIO; + } + + return 0; +} + +/* Mainly from inode_wait_for_writeback() */ +static void hmdfs_wait_remote_writeback_once(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + struct inode *inode = &info->vfs_inode; + DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); + wait_queue_head_t *wq_head = NULL; + bool in_sync = false; + + spin_lock(&inode->i_lock); + in_sync = inode->i_state & I_SYNC; + spin_unlock(&inode->i_lock); + + if (!in_sync) + return; + + hmdfs_info("peer 0x%x:0x%llx ino 0x%llx wait for wb once", + conn->owner, conn->device_id, info->remote_ino); + + wq_head = bit_waitqueue(&inode->i_state, __I_SYNC); + __wait_on_bit(wq_head, &wq, bit_wait, TASK_UNINTERRUPTIBLE); +} + +static void hmdfs_reset_remote_write_err(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + struct address_space *mapping = info->vfs_inode.i_mapping; + int flags_err; + errseq_t old; + int wb_err; + + flags_err = filemap_check_errors(mapping); + + old = errseq_sample(&mapping->wb_err); + wb_err = errseq_check_and_advance(&mapping->wb_err, &old); + if (flags_err || wb_err) + hmdfs_warning("peer 0x%x:0x%llx inode 0x%llx wb error %d %d before stash", + conn->owner, conn->device_id, info->remote_ino, + flags_err, wb_err); +} + +static bool hmdfs_is_mapping_clean(struct address_space *mapping) +{ + bool clean = false; + + /* b93b016313b3b ("page cache: use xa_lock") introduces i_pages */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + xa_lock_irq(&mapping->i_pages); +#else + spin_lock_irq(&mapping->tree_lock); +#endif + clean = !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && + !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + xa_unlock_irq(&mapping->i_pages); +#else 
+ spin_unlock_irq(&mapping->tree_lock); +#endif + return clean; +} + +static int hmdfs_flush_stash_file_data(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + struct inode *inode = &info->vfs_inode; + struct address_space *mapping = inode->i_mapping; + bool all_clean = true; + int err = 0; + int i; + + /* Wait for the completion of write syscall */ + inode_lock(inode); + inode_unlock(inode); + + all_clean = hmdfs_is_mapping_clean(mapping); + if (all_clean) { + hmdfs_reset_remote_write_err(conn, info); + return 0; + } + + /* + * No-sync_all writeback during offline may have not seen + * the setting of stash_status as HMDFS_REMOTE_INODE_STASHING + * and will call mapping_set_error() after we just reset + * the previous error. So waiting for these writeback once, + * and the following writeback will do local write. + */ + hmdfs_wait_remote_writeback_once(conn, info); + + /* Need to clear previous error ? */ + hmdfs_reset_remote_write_err(conn, info); + + /* + * 1. dirty page: do write back + * 2. writeback page: wait for its completion + * 3. writeback -> redirty page: do filemap_write_and_wait() + * twice, so 2th writeback should not allow + * writeback -> redirty transition + */ + for (i = 0; i < HMDFS_STASH_FLUSH_CNT; i++) { + err = filemap_write_and_wait(mapping); + if (err) { + hmdfs_err("peer 0x%x:0x%llx inode 0x%llx #%d stash flush error %d", + conn->owner, conn->device_id, + info->remote_ino, i, err); + return err; + } + } + + if (!hmdfs_is_mapping_clean(mapping)) + hmdfs_err("peer 0x%x:0x%llx inode 0x%llx is still dirty dt %d wb %d", + conn->owner, conn->device_id, info->remote_ino, + !!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY), + !!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)); + + return 0; +} + +static int hmdfs_flush_stash_file(struct hmdfs_inode_info *info) +{ + int err; + + err = hmdfs_flush_stash_file_data(info->conn, info); + if (!err) + err = hmdfs_flush_stash_file_metadata(info); + + return err; +} + +static int hmdfs_enable_stash_file(struct hmdfs_inode_info *info, + struct dentry *stash) +{ + char name[HMDFS_STASH_FILE_NAME_LEN]; + struct dentry *parent = NULL; + struct inode *dir = NULL; + struct dentry *child = NULL; + int err = 0; + bool retried = false; + + snprintf(name, sizeof(name), "0x%llx", info->remote_ino); + + parent = lock_parent(stash); + dir = d_inode(parent); + +lookup_again: + child = lookup_one_len(name, parent, strlen(name)); + if (IS_ERR(child)) { + err = PTR_ERR(child); + child = NULL; + hmdfs_err("lookup %s err %d", name, err); + goto out; + } + + if (d_is_positive(child)) { + hmdfs_warning("%s exists (mode 0%o)", + name, d_inode(child)->i_mode); + + err = vfs_unlink(dir, child, NULL); + if (err) { + hmdfs_err("unlink %s err %d", name, err); + goto out; + } + if (retried) { + err = -EEXIST; + goto out; + } + + retried = true; + dput(child); + goto lookup_again; + } + + err = vfs_link(stash, dir, child, NULL); + if (err) { + hmdfs_err("link stash file to %s err %d", name, err); + goto out; + } + +out: + unlock_dir(parent); + if (child) + dput(child); + + return err; +} + +/* Return 1 if stash is done, 0 if nothing is stashed */ +static int hmdfs_close_stash_file(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + struct file *cache_file = info->cache->cache_file; + struct dentry *c_dentry = file_dentry(cache_file); + struct inode *c_inode = d_inode(c_dentry); + long long to_write_pgs = atomic64_read(&info->cache->to_write_pgs); + int err; + + hmdfs_info("peer 0x%x:0x%llx inode 0x%llx stashed bytes %lld pages %lld", + 
conn->owner, conn->device_id, info->remote_ino, + i_size_read(c_inode), to_write_pgs); + + if (to_write_pgs == 0) + return 0; + + err = vfs_fsync(cache_file, 0); + if (!err) + err = hmdfs_enable_stash_file(info, c_dentry); + else + hmdfs_err("fsync stash file err %d", err); + + return err < 0 ? err : 1; +} + +static void hmdfs_del_file_cache(struct hmdfs_cache_info *cache) +{ + if (!cache) + return; + + fput(cache->cache_file); + kfree(cache->path_buf); + kfree(cache); +} + +static struct hmdfs_cache_info * +hmdfs_new_file_cache(struct hmdfs_peer *conn, struct hmdfs_inode_info *info) +{ + struct hmdfs_cache_info *cache = NULL; + struct dentry *stash_dentry = NULL; + int err; + + cache = kzalloc(sizeof(*cache), GFP_KERNEL); + if (!cache) + return ERR_PTR(-ENOMEM); + + atomic64_set(&cache->to_write_pgs, 0); + atomic64_set(&cache->written_pgs, 0); + cache->path_buf = kmalloc(PATH_MAX, GFP_KERNEL); + if (!cache->path_buf) { + err = -ENOMEM; + goto free_cache; + } + + /* Need to handle "hardlink" ? */ + stash_dentry = d_find_any_alias(&info->vfs_inode); + if (stash_dentry) { + /* Needs full path in hmdfs, will be a device-view path */ + cache->path = dentry_path_raw(stash_dentry, cache->path_buf, + PATH_MAX); + dput(stash_dentry); + if (IS_ERR(cache->path)) { + err = PTR_ERR(cache->path); + hmdfs_err("peer 0x%x:0x%llx inode 0x%llx gen path err %d", + conn->owner, conn->device_id, + info->remote_ino, err); + goto free_path; + } + } else { + /* Write-opened file was closed before finding dentry */ + hmdfs_info("peer 0x%x:0x%llx inode 0x%llx no dentry found", + conn->owner, conn->device_id, info->remote_ino); + cache->path_buf[0] = '\0'; + cache->path = cache->path_buf; + } + + cache->path_cnt = 1; + cache->path_len = strlen(cache->path) + 1; + cache->path_offs = DIV_ROUND_UP(sizeof(struct hmdfs_cache_file_head), + HMDFS_STASH_BLK_SIZE); + cache->data_offs = cache->path_offs + DIV_ROUND_UP(cache->path_len, + HMDFS_STASH_BLK_SIZE); + cache->cache_file = hmdfs_new_stash_file(&conn->sbi->stash_work_dir, + conn->cid); + if (IS_ERR(cache->cache_file)) { + err = PTR_ERR(cache->cache_file); + goto free_path; + } + + return cache; + +free_path: + kfree(cache->path_buf); +free_cache: + kfree(cache); + return ERR_PTR(err); +} + +static void hmdfs_init_stash_file_cache(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + struct hmdfs_cache_info *cache = NULL; + + cache = hmdfs_new_file_cache(conn, info); + if (IS_ERR(cache)) + /* + * Continue even creating stash info failed. + * We need to ensure there is no dirty pages + * after stash completes + */ + cache = NULL; + + /* Make write() returns */ + spin_lock(&info->stash_lock); + info->cache = cache; + info->stash_status = HMDFS_REMOTE_INODE_STASHING; + spin_unlock(&info->stash_lock); +} + +static void hmdfs_update_stash_stats(struct hmdfs_stash_stats *stats, + const struct hmdfs_cache_info *cache, + int err) +{ + unsigned long long ok_pages, fail_pages; + + if (cache) { + ok_pages = err > 0 ? 
atomic64_read(&cache->written_pgs) : 0; + fail_pages = atomic64_read(&cache->to_write_pgs) - ok_pages; + stats->ok_pages += ok_pages; + stats->fail_pages += fail_pages; + } + + if (err > 0) + stats->succeed++; + else if (!err) + stats->donothing++; + else + stats->fail++; +} + +/* Return 1 if stash is done, 0 if nothing is stashed */ +static int hmdfs_stash_remote_inode(struct hmdfs_inode_info *info, + struct hmdfs_stash_stats *stats) +{ + struct hmdfs_cache_info *cache = info->cache; + struct hmdfs_peer *conn = info->conn; + unsigned int status; + int err = 0; + + hmdfs_info("stash peer 0x%x:0x%llx ino 0x%llx", + conn->owner, conn->device_id, info->remote_ino); + + err = hmdfs_flush_stash_file(info); + if (!err) + err = hmdfs_close_stash_file(conn, info); + + if (err <= 0) + set_bit(HMDFS_FID_NEED_OPEN, &info->fid_flags); + status = err > 0 ? HMDFS_REMOTE_INODE_RESTORING : + HMDFS_REMOTE_INODE_NONE; + spin_lock(&info->stash_lock); + info->cache = NULL; + /* + * Use smp_store_release() to ensure order between HMDFS_FID_NEED_OPEN + * and HMDFS_REMOTE_INODE_NONE. + */ + smp_store_release(&info->stash_status, status); + spin_unlock(&info->stash_lock); + + hmdfs_update_stash_stats(stats, cache, err); + hmdfs_del_file_cache(cache); + + return err; +} + +static void hmdfs_init_cache_for_stash_files(struct hmdfs_peer *conn, + struct list_head *list) +{ + const struct cred *old_cred = NULL; + struct hmdfs_inode_info *info = NULL; + + /* For file creation under stash_work_dir */ + old_cred = hmdfs_override_creds(conn->sbi->cred); + list_for_each_entry(info, list, stash_node) + hmdfs_init_stash_file_cache(conn, info); + hmdfs_revert_creds(old_cred); +} + +static void hmdfs_init_stash_cache_work_fn(struct work_struct *base) +{ + struct hmdfs_stash_work *work = + container_of(base, struct hmdfs_stash_work, work); + + hmdfs_init_cache_for_stash_files(work->conn, work->list); + complete(&work->done); +} + +static void hmdfs_init_cache_for_stash_files_by_work(struct hmdfs_peer *conn, + struct list_head *list) +{ + struct hmdfs_stash_work work = { + .conn = conn, + .list = list, + .done = COMPLETION_INITIALIZER_ONSTACK(work.done), + }; + + INIT_WORK_ONSTACK(&work.work, hmdfs_init_stash_cache_work_fn); + schedule_work(&work.work); + wait_for_completion(&work.done); +} + +static void hmdfs_stash_fetch_ready_files(struct hmdfs_peer *conn, + bool check, struct list_head *list) +{ + struct hmdfs_inode_info *info = NULL; + + spin_lock(&conn->wr_opened_inode_lock); + list_for_each_entry(info, &conn->wr_opened_inode_list, wr_opened_node) { + int status; + + /* Paired with *_release() in hmdfs_reset_stashed_inode() */ + status = smp_load_acquire(&info->stash_status); + if (status == HMDFS_REMOTE_INODE_NONE) { + list_add_tail(&info->stash_node, list); + /* + * Prevent close() removing the inode from + * writeable-opened inode list + */ + hmdfs_remote_add_wr_opened_inode_nolock(conn, info); + /* Prevent the inode from eviction */ + ihold(&info->vfs_inode); + } else if (check && status == HMDFS_REMOTE_INODE_STASHING) { + hmdfs_warning("peer 0x%x:0x%llx inode 0x%llx unexpected stash status %d", + conn->owner, conn->device_id, + info->remote_ino, status); + } + } + spin_unlock(&conn->wr_opened_inode_lock); +} + +static void hmdfs_stash_offline_prepare(struct hmdfs_peer *conn, int evt, + unsigned int seq) +{ + LIST_HEAD(preparing); + + if (!hmdfs_is_stash_enabled(conn->sbi)) + return; + + mutex_lock(&conn->offline_cb_lock); + + hmdfs_stash_fetch_ready_files(conn, true, &preparing); + + if (list_empty(&preparing)) + goto 
out;
+
+	hmdfs_init_cache_for_stash_files_by_work(conn, &preparing);
+out:
+	mutex_unlock(&conn->offline_cb_lock);
+}
+
+static void hmdfs_track_inode_locked(struct hmdfs_peer *conn,
+				     struct hmdfs_inode_info *info)
+{
+	spin_lock(&conn->stashed_inode_lock);
+	list_add_tail(&info->stash_node, &conn->stashed_inode_list);
+	conn->stashed_inode_nr++;
+	spin_unlock(&conn->stashed_inode_lock);
+}
+
+static void
+hmdfs_update_peer_stash_stats(struct hmdfs_stash_statistics *stash_stats,
+			      const struct hmdfs_stash_stats *stats)
+{
+	stash_stats->cur_ok = stats->succeed;
+	stash_stats->cur_nothing = stats->donothing;
+	stash_stats->cur_fail = stats->fail;
+	stash_stats->total_ok += stats->succeed;
+	stash_stats->total_nothing += stats->donothing;
+	stash_stats->total_fail += stats->fail;
+	stash_stats->ok_pages += stats->ok_pages;
+	stash_stats->fail_pages += stats->fail_pages;
+}
+
+static void hmdfs_stash_remote_inodes(struct hmdfs_peer *conn,
+				      struct list_head *list)
+{
+	const struct cred *old_cred = NULL;
+	struct hmdfs_inode_info *info = NULL;
+	struct hmdfs_inode_info *next = NULL;
+	struct hmdfs_stash_stats stats;
+
+	/* For file creation, write and relink under stash_work_dir */
+	old_cred = hmdfs_override_creds(conn->sbi->cred);
+
+	memset(&stats, 0, sizeof(stats));
+	list_for_each_entry_safe(info, next, list, stash_node) {
+		int err;
+
+		list_del_init(&info->stash_node);
+
+		err = hmdfs_stash_remote_inode(info, &stats);
+		if (err > 0)
+			hmdfs_track_inode_locked(conn, info);
+
+		hmdfs_remote_del_wr_opened_inode(conn, info);
+		if (err <= 0)
+			iput(&info->vfs_inode);
+	}
+	hmdfs_revert_creds(old_cred);
+
+	hmdfs_update_peer_stash_stats(&conn->stats.stash, &stats);
+	hmdfs_info("peer 0x%x:0x%llx total stashed %u cur ok %u none %u fail %u",
+		   conn->owner, conn->device_id, conn->stashed_inode_nr,
+		   stats.succeed, stats.donothing, stats.fail);
+}
+
+static void hmdfs_stash_offline_do_stash(struct hmdfs_peer *conn, int evt,
+					 unsigned int seq)
+{
+	struct hmdfs_inode_info *info = NULL;
+	LIST_HEAD(preparing);
+	LIST_HEAD(stashing);
+
+	if (!hmdfs_is_stash_enabled(conn->sbi))
+		return;
+
+	/* release seq_lock to prevent blocking non-offline sync cb */
+	mutex_unlock(&conn->seq_lock);
+	/* acquire offline_cb_lock to serialize with offline sync cb */
+	mutex_lock(&conn->offline_cb_lock);
+
+	hmdfs_stash_fetch_ready_files(conn, false, &preparing);
+	if (!list_empty(&preparing))
+		hmdfs_init_cache_for_stash_files(conn, &preparing);
+
+	spin_lock(&conn->wr_opened_inode_lock);
+	list_for_each_entry(info, &conn->wr_opened_inode_list, wr_opened_node) {
+		int status = READ_ONCE(info->stash_status);
+
+		if (status == HMDFS_REMOTE_INODE_STASHING)
+			list_add_tail(&info->stash_node, &stashing);
+	}
+	spin_unlock(&conn->wr_opened_inode_lock);
+
+	if (list_empty(&stashing))
+		goto unlock;
+
+	hmdfs_stash_remote_inodes(conn, &stashing);
+
+unlock:
+	mutex_unlock(&conn->offline_cb_lock);
+	mutex_lock(&conn->seq_lock);
+}
+
+static struct hmdfs_inode_info *
+hmdfs_lookup_stash_inode(struct hmdfs_peer *conn, uint64_t inum)
+{
+	struct hmdfs_inode_info *info = NULL;
+
+	list_for_each_entry(info, &conn->stashed_inode_list, stash_node) {
+		if (info->remote_ino == inum)
+			return info;
+	}
+
+	return NULL;
+}
+
+static void hmdfs_untrack_stashed_inode(struct hmdfs_peer *conn,
+					struct hmdfs_inode_info *info)
+{
+	list_del_init(&info->stash_node);
+	iput(&info->vfs_inode);
+
+	conn->stashed_inode_nr--;
+}
+
+static void hmdfs_reset_stashed_inode(struct hmdfs_peer *conn,
+				      struct hmdfs_inode_info *info)
+{
+	struct inode
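+	/*
+	 * Editor's note on lock order in hmdfs_stash_offline_do_stash() above:
+	 * the async offline callback enters with conn->seq_lock held, drops it,
+	 * and only then takes conn->offline_cb_lock, so the two mutexes are
+	 * never held at the same time and the sync offline callback (which
+	 * takes offline_cb_lock alone) cannot deadlock against it.
+	 */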
*ino = &info->vfs_inode; + + /* + * For updating stash_status after iput() + * in hmdfs_untrack_stashed_inode() + */ + ihold(ino); + hmdfs_untrack_stashed_inode(conn, info); + /* + * Ensure the order of stash_node and stash_status: + * only update stash_status to NONE after removal of + * stash_node is completed. + */ + smp_store_release(&info->stash_status, + HMDFS_REMOTE_INODE_NONE); + iput(ino); +} + +static void hmdfs_drop_stashed_inodes(struct hmdfs_peer *conn) +{ + struct hmdfs_inode_info *info = NULL; + struct hmdfs_inode_info *next = NULL; + + if (list_empty(&conn->stashed_inode_list)) + return; + + hmdfs_warning("peer 0x%x:0x%llx drop unrestorable file %u", + conn->owner, conn->device_id, conn->stashed_inode_nr); + + list_for_each_entry_safe(info, next, + &conn->stashed_inode_list, stash_node) { + hmdfs_warning("peer 0x%x:0x%llx inode 0x%llx unrestorable status %u", + conn->owner, conn->device_id, info->remote_ino, + READ_ONCE(info->stash_status)); + + hmdfs_reset_stashed_inode(conn, info); + } +} + +static struct file *hmdfs_open_stash_dir(struct path *d_path, const char *cid) +{ + int err = 0; + struct dentry *parent = d_path->dentry; + struct inode *dir = d_inode(parent); + struct dentry *child = NULL; + struct path peer_path; + struct file *filp = NULL; + + inode_lock_nested(dir, I_MUTEX_PARENT); + child = lookup_one_len(cid, parent, strlen(cid)); + if (!IS_ERR(child)) { + if (!hmdfs_is_dir(child)) { + if (d_is_positive(child)) { + hmdfs_err("invalid stash dir mode 0%o", d_inode(child)->i_mode); + err = -EINVAL; + } else { + err = -ENOENT; + } + dput(child); + } + } else { + err = PTR_ERR(child); + hmdfs_err("lookup stash dir err %d", err); + } + inode_unlock(dir); + + if (err) + return ERR_PTR(err); + + peer_path.mnt = d_path->mnt; + peer_path.dentry = child; + filp = dentry_open(&peer_path, O_RDONLY | O_DIRECTORY, current_cred()); + if (IS_ERR(filp)) + hmdfs_err("open err %d", (int)PTR_ERR(filp)); + + dput(child); + + return filp; +} + +static int hmdfs_new_inode_tbl(struct hmdfs_inode_tbl **tbl) +{ + struct hmdfs_inode_tbl *new = NULL; + + new = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!new) + return -ENOMEM; + + new->cnt = 0; + new->max = (PAGE_SIZE - offsetof(struct hmdfs_inode_tbl, inodes)) / + sizeof(new->inodes[0]); + *tbl = new; + + return 0; +} + +static int hmdfs_parse_stash_file_name(struct dir_context *dctx, + const char *name, + int namelen, + unsigned int d_type, + uint64_t *stash_inum) +{ + struct hmdfs_stash_dir_context *ctx = NULL; + int err; + + if (d_type != DT_UNKNOWN && d_type != DT_REG) + return 0; + if (namelen > NAME_MAX) + return 0; + + ctx = container_of(dctx, struct hmdfs_stash_dir_context, dctx); + memcpy(ctx->name, name, namelen); + ctx->name[namelen] = '\0'; + err = kstrtoull(ctx->name, 16, stash_inum); + if (err) { + hmdfs_err("unexpected stash file err %d", err); + return 0; + } + return 1; +} + +static int hmdfs_has_stash_file(struct dir_context *dctx, const char *name, + int namelen, loff_t offset, + u64 inum, unsigned int d_type) +{ + struct hmdfs_stash_dir_context *ctx = NULL; + uint64_t stash_inum; + int err; + + ctx = container_of(dctx, struct hmdfs_stash_dir_context, dctx); + err = hmdfs_parse_stash_file_name(dctx, name, namelen, + d_type, &stash_inum); + if (!err) + return 0; + + ctx->tbl->cnt++; + return 1; +} + +static int hmdfs_fill_stash_file(struct dir_context *dctx, const char *name, + int namelen, loff_t offset, + u64 inum, unsigned int d_type) +{ + struct hmdfs_stash_dir_context *ctx = NULL; + uint64_t stash_inum; + int err; + + 
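+	/*
+	 * Editor's note: stash files are named "0x%llx" after the remote inode
+	 * number (see hmdfs_restore_files() further below), which is why
+	 * hmdfs_parse_stash_file_name() above can recover the number with
+	 * kstrtoull(..., 16, ...). Both actors rely on the v5.10 dir_context
+	 * contract that a non-zero return from ->actor stops iterate_dir():
+	 * hmdfs_has_stash_file() stops after the first valid entry, and this
+	 * function returns 1 once its table is full.
+	 */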
ctx = container_of(dctx, struct hmdfs_stash_dir_context, dctx); + err = hmdfs_parse_stash_file_name(dctx, name, namelen, + d_type, &stash_inum); + if (!err) + return 0; + if (ctx->tbl->cnt >= ctx->tbl->max) + return 1; + + ctx->tbl->inodes[ctx->tbl->cnt++] = stash_inum; + + return 0; +} + +static int hmdfs_del_stash_file(struct dentry *parent, struct dentry *child) +{ + struct inode *dir = d_inode(parent); + int err = 0; + + /* Prevent d_delete() from calling dentry_unlink_inode() */ + dget(child); + + inode_lock_nested(dir, I_MUTEX_PARENT); + err = vfs_unlink(dir, child, NULL); + if (err) + hmdfs_err("remove stash file err %d", err); + inode_unlock(dir); + + dput(child); + + return err; +} + +static inline bool hmdfs_is_node_offlined(const struct hmdfs_peer *conn, + unsigned int seq) +{ + /* + * open()/fsync() may fail due to "status = NODE_STAT_OFFLINE" + * in hmdfs_disconnect_node(). + * Pair with smp_mb() in hmdfs_disconnect_node() to ensure + * getting the newest event sequence. + */ + smp_mb__before_atomic(); + return hmdfs_node_evt_seq(conn) != seq; +} + +static int hmdfs_verify_restore_file_head(struct hmdfs_file_restore_ctx *ctx, + const struct hmdfs_cache_file_head *head) +{ + struct inode *inode = file_inode(ctx->src_filp); + struct hmdfs_peer *conn = ctx->conn; + unsigned int crc, read_crc, crc_offset; + loff_t path_offs, data_offs, isize; + int err = 0; + + if (le32_to_cpu(head->magic) != HMDFS_STASH_FILE_HEAD_MAGIC) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid magic: got 0x%x, exp 0x%x", + conn->owner, conn->device_id, ctx->inum, + le32_to_cpu(head->magic), + HMDFS_STASH_FILE_HEAD_MAGIC); + goto out; + } + + crc_offset = le32_to_cpu(head->crc_offset); + read_crc = le32_to_cpu(*((__le32 *)((char *)head + crc_offset))); + crc = crc32(0, head, crc_offset); + if (read_crc != crc) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid crc: got 0x%x, exp 0x%x", + conn->owner, conn->device_id, ctx->inum, + read_crc, crc); + goto out; + } + + if (le64_to_cpu(head->ino) != ctx->inum) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid ino: got %llu, exp %llu", + conn->owner, conn->device_id, ctx->inum, + le64_to_cpu(head->ino), ctx->inum); + goto out; + } + + path_offs = (loff_t)le32_to_cpu(head->path_offs) << + HMDFS_STASH_BLK_SHIFT; + if (path_offs <= 0 || path_offs >= i_size_read(inode)) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid path_offs %d, stash file size %llu", + conn->owner, conn->device_id, ctx->inum, + le32_to_cpu(head->path_offs), i_size_read(inode)); + goto out; + } + + data_offs = (loff_t)le32_to_cpu(head->data_offs) << + HMDFS_STASH_BLK_SHIFT; + if (path_offs >= data_offs) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid data_offs %d, path_offs %d", + conn->owner, conn->device_id, ctx->inum, + le32_to_cpu(head->data_offs), + le32_to_cpu(head->path_offs)); + goto out; + } + if (data_offs <= 0 || data_offs >= i_size_read(inode)) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid data_offs %d, stash file size %llu", + conn->owner, conn->device_id, ctx->inum, + le32_to_cpu(head->data_offs), i_size_read(inode)); + goto out; + } + + isize = le64_to_cpu(head->size); + if (isize != i_size_read(inode)) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid isize: got %llu, exp %llu", + conn->owner, conn->device_id, ctx->inum, + le64_to_cpu(head->size), i_size_read(inode)); + goto out; + } + + if (le32_to_cpu(head->path_cnt) < 1) { + err = -EUCLEAN; 
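+		/*
+		 * Editor's note: taken together, the checks in this function
+		 * imply the following stash-file layout, with S standing for
+		 * HMDFS_STASH_BLK_SHIFT (value defined elsewhere in this
+		 * patch):
+		 *
+		 *	0                path_offs << S    data_offs << S
+		 *	| head ... crc32 | path string(s)  | page data ...
+		 *
+		 * where 0 < path_offs << S < data_offs << S < i_size, the
+		 * crc32 is computed over head[0, crc_offset), and head->size
+		 * must equal the stash file's own i_size.
+		 */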
+ hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid path_cnt %d", + conn->owner, conn->device_id, ctx->inum, + le32_to_cpu(head->path_cnt)); + goto out; + } + +out: + return err; +} + +static int hmdfs_get_restore_file_metadata(struct hmdfs_file_restore_ctx *ctx) +{ + struct hmdfs_cache_file_head head; + struct hmdfs_peer *conn = ctx->conn; + unsigned int head_size, read_size, head_crc_offset; + loff_t pos; + ssize_t rd; + int err = 0; + + head_size = sizeof(struct hmdfs_cache_file_head); + memset(&head, 0, head_size); + /* Read part head */ + pos = 0; + read_size = offsetof(struct hmdfs_cache_file_head, crc_offset) + + sizeof(head.crc_offset); + rd = kernel_read(ctx->src_filp, &head, read_size, &pos); + if (rd != read_size) { + err = rd < 0 ? rd : -ENODATA; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx read part head err %d", + conn->owner, conn->device_id, ctx->inum, err); + goto out; + } + head_crc_offset = le32_to_cpu(head.crc_offset); + if (head_crc_offset + sizeof(head.crc32) < head_crc_offset || + head_crc_offset + sizeof(head.crc32) > head_size) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx got bad head: Too long crc_offset %u which exceeds head size %u", + conn->owner, conn->device_id, ctx->inum, + head_crc_offset, head_size); + goto out; + } + + /* Read full head */ + pos = 0; + read_size = le32_to_cpu(head.crc_offset) + sizeof(head.crc32); + rd = kernel_read(ctx->src_filp, &head, read_size, &pos); + if (rd != read_size) { + err = rd < 0 ? rd : -ENODATA; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx read full head err %d", + conn->owner, conn->device_id, ctx->inum, err); + goto out; + } + + err = hmdfs_verify_restore_file_head(ctx, &head); + if (err) + goto out; + + ctx->pages = le64_to_cpu(head.blocks) >> + HMDFS_STASH_PAGE_TO_SECTOR_SHIFT; + ctx->data_offs = le32_to_cpu(head.data_offs); + /* Read path */ + read_size = min_t(unsigned int, le32_to_cpu(head.path_len), PATH_MAX); + pos = (loff_t)le32_to_cpu(head.path_offs) << HMDFS_STASH_BLK_SHIFT; + rd = kernel_read(ctx->src_filp, ctx->dst, read_size, &pos); + if (rd != read_size) { + err = rd < 0 ? rd : -ENODATA; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx read path err %d", + conn->owner, conn->device_id, ctx->inum, err); + goto out; + } + if (strnlen(ctx->dst, read_size) >= read_size) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx read path not end with \\0", + conn->owner, conn->device_id, ctx->inum); + goto out; + } + /* TODO: Pick a valid path from all paths */ + +out: + return err; +} + +static int hmdfs_open_restore_dst_file(struct hmdfs_file_restore_ctx *ctx, + unsigned int rw_flag, struct file **filp) +{ + struct hmdfs_peer *conn = ctx->conn; + struct file *dst = NULL; + int err = 0; + + err = hmdfs_get_restore_file_metadata(ctx); + if (err) + goto out; + + /* Error comes from connection or server ? 
*/ + dst = file_open_root(&ctx->dst_root_path, + ctx->dst, O_LARGEFILE | rw_flag, 0); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + hmdfs_err("open remote file ino 0x%llx err %d", ctx->inum, err); + if (hmdfs_is_node_offlined(conn, ctx->seq)) + err = -ESHUTDOWN; + goto out; + } + + *filp = dst; +out: + return err; +} + +static bool hmdfs_need_abort_restore(struct hmdfs_file_restore_ctx *ctx, + struct hmdfs_inode_info *pinned, + struct file *opened_file) +{ + struct hmdfs_inode_info *opened = hmdfs_i(file_inode(opened_file)); + + if (opened->inode_type != HMDFS_LAYER_OTHER_REMOTE) + goto abort; + + if (opened == pinned) + return false; + +abort: + hmdfs_warning("peer 0x%x:0x%llx inode 0x%llx invalid remote file", + ctx->conn->owner, ctx->conn->device_id, ctx->inum); + hmdfs_warning("got: peer 0x%x:0x%llx inode 0x%llx type %d status %d", + opened->conn ? opened->conn->owner : 0, + opened->conn ? opened->conn->device_id : 0, + opened->remote_ino, opened->inode_type, + opened->stash_status); + hmdfs_warning("pinned: peer 0x%x:0x%llx inode 0x%llx type %d status %d", + pinned->conn->owner, pinned->conn->device_id, + pinned->remote_ino, pinned->inode_type, + pinned->stash_status); + return true; +} + +static void hmdfs_init_copy_args(const struct hmdfs_file_restore_ctx *ctx, + struct file *dst, struct hmdfs_copy_args *args) +{ + args->src = ctx->src_filp; + args->dst = dst; + args->buf = ctx->page; + args->buf_len = PAGE_SIZE; + args->seq = ctx->seq; + args->data_offs = ctx->data_offs; + args->inum = ctx->inum; +} + +static ssize_t hmdfs_write_dst(struct hmdfs_peer *conn, struct file *filp, + void *buf, size_t len, loff_t pos) +{ + mm_segment_t old_fs; + struct kiocb kiocb; + struct iovec iov; + struct iov_iter iter; + ssize_t wr; + int err = 0; + + file_start_write(filp); + + old_fs = force_uaccess_begin(); + + init_sync_kiocb(&kiocb, filp); + kiocb.ki_pos = pos; + + iov.iov_base = buf; + iov.iov_len = len; + iov_iter_init(&iter, WRITE, &iov, 1, len); + + wr = hmdfs_file_write_iter_remote_nocheck(&kiocb, &iter); + + force_uaccess_end(old_fs); + + file_end_write(filp); + + if (wr != len) { + struct hmdfs_inode_info *info = hmdfs_i(file_inode(filp)); + + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx short write ret %zd exp %zu", + conn->owner, conn->device_id, info->remote_ino, + wr, len); + err = wr < 0 ? 
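+		/*
+		 * Editor's note: hmdfs_write_dst() above is the stock v5.10
+		 * idiom for pushing a kernel buffer through a write_iter-style
+		 * routine: force_uaccess_begin() (the successor of
+		 * set_fs(KERNEL_DS)) widens the address limit, a one-element
+		 * iovec backs the iov_iter, and a sync kiocb carries the file
+		 * position. Stripped-down sketch of the generic pattern:
+		 *
+		 *	old_fs = force_uaccess_begin();
+		 *	init_sync_kiocb(&kiocb, filp);
+		 *	kiocb.ki_pos = pos;
+		 *	iov_iter_init(&iter, WRITE, &iov, 1, len);
+		 *	ret = call_write_iter(filp, &kiocb, &iter);
+		 *	force_uaccess_end(old_fs);
+		 */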
(int)wr : -EFAULT; + } + + return err; +} + +static int hmdfs_rd_src_wr_dst(struct hmdfs_peer *conn, + struct hmdfs_copy_ctx *ctx) +{ + const struct hmdfs_copy_args *args = NULL; + int err = 0; + loff_t rd_pos; + ssize_t rd; + + ctx->eof = false; + ctx->copied = 0; + + args = &ctx->args; + rd_pos = ctx->src_pos; + rd = kernel_read(args->src, args->buf, args->buf_len, &rd_pos); + if (rd < 0) { + err = (int)rd; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx short read err %d", + conn->owner, conn->device_id, args->inum, err); + goto out; + } else if (rd == 0) { + ctx->eof = true; + goto out; + } + + err = hmdfs_write_dst(conn, args->dst, args->buf, rd, ctx->dst_pos); + if (!err) + ctx->copied = rd; + else if (hmdfs_is_node_offlined(conn, args->seq)) + err = -ESHUTDOWN; +out: + return err; +} + +static int hmdfs_copy_src_to_dst(struct hmdfs_peer *conn, + const struct hmdfs_copy_args *args) +{ + int err = 0; + struct file *src = NULL; + struct hmdfs_copy_ctx ctx; + loff_t seek_pos, data_init_pos; + loff_t src_size; + + ctx.args = *args; + + src = ctx.args.src; + data_init_pos = (loff_t)ctx.args.data_offs << HMDFS_STASH_BLK_SHIFT; + seek_pos = data_init_pos; + src_size = i_size_read(file_inode(src)); + while (true) { + loff_t data_pos; + + data_pos = vfs_llseek(src, seek_pos, SEEK_DATA); + if (data_pos > seek_pos) { + seek_pos = data_pos; + continue; + } else if (data_pos < 0) { + if (data_pos == -ENXIO) { + loff_t src_blks = file_inode(src)->i_blocks; + + hmdfs_info("peer 0x%x:0x%llx ino 0x%llx end at 0x%llx (sz 0x%llx blk 0x%llx)", + conn->owner, conn->device_id, + args->inum, seek_pos, + src_size, src_blks); + } else { + err = (int)data_pos; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx seek pos 0x%llx err %d", + conn->owner, conn->device_id, + args->inum, seek_pos, err); + } + break; + } + + hmdfs_debug("peer 0x%x:0x%llx ino 0x%llx seek to 0x%llx", + conn->owner, conn->device_id, args->inum, data_pos); + + ctx.src_pos = data_pos; + ctx.dst_pos = data_pos - data_init_pos; + err = hmdfs_rd_src_wr_dst(conn, &ctx); + if (err || ctx.eof) + break; + + seek_pos += ctx.copied; + if (seek_pos >= src_size) + break; + } + + return err; +} + +static int hmdfs_restore_src_to_dst(struct hmdfs_file_restore_ctx *ctx, + struct file *dst) +{ + struct file *src = ctx->src_filp; + struct hmdfs_copy_args args; + int err; + + hmdfs_init_copy_args(ctx, dst, &args); + err = hmdfs_copy_src_to_dst(ctx->conn, &args); + if (err) + goto out; + + err = vfs_fsync(dst, 0); + if (err) { + hmdfs_err("fsync remote file ino 0x%llx err %d", ctx->inum, err); + if (hmdfs_is_node_offlined(ctx->conn, ctx->seq)) + err = -ESHUTDOWN; + } + +out: + if (err) + truncate_inode_pages(file_inode(dst)->i_mapping, 0); + + /* Remove the unnecessary cache */ + invalidate_mapping_pages(file_inode(src)->i_mapping, 0, -1); + + return err; +} + + +static int hmdfs_restore_file(struct hmdfs_file_restore_ctx *ctx) +{ + struct hmdfs_peer *conn = ctx->conn; + uint64_t inum = ctx->inum; + struct hmdfs_inode_info *pinned_info = NULL; + struct file *dst_filp = NULL; + int err = 0; + bool keep = false; + + hmdfs_info("peer 0x%x:0x%llx ino 0x%llx do restore", + conn->owner, conn->device_id, inum); + + pinned_info = hmdfs_lookup_stash_inode(conn, inum); + if (pinned_info) { + unsigned int status = READ_ONCE(pinned_info->stash_status); + + if (status != HMDFS_REMOTE_INODE_RESTORING) { + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid status %u", + conn->owner, conn->device_id, inum, status); + err = -EINVAL; + goto clean; + } + } else { + hmdfs_warning("peer 0x%x:0x%llx 
ino 0x%llx is not pinned",
+			      conn->owner, conn->device_id, inum);
+		err = -EINVAL;
+		goto clean;
+	}
+
+	set_bit(HMDFS_FID_NEED_OPEN, &pinned_info->fid_flags);
+	err = hmdfs_open_restore_dst_file(ctx, O_RDWR, &dst_filp);
+	if (err) {
+		if (err == -ESHUTDOWN)
+			keep = true;
+		goto clean;
+	}
+
+	if (hmdfs_need_abort_restore(ctx, pinned_info, dst_filp))
+		goto abort;
+
+	err = hmdfs_restore_src_to_dst(ctx, dst_filp);
+	if (err == -ESHUTDOWN)
+		keep = true;
+abort:
+	fput(dst_filp);
+clean:
+	if (pinned_info && !keep)
+		hmdfs_reset_stashed_inode(conn, pinned_info);
+	ctx->keep = keep;
+
+	hmdfs_info("peer 0x%x:0x%llx ino 0x%llx restore err %d keep %d",
+		   conn->owner, conn->device_id, inum, err, ctx->keep);
+
+	return err;
+}
+
+static int hmdfs_init_file_restore_ctx(struct hmdfs_peer *conn,
+				       unsigned int seq, struct path *src_dir,
+				       struct hmdfs_file_restore_ctx *ctx)
+{
+	struct hmdfs_sb_info *sbi = conn->sbi;
+	struct path dst_root;
+	char *dst = NULL;
+	char *page = NULL;
+	int err = 0;
+
+	err = hmdfs_get_path_in_sb(sbi->sb, sbi->real_dst, LOOKUP_DIRECTORY,
+				   &dst_root);
+	if (err)
+		return err;
+
+	dst = kmalloc(PATH_MAX, GFP_KERNEL);
+	if (!dst) {
+		err = -ENOMEM;
+		goto put_path;
+	}
+
+	page = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!page) {
+		err = -ENOMEM;
+		goto free_dst;
+	}
+
+	ctx->conn = conn;
+	ctx->src_dir_path = *src_dir;
+	ctx->dst_root_path = dst_root;
+	ctx->dst = dst;
+	ctx->page = page;
+	ctx->seq = seq;
+
+	return 0;
+free_dst:
+	kfree(dst);
+put_path:
+	path_put(&dst_root);
+	return err;
+}
+
+static void hmdfs_exit_file_restore_ctx(struct hmdfs_file_restore_ctx *ctx)
+{
+	path_put(&ctx->dst_root_path);
+	kfree(ctx->dst);
+	kfree(ctx->page);
+}
+
+static struct file *hmdfs_open_stash_file(struct path *p_path, char *name)
+{
+	struct dentry *parent = NULL;
+	struct inode *dir = NULL;
+	struct dentry *child = NULL;
+	struct file *filp = NULL;
+	struct path c_path;
+	int err = 0;
+
+	parent = p_path->dentry;
+	dir = d_inode(parent);
+	inode_lock_nested(dir, I_MUTEX_PARENT);
+	child = lookup_one_len(name, parent, strlen(name));
+	if (!IS_ERR(child) && !hmdfs_is_reg(child)) {
+		if (d_is_positive(child)) {
+			hmdfs_err("invalid stash file (mode 0%o)",
+				  d_inode(child)->i_mode);
+			err = -EINVAL;
+		} else {
+			hmdfs_err("missing stash file");
+			err = -ENOENT;
+		}
+		dput(child);
+	} else if (IS_ERR(child)) {
+		err = PTR_ERR(child);
+		hmdfs_err("lookup stash file err %d", err);
+	}
+	inode_unlock(dir);
+
+	if (err)
+		return ERR_PTR(err);
+
+	c_path.mnt = p_path->mnt;
+	c_path.dentry = child;
+	filp = dentry_open(&c_path, O_RDONLY | O_LARGEFILE, current_cred());
+	if (IS_ERR(filp))
+		hmdfs_err("open stash file err %d", (int)PTR_ERR(filp));
+
+	dput(child);
+
+	return filp;
+}
+
+static void hmdfs_update_restore_stats(struct hmdfs_restore_stats *stats,
+				       bool keep, uint64_t pages, int err)
+{
+	if (!err) {
+		stats->succeed++;
+		stats->ok_pages += pages;
+	} else if (keep) {
+		stats->keep++;
+	} else {
+		stats->fail++;
+		stats->fail_pages += pages;
+	}
+}
+
+static int hmdfs_restore_files(struct hmdfs_peer *conn,
+			       unsigned int seq, struct path *dir,
+			       const struct hmdfs_inode_tbl *tbl,
+			       void *priv)
+{
+	unsigned int i;
+	struct hmdfs_file_restore_ctx ctx;
+	int err = 0;
+	struct hmdfs_restore_stats *stats = priv;
+
+	err = hmdfs_init_file_restore_ctx(conn, seq, dir, &ctx);
+	if (err)
+		return err;
+
+	for (i = 0; i < tbl->cnt; i++) {
+		char name[HMDFS_STASH_FILE_NAME_LEN];
+		struct file *filp = NULL;
+
+		snprintf(name, sizeof(name), "0x%llx", tbl->inodes[i]);
+		filp =
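+		/*
+		 * Editor's note: errors here are handled per file: anything
+		 * but -ESHUTDOWN is folded into the stats and the loop
+		 * continues; -ESHUTDOWN (peer offline again) aborts the whole
+		 * pass, leaving the remaining stash files in place for the
+		 * next online event.
+		 */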
hmdfs_open_stash_file(dir, name); + /* Continue to restore if any error */ + if (IS_ERR(filp)) { + stats->fail++; + continue; + } + + ctx.inum = tbl->inodes[i]; + ctx.src_filp = filp; + ctx.keep = false; + ctx.pages = 0; + err = hmdfs_restore_file(&ctx); + hmdfs_update_restore_stats(stats, ctx.keep, ctx.pages, err); + + if (!ctx.keep) + hmdfs_del_stash_file(dir->dentry, + file_dentry(ctx.src_filp)); + fput(ctx.src_filp); + + /* Continue to restore */ + if (err == -ESHUTDOWN) + break; + err = 0; + } + + hmdfs_exit_file_restore_ctx(&ctx); + + return err; +} + +static bool hmdfs_is_valid_stash_status(struct hmdfs_inode_info *inode_info, + uint64_t ino) +{ + return (inode_info->inode_type == HMDFS_LAYER_OTHER_REMOTE && + inode_info->stash_status == HMDFS_REMOTE_INODE_RESTORING && + inode_info->remote_ino == ino); +} + +static int hmdfs_rebuild_stash_list(struct hmdfs_peer *conn, + unsigned int seq, + struct path *dir, + const struct hmdfs_inode_tbl *tbl, + void *priv) +{ + struct hmdfs_file_restore_ctx ctx; + unsigned int i; + int err; + struct hmdfs_rebuild_stats *stats = priv; + + err = hmdfs_init_file_restore_ctx(conn, seq, dir, &ctx); + if (err) + return err; + + stats->total += tbl->cnt; + + for (i = 0; i < tbl->cnt; i++) { + char name[HMDFS_STASH_FILE_NAME_LEN]; + struct file *src_filp = NULL; + struct file *dst_filp = NULL; + struct hmdfs_inode_info *inode_info = NULL; + bool is_valid = true; + + snprintf(name, sizeof(name), "0x%llx", tbl->inodes[i]); + src_filp = hmdfs_open_stash_file(dir, name); + if (IS_ERR(src_filp)) { + stats->fail++; + continue; + } + ctx.inum = tbl->inodes[i]; + ctx.src_filp = src_filp; + + /* No need to track the open which only needs meta info */ + err = hmdfs_open_restore_dst_file(&ctx, O_RDONLY, &dst_filp); + if (err) { + fput(src_filp); + if (err == -ESHUTDOWN) + break; + stats->fail++; + err = 0; + continue; + } + + inode_info = hmdfs_i(file_inode(dst_filp)); + is_valid = hmdfs_is_valid_stash_status(inode_info, + ctx.inum); + if (is_valid) { + stats->succeed++; + } else { + hmdfs_err("peer 0x%x:0x%llx inode 0x%llx invalid state: type: %d, status: %u, inode: %llu", + conn->owner, conn->device_id, ctx.inum, + inode_info->inode_type, + READ_ONCE(inode_info->stash_status), + inode_info->remote_ino); + stats->invalid++; + } + + fput(ctx.src_filp); + fput(dst_filp); + } + + hmdfs_exit_file_restore_ctx(&ctx); + return err; +} + +static int hmdfs_iter_stash_file(struct hmdfs_peer *conn, + unsigned int seq, + struct file *filp, + stash_operation_func op, + void *priv) +{ + int err = 0; + struct hmdfs_stash_dir_context ctx = { + .dctx.actor = hmdfs_fill_stash_file, + }; + struct hmdfs_inode_tbl *tbl = NULL; + struct path dir; + + err = hmdfs_new_inode_tbl(&tbl); + if (err) + goto out; + + dir.mnt = filp->f_path.mnt; + dir.dentry = file_dentry(filp); + + ctx.tbl = tbl; + ctx.dctx.pos = 0; + do { + tbl->cnt = 0; + err = iterate_dir(filp, &ctx.dctx); + if (err || !tbl->cnt) { + if (err) + hmdfs_err("iterate stash dir err %d", err); + break; + } + err = op(conn, seq, &dir, tbl, priv); + } while (!err); + +out: + kfree(tbl); + return err; +} + +static void hmdfs_rebuild_check_work_fn(struct work_struct *base) +{ + struct hmdfs_check_work *work = + container_of(base, struct hmdfs_check_work, work); + struct hmdfs_peer *conn = work->conn; + struct hmdfs_sb_info *sbi = conn->sbi; + struct file *filp = NULL; + const struct cred *old_cred = NULL; + struct hmdfs_stash_dir_context ctx = { + .dctx.actor = hmdfs_has_stash_file, + }; + struct hmdfs_inode_tbl tbl; + int err; + + 
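+	/*
+	 * Editor's note: this worker is driven by the on-stack work pattern
+	 * used in hmdfs_stash_add_do_check() below (and in
+	 * hmdfs_init_cache_for_stash_files_by_work() above): the work item
+	 * and completion live on the caller's stack, the caller blocks in
+	 * wait_for_completion(), and the worker signals it. Sketch, with
+	 * hypothetical names:
+	 *
+	 *	struct foo_work w = {
+	 *		.done = COMPLETION_INITIALIZER_ONSTACK(w.done),
+	 *	};
+	 *	INIT_WORK_ONSTACK(&w.work, foo_fn);
+	 *	schedule_work(&w.work);
+	 *	wait_for_completion(&w.done);
+	 *
+	 * (The canonical pattern also calls destroy_work_on_stack() when
+	 * CONFIG_DEBUG_OBJECTS_WORK is enabled; this patch omits it.)
+	 */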
old_cred = hmdfs_override_creds(sbi->cred); + filp = hmdfs_open_stash_dir(&sbi->stash_work_dir, conn->cid); + if (IS_ERR(filp)) + goto out; + + memset(&tbl, 0, sizeof(tbl)); + ctx.tbl = &tbl; + err = iterate_dir(filp, &ctx.dctx); + if (!err && ctx.tbl->cnt > 0) + conn->need_rebuild_stash_list = true; + + fput(filp); +out: + hmdfs_revert_creds(old_cred); + hmdfs_info("peer 0x%x:0x%llx %sneed to rebuild stash list", + conn->owner, conn->device_id, + conn->need_rebuild_stash_list ? "" : "don't "); + complete(&work->done); +} + +static void hmdfs_stash_add_do_check(struct hmdfs_peer *conn, int evt, + unsigned int seq) +{ + struct hmdfs_sb_info *sbi = conn->sbi; + struct hmdfs_check_work work = { + .conn = conn, + .done = COMPLETION_INITIALIZER_ONSTACK(work.done), + }; + + if (!hmdfs_is_stash_enabled(sbi)) + return; + + INIT_WORK_ONSTACK(&work.work, hmdfs_rebuild_check_work_fn); + schedule_work(&work.work); + wait_for_completion(&work.done); +} + +static void +hmdfs_update_peer_rebuild_stats(struct hmdfs_rebuild_statistics *rebuild_stats, + const struct hmdfs_rebuild_stats *stats) +{ + rebuild_stats->cur_ok = stats->succeed; + rebuild_stats->cur_fail = stats->fail; + rebuild_stats->cur_invalid = stats->invalid; + rebuild_stats->total_ok += stats->succeed; + rebuild_stats->total_fail += stats->fail; + rebuild_stats->total_invalid += stats->invalid; +} + +/* rebuild stash inode list */ +static void hmdfs_stash_online_prepare(struct hmdfs_peer *conn, int evt, + unsigned int seq) +{ + struct hmdfs_sb_info *sbi = conn->sbi; + struct file *filp = NULL; + const struct cred *old_cred = NULL; + int err; + struct hmdfs_rebuild_stats stats; + + if (!hmdfs_is_stash_enabled(sbi) || + !conn->need_rebuild_stash_list) + return; + + /* release seq_lock to prevent blocking no-online sync cb */ + mutex_unlock(&conn->seq_lock); + old_cred = hmdfs_override_creds(sbi->cred); + filp = hmdfs_open_stash_dir(&sbi->stash_work_dir, conn->cid); + if (IS_ERR(filp)) + goto out; + + memset(&stats, 0, sizeof(stats)); + err = hmdfs_iter_stash_file(conn, seq, filp, + hmdfs_rebuild_stash_list, &stats); + if (err == -ESHUTDOWN) { + hmdfs_info("peer 0x%x:0x%llx offline again during rebuild", + conn->owner, conn->device_id); + } else { + WRITE_ONCE(conn->need_rebuild_stash_list, false); + if (err) + hmdfs_warning("partial rebuild fail err %d", err); + } + + hmdfs_update_peer_rebuild_stats(&conn->stats.rebuild, &stats); + hmdfs_info("peer 0x%x:0x%llx rebuild stashed-file total %u succeed %u fail %u invalid %u", + conn->owner, conn->device_id, stats.total, stats.succeed, + stats.fail, stats.invalid); + fput(filp); +out: + conn->stats.rebuild.time++; + hmdfs_revert_creds(old_cred); + if (!READ_ONCE(conn->need_rebuild_stash_list)) { + /* + * Use smp_mb__before_atomic() to ensure order between + * writing @conn->need_rebuild_stash_list and + * reading conn->rebuild_inode_status_nr. + */ + smp_mb__before_atomic(); + /* + * Wait until all inodes finish rebuilding stash status before + * accessing @conn->stashed_inode_list in restoring. 
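+	 * Editor's note: the counter protocol assumed here lives in
+	 * hmdfs_remote_init_stash_status() below: every in-flight inode
+	 * rebuild increments @conn->rebuild_inode_status_nr before
+	 * re-checking @conn->need_rebuild_stash_list, and wakes this
+	 * queue on atomic_dec_and_test(), so the wait below cannot miss
+	 * a wake-up once the flag has been cleared.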
+ */ + wait_event(conn->rebuild_inode_status_wq, + !atomic_read(&conn->rebuild_inode_status_nr)); + } + mutex_lock(&conn->seq_lock); +} + +static void +hmdfs_update_peer_restore_stats(struct hmdfs_restore_statistics *restore_stats, + const struct hmdfs_restore_stats *stats) +{ + restore_stats->cur_ok = stats->succeed; + restore_stats->cur_fail = stats->fail; + restore_stats->cur_keep = stats->keep; + restore_stats->total_ok += stats->succeed; + restore_stats->total_fail += stats->fail; + restore_stats->total_keep += stats->keep; + restore_stats->ok_pages += stats->ok_pages; + restore_stats->fail_pages += stats->fail_pages; +} + +static void hmdfs_stash_online_do_restore(struct hmdfs_peer *conn, int evt, + unsigned int seq) +{ + struct hmdfs_sb_info *sbi = conn->sbi; + struct file *filp = NULL; + const struct cred *old_cred = NULL; + struct hmdfs_restore_stats stats; + int err = 0; + + if (!hmdfs_is_stash_enabled(sbi) || conn->need_rebuild_stash_list) { + if (conn->need_rebuild_stash_list) + hmdfs_info("peer 0x%x:0x%llx skip restoring due to rebuild-need", + conn->owner, conn->device_id); + return; + } + + /* release seq_lock to prevent blocking no-online sync cb */ + mutex_unlock(&conn->seq_lock); + /* For dir iteration, file read and unlink */ + old_cred = hmdfs_override_creds(conn->sbi->cred); + + memset(&stats, 0, sizeof(stats)); + filp = hmdfs_open_stash_dir(&sbi->stash_work_dir, conn->cid); + if (IS_ERR(filp)) { + err = PTR_ERR(filp); + goto out; + } + + err = hmdfs_iter_stash_file(conn, seq, filp, + hmdfs_restore_files, &stats); + + fput(filp); +out: + hmdfs_revert_creds(old_cred); + + /* offline again ? */ + if (err != -ESHUTDOWN) + hmdfs_drop_stashed_inodes(conn); + + hmdfs_update_peer_restore_stats(&conn->stats.restore, &stats); + hmdfs_info("peer 0x%x:0x%llx restore stashed-file ok %u fail %u keep %u", + conn->owner, conn->device_id, + stats.succeed, stats.fail, stats.keep); + + mutex_lock(&conn->seq_lock); +} + +static void hmdfs_stash_del_do_cleanup(struct hmdfs_peer *conn, int evt, + unsigned int seq) +{ + struct hmdfs_inode_info *info = NULL; + struct hmdfs_inode_info *next = NULL; + unsigned int preparing; + + if (!hmdfs_is_stash_enabled(conn->sbi)) + return; + + /* Async cb is cancelled */ + preparing = 0; + list_for_each_entry_safe(info, next, &conn->wr_opened_inode_list, + wr_opened_node) { + int status = READ_ONCE(info->stash_status); + + if (status == HMDFS_REMOTE_INODE_STASHING) { + struct hmdfs_cache_info *cache = NULL; + + spin_lock(&info->stash_lock); + cache = info->cache; + info->cache = NULL; + info->stash_status = HMDFS_REMOTE_INODE_NONE; + spin_unlock(&info->stash_lock); + + hmdfs_remote_del_wr_opened_inode(conn, info); + hmdfs_del_file_cache(cache); + /* put inode after all access are completed */ + iput(&info->vfs_inode); + preparing++; + } + } + hmdfs_info("release %u preparing inodes", preparing); + + hmdfs_info("release %u pinned inodes", conn->stashed_inode_nr); + if (list_empty(&conn->stashed_inode_list)) + return; + + list_for_each_entry_safe(info, next, + &conn->stashed_inode_list, stash_node) + hmdfs_untrack_stashed_inode(conn, info); +} + +void hmdfs_exit_stash(struct hmdfs_sb_info *sbi) +{ + if (!sbi->s_offline_stash) + return; + + if (sbi->stash_work_dir.dentry) { + path_put(&sbi->stash_work_dir); + sbi->stash_work_dir.dentry = NULL; + } +} + +int hmdfs_init_stash(struct hmdfs_sb_info *sbi) +{ + int err = 0; + struct path parent; + struct dentry *child = NULL; + + if (!sbi->s_offline_stash) + return 0; + + err = kern_path(sbi->cache_dir, 
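+	/*
+	 * Editor's note: read together, the callbacks in this file give the
+	 * inferred stash lifecycle (registered in stash_cb[] below):
+	 * OFFLINE/sync prepares caches for writeably-opened inodes;
+	 * OFFLINE/async flushes dirty pages into per-inode stash files;
+	 * ADD/sync checks whether a stash dir exists for the peer;
+	 * ONLINE/async first rebuilds the pinned-inode list, then restores
+	 * file contents and deletes the stash files; DEL/sync drops any
+	 * leftover state.
+	 */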
LOOKUP_FOLLOW | LOOKUP_DIRECTORY, + &parent); + if (err) { + hmdfs_err("invalid cache dir err %d", err); + goto out; + } + + child = hmdfs_stash_new_work_dir(parent.dentry); + if (!IS_ERR(child)) { + sbi->stash_work_dir.mnt = mntget(parent.mnt); + sbi->stash_work_dir.dentry = child; + } else { + err = PTR_ERR(child); + hmdfs_err("create stash work dir err %d", err); + } + + path_put(&parent); +out: + return err; +} + +static int hmdfs_stash_write_local_file(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info, + struct hmdfs_writepage_context *ctx, + struct hmdfs_cache_info *cache) +{ + struct page *page = ctx->page; + const struct cred *old_cred = NULL; + void *buf = NULL; + loff_t pos; + unsigned int flags; + ssize_t written; + int err = 0; + + buf = kmap(page); + pos = (loff_t)page->index << PAGE_SHIFT; + /* enable NOFS for memory allocation */ + flags = memalloc_nofs_save(); + old_cred = hmdfs_override_creds(conn->sbi->cred); + pos += cache->data_offs << HMDFS_STASH_BLK_SHIFT; + written = kernel_write(cache->cache_file, buf, ctx->count, &pos); + hmdfs_revert_creds(old_cred); + memalloc_nofs_restore(flags); + kunmap(page); + + if (written != ctx->count) { + hmdfs_err("stash peer 0x%x:0x%llx ino 0x%llx page 0x%lx data_offs 0x%x len %u err %zd", + conn->owner, conn->device_id, info->remote_ino, + page->index, cache->data_offs, ctx->count, written); + err = -EIO; + } + + return err; +} + +int hmdfs_stash_writepage(struct hmdfs_peer *conn, + struct hmdfs_writepage_context *ctx) +{ + struct inode *inode = ctx->page->mapping->host; + struct hmdfs_inode_info *info = hmdfs_i(inode); + struct hmdfs_cache_info *cache = NULL; + int err; + + /* e.g. fail to create stash file */ + cache = info->cache; + if (!cache) + return -EIO; + + err = hmdfs_stash_write_local_file(conn, info, ctx, cache); + if (!err) { + hmdfs_client_writepage_done(info, ctx); + atomic64_inc(&cache->written_pgs); + put_task_struct(ctx->caller); + kfree(ctx); + } + atomic64_inc(&cache->to_write_pgs); + + return err; +} + +static void hmdfs_stash_rebuild_status(struct hmdfs_peer *conn, + struct inode *inode) +{ + char *path_str = NULL; + struct hmdfs_inode_info *info = NULL; + const struct cred *old_cred = NULL; + struct path path; + struct path *stash_path = NULL; + int err = 0; + + path_str = kmalloc(HMDFS_STASH_PATH_LEN, GFP_KERNEL); + if (!path_str) { + err = -ENOMEM; + return; + } + + info = hmdfs_i(inode); + err = snprintf(path_str, HMDFS_STASH_PATH_LEN, "%s/0x%llx", + conn->cid, info->remote_ino); + if (err >= HMDFS_STASH_PATH_LEN) { + kfree(path_str); + hmdfs_err("peer 0x%x:0x%llx inode 0x%llx too long name len", + conn->owner, conn->device_id, info->remote_ino); + return; + } + old_cred = hmdfs_override_creds(conn->sbi->cred); + stash_path = &conn->sbi->stash_work_dir; + err = vfs_path_lookup(stash_path->dentry, stash_path->mnt, + path_str, 0, &path); + hmdfs_revert_creds(old_cred); + if (!err) { + if (hmdfs_is_reg(path.dentry)) { + WRITE_ONCE(info->stash_status, + HMDFS_REMOTE_INODE_RESTORING); + ihold(&info->vfs_inode); + hmdfs_track_inode_locked(conn, info); + } else { + hmdfs_info("peer 0x%x:0x%llx inode 0x%llx unexpected stashed file mode 0%o", + conn->owner, conn->device_id, + info->remote_ino, + d_inode(path.dentry)->i_mode); + } + + path_put(&path); + } else if (err && err != -ENOENT) { + hmdfs_err("peer 0x%x:0x%llx inode 0x%llx find %s err %d", + conn->owner, conn->device_id, info->remote_ino, + path_str, err); + } + + kfree(path_str); +} + +static inline bool +hmdfs_need_rebuild_inode_stash_status(struct 
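+/*
+ * Editor's note: hmdfs_stash_write_local_file() above brackets its
+ * kernel_write() with memalloc_nofs_save()/memalloc_nofs_restore(), the
+ * usual guard for writeback context: allocations made inside the window
+ * behave as if GFP_NOFS, so memory reclaim cannot recurse into
+ * filesystem code while the page is being stashed. The idiom:
+ *
+ *	unsigned int flags = memalloc_nofs_save();
+ *	...allocating work...
+ *	memalloc_nofs_restore(flags);
+ */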
hmdfs_peer *conn, umode_t mode)
+{
+	return hmdfs_is_stash_enabled(conn->sbi) &&
+	       READ_ONCE(conn->need_rebuild_stash_list) &&
+	       (S_ISREG(mode) || S_ISLNK(mode));
+}
+
+void hmdfs_remote_init_stash_status(struct hmdfs_peer *conn,
+				    struct inode *inode, umode_t mode)
+{
+	if (!hmdfs_need_rebuild_inode_stash_status(conn, mode))
+		return;
+
+	atomic_inc(&conn->rebuild_inode_status_nr);
+	/*
+	 * Use smp_mb__after_atomic() to ensure order between writing
+	 * @conn->rebuild_inode_status_nr and reading
+	 * @conn->need_rebuild_stash_list.
+	 */
+	smp_mb__after_atomic();
+	if (READ_ONCE(conn->need_rebuild_stash_list))
+		hmdfs_stash_rebuild_status(conn, inode);
+	if (atomic_dec_and_test(&conn->rebuild_inode_status_nr))
+		wake_up(&conn->rebuild_inode_status_wq);
+}
+
+static struct hmdfs_node_cb_desc stash_cb[] = {
+	{
+		.evt = NODE_EVT_OFFLINE,
+		.sync = true,
+		.min_version = DFS_2_0,
+		.fn = hmdfs_stash_offline_prepare,
+	},
+	{
+		.evt = NODE_EVT_OFFLINE,
+		.sync = false,
+		.min_version = DFS_2_0,
+		.fn = hmdfs_stash_offline_do_stash,
+	},
+	/* Don't know the peer's version yet, so min_version is 0 */
+	{
+		.evt = NODE_EVT_ADD,
+		.sync = true,
+		.fn = hmdfs_stash_add_do_check,
+	},
+	{
+		.evt = NODE_EVT_ONLINE,
+		.sync = false,
+		.min_version = DFS_2_0,
+		.fn = hmdfs_stash_online_prepare,
+	},
+	{
+		.evt = NODE_EVT_ONLINE,
+		.sync = false,
+		.min_version = DFS_2_0,
+		.fn = hmdfs_stash_online_do_restore,
+	},
+	{
+		.evt = NODE_EVT_DEL,
+		.sync = true,
+		.min_version = DFS_2_0,
+		.fn = hmdfs_stash_del_do_cleanup,
+	},
+};
+
+void __init hmdfs_stash_add_node_evt_cb(void)
+{
+	hmdfs_node_add_evt_cb(stash_cb, ARRAY_SIZE(stash_cb));
+}
+
diff --git a/fs/hmdfs/stash.h b/fs/hmdfs/stash.h
new file mode 100644
index 0000000000000000000000000000000000000000..f38e737f94721093eb305b08c8c4128dbed218e0
--- /dev/null
+++ b/fs/hmdfs/stash.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/stash.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_STASH_H
+#define HMDFS_STASH_H
+
+#include "hmdfs.h"
+#include "hmdfs_client.h"
+
+extern void hmdfs_stash_add_node_evt_cb(void);
+
+extern void hmdfs_exit_stash(struct hmdfs_sb_info *sbi);
+extern int hmdfs_init_stash(struct hmdfs_sb_info *sbi);
+
+extern int hmdfs_stash_writepage(struct hmdfs_peer *conn,
+				 struct hmdfs_writepage_context *ctx);
+
+extern void hmdfs_remote_init_stash_status(struct hmdfs_peer *conn,
+					   struct inode *inode, umode_t mode);
+
+#endif
diff --git a/fs/hmdfs/super.c b/fs/hmdfs/super.c
new file mode 100644
index 0000000000000000000000000000000000000000..92012f80ab3768395a127d233e10c00d3eeb6b11
--- /dev/null
+++ b/fs/hmdfs/super.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/super.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include <linux/module.h>
+#include <linux/parser.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+
+#include "hmdfs.h"
+
+enum {
+	OPT_RA_PAGES,
+	OPT_LOCAL_DST,
+	OPT_CACHE_DIR,
+	OPT_S_CASE,
+	OPT_VIEW_TYPE,
+	OPT_NO_OFFLINE_STASH,
+	OPT_NO_DENTRY_CACHE,
+	OPT_ERR,
+};
+
+static match_table_t hmdfs_tokens = {
+	{ OPT_RA_PAGES, "ra_pages=%s" },
+	{ OPT_LOCAL_DST, "local_dst=%s" },
+	{ OPT_CACHE_DIR, "cache_dir=%s" },
+	{ OPT_S_CASE, "sensitive" },
+	{ OPT_VIEW_TYPE, "merge" },
+	{ OPT_NO_OFFLINE_STASH, "no_offline_stash" },
+	{ OPT_NO_DENTRY_CACHE, "no_dentry_cache" },
+	{ OPT_ERR, NULL },
+};
+
+#define DEFAULT_RA_PAGES 128
+
+void __hmdfs_log(const char *level, const bool ratelimited,
+		 const char *function, const char *fmt, ...)
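+/*
+ * Editor's note: forwarding varargs through struct va_format and the %pV
+ * printk specifier, as below, is the standard way to wrap printk() once
+ * per log level. The hmdfs_err()/hmdfs_info()/hmdfs_warning() helpers
+ * used throughout this patch are presumably macros over this function;
+ * a sketch of one such wrapper (definition assumed, not shown here):
+ *
+ *	#define hmdfs_err(fmt, ...) \
+ *		__hmdfs_log(KERN_ERR, false, __func__, fmt, ##__VA_ARGS__)
+ */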
+{
+	struct va_format vaf;
+	va_list args;
+
+	va_start(args, fmt);
+	vaf.fmt = fmt;
+	vaf.va = &args;
+	if (ratelimited)
+		printk_ratelimited("%s hmdfs: %s() %pV\n", level,
+				   function, &vaf);
+	else
+		printk("%s hmdfs: %s() %pV\n", level, function, &vaf);
+	va_end(args);
+}
+
+static int hmdfs_match_strdup(const substring_t *s, char **dst)
+{
+	char *dup = NULL;
+
+	dup = match_strdup(s);
+	if (!dup)
+		return -ENOMEM;
+
+	*dst = dup;
+
+	return 0;
+}
+
+int hmdfs_parse_options(struct hmdfs_sb_info *sbi, const char *data)
+{
+	char *p = NULL;
+	char *name = NULL;
+	char *options = NULL;
+	char *options_src = NULL;
+	substring_t args[MAX_OPT_ARGS];
+	unsigned long value = DEFAULT_RA_PAGES;
+	struct super_block *sb = sbi->sb;
+	int err = 0;
+
+	options = kstrdup(data, GFP_KERNEL);
+	if (data && !options) {
+		err = -ENOMEM;
+		goto out;
+	}
+	options_src = options;
+	err = super_setup_bdi(sb);
+	if (err)
+		goto out;
+
+	while ((p = strsep(&options_src, ",")) != NULL) {
+		int token;
+
+		if (!*p)
+			continue;
+		args[0].to = args[0].from = NULL;
+		token = match_token(p, hmdfs_tokens, args);
+
+		switch (token) {
+		case OPT_RA_PAGES:
+			name = match_strdup(&args[0]);
+			if (name) {
+				err = kstrtoul(name, 10, &value);
+				kfree(name);
+				name = NULL;
+				if (err)
+					goto out;
+			}
+			break;
+		case OPT_LOCAL_DST:
+			err = hmdfs_match_strdup(&args[0], &sbi->local_dst);
+			if (err)
+				goto out;
+			break;
+		case OPT_CACHE_DIR:
+			err = hmdfs_match_strdup(&args[0], &sbi->cache_dir);
+			if (err)
+				goto out;
+			break;
+		case OPT_S_CASE:
+			sbi->s_case_sensitive = true;
+			break;
+		case OPT_VIEW_TYPE:
+			sbi->s_merge_switch = true;
+			break;
+		case OPT_NO_OFFLINE_STASH:
+			sbi->s_offline_stash = false;
+			break;
+		case OPT_NO_DENTRY_CACHE:
+			sbi->s_dentry_cache = false;
+			break;
+		default:
+			err = -EINVAL;
+			goto out;
+		}
+	}
+out:
+	kfree(options);
+	sb->s_bdi->ra_pages = value;
+	if (sbi->local_dst == NULL)
+		err = -EINVAL;
+
+	if (sbi->s_offline_stash && !sbi->cache_dir) {
+		hmdfs_warning("no cache_dir for offline stash");
+		sbi->s_offline_stash = false;
+	}
+
+	if (sbi->s_dentry_cache && !sbi->cache_dir) {
+		hmdfs_warning("no cache_dir for dentry cache");
+		sbi->s_dentry_cache = false;
+	}
+
+	return err;
+}
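+/*
+ * Editor's note: a mount invocation the parser above would accept, with
+ * device and paths purely illustrative:
+ *
+ *	mount -t hmdfs -o merge,sensitive,ra_pages=256,local_dst=/mnt/dst,cache_dir=/data/hmdfs_cache <src> <mnt>
+ *
+ * local_dst is mandatory (the function returns -EINVAL without it), and
+ * both offline stash and the dentry cache are silently disabled when no
+ * cache_dir is supplied.
+ */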