diff --git a/OAT.xml b/OAT.xml
new file mode 100644
index 0000000000000000000000000000000000000000..4f2fcc9b122539fbde514e32f28d90fe0bfad8d0
--- /dev/null
+++ b/OAT.xml
@@ -0,0 +1,448 @@
+<!--
+  The XML body of this OAT configuration file was lost to markup
+  stripping; of its 448 lines, only the license-file reference below
+  survives.
+-->
+ COPYING
diff --git a/README.OpenSource b/README.OpenSource
new file mode 100644
index 0000000000000000000000000000000000000000..8313cf171abe785223d6a57cd172c5f900b7e8e5
--- /dev/null
+++ b/README.OpenSource
@@ -0,0 +1,11 @@
+[
+ {
+ "Name": "linux-5.10",
+ "License": "GPL-2.0+",
+ "License File": "COPYING",
+ "Version Number": "5.10.79",
+ "Owner": "liuyu82@huawei.com",
+ "Upstream URL": "https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/log/?h=linux-5.10.y",
+ "Description": "linux kernel 5.10"
+ }
+]
\ No newline at end of file
diff --git a/fs/Kconfig b/fs/Kconfig
index da524c4d7b7e03f7d9a218f9e78ef0849b16de2d..b95f212be39e31501f342b42bdada3d3b1f4b1df 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -22,6 +22,7 @@ config FS_IOMAP
source "fs/ext2/Kconfig"
source "fs/ext4/Kconfig"
+source "fs/hmdfs/Kconfig"
source "fs/jbd2/Kconfig"
config FS_MBCACHE
diff --git a/fs/Makefile b/fs/Makefile
index 999d1a23f036c9f96a06e056d333e2e3832cdc37..d71954aaba20e3adf2e640c5f91549605d71af69 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -71,6 +71,7 @@ obj-$(CONFIG_DLM) += dlm/
obj-$(CONFIG_FSCACHE) += fscache/
obj-$(CONFIG_REISERFS_FS) += reiserfs/
obj-$(CONFIG_EXT4_FS) += ext4/
+obj-$(CONFIG_HMDFS_FS) += hmdfs/
# We place ext4 before ext2 so that clean ext3 root fs's do NOT mount using the
# ext2 driver, which doesn't know about journalling! Explicitly request ext2
# by giving the rootfstype= parameter.
diff --git a/fs/hmdfs/Kconfig b/fs/hmdfs/Kconfig
new file mode 100644
index 0000000000000000000000000000000000000000..379606a6f46630df6aa76b36c30993f682c8f353
--- /dev/null
+++ b/fs/hmdfs/Kconfig
@@ -0,0 +1,48 @@
+config HMDFS_FS
+ tristate "HMDFS filesystem support"
+ help
+ HMDFS is an overlay file system. Relying on an underlying file system
+ and an established network connection, it allows files to be exchanged
+ across devices. Two views are provided: in the device view, the shared
+ directories of each device appear under separate per-device
+ directories; in the merge view, a single collection of the shared
+ files of all devices is presented.
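+
+ To compile this file system support as a module, choose M here: the
+ module will be called hmdfs. A mount then looks roughly like the
+ following (paths and options illustrative only):
+
+ mount -t hmdfs <lower-dir> <mount-point>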
+
+config HMDFS_FS_PERMISSION
+ bool "HMDFS application permission management"
+ depends on HMDFS_FS
+ help
+ HMDFS provides cross-device file and directory sharing. Only the
+ same application can access the files and directories under its
+ corresponding package directory; this option provides management
+ and control of those access permissions.
+
+ If unsure, say N.
+
+config HMDFS_FS_ENCRYPTION
+ bool "HMDFS message encryption"
+ depends on HMDFS_FS && TLS
+ help
+ HMDFS provides cross-device file and directory sharing by sending and
+ receiving network messages. To ensure data security, TLS encryption is
+ provided.
+
+ If you want to improve performance, say N.
+
+config HMDFS_FS_DEBUG
+ bool "HMDFS debug log"
+ depends on HMDFS_FS
+ help
+ HMDFS prints a lot of logs, many of which are debugging messages
+ that are unnecessary during normal operation but useful when
+ diagnosing a problem. Say Y to enable them.
+
+ If unsure, say N.
+
+config HMDFS_FS_FAULT_INJECT
+ bool "HMDFS fault inject"
+ depends on HMDFS_FS
+ help
+ HMDFS provides fault injection support for testing.
+
+ If unsure, say N.
diff --git a/fs/hmdfs/Makefile b/fs/hmdfs/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..25c3eef3dd9d6cdcb55f9da348bb075fbaeae948
--- /dev/null
+++ b/fs/hmdfs/Makefile
@@ -0,0 +1,15 @@
+obj-$(CONFIG_HMDFS_FS) += hmdfs.o
+ccflags-y += -I$(src)
+
+hmdfs-y := main.o super.o inode.o dentry.o inode_root.o file_merge.o
+hmdfs-y += hmdfs_client.o hmdfs_server.o inode_local.o inode_remote.o
+hmdfs-y += inode_merge.o hmdfs_dentryfile.o file_root.o file_remote.o
+hmdfs-y += file_local.o client_writeback.o server_writeback.o stash.o
+
+hmdfs-y += comm/device_node.o comm/message_verify.o comm/node_cb.o
+hmdfs-y += comm/connection.o comm/socket_adapter.o comm/transport.o
+
+hmdfs-$(CONFIG_HMDFS_FS_ENCRYPTION) += comm/crypto.o
+hmdfs-$(CONFIG_HMDFS_FS_PERMISSION) += authority/authentication.o
+
+hmdfs-$(CONFIG_HMDFS_FS_FAULT_INJECT) += comm/fault_inject.o
diff --git a/fs/hmdfs/authority/authentication.c b/fs/hmdfs/authority/authentication.c
new file mode 100644
index 0000000000000000000000000000000000000000..97d842147050e0fdc8db47824d5912be573e5ef9
--- /dev/null
+++ b/fs/hmdfs/authority/authentication.c
@@ -0,0 +1,486 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/comm/authority/authentication.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include "authentication.h"
+#include <linux/fsnotify.h>
+#include <linux/xattr.h>
+
+#include "hmdfs.h"
+
+struct fs_struct *hmdfs_override_fsstruct(struct fs_struct *saved_fs)
+{
+#if (defined CONFIG_HMDFS_FS_PERMISSION) && (defined CONFIG_SDCARD_FS)
+ struct fs_struct *copied_fs = copy_fs_struct(saved_fs);
+
+ if (!copied_fs)
+ return NULL;
+ copied_fs->umask = 0;
+ task_lock(current);
+ current->fs = copied_fs;
+ task_unlock(current);
+ return copied_fs;
+#else
+ return saved_fs;
+#endif
+}
+
+void hmdfs_revert_fsstruct(struct fs_struct *saved_fs,
+ struct fs_struct *copied_fs)
+{
+#if (defined CONFIG_HMDFS_FS_PERMISSION) && (defined CONFIG_SDCARD_FS)
+ task_lock(current);
+ current->fs = saved_fs;
+ task_unlock(current);
+ free_fs_struct(copied_fs);
+#endif
+}
+
+const struct cred *hmdfs_override_fsids(bool is_recv_thread)
+{
+ struct cred *cred = NULL;
+ const struct cred *old_cred = NULL;
+
+ cred = prepare_creds();
+ if (!cred)
+ return NULL;
+
+ cred->fsuid = MEDIA_RW_UID;
+ cred->fsgid = is_recv_thread ?
+ KGIDT_INIT((gid_t)AID_EVERYBODY) : MEDIA_RW_GID;
+
+ old_cred = override_creds(cred);
+
+ return old_cred;
+}
+
+const struct cred *hmdfs_override_dir_fsids(struct inode *dir,
+ struct dentry *dentry, __u16 *_perm)
+{
+ struct hmdfs_inode_info *hii = hmdfs_i(dir);
+ struct cred *cred = NULL;
+ const struct cred *old_cred = NULL;
+ __u16 level = hmdfs_perm_get_next_level(hii->perm);
+ __u16 perm = 0;
+
+ cred = prepare_creds();
+ if (!cred)
+ return NULL;
+
+ switch (level) {
+ case HMDFS_PERM_MNT:
+ /* system : media_rw */
+ cred->fsuid = SYSTEM_UID;
+ perm = (hii->perm & HMDFS_DIR_TYPE_MASK) | level;
+ break;
+ case HMDFS_PERM_DFS:
+ /*
+ * data : system : media_rw
+ * system: system : media_rw, need authority
+ * other : media_rw : media_rw
+ **/
+ if (!strcmp(dentry->d_name.name, PKG_ROOT_NAME)) {
+ cred->fsuid = SYSTEM_UID;
+ perm = HMDFS_DIR_DATA | level;
+ } else if (!strcmp(dentry->d_name.name, SYSTEM_NAME)) {
+ cred->fsuid = SYSTEM_UID;
+ perm = AUTH_SYSTEM | HMDFS_DIR_SYSTEM | level;
+ } else {
+ cred->fsuid = MEDIA_RW_UID;
+ perm = HMDFS_DIR_PUBLIC | level;
+ }
+ break;
+ case HMDFS_PERM_PKG:
+ if (is_data_dir(hii->perm)) {
+ /*
+ * Mkdir for app pkg.
+ * Get the appid by passing pkgname to configfs.
+ * Set ROOT + media_rw for remote install,
+ * local uninstall.
+ * Set appid + media_rw for local install.
+ */
+ uid_t app_id = 0;
+
+ if (app_id != 0)
+ cred->fsuid = KUIDT_INIT(app_id);
+ else
+ cred->fsuid = ROOT_UID;
+ perm = AUTH_PKG | HMDFS_DIR_PKG | level;
+ } else {
+ cred->fsuid = dir->i_uid;
+ perm = (hii->perm & AUTH_MASK) | HMDFS_DIR_DEFAULT | level;
+ }
+ break;
+ case HMDFS_PERM_OTHER:
+ cred->fsuid = dir->i_uid;
+ if (is_pkg_auth(hii->perm))
+ perm = AUTH_PKG | HMDFS_DIR_PKG_SUB | level;
+ else
+ perm = (hii->perm & AUTH_MASK) | HMDFS_DIR_DEFAULT | level;
+ break;
+ default:
+ /* should never get here */
+ hmdfs_err("hmdfs perm incorrect, unexpected level %u", level);
+ break;
+ }
+
+ cred->fsgid = MEDIA_RW_GID;
+ *_perm = perm;
+ old_cred = override_creds(cred);
+
+ return old_cred;
+}
+
+int hmdfs_override_dir_id_fs(struct cache_fs_override *or,
+ struct inode *dir,
+ struct dentry *dentry,
+ __u16 *perm)
+{
+ or->saved_cred = hmdfs_override_dir_fsids(dir, dentry, perm);
+ if (!or->saved_cred)
+ return -ENOMEM;
+
+ or->saved_fs = current->fs;
+ or->copied_fs = hmdfs_override_fsstruct(or->saved_fs);
+ if (!or->copied_fs) {
+ hmdfs_revert_fsids(or->saved_cred);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void hmdfs_revert_dir_id_fs(struct cache_fs_override *or)
+{
+ hmdfs_revert_fsstruct(or->saved_fs, or->copied_fs);
+ hmdfs_revert_fsids(or->saved_cred);
+}
+
+const struct cred *hmdfs_override_file_fsids(struct inode *dir, __u16 *_perm)
+{
+ struct hmdfs_inode_info *hii = hmdfs_i(dir);
+ struct cred *cred = NULL;
+ const struct cred *old_cred = NULL;
+ __u16 level = hmdfs_perm_get_next_level(hii->perm);
+ uint16_t perm;
+
+ perm = HMDFS_FILE_DEFAULT | level;
+
+ cred = prepare_creds();
+ if (!cred)
+ return NULL;
+
+ cred->fsuid = dir->i_uid;
+ cred->fsgid = dir->i_gid;
+ if (is_pkg_auth(hii->perm))
+ perm = AUTH_PKG | HMDFS_FILE_PKG_SUB | level;
+ else
+ perm = (hii->perm & AUTH_MASK) | HMDFS_FILE_DEFAULT | level;
+
+ *_perm = perm;
+ old_cred = override_creds(cred);
+
+ return old_cred;
+}
+
+void hmdfs_revert_fsids(const struct cred *old_cred)
+{
+ const struct cred *cur_cred;
+
+ cur_cred = current->cred;
+ revert_creds(old_cred);
+ put_cred(cur_cred);
+}
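+
+/*
+ * A minimal usage sketch for the fsid override pair above (the caller
+ * and error handling are illustrative, not taken from this file):
+ *
+ *	const struct cred *old_cred = hmdfs_override_fsids(false);
+ *
+ *	if (!old_cred)
+ *		return -ENOMEM;
+ *	// ... operate on the lower fs as media_rw ...
+ *	hmdfs_revert_fsids(old_cred);
+ *
+ * hmdfs_revert_fsids() both restores the saved creds and drops the
+ * reference on the temporary creds installed by override_creds().
+ */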
+
+int hmdfs_persist_perm(struct dentry *dentry, __u16 *perm)
+{
+ int err;
+ struct inode *minode = d_inode(dentry);
+
+ if (!minode)
+ return -EINVAL;
+
+ inode_lock(minode);
+ err = __vfs_setxattr(dentry, minode, HMDFS_PERM_XATTR, perm,
+ sizeof(*perm), XATTR_CREATE);
+ if (!err)
+ fsnotify_xattr(dentry);
+ else if (err && err != -EEXIST)
+ hmdfs_err("failed to setxattr, err=%d", err);
+ inode_unlock(minode);
+ return err;
+}
+
+__u16 hmdfs_read_perm(struct inode *inode)
+{
+ __u16 ret = 0;
+ int size = 0;
+ struct dentry *dentry = d_find_alias(inode);
+
+ if (!dentry)
+ return ret;
+
+ size = __vfs_getxattr(dentry, inode, HMDFS_PERM_XATTR, &ret,
+ sizeof(ret));
+ /*
+ * Some files may not have the perm xattr set, e.g. files created
+ * in the sdcard dir by another user.
+ **/
+ if (size < 0 || size != sizeof(ret))
+ ret = HMDFS_ALL_MASK;
+
+ dput(dentry);
+ return ret;
+}
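+
+/*
+ * The perm word is persisted as a raw __u16 in the "user.hmdfs.perm"
+ * xattr (see hmdfs_persist_perm() above), so it can be inspected from
+ * userspace for debugging, e.g. (illustrative):
+ *
+ *	getfattr -n user.hmdfs.perm -e hex <lower-path>
+ */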
+
+static __u16 __inherit_perm_dir(struct inode *parent, struct inode *inode)
+{
+ __u16 perm = 0;
+ struct hmdfs_inode_info *info = hmdfs_i(parent);
+ __u16 level = hmdfs_perm_get_next_level(info->perm);
+ struct dentry *dentry = d_find_alias(inode);
+
+ if (!dentry)
+ return perm;
+
+ switch (level) {
+ case HMDFS_PERM_MNT:
+ /* system : media_rw */
+ perm = (info->perm & HMDFS_DIR_TYPE_MASK) | level;
+ break;
+ case HMDFS_PERM_DFS:
+ /*
+ * data : system : media_rw
+ * system: system : media_rw, need authority
+ * other : media_rw : media_rw
+ **/
+ if (!strcmp(dentry->d_name.name, PKG_ROOT_NAME)) {
+ // "data"
+ perm = HMDFS_DIR_DATA | level;
+ } else if (!strcmp(dentry->d_name.name, SYSTEM_NAME)) {
+ // "system"
+ perm = AUTH_SYSTEM | HMDFS_DIR_SYSTEM | level;
+ } else {
+ perm = HMDFS_DIR_PUBLIC | level;
+ }
+ break;
+ case HMDFS_PERM_PKG:
+ if (is_data_dir(info->perm)) {
+ /*
+ * Mkdir for app pkg.
+ * Get the appid by passing pkgname to configfs.
+ * Set ROOT + media_rw for remote install,
+ * local uninstall.
+ * Set appid + media_rw for local install.
+ */
+ perm = AUTH_PKG | HMDFS_DIR_PKG | level;
+ } else {
+ perm = (info->perm & AUTH_MASK) | HMDFS_DIR_DEFAULT | level;
+ }
+ break;
+ case HMDFS_PERM_OTHER:
+ if (is_pkg_auth(info->perm))
+ perm = AUTH_PKG | HMDFS_DIR_PKG_SUB | level;
+ else
+ perm = (info->perm & AUTH_MASK) | HMDFS_DIR_DEFAULT | level;
+ break;
+ default:
+ /* should never get here */
+ hmdfs_err("hmdfs perm incorrect, unexpected level %u", level);
+ break;
+ }
+ dput(dentry);
+ return perm;
+}
+
+static __u16 __inherit_perm_file(struct inode *parent)
+{
+ struct hmdfs_inode_info *hii = hmdfs_i(parent);
+ __u16 level = hmdfs_perm_get_next_level(hii->perm);
+ uint16_t perm;
+
+ perm = HMDFS_FILE_DEFAULT | level;
+
+ if (is_pkg_auth(hii->perm))
+ perm = AUTH_PKG | HMDFS_FILE_PKG_SUB | level;
+ else
+ perm = (hii->perm & AUTH_MASK) | HMDFS_FILE_DEFAULT | level;
+
+ return perm;
+}
+
+static void fixup_ownership(struct inode *child, struct dentry *lower_dentry,
+ uid_t uid)
+{
+ int err;
+ struct iattr newattrs;
+
+ newattrs.ia_valid = ATTR_UID | ATTR_FORCE;
+ newattrs.ia_uid = KUIDT_INIT(uid);
+ if (!S_ISDIR(d_inode(lower_dentry)->i_mode))
+ newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV;
+
+ inode_lock(d_inode(lower_dentry));
+ err = notify_change(lower_dentry, &newattrs, NULL);
+ inode_unlock(d_inode(lower_dentry));
+
+ if (!err)
+ child->i_uid = KUIDT_INIT(uid);
+ else
+ hmdfs_err("update PKG uid failed, err = %d", err);
+}
+
+static void fixup_ownership_user_group(struct inode *child, struct dentry *lower_dentry,
+ uid_t uid, gid_t gid)
+{
+ int err;
+ struct iattr newattrs;
+
+ newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_FORCE;
+ newattrs.ia_uid = KUIDT_INIT(uid);
+ newattrs.ia_gid = KGIDT_INIT(gid);
+ if (!S_ISDIR(d_inode(lower_dentry)->i_mode))
+ newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
+
+ inode_lock(d_inode(lower_dentry));
+ err = notify_change(lower_dentry, &newattrs, NULL);
+ inode_unlock(d_inode(lower_dentry));
+
+ if (!err) {
+ child->i_uid = KUIDT_INIT(uid);
+ child->i_gid = KGIDT_INIT(gid);
+ } else {
+ hmdfs_err("update PKG uid failed, err = %d", err);
+ }
+}
+
+__u16 hmdfs_perm_inherit(struct inode *parent_inode, struct inode *child)
+{
+ __u16 perm;
+
+ if (S_ISDIR(child->i_mode))
+ perm = __inherit_perm_dir(parent_inode, child);
+ else
+ perm = __inherit_perm_file(parent_inode);
+ return perm;
+}
+
+void check_and_fixup_ownership(struct inode *parent_inode, struct inode *child,
+ struct dentry *lower_dentry, const char *name)
+{
+ uid_t appid;
+ struct hmdfs_inode_info *info = hmdfs_i(child);
+
+ if (info->perm == HMDFS_ALL_MASK)
+ info->perm = hmdfs_perm_inherit(parent_inode, child);
+
+ switch (info->perm & HMDFS_DIR_TYPE_MASK) {
+ case HMDFS_DIR_PKG:
+ appid = 0;
+ if (appid != child->i_uid.val)
+ fixup_ownership(child, lower_dentry, appid);
+
+ break;
+ case HMDFS_DIR_DATA:
+ case HMDFS_FILE_PKG_SUB:
+ case HMDFS_DIR_DEFAULT:
+ case HMDFS_FILE_DEFAULT:
+ if (parent_inode->i_uid.val != child->i_uid.val ||
+ parent_inode->i_gid.val != child->i_gid.val)
+ fixup_ownership_user_group(child, lower_dentry,
+ parent_inode->i_uid.val,
+ parent_inode->i_gid.val);
+ break;
+ case HMDFS_DIR_PUBLIC:
+ fixup_ownership(child, lower_dentry, (uid_t)AID_MEDIA_RW);
+
+ break;
+ default:
+ break;
+ }
+}
+
+void check_and_fixup_ownership_remote(struct inode *dir,
+ struct dentry *dentry)
+{
+ struct hmdfs_inode_info *hii = hmdfs_i(dir);
+ struct inode *dinode = d_inode(dentry);
+ struct hmdfs_inode_info *dinfo = hmdfs_i(dinode);
+ __u16 level = hmdfs_perm_get_next_level(hii->perm);
+ __u16 perm = 0;
+
+ hmdfs_debug("level:0x%X", level);
+ switch (level) {
+ case HMDFS_PERM_MNT:
+ /* system : media_rw */
+ dinode->i_uid = SYSTEM_UID;
+ perm = (hii->perm & HMDFS_DIR_TYPE_MASK) | level;
+ break;
+ case HMDFS_PERM_DFS:
+ /*
+ * data : system : media_rw
+ * system: system : media_rw, need authority
+ * other : media_rw : media_rw
+ **/
+ if (!strcmp(dentry->d_name.name, PKG_ROOT_NAME)) {
+ // "data"
+ dinode->i_uid = SYSTEM_UID;
+ perm = HMDFS_DIR_DATA | level;
+ } else if (!strcmp(dentry->d_name.name, SYSTEM_NAME)) {
+ // "system"
+ dinode->i_uid = SYSTEM_UID;
+ perm = AUTH_SYSTEM | HMDFS_DIR_SYSTEM | level;
+ } else {
+ dinode->i_uid = MEDIA_RW_UID;
+ perm = HMDFS_DIR_PUBLIC | level;
+ }
+ break;
+ case HMDFS_PERM_PKG:
+ if (is_data_dir(hii->perm)) {
+ /*
+ * Mkdir for app pkg.
+ * Get the appid by passing pkgname to configfs.
+ * Set ROOT + media_rw for remote install,
+ * local uninstall.
+ * Set appid + media_rw for local install.
+ */
+ uid_t app_id = 0;
+
+ if (app_id != 0)
+ dinode->i_uid = KUIDT_INIT(app_id);
+ else
+ dinode->i_uid = ROOT_UID;
+ perm = AUTH_PKG | HMDFS_DIR_PKG | level;
+ } else {
+ dinode->i_uid = dir->i_uid;
+ perm = (hii->perm & AUTH_MASK) | HMDFS_DIR_DEFAULT | level;
+ }
+ break;
+ case HMDFS_PERM_OTHER:
+ dinode->i_uid = dir->i_uid;
+ if (is_pkg_auth(hii->perm))
+ perm = AUTH_PKG | HMDFS_DIR_PKG_SUB | level;
+ else
+ perm = (hii->perm & AUTH_MASK) | HMDFS_DIR_DEFAULT | level;
+ break;
+ default:
+ /* should never get here */
+ hmdfs_err("hmdfs perm incorrect, unexpected level %u", level);
+ break;
+ }
+
+ dinode->i_gid = MEDIA_RW_GID;
+ dinfo->perm = perm;
+}
+
+void hmdfs_root_inode_perm_init(struct inode *root_inode)
+{
+ struct hmdfs_inode_info *hii = hmdfs_i(root_inode);
+
+ hii->perm = HMDFS_DIR_ROOT | HMDFS_PERM_MNT;
+ set_inode_uid(root_inode, SYSTEM_UID);
+ set_inode_gid(root_inode, MEDIA_RW_GID);
+}
diff --git a/fs/hmdfs/authority/authentication.h b/fs/hmdfs/authority/authentication.h
new file mode 100644
index 0000000000000000000000000000000000000000..e8b7bed53fb9197b0456f3d8e446a8a43cb57fad
--- /dev/null
+++ b/fs/hmdfs/authority/authentication.h
@@ -0,0 +1,328 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/comm/authority/authentication.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef AUTHENTICATION_H
+#define AUTHENTICATION_H
+
+#include <linux/fs.h>
+#include <linux/cred.h>
+#include <linux/fs_struct.h>
+#include <linux/err.h>
+#include <linux/uidgid.h>
+#include "hmdfs.h"
+
+struct cache_fs_override {
+ struct fs_struct *saved_fs;
+ struct fs_struct *copied_fs;
+ const struct cred *saved_cred;
+};
+
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+
+#define AID_ROOT 0
+#define AID_SYSTEM 1000
+#define AID_SDCARD_RW 1015
+#define AID_MEDIA_RW 1023
+#define AID_EVERYBODY 9997
+
+/* copied from sdcardfs/multiuser.h */
+#define AID_USER_OFFSET 100000 /* offset for uid ranges for each user */
+
+#define HMDFS_PERM_XATTR "user.hmdfs.perm"
+
+#define ROOT_UID KUIDT_INIT(AID_ROOT)
+#define SYSTEM_UID KUIDT_INIT(AID_SYSTEM)
+#define MEDIA_RW_UID KUIDT_INIT(AID_MEDIA_RW)
+
+#define SYSTEM_GID KGIDT_INIT((gid_t) AID_SYSTEM)
+#define MEDIA_RW_GID KGIDT_INIT(AID_MEDIA_RW)
+#define SDCARD_RW_GID KGIDT_INIT(AID_SDCARD_RW)
+
+#define PKG_ROOT_NAME "data"
+#define SYSTEM_NAME "system"
+
+/*
+ * | perm fix | permmnt | permdfs | permpkg | perm other
+ * /mnt/mdfs/ accountID / device view / local / DATA / packageName /...
+ * / system /...
+ * / documents /...
+ * / devid /.......
+ * / merge view /
+ * / sdcard /
+ **/
+#define HMDFS_PERM_MASK 0x000F
+
+#define HMDFS_PERM_FIX 0
+#define HMDFS_PERM_MNT 1
+#define HMDFS_PERM_DFS 2
+#define HMDFS_PERM_PKG 3
+#define HMDFS_PERM_OTHER 4
+
+static inline bool is_perm_fix(__u16 perm)
+{
+ return (perm & HMDFS_PERM_MASK) == HMDFS_PERM_FIX;
+}
+
+static inline bool is_perm_mnt(__u16 perm)
+{
+ return (perm & HMDFS_PERM_MASK) == HMDFS_PERM_MNT;
+}
+
+static inline bool is_perm_dfs(__u16 perm)
+{
+ return (perm & HMDFS_PERM_MASK) == HMDFS_PERM_DFS;
+}
+
+static inline bool is_perm_pkg(__u16 perm)
+{
+ return (perm & HMDFS_PERM_MASK) == HMDFS_PERM_PKG;
+}
+
+static inline bool is_perm_other(__u16 perm)
+{
+ return (perm & HMDFS_PERM_MASK) == HMDFS_PERM_OTHER;
+}
+
+static inline void hmdfs_check_cred(const struct cred *cred)
+{
+ if (cred->fsuid.val != AID_SYSTEM || cred->fsgid.val != AID_SYSTEM)
+ hmdfs_warning("uid is %u, gid is %u", cred->fsuid.val,
+ cred->fsgid.val);
+}
+
+/* dir and file type mask for hmdfs */
+#define HMDFS_DIR_TYPE_MASK 0x00F0
+
+/* LEVEL 0 perm fix - permmnt , only root dir */
+#define HMDFS_DIR_ROOT 0x0010
+
+/* LEVEL 1 perm dfs */
+#define HMDFS_DIR_PUBLIC 0x0020
+#define HMDFS_DIR_DATA 0x0030
+#define HMDFS_DIR_SYSTEM 0x0040
+
+/* LEVEL 2 HMDFS_PERM_PKG */
+#define HMDFS_DIR_PKG 0x0050
+
+/* LEVEL 2~n HMDFS_PERM_OTHER */
+#define PUBLIC_FILE 0x0060
+#define PUBLIC_SUB_DIR 0x0070
+#define SYSTEM_SUB_DIR 0x0080
+#define SYSTEM_SUB_FILE 0x0090
+
+#define HMDFS_DIR_PKG_SUB 0x00A0
+#define HMDFS_FILE_PKG_SUB 0x00B0
+
+/* access right is derived
+ * PUBLIC_SUB_DIR SYSTEM_SUB_DIR HMDFS_DIR_PKG_SUB
+ * PUBLIC_FILE SYSTEM_SUB_FILE HMDFS_FILE_PKG_SUB
+ */
+#define HMDFS_DIR_DEFAULT 0x00C0
+#define HMDFS_FILE_DEFAULT 0x00D0
+#define HMDFS_TYPE_DEFAULT 0x0000
+
+static inline bool is_data_dir(__u16 perm)
+{
+ return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_DIR_DATA;
+}
+
+static inline bool is_pkg_dir(__u16 perm)
+{
+ return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_DIR_PKG;
+}
+
+static inline bool is_pkg_sub_dir(__u16 perm)
+{
+ return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_DIR_PKG_SUB;
+}
+
+static inline bool is_pkg_sub_file(__u16 perm)
+{
+ return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_FILE_PKG_SUB;
+}
+
+static inline bool is_default_dir(__u16 perm)
+{
+ return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_DIR_DEFAULT;
+}
+
+static inline bool is_default_file(__u16 perm)
+{
+ return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_FILE_DEFAULT;
+}
+
+#define AUTH_MASK 0x0F00
+#define AUTH_PKG 0x0100
+#define AUTH_SYSTEM 0x0200
+
+static inline bool is_pkg_auth(__u16 perm)
+{
+ return (perm & AUTH_MASK) == AUTH_PKG;
+}
+
+static inline bool is_system_auth(__u16 perm)
+{
+ return (perm & AUTH_MASK) == AUTH_SYSTEM;
+}
+
+#define HMDFS_MOUNT_POINT_MASK 0xF000
+#define HMDFS_MNT_COMMON 0x0000 // common
+#define HMDFS_MNT_SDCARD 0x1000 // sdcard
+#define HMDFS_MNT_ACNTID 0x2000 // account id
+
+#define HMDFS_ALL_MASK (HMDFS_MOUNT_POINT_MASK | AUTH_MASK | HMDFS_DIR_TYPE_MASK | HMDFS_PERM_MASK)
+
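+/*
+ * The perm word packs four nibbles: mount point (0xF000), auth class
+ * (0x0F00), dir/file type (0x00F0) and perm level (0x000F). For
+ * example, a package directory created under "data" carries
+ *
+ *	perm = AUTH_PKG | HMDFS_DIR_PKG | HMDFS_PERM_PKG
+ *	     = 0x0100   | 0x0050        | 0x0003         = 0x0153
+ *
+ * which decomposes back into auth 0x1 (PKG), type 0x5 (pkg dir) and
+ * level 0x3 (HMDFS_PERM_PKG).
+ */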
+
+static inline void set_inode_gid(struct inode *inode, kgid_t gid)
+{
+ inode->i_gid = gid;
+}
+
+static inline kuid_t get_inode_uid(struct inode *inode)
+{
+ kuid_t uid = inode->i_uid;
+ return uid;
+}
+
+static inline void set_inode_uid(struct inode *inode, kuid_t uid)
+{
+ inode->i_uid = uid;
+}
+
+static inline kuid_t hmdfs_override_inode_uid(struct inode *inode)
+{
+ kuid_t uid = get_inode_uid(inode);
+
+ set_inode_uid(inode, current_fsuid());
+ return uid;
+}
+
+static inline void hmdfs_revert_inode_uid(struct inode *inode, kuid_t uid)
+{
+ set_inode_uid(inode, uid);
+}
+
+static inline const struct cred *hmdfs_override_creds(const struct cred *new)
+{
+ if (!new)
+ return NULL;
+
+ return override_creds(new);
+}
+
+static inline void hmdfs_revert_creds(const struct cred *old)
+{
+ if (old)
+ revert_creds(old);
+}
+
+static inline __u16 hmdfs_perm_get_next_level(__u16 perm)
+{
+ __u16 level = (perm & HMDFS_PERM_MASK) + 1;
+
+ if (level <= HMDFS_PERM_OTHER)
+ return level;
+ else
+ return HMDFS_PERM_OTHER;
+}
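+
+/*
+ * Level progression example: the mount root starts at HMDFS_PERM_MNT
+ * (see hmdfs_root_inode_perm_init()), its children advance to
+ * HMDFS_PERM_DFS, then HMDFS_PERM_PKG, and every deeper level
+ * saturates at HMDFS_PERM_OTHER.
+ */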
+
+struct fs_struct *hmdfs_override_fsstruct(struct fs_struct *saved_fs);
+void hmdfs_revert_fsstruct(struct fs_struct *saved_fs,
+ struct fs_struct *copied_fs);
+const struct cred *hmdfs_override_fsids(bool is_recv_thread);
+const struct cred *hmdfs_override_dir_fsids(struct inode *dir,
+ struct dentry *dentry, __u16 *perm);
+const struct cred *hmdfs_override_file_fsids(struct inode *dir, __u16 *perm);
+void hmdfs_revert_fsids(const struct cred *old_cred);
+int hmdfs_persist_perm(struct dentry *dentry, __u16 *perm);
+__u16 hmdfs_read_perm(struct inode *inode);
+void hmdfs_root_inode_perm_init(struct inode *root_inode);
+void check_and_fixup_ownership(struct inode *parent_inode, struct inode *child,
+ struct dentry *lower_dentry, const char *name);
+int hmdfs_override_dir_id_fs(struct cache_fs_override *or,
+ struct inode *dir,
+ struct dentry *dentry,
+ __u16 *perm);
+void hmdfs_revert_dir_id_fs(struct cache_fs_override *or);
+void check_and_fixup_ownership_remote(struct inode *dir,
+ struct dentry *dentry);
+
+#else
+
+static inline
+void hmdfs_root_inode_perm_init(struct inode *root_inode)
+{
+}
+
+static inline
+void hmdfs_revert_fsids(const struct cred *old_cred)
+{
+}
+
+static inline
+int hmdfs_override_dir_id_fs(struct cache_fs_override *or,
+ struct inode *dir,
+ struct dentry *dentry,
+ __u16 *perm)
+{
+ return 0;
+}
+
+static inline
+void hmdfs_revert_dir_id_fs(struct cache_fs_override *or)
+{
+}
+
+static inline
+void check_and_fixup_ownership(struct inode *parent_inode, struct inode *child,
+ struct dentry *lower_dentry, const char *name)
+{
+}
+
+static inline
+const struct cred *hmdfs_override_fsids(bool is_recv_thread)
+{
+ return ERR_PTR(-ENOTTY);
+}
+
+static inline
+const struct cred *hmdfs_override_creds(const struct cred *new)
+{
+ return ERR_PTR(-ENOTTY);
+}
+
+static inline
+void hmdfs_revert_creds(const struct cred *old)
+{
+
+}
+
+static inline
+void check_and_fixup_ownership_remote(struct inode *dir,
+ struct dentry *dentry)
+{
+}
+
+static inline
+kuid_t hmdfs_override_inode_uid(struct inode *inode)
+{
+ return KUIDT_INIT((uid_t)0);
+}
+
+static inline
+void hmdfs_revert_inode_uid(struct inode *inode, kuid_t uid)
+{
+}
+
+static inline
+void hmdfs_check_cred(const struct cred *cred)
+{
+}
+
+#endif /* CONFIG_HMDFS_FS_PERMISSION */
+
+#endif
diff --git a/fs/hmdfs/client_writeback.c b/fs/hmdfs/client_writeback.c
new file mode 100644
index 0000000000000000000000000000000000000000..d4da7ec482a5b5bea34b21e2793e4120da16d090
--- /dev/null
+++ b/fs/hmdfs/client_writeback.c
@@ -0,0 +1,519 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/client_writeback.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include <linux/backing-dev.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/pagevec.h>
+#include <linux/sched/signal.h>
+#include <linux/version.h>
+#include <linux/workqueue.h>
+#include <linux/writeback.h>
+
+#include "hmdfs.h"
+#include "hmdfs_trace.h"
+
+/* 200ms */
+#define HMDFS_MAX_PAUSE max((HZ / 5), 1)
+#define HMDFS_BANDWIDTH_INTERVAL max((HZ / 5), 1)
+/* Dirty type */
+#define HMDFS_DIRTY_FS 0
+#define HMDFS_DIRTY_FILE 1
+/* Exceed flags */
+#define HMDFS_FS_EXCEED (1 << HMDFS_DIRTY_FS)
+#define HMDFS_FILE_EXCEED (1 << HMDFS_DIRTY_FILE)
+/* Ratelimit calculate shift */
+#define HMDFS_LIMIT_SHIFT 10
+
+void hmdfs_writeback_inodes_sb_handler(struct work_struct *work)
+{
+ struct hmdfs_writeback *hwb = container_of(
+ work, struct hmdfs_writeback, dirty_sb_writeback_work.work);
+
+ try_to_writeback_inodes_sb(hwb->sbi->sb, WB_REASON_FS_FREE_SPACE);
+}
+
+void hmdfs_writeback_inode_handler(struct work_struct *work)
+{
+ struct hmdfs_inode_info *info = NULL;
+ struct inode *inode = NULL;
+ struct hmdfs_writeback *hwb = container_of(
+ work, struct hmdfs_writeback, dirty_inode_writeback_work.work);
+
+ spin_lock(&hwb->inode_list_lock);
+ while (likely(!list_empty(&hwb->inode_list_head))) {
+ info = list_first_entry(&hwb->inode_list_head,
+ struct hmdfs_inode_info, wb_list);
+ list_del_init(&info->wb_list);
+ spin_unlock(&hwb->inode_list_lock);
+
+ inode = &info->vfs_inode;
+ write_inode_now(inode, 0);
+ iput(inode);
+ spin_lock(&hwb->inode_list_lock);
+ }
+ spin_unlock(&hwb->inode_list_lock);
+}
+
+static void hmdfs_writeback_inodes_sb_delayed(struct super_block *sb,
+ unsigned int delay)
+{
+ struct hmdfs_sb_info *sbi = sb->s_fs_info;
+ unsigned long timeout;
+
+ timeout = msecs_to_jiffies(delay);
+ if (!timeout || !work_busy(&sbi->h_wb->dirty_sb_writeback_work.work))
+ mod_delayed_work(sbi->h_wb->dirty_sb_writeback_wq,
+ &sbi->h_wb->dirty_sb_writeback_work, timeout);
+}
+
+static inline void hmdfs_writeback_inodes_sb(struct super_block *sb)
+{
+ hmdfs_writeback_inodes_sb_delayed(sb, 0);
+}
+
+static void hmdfs_writeback_inode(struct super_block *sb, struct inode *inode)
+{
+ struct hmdfs_sb_info *sbi = sb->s_fs_info;
+ struct hmdfs_writeback *hwb = sbi->h_wb;
+ struct hmdfs_inode_info *info = hmdfs_i(inode);
+
+ spin_lock(&hwb->inode_list_lock);
+ if (list_empty(&info->wb_list)) {
+ ihold(inode);
+ list_add_tail(&info->wb_list, &hwb->inode_list_head);
+ queue_delayed_work(hwb->dirty_inode_writeback_wq,
+ &hwb->dirty_inode_writeback_work, 0);
+ }
+ spin_unlock(&hwb->inode_list_lock);
+}
+
+static unsigned long hmdfs_idirty_pages(struct inode *inode, int tag)
+{
+ struct pagevec pvec;
+ unsigned long nr_dirty_pages = 0;
+ pgoff_t index = 0;
+
+#if KERNEL_VERSION(4, 15, 0) <= LINUX_VERSION_CODE
+ pagevec_init(&pvec);
+#else
+ pagevec_init(&pvec, 0);
+#endif
+ while (pagevec_lookup_tag(&pvec, inode->i_mapping, &index, tag)) {
+ nr_dirty_pages += pagevec_count(&pvec);
+ pagevec_release(&pvec);
+ cond_resched();
+ }
+ return nr_dirty_pages;
+}
+
+static inline unsigned long hmdfs_ratio_thresh(unsigned long ratio,
+ unsigned long thresh)
+{
+ unsigned long ret = (ratio * thresh) >> HMDFS_LIMIT_SHIFT;
+
+ return (ret == 0) ? 1 : ret;
+}
+
+static inline unsigned long hmdfs_thresh_ratio(unsigned long base,
+ unsigned long thresh)
+{
+ unsigned long ratio = (base << HMDFS_LIMIT_SHIFT) / thresh;
+
+ return (ratio == 0) ? 1 : ratio;
+}
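+
+/*
+ * Worked example of the 10-bit fixed-point helpers above, using the
+ * default thresholds (4 KiB pages assumed): dirty_fs_bg_thresh is
+ * 50 MiB = 12800 pages and dirty_fs_thresh is 100 MiB = 25600 pages,
+ * so fs_bg_ratio = (12800 << 10) / 25600 = 512. If the bandwidth
+ * later shrinks fs_thresh to 10000 pages, the background thresh is
+ * rescaled as hmdfs_ratio_thresh(512, 10000) = (512 * 10000) >> 10
+ * = 5000 pages, preserving the 1:2 proportion.
+ */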
+
+void hmdfs_calculate_dirty_thresh(struct hmdfs_writeback *hwb)
+{
+ hwb->dirty_fs_thresh = DIV_ROUND_UP(hwb->dirty_fs_bytes, PAGE_SIZE);
+ hwb->dirty_file_thresh = DIV_ROUND_UP(hwb->dirty_file_bytes, PAGE_SIZE);
+ hwb->dirty_fs_bg_thresh =
+ DIV_ROUND_UP(hwb->dirty_fs_bg_bytes, PAGE_SIZE);
+ hwb->dirty_file_bg_thresh =
+ DIV_ROUND_UP(hwb->dirty_file_bg_bytes, PAGE_SIZE);
+
+ hwb->fs_bg_ratio = hmdfs_thresh_ratio(hwb->dirty_fs_bg_thresh,
+ hwb->dirty_fs_thresh);
+ hwb->file_bg_ratio = hmdfs_thresh_ratio(hwb->dirty_file_bg_thresh,
+ hwb->dirty_file_thresh);
+ hwb->fs_file_ratio = hmdfs_thresh_ratio(hwb->dirty_file_thresh,
+ hwb->dirty_fs_thresh);
+}
+
+static void hmdfs_init_dirty_limit(struct hmdfs_dirty_throttle_control *hdtc)
+{
+ struct hmdfs_writeback *hwb = hdtc->hwb;
+
+ hdtc->fs_thresh = hdtc->hwb->dirty_fs_thresh;
+ hdtc->file_thresh = hdtc->hwb->dirty_file_thresh;
+ hdtc->fs_bg_thresh = hdtc->hwb->dirty_fs_bg_thresh;
+ hdtc->file_bg_thresh = hdtc->hwb->dirty_file_bg_thresh;
+
+ if (!hwb->dirty_auto_threshold)
+ return;
+
+ /*
+ * Init thresh from the previously bandwidth-adjusted thresh;
+ * the thresh should be no more than the configured thresh.
+ */
+ if (hwb->bw_fs_thresh < hdtc->fs_thresh) {
+ hdtc->fs_thresh = hwb->bw_fs_thresh;
+ hdtc->fs_bg_thresh = hmdfs_ratio_thresh(hwb->fs_bg_ratio,
+ hdtc->fs_thresh);
+ }
+ if (hwb->bw_file_thresh < hdtc->file_thresh) {
+ hdtc->file_thresh = hwb->bw_file_thresh;
+ hdtc->file_bg_thresh = hmdfs_ratio_thresh(hwb->file_bg_ratio,
+ hdtc->file_thresh);
+ }
+ /*
+ * The thresh should be updated the first time the dirty pages
+ * exceed the freerun ceiling.
+ */
+ hdtc->thresh_time_stamp = jiffies - HMDFS_BANDWIDTH_INTERVAL - 1;
+}
+
+static void hmdfs_update_dirty_limit(struct hmdfs_dirty_throttle_control *hdtc)
+{
+ struct hmdfs_writeback *hwb = hdtc->hwb;
+ struct bdi_writeback *wb = hwb->wb;
+ unsigned int time_limit = hwb->writeback_timelimit;
+ unsigned long bw = wb->avg_write_bandwidth;
+ unsigned long thresh;
+
+ if (!hwb->dirty_auto_threshold)
+ return;
+
+ spin_lock(&hwb->write_bandwidth_lock);
+ if (bw > hwb->max_write_bandwidth)
+ hwb->max_write_bandwidth = bw;
+
+ if (bw < hwb->min_write_bandwidth)
+ hwb->min_write_bandwidth = bw;
+ hwb->avg_write_bandwidth = bw;
+ spin_unlock(&hwb->write_bandwidth_lock);
+
+ /*
+ * If the bandwidth is lower than the lower limit, the peer has
+ * probably gone offline, and it is meaningless to set such a low
+ * thresh.
+ */
+ bw = max(bw, hwb->bw_thresh_lowerlimit);
+ thresh = bw * time_limit / roundup_pow_of_two(HZ);
+ if (thresh >= hwb->dirty_fs_thresh) {
+ hdtc->fs_thresh = hwb->dirty_fs_thresh;
+ hdtc->file_thresh = hwb->dirty_file_thresh;
+ hdtc->fs_bg_thresh = hwb->dirty_fs_bg_thresh;
+ hdtc->file_bg_thresh = hwb->dirty_file_bg_thresh;
+ } else {
+ /* Adjust thresh according to current bandwidth */
+ hdtc->fs_thresh = thresh;
+ hdtc->fs_bg_thresh = hmdfs_ratio_thresh(hwb->fs_bg_ratio,
+ hdtc->fs_thresh);
+ hdtc->file_thresh = hmdfs_ratio_thresh(hwb->fs_file_ratio,
+ hdtc->fs_thresh);
+ hdtc->file_bg_thresh = hmdfs_ratio_thresh(hwb->file_bg_ratio,
+ hdtc->file_thresh);
+ }
+ /* Save bandwidth adjusted thresh */
+ hwb->bw_fs_thresh = hdtc->fs_thresh;
+ hwb->bw_file_thresh = hdtc->file_thresh;
+ /* Update time stamp */
+ hdtc->thresh_time_stamp = jiffies;
+}
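+
+/*
+ * Worked example of the bandwidth clamp above (HZ = 100 assumed, so
+ * roundup_pow_of_two(HZ) = 128): with avg_write_bandwidth = 5120
+ * pages/s (20 MiB/s at 4 KiB pages) and the default 5 s writeback
+ * time limit, thresh = 5120 * 500 / 128 = 20000 pages. That is below
+ * the default 25600-page fs thresh, so the fs and file thresholds
+ * are all rescaled from 20000 via the precomputed ratios.
+ */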
+
+void hmdfs_update_ratelimit(struct hmdfs_writeback *hwb)
+{
+ struct hmdfs_dirty_throttle_control hdtc = {.hwb = hwb};
+
+ hmdfs_init_dirty_limit(&hdtc);
+
+ /* hdtc.file_bg_thresh should be the lowest thresh */
+ hwb->ratelimit_pages = hdtc.file_bg_thresh /
+ (num_online_cpus() * HMDFS_RATELIMIT_PAGES_GAP);
+ if (hwb->ratelimit_pages < HMDFS_MIN_RATELIMIT_PAGES)
+ hwb->ratelimit_pages = HMDFS_MIN_RATELIMIT_PAGES;
+}
+
+/* This is a copy of wb_max_pause() */
+static unsigned long hmdfs_wb_pause(struct bdi_writeback *wb,
+ unsigned long wb_dirty)
+{
+ unsigned long bw = wb->avg_write_bandwidth;
+ unsigned long t;
+
+ /*
+ * Limit pause time for small memory systems. If sleeping for too long
+ * time, a small pool of dirty/writeback pages may go empty and disk go
+ * idle.
+ *
+ * 8 serves as the safety ratio.
+ */
+ t = wb_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
+ t++;
+
+ return min_t(unsigned long, t, HMDFS_MAX_PAUSE);
+}
+
+static unsigned long
+hmdfs_dirty_freerun_ceiling(struct hmdfs_dirty_throttle_control *hdtc,
+ unsigned int type)
+{
+ if (type == HMDFS_DIRTY_FS)
+ return (hdtc->fs_thresh + hdtc->fs_bg_thresh) / 2;
+ else /* HMDFS_DIRTY_FILE_TYPE */
+ return (hdtc->file_thresh + hdtc->file_bg_thresh) / 2;
+}
+
+/* This is a copy of dirty_poll_interval() */
+static inline unsigned long hmdfs_dirty_intv(unsigned long dirty,
+ unsigned long thresh)
+{
+ if (thresh > dirty)
+ return 1UL << (ilog2(thresh - dirty) >> 1);
+ return 1;
+}
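+
+/*
+ * Example: with thresh = 25600 pages and dirty = 9600 pages, the gap
+ * is 16000 pages, ilog2(16000) = 13, so the task may dirty another
+ * 1 << (13 >> 1) = 64 pages before polling the limits again.
+ */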
+
+static void hmdfs_balance_dirty_pages(struct address_space *mapping)
+{
+ struct inode *inode = mapping->host;
+ struct super_block *sb = inode->i_sb;
+ struct hmdfs_sb_info *sbi = sb->s_fs_info;
+ struct hmdfs_writeback *hwb = sbi->h_wb;
+ struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
+ struct hmdfs_dirty_throttle_control hdtc = {.hwb = hwb};
+ unsigned int dirty_exceeded = 0;
+ unsigned long start_time = jiffies;
+ unsigned long pause = 0;
+
+ /* Add delay work to trigger timeout writeback */
+ if (hwb->dirty_writeback_interval != 0)
+ hmdfs_writeback_inodes_sb_delayed(
+ sb, hwb->dirty_writeback_interval * 10);
+
+ hmdfs_init_dirty_limit(&hdtc);
+
+ while (1) {
+ unsigned long exceed = 0;
+ unsigned long diff;
+
+ /* Per-filesystem overbalance writeback */
+ hdtc.fs_nr_dirty = wb_stat_sum(wb, WB_RECLAIMABLE);
+ hdtc.fs_nr_reclaimable =
+ hdtc.fs_nr_dirty + wb_stat_sum(wb, WB_WRITEBACK);
+ if (hdtc.fs_nr_reclaimable < hdtc.file_bg_thresh) {
+ diff = hmdfs_dirty_intv(hdtc.fs_nr_reclaimable,
+ hdtc.file_thresh);
+ goto free_running;
+ }
+
+ /* Per-file overbalance writeback */
+ hdtc.file_nr_dirty =
+ hmdfs_idirty_pages(inode, PAGECACHE_TAG_DIRTY);
+ hdtc.file_nr_reclaimable =
+ hmdfs_idirty_pages(inode, PAGECACHE_TAG_WRITEBACK) +
+ hdtc.file_nr_dirty;
+ if ((hdtc.fs_nr_reclaimable <
+ hmdfs_dirty_freerun_ceiling(&hdtc, HMDFS_DIRTY_FS)) &&
+ (hdtc.file_nr_reclaimable <
+ hmdfs_dirty_freerun_ceiling(&hdtc, HMDFS_DIRTY_FILE))) {
+ unsigned long fs_intv, file_intv;
+
+ fs_intv = hmdfs_dirty_intv(hdtc.fs_nr_reclaimable,
+ hdtc.fs_thresh);
+ file_intv = hmdfs_dirty_intv(hdtc.file_nr_reclaimable,
+ hdtc.file_thresh);
+ diff = min(fs_intv, file_intv);
+free_running:
+ current->nr_dirtied_pause = diff;
+ current->nr_dirtied = 0;
+ break;
+ }
+
+ if (hdtc.fs_nr_reclaimable >=
+ hmdfs_dirty_freerun_ceiling(&hdtc, HMDFS_DIRTY_FS)) {
+ if (unlikely(!writeback_in_progress(wb)))
+ hmdfs_writeback_inodes_sb(sb);
+ } else {
+ hmdfs_writeback_inode(sb, inode);
+ }
+
+ /*
+ * If dirty_auto_threshold is enabled, recalculate the writeback
+ * thresh according to the current bandwidth. Updating the
+ * bandwidth here as well would be better, but
+ * wb_update_bandwidth() is not exported, so the bandwidth update
+ * will be delayed when writing a lot to a single file.
+ */
+ if (hwb->dirty_auto_threshold &&
+ time_is_before_jiffies(hdtc.thresh_time_stamp +
+ HMDFS_BANDWIDTH_INTERVAL))
+ hmdfs_update_dirty_limit(&hdtc);
+
+ if (unlikely(hdtc.fs_nr_reclaimable >= hdtc.fs_thresh))
+ exceed |= HMDFS_FS_EXCEED;
+ if (unlikely(hdtc.file_nr_reclaimable >= hdtc.file_thresh))
+ exceed |= HMDFS_FILE_EXCEED;
+
+ if (!exceed) {
+ trace_hmdfs_balance_dirty_pages(sbi, wb, &hdtc,
+ 0UL, start_time);
+ current->nr_dirtied = 0;
+ break;
+ }
+ /*
+ * Per-file or per-fs reclaimable pages exceed throttle limit,
+ * sleep pause time and check again.
+ */
+ dirty_exceeded |= exceed;
+ if (dirty_exceeded && !hwb->dirty_exceeded)
+ hwb->dirty_exceeded = true;
+
+ /* Pause */
+ pause = hmdfs_wb_pause(wb, hdtc.fs_nr_reclaimable);
+
+ trace_hmdfs_balance_dirty_pages(sbi, wb, &hdtc, pause,
+ start_time);
+
+ __set_current_state(TASK_KILLABLE);
+ io_schedule_timeout(pause);
+
+ if (fatal_signal_pending(current))
+ break;
+ }
+
+ if (!dirty_exceeded && hwb->dirty_exceeded)
+ hwb->dirty_exceeded = false;
+
+ if (hdtc.fs_nr_reclaimable >= hdtc.fs_bg_thresh) {
+ if (unlikely(!writeback_in_progress(wb)))
+ hmdfs_writeback_inodes_sb(sb);
+ } else if (hdtc.file_nr_reclaimable >= hdtc.file_bg_thresh) {
+ hmdfs_writeback_inode(sb, inode);
+ }
+}
+
+void hmdfs_balance_dirty_pages_ratelimited(struct address_space *mapping)
+{
+ struct hmdfs_sb_info *sbi = mapping->host->i_sb->s_fs_info;
+ struct hmdfs_writeback *hwb = sbi->h_wb;
+ int *bdp_ratelimits = NULL;
+ int ratelimit;
+
+ if (!hwb->dirty_writeback_control)
+ return;
+
+ /* Add delay work to trigger timeout writeback */
+ if (hwb->dirty_writeback_interval != 0)
+ hmdfs_writeback_inodes_sb_delayed(
+ mapping->host->i_sb,
+ hwb->dirty_writeback_interval * 10);
+
+ ratelimit = current->nr_dirtied_pause;
+ if (hwb->dirty_exceeded)
+ ratelimit = min(ratelimit, HMDFS_DIRTY_EXCEED_RATELIMIT);
+
+ /*
+ * This prevents one CPU from accumulating too many dirtied pages
+ * without calling into hmdfs_balance_dirty_pages(), which can
+ * happen when there are 1000+ tasks that all start dirtying
+ * pages at exactly the same time, hence all honouring a too
+ * large initial task->nr_dirtied_pause.
+ */
+ preempt_disable();
+ bdp_ratelimits = this_cpu_ptr(hwb->bdp_ratelimits);
+
+ trace_hmdfs_balance_dirty_pages_ratelimited(sbi, hwb, *bdp_ratelimits);
+
+ if (unlikely(current->nr_dirtied >= ratelimit)) {
+ *bdp_ratelimits = 0;
+ } else if (unlikely(*bdp_ratelimits >= hwb->ratelimit_pages)) {
+ *bdp_ratelimits = 0;
+ ratelimit = 0;
+ }
+ preempt_enable();
+
+ if (unlikely(current->nr_dirtied >= ratelimit))
+ hmdfs_balance_dirty_pages(mapping);
+}
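+
+/*
+ * Ratelimit example: with the default ratelimit_pages = 32, once a
+ * CPU has accumulated 32 dirtied pages the per-cpu counter forces
+ * ratelimit to 0, so the dirtying task drops into
+ * hmdfs_balance_dirty_pages() even if it has not yet exhausted its
+ * own nr_dirtied_pause quota.
+ */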
+
+void hmdfs_destroy_writeback(struct hmdfs_sb_info *sbi)
+{
+ if (!sbi->h_wb)
+ return;
+
+ flush_delayed_work(&sbi->h_wb->dirty_sb_writeback_work);
+ flush_delayed_work(&sbi->h_wb->dirty_inode_writeback_work);
+ destroy_workqueue(sbi->h_wb->dirty_sb_writeback_wq);
+ destroy_workqueue(sbi->h_wb->dirty_inode_writeback_wq);
+ free_percpu(sbi->h_wb->bdp_ratelimits);
+ kfree(sbi->h_wb);
+ sbi->h_wb = NULL;
+}
+
+int hmdfs_init_writeback(struct hmdfs_sb_info *sbi)
+{
+ struct hmdfs_writeback *hwb;
+ char name[HMDFS_WQ_NAME_LEN];
+ int ret = -ENOMEM;
+
+ hwb = kzalloc(sizeof(struct hmdfs_writeback), GFP_KERNEL);
+ if (!hwb)
+ return ret;
+
+ hwb->sbi = sbi;
+ hwb->wb = &sbi->sb->s_bdi->wb;
+ hwb->dirty_writeback_control = true;
+ hwb->dirty_writeback_interval = HM_DEFAULT_WRITEBACK_INTERVAL;
+ hwb->dirty_file_bg_bytes = HMDFS_FILE_BG_WB_BYTES;
+ hwb->dirty_fs_bg_bytes = HMDFS_FS_BG_WB_BYTES;
+ hwb->dirty_file_bytes = HMDFS_FILE_WB_BYTES;
+ hwb->dirty_fs_bytes = HMDFS_FS_WB_BYTES;
+ hmdfs_calculate_dirty_thresh(hwb);
+ hwb->bw_file_thresh = hwb->dirty_file_thresh;
+ hwb->bw_fs_thresh = hwb->dirty_fs_thresh;
+ spin_lock_init(&hwb->inode_list_lock);
+ INIT_LIST_HEAD(&hwb->inode_list_head);
+ hwb->dirty_exceeded = false;
+ hwb->ratelimit_pages = HMDFS_DEF_RATELIMIT_PAGES;
+ hwb->dirty_auto_threshold = true;
+ hwb->writeback_timelimit = HMDFS_DEF_WB_TIMELIMIT;
+ hwb->bw_thresh_lowerlimit = HMDFS_BW_THRESH_DEF_LIMIT;
+ spin_lock_init(&hwb->write_bandwidth_lock);
+ hwb->avg_write_bandwidth = 0;
+ hwb->max_write_bandwidth = 0;
+ hwb->min_write_bandwidth = ULONG_MAX;
+ hwb->bdp_ratelimits = alloc_percpu(int);
+ if (!hwb->bdp_ratelimits)
+ goto free_hwb;
+
+ snprintf(name, sizeof(name), "dfs_ino_wb%u", sbi->seq);
+ hwb->dirty_inode_writeback_wq = create_singlethread_workqueue(name);
+ if (!hwb->dirty_inode_writeback_wq) {
+ hmdfs_err("Failed to create inode writeback workqueue!");
+ goto free_bdp;
+ }
+ snprintf(name, sizeof(name), "dfs_sb_wb%u", sbi->seq);
+ hwb->dirty_sb_writeback_wq = create_singlethread_workqueue(name);
+ if (!hwb->dirty_sb_writeback_wq) {
+ hmdfs_err("Failed to create filesystem writeback workqueue!");
+ goto free_i_wq;
+ }
+ INIT_DELAYED_WORK(&hwb->dirty_sb_writeback_work,
+ hmdfs_writeback_inodes_sb_handler);
+ INIT_DELAYED_WORK(&hwb->dirty_inode_writeback_work,
+ hmdfs_writeback_inode_handler);
+ sbi->h_wb = hwb;
+ return 0;
+free_i_wq:
+ destroy_workqueue(hwb->dirty_inode_writeback_wq);
+free_bdp:
+ free_percpu(hwb->bdp_ratelimits);
+free_hwb:
+ kfree(hwb);
+ return ret;
+}
diff --git a/fs/hmdfs/client_writeback.h b/fs/hmdfs/client_writeback.h
new file mode 100644
index 0000000000000000000000000000000000000000..689a5e733ece47a4894a10ce9eac099a5b9047f1
--- /dev/null
+++ b/fs/hmdfs/client_writeback.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/client_writeback.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef CLIENT_WRITEBACK_H
+#define CLIENT_WRITEBACK_H
+
+#include "hmdfs.h"
+
+/*
+ * HM_DEFAULT_WRITEBACK_INTERVAL - centiseconds
+ * HMDFS_FILE_BG_WB_BYTES - background per-file threshold 10M
+ * HMDFS_FS_BG_WB_BYTES - background per-fs threshold 50M
+ * HMDFS_FILE_WB_BYTES - per-file throttle threshold
+ * HMDFS_FS_WB_BYTES - per-fs throttle threshold
+ */
+#define HM_DEFAULT_WRITEBACK_INTERVAL 500
+#define HMDFS_FILE_BG_WB_BYTES (10 * 1024 * 1024)
+#define HMDFS_FS_BG_WB_BYTES (50 * 1024 * 1024)
+#define HMDFS_FILE_WB_BYTES (HMDFS_FILE_BG_WB_BYTES << 1)
+#define HMDFS_FS_WB_BYTES (HMDFS_FS_BG_WB_BYTES << 1)
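+
+/*
+ * With 4 KiB pages (illustrative) these byte values translate to
+ * 2560 / 12800 page background thresholds and 5120 / 25600 page
+ * throttle thresholds for per-file / per-fs respectively.
+ */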
+
+/* writeback time limit (default 5s) */
+#define HMDFS_DEF_WB_TIMELIMIT (5 * HZ)
+#define HMDFS_MAX_WB_TIMELIMIT (30 * HZ)
+
+/* bandwidth adjusted lower limit (default 1MB/s) */
+#define HMDFS_BW_THRESH_MIN_LIMIT (1 << (20 - PAGE_SHIFT))
+#define HMDFS_BW_THRESH_MAX_LIMIT (100 << (20 - PAGE_SHIFT))
+#define HMDFS_BW_THRESH_DEF_LIMIT HMDFS_BW_THRESH_MIN_LIMIT
+
+#define HMDFS_DIRTY_EXCEED_RATELIMIT (32 >> (PAGE_SHIFT - 10))
+#define HMDFS_RATELIMIT_PAGES_GAP 16
+#define HMDFS_DEF_RATELIMIT_PAGES 32
+#define HMDFS_MIN_RATELIMIT_PAGES 1
+
+struct hmdfs_dirty_throttle_control {
+ struct hmdfs_writeback *hwb;
+ /* last time threshes are updated */
+ unsigned long thresh_time_stamp;
+
+ unsigned long file_bg_thresh;
+ unsigned long fs_bg_thresh;
+ unsigned long file_thresh;
+ unsigned long fs_thresh;
+
+ unsigned long file_nr_dirty;
+ unsigned long fs_nr_dirty;
+ unsigned long file_nr_reclaimable;
+ unsigned long fs_nr_reclaimable;
+};
+
+struct hmdfs_writeback {
+ struct hmdfs_sb_info *sbi;
+ struct bdi_writeback *wb;
+ /* enable hmdfs dirty writeback control */
+ bool dirty_writeback_control;
+
+ /* writeback per-file inode list */
+ struct list_head inode_list_head;
+ spinlock_t inode_list_lock;
+
+ /* centiseconds */
+ unsigned int dirty_writeback_interval;
+ /* per-file background threshold */
+ unsigned long dirty_file_bg_bytes;
+ unsigned long dirty_file_bg_thresh;
+ /* per-fs background threshold */
+ unsigned long dirty_fs_bg_bytes;
+ unsigned long dirty_fs_bg_thresh;
+ /* per-file throttle threshold */
+ unsigned long dirty_file_bytes;
+ unsigned long dirty_file_thresh;
+ /* per-fs throttle threshold */
+ unsigned long dirty_fs_bytes;
+ unsigned long dirty_fs_thresh;
+ /* ratio between background thresh and throttle thresh */
+ unsigned long fs_bg_ratio;
+ unsigned long file_bg_ratio;
+ /* ratio between file and fs throttle thresh */
+ unsigned long fs_file_ratio;
+
+ /*
+ * Enable auto-thresh. If enabled, the background and throttle
+ * thresh are no longer the fixed values stored in dirty_*_bytes;
+ * they are determined by the network bandwidth and the
+ * writeback time limit.
+ */
+ bool dirty_auto_threshold;
+ unsigned int writeback_timelimit;
+ /* bandwidth adjusted filesystem throttle thresh */
+ unsigned long bw_fs_thresh;
+ /* bandwidth adjusted per-file throttle thresh */
+ unsigned long bw_file_thresh;
+ /* bandwidth adjusted thresh lower limit */
+ unsigned long bw_thresh_lowerlimit;
+
+ /* reclaimable pages exceed throttle thresh */
+ bool dirty_exceeded;
+ /* percpu dirty pages ratelimit */
+ long ratelimit_pages;
+ /* count percpu dirty pages */
+ int __percpu *bdp_ratelimits;
+
+ /* per-fs writeback work */
+ struct workqueue_struct *dirty_sb_writeback_wq;
+ struct delayed_work dirty_sb_writeback_work;
+ /* per-file writeback work */
+ struct workqueue_struct *dirty_inode_writeback_wq;
+ struct delayed_work dirty_inode_writeback_work;
+
+ /* per-fs writeback bandwidth */
+ spinlock_t write_bandwidth_lock;
+ unsigned long max_write_bandwidth;
+ unsigned long min_write_bandwidth;
+ unsigned long avg_write_bandwidth;
+};
+
+void hmdfs_writeback_inodes_sb_handler(struct work_struct *work);
+
+void hmdfs_writeback_inode_handler(struct work_struct *work);
+
+void hmdfs_calculate_dirty_thresh(struct hmdfs_writeback *hwb);
+
+void hmdfs_update_ratelimit(struct hmdfs_writeback *hwb);
+
+void hmdfs_balance_dirty_pages_ratelimited(struct address_space *mapping);
+
+void hmdfs_destroy_writeback(struct hmdfs_sb_info *sbi);
+
+int hmdfs_init_writeback(struct hmdfs_sb_info *sbi);
+
+#endif
diff --git a/fs/hmdfs/comm/connection.c b/fs/hmdfs/comm/connection.c
new file mode 100644
index 0000000000000000000000000000000000000000..51e6f829eb343b7bc929b899d5d2243cfe31ba49
--- /dev/null
+++ b/fs/hmdfs/comm/connection.c
@@ -0,0 +1,1311 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/comm/connection.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include "connection.h"
+
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/tcp.h>
+#include <linux/workqueue.h>
+
+#include "device_node.h"
+#include "hmdfs.h"
+#include "message_verify.h"
+#include "node_cb.h"
+#include "protocol.h"
+#include "socket_adapter.h"
+
+#ifdef CONFIG_HMDFS_FS_ENCRYPTION
+#include "crypto.h"
+#endif
+
+#define HMDFS_WAIT_REQUEST_END_MIN 20
+#define HMDFS_WAIT_REQUEST_END_MAX 30
+
+#define HMDFS_WAIT_CONN_RELEASE (3 * HZ)
+
+#define HMDFS_RETRY_WB_WQ_MAX_ACTIVE 16
+
+static void hs_fill_crypto_data(struct connection *conn_impl, __u8 ops,
+ void *data, __u32 len)
+{
+ struct crypto_body *body = NULL;
+
+ if (len < sizeof(struct crypto_body)) {
+ hmdfs_info("crpto body len %u is err", len);
+ return;
+ }
+ body = (struct crypto_body *)data;
+
+ /* this is for test only; fill in the right algorithm later */
+ body->crypto |= HMDFS_HS_CRYPTO_KTLS_AES128;
+ body->crypto = cpu_to_le32(body->crypto);
+
+ hmdfs_info("fill crypto. ccrtypto=0x%08x", body->crypto);
+}
+
+static int hs_parse_crypto_data(struct connection *conn_impl, __u8 ops,
+ void *data, __u32 len)
+{
+ struct crypto_body *hs_crypto = NULL;
+ uint32_t crypto;
+
+ if (len < sizeof(struct crypto_body)) {
+ hmdfs_info("handshake msg len error, len=%u", len);
+ return -1;
+ }
+ hs_crypto = (struct crypto_body *)data;
+ crypto = le32_to_cpu(hs_crypto->crypto);
+ conn_impl->crypto = crypto;
+ hmdfs_info("ops=%u, len=%u, crypto=0x%08x", ops, len, crypto);
+ return 0;
+}
+
+static void hs_fill_case_sense_data(struct connection *conn_impl, __u8 ops,
+ void *data, __u32 len)
+{
+ struct case_sense_body *body = (struct case_sense_body *)data;
+
+ if (len < sizeof(struct case_sense_body)) {
+ hmdfs_err("case sensitive len %u is err", len);
+ return;
+ }
+ body->case_sensitive = conn_impl->node->sbi->s_case_sensitive;
+}
+
+static int hs_parse_case_sense_data(struct connection *conn_impl, __u8 ops,
+ void *data, __u32 len)
+{
+ struct case_sense_body *body = (struct case_sense_body *)data;
+ __u8 sensitive = conn_impl->node->sbi->s_case_sensitive ? 1 : 0;
+
+ if (len < sizeof(struct case_sense_body)) {
+ hmdfs_info("case sensitive len %u is err", len);
+ return -1;
+ }
+ if (body->case_sensitive != sensitive) {
+ hmdfs_err("case sensitive inconsistent, server: %u,client: %u, ops: %u",
+ body->case_sensitive, sensitive, ops);
+ return -1;
+ }
+ return 0;
+}
+
+static void hs_fill_feature_data(struct connection *conn_impl, __u8 ops,
+ void *data, __u32 len)
+{
+ struct feature_body *body = (struct feature_body *)data;
+
+ if (len < sizeof(struct feature_body)) {
+ hmdfs_err("feature len %u is err", len);
+ return;
+ }
+ body->features = cpu_to_le64(conn_impl->node->sbi->s_features);
+ body->reserved = cpu_to_le64(0);
+}
+
+static int hs_parse_feature_data(struct connection *conn_impl, __u8 ops,
+ void *data, __u32 len)
+{
+ struct feature_body *body = (struct feature_body *)data;
+
+ if (len < sizeof(struct feature_body)) {
+ hmdfs_err("feature len %u is err", len);
+ return -1;
+ }
+
+ conn_impl->node->features = le64_to_cpu(body->features);
+ return 0;
+}
+
+/* should ensure len is smaller than 0xffff. */
+static const struct conn_hs_extend_reg s_hs_extend_reg[HS_EXTEND_CODE_COUNT] = {
+ [HS_EXTEND_CODE_CRYPTO] = {
+ .len = sizeof(struct crypto_body),
+ .resv = 0,
+ .filler = hs_fill_crypto_data,
+ .parser = hs_parse_crypto_data
+ },
+ [HS_EXTEND_CODE_CASE_SENSE] = {
+ .len = sizeof(struct case_sense_body),
+ .resv = 0,
+ .filler = hs_fill_case_sense_data,
+ .parser = hs_parse_case_sense_data,
+ },
+ [HS_EXTEND_CODE_FEATURE_SUPPORT] = {
+ .len = sizeof(struct feature_body),
+ .resv = 0,
+ .filler = hs_fill_feature_data,
+ .parser = hs_parse_feature_data,
+ },
+};
+
+static __u32 hs_get_extend_data_len(void)
+{
+ __u32 len;
+ int i;
+
+ len = sizeof(struct conn_hs_extend_head);
+
+ for (i = 0; i < HS_EXTEND_CODE_COUNT; i++) {
+ len += sizeof(struct extend_field_head);
+ len += s_hs_extend_reg[i].len;
+ }
+
+ hmdfs_info("extend data total len is %u", len);
+ return len;
+}
+
+static void hs_fill_extend_data(struct connection *conn_impl, __u8 ops,
+ void *extend_data, __u32 len)
+{
+ struct conn_hs_extend_head *extend_head = NULL;
+ struct extend_field_head *field = NULL;
+ uint8_t *body = NULL;
+ __u32 offset;
+ __u16 i;
+
+ if (sizeof(struct conn_hs_extend_head) > len) {
+ hmdfs_info("len error. len=%u", len);
+ return;
+ }
+ extend_head = (struct conn_hs_extend_head *)extend_data;
+ extend_head->field_cn = 0;
+ offset = sizeof(struct conn_hs_extend_head);
+
+ for (i = 0; i < HS_EXTEND_CODE_COUNT; i++) {
+ if (sizeof(struct extend_field_head) > (len - offset))
+ break;
+ field = (struct extend_field_head *)((uint8_t *)extend_data +
+ offset);
+ offset += sizeof(struct extend_field_head);
+
+ if (s_hs_extend_reg[i].len > (len - offset))
+ break;
+ body = (uint8_t *)extend_data + offset;
+ offset += s_hs_extend_reg[i].len;
+
+ field->code = cpu_to_le16(i);
+ field->len = cpu_to_le16(s_hs_extend_reg[i].len);
+
+ if (s_hs_extend_reg[i].filler)
+ s_hs_extend_reg[i].filler(conn_impl, ops,
+ body, s_hs_extend_reg[i].len);
+
+ extend_head->field_cn += 1;
+ }
+
+ extend_head->field_cn = cpu_to_le32(extend_head->field_cn);
+}
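+
+/*
+ * The extend payload built above is a simple TLV sequence; with the
+ * three codes registered above it looks like the following on the
+ * wire (body sizes follow the body structs and are implementation
+ * details):
+ *
+ *	struct conn_hs_extend_head { field_cn = 3, ... }
+ *	[code = HS_EXTEND_CODE_CRYPTO,          len, crypto_body]
+ *	[code = HS_EXTEND_CODE_CASE_SENSE,      len, case_sense_body]
+ *	[code = HS_EXTEND_CODE_FEATURE_SUPPORT, len, feature_body]
+ *
+ * hs_parse_extend_data() below walks the same layout, dispatching
+ * each code to its registered parser and skipping unknown codes.
+ */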
+
+static int hs_parse_extend_data(struct connection *conn_impl, __u8 ops,
+ void *extend_data, __u32 extend_len)
+{
+ struct conn_hs_extend_head *extend_head = NULL;
+ struct extend_field_head *field = NULL;
+ uint8_t *body = NULL;
+ __u32 offset;
+ __u32 field_cnt;
+ __u16 code;
+ __u16 len;
+ int i;
+ int ret;
+
+ if (sizeof(struct conn_hs_extend_head) > extend_len) {
+ hmdfs_err("ops=%u,extend_len=%u", ops, extend_len);
+ return -1;
+ }
+ extend_head = (struct conn_hs_extend_head *)extend_data;
+ field_cnt = le32_to_cpu(extend_head->field_cn);
+ hmdfs_info("extend_len=%u,field_cnt=%u", extend_len, field_cnt);
+
+ offset = sizeof(struct conn_hs_extend_head);
+
+ for (i = 0; i < field_cnt; i++) {
+ if (sizeof(struct extend_field_head) > (extend_len - offset)) {
+ hmdfs_err("cnt err, op=%u, extend_len=%u, cnt=%u, i=%u",
+ ops, extend_len, field_cnt, i);
+ return -1;
+ }
+ field = (struct extend_field_head *)((uint8_t *)extend_data +
+ offset);
+ offset += sizeof(struct extend_field_head);
+ code = le16_to_cpu(field->code);
+ len = le16_to_cpu(field->len);
+ if (len > (extend_len - offset)) {
+ hmdfs_err("len err, op=%u, extend_len=%u, cnt=%u, i=%u",
+ ops, extend_len, field_cnt, i);
+ hmdfs_err("len err, code=%u, len=%u, offset=%u", code,
+ len, offset);
+ return -1;
+ }
+
+ body = (uint8_t *)extend_data + offset;
+ offset += len;
+ if ((code < HS_EXTEND_CODE_COUNT) &&
+ (s_hs_extend_reg[code].parser)) {
+ ret = s_hs_extend_reg[code].parser(conn_impl, ops,
+ body, len);
+ if (ret)
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int hs_proc_msg_data(struct connection *conn_impl, __u8 ops, void *data,
+ __u32 data_len)
+{
+ struct connection_handshake_req *hs_req = NULL;
+ uint8_t *extend_data = NULL;
+ __u32 extend_len;
+ __u32 req_len;
+ int ret;
+
+ if (!data) {
+ hmdfs_err("err, msg data is null");
+ return -1;
+ }
+
+ if (data_len < sizeof(struct connection_handshake_req)) {
+ hmdfs_err("ack msg data len error. data_len=%u, device_id=%llu",
+ data_len, conn_impl->node->device_id);
+ return -1;
+ }
+
+ hs_req = (struct connection_handshake_req *)data;
+ req_len = le32_to_cpu(hs_req->len);
+ if (req_len > (data_len - sizeof(struct connection_handshake_req))) {
+ hmdfs_info(
+ "ack msg hs_req len(%u) error. data_len=%u, device_id=%llu",
+ req_len, data_len, conn_impl->node->device_id);
+ return -1;
+ }
+ extend_len =
+ data_len - sizeof(struct connection_handshake_req) - req_len;
+ extend_data = (uint8_t *)data +
+ sizeof(struct connection_handshake_req) + req_len;
+ ret = hs_parse_extend_data(conn_impl, ops, extend_data, extend_len);
+ if (!ret)
+ hmdfs_info(
+ "hs msg rcv, ops=%u, data_len=%u, device_id=%llu, req_len=%u",
+ ops, data_len, conn_impl->node->device_id, hs_req->len);
+ return ret;
+}
+#ifdef CONFIG_HMDFS_FS_ENCRYPTION
+static int connection_handshake_init_tls(struct connection *conn_impl, __u8 ops)
+{
+ // init ktls config, use key1/key2 as init write-key of each direction
+ __u8 key1[HMDFS_KEY_SIZE];
+ __u8 key2[HMDFS_KEY_SIZE];
+ int ret;
+
+ if ((ops != CONNECT_MESG_HANDSHAKE_RESPONSE) &&
+ (ops != CONNECT_MESG_HANDSHAKE_ACK)) {
+ hmdfs_err("ops %u is err", ops);
+ return -EINVAL;
+ }
+
+ update_key(conn_impl->master_key, key1, HKDF_TYPE_KEY_INITIATOR);
+ update_key(conn_impl->master_key, key2, HKDF_TYPE_KEY_ACCEPTER);
+
+ if (ops == CONNECT_MESG_HANDSHAKE_ACK) {
+ memcpy(conn_impl->send_key, key1, HMDFS_KEY_SIZE);
+ memcpy(conn_impl->recv_key, key2, HMDFS_KEY_SIZE);
+ } else {
+ memcpy(conn_impl->send_key, key2, HMDFS_KEY_SIZE);
+ memcpy(conn_impl->recv_key, key1, HMDFS_KEY_SIZE);
+ }
+
+ memset(key1, 0, HMDFS_KEY_SIZE);
+ memset(key2, 0, HMDFS_KEY_SIZE);
+
+ hmdfs_info("hs: ops=%u start set crypto tls", ops);
+ ret = tls_crypto_info_init(conn_impl);
+ if (ret)
+ hmdfs_err("setting tls fail. ops is %u", ops);
+
+ return ret;
+}
+#endif
+
+static int do_send_handshake(struct connection *conn_impl, __u8 ops,
+ __le16 request_id)
+{
+ int err;
+ struct connection_msg_head *hs_head = NULL;
+ struct connection_handshake_req *hs_data = NULL;
+ uint8_t *hs_extend_data = NULL;
+ struct hmdfs_send_data msg;
+ __u32 send_len;
+ __u32 len;
+ __u32 extend_len;
+ char buf[HMDFS_CID_SIZE] = { 0 };
+
+ len = scnprintf(buf, HMDFS_CID_SIZE, "%llu", 0ULL);
+ send_len = sizeof(struct connection_msg_head) +
+ sizeof(struct connection_handshake_req) + len;
+
+ if (((ops == CONNECT_MESG_HANDSHAKE_RESPONSE) ||
+ (ops == CONNECT_MESG_HANDSHAKE_ACK)) &&
+ (conn_impl->node->version >= DFS_2_0)) {
+ extend_len = hs_get_extend_data_len();
+ send_len += extend_len;
+ }
+
+ hs_head = kzalloc(send_len, GFP_KERNEL);
+ if (!hs_head)
+ return -ENOMEM;
+
+ hs_data = (struct connection_handshake_req
+ *)((uint8_t *)hs_head +
+ sizeof(struct connection_msg_head));
+
+ hs_data->len = cpu_to_le32(len);
+ memcpy(hs_data->dev_id, buf, len);
+
+ if (((ops == CONNECT_MESG_HANDSHAKE_RESPONSE) ||
+ ops == CONNECT_MESG_HANDSHAKE_ACK) &&
+ (conn_impl->node->version >= DFS_2_0)) {
+ hs_extend_data = (uint8_t *)hs_data +
+ sizeof(struct connection_handshake_req) + len;
+ hs_fill_extend_data(conn_impl, ops, hs_extend_data, extend_len);
+ }
+
+ hs_head->magic = HMDFS_MSG_MAGIC;
+ hs_head->version = DFS_2_0;
+ hs_head->flags |= 0x1;
+ hmdfs_info("Send handshake message: ops = %d, fd = %d", ops,
+ ((struct tcp_handle *)(conn_impl->connect_handle))->fd);
+ hs_head->operations = ops;
+ hs_head->request_id = request_id;
+ hs_head->datasize = cpu_to_le32(send_len);
+ hs_head->source = 0;
+ hs_head->msg_id = 0;
+
+ msg.head = hs_head;
+ msg.head_len = sizeof(struct connection_msg_head);
+ msg.data = hs_data;
+ msg.len = send_len - msg.head_len;
+ msg.sdesc = NULL;
+ msg.sdesc_len = 0;
+ err = conn_impl->send_message(conn_impl, &msg);
+ kfree(hs_head);
+ return err;
+}
+
+static int hmdfs_node_waiting_evt_sum(const struct hmdfs_peer *node)
+{
+ int sum = 0;
+ int i;
+
+ for (i = 0; i < RAW_NODE_EVT_NR; i++)
+ sum += node->waiting_evt[i];
+
+ return sum;
+}
+
+static int hmdfs_update_node_waiting_evt(struct hmdfs_peer *node, int evt,
+ unsigned int *seq)
+{
+ int last;
+ int sum;
+ unsigned int next;
+
+ sum = hmdfs_node_waiting_evt_sum(node);
+ if (sum % RAW_NODE_EVT_NR)
+ last = !node->pending_evt;
+ else
+ last = node->pending_evt;
+
+ /* duplicated event */
+ if (evt == last) {
+ node->dup_evt[evt]++;
+ return 0;
+ }
+
+ node->waiting_evt[evt]++;
+ hmdfs_debug("add node->waiting_evt[%d]=%d", evt,
+ node->waiting_evt[evt]);
+
+ /*
+ * A waiting sequence "offline + online + offline" collapses into a
+ * single offline wait; "online + offline + online" must not collapse
+ * into one online wait, because the resources tied to the first
+ * online (e.g. fd) must still be invalidated by the offline between.
+ */
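+ /*
+ * Worked example (a sketch): with pending_evt = OFF and waiting_evt =
+ * {OFF: 2, ON: 1} (i.e. OFF, ON, OFF queued), the branch below drops
+ * one OFF/ON pair, leaving a single waiting OFF event.
+ */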
+ if (node->waiting_evt[RAW_NODE_EVT_OFF] >= 2 &&
+ node->waiting_evt[RAW_NODE_EVT_ON] >= 1) {
+ node->waiting_evt[RAW_NODE_EVT_OFF] -= 1;
+ node->waiting_evt[RAW_NODE_EVT_ON] -= 1;
+ node->seq_wr_idx -= 2;
+ node->merged_evt += 2;
+ }
+
+ next = hmdfs_node_inc_evt_seq(node);
+ node->seq_tbl[(node->seq_wr_idx++) % RAW_NODE_EVT_MAX_NR] = next;
+ *seq = next;
+
+ return 1;
+}
+
+static void hmdfs_run_evt_cb_verbosely(struct hmdfs_peer *node, int raw_evt,
+ bool sync, unsigned int seq)
+{
+ int evt = (raw_evt == RAW_NODE_EVT_OFF) ? NODE_EVT_OFFLINE :
+ NODE_EVT_ONLINE;
+ int cur_evt_idx = sync ? 1 : 0;
+
+ node->cur_evt[cur_evt_idx] = raw_evt;
+ node->cur_evt_seq[cur_evt_idx] = seq;
+ hmdfs_node_call_evt_cb(node, evt, sync, seq);
+ node->cur_evt[cur_evt_idx] = RAW_NODE_EVT_NR;
+}
+
+static void hmdfs_node_evt_work(struct work_struct *work)
+{
+ struct hmdfs_peer *node =
+ container_of(work, struct hmdfs_peer, evt_dwork.work);
+ unsigned int seq;
+
+ /*
+ * N-th sync cb completes before N-th async cb,
+ * so use seq_lock as a barrier in read & write path
+ * to ensure we can read the required seq.
+ */
+ mutex_lock(&node->seq_lock);
+ seq = node->seq_tbl[(node->seq_rd_idx++) % RAW_NODE_EVT_MAX_NR];
+ hmdfs_run_evt_cb_verbosely(node, node->pending_evt, false, seq);
+ mutex_unlock(&node->seq_lock);
+
+ mutex_lock(&node->evt_lock);
+ if (hmdfs_node_waiting_evt_sum(node)) {
+ node->pending_evt = !node->pending_evt;
+ node->pending_evt_seq =
+ node->seq_tbl[node->seq_rd_idx % RAW_NODE_EVT_MAX_NR];
+ node->waiting_evt[node->pending_evt]--;
+ /* sync cb has been done */
+ schedule_delayed_work(&node->evt_dwork,
+ node->sbi->async_cb_delay * HZ);
+ } else {
+ node->last_evt = node->pending_evt;
+ node->pending_evt = RAW_NODE_EVT_NR;
+ }
+ mutex_unlock(&node->evt_lock);
+}
+
+/*
+ * The running orders of cb are:
+ *
+ * (1) sync callbacks are invoked according to the queue order of raw events:
+ * ensured by seq_lock.
+ * (2) async callbacks are invoked according to the queue order of raw events:
+ * ensured by evt_lock & evt_dwork
+ * (3) async callback is invoked after sync callback of the same raw event:
+ * ensured by seq_lock.
+ * (4) async callback of N-th raw event and sync callback of (N+x)-th raw
+ * event can run concurrently.
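+ *
+ * Example timeline (a sketch): for raw events E1 then E2 with seqs
+ * s1 < s2, a valid schedule is sync(E1), sync(E2), async(E1), async(E2);
+ * per (4), async(E1) may still be running while sync(E2) executes.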
+ */
+static void hmdfs_queue_raw_node_evt(struct hmdfs_peer *node, int evt)
+{
+ unsigned int seq = 0;
+
+ mutex_lock(&node->evt_lock);
+ if (node->pending_evt == RAW_NODE_EVT_NR) {
+ if (evt == node->last_evt) {
+ node->dup_evt[evt]++;
+ mutex_unlock(&node->evt_lock);
+ return;
+ }
+ node->pending_evt = evt;
+ seq = hmdfs_node_inc_evt_seq(node);
+ node->seq_tbl[(node->seq_wr_idx++) % RAW_NODE_EVT_MAX_NR] = seq;
+ node->pending_evt_seq = seq;
+ mutex_lock(&node->seq_lock);
+ mutex_unlock(&node->evt_lock);
+ /* call sync cb, then async cb */
+ hmdfs_run_evt_cb_verbosely(node, evt, true, seq);
+ mutex_unlock(&node->seq_lock);
+ schedule_delayed_work(&node->evt_dwork,
+ node->sbi->async_cb_delay * HZ);
+ } else if (hmdfs_update_node_waiting_evt(node, evt, &seq) > 0) {
+ /*
+ * Take seq_lock firstly to ensure N-th sync cb
+ * is called before N-th async cb.
+ */
+ mutex_lock(&node->seq_lock);
+ mutex_unlock(&node->evt_lock);
+ hmdfs_run_evt_cb_verbosely(node, evt, true, seq);
+ mutex_unlock(&node->seq_lock);
+ } else {
+ mutex_unlock(&node->evt_lock);
+ }
+}
+
+void connection_send_handshake(struct connection *conn_impl, __u8 ops,
+ __le16 request_id)
+{
+ struct tcp_handle *tcp = NULL;
+ int err = do_send_handshake(conn_impl, ops, request_id);
+
+ if (likely(err >= 0))
+ return;
+
+ tcp = conn_impl->connect_handle;
+ hmdfs_err("Failed to send handshake: err = %d, fd = %d", err, tcp->fd);
+ hmdfs_reget_connection(conn_impl);
+}
+
+void connection_handshake_notify(struct hmdfs_peer *node, int notify_type)
+{
+ struct notify_param param;
+
+ param.notify = notify_type;
+ param.fd = INVALID_SOCKET_FD;
+ memcpy(param.remote_cid, node->cid, HMDFS_CID_SIZE);
+ notify(node, &param);
+}
+
+void peer_online(struct hmdfs_peer *peer)
+{
+ // Check whether someone else has already made the peer online
+ u8 prev_stat = xchg(&peer->status, NODE_STAT_ONLINE);
+ unsigned long jif_tmp = jiffies;
+
+ if (prev_stat == NODE_STAT_ONLINE)
+ return;
+ WRITE_ONCE(peer->conn_time, jif_tmp);
+ WRITE_ONCE(peer->sbi->connections.recent_ol, jif_tmp);
+ hmdfs_queue_raw_node_evt(peer, RAW_NODE_EVT_ON);
+}
+
+void connection_to_working(struct hmdfs_peer *node)
+{
+ struct connection *conn_impl = NULL;
+ struct tcp_handle *tcp = NULL;
+
+ if (!node)
+ return;
+ mutex_lock(&node->conn_impl_list_lock);
+ list_for_each_entry(conn_impl, &node->conn_impl_list, list) {
+ if (conn_impl->type == CONNECT_TYPE_TCP &&
+ conn_impl->status == CONNECT_STAT_WAIT_RESPONSE) {
+ tcp = conn_impl->connect_handle;
+ hmdfs_info("fd %d to working", tcp->fd);
+ conn_impl->status = CONNECT_STAT_WORKING;
+ }
+ }
+ mutex_unlock(&node->conn_impl_list_lock);
+ peer_online(node);
+}
+
+static int connection_check_version(__u8 version)
+{
+ __u8 min_ver = USERSPACE_MAX_VER;
+
+ if (version <= min_ver || version >= MAX_VERSION) {
+ hmdfs_info("version err. version %u", version);
+ return -1;
+ }
+ return 0;
+}
+
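+/*
+ * Handshake overview for DFS_2_0 and later (a sketch of the cases below):
+ *
+ * 1. peer A sends REQUEST; B replies RESPONSE and enters WAIT_ACK.
+ * 2. A verifies B's extend data, replies ACK, sets up kTLS, goes WORKING.
+ * 3. B verifies A's extend data on ACK, sets up kTLS, goes WORKING.
+ *
+ * Pre-2.0 peers skip the ACK and kTLS steps and go WORKING right after
+ * the RESPONSE.
+ */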
+void connection_handshake_recv_handler(struct connection *conn_impl, void *buf,
+ void *data, __u32 data_len)
+{
+ __u8 version;
+ __u8 ops;
+ __u8 status;
+ int fd = ((struct tcp_handle *)(conn_impl->connect_handle))->fd;
+ struct connection_msg_head *head = (struct connection_msg_head *)buf;
+ int ret;
+
+ version = head->version;
+ conn_impl->node->version = version;
+ if (connection_check_version(version) != 0)
+ goto out;
+ conn_impl->node->conn_operations = hmdfs_get_peer_operation(version);
+ ops = head->operations;
+ status = conn_impl->status;
+ switch (ops) {
+ case CONNECT_MESG_HANDSHAKE_REQUEST:
+ hmdfs_info(
+ "Recved handshake request: device_id = %llu, version = %d, head->len = %d, tcp->fd = %d",
+ conn_impl->node->device_id, version, head->datasize, fd);
+ connection_send_handshake(conn_impl,
+ CONNECT_MESG_HANDSHAKE_RESPONSE,
+ head->msg_id);
+ if (conn_impl->node->version >= DFS_2_0) {
+ conn_impl->status = CONNECT_STAT_WAIT_ACK;
+ conn_impl->node->status = NODE_STAT_SHAKING;
+ } else {
+ conn_impl->status = CONNECT_STAT_WORKING;
+ }
+ break;
+ case CONNECT_MESG_HANDSHAKE_RESPONSE:
+ hmdfs_info(
+ "Recved handshake response: device_id = %llu, cmd->status = %hhu, tcp->fd = %d",
+ conn_impl->node->device_id, status, fd);
+ if (status == CONNECT_STAT_WAIT_REQUEST) {
+ // must be 10.1 device, no need to set ktls
+ connection_to_working(conn_impl->node);
+ goto out;
+ }
+
+ if (conn_impl->node->version >= DFS_2_0) {
+ ret = hs_proc_msg_data(conn_impl, ops, data, data_len);
+ if (ret)
+ goto nego_err;
+ connection_send_handshake(conn_impl,
+ CONNECT_MESG_HANDSHAKE_ACK,
+ head->msg_id);
+ hmdfs_info("respon rcv handle,conn_impl->crypto=0x%0x",
+ conn_impl->crypto);
+#ifdef CONFIG_HMDFS_FS_ENCRYPTION
+ ret = connection_handshake_init_tls(conn_impl, ops);
+ if (ret) {
+ hmdfs_err("init_tls_key fail, ops %u", ops);
+ goto out;
+ }
+#endif
+ }
+
+ conn_impl->status = CONNECT_STAT_WORKING;
+ peer_online(conn_impl->node);
+ break;
+ case CONNECT_MESG_HANDSHAKE_ACK:
+ if (conn_impl->node->version >= DFS_2_0) {
+ ret = hs_proc_msg_data(conn_impl, ops, data, data_len);
+ if (ret)
+ goto nego_err;
+ hmdfs_info("ack rcv handle, conn_impl->crypto=0x%0x",
+ conn_impl->crypto);
+#ifdef CONFIG_HMDFS_FS_ENCRYPTION
+ ret = connection_handshake_init_tls(conn_impl, ops);
+ if (ret) {
+ hmdfs_err("init_tls_key fail, ops %u", ops);
+ goto out;
+ }
+#endif
+ conn_impl->status = CONNECT_STAT_WORKING;
+ peer_online(conn_impl->node);
+ break;
+ }
+ fallthrough;
+ default:
+ return;
+ }
+out:
+ kfree(data);
+ return;
+nego_err:
+ conn_impl->status = CONNECT_STAT_NEGO_FAIL;
+ connection_handshake_notify(conn_impl->node,
+ NOTIFY_OFFLINE);
+ hmdfs_err("protocol negotiation failed, remote device_id = %llu, tcp->fd = %d",
+ conn_impl->node->device_id, fd);
+ goto out;
+}
+
+#ifdef CONFIG_HMDFS_FS_ENCRYPTION
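+/*
+ * Rekey flow (a sketch): the requesting side sends F_CONNECT_REKEY and
+ * rotates its send key; the receiver below rotates the matching recv
+ * key, and if UPDATE_REQUESTED is set it sends its own rekey request
+ * and rotates its send key as well.
+ */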
+static void update_tls_crypto_key(struct connection *conn,
+ struct hmdfs_head_cmd *head, void *data,
+ __u32 data_len)
+{
+ // rekey message handler
+ struct connection_rekey_request *rekey_req = NULL;
+ int ret = 0;
+
+ if (hmdfs_message_verify(conn->node, head, data) < 0) {
+ hmdfs_err("Rekey msg %d has been abandoned", head->msg_id);
+ goto out_err;
+ }
+
+ hmdfs_info("recv REKEY request");
+ set_crypto_info(conn, SET_CRYPTO_RECV);
+ // update send key if requested
+ rekey_req = data;
+ if (le32_to_cpu(rekey_req->update_request) == UPDATE_REQUESTED) {
+ ret = tcp_send_rekey_request(conn);
+ if (ret == 0)
+ set_crypto_info(conn, SET_CRYPTO_SEND);
+ }
+out_err:
+ kfree(data);
+}
+
+static bool cmd_update_tls_crypto_key(struct connection *conn,
+ struct hmdfs_head_cmd *head)
+{
+ __u8 version = conn->node->version;
+ struct tcp_handle *tcp = conn->connect_handle;
+
+ if (version < DFS_2_0 || conn->type != CONNECT_TYPE_TCP || !tcp)
+ return false;
+ return head->operations.command == F_CONNECT_REKEY;
+}
+#endif
+
+void connection_working_recv_handler(struct connection *conn_impl, void *buf,
+ void *data, __u32 data_len)
+{
+#ifdef CONFIG_HMDFS_FS_ENCRYPTION
+ if (cmd_update_tls_crypto_key(conn_impl, buf)) {
+ update_tls_crypto_key(conn_impl, buf, data, data_len);
+ return;
+ }
+#endif
+ conn_impl->node->conn_operations->recvmsg(conn_impl->node, buf, data);
+}
+
+static void connection_release(struct kref *ref)
+{
+ struct tcp_handle *tcp = NULL;
+ struct connection *conn = container_of(ref, struct connection, ref_cnt);
+
+ hmdfs_info("connection release");
+ memset(conn->master_key, 0, HMDFS_KEY_SIZE);
+ memset(conn->send_key, 0, HMDFS_KEY_SIZE);
+ memset(conn->recv_key, 0, HMDFS_KEY_SIZE);
+ if (conn->close)
+ conn->close(conn);
+ tcp = conn->connect_handle;
+ crypto_free_aead(conn->tfm);
+ /* TODO: check whether fput(tcp->sock->file) is needed here */
+ if (tcp && tcp->sock) {
+ hmdfs_info("connection release: fd = %d, refcount %ld", tcp->fd,
+ file_count(tcp->sock->file));
+ sockfd_put(tcp->sock);
+ }
+ if (tcp && tcp->recv_cache)
+ kmem_cache_destroy(tcp->recv_cache);
+
+ if (!list_empty(&conn->list)) {
+ mutex_lock(&conn->node->conn_impl_list_lock);
+ list_del(&conn->list);
+ mutex_unlock(&conn->node->conn_impl_list_lock);
+ /*
+ * Wake up hmdfs_disconnect_node() to re-check whether
+ * conn_deleting_list is empty.
+ */
+ wake_up_interruptible(&conn->node->deleting_list_wq);
+ }
+
+ kfree(tcp);
+ kfree(conn);
+}
+
+static void hmdfs_peer_release(struct kref *ref)
+{
+ struct hmdfs_peer *peer = container_of(ref, struct hmdfs_peer, ref_cnt);
+ struct mutex *lock = &peer->sbi->connections.node_lock;
+
+ if (!list_empty(&peer->list))
+ hmdfs_info("releasing a on-sbi peer: device_id %llu ",
+ peer->device_id);
+ else
+ hmdfs_info("releasing a redundant peer: device_id %llu ",
+ peer->device_id);
+
+ cancel_delayed_work_sync(&peer->evt_dwork);
+ list_del(&peer->list);
+ idr_destroy(&peer->msg_idr);
+ idr_destroy(&peer->file_id_idr);
+ flush_workqueue(peer->req_handle_wq);
+ flush_workqueue(peer->async_wq);
+ flush_workqueue(peer->retry_wb_wq);
+ destroy_workqueue(peer->dentry_wq);
+ destroy_workqueue(peer->req_handle_wq);
+ destroy_workqueue(peer->async_wq);
+ destroy_workqueue(peer->retry_wb_wq);
+ destroy_workqueue(peer->reget_conn_wq);
+ kfree(peer);
+ mutex_unlock(lock);
+}
+
+void connection_put(struct connection *conn)
+{
+ struct mutex *lock = &conn->ref_lock;
+
+ kref_put_mutex(&conn->ref_cnt, connection_release, lock);
+}
+
+void peer_put(struct hmdfs_peer *peer)
+{
+ struct mutex *lock = &peer->sbi->connections.node_lock;
+
+ kref_put_mutex(&peer->ref_cnt, hmdfs_peer_release, lock);
+}
+
+static void hmdfs_dump_deleting_list(struct hmdfs_peer *node)
+{
+ struct connection *con = NULL;
+ struct tcp_handle *tcp = NULL;
+ int count = 0;
+
+ mutex_lock(&node->conn_impl_list_lock);
+ list_for_each_entry(con, &node->conn_deleting_list, list) {
+ tcp = con->connect_handle;
+ hmdfs_info("deleting list %d:device_id %llu tcp_fd %d refcnt %d",
+ count, node->device_id, tcp ? tcp->fd : -1,
+ kref_read(&con->ref_cnt));
+ count++;
+ }
+ mutex_unlock(&node->conn_impl_list_lock);
+}
+
+static bool hmdfs_conn_deleting_list_empty(struct hmdfs_peer *node)
+{
+ bool empty = false;
+
+ mutex_lock(&node->conn_impl_list_lock);
+ empty = list_empty(&node->conn_deleting_list);
+ mutex_unlock(&node->conn_impl_list_lock);
+
+ return empty;
+}
+
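+/*
+ * Teardown sequence (a sketch of the steps below): mark the peer
+ * offline, shut down and put every connection, wait for
+ * conn_deleting_list to drain, wait for in-flight requests
+ * (msg_idr_process) to finish, then queue the raw offline event.
+ */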
+void hmdfs_disconnect_node(struct hmdfs_peer *node)
+{
+ LIST_HEAD(local_conns);
+ struct connection *conn_impl = NULL;
+ struct connection *next = NULL;
+ struct tcp_handle *tcp = NULL;
+
+ if (unlikely(!node))
+ return;
+
+ hmdfs_node_inc_evt_seq(node);
+ /* Refer to comments in hmdfs_is_node_offlined() */
+ smp_mb__after_atomic();
+ node->status = NODE_STAT_OFFLINE;
+ hmdfs_info("Try to disconnect peer: device_id %llu", node->device_id);
+
+ mutex_lock(&node->conn_impl_list_lock);
+ if (!list_empty(&node->conn_impl_list))
+ list_replace_init(&node->conn_impl_list, &local_conns);
+ mutex_unlock(&node->conn_impl_list_lock);
+
+ list_for_each_entry_safe(conn_impl, next, &local_conns, list) {
+ tcp = conn_impl->connect_handle;
+ if (tcp && tcp->sock) {
+ kernel_sock_shutdown(tcp->sock, SHUT_RDWR);
+ hmdfs_info("shudown sock: fd = %d, refcount %ld",
+ tcp->fd, file_count(tcp->sock->file));
+ }
+ if (tcp)
+ tcp->fd = INVALID_SOCKET_FD;
+
+ tcp_close_socket(tcp);
+ list_del_init(&conn_impl->list);
+
+ connection_put(conn_impl);
+ }
+
+ if (wait_event_interruptible_timeout(node->deleting_list_wq,
+ hmdfs_conn_deleting_list_empty(node),
+ HMDFS_WAIT_CONN_RELEASE) <= 0)
+ hmdfs_dump_deleting_list(node);
+
+ /* wait all request process end */
+ spin_lock(&node->idr_lock);
+ while (node->msg_idr_process) {
+ spin_unlock(&node->idr_lock);
+ usleep_range(HMDFS_WAIT_REQUEST_END_MIN,
+ HMDFS_WAIT_REQUEST_END_MAX);
+ spin_lock(&node->idr_lock);
+ }
+ spin_unlock(&node->idr_lock);
+
+ hmdfs_queue_raw_node_evt(node, RAW_NODE_EVT_OFF);
+}
+
+static void hmdfs_run_simple_evt_cb(struct hmdfs_peer *node, int evt)
+{
+ unsigned int seq = hmdfs_node_inc_evt_seq(node);
+
+ mutex_lock(&node->seq_lock);
+ hmdfs_node_call_evt_cb(node, evt, true, seq);
+ mutex_unlock(&node->seq_lock);
+}
+
+static void hmdfs_del_peer(struct hmdfs_peer *node)
+{
+ /*
+ * No need for offline evt cb, because all files must
+ * have been flushed and closed, else the filesystem
+ * will be un-mountable.
+ */
+ cancel_delayed_work_sync(&node->evt_dwork);
+
+ hmdfs_run_simple_evt_cb(node, NODE_EVT_DEL);
+
+ hmdfs_release_peer_sysfs(node);
+
+ flush_workqueue(node->reget_conn_wq);
+ peer_put(node);
+}
+
+void hmdfs_connections_stop(struct hmdfs_sb_info *sbi)
+{
+ struct hmdfs_peer *node = NULL;
+ struct hmdfs_peer *con_tmp = NULL;
+
+ mutex_lock(&sbi->connections.node_lock);
+ list_for_each_entry_safe(node, con_tmp, &sbi->connections.node_list,
+ list) {
+ mutex_unlock(&sbi->connections.node_lock);
+ hmdfs_disconnect_node(node);
+ hmdfs_del_peer(node);
+ mutex_lock(&sbi->connections.node_lock);
+ }
+ mutex_unlock(&sbi->connections.node_lock);
+}
+
+struct connection *get_conn_impl(struct hmdfs_peer *node, int connect_type)
+{
+ struct connection *conn_impl = NULL;
+
+ if (!node)
+ return NULL;
+ mutex_lock(&node->conn_impl_list_lock);
+ list_for_each_entry(conn_impl, &node->conn_impl_list, list) {
+ if (conn_impl->type == connect_type &&
+ conn_impl->status == CONNECT_STAT_WORKING) {
+ connection_get(conn_impl);
+ mutex_unlock(&node->conn_impl_list_lock);
+ return conn_impl;
+ }
+ }
+ mutex_unlock(&node->conn_impl_list_lock);
+ hmdfs_err_ratelimited("device %llu not find connection, type %d",
+ node->device_id, connect_type);
+ return NULL;
+}
+
+void set_conn_sock_quickack(struct hmdfs_peer *node)
+{
+ struct connection *conn_impl = NULL;
+ struct tcp_handle *tcp = NULL;
+ int option = 1;
+
+ if (!node)
+ return;
+ mutex_lock(&node->conn_impl_list_lock);
+ list_for_each_entry(conn_impl, &node->conn_impl_list, list) {
+ if (conn_impl->type == CONNECT_TYPE_TCP &&
+ conn_impl->status == CONNECT_STAT_WORKING &&
+ conn_impl->connect_handle) {
+ tcp = (struct tcp_handle *)(conn_impl->connect_handle);
+ tcp_sock_set_quickack(tcp->sock->sk, option);
+ }
+ }
+ mutex_unlock(&node->conn_impl_list_lock);
+}
+
+struct hmdfs_peer *hmdfs_lookup_from_devid(struct hmdfs_sb_info *sbi,
+ uint64_t device_id)
+{
+ struct hmdfs_peer *con = NULL;
+ struct hmdfs_peer *lookup = NULL;
+
+ if (!sbi)
+ return NULL;
+ mutex_lock(&sbi->connections.node_lock);
+ list_for_each_entry(con, &sbi->connections.node_list, list) {
+ if (con->status != NODE_STAT_ONLINE ||
+ con->device_id != device_id)
+ continue;
+ lookup = con;
+ peer_get(lookup);
+ break;
+ }
+ mutex_unlock(&sbi->connections.node_lock);
+ return lookup;
+}
+
+struct hmdfs_peer *hmdfs_lookup_from_cid(struct hmdfs_sb_info *sbi,
+ uint8_t *cid)
+{
+ struct hmdfs_peer *con = NULL;
+ struct hmdfs_peer *lookup = NULL;
+
+ if (!sbi)
+ return NULL;
+ mutex_lock(&sbi->connections.node_lock);
+ list_for_each_entry(con, &sbi->connections.node_list, list) {
+ if (strncmp(con->cid, cid, HMDFS_CID_SIZE) != 0)
+ continue;
+ lookup = con;
+ peer_get(lookup);
+ break;
+ }
+ mutex_unlock(&sbi->connections.node_lock);
+ return lookup;
+}
+
+static struct hmdfs_peer *lookup_peer_by_cid_unsafe(struct hmdfs_sb_info *sbi,
+ uint8_t *cid)
+{
+ struct hmdfs_peer *node = NULL;
+
+ list_for_each_entry(node, &sbi->connections.node_list, list)
+ if (!strncmp(node->cid, cid, HMDFS_CID_SIZE)) {
+ peer_get(node);
+ return node;
+ }
+ return NULL;
+}
+
+static struct hmdfs_peer *add_peer_unsafe(struct hmdfs_sb_info *sbi,
+ struct hmdfs_peer *peer2add)
+{
+ struct hmdfs_peer *peer;
+ int err;
+
+ peer = lookup_peer_by_cid_unsafe(sbi, peer2add->cid);
+ if (peer)
+ return peer;
+
+ err = hmdfs_register_peer_sysfs(sbi, peer2add);
+ if (err) {
+ hmdfs_err("register peer %llu sysfs err %d",
+ peer2add->device_id, err);
+ return ERR_PTR(err);
+ }
+ list_add_tail(&peer2add->list, &sbi->connections.node_list);
+ peer_get(peer2add);
+ hmdfs_run_simple_evt_cb(peer2add, NODE_EVT_ADD);
+ return peer2add;
+}
+
+static struct hmdfs_peer *
+alloc_peer(struct hmdfs_sb_info *sbi, uint8_t *cid,
+ const struct connection_operations *conn_operations)
+{
+ struct hmdfs_peer *node = kzalloc(sizeof(*node), GFP_KERNEL);
+
+ if (!node)
+ return NULL;
+
+ node->device_id = (u32)atomic_inc_return(&sbi->connections.conn_seq);
+
+ node->async_wq = alloc_workqueue("dfs_async%u_%llu", WQ_MEM_RECLAIM, 0,
+ sbi->seq, node->device_id);
+ if (!node->async_wq) {
+ hmdfs_err("Failed to alloc async wq");
+ goto out_err;
+ }
+ node->req_handle_wq = alloc_workqueue("dfs_req%u_%llu",
+ WQ_UNBOUND | WQ_MEM_RECLAIM,
+ sbi->async_req_max_active,
+ sbi->seq, node->device_id);
+ if (!node->req_handle_wq) {
+ hmdfs_err("Failed to alloc req wq");
+ goto out_err;
+ }
+ node->dentry_wq = alloc_workqueue("dfs_dentry%u_%llu",
+ WQ_UNBOUND | WQ_MEM_RECLAIM,
+ 0, sbi->seq, node->device_id);
+ if (!node->dentry_wq) {
+ hmdfs_err("Failed to alloc dentry wq");
+ goto out_err;
+ }
+ node->retry_wb_wq = alloc_workqueue("dfs_rwb%u_%llu",
+ WQ_UNBOUND | WQ_MEM_RECLAIM,
+ HMDFS_RETRY_WB_WQ_MAX_ACTIVE,
+ sbi->seq, node->device_id);
+ if (!node->retry_wb_wq) {
+ hmdfs_err("Failed to alloc retry writeback wq");
+ goto out_err;
+ }
+ node->reget_conn_wq = alloc_workqueue("dfs_regetcon%u_%llu",
+ WQ_UNBOUND, 0,
+ sbi->seq, node->device_id);
+ if (!node->reget_conn_wq) {
+ hmdfs_err("Failed to alloc reget conn wq");
+ goto out_err;
+ }
+ INIT_LIST_HEAD(&node->conn_impl_list);
+ mutex_init(&node->conn_impl_list_lock);
+ INIT_LIST_HEAD(&node->conn_deleting_list);
+ init_waitqueue_head(&node->deleting_list_wq);
+ idr_init(&node->msg_idr);
+ spin_lock_init(&node->idr_lock);
+ idr_init(&node->file_id_idr);
+ spin_lock_init(&node->file_id_lock);
+ INIT_LIST_HEAD(&node->list);
+ kref_init(&node->ref_cnt);
+ node->owner = sbi->seq;
+ node->conn_operations = conn_operations;
+ node->sbi = sbi;
+ node->status = NODE_STAT_SHAKING;
+ node->conn_time = jiffies;
+ memcpy(node->cid, cid, HMDFS_CID_SIZE);
+ atomic64_set(&node->sb_dirty_count, 0);
+ node->fid_cookie = 0;
+ atomic_set(&node->evt_seq, 0);
+ mutex_init(&node->seq_lock);
+ mutex_init(&node->offline_cb_lock);
+ mutex_init(&node->evt_lock);
+ node->pending_evt = RAW_NODE_EVT_NR;
+ node->last_evt = RAW_NODE_EVT_NR;
+ node->cur_evt[0] = RAW_NODE_EVT_NR;
+ node->cur_evt[1] = RAW_NODE_EVT_NR;
+ node->seq_wr_idx = (unsigned char)UINT_MAX;
+ node->seq_rd_idx = node->seq_wr_idx;
+ INIT_DELAYED_WORK(&node->evt_dwork, hmdfs_node_evt_work);
+ node->msg_idr_process = 0;
+ node->offline_start = false;
+ spin_lock_init(&node->wr_opened_inode_lock);
+ INIT_LIST_HEAD(&node->wr_opened_inode_list);
+ spin_lock_init(&node->stashed_inode_lock);
+ node->stashed_inode_nr = 0;
+ atomic_set(&node->rebuild_inode_status_nr, 0);
+ init_waitqueue_head(&node->rebuild_inode_status_wq);
+ INIT_LIST_HEAD(&node->stashed_inode_list);
+ node->need_rebuild_stash_list = false;
+
+ return node;
+
+out_err:
+ if (node->async_wq) {
+ destroy_workqueue(node->async_wq);
+ node->async_wq = NULL;
+ }
+ if (node->req_handle_wq) {
+ destroy_workqueue(node->req_handle_wq);
+ node->req_handle_wq = NULL;
+ }
+ if (node->dentry_wq) {
+ destroy_workqueue(node->dentry_wq);
+ node->dentry_wq = NULL;
+ }
+ if (node->retry_wb_wq) {
+ destroy_workqueue(node->retry_wb_wq);
+ node->retry_wb_wq = NULL;
+ }
+ if (node->reget_conn_wq) {
+ destroy_workqueue(node->reget_conn_wq);
+ node->reget_conn_wq = NULL;
+ }
+ kfree(node);
+ return NULL;
+}
+
+struct hmdfs_peer *hmdfs_get_peer(struct hmdfs_sb_info *sbi, uint8_t *cid)
+{
+ struct hmdfs_peer *peer = NULL, *on_sbi_peer = NULL;
+ const struct connection_operations *conn_opr_ptr = NULL;
+
+ mutex_lock(&sbi->connections.node_lock);
+ peer = lookup_peer_by_cid_unsafe(sbi, cid);
+ mutex_unlock(&sbi->connections.node_lock);
+ if (peer) {
+ hmdfs_info("Got a existing peer: device_id = %llu",
+ peer->device_id);
+ goto out;
+ }
+
+ conn_opr_ptr = hmdfs_get_peer_operation(DFS_2_0);
+ if (unlikely(!conn_opr_ptr)) {
+ hmdfs_info("Fatal! Cannot get peer operation");
+ goto out;
+ }
+ peer = alloc_peer(sbi, cid, conn_opr_ptr);
+ if (unlikely(!peer)) {
+ hmdfs_info("Failed to alloc a peer");
+ goto out;
+ }
+
+ mutex_lock(&sbi->connections.node_lock);
+ on_sbi_peer = add_peer_unsafe(sbi, peer);
+ mutex_unlock(&sbi->connections.node_lock);
+ if (IS_ERR(on_sbi_peer)) {
+ peer_put(peer);
+ peer = NULL;
+ goto out;
+ } else if (unlikely(on_sbi_peer != peer)) {
+ hmdfs_info("Got a existing peer: device_id = %llu",
+ on_sbi_peer->device_id);
+ peer_put(peer);
+ peer = on_sbi_peer;
+ } else {
+ hmdfs_info("Got a newly allocated peer: device_id = %llu",
+ peer->device_id);
+ }
+
+out:
+ return peer;
+}
+
+static void head_release(struct kref *kref)
+{
+ struct hmdfs_msg_idr_head *head;
+ struct hmdfs_peer *con;
+
+ head = (struct hmdfs_msg_idr_head *)container_of(kref,
+ struct hmdfs_msg_idr_head, ref);
+ con = head->peer;
+ idr_remove(&con->msg_idr, head->msg_id);
+ spin_unlock(&con->idr_lock);
+
+ kfree(head);
+}
+
+void head_put(struct hmdfs_msg_idr_head *head)
+{
+ kref_put_lock(&head->ref, head_release, &head->peer->idr_lock);
+}
+
+struct hmdfs_msg_idr_head *hmdfs_find_msg_head(struct hmdfs_peer *peer, int id)
+{
+ struct hmdfs_msg_idr_head *head = NULL;
+
+ spin_lock(&peer->idr_lock);
+ head = idr_find(&peer->msg_idr, id);
+ if (head)
+ kref_get(&head->ref);
+ spin_unlock(&peer->idr_lock);
+
+ return head;
+}
+
+int hmdfs_alloc_msg_idr(struct hmdfs_peer *peer, enum MSG_IDR_TYPE type,
+ void *ptr)
+{
+ int ret = -EAGAIN;
+ struct hmdfs_msg_idr_head *head = ptr;
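+ /*
+ * Pre-2.0 peers keep message ids within 16 bits (cf. the __le16
+ * msg_id in connection_msg_head), so cap the idr range; an end of
+ * 0 means no upper bound.
+ */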
+ int end = peer->version < DFS_2_0 ? (USHRT_MAX + 1) : 0;
+
+ idr_preload(GFP_KERNEL);
+ spin_lock(&peer->idr_lock);
+ if (!peer->offline_start)
+ ret = idr_alloc_cyclic(&peer->msg_idr, ptr,
+ 1, end, GFP_NOWAIT);
+ if (ret >= 0) {
+ kref_init(&head->ref);
+ head->msg_id = ret;
+ head->type = type;
+ head->peer = peer;
+ peer->msg_idr_process++;
+ ret = 0;
+ }
+ spin_unlock(&peer->idr_lock);
+ idr_preload_end();
+
+ return ret;
+}
diff --git a/fs/hmdfs/comm/connection.h b/fs/hmdfs/comm/connection.h
new file mode 100644
index 0000000000000000000000000000000000000000..6f3ee1baddf2177a5d9714a5ed1a8b419c73ae11
--- /dev/null
+++ b/fs/hmdfs/comm/connection.h
@@ -0,0 +1,356 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/comm/connection.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_CONNECTION_H
+#define HMDFS_CONNECTION_H
+
+#ifdef CONFIG_HMDFS_FS_ENCRYPTION
+#include <net/tls.h>
+#endif
+
+#include <crypto/aead.h>
+#include <linux/kref.h>
+#include "protocol.h"
+#include "node_cb.h"
+
+#define HMDFS_KEY_SIZE 32
+#define HMDFS_IV_SIZE 12
+#define HMDFS_TAG_SIZE 16
+#define HMDFS_CID_SIZE 64
+
+enum {
+ CONNECT_MESG_HANDSHAKE_REQUEST = 1,
+ CONNECT_MESG_HANDSHAKE_RESPONSE = 2,
+ CONNECT_MESG_HANDSHAKE_ACK = 3,
+};
+
+enum {
+ CONNECT_STAT_WAIT_REQUEST = 0,
+ CONNECT_STAT_WAIT_RESPONSE,
+ CONNECT_STAT_WORKING,
+ CONNECT_STAT_STOP,
+ CONNECT_STAT_WAIT_ACK,
+ CONNECT_STAT_NEGO_FAIL,
+ CONNECT_STAT_COUNT
+};
+
+enum {
+ CONNECT_TYPE_TCP = 0,
+ CONNECT_TYPE_UNSUPPORT,
+};
+
+struct connection_stat {
+ int64_t send_bytes;
+ int64_t recv_bytes;
+ int send_message_count;
+ int recv_message_count;
+ unsigned long rekey_time;
+};
+
+struct connection {
+ struct list_head list;
+ struct kref ref_cnt;
+ struct mutex ref_lock;
+ struct hmdfs_peer *node;
+ int type;
+ int status;
+ void *connect_handle;
+ struct crypto_aead *tfm;
+ u8 master_key[HMDFS_KEY_SIZE];
+ u8 send_key[HMDFS_KEY_SIZE];
+ u8 recv_key[HMDFS_KEY_SIZE];
+ struct connection_stat stat;
+ struct work_struct reget_work;
+#ifdef CONFIG_HMDFS_FS_ENCRYPTION
+ struct tls12_crypto_info_aes_gcm_128 send_crypto_info;
+ struct tls12_crypto_info_aes_gcm_128 recv_crypto_info;
+#endif
+ void (*close)(struct connection *connect);
+ int (*send_message)(struct connection *connect,
+ struct hmdfs_send_data *msg);
+ uint32_t crypto;
+};
+
+enum {
+ NODE_STAT_SHAKING = 0,
+ NODE_STAT_ONLINE,
+ NODE_STAT_OFFLINE,
+};
+
+struct hmdfs_async_work {
+ struct hmdfs_msg_idr_head head;
+ struct page *page;
+ struct delayed_work d_work;
+ unsigned long start;
+};
+
+enum {
+ RAW_NODE_EVT_OFF = 0,
+ RAW_NODE_EVT_ON,
+ RAW_NODE_EVT_NR,
+};
+
+#define RAW_NODE_EVT_MAX_NR 4
+
+struct hmdfs_stash_statistics {
+ unsigned int cur_ok;
+ unsigned int cur_nothing;
+ unsigned int cur_fail;
+ unsigned int total_ok;
+ unsigned int total_nothing;
+ unsigned int total_fail;
+ unsigned long long ok_pages;
+ unsigned long long fail_pages;
+};
+
+struct hmdfs_restore_statistics {
+ unsigned int cur_ok;
+ unsigned int cur_fail;
+ unsigned int cur_keep;
+ unsigned int total_ok;
+ unsigned int total_fail;
+ unsigned int total_keep;
+ unsigned long long ok_pages;
+ unsigned long long fail_pages;
+};
+
+struct hmdfs_rebuild_statistics {
+ unsigned int cur_ok;
+ unsigned int cur_fail;
+ unsigned int cur_invalid;
+ unsigned int total_ok;
+ unsigned int total_fail;
+ unsigned int total_invalid;
+ unsigned int time;
+};
+
+struct hmdfs_peer_statistics {
+ /* stash statistics */
+ struct hmdfs_stash_statistics stash;
+ /* restore statistics */
+ struct hmdfs_restore_statistics restore;
+ /* rebuild statistics */
+ struct hmdfs_rebuild_statistics rebuild;
+};
+
+struct hmdfs_peer {
+ struct list_head list;
+ struct kref ref_cnt;
+ unsigned int owner;
+ uint64_t device_id;
+ unsigned long conn_time;
+ uint8_t version;
+ u8 status;
+ u64 features;
+ long long old_sb_dirty_count;
+ atomic64_t sb_dirty_count;
+ /*
+ * cookie for opened file ids;
+ * increased every time the peer goes offline
+ */
+ uint16_t fid_cookie;
+ struct mutex conn_impl_list_lock;
+ struct list_head conn_impl_list;
+ /*
+ * When an async message processing context calls
+ * hmdfs_reget_connection(), the connection is moved to
+ * conn_deleting_list so that hmdfs_disconnect_node() can
+ * wait for all receive threads to exit.
+ */
+ struct list_head conn_deleting_list;
+ wait_queue_head_t deleting_list_wq;
+ struct idr msg_idr;
+ spinlock_t idr_lock;
+ struct idr file_id_idr;
+ spinlock_t file_id_lock;
+ int recvbuf_maxsize;
+ struct crypto_aead *tfm;
+ char cid[HMDFS_CID_SIZE + 1];
+ const struct connection_operations *conn_operations;
+ struct hmdfs_sb_info *sbi;
+ struct workqueue_struct *async_wq;
+ struct workqueue_struct *req_handle_wq;
+ struct workqueue_struct *dentry_wq;
+ struct workqueue_struct *retry_wb_wq;
+ struct workqueue_struct *reget_conn_wq;
+ atomic_t evt_seq;
+ /* sync cb may be blocking */
+ struct mutex seq_lock;
+ struct mutex offline_cb_lock;
+ struct mutex evt_lock;
+ unsigned char pending_evt;
+ unsigned char last_evt;
+ unsigned char waiting_evt[RAW_NODE_EVT_NR];
+ unsigned char seq_rd_idx;
+ unsigned char seq_wr_idx;
+ unsigned int seq_tbl[RAW_NODE_EVT_MAX_NR];
+ unsigned int pending_evt_seq;
+ unsigned char cur_evt[NODE_EVT_TYPE_NR];
+ unsigned int cur_evt_seq[NODE_EVT_TYPE_NR];
+ unsigned int merged_evt;
+ unsigned int dup_evt[RAW_NODE_EVT_NR];
+ struct delayed_work evt_dwork;
+ /* protected by idr_lock */
+ uint64_t msg_idr_process;
+ bool offline_start;
+ spinlock_t wr_opened_inode_lock;
+ struct list_head wr_opened_inode_list;
+ /*
+ * protect @stashed_inode_list and @stashed_inode_nr in stash process
+ * and fill_inode_remote->hmdfs_remote_init_stash_status process
+ */
+ spinlock_t stashed_inode_lock;
+ unsigned int stashed_inode_nr;
+ struct list_head stashed_inode_list;
+ bool need_rebuild_stash_list;
+ /* how many inodes are rebuilding stash status */
+ atomic_t rebuild_inode_status_nr;
+ wait_queue_head_t rebuild_inode_status_wq;
+ struct hmdfs_peer_statistics stats;
+ /* sysfs */
+ struct kobject kobj;
+ struct completion kobj_unregister;
+};
+
+#define HMDFS_DEVID_LOCAL 0
+
+/* Keep the wire format compatible with DFS 1.0 */
+struct connection_msg_head {
+ __u8 magic;
+ __u8 version;
+ __u8 operations;
+ __u8 flags;
+ __le32 datasize;
+ __le64 source;
+ __le16 msg_id;
+ __le16 request_id;
+ __le32 reserved1;
+} __packed;
+
+struct connection_handshake_req {
+ __le32 len;
+ char dev_id[];
+} __packed;
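+/*
+ * Handshake payload layout, as assembled by do_send_handshake():
+ *
+ * | connection_msg_head | connection_handshake_req | dev_id (len bytes) |
+ * | extend area (DFS_2_0+ RESPONSE/ACK only) |
+ */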
+
+enum {
+ HS_EXTEND_CODE_CRYPTO = 0,
+ HS_EXTEND_CODE_CASE_SENSE,
+ HS_EXTEND_CODE_FEATURE_SUPPORT,
+ HS_EXTEND_CODE_COUNT
+};
+
+struct conn_hs_extend_reg {
+ __u16 len;
+ __u16 resv;
+ void (*filler)(struct connection *conn_impl, __u8 ops,
+ void *data, __u32 len);
+ int (*parser)(struct connection *conn_impl, __u8 ops,
+ void *data, __u32 len);
+};
+
+struct conn_hs_extend_head {
+ __le32 field_cn;
+ char data[];
+};
+
+struct extend_field_head {
+ __le16 code;
+ __le16 len;
+} __packed;
+
+struct crypto_body {
+ __le32 crypto;
+} __packed;
+
+struct case_sense_body {
+ __u8 case_sensitive;
+} __packed;
+
+struct feature_body {
+ __u64 features;
+ __u64 reserved;
+} __packed;
+
+#define HMDFS_HS_CRYPTO_KTLS_AES128 0x00000001
+#define HMDFS_HS_CRYPTO_KTLS_AES256 0x00000002
+
+static inline bool hmdfs_is_node_online(const struct hmdfs_peer *node)
+{
+ return READ_ONCE(node->status) == NODE_STAT_ONLINE;
+}
+
+static inline unsigned int hmdfs_node_inc_evt_seq(struct hmdfs_peer *node)
+{
+ /* Use the atomic as an unsigned integer */
+ return atomic_inc_return(&node->evt_seq);
+}
+
+static inline unsigned int hmdfs_node_evt_seq(const struct hmdfs_peer *node)
+{
+ return atomic_read(&node->evt_seq);
+}
+
+struct connection *get_conn_impl(struct hmdfs_peer *node, int connect_type);
+
+void set_conn_sock_quickack(struct hmdfs_peer *node);
+
+struct hmdfs_peer *hmdfs_get_peer(struct hmdfs_sb_info *sbi, uint8_t *cid);
+
+struct hmdfs_peer *hmdfs_lookup_from_devid(struct hmdfs_sb_info *sbi,
+ uint64_t device_id);
+struct hmdfs_peer *hmdfs_lookup_from_cid(struct hmdfs_sb_info *sbi,
+ uint8_t *cid);
+void connection_send_handshake(struct connection *conn_impl, __u8 operations,
+ __le16 request_id);
+void connection_handshake_recv_handler(struct connection *conn_impl, void *buf,
+ void *data, __u32 data_len);
+void connection_working_recv_handler(struct connection *conn_impl, void *head,
+ void *data, __u32 data_len);
+static inline void connection_get(struct connection *conn)
+{
+ kref_get(&conn->ref_cnt);
+}
+
+void connection_put(struct connection *conn);
+static inline void peer_get(struct hmdfs_peer *peer)
+{
+ kref_get(&peer->ref_cnt);
+}
+
+void peer_put(struct hmdfs_peer *peer);
+
+int hmdfs_sendmessage(struct hmdfs_peer *node, struct hmdfs_send_data *msg);
+void hmdfs_connections_stop(struct hmdfs_sb_info *sbi);
+
+void hmdfs_disconnect_node(struct hmdfs_peer *node);
+
+void connection_to_working(struct hmdfs_peer *node);
+
+int hmdfs_alloc_msg_idr(struct hmdfs_peer *peer, enum MSG_IDR_TYPE type,
+ void *ptr);
+struct hmdfs_msg_idr_head *hmdfs_find_msg_head(struct hmdfs_peer *peer, int id);
+
+static inline void hmdfs_start_process_offline(struct hmdfs_peer *peer)
+{
+ spin_lock(&peer->idr_lock);
+ peer->offline_start = true;
+ spin_unlock(&peer->idr_lock);
+}
+
+static inline void hmdfs_stop_process_offline(struct hmdfs_peer *peer)
+{
+ spin_lock(&peer->idr_lock);
+ peer->offline_start = false;
+ spin_unlock(&peer->idr_lock);
+}
+
+static inline void hmdfs_dec_msg_idr_process(struct hmdfs_peer *peer)
+{
+ spin_lock(&peer->idr_lock);
+ peer->msg_idr_process--;
+ spin_unlock(&peer->idr_lock);
+}
+#endif
diff --git a/fs/hmdfs/comm/crypto.c b/fs/hmdfs/comm/crypto.c
new file mode 100644
index 0000000000000000000000000000000000000000..60bb08f1697f90d72439b8ad64046bdfeb5558df
--- /dev/null
+++ b/fs/hmdfs/comm/crypto.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/comm/crypto.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include "crypto.h"
+
+#include <crypto/hash.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/socket.h>
+#include <linux/tcp.h>
+#include <net/tls.h>
+
+#include "hmdfs.h"
+
+static void tls_crypto_set_key(struct connection *conn_impl, int tx)
+{
+ int rc = 0;
+ struct tcp_handle *tcp = conn_impl->connect_handle;
+ struct tls_context *ctx = tls_get_ctx(tcp->sock->sk);
+ struct cipher_context *cctx = NULL;
+ struct tls_sw_context_tx *sw_ctx_tx = NULL;
+ struct tls_sw_context_rx *sw_ctx_rx = NULL;
+ struct crypto_aead **aead = NULL;
+ struct tls12_crypto_info_aes_gcm_128 *crypto_info = NULL;
+
+ if (tx) {
+ crypto_info = &conn_impl->send_crypto_info;
+ cctx = &ctx->tx;
+ sw_ctx_tx = tls_sw_ctx_tx(ctx);
+ aead = &sw_ctx_tx->aead_send;
+ } else {
+ crypto_info = &conn_impl->recv_crypto_info;
+ cctx = &ctx->rx;
+ sw_ctx_rx = tls_sw_ctx_rx(ctx);
+ aead = &sw_ctx_rx->aead_recv;
+ }
+
+ memcpy(cctx->iv, crypto_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+ memcpy(cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, crypto_info->iv,
+ TLS_CIPHER_AES_GCM_128_IV_SIZE);
+ memcpy(cctx->rec_seq, crypto_info->rec_seq,
+ TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
+ rc = crypto_aead_setkey(*aead, crypto_info->key,
+ TLS_CIPHER_AES_GCM_128_KEY_SIZE);
+ if (rc)
+ hmdfs_err("crypto set key error");
+}
+
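+/*
+ * Enable kernel TLS on the connection socket: attach the "tls" ULP,
+ * then program TLS_TX/TLS_RX with AES-128-GCM crypto_info whose
+ * IV/salt/rec_seq are derived from send_key/recv_key via
+ * update_key(..., HKDF_TYPE_IV).
+ */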
+int tls_crypto_info_init(struct connection *conn_impl)
+{
+ int ret = 0;
+ u8 key_meterial[HMDFS_KEY_SIZE];
+ struct tcp_handle *tcp =
+ (struct tcp_handle *)(conn_impl->connect_handle);
+ if (conn_impl->node->version < DFS_2_0 || !tcp)
+ return -EINVAL;
+ // send
+ update_key(conn_impl->send_key, key_meterial, HKDF_TYPE_IV);
+ ret = tcp->sock->ops->setsockopt(tcp->sock, SOL_TCP, TCP_ULP,
+ KERNEL_SOCKPTR("tls"), sizeof("tls"));
+ if (ret)
+ hmdfs_err("set tls error %d", ret);
+ tcp->connect->send_crypto_info.info.version = TLS_1_2_VERSION;
+ tcp->connect->send_crypto_info.info.cipher_type =
+ TLS_CIPHER_AES_GCM_128;
+
+ memcpy(tcp->connect->send_crypto_info.key, tcp->connect->send_key,
+ TLS_CIPHER_AES_GCM_128_KEY_SIZE);
+ memcpy(tcp->connect->send_crypto_info.iv,
+ key_meterial + CRYPTO_IV_OFFSET, TLS_CIPHER_AES_GCM_128_IV_SIZE);
+ memcpy(tcp->connect->send_crypto_info.salt,
+ key_meterial + CRYPTO_SALT_OFFSET,
+ TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+ memcpy(tcp->connect->send_crypto_info.rec_seq,
+ key_meterial + CRYPTO_SEQ_OFFSET,
+ TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
+
+ ret = tcp->sock->ops->setsockopt(tcp->sock, SOL_TLS, TLS_TX,
+ KERNEL_SOCKPTR(&(tcp->connect->send_crypto_info)),
+ sizeof(tcp->connect->send_crypto_info));
+ if (ret)
+ hmdfs_err("set tls send_crypto_info error %d", ret);
+
+ // recv
+ update_key(tcp->connect->recv_key, key_meterial, HKDF_TYPE_IV);
+ tcp->connect->recv_crypto_info.info.version = TLS_1_2_VERSION;
+ tcp->connect->recv_crypto_info.info.cipher_type =
+ TLS_CIPHER_AES_GCM_128;
+
+ memcpy(tcp->connect->recv_crypto_info.key, tcp->connect->recv_key,
+ TLS_CIPHER_AES_GCM_128_KEY_SIZE);
+ memcpy(tcp->connect->recv_crypto_info.iv,
+ key_meterial + CRYPTO_IV_OFFSET, TLS_CIPHER_AES_GCM_128_IV_SIZE);
+ memcpy(tcp->connect->recv_crypto_info.salt,
+ key_meterial + CRYPTO_SALT_OFFSET,
+ TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+ memcpy(tcp->connect->recv_crypto_info.rec_seq,
+ key_meterial + CRYPTO_SEQ_OFFSET,
+ TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
+ memset(key_meterial, 0, HMDFS_KEY_SIZE);
+
+ ret = tcp->sock->ops->setsockopt(tcp->sock, SOL_TLS, TLS_RX,
+ KERNEL_SOCKPTR(&(tcp->connect->recv_crypto_info)),
+ sizeof(tcp->connect->recv_crypto_info));
+ if (ret)
+ hmdfs_err("set tls recv_crypto_info error %d", ret);
+ return ret;
+}
+
+static int tls_set_tx(struct tcp_handle *tcp)
+{
+ int ret = 0;
+ u8 new_key[HMDFS_KEY_SIZE];
+ u8 key_meterial[HMDFS_KEY_SIZE];
+
+ ret = update_key(tcp->connect->send_key, new_key, HKDF_TYPE_REKEY);
+ if (ret < 0)
+ return ret;
+ memcpy(tcp->connect->send_key, new_key, HMDFS_KEY_SIZE);
+ ret = update_key(tcp->connect->send_key, key_meterial, HKDF_TYPE_IV);
+ if (ret < 0)
+ return ret;
+
+ memcpy(tcp->connect->send_crypto_info.key, tcp->connect->send_key,
+ TLS_CIPHER_AES_GCM_128_KEY_SIZE);
+ memcpy(tcp->connect->send_crypto_info.iv,
+ key_meterial + CRYPTO_IV_OFFSET, TLS_CIPHER_AES_GCM_128_IV_SIZE);
+ memcpy(tcp->connect->send_crypto_info.salt,
+ key_meterial + CRYPTO_SALT_OFFSET,
+ TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+ memcpy(tcp->connect->send_crypto_info.rec_seq,
+ key_meterial + CRYPTO_SEQ_OFFSET,
+ TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
+ memset(new_key, 0, HMDFS_KEY_SIZE);
+ memset(key_meterial, 0, HMDFS_KEY_SIZE);
+
+ tls_crypto_set_key(tcp->connect, 1);
+ return 0;
+}
+
+static int tls_set_rx(struct tcp_handle *tcp)
+{
+ int ret = 0;
+ u8 new_key[HMDFS_KEY_SIZE];
+ u8 key_meterial[HMDFS_KEY_SIZE];
+
+ ret = update_key(tcp->connect->recv_key, new_key, HKDF_TYPE_REKEY);
+ if (ret < 0)
+ return ret;
+ memcpy(tcp->connect->recv_key, new_key, HMDFS_KEY_SIZE);
+ ret = update_key(tcp->connect->recv_key, key_meterial, HKDF_TYPE_IV);
+ if (ret < 0)
+ return ret;
+
+ memcpy(tcp->connect->recv_crypto_info.key, tcp->connect->recv_key,
+ TLS_CIPHER_AES_GCM_128_KEY_SIZE);
+ memcpy(tcp->connect->recv_crypto_info.iv,
+ key_meterial + CRYPTO_IV_OFFSET, TLS_CIPHER_AES_GCM_128_IV_SIZE);
+ memcpy(tcp->connect->recv_crypto_info.salt,
+ key_meterial + CRYPTO_SALT_OFFSET,
+ TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+ memcpy(tcp->connect->recv_crypto_info.rec_seq,
+ key_meterial + CRYPTO_SEQ_OFFSET,
+ TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
+ memset(new_key, 0, HMDFS_KEY_SIZE);
+ memset(key_meterial, 0, HMDFS_KEY_SIZE);
+ tls_crypto_set_key(tcp->connect, 0);
+ return 0;
+}
+
+int set_crypto_info(struct connection *conn_impl, int set_type)
+{
+ int ret = 0;
+ __u8 version = conn_impl->node->version;
+ struct tcp_handle *tcp =
+ (struct tcp_handle *)(conn_impl->connect_handle);
+ if (version < DFS_2_0 || !tcp)
+ return -EINVAL;
+
+ if (set_type == SET_CRYPTO_SEND) {
+ ret = tls_set_tx(tcp);
+ if (ret) {
+ hmdfs_err("tls set tx fail");
+ return ret;
+ }
+ }
+ if (set_type == SET_CRYPTO_RECV) {
+ ret = tls_set_rx(tcp);
+ if (ret) {
+ hmdfs_err("tls set rx fail");
+ return ret;
+ }
+ }
+ hmdfs_info("KTLS setting success");
+ return ret;
+}
+
+static int hmac_sha256(u8 *key, u8 key_len, char *info, u8 info_len, u8 *output)
+{
+ struct crypto_shash *tfm = NULL;
+ struct shash_desc *shash = NULL;
+ int ret = 0;
+
+ if (!key)
+ return -EINVAL;
+
+ tfm = crypto_alloc_shash("hmac(sha256)", 0, 0);
+ if (IS_ERR(tfm)) {
+ hmdfs_err("crypto_alloc_ahash failed: err %ld", PTR_ERR(tfm));
+ return PTR_ERR(tfm);
+ }
+
+ ret = crypto_shash_setkey(tfm, key, key_len);
+ if (ret) {
+ hmdfs_err("crypto_ahash_setkey failed: err %d", ret);
+ goto failed;
+ }
+
+ shash = kzalloc(sizeof(*shash) + crypto_shash_descsize(tfm),
+ GFP_KERNEL);
+ if (!shash) {
+ ret = -ENOMEM;
+ goto failed;
+ }
+
+ shash->tfm = tfm;
+
+ ret = crypto_shash_digest(shash, info, info_len, output);
+
+ kfree(shash);
+
+failed:
+ crypto_free_shash(tfm);
+ return ret;
+}
+
+static const char *const g_key_lable[] = { "ktls key initiator",
+ "ktls key accepter",
+ "ktls key update", "ktls iv&salt" };
+static const int g_key_lable_len[] = { 18, 17, 15, 12 };
+
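+/*
+ * update_key() derives a new 32-byte key as a single HKDF-Expand-style
+ * block: the info buffer is "u16 output length | label string | 0x01"
+ * (labels from g_key_lable above), and new_key = HMAC-SHA256(old_key, info).
+ */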
+int update_key(__u8 *old_key, __u8 *new_key, int type)
+{
+ int ret = 0;
+ char lable[MAX_LABLE_SIZE];
+ u8 lable_size;
+
+ lable_size = g_key_lable_len[type] + sizeof(u16) + sizeof(char);
+ *((u16 *)lable) = HMDFS_KEY_SIZE;
+ memcpy(lable + sizeof(u16), g_key_lable[type], g_key_lable_len[type]);
+ *(lable + sizeof(u16) + g_key_lable_len[type]) = 0x01;
+ ret = hmac_sha256(old_key, HMDFS_KEY_SIZE, lable, lable_size, new_key);
+ if (ret < 0)
+ hmdfs_err("hmac sha256 error");
+ return ret;
+}
diff --git a/fs/hmdfs/comm/crypto.h b/fs/hmdfs/comm/crypto.h
new file mode 100644
index 0000000000000000000000000000000000000000..7549f3897336b0358b2a5cea76f8d45391c4f489
--- /dev/null
+++ b/fs/hmdfs/comm/crypto.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/comm/crypto.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_FS_ENCRYPTION_H
+#define HMDFS_FS_ENCRYPTION_H
+
+#include "transport.h"
+
+#define MAX_LABLE_SIZE 30
+#define CRYPTO_IV_OFFSET 0
+#define CRYPTO_SALT_OFFSET (CRYPTO_IV_OFFSET + TLS_CIPHER_AES_GCM_128_IV_SIZE)
+#define CRYPTO_SEQ_OFFSET \
+ (CRYPTO_SALT_OFFSET + TLS_CIPHER_AES_GCM_128_SALT_SIZE)
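+/*
+ * Layout of the 32-byte key material produced by
+ * update_key(..., HKDF_TYPE_IV): bytes 0-7 IV, 8-11 salt,
+ * 12-19 record sequence; the remaining bytes are unused.
+ */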
+#define REKEY_LIFETIME (60 * 60 * HZ)
+
+enum HKDF_TYPE {
+ HKDF_TYPE_KEY_INITIATOR = 0,
+ HKDF_TYPE_KEY_ACCEPTER = 1,
+ HKDF_TYPE_REKEY = 2,
+ HKDF_TYPE_IV = 3,
+};
+
+enum SET_CRYPTO_TYPE {
+ SET_CRYPTO_SEND = 0,
+ SET_CRYPTO_RECV = 1,
+};
+
+int tls_crypto_info_init(struct connection *conn_impl);
+int set_crypto_info(struct connection *conn_impl, int set_type);
+int update_key(__u8 *old_key, __u8 *new_key, int type);
+
+#endif
diff --git a/fs/hmdfs/comm/device_node.c b/fs/hmdfs/comm/device_node.c
new file mode 100644
index 0000000000000000000000000000000000000000..54eaaf06f22373c23570b1fd91ca72a2c27a2a7d
--- /dev/null
+++ b/fs/hmdfs/comm/device_node.c
@@ -0,0 +1,1665 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/comm/device_node.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include "device_node.h"
+
+#include <linux/cred.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/kfifo.h>
+#include <linux/kobject.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+#include "client_writeback.h"
+#include "server_writeback.h"
+#include "connection.h"
+#include "hmdfs_client.h"
+#include "socket_adapter.h"
+#include "authority/authentication.h"
+
+DEFINE_MUTEX(hmdfs_sysfs_mutex);
+static struct kset *hmdfs_kset;
+
+struct hmdfs_disconnect_node_work {
+ struct hmdfs_peer *conn;
+ struct work_struct work;
+ atomic_t *cnt;
+ struct wait_queue_head *waitq;
+};
+
+static void ctrl_cmd_update_socket_handler(const char *buf, size_t len,
+ struct hmdfs_sb_info *sbi)
+{
+ struct update_socket_param cmd;
+ struct hmdfs_peer *node = NULL;
+ struct connection *conn = NULL;
+
+ if (unlikely(!buf || len != sizeof(cmd))) {
+ hmdfs_err("len/buf error");
+ goto out;
+ }
+ memcpy(&cmd, buf, sizeof(cmd));
+
+ node = hmdfs_get_peer(sbi, cmd.cid);
+ if (unlikely(!node)) {
+ hmdfs_err("failed to update ctrl node: cannot get peer");
+ goto out;
+ }
+
+ conn = hmdfs_get_conn_tcp(node, cmd.newfd, cmd.masterkey, cmd.status);
+ if (unlikely(!conn)) {
+ hmdfs_err("failed to update ctrl node: cannot get conn");
+ } else if (!sbi->system_cred) {
+ const struct cred *system_cred = get_cred(current_cred());
+
+ if (cmpxchg_relaxed(&sbi->system_cred, NULL, system_cred))
+ put_cred(system_cred);
+ else
+ hmdfs_check_cred(system_cred);
+ }
+out:
+ if (conn)
+ connection_put(conn);
+ if (node)
+ peer_put(node);
+}
+
+static inline void hmdfs_disconnect_node_marked(struct hmdfs_peer *conn)
+{
+ hmdfs_start_process_offline(conn);
+ hmdfs_disconnect_node(conn);
+ hmdfs_stop_process_offline(conn);
+}
+
+static void ctrl_cmd_off_line_handler(const char *buf, size_t len,
+ struct hmdfs_sb_info *sbi)
+{
+ struct offline_param cmd;
+ struct hmdfs_peer *node = NULL;
+
+ if (unlikely(!buf || len != sizeof(cmd))) {
+ hmdfs_err("Recved a invalid userbuf");
+ return;
+ }
+ memcpy(&cmd, buf, sizeof(cmd));
+ node = hmdfs_lookup_from_cid(sbi, cmd.remote_cid);
+ if (unlikely(!node)) {
+ hmdfs_err("Cannot find node by device");
+ return;
+ }
+ hmdfs_info("Found peer: device_id = %llu", node->device_id);
+ hmdfs_disconnect_node_marked(node);
+ peer_put(node);
+}
+
+static void hmdfs_disconnect_node_work_fn(struct work_struct *base)
+{
+ struct hmdfs_disconnect_node_work *work =
+ container_of(base, struct hmdfs_disconnect_node_work, work);
+
+ hmdfs_disconnect_node_marked(work->conn);
+ if (atomic_dec_and_test(work->cnt))
+ wake_up(work->waitq);
+ kfree(work);
+}
+
+static void ctrl_cmd_off_line_all_handler(const char *buf, size_t len,
+ struct hmdfs_sb_info *sbi)
+{
+ struct hmdfs_peer *node = NULL;
+ struct hmdfs_disconnect_node_work *work = NULL;
+ atomic_t cnt = ATOMIC_INIT(0);
+ wait_queue_head_t waitq;
+
+ if (unlikely(len != sizeof(struct offline_all_param))) {
+ hmdfs_err("Recved a invalid userbuf, len %zu, expect %zu\n",
+ len, sizeof(struct offline_all_param));
+ return;
+ }
+
+ init_waitqueue_head(&waitq);
+ mutex_lock(&sbi->connections.node_lock);
+ list_for_each_entry(node, &sbi->connections.node_list, list) {
+ mutex_unlock(&sbi->connections.node_lock);
+ work = kmalloc(sizeof(*work), GFP_KERNEL);
+ if (work) {
+ atomic_inc(&cnt);
+ work->conn = node;
+ work->cnt = &cnt;
+ work->waitq = &waitq;
+ INIT_WORK(&work->work, hmdfs_disconnect_node_work_fn);
+ schedule_work(&work->work);
+ } else {
+ hmdfs_disconnect_node_marked(node);
+ }
+ mutex_lock(&sbi->connections.node_lock);
+ }
+ mutex_unlock(&sbi->connections.node_lock);
+
+ wait_event(waitq, !atomic_read(&cnt));
+}
+
+typedef void (*ctrl_cmd_handler)(const char *buf, size_t len,
+ struct hmdfs_sb_info *sbi);
+
+static const ctrl_cmd_handler cmd_handler[CMD_CNT] = {
+ [CMD_UPDATE_SOCKET] = ctrl_cmd_update_socket_handler,
+ [CMD_OFF_LINE] = ctrl_cmd_off_line_handler,
+ [CMD_OFF_LINE_ALL] = ctrl_cmd_off_line_all_handler,
+};
+
+static ssize_t sbi_cmd_show(struct kobject *kobj, struct sbi_attribute *attr,
+ char *buf)
+{
+ struct notify_param param;
+ int out_len;
+ struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+ memset(&param, 0, sizeof(param));
+ spin_lock(&sbi->notify_fifo_lock);
+ out_len = kfifo_out(&sbi->notify_fifo, &param, sizeof(param));
+ spin_unlock(&sbi->notify_fifo_lock);
+ if (out_len != sizeof(param))
+ param.notify = NOTIFY_NONE;
+ memcpy(buf, &param, sizeof(param));
+ return sizeof(param);
+}
+
+static const char *cmd2str(int cmd)
+{
+ switch (cmd) {
+ case 0:
+ return "CMD_UPDATE_SOCKET";
+ case 1:
+ return "CMD_OFF_LINE";
+ case 2:
+ return "CMD_OFF_LINE_ALL";
+ default:
+ return "illegal cmd";
+ }
+}
+
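+/*
+ * The cmd node consumes a binary command struct whose leading int
+ * selects the handler. Userspace sketch (assuming the param structs
+ * declared for these commands start with that command int):
+ *
+ * struct update_socket_param p = { 0 };
+ * *(int *)&p = CMD_UPDATE_SOCKET; // then fill cid/newfd/masterkey
+ * write(cmd_sysfs_fd, &p, sizeof(p));
+ */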
+static ssize_t sbi_cmd_store(struct kobject *kobj, struct sbi_attribute *attr,
+ const char *buf, size_t len)
+{
+ int cmd;
+ struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+ if (!sbi) {
+ hmdfs_info("Fatal! Empty sbi. Mount fs first");
+ return len;
+ }
+ if (len < sizeof(int)) {
+ hmdfs_err("Illegal cmd: cmd len = %zu", len);
+ return len;
+ }
+ cmd = *(int *)buf;
+ if (cmd < 0 || cmd >= CMD_CNT) {
+ hmdfs_err("Illegal cmd : cmd = %d", cmd);
+ return len;
+ }
+ hmdfs_info("Recved cmd: %s", cmd2str(cmd));
+ if (cmd_handler[cmd])
+ cmd_handler[cmd](buf, len, sbi);
+ return len;
+}
+
+static struct sbi_attribute sbi_cmd_attr =
+ __ATTR(cmd, 0664, sbi_cmd_show, sbi_cmd_store);
+
+static ssize_t sbi_status_show(struct kobject *kobj, struct sbi_attribute *attr,
+ char *buf)
+{
+ ssize_t size = 0;
+ struct hmdfs_sb_info *sbi = NULL;
+ struct hmdfs_peer *peer = NULL;
+ struct connection *conn_impl = NULL;
+ struct tcp_handle *tcp = NULL;
+
+ sbi = to_sbi(kobj);
+ size += sprintf(buf + size, "peers version status\n");
+
+ mutex_lock(&sbi->connections.node_lock);
+ list_for_each_entry(peer, &sbi->connections.node_list, list) {
+ size += sprintf(buf + size, "%llu %d %d\n", peer->device_id,
+ peer->version, peer->status);
+ // connection information
+ size += sprintf(
+ buf + size,
+ "\t socket_fd connection_status tcp_status ... refcnt\n");
+ mutex_lock(&peer->conn_impl_list_lock);
+ list_for_each_entry(conn_impl, &peer->conn_impl_list, list) {
+ tcp = conn_impl->connect_handle;
+ size += sprintf(buf + size, "\t %d \t%d \t%d \t%p \t%ld\n",
+ tcp->fd, conn_impl->status,
+ tcp->sock->state, tcp->sock, file_count(tcp->sock->file));
+ }
+ mutex_unlock(&peer->conn_impl_list_lock);
+ }
+ mutex_unlock(&sbi->connections.node_lock);
+ return size;
+}
+
+static ssize_t sbi_status_store(struct kobject *kobj,
+ struct sbi_attribute *attr, const char *buf,
+ size_t len)
+{
+ return len;
+}
+
+static struct sbi_attribute sbi_status_attr =
+ __ATTR(status, 0664, sbi_status_show, sbi_status_store);
+
+static ssize_t sbi_stat_show(struct kobject *kobj, struct sbi_attribute *attr,
+ char *buf)
+{
+ ssize_t size = 0;
+ struct hmdfs_sb_info *sbi = NULL;
+ struct hmdfs_peer *peer = NULL;
+ struct connection *conn_impl = NULL;
+ struct tcp_handle *tcp = NULL;
+
+ sbi = to_sbi(kobj);
+ mutex_lock(&sbi->connections.node_lock);
+ list_for_each_entry(peer, &sbi->connections.node_list, list) {
+ // connection information
+ mutex_lock(&peer->conn_impl_list_lock);
+ list_for_each_entry(conn_impl, &peer->conn_impl_list, list) {
+ tcp = conn_impl->connect_handle;
+ size += sprintf(buf + size, "socket_fd: %d\n", tcp->fd);
+ size += sprintf(buf + size,
+ "\tsend_msg %d \tsend_bytes %llu\n",
+ conn_impl->stat.send_message_count,
+ conn_impl->stat.send_bytes);
+ size += sprintf(buf + size,
+ "\trecv_msg %d \trecv_bytes %llu\n",
+ conn_impl->stat.recv_message_count,
+ conn_impl->stat.recv_bytes);
+ }
+ mutex_unlock(&peer->conn_impl_list_lock);
+ }
+ mutex_unlock(&sbi->connections.node_lock);
+ return size;
+}
+
+static ssize_t sbi_stat_store(struct kobject *kobj, struct sbi_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct hmdfs_sb_info *sbi = NULL;
+ struct hmdfs_peer *peer = NULL;
+ struct connection *conn_impl = NULL;
+
+ sbi = to_sbi(kobj);
+ mutex_lock(&sbi->connections.node_lock);
+ list_for_each_entry(peer, &sbi->connections.node_list, list) {
+ // connection information
+ mutex_lock(&peer->conn_impl_list_lock);
+ list_for_each_entry(conn_impl, &peer->conn_impl_list, list) {
+ conn_impl->stat.send_message_count = 0;
+ conn_impl->stat.send_bytes = 0;
+ conn_impl->stat.recv_message_count = 0;
+ conn_impl->stat.recv_bytes = 0;
+ }
+ mutex_unlock(&peer->conn_impl_list_lock);
+ }
+ mutex_unlock(&sbi->connections.node_lock);
+ return len;
+}
+
+static struct sbi_attribute sbi_statistic_attr =
+ __ATTR(statistic, 0664, sbi_stat_show, sbi_stat_store);
+
+static ssize_t sbi_dcache_precision_show(struct kobject *kobj,
+ struct sbi_attribute *attr, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n", to_sbi(kobj)->dcache_precision);
+}
+
+#define PRECISION_MAX 3600000
+
+static ssize_t sbi_dcache_precision_store(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ const char *buf, size_t len)
+{
+ int ret;
+ unsigned int precision;
+ struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+ ret = kstrtouint(skip_spaces(buf), 0, &precision);
+ if (!ret) {
+ if (precision <= PRECISION_MAX)
+ sbi->dcache_precision = precision;
+ else
+ ret = -EINVAL;
+ }
+
+ return ret ? ret : len;
+}
+
+static struct sbi_attribute sbi_dcache_precision_attr =
+ __ATTR(dcache_precision, 0664, sbi_dcache_precision_show,
+ sbi_dcache_precision_store);
+
+static ssize_t sbi_dcache_threshold_show(struct kobject *kobj,
+ struct sbi_attribute *attr, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%lu\n",
+ to_sbi(kobj)->dcache_threshold);
+}
+
+static ssize_t sbi_dcache_threshold_store(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ const char *buf, size_t len)
+{
+ int ret;
+ unsigned long threshold;
+ struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+ ret = kstrtoul(skip_spaces(buf), 0, &threshold);
+ if (!ret)
+ sbi->dcache_threshold = threshold;
+
+ return ret ? ret : len;
+}
+
+static struct sbi_attribute sbi_dcache_threshold_attr =
+ __ATTR(dcache_threshold, 0664, sbi_dcache_threshold_show,
+ sbi_dcache_threshold_store);
+
+static ssize_t server_statistic_show(struct kobject *kobj,
+ struct sbi_attribute *attr, char *buf)
+{
+ int i, ret;
+ const size_t size = PAGE_SIZE - 1;
+ ssize_t pos = 0;
+ struct server_statistic *stat = to_sbi(kobj)->s_server_statis;
+
+ for (i = 0; i < F_SIZE; i++) {
+ ret = snprintf(buf + pos, size - pos,
+ "%llu %u %llu %llu\n",
+ stat[i].cnt,
+ jiffies_to_msecs(stat[i].max),
+ stat[i].snd_cnt, stat[i].snd_fail_cnt);
+ if (ret > size - pos)
+ break;
+ pos += ret;
+ }
+
+ /* If we broke out of the loop early, append a trailing newline */
+ if (i < F_SIZE) {
+ ret = snprintf(buf + pos, size + 1 - pos, "\n");
+ pos += ret;
+ }
+ return pos;
+}
+
+static struct sbi_attribute sbi_local_op_attr = __ATTR_RO(server_statistic);
+
+static ssize_t client_statistic_show(struct kobject *kobj,
+ struct sbi_attribute *attr, char *buf)
+{
+ int i, ret;
+ const size_t size = PAGE_SIZE - 1;
+ ssize_t pos = 0;
+ struct client_statistic *stat = to_sbi(kobj)->s_client_statis;
+
+ for (i = 0; i < F_SIZE; i++) {
+ ret = snprintf(buf + pos, size - pos,
+ "%llu %llu %llu %llu %llu %u\n",
+ stat[i].snd_cnt,
+ stat[i].snd_fail_cnt,
+ stat[i].resp_cnt,
+ stat[i].timeout_cnt,
+ stat[i].delay_resp_cnt,
+ jiffies_to_msecs(stat[i].max));
+ if (ret > size - pos)
+ break;
+ pos += ret;
+ }
+
+ /* If we broke out of the loop early, append a trailing newline */
+ if (i < F_SIZE) {
+ ret = snprintf(buf + pos, size + 1 - pos, "\n");
+ pos += ret;
+ }
+
+ return pos;
+}
+
+static struct sbi_attribute sbi_delay_resp_attr = __ATTR_RO(client_statistic);
+
+static inline unsigned long pages_to_kbytes(unsigned long page)
+{
+ return page << (PAGE_SHIFT - 10);
+}
+
+static ssize_t dirty_writeback_stats_show(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ char *buf)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+ struct hmdfs_writeback *hwb = sbi->h_wb;
+ unsigned long avg;
+ unsigned long max;
+ unsigned long min;
+
+ spin_lock(&hwb->write_bandwidth_lock);
+ avg = hwb->avg_write_bandwidth;
+ max = hwb->max_write_bandwidth;
+ min = hwb->min_write_bandwidth;
+ spin_unlock(&hwb->write_bandwidth_lock);
+
+ if (min == ULONG_MAX)
+ min = 0;
+
+ return snprintf(buf, PAGE_SIZE,
+ "%10lu\n"
+ "%10lu\n"
+ "%10lu\n",
+ pages_to_kbytes(avg),
+ pages_to_kbytes(max),
+ pages_to_kbytes(min));
+}
+
+static struct sbi_attribute sbi_dirty_writeback_stats_attr =
+ __ATTR_RO(dirty_writeback_stats);
+
+static ssize_t sbi_wb_timeout_ms_show(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ char *buf)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", sbi->wb_timeout_ms);
+}
+
+static ssize_t sbi_wb_timeout_ms_store(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct hmdfs_sb_info *sbi = to_sbi(kobj);
+ unsigned int val;
+ int err;
+
+ err = kstrtouint(buf, 10, &val);
+ if (err)
+ return err;
+
+ if (!val || val > HMDFS_MAX_WB_TIMEOUT_MS)
+ return -EINVAL;
+
+ sbi->wb_timeout_ms = val;
+
+ return len;
+}
+
+static struct sbi_attribute sbi_wb_timeout_ms_attr =
+ __ATTR(wb_timeout_ms, 0664, sbi_wb_timeout_ms_show,
+ sbi_wb_timeout_ms_store);
+
+static ssize_t sbi_dirty_writeback_centisecs_show(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ char *buf)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n",
+ sbi->h_wb->dirty_writeback_interval);
+}
+
+static ssize_t sbi_dirty_writeback_centisecs_store(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ const char *buf, size_t len)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+ int err;
+
+ err = kstrtouint(buf, 10, &sbi->h_wb->dirty_writeback_interval);
+ if (err)
+ return err;
+ return len;
+}
+
+static struct sbi_attribute sbi_dirty_writeback_centisecs_attr =
+ __ATTR(dirty_writeback_centisecs, 0664,
+ sbi_dirty_writeback_centisecs_show,
+ sbi_dirty_writeback_centisecs_store);
+
+static ssize_t sbi_dirty_file_background_bytes_show(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ char *buf)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+ return snprintf(buf, PAGE_SIZE, "%lu\n",
+ sbi->h_wb->dirty_file_bg_bytes);
+}
+
+static ssize_t sbi_dirty_file_background_bytes_store(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ const char *buf,
+ size_t len)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+ unsigned long file_background_bytes = 0;
+ int err;
+
+ err = kstrtoul(buf, 10, &file_background_bytes);
+ if (err)
+ return err;
+ if (file_background_bytes == 0)
+ return -EINVAL;
+
+ sbi->h_wb->dirty_fs_bytes =
+ max(sbi->h_wb->dirty_fs_bytes, file_background_bytes);
+ sbi->h_wb->dirty_fs_bg_bytes =
+ max(sbi->h_wb->dirty_fs_bg_bytes, file_background_bytes);
+ sbi->h_wb->dirty_file_bytes =
+ max(sbi->h_wb->dirty_file_bytes, file_background_bytes);
+
+ sbi->h_wb->dirty_file_bg_bytes = file_background_bytes;
+ hmdfs_calculate_dirty_thresh(sbi->h_wb);
+ hmdfs_update_ratelimit(sbi->h_wb);
+ return len;
+}
+
+static ssize_t sbi_dirty_fs_background_bytes_show(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ char *buf)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+ return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->h_wb->dirty_fs_bg_bytes);
+}
+
+static ssize_t sbi_dirty_fs_background_bytes_store(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ const char *buf, size_t len)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+ unsigned long fs_background_bytes = 0;
+ int err;
+
+ err = kstrtoul(buf, 10, &fs_background_bytes);
+ if (err)
+ return err;
+ if (fs_background_bytes == 0)
+ return -EINVAL;
+
+ sbi->h_wb->dirty_file_bg_bytes =
+ min(sbi->h_wb->dirty_file_bg_bytes, fs_background_bytes);
+ sbi->h_wb->dirty_fs_bytes =
+ max(sbi->h_wb->dirty_fs_bytes, fs_background_bytes);
+
+ sbi->h_wb->dirty_fs_bg_bytes = fs_background_bytes;
+ hmdfs_calculate_dirty_thresh(sbi->h_wb);
+ hmdfs_update_ratelimit(sbi->h_wb);
+ return len;
+}
+
+static struct sbi_attribute sbi_dirty_file_background_bytes_attr =
+ __ATTR(dirty_file_background_bytes, 0644,
+ sbi_dirty_file_background_bytes_show,
+ sbi_dirty_file_background_bytes_store);
+static struct sbi_attribute sbi_dirty_fs_background_bytes_attr =
+ __ATTR(dirty_fs_background_bytes, 0644,
+ sbi_dirty_fs_background_bytes_show,
+ sbi_dirty_fs_background_bytes_store);
+
+static ssize_t sbi_dirty_file_bytes_show(struct kobject *kobj,
+ struct sbi_attribute *attr, char *buf)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+ return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->h_wb->dirty_file_bytes);
+}
+
+static ssize_t sbi_dirty_file_bytes_store(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ const char *buf, size_t len)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+ unsigned long file_bytes = 0;
+ int err;
+
+ err = kstrtoul(buf, 10, &file_bytes);
+ if (err)
+ return err;
+ if (file_bytes == 0)
+ return -EINVAL;
+
+ sbi->h_wb->dirty_file_bg_bytes =
+ min(sbi->h_wb->dirty_file_bg_bytes, file_bytes);
+ sbi->h_wb->dirty_fs_bytes = max(sbi->h_wb->dirty_fs_bytes, file_bytes);
+
+ sbi->h_wb->dirty_file_bytes = file_bytes;
+ hmdfs_calculate_dirty_thresh(sbi->h_wb);
+ hmdfs_update_ratelimit(sbi->h_wb);
+ return len;
+}
+
+static ssize_t sbi_dirty_fs_bytes_show(struct kobject *kobj,
+ struct sbi_attribute *attr, char *buf)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+ return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->h_wb->dirty_fs_bytes);
+}
+
+static ssize_t sbi_dirty_fs_bytes_store(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ const char *buf, size_t len)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+ unsigned long fs_bytes = 0;
+ int err;
+
+ err = kstrtoul(buf, 10, &fs_bytes);
+ if (err)
+ return err;
+ if (fs_bytes == 0)
+ return -EINVAL;
+
+ sbi->h_wb->dirty_file_bg_bytes =
+ min(sbi->h_wb->dirty_file_bg_bytes, fs_bytes);
+ sbi->h_wb->dirty_file_bytes =
+ min(sbi->h_wb->dirty_file_bytes, fs_bytes);
+ sbi->h_wb->dirty_fs_bg_bytes =
+ min(sbi->h_wb->dirty_fs_bg_bytes, fs_bytes);
+
+ sbi->h_wb->dirty_fs_bytes = fs_bytes;
+ hmdfs_calculate_dirty_thresh(sbi->h_wb);
+ hmdfs_update_ratelimit(sbi->h_wb);
+ return len;
+}
+
+static struct sbi_attribute sbi_dirty_file_bytes_attr =
+ __ATTR(dirty_file_bytes, 0644, sbi_dirty_file_bytes_show,
+ sbi_dirty_file_bytes_store);
+static struct sbi_attribute sbi_dirty_fs_bytes_attr =
+ __ATTR(dirty_fs_bytes, 0644, sbi_dirty_fs_bytes_show,
+ sbi_dirty_fs_bytes_store);
+
+static ssize_t sbi_dirty_writeback_timelimit_show(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ char *buf)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n",
+ sbi->h_wb->writeback_timelimit / HZ);
+}
+
+static ssize_t sbi_dirty_writeback_timelimit_store(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ const char *buf,
+ size_t len)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+ unsigned int time_limit = 0;
+ int err;
+
+ err = kstrtouint(buf, 10, &time_limit);
+ if (err)
+ return err;
+ if (time_limit == 0 || time_limit > (HMDFS_MAX_WB_TIMELIMIT / HZ))
+ return -EINVAL;
+
+ sbi->h_wb->writeback_timelimit = time_limit * HZ;
+ return len;
+}
+
+static struct sbi_attribute sbi_dirty_writeback_timelimit_attr =
+__ATTR(dirty_writeback_timelimit, 0644, sbi_dirty_writeback_timelimit_show,
+ sbi_dirty_writeback_timelimit_store);
+
+static ssize_t sbi_dirty_thresh_lowerlimit_show(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ char *buf)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+ return snprintf(buf, PAGE_SIZE, "%lu\n",
+ sbi->h_wb->bw_thresh_lowerlimit << PAGE_SHIFT);
+}
+
+static ssize_t sbi_dirty_thresh_lowerlimit_store(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ const char *buf,
+ size_t len)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+ unsigned long bw_thresh_lowerbytes = 0;
+ unsigned long bw_thresh_lowerlimit;
+ int err;
+
+ err = kstrtoul(buf, 10, &bw_thresh_lowerbytes);
+ if (err)
+ return err;
+
+ bw_thresh_lowerlimit = DIV_ROUND_UP(bw_thresh_lowerbytes, PAGE_SIZE);
+ if (bw_thresh_lowerlimit < HMDFS_BW_THRESH_MIN_LIMIT ||
+ bw_thresh_lowerlimit > HMDFS_BW_THRESH_MAX_LIMIT)
+ return -EINVAL;
+
+ sbi->h_wb->bw_thresh_lowerlimit = bw_thresh_lowerlimit;
+ return len;
+}
+
+static struct sbi_attribute sbi_dirty_thresh_lowerlimit_attr =
+__ATTR(dirty_thresh_lowerlimit, 0644, sbi_dirty_thresh_lowerlimit_show,
+ sbi_dirty_thresh_lowerlimit_store);
+
+static ssize_t sbi_dirty_writeback_autothresh_show(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ char *buf)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+ return snprintf(buf, PAGE_SIZE, "%d\n",
+ sbi->h_wb->dirty_auto_threshold);
+}
+
+static ssize_t sbi_dirty_writeback_autothresh_store(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ const char *buf,
+ size_t len)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+ bool dirty_auto_threshold = false;
+ int err;
+
+ err = kstrtobool(buf, &dirty_auto_threshold);
+ if (err)
+ return err;
+
+ sbi->h_wb->dirty_auto_threshold = dirty_auto_threshold;
+ return len;
+}
+
+static struct sbi_attribute sbi_dirty_writeback_autothresh_attr =
+__ATTR(dirty_writeback_autothresh, 0644, sbi_dirty_writeback_autothresh_show,
+ sbi_dirty_writeback_autothresh_store);
+
+static ssize_t sbi_dirty_writeback_control_show(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ char *buf)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+ return snprintf(buf, PAGE_SIZE, "%d\n",
+ sbi->h_wb->dirty_writeback_control);
+}
+
+static ssize_t sbi_dirty_writeback_control_store(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ const char *buf, size_t len)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+ unsigned int dirty_writeback_control = 0;
+ int err;
+
+ err = kstrtouint(buf, 10, &dirty_writeback_control);
+ if (err)
+ return err;
+
+ sbi->h_wb->dirty_writeback_control = (bool)dirty_writeback_control;
+ return len;
+}
+
+static struct sbi_attribute sbi_dirty_writeback_control_attr =
+ __ATTR(dirty_writeback_control, 0644, sbi_dirty_writeback_control_show,
+ sbi_dirty_writeback_control_store);
+
+static ssize_t sbi_srv_dirty_thresh_show(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ char *buf)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+ return snprintf(buf, PAGE_SIZE, "%d\n",
+ sbi->h_swb->dirty_thresh_pg >> HMDFS_MB_TO_PAGE_SHIFT);
+}
+
+static ssize_t sbi_srv_dirty_thresh_store(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ const char *buf,
+ size_t len)
+{
+ struct hmdfs_server_writeback *hswb = to_sbi(kobj)->h_swb;
+ int dirty_thresh_mb;
+ unsigned long long pages;
+ int err;
+
+ err = kstrtoint(buf, 10, &dirty_thresh_mb);
+ if (err)
+ return err;
+
+ if (dirty_thresh_mb <= 0)
+ return -EINVAL;
+
+ pages = dirty_thresh_mb;
+ pages <<= HMDFS_MB_TO_PAGE_SHIFT;
+ if (pages > INT_MAX) {
+		hmdfs_err("Illegal dirty_thresh_mb %d, its page count exceeds INT_MAX",
+			  dirty_thresh_mb);
+ return -EINVAL;
+ }
+
+ hswb->dirty_thresh_pg = (unsigned int)pages;
+ return len;
+}
+
+static struct sbi_attribute sbi_srv_dirty_thresh_attr =
+__ATTR(srv_dirty_thresh, 0644, sbi_srv_dirty_thresh_show,
+ sbi_srv_dirty_thresh_store);
+
+static ssize_t sbi_srv_dirty_wb_control_show(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ char *buf)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+ return snprintf(buf, PAGE_SIZE, "%d\n",
+ sbi->h_swb->dirty_writeback_control);
+}
+
+static ssize_t sbi_srv_dirty_wb_control_store(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ const char *buf,
+ size_t len)
+{
+ struct hmdfs_server_writeback *hswb = to_sbi(kobj)->h_swb;
+ bool dirty_writeback_control = true;
+ int err;
+
+ err = kstrtobool(buf, &dirty_writeback_control);
+ if (err)
+ return err;
+
+ hswb->dirty_writeback_control = dirty_writeback_control;
+
+ return len;
+}
+
+static struct sbi_attribute sbi_srv_dirty_wb_control_attr =
+__ATTR(srv_dirty_writeback_control, 0644, sbi_srv_dirty_wb_control_show,
+	sbi_srv_dirty_wb_control_store);
+
+static ssize_t sbi_dcache_timeout_show(struct kobject *kobj,
+ struct sbi_attribute *attr, char *buf)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", sbi->dcache_timeout);
+}
+
+static ssize_t sbi_dcache_timeout_store(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct hmdfs_sb_info *sbi = to_sbi(kobj);
+ unsigned int timeout;
+ int err;
+
+ err = kstrtouint(buf, 0, &timeout);
+ if (err)
+ return err;
+
+	/* zero is invalid; it does not mean "no cache" */
+ if (timeout == 0 || timeout > MAX_DCACHE_TIMEOUT)
+ return -EINVAL;
+
+ sbi->dcache_timeout = timeout;
+
+ return len;
+}
+
+static struct sbi_attribute sbi_dcache_timeout_attr =
+ __ATTR(dcache_timeout, 0644, sbi_dcache_timeout_show,
+ sbi_dcache_timeout_store);
+
+static ssize_t sbi_write_cache_timeout_sec_show(struct kobject *kobj,
+ struct sbi_attribute *attr, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n",
+ to_sbi(kobj)->write_cache_timeout);
+}
+
+static ssize_t sbi_write_cache_timeout_sec_store(struct kobject *kobj,
+ struct sbi_attribute *attr, const char *buf, size_t len)
+{
+ int ret;
+ unsigned int timeout;
+ struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+ ret = kstrtouint(buf, 0, &timeout);
+ if (ret)
+ return ret;
+
+	/* setting write_cache_timeout to 0 disables this functionality */
+ sbi->write_cache_timeout = timeout;
+
+ return len;
+}
+
+static struct sbi_attribute sbi_write_cache_timeout_sec_attr =
+ __ATTR(write_cache_timeout_sec, 0664, sbi_write_cache_timeout_sec_show,
+ sbi_write_cache_timeout_sec_store);
+
+static ssize_t sbi_node_evt_cb_delay_show(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ char *buf)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", sbi->async_cb_delay);
+}
+
+static ssize_t sbi_node_evt_cb_delay_store(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ const char *buf,
+ size_t len)
+{
+ struct hmdfs_sb_info *sbi = to_sbi(kobj);
+ unsigned int delay = 0;
+ int err;
+
+ err = kstrtouint(buf, 10, &delay);
+ if (err)
+ return err;
+
+ sbi->async_cb_delay = delay;
+
+ return len;
+}
+
+static struct sbi_attribute sbi_node_evt_cb_delay_attr =
+__ATTR(node_event_delay, 0644, sbi_node_evt_cb_delay_show,
+ sbi_node_evt_cb_delay_store);
+
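+/*
+ * Count the allocated entries in an IDR, yielding the CPU every
+ * HMDFS_IDR_RESCHED_COUNT entries so that walking a huge IDR cannot
+ * stall the system.
+ */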
+static int calc_idr_number(struct idr *idr)
+{
+ void *entry = NULL;
+ int id;
+ int number = 0;
+
+ idr_for_each_entry(idr, entry, id) {
+ number++;
+ if (number % HMDFS_IDR_RESCHED_COUNT == 0)
+ cond_resched();
+ }
+
+ return number;
+}
+
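+/*
+ * Dump a "device-id / count / next-id" table for every connected peer,
+ * covering either the pending-message IDR or the remote-opened-fd IDR,
+ * and truncate the output at PAGE_SIZE as sysfs requires.
+ */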
+static ssize_t sbi_show_idr_stats(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ char *buf, bool showmsg)
+{
+ ssize_t size = 0;
+ int count;
+ struct hmdfs_sb_info *sbi = NULL;
+ struct hmdfs_peer *peer = NULL;
+ struct idr *idr = NULL;
+
+ sbi = to_sbi(kobj);
+
+ mutex_lock(&sbi->connections.node_lock);
+ list_for_each_entry(peer, &sbi->connections.node_list, list) {
+ idr = showmsg ? &peer->msg_idr : &peer->file_id_idr;
+ count = calc_idr_number(idr);
+ size += snprintf(buf + size, PAGE_SIZE - size,
+ "device-id\tcount\tnext-id\n\t%llu\t\t%d\t%u\n",
+ peer->device_id, count, idr_get_cursor(idr));
+ if (size >= PAGE_SIZE) {
+ size = PAGE_SIZE;
+ break;
+ }
+ }
+ mutex_unlock(&sbi->connections.node_lock);
+
+ return size;
+}
+
+static ssize_t pending_message_show(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ char *buf)
+{
+ return sbi_show_idr_stats(kobj, attr, buf, true);
+}
+
+static struct sbi_attribute sbi_pending_message_attr =
+ __ATTR_RO(pending_message);
+
+static ssize_t peer_opened_fd_show(struct kobject *kobj,
+ struct sbi_attribute *attr, char *buf)
+{
+ return sbi_show_idr_stats(kobj, attr, buf, false);
+}
+
+static struct sbi_attribute sbi_peer_opened_fd_attr = __ATTR_RO(peer_opened_fd);
+
+static ssize_t sbi_srv_req_max_active_attr_show(struct kobject *kobj,
+ struct sbi_attribute *attr,
+ char *buf)
+{
+ const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", sbi->async_req_max_active);
+}
+
+static ssize_t sbi_srv_req_max_active_attr_store(struct kobject *kobj,
+ struct sbi_attribute *attr, const char *buf, size_t len)
+{
+ int ret;
+ unsigned int max_active;
+ struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+ ret = kstrtouint(buf, 0, &max_active);
+ if (ret)
+ return ret;
+
+ sbi->async_req_max_active = max_active;
+
+ return len;
+}
+
+static struct sbi_attribute sbi_srv_req_max_active_attr =
+__ATTR(srv_req_handle_max_active, 0644, sbi_srv_req_max_active_attr_show,
+ sbi_srv_req_max_active_attr_store);
+
+static ssize_t cache_file_show(struct hmdfs_sb_info *sbi,
+ struct list_head *head, char *buf)
+{
+ struct cache_file_node *cfn = NULL;
+ ssize_t pos = 0;
+
+ mutex_lock(&sbi->cache_list_lock);
+ list_for_each_entry(cfn, head, list) {
+ pos += snprintf(buf + pos, PAGE_SIZE - pos,
+ "dev_id: %s relative_path: %s\n",
+ cfn->cid, cfn->relative_path);
+ if (pos >= PAGE_SIZE) {
+ pos = PAGE_SIZE;
+ break;
+ }
+ }
+ mutex_unlock(&sbi->cache_list_lock);
+
+ return pos;
+}
+
+static ssize_t client_cache_file_show(struct kobject *kobj,
+ struct sbi_attribute *attr, char *buf)
+{
+ return cache_file_show(to_sbi(kobj), &to_sbi(kobj)->client_cache, buf);
+}
+
+static ssize_t server_cache_file_show(struct kobject *kobj,
+ struct sbi_attribute *attr, char *buf)
+{
+ return cache_file_show(to_sbi(kobj), &to_sbi(kobj)->server_cache, buf);
+}
+
+static struct sbi_attribute sbi_server_cache_file_attr =
+ __ATTR_RO(server_cache_file);
+static struct sbi_attribute sbi_client_cache_file_attr =
+ __ATTR_RO(client_cache_file);
+
+static ssize_t sb_seq_show(struct kobject *kobj, struct sbi_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n", to_sbi(kobj)->seq);
+}
+
+static struct sbi_attribute sbi_seq_attr = __ATTR_RO(sb_seq);
+
+static ssize_t peers_sum_attr_show(struct kobject *kobj,
+ struct sbi_attribute *attr, char *buf)
+{
+ struct hmdfs_sb_info *sbi = to_sbi(kobj);
+ struct hmdfs_peer *node = NULL;
+ unsigned int stash_ok = 0, stash_fail = 0, restore_ok = 0,
+ restore_fail = 0, rebuild_ok = 0, rebuild_fail = 0, rebuild_invalid = 0,
+ rebuild_time = 0;
+ unsigned long long stash_ok_pages = 0, stash_fail_pages = 0,
+ restore_ok_pages = 0, restore_fail_pages = 0;
+
+ mutex_lock(&sbi->connections.node_lock);
+ list_for_each_entry(node, &sbi->connections.node_list, list) {
+ peer_get(node);
+ mutex_unlock(&sbi->connections.node_lock);
+ stash_ok += node->stats.stash.total_ok;
+ stash_fail += node->stats.stash.total_fail;
+ stash_ok_pages += node->stats.stash.ok_pages;
+ stash_fail_pages += node->stats.stash.fail_pages;
+ restore_ok += node->stats.restore.total_ok;
+ restore_fail += node->stats.restore.total_fail;
+ restore_ok_pages += node->stats.restore.ok_pages;
+ restore_fail_pages += node->stats.restore.fail_pages;
+ rebuild_ok += node->stats.rebuild.total_ok;
+ rebuild_fail += node->stats.rebuild.total_fail;
+ rebuild_invalid += node->stats.rebuild.total_invalid;
+ rebuild_time += node->stats.rebuild.time;
+ peer_put(node);
+ mutex_lock(&sbi->connections.node_lock);
+ }
+ mutex_unlock(&sbi->connections.node_lock);
+
+ return snprintf(buf, PAGE_SIZE,
+ "%u %u %llu %llu\n"
+ "%u %u %llu %llu\n"
+ "%u %u %u %u\n",
+ stash_ok, stash_fail, stash_ok_pages, stash_fail_pages,
+ restore_ok, restore_fail, restore_ok_pages,
+ restore_fail_pages, rebuild_ok, rebuild_fail,
+ rebuild_invalid, rebuild_time);
+}
+
+static struct sbi_attribute sbi_peers_attr = __ATTR_RO(peers_sum_attr);
+
+static const char * const flag_name[] = {
+ "READPAGES",
+ "READPAGES_OPEN",
+ "ATOMIC_OPEN",
+};
+
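+/*
+ * Render a feature bit mask as a '|'-separated list: bits with an entry
+ * in flag_name[] print their name, any other set bit prints its index.
+ */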
+static ssize_t fill_features(char *buf, unsigned long long flag)
+{
+ int i;
+ ssize_t pos = 0;
+ bool sep = false;
+	int flag_name_count = ARRAY_SIZE(flag_name);
+
+ for (i = 0; i < sizeof(flag) * BITS_PER_BYTE; ++i) {
+ if (!(flag & BIT(i)))
+ continue;
+
+ if (sep)
+ pos += snprintf(buf + pos, PAGE_SIZE - pos, "|");
+ sep = true;
+
+ if (pos >= PAGE_SIZE) {
+ pos = PAGE_SIZE;
+ break;
+ }
+
+ if (i < flag_name_count && flag_name[i])
+ pos += snprintf(buf + pos, PAGE_SIZE - pos, "%s",
+ flag_name[i]);
+ else
+ pos += snprintf(buf + pos, PAGE_SIZE - pos, "%d", i);
+
+ if (pos >= PAGE_SIZE) {
+ pos = PAGE_SIZE;
+ break;
+ }
+ }
+ pos += snprintf(buf + pos, PAGE_SIZE - pos, "\n");
+ if (pos >= PAGE_SIZE)
+ pos = PAGE_SIZE;
+
+ return pos;
+}
+
+static ssize_t sbi_features_show(struct kobject *kobj,
+ struct sbi_attribute *attr, char *buf)
+{
+ struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+ return fill_features(buf, sbi->s_features);
+}
+
+static struct sbi_attribute sbi_features_attr = __ATTR(features, 0444,
+ sbi_features_show, NULL);
+
+static struct attribute *sbi_attrs[] = {
+ &sbi_cmd_attr.attr,
+ &sbi_status_attr.attr,
+ &sbi_statistic_attr.attr,
+ &sbi_dcache_precision_attr.attr,
+ &sbi_dcache_threshold_attr.attr,
+ &sbi_dcache_timeout_attr.attr,
+ &sbi_write_cache_timeout_sec_attr.attr,
+ &sbi_local_op_attr.attr,
+ &sbi_delay_resp_attr.attr,
+ &sbi_wb_timeout_ms_attr.attr,
+ &sbi_dirty_writeback_centisecs_attr.attr,
+ &sbi_dirty_file_background_bytes_attr.attr,
+ &sbi_dirty_fs_background_bytes_attr.attr,
+ &sbi_dirty_file_bytes_attr.attr,
+ &sbi_dirty_fs_bytes_attr.attr,
+ &sbi_dirty_writeback_autothresh_attr.attr,
+ &sbi_dirty_writeback_timelimit_attr.attr,
+ &sbi_dirty_thresh_lowerlimit_attr.attr,
+ &sbi_dirty_writeback_control_attr.attr,
+ &sbi_dirty_writeback_stats_attr.attr,
+ &sbi_srv_dirty_thresh_attr.attr,
+ &sbi_srv_dirty_wb_control_attr.attr,
+ &sbi_node_evt_cb_delay_attr.attr,
+ &sbi_srv_req_max_active_attr.attr,
+ &sbi_pending_message_attr.attr,
+ &sbi_peer_opened_fd_attr.attr,
+ &sbi_server_cache_file_attr.attr,
+ &sbi_client_cache_file_attr.attr,
+ &sbi_seq_attr.attr,
+ &sbi_peers_attr.attr,
+ &sbi_features_attr.attr,
+ NULL,
+};
+
+static ssize_t sbi_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct sbi_attribute *sbi_attr = to_sbi_attr(attr);
+
+ if (!sbi_attr->show)
+ return -EIO;
+ return sbi_attr->show(kobj, sbi_attr, buf);
+}
+
+static ssize_t sbi_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t len)
+{
+ struct sbi_attribute *sbi_attr = to_sbi_attr(attr);
+
+ if (!sbi_attr->store)
+ return -EIO;
+ return sbi_attr->store(kobj, sbi_attr, buf, len);
+}
+
+static const struct sysfs_ops sbi_sysfs_ops = {
+ .show = sbi_attr_show,
+ .store = sbi_attr_store,
+};
+
+static void sbi_release(struct kobject *kobj)
+{
+ struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+ complete(&sbi->s_kobj_unregister);
+}
+
+static struct kobj_type sbi_ktype = {
+ .sysfs_ops = &sbi_sysfs_ops,
+ .default_attrs = sbi_attrs,
+ .release = sbi_release,
+};
+
+static inline struct sbi_cmd_attribute *to_sbi_cmd_attr(struct attribute *x)
+{
+ return container_of(x, struct sbi_cmd_attribute, attr);
+}
+
+static inline struct hmdfs_sb_info *cmd_kobj_to_sbi(struct kobject *x)
+{
+ return container_of(x, struct hmdfs_sb_info, s_cmd_timeout_kobj);
+}
+
+static ssize_t cmd_timeout_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ int cmd = to_sbi_cmd_attr(attr)->command;
+ struct hmdfs_sb_info *sbi = cmd_kobj_to_sbi(kobj);
+
+	if (cmd < 0 || cmd >= F_SIZE)
+ return 0;
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", get_cmd_timeout(sbi, cmd));
+}
+
+static ssize_t cmd_timeout_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t len)
+{
+ unsigned int value;
+ int cmd = to_sbi_cmd_attr(attr)->command;
+ int ret = kstrtouint(skip_spaces(buf), 0, &value);
+ struct hmdfs_sb_info *sbi = cmd_kobj_to_sbi(kobj);
+
+	if (cmd < 0 || cmd >= F_SIZE)
+ return -EINVAL;
+
+ if (!ret)
+ set_cmd_timeout(sbi, cmd, value);
+
+ return ret ? ret : len;
+}
+
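+/*
+ * Every attribute under cmd_timeout/ embeds its command id, so the
+ * single cmd_timeout_show()/cmd_timeout_store() pair can service all of
+ * the per-command timeout files.
+ */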
+#define HMDFS_CMD_ATTR(_name, _cmd) \
+ static struct sbi_cmd_attribute hmdfs_attr_##_name = { \
+ .attr = { .name = __stringify(_name), .mode = 0664 }, \
+ .command = (_cmd), \
+ }
+
+HMDFS_CMD_ATTR(open, F_OPEN);
+HMDFS_CMD_ATTR(release, F_RELEASE);
+HMDFS_CMD_ATTR(readpage, F_READPAGE);
+HMDFS_CMD_ATTR(writepage, F_WRITEPAGE);
+HMDFS_CMD_ATTR(iterate, F_ITERATE);
+HMDFS_CMD_ATTR(rmdir, F_RMDIR);
+HMDFS_CMD_ATTR(unlink, F_UNLINK);
+HMDFS_CMD_ATTR(rename, F_RENAME);
+HMDFS_CMD_ATTR(setattr, F_SETATTR);
+HMDFS_CMD_ATTR(statfs, F_STATFS);
+HMDFS_CMD_ATTR(drop_push, F_DROP_PUSH);
+HMDFS_CMD_ATTR(getattr, F_GETATTR);
+HMDFS_CMD_ATTR(fsync, F_FSYNC);
+HMDFS_CMD_ATTR(syncfs, F_SYNCFS);
+HMDFS_CMD_ATTR(getxattr, F_GETXATTR);
+HMDFS_CMD_ATTR(setxattr, F_SETXATTR);
+HMDFS_CMD_ATTR(listxattr, F_LISTXATTR);
+
+#define ATTR_LIST(_name) (&hmdfs_attr_##_name.attr)
+
+static struct attribute *sbi_timeout_attrs[] = {
+ ATTR_LIST(open), ATTR_LIST(release),
+ ATTR_LIST(readpage), ATTR_LIST(writepage),
+ ATTR_LIST(iterate), ATTR_LIST(rmdir),
+ ATTR_LIST(unlink), ATTR_LIST(rename),
+ ATTR_LIST(setattr),
+ ATTR_LIST(statfs), ATTR_LIST(drop_push),
+ ATTR_LIST(getattr), ATTR_LIST(fsync),
+ ATTR_LIST(syncfs), ATTR_LIST(getxattr),
+ ATTR_LIST(setxattr), ATTR_LIST(listxattr),
+ NULL
+};
+
+static const struct sysfs_ops sbi_cmd_sysfs_ops = {
+ .show = cmd_timeout_show,
+ .store = cmd_timeout_store,
+};
+
+static void sbi_timeout_release(struct kobject *kobj)
+{
+ struct hmdfs_sb_info *sbi = container_of(kobj, struct hmdfs_sb_info,
+ s_cmd_timeout_kobj);
+
+ complete(&sbi->s_timeout_kobj_unregister);
+}
+
+static struct kobj_type sbi_timeout_ktype = {
+ .sysfs_ops = &sbi_cmd_sysfs_ops,
+ .default_attrs = sbi_timeout_attrs,
+ .release = sbi_timeout_release,
+};
+
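+/*
+ * Each superblock owns two kobjects (the sbi node and its cmd_timeout
+ * child). Teardown waits on completions fired from the ktype release
+ * callbacks, so the backing memory cannot be freed while sysfs still
+ * holds a reference.
+ */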
+void hmdfs_release_sysfs(struct hmdfs_sb_info *sbi)
+{
+ kobject_put(&sbi->s_cmd_timeout_kobj);
+ wait_for_completion(&sbi->s_timeout_kobj_unregister);
+ kobject_put(&sbi->kobj);
+ wait_for_completion(&sbi->s_kobj_unregister);
+}
+
+int hmdfs_register_sysfs(const char *name, struct hmdfs_sb_info *sbi)
+{
+ int ret;
+ struct kobject *kobj = NULL;
+
+ mutex_lock(&hmdfs_sysfs_mutex);
+ kobj = kset_find_obj(hmdfs_kset, name);
+ if (kobj) {
+		hmdfs_err("mount failed, already exists");
+ kobject_put(kobj);
+ mutex_unlock(&hmdfs_sysfs_mutex);
+ return -EEXIST;
+ }
+
+ sbi->kobj.kset = hmdfs_kset;
+ init_completion(&sbi->s_kobj_unregister);
+ ret = kobject_init_and_add(&sbi->kobj, &sbi_ktype,
+ &hmdfs_kset->kobj, "%s", name);
+ mutex_unlock(&hmdfs_sysfs_mutex);
+
+ if (ret) {
+ kobject_put(&sbi->kobj);
+ wait_for_completion(&sbi->s_kobj_unregister);
+ return ret;
+ }
+
+ init_completion(&sbi->s_timeout_kobj_unregister);
+ ret = kobject_init_and_add(&sbi->s_cmd_timeout_kobj, &sbi_timeout_ktype,
+ &sbi->kobj, "cmd_timeout");
+ if (ret) {
+ hmdfs_release_sysfs(sbi);
+ return ret;
+ }
+
+ kobject_uevent(&sbi->kobj, KOBJ_ADD);
+ return 0;
+}
+
+void hmdfs_unregister_sysfs(struct hmdfs_sb_info *sbi)
+{
+ kobject_del(&sbi->s_cmd_timeout_kobj);
+ kobject_del(&sbi->kobj);
+}
+
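+/* RAW_NODE_EVT_NR means "no event"; present it as -1 in sysfs */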
+static inline int to_sysfs_fmt_evt(unsigned int evt)
+{
+ return evt == RAW_NODE_EVT_NR ? -1 : evt;
+}
+
+static ssize_t features_show(struct kobject *kobj, struct peer_attribute *attr,
+ char *buf)
+{
+ struct hmdfs_peer *peer = to_peer(kobj);
+
+ return fill_features(buf, peer->features);
+}
+
+static ssize_t event_show(struct kobject *kobj, struct peer_attribute *attr,
+ char *buf)
+{
+ struct hmdfs_peer *peer = to_peer(kobj);
+
+ return snprintf(buf, PAGE_SIZE,
+ "cur_async evt %d seq %u\n"
+ "cur_sync evt %d seq %u\n"
+ "pending evt %d seq %u\n"
+ "merged evt %u\n"
+ "dup_drop evt %u %u\n"
+ "waiting evt %u %u\n"
+ "seq_tbl %u %u %u %u\n"
+ "seq_rd_idx %u\n"
+ "seq_wr_idx %u\n",
+ to_sysfs_fmt_evt(peer->cur_evt[0]),
+ peer->cur_evt_seq[0],
+ to_sysfs_fmt_evt(peer->cur_evt[1]),
+ peer->cur_evt_seq[1],
+ to_sysfs_fmt_evt(peer->pending_evt),
+ peer->pending_evt_seq,
+ peer->merged_evt,
+ peer->dup_evt[RAW_NODE_EVT_OFF],
+ peer->dup_evt[RAW_NODE_EVT_ON],
+ peer->waiting_evt[RAW_NODE_EVT_OFF],
+ peer->waiting_evt[RAW_NODE_EVT_ON],
+ peer->seq_tbl[0], peer->seq_tbl[1], peer->seq_tbl[2],
+ peer->seq_tbl[3],
+ peer->seq_rd_idx % RAW_NODE_EVT_MAX_NR,
+ peer->seq_wr_idx % RAW_NODE_EVT_MAX_NR);
+}
+
+static ssize_t stash_show(struct kobject *kobj, struct peer_attribute *attr,
+ char *buf)
+{
+ struct hmdfs_peer *peer = to_peer(kobj);
+
+ return snprintf(buf, PAGE_SIZE,
+ "cur_ok %u\n"
+ "cur_nothing %u\n"
+ "cur_fail %u\n"
+ "total_ok %u\n"
+ "total_nothing %u\n"
+ "total_fail %u\n"
+ "ok_pages %llu\n"
+ "fail_pages %llu\n",
+ peer->stats.stash.cur_ok,
+ peer->stats.stash.cur_nothing,
+ peer->stats.stash.cur_fail,
+ peer->stats.stash.total_ok,
+ peer->stats.stash.total_nothing,
+ peer->stats.stash.total_fail,
+ peer->stats.stash.ok_pages,
+ peer->stats.stash.fail_pages);
+}
+
+static ssize_t restore_show(struct kobject *kobj, struct peer_attribute *attr,
+ char *buf)
+{
+ struct hmdfs_peer *peer = to_peer(kobj);
+
+ return snprintf(buf, PAGE_SIZE,
+ "cur_ok %u\n"
+ "cur_fail %u\n"
+ "cur_keep %u\n"
+ "total_ok %u\n"
+ "total_fail %u\n"
+ "total_keep %u\n"
+ "ok_pages %llu\n"
+ "fail_pages %llu\n",
+ peer->stats.restore.cur_ok,
+ peer->stats.restore.cur_fail,
+ peer->stats.restore.cur_keep,
+ peer->stats.restore.total_ok,
+ peer->stats.restore.total_fail,
+ peer->stats.restore.total_keep,
+ peer->stats.restore.ok_pages,
+ peer->stats.restore.fail_pages);
+}
+
+static ssize_t rebuild_show(struct kobject *kobj, struct peer_attribute *attr,
+ char *buf)
+{
+ struct hmdfs_peer *peer = to_peer(kobj);
+
+ return snprintf(buf, PAGE_SIZE,
+ "cur_ok %u\n"
+ "cur_fail %u\n"
+ "cur_invalid %u\n"
+ "total_ok %u\n"
+ "total_fail %u\n"
+ "total_invalid %u\n"
+ "time %u\n",
+ peer->stats.rebuild.cur_ok,
+ peer->stats.rebuild.cur_fail,
+ peer->stats.rebuild.cur_invalid,
+ peer->stats.rebuild.total_ok,
+ peer->stats.rebuild.total_fail,
+ peer->stats.rebuild.total_invalid,
+ peer->stats.rebuild.time);
+}
+
+static struct peer_attribute peer_features_attr = __ATTR_RO(features);
+static struct peer_attribute peer_event_attr = __ATTR_RO(event);
+static struct peer_attribute peer_stash_attr = __ATTR_RO(stash);
+static struct peer_attribute peer_restore_attr = __ATTR_RO(restore);
+static struct peer_attribute peer_rebuild_attr = __ATTR_RO(rebuild);
+
+static struct attribute *peer_attrs[] = {
+ &peer_features_attr.attr,
+ &peer_event_attr.attr,
+ &peer_stash_attr.attr,
+ &peer_restore_attr.attr,
+ &peer_rebuild_attr.attr,
+ NULL,
+};
+
+static ssize_t peer_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct peer_attribute *peer_attr = to_peer_attr(attr);
+
+ if (!peer_attr->show)
+ return -EIO;
+ return peer_attr->show(kobj, peer_attr, buf);
+}
+
+static ssize_t peer_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t len)
+{
+ struct peer_attribute *peer_attr = to_peer_attr(attr);
+
+ if (!peer_attr->store)
+ return -EIO;
+ return peer_attr->store(kobj, peer_attr, buf, len);
+}
+
+static const struct sysfs_ops peer_sysfs_ops = {
+ .show = peer_attr_show,
+ .store = peer_attr_store,
+};
+
+static void peer_sysfs_release(struct kobject *kobj)
+{
+ struct hmdfs_peer *peer = to_peer(kobj);
+
+ complete(&peer->kobj_unregister);
+}
+
+static struct kobj_type peer_ktype = {
+ .sysfs_ops = &peer_sysfs_ops,
+ .default_attrs = peer_attrs,
+ .release = peer_sysfs_release,
+};
+
+int hmdfs_register_peer_sysfs(struct hmdfs_sb_info *sbi,
+ struct hmdfs_peer *peer)
+{
+ int err = 0;
+
+ init_completion(&peer->kobj_unregister);
+ err = kobject_init_and_add(&peer->kobj, &peer_ktype, &sbi->kobj,
+ "peer_%llu", peer->device_id);
+ return err;
+}
+
+void hmdfs_release_peer_sysfs(struct hmdfs_peer *peer)
+{
+ kobject_del(&peer->kobj);
+ kobject_put(&peer->kobj);
+ wait_for_completion(&peer->kobj_unregister);
+}
+
+void notify(struct hmdfs_peer *node, struct notify_param *param)
+{
+ struct hmdfs_sb_info *sbi = node->sbi;
+ int in_len;
+
+ if (!param)
+ return;
+ spin_lock(&sbi->notify_fifo_lock);
+ in_len =
+ kfifo_in(&sbi->notify_fifo, param, sizeof(struct notify_param));
+ spin_unlock(&sbi->notify_fifo_lock);
+ if (in_len != sizeof(struct notify_param))
+ return;
+ sysfs_notify(&sbi->kobj, NULL, "cmd");
+}
+
+int hmdfs_sysfs_init(void)
+{
+ hmdfs_kset = kset_create_and_add("hmdfs", NULL, fs_kobj);
+ if (!hmdfs_kset)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void hmdfs_sysfs_exit(void)
+{
+ kset_unregister(hmdfs_kset);
+ hmdfs_kset = NULL;
+}
diff --git a/fs/hmdfs/comm/device_node.h b/fs/hmdfs/comm/device_node.h
new file mode 100644
index 0000000000000000000000000000000000000000..3c99c7fb679fbe723deb4c56e77a52d115992969
--- /dev/null
+++ b/fs/hmdfs/comm/device_node.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/comm/device_node.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_DEVICE_NODE_H
+#define HMDFS_DEVICE_NODE_H
+
+#include "hmdfs.h"
+#include "transport.h"
+
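+/*
+ * Commands userspace writes to the control node: hand over a new socket
+ * fd for a peer, take a single peer offline, or take all peers offline.
+ */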
+enum CTRL_NODE_CMD {
+ CMD_UPDATE_SOCKET = 0,
+ CMD_OFF_LINE,
+ CMD_OFF_LINE_ALL,
+ CMD_CNT,
+};
+
+struct update_socket_param {
+ int32_t cmd;
+ int32_t newfd;
+ uint8_t status;
+ uint8_t masterkey[HMDFS_KEY_SIZE];
+ uint8_t cid[HMDFS_CID_SIZE];
+} __packed;
+
+struct offline_param {
+ int32_t cmd;
+ uint8_t remote_cid[HMDFS_CID_SIZE];
+} __packed;
+
+struct offline_all_param {
+ int32_t cmd;
+} __packed;
+
+enum NOTIFY {
+ NOTIFY_GET_SESSION,
+ NOTIFY_OFFLINE,
+ NOTIFY_NONE,
+ NOTIFY_CNT,
+};
+
+struct notify_param {
+ int32_t notify;
+ int32_t fd;
+ uint8_t remote_cid[HMDFS_CID_SIZE];
+} __packed;
+
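+/*
+ * Typed attribute wrappers: sysfs hands back a bare struct attribute,
+ * and container_of() (via to_sbi_attr()/to_peer_attr() below) recovers
+ * the wrapper so the typed show/store callbacks can be invoked.
+ */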
+struct sbi_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct kobject *kobj, struct sbi_attribute *attr,
+ char *buf);
+ ssize_t (*store)(struct kobject *kobj, struct sbi_attribute *attr,
+ const char *buf, size_t len);
+};
+
+struct peer_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct kobject *kobj, struct peer_attribute *attr,
+ char *buf);
+ ssize_t (*store)(struct kobject *kobj, struct peer_attribute *attr,
+ const char *buf, size_t len);
+};
+
+struct sbi_cmd_attribute {
+ struct attribute attr;
+ int command;
+};
+
+void notify(struct hmdfs_peer *node, struct notify_param *param);
+int hmdfs_register_sysfs(const char *name, struct hmdfs_sb_info *sbi);
+void hmdfs_unregister_sysfs(struct hmdfs_sb_info *sbi);
+void hmdfs_release_sysfs(struct hmdfs_sb_info *sbi);
+int hmdfs_register_peer_sysfs(struct hmdfs_sb_info *sbi,
+ struct hmdfs_peer *peer);
+void hmdfs_release_peer_sysfs(struct hmdfs_peer *peer);
+int hmdfs_sysfs_init(void);
+void hmdfs_sysfs_exit(void);
+
+static inline struct sbi_attribute *to_sbi_attr(struct attribute *x)
+{
+ return container_of(x, struct sbi_attribute, attr);
+}
+
+static inline struct hmdfs_sb_info *to_sbi(struct kobject *x)
+{
+ return container_of(x, struct hmdfs_sb_info, kobj);
+}
+
+static inline struct peer_attribute *to_peer_attr(struct attribute *x)
+{
+ return container_of(x, struct peer_attribute, attr);
+}
+
+static inline struct hmdfs_peer *to_peer(struct kobject *x)
+{
+ return container_of(x, struct hmdfs_peer, kobj);
+}
+#endif
diff --git a/fs/hmdfs/comm/fault_inject.c b/fs/hmdfs/comm/fault_inject.c
new file mode 100644
index 0000000000000000000000000000000000000000..11779b53b0ea38e6daebcf03d4a27edc07b46a47
--- /dev/null
+++ b/fs/hmdfs/comm/fault_inject.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/comm/fault_inject.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include "hmdfs.h"
+#include "fault_inject.h"
+#include "connection.h"
+
+static DECLARE_FAULT_ATTR(fail_default_attr);
+static struct dentry *hmdfs_debugfs_root;
+
+void __init hmdfs_create_debugfs_root(void)
+{
+ hmdfs_debugfs_root = debugfs_create_dir("hmdfs", NULL);
+ if (!hmdfs_debugfs_root)
+ hmdfs_warning("failed to create debugfs directory");
+}
+
+void hmdfs_destroy_debugfs_root(void)
+{
+ debugfs_remove_recursive(hmdfs_debugfs_root);
+ hmdfs_debugfs_root = NULL;
+}
+
+void hmdfs_fault_inject_init(struct hmdfs_fault_inject *fault_inject,
+ const char *name)
+{
+ struct dentry *dir = NULL;
+ struct dentry *parent = NULL;
+ struct fault_attr *attr = &fault_inject->attr;
+
+ if (!hmdfs_debugfs_root)
+ return;
+
+ parent = debugfs_create_dir(name, hmdfs_debugfs_root);
+ if (!parent) {
+ hmdfs_warning("failed to create %s debugfs directory", name);
+ return;
+ }
+
+ *attr = fail_default_attr;
+ dir = fault_create_debugfs_attr("fault_inject", parent, attr);
+ if (IS_ERR(dir)) {
+		hmdfs_warning("failed to create fault_inject debugfs attr");
+ debugfs_remove_recursive(parent);
+ return;
+ }
+ fault_inject->parent = parent;
+ debugfs_create_ulong("op_mask", 0600, dir, &fault_inject->op_mask);
+ debugfs_create_ulong("fail_send_message", 0600, dir,
+ &fault_inject->fail_send_message);
+ debugfs_create_ulong("fake_fid_ver", 0600, dir,
+ &fault_inject->fake_fid_ver);
+ debugfs_create_bool("fail_req", 0600, dir, &fault_inject->fail_req);
+}
+
+void hmdfs_fault_inject_fini(struct hmdfs_fault_inject *fault_inject)
+{
+ debugfs_remove_recursive(fault_inject->parent);
+}
+
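+/*
+ * Decide whether to inject a send-path fault: the command must be
+ * selected in op_mask, fail_send_message must be T_MSG_FAIL (fail with
+ * -EINVAL) or T_MSG_DISCARD (drop the message but report success), and
+ * the generic fault_attr probability must fire.
+ */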
+bool hmdfs_should_fail_sendmsg(struct hmdfs_fault_inject *fault_inject,
+ struct hmdfs_peer *con,
+ struct hmdfs_send_data *msg, int *err)
+{
+ struct hmdfs_head_cmd *head = (struct hmdfs_head_cmd *)msg->head;
+ unsigned long type = fault_inject->fail_send_message;
+
+ if (!test_bit(head->operations.command, &fault_inject->op_mask))
+ return false;
+
+ if (type != T_MSG_FAIL && type != T_MSG_DISCARD)
+ return false;
+
+ if (!should_fail(&fault_inject->attr, 1))
+ return false;
+
+ if (type == T_MSG_FAIL)
+ *err = -EINVAL;
+ else if (type == T_MSG_DISCARD)
+ *err = 0;
+
+ hmdfs_err(
+ "fault injection err %d, %s message, device_id %llu, msg_id %u, cmd %d",
+ *err, (type == T_MSG_FAIL) ? "fail" : "discard", con->device_id,
+ le32_to_cpu(head->msg_id), head->operations.command);
+ return true;
+}
+
+bool hmdfs_should_fail_req(struct hmdfs_fault_inject *fault_inject,
+ struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ int *err)
+{
+ if (!test_bit(cmd->operations.command, &fault_inject->op_mask))
+ return false;
+
+ if (!fault_inject->fail_req)
+ return false;
+
+ if (!should_fail(&fault_inject->attr, 1))
+ return false;
+
+ *err = -EIO;
+ hmdfs_err("fault injection err %d, device_id %llu, msg_id %u, cmd %d",
+ *err, con->device_id, le32_to_cpu(cmd->msg_id),
+ cmd->operations.command);
+ return true;
+}
+
+bool hmdfs_should_fake_fid_ver(struct hmdfs_fault_inject *fault_inject,
+ struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *cmd,
+ enum CHANGE_FID_VER_TYPE fake_type)
+{
+ unsigned long type = fault_inject->fake_fid_ver;
+
+ if (!test_bit(cmd->operations.command, &fault_inject->op_mask))
+ return false;
+
+ if (type != fake_type)
+ return false;
+
+ if (!should_fail(&fault_inject->attr, 1))
+ return false;
+
+ hmdfs_err(
+ "fault injection to change fid ver by %s cookie, device_id %llu, msg_id %u, cmd %d",
+ (type == T_BOOT_COOKIE) ? "boot" : "con", con->device_id,
+ le32_to_cpu(cmd->msg_id), cmd->operations.command);
+ return true;
+}
diff --git a/fs/hmdfs/comm/fault_inject.h b/fs/hmdfs/comm/fault_inject.h
new file mode 100644
index 0000000000000000000000000000000000000000..be8876ab0328e4a1aa1d71b5aa1f2b9946db348b
--- /dev/null
+++ b/fs/hmdfs/comm/fault_inject.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/comm/fault_inject.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_FAULT_INJECT_H
+#define HMDFS_FAULT_INJECT_H
+
+#include <linux/fault-inject.h>
+#include "protocol.h"
+
+struct hmdfs_fault_inject {
+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+ struct fault_attr attr;
+ struct dentry *parent;
+ unsigned long op_mask;
+ unsigned long fail_send_message;
+ unsigned long fake_fid_ver;
+ bool fail_req;
+#endif
+};
+
+enum FAIL_MESSAGE_TYPE {
+ T_MSG_FAIL = 1,
+ T_MSG_DISCARD = 2,
+};
+
+enum CHANGE_FID_VER_TYPE {
+ T_BOOT_COOKIE = 1,
+ T_CON_COOKIE = 2,
+};
+
+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+void __init hmdfs_create_debugfs_root(void);
+void hmdfs_destroy_debugfs_root(void);
+
+void hmdfs_fault_inject_init(struct hmdfs_fault_inject *fault_inject,
+ const char *name);
+void hmdfs_fault_inject_fini(struct hmdfs_fault_inject *fault_inject);
+bool hmdfs_should_fail_sendmsg(struct hmdfs_fault_inject *fault_inject,
+ struct hmdfs_peer *con,
+ struct hmdfs_send_data *msg, int *err);
+bool hmdfs_should_fail_req(struct hmdfs_fault_inject *fault_inject,
+ struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ int *err);
+bool hmdfs_should_fake_fid_ver(struct hmdfs_fault_inject *fault_inject,
+ struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *cmd,
+ enum CHANGE_FID_VER_TYPE fake_type);
+#else
+static inline void __init hmdfs_create_debugfs_root(void) {}
+static inline void hmdfs_destroy_debugfs_root(void) {}
+
+static inline void
+hmdfs_fault_inject_init(struct hmdfs_fault_inject *fault_inject,
+ const char *name)
+{
+}
+static inline void
+hmdfs_fault_inject_fini(struct hmdfs_fault_inject *fault_inject)
+{
+}
+static inline bool
+hmdfs_should_fail_sendmsg(struct hmdfs_fault_inject *fault_inject,
+ struct hmdfs_peer *con, struct hmdfs_send_data *msg,
+ int *err)
+{
+ return false;
+}
+static inline bool
+hmdfs_should_fail_req(struct hmdfs_fault_inject *fault_inject,
+ struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ int *err)
+{
+ return false;
+}
+static inline bool
+hmdfs_should_fake_fid_ver(struct hmdfs_fault_inject *fault_inject,
+ struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ enum CHANGE_FID_VER_TYPE fake_type)
+{
+ return false;
+}
+#endif
+
+#endif // HMDFS_FAULT_INJECT_H
diff --git a/fs/hmdfs/comm/message_verify.c b/fs/hmdfs/comm/message_verify.c
new file mode 100644
index 0000000000000000000000000000000000000000..c9eb94d8b615eaf4c0675b5a6756147f2ab6a3a5
--- /dev/null
+++ b/fs/hmdfs/comm/message_verify.c
@@ -0,0 +1,985 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/comm/message_verify.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include "message_verify.h"
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/limits.h>
+
+#include "connection.h"
+#include "hmdfs.h"
+#include "hmdfs_server.h"
+
+size_t message_length[C_FLAG_SIZE][F_SIZE][HMDFS_MESSAGE_MIN_MAX];
+bool need_response[F_SIZE];
+
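+/*
+ * message_length[flag][cmd] records a min length, a max length and a
+ * judge mode for every request and response; presumably JUDGE_RANGE
+ * accepts any length within [min, max] while JUDGE_BIN only accepts an
+ * exact match, with the actual check done at message-verify time.
+ */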
+void hmdfs_message_verify_init(void)
+{
+ int flag, cmd;
+
+ for (cmd = 0; cmd < F_SIZE; cmd++)
+ need_response[cmd] = true;
+ need_response[F_RELEASE] = false;
+ need_response[F_CONNECT_REKEY] = false;
+ need_response[F_DROP_PUSH] = false;
+
+ for (flag = 0; flag < C_FLAG_SIZE; flag++) {
+ for (cmd = 0; cmd < F_SIZE; cmd++) {
+ message_length[flag][cmd][HMDFS_MESSAGE_MIN_INDEX] = 1;
+ message_length[flag][cmd][HMDFS_MESSAGE_MAX_INDEX] = 0;
+ message_length[flag][cmd][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_RANGE;
+ }
+ }
+
+ message_length[C_REQUEST][F_OPEN][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct open_request);
+ message_length[C_REQUEST][F_OPEN][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct open_request) + PATH_MAX + 1;
+ message_length[C_REQUEST][F_OPEN][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_RANGE;
+ message_length[C_RESPONSE][F_OPEN][HMDFS_MESSAGE_MIN_INDEX] = 0;
+ message_length[C_RESPONSE][F_OPEN][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct open_response);
+ message_length[C_RESPONSE][F_OPEN][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_BIN;
+
+ message_length[C_REQUEST][F_ATOMIC_OPEN][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct atomic_open_request);
+ message_length[C_REQUEST][F_ATOMIC_OPEN][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct atomic_open_request) + PATH_MAX + NAME_MAX + 1;
+ message_length[C_REQUEST][F_ATOMIC_OPEN][HMDFS_MESSAGE_LEN_JUDGE_INDEX]
+ = MESSAGE_LEN_JUDGE_RANGE;
+ message_length[C_RESPONSE][F_ATOMIC_OPEN][HMDFS_MESSAGE_MIN_INDEX] = 0;
+ message_length[C_RESPONSE][F_ATOMIC_OPEN][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct atomic_open_response);
+ message_length[C_RESPONSE][F_ATOMIC_OPEN][HMDFS_MESSAGE_LEN_JUDGE_INDEX]
+ = MESSAGE_LEN_JUDGE_BIN;
+
+ message_length[C_REQUEST][F_RELEASE][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct release_request);
+ message_length[C_REQUEST][F_RELEASE][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct release_request);
+ message_length[C_REQUEST][F_RELEASE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_BIN;
+
+ message_length[C_REQUEST][F_FSYNC][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct fsync_request);
+ message_length[C_REQUEST][F_FSYNC][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct fsync_request);
+ message_length[C_REQUEST][F_FSYNC][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_BIN;
+ message_length[C_RESPONSE][F_FSYNC][HMDFS_MESSAGE_MIN_INDEX] = 0;
+ message_length[C_RESPONSE][F_FSYNC][HMDFS_MESSAGE_MAX_INDEX] = 0;
+ message_length[C_RESPONSE][F_FSYNC][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_BIN;
+
+ message_length[C_REQUEST][F_READPAGE][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct readpage_request);
+ message_length[C_REQUEST][F_READPAGE][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct readpage_request);
+ message_length[C_REQUEST][F_READPAGE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_BIN;
+ message_length[C_RESPONSE][F_READPAGE][HMDFS_MESSAGE_MIN_INDEX] = 0;
+ message_length[C_RESPONSE][F_READPAGE][HMDFS_MESSAGE_MAX_INDEX] =
+ HMDFS_PAGE_SIZE;
+ message_length[C_RESPONSE][F_READPAGE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_RANGE;
+
+ message_length[C_REQUEST][F_READPAGES][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct readpages_request);
+ message_length[C_REQUEST][F_READPAGES][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct readpages_request);
+ message_length[C_REQUEST][F_READPAGES][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_BIN;
+ message_length[C_RESPONSE][F_READPAGES][HMDFS_MESSAGE_MIN_INDEX] = 0;
+ message_length[C_RESPONSE][F_READPAGES][HMDFS_MESSAGE_MAX_INDEX] =
+ HMDFS_READPAGES_NR_MAX * HMDFS_PAGE_SIZE;
+ message_length[C_RESPONSE][F_READPAGES][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_RANGE;
+
+ message_length[C_REQUEST][F_READPAGES_OPEN][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct readpages_open_request);
+ message_length[C_REQUEST][F_READPAGES_OPEN][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct readpages_open_request) + PATH_MAX + 1;
+ message_length[C_REQUEST][F_READPAGES_OPEN][
+ HMDFS_MESSAGE_LEN_JUDGE_INDEX] = MESSAGE_LEN_JUDGE_RANGE;
+ message_length[C_RESPONSE][F_READPAGES_OPEN][HMDFS_MESSAGE_MIN_INDEX] =
+ 0;
+ message_length[C_RESPONSE][F_READPAGES_OPEN][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct readpages_open_response) +
+ HMDFS_READPAGES_NR_MAX * HMDFS_PAGE_SIZE;
+ message_length[C_RESPONSE][F_READPAGES_OPEN][
+ HMDFS_MESSAGE_LEN_JUDGE_INDEX] = MESSAGE_LEN_JUDGE_RANGE;
+
+ message_length[C_REQUEST][F_WRITEPAGE][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct writepage_request) + HMDFS_PAGE_SIZE;
+ message_length[C_REQUEST][F_WRITEPAGE][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct writepage_request) + HMDFS_PAGE_SIZE;
+ message_length[C_REQUEST][F_WRITEPAGE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_BIN;
+ message_length[C_RESPONSE][F_WRITEPAGE][HMDFS_MESSAGE_MIN_INDEX] = 0;
+ message_length[C_RESPONSE][F_WRITEPAGE][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct writepage_response);
+ message_length[C_RESPONSE][F_WRITEPAGE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_BIN;
+
+ message_length[C_REQUEST][F_ITERATE][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct readdir_request);
+ message_length[C_REQUEST][F_ITERATE][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct readdir_request) + PATH_MAX + 1;
+ message_length[C_REQUEST][F_ITERATE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_RANGE;
+ message_length[C_RESPONSE][F_ITERATE][HMDFS_MESSAGE_MIN_INDEX] = 0;
+ message_length[C_RESPONSE][F_ITERATE][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(__le64) + HMDFS_MAX_MESSAGE_LEN;
+ message_length[C_RESPONSE][F_ITERATE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_RANGE;
+
+ message_length[C_REQUEST][F_MKDIR][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct mkdir_request);
+ message_length[C_REQUEST][F_MKDIR][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct mkdir_request) + PATH_MAX + NAME_MAX + 2;
+ message_length[C_REQUEST][F_MKDIR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_RANGE;
+ message_length[C_RESPONSE][F_MKDIR][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct hmdfs_inodeinfo_response);
+ message_length[C_RESPONSE][F_MKDIR][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct hmdfs_inodeinfo_response);
+ message_length[C_RESPONSE][F_MKDIR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_BIN;
+
+ message_length[C_REQUEST][F_CREATE][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct create_request);
+ message_length[C_REQUEST][F_CREATE][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct create_request) + PATH_MAX + NAME_MAX + 2;
+ message_length[C_REQUEST][F_CREATE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_RANGE;
+ message_length[C_RESPONSE][F_CREATE][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct hmdfs_inodeinfo_response);
+ message_length[C_RESPONSE][F_CREATE][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct hmdfs_inodeinfo_response);
+ message_length[C_RESPONSE][F_CREATE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_BIN;
+
+ message_length[C_REQUEST][F_RMDIR][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct rmdir_request);
+ message_length[C_REQUEST][F_RMDIR][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct rmdir_request) + PATH_MAX + NAME_MAX + 2;
+ message_length[C_REQUEST][F_RMDIR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_RANGE;
+ message_length[C_RESPONSE][F_RMDIR][HMDFS_MESSAGE_MIN_INDEX] = 0;
+ message_length[C_RESPONSE][F_RMDIR][HMDFS_MESSAGE_MAX_INDEX] = 0;
+ message_length[C_RESPONSE][F_RMDIR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_BIN;
+
+ message_length[C_REQUEST][F_UNLINK][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct unlink_request);
+ message_length[C_REQUEST][F_UNLINK][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct unlink_request) + PATH_MAX + NAME_MAX + 2;
+ message_length[C_REQUEST][F_UNLINK][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_RANGE;
+ message_length[C_RESPONSE][F_UNLINK][HMDFS_MESSAGE_MIN_INDEX] = 0;
+ message_length[C_RESPONSE][F_UNLINK][HMDFS_MESSAGE_MAX_INDEX] = 0;
+ message_length[C_RESPONSE][F_UNLINK][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_BIN;
+
+ message_length[C_REQUEST][F_RENAME][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct rename_request);
+ message_length[C_REQUEST][F_RENAME][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct rename_request) + 4 + 4 * PATH_MAX;
+ message_length[C_REQUEST][F_RENAME][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_RANGE;
+ message_length[C_RESPONSE][F_RENAME][HMDFS_MESSAGE_MIN_INDEX] = 0;
+ message_length[C_RESPONSE][F_RENAME][HMDFS_MESSAGE_MAX_INDEX] = 0;
+ message_length[C_RESPONSE][F_RENAME][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_BIN;
+
+ message_length[C_REQUEST][F_SETATTR][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct setattr_request);
+ message_length[C_REQUEST][F_SETATTR][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct setattr_request) + PATH_MAX + 1;
+ message_length[C_REQUEST][F_SETATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_RANGE;
+ message_length[C_RESPONSE][F_SETATTR][HMDFS_MESSAGE_MIN_INDEX] = 0;
+ message_length[C_RESPONSE][F_SETATTR][HMDFS_MESSAGE_MAX_INDEX] = 0;
+ message_length[C_RESPONSE][F_SETATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_BIN;
+
+ message_length[C_REQUEST][F_GETATTR][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct getattr_request);
+ message_length[C_REQUEST][F_GETATTR][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct getattr_request) + PATH_MAX + 1;
+ message_length[C_REQUEST][F_GETATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_RANGE;
+ message_length[C_RESPONSE][F_GETATTR][HMDFS_MESSAGE_MIN_INDEX] = 0;
+ message_length[C_RESPONSE][F_GETATTR][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct getattr_response);
+ message_length[C_RESPONSE][F_GETATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_BIN;
+
+ message_length[C_REQUEST][F_STATFS][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct statfs_request);
+ message_length[C_REQUEST][F_STATFS][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct statfs_request) + PATH_MAX + 1;
+ message_length[C_REQUEST][F_STATFS][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_RANGE;
+ message_length[C_RESPONSE][F_STATFS][HMDFS_MESSAGE_MIN_INDEX] = 0;
+ message_length[C_RESPONSE][F_STATFS][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct statfs_response);
+ message_length[C_RESPONSE][F_STATFS][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_BIN;
+
+ message_length[C_REQUEST][F_SYNCFS][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct syncfs_request);
+ message_length[C_REQUEST][F_SYNCFS][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct syncfs_request);
+ message_length[C_REQUEST][F_SYNCFS][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_BIN;
+ message_length[C_RESPONSE][F_SYNCFS][HMDFS_MESSAGE_MIN_INDEX] = 0;
+ message_length[C_RESPONSE][F_SYNCFS][HMDFS_MESSAGE_MAX_INDEX] = 0;
+ message_length[C_RESPONSE][F_SYNCFS][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_BIN;
+
+ message_length[C_REQUEST][F_GETXATTR][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct getxattr_request);
+ message_length[C_REQUEST][F_GETXATTR][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct getxattr_request) + PATH_MAX + XATTR_NAME_MAX + 2;
+ message_length[C_REQUEST][F_GETXATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_RANGE;
+ message_length[C_RESPONSE][F_GETXATTR][HMDFS_MESSAGE_MIN_INDEX] = 0;
+ message_length[C_RESPONSE][F_GETXATTR][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct getxattr_response) + HMDFS_XATTR_SIZE_MAX;
+ message_length[C_RESPONSE][F_GETXATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_RANGE;
+
+ message_length[C_REQUEST][F_SETXATTR][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct setxattr_request);
+ message_length[C_REQUEST][F_SETXATTR][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct setxattr_request) + PATH_MAX + XATTR_NAME_MAX +
+ HMDFS_XATTR_SIZE_MAX + 2;
+ message_length[C_REQUEST][F_SETXATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_RANGE;
+ message_length[C_RESPONSE][F_SETXATTR][HMDFS_MESSAGE_MIN_INDEX] = 0;
+ message_length[C_RESPONSE][F_SETXATTR][HMDFS_MESSAGE_MAX_INDEX] = 0;
+ message_length[C_RESPONSE][F_SETXATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_BIN;
+
+ message_length[C_REQUEST][F_LISTXATTR][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct listxattr_request);
+ message_length[C_REQUEST][F_LISTXATTR][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct listxattr_request) + PATH_MAX + 1;
+ message_length[C_REQUEST][F_LISTXATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_RANGE;
+ message_length[C_RESPONSE][F_LISTXATTR][HMDFS_MESSAGE_MIN_INDEX] = 0;
+ message_length[C_RESPONSE][F_LISTXATTR][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct listxattr_response) + HMDFS_LISTXATTR_SIZE_MAX;
+ message_length[C_RESPONSE][F_LISTXATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_RANGE;
+
+ message_length[C_REQUEST][F_CONNECT_REKEY][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct connection_rekey_request);
+ message_length[C_REQUEST][F_CONNECT_REKEY][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct connection_rekey_request);
+ message_length[C_REQUEST][F_CONNECT_REKEY]
+ [HMDFS_MESSAGE_LEN_JUDGE_INDEX] = MESSAGE_LEN_JUDGE_BIN;
+
+ message_length[C_REQUEST][F_DROP_PUSH][HMDFS_MESSAGE_MIN_INDEX] =
+ sizeof(struct drop_push_request);
+ message_length[C_REQUEST][F_DROP_PUSH][HMDFS_MESSAGE_MAX_INDEX] =
+ sizeof(struct drop_push_request) + PATH_MAX + 1;
+ message_length[C_REQUEST][F_DROP_PUSH][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+ MESSAGE_LEN_JUDGE_RANGE;
+}
+
+static void find_first_no_slash(const char **name, int *len)
+{
+ const char *s = *name;
+ int l = *len;
+
+ while (*s == '/' && l > 0) {
+ s++;
+ l--;
+ }
+
+ *name = s;
+ *len = l;
+}
+
+static void find_first_slash(const char **name, int *len)
+{
+ const char *s = *name;
+ int l = *len;
+
+ while (*s != '/' && l > 0) {
+ s++;
+ l--;
+ }
+
+ *name = s;
+ *len = l;
+}
+
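+/*
+ * Walk the path component by component and return true if any component
+ * is exactly "..", which would let a request escape the shared
+ * directory.
+ */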
+static bool path_contain_dotdot(const char *name, int len)
+{
+ while (true) {
+ find_first_no_slash(&name, &len);
+
+ if (len == 0)
+ return false;
+
+ if (len >= 2 && name[0] == '.' && name[1] == '.' &&
+ (len == 2 || name[2] == '/'))
+ return true;
+
+ find_first_slash(&name, &len);
+ }
+}
+
+static int hmdfs_open_message_verify(int flag, size_t len, void *data)
+{
+ struct open_request *req = NULL;
+ size_t tmp_len = 0;
+ int path_len;
+
+ if (flag != C_REQUEST || !data)
+ return 0;
+
+ req = data;
+ path_len = le32_to_cpu(req->path_len);
+ tmp_len = strnlen(req->buf, PATH_MAX);
+ if (tmp_len == PATH_MAX ||
+ tmp_len != len - sizeof(struct open_request) - 1 ||
+ path_len != tmp_len) {
+ hmdfs_err("verify fail");
+ return -EINVAL;
+ }
+
+	/*
+	 * We only allow the server to open files inside hmdfs, so we
+	 * must make sure the path doesn't contain "..".
+	 */
+ if (path_contain_dotdot(req->buf, path_len)) {
+ hmdfs_err("verify fail, path contain dotdot");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
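+/*
+ * An atomic_open request carries two NUL-terminated strings back to
+ * back in req->buf: the directory path, then the file name. Both
+ * measured lengths must match the declared path_len/file_len as well as
+ * the total wire length.
+ */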
+static int hmdfs_atomic_open_verify(int flag, size_t len, void *data)
+{
+ struct atomic_open_request *req = NULL;
+ size_t total_len;
+ size_t path_len;
+ size_t max_path_size;
+ size_t file_len;
+ size_t max_file_size;
+
+ if (flag != C_REQUEST || !data)
+ return 0;
+
+ req = data;
+ total_len = len - sizeof(*req);
+ max_path_size = min_t(size_t, PATH_MAX, total_len);
+ path_len = strnlen(req->buf, max_path_size);
+ /* file name need 2 byte at least */
+	/* the file name needs at least 2 bytes (one char plus the NUL) */
+ hmdfs_err("verify fail, len %zu, path_len %zu", len, path_len);
+ return -EINVAL;
+ }
+
+ max_file_size = min_t(size_t, NAME_MAX + 1, total_len - path_len - 1);
+ file_len = strnlen(req->buf + path_len + 1, max_file_size);
+
+ if (file_len == max_file_size ||
+ total_len != path_len + 1 + file_len + 1 ||
+ le32_to_cpu(req->path_len) != path_len ||
+ le32_to_cpu(req->file_len) != file_len) {
+		hmdfs_err("verify fail total_len %zu, path_len %zu, declared path_len %u, file_len %zu, declared file_len %u",
+			  total_len, path_len, le32_to_cpu(req->path_len),
+			  file_len, le32_to_cpu(req->file_len));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int hmdfs_iterate_verify(int flag, size_t len, void *data)
+{
+ int err = 0;
+ struct readdir_request *tmp_request = NULL;
+ char *tmp_char = NULL;
+ size_t tmp_len = 0;
+
+ if (flag == C_REQUEST) {
+ if (data) {
+ tmp_request = data;
+ tmp_char = tmp_request->path;
+ tmp_len = strnlen(tmp_char, PATH_MAX);
+ } else {
+ return err;
+ }
+
+ if (le32_to_cpu(tmp_request->path_len) != tmp_len ||
+ len - sizeof(struct readdir_request) - 1 != tmp_len) {
+ err = -EINVAL;
+ hmdfs_err("verify fail");
+ return err;
+ }
+ }
+
+ return err;
+}
+
+static int hmdfs_mkdir_verify(int flag, size_t len, void *data)
+{
+ int err = 0;
+ struct mkdir_request *tmp_request = NULL;
+ char *tmp_char = NULL;
+ size_t tmp_path_len = 0;
+ size_t tmp_name_len = 0;
+ size_t tmp_char_path_len = 0;
+ size_t tmp_char_name_len = 0;
+
+ if (flag == C_REQUEST) {
+ if (data) {
+ tmp_request = data;
+ tmp_char = tmp_request->path;
+ tmp_path_len = le32_to_cpu(tmp_request->path_len);
+ tmp_name_len = le32_to_cpu(tmp_request->name_len);
+ tmp_char_path_len = strnlen(tmp_char, PATH_MAX);
+ tmp_char_name_len = strnlen(
+ tmp_char + tmp_char_path_len + 1, NAME_MAX);
+ } else {
+ return err;
+ }
+
+ if (tmp_path_len != tmp_char_path_len ||
+ tmp_name_len != tmp_char_name_len ||
+ len - sizeof(struct mkdir_request) !=
+ tmp_path_len + 1 + tmp_name_len + 1) {
+ err = -EINVAL;
+ hmdfs_err("verify fail");
+ return err;
+ }
+ }
+ return err;
+}
+
+static int hmdfs_create_verify(int flag, size_t len, void *data)
+{
+ int err = 0;
+ struct create_request *tmp_request = NULL;
+ char *tmp_char = NULL;
+ size_t tmp_path_len = 0;
+ size_t tmp_name_len = 0;
+ size_t tmp_char_path_len = 0;
+ size_t tmp_char_name_len = 0;
+
+ if (flag == C_REQUEST) {
+ if (data) {
+ tmp_request = data;
+ tmp_char = tmp_request->path;
+ tmp_path_len = le32_to_cpu(tmp_request->path_len);
+ tmp_name_len = le32_to_cpu(tmp_request->name_len);
+ tmp_char_path_len = strnlen(tmp_char, PATH_MAX);
+ tmp_char_name_len = strnlen(
+ tmp_char + tmp_char_path_len + 1, NAME_MAX);
+ } else {
+ return err;
+ }
+
+ if (tmp_path_len != tmp_char_path_len ||
+ tmp_name_len != tmp_char_name_len ||
+ len - sizeof(struct create_request) !=
+ tmp_path_len + 1 + tmp_name_len + 1) {
+ err = -EINVAL;
+ hmdfs_err("verify fail");
+ return err;
+ }
+ }
+ return err;
+}
+
+static int hmdfs_rmdir_verify(int flag, size_t len, void *data)
+{
+ int err = 0;
+ struct rmdir_request *tmp_request = NULL;
+ char *tmp_char = NULL;
+ size_t tmp_path_len = 0;
+ size_t tmp_name_len = 0;
+ size_t tmp_char_path_len = 0;
+ size_t tmp_char_name_len = 0;
+
+ if (flag == C_REQUEST) {
+ if (data) {
+ tmp_request = data;
+ tmp_char = tmp_request->path;
+ tmp_path_len = le32_to_cpu(tmp_request->path_len);
+ tmp_name_len = le32_to_cpu(tmp_request->name_len);
+ tmp_char_path_len = strnlen(tmp_char, PATH_MAX);
+ tmp_char_name_len = strnlen(
+ tmp_char + tmp_char_path_len + 1, NAME_MAX);
+ } else {
+ return err;
+ }
+
+ if (tmp_path_len != tmp_char_path_len ||
+ tmp_name_len != tmp_char_name_len ||
+ len - sizeof(struct rmdir_request) !=
+ tmp_path_len + 1 + tmp_name_len + 1) {
+ err = -EINVAL;
+ hmdfs_err("verify fail");
+ return err;
+ }
+ }
+
+ return err;
+}
+
+static int hmdfs_unlink_verify(int flag, size_t len, void *data)
+{
+ int err = 0;
+ struct unlink_request *tmp_request = NULL;
+ char *tmp_char = NULL;
+ size_t tmp_path_len = 0;
+ size_t tmp_name_len = 0;
+ size_t tmp_char_path_len = 0;
+ size_t tmp_char_name_len = 0;
+
+ if (flag == C_REQUEST) {
+ if (data) {
+ tmp_request = data;
+ tmp_char = tmp_request->path;
+ tmp_path_len = le32_to_cpu(tmp_request->path_len);
+ tmp_name_len = le32_to_cpu(tmp_request->name_len);
+ tmp_char_path_len = strnlen(tmp_char, PATH_MAX);
+ tmp_char_name_len = strnlen(
+ tmp_char + tmp_char_path_len + 1, NAME_MAX);
+ } else {
+ return err;
+ }
+
+ if (tmp_path_len != tmp_char_path_len ||
+ tmp_name_len != tmp_char_name_len ||
+ len - sizeof(struct unlink_request) !=
+ tmp_path_len + 1 + tmp_name_len + 1) {
+ err = -EINVAL;
+ hmdfs_err("verify fail");
+ return err;
+ }
+ }
+
+ return err;
+}
+
+static int hmdfs_rename_verify(int flag, size_t len, void *data)
+{
+ int err = 0;
+ struct rename_request *tmp_request = NULL;
+ char *tmp_char = NULL;
+ size_t tmp_old_path_len = 0;
+ size_t tmp_new_path_len = 0;
+ size_t tmp_old_name_len = 0;
+ size_t tmp_new_name_len = 0;
+ size_t tmp_char_old_path_len = 0;
+ size_t tmp_char_new_path_len = 0;
+ size_t tmp_char_old_name_len = 0;
+ size_t tmp_char_new_name_len = 0;
+
+ if (flag == C_REQUEST) {
+ if (data) {
+ tmp_request = data;
+ tmp_char = tmp_request->path;
+
+ tmp_old_path_len =
+ le32_to_cpu(tmp_request->old_path_len);
+ tmp_new_path_len =
+ le32_to_cpu(tmp_request->new_path_len);
+ tmp_old_name_len =
+ le32_to_cpu(tmp_request->old_name_len);
+ tmp_new_name_len =
+ le32_to_cpu(tmp_request->new_name_len);
+
+ tmp_char_old_path_len = strnlen(tmp_char, PATH_MAX);
+ tmp_char_new_path_len = strnlen(
+ tmp_char + tmp_char_old_path_len + 1, PATH_MAX);
+
+ tmp_char_old_name_len =
+ strnlen(tmp_char + tmp_char_old_path_len + 1 +
+ tmp_char_new_path_len + 1,
+ PATH_MAX);
+ tmp_char_new_name_len =
+ strnlen(tmp_char + tmp_char_old_path_len + 1 +
+ tmp_char_new_path_len + 1 +
+ tmp_char_old_name_len + 1,
+ PATH_MAX);
+ } else {
+ return err;
+ }
+
+ if (tmp_new_name_len != tmp_char_new_name_len ||
+ tmp_old_name_len != tmp_char_old_name_len ||
+ tmp_new_path_len != tmp_char_new_path_len ||
+ tmp_old_path_len != tmp_char_old_path_len ||
+ len - sizeof(struct rename_request) !=
+ tmp_new_name_len + 1 + tmp_old_name_len + 1 +
+ tmp_new_path_len + 1 + tmp_old_path_len +
+ 1) {
+ err = -EINVAL;
+ hmdfs_err("verify fail");
+ return err;
+ }
+ }
+
+ return err;
+}
+
+static int hmdfs_setattr_verify(int flag, size_t len, void *data)
+{
+ int err = 0;
+ struct setattr_request *tmp_request = NULL;
+ char *tmp_char = NULL;
+ size_t tmp_len = 0;
+
+ if (flag == C_REQUEST) {
+ if (data) {
+ tmp_request = data;
+ tmp_char = tmp_request->buf;
+ tmp_len = strnlen(tmp_char, PATH_MAX);
+ } else {
+ return err;
+ }
+
+ if (tmp_len != len - sizeof(struct setattr_request) - 1 ||
+ le32_to_cpu(tmp_request->path_len) != tmp_len) {
+ err = -EINVAL;
+ hmdfs_err("verify fail");
+ return err;
+ }
+ }
+
+ return err;
+}
+
+static int hmdfs_getattr_verify(int flag, size_t len, void *data)
+{
+ struct getattr_request *req = NULL;
+ size_t tmp_len;
+
+ if (flag != C_REQUEST || !data)
+ return 0;
+
+ req = data;
+ tmp_len = strnlen(req->buf, PATH_MAX);
+ if (tmp_len != len - sizeof(struct getattr_request) - 1 ||
+ le32_to_cpu(req->path_len) != tmp_len) {
+ hmdfs_err("verify fail");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int hmdfs_getxattr_verify(int flag, size_t len, void *data)
+{
+ struct getxattr_request *req = NULL;
+ struct getxattr_response *resp = NULL;
+ size_t path_len = 0;
+ size_t name_len = 0;
+ size_t size = 0;
+
+ if (!data)
+ return 0;
+
+ if (flag == C_REQUEST) {
+ req = data;
+ path_len = le32_to_cpu(req->path_len);
+ name_len = le32_to_cpu(req->name_len);
+ size = le32_to_cpu(req->size);
+ if (path_len >= PATH_MAX ||
+ path_len != strnlen(req->buf, PATH_MAX) ||
+ name_len !=
+ strnlen(req->buf + path_len + 1, XATTR_NAME_MAX) ||
+ size > HMDFS_XATTR_SIZE_MAX)
+ return -EINVAL;
+ } else {
+ resp = data;
+ size = le32_to_cpu(resp->size);
+ if (len != sizeof(struct getxattr_response) &&
+ len < sizeof(struct getxattr_response) + size)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int hmdfs_setxattr_verify(int flag, size_t len, void *data)
+{
+ struct setxattr_request *req = NULL;
+ size_t path_len = 0;
+ size_t name_len = 0;
+ size_t size = 0;
+
+ /* No need to verify response */
+ if (flag != C_REQUEST || !data)
+ return 0;
+
+ req = data;
+ path_len = le32_to_cpu(req->path_len);
+ name_len = le32_to_cpu(req->name_len);
+ size = le32_to_cpu(req->size);
+ if (path_len >= PATH_MAX || path_len != strnlen(req->buf, PATH_MAX) ||
+ name_len != strnlen(req->buf + path_len + 1, XATTR_NAME_MAX) ||
+ len != path_len + name_len + size + 2 +
+ sizeof(struct setxattr_request) ||
+ size > HMDFS_XATTR_SIZE_MAX)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int hmdfs_listxattr_verify(int flag, size_t len, void *data)
+{
+ struct listxattr_request *req = NULL;
+ struct listxattr_response *resp = NULL;
+ size_t path_len = 0;
+ size_t size = 0;
+
+ if (!data)
+ return 0;
+
+ if (flag == C_REQUEST) {
+ req = data;
+ path_len = le32_to_cpu(req->path_len);
+ size = le32_to_cpu(req->size);
+ if (path_len >= PATH_MAX ||
+ path_len != strnlen(req->buf, PATH_MAX) ||
+ size > HMDFS_LISTXATTR_SIZE_MAX)
+ return -EINVAL;
+ } else {
+ resp = data;
+ size = le32_to_cpu(resp->size);
+ if (len != sizeof(struct listxattr_response) &&
+ len < sizeof(struct listxattr_response) + size)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int hmdfs_writepage_verify(int flag, size_t len, void *data)
+{
+ struct writepage_request *req = NULL;
+ __u32 count;
+
+ if (flag != C_REQUEST || !data)
+ return 0;
+
+ req = data;
+ count = le32_to_cpu(req->count);
+ if (count == 0 || count > HMDFS_PAGE_SIZE ||
+ len - sizeof(struct writepage_request) != HMDFS_PAGE_SIZE) {
+		hmdfs_err("verify fail, count is %u", count);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int hmdfs_statfs_verify(int flag, size_t len, void *data)
+{
+ int err = 0;
+ struct statfs_request *tmp_request = NULL;
+ char *tmp_char = NULL;
+ size_t tmp_len = 0;
+
+ if (flag == C_REQUEST) {
+ if (data) {
+ tmp_request = data;
+ tmp_char = tmp_request->path;
+ tmp_len = strnlen(tmp_char, PATH_MAX);
+ } else {
+ return err;
+ }
+
+ if (le32_to_cpu(tmp_request->path_len) != tmp_len ||
+ tmp_len != len - sizeof(struct statfs_request) - 1) {
+ err = -EINVAL;
+ hmdfs_err("verify fail");
+ return err;
+ }
+ }
+
+ return err;
+}
+
+static int hmdfs_readpages_verify(int flag, size_t len, void *data)
+{
+ struct readpages_request *req = NULL;
+ unsigned int size;
+
+ if (flag != C_REQUEST || !data)
+ return 0;
+
+ req = data;
+ size = le32_to_cpu(req->size);
+ if (size > HMDFS_READPAGES_NR_MAX * HMDFS_PAGE_SIZE) {
+ hmdfs_err("verify fail, invalid req->size %u", size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int hmdfs_readpages_open_verify(int flag, size_t len, void *data)
+{
+ struct readpages_open_request *req = NULL;
+ unsigned int size;
+ size_t tmp_len;
+
+ if (flag != C_REQUEST || !data)
+ return 0;
+
+ req = data;
+ size = le32_to_cpu(req->size);
+ tmp_len = strnlen(req->buf, PATH_MAX);
+ if (tmp_len + 1 != len - sizeof(*req) ||
+ le32_to_cpu(req->path_len) != tmp_len ||
+ size > HMDFS_READPAGES_NR_MAX * HMDFS_PAGE_SIZE) {
+ hmdfs_err("verify fail, req->size %u", size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+typedef int (*hmdfs_message_verify_func)(int, size_t, void *);
+
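+/*
+ * Per-command payload verifiers, indexed by FILE_CMD. A NULL entry means
+ * the generic min/max length check in hmdfs_message_verify() is enough.
+ */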
+static const hmdfs_message_verify_func message_verify[F_SIZE] = {
+ [F_OPEN] = hmdfs_open_message_verify,
+ [F_WRITEPAGE] = hmdfs_writepage_verify,
+ [F_ITERATE] = hmdfs_iterate_verify,
+ [F_MKDIR] = hmdfs_mkdir_verify,
+ [F_CREATE] = hmdfs_create_verify,
+ [F_RMDIR] = hmdfs_rmdir_verify,
+ [F_UNLINK] = hmdfs_unlink_verify,
+ [F_RENAME] = hmdfs_rename_verify,
+ [F_SETATTR] = hmdfs_setattr_verify,
+ [F_STATFS] = hmdfs_statfs_verify,
+ [F_GETATTR] = hmdfs_getattr_verify,
+ [F_GETXATTR] = hmdfs_getxattr_verify,
+ [F_SETXATTR] = hmdfs_setxattr_verify,
+ [F_LISTXATTR] = hmdfs_listxattr_verify,
+ [F_READPAGES] = hmdfs_readpages_verify,
+ [F_READPAGES_OPEN] = hmdfs_readpages_open_verify,
+ [F_ATOMIC_OPEN] = hmdfs_atomic_open_verify,
+};
+
+static void handle_bad_message(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *head, int *err)
+{
+	/*
+	 * A bad message is never noticed by the upper layer, so the upper
+	 * layer would always see ETIME. It is preferable to pass EOPNOTSUPP
+	 * to the upper layer when a bad message (e.g. one with a wrong len)
+	 * is received.
+	 */
+ if (head->operations.cmd_flag == C_RESPONSE) {
+		/*
+		 * Change the msg ret code. To let the upper layer handle
+		 * EOPNOTSUPP, hmdfs_message_verify() should return 0, so
+		 * the err code is cleared as well.
+		 */
+ head->ret_code = cpu_to_le32(-EOPNOTSUPP);
+ *err = 0;
+ } else {
+ if (head->operations.command >= F_SIZE)
+ return;
+		/*
+		 * Some request messages do not need a response. Even if
+		 * one is returned, it is silently ignored in
+		 * hmdfs_response_recv(), so it is safe to send an error
+		 * response here.
+		 */
+ if (need_response[head->operations.command])
+ hmdfs_send_err_response(con, head, -EOPNOTSUPP);
+ }
+}
+
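+/*
+ * Verify an incoming message in three steps: check the cmd_flag and
+ * command fields, check the payload length against the message_length
+ * table, then run the per-command verifier if one is registered.
+ */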
+int hmdfs_message_verify(struct hmdfs_peer *con, struct hmdfs_head_cmd *head,
+ void *data)
+{
+ int err = 0;
+ int flag, cmd, len_type;
+ size_t len, min, max;
+
+ if (!head)
+ return -EINVAL;
+
+ flag = head->operations.cmd_flag;
+ if (flag != C_REQUEST && flag != C_RESPONSE)
+ return -EINVAL;
+
+ cmd = head->operations.command;
+ if (cmd >= F_SIZE || cmd < F_OPEN || cmd == F_RESERVED_0 ||
+ (cmd >= F_RESERVED_1 && cmd <= F_RESERVED_4) || cmd == F_RESERVED_5) {
+ err = -EINVAL;
+ goto handle_bad_msg;
+ }
+
+ if (head->version == DFS_2_0) {
+ len = le32_to_cpu(head->data_len) -
+ sizeof(struct hmdfs_head_cmd);
+ min = message_length[flag][cmd][HMDFS_MESSAGE_MIN_INDEX];
+ if (head->operations.command == F_ITERATE && flag == C_RESPONSE)
+ max = sizeof(struct slice_descriptor) + PAGE_SIZE;
+ else
+ max = message_length[flag][cmd][HMDFS_MESSAGE_MAX_INDEX];
+ len_type =
+ message_length[flag][cmd][HMDFS_MESSAGE_LEN_JUDGE_INDEX];
+
+ if (len_type == MESSAGE_LEN_JUDGE_RANGE) {
+ if (len < min || len > max) {
+ hmdfs_err(
+ "cmd %d -> %d message verify fail, len = %zu",
+ cmd, flag, len);
+ err = -EINVAL;
+ goto handle_bad_msg;
+ }
+ } else {
+ if (len != min && len != max) {
+ hmdfs_err(
+ "cmd %d -> %d message verify fail, len = %zu",
+ cmd, flag, len);
+ err = -EINVAL;
+ goto handle_bad_msg;
+ }
+ }
+
+ if (message_verify[cmd])
+ err = message_verify[cmd](flag, len, data);
+
+ if (err)
+ goto handle_bad_msg;
+
+ return err;
+ }
+
+handle_bad_msg:
+ if (err) {
+ handle_bad_message(con, head, &err);
+ return err;
+ }
+
+ if (head->version == DFS_1_0)
+		return err; // DFS_1_0 messages are not verified for now
+
+ return -EINVAL;
+}
diff --git a/fs/hmdfs/comm/message_verify.h b/fs/hmdfs/comm/message_verify.h
new file mode 100644
index 0000000000000000000000000000000000000000..99e696a448f122735707ed03d86572d280773e70
--- /dev/null
+++ b/fs/hmdfs/comm/message_verify.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/comm/message_verify.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_MESSAGE_VERIFY_H
+#define HMDFS_MESSAGE_VERIFY_H
+
+#include "protocol.h"
+
+enum MESSAGE_LEN_JUDGE_TYPE {
+ MESSAGE_LEN_JUDGE_RANGE = 0,
+ MESSAGE_LEN_JUDGE_BIN = 1,
+};
+
+#define HMDFS_MESSAGE_MIN_INDEX 0
+#define HMDFS_MESSAGE_MAX_INDEX 1
+#define HMDFS_MESSAGE_LEN_JUDGE_INDEX 2
+#define HMDFS_MESSAGE_MIN_MAX 3
+
+void hmdfs_message_verify_init(void);
+int hmdfs_message_verify(struct hmdfs_peer *con, struct hmdfs_head_cmd *head,
+ void *data);
+
+#endif
diff --git a/fs/hmdfs/comm/node_cb.c b/fs/hmdfs/comm/node_cb.c
new file mode 100644
index 0000000000000000000000000000000000000000..21b84d2fff82af4217b88876aceb20605625c854
--- /dev/null
+++ b/fs/hmdfs/comm/node_cb.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/comm/node_cb.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include <linux/list.h>
+
+#include "node_cb.h"
+#include "connection.h"
+
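+/* Registered callbacks, indexed by event and by async (0) / sync (1) */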
+static struct list_head cb_head[NODE_EVT_NR][NODE_EVT_TYPE_NR];
+
+static const char *evt_str_tbl[NODE_EVT_NR] = {
+ "add", "online", "offline", "del",
+};
+
+static inline bool hmdfs_is_valid_node_evt(int evt)
+{
+ return (evt >= 0 && evt < NODE_EVT_NR);
+}
+
+static const char *hmdfs_evt_str(int evt)
+{
+ if (!hmdfs_is_valid_node_evt(evt))
+ return "unknown";
+ return evt_str_tbl[evt];
+}
+
+void hmdfs_node_evt_cb_init(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cb_head); i++) {
+ int j;
+
+ for (j = 0; j < ARRAY_SIZE(cb_head[0]); j++)
+ INIT_LIST_HEAD(&cb_head[i][j]);
+ }
+}
+
+void hmdfs_node_add_evt_cb(struct hmdfs_node_cb_desc *desc, int nr)
+{
+ int i;
+
+ for (i = 0; i < nr; i++) {
+ int evt = desc[i].evt;
+ bool sync = desc[i].sync;
+
+ if (!hmdfs_is_valid_node_evt(evt))
+ continue;
+
+ list_add_tail(&desc[i].list, &cb_head[evt][sync]);
+ }
+}
+
+void hmdfs_node_call_evt_cb(struct hmdfs_peer *conn, int evt, bool sync,
+ unsigned int seq)
+{
+ struct hmdfs_node_cb_desc *desc = NULL;
+
+ hmdfs_info("node 0x%x:0x%llx call %s %s cb seq %u",
+ conn->owner, conn->device_id, hmdfs_evt_str(evt),
+ sync ? "sync" : "async", seq);
+
+ if (!hmdfs_is_valid_node_evt(evt))
+ return;
+
+ list_for_each_entry(desc, &cb_head[evt][sync], list) {
+ if (conn->version < desc->min_version)
+ continue;
+
+ desc->fn(conn, evt, seq);
+ }
+}
diff --git a/fs/hmdfs/comm/node_cb.h b/fs/hmdfs/comm/node_cb.h
new file mode 100644
index 0000000000000000000000000000000000000000..fe53b946f66846909d9e328a6093b496c380e0cf
--- /dev/null
+++ b/fs/hmdfs/comm/node_cb.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/comm/node_cb.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_NODE_CB_H
+#define HMDFS_NODE_CB_H
+
+#include "hmdfs.h"
+
+/* async & sync */
+#define NODE_EVT_TYPE_NR 2
+
+enum {
+ NODE_EVT_ADD = 0,
+ NODE_EVT_ONLINE,
+ NODE_EVT_OFFLINE,
+ NODE_EVT_DEL,
+ NODE_EVT_NR,
+};
+
+struct hmdfs_peer;
+
+typedef void (*hmdfs_node_evt_cb)(struct hmdfs_peer *conn,
+ int evt, unsigned int seq);
+
+struct hmdfs_node_cb_desc {
+ int evt;
+ bool sync;
+ unsigned char min_version;
+ hmdfs_node_evt_cb fn;
+ struct list_head list;
+};
+
+extern void hmdfs_node_evt_cb_init(void);
+
+/* Only initialize during module init */
+extern void hmdfs_node_add_evt_cb(struct hmdfs_node_cb_desc *desc, int nr);
+extern void hmdfs_node_call_evt_cb(struct hmdfs_peer *node, int evt, bool sync,
+ unsigned int seq);
+
+#endif /* HMDFS_NODE_CB_H */
diff --git a/fs/hmdfs/comm/protocol.h b/fs/hmdfs/comm/protocol.h
new file mode 100644
index 0000000000000000000000000000000000000000..a873143f20d7989c45879ae54f9de30c8e977409
--- /dev/null
+++ b/fs/hmdfs/comm/protocol.h
@@ -0,0 +1,489 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/comm/protocol.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_PROTOCOL_H
+#define HMDFS_PROTOCOL_H
+
+#include <linux/fs.h>
+#include <linux/kref.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+struct hmdfs_cmd {
+ __u8 reserved;
+ __u8 cmd_flag;
+ __u8 command;
+ __u8 reserved2;
+} __packed;
+
+#define HMDFS_MSG_MAGIC 0xF7
+#define HMDFS_MAX_MESSAGE_LEN (8 * 1024 * 1024)
+
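+/*
+ * Fixed wire header preceding every message; data_len covers this header
+ * plus the payload that follows it.
+ */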
+struct hmdfs_head_cmd {
+ __u8 magic;
+ __u8 version;
+ __le16 reserved;
+ __le32 data_len;
+ struct hmdfs_cmd operations;
+ __le32 ret_code;
+ __le32 msg_id;
+ __le32 reserved1;
+} __packed;
+
+enum FILE_RECV_STATE {
+ FILE_RECV_PROCESS = 0,
+ FILE_RECV_SUCC,
+ FILE_RECV_ERR_NET,
+ FILE_RECV_ERR_SPC,
+};
+
+struct file_recv_info {
+ void *local_filp;
+ atomic_t local_fslices;
+ atomic_t state;
+};
+
+enum MSG_IDR_TYPE {
+ MSG_IDR_1_0_NONE = 0,
+ MSG_IDR_1_0_MESSAGE_SYNC,
+ MSG_IDR_1_0_PAGE,
+ MSG_IDR_MESSAGE_SYNC,
+ MSG_IDR_MESSAGE_ASYNC,
+ MSG_IDR_PAGE,
+ MSG_IDR_MAX,
+};
+
+struct hmdfs_msg_idr_head {
+ __u32 type;
+ __u32 msg_id;
+ struct kref ref;
+ struct hmdfs_peer *peer;
+};
+
+struct sendmsg_wait_queue {
+ struct hmdfs_msg_idr_head head;
+ wait_queue_head_t response_q;
+ struct list_head async_msg;
+ atomic_t valid;
+ __u32 size;
+ void *buf;
+ __u32 ret;
+ unsigned long start;
+ struct file_recv_info recv_info;
+};
+
+struct hmdfs_send_command {
+ struct hmdfs_cmd operations;
+ void *data;
+ size_t len;
+ void *local_filp;
+ void *out_buf;
+ size_t out_len;
+ __u32 ret_code;
+};
+
+struct hmdfs_req {
+ struct hmdfs_cmd operations;
+	/*
+	 * Normally, the caller ought to set timeout to TIMEOUT_CONFIG, so
+	 * that hmdfs_send_async_request will look up the user-configured
+	 * timeout value in s_cmd_timeout.
+	 *
+	 * However, consider the following scenario:
+	 * the caller may want to issue multiple requests sharing the same
+	 * timeout value, while the user may update the configured value in
+	 * between. To keep timeout handling "atomic" across these requests,
+	 * the timeout field is provided so the caller can pin a value.
+	 */
+ unsigned int timeout;
+ void *data;
+ size_t data_len;
+
+ void *private; // optional
+ size_t private_len; // optional
+};
+
+struct hmdfs_resp {
+ void *out_buf;
+ size_t out_len;
+ __u32 ret_code;
+};
+
+struct hmdfs_msg_parasite {
+ struct hmdfs_msg_idr_head head;
+ struct delayed_work d_work;
+ bool wfired;
+ struct hmdfs_req req;
+ struct hmdfs_resp resp;
+ unsigned long start;
+};
+
+struct hmdfs_send_data {
+ // sect1: head
+ void *head;
+ size_t head_len;
+
+ // sect2: slice descriptor
+ void *sdesc;
+ size_t sdesc_len;
+
+ // sect3: request / response / file slice
+ void *data;
+ size_t len;
+};
+
+struct slice_descriptor {
+ __le32 num_slices;
+ __le32 slice_size;
+ __le32 slice_sn;
+ __le32 content_size;
+} __packed;
+
+enum DFS_VERSION {
+ INVALID_VERSION = 0,
+ DFS_1_0,
+
+ USERSPACE_MAX_VER = 0x3F,
+ DFS_2_0,
+
+ MAX_VERSION = 0xFF
+};
+
+enum CONN_OPERATIONS_VERSION { USERDFS_VERSION, PROTOCOL_VERSION };
+
+enum CMD_FLAG { C_REQUEST = 0, C_RESPONSE = 1, C_FLAG_SIZE };
+
+enum FILE_CMD {
+ F_OPEN = 0,
+ F_RELEASE = 1,
+ F_READPAGE = 2,
+ F_WRITEPAGE = 3,
+ F_ITERATE = 4,
+ F_RESERVED_1 = 5,
+ F_RESERVED_2 = 6,
+ F_RESERVED_3 = 7,
+ F_RESERVED_4 = 8,
+ F_MKDIR = 9,
+ F_RMDIR = 10,
+ F_CREATE = 11,
+ F_UNLINK = 12,
+ F_RENAME = 13,
+ F_SETATTR = 14,
+ F_RESERVED_5 = 15,
+ F_STATFS = 16,
+ F_CONNECT_REKEY = 17,
+ F_DROP_PUSH = 18,
+ F_RESERVED_0 = 19,
+ F_GETATTR = 20,
+ F_FSYNC = 21,
+ F_SYNCFS = 22,
+ F_GETXATTR = 23,
+ F_SETXATTR = 24,
+ F_LISTXATTR = 25,
+ F_READPAGES = 26,
+ F_READPAGES_OPEN = 27,
+ F_ATOMIC_OPEN = 28,
+ F_SIZE,
+};
+
+struct open_request {
+ __u8 file_type;
+ __le32 flags;
+ __le32 path_len;
+ char buf[0];
+} __packed;
+
+struct open_response {
+ __le32 change_detect_cap;
+ __le64 file_ver;
+ __le32 file_id;
+ __le64 file_size;
+ __le64 ino;
+ __le64 ctime;
+ __le32 ctime_nsec;
+ __le64 mtime;
+ __le32 mtime_nsec;
+ __le64 stable_ctime;
+ __le32 stable_ctime_nsec;
+ __le64 ichange_count;
+} __packed;
+
+enum hmdfs_open_flags {
+ HMDFS_O_TRUNC = O_TRUNC,
+ HMDFS_O_EXCL = O_EXCL,
+};
+
+struct atomic_open_request {
+ __le32 open_flags;
+ __le16 mode;
+ __le16 reserved1;
+ __le32 path_len;
+ __le32 file_len;
+ __le64 reserved2[4];
+ char buf[0];
+} __packed;
+
+struct atomic_open_response {
+ __le32 fno;
+ __le16 i_mode;
+ __le16 reserved1;
+ __le32 i_flags;
+ __le32 reserved2;
+ __le64 reserved3[4];
+ struct open_response open_resp;
+} __packed;
+
+struct release_request {
+ __le64 file_ver;
+ __le32 file_id;
+} __packed;
+
+struct fsync_request {
+ __le64 file_ver;
+ __le32 file_id;
+ __le32 datasync;
+ __le64 start;
+ __le64 end;
+} __packed;
+
+struct readpage_request {
+ __le64 file_ver;
+ __le32 file_id;
+ __le32 size;
+ __le64 index;
+} __packed;
+
+struct readpage_response {
+ char buf[0];
+} __packed;
+
+struct readpages_request {
+ __le64 file_ver;
+ __le32 file_id;
+ __le32 size;
+ __le64 index;
+ __le64 reserved;
+} __packed;
+
+struct readpages_response {
+ char buf[0];
+} __packed;
+
+struct readpages_open_request {
+ __u8 file_type;
+ __u8 reserved1[3];
+ __le32 flags;
+ __le32 path_len;
+ __le32 size;
+ __le64 index;
+ __le64 reserved2;
+ char buf[0];
+} __packed;
+
+struct readpages_open_response {
+ struct open_response open_resp;
+ __le64 reserved[4];
+ char buf[0];
+} __packed;
+
+struct writepage_request {
+ __le64 file_ver;
+ __le32 file_id;
+ __le64 index;
+ __le32 count;
+ char buf[0];
+} __packed;
+
+struct writepage_response {
+ __le64 ichange_count;
+ __le64 ctime;
+ __le32 ctime_nsec;
+} __packed;
+
+struct readdir_request {
+ __le64 dcache_crtime;
+ __le64 dcache_crtime_nsec;
+ __le64 dentry_ctime;
+ __le64 dentry_ctime_nsec;
+ __le64 num;
+ __le32 verify_cache;
+ __le32 path_len;
+ char path[0];
+} __packed;
+
+struct hmdfs_inodeinfo_response {
+ __le64 i_size;
+ __le64 i_mtime;
+ __le32 i_mtime_nsec;
+ __le32 fno;
+ __le16 i_mode;
+ __le64 i_ino;
+ __le32 i_flags;
+ __le32 i_reserved;
+} __packed;
+
+struct mkdir_request {
+ __le32 path_len;
+ __le32 name_len;
+ __le16 mode;
+ char path[0];
+} __packed;
+
+struct create_request {
+ __le32 path_len;
+ __le32 name_len;
+ __le16 mode;
+ __u8 want_excl;
+ char path[0];
+} __packed;
+
+struct rmdir_request {
+ __le32 path_len;
+ __le32 name_len;
+ char path[0];
+} __packed;
+
+struct unlink_request {
+ __le32 path_len;
+ __le32 name_len;
+ char path[0];
+} __packed;
+
+struct rename_request {
+ __le32 old_path_len;
+ __le32 new_path_len;
+ __le32 old_name_len;
+ __le32 new_name_len;
+ __le32 flags;
+ char path[0];
+} __packed;
+
+struct drop_push_request {
+ __le32 path_len;
+ char path[0];
+} __packed;
+
+struct setattr_request {
+ __le64 size;
+ __le32 valid;
+ __le16 mode;
+ __le32 uid;
+ __le32 gid;
+ __le64 atime;
+ __le32 atime_nsec;
+ __le64 mtime;
+ __le32 mtime_nsec;
+ __le32 path_len;
+ char buf[0];
+} __packed;
+
+struct getattr_request {
+ __le32 lookup_flags;
+ __le32 path_len;
+ char buf[0];
+} __packed;
+
+struct getattr_response {
+ __le32 change_detect_cap;
+ __le32 result_mask;
+ __le32 flags;
+ __le64 fsid;
+ __le16 mode;
+ __le32 nlink;
+ __le32 uid;
+ __le32 gid;
+ __le32 rdev;
+ __le64 ino;
+ __le64 size;
+ __le64 blocks;
+ __le32 blksize;
+ __le64 atime;
+ __le32 atime_nsec;
+ __le64 mtime;
+ __le32 mtime_nsec;
+ __le64 ctime;
+ __le32 ctime_nsec;
+ __le64 crtime;
+ __le32 crtime_nsec;
+ __le64 ichange_count;
+} __packed;
+
+struct statfs_request {
+ __le32 path_len;
+ char path[0];
+} __packed;
+
+struct statfs_response {
+ __le64 f_type;
+ __le64 f_bsize;
+ __le64 f_blocks;
+ __le64 f_bfree;
+ __le64 f_bavail;
+ __le64 f_files;
+ __le64 f_ffree;
+ __le32 f_fsid_0;
+ __le32 f_fsid_1;
+ __le64 f_namelen;
+ __le64 f_frsize;
+ __le64 f_flags;
+ __le64 f_spare_0;
+ __le64 f_spare_1;
+ __le64 f_spare_2;
+ __le64 f_spare_3;
+} __packed;
+
+struct syncfs_request {
+ __le64 version;
+ __le32 flags;
+} __packed;
+
+struct getxattr_request {
+ __le32 path_len;
+ __le32 name_len;
+ __le32 size;
+ char buf[0];
+} __packed;
+
+struct getxattr_response {
+ __le32 size;
+	char value[0]; /* the xattr value may be non-printable */
+} __packed;
+
+struct setxattr_request {
+ __le32 path_len;
+ __le32 name_len;
+ __le32 size;
+ __le32 flags;
+ __u8 del; /* remove xattr */
+ char buf[0];
+} __packed;
+
+struct listxattr_request {
+ __le32 path_len;
+ __le32 size;
+ char buf[0];
+} __packed;
+
+struct listxattr_response {
+ __le32 size;
+ char list[0];
+} __packed;
+
+struct connection_rekey_request {
+ __le32 update_request;
+} __packed;
+
+enum CONNECTION_KEY_UPDATE_REQUEST {
+ UPDATE_NOT_REQUESTED = 0,
+ UPDATE_REQUESTED = 1
+};
+
+enum MSG_QUEUE_STATUS {
+ MSG_Q_SEND = 0,
+ MSG_Q_END_RECV,
+};
+#endif
diff --git a/fs/hmdfs/comm/socket_adapter.c b/fs/hmdfs/comm/socket_adapter.c
new file mode 100644
index 0000000000000000000000000000000000000000..769b6d28ebcef214973e36d9d19bb9bbe254f4f5
--- /dev/null
+++ b/fs/hmdfs/comm/socket_adapter.c
@@ -0,0 +1,1151 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/comm/socket_adapter.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include "socket_adapter.h"
+
+#include <linux/delay.h>
+#include <linux/file.h>
+#include <linux/module.h>
+#include <linux/pagemap.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include "authority/authentication.h"
+#include "comm/device_node.h"
+#include "hmdfs_client.h"
+#include "hmdfs_server.h"
+#include "hmdfs_trace.h"
+#include "message_verify.h"
+
+#define ACQUIRE_WFIRED_INTVAL_USEC_MIN 10
+#define ACQUIRE_WFIRED_INTVAL_USEC_MAX 30
+
+typedef void (*request_callback)(struct hmdfs_peer *, struct hmdfs_head_cmd *,
+ void *);
+typedef void (*response_callback)(struct hmdfs_peer *,
+ struct sendmsg_wait_queue *, void *, size_t);
+
+static const request_callback s_recv_callbacks[F_SIZE] = {
+ [F_OPEN] = hmdfs_server_open,
+ [F_READPAGE] = hmdfs_server_readpage,
+ [F_RELEASE] = hmdfs_server_release,
+ [F_WRITEPAGE] = hmdfs_server_writepage,
+ [F_ITERATE] = hmdfs_server_readdir,
+ [F_MKDIR] = hmdfs_server_mkdir,
+ [F_CREATE] = hmdfs_server_create,
+ [F_RMDIR] = hmdfs_server_rmdir,
+ [F_UNLINK] = hmdfs_server_unlink,
+ [F_RENAME] = hmdfs_server_rename,
+ [F_SETATTR] = hmdfs_server_setattr,
+ [F_STATFS] = hmdfs_server_statfs,
+ [F_DROP_PUSH] = hmdfs_server_get_drop_push,
+ [F_GETATTR] = hmdfs_server_getattr,
+ [F_FSYNC] = hmdfs_server_fsync,
+ [F_SYNCFS] = hmdfs_server_syncfs,
+ [F_GETXATTR] = hmdfs_server_getxattr,
+ [F_SETXATTR] = hmdfs_server_setxattr,
+ [F_LISTXATTR] = hmdfs_server_listxattr,
+ [F_READPAGES] = hmdfs_server_readpages,
+ [F_READPAGES_OPEN] = hmdfs_server_readpages_open,
+ [F_ATOMIC_OPEN] = hmdfs_server_atomic_open,
+};
+
+typedef void (*file_request_callback)(struct hmdfs_peer *,
+ struct hmdfs_send_command *);
+
+struct async_req_callbacks {
+ void (*on_wakeup)(struct hmdfs_peer *peer, const struct hmdfs_req *req,
+ const struct hmdfs_resp *resp);
+};
+
+static const struct async_req_callbacks g_async_req_callbacks[F_SIZE] = {
+ [F_SYNCFS] = { .on_wakeup = hmdfs_recv_syncfs_cb },
+ [F_WRITEPAGE] = { .on_wakeup = hmdfs_writepage_cb },
+};
+
+static void msg_release(struct kref *kref)
+{
+ struct sendmsg_wait_queue *msg_wq;
+ struct hmdfs_peer *con;
+
+ msg_wq = (struct sendmsg_wait_queue *)container_of(kref,
+ struct hmdfs_msg_idr_head, ref);
+ con = msg_wq->head.peer;
+ idr_remove(&con->msg_idr, msg_wq->head.msg_id);
+ spin_unlock(&con->idr_lock);
+
+ kfree(msg_wq->buf);
+ if (msg_wq->recv_info.local_filp)
+ fput(msg_wq->recv_info.local_filp);
+ kfree(msg_wq);
+}
+
+// Always remember to find before put, and make sure con is available
+void msg_put(struct sendmsg_wait_queue *msg_wq)
+{
+ kref_put_lock(&msg_wq->head.ref, msg_release,
+ &msg_wq->head.peer->idr_lock);
+}
+
+static void recv_info_init(struct file_recv_info *recv_info)
+{
+ memset(recv_info, 0, sizeof(struct file_recv_info));
+ atomic_set(&recv_info->local_fslices, 0);
+ atomic_set(&recv_info->state, FILE_RECV_PROCESS);
+}
+
+static int msg_init(struct hmdfs_peer *con, struct sendmsg_wait_queue *msg_wq)
+{
+ int ret = 0;
+ struct file_recv_info *recv_info = &msg_wq->recv_info;
+
+ ret = hmdfs_alloc_msg_idr(con, MSG_IDR_MESSAGE_SYNC, msg_wq);
+ if (unlikely(ret))
+ return ret;
+
+ atomic_set(&msg_wq->valid, MSG_Q_SEND);
+ init_waitqueue_head(&msg_wq->response_q);
+ recv_info_init(recv_info);
+ msg_wq->start = jiffies;
+ return 0;
+}
+
+static inline void statistic_con_sb_dirty(struct hmdfs_peer *con,
+ const struct hmdfs_cmd *op)
+{
+ if (op->command == F_WRITEPAGE && op->cmd_flag == C_REQUEST)
+ atomic64_inc(&con->sb_dirty_count);
+}
+
+int hmdfs_sendmessage(struct hmdfs_peer *node, struct hmdfs_send_data *msg)
+{
+ int ret = 0;
+ struct connection *connect = NULL;
+ struct tcp_handle *tcp = NULL;
+ struct hmdfs_head_cmd *head = msg->head;
+ const struct cred *old_cred;
+
+ if (!node) {
+ hmdfs_err("node NULL when send cmd %d",
+ head->operations.command);
+ ret = -EAGAIN;
+ goto out_err;
+ } else if (node->status != NODE_STAT_ONLINE) {
+ hmdfs_err("device %llu OFFLINE %d when send cmd %d",
+ node->device_id, node->status,
+ head->operations.command);
+ ret = -EAGAIN;
+ goto out;
+ }
+
+ if (hmdfs_should_fail_sendmsg(&node->sbi->fault_inject, node, msg,
+ &ret))
+ goto out;
+
+ old_cred = hmdfs_override_creds(node->sbi->system_cred);
+
+ do {
+ connect = get_conn_impl(node, CONNECT_TYPE_TCP);
+ if (!connect) {
+ hmdfs_info_ratelimited(
+ "device %llu no connection available when send cmd %d, get new session",
+ node->device_id, head->operations.command);
+ if (node->status != NODE_STAT_OFFLINE) {
+ struct notify_param param;
+
+ memcpy(param.remote_cid, node->cid,
+ HMDFS_CID_SIZE);
+ param.notify = NOTIFY_OFFLINE;
+ param.fd = INVALID_SOCKET_FD;
+ notify(node, ¶m);
+ }
+ ret = -EAGAIN;
+ goto revert_cred;
+ }
+
+ ret = connect->send_message(connect, msg);
+ if (ret == -ESHUTDOWN) {
+ hmdfs_info("device %llu send cmd %d message fail, connection stop",
+ node->device_id, head->operations.command);
+ connect->status = CONNECT_STAT_STOP;
+ tcp = connect->connect_handle;
+ if (node->status != NODE_STAT_OFFLINE) {
+ connection_get(connect);
+ if (!queue_work(node->reget_conn_wq,
+ &connect->reget_work))
+ connection_put(connect);
+ }
+ connection_put(connect);
+ /*
+ * node->status is OFFLINE can not ensure
+ * node_seq will be increased before
+ * hmdfs_sendmessage() returns.
+ */
+ hmdfs_node_inc_evt_seq(node);
+ } else {
+ connection_put(connect);
+ goto revert_cred;
+ }
+ } while (node->status != NODE_STAT_OFFLINE);
+revert_cred:
+ hmdfs_revert_creds(old_cred);
+
+ if (!ret)
+ statistic_con_sb_dirty(node, &head->operations);
+out:
+ if (node->version == DFS_2_0 &&
+ head->operations.cmd_flag == C_REQUEST)
+ hmdfs_client_snd_statis(node->sbi,
+ head->operations.command, ret);
+ else if (node->version == DFS_2_0 &&
+ head->operations.cmd_flag == C_RESPONSE)
+ hmdfs_server_snd_statis(node->sbi,
+ head->operations.command, ret);
+out_err:
+ return ret;
+}
+
+int hmdfs_sendmessage_response(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *cmd, __u32 data_len,
+ void *buf, __u32 ret_code)
+{
+ int ret;
+ struct hmdfs_send_data msg;
+ struct hmdfs_head_cmd head;
+
+ head.magic = HMDFS_MSG_MAGIC;
+ head.version = DFS_2_0;
+ head.operations = cmd->operations;
+ head.operations.cmd_flag = C_RESPONSE;
+ head.data_len = cpu_to_le32(data_len + sizeof(struct hmdfs_head_cmd));
+ head.ret_code = cpu_to_le32(ret_code);
+ head.msg_id = cmd->msg_id;
+ head.reserved = cmd->reserved;
+ head.reserved1 = cmd->reserved1;
+ msg.head = &head;
+ msg.head_len = sizeof(struct hmdfs_head_cmd);
+ msg.data = buf;
+ msg.len = data_len;
+ msg.sdesc = NULL;
+ msg.sdesc_len = 0;
+
+ ret = hmdfs_sendmessage(con, &msg);
+ return ret;
+}
+
+static void mp_release(struct kref *kref)
+{
+ struct hmdfs_msg_parasite *mp = NULL;
+ struct hmdfs_peer *peer = NULL;
+
+ mp = (struct hmdfs_msg_parasite *)container_of(kref,
+ struct hmdfs_msg_idr_head, ref);
+ peer = mp->head.peer;
+ idr_remove(&peer->msg_idr, mp->head.msg_id);
+ spin_unlock(&peer->idr_lock);
+
+ peer_put(peer);
+ kfree(mp->resp.out_buf);
+ kfree(mp);
+}
+
+void mp_put(struct hmdfs_msg_parasite *mp)
+{
+ kref_put_lock(&mp->head.ref, mp_release, &mp->head.peer->idr_lock);
+}
+
+static void async_request_cb_on_wakeup_fn(struct work_struct *w)
+{
+ struct hmdfs_msg_parasite *mp =
+ container_of(w, struct hmdfs_msg_parasite, d_work.work);
+ struct async_req_callbacks cbs;
+ const struct cred *old_cred =
+ hmdfs_override_creds(mp->head.peer->sbi->cred);
+
+ if (mp->resp.ret_code == -ETIME)
+ hmdfs_client_resp_statis(mp->head.peer->sbi,
+ mp->req.operations.command,
+ HMDFS_RESP_TIMEOUT, 0, 0);
+
+ cbs = g_async_req_callbacks[mp->req.operations.command];
+ if (cbs.on_wakeup)
+ (*cbs.on_wakeup)(mp->head.peer, &mp->req, &mp->resp);
+ mp_put(mp);
+ hmdfs_revert_creds(old_cred);
+}
+
+static struct hmdfs_msg_parasite *mp_alloc(struct hmdfs_peer *peer,
+ const struct hmdfs_req *req)
+{
+ struct hmdfs_msg_parasite *mp = kzalloc(sizeof(*mp), GFP_KERNEL);
+ int ret;
+
+ if (unlikely(!mp))
+ return ERR_PTR(-ENOMEM);
+
+ ret = hmdfs_alloc_msg_idr(peer, MSG_IDR_MESSAGE_ASYNC, mp);
+ if (unlikely(ret)) {
+ kfree(mp);
+ return ERR_PTR(ret);
+ }
+
+ mp->start = jiffies;
+ peer_get(mp->head.peer);
+ mp->resp.ret_code = -ETIME;
+ INIT_DELAYED_WORK(&mp->d_work, async_request_cb_on_wakeup_fn);
+ mp->wfired = false;
+ mp->req = *req;
+ return mp;
+}
+
+/**
+ * hmdfs_send_async_request - send out an async request
+ * @peer: target device node
+ * @req: request descriptor + necessary contexts
+ *
+ * Send out a request synchronously and wait for its response asynchronously
+ * Return -ESHUTDOWN when the device node is unreachable
+ * Return -EAGAIN if the network is recovering
+ * Return -ENOMEM if out of memory
+ *
+ * Register a callback in g_async_req_callbacks to receive the response
+ */
+int hmdfs_send_async_request(struct hmdfs_peer *peer,
+ const struct hmdfs_req *req)
+{
+ int ret = 0;
+ struct hmdfs_send_data msg;
+ struct hmdfs_head_cmd head;
+ struct hmdfs_msg_parasite *mp = NULL;
+ size_t msg_len = req->data_len + sizeof(struct hmdfs_head_cmd);
+ unsigned int timeout;
+
+ if (req->timeout == TIMEOUT_CONFIG)
+ timeout = get_cmd_timeout(peer->sbi, req->operations.command);
+ else
+ timeout = req->timeout;
+ if (timeout == TIMEOUT_UNINIT || timeout == TIMEOUT_NONE) {
+ hmdfs_err("send msg %d with uninitialized/invalid timeout",
+ req->operations.command);
+ return -EINVAL;
+ }
+
+ if (!hmdfs_is_node_online(peer))
+ return -EAGAIN;
+
+ mp = mp_alloc(peer, req);
+ if (IS_ERR(mp))
+ return PTR_ERR(mp);
+ head.magic = HMDFS_MSG_MAGIC;
+ head.version = DFS_2_0;
+ head.data_len = cpu_to_le32(msg_len);
+ head.operations = mp->req.operations;
+ head.msg_id = cpu_to_le32(mp->head.msg_id);
+ head.reserved = 0;
+ head.reserved1 = 0;
+
+ msg.head = &head;
+ msg.head_len = sizeof(head);
+ msg.data = mp->req.data;
+ msg.len = mp->req.data_len;
+ msg.sdesc_len = 0;
+ msg.sdesc = NULL;
+
+ ret = hmdfs_sendmessage(peer, &msg);
+ if (unlikely(ret)) {
+ mp_put(mp);
+ goto out;
+ }
+
+ queue_delayed_work(peer->async_wq, &mp->d_work, timeout * HZ);
+	/*
+	 * The work may not have been queued yet when its response arrives,
+	 * which would result in meaningless waiting. So we use a memory
+	 * barrier to tell the recv thread whether the work has been queued.
+	 */
+ smp_store_release(&mp->wfired, true);
+out:
+ hmdfs_dec_msg_idr_process(peer);
+ return ret;
+}
+
+static int hmdfs_record_async_readdir(struct hmdfs_peer *con,
+ struct sendmsg_wait_queue *msg_wq)
+{
+ struct hmdfs_sb_info *sbi = con->sbi;
+
+ spin_lock(&sbi->async_readdir_msg_lock);
+ if (sbi->async_readdir_prohibit) {
+ spin_unlock(&sbi->async_readdir_msg_lock);
+ return -EINTR;
+ }
+
+ list_add(&msg_wq->async_msg, &sbi->async_readdir_msg_list);
+ spin_unlock(&sbi->async_readdir_msg_lock);
+
+ return 0;
+}
+
+static void hmdfs_untrack_async_readdir(struct hmdfs_peer *con,
+ struct sendmsg_wait_queue *msg_wq)
+{
+ struct hmdfs_sb_info *sbi = con->sbi;
+
+ spin_lock(&sbi->async_readdir_msg_lock);
+ list_del(&msg_wq->async_msg);
+ spin_unlock(&sbi->async_readdir_msg_lock);
+}
+
+int hmdfs_sendmessage_request(struct hmdfs_peer *con,
+ struct hmdfs_send_command *sm)
+{
+ int time_left;
+ int ret = 0;
+ struct sendmsg_wait_queue *msg_wq = NULL;
+ struct hmdfs_send_data msg;
+ size_t outlen = sm->len + sizeof(struct hmdfs_head_cmd);
+ unsigned int timeout =
+ get_cmd_timeout(con->sbi, sm->operations.command);
+ struct hmdfs_head_cmd *head = NULL;
+ bool dec = false;
+
+ if (!hmdfs_is_node_online(con))
+ return -EAGAIN;
+
+ if (timeout == TIMEOUT_UNINIT) {
+ hmdfs_err_ratelimited("send msg %d with uninitialized timeout",
+ sm->operations.command);
+ return -EINVAL;
+ }
+
+ head = kzalloc(sizeof(struct hmdfs_head_cmd), GFP_KERNEL);
+ if (!head)
+ return -ENOMEM;
+
+ sm->out_buf = NULL;
+ head->magic = HMDFS_MSG_MAGIC;
+ head->version = DFS_2_0;
+ head->operations = sm->operations;
+ head->data_len = cpu_to_le32(outlen);
+ head->ret_code = cpu_to_le32(sm->ret_code);
+ head->reserved = 0;
+ head->reserved1 = 0;
+ if (timeout != TIMEOUT_NONE) {
+ msg_wq = kzalloc(sizeof(*msg_wq), GFP_KERNEL);
+ if (!msg_wq) {
+ ret = -ENOMEM;
+ goto free;
+ }
+ ret = msg_init(con, msg_wq);
+ if (ret) {
+ kfree(msg_wq);
+ msg_wq = NULL;
+ goto free;
+ }
+ dec = true;
+ head->msg_id = cpu_to_le32(msg_wq->head.msg_id);
+ if (sm->operations.command == F_ITERATE)
+ msg_wq->recv_info.local_filp = sm->local_filp;
+ }
+ msg.head = head;
+ msg.head_len = sizeof(struct hmdfs_head_cmd);
+ msg.data = sm->data;
+ msg.len = sm->len;
+ msg.sdesc_len = 0;
+ msg.sdesc = NULL;
+ ret = hmdfs_sendmessage(con, &msg);
+ if (ret) {
+ hmdfs_err_ratelimited("send err sm->device_id, %lld, msg_id %u",
+ con->device_id, head->msg_id);
+ goto free;
+ }
+
+ if (timeout == TIMEOUT_NONE)
+ goto free;
+
+ hmdfs_dec_msg_idr_process(con);
+ dec = false;
+
+ if (sm->operations.command == F_ITERATE) {
+ ret = hmdfs_record_async_readdir(con, msg_wq);
+ if (ret) {
+ atomic_set(&msg_wq->recv_info.state, FILE_RECV_ERR_SPC);
+ goto free;
+ }
+ }
+
+ time_left = wait_event_interruptible_timeout(
+ msg_wq->response_q,
+ (atomic_read(&msg_wq->valid) == MSG_Q_END_RECV), timeout * HZ);
+
+ if (sm->operations.command == F_ITERATE)
+ hmdfs_untrack_async_readdir(con, msg_wq);
+
+ if (time_left == -ERESTARTSYS || time_left == 0) {
+ hmdfs_err("timeout err sm->device_id %lld, msg_id %d cmd %d",
+ con->device_id, head->msg_id,
+ head->operations.command);
+ if (sm->operations.command == F_ITERATE)
+ atomic_set(&msg_wq->recv_info.state, FILE_RECV_ERR_NET);
+ ret = -ETIME;
+ hmdfs_client_resp_statis(con->sbi, sm->operations.command,
+ HMDFS_RESP_TIMEOUT, 0, 0);
+ goto free;
+ }
+ sm->out_buf = msg_wq->buf;
+ msg_wq->buf = NULL;
+ sm->out_len = msg_wq->size - sizeof(struct hmdfs_head_cmd);
+ ret = msg_wq->ret;
+
+free:
+ if (msg_wq)
+ msg_put(msg_wq);
+ if (dec)
+ hmdfs_dec_msg_idr_process(con);
+ kfree(head);
+ return ret;
+}
+
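+/*
+ * Large file responses are split into slices of at most PAGE_SIZE bytes;
+ * each slice travels in its own message together with a slice_descriptor
+ * so the receiver can write it back at the right offset.
+ */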
+static int hmdfs_send_slice(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ struct slice_descriptor *sdesc, void *slice_buf)
+{
+ int ret;
+ struct hmdfs_send_data msg;
+ struct hmdfs_head_cmd head;
+ int content_size = le32_to_cpu(sdesc->content_size);
+ int msg_len = sizeof(struct hmdfs_head_cmd) + content_size +
+ sizeof(struct slice_descriptor);
+
+ head.magic = HMDFS_MSG_MAGIC;
+ head.version = DFS_2_0;
+ head.operations = cmd->operations;
+ head.operations.cmd_flag = C_RESPONSE;
+ head.data_len = cpu_to_le32(msg_len);
+ head.ret_code = cpu_to_le32(0);
+ head.msg_id = cmd->msg_id;
+ head.reserved = cmd->reserved;
+ head.reserved1 = cmd->reserved1;
+
+ msg.head = &head;
+ msg.head_len = sizeof(struct hmdfs_head_cmd);
+ msg.sdesc = sdesc;
+	msg.sdesc_len = sizeof(struct slice_descriptor);
+ msg.data = slice_buf;
+ msg.len = content_size;
+
+ ret = hmdfs_sendmessage(con, &msg);
+
+ return ret;
+}
+
+int hmdfs_readfile_response(struct hmdfs_peer *con, struct hmdfs_head_cmd *head,
+ struct file *filp)
+{
+ int ret;
+ const unsigned int slice_size = PAGE_SIZE;
+ char *slice_buf = NULL;
+ loff_t file_offset = 0, file_size;
+ ssize_t size;
+ struct slice_descriptor sdesc;
+ unsigned int slice_sn = 0;
+
+ if (!filp)
+ return hmdfs_sendmessage_response(con, head, 0, NULL, 0);
+
+ sdesc.slice_size = cpu_to_le32(slice_size);
+ file_size = i_size_read(file_inode(filp));
+ file_size = round_up(file_size, slice_size);
+ sdesc.num_slices = cpu_to_le32(file_size / slice_size);
+
+ slice_buf = kmalloc(slice_size, GFP_KERNEL);
+ if (!slice_buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ while (1) {
+ sdesc.slice_sn = cpu_to_le32(slice_sn++);
+ size = kernel_read(filp, slice_buf, (size_t)slice_size,
+ &file_offset);
+ if (IS_ERR_VALUE(size)) {
+ ret = (int)size;
+ goto out;
+ }
+ sdesc.content_size = cpu_to_le32(size);
+ ret = hmdfs_send_slice(con, head, &sdesc, slice_buf);
+ if (ret) {
+ hmdfs_info("Cannot send file slice %d ",
+ le32_to_cpu(sdesc.slice_sn));
+ break;
+ }
+ if (file_offset >= i_size_read(file_inode(filp)))
+ break;
+ }
+
+out:
+ kfree(slice_buf);
+ if (ret)
+ hmdfs_sendmessage_response(con, head, 0, NULL, ret);
+ return ret;
+}
+
+static void asw_release(struct kref *kref)
+{
+ struct hmdfs_async_work *asw = NULL;
+ struct hmdfs_peer *peer = NULL;
+
+ asw = (struct hmdfs_async_work *)container_of(kref,
+ struct hmdfs_msg_idr_head, ref);
+ peer = asw->head.peer;
+ idr_remove(&peer->msg_idr, asw->head.msg_id);
+ spin_unlock(&peer->idr_lock);
+ kfree(asw);
+}
+
+void asw_put(struct hmdfs_async_work *asw)
+{
+ kref_put_lock(&asw->head.ref, asw_release, &asw->head.peer->idr_lock);
+}
+
+void hmdfs_recv_page_work_fn(struct work_struct *ptr)
+{
+ struct hmdfs_async_work *async_work =
+ container_of(ptr, struct hmdfs_async_work, d_work.work);
+
+ if (async_work->head.peer->version >= DFS_2_0)
+ hmdfs_client_resp_statis(async_work->head.peer->sbi,
+ F_READPAGE, HMDFS_RESP_TIMEOUT, 0, 0);
+ hmdfs_err_ratelimited("timeout and release page, msg_id:%u",
+ async_work->head.msg_id);
+ asw_done(async_work);
+}
+
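+/*
+ * Send a page read request. The page in sm->out_buf stays locked until
+ * either the response arrives or the delayed work above fires on timeout
+ * and releases it; async_work is refcounted so both paths are safe.
+ */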
+int hmdfs_sendpage_request(struct hmdfs_peer *con,
+ struct hmdfs_send_command *sm)
+{
+ int ret = 0;
+ struct hmdfs_send_data msg;
+ struct hmdfs_async_work *async_work = NULL;
+ size_t outlen = sm->len + sizeof(struct hmdfs_head_cmd);
+ struct hmdfs_head_cmd head;
+ unsigned int timeout;
+ unsigned long start = jiffies;
+
+ WARN_ON(!sm->out_buf);
+
+ timeout = get_cmd_timeout(con->sbi, sm->operations.command);
+ if (timeout == TIMEOUT_UNINIT) {
+ hmdfs_err("send msg %d with uninitialized timeout",
+ sm->operations.command);
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ if (!hmdfs_is_node_online(con)) {
+ ret = -EAGAIN;
+ goto unlock;
+ }
+
+ memset(&head, 0, sizeof(head));
+ head.magic = HMDFS_MSG_MAGIC;
+ head.version = DFS_2_0;
+ head.operations = sm->operations;
+ head.data_len = cpu_to_le32(outlen);
+ head.ret_code = cpu_to_le32(sm->ret_code);
+ head.reserved = 0;
+ head.reserved1 = 0;
+
+ msg.head = &head;
+ msg.head_len = sizeof(struct hmdfs_head_cmd);
+ msg.data = sm->data;
+ msg.len = sm->len;
+ msg.sdesc_len = 0;
+ msg.sdesc = NULL;
+
+ async_work = kzalloc(sizeof(*async_work), GFP_KERNEL);
+ if (!async_work) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+ async_work->start = start;
+ ret = hmdfs_alloc_msg_idr(con, MSG_IDR_PAGE, async_work);
+ if (ret) {
+ hmdfs_err("alloc msg_id failed, err %d", ret);
+ goto unlock;
+ }
+ head.msg_id = cpu_to_le32(async_work->head.msg_id);
+ async_work->page = sm->out_buf;
+ asw_get(async_work);
+ INIT_DELAYED_WORK(&async_work->d_work, hmdfs_recv_page_work_fn);
+ ret = queue_delayed_work(con->async_wq, &async_work->d_work,
+ timeout * HZ);
+ if (!ret) {
+ hmdfs_err("queue_delayed_work failed, msg_id %u", head.msg_id);
+ goto fail_and_unlock_page;
+ }
+ ret = hmdfs_sendmessage(con, &msg);
+ if (ret) {
+ hmdfs_err("send err sm->device_id, %lld, msg_id %u",
+ con->device_id, head.msg_id);
+ if (!cancel_delayed_work(&async_work->d_work)) {
+ hmdfs_err("cancel async work err");
+ asw_put(async_work);
+ hmdfs_dec_msg_idr_process(con);
+ goto out;
+ }
+ goto fail_and_unlock_page;
+ }
+
+ asw_put(async_work);
+ hmdfs_dec_msg_idr_process(con);
+ return 0;
+
+fail_and_unlock_page:
+ asw_put(async_work);
+ asw_done(async_work);
+ hmdfs_dec_msg_idr_process(con);
+ return ret;
+unlock:
+ kfree(async_work);
+ unlock_page(sm->out_buf);
+out:
+ return ret;
+}
+
+static void hmdfs_request_handle_sync(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *head, void *buf)
+{
+ unsigned long start = jiffies;
+ const struct cred *saved_cred = hmdfs_override_fsids(true);
+
+ if (!saved_cred) {
+ hmdfs_err("prepare cred failed!");
+ kfree(buf);
+ return;
+ }
+
+ s_recv_callbacks[head->operations.command](con, head, buf);
+ hmdfs_statistic(con->sbi, head->operations.command, jiffies - start);
+
+ kfree(buf);
+
+ hmdfs_revert_fsids(saved_cred);
+}
+
+static void hmdfs_msg_handle_sync(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *head, void *buf)
+{
+ const struct cred *old_cred = hmdfs_override_creds(con->sbi->cred);
+
+	/*
+	 * Reuse PF_NPROC_EXCEEDED as an indication of hmdfs server context:
+	 * 1. PF_NPROC_EXCEEDED is set by setreuid()/setuid()/setresuid();
+	 *    we assume a kworker will not call these syscalls.
+	 * 2. PF_NPROC_EXCEEDED is cleared by execve(), which a kworker
+	 *    will not call either.
+	 */
+ current->flags |= PF_NPROC_EXCEEDED;
+ hmdfs_request_handle_sync(con, head, buf);
+ current->flags &= ~PF_NPROC_EXCEEDED;
+
+ hmdfs_revert_creds(old_cred);
+}
+
+static void hmdfs_request_work_fn(struct work_struct *ptr)
+{
+ struct work_handler_desp *desp =
+ container_of(ptr, struct work_handler_desp, work);
+
+ hmdfs_msg_handle_sync(desp->peer, desp->head, desp->buf);
+ peer_put(desp->peer);
+ kfree(desp->head);
+ kfree(desp);
+}
+
+static int hmdfs_msg_handle_async(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *head, void *buf,
+ struct workqueue_struct *wq,
+ void (*work_fn)(struct work_struct *ptr))
+{
+ struct work_handler_desp *desp = NULL;
+ struct hmdfs_head_cmd *dup_head = NULL;
+ int ret;
+
+ desp = kzalloc(sizeof(*desp), GFP_KERNEL);
+ if (!desp) {
+ ret = -ENOMEM;
+ goto exit_desp;
+ }
+
+ dup_head = kzalloc(sizeof(*dup_head), GFP_KERNEL);
+ if (!dup_head) {
+ ret = -ENOMEM;
+ goto exit_desp;
+ }
+
+ *dup_head = *head;
+ desp->peer = con;
+ desp->head = dup_head;
+ desp->buf = buf;
+ INIT_WORK(&desp->work, work_fn);
+
+ peer_get(con);
+ queue_work(wq, &desp->work);
+
+ ret = 0;
+ return ret;
+
+exit_desp:
+ kfree(desp);
+ return ret;
+}
+
+static int hmdfs_request_recv(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *head, void *buf)
+{
+ int ret;
+
+ if (head->operations.command >= F_SIZE ||
+ !s_recv_callbacks[head->operations.command]) {
+ ret = -EINVAL;
+ hmdfs_err("NULL callback, command %d",
+ head->operations.command);
+ goto out;
+ }
+
+ switch (head->operations.command) {
+ case F_OPEN:
+ case F_RELEASE:
+ case F_ITERATE:
+ case F_MKDIR:
+ case F_RMDIR:
+ case F_CREATE:
+ case F_UNLINK:
+ case F_RENAME:
+ case F_SETATTR:
+ case F_STATFS:
+ case F_CONNECT_REKEY:
+ case F_DROP_PUSH:
+ case F_GETATTR:
+ case F_FSYNC:
+ case F_SYNCFS:
+ case F_GETXATTR:
+ case F_SETXATTR:
+ case F_LISTXATTR:
+ case F_READPAGES_OPEN:
+ case F_ATOMIC_OPEN:
+ ret = hmdfs_msg_handle_async(con, head, buf, con->req_handle_wq,
+ hmdfs_request_work_fn);
+ break;
+ case F_WRITEPAGE:
+ case F_READPAGE:
+ case F_READPAGES:
+ hmdfs_msg_handle_sync(con, head, buf);
+ ret = 0;
+ break;
+ default:
+ hmdfs_err("Fatal! Unexpected request command %d",
+ head->operations.command);
+ ret = -EINVAL;
+ }
+
+out:
+ return ret;
+}
+
+void hmdfs_response_wakeup(struct sendmsg_wait_queue *msg_info,
+ __u32 ret_code, __u32 data_len, void *buf)
+{
+ msg_info->ret = ret_code;
+ msg_info->size = data_len;
+ msg_info->buf = buf;
+ atomic_set(&msg_info->valid, MSG_Q_END_RECV);
+ wake_up_interruptible(&msg_info->response_q);
+}
+
+static int hmdfs_readfile_slice(struct sendmsg_wait_queue *msg_info,
+ struct work_handler_desp *desp)
+{
+ struct slice_descriptor *sdesc = desp->buf;
+ void *slice_buf = sdesc + 1;
+ struct file_recv_info *recv_info = &msg_info->recv_info;
+ struct file *filp = recv_info->local_filp;
+ loff_t offset;
+ ssize_t written_size;
+
+ if (atomic_read(&recv_info->state) != FILE_RECV_PROCESS)
+ return -EBUSY;
+
+ offset = le32_to_cpu(sdesc->slice_size) * le32_to_cpu(sdesc->slice_sn);
+
+ written_size = kernel_write(filp, slice_buf,
+ le32_to_cpu(sdesc->content_size), &offset);
+ if (IS_ERR_VALUE(written_size)) {
+ atomic_set(&recv_info->state, FILE_RECV_ERR_SPC);
+ hmdfs_info("Fatal! Cannot store a file slice %d/%d, ret = %d",
+ le32_to_cpu(sdesc->slice_sn),
+ le32_to_cpu(sdesc->num_slices), (int)written_size);
+ return (int)written_size;
+ }
+
+ if (atomic_inc_return(&recv_info->local_fslices) >=
+ le32_to_cpu(sdesc->num_slices))
+ atomic_set(&recv_info->state, FILE_RECV_SUCC);
+ return 0;
+}
+
+static void hmdfs_file_response_work_fn(struct work_struct *ptr)
+{
+ struct work_handler_desp *desp =
+ container_of(ptr, struct work_handler_desp, work);
+ struct sendmsg_wait_queue *msg_info = NULL;
+ int ret;
+ atomic_t *pstate = NULL;
+ u8 cmd = desp->head->operations.command;
+ const struct cred *old_cred =
+ hmdfs_override_creds(desp->peer->sbi->cred);
+
+ msg_info = (struct sendmsg_wait_queue *)hmdfs_find_msg_head(desp->peer,
+ le32_to_cpu(desp->head->msg_id));
+ if (!msg_info || atomic_read(&msg_info->valid) != MSG_Q_SEND) {
+ hmdfs_client_resp_statis(desp->peer->sbi, cmd, HMDFS_RESP_DELAY,
+ 0, 0);
+ hmdfs_info("cannot find msg(id %d)",
+ le32_to_cpu(desp->head->msg_id));
+ goto free;
+ }
+
+ ret = le32_to_cpu(desp->head->ret_code);
+ if (ret || le32_to_cpu(desp->head->data_len) == sizeof(*desp->head))
+ goto wakeup;
+ ret = hmdfs_readfile_slice(msg_info, desp);
+ pstate = &msg_info->recv_info.state;
+ if (ret || atomic_read(pstate) != FILE_RECV_PROCESS)
+ goto wakeup;
+ goto free;
+
+wakeup:
+ hmdfs_response_wakeup(msg_info, ret, sizeof(struct hmdfs_head_cmd),
+ NULL);
+ hmdfs_client_resp_statis(desp->peer->sbi, cmd, HMDFS_RESP_NORMAL,
+ msg_info->start, jiffies);
+free:
+ if (msg_info)
+ msg_put(msg_info);
+ peer_put(desp->peer);
+ hmdfs_revert_creds(old_cred);
+
+ kfree(desp->buf);
+ kfree(desp->head);
+ kfree(desp);
+}
+
+static void hmdfs_wait_mp_wfired(struct hmdfs_msg_parasite *mp)
+{
+ /* We just cancel queued works */
+ while (unlikely(!smp_load_acquire(&mp->wfired)))
+ usleep_range(ACQUIRE_WFIRED_INTVAL_USEC_MIN,
+ ACQUIRE_WFIRED_INTVAL_USEC_MAX);
+}
+
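+/*
+ * Route a response to its sender: synchronous senders are woken up via
+ * their wait queue, while asynchronous senders have their timeout work
+ * cancelled and their completion callback queued immediately.
+ */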
+int hmdfs_response_handle_sync(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *head, void *buf)
+{
+ struct sendmsg_wait_queue *msg_info = NULL;
+ struct hmdfs_msg_parasite *mp = NULL;
+ struct hmdfs_msg_idr_head *msg_head = NULL;
+ u32 msg_id = le32_to_cpu(head->msg_id);
+ bool woke = false;
+ u8 cmd = head->operations.command;
+
+ msg_head = hmdfs_find_msg_head(con, msg_id);
+ if (!msg_head)
+ goto out;
+
+ switch (msg_head->type) {
+ case MSG_IDR_MESSAGE_SYNC:
+ msg_info = (struct sendmsg_wait_queue *)msg_head;
+ if (atomic_read(&msg_info->valid) == MSG_Q_SEND) {
+ hmdfs_response_wakeup(msg_info,
+ le32_to_cpu(head->ret_code),
+ le32_to_cpu(head->data_len), buf);
+ hmdfs_client_resp_statis(con->sbi, cmd,
+ HMDFS_RESP_NORMAL,
+ msg_info->start, jiffies);
+ woke = true;
+ }
+
+ msg_put(msg_info);
+ break;
+ case MSG_IDR_MESSAGE_ASYNC:
+ mp = (struct hmdfs_msg_parasite *)msg_head;
+
+ hmdfs_wait_mp_wfired(mp);
+ if (cancel_delayed_work(&mp->d_work)) {
+ mp->resp.out_buf = buf;
+ mp->resp.out_len =
+ le32_to_cpu(head->data_len) - sizeof(*head);
+ mp->resp.ret_code = le32_to_cpu(head->ret_code);
+ queue_delayed_work(con->async_wq, &mp->d_work, 0);
+ hmdfs_client_resp_statis(con->sbi, cmd,
+ HMDFS_RESP_NORMAL, mp->start,
+ jiffies);
+ woke = true;
+ }
+ mp_put(mp);
+ break;
+ default:
+ hmdfs_err("receive incorrect msg type %d msg_id %d cmd %d",
+ msg_head->type, msg_id, cmd);
+ break;
+ }
+
+ if (likely(woke))
+ return 0;
+out:
+ hmdfs_client_resp_statis(con->sbi, cmd, HMDFS_RESP_DELAY, 0, 0);
+ hmdfs_info("cannot find msg_id %d cmd %d", msg_id, cmd);
+ return -EINVAL;
+}
+
+static int hmdfs_response_recv(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *head, void *buf)
+{
+ __u16 command = head->operations.command;
+ int ret;
+
+ if (command >= F_SIZE) {
+ ret = -EINVAL;
+ return ret;
+ }
+
+ switch (head->operations.command) {
+ case F_OPEN:
+ case F_RELEASE:
+ case F_READPAGE:
+ case F_WRITEPAGE:
+ case F_MKDIR:
+ case F_RMDIR:
+ case F_CREATE:
+ case F_UNLINK:
+ case F_RENAME:
+ case F_SETATTR:
+ case F_STATFS:
+ case F_CONNECT_REKEY:
+ case F_DROP_PUSH:
+ case F_GETATTR:
+ case F_FSYNC:
+ case F_SYNCFS:
+ case F_GETXATTR:
+ case F_SETXATTR:
+ case F_LISTXATTR:
+ ret = hmdfs_response_handle_sync(con, head, buf);
+ return ret;
+
+ case F_ITERATE:
+ ret = hmdfs_msg_handle_async(con, head, buf, con->async_wq,
+ hmdfs_file_response_work_fn);
+ return ret;
+
+ default:
+ hmdfs_err("Fatal! Unexpected response command %d",
+ head->operations.command);
+ ret = -EINVAL;
+ return ret;
+ }
+}
+
+static void hmdfs_recv_mesg_callback(struct hmdfs_peer *con, void *head,
+ void *buf)
+{
+ struct hmdfs_head_cmd *hmdfs_head = (struct hmdfs_head_cmd *)head;
+
+ trace_hmdfs_recv_mesg_callback(hmdfs_head);
+
+ if (hmdfs_message_verify(con, hmdfs_head, buf) < 0) {
+		hmdfs_info("Message %u has been abandoned",
+			   le32_to_cpu(hmdfs_head->msg_id));
+ goto out_err;
+ }
+
+ switch (hmdfs_head->operations.cmd_flag) {
+ case C_REQUEST:
+ if (hmdfs_request_recv(con, hmdfs_head, buf) < 0)
+ goto out_err;
+ break;
+
+ case C_RESPONSE:
+ if (hmdfs_response_recv(con, hmdfs_head, buf) < 0)
+ goto out_err;
+ break;
+
+ default:
+ hmdfs_err("Fatal! Unexpected msg cmd %d",
+ hmdfs_head->operations.cmd_flag);
+ break;
+ }
+ return;
+
+out_err:
+ kfree(buf);
+}
+
+static inline void hmdfs_recv_page_callback(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *head,
+ int err, void *data)
+{
+ if (head->operations.command == F_READPAGE)
+ hmdfs_client_recv_readpage(head, err, data);
+}
+
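+/* Per-version connection ops; only the PROTOCOL_VERSION entry is populated */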
+static const struct connection_operations conn_operations[] = {
+ [PROTOCOL_VERSION] = {
+ .recvmsg = hmdfs_recv_mesg_callback,
+ .recvpage = hmdfs_recv_page_callback,
+ /* remote device operations */
+ .remote_file_fops =
+ &hmdfs_dev_file_fops_remote,
+ .remote_file_iops =
+ &hmdfs_dev_file_iops_remote,
+ .remote_file_aops =
+ &hmdfs_dev_file_aops_remote,
+ .remote_unlink =
+ hmdfs_dev_unlink_from_con,
+ .remote_readdir =
+ hmdfs_dev_readdir_from_con,
+ }
+};
+
+const struct connection_operations *hmdfs_get_peer_operation(__u8 version)
+{
+ if (version <= INVALID_VERSION || version >= MAX_VERSION)
+ return NULL;
+
+ if (version <= USERSPACE_MAX_VER)
+ return &(conn_operations[USERDFS_VERSION]);
+ else
+ return &(conn_operations[PROTOCOL_VERSION]);
+}
+
+void hmdfs_wakeup_parasite(struct hmdfs_msg_parasite *mp)
+{
+ hmdfs_wait_mp_wfired(mp);
+ if (!cancel_delayed_work(&mp->d_work))
+ hmdfs_err("cancel parasite work err msg_id=%d cmd=%d",
+ mp->head.msg_id, mp->req.operations.command);
+ else
+ async_request_cb_on_wakeup_fn(&mp->d_work.work);
+}
+
+void hmdfs_wakeup_async_work(struct hmdfs_async_work *async_work)
+{
+ if (!cancel_delayed_work(&async_work->d_work))
+ hmdfs_err("cancel async work err msg_id=%d",
+ async_work->head.msg_id);
+ else
+ hmdfs_recv_page_work_fn(&async_work->d_work.work);
+}
diff --git a/fs/hmdfs/comm/socket_adapter.h b/fs/hmdfs/comm/socket_adapter.h
new file mode 100644
index 0000000000000000000000000000000000000000..ba4c672d7bcc998fd99b7ce9974003007fd1f49b
--- /dev/null
+++ b/fs/hmdfs/comm/socket_adapter.h
@@ -0,0 +1,193 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/comm/socket_adapter.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef SOCKET_ADAPTER_H
+#define SOCKET_ADAPTER_H
+
+#include <linux/net.h>
+#include <linux/pagemap.h>
+
+#include "connection.h"
+#include "hmdfs.h"
+#include "protocol.h"
+
+#define HMDFS_KEY_SIZE 32
+#define HMDFS_IV_SIZE 12
+#define HMDFS_TAG_SIZE 16
+#define HMDFS_CID_SIZE 64
+#define INVALID_SOCKET_FD (-1)
+
+#define HMDFS_IDR_RESCHED_COUNT 512
+
+struct connection_operations {
+ void (*recvmsg)(struct hmdfs_peer *con, void *head, void *buf);
+ void (*recvpage)(struct hmdfs_peer *con, struct hmdfs_head_cmd *head,
+ int err, void *data);
+ const struct file_operations *remote_file_fops;
+ const struct inode_operations *remote_file_iops;
+ const struct address_space_operations *remote_file_aops;
+ int (*remote_unlink)(struct hmdfs_peer *con, struct dentry *dentry);
+ int (*remote_readdir)(struct hmdfs_peer *con, struct file *file,
+ struct dir_context *ctx);
+ struct hmdfs_lookup_ret *(*remote_lookup)(struct hmdfs_peer *con,
+ const char *relative_path,
+ const char *d_name);
+};
+
+/*****************************************************************************
+ * connections (TCP, UDP, etc.) adapter for RPC
+ *****************************************************************************/
+
+struct work_handler_desp {
+ struct work_struct work;
+ struct hmdfs_peer *peer;
+ struct hmdfs_head_cmd *head;
+ void *buf;
+};
+
+struct work_readfile_request_async {
+ struct work_struct work;
+ struct hmdfs_peer *con;
+ struct hmdfs_send_command sm;
+};
+
+static inline void hmdfs_init_cmd(struct hmdfs_cmd *op, u8 cmd)
+{
+ op->reserved = 0;
+ op->cmd_flag = C_REQUEST;
+ op->command = cmd;
+ op->reserved2 = 0;
+}
+
+int hmdfs_send_async_request(struct hmdfs_peer *peer,
+ const struct hmdfs_req *req);
+int hmdfs_sendmessage_request(struct hmdfs_peer *con,
+ struct hmdfs_send_command *msg);
+int hmdfs_sendpage_request(struct hmdfs_peer *con,
+ struct hmdfs_send_command *msg);
+
+int hmdfs_sendmessage_response(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *cmd, __u32 data_len,
+ void *buf, __u32 ret_code);
+int hmdfs_readfile_response(struct hmdfs_peer *con, struct hmdfs_head_cmd *head,
+ struct file *filp);
+const struct connection_operations *hmdfs_get_peer_operation(__u8 version);
+
+void hmdfs_recv_page_work_fn(struct work_struct *ptr);
+
+/*****************************************************************************
+ * statistics info for RPC
+ *****************************************************************************/
+
+enum hmdfs_resp_type {
+ HMDFS_RESP_NORMAL,
+ HMDFS_RESP_DELAY,
+ HMDFS_RESP_TIMEOUT
+};
+
+struct server_statistic {
+ unsigned long long cnt; /* request received */
+ unsigned long long max; /* max processing time */
+ unsigned long long total; /* total processing time */
+ unsigned long long snd_cnt; /* resp send to client */
+ unsigned long long snd_fail_cnt; /* send resp to client failed cnt */
+};
+
+struct client_statistic {
+ unsigned long long snd_cnt; /* request send to server */
+ unsigned long long resp_cnt; /* response receive from server */
+ unsigned long long timeout_cnt; /* no response from server */
+ unsigned long long delay_resp_cnt; /* delay response from server */
+ unsigned long long max; /* max waiting time */
+ unsigned long long total; /* total waiting time */
+ unsigned long long snd_fail_cnt; /* request send failed to server */
+};
+
+
+static inline void hmdfs_statistic(struct hmdfs_sb_info *sbi, u8 cmd,
+ unsigned long jiff)
+{
+ if (cmd >= F_SIZE)
+ return;
+
+ sbi->s_server_statis[cmd].cnt++;
+ sbi->s_server_statis[cmd].total += jiff;
+ if (jiff > sbi->s_server_statis[cmd].max)
+ sbi->s_server_statis[cmd].max = jiff;
+}
+
+static inline void hmdfs_server_snd_statis(struct hmdfs_sb_info *sbi,
+ u8 cmd, int ret)
+{
+ if (cmd >= F_SIZE)
+ return;
+ ret ? sbi->s_server_statis[cmd].snd_fail_cnt++ :
+ sbi->s_server_statis[cmd].snd_cnt++;
+}
+
+static inline void hmdfs_client_snd_statis(struct hmdfs_sb_info *sbi,
+ u8 cmd, int ret)
+{
+ if (cmd >= F_SIZE)
+ return;
+ ret ? sbi->s_client_statis[cmd].snd_fail_cnt++ :
+ sbi->s_client_statis[cmd].snd_cnt++;
+}
+
+extern void hmdfs_client_resp_statis(struct hmdfs_sb_info *sbi, u8 cmd,
+ enum hmdfs_resp_type type,
+ unsigned long start, unsigned long end);
+
+/*****************************************************************************
+ * timeout configuration for RPC
+ *****************************************************************************/
+
+enum HMDFS_TIME_OUT {
+ TIMEOUT_NONE = 0,
+ TIMEOUT_COMMON = 4,
+ TIMEOUT_6S = 6,
+ TIMEOUT_30S = 30,
+ TIMEOUT_1M = 60,
+ TIMEOUT_90S = 90,
+ TIMEOUT_CONFIG = UINT_MAX - 1, // for hmdfs_req to read from config
+ TIMEOUT_UNINIT = UINT_MAX,
+};
+
+static inline int get_cmd_timeout(struct hmdfs_sb_info *sbi, enum FILE_CMD cmd)
+{
+ return sbi->s_cmd_timeout[cmd];
+}
+
+static inline void set_cmd_timeout(struct hmdfs_sb_info *sbi, enum FILE_CMD cmd,
+ unsigned int value)
+{
+ sbi->s_cmd_timeout[cmd] = value;
+}
+
+void hmdfs_response_wakeup(struct sendmsg_wait_queue *msg_info,
+ __u32 ret_code, __u32 data_len, void *buf);
+
+void hmdfs_wakeup_parasite(struct hmdfs_msg_parasite *mp);
+
+void hmdfs_wakeup_async_work(struct hmdfs_async_work *async_work);
+
+void msg_put(struct sendmsg_wait_queue *msg_wq);
+void head_put(struct hmdfs_msg_idr_head *head);
+void mp_put(struct hmdfs_msg_parasite *mp);
+void asw_put(struct hmdfs_async_work *asw);
+static inline void asw_done(struct hmdfs_async_work *asw)
+{
+ if (asw->page)
+ unlock_page(asw->page);
+ asw_put(asw);
+}
+
+static inline void asw_get(struct hmdfs_async_work *asw)
+{
+ kref_get(&asw->head.ref);
+}
+#endif
diff --git a/fs/hmdfs/comm/transport.c b/fs/hmdfs/comm/transport.c
new file mode 100644
index 0000000000000000000000000000000000000000..cb57da2c53f806ea61a9684bfd4c7d4150adc86b
--- /dev/null
+++ b/fs/hmdfs/comm/transport.c
@@ -0,0 +1,1220 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/comm/transport.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include "transport.h"
+
+#include <linux/delay.h>
+#include <linux/freezer.h>
+#include <linux/highmem.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/sched/mm.h>
+#include <linux/tcp.h>
+#include <net/tcp.h>
+
+#include "device_node.h"
+#include "hmdfs_trace.h"
+#include "socket_adapter.h"
+#include "authority/authentication.h"
+
+#ifdef CONFIG_HMDFS_FS_ENCRYPTION
+#include <net/tls.h>
+#include "crypto.h"
+#endif
+
+typedef void (*connect_recv_handler)(struct connection *, void *, void *,
+ __u32);
+
+static connect_recv_handler connect_recv_callback[CONNECT_STAT_COUNT] = {
+ [CONNECT_STAT_WAIT_REQUEST] = connection_handshake_recv_handler,
+ [CONNECT_STAT_WAIT_RESPONSE] = connection_handshake_recv_handler,
+ [CONNECT_STAT_WORKING] = connection_working_recv_handler,
+ [CONNECT_STAT_STOP] = NULL,
+ [CONNECT_STAT_WAIT_ACK] = connection_handshake_recv_handler,
+ [CONNECT_STAT_NEGO_FAIL] = NULL,
+};
+
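+/*
+ * recvmsg/sendmsg wrappers run under memalloc_nofs_save() so that
+ * allocations inside the network stack cannot recurse into filesystem
+ * reclaim while hmdfs holds filesystem locks.
+ */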
+static int recvmsg_nofs(struct socket *sock, struct msghdr *msg,
+ struct kvec *vec, size_t num, size_t size, int flags)
+{
+ unsigned int nofs_flags;
+ int ret;
+
+ /* enable NOFS for memory allocation */
+ nofs_flags = memalloc_nofs_save();
+ ret = kernel_recvmsg(sock, msg, vec, num, size, flags);
+ memalloc_nofs_restore(nofs_flags);
+
+ return ret;
+}
+
+static int sendmsg_nofs(struct socket *sock, struct msghdr *msg,
+ struct kvec *vec, size_t num, size_t size)
+{
+ unsigned int nofs_flags;
+ int ret;
+
+ /* enable NOFS for memory allocation */
+ nofs_flags = memalloc_nofs_save();
+ ret = kernel_sendmsg(sock, msg, vec, num, size);
+ memalloc_nofs_restore(nofs_flags);
+
+ return ret;
+}
+
+static int tcp_set_recvtimeo(struct socket *sock, int timeout)
+{
+ long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
+
+ tcp_sock_set_nodelay(sock->sk);
+ tcp_sock_set_user_timeout(sock->sk, jiffies_left);
+ return 0;
+}
+
+uint32_t hmdfs_tcpi_rtt(struct hmdfs_peer *con)
+{
+ uint32_t rtt_us = 0;
+ struct connection *conn_impl = NULL;
+ struct tcp_handle *tcp = NULL;
+
+ conn_impl = get_conn_impl(con, CONNECT_TYPE_TCP);
+ if (!conn_impl)
+ return rtt_us;
+ tcp = (struct tcp_handle *)(conn_impl->connect_handle);
+ if (tcp->sock)
+ rtt_us = tcp_sk(tcp->sock->sk)->srtt_us >> 3;
+ connection_put(conn_impl);
+ return rtt_us;
+}
+
+static int tcp_read_head_from_socket(struct socket *sock, void *buf,
+ unsigned int to_read)
+{
+ int rc = 0;
+ struct msghdr hmdfs_msg;
+ struct kvec iov;
+
+ iov.iov_base = buf;
+ iov.iov_len = to_read;
+ memset(&hmdfs_msg, 0, sizeof(hmdfs_msg));
+ hmdfs_msg.msg_flags = MSG_WAITALL;
+ hmdfs_msg.msg_control = NULL;
+ hmdfs_msg.msg_controllen = 0;
+ rc = recvmsg_nofs(sock, &hmdfs_msg, &iov, 1, to_read,
+ hmdfs_msg.msg_flags);
+ if (rc == -EAGAIN || rc == -ETIMEDOUT || rc == -EINTR ||
+ rc == -EBADMSG) {
+ usleep_range(1000, 2000);
+ return -EAGAIN;
+ }
+ // error occurred
+ if (rc != to_read) {
+ hmdfs_err("tcp recv error %d", rc);
+ return -ESHUTDOWN;
+ }
+ return 0;
+}
+
+static int tcp_read_buffer_from_socket(struct socket *sock, void *buf,
+ unsigned int to_read)
+{
+ int read_cnt = 0;
+ int retry_time = 0;
+ int rc = 0;
+ struct msghdr hmdfs_msg;
+ struct kvec iov;
+
+ do {
+ iov.iov_base = (char *)buf + read_cnt;
+ iov.iov_len = to_read - read_cnt;
+ memset(&hmdfs_msg, 0, sizeof(hmdfs_msg));
+ hmdfs_msg.msg_flags = MSG_WAITALL;
+ hmdfs_msg.msg_control = NULL;
+ hmdfs_msg.msg_controllen = 0;
+ rc = recvmsg_nofs(sock, &hmdfs_msg, &iov, 1,
+ to_read - read_cnt, hmdfs_msg.msg_flags);
+ if (rc == -EBADMSG) {
+ usleep_range(1000, 2000);
+ continue;
+ }
+ if (rc == -EAGAIN || rc == -ETIMEDOUT || rc == -EINTR) {
+ retry_time++;
+ hmdfs_info("read again %d", rc);
+ usleep_range(1000, 2000);
+ continue;
+ }
+ // error occurred
+ if (rc <= 0) {
+ hmdfs_err("tcp recv error %d", rc);
+ return -ESHUTDOWN;
+ }
+ read_cnt += rc;
+ if (read_cnt != to_read)
+ hmdfs_info("read again %d/%d", read_cnt, to_read);
+ } while (read_cnt < to_read && retry_time < MAX_RECV_RETRY_TIMES);
+ if (read_cnt == to_read)
+ return 0;
+ return -ESHUTDOWN;
+}
+
+static int hmdfs_drop_readpage_buffer(struct socket *sock,
+ struct hmdfs_head_cmd *recv)
+{
+ unsigned int len;
+ void *buf = NULL;
+ int err;
+
+ len = le32_to_cpu(recv->data_len) - sizeof(struct hmdfs_head_cmd);
+ if (len > HMDFS_PAGE_SIZE || !len) {
+ hmdfs_err("recv invalid readpage length %u", len);
+ return -EINVAL;
+ }
+
+ /* Abort the connection if no memory */
+ buf = kmalloc(len, GFP_KERNEL);
+ if (!buf)
+ return -ESHUTDOWN;
+
+ err = tcp_read_buffer_from_socket(sock, buf, len);
+ kfree(buf);
+
+ return err;
+}
+
+static int hmdfs_get_readpage_buffer(struct socket *sock,
+ struct hmdfs_head_cmd *recv,
+ struct page *page)
+{
+ char *page_buf = NULL;
+ unsigned int out_len;
+ int err;
+
+ out_len = le32_to_cpu(recv->data_len) - sizeof(struct hmdfs_head_cmd);
+ if (out_len > HMDFS_PAGE_SIZE || !out_len) {
+ hmdfs_err("recv invalid readpage length %u", out_len);
+ return -EINVAL;
+ }
+
+ page_buf = kmap(page);
+ err = tcp_read_buffer_from_socket(sock, page_buf, out_len);
+ if (err)
+ goto out_unmap;
+ if (out_len != HMDFS_PAGE_SIZE)
+ memset(page_buf + out_len, 0, HMDFS_PAGE_SIZE - out_len);
+
+out_unmap:
+ kunmap(page);
+ return err;
+}
+
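+/*
+ * Receive the payload of a readpage response. The pending async work
+ * is looked up by msg_id; if it is still alive the data is copied
+ * straight into the target page. If the request already timed out,
+ * the payload is drained from the socket so the byte stream stays
+ * aligned with the next message header.
+ */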
+static int tcp_recvpage_tls(struct connection *connect,
+ struct hmdfs_head_cmd *recv)
+{
+ int ret = 0;
+ struct tcp_handle *tcp = NULL;
+ struct hmdfs_peer *node = NULL;
+ struct page *page = NULL;
+ struct hmdfs_async_work *async_work = NULL;
+ int rd_err;
+
+ if (!connect) {
+ hmdfs_err("tcp connect == NULL");
+ return -ESHUTDOWN;
+ }
+ node = connect->node;
+ tcp = (struct tcp_handle *)(connect->connect_handle);
+
+ rd_err = le32_to_cpu(recv->ret_code);
+ if (rd_err)
+ hmdfs_warning("tcp: readpage from peer %llu ret err %d",
+ node->device_id, rd_err);
+
+ async_work = (struct hmdfs_async_work *)hmdfs_find_msg_head(node,
+ le32_to_cpu(recv->msg_id));
+ if (!async_work || !cancel_delayed_work(&async_work->d_work))
+ goto out;
+
+ page = async_work->page;
+ if (!page) {
+ hmdfs_err("page not found");
+ goto out;
+ }
+
+ if (!rd_err) {
+ ret = hmdfs_get_readpage_buffer(tcp->sock, recv, page);
+ if (ret)
+ rd_err = ret;
+ }
+ node->conn_operations->recvpage(node, recv, rd_err, async_work);
+ asw_put(async_work);
+ return ret;
+
+out:
+ /* async_work will be released by recvpage in the normal procedure */
+ if (async_work)
+ asw_put(async_work);
+ hmdfs_err_ratelimited("timeout, drop page");
+ hmdfs_client_resp_statis(node->sbi, F_READPAGE, HMDFS_RESP_DELAY, 0, 0);
+ if (!rd_err)
+ ret = hmdfs_drop_readpage_buffer(tcp->sock, recv);
+ return ret;
+}
+
+static void aeadcipher_cb(struct crypto_async_request *req, int error)
+{
+ struct aeadcrypt_result *result = req->data;
+
+ if (error == -EINPROGRESS)
+ return;
+ result->err = error;
+ complete(&result->completion);
+}
+
+static int aeadcipher_en_de(struct aead_request *req,
+ struct aeadcrypt_result result, int flag)
+{
+ int rc = 0;
+
+ if (flag)
+ rc = crypto_aead_encrypt(req);
+ else
+ rc = crypto_aead_decrypt(req);
+ switch (rc) {
+ case 0:
+ break;
+ case -EINPROGRESS:
+ case -EBUSY:
+ rc = wait_for_completion_interruptible(&result.completion);
+ if (!rc && !result.err)
+ reinit_completion(&result.completion);
+ break;
+ default:
+ hmdfs_err("returned rc %d result %d", rc, result.err);
+ break;
+ }
+ return rc;
+}
+
+static int set_aeadcipher(struct crypto_aead *tfm, struct aead_request *req,
+ struct aeadcrypt_result *result)
+{
+ init_completion(&result->completion);
+ aead_request_set_callback(
+ req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
+ aeadcipher_cb, result);
+ return 0;
+}
+
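+/*
+ * On-wire layout produced by the encrypt path below: a 12-byte random
+ * IV is prepended to the ciphertext and the AEAD transform appends a
+ * 16-byte authentication tag, so dst_buf must provide
+ * src_len + HMDFS_IV_SIZE + HMDFS_TAG_SIZE bytes.
+ */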
+int aeadcipher_encrypt_buffer(struct connection *con, __u8 *src_buf,
+ size_t src_len, __u8 *dst_buf, size_t dst_len)
+{
+ int ret = 0;
+ struct scatterlist src, dst;
+ struct aead_request *req = NULL;
+ struct aeadcrypt_result result;
+ __u8 cipher_iv[HMDFS_IV_SIZE];
+
+ if (src_len <= 0)
+ return -EINVAL;
+ if (!virt_addr_valid(src_buf) || !virt_addr_valid(dst_buf)) {
+ WARN_ON(1);
+ hmdfs_err("encrypt address is invalid");
+ return -EPERM;
+ }
+
+ get_random_bytes(cipher_iv, HMDFS_IV_SIZE);
+ memcpy(dst_buf, cipher_iv, HMDFS_IV_SIZE);
+ req = aead_request_alloc(con->tfm, GFP_KERNEL);
+ if (!req) {
+ hmdfs_err("aead_request_alloc() failed");
+ return -ENOMEM;
+ }
+ ret = set_aeadcipher(con->tfm, req, &result);
+ if (ret) {
+ hmdfs_err("set_enaeadcipher exit fault");
+ goto out;
+ }
+
+ sg_init_one(&src, src_buf, src_len);
+ sg_init_one(&dst, dst_buf + HMDFS_IV_SIZE, dst_len - HMDFS_IV_SIZE);
+ aead_request_set_crypt(req, &src, &dst, src_len, cipher_iv);
+ aead_request_set_ad(req, 0);
+ ret = aeadcipher_en_de(req, result, ENCRYPT_FLAG);
+out:
+ aead_request_free(req);
+ return ret;
+}
+
+int aeadcipher_decrypt_buffer(struct connection *con, __u8 *src_buf,
+ size_t src_len, __u8 *dst_buf, size_t dst_len)
+{
+ int ret = 0;
+ struct scatterlist src, dst;
+ struct aead_request *req = NULL;
+ struct aeadcrypt_result result;
+ __u8 cipher_iv[HMDFS_IV_SIZE];
+
+ if (src_len <= HMDFS_IV_SIZE + HMDFS_TAG_SIZE)
+ return -EINVAL;
+ if (!virt_addr_valid(src_buf) || !virt_addr_valid(dst_buf)) {
+ WARN_ON(1);
+ hmdfs_err("decrypt address is invalid");
+ return -EPERM;
+ }
+
+ memcpy(cipher_iv, src_buf, HMDFS_IV_SIZE);
+ req = aead_request_alloc(con->tfm, GFP_KERNEL);
+ if (!req) {
+ hmdfs_err("aead_request_alloc() failed");
+ return -ENOMEM;
+ }
+ ret = set_aeadcipher(con->tfm, req, &result);
+ if (ret) {
+ hmdfs_err("set_deaeadcipher exit fault");
+ goto out;
+ }
+
+ sg_init_one(&src, src_buf + HMDFS_IV_SIZE, src_len - HMDFS_IV_SIZE);
+ sg_init_one(&dst, dst_buf, dst_len);
+ aead_request_set_crypt(req, &src, &dst, src_len - HMDFS_IV_SIZE,
+ cipher_iv);
+ aead_request_set_ad(req, 0);
+ ret = aeadcipher_en_de(req, result, DECRYPT_FLAG);
+out:
+ aead_request_free(req);
+ return ret;
+}
+
+static int tcp_recvbuffer_cipher(struct connection *connect,
+ struct hmdfs_head_cmd *recv)
+{
+ int ret = 0;
+ struct tcp_handle *tcp = NULL;
+ size_t cipherbuffer_len;
+ __u8 *cipherbuffer = NULL;
+ size_t outlen = 0;
+ __u8 *outdata = NULL;
+ __u32 recv_len = le32_to_cpu(recv->data_len);
+
+ tcp = (struct tcp_handle *)(connect->connect_handle);
+ if (recv_len == sizeof(struct hmdfs_head_cmd))
+ goto out_recv_head;
+ else if (recv_len > sizeof(struct hmdfs_head_cmd) &&
+ recv_len <= ADAPTER_MESSAGE_LENGTH)
+ cipherbuffer_len = recv_len - sizeof(struct hmdfs_head_cmd) +
+ HMDFS_IV_SIZE + HMDFS_TAG_SIZE;
+ else
+ return -ENOMSG;
+ cipherbuffer = kzalloc(cipherbuffer_len, GFP_KERNEL);
+ if (!cipherbuffer) {
+ hmdfs_err("zalloc cipherbuffer error");
+ return -ESHUTDOWN;
+ }
+ outlen = cipherbuffer_len - HMDFS_IV_SIZE - HMDFS_TAG_SIZE;
+ outdata = kzalloc(outlen, GFP_KERNEL);
+ if (!outdata) {
+ hmdfs_err("encrypt zalloc outdata error");
+ kfree(cipherbuffer);
+ return -ESHUTDOWN;
+ }
+
+ ret = tcp_read_buffer_from_socket(tcp->sock, cipherbuffer,
+ cipherbuffer_len);
+ if (ret)
+ goto out_recv;
+ ret = aeadcipher_decrypt_buffer(connect, cipherbuffer, cipherbuffer_len,
+ outdata, outlen);
+ if (ret) {
+ hmdfs_err("decrypt_buf fail");
+ goto out_recv;
+ }
+out_recv_head:
+ if (connect_recv_callback[connect->status]) {
+ connect_recv_callback[connect->status](connect, recv, outdata,
+ outlen);
+ } else {
+ kfree(outdata);
+ hmdfs_err("encypt callback NULL status %d", connect->status);
+ }
+ kfree(cipherbuffer);
+ return ret;
+out_recv:
+ kfree(cipherbuffer);
+ kfree(outdata);
+ return ret;
+}
+
+static int tcp_recvbuffer_tls(struct connection *connect,
+ struct hmdfs_head_cmd *recv)
+{
+ int ret = 0;
+ struct tcp_handle *tcp = NULL;
+ size_t outlen;
+ __u8 *outdata = NULL;
+ __u32 recv_len = le32_to_cpu(recv->data_len);
+
+ tcp = (struct tcp_handle *)(connect->connect_handle);
+ outlen = recv_len - sizeof(struct hmdfs_head_cmd);
+ if (outlen == 0)
+ goto out_recv_head;
+
+ /*
+ * NOTE: Up to half of the allocated memory may be wasted due to
+ * internal fragmentation. However, this reduces the number of
+ * allocations and avoids changing the existing message
+ * transport mechanism.
+ */
+ outdata = kmalloc(outlen, GFP_KERNEL);
+ if (!outdata)
+ return -ESHUTDOWN;
+
+ ret = tcp_read_buffer_from_socket(tcp->sock, outdata, outlen);
+ if (ret) {
+ kfree(outdata);
+ return ret;
+ }
+ tcp->connect->stat.recv_bytes += outlen;
+out_recv_head:
+ if (connect_recv_callback[connect->status]) {
+ connect_recv_callback[connect->status](connect, recv, outdata,
+ outlen);
+ } else {
+ kfree(outdata);
+ hmdfs_err("callback NULL status %d", connect->status);
+ }
+ return 0;
+}
+
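+/*
+ * Read one fixed-size message header from the socket, validate its
+ * magic and length, then hand the payload to the readpage, TLS or
+ * cipher receive path depending on peer version and connection state.
+ */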
+static int tcp_receive_from_sock(struct tcp_handle *tcp)
+{
+ struct hmdfs_head_cmd *recv = NULL;
+ int ret = 0;
+
+ if (!tcp) {
+ hmdfs_info("tcp recv thread !tcp");
+ return -ESHUTDOWN;
+ }
+
+ if (!tcp->sock) {
+ hmdfs_info("tcp recv thread !sock");
+ return -ESHUTDOWN;
+ }
+
+ recv = kmem_cache_alloc(tcp->recv_cache, GFP_KERNEL);
+ if (!recv) {
+ hmdfs_info("tcp recv thread !cache");
+ return -ESHUTDOWN;
+ }
+
+ ret = tcp_read_head_from_socket(tcp->sock, recv,
+ sizeof(struct hmdfs_head_cmd));
+ if (ret)
+ goto out;
+
+ tcp->connect->stat.recv_bytes += sizeof(struct hmdfs_head_cmd);
+ tcp->connect->stat.recv_message_count++;
+
+ if (recv->magic != HMDFS_MSG_MAGIC) {
+ hmdfs_info_ratelimited("tcp recv fd %d wrong magic. drop message",
+ tcp->fd);
+ goto out;
+ }
+
+ if ((le32_to_cpu(recv->data_len) >
+ HMDFS_MAX_MESSAGE_LEN + sizeof(struct hmdfs_head_cmd)) ||
+ (le32_to_cpu(recv->data_len) < sizeof(struct hmdfs_head_cmd))) {
+ hmdfs_info("tcp recv fd %d length error. drop message",
+ tcp->fd);
+ goto out;
+ }
+
+ if (recv->version > USERSPACE_MAX_VER &&
+ tcp->connect->status == CONNECT_STAT_WORKING &&
+ recv->operations.command == F_READPAGE &&
+ recv->operations.cmd_flag == C_RESPONSE) {
+ ret = tcp_recvpage_tls(tcp->connect, recv);
+ goto out;
+ }
+
+ if (tcp->connect->status == CONNECT_STAT_WORKING &&
+ recv->version > USERSPACE_MAX_VER)
+ ret = tcp_recvbuffer_tls(tcp->connect, recv);
+ else
+ ret = tcp_recvbuffer_cipher(tcp->connect, recv);
+
+out:
+ kmem_cache_free(tcp->recv_cache, recv);
+ return ret;
+}
+
+static bool tcp_handle_is_available(struct tcp_handle *tcp)
+{
+#ifdef CONFIG_HMDFS_FS_ENCRYPTION
+ struct tls_context *tls_ctx = NULL;
+ struct tls_sw_context_rx *ctx = NULL;
+
+#endif
+ if (!tcp || !tcp->sock || !tcp->sock->sk) {
+ hmdfs_err("Invalid tcp connection");
+ return false;
+ }
+
+ if (tcp->sock->sk->sk_state != TCP_ESTABLISHED) {
+ hmdfs_err("TCP conn %d is broken, current sk_state is %d",
+ tcp->fd, tcp->sock->sk->sk_state);
+ return false;
+ }
+
+ if (tcp->sock->state != SS_CONNECTING &&
+ tcp->sock->state != SS_CONNECTED) {
+ hmdfs_err("TCP conn %d is broken, current sock state is %d",
+ tcp->fd, tcp->sock->state);
+ return false;
+ }
+
+#ifdef CONFIG_HMDFS_FS_ENCRYPTION
+ tls_ctx = tls_get_ctx(tcp->sock->sk);
+ if (tls_ctx) {
+ ctx = tls_sw_ctx_rx(tls_ctx);
+ if (ctx && ctx->strp.stopped) {
+ hmdfs_err(
+ "TCP conn %d is broken, the strparser has stopped",
+ tcp->fd);
+ return false;
+ }
+ }
+#endif
+ return true;
+}
+
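+/*
+ * Per-connection receive kthread: it parks (schedule + freeze) while
+ * this redundant connection is not attached to the peer's connection
+ * list, and exits on -ESHUTDOWN, triggering reconnection unless the
+ * node is already offline.
+ */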
+static int tcp_recv_thread(void *arg)
+{
+ int ret = 0;
+ struct tcp_handle *tcp = (struct tcp_handle *)arg;
+ const struct cred *old_cred;
+
+ WARN_ON(!tcp);
+ WARN_ON(!tcp->sock);
+ set_freezable();
+
+ old_cred = hmdfs_override_creds(tcp->connect->node->sbi->system_cred);
+
+ while (!kthread_should_stop()) {
+ /*
+ * 1. Park here while this redundant connection has not been
+ * attached to a peer's connection list
+ * 2. Locking is unnecessary since a transient state is
+ * acceptable
+ */
+ if (tcp_handle_is_available(tcp) &&
+ list_empty(&tcp->connect->list))
+ goto freeze;
+ if (!mutex_trylock(&tcp->close_mutex))
+ continue;
+ if (tcp_handle_is_available(tcp))
+ ret = tcp_receive_from_sock(tcp);
+ else
+ ret = -ESHUTDOWN;
+ /*
+ * This kthread will exit if ret is -ESHUTDOWN, thus we need to
+ * set recv_task to NULL to avoid calling kthread_stop() from
+ * tcp_close_socket().
+ */
+ if (ret == -ESHUTDOWN)
+ tcp->recv_task = NULL;
+ mutex_unlock(&tcp->close_mutex);
+ if (ret == -ESHUTDOWN) {
+ hmdfs_node_inc_evt_seq(tcp->connect->node);
+ tcp->connect->status = CONNECT_STAT_STOP;
+ if (tcp->connect->node->status != NODE_STAT_OFFLINE)
+ hmdfs_reget_connection(tcp->connect);
+ break;
+ }
+freeze:
+ schedule();
+ try_to_freeze();
+ }
+
+ hmdfs_info("Exiting. Now, sock state = %d", tcp->sock->state);
+ hmdfs_revert_creds(old_cred);
+ connection_put(tcp->connect);
+ return 0;
+}
+
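+/*
+ * Send path used during handshake or towards HMDFS 1.0 peers: the
+ * head is sent in clear while the data payload is AES-GCM encrypted
+ * into a bounce buffer, both going out in a single sendmsg call.
+ */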
+static int tcp_send_message_sock_cipher(struct tcp_handle *tcp,
+ struct hmdfs_send_data *msg)
+{
+ int ret = 0;
+ __u8 *outdata = NULL;
+ size_t outlen = 0;
+ int send_len = 0;
+ int send_vec_cnt = 0;
+ struct msghdr tcp_msg;
+ struct kvec iov[TCP_KVEC_ELE_DOUBLE];
+
+ memset(&tcp_msg, 0, sizeof(tcp_msg));
+ if (!tcp || !tcp->sock) {
+ hmdfs_err("encrypt tcp socket = NULL");
+ return -ESHUTDOWN;
+ }
+ iov[0].iov_base = msg->head;
+ iov[0].iov_len = msg->head_len;
+ send_vec_cnt = TCP_KVEC_HEAD;
+ if (msg->len == 0)
+ goto send;
+
+ outlen = msg->len + HMDFS_IV_SIZE + HMDFS_TAG_SIZE;
+ outdata = kzalloc(outlen, GFP_KERNEL);
+ if (!outdata) {
+ hmdfs_err("tcp send message encrypt fail to alloc outdata");
+ return -ENOMEM;
+ }
+ ret = aeadcipher_encrypt_buffer(tcp->connect, msg->data, msg->len,
+ outdata, outlen);
+ if (ret) {
+ hmdfs_err("encrypt_buf fail");
+ goto out;
+ }
+ iov[1].iov_base = outdata;
+ iov[1].iov_len = outlen;
+ send_vec_cnt = TCP_KVEC_ELE_DOUBLE;
+send:
+ mutex_lock(&tcp->send_mutex);
+ send_len = sendmsg_nofs(tcp->sock, &tcp_msg, iov, send_vec_cnt,
+ msg->head_len + outlen);
+ mutex_unlock(&tcp->send_mutex);
+ if (send_len <= 0) {
+ hmdfs_err("error %d", send_len);
+ ret = -ESHUTDOWN;
+ } else if (send_len != msg->head_len + outlen) {
+ hmdfs_err("send part of message. %d/%zu", send_len,
+ msg->head_len + outlen);
+ ret = -EAGAIN;
+ } else {
+ ret = 0;
+ }
+out:
+ kfree(outdata);
+ return ret;
+}
+
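+/*
+ * Send path for established kTLS connections: head, optional slice
+ * descriptor (sdesc) and data are gathered into up to three kvecs and
+ * written under send_mutex; encryption is left to the TLS layer.
+ */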
+static int tcp_send_message_sock_tls(struct tcp_handle *tcp,
+ struct hmdfs_send_data *msg)
+{
+ int send_len = 0;
+ int send_vec_cnt = 0;
+ struct msghdr tcp_msg;
+ struct kvec iov[TCP_KVEC_ELE_TRIPLE];
+
+ memset(&tcp_msg, 0, sizeof(tcp_msg));
+ if (!tcp || !tcp->sock) {
+ hmdfs_err("tcp socket = NULL");
+ return -ESHUTDOWN;
+ }
+ iov[TCP_KVEC_HEAD].iov_base = msg->head;
+ iov[TCP_KVEC_HEAD].iov_len = msg->head_len;
+ if (msg->len == 0 && msg->sdesc_len == 0) {
+ send_vec_cnt = TCP_KVEC_ELE_SINGLE;
+ } else if (msg->sdesc_len == 0) {
+ iov[TCP_KVEC_DATA].iov_base = msg->data;
+ iov[TCP_KVEC_DATA].iov_len = msg->len;
+ send_vec_cnt = TCP_KVEC_ELE_DOUBLE;
+ } else {
+ iov[TCP_KVEC_FILE_PARA].iov_base = msg->sdesc;
+ iov[TCP_KVEC_FILE_PARA].iov_len = msg->sdesc_len;
+ iov[TCP_KVEC_FILE_CONTENT].iov_base = msg->data;
+ iov[TCP_KVEC_FILE_CONTENT].iov_len = msg->len;
+ send_vec_cnt = TCP_KVEC_ELE_TRIPLE;
+ }
+ mutex_lock(&tcp->send_mutex);
+ send_len = sendmsg_nofs(tcp->sock, &tcp_msg, iov, send_vec_cnt,
+ msg->head_len + msg->len + msg->sdesc_len);
+ mutex_unlock(&tcp->send_mutex);
+ if (send_len == -EBADMSG) {
+ return -EBADMSG;
+ } else if (send_len <= 0) {
+ hmdfs_err("error %d", send_len);
+ return -ESHUTDOWN;
+ } else if (send_len != msg->head_len + msg->len + msg->sdesc_len) {
+ hmdfs_err("send part of message. %d/%zu", send_len,
+ msg->head_len + msg->len + msg->sdesc_len);
+ tcp->connect->stat.send_bytes += send_len;
+ return -EAGAIN;
+ }
+ tcp->connect->stat.send_bytes += send_len;
+ tcp->connect->stat.send_message_count++;
+ return 0;
+}
+
+#ifdef CONFIG_HMDFS_FS_ENCRYPTION
+int tcp_send_rekey_request(struct connection *connect)
+{
+ int ret = 0;
+ struct hmdfs_send_data msg;
+ struct tcp_handle *tcp = connect->connect_handle;
+ struct hmdfs_head_cmd *head = NULL;
+ struct connection_rekey_request *rekey_request_param = NULL;
+ struct hmdfs_cmd operations;
+
+ hmdfs_init_cmd(&operations, F_CONNECT_REKEY);
+ head = kzalloc(sizeof(struct hmdfs_head_cmd) +
+ sizeof(struct connection_rekey_request),
+ GFP_KERNEL);
+ if (!head)
+ return -ENOMEM;
+ rekey_request_param =
+ (struct connection_rekey_request
+ *)((uint8_t *)head + sizeof(struct hmdfs_head_cmd));
+
+ rekey_request_param->update_request = cpu_to_le32(UPDATE_NOT_REQUESTED);
+
+ head->magic = HMDFS_MSG_MAGIC;
+ head->version = DFS_2_0;
+ head->operations = operations;
+ head->data_len =
+ cpu_to_le32(sizeof(*head) + sizeof(*rekey_request_param));
+ head->reserved = 0;
+ head->reserved1 = 0;
+ head->ret_code = 0;
+
+ msg.head = head;
+ msg.head_len = sizeof(*head);
+ msg.data = rekey_request_param;
+ msg.len = sizeof(*rekey_request_param);
+ msg.sdesc = NULL;
+ msg.sdesc_len = 0;
+ ret = tcp_send_message_sock_tls(tcp, &msg);
+ if (ret != 0)
+ hmdfs_err("return error %d", ret);
+ kfree(head);
+ return ret;
+}
+#endif
+
+static int tcp_send_message(struct connection *connect,
+ struct hmdfs_send_data *msg)
+{
+ int ret = 0;
+#ifdef CONFIG_HMDFS_FS_ENCRYPTION
+ unsigned long nowtime = jiffies;
+#endif
+ struct tcp_handle *tcp = NULL;
+
+ if (!connect) {
+ hmdfs_err("tcp connection = NULL ");
+ return -ESHUTDOWN;
+ }
+ if (!msg) {
+ hmdfs_err("msg = NULL");
+ return -EINVAL;
+ }
+ if (msg->len > HMDFS_MAX_MESSAGE_LEN) {
+ hmdfs_err("message->len error: %zu", msg->len);
+ return -EINVAL;
+ }
+ tcp = (struct tcp_handle *)(connect->connect_handle);
+ if (connect->status == CONNECT_STAT_STOP)
+ return -EAGAIN;
+
+ trace_hmdfs_tcp_send_message(msg->head);
+
+ if (connect->status == CONNECT_STAT_WORKING &&
+ connect->node->version > USERSPACE_MAX_VER)
+ ret = tcp_send_message_sock_tls(tcp, msg);
+ else
+ // handshake in progress or HMDFS 1.0 peer
+ ret = tcp_send_message_sock_cipher(tcp, msg);
+
+ if (ret != 0) {
+ hmdfs_err("return error %d", ret);
+ return ret;
+ }
+#ifdef CONFIG_HMDFS_FS_ENCRYPTION
+ if (nowtime - connect->stat.rekey_time >= REKEY_LIFETIME &&
+ connect->status == CONNECT_STAT_WORKING &&
+ connect->node->version >= DFS_2_0) {
+ hmdfs_info("send rekey message to devid %llu",
+ connect->node->device_id);
+ ret = tcp_send_rekey_request(connect);
+ if (ret == 0)
+ set_crypto_info(connect, SET_CRYPTO_SEND);
+ connect->stat.rekey_time = nowtime;
+ }
+#endif
+ return ret;
+}
+
+void tcp_close_socket(struct tcp_handle *tcp)
+{
+ if (!tcp)
+ return;
+ mutex_lock(&tcp->close_mutex);
+ if (tcp->recv_task) {
+ kthread_stop(tcp->recv_task);
+ tcp->recv_task = NULL;
+ }
+ mutex_unlock(&tcp->close_mutex);
+}
+
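+/*
+ * Configure the "gcm(aes)" transform: 32-byte key, 16-byte auth tag,
+ * and verify the transform indeed expects the 12-byte IV we generate.
+ */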
+static int set_tfm(__u8 *master_key, struct crypto_aead *tfm)
+{
+ int ret = 0;
+ int iv_len;
+ __u8 *sec_key = NULL;
+
+ sec_key = master_key;
+ crypto_aead_clear_flags(tfm, ~0);
+ ret = crypto_aead_setkey(tfm, sec_key, HMDFS_KEY_SIZE);
+ if (ret) {
+ hmdfs_err("failed to set the key");
+ goto out;
+ }
+ ret = crypto_aead_setauthsize(tfm, HMDFS_TAG_SIZE);
+ if (ret) {
+ hmdfs_err("authsize length is error");
+ goto out;
+ }
+
+ iv_len = crypto_aead_ivsize(tfm);
+ if (iv_len != HMDFS_IV_SIZE) {
+ hmdfs_err("IV recommended value should be set %d", iv_len);
+ ret = -ENODATA;
+ }
+out:
+ return ret;
+}
+
+static int tcp_update_socket(struct tcp_handle *tcp, int fd,
+ uint8_t *master_key, struct socket *socket)
+{
+ int err = 0;
+ struct hmdfs_peer *node = NULL;
+
+ if (!master_key || fd == 0)
+ return -EAGAIN;
+
+ tcp->sock = socket;
+ tcp->fd = fd;
+ if (!tcp_handle_is_available(tcp)) {
+ err = -EPIPE;
+ goto put_sock;
+ }
+
+ hmdfs_info("socket fd %d, state %d, refcount %ld",
+ fd, socket->state, file_count(socket->file));
+
+ tcp->recv_cache = kmem_cache_create("hmdfs_socket",
+ tcp->recvbuf_maxsize,
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!tcp->recv_cache) {
+ err = -ENOMEM;
+ goto put_sock;
+ }
+
+ socket->sk->sk_user_data = tcp;
+ err = tcp_set_recvtimeo(socket, TCP_RECV_TIMEOUT);
+ if (err) {
+ hmdfs_err("tcp set timeout error");
+ goto free_mem_cache;
+ }
+
+ /* send key and recv key, default MASTER KEY */
+ memcpy(tcp->connect->master_key, master_key, HMDFS_KEY_SIZE);
+ memcpy(tcp->connect->send_key, master_key, HMDFS_KEY_SIZE);
+ memcpy(tcp->connect->recv_key, master_key, HMDFS_KEY_SIZE);
+ tcp->connect->tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
+ if (IS_ERR(tcp->connect->tfm)) {
+ err = PTR_ERR(tcp->connect->tfm);
+ tcp->connect->tfm = NULL;
+ hmdfs_err("failed to load transform for gcm(aes):%d", err);
+ goto free_mem_cache;
+ }
+
+ err = set_tfm(master_key, tcp->connect->tfm);
+ if (err) {
+ hmdfs_err("tfm seting exit fault");
+ goto free_crypto;
+ }
+
+ connection_get(tcp->connect);
+
+ node = tcp->connect->node;
+ tcp->recv_task = kthread_create(tcp_recv_thread, (void *)tcp,
+ "dfs_rcv%u_%llu_%d",
+ node->owner, node->device_id, fd);
+ if (IS_ERR(tcp->recv_task)) {
+ err = PTR_ERR(tcp->recv_task);
+ hmdfs_err("tcp->rcev_task %d", err);
+ goto put_conn;
+ }
+
+ return 0;
+
+put_conn:
+ tcp->recv_task = NULL;
+ connection_put(tcp->connect);
+free_crypto:
+ crypto_free_aead(tcp->connect->tfm);
+ tcp->connect->tfm = NULL;
+free_mem_cache:
+ kmem_cache_destroy(tcp->recv_cache);
+ tcp->recv_cache = NULL;
+put_sock:
+ tcp->sock = NULL;
+ tcp->fd = 0;
+
+ return err;
+}
+
+static struct tcp_handle *tcp_alloc_handle(struct connection *connect,
+ int socket_fd, uint8_t *master_key, struct socket *socket)
+{
+ int ret = 0;
+ struct tcp_handle *tcp = kzalloc(sizeof(*tcp), GFP_KERNEL);
+
+ if (!tcp)
+ return NULL;
+ tcp->connect = connect;
+ tcp->connect->connect_handle = (void *)tcp;
+ tcp->recvbuf_maxsize = MAX_RECV_SIZE;
+ tcp->recv_task = NULL;
+ tcp->recv_cache = NULL;
+ tcp->sock = NULL;
+ mutex_init(&tcp->close_mutex);
+ mutex_init(&tcp->send_mutex);
+ ret = tcp_update_socket(tcp, socket_fd, master_key, socket);
+ if (ret) {
+ kfree(tcp);
+ return NULL;
+ }
+ return tcp;
+}
+
+void hmdfs_get_connection(struct hmdfs_peer *peer)
+{
+ struct notify_param param;
+
+ if (!peer)
+ return;
+ param.notify = NOTIFY_GET_SESSION;
+ param.fd = INVALID_SOCKET_FD;
+ memcpy(param.remote_cid, peer->cid, HMDFS_CID_SIZE);
+ notify(peer, ¶m);
+}
+
+static void connection_notify_to_close(struct connection *conn)
+{
+ struct notify_param param;
+ struct hmdfs_peer *peer = NULL;
+ struct tcp_handle *tcp = NULL;
+
+ tcp = conn->connect_handle;
+ peer = conn->node;
+
+ // libdistbus/src/TcpSession.cpp will close the socket
+ param.notify = NOTIFY_GET_SESSION;
+ param.fd = tcp->fd;
+ memcpy(param.remote_cid, peer->cid, HMDFS_CID_SIZE);
+ notify(peer, ¶m);
+}
+
+void hmdfs_reget_connection(struct connection *conn)
+{
+ struct tcp_handle *tcp = NULL;
+ struct connection *conn_impl = NULL;
+ struct connection *next = NULL;
+ struct task_struct *recv_task = NULL;
+ bool should_put = false;
+ bool stop_thread = true;
+
+ if (!conn)
+ return;
+
+ // A connection may be put if and only if it has been taken out of the list
+ mutex_lock(&conn->node->conn_impl_list_lock);
+ list_for_each_entry_safe(conn_impl, next, &conn->node->conn_impl_list,
+ list) {
+ if (conn_impl == conn) {
+ should_put = true;
+ list_move(&conn->list, &conn->node->conn_deleting_list);
+ break;
+ }
+ }
+ if (!should_put) {
+ mutex_unlock(&conn->node->conn_impl_list_lock);
+ return;
+ }
+
+ tcp = conn->connect_handle;
+ if (tcp) {
+ recv_task = tcp->recv_task;
+ /*
+ * Avoid the receive thread stopping itself, and ensure it has
+ * stopped before the offline event is processed
+ */
+ if (!recv_task || recv_task->pid == current->pid)
+ stop_thread = false;
+ }
+ mutex_unlock(&conn->node->conn_impl_list_lock);
+
+ if (tcp) {
+ if (tcp->sock) {
+ hmdfs_info("shudown sock: fd = %d, sockref = %ld, connref = %u stop_thread = %d",
+ tcp->fd, file_count(tcp->sock->file),
+ kref_read(&conn->ref_cnt), stop_thread);
+ kernel_sock_shutdown(tcp->sock, SHUT_RDWR);
+ }
+
+ if (stop_thread)
+ tcp_close_socket(tcp);
+
+ if (tcp->fd != INVALID_SOCKET_FD)
+ connection_notify_to_close(conn);
+ }
+ connection_put(conn);
+}
+
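+/*
+ * Must be called with conn_impl_list_lock held; on a hit, a reference
+ * to the connection is taken on behalf of the caller.
+ */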
+static struct connection *
+lookup_conn_by_socketfd_unsafe(struct hmdfs_peer *node, struct socket *socket)
+{
+ struct connection *tcp_conn = NULL;
+ struct tcp_handle *tcp = NULL;
+
+ list_for_each_entry(tcp_conn, &node->conn_impl_list, list) {
+ if (tcp_conn->connect_handle) {
+ tcp = (struct tcp_handle *)(tcp_conn->connect_handle);
+ if (tcp->sock == socket) {
+ connection_get(tcp_conn);
+ return tcp_conn;
+ }
+ }
+ }
+ return NULL;
+}
+
+static void hmdfs_reget_connection_work_fn(struct work_struct *work)
+{
+ struct connection *conn =
+ container_of(work, struct connection, reget_work);
+
+ hmdfs_reget_connection(conn);
+ connection_put(conn);
+}
+
+struct connection *alloc_conn_tcp(struct hmdfs_peer *node, int socket_fd,
+ uint8_t *master_key, uint8_t status, struct socket *socket)
+{
+ struct connection *tcp_conn = NULL;
+ unsigned long nowtime = jiffies;
+
+ tcp_conn = kzalloc(sizeof(*tcp_conn), GFP_KERNEL);
+ if (!tcp_conn)
+ goto out_err;
+
+ kref_init(&tcp_conn->ref_cnt);
+ mutex_init(&tcp_conn->ref_lock);
+ INIT_LIST_HEAD(&tcp_conn->list);
+ tcp_conn->node = node;
+ tcp_conn->close = tcp_stop_connect;
+ tcp_conn->send_message = tcp_send_message;
+ tcp_conn->type = CONNECT_TYPE_TCP;
+ tcp_conn->status = status;
+ tcp_conn->stat.rekey_time = nowtime;
+ tcp_conn->connect_handle =
+ (void *)tcp_alloc_handle(tcp_conn, socket_fd, master_key, socket);
+ INIT_WORK(&tcp_conn->reget_work, hmdfs_reget_connection_work_fn);
+ if (!tcp_conn->connect_handle) {
+ hmdfs_err("Failed to alloc tcp_handle for strcut conn");
+ goto out_err;
+ }
+ return tcp_conn;
+
+out_err:
+ kfree(tcp_conn);
+ return NULL;
+}
+
+static struct connection *add_conn_tcp_unsafe(struct hmdfs_peer *node,
+ struct socket *socket,
+ struct connection *conn2add)
+{
+ struct connection *conn;
+
+ conn = lookup_conn_by_socketfd_unsafe(node, socket);
+ if (conn) {
+ hmdfs_info("socket already in list");
+ return conn;
+ }
+
+ /* Prefer to use socket opened by local device */
+ if (conn2add->status == CONNECT_STAT_WAIT_REQUEST)
+ list_add(&conn2add->list, &node->conn_impl_list);
+ else
+ list_add_tail(&conn2add->list, &node->conn_impl_list);
+ connection_get(conn2add);
+ return conn2add;
+}
+
+struct connection *hmdfs_get_conn_tcp(struct hmdfs_peer *node, int fd,
+ uint8_t *master_key, uint8_t status)
+{
+ struct connection *tcp_conn = NULL, *on_peer_conn = NULL;
+ struct tcp_handle *tcp = NULL;
+ struct socket *socket = NULL;
+ int err = 0;
+
+ socket = sockfd_lookup(fd, &err);
+ if (!socket) {
+ hmdfs_err("lookup socket fail, socket_fd %d, err %d", fd, err);
+ return NULL;
+ }
+ mutex_lock(&node->conn_impl_list_lock);
+ tcp_conn = lookup_conn_by_socketfd_unsafe(node, socket);
+ mutex_unlock(&node->conn_impl_list_lock);
+ if (tcp_conn) {
+ hmdfs_info("Got a existing tcp conn: fsocket_fd = %d",
+ fd);
+ sockfd_put(socket);
+ goto out;
+ }
+
+ tcp_conn = alloc_conn_tcp(node, fd, master_key, status, socket);
+ if (!tcp_conn) {
+ hmdfs_info("Failed to alloc a tcp conn, socket_fd %d", fd);
+ sockfd_put(socket);
+ goto out;
+ }
+
+ mutex_lock(&node->conn_impl_list_lock);
+ on_peer_conn = add_conn_tcp_unsafe(node, socket, tcp_conn);
+ mutex_unlock(&node->conn_impl_list_lock);
+ tcp = tcp_conn->connect_handle;
+ if (on_peer_conn == tcp_conn) {
+ hmdfs_info("Got a newly allocated tcp conn: socket_fd = %d", fd);
+ wake_up_process(tcp->recv_task);
+ if (status == CONNECT_STAT_WAIT_RESPONSE)
+ connection_send_handshake(
+ on_peer_conn, CONNECT_MESG_HANDSHAKE_REQUEST,
+ 0);
+ } else {
+ hmdfs_info("Got a existing tcp conn: socket_fd = %d", fd);
+ tcp->fd = INVALID_SOCKET_FD;
+ tcp_close_socket(tcp);
+ connection_put(tcp_conn);
+
+ tcp_conn = on_peer_conn;
+ }
+
+out:
+ return tcp_conn;
+}
+
+void tcp_stop_connect(struct connection *connect)
+{
+ hmdfs_info("now nothing to do");
+}
diff --git a/fs/hmdfs/comm/transport.h b/fs/hmdfs/comm/transport.h
new file mode 100644
index 0000000000000000000000000000000000000000..bce882cb6997753ac8c9e7df2ed858aeaf20e896
--- /dev/null
+++ b/fs/hmdfs/comm/transport.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/comm/transport.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_TRANSPORT_H
+#define HMDFS_TRANSPORT_H
+
+#include "connection.h"
+
+#define ENCRYPT_FLAG 1
+#define DECRYPT_FLAG 0
+
+struct aeadcrypt_result {
+ struct completion completion;
+ int err;
+};
+
+#define ADAPTER_MESSAGE_LENGTH (1024 * 1024 + 1024) // 1M + 1K
+#define MAX_RECV_SIZE sizeof(struct hmdfs_head_cmd)
+
+#define TCP_KVEC_HEAD 0
+#define TCP_KVEC_DATA 1
+
+enum TCP_KVEC_FILE_ELE_INDEX {
+ TCP_KVEC_FILE_PARA = 1,
+ TCP_KVEC_FILE_CONTENT = 2,
+};
+
+enum TCP_KVEC_TYPE {
+ TCP_KVEC_ELE_SINGLE = 1,
+ TCP_KVEC_ELE_DOUBLE = 2,
+ TCP_KVEC_ELE_TRIPLE = 3,
+};
+
+#define TCP_RECV_TIMEOUT 2
+#define MAX_RECV_RETRY_TIMES 2
+
+#ifndef SO_RCVTIMEO
+#define SO_RCVTIMEO SO_RCVTIMEO_OLD
+#endif
+
+struct tcp_handle {
+ struct connection *connect;
+ int recvbuf_maxsize;
+ struct mutex close_mutex;
+ /*
+ * To achieve atomicity.
+ *
+ * The sock lock held at the tcp layer may be temporarily released at
+ * `sk_wait_event()` when waiting for sock buffer. From this point on,
+ * threads serialized at the initial call to `lock_sock()` contained
+ * in `tcp_sendmsg()` can proceed, resulting in intermixed messages.
+ */
+ struct mutex send_mutex;
+ struct socket *sock;
+ int fd;
+ struct kmem_cache *recv_cache;
+ struct task_struct *recv_task;
+};
+
+void hmdfs_get_connection(struct hmdfs_peer *peer);
+void hmdfs_reget_connection(struct connection *conn);
+struct connection *hmdfs_get_conn_tcp(struct hmdfs_peer *node, int socket_fd,
+ uint8_t *master_key, uint8_t status);
+void tcp_stop_connect(struct connection *connect);
+uint32_t hmdfs_tcpi_rtt(struct hmdfs_peer *node);
+void tcp_close_socket(struct tcp_handle *tcp);
+
+#ifdef CONFIG_HMDFS_FS_ENCRYPTION
+int tcp_send_rekey_request(struct connection *connect);
+#endif
+
+#endif
diff --git a/fs/hmdfs/dentry.c b/fs/hmdfs/dentry.c
new file mode 100644
index 0000000000000000000000000000000000000000..ac590df0982a6cf290b402467bafb2e6b8a7b601
--- /dev/null
+++ b/fs/hmdfs/dentry.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/dentry.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include <linux/ctype.h>
+#include <linux/namei.h>
+
+#include "comm/connection.h"
+#include "hmdfs_dentryfile.h"
+#include "hmdfs_device_view.h"
+#include "hmdfs_merge_view.h"
+
+extern struct kmem_cache *hmdfs_dentry_cachep;
+
+void hmdfs_set_time(struct dentry *dentry, unsigned long time)
+{
+ struct hmdfs_dentry_info *d_info = dentry->d_fsdata;
+
+ if (d_info)
+ d_info->time = time;
+}
+
+unsigned long hmdfs_get_time(struct dentry *dentry)
+{
+ struct hmdfs_dentry_info *d_info = dentry->d_fsdata;
+
+ if (d_info)
+ return (unsigned long)d_info->time;
+ return 0;
+}
+
+static int hmdfs_d_remote_revalidate(struct hmdfs_peer *conn,
+ struct dentry *target,
+ struct dentry *parent)
+{
+ unsigned int timeout = hmdfs_sb(target->d_sb)->dcache_timeout;
+ unsigned long dentry_time = hmdfs_get_time(target);
+ struct clearcache_item *item;
+
+ item = hmdfs_find_cache_item(conn->device_id, parent);
+ if (!item)
+ return 0;
+ kref_put(&item->ref, release_cache_item);
+
+ if (cache_item_revalidate(READ_ONCE(conn->conn_time),
+ dentry_time, timeout))
+ return 1;
+
+ return 0;
+}
+
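+/*
+ * Take both d_locks in address order so concurrent name comparisons on
+ * the same dentry pair cannot deadlock.
+ */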
+static inline void lock_for_dname_cmp(struct dentry *dentry,
+ struct dentry *lower_dentry)
+{
+ if (dentry < lower_dentry) {
+ spin_lock(&dentry->d_lock);
+ spin_lock_nested(&lower_dentry->d_lock, DENTRY_D_LOCK_NESTED);
+ } else {
+ spin_lock(&lower_dentry->d_lock);
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+ }
+}
+
+static inline void unlock_for_dname_cmp(struct dentry *dentry,
+ struct dentry *lower_dentry)
+{
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&lower_dentry->d_lock);
+}
+
+static int hmdfs_dev_d_revalidate(struct dentry *direntry, unsigned int flags)
+{
+ struct inode *dinode = NULL;
+ struct hmdfs_inode_info *info = NULL;
+
+ spin_lock(&direntry->d_lock);
+ if (IS_ROOT(direntry)) {
+ spin_unlock(&direntry->d_lock);
+ return 1;
+ }
+ spin_unlock(&direntry->d_lock);
+
+ dinode = d_inode(direntry);
+ if (!dinode)
+ return 0;
+
+ info = hmdfs_i(dinode);
+ if (info->inode_type == HMDFS_LAYER_SECOND_LOCAL ||
+ info->inode_type == HMDFS_LAYER_FIRST_DEVICE) {
+ return 1;
+ }
+ if (info->conn && info->conn->status == NODE_STAT_ONLINE)
+ return 1;
+
+ return 0;
+}
+
+static int hmdfs_d_revalidate(struct dentry *direntry, unsigned int flags)
+{
+ struct inode *dinode = NULL;
+ struct hmdfs_inode_info *info = NULL;
+ struct path lower_path, parent_lower_path;
+ struct dentry *parent_dentry = NULL;
+ struct dentry *parent_lower_dentry = NULL;
+ struct dentry *lower_cur_parent_dentry = NULL;
+ struct dentry *lower_dentry = NULL;
+ int ret;
+
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET | LOOKUP_REVAL))
+ return 0;
+
+ dinode = d_inode(direntry);
+ if (!dinode)
+ return 0;
+
+ /* remote dentry timeout */
+ info = hmdfs_i(dinode);
+ parent_dentry = dget_parent(direntry);
+ if (info->conn) {
+ ret = hmdfs_d_remote_revalidate(info->conn, direntry,
+ parent_dentry);
+ dput(parent_dentry);
+ return ret;
+ }
+
+ hmdfs_get_lower_path(direntry, &lower_path);
+ lower_dentry = lower_path.dentry;
+ lower_cur_parent_dentry = dget_parent(lower_dentry);
+ hmdfs_get_lower_path(parent_dentry, &parent_lower_path);
+ parent_lower_dentry = parent_lower_path.dentry;
+ if ((lower_dentry->d_flags & DCACHE_OP_REVALIDATE)) {
+ ret = lower_dentry->d_op->d_revalidate(lower_dentry, flags);
+ if (ret == 0)
+ goto out;
+ }
+
+ spin_lock(&lower_dentry->d_lock);
+ if (d_unhashed(lower_dentry)) {
+ spin_unlock(&lower_dentry->d_lock);
+ ret = 0;
+ goto out;
+ }
+ spin_unlock(&lower_dentry->d_lock);
+
+ if (parent_lower_dentry != lower_cur_parent_dentry) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = 1;
+ lock_for_dname_cmp(direntry, lower_dentry);
+ if (!qstr_case_eq(&direntry->d_name, &lower_dentry->d_name))
+ ret = 0;
+ unlock_for_dname_cmp(direntry, lower_dentry);
+
+out:
+ hmdfs_put_lower_path(&parent_lower_path);
+ dput(lower_cur_parent_dentry);
+ hmdfs_put_lower_path(&lower_path);
+ dput(parent_dentry);
+ return ret;
+}
+
+static void hmdfs_dev_d_release(struct dentry *dentry)
+{
+ if (!dentry || !dentry->d_fsdata)
+ return;
+
+ switch (hmdfs_d(dentry)->dentry_type) {
+ case HMDFS_LAYER_SECOND_LOCAL:
+ hmdfs_clear_cache_dents(dentry, false);
+ hmdfs_drop_remote_cache_dents(dentry);
+ path_put(&(hmdfs_d(dentry)->lower_path));
+ break;
+ case HMDFS_LAYER_ZERO:
+ hmdfs_put_reset_lower_path(dentry);
+ break;
+ case HMDFS_LAYER_FIRST_DEVICE:
+ break;
+ case HMDFS_LAYER_SECOND_REMOTE:
+ hmdfs_clear_cache_dents(dentry, false);
+ break;
+ default:
+ hmdfs_err("Unexpected dentry type %d",
+ hmdfs_d(dentry)->dentry_type);
+ return;
+ }
+
+ kmem_cache_free(hmdfs_dentry_cachep, dentry->d_fsdata);
+ dentry->d_fsdata = NULL;
+}
+
+static void hmdfs_d_release(struct dentry *dentry)
+{
+ if (!dentry || !dentry->d_fsdata)
+ return;
+
+ hmdfs_clear_cache_dents(dentry, false);
+ hmdfs_drop_remote_cache_dents(dentry);
+ hmdfs_put_reset_lower_path(dentry);
+ kmem_cache_free(hmdfs_dentry_cachep, dentry->d_fsdata);
+ dentry->d_fsdata = NULL;
+}
+
+static int hmdfs_cmp_ci(const struct dentry *dentry, unsigned int len,
+ const char *str, const struct qstr *name)
+{
+ struct hmdfs_sb_info *sbi = hmdfs_sb(dentry->d_sb);
+
+ if (name->len != len)
+ return 1;
+
+ if (!sbi->s_case_sensitive) {
+ if (str_n_case_eq(name->name, str, len))
+ return 0;
+ } else {
+ if (!strncmp(name->name, str, len))
+ return 0;
+ }
+ return 1;
+}
+
+static int hmdfs_hash_ci(const struct dentry *dentry, struct qstr *qstr)
+{
+ const unsigned char *name = qstr->name;
+ unsigned int len = qstr->len;
+ unsigned long hash;
+ struct hmdfs_sb_info *sbi = hmdfs_sb(dentry->d_sb);
+
+ if (sbi->s_case_sensitive)
+ return 0;
+
+ hash = init_name_hash(dentry);
+ while (len--)
+ hash = partial_name_hash(tolower(*name++), hash);
+ qstr->hash = end_name_hash(hash);
+ return 0;
+}
+
+void clear_comrades_locked(struct list_head *comrade_list)
+{
+ struct hmdfs_dentry_comrade *cc, *nc;
+
+ WARN_ON(!comrade_list);
+ list_for_each_entry_safe(cc, nc, comrade_list, list) {
+ dput(cc->lo_d);
+ kfree(cc);
+ }
+ INIT_LIST_HEAD(comrade_list);
+}
+
+void clear_comrades(struct dentry *dentry)
+{
+ struct hmdfs_dentry_info_merge *cdi = hmdfs_dm(dentry);
+
+ mutex_lock(&cdi->comrade_list_lock);
+ clear_comrades_locked(&cdi->comrade_list);
+ mutex_unlock(&cdi->comrade_list_lock);
+}
+
+/**
+ * d_revalidate_merge - revalidate a merge dentry
+ *
+ * Always return 0 to invalidate a dentry for fault-tolerance.
+ * The cost is acceptable for an overlay filesystem.
+ */
+static int d_revalidate_merge(struct dentry *direntry, unsigned int flags)
+{
+ return 0;
+}
+
+static void d_release_merge(struct dentry *dentry)
+{
+ if (!dentry || !dentry->d_fsdata)
+ return;
+
+ clear_comrades(dentry);
+ kmem_cache_free(hmdfs_dentry_merge_cachep, dentry->d_fsdata);
+ dentry->d_fsdata = NULL;
+}
+
+const struct dentry_operations hmdfs_dops_merge = {
+ .d_revalidate = d_revalidate_merge,
+ .d_release = d_release_merge,
+};
+
+const struct dentry_operations hmdfs_dev_dops = {
+ .d_revalidate = hmdfs_dev_d_revalidate,
+ .d_release = hmdfs_dev_d_release,
+};
+
+const struct dentry_operations hmdfs_dops = {
+ .d_revalidate = hmdfs_d_revalidate,
+ .d_release = hmdfs_d_release,
+ .d_compare = hmdfs_cmp_ci,
+ .d_hash = hmdfs_hash_ci,
+};
diff --git a/fs/hmdfs/file_local.c b/fs/hmdfs/file_local.c
new file mode 100644
index 0000000000000000000000000000000000000000..893c6edbc93b40002add2edfcb136afc1cbce61e
--- /dev/null
+++ b/fs/hmdfs/file_local.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/file_local.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/uio.h>
+
+#include "hmdfs_client.h"
+#include "hmdfs_dentryfile.h"
+#include "hmdfs_device_view.h"
+#include "hmdfs_merge_view.h"
+#include "hmdfs_trace.h"
+
+int hmdfs_file_open_local(struct inode *inode, struct file *file)
+{
+ int err = 0;
+ struct file *lower_file = NULL;
+ struct path lower_path;
+ struct super_block *sb = inode->i_sb;
+ const struct cred *cred = hmdfs_sb(sb)->cred;
+ struct hmdfs_file_info *gfi = kzalloc(sizeof(*gfi), GFP_KERNEL);
+
+ if (!gfi) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ hmdfs_get_lower_path(file->f_path.dentry, &lower_path);
+ lower_file = dentry_open(&lower_path, file->f_flags, cred);
+ hmdfs_put_lower_path(&lower_path);
+ if (IS_ERR(lower_file)) {
+ err = PTR_ERR(lower_file);
+ kfree(gfi);
+ } else {
+ gfi->lower_file = lower_file;
+ file->private_data = gfi;
+ }
+out_err:
+ return err;
+}
+
+int hmdfs_file_release_local(struct inode *inode, struct file *file)
+{
+ struct hmdfs_file_info *gfi = hmdfs_f(file);
+
+ file->private_data = NULL;
+ fput(gfi->lower_file);
+ kfree(gfi);
+ return 0;
+}
+
+ssize_t hmdfs_read_local(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct file *lower_file = hmdfs_f(iocb->ki_filp)->lower_file;
+ int err;
+
+ if (iter->type & ITER_KVEC)
+ err = kernel_read(lower_file, iter->iov->iov_base,
+ iter->iov->iov_len, &(iocb->ki_pos));
+ else
+ err = vfs_read(lower_file, iter->iov->iov_base,
+ iter->iov->iov_len, &(iocb->ki_pos));
+
+ if (err >= 0)
+ file_inode(iocb->ki_filp)->i_atime = file_inode(lower_file)->i_atime;
+ return err;
+}
+
+ssize_t hmdfs_write_local(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct file *lower_file = hmdfs_f(iocb->ki_filp)->lower_file;
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct inode *lower_inode = file_inode(lower_file);
+ struct dentry *dentry = file_dentry(iocb->ki_filp);
+ int err;
+
+ if (iter->type & ITER_KVEC)
+ err = kernel_write(lower_file, iter->iov->iov_base,
+ iter->iov->iov_len, &(iocb->ki_pos));
+ else
+ err = vfs_write(lower_file, iter->iov->iov_base,
+ iter->iov->iov_len, &(iocb->ki_pos));
+
+ if (err >= 0) {
+ inode_lock(inode);
+ i_size_write(inode, i_size_read(lower_inode));
+ inode->i_atime = lower_inode->i_atime;
+ inode->i_ctime = lower_inode->i_ctime;
+ inode->i_mtime = lower_inode->i_mtime;
+ if (!hmdfs_i_merge(hmdfs_i(inode)))
+ update_inode_to_dentry(dentry, inode);
+ inode_unlock(inode);
+ }
+ return err;
+}
+
+int hmdfs_fsync_local(struct file *file, loff_t start, loff_t end, int datasync)
+{
+ int err;
+ struct file *lower_file = hmdfs_f(file)->lower_file;
+
+ err = __generic_file_fsync(file, start, end, datasync);
+ if (err)
+ goto out;
+
+ err = vfs_fsync_range(lower_file, start, end, datasync);
+out:
+ return err;
+}
+
+loff_t hmdfs_file_llseek_local(struct file *file, loff_t offset, int whence)
+{
+ int err = 0;
+ struct file *lower_file = NULL;
+
+ err = generic_file_llseek(file, offset, whence);
+ if (err < 0)
+ goto out;
+ lower_file = hmdfs_f(file)->lower_file;
+ err = generic_file_llseek(lower_file, offset, whence);
+out:
+ return err;
+}
+
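+/*
+ * Stack mmap onto the lower file: vma->vm_file is swapped to the lower
+ * file (adjusting the file references accordingly) so that page faults
+ * are served directly by the underlying filesystem.
+ */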
+int hmdfs_file_mmap_local(struct file *file, struct vm_area_struct *vma)
+{
+ struct hmdfs_file_info *private_data = file->private_data;
+ struct file *realfile = NULL;
+ int ret;
+
+ if (!private_data)
+ return -EINVAL;
+
+ realfile = private_data->lower_file;
+ if (!realfile)
+ return -EINVAL;
+
+ if (!realfile->f_op->mmap)
+ return -ENODEV;
+
+ if (WARN_ON(file != vma->vm_file))
+ return -EIO;
+
+ vma->vm_file = get_file(realfile);
+ ret = call_mmap(vma->vm_file, vma);
+ if (ret)
+ fput(realfile);
+ else
+ fput(file);
+
+ file_accessed(file);
+
+ return ret;
+}
+
+const struct file_operations hmdfs_file_fops_local = {
+ .owner = THIS_MODULE,
+ .llseek = hmdfs_file_llseek_local,
+ .read_iter = hmdfs_read_local,
+ .write_iter = hmdfs_write_local,
+ .mmap = hmdfs_file_mmap_local,
+ .open = hmdfs_file_open_local,
+ .release = hmdfs_file_release_local,
+ .fsync = hmdfs_fsync_local,
+};
+
+static int hmdfs_iterate_local(struct file *file, struct dir_context *ctx)
+{
+ int err = 0;
+ loff_t start_pos = ctx->pos;
+ struct file *lower_file = hmdfs_f(file)->lower_file;
+
+ if (ctx->pos == -1)
+ return 0;
+
+ lower_file->f_pos = file->f_pos;
+ err = iterate_dir(lower_file, ctx);
+ file->f_pos = lower_file->f_pos;
+
+ if (err < 0)
+ ctx->pos = -1;
+
+ trace_hmdfs_iterate_local(file->f_path.dentry, start_pos, ctx->pos,
+ err);
+ return err;
+}
+
+int hmdfs_dir_open_local(struct inode *inode, struct file *file)
+{
+ int err = 0;
+ struct file *lower_file = NULL;
+ struct dentry *dentry = file->f_path.dentry;
+ struct path lower_path;
+ struct super_block *sb = inode->i_sb;
+ const struct cred *cred = hmdfs_sb(sb)->cred;
+ struct hmdfs_file_info *gfi = kzalloc(sizeof(*gfi), GFP_KERNEL);
+
+ if (!gfi)
+ return -ENOMEM;
+
+ if (IS_ERR_OR_NULL(cred)) {
+ err = -EPERM;
+ goto out_err;
+ }
+ hmdfs_get_lower_path(dentry, &lower_path);
+ lower_file = dentry_open(&lower_path, file->f_flags, cred);
+ hmdfs_put_lower_path(&lower_path);
+ if (IS_ERR(lower_file)) {
+ err = PTR_ERR(lower_file);
+ goto out_err;
+ } else {
+ gfi->lower_file = lower_file;
+ file->private_data = gfi;
+ }
+ return err;
+
+out_err:
+ kfree(gfi);
+ return err;
+}
+
+static int hmdfs_dir_release_local(struct inode *inode, struct file *file)
+{
+ struct hmdfs_file_info *gfi = hmdfs_f(file);
+
+ file->private_data = NULL;
+ fput(gfi->lower_file);
+ kfree(gfi);
+ return 0;
+}
+
+const struct file_operations hmdfs_dir_ops_local = {
+ .owner = THIS_MODULE,
+ .iterate = hmdfs_iterate_local,
+ .open = hmdfs_dir_open_local,
+ .release = hmdfs_dir_release_local,
+ .fsync = hmdfs_fsync_local,
+};
diff --git a/fs/hmdfs/file_merge.c b/fs/hmdfs/file_merge.c
new file mode 100644
index 0000000000000000000000000000000000000000..2708f2ba24affe375973660b782dfebb06fd29d3
--- /dev/null
+++ b/fs/hmdfs/file_merge.c
@@ -0,0 +1,525 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/file_merge.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include "hmdfs_merge_view.h"
+
+#include <linux/slab.h>
+
+#include "hmdfs.h"
+#include "hmdfs_trace.h"
+
+struct hmdfs_iterate_callback_merge {
+ struct dir_context ctx;
+ struct dir_context *caller;
+ /*
+ * Record the return value of 'caller->actor':
+ *
+ * -EINVAL, buffer is exhausted
+ * -EINTR, current task is pending
+ * -EFAULT, something is wrong
+ * 0, success and can do more
+ */
+ int result;
+ struct rb_root *root;
+ uint64_t dev_id;
+};
+
+struct hmdfs_cache_entry {
+ struct rb_node rb_node;
+ int name_len;
+ char *name;
+ int file_type;
+};
+
+struct hmdfs_cache_entry *allocate_entry(const char *name, int namelen,
+ int d_type)
+{
+ struct hmdfs_cache_entry *data;
+
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return ERR_PTR(-ENOMEM);
+
+ data->name = kstrndup(name, namelen, GFP_KERNEL);
+ if (!data->name) {
+ kfree(data);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ data->name_len = namelen;
+ data->file_type = d_type;
+
+ return data;
+}
+
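+/*
+ * Insert a directory entry into the per-iteration rb-tree, ordered by
+ * (name length, name). On a duplicate the new entry is freed,
+ * *new_entry is redirected to the existing node and its file type is
+ * returned so the caller can detect file/directory conflicts; a fresh
+ * insertion returns 0.
+ */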
+int insert_filename(struct rb_root *root, struct hmdfs_cache_entry **new_entry)
+{
+ struct rb_node *parent = NULL;
+ struct rb_node **new_node = &(root->rb_node);
+ int cmp_res = 0;
+ struct hmdfs_cache_entry *data = *new_entry;
+
+ while (*new_node) {
+ struct hmdfs_cache_entry *entry = container_of(
+ *new_node, struct hmdfs_cache_entry, rb_node);
+ parent = *new_node;
+
+ if (data->name_len < entry->name_len)
+ cmp_res = -1;
+ else if (data->name_len > entry->name_len)
+ cmp_res = 1;
+ else
+ cmp_res = strncmp(data->name, entry->name,
+ data->name_len);
+
+ if (!cmp_res) {
+ kfree(data->name);
+ kfree(data);
+ *new_entry = entry;
+ return entry->file_type;
+ }
+
+ if (cmp_res < 0)
+ new_node = &((*new_node)->rb_left);
+ else if (cmp_res > 0)
+ new_node = &((*new_node)->rb_right);
+ }
+
+ rb_link_node(&data->rb_node, parent, new_node);
+ rb_insert_color(&data->rb_node, root);
+
+ return 0;
+}
+
+static void recursive_delete(struct rb_node *node)
+{
+ struct hmdfs_cache_entry *entry = NULL;
+
+ if (!node)
+ return;
+
+ recursive_delete(node->rb_left);
+ recursive_delete(node->rb_right);
+
+ entry = container_of(node, struct hmdfs_cache_entry, rb_node);
+ kfree(entry->name);
+ kfree(entry);
+}
+
+static void destroy_tree(struct rb_root *root)
+{
+ if (!root)
+ return;
+ recursive_delete(root->rb_node);
+ root->rb_node = NULL;
+}
+
+static void delete_filename(struct rb_root *root,
+ struct hmdfs_cache_entry *data)
+{
+ struct rb_node **node = &(root->rb_node);
+ struct hmdfs_cache_entry *entry = NULL;
+ int cmp_res = 0;
+
+ while (*node) {
+ entry = container_of(*node, struct hmdfs_cache_entry, rb_node);
+ if (data->name_len < entry->name_len)
+ cmp_res = -1;
+ else if (data->name_len > entry->name_len)
+ cmp_res = 1;
+ else
+ cmp_res = strncmp(data->name, entry->name,
+ data->name_len);
+
+ if (!cmp_res)
+ goto found;
+
+ if (cmp_res < 0)
+ node = &((*node)->rb_left);
+ else if (cmp_res > 0)
+ node = &((*node)->rb_right);
+ }
+ return;
+
+found:
+ rb_erase(*node, root);
+ kfree(entry->name);
+ kfree(entry);
+}
+
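+/*
+ * Disambiguate a regular file that conflicts across devices: format
+ * CONFLICTING_FILE_SUFFIX with the device id and splice it in before
+ * the final extension dot, or append it when the name has none.
+ */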
+static void rename_conflicting_file(char *dentry_name, int *len,
+ unsigned int dev_id)
+{
+ int i = *len - 1;
+ int dot_pos = -1;
+ char *buffer;
+
+ buffer = kzalloc(DENTRY_NAME_MAX_LEN, GFP_KERNEL);
+ if (!buffer)
+ return;
+
+ while (i >= 0) {
+ if (dentry_name[i] == '/')
+ break;
+ if (dentry_name[i] == '.') {
+ // TODO: sync this change to CT01
+ dot_pos = i;
+ break;
+ }
+ i--;
+ }
+
+ if (dot_pos == -1) {
+ snprintf(dentry_name + *len, DENTRY_NAME_MAX_LEN - *len,
+ CONFLICTING_FILE_SUFFIX, dev_id);
+ goto done;
+ }
+
+ for (i = 0; i < *len - dot_pos; i++)
+ buffer[i] = dentry_name[i + dot_pos];
+
+ buffer[i] = '\0';
+ snprintf(dentry_name + dot_pos, DENTRY_NAME_MAX_LEN - dot_pos,
+ CONFLICTING_FILE_SUFFIX, dev_id);
+ strcat(dentry_name, buffer);
+
+done:
+ *len = strlen(dentry_name);
+ kfree(buffer);
+}
+
+static void rename_conflicting_directory(char *dentry_name, int *len)
+{
+ snprintf(dentry_name + *len, DENTRY_NAME_MAX_LEN - *len,
+ CONFLICTING_DIR_SUFFIX);
+ *len += strlen(CONFLICTING_DIR_SUFFIX);
+}
+
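+/*
+ * Dirent actor for the merge view: skip special names, dedupe entries
+ * already emitted for other devices via the per-file rb-tree cache,
+ * rename file/directory conflicts, then forward the (possibly renamed)
+ * entry to the caller's actor.
+ */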
+static int hmdfs_actor_merge(struct dir_context *ctx, const char *name,
+ int namelen, loff_t offset, u64 ino,
+ unsigned int d_type)
+{
+ int ret = 0;
+ int insert_res = 0;
+ int max_devid_len = 2;
+ char *dentry_name = NULL;
+ int dentry_len = namelen;
+ struct hmdfs_cache_entry *cache_entry = NULL;
+ struct hmdfs_iterate_callback_merge *iterate_callback_merge = NULL;
+ struct dir_context *org_ctx = NULL;
+
+ if (hmdfs_file_type(name) != HMDFS_TYPE_COMMON)
+ return 0;
+
+ if (namelen > NAME_MAX)
+ return -EINVAL;
+ dentry_name = kzalloc(NAME_MAX + 1, GFP_KERNEL);
+ if (!dentry_name)
+ return -ENOMEM;
+
+ strncpy(dentry_name, name, dentry_len);
+
+ cache_entry = allocate_entry(dentry_name, dentry_len, d_type);
+ if (IS_ERR(cache_entry)) {
+ ret = PTR_ERR(cache_entry);
+ goto done;
+ }
+
+ iterate_callback_merge =
+ container_of(ctx, struct hmdfs_iterate_callback_merge, ctx);
+ insert_res =
+ insert_filename(iterate_callback_merge->root, &cache_entry);
+ if (d_type == DT_DIR && insert_res == DT_DIR) {
+ goto done;
+ } else if (d_type == DT_DIR && insert_res == DT_REG) {
+ if (strlen(CONFLICTING_DIR_SUFFIX) > NAME_MAX - dentry_len) {
+ ret = -ENAMETOOLONG;
+ goto delete;
+ }
+ rename_conflicting_directory(dentry_name, &dentry_len);
+ cache_entry->file_type = DT_DIR;
+ } else if (d_type == DT_REG && insert_res > 0) {
+ if (strlen(CONFLICTING_FILE_SUFFIX) + max_devid_len >
+ NAME_MAX - dentry_len) {
+ ret = -ENAMETOOLONG;
+ goto delete;
+ }
+ rename_conflicting_file(dentry_name, &dentry_len,
+ iterate_callback_merge->dev_id);
+ }
+
+ org_ctx = iterate_callback_merge->caller;
+ ret = org_ctx->actor(org_ctx, dentry_name, dentry_len, org_ctx->pos,
+ ino, d_type);
+ /*
+ * Record original return value, so that the caller can be aware of
+ * different situations.
+ */
+ iterate_callback_merge->result = ret;
+ ret = ret == 0 ? 0 : 1;
+ if (ret && d_type == DT_DIR && insert_res == DT_REG &&
+ cache_entry->file_type == DT_DIR)
+ cache_entry->file_type = DT_REG;
+
+delete:
+ if (ret && !insert_res)
+ delete_filename(iterate_callback_merge->root, cache_entry);
+done:
+ kfree(dentry_name);
+ return ret;
+}
+
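+/*
+ * Return the file info following the entry that matches @device_id.
+ * list_for_each_entry_safe() keeps @fi_result one element ahead of
+ * @fi_iter, so it holds the successor when the loop breaks. Comparing
+ * @fi_result against @fi_head works because container_of() on the list
+ * head resolves to @fi_head itself.
+ */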
+struct hmdfs_file_info *
+get_next_hmdfs_file_info(struct hmdfs_file_info *fi_head, int device_id)
+{
+ struct hmdfs_file_info *fi_iter = NULL;
+ struct hmdfs_file_info *fi_result = NULL;
+
+ mutex_lock(&fi_head->comrade_list_lock);
+ list_for_each_entry_safe(fi_iter, fi_result, &(fi_head->comrade_list),
+ comrade_list) {
+ if (fi_iter->device_id == device_id)
+ break;
+ }
+ mutex_unlock(&fi_head->comrade_list_lock);
+
+ return fi_result != fi_head ? fi_result : NULL;
+}
+
+struct hmdfs_file_info *get_hmdfs_file_info(struct hmdfs_file_info *fi_head,
+ int device_id)
+{
+ struct hmdfs_file_info *fi_iter = NULL;
+
+ mutex_lock(&fi_head->comrade_list_lock);
+ list_for_each_entry(fi_iter, &(fi_head->comrade_list), comrade_list) {
+ if (fi_iter->device_id == device_id) {
+ mutex_unlock(&fi_head->comrade_list_lock);
+ return fi_iter;
+ }
+ }
+ mutex_unlock(&fi_head->comrade_list_lock);
+
+ return NULL;
+}
+
+int hmdfs_iterate_merge(struct file *file, struct dir_context *ctx)
+{
+ int err = 0;
+ struct hmdfs_file_info *fi_head = hmdfs_f(file);
+ struct hmdfs_file_info *fi_iter = NULL;
+ struct file *lower_file_iter = NULL;
+ loff_t start_pos = ctx->pos;
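+ /* shift out the remote-flag top bit, then shift down to the dev_id field */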
+ unsigned long device_id = (unsigned long)((ctx->pos) << 1 >>
+ (POS_BIT_NUM - DEV_ID_BIT_NUM));
+ struct hmdfs_iterate_callback_merge ctx_merge = {
+ .ctx.actor = hmdfs_actor_merge,
+ .caller = ctx,
+ .root = &fi_head->root,
+ .dev_id = device_id
+ };
+
+ /* pos = -1 indicates that all devices have been traversed
+ * or an error has occurred.
+ */
+ if (ctx->pos == -1)
+ return 0;
+
+ fi_iter = get_hmdfs_file_info(fi_head, device_id);
+ if (!fi_iter) {
+ fi_iter = get_next_hmdfs_file_info(fi_head, device_id);
+ // dev_id has changed; reset the offset to 0 to start at the next device's entries
+ if (fi_iter)
+ ctx_merge.ctx.pos =
+ hmdfs_set_pos(fi_iter->device_id, 0, 0);
+ }
+ while (fi_iter) {
+ ctx_merge.dev_id = fi_iter->device_id;
+ device_id = ctx_merge.dev_id;
+ lower_file_iter = fi_iter->lower_file;
+ lower_file_iter->f_pos = file->f_pos;
+ err = iterate_dir(lower_file_iter, &ctx_merge.ctx);
+ file->f_pos = lower_file_iter->f_pos;
+ ctx->pos = file->f_pos;
+
+ if (err)
+ goto done;
+ /*
+ * ctx->actor return nonzero means buffer is exhausted or
+ * something is wrong, thus we should not continue.
+ */
+ if (ctx_merge.result)
+ goto done;
+ fi_iter = get_next_hmdfs_file_info(fi_head, device_id);
+ if (fi_iter) {
+ file->f_pos = hmdfs_set_pos(fi_iter->device_id, 0, 0);
+ ctx->pos = file->f_pos;
+ }
+ }
+done:
+ trace_hmdfs_iterate_merge(file->f_path.dentry, start_pos, ctx->pos,
+ err);
+ return err;
+}
+
+int do_dir_open_merge(struct file *file, const struct cred *cred,
+ struct hmdfs_file_info *fi_head)
+{
+ int ret = -EINVAL;
+ struct hmdfs_dentry_info_merge *dim = hmdfs_dm(file->f_path.dentry);
+ struct hmdfs_dentry_comrade *comrade = NULL;
+ struct hmdfs_file_info *fi = NULL;
+ struct path lo_p = { .mnt = file->f_path.mnt };
+ struct file *lower_file = NULL;
+
+ if (IS_ERR_OR_NULL(cred))
+ return ret;
+
+ mutex_lock(&dim->comrade_list_lock);
+ list_for_each_entry(comrade, &(dim->comrade_list), list) {
+ fi = kzalloc(sizeof(*fi), GFP_KERNEL);
+ if (!fi) {
+ ret = ret ? -ENOMEM : 0;
+ continue; // allow some dir to fail to open
+ }
+ lo_p.dentry = comrade->lo_d;
+ // hold a reference so the dentry cannot be killed before dentry_open
+ dget(lo_p.dentry);
+ if (unlikely(d_is_negative(lo_p.dentry))) {
+ hmdfs_info("dentry is negative, try again");
+ kfree(fi);
+ dput(lo_p.dentry);
+ continue; // skip this device
+ }
+ lower_file = dentry_open(&lo_p, file->f_flags, cred);
+ dput(lo_p.dentry);
+ if (IS_ERR(lower_file)) {
+ kfree(fi);
+ continue;
+ }
+ ret = 0;
+ fi->device_id = comrade->dev_id;
+ fi->lower_file = lower_file;
+ mutex_lock(&fi_head->comrade_list_lock);
+ list_add_tail(&fi->comrade_list, &fi_head->comrade_list);
+ mutex_unlock(&fi_head->comrade_list_lock);
+ }
+ mutex_unlock(&dim->comrade_list_lock);
+ return ret;
+}
+
+int hmdfs_dir_open_merge(struct inode *inode, struct file *file)
+{
+ int ret = 0;
+ struct hmdfs_file_info *fi = NULL;
+
+ fi = kzalloc(sizeof(*fi), GFP_KERNEL);
+ if (!fi)
+ return -ENOMEM;
+
+ file->private_data = fi;
+ fi->root = RB_ROOT;
+ mutex_init(&fi->comrade_list_lock);
+ INIT_LIST_HEAD(&fi->comrade_list);
+
+ ret = do_dir_open_merge(file, hmdfs_sb(inode->i_sb)->cred, fi);
+ if (ret)
+ kfree(fi);
+
+ return ret;
+}
+
+int hmdfs_dir_release_merge(struct inode *inode, struct file *file)
+{
+ struct hmdfs_file_info *fi_head = hmdfs_f(file);
+ struct hmdfs_file_info *fi_iter = NULL;
+ struct hmdfs_file_info *fi_temp = NULL;
+
+ mutex_lock(&fi_head->comrade_list_lock);
+ list_for_each_entry_safe(fi_iter, fi_temp, &(fi_head->comrade_list),
+ comrade_list) {
+ list_del_init(&(fi_iter->comrade_list));
+ fput(fi_iter->lower_file);
+ kfree(fi_iter);
+ }
+ mutex_unlock(&fi_head->comrade_list_lock);
+ destroy_tree(&fi_head->root);
+ file->private_data = NULL;
+ kfree(fi_head);
+
+ return 0;
+}
+
+const struct file_operations hmdfs_dir_fops_merge = {
+ .owner = THIS_MODULE,
+ .iterate = hmdfs_iterate_merge,
+ .open = hmdfs_dir_open_merge,
+ .release = hmdfs_dir_release_merge,
+};
+
+int hmdfs_file_open_merge(struct inode *inode, struct file *file)
+{
+ int err = 0;
+ struct file *lower_file = NULL;
+ struct path lo_p = { .mnt = file->f_path.mnt };
+ struct super_block *sb = inode->i_sb;
+ const struct cred *cred = hmdfs_sb(sb)->cred;
+ struct hmdfs_file_info *gfi = NULL;
+ struct dentry *parent = NULL;
+
+ lo_p.dentry = hmdfs_get_fst_lo_d(file->f_path.dentry);
+ if (!lo_p.dentry) {
+ err = -EINVAL;
+ goto out_err;
+ }
+
+ gfi = kzalloc(sizeof(*gfi), GFP_KERNEL);
+ if (!gfi) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ parent = dget_parent(file->f_path.dentry);
+ lower_file = dentry_open(&lo_p, file->f_flags, cred);
+ if (IS_ERR(lower_file)) {
+ err = PTR_ERR(lower_file);
+ kfree(gfi);
+ } else {
+ gfi->lower_file = lower_file;
+ file->private_data = gfi;
+ }
+ dput(parent);
+out_err:
+ dput(lo_p.dentry);
+ return err;
+}
+
+int hmdfs_file_flush_merge(struct file *file, fl_owner_t id)
+{
+ struct hmdfs_file_info *gfi = hmdfs_f(file);
+ struct file *lower_file = gfi->lower_file;
+
+ if (lower_file->f_op->flush)
+ return lower_file->f_op->flush(lower_file, id);
+
+ return 0;
+}
+
+/* Parameters are passed straight through to the device_view level,
+ * so these file operations are the same as the device_view local ones.
+ */
+const struct file_operations hmdfs_file_fops_merge = {
+ .owner = THIS_MODULE,
+ .llseek = hmdfs_file_llseek_local,
+ .read_iter = hmdfs_read_local,
+ .write_iter = hmdfs_write_local,
+ .mmap = hmdfs_file_mmap_local,
+ .open = hmdfs_file_open_merge,
+ .flush = hmdfs_file_flush_merge,
+ .release = hmdfs_file_release_local,
+ .fsync = hmdfs_fsync_local,
+};
diff --git a/fs/hmdfs/file_remote.c b/fs/hmdfs/file_remote.c
new file mode 100644
index 0000000000000000000000000000000000000000..4ae87a138999359b9faed509d1dee978ad5f419a
--- /dev/null
+++ b/fs/hmdfs/file_remote.c
@@ -0,0 +1,1054 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/file_remote.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/highmem.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/task.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/writeback.h>
+
+#include "file_remote.h"
+
+#include "comm/socket_adapter.h"
+#include "hmdfs.h"
+#include "hmdfs_client.h"
+#include "hmdfs_dentryfile.h"
+#include "hmdfs_trace.h"
+
+static inline bool hmdfs_remote_write_cache_expired(
+ struct hmdfs_inode_info *info)
+{
+ return time_after(jiffies, info->writecache_expire);
+}
+
+enum expire_reason {
+ ALL_GOOD = 0,
+ INO_DISMATCH = 1,
+ SIZE_OR_CTIME_DISMATCH = 2,
+ TIMER_EXPIRE = 3,
+ TIMER_WORKING = 4,
+ STABLE_CTIME_DISMATCH = 5,
+ KEEP_CACHE = 6,
+};
+
+/*
+ * hmdfs_open_final_remote - Do the final steps of opening a remote file:
+ * update the local inode cache and decide whether or not to truncate
+ * inode pages.
+ *
+ * @info: hmdfs inode info
+ * @open_ret: values returned from remote when opening a remote file
+ * @keep_cache: keep local cache & i_size
+ */
+static int hmdfs_open_final_remote(struct hmdfs_inode_info *info,
+ struct hmdfs_open_ret *open_ret,
+ struct file *file, bool keep_cache)
+{
+ struct inode *inode = &info->vfs_inode;
+ bool truncate = false;
+ enum expire_reason reason = ALL_GOOD;
+ int ret = 0;
+
+ /*
+ * If the remote inode number changed, the lookup hit stale data: we
+ * return -ESTALE and reopen the file with metadata from remote getattr.
+ */
+ if (info->remote_ino != open_ret->ino) {
+ hmdfs_debug(
+ "got stale local inode, ino in local %llu, ino from open %llu",
+ info->remote_ino, open_ret->ino);
+ hmdfs_send_close(info->conn, &open_ret->fid);
+ reason = INO_DISMATCH;
+ ret = -ESTALE;
+ goto out;
+ }
+
+ if (keep_cache) {
+ reason = KEEP_CACHE;
+ trace_hmdfs_open_final_remote(info, open_ret, file, reason);
+ goto set_fid_out;
+ }
+
+ /*
+ * If the remote size does not match the local inode, or the remote ctime
+ * does not match the one recorded the last time this file was opened.
+ */
+ if (inode->i_size != open_ret->file_size ||
+ hmdfs_time_compare(&info->remote_ctime, &open_ret->remote_ctime)) {
+ truncate = true;
+ reason = SIZE_OR_CTIME_DISMATCH;
+ goto out;
+ }
+
+ /*
+ * If 'writecache_expire' is set, check if it expires. And skip the
+ * checking of stable_ctime.
+ */
+ if (info->writecache_expire) {
+ truncate = hmdfs_remote_write_cache_expired(info);
+ if (truncate)
+ reason = TIMER_EXPIRE;
+ else
+ reason = TIMER_WORKING;
+ goto out;
+ }
+
+ /* first open of this file, or the remote ctime was ahead of the remote time */
+ if (info->stable_ctime.tv_sec == 0 && info->stable_ctime.tv_nsec == 0) {
+ truncate = true;
+ reason = STABLE_CTIME_DISMATCH;
+ goto out;
+ }
+
+ /*
+ * - if last stable_ctime == stable_ctime, we do nothing.
+ * a. if ctime < stable_ctime, data is ensured to be uptodate,
+ * b. if ctime == stable_ctime, stale data might be accessed. This is
+ * acceptable since pagecache will be dropped later.
+ * c. ctime > stable_ctime is impossible.
+ * - if last stable_ctime < stable_ctime, we clear the cache.
+ * d. ctime != last stable_ctime is impossible
+ * e. ctime == last stable_ctime, this is possible to read again from
+ * b, thus we need to drop the cache.
+ * - if last stable_ctime > stable_ctime, we clear the cache.
+ * stable_ctime must be zero in this case, this is possible because
+ * system time might be changed.
+ */
+ if (hmdfs_time_compare(&info->stable_ctime, &open_ret->stable_ctime)) {
+ truncate = true;
+ reason = STABLE_CTIME_DISMATCH;
+ goto out;
+ }
+
+out:
+ trace_hmdfs_open_final_remote(info, open_ret, file, reason);
+ if (ret)
+ return ret;
+
+ if (reason == SIZE_OR_CTIME_DISMATCH) {
+ inode->i_ctime = open_ret->remote_ctime;
+ info->remote_ctime = open_ret->remote_ctime;
+ }
+
+ if (truncate) {
+ info->writecache_expire = 0;
+ truncate_inode_pages(inode->i_mapping, 0);
+ }
+
+ atomic64_set(&info->write_counter, 0);
+ info->stable_ctime = open_ret->stable_ctime;
+ i_size_write(inode, open_ret->file_size);
+ info->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
+set_fid_out:
+ spin_lock(&info->fid_lock);
+ info->fid = open_ret->fid;
+ spin_unlock(&info->fid_lock);
+ return 0;
+}
+
+int hmdfs_do_open_remote(struct file *file, bool keep_cache)
+{
+ struct hmdfs_inode_info *info = hmdfs_i(file_inode(file));
+ struct hmdfs_peer *conn = info->conn;
+ struct hmdfs_open_ret open_ret;
+ __u8 file_type = hmdfs_d(file->f_path.dentry)->file_type;
+ char *send_buf;
+ int err = 0;
+
+ send_buf = hmdfs_get_dentry_relative_path(file->f_path.dentry);
+ if (!send_buf) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+ err = hmdfs_send_open(conn, send_buf, file_type, &open_ret);
+ if (err) {
+ hmdfs_err("hmdfs_send_open return failed with %d", err);
+ goto out_free;
+ }
+
+ err = hmdfs_open_final_remote(info, &open_ret, file, keep_cache);
+
+out_free:
+ kfree(send_buf);
+ return err;
+}
+
+static inline bool hmdfs_remote_need_reopen(struct hmdfs_inode_info *info)
+{
+ return test_bit(HMDFS_FID_NEED_OPEN, &info->fid_flags);
+}
+
+static inline bool hmdfs_remote_is_opening_file(struct hmdfs_inode_info *info)
+{
+ return test_bit(HMDFS_FID_OPENING, &info->fid_flags);
+}
+
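+/*
+ * Wait (interruptibly) until no other task is opening this remote file.
+ * Called with fid_lock held; the lock is dropped while sleeping and
+ * re-acquired before the HMDFS_FID_OPENING bit is rechecked.
+ */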
+static int hmdfs_remote_wait_opening_file(struct hmdfs_inode_info *info)
+{
+ int err;
+
+ if (!hmdfs_remote_is_opening_file(info))
+ return 0;
+
+ err = ___wait_event(info->fid_wq, hmdfs_remote_is_opening_file(info),
+ TASK_INTERRUPTIBLE, 0, 0,
+ spin_unlock(&info->fid_lock);
+ schedule();
+ spin_lock(&info->fid_lock));
+ if (err)
+ err = -EINTR;
+
+ return err;
+}
+
+static int hmdfs_remote_file_reopen(struct hmdfs_inode_info *info,
+ struct file *filp)
+{
+ int err = 0;
+ struct hmdfs_peer *conn = info->conn;
+ struct inode *inode = NULL;
+ struct hmdfs_fid fid;
+
+ if (conn->status == NODE_STAT_OFFLINE)
+ return -EAGAIN;
+
+ spin_lock(&info->fid_lock);
+ err = hmdfs_remote_wait_opening_file(info);
+ if (err || !hmdfs_remote_need_reopen(info)) {
+ spin_unlock(&info->fid_lock);
+ goto out;
+ }
+
+ set_bit(HMDFS_FID_OPENING, &info->fid_flags);
+ fid = info->fid;
+ spin_unlock(&info->fid_lock);
+
+ inode = &info->vfs_inode;
+ inode_lock(inode);
+ /*
+ * Most closing cases are meaningless, except for one:
+ * read process A read process B
+ * err = -EBADF err = -EBADF (caused by re-online)
+ * set_need_reopen
+ * do reopen
+ * fid = new fid_1 [server hold fid_1]
+ * set need_reopen
+ * do reopen
+ * send close (fid_1) // In case of leak
+ * fid = new fid_2
+ */
+ if (fid.id != HMDFS_INODE_INVALID_FILE_ID)
+ hmdfs_send_close(conn, &fid);
+ err = hmdfs_do_open_remote(filp, true);
+ inode_unlock(inode);
+
+ spin_lock(&info->fid_lock);
+ /*
+ * May make the bit set in offline handler lost, but server
+ * will tell us whether or not the newly-opened file id is
+ * generated before offline, if it is opened before offline,
+ * the operation on the file id will return -EBADF and
+ * HMDFS_FID_NEED_OPEN bit will be set again.
+ */
+ if (!err)
+ clear_bit(HMDFS_FID_NEED_OPEN, &info->fid_flags);
+ clear_bit(HMDFS_FID_OPENING, &info->fid_flags);
+ spin_unlock(&info->fid_lock);
+
+ wake_up_interruptible_all(&info->fid_wq);
+out:
+ return err;
+}
+
+static int hmdfs_remote_check_and_reopen(struct hmdfs_inode_info *info,
+ struct file *filp)
+{
+ if (!hmdfs_remote_need_reopen(info))
+ return 0;
+
+ return hmdfs_remote_file_reopen(info, filp);
+}
+
+void hmdfs_do_close_remote(struct kref *kref)
+{
+ struct hmdfs_inode_info *info =
+ container_of(kref, struct hmdfs_inode_info, ref);
+ struct hmdfs_fid fid;
+
+ hmdfs_remote_fetch_fid(info, &fid);
+ /* hmdfs_send_close() may complete asynchronously */
+ hmdfs_send_close(info->conn, &fid);
+}
+
+static inline bool hmdfs_remote_need_track_file(const struct hmdfs_sb_info *sbi,
+ fmode_t mode)
+{
+ return (hmdfs_is_stash_enabled(sbi) && (mode & FMODE_WRITE));
+}
+
+static void
+hmdfs_remote_del_wr_opened_inode_nolock(struct hmdfs_inode_info *info)
+{
+ WARN_ON(list_empty(&info->wr_opened_node));
+ if (atomic_dec_and_test(&info->wr_opened_cnt))
+ list_del_init(&info->wr_opened_node);
+}
+
+void hmdfs_remote_del_wr_opened_inode(struct hmdfs_peer *conn,
+ struct hmdfs_inode_info *info)
+{
+ spin_lock(&conn->wr_opened_inode_lock);
+ hmdfs_remote_del_wr_opened_inode_nolock(info);
+ spin_unlock(&conn->wr_opened_inode_lock);
+}
+
+void hmdfs_remote_add_wr_opened_inode_nolock(struct hmdfs_peer *conn,
+ struct hmdfs_inode_info *info)
+{
+ if (list_empty(&info->wr_opened_node)) {
+ atomic_set(&info->wr_opened_cnt, 1);
+ list_add_tail(&info->wr_opened_node,
+ &conn->wr_opened_inode_list);
+ } else {
+ atomic_inc(&info->wr_opened_cnt);
+ }
+}
+
+static void hmdfs_remote_add_wr_opened_inode(struct hmdfs_peer *conn,
+ struct hmdfs_inode_info *info)
+{
+ spin_lock(&conn->wr_opened_inode_lock);
+ hmdfs_remote_add_wr_opened_inode_nolock(conn, info);
+ spin_unlock(&conn->wr_opened_inode_lock);
+}
+
+int hmdfs_file_open_remote(struct inode *inode, struct file *file)
+{
+ struct hmdfs_inode_info *info = hmdfs_i(inode);
+ struct kref *ref = &(info->ref);
+ int err = 0;
+
+ inode_lock(inode);
+ if (kref_read(ref) == 0) {
+ err = hmdfs_do_open_remote(file, false);
+ if (err == 0)
+ kref_init(ref);
+ } else {
+ kref_get(ref);
+ }
+ inode_unlock(inode);
+
+ if (!err && hmdfs_remote_need_track_file(hmdfs_sb(inode->i_sb),
+ file->f_mode))
+ hmdfs_remote_add_wr_opened_inode(info->conn, info);
+
+ return err;
+}
+
+static void hmdfs_set_writecache_expire(struct hmdfs_inode_info *info,
+ unsigned int seconds)
+{
+ unsigned long new_expire = jiffies + seconds * HZ;
+
+ /*
+ * When file has been written before closing, set pagecache expire
+ * if it has not been set yet. This is necessary because ctime might
+ * stay the same after overwrite.
+ */
+ if (info->writecache_expire &&
+ time_after(new_expire, info->writecache_expire))
+ return;
+
+ info->writecache_expire = new_expire;
+}
+
+static void hmdfs_remote_keep_writecache(struct inode *inode, struct file *file)
+{
+ struct hmdfs_inode_info *info = NULL;
+ struct kref *ref = NULL;
+ struct hmdfs_getattr_ret *getattr_ret = NULL;
+ unsigned int write_cache_timeout =
+ hmdfs_sb(inode->i_sb)->write_cache_timeout;
+ int err;
+
+ if (!write_cache_timeout)
+ return;
+
+ info = hmdfs_i(inode);
+ ref = &(info->ref);
+ /*
+ * don't do anything if file is still opening or file hasn't been
+ * written.
+ */
+ if (kref_read(ref) > 0 || !atomic64_read(&info->write_counter))
+ return;
+
+ /*
+ * If remote getattr failed, and we don't update ctime,
+ * pagecache will be truncated the next time file is opened.
+ */
+ err = hmdfs_remote_getattr(info->conn, file_dentry(file), 0,
+ &getattr_ret);
+ if (err) {
+ hmdfs_err("remote getattr failed with err %d", err);
+ return;
+ }
+
+ if (!(getattr_ret->stat.result_mask & STATX_CTIME)) {
+ hmdfs_err("get remote ctime failed with mask 0x%x",
+ getattr_ret->stat.result_mask);
+ kfree(getattr_ret);
+ return;
+ }
+ /*
+ * Update ctime from remote so that the pagecache will not be
+ * truncated on the next open.
+ */
+ inode->i_ctime = getattr_ret->stat.ctime;
+ info->remote_ctime = getattr_ret->stat.ctime;
+ hmdfs_set_writecache_expire(info, write_cache_timeout);
+ kfree(getattr_ret);
+}
+
+int hmdfs_file_release_remote(struct inode *inode, struct file *file)
+{
+ struct hmdfs_inode_info *info = hmdfs_i(inode);
+
+ if (hmdfs_remote_need_track_file(hmdfs_sb(inode->i_sb), file->f_mode))
+ hmdfs_remote_del_wr_opened_inode(info->conn, info);
+
+ inode_lock(inode);
+ kref_put(&info->ref, hmdfs_do_close_remote);
+ hmdfs_remote_keep_writecache(inode, file);
+ inode_unlock(inode);
+
+ return 0;
+}
+
+static int hmdfs_file_flush(struct file *file, fl_owner_t id)
+{
+ int err = 0;
+ struct inode *inode = file_inode(file);
+
+ if (!(file->f_mode & FMODE_WRITE))
+ return 0;
+
+ /*
+ * Continue regardless of whether file reopen fails or not,
+ * because there may be no dirty page.
+ */
+ hmdfs_remote_check_and_reopen(hmdfs_i(inode), file);
+
+ /*
+ * Wait for wsem here would impact the performance greatly, so we
+ * overlap the time to issue as many wbs as we can, expecting async
+ * wbs are eliminated afterwards.
+ */
+ filemap_fdatawrite(inode->i_mapping);
+ down_write(&hmdfs_i(inode)->wpage_sem);
+ err = filemap_write_and_wait(inode->i_mapping);
+ up_write(&hmdfs_i(inode)->wpage_sem);
+ return err;
+}
+
+static ssize_t hmdfs_file_read_iter_remote(struct kiocb *iocb,
+ struct iov_iter *iter)
+{
+ struct file *filp = iocb->ki_filp;
+ struct hmdfs_inode_info *info = hmdfs_i(file_inode(filp));
+ struct file_ra_state *ra = NULL;
+ unsigned int rtt;
+ int err;
+ bool tried = false;
+
+retry:
+ err = hmdfs_remote_check_and_reopen(info, filp);
+ if (err)
+ return err;
+
+ ra = &filp->f_ra;
+ /* rtt is measured in units of 10 msecs */
+ rtt = hmdfs_tcpi_rtt(info->conn) / 10000;
+ switch (rtt) {
+ case 0:
+ break;
+ case 1:
+ ra->ra_pages = 256;
+ break;
+ case 2:
+ ra->ra_pages = 512;
+ break;
+ default:
+ ra->ra_pages = 1024;
+ break;
+ }
+
+ err = generic_file_read_iter(iocb, iter);
+ if (err < 0 && !tried && hmdfs_remote_need_reopen(info)) {
+ /* Read from a stale fid, try read again once. */
+ tried = true;
+ goto retry;
+ }
+
+ return err;
+}
+
+static inline bool hmdfs_is_file_unwritable(const struct hmdfs_inode_info *info,
+ bool check_stash)
+{
+ return (check_stash && hmdfs_inode_is_stashing(info)) ||
+ !hmdfs_is_node_online(info->conn);
+}
+
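+/*
+ * The unwritable check is performed twice: once before taking the inode
+ * lock as a cheap fast path, and once more under the lock, since the
+ * peer may go offline or the inode may start stashing while we block.
+ */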
+static ssize_t __hmdfs_file_write_iter_remote(struct kiocb *iocb,
+ struct iov_iter *iter,
+ bool check_stash)
+{
+ struct file *filp = iocb->ki_filp;
+ struct inode *inode = file_inode(filp);
+ struct hmdfs_inode_info *info = hmdfs_i(inode);
+ ssize_t ret;
+
+ if (hmdfs_is_file_unwritable(info, check_stash))
+ return -EAGAIN;
+
+ ret = hmdfs_remote_check_and_reopen(info, filp);
+ if (ret)
+ return ret;
+
+ inode_lock(inode);
+ if (hmdfs_is_file_unwritable(info, check_stash)) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ ret = generic_write_checks(iocb, iter);
+ if (ret > 0)
+ ret = __generic_file_write_iter(iocb, iter);
+out:
+ inode_unlock(inode);
+
+ if (ret > 0)
+ ret = generic_write_sync(iocb, ret);
+ return ret;
+}
+
+ssize_t hmdfs_file_write_iter_remote_nocheck(struct kiocb *iocb,
+ struct iov_iter *iter)
+{
+ return __hmdfs_file_write_iter_remote(iocb, iter, false);
+}
+
+static ssize_t hmdfs_file_write_iter_remote(struct kiocb *iocb,
+ struct iov_iter *iter)
+{
+ return __hmdfs_file_write_iter_remote(iocb, iter, true);
+}
+
+/* hmdfs does not support mmap writes to remote files */
+static vm_fault_t hmdfs_page_mkwrite(struct vm_fault *vmf)
+{
+ return VM_FAULT_SIGBUS;
+}
+
+static const struct vm_operations_struct hmdfs_file_vm_ops = {
+ .fault = filemap_fault,
+ .map_pages = filemap_map_pages,
+ .page_mkwrite = hmdfs_page_mkwrite,
+};
+
+static int hmdfs_file_mmap_remote(struct file *file, struct vm_area_struct *vma)
+{
+ vma->vm_ops = &hmdfs_file_vm_ops;
+ file_accessed(file);
+
+ return 0;
+}
+
+static int hmdfs_file_fsync_remote(struct file *file, loff_t start, loff_t end,
+ int datasync)
+{
+ struct hmdfs_inode_info *info = hmdfs_i(file_inode(file));
+ struct hmdfs_peer *conn = info->conn;
+ struct hmdfs_fid fid;
+ int err;
+
+ trace_hmdfs_fsync_enter_remote(conn->sbi, conn->device_id,
+ info->remote_ino, datasync);
+ /*
+ * Continue regardless of whether file reopen fails or not,
+ * because there may be no dirty page.
+ */
+ hmdfs_remote_check_and_reopen(info, file);
+
+ filemap_fdatawrite(file->f_mapping);
+ down_write(&info->wpage_sem);
+ err = file_write_and_wait_range(file, start, end);
+ up_write(&info->wpage_sem);
+ if (err) {
+ hmdfs_err("local fsync fail with %d", err);
+ goto out;
+ }
+
+ hmdfs_remote_fetch_fid(info, &fid);
+ err = hmdfs_send_fsync(conn, &fid, start, end, datasync);
+ if (err)
+ hmdfs_err("send fsync fail with %d", err);
+
+out:
+ trace_hmdfs_fsync_exit_remote(conn->sbi, conn->device_id,
+ info->remote_ino,
+ get_cmd_timeout(conn->sbi, F_FSYNC), err);
+
+ /* Compatible with POSIX retcode */
+ if (err == -ETIME)
+ err = -EIO;
+
+ return err;
+}
+
+const struct file_operations hmdfs_dev_file_fops_remote = {
+ .owner = THIS_MODULE,
+ .llseek = generic_file_llseek,
+ .read_iter = hmdfs_file_read_iter_remote,
+ .write_iter = hmdfs_file_write_iter_remote,
+ .mmap = hmdfs_file_mmap_remote,
+ .open = hmdfs_file_open_remote,
+ .release = hmdfs_file_release_remote,
+ .flush = hmdfs_file_flush,
+ .fsync = hmdfs_file_fsync_remote,
+};
+
+static void hmdfs_fill_page_zero(struct page *page)
+{
+ void *addr = NULL;
+
+ addr = kmap(page);
+ memset(addr, 0, PAGE_SIZE);
+ kunmap(page);
+ SetPageUptodate(page);
+ unlock_page(page);
+}
+
+static int hmdfs_readpage_remote(struct file *file, struct page *page)
+{
+ struct inode *inode = file_inode(file);
+ struct hmdfs_inode_info *info = hmdfs_i(inode);
+ loff_t isize = i_size_read(inode);
+ pgoff_t end_index = (isize - 1) >> PAGE_SHIFT;
+ struct hmdfs_fid fid;
+
+ if (!isize || page->index > end_index) {
+ hmdfs_fill_page_zero(page);
+ return 0;
+ }
+
+ hmdfs_remote_fetch_fid(info, &fid);
+ return hmdfs_client_readpage(info->conn, &fid, page);
+}
+
+uint32_t hmdfs_get_writecount(struct page *page)
+{
+ uint32_t count = 0;
+ loff_t pos = (loff_t)page->index << HMDFS_PAGE_OFFSET;
+ struct inode *inode = page->mapping->host;
+ loff_t size = i_size_read(inode);
+ /*
+ * If page offset is greater than i_size, this is possible when
+ * writepage concurrent with truncate. In this case, we don't need to
+ * do remote writepage since it'll be truncated after the page is
+ * unlocked.
+ */
+ if (pos >= size)
+ count = 0;
+ /*
+ * If the page about to write is beyond i_size, we can't write beyond
+ * i_size because remote file size will be wrong.
+ */
+ else if (size < pos + HMDFS_PAGE_SIZE)
+ count = size - pos;
+ /* It's safe to write the whole page */
+ else
+ count = HMDFS_PAGE_SIZE;
+
+ return count;
+}
+
+static bool allow_cur_thread_wpage(struct hmdfs_inode_info *info,
+ bool *rsem_held, bool sync_all)
+{
+ WARN_ON(!rsem_held);
+
+ if (sync_all) {
+ *rsem_held = false;
+ return true;
+ }
+ *rsem_held = down_read_trylock(&info->wpage_sem);
+ return *rsem_held;
+}
+
+/**
+ * hmdfs_writepage_remote - writeback a dirty page to remote
+ *
+ * INFO:
+ * When asked to WB_SYNC_ALL, this function should leave with both the page
+ * and the radix tree node clean to achieve close-to-open consistency.
+ * Moreover, it shall never return -EIO, to help filemap iterate over all
+ * dirty pages.
+ *
+ * INFO:
+ * When asked to WB_SYNC_NONE, this function should be lenient when faults
+ * (OOM or a bad pipe) happen, to enable subsequent r/w & writeback.
+ */
+static int hmdfs_writepage_remote(struct page *page,
+ struct writeback_control *wbc)
+{
+ struct inode *inode = page->mapping->host;
+ struct hmdfs_inode_info *info = hmdfs_i(inode);
+ struct hmdfs_sb_info *sbi = hmdfs_sb(inode->i_sb);
+ int ret = 0;
+ bool rsem_held = false;
+ bool sync = wbc->sync_mode == WB_SYNC_ALL;
+ struct hmdfs_writepage_context *param = NULL;
+
+ if (!allow_cur_thread_wpage(info, &rsem_held, sync))
+ goto out_unlock;
+
+ set_page_writeback(page);
+
+ param = kzalloc(sizeof(*param), GFP_NOFS);
+ if (!param) {
+ ret = -ENOMEM;
+ goto out_endwb;
+ }
+
+ if (sync && hmdfs_usr_sig_pending(current)) {
+ ClearPageUptodate(page);
+ goto out_free;
+ }
+ param->count = hmdfs_get_writecount(page);
+ if (!param->count)
+ goto out_free;
+ param->rsem_held = rsem_held;
+ hmdfs_remote_fetch_fid(info, ¶m->fid);
+ param->sync_all = sync;
+ param->caller = current;
+ get_task_struct(current);
+ param->page = page;
+ param->timeout = jiffies + msecs_to_jiffies(sbi->wb_timeout_ms);
+ INIT_DELAYED_WORK(¶m->retry_dwork, hmdfs_remote_writepage_retry);
+ ret = hmdfs_remote_do_writepage(info->conn, param);
+ if (likely(!ret))
+ return 0;
+
+ put_task_struct(current);
+out_free:
+ kfree(param);
+out_endwb:
+ end_page_writeback(page);
+ if (rsem_held)
+ up_read(&info->wpage_sem);
+out_unlock:
+ if (sync || !hmdfs_need_redirty_page(info, ret)) {
+ SetPageError(page);
+ mapping_set_error(page->mapping, ret);
+ } else {
+ redirty_page_for_writepage(wbc, page);
+ }
+ unlock_page(page);
+ return ret;
+}
+
+static void hmdfs_account_dirty_pages(struct address_space *mapping)
+{
+ struct hmdfs_sb_info *sbi = mapping->host->i_sb->s_fs_info;
+
+ if (!sbi->h_wb->dirty_writeback_control)
+ return;
+
+ this_cpu_inc(*sbi->h_wb->bdp_ratelimits);
+}
+
+static int hmdfs_write_begin_remote(struct file *file,
+ struct address_space *mapping, loff_t pos,
+ unsigned int len, unsigned int flags,
+ struct page **pagep, void **fsdata)
+{
+ pgoff_t index = ((unsigned long long)pos) >> PAGE_SHIFT;
+ struct inode *inode = file_inode(file);
+ struct page *page = NULL;
+ int ret = 0;
+
+start:
+ page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
+ if (!page)
+ return -ENOMEM;
+ *pagep = page;
+ wait_on_page_writeback(page);
+
+ // If this page will be covered completely.
+ if (len == HMDFS_PAGE_SIZE || PageUptodate(page))
+ return 0;
+
+ /*
+ * If the data existing in this page will be completely overwritten,
+ * we just need to zero the remainder of the page.
+ */
+ if (!((unsigned long long)pos & (HMDFS_PAGE_SIZE - 1)) &&
+ (pos + len) >= i_size_read(inode)) {
+ zero_user_segment(page, len, HMDFS_PAGE_SIZE);
+ return 0;
+ }
+ /*
+ * We need to read the page in before writing data to it.
+ */
+ ret = hmdfs_readpage_remote(file, page);
+ if (!ret) {
+ if (PageLocked(page)) {
+ ret = __lock_page_killable(page);
+ if (!ret)
+ unlock_page(page);
+ }
+
+ if (!ret && PageUptodate(page)) {
+ put_page(page);
+ goto start;
+ }
+ if (!ret)
+ ret = -EIO;
+ }
+ put_page(page);
+ return ret;
+}
+
+static int hmdfs_write_end_remote(struct file *file,
+ struct address_space *mapping, loff_t pos,
+ unsigned int len, unsigned int copied,
+ struct page *page, void *fsdata)
+{
+ struct inode *inode = page->mapping->host;
+
+ if (!PageUptodate(page)) {
+ if (unlikely(copied != len))
+ copied = 0;
+ else
+ SetPageUptodate(page);
+ }
+ if (!copied)
+ goto unlock_out;
+
+ if (!PageDirty(page)) {
+ hmdfs_account_dirty_pages(mapping);
+ set_page_dirty(page);
+ }
+
+ if (pos + copied > i_size_read(inode)) {
+ i_size_write(inode, pos + copied);
+ hmdfs_i(inode)->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
+ }
+unlock_out:
+ unlock_page(page);
+ put_page(page);
+
+ /* hmdfs private writeback control */
+ hmdfs_balance_dirty_pages_ratelimited(mapping);
+ return copied;
+}
+
+const struct address_space_operations hmdfs_dev_file_aops_remote = {
+ .readpage = hmdfs_readpage_remote,
+ .write_begin = hmdfs_write_begin_remote,
+ .write_end = hmdfs_write_end_remote,
+ .writepage = hmdfs_writepage_remote,
+ .set_page_dirty = __set_page_dirty_nobuffers,
+};
+
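+/*
+ * Pack a directory position into a loff_t: the top bit (POS_BIT_NUM - 1)
+ * flags a remote device, the next DEV_ID_BIT_NUM bits carry the device
+ * id, then the dentry group id, with the in-group offset in the low
+ * OFFSET_BIT_NUM bits.
+ */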
+loff_t hmdfs_set_pos(unsigned long dev_id, unsigned long group_id,
+ unsigned long offset)
+{
+ loff_t pos;
+
+ pos = ((loff_t)dev_id << (POS_BIT_NUM - 1 - DEV_ID_BIT_NUM)) +
+ ((loff_t)group_id << OFFSET_BIT_NUM) + offset;
+ if (dev_id)
+ pos |= ((loff_t)1 << (POS_BIT_NUM - 1));
+ return pos;
+}
+
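+/*
+ * Walk the cached remote dentry file starting from the (group, offset)
+ * pair decoded out of ctx->pos, emitting one entry per used bitmap slot
+ * until the caller's buffer is exhausted.
+ */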
+static int analysis_dentry_file_from_con(struct hmdfs_sb_info *sbi,
+ struct file *file,
+ struct file *handler,
+ struct dir_context *ctx)
+{
+ struct hmdfs_dentry_group *dentry_group = NULL;
+ loff_t pos = ctx->pos;
+ unsigned long dev_id = (unsigned long)((pos << 1) >> (POS_BIT_NUM - DEV_ID_BIT_NUM));
+ unsigned long group_id = (unsigned long)((pos << (1 + DEV_ID_BIT_NUM)) >>
+ (POS_BIT_NUM - GROUP_ID_BIT_NUM));
+ loff_t offset = pos & OFFSET_BIT_MASK;
+ int group_num = 0;
+ char *dentry_name = NULL;
+ int iterate_result = 0;
+ int i, j;
+
+ dentry_group = kzalloc(sizeof(*dentry_group), GFP_KERNEL);
+
+ if (!dentry_group)
+ return -ENOMEM;
+
+ if (IS_ERR_OR_NULL(handler)) {
+ kfree(dentry_group);
+ return -ENOENT;
+ }
+
+ group_num = get_dentry_group_cnt(file_inode(handler));
+ dentry_name = kzalloc(DENTRY_NAME_MAX_LEN, GFP_KERNEL);
+ if (!dentry_name) {
+ kfree(dentry_group);
+ return -ENOMEM;
+ }
+
+ for (i = group_id; i < group_num; i++) {
+ int ret = hmdfs_metainfo_read(sbi, handler, dentry_group,
+ sizeof(struct hmdfs_dentry_group),
+ i);
+ if (ret != sizeof(struct hmdfs_dentry_group)) {
+ hmdfs_err("read dentry group failed ret:%d", ret);
+ goto done;
+ }
+
+ for (j = offset; j < DENTRY_PER_GROUP; j++) {
+ int len;
+ int file_type = DT_UNKNOWN;
+ bool is_continue;
+
+ len = le16_to_cpu(dentry_group->nsl[j].namelen);
+ if (!test_bit_le(j, dentry_group->bitmap) || len == 0)
+ continue;
+
+ memset(dentry_name, 0, DENTRY_NAME_MAX_LEN);
+ // TODO: Support more file_type
+ if (S_ISDIR(le16_to_cpu(dentry_group->nsl[j].i_mode)))
+ file_type = DT_DIR;
+ else if (S_ISREG(le16_to_cpu(
+ dentry_group->nsl[j].i_mode)))
+ file_type = DT_REG;
+
+ strncat(dentry_name, dentry_group->filename[j], len);
+ pos = hmdfs_set_pos(dev_id, i, j);
+ is_continue =
+ dir_emit(ctx, dentry_name, len,
+ pos + INUNUMBER_START, file_type);
+ if (!is_continue) {
+ ctx->pos = pos;
+ iterate_result = 1;
+ goto done;
+ }
+ }
+ offset = 0;
+ }
+
+done:
+ kfree(dentry_name);
+ kfree(dentry_group);
+ return iterate_result;
+}
+
+int hmdfs_dev_readdir_from_con(struct hmdfs_peer *con, struct file *file,
+ struct dir_context *ctx)
+{
+ int iterate_result = 0;
+
+ iterate_result = analysis_dentry_file_from_con(
+ con->sbi, file, file->private_data, ctx);
+ return iterate_result;
+}
+
+static int hmdfs_iterate_remote(struct file *file, struct dir_context *ctx)
+{
+ int err = 0;
+ loff_t start_pos = ctx->pos;
+ struct hmdfs_peer *con = NULL;
+ struct hmdfs_dentry_info *di = hmdfs_d(file->f_path.dentry);
+ bool is_local = !((ctx->pos) >> (POS_BIT_NUM - 1));
+ uint64_t dev_id = di->device_id;
+
+ if (ctx->pos == -1)
+ return 0;
+ if (is_local)
+ ctx->pos = hmdfs_set_pos(dev_id, 0, 0);
+
+ con = hmdfs_lookup_from_devid(file->f_inode->i_sb->s_fs_info, dev_id);
+ if (con) {
+ err = con->conn_operations->remote_readdir(con, file, ctx);
+ peer_put(con);
+ if (err)
+ goto done;
+ }
+
+done:
+ if (err <= 0)
+ ctx->pos = -1;
+
+ trace_hmdfs_iterate_remote(file->f_path.dentry, start_pos, ctx->pos,
+ err);
+ return err;
+}
+
+int hmdfs_dir_open_remote(struct inode *inode, struct file *file)
+{
+ struct hmdfs_inode_info *info = hmdfs_i(inode);
+ struct clearcache_item *cache_item = NULL;
+
+ if (info->conn && info->conn->version <= USERSPACE_MAX_VER) {
+ return 0;
+ } else if (info->conn) {
+ if (!hmdfs_cache_revalidate(READ_ONCE(info->conn->conn_time),
+ info->conn->device_id,
+ file->f_path.dentry))
+ get_remote_dentry_file_sync(file->f_path.dentry,
+ info->conn);
+ cache_item = hmdfs_find_cache_item(info->conn->device_id,
+ file->f_path.dentry);
+ if (cache_item) {
+ file->private_data = cache_item->filp;
+ get_file(file->private_data);
+ kref_put(&cache_item->ref, release_cache_item);
+ return 0;
+ }
+ return -ENOENT;
+ }
+ return -ENOENT;
+}
+
+static int hmdfs_dir_release_remote(struct inode *inode, struct file *file)
+{
+ if (file->private_data)
+ fput(file->private_data);
+ file->private_data = NULL;
+ return 0;
+}
+
+const struct file_operations hmdfs_dev_dir_ops_remote = {
+ .owner = THIS_MODULE,
+ .iterate = hmdfs_iterate_remote,
+ .open = hmdfs_dir_open_remote,
+ .release = hmdfs_dir_release_remote,
+ .fsync = __generic_file_fsync,
+};
diff --git a/fs/hmdfs/file_remote.h b/fs/hmdfs/file_remote.h
new file mode 100644
index 0000000000000000000000000000000000000000..026bd0c944a60c6f01d5cadf5cb671b2be9b355d
--- /dev/null
+++ b/fs/hmdfs/file_remote.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/file_remote.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_FILE_REMOTE_H
+#define HMDFS_FILE_REMOTE_H
+
+#include <linux/fs.h>
+#include <linux/uio.h>
+
+#include "hmdfs.h"
+#include "comm/connection.h"
+
+void hmdfs_remote_del_wr_opened_inode(struct hmdfs_peer *conn,
+ struct hmdfs_inode_info *info);
+
+void hmdfs_remote_add_wr_opened_inode_nolock(struct hmdfs_peer *conn,
+ struct hmdfs_inode_info *info);
+
+ssize_t hmdfs_file_write_iter_remote_nocheck(struct kiocb *iocb,
+ struct iov_iter *iter);
+
+#endif
diff --git a/fs/hmdfs/file_root.c b/fs/hmdfs/file_root.c
new file mode 100644
index 0000000000000000000000000000000000000000..d82ff4d0b04b0958fb3e34022b0937a0b9d0294e
--- /dev/null
+++ b/fs/hmdfs/file_root.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/file_root.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include "authority/authentication.h"
+#include "comm/socket_adapter.h"
+#include "comm/transport.h"
+#include "hmdfs.h"
+#include "hmdfs_dentryfile.h"
+#include "hmdfs_device_view.h"
+
+#define DEVICE_VIEW_CTX_POS 2
+#define MERGE_VIEW_CTX_POS 3
+#define ROOT_DIR_INO_START 20000000
+
+// used by hmdfs_device_iterate functions
+#define DEVICE_VIEW_INO_START 20000002
+#define LOCAL_DEVICE_CTX_POS 2
+
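+/*
+ * Return the first online peer after @current_dev_id in the connection
+ * list (starting from the list head when @current_dev_id is 0), with
+ * its refcount elevated via peer_get(), or NULL if none is found.
+ */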
+struct hmdfs_peer *get_next_con(struct hmdfs_sb_info *sbi,
+ unsigned long current_dev_id)
+{
+ struct hmdfs_peer *con = NULL;
+ struct hmdfs_peer *next_con = NULL;
+ struct list_head *head, *node;
+
+ mutex_lock(&sbi->connections.node_lock);
+ head = &sbi->connections.node_list;
+ if (current_dev_id == 0) {
+ node = head->next;
+ if (node == head)
+ goto done;
+ next_con = container_of(node, struct hmdfs_peer, list);
+ if (next_con->status == NODE_STAT_ONLINE)
+ goto done;
+ current_dev_id = next_con->device_id;
+ next_con = NULL;
+ }
+
+ list_for_each_entry(con, &sbi->connections.node_list, list) {
+ if ((con->device_id & 0xFFFF) == (current_dev_id & 0xFFFF)) {
+ node = con->list.next;
+ if (node == head)
+ goto done;
+ next_con = container_of(node, struct hmdfs_peer, list);
+ if (next_con->status == NODE_STAT_ONLINE)
+ goto done;
+ current_dev_id = next_con->device_id;
+ next_con = NULL;
+ }
+ }
+done:
+ if (next_con)
+ peer_get(next_con);
+ mutex_unlock(&sbi->connections.node_lock);
+ return next_con;
+}
+
+int hmdfs_device_iterate(struct file *file, struct dir_context *ctx)
+{
+ int err = 0;
+ uint64_t ino_start = DEVICE_VIEW_INO_START;
+ struct hmdfs_peer *next_con = NULL;
+ unsigned long dev_id = 0;
+ struct hmdfs_peer *con = NULL;
+ char *remote_device_name = NULL;
+
+ if (ctx->pos != 0)
+ goto out;
+ dir_emit_dots(file, ctx);
+
+ if (ctx->pos == LOCAL_DEVICE_CTX_POS) {
+ err = dir_emit(ctx, DEVICE_VIEW_LOCAL,
+ sizeof(DEVICE_VIEW_LOCAL) - 1, ino_start++,
+ DT_DIR);
+ if (!err)
+ goto out;
+ (ctx->pos)++;
+ }
+ next_con = get_next_con(file->f_inode->i_sb->s_fs_info, 0);
+ if (!next_con)
+ goto out;
+
+ dev_id = next_con->device_id;
+ peer_put(next_con);
+ con = hmdfs_lookup_from_devid(file->f_inode->i_sb->s_fs_info, dev_id);
+ remote_device_name = kmalloc(HMDFS_CID_SIZE + 1, GFP_KERNEL);
+ if (!remote_device_name) {
+ err = -ENOMEM;
+ goto out;
+ }
+ while (con) {
+ peer_put(con);
+ snprintf(remote_device_name, HMDFS_CID_SIZE + 1, "%s",
+ con->cid);
+ if (!dir_emit(ctx, remote_device_name,
+ strlen(remote_device_name), ino_start++, DT_DIR))
+ goto done;
+
+ (ctx->pos)++;
+ con = get_next_con(file->f_inode->i_sb->s_fs_info, dev_id);
+ if (!con)
+ goto done;
+
+ dev_id = con->device_id;
+ }
+done:
+ kfree(remote_device_name);
+out:
+ if (err <= 0)
+ ctx->pos = -1;
+
+ return err;
+}
+
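+/*
+ * Root directory layout: positions 0 and 1 are "." and "..",
+ * DEVICE_VIEW_CTX_POS emits the device view entry, and (when the merge
+ * switch is enabled) MERGE_VIEW_CTX_POS emits the merge view entry.
+ */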
+int hmdfs_root_iterate(struct file *file, struct dir_context *ctx)
+{
+ uint64_t ino_start = ROOT_DIR_INO_START;
+ struct hmdfs_sb_info *sbi = file_inode(file)->i_sb->s_fs_info;
+
+ if (!dir_emit_dots(file, ctx))
+ return 0;
+ if (ctx->pos == DEVICE_VIEW_CTX_POS) {
+ if (!dir_emit(ctx, DEVICE_VIEW_ROOT,
+ sizeof(DEVICE_VIEW_ROOT) - 1, ino_start, DT_DIR))
+ return 0;
+ ino_start++;
+ ctx->pos = MERGE_VIEW_CTX_POS;
+ }
+ if (sbi->s_merge_switch && ctx->pos == MERGE_VIEW_CTX_POS) {
+ if (!dir_emit(ctx, MERGE_VIEW_ROOT, sizeof(MERGE_VIEW_ROOT) - 1,
+ ino_start, DT_DIR))
+ return 0;
+ (ctx->pos)++;
+ }
+ return 0;
+}
+
+const struct file_operations hmdfs_root_fops = {
+ .owner = THIS_MODULE,
+ .iterate = hmdfs_root_iterate,
+};
+
+const struct file_operations hmdfs_device_fops = {
+ .owner = THIS_MODULE,
+ .iterate = hmdfs_device_iterate,
+};
diff --git a/fs/hmdfs/hmdfs.h b/fs/hmdfs/hmdfs.h
new file mode 100644
index 0000000000000000000000000000000000000000..d0a24db08f62144d53a8912198956105d68d7259
--- /dev/null
+++ b/fs/hmdfs/hmdfs.h
@@ -0,0 +1,325 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/hmdfs.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_H
+#define HMDFS_H
+
+#include <linux/completion.h>
+#include <linux/fs.h>
+#include <linux/kfifo.h>
+#include <linux/kobject.h>
+#include <linux/sched.h>
+#include <linux/version.h>
+#include <linux/wait.h>
+
+#include "comm/protocol.h"
+#include "comm/fault_inject.h"
+
+#if KERNEL_VERSION(4, 15, 0) < LINUX_VERSION_CODE
+#define hmdfs_time_t timespec64
+#define hmdfs_time_compare timespec64_compare
+#define hmdfs_time_add timespec64_add
+#else
+#define hmdfs_time_t timespec
+#define hmdfs_time_compare timespec_compare
+#define hmdfs_time_add timespec_add
+#endif
+
+#define HMDFS_PAGE_SIZE 4096
+#define HMDFS_PAGE_OFFSET 12
+
+/* max xattr value size, not include '\0' */
+#define HMDFS_XATTR_SIZE_MAX 4096
+/* max listxattr response size, include '\0' */
+#define HMDFS_LISTXATTR_SIZE_MAX 4096
+
+// 20 digits + '\0', converted from a u64 integer
+#define HMDFS_ACCOUNT_HASH_MAX_LEN 21
+#define CTRL_PATH_MAX_LEN 21
+
+#define HMDFS_SUPER_MAGIC 0x20200302
+
+#define DEFAULT_WRITE_CACHE_TIMEOUT 30
+#define DEFAULT_SRV_REQ_MAX_ACTIVE 16
+
+#define HMDFS_INODE_INVALID_FILE_ID (1U << 31)
+#define HMDFS_FID_VER_BOOT_COOKIE_SHIFT 15
+
+/* According to task_struct instead of workqueue_struct */
+#define HMDFS_WQ_NAME_LEN 16
+
+#define HMDFS_DEF_WB_TIMEOUT_MS 60000
+#define HMDFS_MAX_WB_TIMEOUT_MS 900000
+
+#define HMDFS_READPAGES_NR_MAX 32
+
+enum {
+ HMDFS_FEATURE_READPAGES = 1ULL << 0,
+ HMDFS_FEATURE_READPAGES_OPEN = 1ULL << 1,
+ HMDFS_ATOMIC_OPEN = 1ULL << 2,
+};
+
+struct client_statistic;
+struct server_statistic;
+struct hmdfs_writeback;
+struct hmdfs_server_writeback;
+struct hmdfs_syncfs_info {
+ wait_queue_head_t wq;
+ atomic_t wait_count;
+ int remote_ret;
+ unsigned long long version;
+
+ /* Protect version in concurrent operations */
+ spinlock_t v_lock;
+ /*
+ * Serialize hmdfs_sync_fs() process:
+ * |<- pending_list ->| executing |<- wait_list ->|
+ * syncfs_1 syncfs_2 (syncfs_3) syncfs_4 syncfs_5
+ *
+ * Abandon the syncfs processes in pending_list after syncfs_3 finishes;
+ * pick the last syncfs process in wait_list after syncfs_3 finishes.
+ */
+ bool is_executing;
+ /* syncfs processes arriving after the currently executing syncfs */
+ struct list_head wait_list;
+ /* syncfs processes arriving before the currently executing syncfs */
+ struct list_head pending_list;
+ spinlock_t list_lock;
+};
+
+struct hmdfs_sb_info {
+ /* list for all registered superblocks */
+ struct list_head list;
+ struct mutex umount_mutex;
+
+ struct kobject kobj;
+ struct completion s_kobj_unregister;
+ struct super_block *sb;
+ struct super_block *lower_sb;
+ /* from mount, which is root */
+ const struct cred *cred;
+ /* from update cmd, expected to be system */
+ const struct cred *system_cred;
+ struct {
+ struct mutex node_lock;
+ struct list_head node_list;
+ atomic_t conn_seq;
+ unsigned long recent_ol;
+ } connections;
+ char *local_dst;
+ char *real_dst;
+ char *local_src;
+ char *cache_dir;
+ /* seq number for hmdfs super block */
+ unsigned int seq;
+
+ /*
+ * This value indicates how long (in seconds) the pagecache stays valid
+ * in the client if the metadata (except iversion) matches the server's. This
+ * functionality is disabled if this value is 0.
+ */
+ unsigned int write_cache_timeout;
+ unsigned int dcache_timeout;
+ unsigned int dcache_precision;
+ unsigned long dcache_threshold;
+ struct list_head client_cache;
+ struct list_head server_cache;
+ struct list_head to_delete;
+ struct mutex cache_list_lock;
+
+ /* local operation time statistic */
+ struct server_statistic *s_server_statis;
+
+ /* client statistic */
+ struct client_statistic *s_client_statis;
+
+ /* TIMEOUT of each command */
+ struct kobject s_cmd_timeout_kobj;
+ struct completion s_timeout_kobj_unregister;
+ unsigned int s_cmd_timeout[F_SIZE];
+
+ /* For case sensitive */
+ bool s_case_sensitive;
+
+ /* For features supporting */
+ u64 s_features;
+
+ /* For merge & device view */
+ unsigned int s_merge_switch;
+ /* For writeback */
+ struct hmdfs_writeback *h_wb;
+ /* For server writeback */
+ struct hmdfs_server_writeback *h_swb;
+
+ /* syncfs info */
+ struct hmdfs_syncfs_info hsi;
+
+ /* To bridge the userspace utils */
+ struct kfifo notify_fifo;
+ spinlock_t notify_fifo_lock;
+ struct hmdfs_fault_inject fault_inject;
+
+ /* For reboot detect */
+ uint64_t boot_cookie;
+ /* offline process */
+ unsigned int async_cb_delay;
+ /* For server handle requests */
+ unsigned int async_req_max_active;
+ /* stash dirty pages during offline */
+ bool s_offline_stash;
+
+ /* Timeout (ms) to retry writing remote pages */
+ unsigned int wb_timeout_ms;
+
+ struct path stash_work_dir;
+ /* dentry cache */
+ bool s_dentry_cache;
+
+ /* msgs that are waiting for remote */
+ struct list_head async_readdir_msg_list;
+ /* protect async_readdir_msg_list */
+ spinlock_t async_readdir_msg_lock;
+ /* async readdir work that are queued but not finished */
+ struct list_head async_readdir_work_list;
+ /* protect async_readdir_work_list */
+ spinlock_t async_readdir_work_lock;
+ /* wait for async_readdir_work_list to be empty in umount */
+ wait_queue_head_t async_readdir_wq;
+ /* don't allow async readdir */
+ bool async_readdir_prohibit;
+};
+
+static inline struct hmdfs_sb_info *hmdfs_sb(struct super_block *sb)
+{
+ return sb->s_fs_info;
+}
+
+static inline bool hmdfs_is_stash_enabled(const struct hmdfs_sb_info *sbi)
+{
+ return sbi->s_offline_stash;
+}
+
+struct setattr_info {
+ loff_t size;
+ unsigned int valid;
+ umode_t mode;
+ kuid_t uid;
+ kgid_t gid;
+ long long atime;
+ long atime_nsec;
+ long long mtime;
+ long mtime_nsec;
+ long long ctime;
+ long ctime_nsec;
+};
+
+struct hmdfs_file_info {
+ union {
+ struct {
+ struct rb_root root;
+ struct mutex comrade_list_lock;
+ };
+ struct {
+ struct file *lower_file;
+ int device_id;
+ };
+ };
+ struct list_head comrade_list;
+};
+
+static inline struct hmdfs_file_info *hmdfs_f(struct file *file)
+{
+ return file->private_data;
+}
+
+// Almost all the source files want this, so...
+#include "inode.h"
+
+/* locking helpers */
+static inline struct dentry *lock_parent(struct dentry *dentry)
+{
+ struct dentry *dir = dget_parent(dentry);
+
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
+ return dir;
+}
+
+static inline void unlock_dir(struct dentry *dir)
+{
+ inode_unlock(d_inode(dir));
+ dput(dir);
+}
+
+extern uint64_t path_hash(const char *path, int len, bool case_sense);
+extern int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
+ const char *name, unsigned int flags,
+ struct path *path);
+extern ssize_t hmdfs_remote_listxattr(struct dentry *dentry, char *buffer,
+ size_t size);
+
+int check_filename(const char *name, int len);
+
+int hmdfs_permission(struct inode *inode, int mask);
+
+int hmdfs_parse_options(struct hmdfs_sb_info *sbi, const char *data);
+
+/* Refer to comments in hmdfs_request_work_fn() */
+#define HMDFS_SERVER_CTX_FLAGS (PF_KTHREAD | PF_WQ_WORKER | PF_NPROC_EXCEEDED)
+
+static inline bool is_current_hmdfs_server_ctx(void)
+{
+ return ((current->flags & HMDFS_SERVER_CTX_FLAGS) ==
+ HMDFS_SERVER_CTX_FLAGS);
+}
+
+extern uint64_t hmdfs_gen_boot_cookie(void);
+
+static inline bool str_n_case_eq(const char *s1, const char *s2, size_t len)
+{
+ return !strncasecmp(s1, s2, len);
+}
+
+static inline bool qstr_case_eq(const struct qstr *q1, const struct qstr *q2)
+{
+ return q1->len == q2->len && str_n_case_eq(q1->name, q2->name, q2->len);
+}
+
+/*****************************************************************************
+ * log print helpers
+ *****************************************************************************/
+__printf(4, 5) void __hmdfs_log(const char *level, const bool ratelimited,
+ const char *function, const char *fmt, ...);
+#define hmdfs_err(fmt, ...) \
+ __hmdfs_log(KERN_ERR, false, __func__, fmt, ##__VA_ARGS__)
+#define hmdfs_warning(fmt, ...) \
+ __hmdfs_log(KERN_WARNING, false, __func__, fmt, ##__VA_ARGS__)
+#define hmdfs_info(fmt, ...) \
+ __hmdfs_log(KERN_INFO, false, __func__, fmt, ##__VA_ARGS__)
+#define hmdfs_err_ratelimited(fmt, ...) \
+ __hmdfs_log(KERN_ERR, true, __func__, fmt, ##__VA_ARGS__)
+#define hmdfs_warning_ratelimited(fmt, ...) \
+ __hmdfs_log(KERN_WARNING, true, __func__, fmt, ##__VA_ARGS__)
+#define hmdfs_info_ratelimited(fmt, ...) \
+ __hmdfs_log(KERN_INFO, true, __func__, fmt, ##__VA_ARGS__)
+#ifdef CONFIG_HMDFS_FS_DEBUG
+#define hmdfs_debug(fmt, ...) \
+ __hmdfs_log(KERN_DEBUG, false, __func__, fmt, ##__VA_ARGS__)
+#define hmdfs_debug_ratelimited(fmt, ...) \
+ __hmdfs_log(KERN_DEBUG, true, __func__, fmt, ##__VA_ARGS__)
+#else
+#define hmdfs_debug(fmt, ...) ((void)0)
+#define hmdfs_debug_ratelimited(fmt, ...) ((void)0)
+#endif
+
+/*****************************************************************************
+ * inode/file operations declaration
+ *****************************************************************************/
+extern const struct inode_operations hmdfs_device_ops;
+extern const struct inode_operations hmdfs_root_ops;
+extern const struct file_operations hmdfs_root_fops;
+extern const struct file_operations hmdfs_device_fops;
+
+#endif // HMDFS_H
diff --git a/fs/hmdfs/hmdfs_client.c b/fs/hmdfs/hmdfs_client.c
new file mode 100644
index 0000000000000000000000000000000000000000..2c381f57f7e01352749244550c535fa51a4e5b9b
--- /dev/null
+++ b/fs/hmdfs/hmdfs_client.c
@@ -0,0 +1,1096 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/hmdfs_client.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include "hmdfs_client.h"
+#include "hmdfs_server.h"
+
+#include <linux/pagemap.h>
+#include <linux/sched/signal.h>
+#include <linux/slab.h>
+
+#include "comm/socket_adapter.h"
+#include "hmdfs_dentryfile.h"
+#include "hmdfs_trace.h"
+#include "comm/node_cb.h"
+#include "stash.h"
+#include "authority/authentication.h"
+
+#define HMDFS_SYNC_WPAGE_RETRY_MS 2000
+
+static inline void free_sm_outbuf(struct hmdfs_send_command *sm)
+{
+ if (sm->out_buf && sm->out_len != 0)
+ kfree(sm->out_buf);
+ sm->out_len = 0;
+ sm->out_buf = NULL;
+}
+
+int hmdfs_send_open(struct hmdfs_peer *con, const char *send_buf,
+ __u8 file_type, struct hmdfs_open_ret *open_ret)
+{
+ int ret;
+ int path_len = strlen(send_buf);
+ size_t send_len = sizeof(struct open_request) + path_len + 1;
+ struct open_request *open_req = kzalloc(send_len, GFP_KERNEL);
+ struct open_response *resp;
+ struct hmdfs_send_command sm = {
+ .data = open_req,
+ .len = send_len,
+ };
+ hmdfs_init_cmd(&sm.operations, F_OPEN);
+
+ if (!open_req) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ open_req->file_type = file_type;
+ open_req->path_len = cpu_to_le32(path_len);
+ strcpy(open_req->buf, send_buf);
+ ret = hmdfs_sendmessage_request(con, &sm);
+ kfree(open_req);
+
+ if (!ret && (sm.out_len == 0 || !sm.out_buf))
+ ret = -ENOENT;
+ if (ret)
+ goto out;
+ resp = sm.out_buf;
+
+ open_ret->ino = le64_to_cpu(resp->ino);
+ open_ret->fid.ver = le64_to_cpu(resp->file_ver);
+ open_ret->fid.id = le32_to_cpu(resp->file_id);
+ open_ret->file_size = le64_to_cpu(resp->file_size);
+ open_ret->remote_ctime.tv_sec = le64_to_cpu(resp->ctime);
+ open_ret->remote_ctime.tv_nsec = le32_to_cpu(resp->ctime_nsec);
+ open_ret->stable_ctime.tv_sec = le64_to_cpu(resp->stable_ctime);
+ open_ret->stable_ctime.tv_nsec = le32_to_cpu(resp->stable_ctime_nsec);
+
+out:
+ free_sm_outbuf(&sm);
+ return ret;
+}
+
+void hmdfs_send_close(struct hmdfs_peer *con, const struct hmdfs_fid *fid)
+{
+ size_t send_len = sizeof(struct release_request);
+ struct release_request *release_req = kzalloc(send_len, GFP_KERNEL);
+ struct hmdfs_send_command sm = {
+ .data = release_req,
+ .len = send_len,
+ };
+ hmdfs_init_cmd(&sm.operations, F_RELEASE);
+
+ if (!release_req)
+ return;
+
+ release_req->file_ver = cpu_to_le64(fid->ver);
+ release_req->file_id = cpu_to_le32(fid->id);
+
+ hmdfs_sendmessage_request(con, &sm);
+ kfree(release_req);
+}
+
+int hmdfs_send_fsync(struct hmdfs_peer *con, const struct hmdfs_fid *fid,
+ __s64 start, __s64 end, __s32 datasync)
+{
+ int ret;
+ struct fsync_request *fsync_req =
+ kzalloc(sizeof(struct fsync_request), GFP_KERNEL);
+ struct hmdfs_send_command sm = {
+ .data = fsync_req,
+ .len = sizeof(struct fsync_request),
+ };
+
+ hmdfs_init_cmd(&sm.operations, F_FSYNC);
+ if (!fsync_req)
+ return -ENOMEM;
+
+ fsync_req->file_ver = cpu_to_le64(fid->ver);
+ fsync_req->file_id = cpu_to_le32(fid->id);
+ fsync_req->datasync = cpu_to_le32(datasync);
+ fsync_req->start = cpu_to_le64(start);
+ fsync_req->end = cpu_to_le64(end);
+
+ ret = hmdfs_sendmessage_request(con, &sm);
+
+ free_sm_outbuf(&sm);
+ kfree(fsync_req);
+ return ret;
+}
+
+int hmdfs_client_readpage(struct hmdfs_peer *con, const struct hmdfs_fid *fid,
+ struct page *page)
+{
+ int ret;
+ size_t send_len = sizeof(struct readpage_request);
+ struct readpage_request *read_data = kzalloc(send_len, GFP_KERNEL);
+ struct hmdfs_send_command sm = {
+ .data = read_data,
+ .len = send_len,
+ };
+
+ hmdfs_init_cmd(&sm.operations, F_READPAGE);
+ if (!read_data) {
+ unlock_page(page);
+ return -ENOMEM;
+ }
+
+ sm.out_buf = page;
+ read_data->file_ver = cpu_to_le64(fid->ver);
+ read_data->file_id = cpu_to_le32(fid->id);
+ read_data->size = cpu_to_le32(HMDFS_PAGE_SIZE);
+ read_data->index = cpu_to_le64(page->index);
+ ret = hmdfs_sendpage_request(con, &sm);
+ kfree(read_data);
+ return ret;
+}
+
+bool hmdfs_usr_sig_pending(struct task_struct *p)
+{
+ sigset_t *sig = &p->pending.signal;
+
+ if (likely(!signal_pending(p)))
+ return false;
+ return sigismember(sig, SIGINT) || sigismember(sig, SIGTERM) ||
+ sigismember(sig, SIGKILL);
+}
+
+void hmdfs_client_writepage_done(struct hmdfs_inode_info *info,
+ struct hmdfs_writepage_context *ctx)
+{
+ struct page *page = ctx->page;
+ bool unlock = ctx->rsem_held;
+
+ SetPageUptodate(page);
+ end_page_writeback(page);
+ if (unlock)
+ up_read(&info->wpage_sem);
+ unlock_page(page);
+}
+
+static void hmdfs_client_writepage_err(struct hmdfs_peer *peer,
+ struct hmdfs_inode_info *info,
+ struct hmdfs_writepage_context *ctx,
+ int err)
+{
+ struct page *page = ctx->page;
+ bool unlock = ctx->rsem_held;
+
+ if (err == -ENOMEM || err == -EAGAIN || err == -ESHUTDOWN ||
+ err == -ETIME)
+ SetPageUptodate(page);
+ else
+ hmdfs_info("Page %ld of file %u writeback err %d devid %llu",
+ page->index, ctx->fid.id, err, peer->device_id);
+
+ /*
+ * Current and subsequent writebacks have been canceled by the
+ * user, leaving these pages' states in chaos. Read pages in
+ * the future to update these pages.
+ */
+ if (ctx->sync_all && hmdfs_usr_sig_pending(ctx->caller))
+ ClearPageUptodate(page);
+
+ if (ctx->sync_all || !time_is_after_eq_jiffies(ctx->timeout) ||
+ !(err == -ETIME || hmdfs_need_redirty_page(info, err))) {
+ SetPageError(page);
+ mapping_set_error(page->mapping, -EIO);
+ } else {
+ __set_page_dirty_nobuffers(page);
+ account_page_redirty(page);
+ }
+
+ end_page_writeback(page);
+ if (unlock)
+ up_read(&info->wpage_sem);
+ unlock_page(page);
+}
+
+static inline bool
+hmdfs_no_timedout_sync_write(struct hmdfs_writepage_context *ctx)
+{
+ return ctx->sync_all && time_is_after_eq_jiffies(ctx->timeout);
+}
+
+static inline bool
+hmdfs_client_rewrite_for_timeout(struct hmdfs_writepage_context *ctx, int err)
+{
+ return (err == -ETIME && hmdfs_no_timedout_sync_write(ctx) &&
+ !hmdfs_usr_sig_pending(ctx->caller));
+}
+
+static inline bool
+hmdfs_client_rewrite_for_offline(struct hmdfs_sb_info *sbi,
+ struct hmdfs_writepage_context *ctx, int err)
+{
+ struct hmdfs_inode_info *info = hmdfs_i(ctx->page->mapping->host);
+ unsigned int status = READ_ONCE(info->stash_status);
+
+	/*
+	 * No retry if offline occurs during inode restoration.
+	 *
+	 * Do retry if the local file cache is ready, even if it is not
+	 * a WB_SYNC_ALL write; otherwise a non-sync_all writeback will
+	 * return -EIO, mapping_set_error(mapping, -EIO) will be
+	 * called, and that will make the concurrent call to
+	 * filemap_write_and_wait() in hmdfs_flush_stash_file_data()
+	 * return -EIO as well.
+	 */
+ return (hmdfs_is_stash_enabled(sbi) &&
+ status != HMDFS_REMOTE_INODE_RESTORING &&
+ (hmdfs_no_timedout_sync_write(ctx) ||
+ status == HMDFS_REMOTE_INODE_STASHING) &&
+ hmdfs_is_offline_or_timeout_err(err));
+}
+
+static inline bool
+hmdfs_client_redo_writepage(struct hmdfs_sb_info *sbi,
+ struct hmdfs_writepage_context *ctx, int err)
+{
+ return hmdfs_client_rewrite_for_timeout(ctx, err) ||
+ hmdfs_client_rewrite_for_offline(sbi, ctx, err);
+}
+
+static bool hmdfs_remote_write_to_remote(struct hmdfs_inode_info *info)
+{
+ unsigned int status = READ_ONCE(info->stash_status);
+ bool stashing;
+
+ if (status != HMDFS_REMOTE_INODE_STASHING)
+ return true;
+
+ /* Ensure it's OK to use info->cache afterwards */
+ spin_lock(&info->stash_lock);
+ stashing = (info->stash_status == HMDFS_REMOTE_INODE_STASHING);
+ spin_unlock(&info->stash_lock);
+
+ return !stashing;
+}
+
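+/*
+ * Write the page either to the remote peer or, if the inode is being
+ * stashed while offline, to the local stash file. On offline-style
+ * failures the write is requeued to the retry workqueue.
+ */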
+int hmdfs_remote_do_writepage(struct hmdfs_peer *con,
+ struct hmdfs_writepage_context *ctx)
+{
+ struct hmdfs_inode_info *info = hmdfs_i(ctx->page->mapping->host);
+ bool to_remote = false;
+ int err = 0;
+
+ to_remote = hmdfs_remote_write_to_remote(info);
+ if (to_remote)
+ err = hmdfs_client_writepage(info->conn, ctx);
+ else
+ err = hmdfs_stash_writepage(info->conn, ctx);
+ if (!err)
+ return 0;
+
+ if (!(to_remote &&
+ hmdfs_client_rewrite_for_offline(con->sbi, ctx, err)))
+ return err;
+
+ queue_delayed_work(con->retry_wb_wq, &ctx->retry_dwork,
+ msecs_to_jiffies(HMDFS_SYNC_WPAGE_RETRY_MS));
+
+ return 0;
+}
+
+void hmdfs_remote_writepage_retry(struct work_struct *work)
+{
+ struct hmdfs_writepage_context *ctx =
+ container_of(work, struct hmdfs_writepage_context,
+ retry_dwork.work);
+ struct hmdfs_inode_info *info = hmdfs_i(ctx->page->mapping->host);
+ struct hmdfs_peer *peer = info->conn;
+ const struct cred *old_cred = NULL;
+ int err;
+
+ old_cred = hmdfs_override_creds(peer->sbi->cred);
+ err = hmdfs_remote_do_writepage(peer, ctx);
+ hmdfs_revert_creds(old_cred);
+ if (err) {
+ hmdfs_client_writepage_err(peer, info, ctx, err);
+ put_task_struct(ctx->caller);
+ kfree(ctx);
+ }
+}
+
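+/*
+ * Async response callback for F_WRITEPAGE. Note the two cleanup labels:
+ * cleanup_all releases both the context and the request data, while a
+ * successfully requeued writepage only frees the request data, because
+ * the context is still owned by the in-flight retry.
+ */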
+void hmdfs_writepage_cb(struct hmdfs_peer *peer, const struct hmdfs_req *req,
+ const struct hmdfs_resp *resp)
+{
+ struct hmdfs_writepage_context *ctx = req->private;
+ struct hmdfs_inode_info *info = hmdfs_i(ctx->page->mapping->host);
+ int ret = resp->ret_code;
+ unsigned long page_index = ctx->page->index;
+
+ trace_hmdfs_writepage_cb_enter(peer, info->remote_ino, page_index, ret);
+
+ if (!ret) {
+ hmdfs_client_writepage_done(info, ctx);
+ atomic64_inc(&info->write_counter);
+ goto cleanup_all;
+ }
+
+ if (hmdfs_client_redo_writepage(peer->sbi, ctx, ret)) {
+ ret = hmdfs_remote_do_writepage(peer, ctx);
+ if (!ret)
+ goto cleanup_req;
+ WARN_ON(ret == -ETIME);
+ }
+
+ hmdfs_client_writepage_err(peer, info, ctx, ret);
+
+cleanup_all:
+ put_task_struct(ctx->caller);
+ kfree(ctx);
+cleanup_req:
+ kfree(req->data);
+
+ trace_hmdfs_writepage_cb_exit(peer, info->remote_ino, page_index, ret);
+}
+
+int hmdfs_client_writepage(struct hmdfs_peer *con,
+ struct hmdfs_writepage_context *param)
+{
+ int ret = 0;
+ size_t send_len = sizeof(struct writepage_request) + HMDFS_PAGE_SIZE;
+ struct writepage_request *write_data = kzalloc(send_len, GFP_NOFS);
+ struct hmdfs_req req;
+ char *data = NULL;
+
+ if (unlikely(!write_data))
+ return -ENOMEM;
+
+ WARN_ON(!PageLocked(param->page)); // VFS
+ WARN_ON(PageDirty(param->page)); // VFS
+ WARN_ON(!PageWriteback(param->page)); // hmdfs
+
+ write_data->file_ver = cpu_to_le64(param->fid.ver);
+ write_data->file_id = cpu_to_le32(param->fid.id);
+ write_data->index = cpu_to_le64(param->page->index);
+ write_data->count = cpu_to_le32(param->count);
+ data = kmap(param->page);
+ memcpy((char *)write_data->buf, data, HMDFS_PAGE_SIZE);
+ kunmap(param->page);
+ req.data = write_data;
+ req.data_len = send_len;
+
+ req.private = param;
+ req.private_len = sizeof(*param);
+
+ req.timeout = TIMEOUT_CONFIG;
+ hmdfs_init_cmd(&req.operations, F_WRITEPAGE);
+ ret = hmdfs_send_async_request(con, &req);
+ if (unlikely(ret))
+ kfree(write_data);
+ return ret;
+}
+
+void hmdfs_client_recv_readpage(struct hmdfs_head_cmd *head, int err,
+ struct hmdfs_async_work *async_work)
+{
+ struct page *page = async_work->page;
+ int ret = le32_to_cpu(head->ret_code);
+ struct hmdfs_inode_info *info = hmdfs_i(page->mapping->host);
+ unsigned long page_index = page->index;
+
+ if (!err)
+ SetPageUptodate(page);
+ else if (err == -EBADF)
+ /* There may be a stale fd caused by fid version, need reopen */
+ set_bit(HMDFS_FID_NEED_OPEN, &info->fid_flags);
+
+ hmdfs_client_resp_statis(async_work->head.peer->sbi, F_READPAGE,
+ HMDFS_RESP_NORMAL, async_work->start, jiffies);
+
+ trace_hmdfs_client_recv_readpage(async_work->head.peer,
+ info->remote_ino, page_index, ret);
+
+ asw_done(async_work);
+}
+
+/* read the cached dentry file at @path and write its contents into @filp */
+int hmdfs_client_start_readdir(struct hmdfs_peer *con, struct file *filp,
+ const char *path, int path_len,
+ struct hmdfs_dcache_header *header)
+{
+ int ret;
+ size_t send_len = sizeof(struct readdir_request) + path_len + 1;
+ struct readdir_request *req = kzalloc(send_len, GFP_KERNEL);
+ struct hmdfs_send_command sm = {
+ .data = req,
+ .len = send_len,
+ .local_filp = filp,
+ };
+
+ hmdfs_init_cmd(&sm.operations, F_ITERATE);
+ if (!req)
+ return -ENOMEM;
+
+	/* take a ref, or the file will be released when the msg is put */
+ get_file(sm.local_filp);
+ req->path_len = cpu_to_le32(path_len);
+ strncpy(req->path, path, path_len);
+
+	/*
+	 * If we already have a cache file, verify it. If it is
+	 * up to date, we don't have to transfer a new one.
+	 */
+ if (header) {
+ req->dcache_crtime = header->dcache_crtime;
+ req->dcache_crtime_nsec = header->dcache_crtime_nsec;
+ req->dentry_ctime = header->dentry_ctime;
+ req->dentry_ctime_nsec = header->dentry_ctime_nsec;
+ req->num = header->num;
+ req->verify_cache = cpu_to_le32(1);
+ }
+
+ ret = hmdfs_sendmessage_request(con, &sm);
+ kfree(req);
+ return ret;
+}
+
+int hmdfs_client_start_mkdir(struct hmdfs_peer *con,
+ const char *path, const char *name,
+ umode_t mode, struct hmdfs_lookup_ret *mkdir_ret)
+{
+ int ret = 0;
+ int path_len = strlen(path);
+ int name_len = strlen(name);
+ size_t send_len = sizeof(struct mkdir_request) + path_len + 1 +
+ name_len + 1;
+ struct mkdir_request *mkdir_req = kzalloc(send_len, GFP_KERNEL);
+ struct hmdfs_inodeinfo_response *resp = NULL;
+ struct hmdfs_send_command sm = {
+ .data = mkdir_req,
+ .len = send_len,
+ };
+
+ hmdfs_init_cmd(&sm.operations, F_MKDIR);
+ if (!mkdir_req)
+ return -ENOMEM;
+
+ mkdir_req->path_len = cpu_to_le32(path_len);
+ mkdir_req->name_len = cpu_to_le32(name_len);
+ mkdir_req->mode = cpu_to_le16(mode);
+ strncpy(mkdir_req->path, path, path_len);
+ strncpy(mkdir_req->path + path_len + 1, name, name_len);
+
+ ret = hmdfs_sendmessage_request(con, &sm);
+ if (ret == -ENOENT || ret == -ETIME || ret == -EOPNOTSUPP)
+ goto out;
+ if (!sm.out_buf) {
+ ret = -ENOENT;
+ goto out;
+ }
+ resp = sm.out_buf;
+ mkdir_ret->i_mode = le16_to_cpu(resp->i_mode);
+ mkdir_ret->i_size = le64_to_cpu(resp->i_size);
+ mkdir_ret->i_mtime = le64_to_cpu(resp->i_mtime);
+ mkdir_ret->i_mtime_nsec = le32_to_cpu(resp->i_mtime_nsec);
+ mkdir_ret->i_ino = le64_to_cpu(resp->i_ino);
+
+out:
+ free_sm_outbuf(&sm);
+ kfree(mkdir_req);
+ return ret;
+}
+
+int hmdfs_client_start_create(struct hmdfs_peer *con,
+ const char *path, const char *name,
+ umode_t mode, bool want_excl,
+ struct hmdfs_lookup_ret *create_ret)
+{
+ int ret = 0;
+ int path_len = strlen(path);
+ int name_len = strlen(name);
+ size_t send_len = sizeof(struct create_request) + path_len + 1 +
+ name_len + 1;
+ struct create_request *create_req = kzalloc(send_len, GFP_KERNEL);
+ struct hmdfs_inodeinfo_response *resp = NULL;
+ struct hmdfs_send_command sm = {
+ .data = create_req,
+ .len = send_len,
+ };
+
+ hmdfs_init_cmd(&sm.operations, F_CREATE);
+ if (!create_req)
+ return -ENOMEM;
+
+ create_req->path_len = cpu_to_le32(path_len);
+ create_req->name_len = cpu_to_le32(name_len);
+ create_req->mode = cpu_to_le16(mode);
+ create_req->want_excl = want_excl;
+ strncpy(create_req->path, path, path_len);
+ strncpy(create_req->path + path_len + 1, name, name_len);
+
+ ret = hmdfs_sendmessage_request(con, &sm);
+ if (ret == -ENOENT || ret == -ETIME || ret == -EOPNOTSUPP)
+ goto out;
+ if (!sm.out_buf) {
+ ret = -ENOENT;
+ goto out;
+ }
+ resp = sm.out_buf;
+ create_ret->i_mode = le16_to_cpu(resp->i_mode);
+ create_ret->i_size = le64_to_cpu(resp->i_size);
+ create_ret->i_mtime = le64_to_cpu(resp->i_mtime);
+ create_ret->i_mtime_nsec = le32_to_cpu(resp->i_mtime_nsec);
+ create_ret->i_ino = le64_to_cpu(resp->i_ino);
+
+out:
+ free_sm_outbuf(&sm);
+ kfree(create_req);
+ return ret;
+}
+
+int hmdfs_client_start_rmdir(struct hmdfs_peer *con, const char *path,
+ const char *name)
+{
+ int ret;
+ int path_len = strlen(path);
+ int name_len = strlen(name);
+ size_t send_len = sizeof(struct rmdir_request) + path_len + 1 +
+ name_len + 1;
+ struct rmdir_request *rmdir_req = kzalloc(send_len, GFP_KERNEL);
+ struct hmdfs_send_command sm = {
+ .data = rmdir_req,
+ .len = send_len,
+ };
+
+ hmdfs_init_cmd(&sm.operations, F_RMDIR);
+ if (!rmdir_req)
+ return -ENOMEM;
+
+ rmdir_req->path_len = cpu_to_le32(path_len);
+ rmdir_req->name_len = cpu_to_le32(name_len);
+ strncpy(rmdir_req->path, path, path_len);
+ strncpy(rmdir_req->path + path_len + 1, name, name_len);
+
+ ret = hmdfs_sendmessage_request(con, &sm);
+ free_sm_outbuf(&sm);
+ kfree(rmdir_req);
+ return ret;
+}
+
+int hmdfs_client_start_unlink(struct hmdfs_peer *con, const char *path,
+ const char *name)
+{
+ int ret;
+ int path_len = strlen(path);
+ int name_len = strlen(name);
+ size_t send_len = sizeof(struct unlink_request) + path_len + 1 +
+ name_len + 1;
+ struct unlink_request *unlink_req = kzalloc(send_len, GFP_KERNEL);
+ struct hmdfs_send_command sm = {
+ .data = unlink_req,
+ .len = send_len,
+ };
+
+ hmdfs_init_cmd(&sm.operations, F_UNLINK);
+ if (!unlink_req)
+ return -ENOMEM;
+
+ unlink_req->path_len = cpu_to_le32(path_len);
+ unlink_req->name_len = cpu_to_le32(name_len);
+ strncpy(unlink_req->path, path, path_len);
+ strncpy(unlink_req->path + path_len + 1, name, name_len);
+
+ ret = hmdfs_sendmessage_request(con, &sm);
+ kfree(unlink_req);
+ free_sm_outbuf(&sm);
+ return ret;
+}
+
+int hmdfs_client_start_rename(struct hmdfs_peer *con, const char *old_path,
+ const char *old_name, const char *new_path,
+ const char *new_name, unsigned int flags)
+{
+ int ret;
+ int old_path_len = strlen(old_path);
+ int new_path_len = strlen(new_path);
+ int old_name_len = strlen(old_name);
+ int new_name_len = strlen(new_name);
+
+ size_t send_len = sizeof(struct rename_request) + old_path_len + 1 +
+ new_path_len + 1 + old_name_len + 1 + new_name_len +
+ 1;
+ struct rename_request *rename_req = kzalloc(send_len, GFP_KERNEL);
+ struct hmdfs_send_command sm = {
+ .data = rename_req,
+ .len = send_len,
+ };
+
+ hmdfs_init_cmd(&sm.operations, F_RENAME);
+ if (!rename_req)
+ return -ENOMEM;
+
+ rename_req->old_path_len = cpu_to_le32(old_path_len);
+ rename_req->new_path_len = cpu_to_le32(new_path_len);
+ rename_req->old_name_len = cpu_to_le32(old_name_len);
+ rename_req->new_name_len = cpu_to_le32(new_name_len);
+ rename_req->flags = cpu_to_le32(flags);
+
+ strncpy(rename_req->path, old_path, old_path_len);
+ strncpy(rename_req->path + old_path_len + 1, new_path, new_path_len);
+
+ strncpy(rename_req->path + old_path_len + 1 + new_path_len + 1,
+ old_name, old_name_len);
+ strncpy(rename_req->path + old_path_len + 1 + new_path_len + 1 +
+ old_name_len + 1,
+ new_name, new_name_len);
+
+ ret = hmdfs_sendmessage_request(con, &sm);
+ free_sm_outbuf(&sm);
+ kfree(rename_req);
+ return ret;
+}
+
+int hmdfs_send_setattr(struct hmdfs_peer *con, const char *send_buf,
+ struct setattr_info *attr_info)
+{
+ int ret;
+ int path_len = strlen(send_buf);
+ size_t send_len = path_len + 1 + sizeof(struct setattr_request);
+ struct setattr_request *setattr_req = kzalloc(send_len, GFP_KERNEL);
+ struct hmdfs_send_command sm = {
+ .data = setattr_req,
+ .len = send_len,
+ };
+
+ hmdfs_init_cmd(&sm.operations, F_SETATTR);
+ if (!setattr_req)
+ return -ENOMEM;
+
+ strcpy(setattr_req->buf, send_buf);
+ setattr_req->path_len = cpu_to_le32(path_len);
+ setattr_req->valid = cpu_to_le32(attr_info->valid);
+ setattr_req->size = cpu_to_le64(attr_info->size);
+ setattr_req->mtime = cpu_to_le64(attr_info->mtime);
+ setattr_req->mtime_nsec = cpu_to_le32(attr_info->mtime_nsec);
+ ret = hmdfs_sendmessage_request(con, &sm);
+ kfree(setattr_req);
+ return ret;
+}
+
+static void hmdfs_update_getattr_ret(struct getattr_response *resp,
+ struct hmdfs_getattr_ret *result)
+{
+ struct kstat *stat = &result->stat;
+
+ stat->result_mask = le32_to_cpu(resp->result_mask);
+ if (stat->result_mask == 0)
+ return;
+
+ stat->ino = le64_to_cpu(resp->ino);
+ stat->mode = le16_to_cpu(resp->mode);
+ stat->nlink = le32_to_cpu(resp->nlink);
+ stat->uid.val = le32_to_cpu(resp->uid);
+ stat->gid.val = le32_to_cpu(resp->gid);
+ stat->size = le64_to_cpu(resp->size);
+ stat->blocks = le64_to_cpu(resp->blocks);
+ stat->blksize = le32_to_cpu(resp->blksize);
+ stat->atime.tv_sec = le64_to_cpu(resp->atime);
+ stat->atime.tv_nsec = le32_to_cpu(resp->atime_nsec);
+ stat->mtime.tv_sec = le64_to_cpu(resp->mtime);
+ stat->mtime.tv_nsec = le32_to_cpu(resp->mtime_nsec);
+ stat->ctime.tv_sec = le64_to_cpu(resp->ctime);
+ stat->ctime.tv_nsec = le32_to_cpu(resp->ctime_nsec);
+ stat->btime.tv_sec = le64_to_cpu(resp->crtime);
+ stat->btime.tv_nsec = le32_to_cpu(resp->crtime_nsec);
+ result->fsid = le64_to_cpu(resp->fsid);
+ /* currently not used */
+ result->i_flags = 0;
+}
+
+int hmdfs_send_getattr(struct hmdfs_peer *con, const char *send_buf,
+ unsigned int lookup_flags,
+ struct hmdfs_getattr_ret *result)
+{
+ int path_len = strlen(send_buf);
+ size_t send_len = path_len + 1 + sizeof(struct getattr_request);
+ int ret = 0;
+ struct getattr_request *req = kzalloc(send_len, GFP_KERNEL);
+ struct hmdfs_send_command sm = {
+ .data = req,
+ .len = send_len,
+ };
+
+ hmdfs_init_cmd(&sm.operations, F_GETATTR);
+ if (!req)
+ return -ENOMEM;
+
+ req->path_len = cpu_to_le32(path_len);
+ req->lookup_flags = cpu_to_le32(lookup_flags);
+ strncpy(req->buf, send_buf, path_len);
+ ret = hmdfs_sendmessage_request(con, &sm);
+ if (!ret && (sm.out_len == 0 || !sm.out_buf))
+ ret = -ENOENT;
+ if (ret)
+ goto out;
+
+ hmdfs_update_getattr_ret(sm.out_buf, result);
+
+out:
+ kfree(req);
+ free_sm_outbuf(&sm);
+ return ret;
+}
+
+static void hmdfs_update_statfs_ret(struct statfs_response *resp,
+ struct kstatfs *buf)
+{
+ buf->f_type = le64_to_cpu(resp->f_type);
+ buf->f_bsize = le64_to_cpu(resp->f_bsize);
+ buf->f_blocks = le64_to_cpu(resp->f_blocks);
+ buf->f_bfree = le64_to_cpu(resp->f_bfree);
+ buf->f_bavail = le64_to_cpu(resp->f_bavail);
+ buf->f_files = le64_to_cpu(resp->f_files);
+ buf->f_ffree = le64_to_cpu(resp->f_ffree);
+ buf->f_fsid.val[0] = le32_to_cpu(resp->f_fsid_0);
+ buf->f_fsid.val[1] = le32_to_cpu(resp->f_fsid_1);
+ buf->f_namelen = le64_to_cpu(resp->f_namelen);
+ buf->f_frsize = le64_to_cpu(resp->f_frsize);
+ buf->f_flags = le64_to_cpu(resp->f_flags);
+ buf->f_spare[0] = le64_to_cpu(resp->f_spare_0);
+ buf->f_spare[1] = le64_to_cpu(resp->f_spare_1);
+ buf->f_spare[2] = le64_to_cpu(resp->f_spare_2);
+ buf->f_spare[3] = le64_to_cpu(resp->f_spare_3);
+}
+
+int hmdfs_send_statfs(struct hmdfs_peer *con, const char *path,
+ struct kstatfs *buf)
+{
+ int ret;
+ int path_len = strlen(path);
+ size_t send_len = sizeof(struct statfs_request) + path_len + 1;
+ struct statfs_request *req = kzalloc(send_len, GFP_KERNEL);
+ struct hmdfs_send_command sm = {
+ .data = req,
+ .len = send_len,
+ };
+
+ hmdfs_init_cmd(&sm.operations, F_STATFS);
+ if (!req)
+ return -ENOMEM;
+
+ req->path_len = cpu_to_le32(path_len);
+ strncpy(req->path, path, path_len);
+
+ ret = hmdfs_sendmessage_request(con, &sm);
+
+ if (ret == -ETIME)
+ ret = -EIO;
+ if (!ret && (sm.out_len == 0 || !sm.out_buf))
+ ret = -ENOENT;
+ if (ret)
+ goto out;
+
+ hmdfs_update_statfs_ret(sm.out_buf, buf);
+out:
+ kfree(req);
+ free_sm_outbuf(&sm);
+ return ret;
+}
+
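+/*
+ * Kick off an asynchronous syncfs on the peer; the reply is consumed by
+ * hmdfs_recv_syncfs_cb(), which matches it against sbi->hsi.version.
+ */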
+int hmdfs_send_syncfs(struct hmdfs_peer *con, int syncfs_timeout)
+{
+ int ret;
+ struct hmdfs_req req;
+ struct hmdfs_sb_info *sbi = con->sbi;
+ struct syncfs_request *syncfs_req =
+ kzalloc(sizeof(struct syncfs_request), GFP_KERNEL);
+
+ if (!syncfs_req) {
+ hmdfs_err("cannot allocate syncfs_request");
+ return -ENOMEM;
+ }
+
+ hmdfs_init_cmd(&req.operations, F_SYNCFS);
+ req.timeout = syncfs_timeout;
+
+ syncfs_req->version = cpu_to_le64(sbi->hsi.version);
+ req.data = syncfs_req;
+ req.data_len = sizeof(*syncfs_req);
+
+ ret = hmdfs_send_async_request(con, &req);
+ if (ret) {
+ kfree(syncfs_req);
+ hmdfs_err("ret fail with %d", ret);
+ }
+
+ return ret;
+}
+
+static void hmdfs_update_getxattr_ret(struct getxattr_response *resp,
+ void *value, size_t o_size, int *ret)
+{
+ ssize_t size = le32_to_cpu(resp->size);
+
+ if (o_size && o_size < size) {
+ *ret = -ERANGE;
+ return;
+ }
+
+ if (o_size)
+ memcpy(value, resp->value, size);
+
+ *ret = size;
+}
+
+int hmdfs_send_getxattr(struct hmdfs_peer *con, const char *send_buf,
+ const char *name, void *value, size_t size)
+{
+ size_t path_len = strlen(send_buf);
+ size_t name_len = strlen(name);
+ size_t send_len = path_len + name_len +
+ sizeof(struct getxattr_request) + 2;
+ int ret = 0;
+ struct getxattr_request *req = kzalloc(send_len, GFP_KERNEL);
+ struct hmdfs_send_command sm = {
+ .data = req,
+ .len = send_len,
+ };
+
+ hmdfs_init_cmd(&sm.operations, F_GETXATTR);
+ if (!req)
+ return -ENOMEM;
+
+ req->path_len = cpu_to_le32(path_len);
+ req->name_len = cpu_to_le32(name_len);
+ req->size = cpu_to_le32(size);
+ strncpy(req->buf, send_buf, path_len);
+ strncpy(req->buf + path_len + 1, name, name_len);
+ ret = hmdfs_sendmessage_request(con, &sm);
+ if (!ret && (sm.out_len == 0 || !sm.out_buf))
+ ret = -ENOENT;
+ if (ret)
+ goto out;
+
+ hmdfs_update_getxattr_ret(sm.out_buf, value, size, &ret);
+
+out:
+ kfree(req);
+ free_sm_outbuf(&sm);
+ return ret;
+}
+
+int hmdfs_send_setxattr(struct hmdfs_peer *con, const char *send_buf,
+ const char *name, const void *value,
+ size_t size, int flags)
+{
+ size_t path_len = strlen(send_buf);
+ size_t name_len = strlen(name);
+ size_t send_len = path_len + name_len + size + 2 +
+ sizeof(struct setxattr_request);
+ int ret = 0;
+ struct setxattr_request *req = kzalloc(send_len, GFP_KERNEL);
+ struct hmdfs_send_command sm = {
+ .data = req,
+ .len = send_len,
+ };
+
+ hmdfs_init_cmd(&sm.operations, F_SETXATTR);
+ if (!req)
+ return -ENOMEM;
+
+ req->path_len = cpu_to_le32(path_len);
+ req->name_len = cpu_to_le32(name_len);
+ req->size = cpu_to_le32(size);
+ req->flags = cpu_to_le32(flags);
+ strncpy(req->buf, send_buf, path_len);
+ strncpy(req->buf + path_len + 1, name, name_len);
+ memcpy(req->buf + path_len + name_len + 2, value, size);
+ if (!value)
+ req->del = true;
+ ret = hmdfs_sendmessage_request(con, &sm);
+ kfree(req);
+ return ret;
+}
+
+static void hmdfs_update_listxattr_ret(struct listxattr_response *resp,
+ char *list, size_t o_size, ssize_t *ret)
+{
+ ssize_t size = le32_to_cpu(resp->size);
+
+ if (o_size && o_size < size) {
+ *ret = -ERANGE;
+ return;
+ }
+
+	/* names are separated by '\0', so copy with memcpy */
+ if (o_size)
+ memcpy(list, resp->list, size);
+
+ *ret = size;
+}
+
+ssize_t hmdfs_send_listxattr(struct hmdfs_peer *con, const char *send_buf,
+ char *list, size_t size)
+{
+ size_t path_len = strlen(send_buf);
+ size_t send_len = path_len + 1 + sizeof(struct listxattr_request);
+ ssize_t ret = 0;
+ struct listxattr_request *req = kzalloc(send_len, GFP_KERNEL);
+ struct hmdfs_send_command sm = {
+ .data = req,
+ .len = send_len,
+ };
+
+ hmdfs_init_cmd(&sm.operations, F_LISTXATTR);
+ if (!req)
+ return -ENOMEM;
+
+ req->path_len = cpu_to_le32(path_len);
+ req->size = cpu_to_le32(size);
+ strncpy(req->buf, send_buf, path_len);
+ ret = hmdfs_sendmessage_request(con, &sm);
+ if (!ret && (sm.out_len == 0 || !sm.out_buf))
+ ret = -ENOENT;
+ if (ret)
+ goto out;
+
+ hmdfs_update_listxattr_ret(sm.out_buf, list, size, &ret);
+
+out:
+ kfree(req);
+ free_sm_outbuf(&sm);
+ return ret;
+}
+
+void hmdfs_recv_syncfs_cb(struct hmdfs_peer *peer, const struct hmdfs_req *req,
+ const struct hmdfs_resp *resp)
+{
+ struct hmdfs_sb_info *sbi = peer->sbi;
+ struct syncfs_request *syncfs_req = (struct syncfs_request *)req->data;
+
+ WARN_ON(!syncfs_req);
+ spin_lock(&sbi->hsi.v_lock);
+ if (le64_to_cpu(syncfs_req->version) != sbi->hsi.version) {
+ hmdfs_info(
+ "Recv stale syncfs resp[ver: %llu] from device %llu, current ver %llu",
+ le64_to_cpu(syncfs_req->version), peer->device_id,
+ sbi->hsi.version);
+ spin_unlock(&sbi->hsi.v_lock);
+ goto out;
+ }
+
+ if (!sbi->hsi.remote_ret)
+ sbi->hsi.remote_ret = resp->ret_code;
+
+ if (resp->ret_code) {
+ hmdfs_err("Recv syncfs error code %d from device %llu",
+ resp->ret_code, peer->device_id);
+ } else {
+		/*
+		 * Set @sb_dirty_count to zero if no one else produced
+		 * dirty data on the remote server during the remote sync.
+		 */
+ atomic64_cmpxchg(&peer->sb_dirty_count,
+ peer->old_sb_dirty_count, 0);
+ }
+
+ atomic_dec(&sbi->hsi.wait_count);
+ spin_unlock(&sbi->hsi.v_lock);
+ wake_up_interruptible(&sbi->hsi.wq);
+
+out:
+ kfree(syncfs_req);
+}
+
+void hmdfs_send_drop_push(struct hmdfs_peer *con, const char *path)
+{
+ int path_len = strlen(path);
+ size_t send_len = sizeof(struct drop_push_request) + path_len + 1;
+ struct drop_push_request *dp_req = kzalloc(send_len, GFP_KERNEL);
+ struct hmdfs_send_command sm = {
+ .data = dp_req,
+ .len = send_len,
+ };
+
+ hmdfs_init_cmd(&sm.operations, F_DROP_PUSH);
+ if (!dp_req)
+ return;
+
+ dp_req->path_len = cpu_to_le32(path_len);
+ strncpy(dp_req->path, path, path_len);
+
+ hmdfs_sendmessage_request(con, &sm);
+ kfree(dp_req);
+}
+
+static void *hmdfs_get_msg_next(struct hmdfs_peer *peer, int *id)
+{
+ struct hmdfs_msg_idr_head *head = NULL;
+
+ spin_lock(&peer->idr_lock);
+ head = idr_get_next(&peer->msg_idr, id);
+ if (head && head->type < MSG_IDR_MAX && head->type >= 0)
+ kref_get(&head->ref);
+
+ spin_unlock(&peer->idr_lock);
+
+ return head;
+}
+
+void hmdfs_client_offline_notify(struct hmdfs_peer *conn, int evt,
+ unsigned int seq)
+{
+ int id;
+ int count = 0;
+ struct hmdfs_msg_idr_head *head = NULL;
+
+ for (id = 0; (head = hmdfs_get_msg_next(conn, &id)) != NULL; ++id) {
+ switch (head->type) {
+ case MSG_IDR_1_0_NONE:
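+			/*
+			 * drop both the ref taken in hmdfs_get_msg_next()
+			 * and the entry's initial ref
+			 */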
+ head_put(head);
+ head_put(head);
+ break;
+ case MSG_IDR_MESSAGE_SYNC:
+ case MSG_IDR_1_0_MESSAGE_SYNC:
+ hmdfs_response_wakeup((struct sendmsg_wait_queue *)head,
+ -ETIME, 0, NULL);
+ hmdfs_debug("wakeup id=%d", head->msg_id);
+ msg_put((struct sendmsg_wait_queue *)head);
+ break;
+ case MSG_IDR_MESSAGE_ASYNC:
+ hmdfs_wakeup_parasite(
+ (struct hmdfs_msg_parasite *)head);
+ hmdfs_debug("wakeup parasite id=%d", head->msg_id);
+ mp_put((struct hmdfs_msg_parasite *)head);
+ break;
+ case MSG_IDR_PAGE:
+ case MSG_IDR_1_0_PAGE:
+ hmdfs_wakeup_async_work(
+ (struct hmdfs_async_work *)head);
+ hmdfs_debug("wakeup async work id=%d", head->msg_id);
+ asw_put((struct hmdfs_async_work *)head);
+ break;
+ default:
+ hmdfs_err("Bad type=%d id=%d", head->type,
+ head->msg_id);
+ break;
+ }
+
+ count++;
+		/*
+		 * If there are too many IDR entries to process, reschedule
+		 * every HMDFS_IDR_RESCHED_COUNT (512) messages to avoid a
+		 * soft lockup.
+		 */
+ if (count % HMDFS_IDR_RESCHED_COUNT == 0)
+ cond_resched();
+ }
+}
+
+static struct hmdfs_node_cb_desc client_cb[] = {
+ {
+ .evt = NODE_EVT_OFFLINE,
+ .sync = true,
+ .min_version = DFS_1_0,
+ .fn = hmdfs_client_offline_notify,
+ },
+};
+
+void __init hmdfs_client_add_node_evt_cb(void)
+{
+ hmdfs_node_add_evt_cb(client_cb, ARRAY_SIZE(client_cb));
+}
diff --git a/fs/hmdfs/hmdfs_client.h b/fs/hmdfs/hmdfs_client.h
new file mode 100644
index 0000000000000000000000000000000000000000..ab2867dca4579fd15047c54f790c6ab61985fb90
--- /dev/null
+++ b/fs/hmdfs/hmdfs_client.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/hmdfs_client.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_CLIENT_H
+#define HMDFS_CLIENT_H
+
+#include "comm/transport.h"
+#include "hmdfs_dentryfile.h"
+#include "hmdfs_device_view.h"
+
+struct hmdfs_open_ret {
+ struct hmdfs_fid fid;
+ __u64 file_size;
+ __u64 ino;
+ struct hmdfs_time_t remote_ctime;
+ struct hmdfs_time_t stable_ctime;
+};
+
+struct hmdfs_writepage_context {
+ struct hmdfs_fid fid;
+ uint32_t count;
+ bool sync_all;
+ bool rsem_held;
+ unsigned long timeout;
+ struct task_struct *caller;
+ struct page *page;
+ struct delayed_work retry_dwork;
+};
+
+int hmdfs_client_start_readdir(struct hmdfs_peer *con, struct file *filp,
+ const char *path, int path_len,
+ struct hmdfs_dcache_header *header);
+int hmdfs_client_start_mkdir(struct hmdfs_peer *con,
+ const char *path, const char *name,
+ umode_t mode, struct hmdfs_lookup_ret *mkdir_ret);
+int hmdfs_client_start_create(struct hmdfs_peer *con,
+ const char *path, const char *name,
+ umode_t mode, bool want_excl,
+ struct hmdfs_lookup_ret *create_ret);
+int hmdfs_client_start_rmdir(struct hmdfs_peer *con, const char *path,
+ const char *name);
+int hmdfs_client_start_unlink(struct hmdfs_peer *con, const char *path,
+ const char *name);
+int hmdfs_client_start_rename(struct hmdfs_peer *con, const char *old_path,
+ const char *old_name, const char *new_path,
+ const char *new_name, unsigned int flags);
+
+static inline bool hmdfs_is_offline_err(int err)
+{
+	/*
+	 * writepage() will get -EBADF if the peer comes back online
+	 * during an offline stash, and -EBADF also needs a redo.
+	 */
+ return (err == -EAGAIN || err == -ESHUTDOWN || err == -EBADF);
+}
+
+static inline bool hmdfs_is_offline_or_timeout_err(int err)
+{
+ return hmdfs_is_offline_err(err) || err == -ETIME;
+}
+
+static inline bool hmdfs_need_redirty_page(const struct hmdfs_inode_info *info,
+ int err)
+{
+	/*
+	 * Redirty the page only when:
+	 * 1. stash is enabled,
+	 * 2. the error is offline-related,
+	 * 3. the inode is not being restored.
+	 */
+ return hmdfs_is_stash_enabled(info->conn->sbi) &&
+ hmdfs_is_offline_err(err) &&
+ READ_ONCE(info->stash_status) != HMDFS_REMOTE_INODE_RESTORING;
+}
+
+bool hmdfs_usr_sig_pending(struct task_struct *p);
+void hmdfs_writepage_cb(struct hmdfs_peer *peer, const struct hmdfs_req *req,
+ const struct hmdfs_resp *resp);
+int hmdfs_client_writepage(struct hmdfs_peer *con,
+ struct hmdfs_writepage_context *param);
+int hmdfs_remote_do_writepage(struct hmdfs_peer *con,
+ struct hmdfs_writepage_context *ctx);
+void hmdfs_remote_writepage_retry(struct work_struct *work);
+
+void hmdfs_client_writepage_done(struct hmdfs_inode_info *info,
+ struct hmdfs_writepage_context *ctx);
+
+int hmdfs_send_open(struct hmdfs_peer *con, const char *send_buf,
+ __u8 file_type, struct hmdfs_open_ret *open_ret);
+void hmdfs_send_close(struct hmdfs_peer *con, const struct hmdfs_fid *fid);
+int hmdfs_send_fsync(struct hmdfs_peer *con, const struct hmdfs_fid *fid,
+ __s64 start, __s64 end, __s32 datasync);
+int hmdfs_client_readpage(struct hmdfs_peer *con, const struct hmdfs_fid *fid,
+ struct page *page);
+
+int hmdfs_send_setattr(struct hmdfs_peer *con, const char *send_buf,
+ struct setattr_info *attr_info);
+int hmdfs_send_getattr(struct hmdfs_peer *con, const char *send_buf,
+ unsigned int lookup_flags,
+ struct hmdfs_getattr_ret *getattr_result);
+int hmdfs_send_statfs(struct hmdfs_peer *con, const char *path,
+ struct kstatfs *buf);
+void hmdfs_client_recv_readpage(struct hmdfs_head_cmd *head, int err,
+ struct hmdfs_async_work *async_work);
+int hmdfs_send_syncfs(struct hmdfs_peer *con, int syncfs_timeout);
+int hmdfs_send_getxattr(struct hmdfs_peer *con, const char *send_buf,
+ const char *name, void *value, size_t size);
+int hmdfs_send_setxattr(struct hmdfs_peer *con, const char *send_buf,
+ const char *name, const void *val,
+ size_t size, int flags);
+ssize_t hmdfs_send_listxattr(struct hmdfs_peer *con, const char *send_buf,
+ char *list, size_t size);
+void hmdfs_recv_syncfs_cb(struct hmdfs_peer *peer, const struct hmdfs_req *req,
+ const struct hmdfs_resp *resp);
+
+void __init hmdfs_client_add_node_evt_cb(void);
+#endif
diff --git a/fs/hmdfs/hmdfs_dentryfile.c b/fs/hmdfs/hmdfs_dentryfile.c
new file mode 100644
index 0000000000000000000000000000000000000000..98b215ba2d8edc8c877776c555284caade7071ea
--- /dev/null
+++ b/fs/hmdfs/hmdfs_dentryfile.c
@@ -0,0 +1,2680 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/hmdfs_dentryfile.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include "hmdfs_dentryfile.h"
+
+#include <linux/ctype.h>
+#include <linux/err.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/slab.h>
+#include <linux/xattr.h>
+
+#include "authority/authentication.h"
+#include "comm/transport.h"
+#include "hmdfs_client.h"
+#include "hmdfs_device_view.h"
+
+/* Hashing code copied from f2fs */
+#define HMDFS_HASH_COL_BIT ((0x1ULL) << 63)
+#define DELTA 0x9E3779B9
+
+static bool is_dot_dotdot(const unsigned char *name, __u32 len)
+{
+ if (len == 1 && name[0] == '.')
+ return true;
+
+ if (len == 2 && name[0] == '.' && name[1] == '.')
+ return true;
+
+ return false;
+}
+
+static void str2hashbuf(const unsigned char *msg, size_t len, unsigned int *buf,
+ int num, bool case_sense)
+{
+ unsigned int pad, val;
+ int i;
+ unsigned char c;
+
+ pad = (__u32)len | ((__u32)len << 8);
+ pad |= pad << 16;
+
+ val = pad;
+ if (len > (size_t)num * 4)
+ len = (size_t)num * 4;
+ for (i = 0; i < len; i++) {
+ if ((i % 4) == 0)
+ val = pad;
+ c = msg[i];
+ if (!case_sense)
+ c = tolower(c);
+ val = c + (val << 8);
+ if ((i % 4) == 3) {
+ *buf++ = val;
+ val = pad;
+ num--;
+ }
+ }
+ if (--num >= 0)
+ *buf++ = val;
+ while (--num >= 0)
+ *buf++ = pad;
+}
+
+static void tea_transform(unsigned int buf[4], unsigned int const in[])
+{
+ __u32 sum = 0;
+ __u32 b0 = buf[0], b1 = buf[1];
+ __u32 a = in[0], b = in[1], c = in[2], d = in[3];
+ int n = 16;
+
+ do {
+ sum += DELTA;
+ b0 += ((b1 << 4) + a) ^ (b1 + sum) ^ ((b1 >> 5) + b);
+ b1 += ((b0 << 4) + c) ^ (b0 + sum) ^ ((b0 >> 5) + d);
+ } while (--n);
+
+ buf[0] += b0;
+ buf[1] += b1;
+}
+
+static __u32 hmdfs_dentry_hash(const struct qstr *qstr, bool case_sense)
+{
+ __u32 hash;
+ __u32 hmdfs_hash;
+ const unsigned char *p = qstr->name;
+ __u32 len = qstr->len;
+ __u32 in[8], buf[4];
+
+ if (is_dot_dotdot(p, len))
+ return 0;
+
+ /* Initialize the default seed for the hash checksum functions */
+ buf[0] = 0x67452301;
+ buf[1] = 0xefcdab89;
+ buf[2] = 0x98badcfe;
+ buf[3] = 0x10325476;
+
+ while (1) {
+ str2hashbuf(p, len, in, 4, case_sense);
+ tea_transform(buf, in);
+ p += 16;
+ if (len <= 16)
+ break;
+ len -= 16;
+ }
+ hash = buf[0];
+ hmdfs_hash = hash & ~HMDFS_HASH_COL_BIT;
+ return hmdfs_hash;
+}
+
+static atomic_t curr_ino = ATOMIC_INIT(INUNUMBER_START);
+int get_inonumber(void)
+{
+ return atomic_inc_return(&curr_ino);
+}
+
+static int hmdfs_get_root_dentry_type(struct dentry *dentry, int *is_root)
+{
+ struct hmdfs_dentry_info *d_info = hmdfs_d(dentry);
+
+ *is_root = 1;
+ switch (d_info->dentry_type) {
+ case HMDFS_LAYER_OTHER_LOCAL:
+ *is_root = 0;
+ fallthrough;
+ case HMDFS_LAYER_SECOND_LOCAL:
+ return HMDFS_LAYER_SECOND_LOCAL;
+ case HMDFS_LAYER_OTHER_REMOTE:
+ *is_root = 0;
+ fallthrough;
+ case HMDFS_LAYER_SECOND_REMOTE:
+ return HMDFS_LAYER_SECOND_REMOTE;
+ default:
+ hmdfs_info("Unexpected dentry type %d", d_info->dentry_type);
+ return -EINVAL;
+ }
+}
+
+static int prepend(char **buffer, int *buflen, const char *str, int namelen)
+{
+ *buflen -= namelen;
+ if (*buflen < 0)
+ return -ENAMETOOLONG;
+ *buffer -= namelen;
+ memcpy(*buffer, str, namelen);
+ return 0;
+}
+
+static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
+{
+ const char *dname = name->name;
+ u32 dlen = name->len;
+ char *p = NULL;
+
+ *buflen -= dlen + 1;
+ if (*buflen < 0)
+ return -ENAMETOOLONG;
+ p = *buffer -= dlen + 1;
+ *p++ = '/';
+ while (dlen--) {
+ char c = *dname++;
+
+ if (!c)
+ break;
+ *p++ = c;
+ }
+ return 0;
+}
+
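+/*
+ * Build the path of @d relative to its hmdfs root into @buf, walking up
+ * d_parent under the rename_lock seqlock (this mirrors the kernel's
+ * prepend-based dentry_path_raw()).
+ */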
+static char *hmdfs_dentry_path_raw(struct dentry *d, char *buf, int buflen)
+{
+ struct dentry *dentry = NULL;
+ char *end = NULL;
+ char *retval = NULL;
+ unsigned int len;
+ unsigned int seq = 0;
+ int root_flag = 0;
+ int error = 0;
+ struct hmdfs_dentry_info *di = hmdfs_d(d);
+ int hmdfs_root_dentry_type = 0;
+
+ di->time = jiffies;
+ hmdfs_root_dentry_type = hmdfs_get_root_dentry_type(d, &root_flag);
+ if (hmdfs_root_dentry_type < 0)
+ return NULL;
+ if (root_flag) {
+ strcpy(buf, "/");
+ return buf;
+ }
+ rcu_read_lock();
+restart:
+ dentry = d;
+ di = hmdfs_d(dentry);
+ di->time = jiffies;
+ end = buf + buflen;
+ len = buflen;
+ prepend(&end, &len, "\0", 1);
+ retval = end - 1;
+ *retval = '/';
+ read_seqbegin_or_lock(&rename_lock, &seq);
+ while (di->dentry_type != hmdfs_root_dentry_type) {
+ struct dentry *parent = dentry->d_parent;
+
+ prefetch(parent);
+ error = prepend_name(&end, &len, &dentry->d_name);
+ if (error)
+ break;
+ retval = end;
+ dentry = parent;
+ di = hmdfs_d(dentry);
+ di->time = jiffies;
+ }
+ if (!(seq & 1))
+ rcu_read_unlock();
+ if (need_seqretry(&rename_lock, seq)) {
+ seq = 1;
+ goto restart;
+ }
+ done_seqretry(&rename_lock, seq);
+ if (error)
+ goto Elong;
+ return retval;
+Elong:
+ return ERR_PTR(-ENAMETOOLONG);
+}
+
+char *hmdfs_get_dentry_relative_path(struct dentry *dentry)
+{
+ char *final_buf = NULL;
+ char *buf = NULL;
+ char *p = NULL;
+
+ buf = kzalloc(PATH_MAX, GFP_KERNEL);
+ if (!buf)
+ return NULL;
+
+ final_buf = kzalloc(PATH_MAX, GFP_KERNEL);
+ if (!final_buf) {
+ kfree(buf);
+ return NULL;
+ }
+
+	/* a NULL dentry means the root dir */
+ if (!dentry) {
+ strcpy(final_buf, "/");
+ kfree(buf);
+ return final_buf;
+ }
+ p = hmdfs_dentry_path_raw(dentry, buf, PATH_MAX);
+ if (IS_ERR_OR_NULL(p)) {
+ kfree(buf);
+ kfree(final_buf);
+ return NULL;
+ }
+
+ if (strlen(p) >= PATH_MAX) {
+ kfree(buf);
+ kfree(final_buf);
+ return NULL;
+ }
+ strcpy(final_buf, p);
+ kfree(buf);
+ return final_buf;
+}
+
+char *hmdfs_get_dentry_absolute_path(const char *rootdir,
+ const char *relative_path)
+{
+	char *buf = NULL;
+
+ if (!rootdir || !relative_path)
+ return NULL;
+ if (strlen(rootdir) + strlen(relative_path) >= PATH_MAX)
+ return NULL;
+
+ buf = kzalloc(PATH_MAX, GFP_KERNEL);
+ if (!buf)
+ return NULL;
+
+ strcpy(buf, rootdir);
+ strcat(buf, relative_path);
+ return buf;
+}
+
+char *hmdfs_connect_path(const char *path, const char *name)
+{
+	char *buf = NULL;
+
+ if (!path || !name)
+ return NULL;
+
+ if (strlen(path) + strlen(name) + 1 >= PATH_MAX)
+ return NULL;
+
+ buf = kzalloc(PATH_MAX, GFP_KERNEL);
+ if (!buf)
+ return NULL;
+
+ strcpy(buf, path);
+ strcat(buf, "/");
+ strcat(buf, name);
+ return buf;
+}
+
+int hmdfs_metainfo_read(struct hmdfs_sb_info *sbi, struct file *filp,
+ void *buffer, int size, int bidx)
+{
+ loff_t pos = get_dentry_group_pos(bidx);
+
+ return cache_file_read(sbi, filp, buffer, (size_t)size, &pos);
+}
+
+int hmdfs_metainfo_write(struct hmdfs_sb_info *sbi, struct file *filp,
+ const void *buffer, int size, int bidx)
+{
+ loff_t pos = get_dentry_group_pos(bidx);
+
+ return cache_file_write(sbi, filp, buffer, (size_t)size, &pos);
+}
+
+/*
+ * Bucket sequence numbers start at 0 and run level by level, for example:
+ * level0 bucket0(0)
+ * level1 bucket0(1) bucket1(2)
+ * level2 bucket0(3) bucket1(4) bucket2(5) bucket3(6)
+ * Returns the overall bucket address for @buckoffset within @level.
+ */
+static __u32 get_bucketaddr(int level, int buckoffset)
+{
+ int all_level_bucketaddr = 0;
+ __u32 curlevelmaxbucks;
+
+ if (level >= MAX_BUCKET_LEVEL) {
+ hmdfs_err("level = %d overflow", level);
+ return all_level_bucketaddr;
+ }
+ curlevelmaxbucks = (1 << level);
+ if (buckoffset >= curlevelmaxbucks) {
+ hmdfs_err("buckoffset %d overflow, level %d has %d buckets max",
+ buckoffset, level, curlevelmaxbucks);
+ return all_level_bucketaddr;
+ }
+ all_level_bucketaddr = curlevelmaxbucks + buckoffset - 1;
+
+ return all_level_bucketaddr;
+}
+
+static __u32 get_bucket_by_level(int level)
+{
+ int buckets = 0;
+
+ if (level >= MAX_BUCKET_LEVEL) {
+ hmdfs_err("level = %d overflow", level);
+ return buckets;
+ }
+
+ buckets = (1 << level);
+ return buckets;
+}
+
+static __u32 get_overall_bucket(int level)
+{
+ int buckets = 0;
+
+ if (level >= MAX_BUCKET_LEVEL) {
+ hmdfs_err("level = %d overflow", level);
+ return buckets;
+ }
+ buckets = (1 << (level + 1)) - 1;
+ return buckets;
+}
+
+static inline loff_t get_dcache_file_size(int level)
+{
+ loff_t buckets = get_overall_bucket(level);
+
+ return buckets * DENTRYGROUP_SIZE * BUCKET_BLOCKS + DENTRYGROUP_HEADER;
+}
+
+static char *get_relative_path(struct hmdfs_sb_info *sbi, char *from)
+{
+ char *relative;
+
+ if (strncmp(from, sbi->local_src, strlen(sbi->local_src))) {
+ hmdfs_warning("orig path do not start with local_src");
+ return NULL;
+ }
+ relative = from + strlen(sbi->local_src);
+ if (*relative == '/')
+ relative++;
+ return relative;
+}
+
+struct file *hmdfs_get_or_create_dents(struct hmdfs_sb_info *sbi, char *name)
+{
+ struct path root_path, path;
+ struct file *filp = NULL;
+ char *relative;
+ int err;
+
+ err = kern_path(sbi->local_src, 0, &root_path);
+ if (err) {
+ hmdfs_err("kern_path failed err = %d", err);
+ return NULL;
+ }
+ relative = get_relative_path(sbi, name);
+ if (!relative) {
+ hmdfs_err("get relative path failed");
+ goto err_root_path;
+ }
+ err = vfs_path_lookup(root_path.dentry, root_path.mnt, relative, 0,
+ &path);
+ if (err) {
+ hmdfs_err("lookup failed err = %d", err);
+ goto err_root_path;
+ }
+
+ filp = hmdfs_server_cache_revalidate(sbi, relative, &path);
+ if (IS_ERR_OR_NULL(filp)) {
+ filp = hmdfs_server_rebuild_dents(sbi, &path, NULL, relative);
+ if (IS_ERR_OR_NULL(filp))
+ goto err_lookup_path;
+ }
+
+err_lookup_path:
+ path_put(&path);
+err_root_path:
+ path_put(&root_path);
+ return filp;
+}
+
+/* read all dentries in the target path directory */
+int read_dentry(struct hmdfs_sb_info *sbi, char *file_name,
+ struct dir_context *ctx)
+{
+ unsigned long pos = (unsigned long)(ctx->pos);
+ unsigned long group_id = (pos << (1 + DEV_ID_BIT_NUM)) >>
+ (POS_BIT_NUM - GROUP_ID_BIT_NUM);
+ unsigned long offset = pos & OFFSET_BIT_MASK;
+ struct hmdfs_dentry_group *dentry_group = NULL;
+ struct file *handler = NULL;
+ int group_num = 0;
+ int iterate_result = 0;
+ int i, j;
+ const struct cred *saved_cred;
+
+ saved_cred = hmdfs_override_fsids(false);
+ if (!saved_cred) {
+ hmdfs_err("prepare cred failed!");
+ return -ENOMEM;
+ }
+
+	if (!file_name) {
+		hmdfs_revert_fsids(saved_cred);
+		return -EINVAL;
+	}
+
+	dentry_group = kzalloc(sizeof(*dentry_group), GFP_KERNEL);
+	if (!dentry_group) {
+		hmdfs_revert_fsids(saved_cred);
+		return -ENOMEM;
+	}
+
+	handler = hmdfs_get_or_create_dents(sbi, file_name);
+	if (IS_ERR_OR_NULL(handler)) {
+		hmdfs_revert_fsids(saved_cred);
+		kfree(dentry_group);
+		return -ENOENT;
+	}
+
+ group_num = get_dentry_group_cnt(file_inode(handler));
+
+ for (i = group_id; i < group_num; i++) {
+ hmdfs_metainfo_read(sbi, handler, dentry_group,
+ sizeof(struct hmdfs_dentry_group), i);
+ for (j = offset; j < DENTRY_PER_GROUP; j++) {
+ int len;
+ int file_type = 0;
+ bool is_continue;
+
+ len = le16_to_cpu(dentry_group->nsl[j].namelen);
+ if (!test_bit_le(j, dentry_group->bitmap) || len == 0)
+ continue;
+
+ if (S_ISDIR(le16_to_cpu(dentry_group->nsl[j].i_mode)))
+ file_type = DT_DIR;
+ else if (S_ISREG(le16_to_cpu(
+ dentry_group->nsl[j].i_mode)))
+ file_type = DT_REG;
+ else if (S_ISLNK(le16_to_cpu(
+ dentry_group->nsl[j].i_mode)))
+ file_type = DT_LNK;
+
+ pos = hmdfs_set_pos(0, i, j);
+ is_continue = dir_emit(
+ ctx, dentry_group->filename[j], len,
+ le64_to_cpu(dentry_group->nsl[j].i_ino),
+ file_type);
+ if (!is_continue) {
+ ctx->pos = pos;
+ iterate_result = 1;
+ goto done;
+ }
+ }
+ offset = 0;
+ }
+
+done:
+ hmdfs_revert_fsids(saved_cred);
+ kfree(dentry_group);
+ fput(handler);
+ return iterate_result;
+}
+
+unsigned int get_max_depth(struct file *filp)
+{
+ size_t isize;
+
+ isize = get_dentry_group_cnt(file_inode(filp)) / BUCKET_BLOCKS;
+
+ return get_count_order(isize + 1);
+}
+
+struct hmdfs_dentry_group *find_dentry_page(struct hmdfs_sb_info *sbi,
+ pgoff_t index, struct file *filp)
+{
+ int size;
+ struct hmdfs_dentry_group *dentry_blk = NULL;
+ loff_t pos = get_dentry_group_pos(index);
+ int err;
+
+ dentry_blk = kmalloc(sizeof(*dentry_blk), GFP_KERNEL);
+ if (!dentry_blk)
+ return NULL;
+
+ err = hmdfs_wlock_file(filp, pos, DENTRYGROUP_SIZE);
+ if (err) {
+ hmdfs_err("lock file pos %lld failed", pos);
+ kfree(dentry_blk);
+ return NULL;
+ }
+
+ size = cache_file_read(sbi, filp, dentry_blk, (size_t)DENTRYGROUP_SIZE,
+ &pos);
+ if (size != DENTRYGROUP_SIZE) {
+ kfree(dentry_blk);
+ dentry_blk = NULL;
+ }
+
+ return dentry_blk;
+}
+
+static ssize_t write_dentry_page(struct file *filp, const void *buffer,
+ int buffersize, loff_t position)
+{
+ ssize_t size;
+
+ size = kernel_write(filp, buffer, (size_t)buffersize, &position);
+ if (size != buffersize)
+ hmdfs_err("write failed, ret = %zd", size);
+
+ return size;
+}
+
+static struct hmdfs_dentry *find_in_block(struct hmdfs_dentry_group *dentry_blk,
+ __u32 namehash,
+ const struct qstr *qstr,
+ struct hmdfs_dentry **insense_de,
+ bool case_sense)
+{
+ struct hmdfs_dentry *de;
+ unsigned long bit_pos = 0;
+ int max_len = 0;
+
+ while (bit_pos < DENTRY_PER_GROUP) {
+		if (!test_bit_le(bit_pos, dentry_blk->bitmap)) {
+			bit_pos++;
+			max_len++;
+			continue;
+		}
+ de = &dentry_blk->nsl[bit_pos];
+ if (unlikely(!de->namelen)) {
+ bit_pos++;
+ continue;
+ }
+
+ if (le32_to_cpu(de->hash) == namehash &&
+ le16_to_cpu(de->namelen) == qstr->len &&
+ !memcmp(qstr->name, dentry_blk->filename[bit_pos],
+ le16_to_cpu(de->namelen)))
+ goto found;
+ if (!(*insense_de) && !case_sense &&
+ le32_to_cpu(de->hash) == namehash &&
+ le16_to_cpu(de->namelen) == qstr->len &&
+ str_n_case_eq(qstr->name, dentry_blk->filename[bit_pos],
+ le16_to_cpu(de->namelen)))
+ *insense_de = de;
+ max_len = 0;
+ bit_pos += get_dentry_slots(le16_to_cpu(de->namelen));
+ }
+ de = NULL;
+found:
+ return de;
+}
+
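+/*
+ * Search one hash level: the level's bucket is picked by hash % nbucket,
+ * and each bucket spans BUCKET_BLOCKS dentry groups. A case-insensitive
+ * match is remembered separately so that an exact match can still win.
+ */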
+static struct hmdfs_dentry *hmdfs_in_level(struct dentry *child_dentry,
+ unsigned int level,
+ struct hmdfs_dcache_lookup_ctx *ctx)
+{
+ unsigned int nbucket;
+ unsigned int bidx, end_block;
+ struct hmdfs_dentry *de = NULL;
+ struct hmdfs_dentry *tmp_insense_de = NULL;
+ struct hmdfs_dentry_group *dentry_blk;
+
+ nbucket = get_bucket_by_level(level);
+ if (!nbucket)
+ return de;
+
+ bidx = get_bucketaddr(level, ctx->hash % nbucket) * BUCKET_BLOCKS;
+ end_block = bidx + BUCKET_BLOCKS;
+
+ for (; bidx < end_block; bidx++) {
+ dentry_blk = find_dentry_page(ctx->sbi, bidx, ctx->filp);
+ if (!dentry_blk)
+ break;
+
+ de = find_in_block(dentry_blk, ctx->hash, ctx->name,
+ &tmp_insense_de, ctx->sbi->s_case_sensitive);
+ if (!de && !(ctx->insense_de) && tmp_insense_de) {
+ ctx->insense_de = tmp_insense_de;
+ ctx->insense_page = dentry_blk;
+ ctx->insense_bidx = bidx;
+ } else if (!de) {
+ hmdfs_unlock_file(ctx->filp, get_dentry_group_pos(bidx),
+ DENTRYGROUP_SIZE);
+ kfree(dentry_blk);
+ } else {
+ ctx->page = dentry_blk;
+ break;
+ }
+ }
+ ctx->bidx = bidx;
+ return de;
+}
+
+struct hmdfs_dentry *hmdfs_find_dentry(struct dentry *child_dentry,
+ struct hmdfs_dcache_lookup_ctx *ctx)
+{
+ struct hmdfs_dentry *de = NULL;
+ unsigned int max_depth;
+ unsigned int level;
+
+ if (!ctx->filp)
+ return NULL;
+
+ ctx->hash = hmdfs_dentry_hash(ctx->name, ctx->sbi->s_case_sensitive);
+
+ max_depth = get_max_depth(ctx->filp);
+ for (level = 0; level < max_depth; level++) {
+ de = hmdfs_in_level(child_dentry, level, ctx);
+ if (de) {
+ if (ctx->insense_page) {
+ hmdfs_unlock_file(ctx->filp,
+ get_dentry_group_pos(ctx->insense_bidx),
+ DENTRYGROUP_SIZE);
+ kfree(ctx->insense_page);
+ ctx->insense_page = NULL;
+ }
+ return de;
+ }
+ }
+ if (ctx->insense_de) {
+ ctx->bidx = ctx->insense_bidx;
+ ctx->page = ctx->insense_page;
+ ctx->insense_bidx = 0;
+ ctx->insense_page = NULL;
+ }
+ return ctx->insense_de;
+}
+
+void update_dentry(struct hmdfs_dentry_group *d, struct dentry *child_dentry,
+ struct inode *inode, __u32 name_hash, unsigned int bit_pos)
+{
+ struct hmdfs_dentry *de;
+ struct hmdfs_dentry_info *gdi = hmdfs_d(child_dentry);
+ const struct qstr name = child_dentry->d_name;
+ int slots = get_dentry_slots(name.len);
+ int i;
+ unsigned long ino;
+ __u32 igen;
+
+	/*
+	 * If the dentry's inode is a symlink, it must be a lower inode,
+	 * and we should use the upper ino and generation to fill
+	 * the dentryfile.
+	 */
+ if (!gdi && S_ISLNK(d_inode(child_dentry)->i_mode)) {
+ ino = d_inode(child_dentry)->i_ino;
+ igen = d_inode(child_dentry)->i_generation;
+ } else {
+ ino = inode->i_ino;
+ igen = inode->i_generation;
+ }
+
+ de = &d->nsl[bit_pos];
+ de->hash = cpu_to_le32(name_hash);
+ de->namelen = cpu_to_le16(name.len);
+ memcpy(d->filename[bit_pos], name.name, name.len);
+ de->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
+ de->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+ de->i_size = cpu_to_le64(inode->i_size);
+ de->i_ino = cpu_to_le64(generate_u64_ino(ino, igen));
+ de->i_flag = 0;
+
+	/*
+	 * If the dentry has fsdata, we assume it is an hmdfs dentry.
+	 * Only the client may update its info in the dentryfile when
+	 * renaming the remote file.
+	 * Since the symlink mtime and size come from the server's lower
+	 * inode, we just use them and only set S_IFLNK in the mode.
+	 */
+ if (gdi && hm_islnk(gdi->file_type))
+ de->i_mode = cpu_to_le16(S_IFLNK);
+ else if (!gdi && S_ISLNK(d_inode(child_dentry)->i_mode))
+		de->i_mode = cpu_to_le16(d_inode(child_dentry)->i_mode);
+ else
+ de->i_mode = cpu_to_le16(inode->i_mode);
+
+ for (i = 0; i < slots; i++) {
+ __set_bit_le(bit_pos + i, d->bitmap);
+		/* zero namelen on continuation slots so readdir skips them */
+ if (i)
+ (de + i)->namelen = 0;
+ }
+}
+
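+/*
+ * Find @slots consecutive free bits in @bitmap; returns the start slot,
+ * or @max_slots if no run that large exists.
+ */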
+int room_for_filename(const void *bitmap, int slots, int max_slots)
+{
+ int bit_start = 0;
+ int zero_start, zero_end;
+next:
+ zero_start = find_next_zero_bit_le(bitmap, max_slots, bit_start);
+ if (zero_start >= max_slots)
+ return max_slots;
+
+ zero_end = find_next_bit_le(bitmap, max_slots, zero_start);
+ if (zero_end - zero_start >= slots)
+ return zero_start;
+
+ bit_start = zero_end + 1;
+
+ if (zero_end + 1 >= max_slots)
+ return max_slots;
+ goto next;
+}
+
+void create_in_cache_file(uint64_t dev_id, struct dentry *dentry)
+{
+ struct clearcache_item *item = NULL;
+
+ item = hmdfs_find_cache_item(dev_id, dentry->d_parent);
+ if (item) {
+ if (d_inode(dentry))
+ create_dentry(dentry, d_inode(dentry), item->filp,
+ hmdfs_sb(dentry->d_sb));
+ else
+ hmdfs_err("inode is null!");
+ kref_put(&item->ref, release_cache_item);
+ } else {
+ hmdfs_info("find cache item failed, device_id:%llu", dev_id);
+ }
+}
+
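+/*
+ * Insert a dentry into the cache file: walk the hash levels looking for
+ * a group with enough free slots, growing the cache file whenever a
+ * level's buckets do not exist yet.
+ */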
+int create_dentry(struct dentry *child_dentry, struct inode *inode,
+ struct file *file, struct hmdfs_sb_info *sbi)
+{
+ unsigned int bit_pos, level;
+ unsigned long bidx, end_block;
+ const struct qstr qstr = child_dentry->d_name;
+ __u32 namehash;
+ loff_t pos;
+ ssize_t size;
+ int ret = 0;
+ struct hmdfs_dentry_group *dentry_blk = NULL;
+
+ level = 0;
+
+ namehash = hmdfs_dentry_hash(&qstr, sbi->s_case_sensitive);
+
+ dentry_blk = kmalloc(sizeof(*dentry_blk), GFP_KERNEL);
+ if (!dentry_blk) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+find:
+ if (level == MAX_BUCKET_LEVEL) {
+ ret = -ENOSPC;
+ goto out;
+ }
+ bidx = BUCKET_BLOCKS *
+ get_bucketaddr(level, namehash % get_bucket_by_level(level));
+ end_block = bidx + BUCKET_BLOCKS;
+ if (end_block > get_dentry_group_cnt(file_inode(file))) {
+ if (cache_file_truncate(sbi, &(file->f_path),
+ get_dcache_file_size(level))) {
+ ret = -ENOSPC;
+ goto out;
+ }
+ }
+
+ for (; bidx < end_block; bidx++) {
+ int size;
+
+ pos = get_dentry_group_pos(bidx);
+ ret = hmdfs_wlock_file(file, pos, DENTRYGROUP_SIZE);
+ if (ret)
+ goto out;
+
+ size = cache_file_read(sbi, file, dentry_blk,
+ (size_t)DENTRYGROUP_SIZE, &pos);
+ if (size != DENTRYGROUP_SIZE) {
+ ret = -ENOSPC;
+ hmdfs_unlock_file(file, pos, DENTRYGROUP_SIZE);
+ goto out;
+ }
+
+ bit_pos = room_for_filename(&dentry_blk->bitmap,
+ get_dentry_slots(qstr.len),
+ DENTRY_PER_GROUP);
+ if (bit_pos < DENTRY_PER_GROUP)
+ goto add;
+ hmdfs_unlock_file(file, pos, DENTRYGROUP_SIZE);
+ }
+ ++level;
+ goto find;
+add:
+ pos = get_dentry_group_pos(bidx);
+ update_dentry(dentry_blk, child_dentry, inode, namehash, bit_pos);
+ size = cache_file_write(sbi, file, dentry_blk,
+ sizeof(struct hmdfs_dentry_group), &pos);
+ if (size != sizeof(struct hmdfs_dentry_group))
+ hmdfs_err("cache file write failed!, ret = %zd", size);
+ hmdfs_unlock_file(file, pos, DENTRYGROUP_SIZE);
+out:
+ kfree(dentry_blk);
+out_err:
+ return ret;
+}
+
+void hmdfs_init_dcache_lookup_ctx(struct hmdfs_dcache_lookup_ctx *ctx,
+ struct hmdfs_sb_info *sbi,
+ const struct qstr *qstr, struct file *filp)
+{
+ ctx->sbi = sbi;
+ ctx->name = qstr;
+ ctx->filp = filp;
+ ctx->bidx = 0;
+ ctx->page = NULL;
+ ctx->insense_de = NULL;
+ ctx->insense_bidx = 0;
+ ctx->insense_page = NULL;
+}
+
+int update_inode_to_dentry(struct dentry *child_dentry, struct inode *inode)
+{
+ struct hmdfs_sb_info *sbi = d_inode(child_dentry)->i_sb->s_fs_info;
+ struct hmdfs_dentry *de = NULL;
+ loff_t ipos;
+ struct dentry *parent_dentry;
+ struct cache_file_node *cfn = NULL;
+ char *relative_path = NULL;
+ struct hmdfs_dcache_lookup_ctx ctx;
+
+ parent_dentry = child_dentry->d_parent;
+
+ relative_path = hmdfs_get_dentry_relative_path(parent_dentry);
+ if (!relative_path)
+ return -ENOMEM;
+
+ cfn = find_cfn(sbi, HMDFS_SERVER_CID, relative_path, true);
+ if (!cfn)
+ goto out;
+
+ hmdfs_init_dcache_lookup_ctx(&ctx, sbi, &child_dentry->d_name,
+ cfn->filp);
+ de = hmdfs_find_dentry(child_dentry, &ctx);
+ if (!de)
+ goto out_cfn;
+
+ de->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
+ de->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+ de->i_size = cpu_to_le64(inode->i_size);
+ de->i_ino = cpu_to_le64(
+ generate_u64_ino(inode->i_ino, inode->i_generation));
+ de->i_flag = 0;
+
+ ipos = get_dentry_group_pos(ctx.bidx);
+ write_dentry_page(cfn->filp, ctx.page,
+ sizeof(struct hmdfs_dentry_group), ipos);
+ hmdfs_unlock_file(cfn->filp, ipos, DENTRYGROUP_SIZE);
+ kfree(ctx.page);
+out_cfn:
+ release_cfn(cfn);
+out:
+ kfree(relative_path);
+ return 0;
+}
+
+void hmdfs_delete_dentry(struct dentry *d, struct file *filp)
+{
+ struct hmdfs_dentry *de = NULL;
+ unsigned int bit_pos;
+ int slots, i;
+ loff_t ipos;
+ ssize_t size;
+ struct hmdfs_dcache_lookup_ctx ctx;
+
+ hmdfs_init_dcache_lookup_ctx(&ctx, hmdfs_sb(d->d_sb), &d->d_name, filp);
+
+ de = hmdfs_find_dentry(d, &ctx);
+ if (IS_ERR_OR_NULL(de)) {
+ hmdfs_info("find dentry failed!, err=%ld", PTR_ERR(de));
+ return;
+ }
+ slots = get_dentry_slots(le16_to_cpu(de->namelen));
+
+ bit_pos = de - ctx.page->nsl;
+ for (i = 0; i < slots; i++)
+ __clear_bit_le(bit_pos + i, &ctx.page->bitmap);
+
+ ipos = get_dentry_group_pos(ctx.bidx);
+ size = cache_file_write(hmdfs_sb(d->d_sb), filp, ctx.page,
+ sizeof(struct hmdfs_dentry_group), &ipos);
+ if (size != sizeof(struct hmdfs_dentry_group))
+ hmdfs_err("cache file write failed!, ret = %zd", size);
+ hmdfs_unlock_file(filp, ipos, DENTRYGROUP_SIZE);
+ kfree(ctx.page);
+}
+
+static int hmdfs_get_cache_path(struct hmdfs_sb_info *sbi, struct path *dir)
+{
+ struct hmdfs_dentry_info *di = hmdfs_d(sbi->sb->s_root);
+ int err;
+
+ if (!sbi->s_dentry_cache) {
+ *dir = di->lower_path;
+ return 0;
+ }
+
+ err = kern_path(sbi->cache_dir, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, dir);
+ if (err)
+ hmdfs_err("open failed, errno = %d", err);
+
+ return err;
+}
+
+static void hmdfs_put_cache_path(struct hmdfs_sb_info *sbi, struct path *dir)
+{
+ if (!sbi->s_dentry_cache)
+ return;
+ path_put(dir);
+}
+
+struct file *create_local_dentry_file_cache(struct hmdfs_sb_info *sbi)
+{
+ struct file *filp = NULL;
+ const struct cred *old_cred = hmdfs_override_creds(sbi->system_cred);
+ struct path cache_dir;
+ int err;
+
+ err = hmdfs_get_cache_path(sbi, &cache_dir);
+ if (err) {
+ filp = ERR_PTR(err);
+ goto out;
+ }
+
+ filp = file_open_root(&cache_dir, ".",
+ O_RDWR | O_LARGEFILE | O_TMPFILE,
+ DENTRY_FILE_PERM);
+ if (IS_ERR(filp))
+ hmdfs_err("dentryfile open failed and exit err=%ld",
+ PTR_ERR(filp));
+
+ hmdfs_put_cache_path(sbi, &cache_dir);
+out:
+ hmdfs_revert_creds(old_cred);
+ return filp;
+}
+
+static int hmdfs_linkat(struct path *old_path, const char *newname)
+{
+ struct dentry *new_dentry = NULL;
+ struct path new_path;
+ int error;
+
+ new_dentry = kern_path_create(AT_FDCWD, newname, &new_path, 0);
+ if (IS_ERR(new_dentry)) {
+ hmdfs_err("create kernel path failed, error: %ld",
+ PTR_ERR(new_dentry));
+ return PTR_ERR(new_dentry);
+ }
+
+ error = -EXDEV;
+ if (old_path->mnt != new_path.mnt)
+ goto out_dput;
+
+ error = vfs_link(old_path->dentry, new_path.dentry->d_inode, new_dentry,
+ NULL);
+
+out_dput:
+ done_path_create(&new_path, new_dentry);
+ return error;
+}
+
+static int cache_file_mkdir(const char *name, umode_t mode)
+{
+ struct dentry *dentry;
+ struct path path;
+ int err;
+
+ dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+
+ err = vfs_mkdir(d_inode(path.dentry), dentry, mode);
+ if (err && err != -EEXIST)
+ hmdfs_err("vfs_mkdir failed, err = %d", err);
+
+ done_path_create(&path, dentry);
+ return err;
+}
+
+static int cache_file_create_path(const char *fullpath)
+{
+ char *path;
+ char *s;
+ int err = 0;
+
+ path = kstrdup(fullpath, GFP_KERNEL);
+ if (!path)
+ return -ENOMEM;
+
+ s = path + 1;
+ while (true) {
+ s = strchr(s, '/');
+ if (!s)
+ break;
+ s[0] = '\0';
+ err = cache_file_mkdir(path, 0755);
+ if (err && err != -EEXIST)
+ break;
+ s[0] = '/';
+ s++;
+ }
+ kfree(path);
+ return err;
+}
+
+static void hmdfs_cache_path_create(char *s, const char *dir, bool server)
+{
+ if (server)
+ snprintf(s, PATH_MAX, "%s/dentry_cache/server/", dir);
+ else
+ snprintf(s, PATH_MAX, "%s/dentry_cache/client/", dir);
+}
+
+static void hmdfs_cache_file_create(char *s, uint64_t hash, const char *id,
+ bool server)
+{
+ int offset = strlen(s);
+
+ if (server)
+ snprintf(s + offset, PATH_MAX - offset, "%016llx", hash);
+ else
+ snprintf(s + offset, PATH_MAX - offset, "%s_%016llx", id, hash);
+}
+
+int cache_file_name_generate(char *fullname, struct hmdfs_peer *con,
+ const char *relative_path, bool server)
+{
+ struct hmdfs_sb_info *sbi = con->sbi;
+ uint64_t hash;
+ char cid[HMDFS_CFN_CID_SIZE];
+ int err;
+
+ hmdfs_cache_path_create(fullname, sbi->cache_dir, server);
+
+ err = cache_file_create_path(fullname);
+ if (err && err != -EEXIST) {
+ hmdfs_err("making dir failed %d", err);
+ return err;
+ }
+
+ strncpy(cid, con->cid, HMDFS_CFN_CID_SIZE - 1);
+ cid[HMDFS_CFN_CID_SIZE - 1] = '\0';
+
+ hash = path_hash(relative_path, strlen(relative_path),
+ sbi->s_case_sensitive);
+ hmdfs_cache_file_create(fullname, hash, cid, server);
+
+ return 0;
+}
+
+static void free_cfn(struct cache_file_node *cfn)
+{
+ if (!IS_ERR_OR_NULL(cfn->filp))
+ filp_close(cfn->filp, NULL);
+
+ kfree(cfn->relative_path);
+ kfree(cfn);
+}
+
+static bool dentry_file_match(struct cache_file_node *cfn, const char *id,
+ const char *path)
+{
+ int ret;
+
+ if (cfn->sbi->s_case_sensitive)
+ ret = strcmp(cfn->relative_path, path);
+ else
+ ret = strcasecmp(cfn->relative_path, path);
+
+	return (!ret && !strncmp(cfn->cid, id, HMDFS_CFN_CID_SIZE - 1));
+}
+
+struct cache_file_node *__find_cfn(struct hmdfs_sb_info *sbi, const char *cid,
+ const char *path, bool server)
+{
+ struct cache_file_node *cfn = NULL;
+ struct list_head *head = get_list_head(sbi, server);
+
+ list_for_each_entry(cfn, head, list) {
+ if (dentry_file_match(cfn, cid, path)) {
+ refcount_inc(&cfn->ref);
+ return cfn;
+ }
+ }
+ return NULL;
+}
+
+struct cache_file_node *create_cfn(struct hmdfs_sb_info *sbi, const char *path,
+ const char *cid, bool server)
+{
+ struct cache_file_node *cfn = kzalloc(sizeof(*cfn), GFP_KERNEL);
+
+ if (!cfn)
+ return NULL;
+
+ cfn->relative_path = kstrdup(path, GFP_KERNEL);
+ if (!cfn->relative_path)
+ goto out;
+
+ refcount_set(&cfn->ref, 1);
+ strncpy(cfn->cid, cid, HMDFS_CFN_CID_SIZE - 1);
+ cfn->cid[HMDFS_CFN_CID_SIZE - 1] = '\0';
+ cfn->sbi = sbi;
+ cfn->server = server;
+ return cfn;
+out:
+ free_cfn(cfn);
+ return NULL;
+}
+
+static struct file *insert_cfn(struct hmdfs_sb_info *sbi, const char *filename,
+ const char *path, const char *cid, bool server)
+{
+ const struct cred *old_cred = NULL;
+ struct cache_file_node *cfn = NULL;
+ struct cache_file_node *exist = NULL;
+ struct list_head *head = NULL;
+ struct file *filp = NULL;
+
+ cfn = create_cfn(sbi, path, cid, server);
+ if (!cfn)
+ return ERR_PTR(-ENOMEM);
+
+ old_cred = hmdfs_override_creds(sbi->system_cred);
+ filp = filp_open(filename, O_RDWR | O_LARGEFILE, 0);
+ hmdfs_revert_creds(old_cred);
+ if (IS_ERR(filp)) {
+ hmdfs_err("open file failed, err=%ld", PTR_ERR(filp));
+ goto out;
+ }
+
+ head = get_list_head(sbi, server);
+
+ mutex_lock(&sbi->cache_list_lock);
+ exist = __find_cfn(sbi, cid, path, server);
+ if (!exist) {
+ cfn->filp = filp;
+ list_add_tail(&cfn->list, head);
+ } else {
+ mutex_unlock(&sbi->cache_list_lock);
+ release_cfn(exist);
+ filp_close(filp, NULL);
+ filp = ERR_PTR(-EEXIST);
+ goto out;
+ }
+ mutex_unlock(&sbi->cache_list_lock);
+ return filp;
+out:
+ free_cfn(cfn);
+ return filp;
+}
+
+int hmdfs_rename_dentry(struct dentry *old_dentry, struct dentry *new_dentry,
+ struct file *old_filp, struct file *new_filp)
+{
+ int ret;
+ struct hmdfs_sb_info *sbi = hmdfs_sb(new_dentry->d_sb);
+
+ /*
+ * Try to delete first, because a stale dentry might exist after an
+ * overwrite.
+ */
+ hmdfs_delete_dentry(new_dentry, new_filp);
+
+ ret = create_dentry(new_dentry, d_inode(old_dentry), new_filp, sbi);
+ if (ret) {
+ hmdfs_err("create dentry failed!, err=%d", ret);
+ return ret;
+ }
+
+ hmdfs_delete_dentry(old_dentry, old_filp);
+ return 0;
+}
+
+/**
+ * cache_file_persistent - link the tmpfile to the cache dir
+ * @con: the connection peer
+ * @filp: the file handle of the tmpfile
+ * @relative_path: the relative path to which the tmpfile belongs
+ * @server: server or client
+ *
+ * Return value: the new file handle of the persistent file if the
+ * persistent operation succeeds. Otherwise the original handle of the
+ * tmpfile passed in is returned, so that the caller does not have to
+ * check the returned handle.
+ */
+struct file *cache_file_persistent(struct hmdfs_peer *con, struct file *filp,
+ const char *relative_path, bool server)
+{
+ struct cache_file_node *cfn = NULL;
+ char *fullname = NULL;
+ char *cid = server ? HMDFS_SERVER_CID : (char *)con->cid;
+ struct file *newf = NULL;
+ int i = 0;
+ int len;
+ int err;
+
+ if (!con->sbi->s_dentry_cache)
+ return filp;
+
+ cfn = find_cfn(con->sbi, cid, relative_path, server);
+ if (cfn) {
+ release_cfn(cfn);
+ return filp;
+ }
+ fullname = kzalloc(PATH_MAX, GFP_KERNEL);
+ if (!fullname)
+ return filp;
+
+ err = cache_file_name_generate(fullname, con, relative_path, server);
+ if (err)
+ goto out;
+
+ err = __vfs_setxattr(file_dentry(filp), file_inode(filp),
+ DENTRY_FILE_XATTR_NAME, relative_path,
+ strlen(relative_path), 0);
+ if (err) {
+ hmdfs_err("setxattr for file failed, err=%d", err);
+ goto out;
+ }
+
+ len = strlen(fullname);
+
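+	/*
+	 * If linking fails (e.g. the generated name is already taken),
+	 * retry with "_0" ... "_9" appended before giving up.
+	 */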
+ do {
+ err = hmdfs_linkat(&filp->f_path, fullname);
+ if (!err)
+ break;
+
+ snprintf(fullname + len, PATH_MAX - len, "_%d", i);
+ } while (i++ < DENTRY_FILE_NAME_RETRY);
+
+ if (err) {
+ hmdfs_err("link for file failed, err=%d", err);
+ goto out;
+ }
+
+ newf = insert_cfn(con->sbi, fullname, relative_path, cid, server);
+ if (!IS_ERR(newf))
+ filp = newf;
+out:
+ kfree(fullname);
+ return filp;
+}
+
+void __destroy_cfn(struct list_head *head)
+{
+ struct cache_file_node *cfn = NULL;
+ struct cache_file_node *n = NULL;
+
+ list_for_each_entry_safe(cfn, n, head, list) {
+ list_del_init(&cfn->list);
+ release_cfn(cfn);
+ }
+}
+
+void hmdfs_cfn_destroy(struct hmdfs_sb_info *sbi)
+{
+ mutex_lock(&sbi->cache_list_lock);
+ __destroy_cfn(&sbi->client_cache);
+ __destroy_cfn(&sbi->server_cache);
+ mutex_unlock(&sbi->cache_list_lock);
+}
+
+struct cache_file_node *find_cfn(struct hmdfs_sb_info *sbi, const char *cid,
+ const char *path, bool server)
+{
+ struct cache_file_node *cfn = NULL;
+
+ mutex_lock(&sbi->cache_list_lock);
+ cfn = __find_cfn(sbi, cid, path, server);
+ mutex_unlock(&sbi->cache_list_lock);
+ return cfn;
+}
+
+void release_cfn(struct cache_file_node *cfn)
+{
+ if (refcount_dec_and_test(&cfn->ref))
+ free_cfn(cfn);
+}
+
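+/*
+ * Unlink the node from its cache list and delete the backing file. The
+ * list_empty() check makes the removal idempotent: only the caller that
+ * actually unlinks the node drops the list's reference.
+ */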
+void remove_cfn(struct cache_file_node *cfn)
+{
+ struct hmdfs_sb_info *sbi = cfn->sbi;
+ bool deleted;
+
+ mutex_lock(&sbi->cache_list_lock);
+ deleted = list_empty(&cfn->list);
+ if (!deleted)
+ list_del_init(&cfn->list);
+ mutex_unlock(&sbi->cache_list_lock);
+ if (!deleted) {
+ delete_dentry_file(cfn->filp);
+ release_cfn(cfn);
+ }
+}
+
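+/*
+ * Take (or release, for F_UNLCK) a blocking POSIX lock covering
+ * [start, start + len) on @filp. fl_owner is the struct file itself, so
+ * lock ownership is per open file rather than per process. A typical
+ * (hypothetical) use:
+ *
+ *	hmdfs_wlock_file(filp, 0, i_size_read(file_inode(filp)));
+ *	... update the dentry file ...
+ *	hmdfs_unlock_file(filp, 0, i_size_read(file_inode(filp)));
+ */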
+int hmdfs_do_lock_file(struct file *filp, unsigned char fl_type, loff_t start,
+ loff_t len)
+{
+ struct file_lock fl;
+ int err;
+
+ locks_init_lock(&fl);
+
+ fl.fl_type = fl_type;
+ fl.fl_flags = FL_POSIX | FL_CLOSE | FL_SLEEP;
+ fl.fl_start = start;
+ fl.fl_end = start + len - 1;
+ fl.fl_owner = filp;
+ fl.fl_pid = current->tgid;
+ fl.fl_file = filp;
+ fl.fl_ops = NULL;
+ fl.fl_lmops = NULL;
+
+ err = locks_lock_file_wait(filp, &fl);
+ if (err)
+ hmdfs_err("lock file wait failed: %d", err);
+
+ return err;
+}
+
+int hmdfs_wlock_file(struct file *filp, loff_t start, loff_t len)
+{
+ return hmdfs_do_lock_file(filp, F_WRLCK, start, len);
+}
+
+int hmdfs_rlock_file(struct file *filp, loff_t start, loff_t len)
+{
+ return hmdfs_do_lock_file(filp, F_RDLCK, start, len);
+}
+
+int hmdfs_unlock_file(struct file *filp, loff_t start, loff_t len)
+{
+ return hmdfs_do_lock_file(filp, F_UNLCK, start, len);
+}
+
+long cache_file_truncate(struct hmdfs_sb_info *sbi, const struct path *path,
+ loff_t length)
+{
+ const struct cred *old_cred = hmdfs_override_creds(sbi->system_cred);
+ long ret = vfs_truncate(path, length);
+
+ hmdfs_revert_creds(old_cred);
+
+ return ret;
+}
+
+ssize_t cache_file_read(struct hmdfs_sb_info *sbi, struct file *filp, void *buf,
+ size_t count, loff_t *pos)
+{
+ const struct cred *old_cred = hmdfs_override_creds(sbi->system_cred);
+ ssize_t ret = kernel_read(filp, buf, count, pos);
+
+ hmdfs_revert_creds(old_cred);
+
+ return ret;
+}
+
+ssize_t cache_file_write(struct hmdfs_sb_info *sbi, struct file *filp,
+ const void *buf, size_t count, loff_t *pos)
+{
+ const struct cred *old_cred = hmdfs_override_creds(sbi->system_cred);
+ ssize_t ret = kernel_write(filp, buf, count, pos);
+
+ hmdfs_revert_creds(old_cred);
+
+ return ret;
+}
+
+int read_header(struct hmdfs_sb_info *sbi, struct file *filp,
+ struct hmdfs_dcache_header *header)
+{
+ ssize_t bytes;
+ loff_t pos = 0;
+
+ bytes = cache_file_read(sbi, filp, header, sizeof(*header), &pos);
+ if (bytes != sizeof(*header)) {
+ hmdfs_err("read file failed, err:%zd", bytes);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static unsigned long long cache_get_dentry_count(struct hmdfs_sb_info *sbi,
+ struct file *filp)
+{
+ struct hmdfs_dcache_header header;
+ int overallpage;
+
+ overallpage = get_dentry_group_cnt(file_inode(filp));
+ if (overallpage == 0)
+ return 0;
+
+ if (read_header(sbi, filp, &header))
+ return 0;
+
+ return le64_to_cpu(header.num);
+}
+
+static int cache_check_case_sensitive(struct hmdfs_sb_info *sbi,
+ struct file *filp)
+{
+ struct hmdfs_dcache_header header;
+
+ if (read_header(sbi, filp, &header))
+ return 0;
+
+ if (sbi->s_case_sensitive != (bool)header.case_sensitive) {
+ hmdfs_info("Case sensitive inconsistent, current fs is: %d, cache is %d, will drop cache",
+ sbi->s_case_sensitive, header.case_sensitive);
+ return 0;
+ }
+ return 1;
+}
+
+int write_header(struct file *filp, struct hmdfs_dcache_header *header)
+{
+ loff_t pos = 0;
+ ssize_t size;
+
+ size = kernel_write(filp, header, sizeof(*header), &pos);
+ if (size != sizeof(*header)) {
+ hmdfs_err("update dcache header failed %zd", size);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+void add_to_delete_list(struct hmdfs_sb_info *sbi, struct cache_file_node *cfn)
+{
+ mutex_lock(&sbi->cache_list_lock);
+ list_add_tail(&cfn->list, &sbi->to_delete);
+ mutex_unlock(&sbi->cache_list_lock);
+}
+
+void load_cfn(struct hmdfs_sb_info *sbi, const char *fullname, const char *path,
+ const char *cid, bool server)
+{
+ struct cache_file_node *cfn = NULL;
+ struct cache_file_node *cfn1 = NULL;
+ struct list_head *head = NULL;
+
+ cfn = create_cfn(sbi, path, cid, server);
+ if (!cfn)
+ return;
+
+ cfn->filp = filp_open(fullname, O_RDWR | O_LARGEFILE, 0);
+ if (IS_ERR(cfn->filp)) {
+ hmdfs_err("open fail %ld", PTR_ERR(cfn->filp));
+ goto out;
+ }
+
+ if (cache_get_dentry_count(sbi, cfn->filp) < sbi->dcache_threshold) {
+ add_to_delete_list(sbi, cfn);
+ return;
+ }
+
+ if (!cache_check_case_sensitive(sbi, cfn->filp)) {
+ add_to_delete_list(sbi, cfn);
+ return;
+ }
+
+ head = get_list_head(sbi, server);
+
+ mutex_lock(&sbi->cache_list_lock);
+ cfn1 = __find_cfn(sbi, cid, path, server);
+ if (!cfn1) {
+ list_add_tail(&cfn->list, head);
+ } else {
+ release_cfn(cfn1);
+ mutex_unlock(&sbi->cache_list_lock);
+ add_to_delete_list(sbi, cfn);
+ return;
+ }
+ mutex_unlock(&sbi->cache_list_lock);
+
+ return;
+out:
+ free_cfn(cfn);
+}
+
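+/*
+ * Parse a client cache file name of the form "<cid>_<hash>"; e.g. the
+ * (illustrative) name "f00dfeed_00000000deadbeef" yields cid "f00dfeed"
+ * and hash 0xdeadbeef.
+ */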
+static int get_cid_and_hash(const char *name, uint64_t *hash, char *cid)
+{
+ int len;
+ char *p = strchr(name, '_');
+
+ if (!p)
+ return -EINVAL;
+
+ len = p - name;
+ if (len >= HMDFS_CFN_CID_SIZE)
+ return -EINVAL;
+
+ memcpy(cid, name, len);
+ cid[len] = '\0';
+
+ if (sscanf(++p, "%llx", hash) != 1)
+ return -EINVAL;
+ return 0;
+}
+
+static void store_one(const char *name, struct cache_file_callback *cb)
+{
+ struct file *file = NULL;
+ char *fullname = NULL;
+ char *kvalue = NULL;
+ char cid[HMDFS_CFN_CID_SIZE];
+ uint64_t hash;
+ ssize_t error;
+
+ if (strlen(name) + strlen(cb->dirname) >= PATH_MAX)
+ return;
+
+ fullname = kzalloc(PATH_MAX, GFP_KERNEL);
+ if (!fullname)
+ return;
+
+ snprintf(fullname, PATH_MAX, "%s%s", cb->dirname, name);
+
+ file = filp_open(fullname, O_RDWR | O_LARGEFILE, 0);
+ if (IS_ERR(file)) {
+ hmdfs_err("open fail %ld", PTR_ERR(file));
+ goto out;
+ }
+
+ kvalue = kzalloc(PATH_MAX, GFP_KERNEL);
+ if (!kvalue)
+ goto out_file;
+
+ error = __vfs_getxattr(file_dentry(file), file_inode(file),
+ DENTRY_FILE_XATTR_NAME, kvalue, PATH_MAX);
+ if (error <= 0 || error >= PATH_MAX) {
+ hmdfs_err("getxattr return: %zd", error);
+ goto out_kvalue;
+ }
+ kvalue[error] = '\0';
+ cid[0] = '\0';
+
+ if (!cb->server) {
+ if (get_cid_and_hash(name, &hash, cid)) {
+ hmdfs_err("get cid and hash fail");
+ goto out_kvalue;
+ }
+ }
+
+ load_cfn(cb->sbi, fullname, kvalue, cid, cb->server);
+
+out_kvalue:
+ kfree(kvalue);
+out_file:
+ filp_close(file, NULL);
+out:
+ kfree(fullname);
+}
+
+static int cache_file_iterate(struct dir_context *ctx, const char *name,
+ int name_len, loff_t offset, u64 ino,
+ unsigned int d_type)
+{
+ struct cache_file_item *cfi = NULL;
+ struct cache_file_callback *cb =
+ container_of(ctx, struct cache_file_callback, ctx);
+
+ if (name_len > NAME_MAX) {
+ hmdfs_err("name_len:%d NAME_MAX:%u", name_len, NAME_MAX);
+ return 0;
+ }
+
+ if (d_type != DT_REG)
+ return 0;
+
+ cfi = kmalloc(sizeof(*cfi), GFP_KERNEL);
+ if (!cfi)
+ return -ENOMEM;
+
+ cfi->name = kstrndup(name, name_len, GFP_KERNEL);
+ if (!cfi->name) {
+ kfree(cfi);
+ return -ENOMEM;
+ }
+
+ list_add_tail(&cfi->list, &cb->list);
+
+ return 0;
+}
+
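+/*
+ * Load the persisted cache files under @fullname in two phases: the
+ * iterate_dir() callback only records the names of regular files, and
+ * each recorded file is then opened and loaded by store_one() afterwards,
+ * presumably to avoid opening files from inside the dir_context actor.
+ */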
+void hmdfs_do_load(struct hmdfs_sb_info *sbi, const char *fullname, bool server)
+{
+ struct file *file = NULL;
+ struct path dirpath;
+ int err;
+ struct cache_file_item *cfi = NULL;
+ struct cache_file_item *n = NULL;
+ struct cache_file_callback cb = {
+ .ctx.actor = cache_file_iterate,
+ .ctx.pos = 0,
+ .dirname = fullname,
+ .sbi = sbi,
+ .server = server,
+ };
+ INIT_LIST_HEAD(&cb.list);
+
+ err = kern_path(fullname, LOOKUP_DIRECTORY, &dirpath);
+ if (err) {
+ hmdfs_info("No file path");
+ return;
+ }
+
+ file = dentry_open(&dirpath, O_RDONLY, current_cred());
+ if (IS_ERR_OR_NULL(file)) {
+ hmdfs_err("dentry_open failed, error: %ld", PTR_ERR(file));
+ path_put(&dirpath);
+ return;
+ }
+
+ err = iterate_dir(file, &cb.ctx);
+ if (err)
+ hmdfs_err("iterate_dir failed, err: %d", err);
+
+ list_for_each_entry_safe(cfi, n, &cb.list, list) {
+ store_one(cfi->name, &cb);
+ list_del_init(&cfi->list);
+ kfree(cfi->name);
+ kfree(cfi);
+ }
+
+ fput(file);
+ path_put(&dirpath);
+}
+
+/*
+ * This function is only used to delete dentryfile.dat.
+ */
+int delete_dentry_file(struct file *filp)
+{
+ int err = 0;
+ struct dentry *dentry = file_dentry(filp);
+ struct dentry *parent = lock_parent(dentry);
+
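+	/*
+	 * Only unlink if the dentry still belongs to the locked parent;
+	 * a concurrent rename may have moved it elsewhere.
+	 */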
+ if (dentry->d_parent == parent) {
+ dget(dentry);
+ err = vfs_unlink(d_inode(parent), dentry, NULL);
+ dput(dentry);
+ }
+ unlock_dir(parent);
+
+ return err;
+}
+
+void hmdfs_delete_useless_cfn(struct hmdfs_sb_info *sbi)
+{
+ struct cache_file_node *cfn = NULL;
+ struct cache_file_node *n = NULL;
+
+ mutex_lock(&sbi->cache_list_lock);
+
+ list_for_each_entry_safe(cfn, n, &sbi->to_delete, list) {
+ delete_dentry_file(cfn->filp);
+ list_del_init(&cfn->list);
+ release_cfn(cfn);
+ }
+ mutex_unlock(&sbi->cache_list_lock);
+}
+
+void hmdfs_cfn_load(struct hmdfs_sb_info *sbi)
+{
+ char *fullname = NULL;
+
+ if (!sbi->s_dentry_cache)
+ return;
+
+ fullname = kzalloc(PATH_MAX, GFP_KERNEL);
+ if (!fullname)
+ return;
+
+ snprintf(fullname, PATH_MAX, "%s/dentry_cache/client/",
+ sbi->cache_dir);
+ hmdfs_do_load(sbi, fullname, false);
+
+ snprintf(fullname, PATH_MAX, "%s/dentry_cache/server/",
+ sbi->cache_dir);
+ hmdfs_do_load(sbi, fullname, true);
+ kfree(fullname);
+
+ hmdfs_delete_useless_cfn(sbi);
+}
+
+static void __cache_file_destroy_by_path(struct list_head *head,
+ const char *path)
+{
+ struct cache_file_node *cfn = NULL;
+ struct cache_file_node *n = NULL;
+
+ list_for_each_entry_safe(cfn, n, head, list) {
+ if (strcmp(path, cfn->relative_path) != 0)
+ continue;
+ list_del_init(&cfn->list);
+ delete_dentry_file(cfn->filp);
+ release_cfn(cfn);
+ }
+}
+
+static void cache_file_destroy_by_path(struct hmdfs_sb_info *sbi,
+ const char *path)
+{
+ mutex_lock(&sbi->cache_list_lock);
+
+ __cache_file_destroy_by_path(&sbi->server_cache, path);
+ __cache_file_destroy_by_path(&sbi->client_cache, path);
+
+ mutex_unlock(&sbi->cache_list_lock);
+}
+
+static void cache_file_find_and_delete(struct hmdfs_peer *con,
+ const char *relative_path)
+{
+ struct cache_file_node *cfn;
+
+ cfn = find_cfn(con->sbi, con->cid, relative_path, false);
+ if (!cfn)
+ return;
+
+ remove_cfn(cfn);
+ release_cfn(cfn);
+}
+
+void cache_file_delete_by_dentry(struct hmdfs_peer *con, struct dentry *dentry)
+{
+ char *relative_path = NULL;
+
+ relative_path = hmdfs_get_dentry_relative_path(dentry);
+ if (unlikely(!relative_path)) {
+ hmdfs_err("get relative path failed %d", -ENOMEM);
+ return;
+ }
+ cache_file_find_and_delete(con, relative_path);
+ kfree(relative_path);
+}
+
+struct file *hmdfs_get_new_dentry_file(struct hmdfs_peer *con,
+ const char *relative_path,
+ struct hmdfs_dcache_header *header)
+{
+ struct hmdfs_sb_info *sbi = con->sbi;
+ int len = strlen(relative_path);
+ struct file *filp = NULL;
+ int err;
+
+ filp = create_local_dentry_file_cache(sbi);
+ if (IS_ERR(filp))
+ return filp;
+
+ err = hmdfs_client_start_readdir(con, filp, relative_path, len, header);
+ if (err) {
+ if (err != -ENOENT)
+ hmdfs_err("readdir failed dev: %llu err: %d",
+ con->device_id, err);
+ fput(filp);
+ filp = ERR_PTR(err);
+ }
+
+ return filp;
+}
+
+void add_cfn_to_item(struct dentry *dentry, struct hmdfs_peer *con,
+ struct cache_file_node *cfn)
+{
+ struct file *file = cfn->filp;
+ int err;
+
+ err = hmdfs_add_cache_list(con->device_id, dentry, file);
+ if (unlikely(err)) {
+ hmdfs_err("add cache list failed devid:%llu err:%d",
+ con->device_id, err);
+ return;
+ }
+}
+
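+/*
+ * Add @file to the dentry's cache list. Directories holding at least
+ * dcache_threshold entries are persisted via cache_file_persistent();
+ * smaller ones are not persisted, and any stale persistent copy is
+ * deleted instead.
+ */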
+int hmdfs_add_file_to_cache(struct dentry *dentry, struct hmdfs_peer *con,
+ struct file *file, const char *relative_path)
+{
+ struct hmdfs_sb_info *sbi = con->sbi;
+ struct file *newf = file;
+
+ if (cache_get_dentry_count(sbi, file) >= sbi->dcache_threshold)
+ newf = cache_file_persistent(con, file, relative_path, false);
+ else
+ cache_file_find_and_delete(con, relative_path);
+
+ return hmdfs_add_cache_list(con->device_id, dentry, newf);
+}
+
+static struct file *read_header_and_revalidate(struct hmdfs_peer *con,
+ struct file *filp,
+ const char *relative_path)
+{
+ struct hmdfs_dcache_header header;
+ struct hmdfs_dcache_header *p = NULL;
+
+ if (read_header(con->sbi, filp, &header) == 0)
+ p = &header;
+
+ return hmdfs_get_new_dentry_file(con, relative_path, p);
+}
+
+void remote_file_revalidate_cfn(struct dentry *dentry, struct hmdfs_peer *con,
+ struct cache_file_node *cfn,
+ const char *relative_path)
+{
+ struct file *file = NULL;
+ int err;
+
+ file = read_header_and_revalidate(con, cfn->filp, relative_path);
+ if (IS_ERR(file))
+ return;
+
+ /*
+ * If the request returned ok but file length is 0, we assume
+ * that the server verified the client cache file is uptodate.
+ */
+ if (i_size_read(file->f_inode) == 0) {
+ hmdfs_info("The cfn cache for dev:%llu is uptodate",
+ con->device_id);
+ fput(file);
+ add_cfn_to_item(dentry, con, cfn);
+ return;
+ }
+
+ /* OK, cfn is not uptodate, let's remove it and add the new file */
+ remove_cfn(cfn);
+
+ err = hmdfs_add_file_to_cache(dentry, con, file, relative_path);
+ if (unlikely(err))
+ hmdfs_err("add cache list failed devid:%llu err:%d",
+ con->device_id, err);
+ fput(file);
+}
+
+void remote_file_revalidate_item(struct dentry *dentry, struct hmdfs_peer *con,
+ struct clearcache_item *item,
+ const char *relative_path)
+{
+ struct file *file = NULL;
+ int err;
+
+ file = read_header_and_revalidate(con, item->filp, relative_path);
+ if (IS_ERR(file))
+ return;
+
+ /*
+ * If the request returned ok but file length is 0, we assume
+ * that the server verified the client cache file is uptodate.
+ */
+ if (i_size_read(file->f_inode) == 0) {
+ hmdfs_info("The item cache for dev:%llu is uptodate",
+ con->device_id);
+ item->time = jiffies;
+ fput(file);
+ return;
+ }
+
+ /* We need to replace the old item */
+ remove_cache_item(item);
+ cache_file_find_and_delete(con, relative_path);
+
+ err = hmdfs_add_file_to_cache(dentry, con, file, relative_path);
+ if (unlikely(err))
+ hmdfs_err("add cache list failed devid:%llu err:%d",
+ con->device_id, err);
+ fput(file);
+}
+
+bool get_remote_dentry_file(struct dentry *dentry, struct hmdfs_peer *con)
+{
+ struct hmdfs_dentry_info *d_info = hmdfs_d(dentry);
+ struct cache_file_node *cfn = NULL;
+ struct hmdfs_sb_info *sbi = con->sbi;
+ char *relative_path = NULL;
+ int err = 0;
+ struct file *filp = NULL;
+ struct clearcache_item *item;
+
+ if (hmdfs_cache_revalidate(READ_ONCE(con->conn_time), con->device_id,
+ dentry))
+ return false;
+
+ relative_path = hmdfs_get_dentry_relative_path(dentry);
+ if (unlikely(!relative_path)) {
+ hmdfs_err("get relative path failed %d", -ENOMEM);
+ return false;
+ }
+ mutex_lock(&d_info->cache_pull_lock);
+ if (hmdfs_cache_revalidate(READ_ONCE(con->conn_time), con->device_id,
+ dentry))
+ goto out_unlock;
+
+ item = hmdfs_find_cache_item(con->device_id, dentry);
+ if (item) {
+ remote_file_revalidate_item(dentry, con, item, relative_path);
+ kref_put(&item->ref, release_cache_item);
+ goto out_unlock;
+ }
+
+ cfn = find_cfn(sbi, con->cid, relative_path, false);
+ if (cfn) {
+ remote_file_revalidate_cfn(dentry, con, cfn, relative_path);
+ release_cfn(cfn);
+ goto out_unlock;
+ }
+
+ filp = hmdfs_get_new_dentry_file(con, relative_path, NULL);
+ if (IS_ERR(filp)) {
+ err = PTR_ERR(filp);
+ goto out_unlock;
+ }
+
+ err = hmdfs_add_file_to_cache(dentry, con, filp, relative_path);
+ if (unlikely(err))
+ hmdfs_err("add cache list failed devid:%lu err:%d",
+ (unsigned long)con->device_id, err);
+ fput(filp);
+
+out_unlock:
+ mutex_unlock(&d_info->cache_pull_lock);
+ if (err && err != -ENOENT)
+ hmdfs_err("readdir failed dev:%lu err:%d",
+ (unsigned long)con->device_id, err);
+ kfree(relative_path);
+ return true;
+}
+
+int hmdfs_file_type(const char *name)
+{
+ if (!name)
+ return -EINVAL;
+
+ if (!strcmp(name, CURRENT_DIR) || !strcmp(name, PARENT_DIR))
+ return HMDFS_TYPE_DOT;
+
+ return HMDFS_TYPE_COMMON;
+}
+
+struct clearcache_item *hmdfs_find_cache_item(uint64_t dev_id,
+ struct dentry *dentry)
+{
+ struct clearcache_item *item = NULL;
+ struct hmdfs_dentry_info *d_info = hmdfs_d(dentry);
+
+ if (!d_info)
+ return NULL;
+
+ spin_lock(&d_info->cache_list_lock);
+ list_for_each_entry(item, &(d_info->cache_list_head), list) {
+ if (dev_id == item->dev_id) {
+ kref_get(&item->ref);
+ spin_unlock(&d_info->cache_list_lock);
+ return item;
+ }
+ }
+ spin_unlock(&d_info->cache_list_lock);
+ return NULL;
+}
+
+bool hmdfs_cache_revalidate(unsigned long conn_time, uint64_t dev_id,
+ struct dentry *dentry)
+{
+ bool ret = false;
+ struct clearcache_item *item = NULL;
+ struct hmdfs_dentry_info *d_info = hmdfs_d(dentry);
+ unsigned int timeout;
+
+ if (!d_info)
+ return ret;
+
+ timeout = hmdfs_sb(dentry->d_sb)->dcache_timeout;
+ spin_lock(&d_info->cache_list_lock);
+ list_for_each_entry(item, &(d_info->cache_list_head), list) {
+ if (dev_id == item->dev_id) {
+ ret = cache_item_revalidate(conn_time, item->time,
+ timeout);
+ break;
+ }
+ }
+ spin_unlock(&d_info->cache_list_lock);
+ return ret;
+}
+
+void remove_cache_item(struct clearcache_item *item)
+{
+ bool deleted;
+
+ spin_lock(&item->d_info->cache_list_lock);
+ deleted = list_empty(&item->list);
+ if (!deleted)
+ list_del_init(&item->list);
+ spin_unlock(&item->d_info->cache_list_lock);
+ if (!deleted)
+ kref_put(&item->ref, release_cache_item);
+}
+
+void release_cache_item(struct kref *ref)
+{
+ struct clearcache_item *item =
+ container_of(ref, struct clearcache_item, ref);
+
+ if (item->filp)
+ fput(item->filp);
+ kfree(item);
+}
+
+void hmdfs_remove_cache_filp(struct hmdfs_peer *con, struct dentry *dentry)
+{
+ struct clearcache_item *item = NULL;
+ struct clearcache_item *item_temp = NULL;
+ struct hmdfs_dentry_info *d_info = hmdfs_d(dentry);
+
+ if (!d_info)
+ return;
+
+ spin_lock(&d_info->cache_list_lock);
+ list_for_each_entry_safe(item, item_temp, &(d_info->cache_list_head),
+ list) {
+ if (con->device_id == item->dev_id) {
+ list_del_init(&item->list);
+ spin_unlock(&d_info->cache_list_lock);
+ cache_file_delete_by_dentry(con, dentry);
+ kref_put(&item->ref, release_cache_item);
+ return;
+ }
+ }
+ spin_unlock(&d_info->cache_list_lock);
+}
+
+int hmdfs_add_cache_list(uint64_t dev_id, struct dentry *dentry,
+ struct file *filp)
+{
+ struct clearcache_item *item = NULL;
+ struct hmdfs_dentry_info *d_info = hmdfs_d(dentry);
+
+ if (!d_info)
+ return -ENOMEM;
+
+ item = kzalloc(sizeof(*item), GFP_KERNEL);
+ if (!item)
+ return -ENOMEM;
+
+ item->dev_id = dev_id;
+ item->filp = get_file(filp);
+ item->time = jiffies;
+ item->d_info = d_info;
+ kref_init(&item->ref);
+ spin_lock(&d_info->cache_list_lock);
+ list_add_tail(&(item->list), &(d_info->cache_list_head));
+ spin_unlock(&d_info->cache_list_lock);
+ return 0;
+}
+
+void hmdfs_add_remote_cache_list(struct hmdfs_peer *con, const char *dir_path)
+{
+ int err = 0;
+ struct remotecache_item *item = NULL;
+ struct remotecache_item *item_temp = NULL;
+ struct path path, root_path;
+ struct hmdfs_dentry_info *d_info = NULL;
+
+ err = kern_path(con->sbi->local_dst, 0, &root_path);
+ if (err) {
+ hmdfs_err("kern_path failed err = %d", err);
+ return;
+ }
+
+ err = vfs_path_lookup(root_path.dentry, root_path.mnt, dir_path, 0,
+ &path);
+ if (err)
+ goto out_put_root;
+
+ d_info = hmdfs_d(path.dentry);
+ if (!d_info) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* find duplicate con */
+ mutex_lock(&d_info->remote_cache_list_lock);
+ list_for_each_entry_safe(item, item_temp,
+ &(d_info->remote_cache_list_head), list) {
+ if (item->con->device_id == con->device_id) {
+ mutex_unlock(&d_info->remote_cache_list_lock);
+ goto out;
+ }
+ }
+
+ item = kzalloc(sizeof(*item), GFP_KERNEL);
+ if (!item) {
+ err = -ENOMEM;
+ mutex_unlock(&d_info->remote_cache_list_lock);
+ goto out;
+ }
+
+ item->con = con;
+ item->drop_flag = 0;
+ list_add(&(item->list), &(d_info->remote_cache_list_head));
+ mutex_unlock(&d_info->remote_cache_list_lock);
+
+out:
+ path_put(&path);
+out_put_root:
+ path_put(&root_path);
+}
+
+int hmdfs_drop_remote_cache_dents(struct dentry *dentry)
+{
+ struct path lower_path;
+ struct inode *lower_inode = NULL;
+ struct remotecache_item *item = NULL;
+ struct remotecache_item *item_temp = NULL;
+ struct hmdfs_dentry_info *d_info = NULL;
+ char *relative_path = NULL;
+
+ if (!dentry) {
+ hmdfs_err("dentry null and return");
+ return 0;
+ }
+
+ d_info = hmdfs_d(dentry);
+ if (!d_info) {
+ hmdfs_err("d_info null and return");
+ return 0;
+ }
+ hmdfs_get_lower_path(dentry, &lower_path);
+ if (IS_ERR_OR_NULL(lower_path.dentry)) {
+ hmdfs_put_lower_path(&lower_path);
+ return 0;
+ }
+ lower_inode = d_inode(lower_path.dentry);
+ hmdfs_put_lower_path(&lower_path);
+ if (IS_ERR_OR_NULL(lower_inode))
+ return 0;
+ /* only for directory */
+ if (!S_ISDIR(lower_inode->i_mode))
+ return 0;
+
+ relative_path = hmdfs_get_dentry_relative_path(dentry);
+ if (!relative_path) {
+ hmdfs_err("get dentry relative path failed");
+ return 0;
+ }
+ mutex_lock(&d_info->remote_cache_list_lock);
+ list_for_each_entry_safe(item, item_temp,
+ &(d_info->remote_cache_list_head), list) {
+ if (item->drop_flag) {
+ item->drop_flag = 0;
+ continue;
+ }
+ mutex_unlock(&d_info->remote_cache_list_lock);
+ hmdfs_send_drop_push(item->con, relative_path);
+ mutex_lock(&d_info->remote_cache_list_lock);
+ list_del(&item->list);
+ kfree(item);
+ }
+ mutex_unlock(&d_info->remote_cache_list_lock);
+
+ kfree(relative_path);
+ return 0;
+}
+
+/* Clear the dentry cache files of target directory */
+int hmdfs_clear_cache_dents(struct dentry *dentry, bool remove_cache)
+{
+ struct clearcache_item *item = NULL;
+ struct clearcache_item *item_temp = NULL;
+ struct hmdfs_dentry_info *d_info = hmdfs_d(dentry);
+ char *path = NULL;
+
+ if (!d_info)
+ return 0;
+
+ spin_lock(&d_info->cache_list_lock);
+ list_for_each_entry_safe(item, item_temp, &(d_info->cache_list_head),
+ list) {
+ list_del_init(&item->list);
+ kref_put(&item->ref, release_cache_item);
+ }
+ spin_unlock(&d_info->cache_list_lock);
+
+ if (!remove_cache)
+ return 0;
+
+ /*
+ * We also need to confirm that there are no dentryfile_dev*
+ * files under this dentry.
+ */
+ path = hmdfs_get_dentry_relative_path(dentry);
+
+ if (unlikely(!path)) {
+ hmdfs_err("get relative path failed");
+ return 0;
+ }
+
+ cache_file_destroy_by_path(hmdfs_sb(dentry->d_sb), path);
+
+ kfree(path);
+ return 0;
+}
+
+void hmdfs_mark_drop_flag(uint64_t device_id, struct dentry *dentry)
+{
+ struct remotecache_item *item = NULL;
+ struct hmdfs_dentry_info *d_info = NULL;
+
+ d_info = hmdfs_d(dentry);
+ if (!d_info) {
+ hmdfs_err("d_info null and return");
+ return;
+ }
+
+ mutex_lock(&d_info->remote_cache_list_lock);
+ list_for_each_entry(item, &(d_info->remote_cache_list_head), list) {
+ if (item->con->device_id == device_id) {
+ item->drop_flag = 1;
+ break;
+ }
+ }
+ mutex_unlock(&d_info->remote_cache_list_lock);
+}
+
+void hmdfs_clear_drop_flag(struct dentry *dentry)
+{
+ struct remotecache_item *item = NULL;
+ struct hmdfs_dentry_info *d_info = NULL;
+
+ if (!dentry) {
+ hmdfs_err("dentry null and return");
+ return;
+ }
+
+ d_info = hmdfs_d(dentry);
+ if (!d_info) {
+ hmdfs_err("d_info null and return");
+ return;
+ }
+
+ mutex_lock(&d_info->remote_cache_list_lock);
+ list_for_each_entry(item, &(d_info->remote_cache_list_head), list) {
+ if (item->drop_flag)
+ item->drop_flag = 0;
+ }
+ mutex_unlock(&d_info->remote_cache_list_lock);
+}
+
+#define DUSTBIN_SUFFIX ".hwbk"
+static void hmdfs_rename_bak(struct dentry *dentry)
+{
+ struct path lower_path;
+ struct dentry *lower_parent = NULL;
+ struct dentry *lower_dentry = NULL;
+ struct dentry *new_dentry = NULL;
+ char *name = NULL;
+ int len = 0;
+ int err = 0;
+
+ hmdfs_get_lower_path(dentry, &lower_path);
+ lower_dentry = lower_path.dentry;
+ len = strlen(lower_dentry->d_name.name) + strlen(DUSTBIN_SUFFIX) + 2;
+ if (len >= NAME_MAX) {
+ err = -ENAMETOOLONG;
+ goto put_lower_path;
+ }
+
+ name = kmalloc(len, GFP_KERNEL);
+ if (!name) {
+ err = -ENOMEM;
+ goto put_lower_path;
+ }
+
+ snprintf(name, len, ".%s%s", lower_dentry->d_name.name, DUSTBIN_SUFFIX);
+ err = mnt_want_write(lower_path.mnt);
+ if (err) {
+ hmdfs_info("get write access failed, err %d", err);
+ goto free_name;
+ }
+
+ lower_parent = lock_parent(lower_dentry);
+ new_dentry = lookup_one_len(name, lower_parent, strlen(name));
+ if (IS_ERR(new_dentry)) {
+ err = PTR_ERR(new_dentry);
+ hmdfs_info("lookup new dentry failed, err %d", err);
+ goto unlock_parent;
+ }
+
+ err = vfs_rename(d_inode(lower_parent), lower_dentry,
+ d_inode(lower_parent), new_dentry, NULL, 0);
+
+ dput(new_dentry);
+unlock_parent:
+ unlock_dir(lower_parent);
+ mnt_drop_write(lower_path.mnt);
+free_name:
+ kfree(name);
+put_lower_path:
+ hmdfs_put_lower_path(&lower_path);
+
+ if (err)
+ hmdfs_err("failed to rename file, err %d", err);
+}
+
+int hmdfs_root_unlink(uint64_t device_id, struct path *root_path,
+ const char *unlink_dir, const char *unlink_name)
+{
+ int err = 0;
+ struct path path;
+ struct dentry *child_dentry = NULL;
+ struct inode *dir = NULL;
+ struct inode *child_inode = NULL;
+ kuid_t tmp_uid;
+
+ err = vfs_path_lookup(root_path->dentry, root_path->mnt,
+ unlink_dir, LOOKUP_DIRECTORY, &path);
+ if (err) {
+ hmdfs_err("found path failed err = %d", err);
+ return err;
+ }
+ dir = d_inode(path.dentry);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
+
+ child_dentry = lookup_one_len(unlink_name, path.dentry,
+ strlen(unlink_name));
+ if (IS_ERR(child_dentry)) {
+ err = PTR_ERR(child_dentry);
+ hmdfs_err("lookup_one_len failed, err = %d", err);
+ goto unlock_out;
+ }
+ if (d_is_negative(child_dentry)) {
+ err = -ENOENT;
+ dput(child_dentry);
+ goto unlock_out;
+ }
+ child_inode = d_inode(child_dentry);
+
+ tmp_uid = hmdfs_override_inode_uid(dir);
+
+ hmdfs_mark_drop_flag(device_id, path.dentry);
+ ihold(child_inode);
+ err = vfs_unlink(dir, child_dentry, NULL);
+ /*
+ * -EOWNERDEAD means we want to put the file in a special dir instead
+ * of deleting it, specifically the dustbin on the phone, so that the
+ * user can recover deleted images and videos.
+ */
+ if (err == -EOWNERDEAD) {
+ hmdfs_rename_bak(child_dentry);
+ err = 0;
+ }
+ if (err)
+ hmdfs_err("unlink path failed err = %d", err);
+ hmdfs_revert_inode_uid(dir, tmp_uid);
+ dput(child_dentry);
+
+unlock_out:
+ inode_unlock(dir);
+ if (child_inode)
+ iput(child_inode);
+ path_put(&path);
+ return err;
+}
+
+struct dentry *hmdfs_root_mkdir(uint64_t device_id, const char *local_dst_path,
+ const char *mkdir_dir, const char *mkdir_name,
+ umode_t mode)
+{
+ int err;
+ struct path path;
+ struct dentry *child_dentry = NULL;
+ struct dentry *ret = NULL;
+ char *mkdir_path = NULL;
+ char *mkdir_abs_path = NULL;
+
+ mkdir_path = hmdfs_connect_path(mkdir_dir, mkdir_name);
+ if (!mkdir_path)
+ return ERR_PTR(-EACCES);
+
+ mkdir_abs_path =
+ hmdfs_get_dentry_absolute_path(local_dst_path, mkdir_path);
+ if (!mkdir_abs_path) {
+ ret = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ child_dentry = kern_path_create(AT_FDCWD, mkdir_abs_path,
+ &path, LOOKUP_DIRECTORY);
+ if (IS_ERR(child_dentry)) {
+ ret = child_dentry;
+ goto out;
+ }
+
+ hmdfs_mark_drop_flag(device_id, child_dentry->d_parent);
+ err = vfs_mkdir(d_inode(path.dentry), child_dentry, mode);
+ if (err) {
+ hmdfs_err("mkdir failed! err=%d", err);
+ ret = ERR_PTR(err);
+ goto out_put;
+ }
+ ret = dget(child_dentry);
+out_put:
+ done_path_create(&path, child_dentry);
+out:
+ kfree(mkdir_path);
+ kfree(mkdir_abs_path);
+ return ret;
+}
+
+struct dentry *hmdfs_root_create(uint64_t device_id, const char *local_dst_path,
+ const char *create_dir,
+ const char *create_name,
+ umode_t mode, bool want_excl)
+{
+ int err;
+ struct path path;
+ struct dentry *child_dentry = NULL;
+ struct dentry *ret = NULL;
+ char *create_path = NULL;
+ char *create_abs_path = NULL;
+
+ create_path = hmdfs_connect_path(create_dir, create_name);
+ if (!create_path)
+ return ERR_PTR(-EACCES);
+
+ create_abs_path =
+ hmdfs_get_dentry_absolute_path(local_dst_path, create_path);
+ if (!create_abs_path) {
+ ret = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ child_dentry = kern_path_create(AT_FDCWD, create_abs_path, &path, 0);
+
+ if (IS_ERR(child_dentry)) {
+ ret = child_dentry;
+ goto out;
+ }
+ hmdfs_mark_drop_flag(device_id, child_dentry->d_parent);
+ err = vfs_create(d_inode(path.dentry), child_dentry, mode, want_excl);
+ if (err) {
+ hmdfs_err("path create failed! err=%d", err);
+ ret = ERR_PTR(err);
+ goto out_put;
+ }
+ ret = dget(child_dentry);
+out_put:
+ done_path_create(&path, child_dentry);
+out:
+ kfree(create_path);
+ kfree(create_abs_path);
+ return ret;
+}
+
+int hmdfs_root_rmdir(uint64_t device_id, struct path *root_path,
+ const char *rmdir_dir, const char *rmdir_name)
+{
+ int err = 0;
+ struct path path;
+ struct dentry *child_dentry = NULL;
+ struct inode *dir = NULL;
+
+ err = vfs_path_lookup(root_path->dentry, root_path->mnt,
+ rmdir_dir, LOOKUP_DIRECTORY, &path);
+ if (err) {
+ hmdfs_err("found path failed err = %d", err);
+ return err;
+ }
+ dir = d_inode(path.dentry);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
+
+ child_dentry = lookup_one_len(rmdir_name, path.dentry,
+ strlen(rmdir_name));
+ if (IS_ERR(child_dentry)) {
+ err = PTR_ERR(child_dentry);
+ hmdfs_err("lookup_one_len failed, err = %d", err);
+ goto unlock_out;
+ }
+ if (d_is_negative(child_dentry)) {
+ err = -ENOENT;
+ dput(child_dentry);
+ goto unlock_out;
+ }
+
+ hmdfs_mark_drop_flag(device_id, path.dentry);
+ err = vfs_rmdir(dir, child_dentry);
+ if (err)
+ hmdfs_err("rmdir failed err = %d", err);
+ dput(child_dentry);
+
+unlock_out:
+ inode_unlock(dir);
+ path_put(&path);
+ return err;
+}
+
+int hmdfs_root_rename(struct hmdfs_sb_info *sbi, uint64_t device_id,
+ const char *oldpath, const char *oldname,
+ const char *newpath, const char *newname,
+ unsigned int flags)
+{
+ int err = 0;
+ struct path path_dst;
+ struct path path_old;
+ struct path path_new;
+ struct dentry *trap = NULL;
+ struct dentry *old_dentry = NULL;
+ struct dentry *new_dentry = NULL;
+
+ err = kern_path(sbi->local_dst, 0, &path_dst);
+ if (err) {
+ hmdfs_err("kern_path for local dst failed %d", err);
+ return err;
+ }
+
+ err = vfs_path_lookup(path_dst.dentry, path_dst.mnt, oldpath, 0,
+ &path_old);
+ if (err) {
+ hmdfs_info("lookup oldpath from local_dst failed, err %d", err);
+ goto put_path_dst;
+ }
+
+ err = vfs_path_lookup(path_dst.dentry, path_dst.mnt, newpath, 0,
+ &path_new);
+ if (err) {
+ hmdfs_info("lookup newpath from local_dst failed, err %d", err);
+ goto put_path_old;
+ }
+
+ err = mnt_want_write(path_dst.mnt);
+ if (err) {
+ hmdfs_info("get write access failed for local_dst, err %d",
+ err);
+ goto put_path_new;
+ }
+
+ trap = lock_rename(path_new.dentry, path_old.dentry);
+
+ old_dentry = lookup_one_len(oldname, path_old.dentry, strlen(oldname));
+ if (IS_ERR(old_dentry)) {
+ err = PTR_ERR(old_dentry);
+ hmdfs_info("lookup old dentry failed, err %d", err);
+ goto unlock;
+ }
+
+ /* source should not be ancestor of target */
+ if (old_dentry == trap) {
+ err = -EINVAL;
+ goto put_old_dentry;
+ }
+
+ new_dentry = lookup_one_len(newname, path_new.dentry, strlen(newname));
+ if (IS_ERR(new_dentry)) {
+ err = PTR_ERR(new_dentry);
+ hmdfs_info("lookup new dentry failed, err %d", err);
+ goto put_old_dentry;
+ }
+
+ /*
+ * Exchange rename is not supported, thus target should not be an
+ * ancestor of source.
+ */
+ if (trap == new_dentry) {
+ err = -ENOTEMPTY;
+ goto put_new_dentry;
+ }
+
+ if (d_is_positive(new_dentry) && (flags & RENAME_NOREPLACE)) {
+ err = -EEXIST;
+ goto put_new_dentry;
+ }
+
+ hmdfs_mark_drop_flag(device_id, path_old.dentry);
+ if (path_old.dentry != path_new.dentry)
+ hmdfs_mark_drop_flag(device_id, path_new.dentry);
+
+ err = vfs_rename(d_inode(path_old.dentry), old_dentry,
+ d_inode(path_new.dentry), new_dentry, NULL, 0);
+
+put_new_dentry:
+ dput(new_dentry);
+put_old_dentry:
+ dput(old_dentry);
+unlock:
+ unlock_rename(path_new.dentry, path_old.dentry);
+ mnt_drop_write(path_dst.mnt);
+put_path_new:
+ path_put(&path_new);
+put_path_old:
+ path_put(&path_old);
+put_path_dst:
+ path_put(&path_dst);
+
+ return err;
+}
+
+int hmdfs_get_path_in_sb(struct super_block *sb, const char *name,
+ unsigned int flags, struct path *path)
+{
+ int err;
+
+ err = kern_path(name, flags, path);
+ if (err) {
+ hmdfs_err("can't get %s %d\n", name, err);
+ return err;
+ }
+
+ /* ensure the path belongs to this sb */
+ if (path->dentry->d_sb != sb) {
+ err = -EINVAL;
+ hmdfs_err("Wrong sb: %s on %s", name,
+ path->dentry->d_sb->s_type->name);
+ path_put(path);
+ }
+
+ return err;
+}
diff --git a/fs/hmdfs/hmdfs_dentryfile.h b/fs/hmdfs/hmdfs_dentryfile.h
new file mode 100644
index 0000000000000000000000000000000000000000..df1463007f15be5f3ba3180b4559a7a65a47eedf
--- /dev/null
+++ b/fs/hmdfs/hmdfs_dentryfile.h
@@ -0,0 +1,342 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/hmdfs_dentryfile.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_DENTRYFILE_H
+#define HMDFS_DENTRYFILE_H
+
+#include "hmdfs.h"
+#include
+
+/* use for escape from hmdfs file system, hmdfs hide follow names */
+#define CURRENT_DIR "."
+#define PARENT_DIR ".."
+
+/* local dentry cache data */
+#define DENTRY_FILE_XATTR_NAME "user.hmdfs_cache"
+
+#define DENTRY_FILE_NAME_RETRY 10
+
+#define MAX_BUCKET_LEVEL 63
+#define BUCKET_BLOCKS 2
+#define MAX_DIR_BUCKETS (1 << ((MAX_BUCKET_LEVEL / 2) - 1))
+
+#define CONFLICTING_FILE_CONST_SUFFIX "_conflict_dev"
+#define CONFLICTING_FILE_SUFFIX "_conflict_dev%u"
+#define CONFLICTING_DIR_SUFFIX "_remote_directory"
+
+#define POS_BIT_NUM 64
+#define DEV_ID_BIT_NUM 16
+#define GROUP_ID_BIT_NUM 39
+#define OFFSET_BIT_NUM 8
+#define OFFSET_BIT_MASK 0xFF
+
+#define DEFAULT_DCACHE_TIMEOUT 30
+#define DEFAULT_DCACHE_PRECISION 10
+#define DEFAULT_DCACHE_THRESHOLD 1000
+#define HMDFS_STALE_REMOTE_ISIZE ULLONG_MAX
+
+/* Seconds per-week */
+#define MAX_DCACHE_TIMEOUT 604800
+
+struct hmdfs_iterate_callback {
+ struct dir_context ctx;
+ struct dir_context *caller;
+ int result;
+ struct rb_root *root;
+};
+
+/*
+ * 4096 = version(1) + bitmap(10) + reserved(5)
+ * + nsl(80 * 43) + filename(80 * 8)
+ */
+#define DENTRYGROUP_SIZE 4096
+#define DENTRY_NAME_LEN 8
+#define DENTRY_RESERVED_LENGTH 3
+#define DENTRY_PER_GROUP 80
+#define DENTRY_BITMAP_LENGTH 10
+#define DENTRY_GROUP_RESERVED 5
+#define DENTRYGROUP_HEADER 4096
+
+struct hmdfs_dentry {
+ __le32 hash;
+ __le16 i_mode;
+ __le16 namelen;
+ __le64 i_size;
+ /* modification time */
+ __le64 i_mtime;
+ /* modification time in nano scale */
+ __le32 i_mtime_nsec;
+ /* combination of inode number and generation */
+ __le64 i_ino;
+ __le32 i_flag;
+ /* reserved bytes for future extension; the whole dentry totals 43 bytes */
+ __u8 reserved[DENTRY_RESERVED_LENGTH];
+} __packed;
+
+/* 4K / 51 bytes = 80 dentries per dentry group */
+struct hmdfs_dentry_group {
+ __u8 dentry_version; /* dentry version, starting from 1 */
+ __u8 bitmap[DENTRY_BITMAP_LENGTH];
+ struct hmdfs_dentry nsl[DENTRY_PER_GROUP];
+ __u8 filename[DENTRY_PER_GROUP][DENTRY_NAME_LEN];
+ __u8 reserved[DENTRY_GROUP_RESERVED];
+} __packed;
+
+/*
+ * The content of the first 4K block in dentryfile.dat.
+ * Used to check whether the dcache can be used directly or
+ * needs to be rebuilt.
+ *
+ * Since ctime has a precision of about 10ms, a dcache rebuilt at
+ * the same time as the dentry inode's ctime may be inconsistent
+ * with the directory.
+ * E.g. after creating 1.jpg, 2.jpg and 3.jpg, the rebuilt dcache
+ * may contain only 1.jpg and 2.jpg.
+ * So we need to use these times to verify the dcache.
+ */
+struct hmdfs_dcache_header {
+ /* The time of dcache rebuild */
+ __le64 dcache_crtime;
+ __le64 dcache_crtime_nsec;
+
+ /* The directory inode ctime when dcache rebuild */
+ __le64 dentry_ctime;
+ __le64 dentry_ctime_nsec;
+
+ /* The dentry count */
+ __le64 num;
+
+ /* The case sensitive */
+ __u8 case_sensitive;
+} __packed;
+
+static inline loff_t get_dentry_group_pos(unsigned int bidx)
+{
+ return ((loff_t)bidx) * DENTRYGROUP_SIZE + DENTRYGROUP_HEADER;
+}
+
+static inline unsigned int get_dentry_group_cnt(struct inode *inode)
+{
+ loff_t size = i_size_read(inode);
+
+ return size >= DENTRYGROUP_HEADER ?
+ (size - DENTRYGROUP_HEADER) / DENTRYGROUP_SIZE :
+ 0;
+}
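+
+/*
+ * Example: a dentryfile of 12288 bytes holds the 4K header plus
+ * (12288 - 4096) / 4096 = 2 dentry groups.
+ */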
+
+#define DENTRY_NAME_MAX_LEN (DENTRY_PER_GROUP * DENTRY_NAME_LEN)
+#define BITS_PER_BYTE 8
+#define HMDFS_SLOT_LEN_BITS 3
+#define get_dentry_slots(x) (((x) + BITS_PER_BYTE - 1) >> HMDFS_SLOT_LEN_BITS)
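+/* e.g. a 13-byte name occupies get_dentry_slots(13) = 2 slots of 8 bytes */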
+
+#define INUNUMBER_START 10000000
+
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+#define DENTRY_FILE_PERM 0660
+#else
+#define DENTRY_FILE_PERM 0666
+#endif
+
+struct hmdfs_dcache_lookup_ctx {
+ struct hmdfs_sb_info *sbi;
+ const struct qstr *name;
+ struct file *filp;
+ __u32 hash;
+
+ /* for case sensitive */
+ unsigned int bidx;
+ struct hmdfs_dentry_group *page;
+
+ /* for case insensitive */
+ struct hmdfs_dentry *insense_de;
+ unsigned int insense_bidx;
+ struct hmdfs_dentry_group *insense_page;
+};
+
+extern void hmdfs_init_dcache_lookup_ctx(struct hmdfs_dcache_lookup_ctx *ctx,
+ struct hmdfs_sb_info *sbi,
+ const struct qstr *qstr,
+ struct file *filp);
+
+int create_dentry(struct dentry *child_dentry, struct inode *inode,
+ struct file *file, struct hmdfs_sb_info *sbi);
+int read_dentry(struct hmdfs_sb_info *sbi, char *file_name,
+ struct dir_context *ctx);
+struct hmdfs_dentry *hmdfs_find_dentry(struct dentry *child_dentry,
+ struct hmdfs_dcache_lookup_ctx *ctx);
+void hmdfs_delete_dentry(struct dentry *d, struct file *filp);
+int hmdfs_rename_dentry(struct dentry *old_dentry, struct dentry *new_dentry,
+ struct file *old_filp, struct file *new_filp);
+int get_inonumber(void);
+struct file *create_local_dentry_file_cache(struct hmdfs_sb_info *sbi);
+int update_inode_to_dentry(struct dentry *child_dentry, struct inode *inode);
+struct file *cache_file_persistent(struct hmdfs_peer *con, struct file *filp,
+ const char *relative_path, bool server);
+
+#define HMDFS_TYPE_COMMON 0
+#define HMDFS_TYPE_DOT 1
+#define HMDFS_TYPE_DENTRY 2
+#define HMDFS_TYPE_DENTRY_CACHE 3
+int hmdfs_file_type(const char *name);
+
+loff_t hmdfs_set_pos(unsigned long dev_id, unsigned long group_id,
+ unsigned long offset);
+
+struct getdents_callback_real {
+ struct dir_context ctx;
+ struct path *parent_path;
+ loff_t num;
+ struct file *file;
+ struct hmdfs_sb_info *sbi;
+ const char *dir;
+};
+
+struct file *hmdfs_server_rebuild_dents(struct hmdfs_sb_info *sbi,
+ struct path *path, loff_t *num,
+ const char *dir);
+
+#define DCACHE_LIFETIME 30
+
+struct clearcache_item {
+ uint64_t dev_id;
+ struct file *filp;
+ unsigned long time;
+ struct list_head list;
+ struct kref ref;
+ struct hmdfs_dentry_info *d_info;
+};
+
+void hmdfs_add_remote_cache_list(struct hmdfs_peer *con, const char *dir_path);
+
+struct remotecache_item {
+ struct hmdfs_peer *con;
+ struct list_head list;
+ __u8 drop_flag;
+};
+
+#define HMDFS_CFN_CID_SIZE 65
+#define HMDFS_SERVER_CID ""
+
+struct cache_file_node {
+ struct list_head list;
+ struct hmdfs_sb_info *sbi;
+ char *relative_path;
+ u8 cid[HMDFS_CFN_CID_SIZE];
+ refcount_t ref;
+ bool server;
+ struct file *filp;
+};
+
+struct cache_file_item {
+ struct list_head list;
+ const char *name;
+};
+
+struct cache_file_callback {
+ struct dir_context ctx;
+ const char *dirname;
+ struct hmdfs_sb_info *sbi;
+ bool server;
+ struct list_head list;
+};
+
+int hmdfs_drop_remote_cache_dents(struct dentry *dentry);
+void hmdfs_send_drop_push(struct hmdfs_peer *con, const char *path);
+void hmdfs_mark_drop_flag(uint64_t device_id, struct dentry *dentry);
+void hmdfs_clear_drop_flag(struct dentry *dentry);
+void delete_in_cache_file(uint64_t dev_id, struct dentry *dentry);
+void create_in_cache_file(uint64_t dev_id, struct dentry *dentry);
+struct clearcache_item *hmdfs_find_cache_item(uint64_t dev_id,
+ struct dentry *dentry);
+bool hmdfs_cache_revalidate(unsigned long conn_time, uint64_t dev_id,
+ struct dentry *dentry);
+void hmdfs_remove_cache_filp(struct hmdfs_peer *con, struct dentry *dentry);
+int hmdfs_add_cache_list(uint64_t dev_id, struct dentry *dentry,
+ struct file *filp);
+int hmdfs_clear_cache_dents(struct dentry *dentry, bool remove_cache);
+
+int hmdfs_root_unlink(uint64_t device_id, struct path *root_path,
+ const char *unlink_dir, const char *unlink_name);
+struct dentry *hmdfs_root_mkdir(uint64_t device_id, const char *local_dst_path,
+ const char *mkdir_dir, const char *mkdir_name,
+ umode_t mode);
+struct dentry *hmdfs_root_create(uint64_t device_id, const char *local_dst_path,
+ const char *create_dir,
+ const char *create_name,
+ umode_t mode, bool want_excl);
+int hmdfs_root_rmdir(uint64_t device_id, struct path *root_path,
+ const char *rmdir_dir, const char *rmdir_name);
+int hmdfs_root_rename(struct hmdfs_sb_info *sbi, uint64_t device_id,
+ const char *oldpath, const char *oldname,
+ const char *newpath, const char *newname,
+ unsigned int flags);
+
+int hmdfs_get_path_in_sb(struct super_block *sb, const char *name,
+ unsigned int flags, struct path *path);
+
+int hmdfs_wlock_file(struct file *filp, loff_t start, loff_t len);
+int hmdfs_rlock_file(struct file *filp, loff_t start, loff_t len);
+int hmdfs_unlock_file(struct file *filp, loff_t start, loff_t len);
+long cache_file_truncate(struct hmdfs_sb_info *sbi, const struct path *path,
+ loff_t length);
+ssize_t cache_file_read(struct hmdfs_sb_info *sbi, struct file *filp, void *buf,
+ size_t count, loff_t *pos);
+ssize_t cache_file_write(struct hmdfs_sb_info *sbi, struct file *filp,
+ const void *buf, size_t count, loff_t *pos);
+int hmdfs_metainfo_read(struct hmdfs_sb_info *sbi, struct file *filp,
+ void *buffer, int buffersize, int bidx);
+
+bool get_remote_dentry_file(struct dentry *dentry, struct hmdfs_peer *con);
+void get_remote_dentry_file_sync(struct dentry *dentry, struct hmdfs_peer *con);
+
+void release_cache_item(struct kref *ref);
+void remove_cache_item(struct clearcache_item *item);
+
+void hmdfs_cfn_load(struct hmdfs_sb_info *sbi);
+void hmdfs_cfn_destroy(struct hmdfs_sb_info *sbi);
+struct cache_file_node *find_cfn(struct hmdfs_sb_info *sbi, const char *cid,
+ const char *path, bool server);
+void release_cfn(struct cache_file_node *cfn);
+void destroy_cfn(struct hmdfs_sb_info *sbi);
+void remove_cfn(struct cache_file_node *cfn);
+int delete_dentry_file(struct file *filp);
+struct file *hmdfs_server_cache_revalidate(struct hmdfs_sb_info *sbi,
+ const char *recvpath,
+ struct path *path);
+int write_header(struct file *filp, struct hmdfs_dcache_header *header);
+
+static inline struct list_head *get_list_head(struct hmdfs_sb_info *sbi,
+ bool server)
+{
+ return ((server) ? &(sbi)->server_cache : &(sbi)->client_cache);
+}
+
+/*
+ * generate_u64_ino - generate a new 64-bit inode number
+ *
+ * @ino: original 32-bit inode number
+ * @generation: original 32-bit inode generation
+ *
+ * We need both the remote inode number and generation to ensure the
+ * uniqueness of the local inode, so we store inode->i_ino in the lower
+ * 32 bits and inode->i_generation in the upper 32 bits.
+ * For example, ino 0x1234 with generation 0x1 yields 0x0000000100001234.
+ */
+static inline uint64_t generate_u64_ino(unsigned long ino,
+ unsigned int generation)
+{
+ return (uint64_t)ino | ((uint64_t)generation << 32);
+}
+
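+/*
+ * A cache item is still valid if it is younger than @timeout seconds and
+ * was created after the current connection was established.
+ */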
+static inline bool cache_item_revalidate(unsigned long conn_time,
+ unsigned long item_time,
+ unsigned int timeout)
+{
+ return time_before_eq(jiffies, item_time + timeout * HZ) &&
+ time_before_eq(conn_time, item_time);
+}
+
+#endif
diff --git a/fs/hmdfs/hmdfs_device_view.h b/fs/hmdfs/hmdfs_device_view.h
new file mode 100644
index 0000000000000000000000000000000000000000..dcc49fb89597e0d62504aa47ffd5d45766f0f65b
--- /dev/null
+++ b/fs/hmdfs/hmdfs_device_view.h
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/hmdfs_device_view.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_DEVICE_VIEW_H
+#define HMDFS_DEVICE_VIEW_H
+
+#include "hmdfs.h"
+
+/*****************************************************************************
+ * macro definitions
+ *****************************************************************************/
+
+#define DEVICE_VIEW_ROOT "device_view"
+#define MERGE_VIEW_ROOT "merge_view"
+#define UPDATE_LOCAL_DST "/device_view/local/"
+
+#define DEVICE_VIEW_LOCAL "local"
+
+/*
+ * In order to distinguish from the VFS, we define our own bitmask; it
+ * should be converted to the VFS bitmask when calling VFS APIs.
+ */
+#define HMDFS_LOOKUP_REVAL 0x1
+
+enum HMDFS_FILE_TYPE {
+ HM_REG = 0,
+ HM_SYMLINK = 1,
+
+ HM_MAX_FILE_TYPE = 0XFF
+};
+
+struct bydev_inode_info {
+ struct inode *lower_inode;
+ uint64_t ino;
+};
+
+struct hmdfs_dentry_info {
+ struct path lower_path;
+ unsigned long time;
+ struct list_head cache_list_head;
+ spinlock_t cache_list_lock;
+ struct list_head remote_cache_list_head;
+ struct mutex remote_cache_list_lock;
+ __u8 file_type;
+ __u8 dentry_type;
+ uint64_t device_id;
+ spinlock_t lock;
+ struct mutex cache_pull_lock;
+ bool async_readdir_in_progress;
+};
+
+struct hmdfs_lookup_ret {
+ uint64_t i_size;
+ uint64_t i_mtime;
+ uint32_t i_mtime_nsec;
+ uint16_t i_mode;
+ uint64_t i_ino;
+};
+
+struct hmdfs_getattr_ret {
+ /*
+ * if stat->result_mask is 0, it means this remote getattr failed with
+ * look up, see details in hmdfs_server_getattr.
+ */
+ struct kstat stat;
+ uint32_t i_flags;
+ uint64_t fsid;
+};
+
+extern int hmdfs_remote_getattr(struct hmdfs_peer *conn, struct dentry *dentry,
+ unsigned int lookup_flags,
+ struct hmdfs_getattr_ret **getattr_result);
+
+/*****************************************************************************
+ * local/remote inode/file operations
+ *****************************************************************************/
+
+extern const struct dentry_operations hmdfs_dops;
+extern const struct dentry_operations hmdfs_dev_dops;
+
+/* local device operation */
+extern const struct inode_operations hmdfs_file_iops_local;
+extern const struct file_operations hmdfs_file_fops_local;
+extern const struct inode_operations hmdfs_dir_inode_ops_local;
+extern const struct file_operations hmdfs_dir_ops_local;
+extern const struct inode_operations hmdfs_symlink_iops_local;
+
+/* remote device operation */
+extern const struct inode_operations hmdfs_dev_file_iops_remote;
+extern const struct file_operations hmdfs_dev_file_fops_remote;
+extern const struct address_space_operations hmdfs_dev_file_aops_remote;
+extern const struct inode_operations hmdfs_dev_dir_inode_ops_remote;
+extern const struct file_operations hmdfs_dev_dir_ops_remote;
+extern int hmdfs_dev_unlink_from_con(struct hmdfs_peer *conn,
+ struct dentry *dentry);
+extern int hmdfs_dev_readdir_from_con(struct hmdfs_peer *con, struct file *file,
+ struct dir_context *ctx);
+int hmdfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
+int hmdfs_rmdir(struct inode *dir, struct dentry *dentry);
+int hmdfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+ bool want_excl);
+int hmdfs_unlink(struct inode *dir, struct dentry *dentry);
+int hmdfs_remote_unlink(struct hmdfs_peer *conn, struct dentry *dentry);
+int hmdfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags);
+loff_t hmdfs_file_llseek_local(struct file *file, loff_t offset, int whence);
+ssize_t hmdfs_read_local(struct kiocb *iocb, struct iov_iter *iter);
+ssize_t hmdfs_write_local(struct kiocb *iocb, struct iov_iter *iter);
+int hmdfs_file_release_local(struct inode *inode, struct file *file);
+int hmdfs_file_mmap_local(struct file *file, struct vm_area_struct *vma);
+struct dentry *hmdfs_lookup(struct inode *parent_inode,
+ struct dentry *child_dentry, unsigned int flags);
+struct dentry *hmdfs_lookup_local(struct inode *parent_inode,
+ struct dentry *child_dentry,
+ unsigned int flags);
+struct dentry *hmdfs_lookup_remote(struct inode *parent_inode,
+ struct dentry *child_dentry,
+ unsigned int flags);
+int hmdfs_symlink_local(struct inode *dir, struct dentry *dentry,
+ const char *symname);
+int hmdfs_fsync_local(struct file *file, loff_t start, loff_t end,
+ int datasync);
+int hmdfs_symlink(struct inode *dir, struct dentry *dentry,
+ const char *symname);
+int hmdfs_fsync(struct file *file, loff_t start, loff_t end, int datasync);
+
+/*****************************************************************************
+ * common functions declaration
+ *****************************************************************************/
+
+static inline struct hmdfs_dentry_info *hmdfs_d(struct dentry *dentry)
+{
+ return dentry->d_fsdata;
+}
+
+static inline bool hm_isreg(uint8_t file_type)
+{
+ return (file_type == HM_REG);
+}
+
+static inline bool hm_islnk(uint8_t file_type)
+{
+ return (file_type == HM_SYMLINK);
+}
+struct inode *fill_inode_remote(struct super_block *sb, struct hmdfs_peer *con,
+ struct hmdfs_lookup_ret *lookup_result,
+ struct inode *dir);
+struct hmdfs_lookup_ret *get_remote_inode_info(struct hmdfs_peer *con,
+ struct dentry *dentry,
+ unsigned int flags);
+void hmdfs_set_time(struct dentry *dentry, unsigned long time);
+struct inode *fill_inode_local(struct super_block *sb,
+ struct inode *lower_inode);
+struct inode *fill_root_inode(struct super_block *sb,
+ struct inode *lower_inode);
+struct inode *fill_device_inode(struct super_block *sb,
+ struct inode *lower_inode);
+struct hmdfs_lookup_ret *hmdfs_lookup_by_con(struct hmdfs_peer *con,
+ struct dentry *dentry,
+ struct qstr *qstr,
+ unsigned int flags,
+ const char *relative_path);
+char *hmdfs_connect_path(const char *path, const char *name);
+
+char *hmdfs_get_dentry_relative_path(struct dentry *dentry);
+char *hmdfs_get_dentry_absolute_path(const char *rootdir,
+ const char *relative_path);
+int hmdfs_convert_lookup_flags(unsigned int hmdfs_flags,
+ unsigned int *vfs_flags);
+static inline void hmdfs_get_lower_path(struct dentry *dent, struct path *pname)
+{
+ spin_lock(&hmdfs_d(dent)->lock);
+ pname->dentry = hmdfs_d(dent)->lower_path.dentry;
+ pname->mnt = hmdfs_d(dent)->lower_path.mnt;
+ path_get(pname);
+ spin_unlock(&hmdfs_d(dent)->lock);
+}
+
+static inline void hmdfs_put_lower_path(struct path *pname)
+{
+ path_put(pname);
+}
+
+static inline void hmdfs_put_reset_lower_path(struct dentry *dent)
+{
+ struct path pname;
+
+ spin_lock(&hmdfs_d(dent)->lock);
+ if (hmdfs_d(dent)->lower_path.dentry) {
+ pname.dentry = hmdfs_d(dent)->lower_path.dentry;
+ pname.mnt = hmdfs_d(dent)->lower_path.mnt;
+ hmdfs_d(dent)->lower_path.dentry = NULL;
+ hmdfs_d(dent)->lower_path.mnt = NULL;
+ spin_unlock(&hmdfs_d(dent)->lock);
+ path_put(&pname);
+ } else {
+ spin_unlock(&hmdfs_d(dent)->lock);
+ }
+}
+
+static inline void hmdfs_set_lower_path(struct dentry *dent, struct path *pname)
+{
+ spin_lock(&hmdfs_d(dent)->lock);
+ hmdfs_d(dent)->lower_path.dentry = pname->dentry;
+ hmdfs_d(dent)->lower_path.mnt = pname->mnt;
+ spin_unlock(&hmdfs_d(dent)->lock);
+}
+
+/* Only reg file for HMDFS_LAYER_OTHER_* support xattr */
+static inline bool hmdfs_support_xattr(struct dentry *dentry)
+{
+ struct inode *inode = d_inode(dentry);
+ struct hmdfs_inode_info *info = hmdfs_i(inode);
+ struct hmdfs_dentry_info *gdi = hmdfs_d(dentry);
+
+ if (info->inode_type != HMDFS_LAYER_OTHER_LOCAL &&
+ info->inode_type != HMDFS_LAYER_OTHER_REMOTE)
+ return false;
+
+ if (!S_ISREG(inode->i_mode))
+ return false;
+
+ if (hm_islnk(gdi->file_type))
+ return false;
+
+ return true;
+}
+
+int init_hmdfs_dentry_info(struct hmdfs_sb_info *sbi, struct dentry *dentry,
+ int dentry_type);
+
+#endif
diff --git a/fs/hmdfs/hmdfs_merge_view.h b/fs/hmdfs/hmdfs_merge_view.h
new file mode 100644
index 0000000000000000000000000000000000000000..01064b3d98dfb2e092b7d83268628631e25ba2c3
--- /dev/null
+++ b/fs/hmdfs/hmdfs_merge_view.h
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/hmdfs_merge_view.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_MERGE_VIEW_H
+#define HMDFS_MERGE_VIEW_H
+
+#include "hmdfs.h"
+
+#include "comm/connection.h"
+#include
+
+/*****************************************************************************
+ * Dentries for the merge view and their comrades.
+ * A dentry's lower dentry is named COMRADE.
+ *****************************************************************************/
+
+struct hmdfs_dentry_info_merge {
+ unsigned long ctime;
+ // For the merge view to link dentries with same names
+ struct mutex comrade_list_lock;
+ struct list_head comrade_list;
+};
+
+struct hmdfs_dentry_comrade {
+ uint64_t dev_id;
+ struct dentry *lo_d;
+ struct list_head list;
+};
+
+enum FILE_CMD_MERGE {
+ F_MKDIR_MERGE = 0,
+ F_CREATE_MERGE = 1,
+ F_SYMLINK_MERGE = 2,
+};
+
+struct hmdfs_recursive_para {
+ bool is_last;
+ int opcode;
+ umode_t mode;
+ bool want_excl;
+ const char *name;
+};
+
+static inline struct hmdfs_dentry_info_merge *hmdfs_dm(struct dentry *dentry)
+{
+ return dentry->d_fsdata;
+}
+
+static inline umode_t hmdfs_cm(struct hmdfs_dentry_comrade *comrade)
+{
+ return d_inode(comrade->lo_d)->i_mode;
+}
+
+static inline bool comrade_is_local(struct hmdfs_dentry_comrade *comrade)
+{
+ return comrade->dev_id == HMDFS_DEVID_LOCAL;
+}
+
+struct dentry *hmdfs_lookup_merge(struct inode *parent_inode,
+ struct dentry *child_dentry,
+ unsigned int flags);
+
+struct hmdfs_dentry_comrade *alloc_comrade(struct dentry *lo_d, int dev_id);
+
+void link_comrade(struct list_head *onstack_comrades_head,
+ struct hmdfs_dentry_comrade *comrade);
+
+static inline void destroy_comrade(struct hmdfs_dentry_comrade *comrade)
+{
+ dput(comrade->lo_d);
+ kfree(comrade);
+}
+
+void clear_comrades(struct dentry *dentry);
+
+static inline void link_comrade_unlocked(struct dentry *dentry,
+ struct hmdfs_dentry_comrade *comrade)
+{
+ mutex_lock(&hmdfs_dm(dentry)->comrade_list_lock);
+ link_comrade(&hmdfs_dm(dentry)->comrade_list, comrade);
+ mutex_unlock(&hmdfs_dm(dentry)->comrade_list_lock);
+}
+
+void clear_comrades_locked(struct list_head *comrade_list);
+
+#define for_each_comrade_locked(_dentry, _comrade) \
+ list_for_each_entry(_comrade, &(hmdfs_dm(_dentry)->comrade_list), list)
+
+#define hmdfs_trace_merge(_trace_func, _parent_inode, _child_dentry, err) \
+	do { \
+		struct hmdfs_dentry_comrade *comrade; \
+		struct hmdfs_dentry_info_merge *dm = hmdfs_dm(_child_dentry); \
+		_trace_func(_parent_inode, _child_dentry, err); \
+		if (likely(dm)) { \
+			mutex_lock(&dm->comrade_list_lock); \
+			for_each_comrade_locked(_child_dentry, comrade) \
+				trace_hmdfs_show_comrade(_child_dentry, \
+							 comrade->lo_d, \
+							 comrade->dev_id); \
+			mutex_unlock(&dm->comrade_list_lock); \
+		} \
+	} while (0)
+
+#define hmdfs_trace_rename_merge(olddir, olddentry, newdir, newdentry, err) \
+	do { \
+		struct hmdfs_dentry_comrade *comrade; \
+		trace_hmdfs_rename_merge(olddir, olddentry, newdir, newdentry, \
+					 err); \
+		mutex_lock(&hmdfs_dm(olddentry)->comrade_list_lock); \
+		for_each_comrade_locked(olddentry, comrade) \
+			trace_hmdfs_show_comrade(olddentry, comrade->lo_d, \
+						 comrade->dev_id); \
+		mutex_unlock(&hmdfs_dm(olddentry)->comrade_list_lock); \
+		mutex_lock(&hmdfs_dm(newdentry)->comrade_list_lock); \
+		for_each_comrade_locked(newdentry, comrade) \
+			trace_hmdfs_show_comrade(newdentry, comrade->lo_d, \
+						 comrade->dev_id); \
+		mutex_unlock(&hmdfs_dm(newdentry)->comrade_list_lock); \
+	} while (0)
+
+/*****************************************************************************
+ * Helper functions abstracting out comrades
+ *****************************************************************************/
+
+static inline bool hmdfs_i_merge(struct hmdfs_inode_info *hii)
+{
+ __u8 t = hii->inode_type;
+
+ return t == HMDFS_LAYER_FIRST_MERGE || t == HMDFS_LAYER_OTHER_MERGE;
+}
+
+struct dentry *hmdfs_get_lo_d(struct dentry *dentry, int dev_id);
+struct dentry *hmdfs_get_fst_lo_d(struct dentry *dentry);
+
+/*****************************************************************************
+ * Inode operations for the merge view
+ *****************************************************************************/
+
+extern const struct inode_operations hmdfs_file_iops_merge;
+extern const struct file_operations hmdfs_file_fops_merge;
+extern const struct inode_operations hmdfs_symlink_iops_merge;
+extern const struct inode_operations hmdfs_dir_iops_merge;
+extern const struct file_operations hmdfs_dir_fops_merge;
+extern const struct dentry_operations hmdfs_dops_merge;
+
+/*****************************************************************************
+ * dentry cache for the merge view
+ *****************************************************************************/
+extern struct kmem_cache *hmdfs_dentry_merge_cachep;
+
+#endif // HMDFS_MERGE_VIEW_H
diff --git a/fs/hmdfs/hmdfs_server.c b/fs/hmdfs/hmdfs_server.c
new file mode 100644
index 0000000000000000000000000000000000000000..c50e9f9de8429aac4ba3a9944e2f7be486cd83ef
--- /dev/null
+++ b/fs/hmdfs/hmdfs_server.c
@@ -0,0 +1,2073 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/hmdfs_server.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include "hmdfs_server.h"
+
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/statfs.h>
+#include <linux/xattr.h>
+
+#include "authority/authentication.h"
+#include "comm/fault_inject.h"
+#include "hmdfs.h"
+#include "hmdfs_dentryfile.h"
+#include "hmdfs_trace.h"
+#include "server_writeback.h"
+#include "comm/node_cb.h"
+
+#define HMDFS_MAX_HIDDEN_DIR 1
+
+struct hmdfs_open_info {
+ struct file *file;
+ struct inode *inode;
+ bool stat_valid;
+ struct kstat stat;
+ uint64_t real_ino;
+ int file_id;
+};
+
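+/*
+ * Allocate a cyclic id for @file in the peer's idr. idr_preload() plus
+ * GFP_NOWAIT is used because the allocation itself happens under the
+ * file_id_lock spinlock, where sleeping GFP_KERNEL allocations are not
+ * allowed.
+ */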
+static int insert_file_into_conn(struct hmdfs_peer *conn, struct file *file)
+{
+ struct idr *idr = &(conn->file_id_idr);
+ int ret;
+
+ idr_preload(GFP_KERNEL);
+ spin_lock(&(conn->file_id_lock));
+ ret = idr_alloc_cyclic(idr, file, 0, 0, GFP_NOWAIT);
+ spin_unlock(&(conn->file_id_lock));
+ idr_preload_end();
+ return ret;
+}
+
+/*
+ * get_file_from_conn - get a file from conn by file_id. Note that an
+ * additional reference is acquired for the returned file; the caller should
+ * put it once the file is no longer used.
+ */
+static struct file *get_file_from_conn(struct hmdfs_peer *conn, __u32 file_id)
+{
+ struct file *file;
+ struct idr *idr = &(conn->file_id_idr);
+
+ rcu_read_lock();
+ file = idr_find(idr, file_id);
+ if (file && !get_file_rcu(file))
+ file = NULL;
+ rcu_read_unlock();
+ return file;
+}
+
+void remove_file_from_conn(struct hmdfs_peer *conn, __u32 file_id)
+{
+ spinlock_t *lock = &(conn->file_id_lock);
+ struct idr *idr = &(conn->file_id_idr);
+
+ spin_lock(lock);
+ idr_remove(idr, file_id);
+ spin_unlock(lock);
+}
+
+struct file *hmdfs_open_photokit_path(struct hmdfs_sb_info *sbi,
+ const char *path)
+{
+ struct file *file;
+ int err;
+ const char *root_name = sbi->local_dst;
+ char *real_path;
+ int path_len;
+
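+	/* +2 covers the '/' separator and the trailing NUL */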
+ path_len = strlen(root_name) + strlen(path) + 2;
+ if (path_len >= PATH_MAX) {
+ err = -EINVAL;
+ return ERR_PTR(err);
+ }
+ real_path = kzalloc(path_len, GFP_KERNEL);
+ if (!real_path) {
+ err = -ENOMEM;
+ return ERR_PTR(err);
+ }
+
+ sprintf(real_path, "%s/%s", root_name, path);
+ file = filp_open(real_path, O_RDWR | O_LARGEFILE, 0644);
+ if (IS_ERR(file)) {
+ hmdfs_info("filp_open failed: %ld", PTR_ERR(file));
+ } else {
+ hmdfs_info("get file with magic %lu",
+ file->f_inode->i_sb->s_magic);
+ }
+
+ kfree(real_path);
+ return file;
+}
+
+struct file *hmdfs_open_path(struct hmdfs_sb_info *sbi, const char *path)
+{
+ struct path root_path;
+ struct file *file;
+ int err;
+ const char *root_name = sbi->local_dst;
+
+ err = kern_path(root_name, 0, &root_path);
+ if (err) {
+ hmdfs_info("kern_path failed: %d", err);
+ return ERR_PTR(err);
+ }
+ file = file_open_root(&root_path, path,
+ O_RDWR | O_LARGEFILE, 0644);
+ path_put(&root_path);
+ if (IS_ERR(file)) {
+ hmdfs_err(
+ "GRAPERR sb->s_readonly_remount %d sb_flag %lu",
+ sbi->sb->s_readonly_remount, sbi->sb->s_flags);
+ hmdfs_info("file_open_root failed: %ld", PTR_ERR(file));
+ } else {
+ hmdfs_info("get file with magic %lu",
+ file->f_inode->i_sb->s_magic);
+ }
+ return file;
+}
+
+inline void hmdfs_close_path(struct file *file)
+{
+ fput(file);
+}
+
+/* After going offline, the server closes all files opened by the client */
+void hmdfs_server_offline_notify(struct hmdfs_peer *conn, int evt,
+ unsigned int seq)
+{
+ int id;
+ int count = 0;
+ unsigned int next;
+ struct file *filp = NULL;
+ struct idr *idr = &conn->file_id_idr;
+
+	/* wait for all async work to complete */
+ flush_workqueue(conn->req_handle_wq);
+ flush_workqueue(conn->async_wq);
+
+	/*
+	 * If there were open requests still being processed, we may need
+	 * to close their files now that the peer is offline.
+	 */
+ idr_for_each_entry(idr, filp, id) {
+ hmdfs_debug("[%d]Server close: id=%d", count, id);
+ hmdfs_close_path(filp);
+ count++;
+ if (count % HMDFS_IDR_RESCHED_COUNT == 0)
+ cond_resched();
+ }
+
+	/*
+	 * Reinitialize the idr, keeping the cursor so new ids do not
+	 * immediately reuse the old ones.
+	 */
+ next = idr_get_cursor(idr);
+ idr_destroy(idr);
+
+ idr_init(idr);
+ idr_set_cursor(idr, next);
+
+	/* Invalidate all previously issued file ids */
+ conn->fid_cookie++;
+}
+
+static struct hmdfs_node_cb_desc server_cb[] = {
+ {
+ .evt = NODE_EVT_OFFLINE,
+ .sync = true,
+ .min_version = DFS_2_0,
+ .fn = hmdfs_server_offline_notify
+ },
+};
+
+void __init hmdfs_server_add_node_evt_cb(void)
+{
+ hmdfs_node_add_evt_cb(server_cb, ARRAY_SIZE(server_cb));
+}
+
+static int hmdfs_get_inode_by_name(struct hmdfs_peer *con, const char *filename,
+ uint64_t *ino)
+{
+ int ret = 0;
+ struct path root_path;
+ struct path dst_path;
+ struct inode *inode = NULL;
+
+ ret = kern_path(con->sbi->local_dst, 0, &root_path);
+ if (ret) {
+ hmdfs_err("kern_path failed err = %d", ret);
+ return ret;
+ }
+
+ ret = vfs_path_lookup(root_path.dentry, root_path.mnt, filename, 0,
+ &dst_path);
+ if (ret) {
+ path_put(&root_path);
+ return ret;
+ }
+
+ inode = d_inode(dst_path.dentry);
+ if (con->sbi->sb == inode->i_sb)
+ inode = hmdfs_i(inode)->lower_inode;
+ *ino = generate_u64_ino(inode->i_ino, inode->i_generation);
+
+ path_put(&dst_path);
+ path_put(&root_path);
+
+ return 0;
+}
+
+static struct file *hmdfs_open_file(struct hmdfs_peer *con,
+ const char *filename, uint8_t file_type,
+ int *file_id)
+{
+ struct file *file = NULL;
+ int id;
+
+ if (!filename) {
+ hmdfs_err("filename is NULL");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (hm_islnk(file_type))
+ file = hmdfs_open_photokit_path(con->sbi, filename);
+ else
+ file = hmdfs_open_path(con->sbi, filename);
+ if (IS_ERR(file))
+ return file;
+
+ id = insert_file_into_conn(con, file);
+ if (id < 0) {
+ hmdfs_err("file_id alloc failed! err=%d", id);
+ hmdfs_close_path(file);
+ return ERR_PTR(id);
+ }
+ *file_id = id;
+
+ return file;
+}
+
+static struct hmdfs_time_t msec_to_timespec(unsigned int msec)
+{
+ struct hmdfs_time_t timespec = {
+ .tv_sec = msec / MSEC_PER_SEC,
+ .tv_nsec = (msec % MSEC_PER_SEC) * NSEC_PER_MSEC,
+ };
+
+ return timespec;
+}
+
+static struct hmdfs_time_t hmdfs_current_kernel_time(void)
+{
+ struct hmdfs_time_t time;
+
+#if KERNEL_VERSION(4, 18, 0) < LINUX_VERSION_CODE
+ ktime_get_coarse_real_ts64(&time);
+#else
+ time = current_kernel_time();
+#endif
+ return time;
+}
+
+/*
+ * Generate the fid version in the following format:
+ *
+ * |    boot cookie    | con cookie |
+ * |-------------------|------------|
+ * |      49 bits      |  15 bits   |
+ */
+static uint64_t hmdfs_server_pack_fid_ver(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *cmd)
+{
+ uint64_t boot_cookie = con->sbi->boot_cookie;
+ uint16_t con_cookie = con->fid_cookie;
+
+ if (hmdfs_should_fake_fid_ver(&con->sbi->fault_inject, con,
+ cmd, T_BOOT_COOKIE))
+ boot_cookie = hmdfs_gen_boot_cookie();
+
+ if (hmdfs_should_fake_fid_ver(&con->sbi->fault_inject, con,
+ cmd, T_CON_COOKIE))
+ con_cookie++;
+
+ return (boot_cookie |
+ (con_cookie & ((1 << HMDFS_FID_VER_BOOT_COOKIE_SHIFT) - 1)));
+}
+
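+/*
+ * A fid version mismatch means the file id was issued before the server
+ * rebooted (boot cookie changed) or before the peer reconnected (con
+ * cookie changed), so the id is treated as stale.
+ */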
+static struct file *get_file_by_fid_and_ver(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *cmd,
+ __u32 file_id, __u64 file_ver)
+{
+ struct file *file = NULL;
+ __u64 cur_file_ver = hmdfs_server_pack_fid_ver(con, cmd);
+
+ if (file_ver != cur_file_ver) {
+ hmdfs_warning("Stale file version %llu for fid %u (ver %llu)",
+ file_ver, file_id, cur_file_ver);
+ return ERR_PTR(-EBADF);
+ }
+
+ file = get_file_from_conn(con, file_id);
+ if (!file)
+ return ERR_PTR(-EBADF);
+
+ return file;
+}
+
+static void hmdfs_update_open_response(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *cmd,
+ struct hmdfs_open_info *info,
+ struct open_response *resp)
+{
+ struct hmdfs_time_t current_time = hmdfs_current_kernel_time();
+ struct hmdfs_time_t ctime = info->stat_valid ? info->stat.ctime :
+ info->inode->i_ctime;
+ struct hmdfs_time_t precision =
+ msec_to_timespec(con->sbi->dcache_precision);
+ loff_t size = info->stat_valid ? info->stat.size :
+ i_size_read(info->inode);
+
+ resp->ino = cpu_to_le64(info->real_ino);
+ resp->file_ver = cpu_to_le64(hmdfs_server_pack_fid_ver(con, cmd));
+ resp->file_id = cpu_to_le32(info->file_id);
+ resp->file_size = cpu_to_le64(size);
+ resp->ctime = cpu_to_le64(ctime.tv_sec);
+ resp->ctime_nsec = cpu_to_le32(ctime.tv_nsec);
+
+ /*
+	 * On the server, ctime might stay the same after an overwrite. We
+	 * introduce a new value, stable_ctime, to handle the problem:
+	 * - if open rpc time < ctime, stable_ctime = 0;
+	 * - if ctime <= open rpc time < ctime + dcache_precision,
+	 *   stable_ctime = ctime;
+	 * - else, stable_ctime = ctime + dcache_precision.
+ */
+ precision = hmdfs_time_add(ctime, precision);
+	if (hmdfs_time_compare(&current_time, &ctime) < 0) {
+ resp->stable_ctime = cpu_to_le64(0);
+ resp->stable_ctime_nsec = cpu_to_le32(0);
+	} else if (hmdfs_time_compare(&current_time, &ctime) >= 0 &&
+		   hmdfs_time_compare(&current_time, &precision) < 0) {
+ resp->stable_ctime = resp->ctime;
+ resp->stable_ctime_nsec = resp->ctime_nsec;
+ } else {
+ resp->stable_ctime = cpu_to_le64(precision.tv_sec);
+ resp->stable_ctime_nsec = cpu_to_le32(precision.tv_nsec);
+ }
+}
+
+static int hmdfs_get_open_info(struct hmdfs_peer *con, uint8_t file_type,
+ const char *filename,
+ struct hmdfs_open_info *info)
+{
+ int ret = 0;
+
+ info->inode = file_inode(info->file);
+ info->stat_valid = false;
+ if (con->sbi->sb == info->inode->i_sb) {
+ /* if open a regular file */
+ info->inode = hmdfs_i(info->inode)->lower_inode;
+ } else if (con->sbi->lower_sb != info->inode->i_sb) {
+		/*
+		 * It's possible that the inode is not from the lower fs, for
+		 * example:
+		 * 1. touch /f2fs/file
+		 * 2. ln -s /sdcard_fs/file /f2fs/link
+		 * 3. cat /hmdfs/link -> generates dentry cache in sdcard_fs
+		 * 4. echo hi >> /hmdfs/file -> append write not through
+		 *    sdcard_fs
+		 * 5. cat /hmdfs/link -> gets the inode in sdcard_fs, whose
+		 *    size is still 0
+		 *
+		 * If the src file isn't in the lower fs, use getattr to get
+		 * its attributes.
+		 */
+		ret = vfs_getattr(&info->file->f_path, &info->stat,
+				  STATX_BASIC_STATS | STATX_BTIME, 0);
+ if (ret) {
+ hmdfs_err("call vfs_getattr failed, err %d", ret);
+ return ret;
+ }
+ info->stat_valid = true;
+ }
+
+ /* if open a link file, get ino from link inode */
+ if (hm_islnk(file_type)) {
+ ret = hmdfs_get_inode_by_name(con, filename, &info->real_ino);
+ if (ret)
+ return ret;
+ } else {
+ info->real_ino = generate_u64_ino(info->inode->i_ino,
+ info->inode->i_generation);
+ }
+
+ return 0;
+}
+
+void hmdfs_server_open(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data)
+{
+ struct open_request *recv = data;
+ int sizeread = sizeof(struct open_response);
+ struct open_response *resp = NULL;
+ struct hmdfs_open_info *info = NULL;
+ int ret = 0;
+
+ trace_hmdfs_server_open_enter(con, recv);
+ if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &ret))
+ goto out_err;
+
+ resp = kzalloc(sizeread, GFP_KERNEL);
+ info = kmalloc(sizeof(*info), GFP_KERNEL);
+ if (!resp || !info) {
+ ret = -ENOMEM;
+ goto err_free;
+ }
+
+ info->file = hmdfs_open_file(con, recv->buf, recv->file_type,
+ &info->file_id);
+ if (IS_ERR(info->file)) {
+ ret = PTR_ERR(info->file);
+ goto err_free;
+ }
+
+ ret = hmdfs_get_open_info(con, recv->file_type, recv->buf, info);
+ if (ret)
+ goto err_close;
+
+ hmdfs_update_open_response(con, cmd, info, resp);
+
+ trace_hmdfs_server_open_exit(con, resp, info->file, 0);
+ ret = hmdfs_sendmessage_response(con, cmd, sizeread, resp, 0);
+ if (ret) {
+ hmdfs_err("sending msg response failed, file_id %d, err %d",
+ info->file_id, ret);
+ remove_file_from_conn(con, info->file_id);
+ hmdfs_close_path(info->file);
+ }
+ kfree(resp);
+ kfree(info);
+ return;
+
+err_close:
+ remove_file_from_conn(con, info->file_id);
+ hmdfs_close_path(info->file);
+err_free:
+ kfree(resp);
+ kfree(info);
+out_err:
+ trace_hmdfs_server_open_exit(con, NULL, NULL, ret);
+ hmdfs_send_err_response(con, cmd, ret);
+}
+
+static int hmdfs_check_and_create(struct path *path_parent,
+ struct dentry *dentry, uint64_t device_id,
+ umode_t mode, bool is_excl)
+{
+ int err = 0;
+
+ /* if inode doesn't exist, create it */
+ if (d_is_negative(dentry)) {
+ hmdfs_mark_drop_flag(device_id, path_parent->dentry);
+ err = vfs_create(d_inode(path_parent->dentry), dentry, mode,
+ is_excl);
+ if (err)
+ hmdfs_err("create failed, err %d", err);
+ } else {
+ if (is_excl)
+ err = -EEXIST;
+		/* if the inode already exists, check whether it's a symlink */
+ else if (S_ISREG(d_inode(dentry)->i_mode) &&
+ hm_islnk(hmdfs_d(dentry)->file_type))
+ err = -EINVAL;
+ else if (S_ISDIR(d_inode(dentry)->i_mode))
+ err = -EISDIR;
+ }
+
+ return err;
+}
+
+static int hmdfs_lookup_create(struct hmdfs_peer *con,
+ struct atomic_open_request *recv,
+ struct path *child_path, bool *truncate)
+{
+ int err = 0;
+ struct path path_root;
+ struct path path_parent;
+ uint32_t open_flags = le32_to_cpu(recv->open_flags);
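+	/* recv->buf layout: parent_path\0 filename\0 */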
+ char *path = recv->buf;
+ char *filename = recv->buf + le32_to_cpu(recv->path_len) + 1;
+ struct dentry *dentry = NULL;
+
+ err = kern_path(con->sbi->local_dst, LOOKUP_DIRECTORY, &path_root);
+ if (err) {
+ hmdfs_err("no path for %s, err %d", con->sbi->local_dst, err);
+ return err;
+ }
+
+ err = vfs_path_lookup(path_root.dentry, path_root.mnt, path,
+ LOOKUP_DIRECTORY, &path_parent);
+ if (err) {
+ hmdfs_info("no dir in %s, err %d", con->sbi->local_dst, err);
+ goto put_path_root;
+ }
+
+ inode_lock(d_inode(path_parent.dentry));
+ dentry = lookup_one_len(filename, path_parent.dentry, strlen(filename));
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
+ inode_unlock(d_inode(path_parent.dentry));
+ goto put_path_parent;
+ }
+ /* only truncate if inode already exists */
+ *truncate = ((open_flags & HMDFS_O_TRUNC) && d_is_positive(dentry));
+ err = hmdfs_check_and_create(&path_parent, dentry, con->device_id,
+ le16_to_cpu(recv->mode),
+ open_flags & HMDFS_O_EXCL);
+ inode_unlock(d_inode(path_parent.dentry));
+ if (err) {
+ dput(dentry);
+ } else {
+ child_path->dentry = dentry;
+ child_path->mnt = mntget(path_parent.mnt);
+ }
+
+put_path_parent:
+ path_put(&path_parent);
+put_path_root:
+ path_put(&path_root);
+ return err;
+}
+
+static int hmdfs_dentry_open(struct hmdfs_peer *con,
+ const struct path *path,
+ struct hmdfs_open_info *info)
+{
+ int err = 0;
+
+ info->file = dentry_open(path, O_RDWR | O_LARGEFILE, current_cred());
+ if (IS_ERR(info->file)) {
+ err = PTR_ERR(info->file);
+ hmdfs_err("open file failed, err %d", err);
+ return err;
+ }
+
+ info->file_id = insert_file_into_conn(con, info->file);
+ if (info->file_id < 0) {
+ err = info->file_id;
+ hmdfs_err("file_id alloc failed! err %d", err);
+ hmdfs_close_path(info->file);
+ return err;
+ }
+
+ return 0;
+}
+
+static int hmdfs_server_do_atomic_open(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *cmd,
+ struct atomic_open_request *recv,
+ struct hmdfs_open_info *info,
+ struct atomic_open_response *resp)
+{
+ struct path child_path;
+ bool truncate = false;
+ int err = 0;
+
+ err = hmdfs_lookup_create(con, recv, &child_path, &truncate);
+ if (err)
+ return err;
+
+ err = hmdfs_dentry_open(con, &child_path, info);
+ if (err)
+ goto put_child;
+
+ err = hmdfs_get_open_info(con, HM_REG, NULL, info);
+ if (err)
+ goto fail_close;
+
+ if (truncate) {
+ err = vfs_truncate(&child_path, 0);
+ if (err) {
+ hmdfs_err("truncate failed, err %d", err);
+ goto fail_close;
+ }
+ }
+ hmdfs_update_open_response(con, cmd, info, &resp->open_resp);
+ resp->i_mode = cpu_to_le16(file_inode(info->file)->i_mode);
+
+fail_close:
+ if (err) {
+ remove_file_from_conn(con, info->file_id);
+ hmdfs_close_path(info->file);
+ }
+put_child:
+ path_put(&child_path);
+ return err;
+}
+
+void hmdfs_server_atomic_open(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *cmd, void *data)
+{
+ int err;
+ struct atomic_open_request *recv = data;
+ struct atomic_open_response *resp = NULL;
+ struct hmdfs_open_info *info = NULL;
+
+ if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err))
+ goto out;
+
+ info = kmalloc(sizeof(*info), GFP_KERNEL);
+ resp = kzalloc(sizeof(*resp), GFP_KERNEL);
+ if (!resp || !info) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = hmdfs_server_do_atomic_open(con, cmd, recv, info, resp);
+
+out:
+ if (err) {
+ hmdfs_send_err_response(con, cmd, err);
+ } else {
+ err = hmdfs_sendmessage_response(con, cmd, sizeof(*resp), resp,
+ 0);
+ if (err) {
+ hmdfs_err("sending msg response failed, file_id %d, err %d",
+ info->file_id, err);
+ remove_file_from_conn(con, info->file_id);
+ hmdfs_close_path(info->file);
+ }
+ }
+ kfree(info);
+ kfree(resp);
+}
+
+void hmdfs_server_release(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data)
+{
+ struct release_request *release_recv = data;
+ struct file *file = NULL;
+ __u32 file_id;
+ __u64 file_ver;
+ int ret = 0;
+
+ file_id = le32_to_cpu(release_recv->file_id);
+ file_ver = le64_to_cpu(release_recv->file_ver);
+ file = get_file_by_fid_and_ver(con, cmd, file_id, file_ver);
+ if (IS_ERR(file)) {
+ hmdfs_err("cannot find %u", file_id);
+ ret = PTR_ERR(file);
+ goto out;
+ }
+ /* put the reference acquired by get_file_by_fid_and_ver() */
+ hmdfs_close_path(file);
+ hmdfs_info("close %u", file_id);
+ remove_file_from_conn(con, file_id);
+
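+	/* put the reference held by the file-id table since open */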
+ hmdfs_close_path(file);
+
+out:
+ trace_hmdfs_server_release(con, file_id, file_ver, ret);
+ set_conn_sock_quickack(con);
+}
+
+void hmdfs_server_fsync(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data)
+{
+ struct fsync_request *fsync_recv = data;
+ __s32 datasync = le32_to_cpu(fsync_recv->datasync);
+ __s64 start = le64_to_cpu(fsync_recv->start);
+ __s64 end = le64_to_cpu(fsync_recv->end);
+ struct file *file = NULL;
+ __u32 file_id;
+ __u64 file_ver;
+ int ret = 0;
+
+ file_id = le32_to_cpu(fsync_recv->file_id);
+ file_ver = le64_to_cpu(fsync_recv->file_ver);
+ file = get_file_by_fid_and_ver(con, cmd, file_id, file_ver);
+ if (IS_ERR(file)) {
+ hmdfs_err("cannot find %u", file_id);
+ ret = PTR_ERR(file);
+ goto out;
+ }
+
+ if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &ret))
+ goto out_put_file;
+
+ ret = vfs_fsync_range(file, start, end, datasync);
+ if (ret)
+ hmdfs_err("fsync fail, ret %d", ret);
+
+out_put_file:
+ hmdfs_close_path(file);
+out:
+ hmdfs_send_err_response(con, cmd, ret);
+}
+
+void hmdfs_server_readpage(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data)
+{
+ struct readpage_request *readpage_recv = data;
+ __u64 file_ver;
+ __u32 file_id;
+ struct file *file = NULL;
+ loff_t pos;
+ struct readpage_response *readpage = NULL;
+ int ret = 0;
+ size_t read_len;
+
+ file_id = le32_to_cpu(readpage_recv->file_id);
+ file_ver = le64_to_cpu(readpage_recv->file_ver);
+ file = get_file_by_fid_and_ver(con, cmd, file_id, file_ver);
+ if (IS_ERR(file)) {
+ hmdfs_info(
+ "file with id %u does not exist, pgindex %llu, devid %llu",
+ file_id, le64_to_cpu(readpage_recv->index),
+ con->device_id);
+ ret = PTR_ERR(file);
+ goto fail;
+ }
+
+ if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &ret))
+ goto fail_put_file;
+
+ read_len = (size_t)le32_to_cpu(readpage_recv->size);
+ if (read_len == 0)
+ goto fail_put_file;
+
+ readpage = kmalloc(read_len, GFP_KERNEL);
+ if (!readpage) {
+ ret = -ENOMEM;
+ goto fail_put_file;
+ }
+
+ pos = (loff_t)le64_to_cpu(readpage_recv->index) << HMDFS_PAGE_OFFSET;
+ ret = kernel_read(file, readpage->buf, read_len, &pos);
+ if (ret < 0) {
+ hmdfs_send_err_response(con, cmd, -EIO);
+ } else {
+ if (ret != read_len)
+ memset(readpage->buf + ret, 0, read_len - ret);
+ hmdfs_sendmessage_response(con, cmd, read_len, readpage, 0);
+ }
+
+ hmdfs_close_path(file);
+ kfree(readpage);
+ return;
+
+fail_put_file:
+ hmdfs_close_path(file);
+fail:
+ hmdfs_send_err_response(con, cmd, ret);
+}
+
+static struct readpages_response *alloc_readpages_resp(unsigned int len)
+{
+ struct readpages_response *resp = NULL;
+
+ if (len > HMDFS_PAGE_SIZE)
+ resp = vmalloc(len);
+ else
+ resp = kmalloc(len, GFP_KERNEL);
+
+ return resp;
+}
+
+static void free_readpages_resp(struct readpages_response *resp,
+ unsigned int len)
+{
+ if (len > HMDFS_PAGE_SIZE)
+ vfree(resp);
+ else
+ kfree(resp);
+}
+
+void hmdfs_server_readpages(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data)
+{
+ struct readpages_request *req = data;
+ __u64 file_ver;
+ __u32 file_id;
+ struct file *file = NULL;
+ loff_t pos;
+ struct readpages_response *resp = NULL;
+ ssize_t ret = 0;
+ size_t read_len;
+
+ file_id = le32_to_cpu(req->file_id);
+ file_ver = le64_to_cpu(req->file_ver);
+ file = get_file_by_fid_and_ver(con, cmd, file_id, file_ver);
+ if (IS_ERR(file)) {
+ ret = PTR_ERR(file);
+ goto fail;
+ }
+
+ read_len = (size_t)le32_to_cpu(req->size);
+ if (read_len == 0)
+ goto fail_put_file;
+
+ resp = alloc_readpages_resp(read_len);
+ if (!resp) {
+ ret = -ENOMEM;
+ goto fail_put_file;
+ }
+
+ pos = (loff_t)le64_to_cpu(req->index) << HMDFS_PAGE_OFFSET;
+ ret = kernel_read(file, resp->buf, read_len, &pos);
+ if (ret < 0) {
+ ret = -EIO;
+ goto fail_free_resp;
+ }
+
+ hmdfs_sendmessage_response(con, cmd, ret, resp, 0);
+ hmdfs_close_path(file);
+ free_readpages_resp(resp, read_len);
+ return;
+
+fail_free_resp:
+ free_readpages_resp(resp, read_len);
+fail_put_file:
+ hmdfs_close_path(file);
+fail:
+ hmdfs_send_err_response(con, cmd, ret);
+}
+
+static int hmdfs_do_readpages_open(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *cmd,
+ struct readpages_open_request *recv,
+ struct hmdfs_open_info *info,
+ struct readpages_open_response *resp)
+{
+ int ret = 0;
+ loff_t pos = 0;
+
+ info->file = hmdfs_open_file(con, recv->buf, recv->file_type,
+ &info->file_id);
+ if (IS_ERR(info->file))
+ return PTR_ERR(info->file);
+
+ ret = hmdfs_get_open_info(con, recv->file_type, recv->buf, info);
+ if (ret)
+ goto fail_close;
+
+ pos = (loff_t)le64_to_cpu(recv->index) << HMDFS_PAGE_OFFSET;
+ ret = kernel_read(info->file, resp->buf, le32_to_cpu(recv->size), &pos);
+ if (ret < 0)
+ goto fail_close;
+
+ hmdfs_update_open_response(con, cmd, info, &resp->open_resp);
+ memset(resp->reserved, 0, sizeof(resp->reserved));
+ ret = hmdfs_sendmessage_response(con, cmd, sizeof(*resp) + ret, resp,
+ 0);
+ if (ret) {
+ hmdfs_err("sending msg response failed, file_id %d, err %d",
+ info->file_id, ret);
+ ret = 0;
+ goto fail_close;
+ }
+ return 0;
+
+fail_close:
+ remove_file_from_conn(con, info->file_id);
+ hmdfs_close_path(info->file);
+ return ret;
+}
+
+void hmdfs_server_readpages_open(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *cmd, void *data)
+{
+ struct readpages_open_request *recv = data;
+ struct readpages_open_response *resp = NULL;
+ int ret = -EINVAL;
+ size_t read_len = 0;
+ size_t resp_len = 0;
+ struct hmdfs_open_info *info = NULL;
+
+ if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &ret))
+ goto fail;
+
+ info = kmalloc(sizeof(*info), GFP_KERNEL);
+ if (!info) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ read_len = (size_t)le32_to_cpu(recv->size);
+ if (read_len == 0) {
+ ret = -EINVAL;
+ goto fail_free_info;
+ }
+ resp_len = read_len + sizeof(*resp);
+ resp = vmalloc(resp_len);
+ if (!resp) {
+ ret = -ENOMEM;
+ goto fail_free_info;
+ }
+
+ ret = hmdfs_do_readpages_open(con, cmd, recv, info, resp);
+
+ vfree(resp);
+fail_free_info:
+ kfree(info);
+fail:
+ if (ret)
+ hmdfs_send_err_response(con, cmd, ret);
+}
+
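+/*
+ * The cache is stale if the directory's ctime has changed since the cache
+ * was built, or if the cache was created within 'precision' msecs of the
+ * last ctime update (the ctime may not have advanced yet at that point).
+ */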
+static bool need_rebuild_dcache(struct hmdfs_dcache_header *h,
+ struct hmdfs_time_t time,
+ unsigned int precision)
+{
+ struct hmdfs_time_t crtime = { .tv_sec = le64_to_cpu(h->dcache_crtime),
+ .tv_nsec = le64_to_cpu(
+ h->dcache_crtime_nsec) };
+ struct hmdfs_time_t ctime = { .tv_sec = le64_to_cpu(h->dentry_ctime),
+ .tv_nsec = le64_to_cpu(
+ h->dentry_ctime_nsec) };
+ struct hmdfs_time_t pre_time = { .tv_sec = precision / MSEC_PER_SEC,
+ .tv_nsec = precision % MSEC_PER_SEC *
+ NSEC_PER_MSEC };
+
+ if (hmdfs_time_compare(&time, &ctime) != 0)
+ return true;
+
+ pre_time = hmdfs_time_add(time, pre_time);
+ if (hmdfs_time_compare(&crtime, &pre_time) < 0)
+ return true;
+
+ return false;
+}
+
+static bool hmdfs_server_cache_validate(struct file *filp, struct inode *inode,
+ unsigned long precision)
+{
+ struct hmdfs_dcache_header header;
+ int overallpage;
+ ssize_t bytes;
+ loff_t pos = 0;
+
+ overallpage = get_dentry_group_cnt(file_inode(filp));
+ if (overallpage == 0) {
+ hmdfs_err("cache file size is 0");
+ return false;
+ }
+
+ bytes = kernel_read(filp, &header, sizeof(header), &pos);
+ if (bytes != sizeof(header)) {
+ hmdfs_err("read file failed, err:%zd", bytes);
+ return false;
+ }
+
+ return !need_rebuild_dcache(&header, inode->i_ctime, precision);
+}
+
+struct file *hmdfs_server_cache_revalidate(struct hmdfs_sb_info *sbi,
+ const char *recvpath,
+ struct path *path)
+{
+ struct cache_file_node *cfn = NULL;
+ struct file *file;
+
+ cfn = find_cfn(sbi, HMDFS_SERVER_CID, recvpath, true);
+ if (!cfn)
+ return NULL;
+
+ if (!hmdfs_server_cache_validate(cfn->filp, path->dentry->d_inode,
+ sbi->dcache_precision)) {
+ remove_cfn(cfn);
+ release_cfn(cfn);
+ return NULL;
+ }
+ file = cfn->filp;
+ get_file(cfn->filp);
+ release_cfn(cfn);
+
+ return file;
+}
+
+bool hmdfs_client_cache_validate(struct hmdfs_sb_info *sbi,
+ struct readdir_request *readdir_recv,
+ struct path *path)
+{
+ struct inode *inode = path->dentry->d_inode;
+ struct hmdfs_dcache_header header;
+
+ /* always rebuild dentryfile for small dir */
+ if (le64_to_cpu(readdir_recv->num) < sbi->dcache_threshold)
+ return false;
+
+ header.dcache_crtime = readdir_recv->dcache_crtime;
+ header.dcache_crtime_nsec = readdir_recv->dcache_crtime_nsec;
+ header.dentry_ctime = readdir_recv->dentry_ctime;
+ header.dentry_ctime_nsec = readdir_recv->dentry_ctime_nsec;
+
+ return !need_rebuild_dcache(&header, inode->i_ctime,
+ sbi->dcache_precision);
+}
+
+static char *server_lower_dentry_path_raw(struct hmdfs_peer *peer,
+ struct dentry *lo_d)
+{
+ struct hmdfs_dentry_info *di = hmdfs_d(peer->sbi->sb->s_root);
+ struct dentry *lo_d_root = di->lower_path.dentry;
+ struct dentry *lo_d_tmp = NULL;
+ char *lo_p_buf = NULL;
+ char *buf_head = NULL;
+ char *buf_tail = NULL;
+ size_t path_len = 0;
+
+ lo_p_buf = kzalloc(PATH_MAX, GFP_KERNEL);
+ if (unlikely(!lo_p_buf))
+ return ERR_PTR(-ENOMEM);
+
+ /* To generate a reversed path str */
+ for (lo_d_tmp = lo_d; lo_d_tmp != lo_d_root && !IS_ROOT(lo_d_tmp);
+ lo_d_tmp = lo_d_tmp->d_parent) {
+ u32 dlen = lo_d_tmp->d_name.len;
+ int reverse_index = dlen - 1;
+
+ /* Considering the appended slash and '\0' */
+ if (unlikely(path_len + dlen + 1 > PATH_MAX - 1)) {
+ kfree(lo_p_buf);
+ return ERR_PTR(-ENAMETOOLONG);
+ }
+ for (; reverse_index >= 0; --reverse_index)
+ lo_p_buf[path_len++] =
+ lo_d_tmp->d_name.name[reverse_index];
+ lo_p_buf[path_len++] = '/';
+ }
+
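+	/* e.g. a lower path /a/bc has been stored here as "cb/a/" */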
+ /* Reverse the reversed path str to get the real path str */
+ for (buf_head = lo_p_buf, buf_tail = lo_p_buf + path_len - 1;
+ buf_head < buf_tail; ++buf_head, --buf_tail)
+ swap(*buf_head, *buf_tail);
+
+ if (path_len == 0)
+ lo_p_buf[0] = '/';
+ return lo_p_buf;
+}
+
+static int server_lookup(struct hmdfs_peer *peer, const char *req_path,
+ struct path *path)
+{
+ struct path root_path;
+ int err = 0;
+
+ err = kern_path(peer->sbi->local_dst, 0, &root_path);
+ if (err)
+ goto out_noroot;
+
+ err = vfs_path_lookup(root_path.dentry, root_path.mnt, req_path,
+ LOOKUP_DIRECTORY, path);
+ path_put(&root_path);
+out_noroot:
+ return err;
+}
+
+/**
+ * server_lookup_lower - look up the lower file-system path
+ * @peer: target device node
+ * @req_path: abs path (mount point as the root) from the request
+ * @lo_p: the lower path to return
+ *
+ * Returns the lower path's name, with character cases matched.
+ */
+static char *server_lookup_lower(struct hmdfs_peer *peer, const char *req_path,
+ struct path *lo_p)
+{
+ char *lo_p_name = ERR_PTR(-ENOENT);
+ struct path up_p;
+ int err = 0;
+
+ err = server_lookup(peer, req_path, &up_p);
+ if (err)
+ goto out;
+
+ hmdfs_get_lower_path(up_p.dentry, lo_p);
+ path_put(&up_p);
+
+ lo_p_name = server_lower_dentry_path_raw(peer, lo_p->dentry);
+ if (IS_ERR(lo_p_name)) {
+ err = PTR_ERR(lo_p_name);
+ path_put(lo_p);
+ }
+out:
+ return err ? ERR_PTR(err) : lo_p_name;
+}
+
+void hmdfs_server_readdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data)
+{
+ struct readdir_request *readdir_recv = data;
+ struct path lo_p;
+ struct file *filp = NULL;
+ int err = 0;
+ unsigned long long num = 0;
+ char *lo_p_name = NULL;
+
+ trace_hmdfs_server_readdir(readdir_recv);
+
+ lo_p_name = server_lookup_lower(con, readdir_recv->path, &lo_p);
+ if (IS_ERR(lo_p_name)) {
+ err = PTR_ERR(lo_p_name);
+ hmdfs_info("Failed to get lower path: %d", err);
+ goto send_err;
+ }
+
+ if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err))
+ goto err_lookup_path;
+
+ if (le32_to_cpu(readdir_recv->verify_cache)) {
+ if (hmdfs_client_cache_validate(con->sbi, readdir_recv, &lo_p))
+ goto out_response;
+ }
+
+ filp = hmdfs_server_cache_revalidate(con->sbi, lo_p_name, &lo_p);
+ if (IS_ERR_OR_NULL(filp)) {
+ filp = hmdfs_server_rebuild_dents(con->sbi, &lo_p, &num,
+ lo_p_name);
+ if (IS_ERR_OR_NULL(filp)) {
+ err = PTR_ERR(filp);
+ goto err_lookup_path;
+ }
+ }
+
+out_response:
+ err = hmdfs_readfile_response(con, cmd, filp);
+ if (!err)
+ hmdfs_add_remote_cache_list(con, lo_p_name);
+ if (num >= con->sbi->dcache_threshold)
+ cache_file_persistent(con, filp, lo_p_name, true);
+ if (filp)
+ fput(filp);
+err_lookup_path:
+ path_put(&lo_p);
+ kfree(lo_p_name);
+send_err:
+ if (err)
+ hmdfs_send_err_response(con, cmd, err);
+}
+
+void hmdfs_server_mkdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data)
+{
+ int err = 0;
+ struct mkdir_request *mkdir_recv = data;
+ struct inode *child_inode = NULL;
+ struct dentry *dent = NULL;
+ char *mkdir_dir = NULL;
+ char *mkdir_name = NULL;
+ struct hmdfs_inodeinfo_response *mkdir_resp = NULL;
+ int respsize = sizeof(struct hmdfs_inodeinfo_response);
+ int path_len = le32_to_cpu(mkdir_recv->path_len);
+
+ mkdir_resp = kzalloc(respsize, GFP_KERNEL);
+ if (!mkdir_resp) {
+ err = -ENOMEM;
+ goto mkdir_out;
+ }
+
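+	/* mkdir_recv->path layout: parent_path\0 new_dir_name\0 */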
+ mkdir_dir = mkdir_recv->path;
+ mkdir_name = mkdir_recv->path + path_len + 1;
+
+ dent = hmdfs_root_mkdir(con->device_id, con->sbi->local_dst,
+ mkdir_dir, mkdir_name,
+ le16_to_cpu(mkdir_recv->mode));
+ if (IS_ERR(dent)) {
+ err = PTR_ERR(dent);
+ hmdfs_err("hmdfs_root_mkdir failed err = %d", err);
+ goto mkdir_out;
+ }
+ child_inode = d_inode(dent);
+ mkdir_resp->i_mode = cpu_to_le16(child_inode->i_mode);
+ mkdir_resp->i_size = cpu_to_le64(child_inode->i_size);
+ mkdir_resp->i_mtime = cpu_to_le64(child_inode->i_mtime.tv_sec);
+ mkdir_resp->i_mtime_nsec = cpu_to_le32(child_inode->i_mtime.tv_nsec);
+ mkdir_resp->i_ino = cpu_to_le64(child_inode->i_ino);
+ dput(dent);
+mkdir_out:
+ hmdfs_sendmessage_response(con, cmd, respsize, mkdir_resp, err);
+ kfree(mkdir_resp);
+}
+
+void hmdfs_server_create(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data)
+{
+ int err = 0;
+ struct create_request *create_recv = data;
+ struct inode *child_inode = NULL;
+ struct dentry *dent = NULL;
+ char *create_dir = NULL;
+ char *create_name = NULL;
+ struct hmdfs_inodeinfo_response *create_resp = NULL;
+ int respsize = sizeof(struct hmdfs_inodeinfo_response);
+ int path_len = le32_to_cpu(create_recv->path_len);
+
+ create_resp = kzalloc(respsize, GFP_KERNEL);
+ if (!create_resp) {
+ err = -ENOMEM;
+ goto create_out;
+ }
+
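+	/* create_recv->path layout: parent_path\0 new_file_name\0 */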
+ create_dir = create_recv->path;
+ create_name = create_recv->path + path_len + 1;
+
+ dent = hmdfs_root_create(con->device_id, con->sbi->local_dst,
+ create_dir, create_name,
+ le16_to_cpu(create_recv->mode),
+ create_recv->want_excl);
+ if (IS_ERR(dent)) {
+ err = PTR_ERR(dent);
+ hmdfs_err("hmdfs_root_create failed err = %d", err);
+ goto create_out;
+ }
+ child_inode = d_inode(dent);
+ create_resp->i_mode = cpu_to_le16(child_inode->i_mode);
+ create_resp->i_size = cpu_to_le64(child_inode->i_size);
+ create_resp->i_mtime = cpu_to_le64(child_inode->i_mtime.tv_sec);
+ create_resp->i_mtime_nsec = cpu_to_le32(child_inode->i_mtime.tv_nsec);
+ /*
+ * keep same as hmdfs_server_open,
+ * to prevent hmdfs_open_final_remote from judging ino errors.
+ */
+ create_resp->i_ino = cpu_to_le64(
+ generate_u64_ino(hmdfs_i(child_inode)->lower_inode->i_ino,
+ child_inode->i_generation));
+ dput(dent);
+create_out:
+ hmdfs_sendmessage_response(con, cmd, respsize, create_resp, err);
+ kfree(create_resp);
+}
+
+void hmdfs_server_rmdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data)
+{
+ int err = 0;
+ struct path root_path;
+ char *path = NULL;
+ char *name = NULL;
+ struct rmdir_request *rmdir_recv = data;
+
+ if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err))
+ goto out;
+
+ path = rmdir_recv->path;
+ name = rmdir_recv->path + le32_to_cpu(rmdir_recv->path_len) + 1;
+ err = kern_path(con->sbi->local_dst, 0, &root_path);
+ if (!err) {
+ err = hmdfs_root_rmdir(con->device_id, &root_path, path, name);
+ path_put(&root_path);
+ }
+out:
+ hmdfs_send_err_response(con, cmd, err);
+}
+
+void hmdfs_server_unlink(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data)
+{
+ int err = 0;
+ struct path root_path;
+ char *path = NULL;
+ char *name = NULL;
+ struct unlink_request *unlink_recv = data;
+
+ if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err))
+ goto out;
+
+ path = unlink_recv->path;
+ name = unlink_recv->path + le32_to_cpu(unlink_recv->path_len) + 1;
+ err = kern_path(con->sbi->local_dst, 0, &root_path);
+ if (!err) {
+ err = hmdfs_root_unlink(con->device_id, &root_path, path, name);
+ path_put(&root_path);
+ }
+out:
+ hmdfs_send_err_response(con, cmd, err);
+}
+
+void hmdfs_server_rename(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data)
+{
+ int err = 0;
+ int old_path_len;
+ int new_path_len;
+ int old_name_len;
+ int new_name_len;
+ unsigned int flags;
+ char *path_old = NULL;
+ char *name_old = NULL;
+ char *path_new = NULL;
+ char *name_new = NULL;
+ struct rename_request *recv = data;
+
+ if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err))
+ goto out;
+
+ old_path_len = le32_to_cpu(recv->old_path_len);
+ new_path_len = le32_to_cpu(recv->new_path_len);
+ old_name_len = le32_to_cpu(recv->old_name_len);
+ new_name_len = le32_to_cpu(recv->new_name_len);
+ flags = le32_to_cpu(recv->flags);
+
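+	/* recv->path layout: old_path\0 new_path\0 old_name\0 new_name\0 */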
+ path_old = recv->path;
+ path_new = recv->path + old_path_len + 1;
+ name_old = recv->path + old_path_len + 1 + new_path_len + 1;
+ name_new = recv->path + old_path_len + 1 + new_path_len + 1 +
+ old_name_len + 1;
+
+ err = hmdfs_root_rename(con->sbi, con->device_id, path_old, name_old,
+ path_new, name_new, flags);
+out:
+ hmdfs_send_err_response(con, cmd, err);
+}
+
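+/*
+ * Build a path from the format string, then resolve it following symlinks.
+ * Only symlinks to regular files are supported; directory symlinks are
+ * rejected with -EOPNOTSUPP.
+ */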
+static int hmdfs_lookup_symlink(struct path *link_path, const char *path_fmt,
+ ...)
+{
+ int ret;
+ va_list args;
+ char *path = kmalloc(PATH_MAX, GFP_KERNEL);
+
+ if (!path)
+ return -ENOMEM;
+
+ va_start(args, path_fmt);
+ ret = vsnprintf(path, PATH_MAX, path_fmt, args);
+ va_end(args);
+
+ if (ret >= PATH_MAX) {
+ ret = -ENAMETOOLONG;
+ goto out;
+ }
+
+ /*
+	 * TODO: rebuilding the dentryfile may deadlock, because iterate_dir
+	 * already holds the parent lock while we do not yet know the symlink
+	 * source's parent.
+ */
+ ret = kern_path(path, LOOKUP_FOLLOW, link_path);
+ if (ret) {
+ hmdfs_err("kern_path failed err = %d", ret);
+ goto out;
+ }
+
+ if (!S_ISREG(d_inode(link_path->dentry)->i_mode)) {
+ hmdfs_err("path is dir symlink");
+ path_put(link_path);
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+out:
+ kfree(path);
+ return ret;
+}
+
+static int hmdfs_filldir_real(struct dir_context *ctx, const char *name,
+ int name_len, loff_t offset, u64 ino,
+ unsigned int d_type)
+{
+ int res = 0;
+ char namestr[NAME_MAX + 1];
+ struct getdents_callback_real *gc = NULL;
+ struct dentry *child = NULL;
+
+ if (name_len > NAME_MAX) {
+ hmdfs_err("name_len:%d NAME_MAX:%u", name_len, NAME_MAX);
+ goto out;
+ }
+
+ gc = container_of(ctx, struct getdents_callback_real, ctx);
+
+ memcpy(namestr, name, name_len);
+ namestr[name_len] = '\0';
+
+ if (hmdfs_file_type(namestr) != HMDFS_TYPE_COMMON)
+ goto out;
+
+ /* parent lock already hold by iterate_dir */
+ child = lookup_one_len(name, gc->parent_path->dentry, name_len);
+ if (IS_ERR(child)) {
+ res = PTR_ERR(child);
+ hmdfs_err("lookup failed because %d", res);
+ goto out;
+ }
+
+ if (d_really_is_negative(child)) {
+ dput(child);
+ hmdfs_err("lookup failed because negative dentry");
+ /* just do not fill this entry and continue for next entry */
+ goto out;
+ }
+
+ if (d_type == DT_REG || d_type == DT_DIR) {
+ create_dentry(child, d_inode(child), gc->file, gc->sbi);
+ gc->num++;
+ } else if (d_type == DT_LNK) {
+ struct path link_path;
+
+ res = hmdfs_lookup_symlink(&link_path, "%s/%s/%s",
+ gc->sbi->local_src, gc->dir,
+ namestr);
+ if (!res) {
+ create_dentry(child, d_inode(link_path.dentry),
+ gc->file, gc->sbi);
+ path_put(&link_path);
+ gc->num++;
+ } else if (res == -ENOENT) {
+ /*
+			 * If the source file does not exist, use the info
+			 * from the link inode.
+ */
+ create_dentry(child, d_inode(child), gc->file, gc->sbi);
+ gc->num++;
+ }
+ }
+
+ dput(child);
+
+out:
+ /*
+	 * Always return 0 here so that the caller can continue to the next
+	 * dentry even if this one failed somehow.
+ */
+ return 0;
+}
+
+static void hmdfs_server_set_header(struct hmdfs_dcache_header *header,
+ struct file *file, struct file *dentry_file)
+{
+ struct inode *inode = NULL;
+ struct hmdfs_time_t cur_time;
+
+ inode = file_inode(file);
+ cur_time = current_time(file_inode(dentry_file));
+ header->dcache_crtime = cpu_to_le64(cur_time.tv_sec);
+ header->dcache_crtime_nsec = cpu_to_le64(cur_time.tv_nsec);
+ header->dentry_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
+ header->dentry_ctime_nsec = cpu_to_le64(inode->i_ctime.tv_nsec);
+}
+
+/* Get the dentries of the target directory */
+struct file *hmdfs_server_rebuild_dents(struct hmdfs_sb_info *sbi,
+ struct path *path, loff_t *num,
+ const char *dir)
+{
+ int err = 0;
+ struct getdents_callback_real gc = {
+ .ctx.actor = hmdfs_filldir_real,
+ .ctx.pos = 0,
+ .num = 0,
+ .sbi = sbi,
+ .dir = dir,
+ };
+ struct file *file = NULL;
+ struct file *dentry_file = NULL;
+ struct hmdfs_dcache_header header;
+
+ dentry_file = create_local_dentry_file_cache(sbi);
+ if (IS_ERR(dentry_file)) {
+ hmdfs_err("file create failed err=%ld", PTR_ERR(dentry_file));
+ return dentry_file;
+ }
+
+ file = dentry_open(path, O_RDONLY | O_DIRECTORY, current_cred());
+ if (IS_ERR(file)) {
+ err = PTR_ERR(file);
+ hmdfs_err("dentry_open failed");
+ goto out;
+ }
+
+ hmdfs_server_set_header(&header, file, dentry_file);
+
+ gc.parent_path = path;
+ gc.file = dentry_file;
+
+ err = iterate_dir(file, &(gc.ctx));
+ if (err) {
+ hmdfs_err("iterate_dir failed");
+ goto out;
+ }
+
+ header.case_sensitive = sbi->s_case_sensitive;
+ header.num = cpu_to_le64(gc.num);
+ if (num)
+ *num = gc.num;
+
+ err = write_header(dentry_file, &header);
+out:
+ if (!IS_ERR_OR_NULL(file))
+ fput(file);
+
+ if (err) {
+ fput(dentry_file);
+ dentry_file = ERR_PTR(err);
+ }
+
+ trace_hmdfs_server_rebuild_dents(&header, err);
+ return dentry_file;
+}
+
+void hmdfs_server_writepage(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data)
+{
+ struct writepage_request *writepage_recv = data;
+ struct hmdfs_server_writeback *hswb = NULL;
+ __u64 file_ver;
+ __u32 file_id;
+ struct file *file = NULL;
+ loff_t pos;
+ __u32 count;
+ ssize_t ret;
+ int err = 0;
+
+ file_id = le32_to_cpu(writepage_recv->file_id);
+ file_ver = le64_to_cpu(writepage_recv->file_ver);
+ file = get_file_by_fid_and_ver(con, cmd, file_id, file_ver);
+ if (IS_ERR(file)) {
+ hmdfs_info(
+ "file with id %u does not exist, pgindex %llu, devid %llu",
+ file_id, le64_to_cpu(writepage_recv->index),
+ con->device_id);
+ err = PTR_ERR(file);
+ goto out;
+ }
+
+ if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err))
+ goto out_put_file;
+
+ pos = (loff_t)le64_to_cpu(writepage_recv->index) << HMDFS_PAGE_OFFSET;
+ count = le32_to_cpu(writepage_recv->count);
+ ret = kernel_write(file, writepage_recv->buf, count, &pos);
+ if (ret != count)
+ err = -EIO;
+
+out_put_file:
+ hmdfs_close_path(file);
+out:
+ hmdfs_send_err_response(con, cmd, err);
+
+ hswb = con->sbi->h_swb;
+ if (!err && hswb->dirty_writeback_control)
+ hmdfs_server_check_writeback(hswb);
+}
+
+static int hmdfs_lookup_linkpath(struct hmdfs_sb_info *sbi,
+ const char *path_name, struct path *dst_path)
+{
+ struct path link_path;
+ int err;
+
+ err = hmdfs_lookup_symlink(&link_path, "%s/%s", sbi->local_dst,
+ path_name);
+ if (err)
+ return err;
+
+ if (d_inode(link_path.dentry)->i_sb != sbi->sb) {
+ path_put(dst_path);
+ *dst_path = link_path;
+ } else {
+ path_put(&link_path);
+ }
+
+ return 0;
+}
+
+static struct inode *hmdfs_verify_path(struct dentry *dentry, char *recv_buf,
+ struct super_block *sb)
+{
+ struct inode *inode = d_inode(dentry);
+ struct hmdfs_inode_info *info = NULL;
+
+ /* if we found path from wrong fs */
+ if (inode->i_sb != sb) {
+ hmdfs_err("super block do not match");
+ return NULL;
+ }
+
+ info = hmdfs_i(inode);
+ /* make sure lower inode is not NULL */
+ if (info->lower_inode)
+ return info->lower_inode;
+
+ /*
+ * we don't expect lower inode to be NULL in server. However, it's
+ * possible because dentry cache can contain stale data.
+ */
+ hmdfs_info("lower inode is NULL, is remote file: %d",
+ info->conn != NULL);
+ return NULL;
+}
+
+static int hmdfs_notify_change(struct vfsmount *mnt, struct dentry *dentry,
+ struct iattr *attr,
+ struct inode **delegated_inode)
+{
+#ifdef CONFIG_SDCARD_FS
+ /* sdcard_fs need to call setattr2, notify_change will call setattr */
+ return notify_change2(mnt, dentry, attr, delegated_inode);
+#else
+ return notify_change(dentry, attr, delegated_inode);
+#endif
+}
+
+void hmdfs_server_setattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data)
+{
+ int err = 0;
+ struct dentry *dentry = NULL;
+ struct inode *inode = NULL;
+ struct setattr_request *recv = data;
+ struct path root_path, dst_path;
+ struct iattr attr;
+ __u32 valid = le32_to_cpu(recv->valid);
+
+ if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err))
+ goto out;
+
+ err = kern_path(con->sbi->local_dst, 0, &root_path);
+ if (err) {
+ hmdfs_err("kern_path failed err = %d", err);
+ goto out;
+ }
+
+ err = vfs_path_lookup(root_path.dentry, root_path.mnt, recv->buf, 0,
+ &dst_path);
+ if (err)
+ goto out_put_root;
+
+ inode = hmdfs_verify_path(dst_path.dentry, recv->buf, con->sbi->sb);
+ if (!inode) {
+ err = -ENOENT;
+ goto out_put_dst;
+ }
+
+ /* We need to follow if symlink was found */
+ if (S_ISLNK(inode->i_mode)) {
+ err = hmdfs_lookup_linkpath(con->sbi, recv->buf, &dst_path);
+ /* if source file doesn't exist, use link inode */
+ if (err == -ENOENT)
+ err = 0;
+ else if (err)
+ goto out_put_dst;
+ }
+
+ dentry = dst_path.dentry;
+ memset(&attr, 0, sizeof(attr));
+ /* only support size and mtime */
+ if (valid & (ATTR_SIZE | ATTR_MTIME))
+ attr.ia_valid =
+ (valid & (ATTR_MTIME | ATTR_MTIME_SET | ATTR_SIZE));
+ attr.ia_size = le64_to_cpu(recv->size);
+ attr.ia_mtime.tv_sec = le64_to_cpu(recv->mtime);
+ attr.ia_mtime.tv_nsec = le32_to_cpu(recv->mtime_nsec);
+
+ inode_lock(dentry->d_inode);
+ err = hmdfs_notify_change(dst_path.mnt, dentry, &attr, NULL);
+ inode_unlock(dentry->d_inode);
+
+out_put_dst:
+ path_put(&dst_path);
+out_put_root:
+ path_put(&root_path);
+out:
+ hmdfs_send_err_response(con, cmd, err);
+}
+
+static void update_getattr_response(struct hmdfs_peer *con, struct inode *inode,
+ struct kstat *ks,
+ struct getattr_response *resp)
+{
+ /* if getattr for link, get ino and mode from actual lower inode */
+ resp->ino = cpu_to_le64(
+ generate_u64_ino(inode->i_ino, inode->i_generation));
+ resp->mode = cpu_to_le16(inode->i_mode);
+
+ /* get other information from vfs_getattr() */
+ resp->result_mask = cpu_to_le32(STATX_BASIC_STATS | STATX_BTIME);
+ resp->fsid = cpu_to_le64(ks->dev);
+ resp->nlink = cpu_to_le32(ks->nlink);
+ resp->uid = cpu_to_le32(ks->uid.val);
+ resp->gid = cpu_to_le32(ks->gid.val);
+ resp->size = cpu_to_le64(ks->size);
+ resp->blocks = cpu_to_le64(ks->blocks);
+ resp->blksize = cpu_to_le32(ks->blksize);
+ resp->atime = cpu_to_le64(ks->atime.tv_sec);
+ resp->atime_nsec = cpu_to_le32(ks->atime.tv_nsec);
+ resp->mtime = cpu_to_le64(ks->mtime.tv_sec);
+ resp->mtime_nsec = cpu_to_le32(ks->mtime.tv_nsec);
+ resp->ctime = cpu_to_le64(ks->ctime.tv_sec);
+ resp->ctime_nsec = cpu_to_le32(ks->ctime.tv_nsec);
+ resp->crtime = cpu_to_le64(ks->btime.tv_sec);
+ resp->crtime_nsec = cpu_to_le32(ks->btime.tv_nsec);
+}
+
+void hmdfs_server_getattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data)
+{
+ int err = 0;
+ struct getattr_request *recv = data;
+ int size_read = sizeof(struct getattr_response);
+ struct getattr_response *resp = NULL;
+ struct kstat ks;
+ struct path root_path, dst_path;
+ struct inode *inode = NULL;
+ unsigned int recv_flags = le32_to_cpu(recv->lookup_flags);
+ unsigned int lookup_flags = 0;
+
+ if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err))
+ goto err;
+
+ err = hmdfs_convert_lookup_flags(recv_flags, &lookup_flags);
+ if (err)
+ goto err;
+
+ resp = kzalloc(size_read, GFP_KERNEL);
+ if (!resp) {
+ err = -ENOMEM;
+ goto err;
+ }
+ err = kern_path(con->sbi->local_dst, 0, &root_path);
+ if (err) {
+ hmdfs_err("kern_path failed err = %d", err);
+ goto err_free_resp;
+ }
+	/* TODO: local_dst --> local_src */
+ err = vfs_path_lookup(root_path.dentry, root_path.mnt, recv->buf,
+ lookup_flags, &dst_path);
+ if (err)
+ goto out_put_root;
+
+ inode = hmdfs_verify_path(dst_path.dentry, recv->buf, con->sbi->sb);
+ if (!inode) {
+ err = -ENOENT;
+ goto out_put_dst;
+ }
+ /* We need to follow if symlink was found */
+ if (S_ISLNK(inode->i_mode)) {
+ err = hmdfs_lookup_linkpath(con->sbi, recv->buf, &dst_path);
+ /* if source file doesn't exist, use link inode */
+ if (err && err != -ENOENT)
+ goto out_put_dst;
+ }
+
+ err = vfs_getattr(&dst_path, &ks, STATX_BASIC_STATS | STATX_BTIME, 0);
+ if (err)
+ goto err_put_dst;
+ update_getattr_response(con, inode, &ks, resp);
+
+out_put_dst:
+ path_put(&dst_path);
+out_put_root:
+ /*
+ * if path lookup failed, we return with result_mask setting to
+ * zero. So we can be aware of such situation in caller.
+ */
+ if (err)
+ resp->result_mask = cpu_to_le32(0);
+ path_put(&root_path);
+ hmdfs_sendmessage_response(con, cmd, size_read, resp, err);
+ kfree(resp);
+ return;
+
+err_put_dst:
+ path_put(&dst_path);
+ path_put(&root_path);
+err_free_resp:
+ kfree(resp);
+err:
+ hmdfs_send_err_response(con, cmd, err);
+}
+
+static void init_statfs_response(struct statfs_response *resp,
+ struct kstatfs *st)
+{
+ resp->f_type = cpu_to_le64(HMDFS_SUPER_MAGIC);
+ resp->f_bsize = cpu_to_le64(st->f_bsize);
+ resp->f_blocks = cpu_to_le64(st->f_blocks);
+ resp->f_bfree = cpu_to_le64(st->f_bfree);
+ resp->f_bavail = cpu_to_le64(st->f_bavail);
+ resp->f_files = cpu_to_le64(st->f_files);
+ resp->f_ffree = cpu_to_le64(st->f_ffree);
+ resp->f_fsid_0 = cpu_to_le32(st->f_fsid.val[0]);
+ resp->f_fsid_1 = cpu_to_le32(st->f_fsid.val[1]);
+ resp->f_namelen = cpu_to_le64(st->f_namelen);
+ resp->f_frsize = cpu_to_le64(st->f_frsize);
+ resp->f_flags = cpu_to_le64(st->f_flags);
+ /* f_spare is not used in f2fs or ext4 */
+ resp->f_spare_0 = cpu_to_le64(st->f_spare[0]);
+ resp->f_spare_1 = cpu_to_le64(st->f_spare[1]);
+ resp->f_spare_2 = cpu_to_le64(st->f_spare[2]);
+ resp->f_spare_3 = cpu_to_le64(st->f_spare[3]);
+}
+
+void hmdfs_server_statfs(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data)
+{
+ struct statfs_request *recv = data;
+ struct statfs_response *resp = NULL;
+ struct path root_path, path;
+ struct kstatfs *st = NULL;
+ int err = 0;
+
+ if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &err))
+ goto out;
+
+ st = kzalloc(sizeof(*st), GFP_KERNEL);
+ if (!st) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ resp = kmalloc(sizeof(*resp), GFP_KERNEL);
+ if (!resp) {
+ err = -ENOMEM;
+ goto free_st;
+ }
+
+ err = kern_path(con->sbi->local_src, 0, &root_path);
+ if (err) {
+ hmdfs_info("kern_path failed err = %d", err);
+ goto free_st;
+ }
+
+ err = vfs_path_lookup(root_path.dentry, root_path.mnt, recv->path, 0,
+ &path);
+ if (err) {
+ hmdfs_info("recv->path found failed err = %d", err);
+ goto put_root;
+ }
+
+ err = vfs_statfs(&path, st);
+ if (err)
+ hmdfs_info("statfs local dentry failed, err = %d", err);
+ init_statfs_response(resp, st);
+ path_put(&path);
+
+put_root:
+ path_put(&root_path);
+free_st:
+ kfree(st);
+out:
+ if (err)
+ hmdfs_send_err_response(con, cmd, err);
+ else
+ hmdfs_sendmessage_response(con, cmd, sizeof(*resp), resp, 0);
+
+ kfree(resp);
+}
+
+void hmdfs_server_syncfs(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data)
+{
+ /*
+	 * Reserved interface. This differs from the traditional syncfs
+	 * process. The remote syncfs process on the client is:
+	 * 1. remote writepages via async calls
+	 * 2. remote syncfs call
+	 * 3. wait for all remote async calls (writepages) from step 1 to
+	 *    return
+ */
+ int ret = 0;
+
+ if (hmdfs_should_fail_req(&con->sbi->fault_inject, con, cmd, &ret)) {
+ hmdfs_send_err_response(con, cmd, ret);
+ return;
+ }
+
+ hmdfs_send_err_response(con, cmd, ret);
+}
+
+void hmdfs_server_getxattr(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *cmd, void *data)
+{
+ struct getxattr_request *recv = data;
+ size_t size = le32_to_cpu(recv->size);
+ size_t size_read = sizeof(struct getxattr_response) + size;
+ struct getxattr_response *resp = NULL;
+ struct path root_path;
+ struct path path;
+ char *file_path = recv->buf;
+	char *name = recv->buf + le32_to_cpu(recv->path_len) + 1;
+ int err = -ENOMEM;
+
+ resp = kzalloc(size_read, GFP_KERNEL);
+ if (!resp)
+ goto err;
+
+ err = kern_path(con->sbi->local_dst, LOOKUP_DIRECTORY, &root_path);
+ if (err) {
+ hmdfs_info("kern_path failed err = %d", err);
+ goto err_free_resp;
+ }
+
+ err = vfs_path_lookup(root_path.dentry, root_path.mnt,
+ file_path, 0, &path);
+ if (err) {
+ hmdfs_info("path found failed err = %d", err);
+ goto err_put_root;
+ }
+
+ if (!size)
+ err = vfs_getxattr(path.dentry, name, NULL, size);
+ else
+ err = vfs_getxattr(path.dentry, name, resp->value, size);
+ if (err < 0) {
+ hmdfs_info("getxattr failed err %d", err);
+ goto err_put_path;
+ }
+
+ resp->size = cpu_to_le32(err);
+ hmdfs_sendmessage_response(con, cmd, size_read, resp, 0);
+ path_put(&path);
+ path_put(&root_path);
+ kfree(resp);
+ return;
+
+err_put_path:
+ path_put(&path);
+err_put_root:
+ path_put(&root_path);
+err_free_resp:
+ kfree(resp);
+err:
+ hmdfs_send_err_response(con, cmd, err);
+}
+
+void hmdfs_server_setxattr(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *cmd, void *data)
+{
+ struct setxattr_request *recv = data;
+ size_t size = le32_to_cpu(recv->size);
+ int flags = le32_to_cpu(recv->flags);
+ bool del = recv->del;
+ struct path root_path;
+ struct path path;
+ const char *file_path = NULL;
+ const char *name = NULL;
+ const void *value = NULL;
+ int err;
+
+ err = kern_path(con->sbi->local_dst, LOOKUP_DIRECTORY, &root_path);
+ if (err) {
+ hmdfs_info("kern_path failed err = %d", err);
+ goto err;
+ }
+
+	file_path = recv->buf;
+	name = recv->buf + le32_to_cpu(recv->path_len) + 1;
+	value = name + le32_to_cpu(recv->name_len) + 1;
+ err = vfs_path_lookup(root_path.dentry, root_path.mnt,
+ file_path, 0, &path);
+ if (err) {
+ hmdfs_info("path found failed err = %d", err);
+ goto err_put_root;
+ }
+
+ if (del) {
+ WARN_ON(flags != XATTR_REPLACE);
+ err = vfs_removexattr(path.dentry, name);
+ } else {
+ err = vfs_setxattr(path.dentry, name, value, size, flags);
+ }
+
+ path_put(&path);
+err_put_root:
+ path_put(&root_path);
+err:
+ hmdfs_send_err_response(con, cmd, err);
+}
+
+void hmdfs_server_listxattr(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *cmd, void *data)
+{
+ struct listxattr_request *recv = data;
+ size_t size = le32_to_cpu(recv->size);
+ int size_read = sizeof(struct listxattr_response) + size;
+ struct listxattr_response *resp = NULL;
+ const char *file_path = NULL;
+ struct path root_path;
+ struct path path;
+ int err = 0;
+
+ resp = kzalloc(size_read, GFP_KERNEL);
+ if (!resp) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ err = kern_path(con->sbi->local_dst, LOOKUP_DIRECTORY, &root_path);
+ if (err) {
+ hmdfs_info("kern_path failed err = %d", err);
+ goto err_free_resp;
+ }
+
+ file_path = recv->buf;
+ err = vfs_path_lookup(root_path.dentry, root_path.mnt,
+ file_path, 0, &path);
+ if (err) {
+ hmdfs_info("path found failed err = %d", err);
+ goto err_put_root;
+ }
+
+ if (!size)
+ err = vfs_listxattr(path.dentry, NULL, size);
+ else
+ err = vfs_listxattr(path.dentry, resp->list, size);
+ if (err < 0) {
+ hmdfs_info("listxattr failed err = %d", err);
+ goto err_put_path;
+ }
+
+ resp->size = cpu_to_le32(err);
+ hmdfs_sendmessage_response(con, cmd, size_read, resp, 0);
+ path_put(&root_path);
+ path_put(&path);
+ kfree(resp);
+ return;
+
+err_put_path:
+ path_put(&path);
+err_put_root:
+ path_put(&root_path);
+err_free_resp:
+ kfree(resp);
+err:
+ hmdfs_send_err_response(con, cmd, err);
+}
+
+void hmdfs_server_get_drop_push(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *cmd, void *data)
+{
+ struct drop_push_request *dp_recv = data;
+ struct path root_path, path;
+ int err;
+ char *tmp_path = NULL;
+
+ err = kern_path(con->sbi->real_dst, 0, &root_path);
+ if (err) {
+ hmdfs_err("kern_path failed err = %d", err);
+ goto quickack;
+ }
+ tmp_path = kzalloc(PATH_MAX, GFP_KERNEL);
+ if (!tmp_path)
+ goto out;
+ snprintf(tmp_path, PATH_MAX, "/" DEVICE_VIEW_ROOT "/%s%s",
+ con->cid, dp_recv->path);
+
+ err = vfs_path_lookup(root_path.dentry, root_path.mnt, tmp_path, 0,
+ &path);
+ if (err) {
+		hmdfs_info("path lookup failed err = %d", err);
+ goto free;
+ }
+ hmdfs_remove_cache_filp(con, path.dentry);
+
+ path_put(&path);
+free:
+ kfree(tmp_path);
+out:
+ path_put(&root_path);
+quickack:
+ set_conn_sock_quickack(con);
+}
diff --git a/fs/hmdfs/hmdfs_server.h b/fs/hmdfs/hmdfs_server.h
new file mode 100644
index 0000000000000000000000000000000000000000..844f3a9ee82c41ad1ab0b7e3c5f01905006cf85d
--- /dev/null
+++ b/fs/hmdfs/hmdfs_server.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/hmdfs_server.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_SERVER_H
+#define HMDFS_SERVER_H
+
+#include "hmdfs.h"
+#include "comm/transport.h"
+#include "comm/socket_adapter.h"
+
+static inline void hmdfs_send_err_response(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *cmd, int err)
+{
+ if (hmdfs_sendmessage_response(con, cmd, 0, NULL, (__u32)err))
+ hmdfs_warning("send err failed");
+}
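+
+/*
+ * Each hmdfs_server_* handler below parses a request from @data and replies
+ * via hmdfs_sendmessage_response(); on failure, hmdfs_send_err_response()
+ * sends an empty response carrying the error code.
+ */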
+
+void hmdfs_server_open(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data);
+void hmdfs_server_atomic_open(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *cmd, void *data);
+void hmdfs_server_fsync(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data);
+void hmdfs_server_release(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data);
+void hmdfs_server_readpage(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data);
+void hmdfs_server_readpages(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data);
+void hmdfs_server_readpages_open(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *cmd, void *data);
+void hmdfs_server_writepage(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data);
+
+void hmdfs_server_readdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data);
+
+void hmdfs_server_mkdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data);
+
+void hmdfs_server_create(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data);
+
+void hmdfs_server_rmdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data);
+
+void hmdfs_server_unlink(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data);
+
+void hmdfs_server_rename(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data);
+
+void hmdfs_server_setattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data);
+void hmdfs_server_getattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data);
+void hmdfs_server_statfs(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data);
+void hmdfs_server_syncfs(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data);
+void hmdfs_server_getxattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data);
+void hmdfs_server_setxattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data);
+void hmdfs_server_listxattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+ void *data);
+void hmdfs_server_get_drop_push(struct hmdfs_peer *con,
+ struct hmdfs_head_cmd *cmd, void *data);
+
+void __init hmdfs_server_add_node_evt_cb(void);
+#endif
diff --git a/fs/hmdfs/hmdfs_trace.h b/fs/hmdfs/hmdfs_trace.h
new file mode 100644
index 0000000000000000000000000000000000000000..205bf697c35741590e0df9cc17b5df995358f8a9
--- /dev/null
+++ b/fs/hmdfs/hmdfs_trace.h
@@ -0,0 +1,800 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/hmdfs_trace.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hmdfs
+
+#if !defined(__HMDFS_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ)
+
+#define __HMDFS_TRACE_H__
+
+#include <linux/tracepoint.h>
+#include "comm/protocol.h"
+#include "hmdfs_dentryfile.h"
+#include "hmdfs_client.h"
+#include "hmdfs_device_view.h"
+#include "client_writeback.h"
+
+TRACE_EVENT(hmdfs_permission,
+
+ TP_PROTO(unsigned long ino),
+
+ TP_ARGS(ino),
+
+ TP_STRUCT__entry(__field(unsigned long, ino)),
+
+ TP_fast_assign(__entry->ino = ino;),
+
+ TP_printk("permission check for ino %lu failed", __entry->ino));
+
+/* communication */
+TRACE_EVENT(hmdfs_recv_mesg_callback,
+
+ TP_PROTO(struct hmdfs_head_cmd *cmd),
+
+ TP_ARGS(cmd),
+
+ TP_STRUCT__entry(
+ __field(__u32, msg_id)
+ __field(__u32, magic)
+ __field(__u16, command)
+ __field(__u16, cmd_flag)
+ __field(__u32, data_len)
+ __field(__u32, ret_code)
+ ),
+
+ TP_fast_assign(
+ __entry->msg_id = le32_to_cpu(cmd->msg_id);
+ __entry->magic = cmd->magic;
+ __entry->command = cmd->operations.command;
+ __entry->cmd_flag = cmd->operations.cmd_flag;
+ __entry->data_len = cmd->data_len;
+ __entry->ret_code = cmd->ret_code;
+ ),
+
+ TP_printk("msg_id:%u magic:%u command:%hu, cmd_flag:%hu, data_len:%u, ret_code:%u",
+ __entry->msg_id, __entry->magic, __entry->command,
+ __entry->cmd_flag, __entry->data_len, __entry->ret_code)
+);
+
+TRACE_EVENT(hmdfs_tcp_send_message,
+
+ TP_PROTO(struct hmdfs_head_cmd *cmd),
+
+ TP_ARGS(cmd),
+
+ TP_STRUCT__entry(
+ __field(__u32, msg_id)
+ __field(__u32, magic)
+ __field(__u16, command)
+ __field(__u16, cmd_flag)
+ __field(__u32, data_len)
+ __field(__u32, ret_code)
+ ),
+
+ TP_fast_assign(
+ __entry->msg_id = le32_to_cpu(cmd->msg_id);
+ __entry->magic = cmd->magic;
+ __entry->command = cmd->operations.command;
+ __entry->cmd_flag = cmd->operations.cmd_flag;
+ __entry->data_len = cmd->data_len;
+ __entry->ret_code = cmd->ret_code;
+ ),
+
+ TP_printk("msg_id:%u magic:%u command:%hu, cmd_flag:%hu, data_len:%u, ret_code:%u",
+ __entry->msg_id, __entry->magic, __entry->command,
+ __entry->cmd_flag, __entry->data_len, __entry->ret_code)
+);
+
+/* file system interface */
+DECLARE_EVENT_CLASS(hmdfs_iterate_op_end,
+
+ TP_PROTO(struct dentry *__d, loff_t start_pos, loff_t end_pos, int err),
+
+ TP_ARGS(__d, start_pos, end_pos, err),
+
+ TP_STRUCT__entry(
+ __string(name_str, __d->d_name.name)
+ __field(loff_t, start)
+ __field(loff_t, end)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ __assign_str(name_str, __d->d_name.name);
+ __entry->start = start_pos;
+ __entry->end = end_pos;
+ __entry->err = err;
+ ),
+
+ TP_printk("dentry[%s] start_pos:%llx, end_pos:%llx, err:%d",
+ __get_str(name_str), __entry->start,
+ __entry->end, __entry->err)
+);
+
+#define define_hmdfs_iterate_op_end_event(event_name) \
+ DEFINE_EVENT(hmdfs_iterate_op_end, event_name, \
+ TP_PROTO(struct dentry *__d, loff_t start_pos, \
+ loff_t end_pos, int err), \
+ TP_ARGS(__d, start_pos, end_pos, err))
+
+define_hmdfs_iterate_op_end_event(hmdfs_iterate_local);
+define_hmdfs_iterate_op_end_event(hmdfs_iterate_remote);
+define_hmdfs_iterate_op_end_event(hmdfs_iterate_merge);
+
+
+TRACE_EVENT(hmdfs_lookup,
+
+ TP_PROTO(struct inode *dir, struct dentry *dentry, unsigned int flags),
+
+ TP_ARGS(dir, dentry, flags),
+
+ TP_STRUCT__entry(
+ __field(ino_t, ino)
+ __string(name_str, dentry->d_name.name)
+ __field(unsigned int, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->ino = dir->i_ino;
+ __assign_str(name_str, dentry->d_name.name);
+ __entry->flags = flags;
+ ),
+
+ TP_printk("parent_ino = %lu, name:%s, flags:%u",
+ __entry->ino, __get_str(name_str), __entry->flags)
+);
+
+DECLARE_EVENT_CLASS(hmdfs_lookup_op_end,
+
+ TP_PROTO(struct inode *dir, struct dentry *dentry, int err),
+
+ TP_ARGS(dir, dentry, err),
+
+ TP_STRUCT__entry(
+ __field(ino_t, ino)
+ __string(name_str, dentry->d_name.name)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ __entry->ino = dir->i_ino;
+ __assign_str(name_str, dentry->d_name.name);
+ __entry->err = err;
+ ),
+
+ TP_printk("parent_ino = %lu, name:%s, err:%d",
+ __entry->ino, __get_str(name_str), __entry->err)
+);
+
+#define define_hmdfs_lookup_op_end_event(event_name) \
+ DEFINE_EVENT(hmdfs_lookup_op_end, event_name, \
+ TP_PROTO(struct inode *dir, struct dentry *dentry, \
+ int err), \
+ TP_ARGS(dir, dentry, err))
+
+
+define_hmdfs_lookup_op_end_event(hmdfs_root_lookup);
+define_hmdfs_lookup_op_end_event(hmdfs_root_lookup_end);
+
+define_hmdfs_lookup_op_end_event(hmdfs_device_lookup);
+define_hmdfs_lookup_op_end_event(hmdfs_device_lookup_end);
+
+define_hmdfs_lookup_op_end_event(hmdfs_lookup_local);
+define_hmdfs_lookup_op_end_event(hmdfs_lookup_local_end);
+define_hmdfs_lookup_op_end_event(hmdfs_mkdir_local);
+define_hmdfs_lookup_op_end_event(hmdfs_rmdir_local);
+define_hmdfs_lookup_op_end_event(hmdfs_create_local);
+
+define_hmdfs_lookup_op_end_event(hmdfs_lookup_remote);
+define_hmdfs_lookup_op_end_event(hmdfs_lookup_remote_end);
+define_hmdfs_lookup_op_end_event(hmdfs_mkdir_remote);
+define_hmdfs_lookup_op_end_event(hmdfs_rmdir_remote);
+define_hmdfs_lookup_op_end_event(hmdfs_create_remote);
+
+define_hmdfs_lookup_op_end_event(hmdfs_lookup_merge);
+define_hmdfs_lookup_op_end_event(hmdfs_lookup_merge_end);
+define_hmdfs_lookup_op_end_event(hmdfs_mkdir_merge);
+define_hmdfs_lookup_op_end_event(hmdfs_rmdir_merge);
+define_hmdfs_lookup_op_end_event(hmdfs_create_merge);
+
+
+define_hmdfs_lookup_op_end_event(hmdfs_symlink_merge);
+define_hmdfs_lookup_op_end_event(hmdfs_symlink_local);
+
+define_hmdfs_lookup_op_end_event(hmdfs_get_link_merge);
+define_hmdfs_lookup_op_end_event(hmdfs_get_link_local);
+
+TRACE_EVENT(hmdfs_show_comrade,
+
+ TP_PROTO(struct dentry *d, struct dentry *lo_d, uint64_t devid),
+
+ TP_ARGS(d, lo_d, devid),
+
+ TP_STRUCT__entry(
+ __string(name, d->d_name.name)
+ __string(lo_name, lo_d->d_name.name)
+ __field(uint64_t, devid)
+ ),
+
+ TP_fast_assign(
+		__assign_str(name, d->d_name.name);
+		__assign_str(lo_name, lo_d->d_name.name);
+ __entry->devid = devid;
+ ),
+
+ TP_printk("parent_name:%s -> lo_d_name:%s, lo_d_devid:%llu",
+ __get_str(name), __get_str(lo_name), __entry->devid)
+);
+
+DECLARE_EVENT_CLASS(hmdfs_rename_op_end,
+
+ TP_PROTO(struct inode *olddir, struct dentry *olddentry,
+ struct inode *newdir, struct dentry *newdentry,
+ unsigned int flags),
+
+ TP_ARGS(olddir, olddentry, newdir, newdentry, flags),
+
+ TP_STRUCT__entry(
+ __field(ino_t, oldino)
+ __string(oldname_str, olddentry->d_name.name)
+ __field(ino_t, newino)
+ __string(newname_str, newdentry->d_name.name)
+ __field(unsigned int, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->oldino = olddir->i_ino;
+ __assign_str(oldname_str, olddentry->d_name.name);
+ __entry->newino = newdir->i_ino;
+ __assign_str(newname_str, newdentry->d_name.name);
+ __entry->flags = flags;
+ ),
+
+ TP_printk("old_pino = %lu, oldname:%s; new_pino = %lu, newname:%s, flags:%u",
+ __entry->oldino, __get_str(oldname_str),
+ __entry->newino, __get_str(newname_str), __entry->flags)
+);
+
+#define define_hmdfs_rename_op_end_event(event_name) \
+ DEFINE_EVENT(hmdfs_rename_op_end, event_name, \
+ TP_PROTO(struct inode *olddir, struct dentry *olddentry, \
+ struct inode *newdir, struct dentry *newdentry, \
+ unsigned int flags), \
+ TP_ARGS(olddir, olddentry, newdir, newdentry, flags))
+
+define_hmdfs_rename_op_end_event(hmdfs_rename_local);
+define_hmdfs_rename_op_end_event(hmdfs_rename_remote);
+define_hmdfs_rename_op_end_event(hmdfs_rename_merge);
+
+TRACE_EVENT(hmdfs_statfs,
+
+ TP_PROTO(struct dentry *d, uint8_t type),
+
+ TP_ARGS(d, type),
+
+ TP_STRUCT__entry(
+ __string(name, d->d_name.name)
+ __field(uint8_t, type)
+ ),
+
+ TP_fast_assign(
+		__assign_str(name, d->d_name.name);
+ __entry->type = type;
+ ),
+
+	TP_printk("dentry_name:%s, type:%u",
+ __get_str(name), __entry->type)
+);
+
+
+
+TRACE_EVENT(hmdfs_balance_dirty_pages_ratelimited,
+
+ TP_PROTO(struct hmdfs_sb_info *sbi,
+ struct hmdfs_writeback *hwb,
+ int bdp_ratelimits),
+
+ TP_ARGS(sbi, hwb, bdp_ratelimits),
+
+ TP_STRUCT__entry(
+ __array(char, dst, 128)
+ __field(int, nr_dirtied)
+ __field(int, nr_dirtied_pause)
+ __field(int, dirty_exceeded)
+ __field(long long, bdp_ratelimits)
+ __field(long, ratelimit_pages)
+ ),
+
+ TP_fast_assign(
+ strlcpy(__entry->dst, sbi->local_dst, 128);
+
+ __entry->nr_dirtied = current->nr_dirtied;
+ __entry->nr_dirtied_pause = current->nr_dirtied_pause;
+ __entry->dirty_exceeded = hwb->dirty_exceeded;
+ __entry->bdp_ratelimits = bdp_ratelimits;
+ __entry->ratelimit_pages = hwb->ratelimit_pages;
+ ),
+
+ TP_printk("hmdfs dst:%s nr_dirtied=%d nr_dirtied_pause=%d dirty_exceeded=%d bdp_ratelimits=%lld ratelimit_pages=%ld",
+ __entry->dst, __entry->nr_dirtied, __entry->nr_dirtied_pause,
+ __entry->dirty_exceeded, __entry->bdp_ratelimits,
+ __entry->ratelimit_pages)
+);
+
+TRACE_EVENT(hmdfs_balance_dirty_pages,
+
+ TP_PROTO(struct hmdfs_sb_info *sbi,
+ struct bdi_writeback *wb,
+ struct hmdfs_dirty_throttle_control *hdtc,
+ unsigned long pause,
+ unsigned long start_time),
+
+ TP_ARGS(sbi, wb, hdtc, pause, start_time),
+
+ TP_STRUCT__entry(
+ __array(char, dst, 128)
+ __field(unsigned long, write_bw)
+ __field(unsigned long, avg_write_bw)
+ __field(unsigned long, file_bg_thresh)
+ __field(unsigned long, fs_bg_thresh)
+ __field(unsigned long, file_thresh)
+ __field(unsigned long, fs_thresh)
+ __field(unsigned long, file_nr_dirty)
+ __field(unsigned long, fs_nr_dirty)
+ __field(unsigned long, file_nr_rec)
+ __field(unsigned long, fs_nr_rec)
+ __field(unsigned long, pause)
+ __field(unsigned long, paused)
+ ),
+
+ TP_fast_assign(
+ strlcpy(__entry->dst, sbi->local_dst, 128);
+
+ __entry->write_bw = wb->write_bandwidth;
+ __entry->avg_write_bw = wb->avg_write_bandwidth;
+ __entry->file_bg_thresh = hdtc->file_bg_thresh;
+ __entry->fs_bg_thresh = hdtc->fs_bg_thresh;
+ __entry->file_thresh = hdtc->file_thresh;
+ __entry->fs_thresh = hdtc->fs_thresh;
+ __entry->file_nr_dirty = hdtc->file_nr_dirty;
+ __entry->fs_nr_dirty = hdtc->fs_nr_dirty;
+ __entry->file_nr_rec = hdtc->file_nr_reclaimable;
+ __entry->fs_nr_rec = hdtc->fs_nr_reclaimable;
+ __entry->pause = pause * 1000 / HZ;
+ __entry->paused = (jiffies - start_time) *
+ 1000 / HZ;
+ ),
+
+ TP_printk("hmdfs dst:%s write_bw=%lu, awrite_bw=%lu, bg_thresh=%lu,%lu thresh=%lu,%lu dirty=%lu,%lu reclaimable=%lu,%lu pause=%lu paused=%lu",
+ __entry->dst, __entry->write_bw, __entry->avg_write_bw,
+ __entry->file_bg_thresh, __entry->fs_bg_thresh,
+ __entry->file_thresh, __entry->fs_thresh,
+ __entry->file_nr_dirty, __entry->fs_nr_dirty,
+ __entry->file_nr_rec, __entry->fs_nr_rec,
+ __entry->pause, __entry->paused
+ )
+);
+
+TRACE_EVENT(hmdfs_start_srv_wb,
+
+ TP_PROTO(struct hmdfs_sb_info *sbi, int dirty_pages,
+ unsigned int dirty_thresh_pg),
+
+ TP_ARGS(sbi, dirty_pages, dirty_thresh_pg),
+
+ TP_STRUCT__entry(
+ __array(char, src, 128)
+ __field(int, dirty_pages)
+ __field(unsigned int, dirty_thresh_pg)
+ ),
+
+ TP_fast_assign(
+ strlcpy(__entry->src, sbi->local_src, 128);
+ __entry->dirty_pages = dirty_pages;
+ __entry->dirty_thresh_pg = dirty_thresh_pg;
+ ),
+
+ TP_printk("hmdfs src: %s, start writeback dirty pages. writeback %d pages dirty_thresh is %d pages",
+ __entry->src, __entry->dirty_pages, __entry->dirty_thresh_pg)
+);
+
+TRACE_EVENT(hmdfs_fsync_enter_remote,
+
+ TP_PROTO(struct hmdfs_sb_info *sbi, unsigned long long device_id,
+ unsigned long long remote_ino, int datasync),
+
+ TP_ARGS(sbi, device_id, remote_ino, datasync),
+
+ TP_STRUCT__entry(
+ __array(char, src, 128)
+ __field(uint64_t, device_id)
+ __field(uint64_t, remote_ino)
+ __field(int, datasync)
+ ),
+
+ TP_fast_assign(
+ strlcpy(__entry->src, sbi->local_src, 128);
+ __entry->device_id = device_id;
+ __entry->remote_ino = remote_ino;
+ __entry->datasync = datasync;
+ ),
+
+ TP_printk("hmdfs: src %s, start remote fsync file(remote dev_id=%llu,ino=%llu), datasync=%d",
+ __entry->src, __entry->device_id,
+ __entry->remote_ino, __entry->datasync)
+);
+
+TRACE_EVENT(hmdfs_fsync_exit_remote,
+
+ TP_PROTO(struct hmdfs_sb_info *sbi, unsigned long long device_id,
+ unsigned long long remote_ino, unsigned int timeout, int err),
+
+ TP_ARGS(sbi, device_id, remote_ino, timeout, err),
+
+ TP_STRUCT__entry(
+ __array(char, src, 128)
+ __field(uint64_t, device_id)
+ __field(uint64_t, remote_ino)
+ __field(uint32_t, timeout)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ strlcpy(__entry->src, sbi->local_src, 128);
+ __entry->device_id = device_id;
+ __entry->remote_ino = remote_ino;
+ __entry->timeout = timeout;
+ __entry->err = err;
+ ),
+
+ TP_printk("hmdfs: src %s, finish remote fsync file(remote dev_id=%llu,ino=%llu), timeout=%u, err=%d",
+ __entry->src, __entry->device_id, __entry->remote_ino,
+ __entry->timeout, __entry->err)
+);
+
+TRACE_EVENT(hmdfs_syncfs_enter,
+
+ TP_PROTO(struct hmdfs_sb_info *sbi),
+
+ TP_ARGS(sbi),
+
+ TP_STRUCT__entry(
+ __array(char, src, 128)
+ ),
+
+ TP_fast_assign(
+ strlcpy(__entry->src, sbi->local_src, 128);
+ ),
+
+ TP_printk("hmdfs: src %s, start syncfs", __entry->src)
+);
+
+TRACE_EVENT(hmdfs_syncfs_exit,
+
+ TP_PROTO(struct hmdfs_sb_info *sbi, int remain_count,
+ unsigned int timeout, int err),
+
+ TP_ARGS(sbi, remain_count, timeout, err),
+
+ TP_STRUCT__entry(
+ __array(char, src, 128)
+ __field(int, remain_count)
+ __field(uint32_t, timeout)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ strlcpy(__entry->src, sbi->local_src, 128);
+ __entry->remain_count = remain_count;
+ __entry->timeout = timeout;
+ __entry->err = err;
+ ),
+
+ TP_printk("hmdfs: src %s, finish syncfs(timeout=%u), remain %d remote devices to response, err=%d",
+ __entry->src, __entry->timeout,
+ __entry->remain_count, __entry->err)
+);
+
+TRACE_EVENT(hmdfs_server_release,
+
+ TP_PROTO(struct hmdfs_peer *con, uint32_t file_id,
+ uint64_t file_ver, int err),
+
+ TP_ARGS(con, file_id, file_ver, err),
+
+ TP_STRUCT__entry(
+ __array(char, src, 128)
+ __field(uint32_t, file_id)
+ __field(uint64_t, file_ver)
+ __field(uint64_t, device_id)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ strlcpy(__entry->src, con->sbi->local_src, 128);
+ __entry->file_id = file_id;
+ __entry->file_ver = file_ver;
+ __entry->device_id = con->device_id;
+ __entry->err = err;
+ ),
+
+ TP_printk("hmdfs: src %s, server release file, fid=%u, fid_ver=%llu, remote_dev=%llu, err=%d",
+ __entry->src, __entry->file_id, __entry->file_ver,
+ __entry->device_id, __entry->err)
+);
+
+TRACE_EVENT(hmdfs_client_recv_readpage,
+
+ TP_PROTO(struct hmdfs_peer *con, unsigned long long remote_ino,
+ unsigned long page_index, int err),
+
+ TP_ARGS(con, remote_ino, page_index, err),
+
+ TP_STRUCT__entry(
+ __array(char, src, 128)
+ __field(uint64_t, remote_ino)
+ __field(unsigned long, page_index)
+ __field(uint64_t, device_id)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ strlcpy(__entry->src, con->sbi->local_src, 128);
+ __entry->remote_ino = remote_ino;
+ __entry->page_index = page_index;
+ __entry->device_id = con->device_id;
+ __entry->err = err;
+ ),
+
+ TP_printk("hmdfs: src %s, client readpage callback from remote device %llu, remote_ino=%llu, page_idx=%lu, err=%d",
+ __entry->src, __entry->device_id,
+ __entry->remote_ino, __entry->page_index, __entry->err)
+);
+
+TRACE_EVENT(hmdfs_writepage_cb_enter,
+
+ TP_PROTO(struct hmdfs_peer *con, unsigned long long remote_ino,
+ unsigned long page_index, int err),
+
+ TP_ARGS(con, remote_ino, page_index, err),
+
+ TP_STRUCT__entry(
+ __array(char, src, 128)
+ __field(uint64_t, remote_ino)
+ __field(unsigned long, page_index)
+ __field(uint64_t, device_id)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ strlcpy(__entry->src, con->sbi->local_src, 128);
+ __entry->remote_ino = remote_ino;
+ __entry->page_index = page_index;
+ __entry->device_id = con->device_id;
+ __entry->err = err;
+ ),
+
+ TP_printk("hmdfs: src %s, writepage_cb start, return from remote device %llu, remote_ino=%llu, page_idx=%lu, err=%d",
+ __entry->src, __entry->device_id,
+ __entry->remote_ino, __entry->page_index, __entry->err)
+);
+
+TRACE_EVENT(hmdfs_writepage_cb_exit,
+
+ TP_PROTO(struct hmdfs_peer *con, unsigned long long remote_ino,
+ unsigned long page_index, int err),
+
+ TP_ARGS(con, remote_ino, page_index, err),
+
+ TP_STRUCT__entry(
+ __array(char, src, 128)
+ __field(uint64_t, remote_ino)
+ __field(unsigned long, page_index)
+ __field(uint64_t, device_id)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ strlcpy(__entry->src, con->sbi->local_src, 128);
+ __entry->remote_ino = remote_ino;
+ __entry->page_index = page_index;
+ __entry->device_id = con->device_id;
+ __entry->err = err;
+ ),
+
+ TP_printk("hmdfs: src %s, writepage_cb exit, return from remote device %llu, remote_ino=%llu, page_index=%lu, err=%d",
+ __entry->src, __entry->device_id,
+ __entry->remote_ino, __entry->page_index, __entry->err)
+);
+
+TRACE_EVENT(hmdfs_server_rebuild_dents,
+
+ TP_PROTO(struct hmdfs_dcache_header *__h, int err),
+
+ TP_ARGS(__h, err),
+
+ TP_STRUCT__entry(
+ __field(uint64_t, crtime)
+ __field(uint64_t, crtime_nsec)
+ __field(uint64_t, ctime)
+ __field(uint64_t, ctime_nsec)
+ __field(uint64_t, num)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ __entry->crtime = le64_to_cpu(__h->dcache_crtime);
+ __entry->crtime_nsec = le64_to_cpu(__h->dcache_crtime_nsec);
+ __entry->ctime = le64_to_cpu(__h->dentry_ctime);
+ __entry->ctime_nsec = le64_to_cpu(__h->dentry_ctime_nsec);
+ __entry->num = le64_to_cpu(__h->num);
+ __entry->err = err;
+ ),
+
+ TP_printk("dcache crtime %llu:%llu ctime %llu:%llu has %llu dentry err %d",
+ __entry->crtime, __entry->crtime_nsec, __entry->ctime,
+ __entry->ctime_nsec, __entry->num, __entry->err)
+);
+
+TRACE_EVENT(hmdfs_server_readdir,
+
+ TP_PROTO(struct readdir_request *req),
+
+ TP_ARGS(req),
+
+ TP_STRUCT__entry(
+ __string(path, req->path)
+ ),
+
+ TP_fast_assign(
+ __assign_str(path, req->path);
+ ),
+
+ TP_printk("hmdfs_server_readdir %s", __get_str(path))
+);
+
+TRACE_EVENT(hmdfs_open_final_remote,
+
+ TP_PROTO(struct hmdfs_inode_info *info,
+ struct hmdfs_open_ret *open_ret,
+ struct file *file,
+ int reason),
+
+ TP_ARGS(info, open_ret, file, reason),
+
+ TP_STRUCT__entry(
+ __array(char, file_path, MAX_FILTER_STR_VAL)
+ __field(uint32_t, reason)
+ __field(uint32_t, file_id)
+ __field(uint64_t, file_ver)
+ __field(uint64_t, remote_file_size)
+ __field(uint64_t, remote_ino)
+ __field(uint64_t, remote_ctime)
+ __field(uint64_t, remote_ctime_nsec)
+ __field(uint64_t, remote_stable_ctime)
+ __field(uint64_t, remote_stable_ctime_nsec)
+ __field(uint64_t, local_file_size)
+ __field(uint64_t, local_ino)
+ __field(uint64_t, local_ctime)
+ __field(uint64_t, local_ctime_nsec)
+ __field(uint64_t, local_stable_ctime)
+ __field(uint64_t, local_stable_ctime_nsec)
+ ),
+
+ TP_fast_assign(
+ strlcpy(__entry->file_path, file->f_path.dentry->d_name.name,
+ MAX_FILTER_STR_VAL);
+ __entry->reason = reason;
+ __entry->file_id = open_ret->fid.id;
+ __entry->file_ver = open_ret->fid.ver;
+ __entry->remote_file_size = open_ret->file_size;
+ __entry->remote_ino = open_ret->ino;
+ __entry->remote_ctime = open_ret->remote_ctime.tv_sec;
+ __entry->remote_ctime_nsec = open_ret->remote_ctime.tv_nsec;
+ __entry->remote_stable_ctime = open_ret->stable_ctime.tv_sec;
+ __entry->remote_stable_ctime_nsec =
+ open_ret->stable_ctime.tv_nsec;
+ __entry->local_file_size = info->vfs_inode.i_size;
+ __entry->local_ino = info->remote_ino;
+ __entry->local_ctime = info->remote_ctime.tv_sec;
+ __entry->local_ctime_nsec = info->remote_ctime.tv_nsec;
+ __entry->local_stable_ctime = info->stable_ctime.tv_sec;
+ __entry->local_stable_ctime_nsec = info->stable_ctime.tv_nsec;
+ ),
+
+ TP_printk("file path: %s, file id: %u, file ver: %llu, reason: %d, file size: %llu/%llu, ino: %llu/%llu, ctime: %llu.%llu/%llu.%llu, stable_ctime: %llu.%llu/%llu.%llu from remote/local",
+ __entry->file_path, __entry->file_id, __entry->file_ver,
+ __entry->reason, __entry->remote_file_size,
+ __entry->local_file_size, __entry->remote_ino,
+ __entry->local_ino, __entry->remote_ctime,
+ __entry->remote_ctime_nsec, __entry->local_ctime,
+ __entry->local_ctime_nsec, __entry->remote_stable_ctime,
+ __entry->remote_stable_ctime_nsec,
+ __entry->local_stable_ctime, __entry->local_stable_ctime_nsec)
+);
+
+TRACE_EVENT(hmdfs_server_open_enter,
+
+ TP_PROTO(struct hmdfs_peer *con,
+ struct open_request *recv),
+
+ TP_ARGS(con, recv),
+
+ TP_STRUCT__entry(
+ __array(char, open_path, MAX_FILTER_STR_VAL)
+ __array(char, dst_path, MAX_FILTER_STR_VAL)
+ __field(uint32_t, file_type)
+ ),
+
+ TP_fast_assign(
+ strlcpy(__entry->open_path, recv->buf, MAX_FILTER_STR_VAL);
+ strlcpy(__entry->dst_path, con->sbi->local_dst,
+ MAX_FILTER_STR_VAL);
+ __entry->file_type = recv->file_type;
+ ),
+
+ TP_printk("server open file %s from %s, file_type is %u",
+ __entry->open_path, __entry->dst_path,
+ __entry->file_type)
+);
+
+TRACE_EVENT(hmdfs_server_open_exit,
+
+ TP_PROTO(struct hmdfs_peer *con,
+ struct open_response *resp,
+ struct file *file,
+ int ret),
+
+ TP_ARGS(con, resp, file, ret),
+
+ TP_STRUCT__entry(
+ __array(char, file_path, MAX_FILTER_STR_VAL)
+ __array(char, src_path, MAX_FILTER_STR_VAL)
+ __field(uint32_t, file_id)
+ __field(uint64_t, file_size)
+ __field(uint64_t, ino)
+ __field(uint64_t, ctime)
+ __field(uint64_t, ctime_nsec)
+ __field(uint64_t, stable_ctime)
+ __field(uint64_t, stable_ctime_nsec)
+ __field(int, retval)
+ ),
+
+ TP_fast_assign(
+ if (file)
+ strlcpy(__entry->file_path,
+ file->f_path.dentry->d_name.name,
+ MAX_FILTER_STR_VAL);
+ else
+ strlcpy(__entry->file_path, "null", MAX_FILTER_STR_VAL);
+ strlcpy(__entry->src_path, con->sbi->local_src,
+ MAX_FILTER_STR_VAL);
+ __entry->file_id = resp ? resp->file_id : UINT_MAX;
+ __entry->file_size = resp ? resp->file_size : ULLONG_MAX;
+ __entry->ino = resp ? resp->ino : 0;
+ __entry->ctime = resp ? resp->ctime : 0;
+ __entry->ctime_nsec = resp ? resp->ctime_nsec : 0;
+ __entry->stable_ctime = resp ? resp->stable_ctime : 0;
+ __entry->stable_ctime_nsec = resp ? resp->stable_ctime_nsec : 0;
+ __entry->retval = ret;
+ ),
+
+ TP_printk("server file %s is opened from %s, open result: %d, file id: %u, file size: %llu, ino: %llu, ctime: %llu.%llu, stable ctime: %llu.%llu",
+ __entry->file_path, __entry->src_path,
+ __entry->retval, __entry->file_id,
+ __entry->file_size, __entry->ino, __entry->ctime,
+ __entry->ctime_nsec, __entry->stable_ctime,
+ __entry->stable_ctime_nsec)
+);
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE hmdfs_trace
+#include <trace/define_trace.h>
diff --git a/fs/hmdfs/inode.c b/fs/hmdfs/inode.c
new file mode 100644
index 0000000000000000000000000000000000000000..8cdedf42dc952a571a5185ef6acd654797216fe8
--- /dev/null
+++ b/fs/hmdfs/inode.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/inode.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include "hmdfs_device_view.h"
+#include "inode.h"
+#include "comm/connection.h"
+
+/**
+ * Rules to generate inode numbers:
+ *
+ * "/", "/device_view", "/merge_view", "/device_view/local", "/device_view/cid"
+ * = DOMAIN {3} : dev_id {29} : HMDFS_ROOT {32}
+ *
+ * "/device_view/cid/xxx"
+ * = DOMAIN {3} : dev_id {29} : hash(remote_ino){32}
+ *
+ * "/merge_view/xxx"
+ * = DOMAIN {3} : lower's dev_id {29} : lower's ino_raw {32}
+ */
+
+#define BIT_WIDE_TOTAL 64
+
+#define BIT_WIDE_DOMAIN 3
+#define BIT_WIDE_DEVID 29
+#define BIT_WIDE_INO_RAW 32
+
+enum DOMAIN {
+ DOMAIN_ROOT,
+ DOMAIN_DEVICE_LOCAL,
+ DOMAIN_DEVICE_REMOTE,
+ DOMAIN_MERGE_VIEW,
+ DOMAIN_INVALID,
+};
+
+union hmdfs_ino {
+ const uint64_t ino_output;
+ struct {
+ uint64_t ino_raw : BIT_WIDE_INO_RAW;
+ uint64_t dev_id : BIT_WIDE_DEVID;
+ uint8_t domain : BIT_WIDE_DOMAIN;
+ };
+};
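+
+/*
+ * Illustration (hypothetical values, assuming the usual little-endian
+ * bitfield layout): a remote file on the peer with dev_id 5 whose remote
+ * ino hashes to 0x1234abcd gets
+ *   ino_output = (u64)DOMAIN_DEVICE_REMOTE << 61 | 5ULL << 32 | 0x1234abcd,
+ * i.e. the domain in the top 3 bits, the device id in the next 29 and the
+ * hashed raw ino in the low 32.
+ */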
+
+static uint8_t read_ino_domain(uint64_t ino)
+{
+ union hmdfs_ino _ino = {
+ .ino_output = ino,
+ };
+
+ return _ino.domain;
+}
+
+struct iget_args {
+ /* The lower inode of local/merge/root(part) inode */
+ struct inode *lo_i;
+ /* The peer of remote inode */
+ struct hmdfs_peer *peer;
+ /* The ino of remote inode */
+ uint64_t remote_ino;
+
+ /* Returned inode's ino */
+ union hmdfs_ino ino;
+};
+
+/**
+ * iget_test - check whether the inode found in the inode cache with a
+ * matching hashval is the one we are looking for
+ *
+ * @inode: the local inode we found in inode cache with matched hashval
+ * @data: struct iget_args
+ */
+static int iget_test(struct inode *inode, void *data)
+{
+ struct hmdfs_inode_info *hii = hmdfs_i(inode);
+ struct iget_args *ia = data;
+ int res = 0;
+
+ WARN_ON(ia->ino.domain < DOMAIN_ROOT ||
+ ia->ino.domain >= DOMAIN_INVALID);
+
+ if (read_ino_domain(inode->i_ino) == DOMAIN_ROOT)
+ return 0;
+
+ switch (ia->ino.domain) {
+ case DOMAIN_MERGE_VIEW:
+ res = (ia->lo_i == hii->lower_inode);
+ break;
+ case DOMAIN_DEVICE_LOCAL:
+ res = (ia->lo_i == hii->lower_inode);
+ break;
+ case DOMAIN_DEVICE_REMOTE:
+ res = (ia->peer == hii->conn &&
+ ia->remote_ino == hii->remote_ino);
+ break;
+ }
+
+ return res;
+}
+
+/**
+ * iget_set - initialize an inode with iget_args
+ *
+ * @inode: the inode being initialized
+ * @data: struct iget_args
+ */
+static int iget_set(struct inode *inode, void *data)
+{
+ struct hmdfs_inode_info *hii = hmdfs_i(inode);
+ struct iget_args *ia = (struct iget_args *)data;
+
+ inode->i_ino = ia->ino.ino_output;
+ inode_inc_iversion(inode);
+
+ hii->conn = ia->peer;
+ hii->remote_ino = ia->remote_ino;
+ hii->lower_inode = ia->lo_i;
+
+ return 0;
+}
+
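+/*
+ * Local inos that already fit into 32 bits are used verbatim; wider ones
+ * are multiplied by GOLDEN_RATIO_64 and shifted down so that the result
+ * stays well spread across the 32-bit ino_raw space.
+ */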
+static uint64_t make_ino_raw_dev_local(uint64_t lo_ino)
+{
+ if (!(lo_ino >> BIT_WIDE_INO_RAW))
+ return lo_ino;
+
+ return lo_ino * GOLDEN_RATIO_64 >> BIT_WIDE_INO_RAW;
+}
+
+static uint64_t make_ino_raw_dev_remote(uint64_t remote_ino)
+{
+ return hash_long(remote_ino, BIT_WIDE_INO_RAW);
+}
+
+/**
+ * hmdfs_iget5_locked_merge - obtain an inode for the merge-view
+ *
+ * @sb: superblock of current instance
+ * @fst_lo_d: the dentry of its first comrade (supplies the lower inode)
+ *
+ * Simply replace the lower ino's domain to form the new ino.
+ */
+struct inode *hmdfs_iget5_locked_merge(struct super_block *sb,
+ struct dentry *fst_lo_d)
+{
+ struct iget_args ia = {
+ .lo_i = d_inode(fst_lo_d),
+ .peer = NULL,
+ .remote_ino = 0,
+ .ino.ino_output = 0,
+ };
+
+ if (unlikely(!d_inode(fst_lo_d))) {
+		hmdfs_err("Received an invalid lower inode");
+ return NULL;
+ }
+
+ ia.ino.ino_raw = d_inode(fst_lo_d)->i_ino;
+ ia.ino.dev_id = hmdfs_d(fst_lo_d)->device_id;
+ ia.ino.domain = DOMAIN_MERGE_VIEW;
+ return iget5_locked(sb, ia.ino.ino_output, iget_test, iget_set, &ia);
+}
+
+/**
+ * hmdfs_iget5_locked_local - obtain an inode for the local-dev-view
+ *
+ * @sb: superblock of current instance
+ * @lo_i: the lower inode from local filesystem
+ *
+ * Hash the local inode's ino to generate our ino. On collision, we fall back
+ * to comparing the address of the lower_inode for uniqueness.
+ */
+struct inode *hmdfs_iget5_locked_local(struct super_block *sb,
+ struct inode *lo_i)
+{
+ struct iget_args ia = {
+ .lo_i = lo_i,
+ .peer = NULL,
+ .remote_ino = 0,
+ .ino.ino_output = 0,
+ };
+
+ if (unlikely(!lo_i)) {
+		hmdfs_err("Received an invalid lower inode");
+ return NULL;
+ }
+ ia.ino.ino_raw = make_ino_raw_dev_local(lo_i->i_ino);
+ ia.ino.dev_id = 0;
+ ia.ino.domain = DOMAIN_DEVICE_LOCAL;
+ return iget5_locked(sb, ia.ino.ino_output, iget_test, iget_set, &ia);
+}
+
+/**
+ * hmdfs_iget5_locked_remote - obtain an inode for the remote-dev-view
+ *
+ * @sb: superblock of current instance
+ * @peer: corresponding device node
+ * @remote_ino: remote inode's ino
+ *
+ * Hash the remote ino into the low 32 bits of our ino.
+ *
+ * Note that the current implementation assumes each remote inode has a unique
+ * ino, so the combination of the peer's unique dev_id and the remote_ino is
+ * enough to identify a remote inode.
+ */
+struct inode *hmdfs_iget5_locked_remote(struct super_block *sb,
+ struct hmdfs_peer *peer,
+ uint64_t remote_ino)
+{
+ struct iget_args ia = {
+ .lo_i = NULL,
+ .peer = peer,
+ .remote_ino = remote_ino,
+ .ino.ino_output = 0,
+ };
+
+ if (unlikely(!peer)) {
+		hmdfs_err("Received an invalid peer");
+ return NULL;
+ }
+
+ ia.ino.ino_raw = make_ino_raw_dev_remote(remote_ino);
+ ia.ino.dev_id = peer->device_id;
+ ia.ino.domain = DOMAIN_DEVICE_REMOTE;
+ return iget5_locked(sb, ia.ino.ino_output, iget_test, iget_set, &ia);
+}
+
+struct inode *hmdfs_iget_locked_root(struct super_block *sb, uint64_t root_ino,
+ struct inode *lo_i,
+ struct hmdfs_peer *peer)
+{
+ struct iget_args ia = {
+ .lo_i = lo_i,
+ .peer = peer,
+ .remote_ino = 0,
+ .ino.ino_raw = root_ino,
+ .ino.dev_id = peer ? peer->device_id : 0,
+ .ino.domain = DOMAIN_ROOT,
+ };
+
+	if (unlikely(root_ino < HMDFS_ROOT_ANCESTOR ||
+		     root_ino >= HMDFS_ROOT_INVALID)) {
+ hmdfs_err("Root %llu is invalid", root_ino);
+ return NULL;
+ }
+ if (unlikely(root_ino == HMDFS_ROOT_DEV_REMOTE && !peer)) {
+		hmdfs_err("Root %llu received an invalid peer", root_ino);
+ return NULL;
+ }
+
+ return iget5_locked(sb, ia.ino.ino_output, iget_test, iget_set, &ia);
+}
diff --git a/fs/hmdfs/inode.h b/fs/hmdfs/inode.h
new file mode 100644
index 0000000000000000000000000000000000000000..47f189f3cf828444036eebe1acc2a51b14fe25c4
--- /dev/null
+++ b/fs/hmdfs/inode.h
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/inode.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef INODE_H
+#define INODE_H
+
+#include "hmdfs.h"
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0)
+#include <linux/iversion.h>
+#endif
+
+enum {
+ HMDFS_REMOTE_INODE_NONE = 0,
+ HMDFS_REMOTE_INODE_STASHING,
+ HMDFS_REMOTE_INODE_RESTORING,
+};
+
+/*****************************************************************************
+ * fid
+ *****************************************************************************/
+
+/* Bits for fid_flags */
+enum {
+ HMDFS_FID_NEED_OPEN = 0,
+ HMDFS_FID_OPENING,
+};
+
+struct hmdfs_fid {
+ __u64 ver;
+ __u32 id;
+};
+
+/*
+ * The cache file is laid out in the following format:
+ * ________________________________________________________________
+ * |meta file info| remote file(s) path | file content |
+ * | head | path | data |
+ * ↑ ↑
+ * path_offs data_offs
+ */
+struct hmdfs_cache_info {
+ /* Path start offset in file (HMDFS_STASH_BLK_SIZE aligned) */
+ __u32 path_offs;
+ __u32 path_len;
+ __u32 path_cnt;
+ char *path_buf;
+	/* Path(s) of the remote file and its hardlinks, separated by '\0' */
+ char *path;
+ /* Data start offset in file (HMDFS_STASH_BLK_SIZE aligned) */
+ __u32 data_offs;
+ /* # of pages need to be written to remote file during offline */
+	/* # of pages that need to be written to the remote file while offline */
+ /* # of pages written to remote file during offline */
+ atomic64_t written_pgs;
+ /* Stash file handler */
+ struct file *cache_file;
+};
+
+/*****************************************************************************
+ * inode info and it's inline helpers
+ *****************************************************************************/
+
+struct hmdfs_inode_info {
+ struct inode *lower_inode; // for local/merge inode
+ struct hmdfs_peer *conn; // for remote inode
+ struct kref ref;
+ spinlock_t fid_lock;
+ struct hmdfs_fid fid;
+ unsigned long fid_flags;
+ wait_queue_head_t fid_wq;
+ __u8 inode_type; // deprecated: use ino system instead
+
+ /* writeback list */
+ struct list_head wb_list;
+
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+ __u16 perm;
+#endif
+ /*
+	 * Looking up a remote file generates a local inode; this stores the
+	 * combination of the remote inode number and generation so that the
+	 * uniqueness of the local inode can be determined.
+ */
+ __u64 remote_ino;
+ /*
+	 * If this value is not ULLONG_MAX, a remote getattr call should
+	 * return it as the inode size.
+ */
+ __u64 getattr_isize;
+ /*
+	 * This value stores the remote ctime, recorded when the remote file
+	 * is opened.
+ */
+ struct hmdfs_time_t remote_ctime;
+ /*
+	 * This value stores the last time, aligned to dcache_precision, that
+	 * the remote file was modified. Note that it has no effect if
+	 * writecache_expire is set.
+ */
+ struct hmdfs_time_t stable_ctime;
+ /*
+	 * If this value is nonzero, the pagecache should be truncated when
+	 * the file is opened later than this time, and stable_ctime no
+	 * longer takes effect.
+ */
+ unsigned long writecache_expire;
+ /*
+	 * This value records how many times the file has been written while
+	 * open; 'writecache_expire' will be set on close if this value is
+	 * nonzero.
+ */
+ atomic64_t write_counter;
+ /*
+	 * Linked into hmdfs_peer::wr_opened_inode_list when the remote
+	 * inode is opened for writing; wr_opened_cnt tracks possibly
+	 * multiple writable opens.
+ */
+ struct list_head wr_opened_node;
+ atomic_t wr_opened_cnt;
+ spinlock_t stash_lock;
+ unsigned int stash_status;
+ struct hmdfs_cache_info *cache;
+ /* link to hmdfs_peer::stashed_inode_list when stashing completes */
+ struct list_head stash_node;
+ /*
+ * The flush/fsync thread will hold the write lock while threads
+	 * calling writepage will hold the read lock. The rwsem eliminates
+	 * cases in which a flush/fsync completes while re-dirtied pages
+	 * remain dirty.
+ *
+ * Here is the explanation in detail:
+ *
+ * During `writepage()`, the state of a re-dirtied page will switch
+ * to the following states in sequence:
+ * s1: page dirty + tree dirty
+ * s2: page dirty + tree dirty
+ * s3: page clean + tree dirty
+ * s4: page clean + tree clean + write back
+ * s5: page dirty + tree dirty + write back
+ * s6: page dirty + tree dirty
+ *
+	 * A page in s4 will thus be ignored by a concurrent
+	 * `do_writepages()` invoked from `close()` or `fsync()`, leaving
+	 * its state inconsistent.
+ *
+ * To avoid such situation, we use per-file rwsems to prevent
+ * concurrent in-flight `writepage` during `close()` or `fsync()`.
+ *
+	 * The overhead is minimal, since the read side allows concurrent
+	 * `writepage` while it is natural for `close()` or `fsync()` to
+	 * wait for in-flight `writepage()`s to complete.
+ *
+	 * NOTE that in the worst case a process may wait on the write side
+	 * for TIMEOUT even if a signal is pending, but we have to wait
+	 * there to iterate all pages and make sure no dirty page remains.
+ */
+ struct rw_semaphore wpage_sem;
+
+ // The real inode shared with vfs. ALWAYS PUT IT AT THE BOTTOM.
+ struct inode vfs_inode;
+};
+
+struct hmdfs_readdir_work {
+ struct list_head head;
+ struct dentry *dentry;
+ struct hmdfs_peer *con;
+ struct delayed_work dwork;
+};
+
+static inline struct hmdfs_inode_info *hmdfs_i(struct inode *inode)
+{
+ return container_of(inode, struct hmdfs_inode_info, vfs_inode);
+}
+
+static inline bool hmdfs_inode_is_stashing(const struct hmdfs_inode_info *info)
+{
+ const struct hmdfs_sb_info *sbi = hmdfs_sb(info->vfs_inode.i_sb);
+
+ /* Refer to comments in hmdfs_stash_remote_inode() */
+ return (hmdfs_is_stash_enabled(sbi) &&
+		smp_load_acquire(&info->stash_status)); /* paired with release */
+}
+
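+/*
+ * Snapshot the fid under fid_lock so that callers observe a consistent
+ * (ver, id) pair even if a concurrent reopen is updating it.
+ */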
+static inline void hmdfs_remote_fetch_fid(struct hmdfs_inode_info *info,
+ struct hmdfs_fid *fid)
+{
+ spin_lock(&info->fid_lock);
+ *fid = info->fid;
+ spin_unlock(&info->fid_lock);
+}
+
+/*****************************************************************************
+ * ino allocator
+ *****************************************************************************/
+
+enum HMDFS_ROOT {
+ HMDFS_ROOT_ANCESTOR = 1, // /
+ HMDFS_ROOT_DEV, // /device_view
+ HMDFS_ROOT_DEV_LOCAL, // /device_view/local
+ HMDFS_ROOT_DEV_REMOTE, // /device_view/remote
+ HMDFS_ROOT_MERGE, // /merge_view
+
+ HMDFS_ROOT_INVALID,
+};
+
+// layer within the hmdfs directory tree, not an overlay(fs) layer
+enum HMDFS_LAYER_TYPE {
+ HMDFS_LAYER_ZERO = 0, // /
+ HMDFS_LAYER_FIRST_DEVICE, // /device_view
+ HMDFS_LAYER_SECOND_LOCAL, // /device_view/local
+ HMDFS_LAYER_SECOND_REMOTE, // /device_view/remote
+ HMDFS_LAYER_OTHER_LOCAL, // /device_view/local/xx
+ HMDFS_LAYER_OTHER_REMOTE, // /device_view/remote/xx
+
+ HMDFS_LAYER_FIRST_MERGE, // /merge_view
+ HMDFS_LAYER_OTHER_MERGE, // /merge_view/xxx
+ HMDFS_LAYER_INVALID,
+};
+
+struct inode *hmdfs_iget_locked_root(struct super_block *sb, uint64_t root_ino,
+ struct inode *lo_i,
+ struct hmdfs_peer *peer);
+struct inode *hmdfs_iget5_locked_merge(struct super_block *sb,
+ struct dentry *fst_lo_d);
+
+struct inode *hmdfs_iget5_locked_local(struct super_block *sb,
+ struct inode *lo_i);
+struct hmdfs_peer;
+struct inode *hmdfs_iget5_locked_remote(struct super_block *sb,
+ struct hmdfs_peer *peer,
+ uint64_t remote_ino);
+
+#endif // INODE_H
diff --git a/fs/hmdfs/inode_local.c b/fs/hmdfs/inode_local.c
new file mode 100644
index 0000000000000000000000000000000000000000..d34b765ab65daab8c1ca48a334ef92c9debc404a
--- /dev/null
+++ b/fs/hmdfs/inode_local.c
@@ -0,0 +1,963 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/inode_local.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include <linux/cred.h>
+#include <linux/fs.h>
+#include <linux/fs_stack.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/slab.h>
+
+#include "authority/authentication.h"
+#include "comm/socket_adapter.h"
+#include "comm/transport.h"
+#include "hmdfs_client.h"
+#include "hmdfs_dentryfile.h"
+#include "hmdfs_device_view.h"
+#include "hmdfs_trace.h"
+
+extern struct kmem_cache *hmdfs_dentry_cachep;
+
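+/*
+ * Symlink targets must start with one of these prefixes; anything else is
+ * rejected by symname_is_allowed() below.
+ */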
+static const char *const symlink_tgt_white_list[] = {
+ "/storage/",
+ "/sdcard/",
+};
+
+struct hmdfs_name_data {
+ struct dir_context ctx;
+ const struct qstr *to_find;
+ char *name;
+ bool found;
+};
+
+int init_hmdfs_dentry_info(struct hmdfs_sb_info *sbi, struct dentry *dentry,
+ int dentry_type)
+{
+ struct hmdfs_dentry_info *info =
+ kmem_cache_zalloc(hmdfs_dentry_cachep, GFP_ATOMIC);
+
+ if (!info)
+ return -ENOMEM;
+ dentry->d_fsdata = info;
+ INIT_LIST_HEAD(&info->cache_list_head);
+ INIT_LIST_HEAD(&info->remote_cache_list_head);
+ spin_lock_init(&info->cache_list_lock);
+ mutex_init(&info->remote_cache_list_lock);
+ mutex_init(&info->cache_pull_lock);
+ spin_lock_init(&info->lock);
+ info->dentry_type = dentry_type;
+ info->device_id = 0;
+ if (dentry_type == HMDFS_LAYER_ZERO ||
+ dentry_type == HMDFS_LAYER_FIRST_DEVICE ||
+ dentry_type == HMDFS_LAYER_SECOND_LOCAL ||
+ dentry_type == HMDFS_LAYER_SECOND_REMOTE)
+ d_set_d_op(dentry, &hmdfs_dev_dops);
+ else
+ d_set_d_op(dentry, &hmdfs_dops);
+ return 0;
+}
+
+static inline void set_symlink_flag(struct hmdfs_dentry_info *gdi)
+{
+ gdi->file_type = HM_SYMLINK;
+}
+
+struct inode *fill_inode_local(struct super_block *sb,
+ struct inode *lower_inode)
+{
+ struct inode *inode;
+ struct hmdfs_inode_info *info;
+
+ if (!igrab(lower_inode))
+ return ERR_PTR(-ESTALE);
+
+ inode = hmdfs_iget5_locked_local(sb, lower_inode);
+ if (!inode) {
+ hmdfs_err("iget5_locked get inode NULL");
+ iput(lower_inode);
+ return ERR_PTR(-ENOMEM);
+ }
+ if (!(inode->i_state & I_NEW)) {
+ iput(lower_inode);
+ return inode;
+ }
+
+ info = hmdfs_i(inode);
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+ info->perm = hmdfs_read_perm(lower_inode);
+#endif
+ if (S_ISDIR(lower_inode->i_mode))
+ inode->i_mode = (lower_inode->i_mode & S_IFMT) | S_IRWXU |
+ S_IRWXG | S_IXOTH;
+ else if (S_ISREG(lower_inode->i_mode))
+ inode->i_mode = (lower_inode->i_mode & S_IFMT) | S_IRUSR |
+ S_IWUSR | S_IRGRP | S_IWGRP;
+ else if (S_ISLNK(lower_inode->i_mode))
+ inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
+
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+ inode->i_uid = lower_inode->i_uid;
+ inode->i_gid = lower_inode->i_gid;
+#else
+ inode->i_uid = KUIDT_INIT((uid_t)1000);
+ inode->i_gid = KGIDT_INIT((gid_t)1000);
+#endif
+ inode->i_atime = lower_inode->i_atime;
+ inode->i_ctime = lower_inode->i_ctime;
+ inode->i_mtime = lower_inode->i_mtime;
+ inode->i_generation = lower_inode->i_generation;
+
+ info->inode_type = HMDFS_LAYER_OTHER_LOCAL;
+ if (S_ISDIR(lower_inode->i_mode)) {
+ inode->i_op = &hmdfs_dir_inode_ops_local;
+ inode->i_fop = &hmdfs_dir_ops_local;
+ inode->i_mode |= S_IXUGO;
+ } else if (S_ISREG(lower_inode->i_mode)) {
+ inode->i_op = &hmdfs_file_iops_local;
+ inode->i_fop = &hmdfs_file_fops_local;
+ } else if (S_ISLNK(lower_inode->i_mode)) {
+ inode->i_op = &hmdfs_symlink_iops_local;
+ inode->i_fop = &hmdfs_file_fops_local;
+ }
+
+ fsstack_copy_inode_size(inode, lower_inode);
+ unlock_new_inode(inode);
+ return inode;
+}
+
+/* hmdfs_convert_lookup_flags - convert hmdfs lookup flags to vfs lookup flags
+ *
+ * @hmdfs_flags: hmdfs lookup flags
+ * @vfs_flags: pointer to converted flags
+ *
+ * return 0 on success, or err code on failure.
+ */
+int hmdfs_convert_lookup_flags(unsigned int hmdfs_flags,
+ unsigned int *vfs_flags)
+{
+ *vfs_flags = 0;
+
+ /* currently only support HMDFS_LOOKUP_REVAL */
+ if (hmdfs_flags & ~HMDFS_LOOKUP_REVAL)
+ return -EINVAL;
+
+ if (hmdfs_flags & HMDFS_LOOKUP_REVAL)
+ *vfs_flags |= LOOKUP_REVAL;
+
+ return 0;
+}
+
+static int hmdfs_name_match(struct dir_context *ctx, const char *name,
+ int namelen, loff_t offset, u64 ino,
+ unsigned int d_type)
+{
+ struct hmdfs_name_data *buf =
+ container_of(ctx, struct hmdfs_name_data, ctx);
+ struct qstr candidate = QSTR_INIT(name, namelen);
+
+ if (qstr_case_eq(buf->to_find, &candidate)) {
+ memcpy(buf->name, name, namelen);
+ buf->name[namelen] = 0;
+ buf->found = true;
+ return 1;
+ }
+ return 0;
+}
+
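+/*
+ * Case-insensitive fallback: iterate the lower directory, match the name
+ * with qstr_case_eq() and then look up the actual on-disk name.
+ */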
+static int __lookup_nosensitive(struct path *lower_parent_path,
+ struct dentry *child_dentry, unsigned int flags,
+ struct path *lower_path)
+{
+ struct file *file;
+ const struct cred *cred = current_cred();
+ const struct qstr *name = &child_dentry->d_name;
+ int err;
+ struct hmdfs_name_data buffer = {
+ .ctx.actor = hmdfs_name_match,
+ .to_find = name,
+ .name = __getname(),
+ .found = false,
+ };
+
+ if (!buffer.name) {
+ err = -ENOMEM;
+ goto out;
+ }
+ file = dentry_open(lower_parent_path, O_RDONLY, cred);
+ if (IS_ERR(file)) {
+ err = PTR_ERR(file);
+ goto put_name;
+ }
+ err = iterate_dir(file, &buffer.ctx);
+ fput(file);
+ if (err)
+ goto put_name;
+ if (buffer.found)
+ err = vfs_path_lookup(lower_parent_path->dentry,
+ lower_parent_path->mnt, buffer.name,
+ flags, lower_path);
+ else
+ err = -ENOENT;
+put_name:
+ __putname(buffer.name);
+out:
+ return err;
+}
+
+struct dentry *hmdfs_lookup_local(struct inode *parent_inode,
+ struct dentry *child_dentry,
+ unsigned int flags)
+{
+ const char *d_name = child_dentry->d_name.name;
+ int err = 0;
+ struct path lower_path, lower_parent_path;
+ struct dentry *lower_dentry = NULL, *parent_dentry = NULL, *ret = NULL;
+ struct hmdfs_dentry_info *gdi = NULL;
+ struct inode *child_inode = NULL;
+ struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb);
+
+ trace_hmdfs_lookup_local(parent_inode, child_dentry, flags);
+ if (child_dentry->d_name.len > NAME_MAX) {
+ ret = ERR_PTR(-ENAMETOOLONG);
+ goto out;
+ }
+
+ /* local device */
+ parent_dentry = dget_parent(child_dentry);
+ hmdfs_get_lower_path(parent_dentry, &lower_parent_path);
+ err = init_hmdfs_dentry_info(sbi, child_dentry,
+ HMDFS_LAYER_OTHER_LOCAL);
+ if (err) {
+ ret = ERR_PTR(err);
+ goto out_err;
+ }
+
+ gdi = hmdfs_d(child_dentry);
+
+ flags &= ~LOOKUP_FOLLOW;
+ err = vfs_path_lookup(lower_parent_path.dentry, lower_parent_path.mnt,
+ (child_dentry->d_name.name), 0, &lower_path);
+ if (err == -ENOENT && !sbi->s_case_sensitive)
+ err = __lookup_nosensitive(&lower_parent_path, child_dentry, 0,
+ &lower_path);
+ if (err && err != -ENOENT) {
+ ret = ERR_PTR(err);
+ goto out_err;
+ } else if (!err) {
+ hmdfs_set_lower_path(child_dentry, &lower_path);
+ child_inode = fill_inode_local(parent_inode->i_sb,
+ d_inode(lower_path.dentry));
+ if (S_ISLNK(d_inode(lower_path.dentry)->i_mode))
+ set_symlink_flag(gdi);
+ if (IS_ERR(child_inode)) {
+ err = PTR_ERR(child_inode);
+ ret = ERR_PTR(err);
+ hmdfs_put_reset_lower_path(child_dentry);
+ goto out_err;
+ }
+ ret = d_splice_alias(child_inode, child_dentry);
+ if (IS_ERR(ret)) {
+ err = PTR_ERR(ret);
+ hmdfs_put_reset_lower_path(child_dentry);
+ goto out_err;
+ }
+
+ check_and_fixup_ownership(parent_inode, child_inode,
+ lower_path.dentry,
+ child_dentry->d_name.name);
+ goto out_err;
+ }
+ /*
+	 * Return 0 here so that the VFS can continue turning this negative
+	 * dentry into a positive one when creating a new file.
+	 */
+	err = 0;
+	ret = NULL;
+
+ lower_dentry = lookup_one_len_unlocked(d_name, lower_parent_path.dentry,
+ child_dentry->d_name.len);
+ if (IS_ERR(lower_dentry)) {
+ err = PTR_ERR(lower_dentry);
+ ret = lower_dentry;
+ goto out_err;
+ }
+ lower_path.dentry = lower_dentry;
+ lower_path.mnt = mntget(lower_parent_path.mnt);
+ hmdfs_set_lower_path(child_dentry, &lower_path);
+
+out_err:
+ if (!err)
+ hmdfs_set_time(child_dentry, jiffies);
+ hmdfs_put_lower_path(&lower_parent_path);
+ dput(parent_dentry);
+out:
+ trace_hmdfs_lookup_local_end(parent_inode, child_dentry, err);
+ return ret;
+}
+
+int hmdfs_mkdir_local_dentry(struct inode *dir, struct dentry *dentry,
+ umode_t mode)
+{
+ struct inode *lower_dir = hmdfs_i(dir)->lower_inode;
+ struct dentry *lower_dir_dentry = NULL;
+ struct super_block *sb = dir->i_sb;
+ struct path lower_path;
+ struct dentry *lower_dentry = NULL;
+ int error = 0;
+ struct inode *lower_inode = NULL;
+ struct inode *child_inode = NULL;
+ bool local_res = false;
+ struct cache_fs_override or;
+ __u16 child_perm;
+ kuid_t tmp_uid;
+
+ error = hmdfs_override_dir_id_fs(&or, dir, dentry, &child_perm);
+ if (error)
+ goto cleanup;
+
+ hmdfs_get_lower_path(dentry, &lower_path);
+ lower_dentry = lower_path.dentry;
+ lower_dir_dentry = lock_parent(lower_dentry);
+
+ tmp_uid = hmdfs_override_inode_uid(lower_dir);
+ mode = (mode & S_IFMT) | 00771;
+
+ error = vfs_mkdir(lower_dir, lower_dentry, mode);
+ hmdfs_revert_inode_uid(lower_dir, tmp_uid);
+ if (error) {
+ hmdfs_err("vfs_mkdir() error:%d", error);
+ goto out;
+ }
+ local_res = true;
+ lower_inode = d_inode(lower_dentry);
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+ error = hmdfs_persist_perm(lower_dentry, &child_perm);
+#endif
+ child_inode = fill_inode_local(sb, lower_inode);
+ if (IS_ERR(child_inode)) {
+ error = PTR_ERR(child_inode);
+ goto out;
+ }
+ d_add(dentry, child_inode);
+ set_nlink(dir, hmdfs_i(dir)->lower_inode->i_nlink);
+out:
+ unlock_dir(lower_dir_dentry);
+ if (local_res)
+ hmdfs_drop_remote_cache_dents(dentry->d_parent);
+
+ if (error) {
+ hmdfs_clear_drop_flag(dentry->d_parent);
+ d_drop(dentry);
+ }
+ hmdfs_put_lower_path(&lower_path);
+ hmdfs_revert_dir_id_fs(&or);
+cleanup:
+ return error;
+}
+
+int hmdfs_mkdir_local(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ int err = 0;
+
+ if (check_filename(dentry->d_name.name, dentry->d_name.len)) {
+ err = -EINVAL;
+ return err;
+ }
+
+ if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) {
+ err = -EACCES;
+ return err;
+ }
+ err = hmdfs_mkdir_local_dentry(dir, dentry, mode);
+ trace_hmdfs_mkdir_local(dir, dentry, err);
+ return err;
+}
+
+int hmdfs_create_local_dentry(struct inode *dir, struct dentry *dentry,
+ umode_t mode, bool want_excl)
+{
+ struct inode *lower_dir = NULL;
+ struct dentry *lower_dir_dentry = NULL;
+ struct super_block *sb = dir->i_sb;
+ struct path lower_path;
+ struct dentry *lower_dentry = NULL;
+ int error = 0;
+ struct inode *lower_inode = NULL;
+ struct inode *child_inode = NULL;
+ kuid_t tmp_uid;
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+ const struct cred *saved_cred = NULL;
+ struct fs_struct *saved_fs = NULL, *copied_fs = NULL;
+ __u16 child_perm;
+#endif
+
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+ saved_cred = hmdfs_override_file_fsids(dir, &child_perm);
+ if (!saved_cred) {
+ error = -ENOMEM;
+ goto path_err;
+ }
+
+ saved_fs = current->fs;
+ copied_fs = hmdfs_override_fsstruct(saved_fs);
+ if (!copied_fs) {
+ error = -ENOMEM;
+ goto revert_fsids;
+ }
+#endif
+ hmdfs_get_lower_path(dentry, &lower_path);
+ lower_dentry = lower_path.dentry;
+ mode = (mode & S_IFMT) | 00660;
+ lower_dir_dentry = lock_parent(lower_dentry);
+ lower_dir = d_inode(lower_dir_dentry);
+ tmp_uid = hmdfs_override_inode_uid(lower_dir);
+ error = vfs_create(lower_dir, lower_dentry, mode, want_excl);
+ hmdfs_revert_inode_uid(lower_dir, tmp_uid);
+ unlock_dir(lower_dir_dentry);
+ if (error)
+ goto out;
+
+ lower_inode = d_inode(lower_dentry);
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+ error = hmdfs_persist_perm(lower_dentry, &child_perm);
+#endif
+ child_inode = fill_inode_local(sb, lower_inode);
+ if (IS_ERR(child_inode)) {
+ error = PTR_ERR(child_inode);
+ goto out_created;
+ }
+ d_add(dentry, child_inode);
+
+out_created:
+ hmdfs_drop_remote_cache_dents(dentry->d_parent);
+out:
+ if (error) {
+ hmdfs_clear_drop_flag(dentry->d_parent);
+ d_drop(dentry);
+ }
+ hmdfs_put_lower_path(&lower_path);
+
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+ hmdfs_revert_fsstruct(saved_fs, copied_fs);
+revert_fsids:
+ hmdfs_revert_fsids(saved_cred);
+#endif
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+path_err:
+#endif
+ return error;
+}
+
+int hmdfs_create_local(struct inode *dir, struct dentry *child_dentry,
+ umode_t mode, bool want_excl)
+{
+ int err = 0;
+
+ if (check_filename(child_dentry->d_name.name,
+ child_dentry->d_name.len)) {
+ err = -EINVAL;
+ return err;
+ }
+
+ if (hmdfs_file_type(child_dentry->d_name.name) != HMDFS_TYPE_COMMON) {
+ err = -EACCES;
+ return err;
+ }
+
+ err = hmdfs_create_local_dentry(dir, child_dentry, mode, want_excl);
+ trace_hmdfs_create_local(dir, child_dentry, err);
+ return err;
+}
+
+int hmdfs_rmdir_local_dentry(struct inode *dir, struct dentry *dentry)
+{
+ struct inode *lower_dir = NULL;
+ struct dentry *lower_dir_dentry = NULL;
+ kuid_t tmp_uid;
+ struct path lower_path;
+ struct dentry *lower_dentry = NULL;
+ int error = 0;
+
+ hmdfs_clear_cache_dents(dentry, true);
+ hmdfs_get_lower_path(dentry, &lower_path);
+ lower_dentry = lower_path.dentry;
+ lower_dir_dentry = lock_parent(lower_dentry);
+ lower_dir = d_inode(lower_dir_dentry);
+ tmp_uid = hmdfs_override_inode_uid(lower_dir);
+
+ error = vfs_rmdir(lower_dir, lower_dentry);
+ hmdfs_revert_inode_uid(lower_dir, tmp_uid);
+ unlock_dir(lower_dir_dentry);
+ hmdfs_put_lower_path(&lower_path);
+ if (error)
+ goto path_err;
+ hmdfs_drop_remote_cache_dents(dentry->d_parent);
+path_err:
+ if (error)
+ hmdfs_clear_drop_flag(dentry->d_parent);
+ return error;
+}
+
+int hmdfs_rmdir_local(struct inode *dir, struct dentry *dentry)
+{
+ int err = 0;
+
+ if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) {
+ err = -EACCES;
+ goto out;
+ }
+
+ err = hmdfs_rmdir_local_dentry(dir, dentry);
+ if (err != 0) {
+ hmdfs_err("rm dir failed:%d", err);
+ goto out;
+ }
+
+	/*
+	 * Drop the dentry even if the remote operation failed; a remote
+	 * device may have disconnected while performing the remote rmdir.
+	 */
+ d_drop(dentry);
+out:
+	/* return the connected device's error code */
+ trace_hmdfs_rmdir_local(dir, dentry, err);
+ return err;
+}
+
+int hmdfs_unlink_local_dentry(struct inode *dir, struct dentry *dentry)
+{
+ struct inode *lower_dir = hmdfs_i(dir)->lower_inode;
+ struct dentry *lower_dir_dentry = NULL;
+ struct path lower_path;
+ struct dentry *lower_dentry = NULL;
+ int error;
+ kuid_t tmp_uid;
+
+ hmdfs_get_lower_path(dentry, &lower_path);
+ lower_dentry = lower_path.dentry;
+ dget(lower_dentry);
+ lower_dir_dentry = lock_parent(lower_dentry);
+ tmp_uid = hmdfs_override_inode_uid(lower_dir);
+ error = vfs_unlink(lower_dir, lower_dentry, NULL);
+ hmdfs_revert_inode_uid(lower_dir, tmp_uid);
+ set_nlink(d_inode(dentry),
+ hmdfs_i(d_inode(dentry))->lower_inode->i_nlink);
+ unlock_dir(lower_dir_dentry);
+ dput(lower_dentry);
+ if (error)
+ goto path_err;
+
+ hmdfs_drop_remote_cache_dents(dentry->d_parent);
+ d_drop(dentry);
+ hmdfs_put_lower_path(&lower_path);
+
+path_err:
+ if (error)
+ hmdfs_clear_drop_flag(dentry->d_parent);
+ return error;
+}
+
+int hmdfs_unlink_local(struct inode *dir, struct dentry *dentry)
+{
+ if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON)
+ return -EACCES;
+
+ return hmdfs_unlink_local_dentry(dir, dentry);
+}
+
+int hmdfs_rename_local_dentry(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ struct path lower_old_path;
+ struct path lower_new_path;
+ struct dentry *lower_old_dentry = NULL;
+ struct dentry *lower_new_dentry = NULL;
+ struct dentry *lower_old_dir_dentry = NULL;
+ struct dentry *lower_new_dir_dentry = NULL;
+ struct dentry *trap = NULL;
+ int rc = 0;
+ kuid_t old_dir_uid, new_dir_uid;
+
+ if (flags)
+ return -EINVAL;
+
+ hmdfs_get_lower_path(old_dentry, &lower_old_path);
+ lower_old_dentry = lower_old_path.dentry;
+ if (!lower_old_dentry) {
+		hmdfs_err("lower_old_dentry is NULL");
+ rc = -EACCES;
+ goto out_put_old_path;
+ }
+
+ hmdfs_get_lower_path(new_dentry, &lower_new_path);
+ lower_new_dentry = lower_new_path.dentry;
+ if (!lower_new_dentry) {
+		hmdfs_err("lower_new_dentry is NULL");
+ rc = -EACCES;
+ goto out_put_new_path;
+ }
+
+ lower_old_dir_dentry = dget_parent(lower_old_dentry);
+ lower_new_dir_dentry = dget_parent(lower_new_dentry);
+ trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+ old_dir_uid = hmdfs_override_inode_uid(d_inode(lower_old_dir_dentry));
+ new_dir_uid = hmdfs_override_inode_uid(d_inode(lower_new_dir_dentry));
+
+ /* source should not be ancestor of target */
+ if (trap == lower_old_dentry) {
+ rc = -EINVAL;
+ goto out_lock;
+ }
+ /* target should not be ancestor of source */
+ if (trap == lower_new_dentry) {
+ rc = -ENOTEMPTY;
+ goto out_lock;
+ }
+
+ rc = vfs_rename(d_inode(lower_old_dir_dentry), lower_old_dentry,
+ d_inode(lower_new_dir_dentry), lower_new_dentry, NULL,
+ flags);
+out_lock:
+ dget(old_dentry);
+
+ hmdfs_revert_inode_uid(d_inode(lower_old_dir_dentry), old_dir_uid);
+ hmdfs_revert_inode_uid(d_inode(lower_new_dir_dentry), new_dir_uid);
+
+ unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+ if (rc == 0) {
+ hmdfs_drop_remote_cache_dents(old_dentry->d_parent);
+ if (old_dentry->d_parent != new_dentry->d_parent)
+ hmdfs_drop_remote_cache_dents(new_dentry->d_parent);
+ } else {
+ hmdfs_clear_drop_flag(old_dentry->d_parent);
+ if (old_dentry->d_parent != new_dentry->d_parent)
+			hmdfs_clear_drop_flag(new_dentry->d_parent);
+ d_drop(new_dentry);
+ }
+
+ dput(old_dentry);
+ dput(lower_old_dir_dentry);
+ dput(lower_new_dir_dentry);
+
+out_put_new_path:
+ hmdfs_put_lower_path(&lower_new_path);
+out_put_old_path:
+ hmdfs_put_lower_path(&lower_old_path);
+ return rc;
+}
+
+int hmdfs_rename_local(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ int err = 0;
+ int ret = 0;
+
+ trace_hmdfs_rename_local(old_dir, old_dentry, new_dir, new_dentry,
+ flags);
+ if (hmdfs_file_type(old_dentry->d_name.name) != HMDFS_TYPE_COMMON ||
+ hmdfs_file_type(new_dentry->d_name.name) != HMDFS_TYPE_COMMON) {
+ err = -EACCES;
+ goto rename_out;
+ }
+
+ if (S_ISREG(old_dentry->d_inode->i_mode)) {
+ err = hmdfs_rename_local_dentry(old_dir, old_dentry, new_dir,
+ new_dentry, flags);
+ } else if (S_ISDIR(old_dentry->d_inode->i_mode)) {
+ ret = hmdfs_rename_local_dentry(old_dir, old_dentry, new_dir,
+ new_dentry, flags);
+ if (ret != 0) {
+ err = ret;
+ goto rename_out;
+ }
+ }
+
+ if (!err)
+ d_invalidate(old_dentry);
+
+rename_out:
+ return err;
+}
+
+static bool symname_is_allowed(const char *symname)
+{
+ size_t symname_len = strlen(symname);
+ const char *prefix = NULL;
+ int i, total;
+
+	/**
+	 * Adjacent dots are prohibited: ".." could be used to escape
+	 * the shared directory. Note that the VFS has already handled
+	 * backslash escaping by this point.
+	 */
+	for (i = 0; i + 1 < symname_len; ++i)
+ if (symname[i] == '.' && symname[i + 1] == '.')
+ goto out_fail;
+
+	/**
+	 * Check whether the symname starts with a whitelisted prefix.
+	 * Comparing lengths is unnecessary because symname is
+	 * NUL-terminated.
+	 */
+ total = sizeof(symlink_tgt_white_list) /
+ sizeof(*symlink_tgt_white_list);
+ for (i = 0; i < total; ++i) {
+ prefix = symlink_tgt_white_list[i];
+ if (!strncmp(symname, prefix, strlen(prefix)))
+ goto out_succ;
+ }
+
+out_fail:
+ hmdfs_err("Prohibited link path");
+ return false;
+out_succ:
+ return true;
+}
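
As an aside, the two rules above (reject adjacent dots, require a
whitelisted prefix) are easy to exercise in isolation. Below is a minimal
userspace sketch of the same checks; the whitelist contents are
placeholders, since the real symlink_tgt_white_list is defined elsewhere in
this patch:

/* Userspace model of symname_is_allowed(); the whitelist is hypothetical. */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static const char *const white_list[] = { "/storage/", "/data/" };

static bool symname_allowed(const char *symname)
{
	size_t i, n = strlen(symname);

	/* ".." anywhere in the target could escape the shared root */
	for (i = 0; i + 1 < n; i++)
		if (symname[i] == '.' && symname[i + 1] == '.')
			return false;

	/* the target must begin with a whitelisted prefix */
	for (i = 0; i < sizeof(white_list) / sizeof(*white_list); i++)
		if (!strncmp(symname, white_list[i], strlen(white_list[i])))
			return true;
	return false;
}

int main(void)
{
	printf("%d\n", symname_allowed("/storage/media/a.jpg"));   /* 1 */
	printf("%d\n", symname_allowed("/storage/../etc/passwd")); /* 0 */
	return 0;
}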
+
+int hmdfs_symlink_local(struct inode *dir, struct dentry *dentry,
+ const char *symname)
+{
+ int err;
+ struct dentry *lower_dentry = NULL;
+ struct dentry *lower_parent_dentry = NULL;
+ struct path lower_path;
+ struct inode *child_inode = NULL;
+ struct inode *lower_dir_inode = hmdfs_i(dir)->lower_inode;
+ struct hmdfs_dentry_info *gdi = hmdfs_d(dentry);
+ kuid_t tmp_uid;
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+ const struct cred *saved_cred = NULL;
+ struct fs_struct *saved_fs = NULL, *copied_fs = NULL;
+ __u16 child_perm;
+#endif
+
+ if (unlikely(!symname_is_allowed(symname))) {
+ err = -EPERM;
+ goto path_err;
+ }
+
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+ saved_cred = hmdfs_override_file_fsids(dir, &child_perm);
+ if (!saved_cred) {
+ err = -ENOMEM;
+ goto path_err;
+ }
+
+ saved_fs = current->fs;
+ copied_fs = hmdfs_override_fsstruct(saved_fs);
+ if (!copied_fs) {
+ err = -ENOMEM;
+ goto revert_fsids;
+ }
+#endif
+ hmdfs_get_lower_path(dentry, &lower_path);
+ lower_dentry = lower_path.dentry;
+ lower_parent_dentry = lock_parent(lower_dentry);
+ tmp_uid = hmdfs_override_inode_uid(lower_dir_inode);
+ err = vfs_symlink(lower_dir_inode, lower_dentry, symname);
+ hmdfs_revert_inode_uid(lower_dir_inode, tmp_uid);
+ unlock_dir(lower_parent_dentry);
+ if (err)
+ goto out_err;
+ set_symlink_flag(gdi);
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+ err = hmdfs_persist_perm(lower_dentry, &child_perm);
+#endif
+ child_inode = fill_inode_local(dir->i_sb, d_inode(lower_dentry));
+ if (IS_ERR(child_inode)) {
+ err = PTR_ERR(child_inode);
+ goto out_err;
+ }
+ d_add(dentry, child_inode);
+ fsstack_copy_attr_times(dir, lower_dir_inode);
+ fsstack_copy_inode_size(dir, lower_dir_inode);
+
+out_err:
+ hmdfs_put_lower_path(&lower_path);
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+ hmdfs_revert_fsstruct(saved_fs, copied_fs);
+revert_fsids:
+ hmdfs_revert_fsids(saved_cred);
+#endif
+path_err:
+ trace_hmdfs_symlink_local(dir, dentry, err);
+ return err;
+}
+
+static const char *hmdfs_get_link_local(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
+{
+ const char *link = NULL;
+ struct dentry *lower_dentry = NULL;
+ struct inode *lower_inode = NULL;
+ struct path lower_path;
+
+ if (!dentry) {
+ hmdfs_err("dentry NULL");
+ link = ERR_PTR(-ECHILD);
+ goto link_out;
+ }
+
+ hmdfs_get_lower_path(dentry, &lower_path);
+ lower_dentry = lower_path.dentry;
+ lower_inode = d_inode(lower_dentry);
+ if (!lower_inode->i_op || !lower_inode->i_op->get_link) {
+ hmdfs_err("The lower inode doesn't support get_link i_op");
+ link = ERR_PTR(-EINVAL);
+ goto out;
+ }
+
+ link = lower_inode->i_op->get_link(lower_dentry, lower_inode, done);
+ if (IS_ERR_OR_NULL(link))
+ goto out;
+ fsstack_copy_attr_atime(inode, lower_inode);
+out:
+ hmdfs_put_lower_path(&lower_path);
+ trace_hmdfs_get_link_local(inode, dentry, PTR_ERR_OR_ZERO(link));
+link_out:
+ return link;
+}
+
+static int hmdfs_setattr_local(struct dentry *dentry, struct iattr *ia)
+{
+ struct inode *inode = d_inode(dentry);
+ struct inode *lower_inode = hmdfs_i(inode)->lower_inode;
+ struct path lower_path;
+ struct dentry *lower_dentry = NULL;
+ struct iattr lower_ia;
+ unsigned int ia_valid = ia->ia_valid;
+ int err = 0;
+ kuid_t tmp_uid;
+
+ hmdfs_get_lower_path(dentry, &lower_path);
+ lower_dentry = lower_path.dentry;
+ memcpy(&lower_ia, ia, sizeof(lower_ia));
+ if (ia_valid & ATTR_FILE)
+ lower_ia.ia_file = hmdfs_f(ia->ia_file)->lower_file;
+ lower_ia.ia_valid &= ~(ATTR_UID | ATTR_GID | ATTR_MODE);
+ if (ia_valid & ATTR_SIZE) {
+ err = inode_newsize_ok(inode, ia->ia_size);
+ if (err)
+ goto out;
+ truncate_setsize(inode, ia->ia_size);
+ }
+ inode_lock(lower_inode);
+ tmp_uid = hmdfs_override_inode_uid(lower_inode);
+
+ err = notify_change(lower_dentry, &lower_ia, NULL);
+ i_size_write(inode, i_size_read(lower_inode));
+ inode->i_atime = lower_inode->i_atime;
+ inode->i_mtime = lower_inode->i_mtime;
+ inode->i_ctime = lower_inode->i_ctime;
+ err = update_inode_to_dentry(dentry, inode);
+ hmdfs_revert_inode_uid(lower_inode, tmp_uid);
+
+ inode_unlock(lower_inode);
+out:
+ hmdfs_put_lower_path(&lower_path);
+ return err;
+}
+
+static int hmdfs_getattr_local(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
+{
+ struct path lower_path;
+ int ret;
+
+ hmdfs_get_lower_path(path->dentry, &lower_path);
+ ret = vfs_getattr(&lower_path, stat, request_mask, flags);
+ stat->ino = d_inode(path->dentry)->i_ino;
+ hmdfs_put_lower_path(&lower_path);
+
+ return ret;
+}
+
+int hmdfs_permission(struct inode *inode, int mask)
+{
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+ unsigned int mode = inode->i_mode;
+ struct hmdfs_inode_info *hii = hmdfs_i(inode);
+ kuid_t cur_uid = current_fsuid();
+
+ if (uid_eq(cur_uid, ROOT_UID) || uid_eq(cur_uid, SYSTEM_UID))
+ return 0;
+
+ if (uid_eq(cur_uid, inode->i_uid)) {
+ mode >>= 6;
+ } else if (in_group_p(inode->i_gid)) {
+ mode >>= 3;
+ } else if (is_pkg_auth(hii->perm)) {
+ if (uid_eq(cur_uid, inode->i_uid))
+ return 0;
+ } else if (is_system_auth(hii->perm)) {
+ if (in_group_p(MEDIA_RW_GID))
+ return 0;
+ }
+
+ if ((mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
+ return 0;
+
+ trace_hmdfs_permission(inode->i_ino);
+ return -EACCES;
+#else
+
+ return 0;
+#endif
+}
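
The mode-shifting above is the classic Unix owner/group/other walk: pick
the rwx triplet that applies to the caller, then require every requested
right to be present in it. A minimal userspace rendering of the core check,
leaving out the hmdfs-specific ROOT/SYSTEM and pkg/system-auth shortcuts (a
sketch, not the in-tree code):

#include <stdbool.h>
#include <stdio.h>

/* same values as the kernel's MAY_* permission mask bits */
#define MAY_EXEC  0x01
#define MAY_WRITE 0x02
#define MAY_READ  0x04

static bool may_access(unsigned int mode, unsigned int uid, unsigned int gid,
		       unsigned int i_uid, unsigned int i_gid, int mask)
{
	if (uid == i_uid)
		mode >>= 6;	/* owner bits */
	else if (gid == i_gid)
		mode >>= 3;	/* group bits */
	return (mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0;
}

int main(void)
{
	/* 0640: owner rw-, group r--, other --- */
	printf("%d\n", may_access(0640, 1000, 1000, 1000, 1000, MAY_WRITE)); /* 1 */
	printf("%d\n", may_access(0640, 2000, 1000, 1000, 1000, MAY_WRITE)); /* 0 */
	return 0;
}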
+
+static ssize_t hmdfs_local_listxattr(struct dentry *dentry, char *list,
+ size_t size)
+{
+ struct path lower_path;
+ ssize_t res = 0;
+ size_t r_size = size;
+
+ if (!hmdfs_support_xattr(dentry))
+ return -EOPNOTSUPP;
+
+ if (size > HMDFS_LISTXATTR_SIZE_MAX)
+ r_size = HMDFS_LISTXATTR_SIZE_MAX;
+
+ hmdfs_get_lower_path(dentry, &lower_path);
+ res = vfs_listxattr(lower_path.dentry, list, r_size);
+ hmdfs_put_lower_path(&lower_path);
+
+ if (res == -ERANGE && r_size != size) {
+ hmdfs_info("no support listxattr size over than %d",
+ HMDFS_LISTXATTR_SIZE_MAX);
+ res = -E2BIG;
+ }
+
+ return res;
+}
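
Note what the error rewrite above buys: a caller whose buffer exceeds
HMDFS_LISTXATTR_SIZE_MAX would otherwise see an -ERANGE caused by the
internal clamp rather than by its own buffer, so the code reports -E2BIG
instead. A compact userspace model of that mapping (function names are
illustrative):

#include <errno.h>
#include <stdio.h>

#define LIST_MAX 4096	/* models HMDFS_LISTXATTR_SIZE_MAX */

/* stands in for vfs_listxattr() on the lower filesystem */
static long lower_listxattr(size_t size, size_t needed)
{
	if (size == 0)
		return needed;	/* size query */
	if (size < needed)
		return -ERANGE;	/* caller's buffer too small */
	return needed;
}

static long clamped_listxattr(size_t size, size_t needed)
{
	size_t r_size = size > LIST_MAX ? LIST_MAX : size;
	long res = lower_listxattr(r_size, needed);

	/* -ERANGE caused by our clamp, not by the caller's buffer */
	if (res == -ERANGE && r_size != size)
		res = -E2BIG;
	return res;
}

int main(void)
{
	printf("%ld\n", clamped_listxattr(8192, 6000)); /* -E2BIG */
	printf("%ld\n", clamped_listxattr(2048, 6000)); /* -ERANGE */
	return 0;
}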
+
+const struct inode_operations hmdfs_symlink_iops_local = {
+ .get_link = hmdfs_get_link_local,
+ .permission = hmdfs_permission,
+ .setattr = hmdfs_setattr_local,
+};
+
+const struct inode_operations hmdfs_dir_inode_ops_local = {
+ .lookup = hmdfs_lookup_local,
+ .mkdir = hmdfs_mkdir_local,
+ .create = hmdfs_create_local,
+ .rmdir = hmdfs_rmdir_local,
+ .unlink = hmdfs_unlink_local,
+ .symlink = hmdfs_symlink_local,
+ .rename = hmdfs_rename_local,
+ .permission = hmdfs_permission,
+ .setattr = hmdfs_setattr_local,
+ .getattr = hmdfs_getattr_local,
+};
+
+const struct inode_operations hmdfs_file_iops_local = {
+ .setattr = hmdfs_setattr_local,
+ .getattr = hmdfs_getattr_local,
+ .permission = hmdfs_permission,
+ .listxattr = hmdfs_local_listxattr,
+};
diff --git a/fs/hmdfs/inode_merge.c b/fs/hmdfs/inode_merge.c
new file mode 100644
index 0000000000000000000000000000000000000000..f84f57d5e85c3664768b5c732f257fd765059ade
--- /dev/null
+++ b/fs/hmdfs/inode_merge.c
@@ -0,0 +1,1357 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/inode_merge.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include "hmdfs_merge_view.h"
+#include <linux/dcache.h>
+#include <linux/fs.h>
+#include <linux/fs_stack.h>
+#include <linux/list.h>
+#include <linux/mount.h>
+#include <linux/mutex.h>
+#include <linux/namei.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/version.h>
+#include "authority/authentication.h"
+#include "hmdfs_trace.h"
+
+struct kmem_cache *hmdfs_dentry_merge_cachep;
+
+struct dentry *hmdfs_get_fst_lo_d(struct dentry *dentry)
+{
+ struct hmdfs_dentry_info_merge *dim = hmdfs_dm(dentry);
+ struct hmdfs_dentry_comrade *comrade = NULL;
+ struct dentry *d = NULL;
+
+ mutex_lock(&dim->comrade_list_lock);
+ comrade = list_first_entry_or_null(&dim->comrade_list,
+ struct hmdfs_dentry_comrade, list);
+ if (comrade)
+ d = dget(comrade->lo_d);
+ mutex_unlock(&dim->comrade_list_lock);
+ return d;
+}
+
+struct dentry *hmdfs_get_lo_d(struct dentry *dentry, int dev_id)
+{
+ struct hmdfs_dentry_info_merge *dim = hmdfs_dm(dentry);
+ struct hmdfs_dentry_comrade *comrade = NULL;
+ struct dentry *d = NULL;
+
+ mutex_lock(&dim->comrade_list_lock);
+ list_for_each_entry(comrade, &dim->comrade_list, list) {
+ if (comrade->dev_id == dev_id) {
+ d = dget(comrade->lo_d);
+ break;
+ }
+ }
+ mutex_unlock(&dim->comrade_list_lock);
+ return d;
+}
+
+static void update_inode_attr(struct inode *inode, struct dentry *child_dentry)
+{
+ struct inode *li = NULL;
+ struct hmdfs_dentry_info_merge *cdi = hmdfs_dm(child_dentry);
+ struct hmdfs_dentry_comrade *comrade = NULL;
+ struct hmdfs_dentry_comrade *fst_comrade = NULL;
+
+ mutex_lock(&cdi->comrade_list_lock);
+ fst_comrade = list_first_entry(&cdi->comrade_list,
+ struct hmdfs_dentry_comrade, list);
+ list_for_each_entry(comrade, &cdi->comrade_list, list) {
+ li = d_inode(comrade->lo_d);
+ if (!li)
+ continue;
+
+ if (comrade == fst_comrade) {
+ inode->i_atime = li->i_atime;
+ inode->i_ctime = li->i_ctime;
+ inode->i_mtime = li->i_mtime;
+ inode->i_size = li->i_size;
+ continue;
+ }
+
+ if (hmdfs_time_compare(&inode->i_mtime, &li->i_mtime) < 0)
+ inode->i_mtime = li->i_mtime;
+ }
+ mutex_unlock(&cdi->comrade_list_lock);
+}
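
The merged inode thus takes its times and size from the first comrade
(local entries sit at the head of the list, see link_comrade() below) and
afterwards only raises mtime toward the newest comrade. A userspace sketch
of that mtime policy, modeling hmdfs_time_compare() as a plain timespec
comparison (an assumption about its semantics):

#include <stdio.h>
#include <time.h>

/* model of hmdfs_time_compare(): negative when a < b */
static int ts_cmp(const struct timespec *a, const struct timespec *b)
{
	if (a->tv_sec != b->tv_sec)
		return a->tv_sec < b->tv_sec ? -1 : 1;
	if (a->tv_nsec != b->tv_nsec)
		return a->tv_nsec < b->tv_nsec ? -1 : 1;
	return 0;
}

int main(void)
{
	/* the first comrade seeds the merged mtime ... */
	struct timespec comrades[] = { {100, 0}, {300, 0}, {200, 0} };
	struct timespec mtime = comrades[0];
	size_t i;

	/* ... and later comrades can only push it forward */
	for (i = 1; i < sizeof(comrades) / sizeof(*comrades); i++)
		if (ts_cmp(&mtime, &comrades[i]) < 0)
			mtime = comrades[i];

	printf("merged mtime: %lld\n", (long long)mtime.tv_sec); /* 300 */
	return 0;
}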
+
+static int get_num_comrades(struct dentry *dentry)
+{
+ struct list_head *pos;
+ struct hmdfs_dentry_info_merge *dim = hmdfs_dm(dentry);
+ int count = 0;
+
+ mutex_lock(&dim->comrade_list_lock);
+ list_for_each(pos, &dim->comrade_list)
+ count++;
+ mutex_unlock(&dim->comrade_list_lock);
+ return count;
+}
+
+static struct inode *fill_inode_merge(struct super_block *sb,
+ struct inode *parent_inode,
+ struct dentry *child_dentry,
+ struct dentry *lo_d_dentry)
+{
+ struct dentry *fst_lo_d = NULL;
+ struct hmdfs_inode_info *info = NULL;
+ struct inode *inode = NULL;
+ umode_t mode;
+
+ if (lo_d_dentry) {
+ fst_lo_d = lo_d_dentry;
+ dget(fst_lo_d);
+ } else {
+ fst_lo_d = hmdfs_get_fst_lo_d(child_dentry);
+ }
+ if (!fst_lo_d) {
+ inode = ERR_PTR(-EINVAL);
+ goto out;
+ }
+ if (hmdfs_i(parent_inode)->inode_type == HMDFS_LAYER_ZERO)
+ inode = hmdfs_iget_locked_root(sb, HMDFS_ROOT_MERGE, NULL,
+ NULL);
+ else
+ inode = hmdfs_iget5_locked_merge(sb, fst_lo_d);
+ if (!inode) {
+ hmdfs_err("iget5_locked get inode NULL");
+ inode = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+ if (!(inode->i_state & I_NEW))
+ goto out;
+ info = hmdfs_i(inode);
+ if (hmdfs_i(parent_inode)->inode_type == HMDFS_LAYER_ZERO)
+ info->inode_type = HMDFS_LAYER_FIRST_MERGE;
+ else
+ info->inode_type = HMDFS_LAYER_OTHER_MERGE;
+
+ inode->i_uid = KUIDT_INIT((uid_t)1000);
+ inode->i_gid = KGIDT_INIT((gid_t)1000);
+
+ update_inode_attr(inode, child_dentry);
+ mode = d_inode(fst_lo_d)->i_mode;
+	/* A remote symlink must be treated as a regular file here; the
+	 * link-specific handling is performed by the device_view.
+	 * Local symlinks are managed by the merge_view.
+	 */
+ if (hm_islnk(hmdfs_d(fst_lo_d)->file_type) &&
+ hmdfs_d(fst_lo_d)->device_id == 0) {
+ inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
+ inode->i_op = &hmdfs_symlink_iops_merge;
+ inode->i_fop = &hmdfs_file_fops_merge;
+ set_nlink(inode, 1);
+	} else if (S_ISREG(mode)) { // Regular file 0660
+ inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
+ inode->i_op = &hmdfs_file_iops_merge;
+ inode->i_fop = &hmdfs_file_fops_merge;
+ set_nlink(inode, 1);
+ } else if (S_ISDIR(mode)) { // Directory 0771
+ inode->i_mode = S_IFDIR | S_IRWXU | S_IRWXG | S_IXOTH;
+ inode->i_op = &hmdfs_dir_iops_merge;
+ inode->i_fop = &hmdfs_dir_fops_merge;
+ set_nlink(inode, get_num_comrades(child_dentry) + 2);
+ }
+
+ unlock_new_inode(inode);
+out:
+ dput(fst_lo_d);
+ return inode;
+}
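
Note that fill_inode_merge() deliberately ignores the lower file's own
permission bits: merged regular files are pinned to 0660 and directories to
0771, matching the inline comments above. A standalone check of the macro
compositions used there:

#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
	/* regular file: rw-rw---- */
	unsigned int f = S_IFREG | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
	/* directory: rwxrwx--x */
	unsigned int d = S_IFDIR | S_IRWXU | S_IRWXG | S_IXOTH;

	printf("file: %o, dir: %o\n", f & 07777, d & 07777); /* 660, 771 */
	return 0;
}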
+
+struct hmdfs_dentry_comrade *alloc_comrade(struct dentry *lo_d, int dev_id)
+{
+ struct hmdfs_dentry_comrade *comrade = NULL;
+
+	// A regular file has only one comrade; consider folding {comrade, list + list lock} together
+ comrade = kzalloc(sizeof(*comrade), GFP_KERNEL);
+ if (unlikely(!comrade))
+ return ERR_PTR(-ENOMEM);
+
+ comrade->lo_d = lo_d;
+ comrade->dev_id = dev_id;
+ dget(lo_d);
+ return comrade;
+}
+
+void link_comrade(struct list_head *onstack_comrades_head,
+ struct hmdfs_dentry_comrade *comrade)
+{
+ struct hmdfs_dentry_comrade *c = NULL;
+
+ list_for_each_entry(c, onstack_comrades_head, list) {
+ if (likely(c->dev_id != comrade->dev_id))
+ continue;
+ hmdfs_err("Redundant comrade of device %llu", c->dev_id);
+ dput(comrade->lo_d);
+ kfree(comrade);
+ WARN_ON(1);
+ return;
+ }
+
+ if (comrade_is_local(comrade))
+ list_add(&comrade->list, onstack_comrades_head);
+ else
+ list_add_tail(&comrade->list, onstack_comrades_head);
+}
+
+/**
+ * assign_comrades_unlocked - attach the collected comrades to a child dentry
+ *
+ * We build a local list of all the comrades we found and splice the whole
+ * list onto the dentry_info in one step to achieve atomicity.
+ */
+static void assign_comrades_unlocked(struct dentry *child_dentry,
+ struct list_head *onstack_comrades_head)
+{
+ struct hmdfs_dentry_info_merge *cdi = hmdfs_dm(child_dentry);
+
+ mutex_lock(&cdi->comrade_list_lock);
+ WARN_ON(!list_empty(&cdi->comrade_list));
+ list_splice_init(onstack_comrades_head, &cdi->comrade_list);
+ mutex_unlock(&cdi->comrade_list_lock);
+}
+
+static struct hmdfs_dentry_comrade *lookup_comrade(struct path lower_path,
+ const char *d_name,
+ int dev_id,
+ unsigned int flags)
+{
+ struct path path;
+ struct hmdfs_dentry_comrade *comrade = NULL;
+ int err;
+
+ err = vfs_path_lookup(lower_path.dentry, lower_path.mnt, d_name, flags,
+ &path);
+ if (err)
+ return ERR_PTR(err);
+
+ comrade = alloc_comrade(path.dentry, dev_id);
+ path_put(&path);
+ return comrade;
+}
+
+/**
+ * conf_name_trans_nop - do nothing but copy
+ *
+ * WARNING: always check before translation
+ */
+static char *conf_name_trans_nop(struct dentry *d)
+{
+ return kstrndup(d->d_name.name, d->d_name.len, GFP_KERNEL);
+}
+
+/**
+ * conf_name_trans_dir - conflicted name translation for directory
+ *
+ * WARNING: always check before translation
+ */
+static char *conf_name_trans_dir(struct dentry *d)
+{
+ int len = d->d_name.len - strlen(CONFLICTING_DIR_SUFFIX);
+
+ return kstrndup(d->d_name.name, len, GFP_KERNEL);
+}
+
+/**
+ * conf_name_trans_reg - conflicted name translation for regular file
+ *
+ * WARNING: always check before translation
+ */
+static char *conf_name_trans_reg(struct dentry *d, int *dev_id)
+{
+ int dot_pos, start_cpy_pos, num_len, i;
+ int len = d->d_name.len;
+ char *name = kstrndup(d->d_name.name, d->d_name.len, GFP_KERNEL);
+
+ if (unlikely(!name))
+ return NULL;
+
+ // find the last dot if possible
+ for (dot_pos = len - 1; dot_pos >= 0; dot_pos--) {
+ if (name[dot_pos] == '.')
+ break;
+ }
+ if (dot_pos == -1)
+ dot_pos = len;
+
+	// retrieve the conflict serial number (i.e. dev_id)
+ num_len = 0;
+ for (i = dot_pos - 1; i >= 0; i--) {
+ if (name[i] >= '0' && name[i] <= '9')
+ num_len++;
+ else
+ break;
+ }
+
+ *dev_id = 0;
+ for (i = 0; i < num_len; i++)
+ *dev_id = *dev_id * 10 + name[dot_pos - num_len + i] - '0';
+
+	// move the file suffix ('\0' included) right after the file name
+ start_cpy_pos =
+ dot_pos - num_len - strlen(CONFLICTING_FILE_CONST_SUFFIX);
+ memmove(name + start_cpy_pos, name + dot_pos, len - dot_pos + 1);
+ return name;
+}
+
+int check_filename(const char *name, int len)
+{
+ int cmp_res = 0;
+
+ if (len >= strlen(CONFLICTING_DIR_SUFFIX)) {
+ cmp_res = strncmp(name + len - strlen(CONFLICTING_DIR_SUFFIX),
+ CONFLICTING_DIR_SUFFIX,
+ strlen(CONFLICTING_DIR_SUFFIX));
+ if (cmp_res == 0)
+ return DT_DIR;
+ }
+
+ if (len >= strlen(CONFLICTING_FILE_CONST_SUFFIX)) {
+ int dot_pos, start_cmp_pos, num_len, i;
+
+ for (dot_pos = len - 1; dot_pos >= 0; dot_pos--) {
+ if (name[dot_pos] == '.')
+ break;
+ }
+ if (dot_pos == -1)
+ dot_pos = len;
+
+ num_len = 0;
+ for (i = dot_pos - 1; i >= 0; i--) {
+ if (name[i] >= '0' && name[i] <= '9')
+ num_len++;
+ else
+ break;
+ }
+
+ start_cmp_pos = dot_pos - num_len -
+ strlen(CONFLICTING_FILE_CONST_SUFFIX);
+ cmp_res = strncmp(name + start_cmp_pos,
+ CONFLICTING_FILE_CONST_SUFFIX,
+ strlen(CONFLICTING_FILE_CONST_SUFFIX));
+ if (cmp_res == 0)
+ return DT_REG;
+ }
+
+ return 0;
+}
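
check_filename() and the conf_name_trans_*() helpers share one naming
scheme: a conflicted regular file carries a constant suffix plus a device
number in front of its extension. The sketch below models
conf_name_trans_reg() in userspace; the suffix literal is a stand-in, since
the real CONFLICTING_FILE_CONST_SUFFIX is defined elsewhere in this patch:

#include <stdio.h>
#include <string.h>

#define CONF_SUFFIX "_conflict_dev"	/* hypothetical value */

static void strip_conflict(char *name, int *dev_id)
{
	int len = strlen(name), dot, i, num_len = 0;

	/* find the last dot, or pretend it sits at the end */
	for (dot = len - 1; dot >= 0 && name[dot] != '.'; dot--)
		;
	if (dot == -1)
		dot = len;

	/* count the device-id digits right before the dot */
	for (i = dot - 1; i >= 0 && name[i] >= '0' && name[i] <= '9'; i--)
		num_len++;

	*dev_id = 0;
	for (i = 0; i < num_len; i++)
		*dev_id = *dev_id * 10 + name[dot - num_len + i] - '0';

	/* slide the extension (with its '\0') over the whole suffix */
	memmove(name + dot - num_len - (int)strlen(CONF_SUFFIX),
		name + dot, len - dot + 1);
}

int main(void)
{
	char name[] = "a" CONF_SUFFIX "3.jpg";
	int dev_id;

	strip_conflict(name, &dev_id);
	printf("%s on device %d\n", name, dev_id); /* a.jpg on device 3 */
	return 0;
}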
+
+static int lookup_merge_normal(struct dentry *child_dentry, unsigned int flags)
+{
+ struct dentry *parent_dentry = dget_parent(child_dentry);
+ struct hmdfs_dentry_info_merge *pdi = hmdfs_dm(parent_dentry);
+ struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb);
+ struct hmdfs_dentry_comrade *comrade, *cc;
+ struct path lo_p, path;
+ LIST_HEAD(head);
+ int ret = -ENOENT;
+ int dev_id = -1;
+ int ftype;
+ char *lo_name;
+ umode_t mode;
+
+ ftype = check_filename(child_dentry->d_name.name,
+ child_dentry->d_name.len);
+ if (ftype == DT_REG)
+ lo_name = conf_name_trans_reg(child_dentry, &dev_id);
+ else if (ftype == DT_DIR)
+ lo_name = conf_name_trans_dir(child_dentry);
+ else
+ lo_name = conf_name_trans_nop(child_dentry);
+ if (unlikely(!lo_name)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = hmdfs_get_path_in_sb(child_dentry->d_sb, sbi->real_dst,
+ LOOKUP_DIRECTORY, &path);
+ if (ret) {
+ if (ret == -ENOENT)
+ ret = -EINVAL;
+ goto free;
+ }
+ lo_p.mnt = path.mnt;
+
+ ret = -ENOENT;
+ mutex_lock(&pdi->comrade_list_lock);
+ list_for_each_entry(cc, &pdi->comrade_list, list) {
+ if (ftype == DT_REG && cc->dev_id != dev_id)
+ continue;
+
+ lo_p.dentry = cc->lo_d;
+ comrade = lookup_comrade(lo_p, lo_name, cc->dev_id, flags);
+ if (IS_ERR(comrade)) {
+ ret = ret ? PTR_ERR(comrade) : 0;
+ continue;
+ }
+
+ mode = hmdfs_cm(comrade);
+ if ((ftype == DT_DIR && !S_ISDIR(mode)) ||
+ (ftype == DT_REG && S_ISDIR(mode))) {
+ destroy_comrade(comrade);
+			ret = ret ? -ENOENT : 0;
+ continue;
+ }
+
+ ret = 0;
+ link_comrade(&head, comrade);
+
+ if (!S_ISDIR(mode))
+ break;
+ }
+ mutex_unlock(&pdi->comrade_list_lock);
+
+ assign_comrades_unlocked(child_dentry, &head);
+ path_put(&path);
+free:
+ kfree(lo_name);
+out:
+ dput(parent_dentry);
+ return ret;
+}
+
+/**
+ * do_lookup_merge_root - look up the root of the merge view (root/merge_view)
+ *
+ * A network filesystem commonly incurs faults of various kinds, so we are
+ * tolerant of remote faults here, except for faults reported by the local
+ * device.
+ */
+static int do_lookup_merge_root(struct path path_dev,
+ struct dentry *child_dentry, unsigned int flags)
+{
+ struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb);
+ struct hmdfs_dentry_comrade *comrade;
+ const int buf_len =
+ max((int)HMDFS_CID_SIZE + 1, (int)sizeof(DEVICE_VIEW_LOCAL));
+ char *buf = kzalloc(buf_len, GFP_KERNEL);
+ struct hmdfs_peer *peer;
+ LIST_HEAD(head);
+ int ret;
+
+ if (!buf)
+ return -ENOMEM;
+
+ // lookup real_dst/device_view/local
+ memcpy(buf, DEVICE_VIEW_LOCAL, sizeof(DEVICE_VIEW_LOCAL));
+ comrade = lookup_comrade(path_dev, buf, HMDFS_DEVID_LOCAL, flags);
+ if (IS_ERR(comrade)) {
+ ret = PTR_ERR(comrade);
+ goto out;
+ }
+ link_comrade(&head, comrade);
+
+ // lookup real_dst/device_view/cidxx
+ mutex_lock(&sbi->connections.node_lock);
+ list_for_each_entry(peer, &sbi->connections.node_list, list) {
+ mutex_unlock(&sbi->connections.node_lock);
+ memcpy(buf, peer->cid, HMDFS_CID_SIZE);
+ comrade = lookup_comrade(path_dev, buf, peer->device_id, flags);
+ if (IS_ERR(comrade))
+ continue;
+
+ link_comrade(&head, comrade);
+ mutex_lock(&sbi->connections.node_lock);
+ }
+ mutex_unlock(&sbi->connections.node_lock);
+
+ assign_comrades_unlocked(child_dentry, &head);
+ ret = 0;
+
+out:
+ kfree(buf);
+ return ret;
+}
+
+// mkdir -p
+static void lock_root_inode_shared(struct inode *root, bool *locked, bool *down)
+{
+ struct rw_semaphore *sem = &root->i_rwsem;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 3, 0)
+#define RWSEM_READER_OWNED (1UL << 0)
+#define RWSEM_RD_NONSPINNABLE (1UL << 1)
+#define RWSEM_WR_NONSPINNABLE (1UL << 2)
+#define RWSEM_NONSPINNABLE (RWSEM_RD_NONSPINNABLE | RWSEM_WR_NONSPINNABLE)
+#define RWSEM_OWNER_FLAGS_MASK (RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)
+ struct task_struct *sem_owner =
+ (struct task_struct *)(atomic_long_read(&sem->owner) &
+ ~RWSEM_OWNER_FLAGS_MASK);
+#else
+ struct task_struct *sem_owner = sem->owner;
+#endif
+
+ *locked = false;
+ *down = false;
+
+ if (sem_owner != current)
+ return;
+
+	// We are the one currently holding the write semaphore
+ if (!inode_trylock_shared(root)) {
+ downgrade_write(sem);
+ *down = true;
+ }
+ *locked = true;
+}
+
+static void restore_root_inode_sem(struct inode *root, bool locked, bool down)
+{
+ if (!locked)
+ return;
+
+ inode_unlock_shared(root);
+ if (down)
+ inode_lock(root);
+}
+
+static int lookup_merge_root(struct inode *root_inode,
+ struct dentry *child_dentry, unsigned int flags)
+{
+ struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb);
+ struct path path_dev;
+ int ret = -ENOENT;
+ int buf_len;
+ char *buf = NULL;
+ bool locked, down;
+
+ // consider additional one slash and one '\0'
+ buf_len = strlen(sbi->real_dst) + 1 + sizeof(DEVICE_VIEW_ROOT);
+ if (buf_len > PATH_MAX)
+ return -ENAMETOOLONG;
+
+ buf = kmalloc(buf_len, GFP_KERNEL);
+ if (unlikely(!buf))
+ return -ENOMEM;
+
+ sprintf(buf, "%s/%s", sbi->real_dst, DEVICE_VIEW_ROOT);
+ lock_root_inode_shared(root_inode, &locked, &down);
+ ret = hmdfs_get_path_in_sb(child_dentry->d_sb, buf, LOOKUP_DIRECTORY,
+ &path_dev);
+ if (ret)
+ goto free_buf;
+
+ ret = do_lookup_merge_root(path_dev, child_dentry, flags);
+ path_put(&path_dev);
+
+free_buf:
+ kfree(buf);
+ restore_root_inode_sem(root_inode, locked, down);
+ return ret;
+}
+
+int init_hmdfs_dentry_info_merge(struct hmdfs_sb_info *sbi,
+ struct dentry *dentry)
+{
+ struct hmdfs_dentry_info_merge *info = NULL;
+
+ info = kmem_cache_zalloc(hmdfs_dentry_merge_cachep, GFP_NOFS);
+ if (!info)
+ return -ENOMEM;
+
+ info->ctime = jiffies;
+ INIT_LIST_HEAD(&info->comrade_list);
+ mutex_init(&info->comrade_list_lock);
+ d_set_d_op(dentry, &hmdfs_dops_merge);
+ dentry->d_fsdata = info;
+ return 0;
+}
+
+static void update_dm(struct dentry *dst, struct dentry *src)
+{
+ struct hmdfs_dentry_info_merge *dmi_dst = hmdfs_dm(dst);
+ struct hmdfs_dentry_info_merge *dmi_src = hmdfs_dm(src);
+ LIST_HEAD(tmp_dst);
+ LIST_HEAD(tmp_src);
+
+ /* Mobilize all the comrades */
+ mutex_lock(&dmi_dst->comrade_list_lock);
+ mutex_lock(&dmi_src->comrade_list_lock);
+ list_splice_init(&dmi_dst->comrade_list, &tmp_dst);
+ list_splice_init(&dmi_src->comrade_list, &tmp_src);
+ list_splice(&tmp_dst, &dmi_src->comrade_list);
+ list_splice(&tmp_src, &dmi_dst->comrade_list);
+ mutex_unlock(&dmi_src->comrade_list_lock);
+ mutex_unlock(&dmi_dst->comrade_list_lock);
+}
+
+// do this in a map-reduce manner
+struct dentry *hmdfs_lookup_merge(struct inode *parent_inode,
+ struct dentry *child_dentry,
+ unsigned int flags)
+{
+ bool create = flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET);
+ struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb);
+ struct hmdfs_inode_info *pii = hmdfs_i(parent_inode);
+ struct inode *child_inode = NULL;
+ struct dentry *ret_dentry = NULL;
+ int err = 0;
+
+	/*
+	 * Internal flags like LOOKUP_CREATE should not be passed to the
+	 * device view. LOOKUP_REVAL is kept because the dentry cache in
+	 * hmdfs might be stale after a rename in the lower fs.
+	 * LOOKUP_FOLLOW is not needed because get_link is defined for
+	 * symlink inodes in the merge_view. LOOKUP_DIRECTORY is not needed
+	 * because the merge_view can judge by itself whether the result is
+	 * a directory.
+	 */
+ flags = flags & LOOKUP_REVAL;
+
+ child_dentry->d_fsdata = NULL;
+
+ if (child_dentry->d_name.len > NAME_MAX) {
+ err = -ENAMETOOLONG;
+ goto out;
+ }
+
+ err = init_hmdfs_dentry_info_merge(sbi, child_dentry);
+ if (unlikely(err))
+ goto out;
+
+ if (pii->inode_type == HMDFS_LAYER_ZERO)
+ err = lookup_merge_root(parent_inode, child_dentry, flags);
+ else
+ err = lookup_merge_normal(child_dentry, flags);
+
+ if (!err) {
+ struct hmdfs_inode_info *info = NULL;
+
+ child_inode = fill_inode_merge(parent_inode->i_sb, parent_inode,
+ child_dentry, NULL);
+ ret_dentry = d_splice_alias(child_inode, child_dentry);
+ if (IS_ERR(ret_dentry)) {
+ clear_comrades(child_dentry);
+ err = PTR_ERR(ret_dentry);
+ goto out;
+ }
+ if (ret_dentry) {
+ update_dm(ret_dentry, child_dentry);
+ child_dentry = ret_dentry;
+ }
+ info = hmdfs_i(child_inode);
+ if (info->inode_type == HMDFS_LAYER_FIRST_MERGE)
+ hmdfs_root_inode_perm_init(child_inode);
+ else
+ check_and_fixup_ownership_remote(parent_inode,
+ child_dentry);
+
+ goto out;
+ }
+
+ if ((err == -ENOENT) && create)
+ err = 0;
+
+out:
+ hmdfs_trace_merge(trace_hmdfs_lookup_merge_end, parent_inode,
+ child_dentry, err);
+ return err ? ERR_PTR(err) : ret_dentry;
+}
+
+static int hmdfs_getattr_merge(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
+{
+ int ret;
+ struct path lower_path = {
+ .dentry = hmdfs_get_fst_lo_d(path->dentry),
+ .mnt = path->mnt,
+ };
+
+ if (unlikely(!lower_path.dentry)) {
+ hmdfs_err("Fatal! No comrades");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = vfs_getattr(&lower_path, stat, request_mask, flags);
+out:
+ dput(lower_path.dentry);
+ return ret;
+}
+
+static int hmdfs_setattr_merge(struct dentry *dentry, struct iattr *ia)
+{
+ struct inode *inode = d_inode(dentry);
+ struct dentry *lower_dentry = hmdfs_get_fst_lo_d(dentry);
+ struct inode *lower_inode = NULL;
+ struct iattr lower_ia;
+ unsigned int ia_valid = ia->ia_valid;
+ int err = 0;
+ kuid_t tmp_uid;
+
+ if (!lower_dentry) {
+ WARN_ON(1);
+ err = -EINVAL;
+ goto out;
+ }
+
+ lower_inode = d_inode(lower_dentry);
+ memcpy(&lower_ia, ia, sizeof(lower_ia));
+ if (ia_valid & ATTR_FILE)
+ lower_ia.ia_file = hmdfs_f(ia->ia_file)->lower_file;
+ lower_ia.ia_valid &= ~(ATTR_UID | ATTR_GID | ATTR_MODE);
+
+ inode_lock(lower_inode);
+ tmp_uid = hmdfs_override_inode_uid(lower_inode);
+
+ err = notify_change(lower_dentry, &lower_ia, NULL);
+ i_size_write(inode, i_size_read(lower_inode));
+ inode->i_atime = lower_inode->i_atime;
+ inode->i_mtime = lower_inode->i_mtime;
+ inode->i_ctime = lower_inode->i_ctime;
+ hmdfs_revert_inode_uid(lower_inode, tmp_uid);
+
+ inode_unlock(lower_inode);
+
+out:
+ dput(lower_dentry);
+ return err;
+}
+
+const struct inode_operations hmdfs_file_iops_merge = {
+ .getattr = hmdfs_getattr_merge,
+ .setattr = hmdfs_setattr_merge,
+ .permission = hmdfs_permission,
+};
+
+int do_mkdir_merge(struct inode *parent_inode, struct dentry *child_dentry,
+ umode_t mode, struct inode *lo_i_parent,
+ struct dentry *lo_d_child)
+{
+ int ret = 0;
+ struct super_block *sb = parent_inode->i_sb;
+ struct inode *child_inode = NULL;
+
+ ret = vfs_mkdir(lo_i_parent, lo_d_child, mode);
+ if (ret)
+ goto out;
+
+ child_inode =
+ fill_inode_merge(sb, parent_inode, child_dentry, lo_d_child);
+ if (IS_ERR(child_inode)) {
+ ret = PTR_ERR(child_inode);
+ goto out;
+ }
+
+ d_add(child_dentry, child_inode);
+ /* nlink should be increased with the joining of children */
+ set_nlink(parent_inode, 2);
+out:
+ return ret;
+}
+
+int do_create_merge(struct inode *parent_inode, struct dentry *child_dentry,
+ umode_t mode, bool want_excl, struct inode *lo_i_parent,
+ struct dentry *lo_d_child)
+{
+ int ret = 0;
+ struct super_block *sb = parent_inode->i_sb;
+ struct inode *child_inode = NULL;
+
+ ret = vfs_create(lo_i_parent, lo_d_child, mode, want_excl);
+ if (ret)
+ goto out;
+
+ child_inode =
+ fill_inode_merge(sb, parent_inode, child_dentry, lo_d_child);
+ if (IS_ERR(child_inode)) {
+ ret = PTR_ERR(child_inode);
+ goto out;
+ }
+
+ d_add(child_dentry, child_inode);
+ /* nlink should be increased with the joining of children */
+ set_nlink(parent_inode, 2);
+out:
+ return ret;
+}
+
+int do_symlink_merge(struct inode *parent_inode, struct dentry *child_dentry,
+ const char *symname, struct inode *lower_parent_inode,
+ struct dentry *lo_d_child)
+{
+ int ret = 0;
+ struct super_block *sb = parent_inode->i_sb;
+ struct inode *child_inode = NULL;
+
+ ret = vfs_symlink(lower_parent_inode, lo_d_child, symname);
+ if (ret)
+ goto out;
+
+ child_inode =
+ fill_inode_merge(sb, parent_inode, child_dentry, lo_d_child);
+ if (IS_ERR(child_inode)) {
+ ret = PTR_ERR(child_inode);
+ goto out;
+ }
+
+ d_add(child_dentry, child_inode);
+ fsstack_copy_attr_times(parent_inode, lower_parent_inode);
+ fsstack_copy_inode_size(parent_inode, lower_parent_inode);
+out:
+ return ret;
+}
+
+int hmdfs_do_ops_merge(struct inode *i_parent, struct dentry *d_child,
+ struct dentry *lo_d_child, struct path path,
+ struct hmdfs_recursive_para *rec_op_para)
+{
+ int ret = 0;
+
+ if (rec_op_para->is_last) {
+ switch (rec_op_para->opcode) {
+ case F_MKDIR_MERGE:
+ ret = do_mkdir_merge(i_parent, d_child,
+ rec_op_para->mode,
+ d_inode(path.dentry), lo_d_child);
+ break;
+ case F_CREATE_MERGE:
+ ret = do_create_merge(i_parent, d_child,
+ rec_op_para->mode,
+ rec_op_para->want_excl,
+ d_inode(path.dentry), lo_d_child);
+ break;
+ case F_SYMLINK_MERGE:
+ ret = do_symlink_merge(i_parent, d_child,
+ rec_op_para->name,
+ d_inode(path.dentry),
+ lo_d_child);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ } else {
+ ret = vfs_mkdir(d_inode(path.dentry), lo_d_child,
+ rec_op_para->mode);
+ }
+ if (ret)
+ hmdfs_err("vfs_ops failed, ops %d, err = %d",
+ rec_op_para->opcode, ret);
+ return ret;
+}
+
+int hmdfs_create_lower_dentry(struct inode *i_parent, struct dentry *d_child,
+ struct dentry *lo_d_parent, bool is_dir,
+ struct hmdfs_recursive_para *rec_op_para)
+{
+ struct hmdfs_sb_info *sbi = i_parent->i_sb->s_fs_info;
+ struct hmdfs_dentry_comrade *new_comrade = NULL;
+ struct dentry *lo_d_child = NULL;
+ char *path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
+ char *absolute_path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
+ char *path_name = NULL;
+ struct path path = { .mnt = NULL, .dentry = NULL };
+ int ret = 0;
+
+ if (unlikely(!path_buf || !absolute_path_buf)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ path_name = dentry_path_raw(lo_d_parent, path_buf, PATH_MAX);
+ if (IS_ERR(path_name)) {
+ ret = PTR_ERR(path_name);
+ goto out;
+ }
+ if ((strlen(sbi->real_dst) + strlen(path_name) +
+ strlen(d_child->d_name.name) + 2) > PATH_MAX) {
+ ret = -ENAMETOOLONG;
+ goto out;
+ }
+
+ sprintf(absolute_path_buf, "%s%s/%s", sbi->real_dst, path_name,
+ d_child->d_name.name);
+
+ if (is_dir)
+ lo_d_child = kern_path_create(AT_FDCWD, absolute_path_buf,
+ &path, LOOKUP_DIRECTORY);
+ else
+ lo_d_child = kern_path_create(AT_FDCWD, absolute_path_buf,
+ &path, 0);
+ if (IS_ERR(lo_d_child)) {
+ ret = PTR_ERR(lo_d_child);
+ goto out;
+ }
+	// only link_comrade after vfs_mkdir has succeeded
+ ret = hmdfs_do_ops_merge(i_parent, d_child, lo_d_child, path,
+ rec_op_para);
+ if (ret)
+ goto out_put;
+ new_comrade = alloc_comrade(lo_d_child, HMDFS_DEVID_LOCAL);
+ if (IS_ERR(new_comrade)) {
+ ret = PTR_ERR(new_comrade);
+ goto out_put;
+ } else {
+ link_comrade_unlocked(d_child, new_comrade);
+ }
+
+out_put:
+ done_path_create(&path, lo_d_child);
+out:
+ kfree(absolute_path_buf);
+ kfree(path_buf);
+ return ret;
+}
+
+static int create_lo_d_parent_recur(struct dentry *d_parent,
+ struct dentry *d_child, umode_t mode,
+ struct hmdfs_recursive_para *rec_op_para)
+{
+ struct dentry *lo_d_parent, *d_pparent;
+ int ret = 0;
+
+ lo_d_parent = hmdfs_get_lo_d(d_parent, HMDFS_DEVID_LOCAL);
+ if (!lo_d_parent) {
+ d_pparent = dget_parent(d_parent);
+ ret = create_lo_d_parent_recur(d_pparent, d_parent,
+ d_inode(d_parent)->i_mode,
+ rec_op_para);
+ dput(d_pparent);
+ if (ret)
+ goto out;
+ lo_d_parent = hmdfs_get_lo_d(d_parent, HMDFS_DEVID_LOCAL);
+ if (!lo_d_parent) {
+ ret = -ENOENT;
+ goto out;
+ }
+ }
+ rec_op_para->is_last = false;
+ rec_op_para->mode = mode;
+ ret = hmdfs_create_lower_dentry(d_inode(d_parent), d_child, lo_d_parent,
+ true, rec_op_para);
+out:
+ dput(lo_d_parent);
+ return ret;
+}
+
+int create_lo_d_child(struct inode *i_parent, struct dentry *d_child,
+ bool is_dir, struct hmdfs_recursive_para *rec_op_para)
+{
+ struct dentry *d_pparent, *lo_d_parent, *lo_d_child;
+ struct dentry *d_parent = dget_parent(d_child);
+ int ret = 0;
+ mode_t d_child_mode = rec_op_para->mode;
+
+ lo_d_parent = hmdfs_get_lo_d(d_parent, HMDFS_DEVID_LOCAL);
+ if (!lo_d_parent) {
+ d_pparent = dget_parent(d_parent);
+ ret = create_lo_d_parent_recur(d_pparent, d_parent,
+ d_inode(d_parent)->i_mode,
+ rec_op_para);
+ dput(d_pparent);
+ if (unlikely(ret)) {
+ lo_d_child = ERR_PTR(ret);
+ goto out;
+ }
+ lo_d_parent = hmdfs_get_lo_d(d_parent, HMDFS_DEVID_LOCAL);
+ if (!lo_d_parent) {
+ lo_d_child = ERR_PTR(-ENOENT);
+ goto out;
+ }
+ }
+ rec_op_para->is_last = true;
+ rec_op_para->mode = d_child_mode;
+ ret = hmdfs_create_lower_dentry(i_parent, d_child, lo_d_parent, is_dir,
+ rec_op_para);
+
+out:
+ dput(d_parent);
+ dput(lo_d_parent);
+ return ret;
+}
+
+void hmdfs_init_recursive_para(struct hmdfs_recursive_para *rec_op_para,
+ int opcode, mode_t mode, bool want_excl,
+ const char *name)
+{
+ rec_op_para->is_last = true;
+ rec_op_para->opcode = opcode;
+ rec_op_para->mode = mode;
+ rec_op_para->want_excl = want_excl;
+ rec_op_para->name = name;
+}
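
create_lo_d_child() and create_lo_d_parent_recur() together implement an
"mkdir -p" walk over the lower tree: when the local lower parent is
missing, recurse upward until an ancestor exists, create the intermediate
directories with is_last = false, and only then perform the requested
operation at the leaf. The shape of that recursion, as a self-contained
userspace analogue built on plain mkdir(2):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>

/* mkdir -p: create each missing ancestor first, then retry the leaf,
 * mirroring create_lo_d_parent_recur()'s bottom-up recursion.
 */
static int mkdir_p(char *path, mode_t mode)
{
	char *slash = strrchr(path, '/');
	int ret;

	if (mkdir(path, mode) == 0 || errno == EEXIST)
		return 0;
	if (errno != ENOENT || !slash || slash == path)
		return -errno;

	*slash = '\0';			/* recurse on the parent ... */
	ret = mkdir_p(path, mode);
	*slash = '/';
	if (ret)
		return ret;
	return mkdir(path, mode) ? -errno : 0;	/* ... then retry the leaf */
}

int main(void)
{
	char path[] = "/tmp/hmdfs_demo/a/b/c";

	printf("mkdir_p: %d\n", mkdir_p(path, 0771));
	return 0;
}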
+
+int hmdfs_mkdir_merge(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ int ret = 0;
+ struct hmdfs_recursive_para *rec_op_para = NULL;
+
+	// conflict_name & file_type are checked by hmdfs_mkdir_local
+ if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) {
+ ret = -EACCES;
+ goto out;
+ }
+ rec_op_para = kmalloc(sizeof(*rec_op_para), GFP_KERNEL);
+ if (!rec_op_para) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ hmdfs_init_recursive_para(rec_op_para, F_MKDIR_MERGE, mode, false,
+ NULL);
+ ret = create_lo_d_child(dir, dentry, true, rec_op_para);
+out:
+ hmdfs_trace_merge(trace_hmdfs_mkdir_merge, dir, dentry, ret);
+ if (ret)
+ d_drop(dentry);
+ kfree(rec_op_para);
+ return ret;
+}
+
+int hmdfs_create_merge(struct inode *dir, struct dentry *dentry, umode_t mode,
+ bool want_excl)
+{
+ struct hmdfs_recursive_para *rec_op_para = NULL;
+ int ret = 0;
+
+ rec_op_para = kmalloc(sizeof(*rec_op_para), GFP_KERNEL);
+ if (!rec_op_para) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ hmdfs_init_recursive_para(rec_op_para, F_CREATE_MERGE, mode, want_excl,
+ NULL);
+	// conflict_name & file_type are checked by hmdfs_create_local
+ ret = create_lo_d_child(dir, dentry, false, rec_op_para);
+out:
+ hmdfs_trace_merge(trace_hmdfs_create_merge, dir, dentry, ret);
+ if (ret)
+ d_drop(dentry);
+ kfree(rec_op_para);
+ return ret;
+}
+
+int do_rmdir_merge(struct inode *dir, struct dentry *dentry)
+{
+ int ret = 0;
+ struct hmdfs_dentry_info_merge *dim = hmdfs_dm(dentry);
+ struct hmdfs_dentry_comrade *comrade = NULL;
+ struct dentry *lo_d = NULL;
+ struct dentry *lo_d_dir = NULL;
+ struct inode *lo_i_dir = NULL;
+
+	//TODO: Currently only the local copy is deleted, as this does not affect the gallery scenario
+	//TODO: Does the gallery clear symlinks on restart? In which scenarios is deletion triggered?
+	//TODO: remove is invoked for both empty and non-empty directories, giving inconsistent results
+	//TODO: Would validation introduce concurrency issues? Even with a lock, we can only lock ourselves
+ mutex_lock(&dim->comrade_list_lock);
+ list_for_each_entry(comrade, &(dim->comrade_list), list) {
+ lo_d = comrade->lo_d;
+ lo_d_dir = lock_parent(lo_d);
+ lo_i_dir = d_inode(lo_d_dir);
+		//TODO: handle partial success; the state of lo_d needs to be confirmed
+ ret = vfs_rmdir(lo_i_dir, lo_d);
+ unlock_dir(lo_d_dir);
+ if (ret)
+ break;
+ }
+ mutex_unlock(&dim->comrade_list_lock);
+ hmdfs_trace_merge(trace_hmdfs_rmdir_merge, dir, dentry, ret);
+ return ret;
+}
+
+int hmdfs_rmdir_merge(struct inode *dir, struct dentry *dentry)
+{
+ int ret = 0;
+
+ if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) {
+ ret = -EACCES;
+ goto out;
+ }
+
+ ret = do_rmdir_merge(dir, dentry);
+ if (ret) {
+ hmdfs_err("rm dir failed:%d", ret);
+ goto out;
+ }
+
+ d_drop(dentry);
+out:
+ hmdfs_trace_merge(trace_hmdfs_rmdir_merge, dir, dentry, ret);
+ return ret;
+}
+
+int do_unlink_merge(struct inode *dir, struct dentry *dentry)
+{
+ int ret = 0;
+ struct hmdfs_dentry_info_merge *dim = hmdfs_dm(dentry);
+ struct hmdfs_dentry_comrade *comrade = NULL;
+ struct dentry *lo_d = NULL;
+ struct dentry *lo_d_dir = NULL;
+ struct inode *lo_i_dir = NULL;
+	// TODO: for the regular-file case, list_first_entry would be enough
+ mutex_lock(&dim->comrade_list_lock);
+ list_for_each_entry(comrade, &(dim->comrade_list), list) {
+ lo_d = comrade->lo_d;
+ lo_d_dir = lock_parent(lo_d);
+ lo_i_dir = d_inode(lo_d_dir);
+ ret = vfs_unlink(lo_i_dir, lo_d, NULL); // lo_d GET
+ unlock_dir(lo_d_dir);
+ if (ret)
+ break;
+ }
+ mutex_unlock(&dim->comrade_list_lock);
+
+ return ret;
+}
+
+int hmdfs_unlink_merge(struct inode *dir, struct dentry *dentry)
+{
+ int ret = 0;
+
+ if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) {
+ ret = -EACCES;
+ goto out;
+ }
+
+ ret = do_unlink_merge(dir, dentry);
+ if (ret) {
+ hmdfs_err("unlink failed:%d", ret);
+ goto out;
+ }
+
+ d_drop(dentry);
+out:
+ return ret;
+}
+
+int hmdfs_symlink_merge(struct inode *dir, struct dentry *dentry,
+ const char *symname)
+{
+ int ret = 0;
+ struct hmdfs_recursive_para *rec_op_para = NULL;
+
+ if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) {
+ ret = -EACCES;
+ goto out;
+ }
+
+ rec_op_para = kmalloc(sizeof(*rec_op_para), GFP_KERNEL);
+ if (!rec_op_para) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ hmdfs_init_recursive_para(rec_op_para, F_SYMLINK_MERGE, 0, false,
+ symname);
+ ret = create_lo_d_child(dir, dentry, false, rec_op_para);
+
+out:
+ trace_hmdfs_symlink_merge(dir, dentry, ret);
+ if (ret)
+ d_drop(dentry);
+ kfree(rec_op_para);
+ return ret;
+}
+
+int do_rename_merge(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ int ret = 0;
+ struct hmdfs_sb_info *sbi = (old_dir->i_sb)->s_fs_info;
+ struct hmdfs_dentry_info_merge *dim = hmdfs_dm(old_dentry);
+ struct hmdfs_dentry_comrade *comrade = NULL, *new_comrade = NULL;
+ struct path lo_p_new = { .mnt = NULL, .dentry = NULL };
+ struct inode *lo_i_old_dir = NULL, *lo_i_new_dir = NULL;
+ struct dentry *lo_d_old_dir = NULL, *lo_d_old = NULL,
+ *lo_d_new_dir = NULL, *lo_d_new = NULL;
+ struct dentry *d_new_dir = NULL;
+ char *path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
+ char *abs_path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
+ char *path_name = NULL;
+
+	/* TODO: Will WPS rename a temporary file into another directory?
+	 * Could flags with the replace bit result in a cross-device
+	 * rename? Replace flags are currently unsupported.
+	 */
+ if (flags & ~RENAME_NOREPLACE) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (unlikely(!path_buf || !abs_path_buf)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ list_for_each_entry(comrade, &dim->comrade_list, list) {
+ lo_d_old = comrade->lo_d;
+ d_new_dir = d_find_alias(new_dir);
+ lo_d_new_dir = hmdfs_get_lo_d(d_new_dir, comrade->dev_id);
+ dput(d_new_dir);
+
+ if (!lo_d_new_dir)
+ continue;
+ path_name = dentry_path_raw(lo_d_new_dir, path_buf, PATH_MAX);
+ dput(lo_d_new_dir);
+ if (IS_ERR(path_name)) {
+ ret = PTR_ERR(path_name);
+ continue;
+ }
+
+ if (strlen(sbi->real_dst) + strlen(path_name) +
+ strlen(new_dentry->d_name.name) + 2 > PATH_MAX) {
+ ret = -ENAMETOOLONG;
+ goto out;
+ }
+
+ snprintf(abs_path_buf, PATH_MAX, "%s%s/%s", sbi->real_dst,
+ path_name, new_dentry->d_name.name);
+ if (S_ISDIR(d_inode(old_dentry)->i_mode))
+ lo_d_new = kern_path_create(AT_FDCWD, abs_path_buf,
+ &lo_p_new,
+ LOOKUP_DIRECTORY);
+ else
+ lo_d_new = kern_path_create(AT_FDCWD, abs_path_buf,
+ &lo_p_new, 0);
+ if (IS_ERR(lo_d_new))
+ continue;
+
+ lo_d_new_dir = dget_parent(lo_d_new);
+ lo_i_new_dir = d_inode(lo_d_new_dir);
+ lo_d_old_dir = dget_parent(lo_d_old);
+ lo_i_old_dir = d_inode(lo_d_old_dir);
+
+ ret = vfs_rename(lo_i_old_dir, lo_d_old, lo_i_new_dir, lo_d_new,
+ NULL, flags);
+ new_comrade = alloc_comrade(lo_p_new.dentry, comrade->dev_id);
+ if (IS_ERR(new_comrade)) {
+ ret = PTR_ERR(new_comrade);
+ goto no_comrade;
+ }
+
+ link_comrade_unlocked(new_dentry, new_comrade);
+no_comrade:
+ done_path_create(&lo_p_new, lo_d_new);
+ dput(lo_d_old_dir);
+ dput(lo_d_new_dir);
+ }
+out:
+ kfree(abs_path_buf);
+ kfree(path_buf);
+ return ret;
+}
+
+int hmdfs_rename_merge(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ char *old_dir_buf = NULL;
+ char *new_dir_buf = NULL;
+ char *old_dir_path = NULL;
+ char *new_dir_path = NULL;
+ struct dentry *old_dir_dentry = NULL;
+ struct dentry *new_dir_dentry = NULL;
+ int ret = 0;
+
+ if (hmdfs_file_type(old_dentry->d_name.name) != HMDFS_TYPE_COMMON ||
+ hmdfs_file_type(new_dentry->d_name.name) != HMDFS_TYPE_COMMON) {
+ ret = -EACCES;
+ goto rename_out;
+ }
+ old_dir_buf = kmalloc(PATH_MAX, GFP_KERNEL);
+ new_dir_buf = kmalloc(PATH_MAX, GFP_KERNEL);
+ if (!old_dir_buf || !new_dir_buf) {
+ ret = -ENOMEM;
+ goto rename_out;
+ }
+
+ new_dir_dentry = d_find_alias(new_dir);
+ if (!new_dir_dentry) {
+ ret = -EINVAL;
+ goto rename_out;
+ }
+
+ old_dir_dentry = d_find_alias(old_dir);
+ if (!old_dir_dentry) {
+ ret = -EINVAL;
+ dput(new_dir_dentry);
+ goto rename_out;
+ }
+
+	old_dir_path = dentry_path_raw(old_dir_dentry, old_dir_buf, PATH_MAX);
+	new_dir_path = dentry_path_raw(new_dir_dentry, new_dir_buf, PATH_MAX);
+	dput(new_dir_dentry);
+	dput(old_dir_dentry);
+	if (IS_ERR(old_dir_path) || IS_ERR(new_dir_path)) {
+		ret = -ENAMETOOLONG;
+		goto rename_out;
+	}
+	if (strcmp(old_dir_path, new_dir_path)) {
+ ret = -EPERM;
+ goto rename_out;
+ }
+
+ trace_hmdfs_rename_merge(old_dir, old_dentry, new_dir, new_dentry,
+ flags);
+ ret = do_rename_merge(old_dir, old_dentry, new_dir, new_dentry, flags);
+
+ if (ret != 0)
+ d_drop(new_dentry);
+
+ if (S_ISREG(old_dentry->d_inode->i_mode) && !ret)
+ d_invalidate(old_dentry);
+
+rename_out:
+ hmdfs_trace_rename_merge(old_dir, old_dentry, new_dir, new_dentry, ret);
+ kfree(old_dir_buf);
+ kfree(new_dir_buf);
+ return ret;
+}
+
+static const char *hmdfs_get_link_merge(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
+{
+ const char *link = NULL;
+ struct dentry *lower_dentry = NULL;
+ struct inode *lower_inode = NULL;
+
+ if (!dentry) {
+ hmdfs_err("dentry NULL");
+ link = ERR_PTR(-ECHILD);
+ goto link_out;
+ }
+
+ lower_dentry = hmdfs_get_fst_lo_d(dentry);
+ if (!lower_dentry) {
+ WARN_ON(1);
+ link = ERR_PTR(-EINVAL);
+ goto out;
+ }
+ lower_inode = d_inode(lower_dentry);
+ if (!lower_inode->i_op || !lower_inode->i_op->get_link) {
+ hmdfs_err("lower inode hold no operations");
+ link = ERR_PTR(-EINVAL);
+ goto out;
+ }
+
+ link = lower_inode->i_op->get_link(lower_dentry, lower_inode, done);
+ if (IS_ERR_OR_NULL(link))
+ goto out;
+ fsstack_copy_attr_atime(inode, lower_inode);
+out:
+ dput(lower_dentry);
+ trace_hmdfs_get_link_merge(inode, dentry, PTR_ERR_OR_ZERO(link));
+link_out:
+ return link;
+}
+
+const struct inode_operations hmdfs_symlink_iops_merge = {
+ .get_link = hmdfs_get_link_merge,
+ .permission = hmdfs_permission,
+};
+
+const struct inode_operations hmdfs_dir_iops_merge = {
+ .lookup = hmdfs_lookup_merge,
+ .mkdir = hmdfs_mkdir_merge,
+ .create = hmdfs_create_merge,
+ .rmdir = hmdfs_rmdir_merge,
+ .unlink = hmdfs_unlink_merge,
+ .symlink = hmdfs_symlink_merge,
+ .rename = hmdfs_rename_merge,
+ .permission = hmdfs_permission,
+};
diff --git a/fs/hmdfs/inode_remote.c b/fs/hmdfs/inode_remote.c
new file mode 100644
index 0000000000000000000000000000000000000000..98a0e34c2253cee0b09eaa0207b15ef2725d15ba
--- /dev/null
+++ b/fs/hmdfs/inode_remote.c
@@ -0,0 +1,989 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/inode_remote.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h>
+#include <linux/string.h>
+
+#include "comm/socket_adapter.h"
+#include "hmdfs.h"
+#include "hmdfs_client.h"
+#include "hmdfs_dentryfile.h"
+#include "hmdfs_trace.h"
+#include "authority/authentication.h"
+#include "stash.h"
+
+struct hmdfs_lookup_ret *lookup_remote_dentry(struct dentry *child_dentry,
+ const struct qstr *qstr,
+ uint64_t dev_id)
+{
+ struct hmdfs_lookup_ret *lookup_ret;
+ struct hmdfs_dentry *dentry = NULL;
+ struct clearcache_item *cache_item = NULL;
+ struct hmdfs_dcache_lookup_ctx ctx;
+ struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb);
+
+ cache_item = hmdfs_find_cache_item(dev_id, child_dentry->d_parent);
+ if (!cache_item)
+ return NULL;
+
+ lookup_ret = kmalloc(sizeof(*lookup_ret), GFP_KERNEL);
+ if (!lookup_ret)
+ goto out;
+
+ hmdfs_init_dcache_lookup_ctx(&ctx, sbi, qstr, cache_item->filp);
+ dentry = hmdfs_find_dentry(child_dentry, &ctx);
+ if (!dentry) {
+ kfree(lookup_ret);
+ lookup_ret = NULL;
+ goto out;
+ }
+
+ lookup_ret->i_mode = le16_to_cpu(dentry->i_mode);
+ lookup_ret->i_size = le64_to_cpu(dentry->i_size);
+ lookup_ret->i_mtime = le64_to_cpu(dentry->i_mtime);
+ lookup_ret->i_mtime_nsec = le32_to_cpu(dentry->i_mtime_nsec);
+ lookup_ret->i_ino = le64_to_cpu(dentry->i_ino);
+
+ hmdfs_unlock_file(ctx.filp, get_dentry_group_pos(ctx.bidx),
+ DENTRYGROUP_SIZE);
+ kfree(ctx.page);
+out:
+ kref_put(&cache_item->ref, release_cache_item);
+ return lookup_ret;
+}
+
+/* get_remote_inode_info - fill hmdfs_lookup_ret with info from remote getattr
+ *
+ * @con: the remote device
+ * @dentry: local dentry
+ * @flags: lookup flags
+ *
+ * Returns an allocated and initialized hmdfs_lookup_ret on success, and NULL
+ * on failure.
+ */
+struct hmdfs_lookup_ret *get_remote_inode_info(struct hmdfs_peer *con,
+ struct dentry *dentry,
+ unsigned int flags)
+{
+ int err = 0;
+ struct hmdfs_lookup_ret *lookup_ret = NULL;
+ struct hmdfs_getattr_ret *getattr_ret = NULL;
+ unsigned int expected_flags = 0;
+
+ lookup_ret = kmalloc(sizeof(*lookup_ret), GFP_KERNEL);
+ if (!lookup_ret)
+ return NULL;
+
+ err = hmdfs_remote_getattr(con, dentry, flags, &getattr_ret);
+ if (err) {
+ hmdfs_debug("inode info get failed with err %d", err);
+ kfree(lookup_ret);
+ return NULL;
+ }
+ /* make sure we got everything we need */
+ expected_flags = STATX_INO | STATX_SIZE | STATX_MODE | STATX_MTIME;
+ if ((getattr_ret->stat.result_mask & expected_flags) !=
+ expected_flags) {
+ hmdfs_debug("remote getattr failed with flag %x",
+ getattr_ret->stat.result_mask);
+ kfree(lookup_ret);
+ kfree(getattr_ret);
+ return NULL;
+ }
+
+ lookup_ret->i_mode = getattr_ret->stat.mode;
+ lookup_ret->i_size = getattr_ret->stat.size;
+ lookup_ret->i_mtime = getattr_ret->stat.mtime.tv_sec;
+ lookup_ret->i_mtime_nsec = getattr_ret->stat.mtime.tv_nsec;
+ lookup_ret->i_ino = getattr_ret->stat.ino;
+ kfree(getattr_ret);
+ return lookup_ret;
+}
+
+static void hmdfs_remote_readdir_work(struct work_struct *work)
+{
+ struct hmdfs_readdir_work *rw =
+ container_of(to_delayed_work(work), struct hmdfs_readdir_work,
+ dwork);
+ struct dentry *dentry = rw->dentry;
+ struct hmdfs_peer *con = rw->con;
+ const struct cred *old_cred = hmdfs_override_creds(con->sbi->cred);
+ bool empty = false;
+
+ get_remote_dentry_file(dentry, con);
+ hmdfs_d(dentry)->async_readdir_in_progress = false;
+ hmdfs_revert_creds(old_cred);
+
+ dput(dentry);
+ peer_put(con);
+ spin_lock(&con->sbi->async_readdir_work_lock);
+ list_del(&rw->head);
+ empty = list_empty(&con->sbi->async_readdir_work_list);
+ spin_unlock(&con->sbi->async_readdir_work_lock);
+ kfree(rw);
+
+ if (empty)
+ wake_up_interruptible(&con->sbi->async_readdir_wq);
+}
+
+static void get_remote_dentry_file_in_wq(struct dentry *dentry,
+ struct hmdfs_peer *con)
+{
+ struct hmdfs_readdir_work *rw = NULL;
+
+ /* do nothing if async readdir is already in progress */
+ if (cmpxchg_relaxed(&hmdfs_d(dentry)->async_readdir_in_progress, false,
+ true))
+ return;
+
+ rw = kmalloc(sizeof(*rw), GFP_KERNEL);
+ if (!rw) {
+ hmdfs_d(dentry)->async_readdir_in_progress = false;
+ return;
+ }
+
+ dget(dentry);
+ peer_get(con);
+ rw->dentry = dentry;
+ rw->con = con;
+ spin_lock(&con->sbi->async_readdir_work_lock);
+ INIT_DELAYED_WORK(&rw->dwork, hmdfs_remote_readdir_work);
+ list_add(&rw->head, &con->sbi->async_readdir_work_list);
+ spin_unlock(&con->sbi->async_readdir_work_lock);
+ queue_delayed_work(con->dentry_wq, &rw->dwork, 0);
+}
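
The cmpxchg_relaxed() above makes the async readdir single-flight per
dentry: only the caller that flips async_readdir_in_progress from false to
true gets to queue the work, and the flag is reset when the work finishes
or could not be queued. The same idiom expressed with C11 atomics
(illustrative names, not kernel API):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool in_progress;

/* Only the caller that wins the false -> true exchange starts the work;
 * everyone else sees the exchange fail and returns immediately.
 */
static bool try_start(void)
{
	bool expected = false;

	return atomic_compare_exchange_strong(&in_progress, &expected, true);
}

static void finish(void)
{
	atomic_store(&in_progress, false);
}

int main(void)
{
	printf("%d\n", try_start()); /* 1: we won the race */
	printf("%d\n", try_start()); /* 0: already running */
	finish();
	printf("%d\n", try_start()); /* 1: free to run again */
	return 0;
}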
+
+void get_remote_dentry_file_sync(struct dentry *dentry, struct hmdfs_peer *con)
+{
+ get_remote_dentry_file_in_wq(dentry, con);
+ flush_workqueue(con->dentry_wq);
+}
+
+struct hmdfs_lookup_ret *hmdfs_lookup_by_con(struct hmdfs_peer *con,
+ struct dentry *dentry,
+ struct qstr *qstr,
+ unsigned int flags,
+ const char *relative_path)
+{
+ struct hmdfs_lookup_ret *result = NULL;
+
+ if (con->version > USERSPACE_MAX_VER) {
+ /*
+ * LOOKUP_REVAL means we found stale info from dentry file, thus
+ * we need to use remote getattr.
+ */
+ if (flags & LOOKUP_REVAL) {
+ /*
+ * HMDFS_LOOKUP_REVAL means we need to skip dentry cache
+ * in lookup, because dentry cache in server might have
+ * stale data.
+ */
+ result = get_remote_inode_info(con, dentry,
+ HMDFS_LOOKUP_REVAL);
+ get_remote_dentry_file_in_wq(dentry->d_parent, con);
+ return result;
+ }
+
+ /* If cache file is still valid */
+ if (hmdfs_cache_revalidate(READ_ONCE(con->conn_time),
+ con->device_id, dentry->d_parent)) {
+ result = lookup_remote_dentry(dentry, qstr,
+ con->device_id);
+ /*
+ * If lookup from cache file failed, use getattr to see
+ * if remote have created the file.
+ */
+ if (!(flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) &&
+ !result)
+ result = get_remote_inode_info(con, dentry, 0);
+ /* If cache file expired, use getattr directly
+ * except create and rename opt
+ */
+ } else {
+ result = get_remote_inode_info(con, dentry, 0);
+ get_remote_dentry_file_in_wq(dentry->d_parent, con);
+ }
+ } else {
+ if (!relative_path)
+ return NULL;
+
+ result = con->conn_operations->remote_lookup(
+ con, relative_path, dentry->d_name.name);
+ }
+
+ return result;
+}
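
For newer peers, the function above chooses among three metadata sources: a
forced remote getattr when the caller knows cached info is stale, the
per-peer dentry cache file while it is still fresh, and a plain remote
getattr otherwise. Reduced to its decision structure (a sketch; the enum
and parameter names are illustrative):

#include <stdbool.h>
#include <stdio.h>

enum src { GETATTR_REVAL, DCACHE, GETATTR, NOT_FOUND };

/* reval:       dentry-file info is known stale (LOOKUP_REVAL)
 * cache_valid: the per-peer dentry cache file is still fresh
 * cache_hit:   the name was found in that cache file
 * creating:    LOOKUP_CREATE / LOOKUP_RENAME_TARGET lookup
 */
static enum src pick_source(bool reval, bool cache_valid, bool cache_hit,
			    bool creating)
{
	if (reval)
		return GETATTR_REVAL;	/* skip every cache */
	if (cache_valid) {
		if (cache_hit)
			return DCACHE;
		/* miss: a remote peer may have created it meanwhile */
		return creating ? NOT_FOUND : GETATTR;
	}
	return GETATTR;			/* cache expired: ask the peer */
}

int main(void)
{
	printf("%d\n", pick_source(false, true, true, false)); /* 1: DCACHE */
	printf("%d\n", pick_source(false, true, false, true)); /* 3: NOT_FOUND */
	return 0;
}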
+
+/*
+ * hmdfs_update_inode_size - update inode size when finding an already existing
+ * inode.
+ *
+ * First of all, if the file is opened for writing, we don't update inode size
+ * here, because inode size is about to be changed after writing.
+ *
+ * If the file is not opened, simply update getattr_isize(not actual inode size,
+ * just a value showed to user). This is safe because inode size will be
+ * up-to-date after open.
+ *
+ * If the file is opened for read:
+ * a. getattr_isize == HMDFS_STALE_REMOTE_ISIZE
+ * 1) i_size == new_size, nothing need to be done.
+ * 2) i_size > new_size, we keep the i_size and set getattr_isize to new_size,
+ *    stale data might be read in this case, which is fine because the file
+ *    was opened before the remote truncated it.
+ * 3) i_size < new_size, we drop the last page of the file if i_size is not
+ * aligned to PAGE_SIZE, clear getattr_isize, and update i_size to
+ * new_size.
+ * b. getattr_isize != HMDFS_STALE_REMOTE_ISIZE, getattr_isize will only be set
+ * after 2).
+ * 4) getattr_isize > i_size, this situation is impossible.
+ * 5) i_size >= new_size, this case is the same as 2).
+ * 6) i_size < new_size, this case is the same as 3).
+ */
+static void hmdfs_update_inode_size(struct inode *inode, uint64_t new_size)
+{
+ struct hmdfs_inode_info *info = hmdfs_i(inode);
+ int writecount;
+ uint64_t size;
+
+ inode_lock(inode);
+ size = info->getattr_isize;
+ if (size == HMDFS_STALE_REMOTE_ISIZE)
+ size = i_size_read(inode);
+ if (size == new_size) {
+ inode_unlock(inode);
+ return;
+ }
+
+ writecount = atomic_read(&inode->i_writecount);
+ /* check if writing is in progress */
+ if (writecount > 0) {
+ info->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
+ inode_unlock(inode);
+ return;
+ }
+
+ /* check if there is no one who opens the file */
+ if (kref_read(&info->ref) == 0)
+ goto update_info;
+
+ /* check if there is someone who opens the file for read */
+ if (writecount == 0) {
+ uint64_t aligned_size;
+
+ /* use inode size here instead of getattr_isize */
+ size = i_size_read(inode);
+ if (new_size <= size)
+ goto update_info;
+ /*
+ * if the old inode size is not aligned to HMDFS_PAGE_SIZE, we
+		 * need to drop the last page of the inode, otherwise zeros will
+		 * be returned when reading the new range in that page after
+		 * changing the inode size.
+ */
+ aligned_size = round_down(size, HMDFS_PAGE_SIZE);
+ if (aligned_size != size)
+ truncate_inode_pages(inode->i_mapping, aligned_size);
+ i_size_write(inode, new_size);
+ info->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
+ inode_unlock(inode);
+ return;
+ }
+
+update_info:
+ info->getattr_isize = new_size;
+ inode_unlock(inode);
+}
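
The case analysis in the comment above reduces to a small decision over
(writers, openers, old size, new size). A userspace model that keeps the
same outcomes while omitting the locking, the equal-size early return, and
the partial-page truncation detail:

#include <stdint.h>
#include <stdio.h>

#define STALE UINT64_MAX	/* models HMDFS_STALE_REMOTE_ISIZE */

struct inode_model {
	uint64_t i_size;	/* real cached size */
	uint64_t getattr_isize;	/* size shown to stat() when set */
	int writers;		/* i_writecount */
	int openers;		/* info->ref */
};

static void update_size(struct inode_model *in, uint64_t new_size)
{
	if (in->writers > 0) {		/* a writer owns the size */
		in->getattr_isize = STALE;
		return;
	}
	if (in->openers == 0) {		/* nobody open: stash for stat() */
		in->getattr_isize = new_size;
		return;
	}
	if (new_size <= in->i_size) {	/* readers may still use the old tail */
		in->getattr_isize = new_size;
		return;
	}
	in->i_size = new_size;		/* growing is safe to adopt */
	in->getattr_isize = STALE;
}

int main(void)
{
	struct inode_model in = { .i_size = 4096, .getattr_isize = STALE,
				  .writers = 0, .openers = 1 };

	update_size(&in, 8192);
	printf("i_size=%llu\n", (unsigned long long)in.i_size); /* 8192 */
	return 0;
}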
+
+static void hmdfs_update_inode(struct inode *inode,
+ struct hmdfs_lookup_ret *lookup_result)
+{
+ struct hmdfs_time_t remote_mtime = {
+ .tv_sec = lookup_result->i_mtime,
+ .tv_nsec = lookup_result->i_mtime_nsec,
+ };
+
+	/*
+	 * We only update mtime if the file is not open for writing. If we
+	 * updated it just before writing starts, the user might see the mtime
+	 * bounce back and forth whenever the system times on server and
+	 * client disagree. The client mtime will eventually match the server
+	 * after a timeout without writing.
+	 */
+ if (!inode_is_open_for_write(inode))
+ inode->i_mtime = remote_mtime;
+
+ /*
+ * We don't care i_size of dir, and lock inode for dir
+ * might cause deadlock.
+ */
+ if (S_ISREG(inode->i_mode))
+ hmdfs_update_inode_size(inode, lookup_result->i_size);
+}
+
+static void hmdfs_fill_inode_android(struct inode *inode, struct inode *dir,
+ umode_t mode)
+{
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+ inode->i_uid = dir->i_uid;
+ inode->i_gid = dir->i_gid;
+#endif
+}
+
+struct inode *fill_inode_remote(struct super_block *sb, struct hmdfs_peer *con,
+ struct hmdfs_lookup_ret *res, struct inode *dir)
+{
+ struct inode *inode = NULL;
+ struct hmdfs_inode_info *info;
+ umode_t mode = res->i_mode;
+
+ inode = hmdfs_iget5_locked_remote(sb, con, res->i_ino);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+
+ info = hmdfs_i(inode);
+ info->inode_type = HMDFS_LAYER_OTHER_REMOTE;
+ if (con->version > USERSPACE_MAX_VER) {
+ /* the inode was found in cache */
+ if (!(inode->i_state & I_NEW)) {
+ hmdfs_fill_inode_android(inode, dir, mode);
+ hmdfs_update_inode(inode, res);
+ return inode;
+ }
+
+ hmdfs_remote_init_stash_status(con, inode, mode);
+ }
+
+ inode->i_ctime.tv_sec = 0;
+ inode->i_ctime.tv_nsec = 0;
+ inode->i_mtime.tv_sec = res->i_mtime;
+ inode->i_mtime.tv_nsec = res->i_mtime_nsec;
+
+ inode->i_uid = KUIDT_INIT((uid_t)1000);
+ inode->i_gid = KGIDT_INIT((gid_t)1000);
+
+ if (S_ISDIR(mode))
+ inode->i_mode = S_IFDIR | S_IRWXU | S_IRWXG | S_IXOTH;
+ else if (S_ISREG(mode))
+ inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
+ else if (S_ISLNK(mode))
+ inode->i_mode = S_IFREG | S_IRWXU | S_IRWXG;
+
+	if (S_ISREG(mode) || S_ISLNK(mode)) { // Regular file
+ inode->i_op = con->conn_operations->remote_file_iops;
+ inode->i_fop = con->conn_operations->remote_file_fops;
+ inode->i_size = res->i_size;
+ set_nlink(inode, 1);
+ } else if (S_ISDIR(mode)) { // Directory
+ inode->i_op = &hmdfs_dev_dir_inode_ops_remote;
+ inode->i_fop = &hmdfs_dev_dir_ops_remote;
+ set_nlink(inode, 2);
+ }
+ inode->i_mapping->a_ops = con->conn_operations->remote_file_aops;
+
+ hmdfs_fill_inode_android(inode, dir, mode);
+ unlock_new_inode(inode);
+ return inode;
+}
+
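+/*
+ * Look up @child_dentry on the peer recorded in its dentry info. On
+ * success, the remote attributes are used to build a local inode,
+ * which is then spliced into the dcache.
+ */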
+static struct dentry *hmdfs_lookup_remote_dentry(struct inode *parent_inode,
+ struct dentry *child_dentry,
+ int flags)
+{
+ struct dentry *ret = NULL;
+ struct inode *inode = NULL;
+ struct super_block *sb = parent_inode->i_sb;
+ struct hmdfs_sb_info *sbi = sb->s_fs_info;
+ struct hmdfs_lookup_ret *lookup_result = NULL;
+ struct hmdfs_peer *con = NULL;
+ char *file_name = NULL;
+ int file_name_len = child_dentry->d_name.len;
+ struct qstr qstr;
+ struct hmdfs_dentry_info *gdi = hmdfs_d(child_dentry);
+ uint64_t device_id = 0;
+ char *relative_path = NULL;
+
+ file_name = kzalloc(NAME_MAX + 1, GFP_KERNEL);
+ if (!file_name)
+ return ERR_PTR(-ENOMEM);
+ strncpy(file_name, child_dentry->d_name.name, file_name_len);
+
+ qstr.name = file_name;
+ qstr.len = strlen(file_name);
+
+ device_id = gdi->device_id;
+ con = hmdfs_lookup_from_devid(sbi, device_id);
+ if (!con) {
+ ret = ERR_PTR(-ESHUTDOWN);
+ goto done;
+ }
+
+ relative_path = hmdfs_get_dentry_relative_path(child_dentry->d_parent);
+ if (unlikely(!relative_path)) {
+ ret = ERR_PTR(-ENOMEM);
+ hmdfs_err("get relative path failed %d", -ENOMEM);
+ goto done;
+ }
+
+ lookup_result = hmdfs_lookup_by_con(con, child_dentry, &qstr, flags,
+ relative_path);
+ if (lookup_result != NULL) {
+ if (S_ISLNK(lookup_result->i_mode))
+ gdi->file_type = HM_SYMLINK;
+ inode = fill_inode_remote(sb, con, lookup_result, parent_inode);
+ ret = d_splice_alias(inode, child_dentry);
+ if (!IS_ERR_OR_NULL(ret))
+ child_dentry = ret;
+ if (!IS_ERR(ret))
+ check_and_fixup_ownership_remote(parent_inode,
+ child_dentry);
+ } else {
+ ret = ERR_PTR(-ENOENT);
+ }
+
+done:
+ if (con)
+ peer_put(con);
+ kfree(relative_path);
+ kfree(lookup_result);
+ kfree(file_name);
+ return ret;
+}
+
+struct dentry *hmdfs_lookup_remote(struct inode *parent_inode,
+ struct dentry *child_dentry,
+ unsigned int flags)
+{
+ int err = 0;
+ struct dentry *ret = NULL;
+ struct hmdfs_dentry_info *gdi = NULL;
+ struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb);
+
+ trace_hmdfs_lookup_remote(parent_inode, child_dentry, flags);
+ if (child_dentry->d_name.len > NAME_MAX) {
+ err = -ENAMETOOLONG;
+ ret = ERR_PTR(-ENAMETOOLONG);
+ goto out;
+ }
+
+ err = init_hmdfs_dentry_info(sbi, child_dentry,
+ HMDFS_LAYER_OTHER_REMOTE);
+ if (err) {
+ ret = ERR_PTR(err);
+ goto out;
+ }
+ gdi = hmdfs_d(child_dentry);
+ gdi->device_id = hmdfs_d(child_dentry->d_parent)->device_id;
+
+ if (is_current_hmdfs_server_ctx())
+ goto out;
+
+ ret = hmdfs_lookup_remote_dentry(parent_inode, child_dentry, flags);
+ /*
+ * Don't return an error if the inode does not exist, so that VFS
+ * can continue and create it.
+ */
+ if (IS_ERR_OR_NULL(ret)) {
+ err = PTR_ERR(ret);
+ if (err == -ENOENT)
+ ret = NULL;
+ } else {
+ child_dentry = ret;
+ }
+
+out:
+ if (!err)
+ hmdfs_set_time(child_dentry, jiffies);
+ trace_hmdfs_lookup_remote_end(parent_inode, child_dentry, err);
+ return ret;
+}
+
+/* delete dentry in cache file */
+void delete_in_cache_file(uint64_t dev_id, struct dentry *dentry)
+{
+ struct clearcache_item *item = NULL;
+
+ item = hmdfs_find_cache_item(dev_id, dentry->d_parent);
+ if (item) {
+ hmdfs_delete_dentry(dentry, item->filp);
+ kref_put(&item->ref, release_cache_item);
+ } else {
+ hmdfs_info("find cache item failed, con:%llu", dev_id);
+ }
+}
+
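+/*
+ * Ask the peer to create the directory, then instantiate a local
+ * inode for it from the attributes returned in @mkdir_ret.
+ */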
+int hmdfs_mkdir_remote_dentry(struct hmdfs_peer *conn, struct dentry *dentry,
+ umode_t mode)
+{
+ int err = 0;
+ char *dir_path = NULL;
+ struct dentry *parent_dentry = dentry->d_parent;
+ struct inode *parent_inode = d_inode(parent_dentry);
+ struct super_block *sb = parent_inode->i_sb;
+ const unsigned char *d_name = dentry->d_name.name;
+ struct hmdfs_lookup_ret *mkdir_ret = NULL;
+ struct inode *inode = NULL;
+
+ mkdir_ret = kmalloc(sizeof(*mkdir_ret), GFP_KERNEL);
+ if (!mkdir_ret) {
+ err = -ENOMEM;
+ return err;
+ }
+ dir_path = hmdfs_get_dentry_relative_path(parent_dentry);
+ if (!dir_path) {
+ err = -EACCES;
+ goto mkdir_out;
+ }
+ err = hmdfs_client_start_mkdir(conn, dir_path, d_name, mode, mkdir_ret);
+ if (err) {
+ hmdfs_err("hmdfs_client_start_mkdir failed err = %d", err);
+ goto mkdir_out;
+ }
+ if (mkdir_ret) {
+ inode = fill_inode_remote(sb, conn, mkdir_ret, parent_inode);
+ if (!IS_ERR(inode))
+ d_add(dentry, inode);
+ else
+ err = PTR_ERR(inode);
+ check_and_fixup_ownership_remote(parent_inode, dentry);
+ } else {
+ err = -ENOENT;
+ }
+
+mkdir_out:
+ kfree(dir_path);
+ kfree(mkdir_ret);
+ return err;
+}
+
+int hmdfs_mkdir_remote(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ int err = 0;
+ struct hmdfs_inode_info *info = hmdfs_i(dir);
+ struct hmdfs_peer *con = info->conn;
+
+ if (!con) {
+ hmdfs_warning("qpb_debug: con is null!");
+ goto out;
+ }
+ if (con->version <= USERSPACE_MAX_VER) {
+ err = -EPERM;
+ goto out;
+ }
+ err = hmdfs_mkdir_remote_dentry(con, dentry, mode);
+ if (!err)
+ create_in_cache_file(con->device_id, dentry);
+ else
+ hmdfs_err("remote mkdir failed err = %d", err);
+
+out:
+ trace_hmdfs_mkdir_remote(dir, dentry, err);
+ return err;
+}
+
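+/*
+ * Remote counterpart of create(2): the file is created on the peer
+ * first, and the local inode is built from the returned attributes.
+ */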
+int hmdfs_create_remote_dentry(struct hmdfs_peer *conn, struct dentry *dentry,
+ umode_t mode, bool want_excl)
+{
+ int err = 0;
+ char *dir_path = NULL;
+ struct dentry *parent_dentry = dentry->d_parent;
+ struct inode *parent_inode = d_inode(parent_dentry);
+ struct super_block *sb = parent_inode->i_sb;
+ const unsigned char *d_name = dentry->d_name.name;
+ struct hmdfs_lookup_ret *create_ret = NULL;
+ struct inode *inode = NULL;
+
+ create_ret = kmalloc(sizeof(*create_ret), GFP_KERNEL);
+ if (!create_ret) {
+ err = -ENOMEM;
+ return err;
+ }
+ dir_path = hmdfs_get_dentry_relative_path(parent_dentry);
+ if (!dir_path) {
+ err = -EACCES;
+ goto create_out;
+ }
+ err = hmdfs_client_start_create(conn, dir_path, d_name, mode,
+ want_excl, create_ret);
+ if (err) {
+ hmdfs_err("hmdfs_client_start_create failed err = %d", err);
+ goto create_out;
+ }
+ if (create_ret) {
+ inode = fill_inode_remote(sb, conn, create_ret, parent_inode);
+ if (!IS_ERR(inode))
+ d_add(dentry, inode);
+ else
+ err = PTR_ERR(inode);
+ check_and_fixup_ownership_remote(parent_inode, dentry);
+ } else {
+ err = -ENOENT;
+ hmdfs_err("get remote inode info failed err = %d", err);
+ }
+
+create_out:
+ kfree(dir_path);
+ kfree(create_ret);
+ return err;
+}
+
+int hmdfs_create_remote(struct inode *dir, struct dentry *dentry, umode_t mode,
+ bool want_excl)
+{
+ int err = 0;
+ struct hmdfs_inode_info *info = hmdfs_i(dir);
+ struct hmdfs_peer *con = info->conn;
+
+ if (!con) {
+ hmdfs_warning("qpb_debug: con is null!");
+ goto out;
+ }
+ if (con->version <= USERSPACE_MAX_VER) {
+ err = -EPERM;
+ goto out;
+ }
+ err = hmdfs_create_remote_dentry(con, dentry, mode, want_excl);
+ if (!err)
+ create_in_cache_file(con->device_id, dentry);
+ else
+ hmdfs_err("remote create failed err = %d", err);
+
+out:
+ trace_hmdfs_create_remote(dir, dentry, err);
+ return err;
+}
+
+int hmdfs_rmdir_remote_dentry(struct hmdfs_peer *conn, struct dentry *dentry)
+{
+ int error = 0;
+ char *dir_path = NULL;
+ const char *dentry_name = dentry->d_name.name;
+
+ dir_path = hmdfs_get_dentry_relative_path(dentry->d_parent);
+ if (!dir_path) {
+ error = -EACCES;
+ goto rmdir_out;
+ }
+
+ error = hmdfs_client_start_rmdir(conn, dir_path, dentry_name);
+ if (!error)
+ delete_in_cache_file(conn->device_id, dentry);
+
+rmdir_out:
+ kfree(dir_path);
+ return error;
+}
+
+int hmdfs_rmdir_remote(struct inode *dir, struct dentry *dentry)
+{
+ int err = 0;
+ struct hmdfs_inode_info *info = hmdfs_i(dentry->d_inode);
+ struct hmdfs_peer *con = info->conn;
+
+ if (!con)
+ goto out;
+
+ if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) {
+ err = -EACCES;
+ goto out;
+ }
+ if (con->version <= USERSPACE_MAX_VER) {
+ err = -EPERM;
+ goto out;
+ }
+ err = hmdfs_rmdir_remote_dentry(con, dentry);
+ /*
+ * Drop the dentry even if the remote operation failed: the
+ * failure may just mean a remote device disconnected while
+ * the rmdir was in flight.
+ */
+ d_drop(dentry);
+out:
+ /* return the connected device's error code */
+ trace_hmdfs_rmdir_remote(dir, dentry, err);
+ return err;
+}
+
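+/*
+ * Ask the peer to unlink the file; only on success is the local state
+ * (dentry cache file, nlink, dcache entry) updated to match.
+ */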
+int hmdfs_dev_unlink_from_con(struct hmdfs_peer *conn, struct dentry *dentry)
+{
+ int error = 0;
+ char *dir_path = NULL;
+ const char *dentry_name = dentry->d_name.name;
+
+ dir_path = hmdfs_get_dentry_relative_path(dentry->d_parent);
+ if (!dir_path) {
+ error = -EACCES;
+ goto unlink_out;
+ }
+ error = hmdfs_client_start_unlink(conn, dir_path, dentry_name);
+ if (!error) {
+ delete_in_cache_file(conn->device_id, dentry);
+ drop_nlink(d_inode(dentry));
+ d_drop(dentry);
+ }
+unlink_out:
+ kfree(dir_path);
+ return error;
+}
+
+int hmdfs_unlink_remote(struct inode *dir, struct dentry *dentry)
+{
+ struct hmdfs_inode_info *info = hmdfs_i(dentry->d_inode);
+ struct hmdfs_peer *conn = info->conn;
+
+ if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON)
+ return -EACCES;
+
+ if (!conn)
+ return 0;
+
+ if (conn->status != NODE_STAT_ONLINE)
+ return 0;
+
+ return conn->conn_operations->remote_unlink(conn, dentry);
+}
+
+/* rename dentry in cache file */
+static void rename_in_cache_file(uint64_t dev_id, struct dentry *old_dentry,
+ struct dentry *new_dentry)
+{
+ struct clearcache_item *old_item = NULL;
+ struct clearcache_item *new_item = NULL;
+
+ old_item = hmdfs_find_cache_item(dev_id, old_dentry->d_parent);
+ new_item = hmdfs_find_cache_item(dev_id, new_dentry->d_parent);
+ if (old_item != NULL && new_item != NULL) {
+ hmdfs_rename_dentry(old_dentry, new_dentry, old_item->filp,
+ new_item->filp);
+ } else if (old_item != NULL) {
+ hmdfs_err("failed to find new cache item!");
+ } else if (new_item != NULL) {
+ hmdfs_err("failed to find old cache item!");
+ } else {
+ hmdfs_err("failed to find both cache items!");
+ }
+
+ if (old_item)
+ kref_put(&old_item->ref, release_cache_item);
+ if (new_item)
+ kref_put(&new_item->ref, release_cache_item);
+}
+
+int hmdfs_rename_remote(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ int err = 0;
+ int ret = 0;
+ const char *old_dentry_d_name = old_dentry->d_name.name;
+ char *relative_old_dir_path = NULL;
+ const char *new_dentry_d_name = new_dentry->d_name.name;
+ char *relative_new_dir_path = NULL;
+ struct hmdfs_inode_info *info = hmdfs_i(old_dentry->d_inode);
+ struct hmdfs_peer *con = info->conn;
+
+ trace_hmdfs_rename_remote(old_dir, old_dentry, new_dir, new_dentry,
+ flags);
+
+ if (flags & ~RENAME_NOREPLACE)
+ return -EINVAL;
+
+ if (hmdfs_file_type(old_dentry->d_name.name) != HMDFS_TYPE_COMMON ||
+ hmdfs_file_type(new_dentry->d_name.name) != HMDFS_TYPE_COMMON) {
+ return -EACCES;
+ }
+
+ relative_old_dir_path =
+ hmdfs_get_dentry_relative_path(old_dentry->d_parent);
+ relative_new_dir_path =
+ hmdfs_get_dentry_relative_path(new_dentry->d_parent);
+ if (!relative_old_dir_path || !relative_new_dir_path) {
+ err = -EACCES;
+ goto rename_out;
+ }
+ if (S_ISREG(old_dentry->d_inode->i_mode)) {
+ if (con->version > USERSPACE_MAX_VER) {
+ hmdfs_debug("send MSG to remote devID %llu",
+ con->device_id);
+ err = hmdfs_client_start_rename(
+ con, relative_old_dir_path, old_dentry_d_name,
+ relative_new_dir_path, new_dentry_d_name,
+ flags);
+ if (!err)
+ rename_in_cache_file(con->device_id, old_dentry,
+ new_dentry);
+ }
+ } else if (S_ISDIR(old_dentry->d_inode->i_mode)) {
+ if ((con->status == NODE_STAT_ONLINE) &&
+ (con->version > USERSPACE_MAX_VER)) {
+ ret = hmdfs_client_start_rename(
+ con, relative_old_dir_path, old_dentry_d_name,
+ relative_new_dir_path, new_dentry_d_name,
+ flags);
+ if (!ret)
+ rename_in_cache_file(con->device_id, old_dentry,
+ new_dentry);
+ else
+ err = ret;
+ }
+ }
+ if (!err)
+ d_invalidate(old_dentry);
+rename_out:
+ kfree(relative_old_dir_path);
+ kfree(relative_new_dir_path);
+ return err;
+}
+
+static int hmdfs_dir_setattr_remote(struct dentry *dentry, struct iattr *ia)
+{
+ // setattr is not supported for remote directories
+ return 0;
+}
+
+const struct inode_operations hmdfs_dev_dir_inode_ops_remote = {
+ .lookup = hmdfs_lookup_remote,
+ .mkdir = hmdfs_mkdir_remote,
+ .create = hmdfs_create_remote,
+ .rmdir = hmdfs_rmdir_remote,
+ .unlink = hmdfs_unlink_remote,
+ .rename = hmdfs_rename_remote,
+ .setattr = hmdfs_dir_setattr_remote,
+ .permission = hmdfs_permission,
+};
+
+static int hmdfs_setattr_remote(struct dentry *dentry, struct iattr *ia)
+{
+ struct hmdfs_inode_info *info = hmdfs_i(d_inode(dentry));
+ struct hmdfs_peer *conn = info->conn;
+ struct inode *inode = d_inode(dentry);
+ char *send_buf = NULL;
+ int err = 0;
+
+ if (hmdfs_inode_is_stashing(info))
+ return -EAGAIN;
+
+ send_buf = hmdfs_get_dentry_relative_path(dentry);
+ if (!send_buf) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+ if (ia->ia_valid & ATTR_SIZE) {
+ err = inode_newsize_ok(inode, ia->ia_size);
+ if (err)
+ goto out_free;
+ truncate_setsize(inode, ia->ia_size);
+ info->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
+ }
+ if (ia->ia_valid & ATTR_MTIME)
+ inode->i_mtime = ia->ia_mtime;
+
+ if ((ia->ia_valid & ATTR_SIZE) || (ia->ia_valid & ATTR_MTIME)) {
+ struct setattr_info send_setattr_info = {
+ .size = cpu_to_le64(ia->ia_size),
+ .valid = cpu_to_le32(ia->ia_valid),
+ .mtime = cpu_to_le64(ia->ia_mtime.tv_sec),
+ .mtime_nsec = cpu_to_le32(ia->ia_mtime.tv_nsec),
+ };
+ err = hmdfs_send_setattr(conn, send_buf, &send_setattr_info);
+ }
+out_free:
+ kfree(send_buf);
+ return err;
+}
+
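+/*
+ * Fetch remote attributes for @dentry. On success *@result points to
+ * a kzalloc'ed hmdfs_getattr_ret that the caller must kfree.
+ */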
+int hmdfs_remote_getattr(struct hmdfs_peer *conn, struct dentry *dentry,
+ unsigned int lookup_flags,
+ struct hmdfs_getattr_ret **result)
+{
+ char *send_buf = NULL;
+ struct hmdfs_getattr_ret *attr = NULL;
+ int err = 0;
+
+ if (dentry->d_sb != conn->sbi->sb || !result)
+ return -EINVAL;
+
+ attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+ if (!attr)
+ return -ENOMEM;
+
+ send_buf = hmdfs_get_dentry_relative_path(dentry);
+ if (!send_buf) {
+ kfree(attr);
+ return -ENOMEM;
+ }
+
+ err = hmdfs_send_getattr(conn, send_buf, lookup_flags, attr);
+ kfree(send_buf);
+
+ if (err) {
+ kfree(attr);
+ return err;
+ }
+
+ *result = attr;
+ return 0;
+}
+
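+/*
+ * getattr without a network round-trip: attributes are served from
+ * the cached inode, falling back to i_size when getattr_isize is
+ * stale (HMDFS_STALE_REMOTE_ISIZE).
+ */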
+static int hmdfs_get_cached_attr_remote(const struct path *path,
+ struct kstat *stat, u32 request_mask,
+ unsigned int flags)
+{
+ struct inode *inode = d_inode(path->dentry);
+ struct hmdfs_inode_info *info = hmdfs_i(inode);
+ uint64_t size = info->getattr_isize;
+
+ stat->ino = inode->i_ino;
+ stat->mtime = inode->i_mtime;
+ stat->mode = inode->i_mode;
+ stat->uid.val = inode->i_uid.val;
+ stat->gid.val = inode->i_gid.val;
+ if (size == HMDFS_STALE_REMOTE_ISIZE)
+ size = i_size_read(inode);
+
+ stat->size = size;
+ return 0;
+}
+
+ssize_t hmdfs_remote_listxattr(struct dentry *dentry, char *list, size_t size)
+{
+ struct inode *inode = d_inode(dentry);
+ struct hmdfs_inode_info *info = hmdfs_i(inode);
+ struct hmdfs_peer *conn = info->conn;
+ char *send_buf = NULL;
+ ssize_t res = 0;
+ size_t r_size = size;
+
+ if (!hmdfs_support_xattr(dentry))
+ return -EOPNOTSUPP;
+
+ if (size > HMDFS_LISTXATTR_SIZE_MAX)
+ r_size = HMDFS_LISTXATTR_SIZE_MAX;
+
+ send_buf = hmdfs_get_dentry_relative_path(dentry);
+ if (!send_buf)
+ return -ENOMEM;
+
+ res = hmdfs_send_listxattr(conn, send_buf, list, r_size);
+ kfree(send_buf);
+
+ if (res == -ERANGE && r_size != size) {
+ hmdfs_info("no support listxattr size over than %d",
+ HMDFS_LISTXATTR_SIZE_MAX);
+ res = -E2BIG;
+ }
+
+ return res;
+}
+
+const struct inode_operations hmdfs_dev_file_iops_remote = {
+ .setattr = hmdfs_setattr_remote,
+ .permission = hmdfs_permission,
+ .getattr = hmdfs_get_cached_attr_remote,
+ .listxattr = hmdfs_remote_listxattr,
+};
diff --git a/fs/hmdfs/inode_root.c b/fs/hmdfs/inode_root.c
new file mode 100644
index 0000000000000000000000000000000000000000..30d0ca6a2264020fd54c9e856a2dc5497214a787
--- /dev/null
+++ b/fs/hmdfs/inode_root.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/inode_root.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include <linux/fs.h>
+#include <linux/fs_stack.h>
+#include <linux/namei.h>
+
+#include "authority/authentication.h"
+#include "comm/socket_adapter.h"
+#include "comm/transport.h"
+#include "hmdfs_dentryfile.h"
+#include "hmdfs_device_view.h"
+#include "hmdfs_merge_view.h"
+#include "hmdfs_trace.h"
+
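+/*
+ * Build the second-level "local device" inode on top of the lower
+ * root inode. A reference on the lower inode is taken via igrab()
+ * and dropped when the hmdfs inode is evicted.
+ */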
+static struct inode *fill_device_local_inode(struct super_block *sb,
+ struct inode *lower_inode)
+{
+ struct inode *inode = NULL;
+ struct hmdfs_inode_info *info = NULL;
+
+ if (!igrab(lower_inode))
+ return ERR_PTR(-ESTALE);
+
+ inode = hmdfs_iget_locked_root(sb, HMDFS_ROOT_DEV_LOCAL, lower_inode,
+ NULL);
+ if (!inode) {
+ hmdfs_err("iget5_locked get inode NULL");
+ iput(lower_inode);
+ return ERR_PTR(-ENOMEM);
+ }
+ if (!(inode->i_state & I_NEW)) {
+ iput(lower_inode);
+ return inode;
+ }
+
+ info = hmdfs_i(inode);
+ info->inode_type = HMDFS_LAYER_SECOND_LOCAL;
+
+ inode->i_mode =
+ (lower_inode->i_mode & S_IFMT) | S_IRWXU | S_IRWXG | S_IXOTH;
+
+ inode->i_uid = KUIDT_INIT((uid_t)1000);
+ inode->i_gid = KGIDT_INIT((gid_t)1000);
+
+ inode->i_atime = lower_inode->i_atime;
+ inode->i_ctime = lower_inode->i_ctime;
+ inode->i_mtime = lower_inode->i_mtime;
+
+ inode->i_op = &hmdfs_dir_inode_ops_local;
+ inode->i_fop = &hmdfs_dir_ops_local;
+
+ fsstack_copy_inode_size(inode, lower_inode);
+ unlock_new_inode(inode);
+ return inode;
+}
+
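+/*
+ * Build the second-level inode for a remote device view. The inode
+ * is keyed by the peer and has no lower inode backing it.
+ */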
+static struct inode *fill_device_inode_remote(struct super_block *sb,
+ uint64_t dev_id)
+{
+ struct inode *inode = NULL;
+ struct hmdfs_inode_info *info = NULL;
+ struct hmdfs_peer *con = NULL;
+
+ con = hmdfs_lookup_from_devid(sb->s_fs_info, dev_id);
+ if (!con)
+ return ERR_PTR(-ENOENT);
+
+ inode = hmdfs_iget_locked_root(sb, HMDFS_ROOT_DEV_REMOTE, NULL, con);
+ if (!inode) {
+ hmdfs_err("get inode NULL");
+ inode = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+ if (!(inode->i_state & I_NEW))
+ goto out;
+
+ info = hmdfs_i(inode);
+ info->inode_type = HMDFS_LAYER_SECOND_REMOTE;
+
+ inode->i_mode = S_IFDIR | S_IRWXU | S_IRWXG | S_IXOTH;
+
+ inode->i_uid = KUIDT_INIT((uid_t)1000);
+ inode->i_gid = KGIDT_INIT((gid_t)1000);
+ inode->i_op = &hmdfs_dev_dir_inode_ops_remote;
+ inode->i_fop = &hmdfs_dev_dir_ops_remote;
+
+ unlock_new_inode(inode);
+
+out:
+ peer_put(con);
+ return inode;
+}
+
+struct dentry *hmdfs_device_lookup(struct inode *parent_inode,
+ struct dentry *child_dentry,
+ unsigned int flags)
+{
+ const char *d_name = child_dentry->d_name.name;
+ struct inode *root_inode = NULL;
+ struct super_block *sb = parent_inode->i_sb;
+ struct hmdfs_sb_info *sbi = sb->s_fs_info;
+ struct dentry *ret_dentry = NULL;
+ int err = 0;
+ struct hmdfs_peer *con = NULL;
+ struct hmdfs_dentry_info *di = NULL;
+ uint8_t *cid = NULL;
+ struct path *root_lower_path = NULL;
+
+ trace_hmdfs_device_lookup(parent_inode, child_dentry, flags);
+ if (!strncmp(d_name, DEVICE_VIEW_LOCAL,
+ sizeof(DEVICE_VIEW_LOCAL) - 1)) {
+ err = init_hmdfs_dentry_info(sbi, child_dentry,
+ HMDFS_LAYER_SECOND_LOCAL);
+ if (err) {
+ ret_dentry = ERR_PTR(err);
+ goto out;
+ }
+ di = hmdfs_d(sb->s_root);
+ root_lower_path = &(di->lower_path);
+ hmdfs_set_lower_path(child_dentry, root_lower_path);
+ path_get(root_lower_path);
+ root_inode = fill_device_local_inode(
+ sb, d_inode(root_lower_path->dentry));
+ if (IS_ERR(root_inode)) {
+ err = PTR_ERR(root_inode);
+ ret_dentry = ERR_PTR(err);
+ hmdfs_put_reset_lower_path(child_dentry);
+ goto out;
+ }
+ ret_dentry = d_splice_alias(root_inode, child_dentry);
+ if (IS_ERR(ret_dentry)) {
+ err = PTR_ERR(ret_dentry);
+ ret_dentry = ERR_PTR(err);
+ hmdfs_put_reset_lower_path(child_dentry);
+ goto out;
+ }
+ } else {
+ err = init_hmdfs_dentry_info(sbi, child_dentry,
+ HMDFS_LAYER_SECOND_REMOTE);
+ di = hmdfs_d(child_dentry);
+ if (err) {
+ ret_dentry = ERR_PTR(err);
+ goto out;
+ }
+ cid = kzalloc(HMDFS_CID_SIZE + 1, GFP_KERNEL);
+ if (!cid) {
+ err = -ENOMEM;
+ ret_dentry = ERR_PTR(err);
+ goto out;
+ }
+ memcpy(cid, d_name, HMDFS_CID_SIZE);
+ cid[HMDFS_CID_SIZE] = '\0';
+ con = hmdfs_lookup_from_cid(sbi, cid);
+ if (!con) {
+ kfree(cid);
+ err = -ENOENT;
+ ret_dentry = ERR_PTR(err);
+ goto out;
+ }
+ di->device_id = con->device_id;
+ root_inode = fill_device_inode_remote(sb, di->device_id);
+ if (IS_ERR(root_inode)) {
+ kfree(cid);
+ err = PTR_ERR(root_inode);
+ ret_dentry = ERR_PTR(err);
+ goto out;
+ }
+ ret_dentry = d_splice_alias(root_inode, child_dentry);
+ kfree(cid);
+ }
+ if (root_inode)
+ hmdfs_root_inode_perm_init(root_inode);
+ if (!err)
+ hmdfs_set_time(child_dentry, jiffies);
+out:
+ if (con)
+ peer_put(con);
+ trace_hmdfs_device_lookup_end(parent_inode, child_dentry, err);
+ return ret_dentry;
+}
+
+struct dentry *hmdfs_root_lookup(struct inode *parent_inode,
+ struct dentry *child_dentry,
+ unsigned int flags)
+{
+ const char *d_name = child_dentry->d_name.name;
+ struct inode *root_inode = NULL;
+ struct super_block *sb = parent_inode->i_sb;
+ struct hmdfs_sb_info *sbi = sb->s_fs_info;
+ struct dentry *ret = ERR_PTR(-ENOENT);
+ struct path root_path;
+
+ trace_hmdfs_root_lookup(parent_inode, child_dentry, flags);
+ if (sbi->s_merge_switch && !strcmp(d_name, MERGE_VIEW_ROOT)) {
+ ret = hmdfs_lookup_merge(parent_inode, child_dentry, flags);
+ if (ret && !IS_ERR(ret))
+ child_dentry = ret;
+ root_inode = d_inode(child_dentry);
+ } else if (!strcmp(d_name, DEVICE_VIEW_ROOT)) {
+ ret = ERR_PTR(init_hmdfs_dentry_info(
+ sbi, child_dentry, HMDFS_LAYER_FIRST_DEVICE));
+ if (IS_ERR(ret))
+ goto out;
+ ret = ERR_PTR(kern_path(sbi->local_src, 0, &root_path));
+ if (IS_ERR(ret))
+ goto out;
+ root_inode = fill_device_inode(sb, d_inode(root_path.dentry));
+ ret = d_splice_alias(root_inode, child_dentry);
+ path_put(&root_path);
+ }
+ if (!IS_ERR(ret) && root_inode)
+ hmdfs_root_inode_perm_init(root_inode);
+
+out:
+ trace_hmdfs_root_lookup_end(parent_inode, child_dentry,
+ PTR_ERR_OR_ZERO(ret));
+ return ret;
+}
+
+const struct inode_operations hmdfs_device_ops = {
+ .lookup = hmdfs_device_lookup,
+};
+
+const struct inode_operations hmdfs_root_ops = {
+ .lookup = hmdfs_root_lookup,
+};
+
+struct inode *fill_device_inode(struct super_block *sb,
+ struct inode *lower_inode)
+{
+ struct inode *inode = NULL;
+ struct hmdfs_inode_info *info = NULL;
+
+ inode = hmdfs_iget_locked_root(sb, HMDFS_ROOT_DEV, NULL, NULL);
+ if (!inode) {
+ hmdfs_err("iget5_locked get inode NULL");
+ return ERR_PTR(-ENOMEM);
+ }
+ if (!(inode->i_state & I_NEW))
+ return inode;
+
+ info = hmdfs_i(inode);
+ info->inode_type = HMDFS_LAYER_FIRST_DEVICE;
+
+ inode->i_atime = lower_inode->i_atime;
+ inode->i_ctime = lower_inode->i_ctime;
+ inode->i_mtime = lower_inode->i_mtime;
+
+ inode->i_mode = (lower_inode->i_mode & S_IFMT) | S_IRUSR | S_IXUSR |
+ S_IRGRP | S_IXGRP | S_IXOTH;
+ inode->i_uid = KUIDT_INIT((uid_t)1000);
+ inode->i_gid = KGIDT_INIT((gid_t)1000);
+ inode->i_op = &hmdfs_device_ops;
+ inode->i_fop = &hmdfs_device_fops;
+
+ fsstack_copy_inode_size(inode, lower_inode);
+ unlock_new_inode(inode);
+ return inode;
+}
+
+struct inode *fill_root_inode(struct super_block *sb, struct inode *lower_inode)
+{
+ struct inode *inode = NULL;
+ struct hmdfs_inode_info *info = NULL;
+
+ if (!igrab(lower_inode))
+ return ERR_PTR(-ESTALE);
+
+ inode = hmdfs_iget_locked_root(sb, HMDFS_ROOT_ANCESTOR, lower_inode,
+ NULL);
+ if (!inode) {
+ hmdfs_err("iget5_locked get inode NULL");
+ iput(lower_inode);
+ return ERR_PTR(-ENOMEM);
+ }
+ if (!(inode->i_state & I_NEW)) {
+ iput(lower_inode);
+ return inode;
+ }
+
+ info = hmdfs_i(inode);
+ info->inode_type = HMDFS_LAYER_ZERO;
+ inode->i_mode = (lower_inode->i_mode & S_IFMT) | S_IRUSR | S_IXUSR |
+ S_IRGRP | S_IXGRP | S_IXOTH;
+
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+ inode->i_uid = lower_inode->i_uid;
+ inode->i_gid = lower_inode->i_gid;
+#else
+ inode->i_uid = KUIDT_INIT((uid_t)1000);
+ inode->i_gid = KGIDT_INIT((gid_t)1000);
+#endif
+ inode->i_atime = lower_inode->i_atime;
+ inode->i_ctime = lower_inode->i_ctime;
+ inode->i_mtime = lower_inode->i_mtime;
+
+ inode->i_op = &hmdfs_root_ops;
+ inode->i_fop = &hmdfs_root_fops;
+ fsstack_copy_inode_size(inode, lower_inode);
+ unlock_new_inode(inode);
+ return inode;
+}
diff --git a/fs/hmdfs/main.c b/fs/hmdfs/main.c
new file mode 100644
index 0000000000000000000000000000000000000000..c9b28e8cb9f13232967b76bd598e38faec6a434e
--- /dev/null
+++ b/fs/hmdfs/main.c
@@ -0,0 +1,1069 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/main.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+
+#include "hmdfs.h"
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/statfs.h>
+#include <linux/xattr.h>
+#if KERNEL_VERSION(5, 9, 0) < LINUX_VERSION_CODE
+#include <linux/prandom.h>
+#else
+#include <linux/random.h>
+#endif
+
+#include "authority/authentication.h"
+#include "hmdfs_server.h"
+#include "comm/device_node.h"
+#include "comm/message_verify.h"
+#include "comm/protocol.h"
+#include "comm/socket_adapter.h"
+#include "hmdfs_merge_view.h"
+#include "server_writeback.h"
+
+#include "comm/node_cb.h"
+#include "stash.h"
+
+#define CREATE_TRACE_POINTS
+#include "hmdfs_trace.h"
+
+#define HMDFS_BOOT_COOKIE_RAND_SHIFT 33
+
+#define HMDFS_SB_SEQ_FROM 1
+
+struct hmdfs_mount_priv {
+ const char *dev_name;
+ const char *raw_data;
+};
+
+struct syncfs_item {
+ struct list_head list;
+ struct completion done;
+ bool need_abort;
+};
+
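+/* sb sequence numbers start at 1 so that 0 can mean "not allocated" */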
+static DEFINE_IDA(hmdfs_sb_seq);
+
+static inline int hmdfs_alloc_sb_seq(void)
+{
+ return ida_simple_get(&hmdfs_sb_seq, HMDFS_SB_SEQ_FROM, 0, GFP_KERNEL);
+}
+
+static inline void hmdfs_free_sb_seq(unsigned int seq)
+{
+ if (!seq)
+ return;
+ ida_simple_remove(&hmdfs_sb_seq, seq);
+}
+
+static int hmdfs_xattr_local_get(struct dentry *dentry, const char *name,
+ void *value, size_t size)
+{
+ struct path lower_path;
+ ssize_t res = 0;
+
+ hmdfs_get_lower_path(dentry, &lower_path);
+ res = vfs_getxattr(lower_path.dentry, name, value, size);
+ hmdfs_put_lower_path(&lower_path);
+ return res;
+}
+
+static int hmdfs_xattr_remote_get(struct dentry *dentry, const char *name,
+ void *value, size_t size)
+{
+ struct inode *inode = d_inode(dentry);
+ struct hmdfs_inode_info *info = hmdfs_i(inode);
+ struct hmdfs_peer *conn = info->conn;
+ char *send_buf = NULL;
+ ssize_t res = 0;
+
+ send_buf = hmdfs_get_dentry_relative_path(dentry);
+ if (!send_buf)
+ return -ENOMEM;
+
+ res = hmdfs_send_getxattr(conn, send_buf, name, value, size);
+ kfree(send_buf);
+ return res;
+}
+
+static int hmdfs_xattr_get(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, void *value, size_t size)
+{
+ int res = 0;
+ struct hmdfs_inode_info *info = hmdfs_i(inode);
+ size_t r_size = size;
+
+ if (!hmdfs_support_xattr(dentry))
+ return -EOPNOTSUPP;
+
+ if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
+ return -EOPNOTSUPP;
+
+ if (size > HMDFS_XATTR_SIZE_MAX)
+ r_size = HMDFS_XATTR_SIZE_MAX;
+
+ if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL)
+ res = hmdfs_xattr_local_get(dentry, name, value, r_size);
+ else
+ res = hmdfs_xattr_remote_get(dentry, name, value, r_size);
+
+ if (res == -ERANGE && r_size != size) {
+ hmdfs_info("no support xattr value size over than: %d",
+ HMDFS_XATTR_SIZE_MAX);
+ res = -E2BIG;
+ }
+
+ return res;
+}
+
+static int hmdfs_xattr_local_set(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags)
+{
+ struct path lower_path;
+ int res = 0;
+
+ hmdfs_get_lower_path(dentry, &lower_path);
+ if (value) {
+ res = vfs_setxattr(lower_path.dentry, name, value, size, flags);
+ } else {
+ WARN_ON(flags != XATTR_REPLACE);
+ res = vfs_removexattr(lower_path.dentry, name);
+ }
+
+ hmdfs_put_lower_path(&lower_path);
+ return res;
+}
+
+static int hmdfs_xattr_remote_set(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags)
+{
+ struct inode *inode = d_inode(dentry);
+ struct hmdfs_inode_info *info = hmdfs_i(inode);
+ struct hmdfs_peer *conn = info->conn;
+ char *send_buf = NULL;
+ int res = 0;
+
+ send_buf = hmdfs_get_dentry_relative_path(dentry);
+ if (!send_buf)
+ return -ENOMEM;
+
+ res = hmdfs_send_setxattr(conn, send_buf, name, value, size, flags);
+ kfree(send_buf);
+ return res;
+}
+
+static int hmdfs_xattr_set(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, const void *value,
+ size_t size, int flags)
+{
+ struct hmdfs_inode_info *info = hmdfs_i(inode);
+
+ if (!hmdfs_support_xattr(dentry))
+ return -EOPNOTSUPP;
+
+ if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
+ return -EOPNOTSUPP;
+
+ if (size > HMDFS_XATTR_SIZE_MAX) {
+ hmdfs_info("no support too long xattr value: %zu", size);
+ return -E2BIG;
+ }
+
+ if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL)
+ return hmdfs_xattr_local_set(dentry, name, value, size, flags);
+
+ return hmdfs_xattr_remote_set(dentry, name, value, size, flags);
+}
+
+const struct xattr_handler hmdfs_xattr_handler = {
+ .prefix = "", /* catch all */
+ .get = hmdfs_xattr_get,
+ .set = hmdfs_xattr_set,
+};
+
+static const struct xattr_handler *hmdfs_xattr_handlers[] = {
+ &hmdfs_xattr_handler,
+ NULL, /* sentinel: the VFS iterates until a NULL entry */
+};
+
+#define HMDFS_NODE_EVT_CB_DELAY 2
+
+struct kmem_cache *hmdfs_inode_cachep;
+struct kmem_cache *hmdfs_dentry_cachep;
+
+static void i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+
+ kmem_cache_free(hmdfs_inode_cachep,
+ container_of(inode, struct hmdfs_inode_info,
+ vfs_inode));
+}
+
+static void hmdfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, i_callback);
+}
+
+static void hmdfs_evict_inode(struct inode *inode)
+{
+ struct hmdfs_inode_info *info = hmdfs_i(inode);
+
+ truncate_inode_pages(&inode->i_data, 0);
+ clear_inode(inode);
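+ /* device-level and remote inodes have no lower inode to put */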
+ if (info->inode_type == HMDFS_LAYER_FIRST_DEVICE ||
+ info->inode_type == HMDFS_LAYER_SECOND_REMOTE)
+ return;
+ if (info->inode_type == HMDFS_LAYER_ZERO ||
+ info->inode_type == HMDFS_LAYER_OTHER_LOCAL ||
+ info->inode_type == HMDFS_LAYER_SECOND_LOCAL) {
+ iput(info->lower_inode);
+ info->lower_inode = NULL;
+ }
+}
+
+void hmdfs_put_super(struct super_block *sb)
+{
+ struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
+ struct super_block *lower_sb = sbi->lower_sb;
+
+ hmdfs_info("local_dst is %s, local_src is %s", sbi->local_dst,
+ sbi->local_src);
+
+ hmdfs_fault_inject_fini(&sbi->fault_inject);
+ hmdfs_cfn_destroy(sbi);
+ hmdfs_unregister_sysfs(sbi);
+ hmdfs_connections_stop(sbi);
+ hmdfs_destroy_server_writeback(sbi);
+ hmdfs_exit_stash(sbi);
+ atomic_dec(&lower_sb->s_active);
+ put_cred(sbi->cred);
+ if (sbi->system_cred)
+ put_cred(sbi->system_cred);
+ hmdfs_destroy_writeback(sbi);
+ kfree(sbi->local_src);
+ kfree(sbi->local_dst);
+ kfree(sbi->real_dst);
+ kfree(sbi->cache_dir);
+ kfifo_free(&sbi->notify_fifo);
+ sb->s_fs_info = NULL;
+ sbi->lower_sb = NULL;
+ hmdfs_release_sysfs(sbi);
+ /* After all access are completed */
+ hmdfs_free_sb_seq(sbi->seq);
+ kfree(sbi->s_server_statis);
+ kfree(sbi->s_client_statis);
+ kfree(sbi);
+}
+
+static struct inode *hmdfs_alloc_inode(struct super_block *sb)
+{
+ struct hmdfs_inode_info *gi =
+ kmem_cache_alloc(hmdfs_inode_cachep, GFP_KERNEL);
+ if (!gi)
+ return NULL;
+ memset(gi, 0, offsetof(struct hmdfs_inode_info, vfs_inode));
+ INIT_LIST_HEAD(&gi->wb_list);
+ init_rwsem(&gi->wpage_sem);
+ gi->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
+ atomic64_set(&gi->write_counter, 0);
+ gi->fid.id = HMDFS_INODE_INVALID_FILE_ID;
+ spin_lock_init(&gi->fid_lock);
+ INIT_LIST_HEAD(&gi->wr_opened_node);
+ atomic_set(&gi->wr_opened_cnt, 0);
+ init_waitqueue_head(&gi->fid_wq);
+ INIT_LIST_HEAD(&gi->stash_node);
+ spin_lock_init(&gi->stash_lock);
+ return &gi->vfs_inode;
+}
+
+static int hmdfs_remote_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+ int error = 0;
+ int ret = 0;
+ char *dir_path = NULL;
+ char *name_path = NULL;
+ struct hmdfs_peer *con = NULL;
+ struct hmdfs_sb_info *sbi = hmdfs_sb(dentry->d_inode->i_sb);
+
+ dir_path = hmdfs_get_dentry_relative_path(dentry->d_parent);
+ if (!dir_path) {
+ error = -EACCES;
+ goto out;
+ }
+
+ name_path = hmdfs_connect_path(dir_path, dentry->d_name.name);
+ if (!name_path) {
+ error = -EACCES;
+ goto out;
+ }
+ mutex_lock(&sbi->connections.node_lock);
+ list_for_each_entry(con, &sbi->connections.node_list, list) {
+ if (con->status == NODE_STAT_ONLINE &&
+ con->version > USERSPACE_MAX_VER) {
+ peer_get(con);
+ mutex_unlock(&sbi->connections.node_lock);
+ hmdfs_debug("send MSG to remote devID %llu",
+ con->device_id);
+ ret = hmdfs_send_statfs(con, name_path, buf);
+ if (ret != 0)
+ error = ret;
+ peer_put(con);
+ mutex_lock(&sbi->connections.node_lock);
+ }
+ }
+ mutex_unlock(&sbi->connections.node_lock);
+
+out:
+ kfree(dir_path);
+ kfree(name_path);
+ return error;
+}
+
+static int hmdfs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+ int err = 0;
+ struct path lower_path;
+ struct hmdfs_inode_info *info = hmdfs_i(dentry->d_inode);
+ struct super_block *sb = d_inode(dentry)->i_sb;
+ struct hmdfs_sb_info *sbi = sb->s_fs_info;
+
+ trace_hmdfs_statfs(dentry, info->inode_type);
+ // the merge view, entries under it, and the remote device view report local_src's statfs
+ if (hmdfs_i_merge(info) ||
+ (info->inode_type == HMDFS_LAYER_SECOND_REMOTE)) {
+ err = kern_path(sbi->local_src, 0, &lower_path);
+ if (err)
+ goto out;
+ err = vfs_statfs(&lower_path, buf);
+ path_put(&lower_path);
+ } else if (!IS_ERR_OR_NULL(info->lower_inode)) {
+ hmdfs_get_lower_path(dentry, &lower_path);
+ err = vfs_statfs(&lower_path, buf);
+ hmdfs_put_lower_path(&lower_path);
+ } else {
+ err = hmdfs_remote_statfs(dentry, buf);
+ }
+
+ buf->f_type = HMDFS_SUPER_MAGIC;
+out:
+ return err;
+}
+
+static int hmdfs_show_options(struct seq_file *m, struct dentry *root)
+{
+ struct hmdfs_sb_info *sbi = hmdfs_sb(root->d_sb);
+
+ if (sbi->s_case_sensitive)
+ seq_puts(m, ",sensitive");
+ else
+ seq_puts(m, ",insensitive");
+
+ if (sbi->s_merge_switch)
+ seq_puts(m, ",merge_enable");
+ else
+ seq_puts(m, ",merge_disable");
+
+ seq_printf(m, ",ra_pages=%lu", root->d_sb->s_bdi->ra_pages);
+
+ if (sbi->cache_dir)
+ seq_printf(m, ",cache_dir=%s", sbi->cache_dir);
+ if (sbi->real_dst)
+ seq_printf(m, ",real_dst=%s", sbi->real_dst);
+
+ seq_printf(m, ",%soffline_stash", sbi->s_offline_stash ? "" : "no_");
+ seq_printf(m, ",%sdentry_cache", sbi->s_dentry_cache ? "" : "no_");
+
+ return 0;
+}
+
+static int hmdfs_sync_fs(struct super_block *sb, int wait)
+{
+ int time_left;
+ int err = 0;
+ struct hmdfs_peer *con = NULL;
+ struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
+ int syncfs_timeout = get_cmd_timeout(sbi, F_SYNCFS);
+ struct syncfs_item item, *entry = NULL, *tmp = NULL;
+
+ if (!wait)
+ return 0;
+
+ trace_hmdfs_syncfs_enter(sbi);
+
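+ /*
+ * Only one syncfs instance runs at a time; later callers queue on
+ * wait_list. A finishing run wakes only the last waiter (whose
+ * syncfs covers all earlier ones) and parks the earlier waiters
+ * on pending_list, to be completed without running once that last
+ * waiter is done.
+ */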
+ spin_lock(&sbi->hsi.list_lock);
+ if (!sbi->hsi.is_executing) {
+ sbi->hsi.is_executing = true;
+ item.need_abort = false;
+ spin_unlock(&sbi->hsi.list_lock);
+ } else {
+ init_completion(&item.done);
+ list_add_tail(&item.list, &sbi->hsi.wait_list);
+ spin_unlock(&sbi->hsi.list_lock);
+ wait_for_completion(&item.done);
+ }
+
+ if (item.need_abort)
+ goto out;
+
+ /*
+ * Syncfs cannot run concurrently in hmdfs_sync_fs: we have to
+ * wait until all remote syncfs calls return or time out, and
+ * during that wait @sbi->hsi.wait_count and @sbi->hsi.remote_ret
+ * must be protected from concurrent updates.
+ */
+
+ spin_lock(&sbi->hsi.v_lock);
+ sbi->hsi.version++;
+ /*
+ * Attention: We put @sbi->hsi.remote_ret and @sbi->hsi.wait_count
+ * into the spinlock protection area to avoid the following
+ * scenario caused by out-of-order execution:
+ *
+ * syncfs syncfs_cb
+ * sbi->hsi.remote_ret = 0;
+ * atomic_set(&sbi->hsi.wait_count, 0);
+ * lock
+ * version == old_version
+ * sbi->hsi.remote_ret = resp->ret_code
+ * atomic_dec(&sbi->hsi.wait_count);
+ * unlock
+ * lock
+ * version = old_version + 1
+ * unlock
+ *
+ * @sbi->hsi.remote_ret and @sbi->hsi.wait_count could be assigned
+ * before the spinlock is taken, racing with syncfs_cb(); putting
+ * both assignments under the spinlock fixes this.
+ */
+ sbi->hsi.remote_ret = 0;
+ atomic_set(&sbi->hsi.wait_count, 0);
+ spin_unlock(&sbi->hsi.v_lock);
+
+ mutex_lock(&sbi->connections.node_lock);
+ list_for_each_entry(con, &sbi->connections.node_list, list) {
+ /*
+ * Dirty data does not need to be synchronized to remote
+ * devices that go offline normally. It's okay to drop
+ * them.
+ */
+ if (con->status != NODE_STAT_ONLINE)
+ continue;
+
+ peer_get(con);
+ mutex_unlock(&sbi->connections.node_lock);
+
+ /*
+ * There exists a gap between sync_inodes_sb() and sync_fs()
+ * which may race with remote writing, leading error count
+ * on @sb_dirty_count. The dirty data produced during the
+ * gap period won't be synced in next syncfs operation.
+ * To avoid this, we have to invoke sync_inodes_sb() again
+ * after getting @con->sb_dirty_count.
+ */
+ con->old_sb_dirty_count = atomic64_read(&con->sb_dirty_count);
+ sync_inodes_sb(sb);
+
+ if (!con->old_sb_dirty_count) {
+ peer_put(con);
+ mutex_lock(&sbi->connections.node_lock);
+ continue;
+ }
+
+ err = hmdfs_send_syncfs(con, syncfs_timeout);
+ if (err) {
+ hmdfs_warning("send syncfs failed with %d on node %llu",
+ err, con->device_id);
+ sbi->hsi.remote_ret = err;
+ peer_put(con);
+ mutex_lock(&sbi->connections.node_lock);
+ continue;
+ }
+
+ atomic_inc(&sbi->hsi.wait_count);
+
+ peer_put(con);
+ mutex_lock(&sbi->connections.node_lock);
+ }
+ mutex_unlock(&sbi->connections.node_lock);
+
+ /*
+ * Async work in the background will make sure @sbi->hsi.wait_count
+ * finally drops to zero, whether syncfs succeeds or fails.
+ */
+ time_left = wait_event_interruptible(
+ sbi->hsi.wq, atomic_read(&sbi->hsi.wait_count) == 0);
+ if (time_left < 0) {
+ hmdfs_warning("syncfs is interrupted by external signal");
+ err = -EINTR;
+ }
+
+ if (!err && sbi->hsi.remote_ret)
+ err = sbi->hsi.remote_ret;
+
+ /* Abandon syncfs processes in pending_list */
+ list_for_each_entry_safe(entry, tmp, &sbi->hsi.pending_list, list) {
+ entry->need_abort = true;
+ complete(&entry->done);
+ }
+ INIT_LIST_HEAD(&sbi->hsi.pending_list);
+
+ /* Pick the last syncfs process in wait_list */
+ spin_lock(&sbi->hsi.list_lock);
+ if (list_empty(&sbi->hsi.wait_list)) {
+ sbi->hsi.is_executing = false;
+ } else {
+ entry = list_last_entry(&sbi->hsi.wait_list, struct syncfs_item,
+ list);
+ list_del_init(&entry->list);
+ list_splice_init(&sbi->hsi.wait_list, &sbi->hsi.pending_list);
+ entry->need_abort = false;
+ complete(&entry->done);
+ }
+ spin_unlock(&sbi->hsi.list_lock);
+
+out:
+ trace_hmdfs_syncfs_exit(sbi, atomic_read(&sbi->hsi.wait_count),
+ get_cmd_timeout(sbi, F_SYNCFS), err);
+
+ /* TODO: Return syncfs err back to syscall */
+
+ return err;
+}
+
+struct super_operations hmdfs_sops = {
+ .alloc_inode = hmdfs_alloc_inode,
+ .destroy_inode = hmdfs_destroy_inode,
+ .evict_inode = hmdfs_evict_inode,
+ .put_super = hmdfs_put_super,
+ .statfs = hmdfs_statfs,
+ .show_options = hmdfs_show_options,
+ .sync_fs = hmdfs_sync_fs,
+};
+
+static void init_once(void *obj)
+{
+ struct hmdfs_inode_info *i = obj;
+
+ inode_init_once(&i->vfs_inode);
+}
+
+static int __init hmdfs_init_caches(void)
+{
+ int err = -ENOMEM;
+
+ hmdfs_inode_cachep =
+ kmem_cache_create("hmdfs_inode_cache",
+ sizeof(struct hmdfs_inode_info), 0,
+ SLAB_RECLAIM_ACCOUNT, init_once);
+ if (unlikely(!hmdfs_inode_cachep))
+ goto out;
+ hmdfs_dentry_cachep =
+ kmem_cache_create("hmdfs_dentry_cache",
+ sizeof(struct hmdfs_dentry_info), 0,
+ SLAB_RECLAIM_ACCOUNT, NULL);
+ if (unlikely(!hmdfs_dentry_cachep))
+ goto out_des_ino;
+ hmdfs_dentry_merge_cachep =
+ kmem_cache_create("hmdfs_dentry_merge_cache",
+ sizeof(struct hmdfs_dentry_info_merge), 0,
+ SLAB_RECLAIM_ACCOUNT, NULL);
+ if (unlikely(!hmdfs_dentry_merge_cachep))
+ goto out_des_dc;
+ return 0;
+
+out_des_dc:
+ kmem_cache_destroy(hmdfs_dentry_cachep);
+out_des_ino:
+ kmem_cache_destroy(hmdfs_inode_cachep);
+out:
+ return err;
+}
+
+static void hmdfs_destroy_caches(void)
+{
+ rcu_barrier();
+ kmem_cache_destroy(hmdfs_inode_cachep);
+ hmdfs_inode_cachep = NULL;
+ kmem_cache_destroy(hmdfs_dentry_cachep);
+ hmdfs_dentry_cachep = NULL;
+ kmem_cache_destroy(hmdfs_dentry_merge_cachep);
+ hmdfs_dentry_merge_cachep = NULL;
+}
+
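+/*
+ * Hash the NUL-terminated string @path; note that @len is not used
+ * by the current implementation.
+ */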
+uint64_t path_hash(const char *path, int len, bool case_sense)
+{
+ uint64_t res = 0;
+ const char *kp = path;
+ char c;
+ /* Mocklisp hash function. */
+ while (*kp) {
+ c = *kp;
+ if (!case_sense)
+ c = tolower(c);
+ res = (res << 5) - res + (uint64_t)(c);
+ kp++;
+ }
+ return res;
+}
+
+static char *get_full_path(struct path *path)
+{
+ char *buf, *tmp;
+ char *ret = NULL;
+
+ buf = kmalloc(PATH_MAX, GFP_KERNEL);
+ if (!buf)
+ goto out;
+
+ tmp = d_path(path, buf, PATH_MAX);
+ if (IS_ERR(tmp))
+ goto out;
+
+ ret = kstrdup(tmp, GFP_KERNEL);
+out:
+ kfree(buf);
+ return ret;
+}
+
+static void hmdfs_init_cmd_timeout(struct hmdfs_sb_info *sbi)
+{
+ memset(sbi->s_cmd_timeout, 0xff, sizeof(sbi->s_cmd_timeout));
+
+ set_cmd_timeout(sbi, F_OPEN, TIMEOUT_COMMON);
+ set_cmd_timeout(sbi, F_RELEASE, TIMEOUT_NONE);
+ set_cmd_timeout(sbi, F_READPAGE, TIMEOUT_COMMON);
+ set_cmd_timeout(sbi, F_WRITEPAGE, TIMEOUT_COMMON);
+ set_cmd_timeout(sbi, F_ITERATE, TIMEOUT_30S);
+ set_cmd_timeout(sbi, F_CREATE, TIMEOUT_COMMON);
+ set_cmd_timeout(sbi, F_MKDIR, TIMEOUT_COMMON);
+ set_cmd_timeout(sbi, F_RMDIR, TIMEOUT_COMMON);
+ set_cmd_timeout(sbi, F_UNLINK, TIMEOUT_COMMON);
+ set_cmd_timeout(sbi, F_RENAME, TIMEOUT_COMMON);
+ set_cmd_timeout(sbi, F_SETATTR, TIMEOUT_COMMON);
+ set_cmd_timeout(sbi, F_STATFS, TIMEOUT_COMMON);
+ set_cmd_timeout(sbi, F_CONNECT_REKEY, TIMEOUT_NONE);
+ set_cmd_timeout(sbi, F_DROP_PUSH, TIMEOUT_NONE);
+ set_cmd_timeout(sbi, F_GETATTR, TIMEOUT_COMMON);
+ set_cmd_timeout(sbi, F_FSYNC, TIMEOUT_90S);
+ set_cmd_timeout(sbi, F_SYNCFS, TIMEOUT_30S);
+ set_cmd_timeout(sbi, F_GETXATTR, TIMEOUT_COMMON);
+ set_cmd_timeout(sbi, F_SETXATTR, TIMEOUT_COMMON);
+ set_cmd_timeout(sbi, F_LISTXATTR, TIMEOUT_COMMON);
+}
+
+static int hmdfs_init_sbi(struct hmdfs_sb_info *sbi)
+{
+ int ret;
+
+ ret = kfifo_alloc(&sbi->notify_fifo, PAGE_SIZE, GFP_KERNEL);
+ if (ret)
+ goto out;
+
+ /*
+ * We have to use dynamic memory since struct server/client_statistic
+ * are DECLARED in hmdfs.h but DEFINED in socket_adapter.h.
+ */
+ sbi->s_server_statis =
+ kcalloc(F_SIZE, sizeof(*sbi->s_server_statis), GFP_KERNEL);
+ sbi->s_client_statis =
+ kcalloc(F_SIZE, sizeof(*sbi->s_client_statis), GFP_KERNEL);
+ if (!sbi->s_server_statis || !sbi->s_client_statis) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = hmdfs_alloc_sb_seq();
+ if (ret < 0) {
+ hmdfs_err("no sb seq available err %d", ret);
+ goto out;
+ }
+ sbi->seq = ret;
+ ret = 0;
+
+ spin_lock_init(&sbi->notify_fifo_lock);
+ sbi->s_case_sensitive = false;
+ sbi->s_features = HMDFS_FEATURE_READPAGES |
+ HMDFS_FEATURE_READPAGES_OPEN |
+ HMDFS_ATOMIC_OPEN;
+ sbi->s_merge_switch = false;
+ sbi->dcache_threshold = DEFAULT_DCACHE_THRESHOLD;
+ sbi->dcache_precision = DEFAULT_DCACHE_PRECISION;
+ sbi->dcache_timeout = DEFAULT_DCACHE_TIMEOUT;
+ sbi->write_cache_timeout = DEFAULT_WRITE_CACHE_TIMEOUT;
+ hmdfs_init_cmd_timeout(sbi);
+ sbi->async_cb_delay = HMDFS_NODE_EVT_CB_DELAY;
+ sbi->async_req_max_active = DEFAULT_SRV_REQ_MAX_ACTIVE;
+ sbi->s_offline_stash = true;
+ sbi->s_dentry_cache = true;
+ sbi->wb_timeout_ms = HMDFS_DEF_WB_TIMEOUT_MS;
+ /* Initialize before hmdfs_register_sysfs() */
+ atomic_set(&sbi->connections.conn_seq, 0);
+ mutex_init(&sbi->connections.node_lock);
+ INIT_LIST_HEAD(&sbi->connections.node_list);
+
+ init_waitqueue_head(&sbi->async_readdir_wq);
+ INIT_LIST_HEAD(&sbi->async_readdir_msg_list);
+ INIT_LIST_HEAD(&sbi->async_readdir_work_list);
+ spin_lock_init(&sbi->async_readdir_msg_lock);
+ spin_lock_init(&sbi->async_readdir_work_lock);
+
+ return 0;
+
+out:
+ return ret;
+}
+
+void hmdfs_client_resp_statis(struct hmdfs_sb_info *sbi, u8 cmd,
+ enum hmdfs_resp_type type, unsigned long start,
+ unsigned long end)
+{
+ unsigned long duration;
+
+ switch (type) {
+ case HMDFS_RESP_DELAY:
+ sbi->s_client_statis[cmd].delay_resp_cnt++;
+ break;
+ case HMDFS_RESP_TIMEOUT:
+ sbi->s_client_statis[cmd].timeout_cnt++;
+ break;
+ case HMDFS_RESP_NORMAL:
+ duration = end - start;
+ sbi->s_client_statis[cmd].total += duration;
+ sbi->s_client_statis[cmd].resp_cnt++;
+ if (sbi->s_client_statis[cmd].max < duration)
+ sbi->s_client_statis[cmd].max = duration;
+ break;
+ default:
+ hmdfs_err("Wrong cmd %d with resp type %d", cmd, type);
+ }
+}
+
+static int hmdfs_update_dst(struct hmdfs_sb_info *sbi)
+{
+ int err = 0;
+ const char *path_local = UPDATE_LOCAL_DST;
+ int len = 0;
+
+ sbi->real_dst = kstrdup(sbi->local_dst, GFP_KERNEL);
+ if (!sbi->real_dst) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+ kfree(sbi->local_dst);
+ /* reset so the caller's error path cannot double-free it */
+ sbi->local_dst = NULL;
+
+ len = strlen(sbi->real_dst) + strlen(path_local) + 1;
+ if (len > PATH_MAX) {
+ err = -EINVAL;
+ goto out_err;
+ }
+ sbi->local_dst = kmalloc(len, GFP_KERNEL);
+ if (!sbi->local_dst) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+ snprintf(sbi->local_dst, len, "%s%s", sbi->real_dst, path_local);
+out_err:
+ return err;
+}
+
+/*
+ * Generate a boot cookie in the following format:
+ *
+ * | random | boot time(ms) | 0x00 |
+ * |--------|-----------------|-------|
+ * 16 33 15 (bits)
+ *
+ * This makes sure the boot cookie stays unique over a period of
+ * 2^33 / 1000 / 3600 / 24 = ~99.4 days.
+ */
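+/*
+ * For example (hypothetical values): with rand = 0x1234 and a boot
+ * time of 5000 ms, the cookie is
+ * ((0x1234ULL << HMDFS_BOOT_COOKIE_RAND_SHIFT) | 5000)
+ * << HMDFS_FID_VER_BOOT_COOKIE_SHIFT.
+ */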
+uint64_t hmdfs_gen_boot_cookie(void)
+{
+ uint64_t now;
+ uint16_t rand;
+
+ now = ktime_to_ms(ktime_get());
+ prandom_bytes(&rand, sizeof(rand));
+
+ now &= (1ULL << HMDFS_BOOT_COOKIE_RAND_SHIFT) - 1;
+ now |= ((uint64_t)rand << HMDFS_BOOT_COOKIE_RAND_SHIFT);
+
+ return now << HMDFS_FID_VER_BOOT_COOKIE_SHIFT;
+}
+
+static int hmdfs_fill_super(struct super_block *sb, void *data, int silent)
+{
+ struct hmdfs_mount_priv *priv = (struct hmdfs_mount_priv *)data;
+ const char *dev_name = priv->dev_name;
+ const char *raw_data = priv->raw_data;
+ struct hmdfs_sb_info *sbi;
+ int err = 0;
+ struct inode *root_inode;
+ struct path lower_path;
+ struct super_block *lower_sb;
+ struct dentry *root_dentry;
+ char ctrl_path[CTRL_PATH_MAX_LEN];
+ uint64_t ctrl_hash;
+
+ sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+ if (!sbi) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+ err = hmdfs_init_sbi(sbi);
+ if (err)
+ goto out_freesbi;
+ sbi->sb = sb;
+ err = hmdfs_parse_options(sbi, raw_data);
+ if (err)
+ goto out_freesbi;
+
+ sb->s_fs_info = sbi;
+ sb->s_magic = HMDFS_SUPER_MAGIC;
+ sb->s_xattr = hmdfs_xattr_handlers;
+ sb->s_op = &hmdfs_sops;
+
+ sbi->boot_cookie = hmdfs_gen_boot_cookie();
+
+ err = hmdfs_init_writeback(sbi);
+ if (err)
+ goto out_freesbi;
+ err = hmdfs_init_server_writeback(sbi);
+ if (err)
+ goto out_freesbi;
+
+ err = hmdfs_init_stash(sbi);
+ if (err)
+ goto out_freesbi;
+
+ // add ctrl sysfs node
+ ctrl_hash = path_hash(sbi->local_dst, strlen(sbi->local_dst), true);
+ scnprintf(ctrl_path, CTRL_PATH_MAX_LEN, "%llu", ctrl_hash);
+ hmdfs_debug("hash %llu", ctrl_hash);
+ err = hmdfs_register_sysfs(ctrl_path, sbi);
+ if (err)
+ goto out_freesbi;
+
+ err = hmdfs_update_dst(sbi);
+ if (err)
+ goto out_unreg_sysfs;
+
+ err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
+ &lower_path);
+ if (err) {
+ hmdfs_err("open dev failed, errno = %d", err);
+ goto out_unreg_sysfs;
+ }
+
+ lower_sb = lower_path.dentry->d_sb;
+ atomic_inc(&lower_sb->s_active);
+ sbi->lower_sb = lower_sb;
+ sbi->local_src = get_full_path(&lower_path);
+ if (!sbi->local_src) {
+ err = -ENOMEM;
+ hmdfs_err("get local_src failed!");
+ goto out_sput;
+ }
+
+ sb->s_time_gran = lower_sb->s_time_gran;
+ sb->s_maxbytes = lower_sb->s_maxbytes;
+ sb->s_stack_depth = lower_sb->s_stack_depth + 1;
+ if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
+ hmdfs_err("maximum fs stacking depth exceeded");
+ err = -EINVAL;
+ goto out_sput;
+ }
+ root_inode = fill_root_inode(sb, d_inode(lower_path.dentry));
+ if (IS_ERR(root_inode)) {
+ err = PTR_ERR(root_inode);
+ goto out_sput;
+ }
+ hmdfs_root_inode_perm_init(root_inode);
+ sb->s_root = root_dentry = d_make_root(root_inode);
+ if (!root_dentry) {
+ err = -ENOMEM;
+ goto out_sput;
+ }
+
+ err = init_hmdfs_dentry_info(sbi, root_dentry, HMDFS_LAYER_ZERO);
+ if (err)
+ goto out_freeroot;
+ hmdfs_set_lower_path(root_dentry, &lower_path);
+ d_rehash(sb->s_root);
+ sbi->cred = get_cred(current_cred());
+ INIT_LIST_HEAD(&sbi->client_cache);
+ INIT_LIST_HEAD(&sbi->server_cache);
+ INIT_LIST_HEAD(&sbi->to_delete);
+ mutex_init(&sbi->cache_list_lock);
+ hmdfs_cfn_load(sbi);
+
+ /* Initialize syncfs info */
+ spin_lock_init(&sbi->hsi.v_lock);
+ init_waitqueue_head(&sbi->hsi.wq);
+ sbi->hsi.version = 0;
+ sbi->hsi.is_executing = false;
+ INIT_LIST_HEAD(&sbi->hsi.wait_list);
+ INIT_LIST_HEAD(&sbi->hsi.pending_list);
+ spin_lock_init(&sbi->hsi.list_lock);
+ hmdfs_fault_inject_init(&sbi->fault_inject, ctrl_path);
+
+ return err;
+out_freeroot:
+ dput(sb->s_root);
+ sb->s_root = NULL;
+out_sput:
+ atomic_dec(&lower_sb->s_active);
+ path_put(&lower_path);
+out_unreg_sysfs:
+ hmdfs_unregister_sysfs(sbi);
+ hmdfs_release_sysfs(sbi);
+out_freesbi:
+ if (sbi) {
+ sb->s_fs_info = NULL;
+ hmdfs_exit_stash(sbi);
+ hmdfs_destroy_writeback(sbi);
+ hmdfs_destroy_server_writeback(sbi);
+ kfifo_free(&sbi->notify_fifo);
+ hmdfs_free_sb_seq(sbi->seq);
+ kfree(sbi->local_src);
+ kfree(sbi->local_dst);
+ kfree(sbi->real_dst);
+ kfree(sbi->cache_dir);
+ kfree(sbi->s_server_statis);
+ kfree(sbi->s_client_statis);
+ kfree(sbi);
+ }
+out_err:
+ return err;
+}
+
+static struct dentry *hmdfs_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *raw_data)
+{
+ struct hmdfs_mount_priv priv = {
+ .dev_name = dev_name,
+ .raw_data = raw_data,
+ };
+ return mount_nodev(fs_type, flags, &priv, hmdfs_fill_super);
+}
+
+
+static void hmdfs_cancel_async_readdir(struct hmdfs_sb_info *sbi)
+{
+ struct sendmsg_wait_queue *msg_wq = NULL;
+ struct hmdfs_readdir_work *rw = NULL;
+ struct hmdfs_readdir_work *tmp = NULL;
+ struct list_head del_work;
+
+ /* cancel work that are not running */
+
+ INIT_LIST_HEAD(&del_work);
+ spin_lock(&sbi->async_readdir_work_lock);
+ list_for_each_entry_safe(rw, tmp, &sbi->async_readdir_work_list, head) {
+ if (cancel_delayed_work(&rw->dwork))
+ list_move(&rw->head, &del_work);
+ }
+ spin_unlock(&sbi->async_readdir_work_lock);
+
+ list_for_each_entry_safe(rw, tmp, &del_work, head) {
+ dput(rw->dentry);
+ peer_put(rw->con);
+ kfree(rw);
+ }
+
+ /* wake up async readdir that are waiting for remote */
+ spin_lock(&sbi->async_readdir_msg_lock);
+ sbi->async_readdir_prohibit = true;
+ list_for_each_entry(msg_wq, &sbi->async_readdir_msg_list, async_msg)
+ hmdfs_response_wakeup(msg_wq, -EINTR, 0, NULL);
+ spin_unlock(&sbi->async_readdir_msg_lock);
+
+ /* wait for all async readdir to finish */
+ if (!list_empty(&sbi->async_readdir_work_list))
+ wait_event_interruptible_timeout(sbi->async_readdir_wq,
+ (list_empty(&sbi->async_readdir_work_list)), HZ);
+
+ WARN_ON(!(list_empty(&sbi->async_readdir_work_list)));
+}
+
+static void hmdfs_kill_super(struct super_block *sb)
+{
+ struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
+
+ /*
+ * async readdir is holding ref for dentry, not for vfsmount. Thus
+ * shrink_dcache_for_umount() will warn about dentry still in use
+ * if async readdir is not done.
+ */
+ if (sbi)
+ hmdfs_cancel_async_readdir(sbi);
+ kill_anon_super(sb);
+}
+
+static struct file_system_type hmdfs_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "hmdfs",
+ .mount = hmdfs_mount,
+ .kill_sb = hmdfs_kill_super,
+};
+
+static int __init hmdfs_init(void)
+{
+ int err = 0;
+
+ err = hmdfs_init_caches();
+ if (err)
+ goto out_err;
+
+ hmdfs_node_evt_cb_init();
+
+ hmdfs_stash_add_node_evt_cb();
+ hmdfs_client_add_node_evt_cb();
+ hmdfs_server_add_node_evt_cb();
+
+ err = register_filesystem(&hmdfs_fs_type);
+ if (err) {
+ hmdfs_err("hmdfs register failed!");
+ goto out_destroy_caches;
+ }
+ err = hmdfs_sysfs_init();
+ if (err)
+ goto out_unreg_fs;
+
+ hmdfs_message_verify_init();
+ hmdfs_create_debugfs_root();
+ return 0;
+
+ /* unwind only the steps that actually succeeded */
+out_unreg_fs:
+ unregister_filesystem(&hmdfs_fs_type);
+out_destroy_caches:
+ hmdfs_destroy_caches();
+out_err:
+ hmdfs_err("hmdfs init failed!");
+ return err;
+}
+
+static void __exit hmdfs_exit(void)
+{
+ hmdfs_destroy_debugfs_root();
+ hmdfs_sysfs_exit();
+ unregister_filesystem(&hmdfs_fs_type);
+ ida_destroy(&hmdfs_sb_seq);
+ hmdfs_destroy_caches();
+ hmdfs_info("hmdfs exited!");
+}
+
+module_init(hmdfs_init);
+module_exit(hmdfs_exit);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(hmdfs_recv_mesg_callback);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("LongPing.WEI, Jingjing.Mao");
+MODULE_DESCRIPTION("Harmony distributed file system");
diff --git a/fs/hmdfs/server_writeback.c b/fs/hmdfs/server_writeback.c
new file mode 100644
index 0000000000000000000000000000000000000000..b3a18ff67691e879d93b756f5dd48c66e6cb5937
--- /dev/null
+++ b/fs/hmdfs/server_writeback.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/server_writeback.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include <linux/backing-dev.h>
+#include <linux/fs.h>
+#include <linux/writeback.h>
+
+#include "hmdfs.h"
+#include "hmdfs_trace.h"
+#include "server_writeback.h"
+
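+/* default dirty threshold: 50MB worth of pages dirtied within 1s */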
+#define HMDFS_SRV_WB_DEF_DIRTY_THRESH 50UL
+
+static void hmdfs_srv_wb_handler(struct work_struct *work)
+{
+ struct hmdfs_server_writeback *hswb = container_of(work,
+ struct hmdfs_server_writeback,
+ dirty_sb_writeback_work);
+ struct super_block *lower_sb = hswb->sbi->lower_sb;
+ int dirty_pages;
+
+ if (writeback_in_progress(&lower_sb->s_bdi->wb) ||
+ !down_read_trylock(&lower_sb->s_umount))
+ return;
+
+ dirty_pages = hswb->dirty_nr_pages_to_wb;
+ writeback_inodes_sb_nr(lower_sb, dirty_pages, WB_REASON_FS_FREE_SPACE);
+ up_read(&lower_sb->s_umount);
+
+ trace_hmdfs_start_srv_wb(hswb->sbi, dirty_pages, hswb->dirty_thresh_pg);
+}
+
+void hmdfs_server_check_writeback(struct hmdfs_server_writeback *hswb)
+{
+ unsigned long old_time, now;
+ int dirty_nr_pages;
+
+ old_time = hswb->last_reset_time;
+ now = jiffies;
+ dirty_nr_pages = atomic_inc_return(&hswb->dirty_nr_pages);
+ if (time_after(now, old_time + HZ) &&
+ cmpxchg(&hswb->last_reset_time, old_time, now) == old_time) {
+ /*
+ * We calculate the page-dirtying speed to handle the
+ * following situations:
+ *
+ * 1. Dense writing, average page-dirtying speed
+ * exceeds @hswb->dirty_thresh_pg:
+ * 0-1s 100MB
+ * 2. Sporadic writing, average page-dirtying speed
+ * stays below @hswb->dirty_thresh_pg:
+ * 0-0.1s 40MB
+ * 3.1-3.2s 20MB
+ */
+ unsigned int writepage_speed;
+
+ writepage_speed = dirty_nr_pages / ((now - old_time) / HZ);
+ if (writepage_speed >= hswb->dirty_thresh_pg) {
+ /*
+ * Write back @hswb->dirty_nr_pages_to_wb pages in the
+ * server-writeback work. If the work is delayed past
+ * 1s, @hswb->dirty_nr_pages_to_wb may be assigned a
+ * new value (e.g. 60MB) and the old value (e.g. 80MB)
+ * is overwritten, so those 80MB are never submitted
+ * for writeback. We can tolerate this: if the previous
+ * work has not completed, the writeback pressure is
+ * already too high and queuing more work is pointless.
+ */
+ hswb->dirty_nr_pages_to_wb = dirty_nr_pages;
+ /*
+ * There are 3 conditions to trigger queuing work:
+ *
+ * A. Server successfully handles writepage for client
+ * B. Every 1 second interval
+ * C. The page-dirtying speed exceeds @dirty_thresh_pg
+ */
+ queue_work(hswb->dirty_writeback_wq,
+ &hswb->dirty_sb_writeback_work);
+ }
+
+ /*
+ * The number of dirty pages from remote clients does not
+ * need to be accounted very accurately. It is acceptable
+ * to miss increments done by other processes in the gap
+ * between reading the counter and zeroing it out.
+ */
+ atomic_set(&hswb->dirty_nr_pages, 0);
+ }
+}
+
+void hmdfs_destroy_server_writeback(struct hmdfs_sb_info *sbi)
+{
+ if (!sbi->h_swb)
+ return;
+
+ flush_work(&sbi->h_swb->dirty_sb_writeback_work);
+ destroy_workqueue(sbi->h_swb->dirty_writeback_wq);
+ kfree(sbi->h_swb);
+ sbi->h_swb = NULL;
+}
+
+int hmdfs_init_server_writeback(struct hmdfs_sb_info *sbi)
+{
+ struct hmdfs_server_writeback *hswb;
+ char name[HMDFS_WQ_NAME_LEN];
+
+ hswb = kzalloc(sizeof(struct hmdfs_server_writeback), GFP_KERNEL);
+ if (!hswb)
+ return -ENOMEM;
+
+ hswb->sbi = sbi;
+ hswb->dirty_writeback_control = true;
+ hswb->dirty_thresh_pg = HMDFS_SRV_WB_DEF_DIRTY_THRESH <<
+ HMDFS_MB_TO_PAGE_SHIFT;
+ atomic_set(&hswb->dirty_nr_pages, 0);
+ hswb->last_reset_time = jiffies;
+
+ snprintf(name, sizeof(name), "dfs_srv_wb%u", sbi->seq);
+ hswb->dirty_writeback_wq = create_singlethread_workqueue(name);
+ if (!hswb->dirty_writeback_wq) {
+ hmdfs_err("Failed to create server writeback workqueue!");
+ kfree(hswb);
+ return -ENOMEM;
+ }
+ INIT_WORK(&hswb->dirty_sb_writeback_work, hmdfs_srv_wb_handler);
+ sbi->h_swb = hswb;
+
+ return 0;
+}
+
diff --git a/fs/hmdfs/server_writeback.h b/fs/hmdfs/server_writeback.h
new file mode 100644
index 0000000000000000000000000000000000000000..eb645e6391e9dd4c46deacf48a41711fc3191e0b
--- /dev/null
+++ b/fs/hmdfs/server_writeback.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/server_writeback.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef SERVER_WRITEBACK_H
+#define SERVER_WRITEBACK_H
+
+#include "hmdfs.h"
+
+#define HMDFS_MB_TO_PAGE_SHIFT (20 - HMDFS_PAGE_OFFSET)
+
+struct hmdfs_server_writeback {
+ struct hmdfs_sb_info *sbi;
+ /* Enable hmdfs server dirty writeback control */
+ bool dirty_writeback_control;
+
+ /* Current # of dirty pages from remote client in recent 1s */
+ atomic_t dirty_nr_pages;
+ /* Current # of dirty pages to writeback */
+ int dirty_nr_pages_to_wb;
+ /* Dirty threshold (dirty pages within 1s) that triggers wb */
+ unsigned int dirty_thresh_pg;
+ /* Last reset timestamp (in jiffies) for @dirty_nr_pages */
+ unsigned long last_reset_time;
+
+ struct workqueue_struct *dirty_writeback_wq;
+ /* Per-fs work that writes back pages received from clients */
+ struct work_struct dirty_sb_writeback_work;
+};
+
+void hmdfs_server_check_writeback(struct hmdfs_server_writeback *hswb);
+
+void hmdfs_destroy_server_writeback(struct hmdfs_sb_info *sbi);
+
+int hmdfs_init_server_writeback(struct hmdfs_sb_info *sbi);
+
+#endif
diff --git a/fs/hmdfs/stash.c b/fs/hmdfs/stash.c
new file mode 100644
index 0000000000000000000000000000000000000000..c320af7f60e0d42372c39a74709e1cdff7f36c74
--- /dev/null
+++ b/fs/hmdfs/stash.c
@@ -0,0 +1,2247 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/stash.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/crc32.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/pagemap.h>
+#include <linux/sched/mm.h>
+#include <linux/uio.h>
+#include <linux/highmem.h>
+#include <linux/version.h>
+#include <linux/workqueue.h>
+#include <linux/completion.h>
+
+#include "stash.h"
+#include "comm/node_cb.h"
+#include "comm/protocol.h"
+#include "comm/connection.h"
+#include "file_remote.h"
+#include "hmdfs_dentryfile.h"
+#include "authority/authentication.h"
+
+/* Head magic used to identify a stash file */
+#define HMDFS_STASH_FILE_HEAD_MAGIC 0xF7AB06C3
+/* Head and path in stash file are aligned with HMDFS_STASH_BLK_SIZE */
+#define HMDFS_STASH_BLK_SIZE 4096
+#define HMDFS_STASH_BLK_SHIFT 12
+#define HMDFS_STASH_PAGE_TO_SECTOR_SHIFT 3
+#define HMDFS_STASH_DIR_NAME "stash"
+#define HMDFS_STASH_FMT_DIR_NAME "v1"
+#define HMDFS_STASH_WORK_DIR_NAME \
+ (HMDFS_STASH_DIR_NAME "/" HMDFS_STASH_FMT_DIR_NAME)
+
+#define HMDFS_STASH_FILE_NAME_LEN 20
+
+#define HMDFS_STASH_FLUSH_CNT 2
+
+#define HMDFS_STASH_PATH_LEN (HMDFS_CID_SIZE + HMDFS_STASH_FILE_NAME_LEN + 1)
+
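+/*
+ * On-disk stash file layout (offsets counted in HMDFS_STASH_BLK_SIZE
+ * blocks, each field block-aligned):
+ *
+ * block 0:          struct hmdfs_cache_file_head (CRC32-protected)
+ * block @path_offs: NUL-terminated device-view path of the file
+ * block @data_offs: stashed page data, kept at each page's original
+ *                   file offset so the file stays sparse
+ */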
+struct hmdfs_cache_file_head {
+ __le32 magic;
+ __le32 crc_offset;
+ __le64 ino;
+ __le64 size;
+ __le64 blocks;
+ __le64 last_write_pos;
+ __le64 ctime;
+ __le32 ctime_nsec;
+ __le32 change_detect_cap;
+ __le64 ichange_count;
+ __le32 path_offs;
+ __le32 path_len;
+ __le32 path_cnt;
+ __le32 data_offs;
+ /* Attention: add new fields here to stay compatible with old versions */
+ __le32 crc32;
+} __packed;
+
+struct hmdfs_stash_work {
+ struct hmdfs_peer *conn;
+ struct list_head *list;
+ struct work_struct work;
+ struct completion done;
+};
+
+struct hmdfs_inode_tbl {
+ unsigned int cnt;
+ unsigned int max;
+ uint64_t inodes[0];
+};
+
+struct hmdfs_stash_dir_context {
+ struct dir_context dctx;
+ char name[NAME_MAX + 1];
+ struct hmdfs_inode_tbl *tbl;
+};
+
+struct hmdfs_restore_stats {
+ unsigned int succeed;
+ unsigned int fail;
+ unsigned int keep;
+ unsigned long long ok_pages;
+ unsigned long long fail_pages;
+};
+
+struct hmdfs_stash_stats {
+ unsigned int succeed;
+ unsigned int donothing;
+ unsigned int fail;
+ unsigned long long ok_pages;
+ unsigned long long fail_pages;
+};
+
+struct hmdfs_file_restore_ctx {
+ struct hmdfs_peer *conn;
+ struct path src_dir_path;
+ struct path dst_root_path;
+ char *dst;
+ char *page;
+ struct file *src_filp;
+ uint64_t inum;
+ uint64_t pages;
+ unsigned int seq;
+ unsigned int data_offs;
+ /* output */
+ bool keep;
+};
+
+struct hmdfs_copy_args {
+ struct file *src;
+ struct file *dst;
+ void *buf;
+ size_t buf_len;
+ unsigned int seq;
+ unsigned int data_offs;
+ uint64_t inum;
+};
+
+struct hmdfs_copy_ctx {
+ struct hmdfs_copy_args args;
+ loff_t src_pos;
+ loff_t dst_pos;
+ /* output */
+ size_t copied;
+ bool eof;
+};
+
+struct hmdfs_rebuild_stats {
+ unsigned int succeed;
+ unsigned int total;
+ unsigned int fail;
+ unsigned int invalid;
+};
+
+struct hmdfs_check_work {
+ struct hmdfs_peer *conn;
+ struct work_struct work;
+ struct completion done;
+};
+
+typedef int (*stash_operation_func)(struct hmdfs_peer *,
+ unsigned int,
+ struct path *,
+ const struct hmdfs_inode_tbl *,
+ void *);
+
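+/*
+ * Create a directory named @name under @parent, or reuse it if it
+ * already exists as a directory; a positive non-directory entry
+ * yields -EINVAL. Returns the child dentry with a reference held.
+ */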
+static struct dentry *hmdfs_do_vfs_mkdir(struct dentry *parent,
+ const char *name, int namelen,
+ umode_t mode)
+{
+ struct inode *dir = d_inode(parent);
+ struct dentry *child = NULL;
+ int err;
+
+ inode_lock_nested(dir, I_MUTEX_PARENT);
+
+ child = lookup_one_len(name, parent, namelen);
+ if (IS_ERR(child))
+ goto out;
+
+ if (d_is_positive(child)) {
+ if (d_can_lookup(child))
+ goto out;
+
+ dput(child);
+ child = ERR_PTR(-EINVAL);
+ goto out;
+ }
+
+ err = vfs_mkdir(dir, child, mode);
+ if (err) {
+ dput(child);
+ child = ERR_PTR(err);
+ goto out;
+ }
+
+out:
+ inode_unlock(dir);
+ return child;
+}
+
+struct dentry *hmdfs_stash_new_work_dir(struct dentry *parent)
+{
+ struct dentry *base = NULL;
+ struct dentry *work = NULL;
+
+ base = hmdfs_do_vfs_mkdir(parent, HMDFS_STASH_DIR_NAME,
+ strlen(HMDFS_STASH_DIR_NAME), 0700);
+ if (IS_ERR(base))
+ return base;
+
+ work = hmdfs_do_vfs_mkdir(base, HMDFS_STASH_FMT_DIR_NAME,
+ strlen(HMDFS_STASH_FMT_DIR_NAME), 0700);
+ dput(base);
+
+ return work;
+}
+
+static struct file *hmdfs_new_stash_file(struct path *d_path, const char *cid)
+{
+ struct dentry *parent = NULL;
+ struct dentry *child = NULL;
+ struct file *filp = NULL;
+ struct path stash;
+ int err;
+
+ parent = hmdfs_do_vfs_mkdir(d_path->dentry, cid, strlen(cid), 0700);
+ if (IS_ERR(parent)) {
+ err = PTR_ERR(parent);
+ hmdfs_err("mkdir error %d", err);
+ goto mkdir_err;
+ }
+
+ child = vfs_tmpfile(parent, S_IFREG | 0600, 0);
+ if (IS_ERR(child)) {
+ err = PTR_ERR(child);
+ hmdfs_err("new stash file error %d", err);
+ goto tmpfile_err;
+ }
+
+ stash.mnt = d_path->mnt;
+ stash.dentry = child;
+ filp = dentry_open(&stash, O_LARGEFILE | O_WRONLY, current_cred());
+ if (IS_ERR(filp)) {
+ err = PTR_ERR(filp);
+ hmdfs_err("open stash file error %d", err);
+ goto open_err;
+ }
+
+ dput(child);
+ dput(parent);
+
+ return filp;
+
+open_err:
+ dput(child);
+tmpfile_err:
+ dput(parent);
+mkdir_err:
+ return ERR_PTR(err);
+}
+
+static inline bool hmdfs_is_dir(struct dentry *child)
+{
+ return d_is_positive(child) && d_can_lookup(child);
+}
+
+static inline bool hmdfs_is_reg(struct dentry *child)
+{
+ return d_is_positive(child) && d_is_reg(child);
+}
+
+static void hmdfs_set_stash_file_head(const struct hmdfs_cache_info *cache,
+ uint64_t ino,
+ struct hmdfs_cache_file_head *head)
+{
+ long long blocks;
+ unsigned int crc_offset;
+
+ memset(head, 0, sizeof(*head));
+ head->magic = cpu_to_le32(HMDFS_STASH_FILE_HEAD_MAGIC);
+ head->ino = cpu_to_le64(ino);
+ head->size = cpu_to_le64(i_size_read(file_inode(cache->cache_file)));
+ blocks = atomic64_read(&cache->written_pgs) <<
+ HMDFS_STASH_PAGE_TO_SECTOR_SHIFT;
+ head->blocks = cpu_to_le64(blocks);
+ head->path_offs = cpu_to_le32(cache->path_offs);
+ head->path_len = cpu_to_le32(cache->path_len);
+ head->path_cnt = cpu_to_le32(cache->path_cnt);
+ head->data_offs = cpu_to_le32(cache->data_offs);
+ crc_offset = offsetof(struct hmdfs_cache_file_head, crc32);
+ head->crc_offset = cpu_to_le32(crc_offset);
+ head->crc32 = cpu_to_le32(crc32(0, head, crc_offset));
+}
+
+static int hmdfs_flush_stash_file_metadata(struct hmdfs_inode_info *info)
+{
+ struct hmdfs_cache_info *cache = NULL;
+ struct hmdfs_peer *conn = info->conn;
+ struct hmdfs_cache_file_head cache_head;
+ size_t written;
+ loff_t pos;
+ unsigned int head_size;
+
+ /* No metadata if no cache file info */
+ cache = info->cache;
+ if (!cache)
+ return -EINVAL;
+
+ if (strlen(cache->path) == 0) {
+ long long to_write_pgs = atomic64_read(&cache->to_write_pgs);
+
+ /* Nothing to stash. No need to flush metadata. */
+ if (to_write_pgs == 0)
+ return 0;
+
+ hmdfs_err("peer 0x%x:0x%llx inode 0x%llx lost %lld pages due to no path",
+ conn->owner, conn->device_id,
+ info->remote_ino, to_write_pgs);
+ return -EINVAL;
+ }
+
+ hmdfs_set_stash_file_head(cache, info->remote_ino, &cache_head);
+
+ /* Write head */
+ pos = 0;
+ head_size = sizeof(cache_head);
+ written = kernel_write(cache->cache_file, &cache_head, head_size, &pos);
+ if (written != head_size) {
+ hmdfs_err("stash peer 0x%x:0x%llx ino 0x%llx write head len %u err %zd",
+ conn->owner, conn->device_id, info->remote_ino,
+ head_size, written);
+ return -EIO;
+ }
+ /* Write path */
+ pos = (loff_t)cache->path_offs << HMDFS_STASH_BLK_SHIFT;
+ written = kernel_write(cache->cache_file, cache->path, cache->path_len,
+ &pos);
+ if (written != cache->path_len) {
+ hmdfs_err("stash peer 0x%x:0x%llx ino 0x%llx write path len %u err %zd",
+ conn->owner, conn->device_id, info->remote_ino,
+ cache->path_len, written);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/* Mainly from inode_wait_for_writeback() */
+static void hmdfs_wait_remote_writeback_once(struct hmdfs_peer *conn,
+ struct hmdfs_inode_info *info)
+{
+ struct inode *inode = &info->vfs_inode;
+ DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
+ wait_queue_head_t *wq_head = NULL;
+ bool in_sync = false;
+
+ spin_lock(&inode->i_lock);
+ in_sync = inode->i_state & I_SYNC;
+ spin_unlock(&inode->i_lock);
+
+ if (!in_sync)
+ return;
+
+ hmdfs_info("peer 0x%x:0x%llx ino 0x%llx wait for wb once",
+ conn->owner, conn->device_id, info->remote_ino);
+
+ wq_head = bit_waitqueue(&inode->i_state, __I_SYNC);
+ __wait_on_bit(wq_head, &wq, bit_wait, TASK_UNINTERRUPTIBLE);
+}
+
+static void hmdfs_reset_remote_write_err(struct hmdfs_peer *conn,
+ struct hmdfs_inode_info *info)
+{
+ struct address_space *mapping = info->vfs_inode.i_mapping;
+ int flags_err;
+ errseq_t old;
+ int wb_err;
+
+ flags_err = filemap_check_errors(mapping);
+
+ old = errseq_sample(&mapping->wb_err);
+ wb_err = errseq_check_and_advance(&mapping->wb_err, &old);
+ if (flags_err || wb_err)
+ hmdfs_warning("peer 0x%x:0x%llx inode 0x%llx wb error %d %d before stash",
+ conn->owner, conn->device_id, info->remote_ino,
+ flags_err, wb_err);
+}
+
+static bool hmdfs_is_mapping_clean(struct address_space *mapping)
+{
+ bool clean = false;
+
+ /* b93b016313b3b ("page cache: use xa_lock") introduces i_pages */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0)
+ xa_lock_irq(&mapping->i_pages);
+#else
+ spin_lock_irq(&mapping->tree_lock);
+#endif
+ clean = !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
+ !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0)
+ xa_unlock_irq(&mapping->i_pages);
+#else
+ spin_unlock_irq(&mapping->tree_lock);
+#endif
+ return clean;
+}
+
+static int hmdfs_flush_stash_file_data(struct hmdfs_peer *conn,
+ struct hmdfs_inode_info *info)
+{
+ struct inode *inode = &info->vfs_inode;
+ struct address_space *mapping = inode->i_mapping;
+ bool all_clean = true;
+ int err = 0;
+ int i;
+
+ /* Wait for the completion of write syscall */
+ inode_lock(inode);
+ inode_unlock(inode);
+
+ all_clean = hmdfs_is_mapping_clean(mapping);
+ if (all_clean) {
+ hmdfs_reset_remote_write_err(conn, info);
+ return 0;
+ }
+
+ /*
+ * A non-sync_all writeback running during offline may not
+ * have seen stash_status being set to
+ * HMDFS_REMOTE_INODE_STASHING and may call
+ * mapping_set_error() right after we reset the previous
+ * error. So wait for such writeback once; subsequent
+ * writeback will write to the local stash file.
+ */
+ hmdfs_wait_remote_writeback_once(conn, info);
+
+ /* Do we need to clear the previous error? */
+ hmdfs_reset_remote_write_err(conn, info);
+
+ /*
+ * 1. dirty page: write it back
+ * 2. page under writeback: wait for completion
+ * 3. writeback -> redirty page: call filemap_write_and_wait()
+ * twice, so the 2nd writeback must not allow the
+ * writeback -> redirty transition
+ */
+ for (i = 0; i < HMDFS_STASH_FLUSH_CNT; i++) {
+ err = filemap_write_and_wait(mapping);
+ if (err) {
+ hmdfs_err("peer 0x%x:0x%llx inode 0x%llx #%d stash flush error %d",
+ conn->owner, conn->device_id,
+ info->remote_ino, i, err);
+ return err;
+ }
+ }
+
+ if (!hmdfs_is_mapping_clean(mapping))
+ hmdfs_err("peer 0x%x:0x%llx inode 0x%llx is still dirty dt %d wb %d",
+ conn->owner, conn->device_id, info->remote_ino,
+ !!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY),
+ !!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK));
+
+ return 0;
+}
+
+static int hmdfs_flush_stash_file(struct hmdfs_inode_info *info)
+{
+ int err;
+
+ err = hmdfs_flush_stash_file_data(info->conn, info);
+ if (!err)
+ err = hmdfs_flush_stash_file_metadata(info);
+
+ return err;
+}
+
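+/*
+ * Publish the anonymous tmpfile under the name "0x<remote_ino>" via
+ * vfs_link(). Any stale entry with that name is unlinked first; a
+ * single retry guards against a concurrent re-creation.
+ */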
+static int hmdfs_enable_stash_file(struct hmdfs_inode_info *info,
+ struct dentry *stash)
+{
+ char name[HMDFS_STASH_FILE_NAME_LEN];
+ struct dentry *parent = NULL;
+ struct inode *dir = NULL;
+ struct dentry *child = NULL;
+ int err = 0;
+ bool retried = false;
+
+ snprintf(name, sizeof(name), "0x%llx", info->remote_ino);
+
+ parent = lock_parent(stash);
+ dir = d_inode(parent);
+
+lookup_again:
+ child = lookup_one_len(name, parent, strlen(name));
+ if (IS_ERR(child)) {
+ err = PTR_ERR(child);
+ child = NULL;
+ hmdfs_err("lookup %s err %d", name, err);
+ goto out;
+ }
+
+ if (d_is_positive(child)) {
+ hmdfs_warning("%s exists (mode 0%o)",
+ name, d_inode(child)->i_mode);
+
+ err = vfs_unlink(dir, child, NULL);
+ if (err) {
+ hmdfs_err("unlink %s err %d", name, err);
+ goto out;
+ }
+ if (retried) {
+ err = -EEXIST;
+ goto out;
+ }
+
+ retried = true;
+ dput(child);
+ goto lookup_again;
+ }
+
+ err = vfs_link(stash, dir, child, NULL);
+ if (err) {
+ hmdfs_err("link stash file to %s err %d", name, err);
+ goto out;
+ }
+
+out:
+ unlock_dir(parent);
+ if (child)
+ dput(child);
+
+ return err;
+}
+
+/* Return 1 if stash is done, 0 if nothing is stashed */
+static int hmdfs_close_stash_file(struct hmdfs_peer *conn,
+ struct hmdfs_inode_info *info)
+{
+ struct file *cache_file = info->cache->cache_file;
+ struct dentry *c_dentry = file_dentry(cache_file);
+ struct inode *c_inode = d_inode(c_dentry);
+ long long to_write_pgs = atomic64_read(&info->cache->to_write_pgs);
+ int err;
+
+ hmdfs_info("peer 0x%x:0x%llx inode 0x%llx stashed bytes %lld pages %lld",
+ conn->owner, conn->device_id, info->remote_ino,
+ i_size_read(c_inode), to_write_pgs);
+
+ if (to_write_pgs == 0)
+ return 0;
+
+ err = vfs_fsync(cache_file, 0);
+ if (!err)
+ err = hmdfs_enable_stash_file(info, c_dentry);
+ else
+ hmdfs_err("fsync stash file err %d", err);
+
+ return err < 0 ? err : 1;
+}
+
+static void hmdfs_del_file_cache(struct hmdfs_cache_info *cache)
+{
+ if (!cache)
+ return;
+
+ fput(cache->cache_file);
+ kfree(cache->path_buf);
+ kfree(cache);
+}
+
+static struct hmdfs_cache_info *
+hmdfs_new_file_cache(struct hmdfs_peer *conn, struct hmdfs_inode_info *info)
+{
+ struct hmdfs_cache_info *cache = NULL;
+ struct dentry *stash_dentry = NULL;
+ int err;
+
+ cache = kzalloc(sizeof(*cache), GFP_KERNEL);
+ if (!cache)
+ return ERR_PTR(-ENOMEM);
+
+ atomic64_set(&cache->to_write_pgs, 0);
+ atomic64_set(&cache->written_pgs, 0);
+ cache->path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
+ if (!cache->path_buf) {
+ err = -ENOMEM;
+ goto free_cache;
+ }
+
+ /* Do we need to handle hard links? */
+ stash_dentry = d_find_any_alias(&info->vfs_inode);
+ if (stash_dentry) {
+ /* We need the full path inside hmdfs; it will be a device-view path */
+ cache->path = dentry_path_raw(stash_dentry, cache->path_buf,
+ PATH_MAX);
+ dput(stash_dentry);
+ if (IS_ERR(cache->path)) {
+ err = PTR_ERR(cache->path);
+ hmdfs_err("peer 0x%x:0x%llx inode 0x%llx gen path err %d",
+ conn->owner, conn->device_id,
+ info->remote_ino, err);
+ goto free_path;
+ }
+ } else {
+ /* Write-opened file was closed before finding dentry */
+ hmdfs_info("peer 0x%x:0x%llx inode 0x%llx no dentry found",
+ conn->owner, conn->device_id, info->remote_ino);
+ cache->path_buf[0] = '\0';
+ cache->path = cache->path_buf;
+ }
+
+ cache->path_cnt = 1;
+ cache->path_len = strlen(cache->path) + 1;
+ cache->path_offs = DIV_ROUND_UP(sizeof(struct hmdfs_cache_file_head),
+ HMDFS_STASH_BLK_SIZE);
+ cache->data_offs = cache->path_offs + DIV_ROUND_UP(cache->path_len,
+ HMDFS_STASH_BLK_SIZE);
+ cache->cache_file = hmdfs_new_stash_file(&conn->sbi->stash_work_dir,
+ conn->cid);
+ if (IS_ERR(cache->cache_file)) {
+ err = PTR_ERR(cache->cache_file);
+ goto free_path;
+ }
+
+ return cache;
+
+free_path:
+ kfree(cache->path_buf);
+free_cache:
+ kfree(cache);
+ return ERR_PTR(err);
+}
+
+static void hmdfs_init_stash_file_cache(struct hmdfs_peer *conn,
+ struct hmdfs_inode_info *info)
+{
+ struct hmdfs_cache_info *cache = NULL;
+
+ cache = hmdfs_new_file_cache(conn, info);
+ if (IS_ERR(cache))
+ /*
+ * Continue even if creating the stash info failed:
+ * we must ensure there are no dirty pages left
+ * after the stash completes.
+ */
+ cache = NULL;
+
+ /* Make write() return */
+ spin_lock(&info->stash_lock);
+ info->cache = cache;
+ info->stash_status = HMDFS_REMOTE_INODE_STASHING;
+ spin_unlock(&info->stash_lock);
+}
+
+static void hmdfs_update_stash_stats(struct hmdfs_stash_stats *stats,
+ const struct hmdfs_cache_info *cache,
+ int err)
+{
+ unsigned long long ok_pages, fail_pages;
+
+ if (cache) {
+ ok_pages = err > 0 ? atomic64_read(&cache->written_pgs) : 0;
+ fail_pages = atomic64_read(&cache->to_write_pgs) - ok_pages;
+ stats->ok_pages += ok_pages;
+ stats->fail_pages += fail_pages;
+ }
+
+ if (err > 0)
+ stats->succeed++;
+ else if (!err)
+ stats->donothing++;
+ else
+ stats->fail++;
+}
+
+/* Return 1 if stash is done, 0 if nothing is stashed */
+static int hmdfs_stash_remote_inode(struct hmdfs_inode_info *info,
+ struct hmdfs_stash_stats *stats)
+{
+ struct hmdfs_cache_info *cache = info->cache;
+ struct hmdfs_peer *conn = info->conn;
+ unsigned int status;
+ int err = 0;
+
+ hmdfs_info("stash peer 0x%x:0x%llx ino 0x%llx",
+ conn->owner, conn->device_id, info->remote_ino);
+
+ err = hmdfs_flush_stash_file(info);
+ if (!err)
+ err = hmdfs_close_stash_file(conn, info);
+
+ if (err <= 0)
+ set_bit(HMDFS_FID_NEED_OPEN, &info->fid_flags);
+ status = err > 0 ? HMDFS_REMOTE_INODE_RESTORING :
+ HMDFS_REMOTE_INODE_NONE;
+ spin_lock(&info->stash_lock);
+ info->cache = NULL;
+ /*
+ * Use smp_store_release() to ensure order between HMDFS_FID_NEED_OPEN
+ * and HMDFS_REMOTE_INODE_NONE.
+ */
+ smp_store_release(&info->stash_status, status);
+ spin_unlock(&info->stash_lock);
+
+ hmdfs_update_stash_stats(stats, cache, err);
+ hmdfs_del_file_cache(cache);
+
+ return err;
+}
+
+static void hmdfs_init_cache_for_stash_files(struct hmdfs_peer *conn,
+ struct list_head *list)
+{
+ const struct cred *old_cred = NULL;
+ struct hmdfs_inode_info *info = NULL;
+
+ /* For file creation under stash_work_dir */
+ old_cred = hmdfs_override_creds(conn->sbi->cred);
+ list_for_each_entry(info, list, stash_node)
+ hmdfs_init_stash_file_cache(conn, info);
+ hmdfs_revert_creds(old_cred);
+}
+
+static void hmdfs_init_stash_cache_work_fn(struct work_struct *base)
+{
+ struct hmdfs_stash_work *work =
+ container_of(base, struct hmdfs_stash_work, work);
+
+ hmdfs_init_cache_for_stash_files(work->conn, work->list);
+ complete(&work->done);
+}
+
+static void hmdfs_init_cache_for_stash_files_by_work(struct hmdfs_peer *conn,
+ struct list_head *list)
+{
+ struct hmdfs_stash_work work = {
+ .conn = conn,
+ .list = list,
+ .done = COMPLETION_INITIALIZER_ONSTACK(work.done),
+ };
+
+ INIT_WORK_ONSTACK(&work.work, hmdfs_init_stash_cache_work_fn);
+ schedule_work(&work.work);
+ wait_for_completion(&work.done);
+}
+
+static void hmdfs_stash_fetch_ready_files(struct hmdfs_peer *conn,
+ bool check, struct list_head *list)
+{
+ struct hmdfs_inode_info *info = NULL;
+
+ spin_lock(&conn->wr_opened_inode_lock);
+ list_for_each_entry(info, &conn->wr_opened_inode_list, wr_opened_node) {
+ int status;
+
+ /* Paired with *_release() in hmdfs_reset_stashed_inode() */
+ status = smp_load_acquire(&info->stash_status);
+ if (status == HMDFS_REMOTE_INODE_NONE) {
+ list_add_tail(&info->stash_node, list);
+ /*
+ * Prevent close() from removing the inode from the
+ * writeable-opened inode list
+ */
+ hmdfs_remote_add_wr_opened_inode_nolock(conn, info);
+ /* Prevent the inode from eviction */
+ ihold(&info->vfs_inode);
+ } else if (check && status == HMDFS_REMOTE_INODE_STASHING) {
+ hmdfs_warning("peer 0x%x:0x%llx inode 0x%llx unexpected stash status %d",
+ conn->owner, conn->device_id,
+ info->remote_ino, status);
+ }
+ }
+ spin_unlock(&conn->wr_opened_inode_lock);
+}
+
+static void hmdfs_stash_offline_prepare(struct hmdfs_peer *conn, int evt,
+ unsigned int seq)
+{
+ LIST_HEAD(preparing);
+
+ if (!hmdfs_is_stash_enabled(conn->sbi))
+ return;
+
+ mutex_lock(&conn->offline_cb_lock);
+
+ hmdfs_stash_fetch_ready_files(conn, true, &preparing);
+
+ if (list_empty(&preparing))
+ goto out;
+
+ hmdfs_init_cache_for_stash_files_by_work(conn, &preparing);
+out:
+ mutex_unlock(&conn->offline_cb_lock);
+}
+
+static void hmdfs_track_inode_locked(struct hmdfs_peer *conn,
+ struct hmdfs_inode_info *info)
+{
+ spin_lock(&conn->stashed_inode_lock);
+ list_add_tail(&info->stash_node, &conn->stashed_inode_list);
+ conn->stashed_inode_nr++;
+ spin_unlock(&conn->stashed_inode_lock);
+}
+
+static void
+hmdfs_update_peer_stash_stats(struct hmdfs_stash_statistics *stash_stats,
+ const struct hmdfs_stash_stats *stats)
+{
+ stash_stats->cur_ok = stats->succeed;
+ stash_stats->cur_nothing = stats->donothing;
+ stash_stats->cur_fail = stats->fail;
+ stash_stats->total_ok += stats->succeed;
+ stash_stats->total_nothing += stats->donothing;
+ stash_stats->total_fail += stats->fail;
+ stash_stats->ok_pages += stats->ok_pages;
+ stash_stats->fail_pages += stats->fail_pages;
+}
+
+static void hmdfs_stash_remote_inodes(struct hmdfs_peer *conn,
+ struct list_head *list)
+{
+ const struct cred *old_cred = NULL;
+ struct hmdfs_inode_info *info = NULL;
+ struct hmdfs_inode_info *next = NULL;
+ struct hmdfs_stash_stats stats;
+
+ /* For file creation, write and relink under stash_work_dir */
+ old_cred = hmdfs_override_creds(conn->sbi->cred);
+
+ memset(&stats, 0, sizeof(stats));
+ list_for_each_entry_safe(info, next, list, stash_node) {
+ int err;
+
+ list_del_init(&info->stash_node);
+
+ err = hmdfs_stash_remote_inode(info, &stats);
+ if (err > 0)
+ hmdfs_track_inode_locked(conn, info);
+
+ hmdfs_remote_del_wr_opened_inode(conn, info);
+ if (err <= 0)
+ iput(&info->vfs_inode);
+ }
+ hmdfs_revert_creds(old_cred);
+
+ hmdfs_update_peer_stash_stats(&conn->stats.stash, &stats);
+ hmdfs_info("peer 0x%x:0x%llx total stashed %u cur ok %u none %u fail %u",
+ conn->owner, conn->device_id, conn->stashed_inode_nr,
+ stats.succeed, stats.donothing, stats.fail);
+}
+
+static void hmdfs_stash_offline_do_stash(struct hmdfs_peer *conn, int evt,
+ unsigned int seq)
+{
+ struct hmdfs_inode_info *info = NULL;
+ LIST_HEAD(preparing);
+ LIST_HEAD(stashing);
+
+ if (!hmdfs_is_stash_enabled(conn->sbi))
+ return;
+
+ /* Release seq_lock to avoid blocking non-offline sync callbacks */
+ mutex_unlock(&conn->seq_lock);
+ /* Acquire offline_cb_lock to serialize with the offline sync callback */
+ mutex_lock(&conn->offline_cb_lock);
+
+ hmdfs_stash_fetch_ready_files(conn, false, &preparing);
+ if (!list_empty(&preparing))
+ hmdfs_init_cache_for_stash_files(conn, &preparing);
+
+ spin_lock(&conn->wr_opened_inode_lock);
+ list_for_each_entry(info, &conn->wr_opened_inode_list, wr_opened_node) {
+ int status = READ_ONCE(info->stash_status);
+
+ if (status == HMDFS_REMOTE_INODE_STASHING)
+ list_add_tail(&info->stash_node, &stashing);
+ }
+ spin_unlock(&conn->wr_opened_inode_lock);
+
+ if (list_empty(&stashing))
+ goto unlock;
+
+ hmdfs_stash_remote_inodes(conn, &stashing);
+
+unlock:
+ mutex_unlock(&conn->offline_cb_lock);
+ mutex_lock(&conn->seq_lock);
+}
+
+static struct hmdfs_inode_info *
+hmdfs_lookup_stash_inode(struct hmdfs_peer *conn, uint64_t inum)
+{
+ struct hmdfs_inode_info *info = NULL;
+
+ list_for_each_entry(info, &conn->stashed_inode_list, stash_node) {
+ if (info->remote_ino == inum)
+ return info;
+ }
+
+ return NULL;
+}
+
+static void hmdfs_untrack_stashed_inode(struct hmdfs_peer *conn,
+ struct hmdfs_inode_info *info)
+{
+ list_del_init(&info->stash_node);
+ iput(&info->vfs_inode);
+
+ conn->stashed_inode_nr--;
+}
+
+static void hmdfs_reset_stashed_inode(struct hmdfs_peer *conn,
+ struct hmdfs_inode_info *info)
+{
+ struct inode *ino = &info->vfs_inode;
+
+ /*
+ * Hold the inode so that stash_status can still be
+ * updated after the iput() in
+ * hmdfs_untrack_stashed_inode()
+ */
+ ihold(ino);
+ hmdfs_untrack_stashed_inode(conn, info);
+ /*
+ * Ensure the order of stash_node and stash_status:
+ * only update stash_status to NONE after removal of
+ * stash_node is completed.
+ */
+ smp_store_release(&info->stash_status,
+ HMDFS_REMOTE_INODE_NONE);
+ iput(ino);
+}
+
+static void hmdfs_drop_stashed_inodes(struct hmdfs_peer *conn)
+{
+ struct hmdfs_inode_info *info = NULL;
+ struct hmdfs_inode_info *next = NULL;
+
+ if (list_empty(&conn->stashed_inode_list))
+ return;
+
+ hmdfs_warning("peer 0x%x:0x%llx drop unrestorable file %u",
+ conn->owner, conn->device_id, conn->stashed_inode_nr);
+
+ list_for_each_entry_safe(info, next,
+ &conn->stashed_inode_list, stash_node) {
+ hmdfs_warning("peer 0x%x:0x%llx inode 0x%llx unrestorable status %u",
+ conn->owner, conn->device_id, info->remote_ino,
+ READ_ONCE(info->stash_status));
+
+ hmdfs_reset_stashed_inode(conn, info);
+ }
+}
+
+static struct file *hmdfs_open_stash_dir(struct path *d_path, const char *cid)
+{
+ int err = 0;
+ struct dentry *parent = d_path->dentry;
+ struct inode *dir = d_inode(parent);
+ struct dentry *child = NULL;
+ struct path peer_path;
+ struct file *filp = NULL;
+
+ inode_lock_nested(dir, I_MUTEX_PARENT);
+ child = lookup_one_len(cid, parent, strlen(cid));
+ if (!IS_ERR(child)) {
+ if (!hmdfs_is_dir(child)) {
+ if (d_is_positive(child)) {
+ hmdfs_err("invalid stash dir mode 0%o", d_inode(child)->i_mode);
+ err = -EINVAL;
+ } else {
+ err = -ENOENT;
+ }
+ dput(child);
+ }
+ } else {
+ err = PTR_ERR(child);
+ hmdfs_err("lookup stash dir err %d", err);
+ }
+ inode_unlock(dir);
+
+ if (err)
+ return ERR_PTR(err);
+
+ peer_path.mnt = d_path->mnt;
+ peer_path.dentry = child;
+ filp = dentry_open(&peer_path, O_RDONLY | O_DIRECTORY, current_cred());
+ if (IS_ERR(filp))
+ hmdfs_err("open err %d", (int)PTR_ERR(filp));
+
+ dput(child);
+
+ return filp;
+}
+
+static int hmdfs_new_inode_tbl(struct hmdfs_inode_tbl **tbl)
+{
+ struct hmdfs_inode_tbl *new = NULL;
+
+ new = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!new)
+ return -ENOMEM;
+
+ new->cnt = 0;
+ new->max = (PAGE_SIZE - offsetof(struct hmdfs_inode_tbl, inodes)) /
+ sizeof(new->inodes[0]);
+ *tbl = new;
+
+ return 0;
+}
+
+static int hmdfs_parse_stash_file_name(struct dir_context *dctx,
+ const char *name,
+ int namelen,
+ unsigned int d_type,
+ uint64_t *stash_inum)
+{
+ struct hmdfs_stash_dir_context *ctx = NULL;
+ int err;
+
+ if (d_type != DT_UNKNOWN && d_type != DT_REG)
+ return 0;
+ if (namelen > NAME_MAX)
+ return 0;
+
+ ctx = container_of(dctx, struct hmdfs_stash_dir_context, dctx);
+ memcpy(ctx->name, name, namelen);
+ ctx->name[namelen] = '\0';
+ err = kstrtoull(ctx->name, 16, stash_inum);
+ if (err) {
+ hmdfs_err("unexpected stash file err %d", err);
+ return 0;
+ }
+ return 1;
+}
+
+static int hmdfs_has_stash_file(struct dir_context *dctx, const char *name,
+ int namelen, loff_t offset,
+ u64 inum, unsigned int d_type)
+{
+ struct hmdfs_stash_dir_context *ctx = NULL;
+ uint64_t stash_inum;
+ int err;
+
+ ctx = container_of(dctx, struct hmdfs_stash_dir_context, dctx);
+ err = hmdfs_parse_stash_file_name(dctx, name, namelen,
+ d_type, &stash_inum);
+ if (!err)
+ return 0;
+
+ ctx->tbl->cnt++;
+ return 1;
+}
+
+static int hmdfs_fill_stash_file(struct dir_context *dctx, const char *name,
+ int namelen, loff_t offset,
+ u64 inum, unsigned int d_type)
+{
+ struct hmdfs_stash_dir_context *ctx = NULL;
+ uint64_t stash_inum;
+ int err;
+
+ ctx = container_of(dctx, struct hmdfs_stash_dir_context, dctx);
+ err = hmdfs_parse_stash_file_name(dctx, name, namelen,
+ d_type, &stash_inum);
+ if (!err)
+ return 0;
+ if (ctx->tbl->cnt >= ctx->tbl->max)
+ return 1;
+
+ ctx->tbl->inodes[ctx->tbl->cnt++] = stash_inum;
+
+ return 0;
+}
+
+static int hmdfs_del_stash_file(struct dentry *parent, struct dentry *child)
+{
+ struct inode *dir = d_inode(parent);
+ int err = 0;
+
+ /* Prevent d_delete() from calling dentry_unlink_inode() */
+ dget(child);
+
+ inode_lock_nested(dir, I_MUTEX_PARENT);
+ err = vfs_unlink(dir, child, NULL);
+ if (err)
+ hmdfs_err("remove stash file err %d", err);
+ inode_unlock(dir);
+
+ dput(child);
+
+ return err;
+}
+
+static inline bool hmdfs_is_node_offlined(const struct hmdfs_peer *conn,
+ unsigned int seq)
+{
+ /*
+ * open()/fsync() may fail due to "status = NODE_STAT_OFFLINE"
+ * in hmdfs_disconnect_node().
+ * Pairs with smp_mb() in hmdfs_disconnect_node() to ensure
+ * we observe the newest event sequence.
+ */
+ smp_mb__before_atomic();
+ return hmdfs_node_evt_seq(conn) != seq;
+}
+
+static int hmdfs_verify_restore_file_head(struct hmdfs_file_restore_ctx *ctx,
+ const struct hmdfs_cache_file_head *head)
+{
+ struct inode *inode = file_inode(ctx->src_filp);
+ struct hmdfs_peer *conn = ctx->conn;
+ unsigned int crc, read_crc, crc_offset;
+ loff_t path_offs, data_offs, isize;
+ int err = 0;
+
+ if (le32_to_cpu(head->magic) != HMDFS_STASH_FILE_HEAD_MAGIC) {
+ err = -EUCLEAN;
+ hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid magic: got 0x%x, exp 0x%x",
+ conn->owner, conn->device_id, ctx->inum,
+ le32_to_cpu(head->magic),
+ HMDFS_STASH_FILE_HEAD_MAGIC);
+ goto out;
+ }
+
+ crc_offset = le32_to_cpu(head->crc_offset);
+ read_crc = le32_to_cpu(*((__le32 *)((char *)head + crc_offset)));
+ crc = crc32(0, head, crc_offset);
+ if (read_crc != crc) {
+ err = -EUCLEAN;
+ hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid crc: got 0x%x, exp 0x%x",
+ conn->owner, conn->device_id, ctx->inum,
+ read_crc, crc);
+ goto out;
+ }
+
+ if (le64_to_cpu(head->ino) != ctx->inum) {
+ err = -EUCLEAN;
+ hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid ino: got %llu, exp %llu",
+ conn->owner, conn->device_id, ctx->inum,
+ le64_to_cpu(head->ino), ctx->inum);
+ goto out;
+ }
+
+ path_offs = (loff_t)le32_to_cpu(head->path_offs) <<
+ HMDFS_STASH_BLK_SHIFT;
+ if (path_offs <= 0 || path_offs >= i_size_read(inode)) {
+ err = -EUCLEAN;
+ hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid path_offs %d, stash file size %llu",
+ conn->owner, conn->device_id, ctx->inum,
+ le32_to_cpu(head->path_offs), i_size_read(inode));
+ goto out;
+ }
+
+ data_offs = (loff_t)le32_to_cpu(head->data_offs) <<
+ HMDFS_STASH_BLK_SHIFT;
+ if (path_offs >= data_offs) {
+ err = -EUCLEAN;
+ hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid data_offs %d, path_offs %d",
+ conn->owner, conn->device_id, ctx->inum,
+ le32_to_cpu(head->data_offs),
+ le32_to_cpu(head->path_offs));
+ goto out;
+ }
+ if (data_offs <= 0 || data_offs >= i_size_read(inode)) {
+ err = -EUCLEAN;
+ hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid data_offs %d, stash file size %llu",
+ conn->owner, conn->device_id, ctx->inum,
+ le32_to_cpu(head->data_offs), i_size_read(inode));
+ goto out;
+ }
+
+ isize = le64_to_cpu(head->size);
+ if (isize != i_size_read(inode)) {
+ err = -EUCLEAN;
+ hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid isize: got %llu, exp %llu",
+ conn->owner, conn->device_id, ctx->inum,
+ le64_to_cpu(head->size), i_size_read(inode));
+ goto out;
+ }
+
+ if (le32_to_cpu(head->path_cnt) < 1) {
+ err = -EUCLEAN;
+ hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid path_cnt %d",
+ conn->owner, conn->device_id, ctx->inum,
+ le32_to_cpu(head->path_cnt));
+ goto out;
+ }
+
+out:
+ return err;
+}
+
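+/*
+ * Read the head in two steps: first the fixed prefix up to and
+ * including @crc_offset, then, once the actual head length is known,
+ * the full CRC-covered head. This lets newer stash formats append
+ * fields (see the note in struct hmdfs_cache_file_head) while older
+ * readers still validate the part they understand.
+ */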
+static int hmdfs_get_restore_file_metadata(struct hmdfs_file_restore_ctx *ctx)
+{
+ struct hmdfs_cache_file_head head;
+ struct hmdfs_peer *conn = ctx->conn;
+ unsigned int head_size, read_size, head_crc_offset;
+ loff_t pos;
+ ssize_t rd;
+ int err = 0;
+
+ head_size = sizeof(struct hmdfs_cache_file_head);
+ memset(&head, 0, head_size);
+ /* Read part head */
+ pos = 0;
+ read_size = offsetof(struct hmdfs_cache_file_head, crc_offset) +
+ sizeof(head.crc_offset);
+ rd = kernel_read(ctx->src_filp, &head, read_size, &pos);
+ if (rd != read_size) {
+ err = rd < 0 ? rd : -ENODATA;
+ hmdfs_err("peer 0x%x:0x%llx ino 0x%llx read part head err %d",
+ conn->owner, conn->device_id, ctx->inum, err);
+ goto out;
+ }
+ head_crc_offset = le32_to_cpu(head.crc_offset);
+ if (head_crc_offset + sizeof(head.crc32) < head_crc_offset ||
+ head_crc_offset + sizeof(head.crc32) > head_size) {
+ err = -EUCLEAN;
+ hmdfs_err("peer 0x%x:0x%llx ino 0x%llx got bad head: Too long crc_offset %u which exceeds head size %u",
+ conn->owner, conn->device_id, ctx->inum,
+ head_crc_offset, head_size);
+ goto out;
+ }
+
+ /* Read full head */
+ pos = 0;
+ read_size = le32_to_cpu(head.crc_offset) + sizeof(head.crc32);
+ rd = kernel_read(ctx->src_filp, &head, read_size, &pos);
+ if (rd != read_size) {
+ err = rd < 0 ? rd : -ENODATA;
+ hmdfs_err("peer 0x%x:0x%llx ino 0x%llx read full head err %d",
+ conn->owner, conn->device_id, ctx->inum, err);
+ goto out;
+ }
+
+ err = hmdfs_verify_restore_file_head(ctx, &head);
+ if (err)
+ goto out;
+
+ ctx->pages = le64_to_cpu(head.blocks) >>
+ HMDFS_STASH_PAGE_TO_SECTOR_SHIFT;
+ ctx->data_offs = le32_to_cpu(head.data_offs);
+ /* Read path */
+ read_size = min_t(unsigned int, le32_to_cpu(head.path_len), PATH_MAX);
+ pos = (loff_t)le32_to_cpu(head.path_offs) << HMDFS_STASH_BLK_SHIFT;
+ rd = kernel_read(ctx->src_filp, ctx->dst, read_size, &pos);
+ if (rd != read_size) {
+ err = rd < 0 ? rd : -ENODATA;
+ hmdfs_err("peer 0x%x:0x%llx ino 0x%llx read path err %d",
+ conn->owner, conn->device_id, ctx->inum, err);
+ goto out;
+ }
+ if (strnlen(ctx->dst, read_size) >= read_size) {
+ err = -EUCLEAN;
+ hmdfs_err("peer 0x%x:0x%llx ino 0x%llx read path not end with \\0",
+ conn->owner, conn->device_id, ctx->inum);
+ goto out;
+ }
+ /* TODO: Pick a valid path from all paths */
+
+out:
+ return err;
+}
+
+static int hmdfs_open_restore_dst_file(struct hmdfs_file_restore_ctx *ctx,
+ unsigned int rw_flag, struct file **filp)
+{
+ struct hmdfs_peer *conn = ctx->conn;
+ struct file *dst = NULL;
+ int err = 0;
+
+ err = hmdfs_get_restore_file_metadata(ctx);
+ if (err)
+ goto out;
+
+ /* Does the error come from the connection or from the server? */
+ dst = file_open_root(&ctx->dst_root_path,
+ ctx->dst, O_LARGEFILE | rw_flag, 0);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ hmdfs_err("open remote file ino 0x%llx err %d", ctx->inum, err);
+ if (hmdfs_is_node_offlined(conn, ctx->seq))
+ err = -ESHUTDOWN;
+ goto out;
+ }
+
+ *filp = dst;
+out:
+ return err;
+}
+
+static bool hmdfs_need_abort_restore(struct hmdfs_file_restore_ctx *ctx,
+ struct hmdfs_inode_info *pinned,
+ struct file *opened_file)
+{
+ struct hmdfs_inode_info *opened = hmdfs_i(file_inode(opened_file));
+
+ if (opened->inode_type != HMDFS_LAYER_OTHER_REMOTE)
+ goto abort;
+
+ if (opened == pinned)
+ return false;
+
+abort:
+ hmdfs_warning("peer 0x%x:0x%llx inode 0x%llx invalid remote file",
+ ctx->conn->owner, ctx->conn->device_id, ctx->inum);
+ hmdfs_warning("got: peer 0x%x:0x%llx inode 0x%llx type %d status %d",
+ opened->conn ? opened->conn->owner : 0,
+ opened->conn ? opened->conn->device_id : 0,
+ opened->remote_ino, opened->inode_type,
+ opened->stash_status);
+ hmdfs_warning("pinned: peer 0x%x:0x%llx inode 0x%llx type %d status %d",
+ pinned->conn->owner, pinned->conn->device_id,
+ pinned->remote_ino, pinned->inode_type,
+ pinned->stash_status);
+ return true;
+}
+
+static void hmdfs_init_copy_args(const struct hmdfs_file_restore_ctx *ctx,
+ struct file *dst, struct hmdfs_copy_args *args)
+{
+ args->src = ctx->src_filp;
+ args->dst = dst;
+ args->buf = ctx->page;
+ args->buf_len = PAGE_SIZE;
+ args->seq = ctx->seq;
+ args->data_offs = ctx->data_offs;
+ args->inum = ctx->inum;
+}
+
+static ssize_t hmdfs_write_dst(struct hmdfs_peer *conn, struct file *filp,
+ void *buf, size_t len, loff_t pos)
+{
+ mm_segment_t old_fs;
+ struct kiocb kiocb;
+ struct iovec iov;
+ struct iov_iter iter;
+ ssize_t wr;
+ int err = 0;
+
+ file_start_write(filp);
+
+ old_fs = force_uaccess_begin();
+
+ init_sync_kiocb(&kiocb, filp);
+ kiocb.ki_pos = pos;
+
+ iov.iov_base = buf;
+ iov.iov_len = len;
+ iov_iter_init(&iter, WRITE, &iov, 1, len);
+
+ wr = hmdfs_file_write_iter_remote_nocheck(&kiocb, &iter);
+
+ force_uaccess_end(old_fs);
+
+ file_end_write(filp);
+
+ if (wr != len) {
+ struct hmdfs_inode_info *info = hmdfs_i(file_inode(filp));
+
+ hmdfs_err("peer 0x%x:0x%llx ino 0x%llx short write ret %zd exp %zu",
+ conn->owner, conn->device_id, info->remote_ino,
+ wr, len);
+ err = wr < 0 ? (int)wr : -EFAULT;
+ }
+
+ return err;
+}
+
+static int hmdfs_rd_src_wr_dst(struct hmdfs_peer *conn,
+ struct hmdfs_copy_ctx *ctx)
+{
+ const struct hmdfs_copy_args *args = NULL;
+ int err = 0;
+ loff_t rd_pos;
+ ssize_t rd;
+
+ ctx->eof = false;
+ ctx->copied = 0;
+
+ args = &ctx->args;
+ rd_pos = ctx->src_pos;
+ rd = kernel_read(args->src, args->buf, args->buf_len, &rd_pos);
+ if (rd < 0) {
+ err = (int)rd;
+ hmdfs_err("peer 0x%x:0x%llx ino 0x%llx short read err %d",
+ conn->owner, conn->device_id, args->inum, err);
+ goto out;
+ } else if (rd == 0) {
+ ctx->eof = true;
+ goto out;
+ }
+
+ err = hmdfs_write_dst(conn, args->dst, args->buf, rd, ctx->dst_pos);
+ if (!err)
+ ctx->copied = rd;
+ else if (hmdfs_is_node_offlined(conn, args->seq))
+ err = -ESHUTDOWN;
+out:
+ return err;
+}
+
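+/*
+ * Copy only the extents that were actually stashed: the stash file is
+ * sparse, so walk it with SEEK_DATA starting at @data_offs and replay
+ * each extent to the remote file at (stash offset - data_offs).
+ */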
+static int hmdfs_copy_src_to_dst(struct hmdfs_peer *conn,
+ const struct hmdfs_copy_args *args)
+{
+ int err = 0;
+ struct file *src = NULL;
+ struct hmdfs_copy_ctx ctx;
+ loff_t seek_pos, data_init_pos;
+ loff_t src_size;
+
+ ctx.args = *args;
+
+ src = ctx.args.src;
+ data_init_pos = (loff_t)ctx.args.data_offs << HMDFS_STASH_BLK_SHIFT;
+ seek_pos = data_init_pos;
+ src_size = i_size_read(file_inode(src));
+ while (true) {
+ loff_t data_pos;
+
+ data_pos = vfs_llseek(src, seek_pos, SEEK_DATA);
+ if (data_pos > seek_pos) {
+ seek_pos = data_pos;
+ continue;
+ } else if (data_pos < 0) {
+ if (data_pos == -ENXIO) {
+ loff_t src_blks = file_inode(src)->i_blocks;
+
+ hmdfs_info("peer 0x%x:0x%llx ino 0x%llx end at 0x%llx (sz 0x%llx blk 0x%llx)",
+ conn->owner, conn->device_id,
+ args->inum, seek_pos,
+ src_size, src_blks);
+ } else {
+ err = (int)data_pos;
+ hmdfs_err("peer 0x%x:0x%llx ino 0x%llx seek pos 0x%llx err %d",
+ conn->owner, conn->device_id,
+ args->inum, seek_pos, err);
+ }
+ break;
+ }
+
+ hmdfs_debug("peer 0x%x:0x%llx ino 0x%llx seek to 0x%llx",
+ conn->owner, conn->device_id, args->inum, data_pos);
+
+ ctx.src_pos = data_pos;
+ ctx.dst_pos = data_pos - data_init_pos;
+ err = hmdfs_rd_src_wr_dst(conn, &ctx);
+ if (err || ctx.eof)
+ break;
+
+ seek_pos += ctx.copied;
+ if (seek_pos >= src_size)
+ break;
+ }
+
+ return err;
+}
+
+static int hmdfs_restore_src_to_dst(struct hmdfs_file_restore_ctx *ctx,
+ struct file *dst)
+{
+ struct file *src = ctx->src_filp;
+ struct hmdfs_copy_args args;
+ int err;
+
+ hmdfs_init_copy_args(ctx, dst, &args);
+ err = hmdfs_copy_src_to_dst(ctx->conn, &args);
+ if (err)
+ goto out;
+
+ err = vfs_fsync(dst, 0);
+ if (err) {
+ hmdfs_err("fsync remote file ino 0x%llx err %d", ctx->inum, err);
+ if (hmdfs_is_node_offlined(ctx->conn, ctx->seq))
+ err = -ESHUTDOWN;
+ }
+
+out:
+ if (err)
+ truncate_inode_pages(file_inode(dst)->i_mapping, 0);
+
+ /* Remove the unnecessary cache */
+ invalidate_mapping_pages(file_inode(src)->i_mapping, 0, -1);
+
+ return err;
+}
+
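+/*
+ * Restore a single stashed file: check that the inode is still pinned
+ * in the restoring state, reopen the remote file, and replay the
+ * stashed pages. On -ESHUTDOWN (the peer went offline again) the
+ * stash file is kept for the next online event.
+ */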
+static int hmdfs_restore_file(struct hmdfs_file_restore_ctx *ctx)
+{
+ struct hmdfs_peer *conn = ctx->conn;
+ uint64_t inum = ctx->inum;
+ struct hmdfs_inode_info *pinned_info = NULL;
+ struct file *dst_filp = NULL;
+ int err = 0;
+ bool keep = false;
+
+ hmdfs_info("peer 0x%x:0x%llx ino 0x%llx do restore",
+ conn->owner, conn->device_id, inum);
+
+ pinned_info = hmdfs_lookup_stash_inode(conn, inum);
+ if (pinned_info) {
+ unsigned int status = READ_ONCE(pinned_info->stash_status);
+
+ if (status != HMDFS_REMOTE_INODE_RESTORING) {
+ hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid status %u",
+ conn->owner, conn->device_id, inum, status);
+ err = -EINVAL;
+ goto clean;
+ }
+ } else {
+ hmdfs_warning("peer 0x%x:0x%llx ino 0x%llx doesn't being pinned",
+ conn->owner, conn->device_id, inum);
+ err = -EINVAL;
+ goto clean;
+ }
+
+ set_bit(HMDFS_FID_NEED_OPEN, &pinned_info->fid_flags);
+ err = hmdfs_open_restore_dst_file(ctx, O_RDWR, &dst_filp);
+ if (err) {
+ if (err == -ESHUTDOWN)
+ keep = true;
+ goto clean;
+ }
+
+ if (hmdfs_need_abort_restore(ctx, pinned_info, dst_filp))
+ goto abort;
+
+ err = hmdfs_restore_src_to_dst(ctx, dst_filp);
+ if (err == -ESHUTDOWN)
+ keep = true;
+abort:
+ fput(dst_filp);
+clean:
+ if (pinned_info && !keep)
+ hmdfs_reset_stashed_inode(conn, pinned_info);
+ ctx->keep = keep;
+
+ hmdfs_info("peer 0x%x:0x%llx ino 0x%llx restore err %d keep %d",
+ conn->owner, conn->device_id, inum, err, ctx->keep);
+
+ return err;
+}
+
+static int hmdfs_init_file_restore_ctx(struct hmdfs_peer *conn,
+ unsigned int seq, struct path *src_dir,
+ struct hmdfs_file_restore_ctx *ctx)
+{
+ struct hmdfs_sb_info *sbi = conn->sbi;
+ struct path dst_root;
+ char *dst = NULL;
+ char *page = NULL;
+ int err = 0;
+
+ err = hmdfs_get_path_in_sb(sbi->sb, sbi->real_dst, LOOKUP_DIRECTORY,
+ &dst_root);
+ if (err)
+ return err;
+
+ dst = kmalloc(PATH_MAX, GFP_KERNEL);
+ if (!dst) {
+ err = -ENOMEM;
+ goto put_path;
+ }
+
+ page = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!page) {
+ err = -ENOMEM;
+ goto free_dst;
+ }
+
+ ctx->conn = conn;
+ ctx->src_dir_path = *src_dir;
+ ctx->dst_root_path = dst_root;
+ ctx->dst = dst;
+ ctx->page = page;
+ ctx->seq = seq;
+
+ return 0;
+free_dst:
+ kfree(dst);
+put_path:
+ path_put(&dst_root);
+ return err;
+}
+
+static void hmdfs_exit_file_restore_ctx(struct hmdfs_file_restore_ctx *ctx)
+{
+ path_put(&ctx->dst_root_path);
+ kfree(ctx->dst);
+ kfree(ctx->page);
+}
+
+static struct file *hmdfs_open_stash_file(struct path *p_path, char *name)
+{
+ struct dentry *parent = NULL;
+ struct inode *dir = NULL;
+ struct dentry *child = NULL;
+ struct file *filp = NULL;
+ struct path c_path;
+ int err = 0;
+
+ parent = p_path->dentry;
+ dir = d_inode(parent);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
+ child = lookup_one_len(name, parent, strlen(name));
+ if (!IS_ERR(child) && !hmdfs_is_reg(child)) {
+ if (d_is_positive(child)) {
+ hmdfs_err("invalid stash file (mode 0%o)",
+ d_inode(child)->i_mode);
+ err = -EINVAL;
+ } else {
+ hmdfs_err("missing stash file");
+ err = -ENOENT;
+ }
+ dput(child);
+ } else if (IS_ERR(child)) {
+ err = PTR_ERR(child);
+ hmdfs_err("lookup stash file err %d", err);
+ }
+ inode_unlock(dir);
+
+ if (err)
+ return ERR_PTR(err);
+
+ c_path.mnt = p_path->mnt;
+ c_path.dentry = child;
+ filp = dentry_open(&c_path, O_RDONLY | O_LARGEFILE, current_cred());
+ if (IS_ERR(filp))
+ hmdfs_err("open stash file err %d", (int)PTR_ERR(filp));
+
+ dput(child);
+
+ return filp;
+}
+
+static void hmdfs_update_restore_stats(struct hmdfs_restore_stats *stats,
+ bool keep, uint64_t pages, int err)
+{
+ if (!err) {
+ stats->succeed++;
+ stats->ok_pages += pages;
+ } else if (keep) {
+ stats->keep++;
+ } else {
+ stats->fail++;
+ stats->fail_pages += pages;
+ }
+}
+
+static int hmdfs_restore_files(struct hmdfs_peer *conn,
+ unsigned int seq, struct path *dir,
+ const struct hmdfs_inode_tbl *tbl,
+ void *priv)
+{
+ unsigned int i;
+ struct hmdfs_file_restore_ctx ctx;
+ int err = 0;
+ struct hmdfs_restore_stats *stats = priv;
+
+ err = hmdfs_init_file_restore_ctx(conn, seq, dir, &ctx);
+ if (err)
+ return err;
+
+ for (i = 0; i < tbl->cnt; i++) {
+ char name[HMDFS_STASH_FILE_NAME_LEN];
+ struct file *filp = NULL;
+
+ snprintf(name, sizeof(name), "0x%llx", tbl->inodes[i]);
+ filp = hmdfs_open_stash_file(dir, name);
+ /* On error, continue restoring the remaining files */
+ if (IS_ERR(filp)) {
+ stats->fail++;
+ continue;
+ }
+
+ ctx.inum = tbl->inodes[i];
+ ctx.src_filp = filp;
+ ctx.keep = false;
+ ctx.pages = 0;
+ err = hmdfs_restore_file(&ctx);
+ hmdfs_update_restore_stats(stats, ctx.keep, ctx.pages, err);
+
+ if (!ctx.keep)
+ hmdfs_del_stash_file(dir->dentry,
+ file_dentry(ctx.src_filp));
+ fput(ctx.src_filp);
+
+ /* Stop on -ESHUTDOWN; otherwise keep restoring */
+ if (err == -ESHUTDOWN)
+ break;
+ err = 0;
+ }
+
+ hmdfs_exit_file_restore_ctx(&ctx);
+
+ return err;
+}
+
+static bool hmdfs_is_valid_stash_status(struct hmdfs_inode_info *inode_info,
+ uint64_t ino)
+{
+ return (inode_info->inode_type == HMDFS_LAYER_OTHER_REMOTE &&
+ inode_info->stash_status == HMDFS_REMOTE_INODE_RESTORING &&
+ inode_info->remote_ino == ino);
+}
+
+static int hmdfs_rebuild_stash_list(struct hmdfs_peer *conn,
+ unsigned int seq,
+ struct path *dir,
+ const struct hmdfs_inode_tbl *tbl,
+ void *priv)
+{
+ struct hmdfs_file_restore_ctx ctx;
+ unsigned int i;
+ int err;
+ struct hmdfs_rebuild_stats *stats = priv;
+
+ err = hmdfs_init_file_restore_ctx(conn, seq, dir, &ctx);
+ if (err)
+ return err;
+
+ stats->total += tbl->cnt;
+
+ for (i = 0; i < tbl->cnt; i++) {
+ char name[HMDFS_STASH_FILE_NAME_LEN];
+ struct file *src_filp = NULL;
+ struct file *dst_filp = NULL;
+ struct hmdfs_inode_info *inode_info = NULL;
+ bool is_valid = true;
+
+ snprintf(name, sizeof(name), "0x%llx", tbl->inodes[i]);
+ src_filp = hmdfs_open_stash_file(dir, name);
+ if (IS_ERR(src_filp)) {
+ stats->fail++;
+ continue;
+ }
+ ctx.inum = tbl->inodes[i];
+ ctx.src_filp = src_filp;
+
+ /* No need to track the open which only needs meta info */
+ err = hmdfs_open_restore_dst_file(&ctx, O_RDONLY, &dst_filp);
+ if (err) {
+ fput(src_filp);
+ if (err == -ESHUTDOWN)
+ break;
+ stats->fail++;
+ err = 0;
+ continue;
+ }
+
+ inode_info = hmdfs_i(file_inode(dst_filp));
+ is_valid = hmdfs_is_valid_stash_status(inode_info,
+ ctx.inum);
+ if (is_valid) {
+ stats->succeed++;
+ } else {
+ hmdfs_err("peer 0x%x:0x%llx inode 0x%llx invalid state: type: %d, status: %u, inode: %llu",
+ conn->owner, conn->device_id, ctx.inum,
+ inode_info->inode_type,
+ READ_ONCE(inode_info->stash_status),
+ inode_info->remote_ino);
+ stats->invalid++;
+ }
+
+ fput(ctx.src_filp);
+ fput(dst_filp);
+ }
+
+ hmdfs_exit_file_restore_ctx(&ctx);
+ return err;
+}
+
+static int hmdfs_iter_stash_file(struct hmdfs_peer *conn,
+ unsigned int seq,
+ struct file *filp,
+ stash_operation_func op,
+ void *priv)
+{
+ int err = 0;
+ struct hmdfs_stash_dir_context ctx = {
+ .dctx.actor = hmdfs_fill_stash_file,
+ };
+ struct hmdfs_inode_tbl *tbl = NULL;
+ struct path dir;
+
+ err = hmdfs_new_inode_tbl(&tbl);
+ if (err)
+ goto out;
+
+ dir.mnt = filp->f_path.mnt;
+ dir.dentry = file_dentry(filp);
+
+ ctx.tbl = tbl;
+ ctx.dctx.pos = 0;
+ do {
+ tbl->cnt = 0;
+ err = iterate_dir(filp, &ctx.dctx);
+ if (err || !tbl->cnt) {
+ if (err)
+ hmdfs_err("iterate stash dir err %d", err);
+ break;
+ }
+ err = op(conn, seq, &dir, tbl, priv);
+ } while (!err);
+
+out:
+ kfree(tbl);
+ return err;
+}
+
+static void hmdfs_rebuild_check_work_fn(struct work_struct *base)
+{
+ struct hmdfs_check_work *work =
+ container_of(base, struct hmdfs_check_work, work);
+ struct hmdfs_peer *conn = work->conn;
+ struct hmdfs_sb_info *sbi = conn->sbi;
+ struct file *filp = NULL;
+ const struct cred *old_cred = NULL;
+ struct hmdfs_stash_dir_context ctx = {
+ .dctx.actor = hmdfs_has_stash_file,
+ };
+ struct hmdfs_inode_tbl tbl;
+ int err;
+
+ old_cred = hmdfs_override_creds(sbi->cred);
+ filp = hmdfs_open_stash_dir(&sbi->stash_work_dir, conn->cid);
+ if (IS_ERR(filp))
+ goto out;
+
+ memset(&tbl, 0, sizeof(tbl));
+ ctx.tbl = &tbl;
+ err = iterate_dir(filp, &ctx.dctx);
+ if (!err && ctx.tbl->cnt > 0)
+ conn->need_rebuild_stash_list = true;
+
+ fput(filp);
+out:
+ hmdfs_revert_creds(old_cred);
+ hmdfs_info("peer 0x%x:0x%llx %sneed to rebuild stash list",
+ conn->owner, conn->device_id,
+ conn->need_rebuild_stash_list ? "" : "don't ");
+ complete(&work->done);
+}
+
+static void hmdfs_stash_add_do_check(struct hmdfs_peer *conn, int evt,
+ unsigned int seq)
+{
+ struct hmdfs_sb_info *sbi = conn->sbi;
+ struct hmdfs_check_work work = {
+ .conn = conn,
+ .done = COMPLETION_INITIALIZER_ONSTACK(work.done),
+ };
+
+ if (!hmdfs_is_stash_enabled(sbi))
+ return;
+
+ INIT_WORK_ONSTACK(&work.work, hmdfs_rebuild_check_work_fn);
+ schedule_work(&work.work);
+ wait_for_completion(&work.done);
+}
+
+static void
+hmdfs_update_peer_rebuild_stats(struct hmdfs_rebuild_statistics *rebuild_stats,
+ const struct hmdfs_rebuild_stats *stats)
+{
+ rebuild_stats->cur_ok = stats->succeed;
+ rebuild_stats->cur_fail = stats->fail;
+ rebuild_stats->cur_invalid = stats->invalid;
+ rebuild_stats->total_ok += stats->succeed;
+ rebuild_stats->total_fail += stats->fail;
+ rebuild_stats->total_invalid += stats->invalid;
+}
+
+/* rebuild stash inode list */
+static void hmdfs_stash_online_prepare(struct hmdfs_peer *conn, int evt,
+ unsigned int seq)
+{
+ struct hmdfs_sb_info *sbi = conn->sbi;
+ struct file *filp = NULL;
+ const struct cred *old_cred = NULL;
+ int err;
+ struct hmdfs_rebuild_stats stats;
+
+ if (!hmdfs_is_stash_enabled(sbi) ||
+ !conn->need_rebuild_stash_list)
+ return;
+
+ /* Release seq_lock to avoid blocking non-online sync callbacks */
+ mutex_unlock(&conn->seq_lock);
+ old_cred = hmdfs_override_creds(sbi->cred);
+ filp = hmdfs_open_stash_dir(&sbi->stash_work_dir, conn->cid);
+ if (IS_ERR(filp))
+ goto out;
+
+ memset(&stats, 0, sizeof(stats));
+ err = hmdfs_iter_stash_file(conn, seq, filp,
+ hmdfs_rebuild_stash_list, &stats);
+ if (err == -ESHUTDOWN) {
+ hmdfs_info("peer 0x%x:0x%llx offline again during rebuild",
+ conn->owner, conn->device_id);
+ } else {
+ WRITE_ONCE(conn->need_rebuild_stash_list, false);
+ if (err)
+ hmdfs_warning("partial rebuild fail err %d", err);
+ }
+
+ hmdfs_update_peer_rebuild_stats(&conn->stats.rebuild, &stats);
+ hmdfs_info("peer 0x%x:0x%llx rebuild stashed-file total %u succeed %u fail %u invalid %u",
+ conn->owner, conn->device_id, stats.total, stats.succeed,
+ stats.fail, stats.invalid);
+ fput(filp);
+out:
+ conn->stats.rebuild.time++;
+ hmdfs_revert_creds(old_cred);
+ if (!READ_ONCE(conn->need_rebuild_stash_list)) {
+ /*
+ * Use smp_mb__before_atomic() to ensure order between
+ * writing @conn->need_rebuild_stash_list and
+ * reading conn->rebuild_inode_status_nr.
+ */
+ smp_mb__before_atomic();
+ /*
+ * Wait until all inodes finish rebuilding stash status before
+ * accessing @conn->stashed_inode_list in restoring.
+ */
+ wait_event(conn->rebuild_inode_status_wq,
+ !atomic_read(&conn->rebuild_inode_status_nr));
+ }
+ mutex_lock(&conn->seq_lock);
+}
+
+static void
+hmdfs_update_peer_restore_stats(struct hmdfs_restore_statistics *restore_stats,
+ const struct hmdfs_restore_stats *stats)
+{
+ restore_stats->cur_ok = stats->succeed;
+ restore_stats->cur_fail = stats->fail;
+ restore_stats->cur_keep = stats->keep;
+ restore_stats->total_ok += stats->succeed;
+ restore_stats->total_fail += stats->fail;
+ restore_stats->total_keep += stats->keep;
+ restore_stats->ok_pages += stats->ok_pages;
+ restore_stats->fail_pages += stats->fail_pages;
+}
+
+static void hmdfs_stash_online_do_restore(struct hmdfs_peer *conn, int evt,
+ unsigned int seq)
+{
+ struct hmdfs_sb_info *sbi = conn->sbi;
+ struct file *filp = NULL;
+ const struct cred *old_cred = NULL;
+ struct hmdfs_restore_stats stats;
+ int err = 0;
+
+ if (!hmdfs_is_stash_enabled(sbi) || conn->need_rebuild_stash_list) {
+ if (conn->need_rebuild_stash_list)
+ hmdfs_info("peer 0x%x:0x%llx skip restoring due to rebuild-need",
+ conn->owner, conn->device_id);
+ return;
+ }
+
+ /* Release seq_lock to avoid blocking non-online sync callbacks */
+ mutex_unlock(&conn->seq_lock);
+ /* For dir iteration, file read and unlink */
+ old_cred = hmdfs_override_creds(conn->sbi->cred);
+
+ memset(&stats, 0, sizeof(stats));
+ filp = hmdfs_open_stash_dir(&sbi->stash_work_dir, conn->cid);
+ if (IS_ERR(filp)) {
+ err = PTR_ERR(filp);
+ goto out;
+ }
+
+ err = hmdfs_iter_stash_file(conn, seq, filp,
+ hmdfs_restore_files, &stats);
+
+ fput(filp);
+out:
+ hmdfs_revert_creds(old_cred);
+
+ /* offline again ? */
+ if (err != -ESHUTDOWN)
+ hmdfs_drop_stashed_inodes(conn);
+
+ hmdfs_update_peer_restore_stats(&conn->stats.restore, &stats);
+ hmdfs_info("peer 0x%x:0x%llx restore stashed-file ok %u fail %u keep %u",
+ conn->owner, conn->device_id,
+ stats.succeed, stats.fail, stats.keep);
+
+ mutex_lock(&conn->seq_lock);
+}
+
+static void hmdfs_stash_del_do_cleanup(struct hmdfs_peer *conn, int evt,
+ unsigned int seq)
+{
+ struct hmdfs_inode_info *info = NULL;
+ struct hmdfs_inode_info *next = NULL;
+ unsigned int preparing;
+
+ if (!hmdfs_is_stash_enabled(conn->sbi))
+ return;
+
+ /* Async cb is cancelled */
+ preparing = 0;
+ list_for_each_entry_safe(info, next, &conn->wr_opened_inode_list,
+ wr_opened_node) {
+ int status = READ_ONCE(info->stash_status);
+
+ if (status == HMDFS_REMOTE_INODE_STASHING) {
+ struct hmdfs_cache_info *cache = NULL;
+
+ spin_lock(&info->stash_lock);
+ cache = info->cache;
+ info->cache = NULL;
+ info->stash_status = HMDFS_REMOTE_INODE_NONE;
+ spin_unlock(&info->stash_lock);
+
+ hmdfs_remote_del_wr_opened_inode(conn, info);
+ hmdfs_del_file_cache(cache);
+ /* Put the inode after all accesses have completed */
+ iput(&info->vfs_inode);
+ preparing++;
+ }
+ }
+ hmdfs_info("release %u preparing inodes", preparing);
+
+ hmdfs_info("release %u pinned inodes", conn->stashed_inode_nr);
+ if (list_empty(&conn->stashed_inode_list))
+ return;
+
+ list_for_each_entry_safe(info, next,
+ &conn->stashed_inode_list, stash_node)
+ hmdfs_untrack_stashed_inode(conn, info);
+}
+
+void hmdfs_exit_stash(struct hmdfs_sb_info *sbi)
+{
+ if (!sbi->s_offline_stash)
+ return;
+
+ if (sbi->stash_work_dir.dentry) {
+ path_put(&sbi->stash_work_dir);
+ sbi->stash_work_dir.dentry = NULL;
+ }
+}
+
+int hmdfs_init_stash(struct hmdfs_sb_info *sbi)
+{
+ int err = 0;
+ struct path parent;
+ struct dentry *child = NULL;
+
+ if (!sbi->s_offline_stash)
+ return 0;
+
+ err = kern_path(sbi->cache_dir, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
+ &parent);
+ if (err) {
+ hmdfs_err("invalid cache dir err %d", err);
+ goto out;
+ }
+
+ child = hmdfs_stash_new_work_dir(parent.dentry);
+ if (!IS_ERR(child)) {
+ sbi->stash_work_dir.mnt = mntget(parent.mnt);
+ sbi->stash_work_dir.dentry = child;
+ } else {
+ err = PTR_ERR(child);
+ hmdfs_err("create stash work dir err %d", err);
+ }
+
+ path_put(&parent);
+out:
+ return err;
+}
+
+static int hmdfs_stash_write_local_file(struct hmdfs_peer *conn,
+ struct hmdfs_inode_info *info,
+ struct hmdfs_writepage_context *ctx,
+ struct hmdfs_cache_info *cache)
+{
+ struct page *page = ctx->page;
+ const struct cred *old_cred = NULL;
+ void *buf = NULL;
+ loff_t pos;
+ unsigned int flags;
+ ssize_t written;
+ int err = 0;
+
+ buf = kmap(page);
+ pos = (loff_t)page->index << PAGE_SHIFT;
+ /* enable NOFS for memory allocation */
+ flags = memalloc_nofs_save();
+ old_cred = hmdfs_override_creds(conn->sbi->cred);
+ pos += cache->data_offs << HMDFS_STASH_BLK_SHIFT;
+ written = kernel_write(cache->cache_file, buf, ctx->count, &pos);
+ hmdfs_revert_creds(old_cred);
+ memalloc_nofs_restore(flags);
+ kunmap(page);
+
+ if (written != ctx->count) {
+ hmdfs_err("stash peer 0x%x:0x%llx ino 0x%llx page 0x%lx data_offs 0x%x len %u err %zd",
+ conn->owner, conn->device_id, info->remote_ino,
+ page->index, cache->data_offs, ctx->count, written);
+ err = -EIO;
+ }
+
+ return err;
+}
+
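+/*
+ * Entry point from the remote writepage path while the peer is
+ * offline: redirect the dirty page into the local stash file instead
+ * of sending it over the network. On success the writepage context is
+ * completed and freed here.
+ */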
+int hmdfs_stash_writepage(struct hmdfs_peer *conn,
+ struct hmdfs_writepage_context *ctx)
+{
+ struct inode *inode = ctx->page->mapping->host;
+ struct hmdfs_inode_info *info = hmdfs_i(inode);
+ struct hmdfs_cache_info *cache = NULL;
+ int err;
+
+ /* e.g. we failed to create the stash file */
+ cache = info->cache;
+ if (!cache)
+ return -EIO;
+
+ err = hmdfs_stash_write_local_file(conn, info, ctx, cache);
+ if (!err) {
+ hmdfs_client_writepage_done(info, ctx);
+ atomic64_inc(&cache->written_pgs);
+ put_task_struct(ctx->caller);
+ kfree(ctx);
+ }
+ atomic64_inc(&cache->to_write_pgs);
+
+ return err;
+}
+
+static void hmdfs_stash_rebuild_status(struct hmdfs_peer *conn,
+ struct inode *inode)
+{
+ char *path_str = NULL;
+ struct hmdfs_inode_info *info = NULL;
+ const struct cred *old_cred = NULL;
+ struct path path;
+ struct path *stash_path = NULL;
+ int err = 0;
+
+ path_str = kmalloc(HMDFS_STASH_PATH_LEN, GFP_KERNEL);
+ if (!path_str)
+ return;
+
+ info = hmdfs_i(inode);
+ err = snprintf(path_str, HMDFS_STASH_PATH_LEN, "%s/0x%llx",
+ conn->cid, info->remote_ino);
+ if (err >= HMDFS_STASH_PATH_LEN) {
+ kfree(path_str);
+ hmdfs_err("peer 0x%x:0x%llx inode 0x%llx too long name len",
+ conn->owner, conn->device_id, info->remote_ino);
+ return;
+ }
+ old_cred = hmdfs_override_creds(conn->sbi->cred);
+ stash_path = &conn->sbi->stash_work_dir;
+ err = vfs_path_lookup(stash_path->dentry, stash_path->mnt,
+ path_str, 0, &path);
+ hmdfs_revert_creds(old_cred);
+ if (!err) {
+ if (hmdfs_is_reg(path.dentry)) {
+ WRITE_ONCE(info->stash_status,
+ HMDFS_REMOTE_INODE_RESTORING);
+ ihold(&info->vfs_inode);
+ hmdfs_track_inode_locked(conn, info);
+ } else {
+ hmdfs_info("peer 0x%x:0x%llx inode 0x%llx unexpected stashed file mode 0%o",
+ conn->owner, conn->device_id,
+ info->remote_ino,
+ d_inode(path.dentry)->i_mode);
+ }
+
+ path_put(&path);
+ } else if (err && err != -ENOENT) {
+ hmdfs_err("peer 0x%x:0x%llx inode 0x%llx find %s err %d",
+ conn->owner, conn->device_id, info->remote_ino,
+ path_str, err);
+ }
+
+ kfree(path_str);
+}
+
+static inline bool
+hmdfs_need_rebuild_inode_stash_status(struct hmdfs_peer *conn, umode_t mode)
+{
+ return hmdfs_is_stash_enabled(conn->sbi) &&
+ READ_ONCE(conn->need_rebuild_stash_list) &&
+ (S_ISREG(mode) || S_ISLNK(mode));
+}
+
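+/*
+ * Rebuild the stash status for a freshly instantiated remote inode.
+ * The counter/waitqueue pair allows the peer to wait until all
+ * in-flight rebuilds have drained before clearing
+ * @conn->need_rebuild_stash_list.
+ */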
+void hmdfs_remote_init_stash_status(struct hmdfs_peer *conn,
+ struct inode *inode, umode_t mode)
+{
+ if (!hmdfs_need_rebuild_inode_stash_status(conn, mode))
+ return;
+
+ atomic_inc(&conn->rebuild_inode_status_nr);
+ /*
+ * Use smp_mb__after_atomic() to ensure order between writing
+ * @conn->rebuild_inode_status_nr and reading
+ * @conn->need_rebuild_stash_list.
+ */
+ smp_mb__after_atomic();
+ if (READ_ONCE(conn->need_rebuild_stash_list))
+ hmdfs_stash_rebuild_status(conn, inode);
+ if (atomic_dec_and_test(&conn->rebuild_inode_status_nr))
+ wake_up(&conn->rebuild_inode_status_wq);
+}
+
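+/*
+ * Node event callbacks driving the stash life cycle: prepare and do
+ * the stash when a peer goes offline, check and restore stashed data
+ * when it comes back online, and clean up when the peer is deleted.
+ */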
+static struct hmdfs_node_cb_desc stash_cb[] = {
+ {
+ .evt = NODE_EVT_OFFLINE,
+ .sync = true,
+ .min_version = DFS_2_0,
+ .fn = hmdfs_stash_offline_prepare,
+ },
+ {
+ .evt = NODE_EVT_OFFLINE,
+ .sync = false,
+ .min_version = DFS_2_0,
+ .fn = hmdfs_stash_offline_do_stash,
+ },
+	/* The peer's protocol version is not known yet, so min_version is 0 */
+ {
+ .evt = NODE_EVT_ADD,
+ .sync = true,
+ .fn = hmdfs_stash_add_do_check,
+ },
+ {
+ .evt = NODE_EVT_ONLINE,
+ .sync = false,
+ .min_version = DFS_2_0,
+ .fn = hmdfs_stash_online_prepare,
+ },
+ {
+ .evt = NODE_EVT_ONLINE,
+ .sync = false,
+ .min_version = DFS_2_0,
+ .fn = hmdfs_stash_online_do_restore,
+ },
+ {
+ .evt = NODE_EVT_DEL,
+ .sync = true,
+ .min_version = DFS_2_0,
+ .fn = hmdfs_stash_del_do_cleanup,
+ },
+};
+
+void __init hmdfs_stash_add_node_evt_cb(void)
+{
+ hmdfs_node_add_evt_cb(stash_cb, ARRAY_SIZE(stash_cb));
+}
+
diff --git a/fs/hmdfs/stash.h b/fs/hmdfs/stash.h
new file mode 100644
index 0000000000000000000000000000000000000000..f38e737f94721093eb305b08c8c4128dbed218e0
--- /dev/null
+++ b/fs/hmdfs/stash.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/stash.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_STASH_H
+#define HMDFS_STASH_H
+
+#include "hmdfs.h"
+#include "hmdfs_client.h"
+
+extern void hmdfs_stash_add_node_evt_cb(void);
+
+extern void hmdfs_exit_stash(struct hmdfs_sb_info *sbi);
+extern int hmdfs_init_stash(struct hmdfs_sb_info *sbi);
+
+extern int hmdfs_stash_writepage(struct hmdfs_peer *conn,
+ struct hmdfs_writepage_context *ctx);
+
+extern void hmdfs_remote_init_stash_status(struct hmdfs_peer *conn,
+ struct inode *inode, umode_t mode);
+
+#endif
diff --git a/fs/hmdfs/super.c b/fs/hmdfs/super.c
new file mode 100644
index 0000000000000000000000000000000000000000..92012f80ab3768395a127d233e10c00d3eeb6b11
--- /dev/null
+++ b/fs/hmdfs/super.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/super.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include <linux/backing-dev.h>
+#include <linux/parser.h>
+#include <linux/ratelimit.h>
+#include <linux/slab.h>
+
+#include "hmdfs.h"
+
+enum {
+ OPT_RA_PAGES,
+ OPT_LOCAL_DST,
+ OPT_CACHE_DIR,
+ OPT_S_CASE,
+ OPT_VIEW_TYPE,
+ OPT_NO_OFFLINE_STASH,
+ OPT_NO_DENTRY_CACHE,
+ OPT_ERR,
+};
+
+static match_table_t hmdfs_tokens = {
+ { OPT_RA_PAGES, "ra_pages=%s" },
+ { OPT_LOCAL_DST, "local_dst=%s" },
+ { OPT_CACHE_DIR, "cache_dir=%s" },
+ { OPT_S_CASE, "sensitive" },
+ { OPT_VIEW_TYPE, "merge" },
+ { OPT_NO_OFFLINE_STASH, "no_offline_stash" },
+ { OPT_NO_DENTRY_CACHE, "no_dentry_cache" },
+ { OPT_ERR, NULL },
+};
+
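+/*
+ * Illustrative mount option string (paths below are hypothetical):
+ *   "ra_pages=256,local_dst=/mnt/hmdfs/dst,cache_dir=/data/hmdfs,merge,sensitive"
+ */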
+#define DEFAULT_RA_PAGES 128
+
+void __hmdfs_log(const char *level, const bool ratelimited,
+ const char *function, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ if (ratelimited)
+ printk_ratelimited("%s hmdfs: %s() %pV\n", level,
+ function, &vaf);
+ else
+ printk("%s hmdfs: %s() %pV\n", level, function, &vaf);
+ va_end(args);
+}
+
+static int hmdfs_match_strdup(const substring_t *s, char **dst)
+{
+ char *dup = NULL;
+
+ dup = match_strdup(s);
+ if (!dup)
+ return -ENOMEM;
+
+ *dst = dup;
+
+ return 0;
+}
+
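+/*
+ * Parse the mount option string into @sbi. local_dst is mandatory;
+ * offline stash and the dentry cache are silently disabled when no
+ * cache_dir is supplied.
+ */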
+int hmdfs_parse_options(struct hmdfs_sb_info *sbi, const char *data)
+{
+ char *p = NULL;
+ char *name = NULL;
+ char *options = NULL;
+ char *options_src = NULL;
+ substring_t args[MAX_OPT_ARGS];
+	unsigned long value = DEFAULT_RA_PAGES;
+ struct super_block *sb = sbi->sb;
+ int err = 0;
+
+ options = kstrdup(data, GFP_KERNEL);
+ if (data && !options) {
+ err = -ENOMEM;
+ goto out;
+ }
+ options_src = options;
+ err = super_setup_bdi(sb);
+ if (err)
+ goto out;
+
+ while ((p = strsep(&options_src, ",")) != NULL) {
+ int token;
+
+ if (!*p)
+ continue;
+ args[0].to = args[0].from = NULL;
+ token = match_token(p, hmdfs_tokens, args);
+
+ switch (token) {
+ case OPT_RA_PAGES:
+			name = match_strdup(&args[0]);
+			if (name) {
+				err = kstrtoul(name, 10, &value);
+				kfree(name);
+				name = NULL;
+				if (err)
+					goto out;
+			}
+ break;
+ case OPT_LOCAL_DST:
+ err = hmdfs_match_strdup(&args[0], &sbi->local_dst);
+ if (err)
+ goto out;
+ break;
+ case OPT_CACHE_DIR:
+ err = hmdfs_match_strdup(&args[0], &sbi->cache_dir);
+ if (err)
+ goto out;
+ break;
+ case OPT_S_CASE:
+ sbi->s_case_sensitive = true;
+ break;
+ case OPT_VIEW_TYPE:
+ sbi->s_merge_switch = true;
+ break;
+ case OPT_NO_OFFLINE_STASH:
+ sbi->s_offline_stash = false;
+ break;
+ case OPT_NO_DENTRY_CACHE:
+ sbi->s_dentry_cache = false;
+ break;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+ }
+out:
+	kfree(options);
+	if (!err)
+		sb->s_bdi->ra_pages = value;
+	if (!sbi->local_dst)
+		err = -EINVAL;
+
+ if (sbi->s_offline_stash && !sbi->cache_dir) {
+ hmdfs_warning("no cache_dir for offline stash");
+ sbi->s_offline_stash = false;
+ }
+
+ if (sbi->s_dentry_cache && !sbi->cache_dir) {
+ hmdfs_warning("no cache_dir for dentry cache");
+ sbi->s_dentry_cache = false;
+ }
+
+ return err;
+}