From c049769e634d48a5b0dd6fdeb2ba3a22323a4e4d Mon Sep 17 00:00:00 2001
From: wang_yuanzhi
Date: Tue, 13 Aug 2024 17:13:02 +0800
Subject: [PATCH] Add Linux kernel OH features
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: wang_yuanzhi
---
 drivers/Kconfig | 2 + drivers/Makefile | 2 + drivers/accesstokenid/Kconfig | 5 + drivers/accesstokenid/Makefile | 2 + drivers/accesstokenid/access_tokenid.c | 397 +++ drivers/accesstokenid/access_tokenid.h | 73 + drivers/hck/Kconfig | 21 + drivers/hck/Makefile | 4 + drivers/hck/vendor_hooks.c | 17 + drivers/staging/Kconfig | 6 + drivers/staging/Makefile | 6 + drivers/staging/blackbox/Kconfig | 108 + drivers/staging/blackbox/Makefile | 5 + drivers/staging/blackbox/blackbox_common.c | 270 ++ drivers/staging/blackbox/blackbox_core.c | 596 ++++ drivers/staging/blackbox/blackbox_storage.c | 203 ++ drivers/staging/hievent/Kconfig | 12 + drivers/staging/hievent/Makefile | 2 + drivers/staging/hievent/hievent_driver.c | 416 +++ drivers/staging/hievent/hievent_driver.h | 22 + drivers/staging/hievent/hiview_hievent.c | 488 +++ drivers/staging/hievent/hiview_hievent.h | 34 + drivers/staging/hilog/Kconfig | 22 + drivers/staging/hilog/Makefile | 5 + drivers/staging/hilog/hilog.c | 402 +++ drivers/staging/hisysevent/Kconfig | 6 + drivers/staging/hisysevent/Makefile | 2 + .../staging/hisysevent/hiview_hisysevent.c | 519 ++++ drivers/staging/hungtask/Kconfig | 14 + drivers/staging/hungtask/Makefile | 3 + drivers/staging/hungtask/hungtask_base.c | 1031 ++++++ drivers/staging/hungtask/hungtask_user.c | 260 ++ drivers/staging/hungtask/hungtask_user.h | 37 + drivers/staging/zerohung/Kconfig | 7 + drivers/staging/zerohung/Makefile | 2 + drivers/staging/zerohung/watchpoint/Makefile | 2 + .../zerohung/watchpoint/hung_wp_screen.c | 299 ++ drivers/staging/zerohung/zrhung_event.c | 34 + fs/epfs/Kconfig | 12 + fs/epfs/Makefile | 3 + fs/epfs/dentry.c | 23 + fs/epfs/dir.c | 18 + fs/epfs/epfs.h | 43 + fs/epfs/file.c | 296 ++ fs/epfs/inode.c | 111 + fs/epfs/internal.h | 39 + fs/epfs/main.c | 44 + fs/epfs/super.c | 129 + fs/hmdfs/Kconfig | 40 + fs/hmdfs/Makefile | 15 + fs/hmdfs/authority/authentication.c | 459 +++ fs/hmdfs/authority/authentication.h | 350 +++ fs/hmdfs/authority/config.c | 377 +++ fs/hmdfs/client_writeback.c | 519 ++++ fs/hmdfs/client_writeback.h | 136 + fs/hmdfs/comm/connection.c | 1312 ++++++++ fs/hmdfs/comm/connection.h | 358 +++ fs/hmdfs/comm/crypto.c | 260 ++ fs/hmdfs/comm/crypto.h | 36 + fs/hmdfs/comm/device_node.c | 1694 ++++++++++ fs/hmdfs/comm/device_node.h | 109 + fs/hmdfs/comm/message_verify.c | 980 ++++++ fs/hmdfs/comm/message_verify.h | 27 + fs/hmdfs/comm/node_cb.c | 76 + fs/hmdfs/comm/node_cb.h | 44 + fs/hmdfs/comm/protocol.h | 489 +++ fs/hmdfs/comm/socket_adapter.c | 1158 +++++++ fs/hmdfs/comm/socket_adapter.h | 193 ++ fs/hmdfs/comm/transport.c | 1218 ++++++++ fs/hmdfs/comm/transport.h | 76 + fs/hmdfs/dentry.c | 333 ++ fs/hmdfs/file_local.c | 386 +++ fs/hmdfs/file_merge.c | 585 ++++ fs/hmdfs/file_remote.c | 1056 +++++++ fs/hmdfs/file_remote.h | 26 + fs/hmdfs/file_root.c | 154 + fs/hmdfs/hmdfs.h | 345 +++ fs/hmdfs/hmdfs_client.c | 1097 +++++++ fs/hmdfs/hmdfs_client.h | 121 + fs/hmdfs/hmdfs_dentryfile.c | 2756 +++++++++++++++++ fs/hmdfs/hmdfs_dentryfile.h | 342 ++ fs/hmdfs/hmdfs_device_view.h | 252 ++ fs/hmdfs/hmdfs_merge_view.h | 200 ++ fs/hmdfs/hmdfs_server.c | 1943 ++++++++++++ fs/hmdfs/hmdfs_server.h | 83 + fs/hmdfs/hmdfs_share.c | 348 +++
fs/hmdfs/hmdfs_share.h | 67 + fs/hmdfs/hmdfs_trace.h | 891 ++++++ fs/hmdfs/inode.c | 254 ++ fs/hmdfs/inode.h | 237 ++ fs/hmdfs/inode_local.c | 883 ++++++ fs/hmdfs/inode_merge.c | 1401 +++++++++ fs/hmdfs/inode_remote.c | 1001 ++++++ fs/hmdfs/inode_root.c | 307 ++ fs/hmdfs/main.c | 1101 +++++++ fs/hmdfs/server_writeback.c | 135 + fs/hmdfs/server_writeback.h | 40 + fs/hmdfs/stash.c | 2247 ++++++++++++++ fs/hmdfs/stash.h | 25 + fs/hmdfs/super.c | 175 ++ fs/proc/base.c | 20 + include/dfx/hiview_hisysevent.h | 67 + include/dfx/hung_wp_screen.h | 24 + include/dfx/hungtask_base.h | 111 + include/dfx/zrhung.h | 11 + include/linux/blackbox.h | 84 + include/linux/blackbox_common.h | 44 + include/linux/blackbox_storage.h | 22 + include/linux/hck/lite_hck_ced.h | 50 + include/linux/hck/lite_hck_code_sign.h | 38 + include/linux/hck/lite_hck_hideaddr.h | 25 + include/linux/hck/lite_hck_inet.h | 31 + include/linux/hck/lite_hck_jit_memory.h | 41 + include/linux/hck/lite_hck_sample.h | 36 + include/linux/hck/lite_hck_xpm.h | 55 + include/linux/hck/lite_vendor_hooks.h | 126 + include/linux/sched.h | 4 + include/linux/uidgid.h | 3 + kernel/fork.c | 4 + 119 files changed, 35567 insertions(+) create mode 100755 drivers/accesstokenid/Kconfig create mode 100755 drivers/accesstokenid/Makefile create mode 100755 drivers/accesstokenid/access_tokenid.c create mode 100755 drivers/accesstokenid/access_tokenid.h create mode 100755 drivers/hck/Kconfig create mode 100755 drivers/hck/Makefile create mode 100755 drivers/hck/vendor_hooks.c create mode 100755 drivers/staging/blackbox/Kconfig create mode 100755 drivers/staging/blackbox/Makefile create mode 100755 drivers/staging/blackbox/blackbox_common.c create mode 100755 drivers/staging/blackbox/blackbox_core.c create mode 100755 drivers/staging/blackbox/blackbox_storage.c create mode 100755 drivers/staging/hievent/Kconfig create mode 100755 drivers/staging/hievent/Makefile create mode 100755 drivers/staging/hievent/hievent_driver.c create mode 100755 drivers/staging/hievent/hievent_driver.h create mode 100755 drivers/staging/hievent/hiview_hievent.c create mode 100755 drivers/staging/hievent/hiview_hievent.h create mode 100755 drivers/staging/hilog/Kconfig create mode 100755 drivers/staging/hilog/Makefile create mode 100755 drivers/staging/hilog/hilog.c create mode 100755 drivers/staging/hisysevent/Kconfig create mode 100755 drivers/staging/hisysevent/Makefile create mode 100755 drivers/staging/hisysevent/hiview_hisysevent.c create mode 100755 drivers/staging/hungtask/Kconfig create mode 100755 drivers/staging/hungtask/Makefile create mode 100755 drivers/staging/hungtask/hungtask_base.c create mode 100755 drivers/staging/hungtask/hungtask_user.c create mode 100755 drivers/staging/hungtask/hungtask_user.h create mode 100755 drivers/staging/zerohung/Kconfig create mode 100755 drivers/staging/zerohung/Makefile create mode 100755 drivers/staging/zerohung/watchpoint/Makefile create mode 100755 drivers/staging/zerohung/watchpoint/hung_wp_screen.c create mode 100755 drivers/staging/zerohung/zrhung_event.c create mode 100755 fs/epfs/Kconfig create mode 100755 fs/epfs/Makefile create mode 100755 fs/epfs/dentry.c create mode 100755 fs/epfs/dir.c create mode 100755 fs/epfs/epfs.h create mode 100755 fs/epfs/file.c create mode 100755 fs/epfs/inode.c create mode 100755 fs/epfs/internal.h create mode 100755 fs/epfs/main.c create mode 100755 fs/epfs/super.c create mode 100755 fs/hmdfs/Kconfig create mode 100755 fs/hmdfs/Makefile create mode 100755 fs/hmdfs/authority/authentication.c 
create mode 100755 fs/hmdfs/authority/authentication.h create mode 100755 fs/hmdfs/authority/config.c create mode 100755 fs/hmdfs/client_writeback.c create mode 100755 fs/hmdfs/client_writeback.h create mode 100755 fs/hmdfs/comm/connection.c create mode 100755 fs/hmdfs/comm/connection.h create mode 100755 fs/hmdfs/comm/crypto.c create mode 100755 fs/hmdfs/comm/crypto.h create mode 100755 fs/hmdfs/comm/device_node.c create mode 100755 fs/hmdfs/comm/device_node.h create mode 100755 fs/hmdfs/comm/message_verify.c create mode 100755 fs/hmdfs/comm/message_verify.h create mode 100755 fs/hmdfs/comm/node_cb.c create mode 100755 fs/hmdfs/comm/node_cb.h create mode 100755 fs/hmdfs/comm/protocol.h create mode 100755 fs/hmdfs/comm/socket_adapter.c create mode 100755 fs/hmdfs/comm/socket_adapter.h create mode 100755 fs/hmdfs/comm/transport.c create mode 100755 fs/hmdfs/comm/transport.h create mode 100755 fs/hmdfs/dentry.c create mode 100755 fs/hmdfs/file_local.c create mode 100755 fs/hmdfs/file_merge.c create mode 100755 fs/hmdfs/file_remote.c create mode 100755 fs/hmdfs/file_remote.h create mode 100755 fs/hmdfs/file_root.c create mode 100755 fs/hmdfs/hmdfs.h create mode 100755 fs/hmdfs/hmdfs_client.c create mode 100755 fs/hmdfs/hmdfs_client.h create mode 100755 fs/hmdfs/hmdfs_dentryfile.c create mode 100755 fs/hmdfs/hmdfs_dentryfile.h create mode 100755 fs/hmdfs/hmdfs_device_view.h create mode 100755 fs/hmdfs/hmdfs_merge_view.h create mode 100755 fs/hmdfs/hmdfs_server.c create mode 100755 fs/hmdfs/hmdfs_server.h create mode 100755 fs/hmdfs/hmdfs_share.c create mode 100755 fs/hmdfs/hmdfs_share.h create mode 100755 fs/hmdfs/hmdfs_trace.h create mode 100755 fs/hmdfs/inode.c create mode 100755 fs/hmdfs/inode.h create mode 100755 fs/hmdfs/inode_local.c create mode 100755 fs/hmdfs/inode_merge.c create mode 100755 fs/hmdfs/inode_remote.c create mode 100755 fs/hmdfs/inode_root.c create mode 100755 fs/hmdfs/main.c create mode 100755 fs/hmdfs/server_writeback.c create mode 100755 fs/hmdfs/server_writeback.h create mode 100755 fs/hmdfs/stash.c create mode 100755 fs/hmdfs/stash.h create mode 100755 fs/hmdfs/super.c create mode 100755 include/dfx/hiview_hisysevent.h create mode 100755 include/dfx/hung_wp_screen.h create mode 100755 include/dfx/hungtask_base.h create mode 100755 include/dfx/zrhung.h create mode 100755 include/linux/blackbox.h create mode 100755 include/linux/blackbox_common.h create mode 100755 include/linux/blackbox_storage.h create mode 100755 include/linux/hck/lite_hck_ced.h create mode 100755 include/linux/hck/lite_hck_code_sign.h create mode 100755 include/linux/hck/lite_hck_hideaddr.h create mode 100755 include/linux/hck/lite_hck_inet.h create mode 100755 include/linux/hck/lite_hck_jit_memory.h create mode 100755 include/linux/hck/lite_hck_sample.h create mode 100755 include/linux/hck/lite_hck_xpm.h create mode 100755 include/linux/hck/lite_vendor_hooks.h diff --git a/drivers/Kconfig b/drivers/Kconfig index 0d399ddaa..6f57f1827 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -236,4 +236,6 @@ source "drivers/interconnect/Kconfig" source "drivers/counter/Kconfig" source "drivers/most/Kconfig" +source "drivers/accesstokenid/Kconfig" +source "drivers/hck/Kconfig" endmenu diff --git a/drivers/Makefile b/drivers/Makefile index a110338c8..cd996585f 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -187,3 +187,5 @@ obj-$(CONFIG_GNSS) += gnss/ obj-$(CONFIG_INTERCONNECT) += interconnect/ obj-$(CONFIG_COUNTER) += counter/ obj-$(CONFIG_MOST) += most/ +obj-$(CONFIG_ACCESS_TOKENID) += 
accesstokenid/ +obj-$(CONFIG_HCK_VENDOR_HOOKS) += hck/ diff --git a/drivers/accesstokenid/Kconfig b/drivers/accesstokenid/Kconfig new file mode 100755 index 000000000..30d2957a1 --- /dev/null +++ b/drivers/accesstokenid/Kconfig @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 +config ACCESS_TOKENID + bool "Access task's token" + default n + diff --git a/drivers/accesstokenid/Makefile b/drivers/accesstokenid/Makefile new file mode 100755 index 000000000..738a550f8 --- /dev/null +++ b/drivers/accesstokenid/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_ACCESS_TOKENID) += access_tokenid.o diff --git a/drivers/accesstokenid/access_tokenid.c b/drivers/accesstokenid/access_tokenid.c new file mode 100755 index 000000000..33a61ef16 --- /dev/null +++ b/drivers/accesstokenid/access_tokenid.c @@ -0,0 +1,397 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * access_tokenid.c + * + * Copyright (C) 2022-2023 Huawei Technologies Co., Ltd. All rights reserved. + * + */ + +#define pr_fmt(fmt) "access_token_id: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include "access_tokenid.h" + +DEFINE_RWLOCK(token_rwlock); +#define ACCESS_TOKEN_UID KUIDT_INIT(3020) +#define MAX_NODE_NUM 500 +#define UINT32_T_BITS 32 + +static struct kmem_cache *g_cache = NULL; +static struct token_perm_node *g_token_perm_root = NULL; +static size_t g_total_node_num = 0; + +int access_tokenid_get_tokenid(struct file *file, void __user *uarg) +{ + return copy_to_user(uarg, ¤t->token, + sizeof(current->token)) ? -EFAULT : 0; +} + +static bool check_permission_for_set_tokenid(struct file *file, unsigned long long tokenid) +{ + kuid_t uid = current_uid(); + struct inode *inode = file->f_inode; + access_tokenid_inner *tokenid_inner = (access_tokenid_inner *)&tokenid; + + if (inode == NULL) { + pr_err("%s: file inode is null\n", __func__); + return false; + } + + if (uid_eq(uid, GLOBAL_ROOT_UID) || + uid_eq(uid, inode->i_uid)) { + return true; + } else if (uid_eq(uid, NWEBSPAWN_UID) && (tokenid_inner->render_flag == 1)) { + return true; + } + + return false; +} + +int access_tokenid_set_tokenid(struct file *file, void __user *uarg) +{ + unsigned long long tmp = 0; + + if (copy_from_user(&tmp, uarg, sizeof(tmp))) + return -EFAULT; + + if (!check_permission_for_set_tokenid(file, tmp)) + return -EPERM; + + current->token = tmp; + return 0; +} + +static bool check_permission_for_ftokenid(struct file *file) +{ + int i; + struct group_info *group_info; + kuid_t uid = current_uid(); + struct inode *inode = file->f_inode; + + if (inode == NULL) { + pr_err("%s: file inode is null\n", __func__); + return false; + } + + if (uid_eq(uid, GLOBAL_ROOT_UID)) + return true; + + group_info = get_current_groups(); + for (i = 0; i < group_info->ngroups; i++) { + kgid_t gid = group_info->gid[i]; + + if (gid_eq(gid, inode->i_gid)) { + put_group_info(group_info); + return true; + } + } + + put_group_info(group_info); + return false; +} + +int access_tokenid_get_ftokenid(struct file *file, void __user *uarg) +{ + if (!check_permission_for_ftokenid(file)) + return -EPERM; + + return copy_to_user(uarg, ¤t->ftoken, + sizeof(current->ftoken)) ? 
-EFAULT : 0;
+}
+
+int access_tokenid_set_ftokenid(struct file *file, void __user *uarg)
+{
+    unsigned long long tmp = 0;
+
+    if (!check_permission_for_ftokenid(file))
+        return -EPERM;
+
+    if (copy_from_user(&tmp, uarg, sizeof(tmp)))
+        return -EFAULT;
+
+    current->ftoken = tmp;
+    return 0;
+}
+
+static bool check_permission_for_set_token_permission(void)
+{
+    kuid_t uid = current_uid();
+
+    return uid_eq(uid, ACCESS_TOKEN_UID);
+}
+
+static void add_node_to_left_tree_tail(struct token_perm_node *root_node, struct token_perm_node *node)
+{
+    if ((root_node == NULL) || (node == NULL))
+        return;
+
+    struct token_perm_node *current_node = root_node;
+    while (true) {
+        if (current_node->left == NULL) {
+            current_node->left = node;
+            break;
+        }
+        current_node = current_node->left;
+    }
+}
+
+static void find_node_by_token(struct token_perm_node *root_node, uint32_t token,
+                               struct token_perm_node **target_node, struct token_perm_node **parent_node)
+{
+    *target_node = NULL;
+    *parent_node = NULL;
+    struct token_perm_node *current_node = root_node;
+    while (current_node != NULL) {
+        if (current_node->perm_data.token == token) {
+            *target_node = current_node;
+            break;
+        }
+        *parent_node = current_node;
+        if (current_node->perm_data.token > token) {
+            current_node = current_node->left;
+        } else {
+            current_node = current_node->right;
+        }
+    }
+}
+
+static int add_node_to_tree(struct token_perm_node *root_node, struct token_perm_node *node)
+{
+    struct token_perm_node *target_node = NULL;
+    struct token_perm_node *parent_node = NULL;
+    find_node_by_token(root_node, node->perm_data.token, &target_node, &parent_node);
+    if (target_node != NULL) {
+        target_node->perm_data = node->perm_data;
+        return 0;
+    }
+    if (g_total_node_num >= MAX_NODE_NUM) {
+        pr_err("%s: the number of token nodes is exceeded.\n", __func__);
+        return -EDQUOT;
+    }
+    if (parent_node == NULL) {
+        g_token_perm_root = node;
+    } else if (parent_node->perm_data.token > node->perm_data.token) {
+        parent_node->left = node;
+    } else {
+        parent_node->right = node;
+    }
+    g_total_node_num++;
+    return 1;
+}
+
+static struct token_perm_node *remove_node_by_token(struct token_perm_node *root_node, uint32_t token)
+{
+    struct token_perm_node *target_node = NULL;
+    struct token_perm_node *parent_node = NULL;
+    find_node_by_token(root_node, token, &target_node, &parent_node);
+    if (target_node == NULL) {
+        pr_err("%s: target token to be removed not found.\n", __func__);
+        return NULL;
+    }
+
+    struct token_perm_node **new_node_addr = NULL;
+    if (parent_node == NULL) {
+        /* removing the root: update the global root, not the local parameter copy */
+        new_node_addr = &g_token_perm_root;
+    } else if (parent_node->perm_data.token > token) {
+        new_node_addr = &(parent_node->left);
+    } else {
+        new_node_addr = &(parent_node->right);
+    }
+    if (target_node->right != NULL) {
+        *new_node_addr = target_node->right;
+        add_node_to_left_tree_tail(target_node->right, target_node->left);
+    } else {
+        *new_node_addr = target_node->left;
+    }
+    g_total_node_num--;
+    return target_node;
+}
+
+int access_tokenid_add_permission(struct file *file, void __user *uarg)
+{
+    if (!check_permission_for_set_token_permission())
+        return -EPERM;
+
+    struct token_perm_node *node = kmem_cache_zalloc(g_cache, GFP_KERNEL);
+    if (node == NULL)
+        return -ENOMEM;
+    if (copy_from_user(&(node->perm_data), uarg, sizeof(ioctl_add_perm_data))) {
+        kmem_cache_free(g_cache, node);
+        return -EFAULT;
+    }
+
+    write_lock(&token_rwlock);
+    int ret = add_node_to_tree(g_token_perm_root, node);
+    write_unlock(&token_rwlock);
+    if (ret <= 0) {
+        kmem_cache_free(g_cache, node);
+        return ret;
+    }
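+    /* ret == 1: the new node is now linked into the tree and must stay allocated */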
+ return 0; +} + +int access_tokenid_remove_permission(struct file *file, void __user *uarg) +{ + if (!check_permission_for_set_token_permission()) + return -EPERM; + + uint32_t token = 0; + if (copy_from_user(&token, uarg, sizeof(token))) + return -EFAULT; + + write_lock(&token_rwlock); + struct token_perm_node *target_node = remove_node_by_token(g_token_perm_root, token); + write_unlock(&token_rwlock); + + if (target_node != NULL) + kmem_cache_free(g_cache, target_node); + + return 0; +} + +int access_tokenid_set_permission(struct file *file, void __user *uarg) +{ + if (!check_permission_for_set_token_permission()) + return -EPERM; + + ioctl_set_get_perm_data set_perm_data; + if (copy_from_user(&set_perm_data, uarg, sizeof(set_perm_data))) + return -EFAULT; + + uint32_t idx = set_perm_data.op_code / UINT32_T_BITS; + if (idx >= MAX_PERM_GROUP_NUM) { + pr_err("%s: invalid op_code.\n", __func__); + return -EINVAL; + } + + struct token_perm_node *target_node = NULL; + struct token_perm_node *parent_node = NULL; + write_lock(&token_rwlock); + find_node_by_token(g_token_perm_root, set_perm_data.token, &target_node, &parent_node); + if (target_node == NULL) { + write_unlock(&token_rwlock); + pr_err("%s: token not found.\n", __func__); + return -ENODATA; + } + uint32_t bit_idx = set_perm_data.op_code % UINT32_T_BITS; + if (set_perm_data.is_granted) { + target_node->perm_data.perm[idx] |= (uint32_t)0x01 << bit_idx; + } else { + target_node->perm_data.perm[idx] &= ~((uint32_t)0x01 << bit_idx); + } + write_unlock(&token_rwlock); + return 0; +} + +int access_tokenid_get_permission(struct file *file, void __user *uarg) +{ + ioctl_set_get_perm_data get_perm_data; + if (copy_from_user(&get_perm_data, uarg, sizeof(get_perm_data))) + return -EFAULT; + + uint32_t idx = get_perm_data.op_code / UINT32_T_BITS; + if (idx >= MAX_PERM_GROUP_NUM) { + pr_err("%s: invalid op_code.\n", __func__); + return -EINVAL; + } + + struct token_perm_node *target_node = NULL; + struct token_perm_node *parent_node = NULL; + read_lock(&token_rwlock); + find_node_by_token(g_token_perm_root, get_perm_data.token, &target_node, &parent_node); + read_unlock(&token_rwlock); + if (target_node == NULL) + return -ENODATA; + + uint32_t bit_idx = get_perm_data.op_code % UINT32_T_BITS; + return (target_node->perm_data.perm[idx] & ((uint32_t)0x01 << bit_idx)) >> bit_idx; +} + +typedef int (*access_token_id_func)(struct file *file, void __user *arg); + +static access_token_id_func g_func_array[ACCESS_TOKENID_MAX_NR] = { + NULL, /* reserved */ + access_tokenid_get_tokenid, + access_tokenid_set_tokenid, + access_tokenid_get_ftokenid, + access_tokenid_set_ftokenid, + access_tokenid_add_permission, + access_tokenid_remove_permission, + access_tokenid_get_permission, + access_tokenid_set_permission, +}; + +static long access_tokenid_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + void __user *uarg = (void __user *)arg; + unsigned int func_cmd = _IOC_NR(cmd); + + if (uarg == NULL) { + pr_err("%s: invalid user uarg\n", __func__); + return -EINVAL; + } + + if (_IOC_TYPE(cmd) != ACCESS_TOKEN_ID_IOCTL_BASE) { + pr_err("%s: access tokenid magic fail, TYPE=%d\n", + __func__, _IOC_TYPE(cmd)); + return -EINVAL; + } + + if (func_cmd >= ACCESS_TOKENID_MAX_NR) { + pr_err("%s: access tokenid cmd error, cmd:%d\n", + __func__, func_cmd); + return -EINVAL; + } + + if (g_func_array[func_cmd]) + return (*g_func_array[func_cmd])(file, uarg); + + return -EINVAL; +} + +static const struct file_operations access_tokenid_fops = { + .owner = 
THIS_MODULE,
+    .unlocked_ioctl = access_tokenid_ioctl,
+    .compat_ioctl = access_tokenid_ioctl,
+};
+
+static struct miscdevice access_tokenid_device = {
+    .minor = MISC_DYNAMIC_MINOR,
+    .name = "access_token_id",
+    .fops = &access_tokenid_fops,
+};
+
+static int access_tokenid_init_module(void)
+{
+    int err;
+
+    err = misc_register(&access_tokenid_device);
+    if (err < 0) {
+        pr_err("access_tokenid register failed\n");
+        return err;
+    }
+
+    g_cache = kmem_cache_create("access_token_node", sizeof(struct token_perm_node), 0, SLAB_HWCACHE_ALIGN, NULL);
+    if (g_cache == NULL) {
+        pr_err("access_tokenid kmem_cache create failed\n");
+        /* do not leave the misc device registered on the error path */
+        misc_deregister(&access_tokenid_device);
+        return -ENOMEM;
+    }
+    pr_info("access_tokenid init success\n");
+    return 0;
+}
+
+static void access_tokenid_exit_module(void)
+{
+    kmem_cache_destroy(g_cache);
+    misc_deregister(&access_tokenid_device);
+}
+
+/* module entry points */
+module_init(access_tokenid_init_module);
+module_exit(access_tokenid_exit_module);
diff --git a/drivers/accesstokenid/access_tokenid.h b/drivers/accesstokenid/access_tokenid.h
new file mode 100755
index 000000000..7eb3119ef
--- /dev/null
+++ b/drivers/accesstokenid/access_tokenid.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * access_tokenid.h
+ *
+ * Copyright (C) 2022-2023 Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ */
+
+#ifndef _ACCESS_TOKEN_ID_H
+#define _ACCESS_TOKEN_ID_H
+
+#include
+#include
+
+#define ACCESS_TOKEN_ID_IOCTL_BASE 'A'
+#define MAX_PERM_GROUP_NUM 64
+
+enum {
+    GET_TOKEN_ID = 1,
+    SET_TOKEN_ID,
+    GET_FTOKEN_ID,
+    SET_FTOKEN_ID,
+    ADD_PERMISSIONS,
+    REMOVE_PERMISSIONS,
+    GET_PERMISSION,
+    SET_PERMISSION,
+    ACCESS_TOKENID_MAX_NR
+};
+
+typedef struct {
+    unsigned int token_uniqueid : 20;
+    unsigned int res : 5;
+    unsigned int render_flag : 1;
+    unsigned int dlp_flag : 1;
+    unsigned int type : 2;
+    unsigned int version : 3;
+} access_tokenid_inner;
+
+typedef struct {
+    uint32_t token;
+    uint32_t op_code;
+    bool is_granted;
+} ioctl_set_get_perm_data;
+
+typedef struct {
+    uint32_t token;
+    uint32_t perm[MAX_PERM_GROUP_NUM];
+} ioctl_add_perm_data;
+
+struct token_perm_node {
+    ioctl_add_perm_data perm_data;
+    struct token_perm_node *left;
+    struct token_perm_node *right;
+};
+
+#define ACCESS_TOKENID_GET_TOKENID \
+    _IOR(ACCESS_TOKEN_ID_IOCTL_BASE, GET_TOKEN_ID, unsigned long long)
+#define ACCESS_TOKENID_SET_TOKENID \
+    _IOW(ACCESS_TOKEN_ID_IOCTL_BASE, SET_TOKEN_ID, unsigned long long)
+#define ACCESS_TOKENID_GET_FTOKENID \
+    _IOR(ACCESS_TOKEN_ID_IOCTL_BASE, GET_FTOKEN_ID, unsigned long long)
+#define ACCESS_TOKENID_SET_FTOKENID \
+    _IOW(ACCESS_TOKEN_ID_IOCTL_BASE, SET_FTOKEN_ID, unsigned long long)
+#define ACCESS_TOKENID_ADD_PERMISSIONS \
+    _IOW(ACCESS_TOKEN_ID_IOCTL_BASE, ADD_PERMISSIONS, ioctl_add_perm_data)
+#define ACCESS_TOKENID_REMOVE_PERMISSIONS \
+    _IOW(ACCESS_TOKEN_ID_IOCTL_BASE, REMOVE_PERMISSIONS, uint32_t)
+#define ACCESS_TOKENID_GET_PERMISSION \
+    _IOW(ACCESS_TOKEN_ID_IOCTL_BASE, GET_PERMISSION, ioctl_set_get_perm_data)
+#define ACCESS_TOKENID_SET_PERMISSION \
+    _IOW(ACCESS_TOKEN_ID_IOCTL_BASE, SET_PERMISSION, ioctl_set_get_perm_data)
+
+#endif /* _ACCESS_TOKEN_ID_H */
diff --git a/drivers/hck/Kconfig b/drivers/hck/Kconfig
new file mode 100755
index 000000000..1028c52a3
--- /dev/null
+++ b/drivers/hck/Kconfig
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0-only
+menu "Hck"
+
+config HCK
+    bool "Hck Drivers"
+    help
+      Enable support for various drivers needed on the OpenHarmony Common Kernel.
+
+if HCK
+
+config HCK_VENDOR_HOOKS
+    bool "Hck Vendor Hooks"
+    help
+      Enable vendor hooks implemented as tracepoints.
+
+      Allow vendor modules to attach to tracepoint "hooks" defined via
+      DECLARE_HCK_HOOK and DECLARE_HCK_RESTRICTED_HOOK.
+
+endif # if HCK
+
+endmenu
diff --git a/drivers/hck/Makefile b/drivers/hck/Makefile
new file mode 100755
index 000000000..93dc6acc7
--- /dev/null
+++ b/drivers/hck/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+ccflags-y += -I$(src)
+
+obj-$(CONFIG_HCK_VENDOR_HOOKS) += vendor_hooks.o
\ No newline at end of file
diff --git a/drivers/hck/vendor_hooks.c b/drivers/hck/vendor_hooks.c
new file mode 100755
index 000000000..6dce54016
--- /dev/null
+++ b/drivers/hck/vendor_hooks.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vendor_hooks.c
+ *
+ * OpenHarmony Common Kernel Vendor Hook Support
+ *
+ */
+
+/* lite vendor hook */
+#define CREATE_LITE_VENDOR_HOOK
+/* add your lite vendor hook header file here */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index e03627ad4..5bf431040 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -102,4 +102,10 @@ source "drivers/staging/qlge/Kconfig"
 
 source "drivers/staging/wfx/Kconfig"
 
+source "drivers/staging/hilog/Kconfig"
+source "drivers/staging/hievent/Kconfig"
+source "drivers/staging/hisysevent/Kconfig"
+source "drivers/staging/zerohung/Kconfig"
+source "drivers/staging/hungtask/Kconfig"
+source "drivers/staging/blackbox/Kconfig"
 endif # STAGING
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index c7f8d8d8d..31d2063c7 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -41,3 +41,9 @@ obj-$(CONFIG_XIL_AXIS_FIFO) += axis-fifo/
 obj-$(CONFIG_FIELDBUS_DEV) += fieldbus/
 obj-$(CONFIG_QLGE) += qlge/
 obj-$(CONFIG_WFX) += wfx/
+obj-$(CONFIG_HILOG) += hilog/
+obj-$(CONFIG_HIEVENT) += hievent/
+obj-$(CONFIG_HISYSEVENT) += hisysevent/
+obj-$(CONFIG_DFX_ZEROHUNG) += zerohung/
+obj-$(CONFIG_DFX_HUNGTASK) += hungtask/
+obj-$(CONFIG_BLACKBOX) += blackbox/
diff --git a/drivers/staging/blackbox/Kconfig b/drivers/staging/blackbox/Kconfig
new file mode 100755
index 000000000..0e985823c
--- /dev/null
+++ b/drivers/staging/blackbox/Kconfig
@@ -0,0 +1,108 @@
+# SPDX-License-Identifier: GPL-2.0
+menu "Blackbox Options"
+
+config BLACKBOX
+    bool "Support for blackbox"
+    select STORAGE if BLACKBOX_STORAGE_MATERIAL
+    default y
+    help
+      The blackbox is a fault log collecting framework for registered modules
+      of chips. When a fault occurs, blackbox will invoke the registered
+      function to save the log and reset the module.
+
+config BLACKBOX_LOG_ROOT_PATH
+    string "root path of the blackbox log"
+    depends on BLACKBOX
+    help
+      Define the root path of the blackbox log.
+
+config BLACKBOX_LOG_PART_REPRESENTATIVE
+    string "representative of the blackbox log part"
+    depends on BLACKBOX
+    help
+      Define the representative of the blackbox log part, i.e. a path whose
+      existence indicates that the log partition is mounted.
+
+config BLACKBOX_STORAGE_BY_MEMORY
+    tristate "blackbox fault log storage by memory directly"
+    depends on BLACKBOX
+    select STORAGE_BY_MEMORY
+    help
+      This option enables blackbox to save fault logs directly in memory when
+      a panic occurs. It depends on warm reset support, with DDR contents
+      preserved (not erased) across the warm reset.
+
+config BLACKBOX_USE_PSTORE_BLK_DEBUG
+    bool "blackbox use pstore blk for debug"
+    depends on BLACKBOX
+    default n
+    help
+      If Y, this enables pstore blk for blackbox.
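+# Editor's illustrative sketch (not part of the original patch): a board
+# defconfig fragment selecting the pstore_blk storage path would combine
+# roughly the options below; names are taken from this Kconfig and from
+# the pstore dependencies it declares:
+#   CONFIG_BLACKBOX=y
+#   CONFIG_PSTORE_BLK=y
+#   CONFIG_PSTORE_BLACKBOX=y
+#   CONFIG_BLACKBOX_STORAGE_BY_PSTORE_BLK=y
+#   CONFIG_DEF_BLACKBOX_STORAGE_BY_PSTORE_BLK=y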
+
+config BLACKBOX_STORAGE_BY_PSTORE_BLK
+    tristate "blackbox fault log storage by pstore blk"
+    depends on BLACKBOX
+    depends on PSTORE_BLK
+    depends on PSTORE_BLACKBOX
+    select STORAGE_BY_PSTORE_BLK
+    help
+      This option enables blackbox to save fault logs via pstore blk when a
+      panic occurs. It depends on pstore blk support. In particular, the
+      flash driver must implement panic_write; otherwise, if a panic
+      happens, the fault log cannot be saved.
+
+config BLACKBOX_STORAGE_BY_PSTORE_RAM
+    tristate "blackbox fault log storage by pstore ram"
+    depends on BLACKBOX
+    depends on PSTORE_RAM
+    depends on PSTORE_BLACKBOX
+    select STORAGE_BY_PSTORE_RAM
+    help
+      This option enables blackbox to save fault logs via pstore ram when a
+      panic occurs. It depends on pstore ram support.
+
+config BLACKBOX_STORAGE_BY_RAW_PARTITION
+    tristate "blackbox fault log storage by RAW partition"
+    depends on BLACKBOX
+    select STORAGE_BY_RAW_PARTITION
+    help
+      This option enables blackbox to save fault logs to a RAW partition when
+      a panic occurs. It depends on a partition being reserved for blackbox.
+
+config BLACKBOX_STORAGE_MATERIAL
+    def_bool y
+    depends on BLACKBOX
+    depends on BLACKBOX_STORAGE_BY_MEMORY || BLACKBOX_STORAGE_BY_PSTORE_BLK || \
+           BLACKBOX_STORAGE_BY_PSTORE_RAM || BLACKBOX_STORAGE_BY_RAW_PARTITION
+
+choice
+    prompt "Default storage material for fault log when a panic occurs."
+    depends on BLACKBOX_STORAGE_MATERIAL
+    help
+      This option chooses the default fault log storage material for blackbox
+      when a panic occurs.
+
+      The default material is memory. It is simple, but does not always work.
+
+    config DEF_BLACKBOX_STORAGE_BY_MEMORY
+        bool "memory" if BLACKBOX_STORAGE_BY_MEMORY
+
+    config DEF_BLACKBOX_STORAGE_BY_PSTORE_BLK
+        bool "pstore_blk" if BLACKBOX_STORAGE_BY_PSTORE_BLK
+
+    config DEF_BLACKBOX_STORAGE_BY_PSTORE_RAM
+        bool "pstore_ram" if BLACKBOX_STORAGE_BY_PSTORE_RAM
+
+    config DEF_BLACKBOX_STORAGE_BY_RAW_PARTITION
+        bool "raw_partition" if BLACKBOX_STORAGE_BY_RAW_PARTITION
+
+endchoice
+
+config DEF_BLACKBOX_STORAGE
+    string
+    depends on BLACKBOX_STORAGE_MATERIAL
+    default "memory" if DEF_BLACKBOX_STORAGE_BY_MEMORY
+    default "pstore_blk" if DEF_BLACKBOX_STORAGE_BY_PSTORE_BLK
+    default "pstore_ram" if DEF_BLACKBOX_STORAGE_BY_PSTORE_RAM
+    default "raw_partition" if DEF_BLACKBOX_STORAGE_BY_RAW_PARTITION
+
+endmenu
diff --git a/drivers/staging/blackbox/Makefile b/drivers/staging/blackbox/Makefile
new file mode 100755
index 000000000..9befa81a1
--- /dev/null
+++ b/drivers/staging/blackbox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_BLACKBOX) += blackbox_core.o \
+              blackbox_storage.o \
+              blackbox_common.o
diff --git a/drivers/staging/blackbox/blackbox_common.c b/drivers/staging/blackbox/blackbox_common.c
new file mode 100755
index 000000000..ea4e01d9d
--- /dev/null
+++ b/drivers/staging/blackbox/blackbox_common.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Huawei Technologies Co., Ltd. All rights reserved.
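+ *
+ * blackbox_common.c - shared helpers for the blackbox framework: file
+ * I/O wrappers, log directory creation and timestamp formatting.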
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+void sys_reset(void)
+{
+    bbox_print_info("reset the system now!\n");
+    emergency_restart();
+    bbox_print_info("reset the system failed!\n");
+}
+
+void change_own(char *path, int uid, int gid)
+{
+    //mm_segment_t old_fs;
+    int ret = -1;
+
+    if (unlikely(!path || uid == -1 || gid == -1)) {
+        bbox_print_err("path or uid or gid error.\n");
+        return;
+    }
+
+    //old_fs = get_fs();
+    //set_fs(KERNEL_DS);
+    ret = ksys_chown(path, uid, gid);
+    if (ret != 0)
+        bbox_print_err("ksys_chown [%s] failed, ret: %d\n", path, ret);
+
+    //set_fs(old_fs);
+}
+
+int full_write_file(const char *pfile_path, char *buf,
+            size_t buf_size, bool is_append)
+{
+    struct file *filp = NULL;
+    char *pathname = NULL;
+    //mm_segment_t old_fs;
+    loff_t pos = 0;
+    int ret = -1;
+
+    if (unlikely(!pfile_path || !buf)) {
+        bbox_print_err("pfile_path or buf is NULL!\n");
+        return -EINVAL;
+    }
+
+    filp = file_open(pfile_path, O_CREAT | O_RDWR |
+             (is_append ? O_APPEND : O_TRUNC), BBOX_FILE_LIMIT);
+    if (IS_ERR(filp)) {
+        bbox_print_err("open %s failed! [%ld]\n", pfile_path, PTR_ERR(filp));
+        return -EBADF;
+    }
+
+    //old_fs = get_fs();
+    //set_fs(KERNEL_DS);
+
+    ret = vfs_write(filp, buf, buf_size, &pos);
+
+    //set_fs(old_fs);
+
+    if (ret < 0) {
+        /* resolve the path before the file reference is dropped */
+        pathname = getfullpath(filp);
+        bbox_print_err("write [%s] failed! [%d]\n", pathname ? pathname : "", ret);
+        kfree(pathname);
+        file_close(filp);
+        return ret;
+    }
+
+    file_close(filp);
+
+    return 0;
+}
+
+int file_exists(const char *name)
+{
+    struct path path;
+    int ret;
+
+    ret = kern_path(name, LOOKUP_FOLLOW, &path);
+    if (ret)
+        return ret;
+
+    ret = inode_permission(&init_user_ns, d_inode(path.dentry), MAY_ACCESS);
+    path_put(&path);
+    return ret;
+}
+
+static int create_new_dir(char *name)
+{
+    struct dentry *dentry;
+    struct path path;
+    int ret;
+
+    if (unlikely(!name)) {
+        bbox_print_err("name is NULL!\n");
+        return -EINVAL;
+    }
+
+    ret = file_exists(name);
+    if (ret) {
+        dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY);
+        if (IS_ERR(dentry))
+            return PTR_ERR(dentry);
+
+        ret = vfs_mkdir(&init_user_ns, d_inode(path.dentry), dentry, BBOX_DIR_LIMIT);
+        if (ret && ret != -EEXIST)
+            bbox_print_err("Create dir [%s] failed! ret: %d\n", name, ret);
ret: %d\n", name, ret); + + done_path_create(&path, dentry); + } + + return 0; +} + +int create_log_dir(const char *path) +{ + char *cur_path = NULL; + int index = 0; + + if (unlikely(!path)) { + bbox_print_err("path is NULL!\n"); + return -EINVAL; + } + + if (*path != '/') + return -EINVAL; + cur_path = kmalloc(PATH_MAX_LEN + 1, GFP_KERNEL); + if (unlikely(!cur_path)) { + bbox_print_err("kmalloc failed!\n"); + return -ENOMEM; + } + memset(cur_path, 0, PATH_MAX_LEN + 1); + cur_path[index++] = *path++; + while (*path != '\0') { + if (*path == '/') + create_new_dir(cur_path); + cur_path[index] = *path; + path++; + index++; + } + create_new_dir(cur_path); + kfree(cur_path); + + return 0; +} + +void get_timestamp(char *buf, size_t buf_size) +{ + struct rtc_time tm; + struct timespec64 tv; + + if (unlikely(!buf || buf_size == 0)) { + bbox_print_err("buf: %p, buf_size: %u\n", buf, (unsigned int)buf_size); + return; + } + + memset(buf, 0, buf_size); + memset(&tm, 0, sizeof(tm)); + + memset(&tv, 0, sizeof(tv)); + ktime_get_real_ts64(&tv); + tv.tv_sec -= (long)sys_tz.tz_minuteswest * SECONDS_PER_MINUTE; + rtc_time64_to_tm(tv.tv_sec, &tm); + + (void)scnprintf(buf, buf_size, TIMESTAMP_FORMAT, + tm.tm_year + YEAR_BASE, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, get_ticks()); + buf[buf_size - 1] = '\0'; +} +EXPORT_SYMBOL_GPL(get_timestamp); + +unsigned long long get_ticks(void) +{ + /* use only one int value to save time: */ + + struct timespec64 uptime; + + ktime_get_ts64(&uptime); + + ktime_get_boottime_ts64(&uptime); + + return (u64)uptime.tv_sec; +} + +static inline struct dentry *lock_parent(struct dentry *dentry) +{ + struct dentry *dir = dget_parent(dentry); + + inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); + return dir; +} + +static inline void unlock_dir(struct dentry *dentry) +{ + inode_unlock(d_inode(dentry)); + dput(dentry); +} + +struct file *file_open(const char *filename, int open_mode, int mode) +{ + struct file *filp = NULL; + //mm_segment_t old_fs; + + //old_fs = get_fs(); + //set_fs(KERNEL_DS); + + filp = filp_open(filename, open_mode, mode); + //set_fs(old_fs); + + return filp; +} + +void file_close(struct file *filp) +{ + if (likely(filp)) + filp_close(filp, NULL); +} + +int file_delete(struct file *filp) +{ + struct dentry *dentry = NULL; + struct dentry *parent = NULL; + int ret = 0; + + if (unlikely(!filp)) { + bbox_print_err("file is NULL!\n"); + return -EINVAL; + } + + dentry = file_dentry(filp); + parent = lock_parent(dentry); + + if (dentry->d_parent == parent) { + dget(dentry); + ret = vfs_unlink(&init_user_ns, d_inode(parent), dentry, NULL); + dput(dentry); + } + + unlock_dir(parent); + + return ret; +} + +char *getfullpath(struct file *filp) +{ + char *buf = NULL, *path = NULL; + + if (unlikely(!filp)) + return NULL; + + buf = kmalloc(PATH_MAX, GFP_KERNEL); + if (unlikely(!buf)) + return NULL; + memset(buf, 0, PATH_MAX); + + // get the path + path = d_path(&filp->f_path, buf, PATH_MAX); + + kfree(buf); + + return path; +} diff --git a/drivers/staging/blackbox/blackbox_core.c b/drivers/staging/blackbox/blackbox_core.c new file mode 100755 index 000000000..ff23b0593 --- /dev/null +++ b/drivers/staging/blackbox/blackbox_core.c @@ -0,0 +1,596 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Huawei Technologies Co., Ltd. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_DFX_ZEROHUNG +#include +#endif +#include +#include + +/* ---- local macroes ---- */ +/* bbox/BBOX - blackbox */ +#define HISTORY_LOG_NAME "history.log" +#define LOG_PART_WAIT_TIME 1000 /* unit: ms */ +#define HISTORY_LOG_MAX_LEN 1024 +#define TOP_CATEGORY_SYSTEM_RESET "System Reset" +#define TOP_CATEGORY_FREEZE "System Freeze" +#define TOP_CATEGORY_SYSTEM_POWEROFF "POWEROFF" +#define TOP_CATEGORY_SUBSYSTEM_CRASH "Subsystem Crash" + +#ifndef CONFIG_BLACKBOX_LOG_ROOT_PATH +#error no blackbox log root path +#endif +#ifndef CONFIG_BLACKBOX_LOG_PART_REPRESENTATIVE +#error no representative of the blackbox log part +#endif + +/* ---- local prototypes ---- */ +struct bbox_ops { + struct list_head list; + struct module_ops ops; +}; + +struct error_info_to_category { + const char *module; + struct { + const char *event; + const char *category; + const char *top_category; + } map; +}; + +/* ---- local variables ---- */ +static LIST_HEAD(ops_list); +static DEFINE_SPINLOCK(ops_list_lock); +static DEFINE_SEMAPHORE(temp_error_info_sem); +static struct error_info_to_category error_info_categories[] = { + { + MODULE_SYSTEM, + {EVENT_SYSREBOOT, CATEGORY_SYSTEM_REBOOT, TOP_CATEGORY_SYSTEM_RESET} + }, + { + MODULE_SYSTEM, + {EVENT_LONGPRESS, CATEGORY_SYSTEM_REBOOT, TOP_CATEGORY_SYSTEM_RESET} + }, + { + MODULE_SYSTEM, + {EVENT_COMBINATIONKEY, CATEGORY_SYSTEM_REBOOT, TOP_CATEGORY_SYSTEM_RESET} + }, + { + MODULE_SYSTEM, + {EVENT_SUBSYSREBOOT, CATEGORY_SYSTEM_REBOOT, TOP_CATEGORY_SYSTEM_RESET} + }, + { + MODULE_SYSTEM, + {EVENT_POWEROFF, CATEGORY_SYSTEM_POWEROFF, TOP_CATEGORY_SYSTEM_POWEROFF} + }, + { + MODULE_SYSTEM, + {EVENT_PANIC, CATEGORY_SYSTEM_PANIC, TOP_CATEGORY_SYSTEM_RESET} + }, + { + MODULE_SYSTEM, + {EVENT_OOPS, CATEGORY_SYSTEM_OOPS, TOP_CATEGORY_SYSTEM_RESET} + }, + { + MODULE_SYSTEM, + {EVENT_SYS_WATCHDOG, CATEGORY_SYSTEM_WATCHDOG, TOP_CATEGORY_FREEZE} + }, + { + MODULE_SYSTEM, + {EVENT_HUNGTASK, CATEGORY_SYSTEM_HUNGTASK, TOP_CATEGORY_FREEZE} + }, +#ifdef CONFIG_BLACKBOX_EXPAND_EVENT + #include +#endif +}; + +struct error_info *temp_error_info; + +/* ---- local function prototypes ---- */ +static const char *get_top_category(const char *module, const char *event); +static const char *get_category(const char *module, const char *event); +static void format_log_dir(char *buf, size_t buf_size, const char *log_root_dir, + const char *timestamp); +static void save_history_log(const char *log_root_dir, struct error_info *info, + const char *timestamp, int need_sys_reset); +#ifdef CONFIG_BLACKBOX_DEBUG +static void save_invalid_log(const struct bbox_ops *ops, const struct error_info *info); +#endif +static void wait_for_log_part(void); +static void format_error_info(struct error_info *info, const char event[EVENT_MAX_LEN], + const char module[MODULE_MAX_LEN], + const char error_desc[ERROR_DESC_MAX_LEN]); +static void save_last_log(void); +static int save_error_log(void *pparam); + +/* ---- global function prototypes ---- */ + +/* ---- function definitions ---- */ +static const char *get_top_category(const char *module, const char *event) +{ + int i; + int count = (int)ARRAY_SIZE(error_info_categories); + + if (unlikely(!module || !event)) { + bbox_print_err("module: %p, event: %p\n", module, event); + return TOP_CATEGORY_SUBSYSTEM_CRASH; + } + + for (i = 0; i < count; i++) { + if (!strcmp(error_info_categories[i].module, module) && + 
!strcmp(error_info_categories[i].map.event, event)) + return error_info_categories[i].map.top_category; + } + if (!strcmp(module, MODULE_SYSTEM)) + return TOP_CATEGORY_SYSTEM_RESET; + + return TOP_CATEGORY_SUBSYSTEM_CRASH; +} + +static const char *get_category(const char *module, const char *event) +{ + int i; + int count = (int)ARRAY_SIZE(error_info_categories); + + if (unlikely(!module || !event)) { + bbox_print_err("module: %p, event: %p\n", module, event); + return CATEGORY_SUBSYSTEM_CUSTOM; + } + + for (i = 0; i < count; i++) { + if (!strcmp(error_info_categories[i].module, module) && + !strcmp(error_info_categories[i].map.event, event)) + return error_info_categories[i].map.category; + } + if (!strcmp(module, MODULE_SYSTEM)) + return CATEGORY_SYSTEM_CUSTOM; + + return CATEGORY_SUBSYSTEM_CUSTOM; +} + +static void format_log_dir(char *buf, size_t buf_size, const char *log_root_dir, + const char *timestamp) +{ + if (unlikely(!buf || buf_size == 0 || !log_root_dir || + !timestamp)) { + bbox_print_err("buf: %p, buf_size: %u, log_root_dir: %p, timestamp: %p\n", + buf, (unsigned int)buf_size, log_root_dir, timestamp); + return; + } + + memset(buf, 0, buf_size); + scnprintf(buf, buf_size - 1, "%s/%s", log_root_dir, timestamp); +} + +static void format_error_info(struct error_info *info, const char event[EVENT_MAX_LEN], + const char module[MODULE_MAX_LEN], + const char error_desc[ERROR_DESC_MAX_LEN]) +{ + if (unlikely(!info || !event || !module || !error_desc)) { + bbox_print_err("info: %p, event: %p, module: %p, error_desc: %p\n", + info, event, module, error_desc); + return; + } + + memset(info, 0, sizeof(*info)); + strncpy(info->event, event, min(strlen(event), + sizeof(info->event) - 1)); + strncpy(info->module, module, min(strlen(module), + sizeof(info->module) - 1)); + strncpy(info->category, get_category(module, event), + min(strlen(get_category(module, event)), sizeof(info->category) - 1)); + get_timestamp(info->error_time, TIMESTAMP_MAX_LEN); + strncpy(info->error_desc, error_desc, min(strlen(error_desc), + sizeof(info->error_desc) - 1)); +} + +static void save_history_log(const char *log_root_dir, struct error_info *info, + const char *timestamp, int need_sys_reset) +{ + char history_log_path[PATH_MAX_LEN]; + char *buf; + + if (unlikely(!log_root_dir || !info || !timestamp)) { + bbox_print_err("log_root_dir: %p, info: %p, timestamp: %p\n", + log_root_dir, info, timestamp); + return; + } + + buf = kmalloc(HISTORY_LOG_MAX_LEN + 1, GFP_KERNEL); + if (!buf) + return; + memset(buf, 0, HISTORY_LOG_MAX_LEN + 1); + + scnprintf(buf, HISTORY_LOG_MAX_LEN, HISTORY_LOG_FORMAT, + get_top_category(info->module, info->event), info->module, + info->category, info->event, timestamp, + need_sys_reset ? 
"true" : "false", info->error_desc, log_root_dir); +#ifdef CONFIG_DFX_ZEROHUNG + zrhung_send_event("KERNEL_VENDOR", info->category, info->error_desc); +#endif + memset(history_log_path, 0, sizeof(history_log_path)); + scnprintf(history_log_path, sizeof(history_log_path) - 1, + "%s/%s", log_root_dir, HISTORY_LOG_NAME); + full_write_file(history_log_path, buf, strlen(buf), 1); + ksys_sync(); + kfree(buf); +} + +#ifdef CONFIG_BLACKBOX_DEBUG +static void save_invalid_log(const struct bbox_ops *ops, const struct error_info *info) +{ + char invalid_log_path[PATH_MAX_LEN]; + char timestamp[TIMESTAMP_MAX_LEN]; + + if (unlikely(!ops || !info)) { + bbox_print_err("ops: %p, info: %p\n", ops, info); + return; + } + + get_timestamp(timestamp, sizeof(timestamp)); + format_log_dir(invalid_log_path, PATH_MAX_LEN, CONFIG_BLACKBOX_LOG_PART_REPRESENTATIVE, + timestamp); + create_log_dir(invalid_log_path); + if (ops->ops.save_last_log(invalid_log_path, (struct error_info *)info) != 0) + bbox_print_err("[%s] failed to save invalid log!\n", ops->ops.module); +} +#endif + +static bool is_log_part_mounted(void) +{ + return file_exists(CONFIG_BLACKBOX_LOG_PART_REPRESENTATIVE) == 0; +} + +static void wait_for_log_part(void) +{ + bbox_print_info("wait for log part [%s] begin!\n", + CONFIG_BLACKBOX_LOG_PART_REPRESENTATIVE); + while (!is_log_part_mounted()) + msleep(LOG_PART_WAIT_TIME); + + bbox_print_info("wait for log part [%s] end!\n", + CONFIG_BLACKBOX_LOG_PART_REPRESENTATIVE); +} + +static bool find_module_ops(struct error_info *info, struct bbox_ops **ops) +{ + struct bbox_ops *cur = NULL; + bool find_module = false; + + if (unlikely(!info || !ops)) { + bbox_print_err("info: %p, ops: %p!\n", info, ops); + return find_module; + } + + list_for_each_entry(cur, &ops_list, list) { + if (!strcmp(cur->ops.module, info->module)) { + *ops = cur; + find_module = true; + break; + } + } + if (!find_module) + bbox_print_err("[%s] hasn't been registered!\n", info->module); + + return find_module; +} + +static void invoke_module_ops(const char *log_dir, struct error_info *info, + struct bbox_ops *ops) +{ + if (unlikely(!info || !ops)) { + bbox_print_err("info: %p, ops: %p!\n", info, ops); + return; + } + + if (ops->ops.dump && log_dir) { + bbox_print_info("[%s] starts dumping data!\n", ops->ops.module); + ops->ops.dump(log_dir, info); + bbox_print_info("[%s] ends dumping data!\n", ops->ops.module); + } + if (ops->ops.reset) { + bbox_print_info("[%s] starts resetting!\n", ops->ops.module); + ops->ops.reset(info); + bbox_print_info("[%s] ends resetting!\n", ops->ops.module); + } +} + +static void save_log_without_reset(struct error_info *info) +{ + unsigned long flags; + struct bbox_ops *ops = NULL; + char *log_dir = NULL; + char timestamp[TIMESTAMP_MAX_LEN]; + + if (unlikely(!info)) { + bbox_print_err("info: %p!\n", info); + return; + } + + /* get timestamp */ + get_timestamp(timestamp, sizeof(timestamp)); + + /* get bbox ops */ + spin_lock_irqsave(&ops_list_lock, flags); + if (!find_module_ops(info, &ops)) { + spin_unlock_irqrestore(&ops_list_lock, flags); + return; + } + spin_unlock_irqrestore(&ops_list_lock, flags); + create_log_dir(CONFIG_BLACKBOX_LOG_ROOT_PATH); + if (ops->ops.dump) { + /* create log root path */ + log_dir = kmalloc(PATH_MAX_LEN, GFP_KERNEL); + if (log_dir) { + format_log_dir(log_dir, PATH_MAX_LEN, + CONFIG_BLACKBOX_LOG_ROOT_PATH, timestamp); + create_log_dir(log_dir); + } else + bbox_print_err("kmalloc failed!\n"); + } + invoke_module_ops(log_dir, info, ops); + 
save_history_log(CONFIG_BLACKBOX_LOG_ROOT_PATH, info, timestamp, 0); + kfree(log_dir); +} + +static void save_log_with_reset(struct error_info *info) +{ + struct bbox_ops *ops = NULL; + + if (unlikely(!info)) { + bbox_print_err("info: %p!\n", info); + return; + } + + if (!find_module_ops(info, &ops)) + return; + + invoke_module_ops("", info, ops); + if (strcmp(info->category, CATEGORY_SYSTEM_REBOOT) && + strcmp(info->category, CATEGORY_SYSTEM_PANIC)) + sys_reset(); +} + +static void save_temp_error_info(const char event[EVENT_MAX_LEN], + const char module[MODULE_MAX_LEN], + const char error_desc[ERROR_DESC_MAX_LEN]) +{ + if (unlikely(!event || !module || !error_desc)) { + bbox_print_err("event: %p, module: %p, error_desc: %p\n", + event, module, error_desc); + return; + } + + down(&temp_error_info_sem); + format_error_info(temp_error_info, event, module, error_desc); + up(&temp_error_info_sem); +} + +static void do_save_last_log(const struct bbox_ops *ops, struct error_info *info) +{ + char *log_dir = NULL; + int ret; + + if (unlikely(!ops || !info)) { + bbox_print_err("ops: %p, info: %p\n", + ops, info); + return; + } + + memset((void *)info, 0, sizeof(*info)); + ret = ops->ops.get_last_log_info((struct error_info *)info); + if (ret) { + bbox_print_err("[%s] failed to get log info!\n", ops->ops.module); +#ifdef CONFIG_BLACKBOX_DEBUG + if (ret == -ENOMSG) + save_invalid_log(ops, info); +#endif + return; + } + + strncpy(info->category, get_category(info->module, info->event), + min(strlen(get_category(info->module, info->event)), sizeof(info->category) - 1)); + + bbox_print_info("[%s] starts saving log!\n", ops->ops.module); + bbox_print_info("event: [%s] module: [%s], time is [%s]!\n", + info->event, info->module, info->error_time); + + log_dir = kmalloc(PATH_MAX_LEN, GFP_KERNEL); + if (!log_dir) + return; + + if (!strlen(info->error_time)) + get_timestamp((char *)info->error_time, TIMESTAMP_MAX_LEN); + + format_log_dir(log_dir, PATH_MAX_LEN, CONFIG_BLACKBOX_LOG_ROOT_PATH, + info->error_time); + create_log_dir(log_dir); + if (ops->ops.save_last_log(log_dir, (struct error_info *)info) == 0) + save_history_log(CONFIG_BLACKBOX_LOG_ROOT_PATH, + (struct error_info *)info, info->error_time, 1); + else + bbox_print_err("[%s] failed to save log!\n", ops->ops.module); + kfree(log_dir); +} + +static void save_last_log(void) +{ + unsigned long flags; + struct error_info *info = NULL; + struct bbox_ops *ops = NULL; + + info = kmalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return; + + spin_lock_irqsave(&ops_list_lock, flags); + list_for_each_entry(ops, &ops_list, list) { + if (ops->ops.get_last_log_info && + ops->ops.save_last_log) { + spin_unlock_irqrestore(&ops_list_lock, flags); + do_save_last_log(ops, info); + spin_lock_irqsave(&ops_list_lock, flags); + } else { + bbox_print_err("[%s] get_last_log_info: %p, %s: %p\n", + ops->ops.module, ops->ops.get_last_log_info, + __func__, ops->ops.save_last_log); + } + } + spin_unlock_irqrestore(&ops_list_lock, flags); + kfree(info); +} + +static void save_temp_error_log(void) +{ + down(&temp_error_info_sem); + if (!temp_error_info) { + bbox_print_err("temp_error_info: %p\n", temp_error_info); + up(&temp_error_info_sem); + return; + } + + if (strlen(temp_error_info->event) != 0) + save_log_without_reset(temp_error_info); + + kfree(temp_error_info); + temp_error_info = NULL; + up(&temp_error_info_sem); +} + +static int save_error_log(void *pparam) +{ + wait_for_log_part(); + save_last_log(); + save_temp_error_log(); + + return 0; +} + +int 
bbox_register_module_ops(struct module_ops *ops) +{ + struct bbox_ops *new_ops = NULL; + struct bbox_ops *temp = NULL; + unsigned long flags; + + if (unlikely(!ops)) { + bbox_print_err("ops: %p\n", ops); + return -EINVAL; + } + + new_ops = kmalloc(sizeof(*new_ops), GFP_KERNEL); + if (!new_ops) + return -ENOMEM; + memset(new_ops, 0, sizeof(*new_ops)); + memcpy(&new_ops->ops, ops, sizeof(*ops)); + spin_lock_irqsave(&ops_list_lock, flags); + if (list_empty(&ops_list)) + goto __out; + + list_for_each_entry(temp, &ops_list, list) { + if (!strcmp(temp->ops.module, ops->module)) { + spin_unlock_irqrestore(&ops_list_lock, flags); + kfree(new_ops); + bbox_print_info("[%s] has been registered!\n", temp->ops.module); + return -ENODATA; + } + } + +__out: + bbox_print_info("[%s] is registered successfully!\n", ops->module); + list_add_tail(&new_ops->list, &ops_list); + spin_unlock_irqrestore(&ops_list_lock, flags); + + return 0; +} + +int bbox_notify_error(const char event[EVENT_MAX_LEN], const char module[MODULE_MAX_LEN], + const char error_desc[ERROR_DESC_MAX_LEN], int need_sys_reset) +{ + struct error_info *info = NULL; + + if (unlikely(!event || !module || !error_desc)) { + bbox_print_err("event: %p, module: %p, error_desc: %p\n", event, + module, error_desc); + return -EINVAL; + } + + info = kmalloc(sizeof(*info), GFP_ATOMIC); + if (!info) + return -ENOMEM; + + format_error_info(info, event, module, error_desc); + show_stack(current, NULL, KERN_DEFAULT); + if (!need_sys_reset) { + /* handle the error which do not need reset */ + if (!is_log_part_mounted()) + save_temp_error_info(event, module, error_desc); + else + save_log_without_reset(info); + } else { + /* handle the error which need reset */ + save_log_with_reset(info); + } + + kfree(info); + + return 0; +} + +static void __init select_storage_material(void) +{ + const struct reboot_crashlog_storage *tmp = NULL; + + if (!storage_material) + return; + + for (tmp = storage_lastwords; tmp->material; tmp++) { + if (!strcmp(storage_material, tmp->material)) { + storage_lastword = tmp; + return; + } + } +} + +static int __init blackbox_core_init(void) +{ + struct task_struct *tsk = NULL; + + select_storage_material(); + + temp_error_info = kmalloc(sizeof(*temp_error_info), GFP_KERNEL); + if (!temp_error_info) + return -ENOMEM; + + memset(temp_error_info, 0, sizeof(*temp_error_info)); + + /* Create a kernel thread to save log */ + tsk = kthread_run(save_error_log, NULL, "save_error_log"); + if (IS_ERR(tsk)) { + kfree(temp_error_info); + temp_error_info = NULL; + bbox_print_err("kthread_run failed!\n"); + return -ESRCH; + } + + return 0; +} + +core_initcall(blackbox_core_init); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Blackbox core framework"); +MODULE_AUTHOR("OHOS"); diff --git a/drivers/staging/blackbox/blackbox_storage.c b/drivers/staging/blackbox/blackbox_storage.c new file mode 100755 index 000000000..6cb74fd0d --- /dev/null +++ b/drivers/staging/blackbox/blackbox_storage.c @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Huawei Technologies Co., Ltd. All rights reserved. 
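+ *
+ * blackbox_storage.c - selectable "last words" storage backends for
+ * fault logs: memory, pstore_blk, pstore_ram or a raw partition.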
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+char *storage_material =
+#ifdef CONFIG_DEF_BLACKBOX_STORAGE
+    CONFIG_DEF_BLACKBOX_STORAGE;
+#else
+    NULL;
+#endif
+const struct reboot_crashlog_storage *storage_lastword __ro_after_init;
+
+#if IS_ENABLED(CONFIG_DEF_BLACKBOX_STORAGE_BY_MEMORY)
+static DEFINE_SEMAPHORE(kmsg_sem);
+static char *lastlog;
+unsigned int lastlog_len;
+static int get_log_by_memory(void *in, unsigned int inlen)
+{
+    return 0;
+}
+
+static int storage_log_by_memory(void *out, unsigned int outlen)
+{
+    if (unlikely(!out))
+        return -EINVAL;
+
+    /* Initialized from caller. */
+    lastlog = out;
+    lastlog_len = outlen;
+    return 0;
+}
+
+/* Called after storage_log_by_memory succeeds. */
+static void do_kmsg_dump(struct kmsg_dumper *dumper,
+             enum kmsg_dump_reason reason)
+{
+    struct fault_log_info *pinfo;
+
+    if (unlikely(!lastlog))
+        return;
+
+    /* get kernel log from kmsg dump module */
+    if (down_trylock(&kmsg_sem) != 0) {
+        bbox_print_err("down_trylock failed!\n");
+        return;
+    }
+    pinfo = (struct fault_log_info *)lastlog;
+    (void)kmsg_dump_get_buffer(dumper, true, lastlog + sizeof(*pinfo),
+                   lastlog_len - sizeof(*pinfo), (size_t *)&pinfo->len);
+    up(&kmsg_sem);
+}
+#endif
+
+#if defined(CONFIG_DEF_BLACKBOX_STORAGE_BY_PSTORE_BLK) || \
+    defined(CONFIG_DEF_BLACKBOX_STORAGE_BY_PSTORE_RAM)
+#define LOG_FILE_WAIT_TIME 1000 /* unit: ms */
+#define RETRY_MAX_COUNT 10
+#define PSTORE_MOUNT_POINT "/sys/fs/pstore/"
+#define FILE_LIMIT (0660)
+
+static bool is_pstore_part_ready(char *pstore_file)
+{
+    const char *cur_name = NULL;
+    struct dentry *root_dentry;
+    struct dentry *cur_dentry;
+    struct file *filp = NULL;
+    char *full_path = NULL;
+    bool is_ready = false;
+
+    /* the caller provides a zeroed buffer of PATH_MAX_LEN bytes */
+    if (unlikely(!pstore_file))
+        return false;
+
+    filp = file_open(PSTORE_MOUNT_POINT, O_RDONLY, 0);
+    if (IS_ERR(filp)) {
+        bbox_print_err("open %s failed! err is [%ld]\n", PSTORE_MOUNT_POINT,
+                   PTR_ERR(filp));
+        return false;
+    }
+
+    full_path = vmalloc(PATH_MAX_LEN);
+    if (!full_path)
+        goto __out;
+
+    root_dentry = filp->f_path.dentry;
+    list_for_each_entry(cur_dentry, &root_dentry->d_subdirs, d_child) {
+        cur_name = cur_dentry->d_name.name;
+
+        memset(full_path, 0, PATH_MAX_LEN);
+        snprintf(full_path, PATH_MAX_LEN - 1, "%s%s", PSTORE_MOUNT_POINT, cur_name);
+
+        if (S_ISREG(d_inode(cur_dentry)->i_mode) && !strncmp(cur_name, "blackbox",
+            strlen("blackbox"))) {
+            is_ready = true;
+            if (strcmp(full_path, pstore_file) > 0)
+                strncpy(pstore_file, full_path, strlen(full_path));
+        }
+    }
+
+    if (is_ready && strlen(pstore_file))
+        bbox_print_info("get pstore file name %s successfully!\n", pstore_file);
+
+__out:
+    file_close(filp);
+    vfree(full_path);
+
+    return is_ready;
+}
+
+static int get_log_by_pstore(void *in, unsigned int inlen)
+{
+    char pstore_file[PATH_MAX_LEN];
+    struct file *filp = NULL;
+    char *pathname = NULL;
+    //mm_segment_t old_fs;
+    void *pbuf = NULL;
+    loff_t pos = 0;
+    static int retry;
+    int ret = -1;
+
+    memset(pstore_file, 0, PATH_MAX_LEN);
+    while (!is_pstore_part_ready((char *)&pstore_file)) {
+        msleep(LOG_FILE_WAIT_TIME);
+        retry++;
+        if (retry >= RETRY_MAX_COUNT)
+            return -ENOENT;
+    }
+
+    if (likely(in)) {
+        filp = file_open(pstore_file, O_RDONLY, FILE_LIMIT);
+        if (IS_ERR(filp)) {
+            bbox_print_err("open %s failed! err is [%ld]\n", pstore_file,
err is [%ld]\n", pstore_file, + PTR_ERR(filp)); + return -EBADF; + } + memset(in, 0, inlen); + pbuf = in; + + //old_fs = get_fs(); + //set_fs(KERNEL_DS); + + ret = vfs_read(filp, pbuf, inlen, &pos); + if (ret < 0) { + pathname = getfullpath(filp); + bbox_print_err("read %s failed! err is [%d]\n", pathname ? pathname : "", + ret); + goto __error; + } + + //set_fs(old_fs); + file_close(filp); + file_delete(filp); + return 0; + } + + return -EBADF; +__error: + set_fs(old_fs); + file_close(filp); + return -EIO; +} +#endif + +const struct reboot_crashlog_storage storage_lastwords[] = { +#if IS_ENABLED(CONFIG_DEF_BLACKBOX_STORAGE_BY_MEMORY) + { + .get_log = get_log_by_memory, + .storage_log = storage_log_by_memory, + .blackbox_dump = do_kmsg_dump, + .material = "memory", + }, +#endif +#if IS_ENABLED(CONFIG_DEF_BLACKBOX_STORAGE_BY_PSTORE_BLK) + { + .get_log = get_log_by_pstore, + .blackbox_dump = pstore_blackbox_dump, + .material = "pstore_blk", + }, +#endif +#if IS_ENABLED(CONFIG_DEF_BLACKBOX_STORAGE_BY_PSTORE_RAM) + { + .get_log = get_log_by_pstore, + .blackbox_dump = pstore_blackbox_dump, + .material = "pstore_ram", + }, +#endif +#if IS_ENABLED(CONFIG_DEF_BLACKBOX_STORAGE_BY_RAW_PARTITION) + { + .material = "raw_partition", + }, +#endif + { } +}; + diff --git a/drivers/staging/hievent/Kconfig b/drivers/staging/hievent/Kconfig new file mode 100755 index 000000000..b445a2b90 --- /dev/null +++ b/drivers/staging/hievent/Kconfig @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +config HIEVENT + tristate "Enable hievent" + help + hievent buffer manager + +config BBOX_BUFFER_SIZE + int "bbox buffer size" + depends on HIEVENT + default 2048 + help + Define the default ring buffer size of BBOX \ No newline at end of file diff --git a/drivers/staging/hievent/Makefile b/drivers/staging/hievent/Makefile new file mode 100755 index 000000000..5b2adc23a --- /dev/null +++ b/drivers/staging/hievent/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_HIEVENT) += hievent_driver.o \ No newline at end of file diff --git a/drivers/staging/hievent/hievent_driver.c b/drivers/staging/hievent/hievent_driver.c new file mode 100755 index 000000000..86363d11d --- /dev/null +++ b/drivers/staging/hievent/hievent_driver.c @@ -0,0 +1,416 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Huawei Technologies Co., Ltd. All rights reserved. 
+ */ + +#define pr_fmt(fmt) "hievent_driver " fmt + +#include "hievent_driver.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct class *hievent_class; +static dev_t hievent_devno; + +#define HIEVENT_BUFFER ((size_t)CONFIG_BBOX_BUFFER_SIZE) +#define HIEVENT_DRIVER "/dev/bbox" +#define HIEVENT_DEV_NAME "bbox" +#define HIEVENT_DEV_NR 1 + +struct hievent_entry { + unsigned short len; + unsigned short header_size; + char msg[0]; +}; + +struct hievent_char_device { + struct cdev devm; + int flag; + struct mutex mtx; /* lock to protect read/write buffer */ + unsigned char *buffer; + wait_queue_head_t wq; + size_t write_offset; + size_t head_offset; + size_t size; + size_t count; +} hievent_dev; + +static inline unsigned char *hievent_buffer_head(void) +{ + if (hievent_dev.head_offset > HIEVENT_BUFFER) + hievent_dev.head_offset = + hievent_dev.head_offset % HIEVENT_BUFFER; + + return hievent_dev.buffer + hievent_dev.head_offset; +} + +static void hievent_buffer_inc(size_t sz) +{ + if (hievent_dev.size + sz <= HIEVENT_BUFFER) { + hievent_dev.size += sz; + hievent_dev.write_offset += sz; + hievent_dev.write_offset %= HIEVENT_BUFFER; + hievent_dev.count++; + } +} + +static void hievent_buffer_dec(size_t sz) +{ + if (hievent_dev.size >= sz) { + hievent_dev.size -= sz; + hievent_dev.head_offset += sz; + hievent_dev.head_offset %= HIEVENT_BUFFER; + hievent_dev.count--; + } +} + +static int hievent_read_ring_buffer(unsigned char __user *buffer, + size_t buf_len) +{ + size_t retval; + size_t buf_left = HIEVENT_BUFFER - hievent_dev.head_offset; + + if (buf_left > buf_len) { + retval = copy_to_user(buffer, hievent_buffer_head(), buf_len); + } else { + size_t mem_len = (buf_len > buf_left) ? buf_left : buf_len; + + retval = copy_to_user(buffer, hievent_buffer_head(), mem_len); + if (retval < 0) + return retval; + + retval = copy_to_user(buffer + buf_left, hievent_dev.buffer, + buf_len - buf_left); + } + return retval; +} + +static int hievent_read_ring_head_buffer(unsigned char * const buffer, + size_t buf_len) +{ + size_t buf_left = HIEVENT_BUFFER - hievent_dev.head_offset; + + if (buf_left > buf_len) { + memcpy(buffer, hievent_buffer_head(), buf_len); + } else { + size_t mem_len = (buf_len > buf_left) ? 
buf_left : buf_len; + + memcpy(buffer, hievent_buffer_head(), mem_len); + memcpy(buffer + buf_left, hievent_dev.buffer, + buf_len - buf_left); + } + return 0; +} + +static ssize_t hievent_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + size_t retval; + struct hievent_entry header; + + (void)file; + + wait_event_interruptible(hievent_dev.wq, (hievent_dev.size > 0)); + + (void)mutex_lock(&hievent_dev.mtx); + + retval = hievent_read_ring_head_buffer((unsigned char *)&header, + sizeof(header)); + if (retval < 0) { + retval = -EINVAL; + goto out; + } + + if (count < header.len + sizeof(header)) { + retval = -ENOMEM; + goto out; + } + + hievent_buffer_dec(sizeof(header)); + + retval = hievent_read_ring_buffer((unsigned char __user *)(user_buf), header.len); + if (retval < 0) { + retval = -EINVAL; + goto out; + } + hievent_buffer_dec(header.len); + + retval = header.len + sizeof(header); +out: + if (retval == -ENOMEM) { + // clean ring buffer + hievent_dev.write_offset = 0; + hievent_dev.head_offset = 0; + hievent_dev.size = 0; + hievent_dev.count = 0; + } + (void)mutex_unlock(&hievent_dev.mtx); + + return retval; +} + +static int hievent_write_ring_head_buffer(const unsigned char *buffer, + size_t buf_len) +{ + size_t buf_left = HIEVENT_BUFFER - hievent_dev.write_offset; + + if (buf_len > buf_left) { + memcpy(hievent_dev.buffer + hievent_dev.write_offset, + buffer, buf_left); + memcpy(hievent_dev.buffer, buffer + buf_left, + min(HIEVENT_BUFFER, buf_len - buf_left)); + } else { + memcpy(hievent_dev.buffer + hievent_dev.write_offset, + buffer, min(buf_left, buf_len)); + } + + return 0; +} + +static void hievent_head_init(struct hievent_entry * const header, size_t len) +{ + header->len = (unsigned short)len; + header->header_size = sizeof(struct hievent_entry); +} + +static void hievent_cover_old_log(size_t buf_len) +{ + int retval; + struct hievent_entry header; + size_t total_size = buf_len + sizeof(struct hievent_entry); + + while (total_size + hievent_dev.size > HIEVENT_BUFFER) { + retval = hievent_read_ring_head_buffer((unsigned char *)&header, + sizeof(header)); + if (retval < 0) + break; + + /* let count decrease twice */ + hievent_buffer_dec(sizeof(header)); + hievent_buffer_dec(header.len); + } +} + +int hievent_write_internal(const char *buffer, size_t buf_len) +{ + struct hievent_entry header; + int retval; + + if (buf_len < sizeof(int) || + buf_len > HIEVENT_BUFFER - sizeof(struct hievent_entry)) + return -EINVAL; + + (void)mutex_lock(&hievent_dev.mtx); + + hievent_cover_old_log(buf_len); + + hievent_head_init(&header, buf_len); + retval = hievent_write_ring_head_buffer((unsigned char *)&header, + sizeof(header)); + if (retval) { + retval = -EINVAL; + goto out; + } + hievent_buffer_inc(sizeof(header)); + + retval = hievent_write_ring_head_buffer((unsigned char *)(buffer), + header.len); + if (retval) { + retval = -EINVAL; + goto out; + } + + hievent_buffer_inc(header.len); + + retval = header.len; + +out: + (void)mutex_unlock(&hievent_dev.mtx); + if (retval > 0) + wake_up_interruptible(&hievent_dev.wq); + + return retval; +} + +static unsigned int hievent_poll(struct file *filep, poll_table *wait) +{ + unsigned int mask = 0; + + poll_wait(filep, &hievent_dev.wq, wait); + if (hievent_dev.size > 0) { + mask |= POLLIN | POLLRDNORM; + return mask; + } + + return 0; +} + +static ssize_t hievent_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + int check_code = 0; + unsigned char *temp_buffer = NULL; + const struct iovec *iov = from->iov; + int 
retval; + size_t buf_len; + (void)iocb; + + if (from->nr_segs != 2) { /* must contain 2 segments */ + pr_err("invalid nr_segs: %ld", from->nr_segs); + retval = -EINVAL; + goto out; + } + + /* seg 0 info is checkcode*/ + retval = copy_from_user(&check_code, iov[0].iov_base, + sizeof(check_code)); + if (retval || check_code != CHECK_CODE) { + retval = -EINVAL; + goto out; + } + + /* seg 1 info */ + buf_len = iov[1].iov_len; + if (buf_len > HIEVENT_BUFFER - sizeof(struct hievent_entry)) { + retval = -ENOMEM; + goto out; + } + + temp_buffer = kmalloc(buf_len, GFP_KERNEL); + if (!temp_buffer) { + retval = -ENOMEM; + goto out; + } + + retval = copy_from_user(temp_buffer, iov[1].iov_base, iov[1].iov_len); + if (retval) { + retval = -EIO; + goto free_mem; + } + + retval = hievent_write_internal(temp_buffer, buf_len); + if (retval < 0) { + retval = -EIO; + goto free_mem; + } + retval = buf_len + iov[0].iov_len; + +free_mem: + kfree(temp_buffer); + +out: + return retval; +} + +static const struct file_operations hievent_fops = { + .read = hievent_read, /* read */ + .poll = hievent_poll, /* poll */ + .write_iter = hievent_write_iter, /* write_iter */ +}; + +static int hievent_device_init(void) +{ + hievent_dev.buffer = kmalloc(HIEVENT_BUFFER, GFP_KERNEL); + if (!hievent_dev.buffer) + return -ENOMEM; + + init_waitqueue_head(&hievent_dev.wq); + mutex_init(&hievent_dev.mtx); + hievent_dev.write_offset = 0; + hievent_dev.head_offset = 0; + hievent_dev.size = 0; + hievent_dev.count = 0; + + return 0; +} + +static int __init hieventdev_init(void) +{ + int result; + struct device *dev_ret = NULL; + + result = alloc_chrdev_region(&hievent_devno, 0, HIEVENT_DEV_NR, HIEVENT_DEV_NAME); + if (result < 0) { + pr_err("register %s failed", HIEVENT_DRIVER); + return -ENODEV; + } + + cdev_init(&hievent_dev.devm, &hievent_fops); + hievent_dev.devm.owner = THIS_MODULE; + + result = cdev_add(&hievent_dev.devm, hievent_devno, HIEVENT_DEV_NR); + if (result < 0) { + pr_err("cdev_add failed"); + goto unreg_dev; + } + + result = hievent_device_init(); + if (result < 0) { + pr_err("hievent_device_init failed"); + goto del_dev; + } + + hievent_class = class_create(THIS_MODULE, HIEVENT_DEV_NAME); + if (IS_ERR(hievent_class)) { + pr_err("class_create failed"); + goto del_buffer; + } + + dev_ret = device_create(hievent_class, 0, hievent_devno, 0, HIEVENT_DEV_NAME); + if (IS_ERR(dev_ret)) { + pr_err("device_create failed"); + goto del_class; + } + + return 0; + +del_class: + class_destroy(hievent_class); +del_buffer: + kfree(hievent_dev.buffer); +del_dev: + cdev_del(&hievent_dev.devm); +unreg_dev: + unregister_chrdev_region(hievent_devno, HIEVENT_DEV_NR); + + return -ENODEV; +} + +static void __exit hievent_exit_module(void) +{ + device_destroy(hievent_class, hievent_devno); + class_destroy(hievent_class); + kfree(hievent_dev.buffer); + cdev_del(&hievent_dev.devm); + unregister_chrdev_region(hievent_devno, HIEVENT_DEV_NR); +} + +static int __init hievent_init_module(void) +{ + int state; + + state = hieventdev_init(); + return 0; +} + +module_init(hievent_init_module); +module_exit(hievent_exit_module); + +MODULE_AUTHOR("OHOS"); +MODULE_DESCRIPTION("User mode hievent device interface"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("hievent"); diff --git a/drivers/staging/hievent/hievent_driver.h b/drivers/staging/hievent/hievent_driver.h new file mode 100755 index 000000000..83c67d9d2 --- /dev/null +++ b/drivers/staging/hievent/hievent_driver.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2021 Huawei 
Technologies Co., Ltd. All rights reserved. + */ + +#ifndef HIEVENT_DRIVER_H +#define HIEVENT_DRIVER_H + +#include + +#define CHECK_CODE 0x7BCDABCD + +struct idap_header { + char level; + char category; + char log_type; + char sn; +}; + +int hievent_write_internal(const char *buffer, size_t buf_len); + +#endif /* HIEVENT_DRIVER_H */ diff --git a/drivers/staging/hievent/hiview_hievent.c b/drivers/staging/hievent/hiview_hievent.c new file mode 100755 index 000000000..4533b6fbb --- /dev/null +++ b/drivers/staging/hievent/hiview_hievent.c @@ -0,0 +1,488 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Huawei Technologies Co., Ltd. All rights reserved. + */ + +#include "hiview_hievent.h" +#include "hievent_driver.h" + +#include +#include + +#define INT_TYPE_MAX_LEN 21 + +#define MAX_PATH_LEN 256 +#define MAX_STR_LEN (10 * 1024) + +/* CONFIG_BBOX_BUFFER_SIZE is max length of /dev/bbox */ +#define EVENT_INFO_BUF_LEN ((size_t)CONFIG_BBOX_BUFFER_SIZE) +#define EVENT_INFO_PACK_BUF_LEN min((size_t)CONFIG_BBOX_BUFFER_SIZE, 2048) + +#define BUF_POINTER_FORWARD \ +do { \ + if (tmplen < len) { \ + tmp += tmplen; \ + len -= tmplen; \ + } else { \ + tmp += len; \ + len = 0; \ + } \ +} while (0) + +struct hievent_payload { + char *key; + char *value; + struct hievent_payload *next; +}; + +static int hievent_convert_string(struct hiview_hievent *event, char **pbuf); + +static struct hievent_payload *hievent_payload_create(void); + +static void hievent_payload_destroy(struct hievent_payload *p); + +static struct hievent_payload *hievent_get_payload(struct hievent_payload *head, + const char *key); + +static void hievent_add_payload(struct hiview_hievent *obj, + struct hievent_payload *payload); + +static struct hievent_payload *hievent_payload_create(void) +{ + struct hievent_payload *payload = NULL; + + payload = kmalloc(sizeof(*payload), GFP_KERNEL); + if (!payload) + return NULL; + + payload->key = NULL; + payload->value = NULL; + payload->next = NULL; + + return payload; +} + +static void hievent_payload_destroy(struct hievent_payload *p) +{ + if (!p) + return; + + kfree(p->value); + kfree(p->key); + kfree(p); +} + +static struct hievent_payload *hievent_get_payload(struct hievent_payload *head, + const char *key) +{ + struct hievent_payload *p = head; + + while (p) { + if (key && p->key) { + if (strcmp(p->key, key) == 0) + return p; + } + p = p->next; + } + + return NULL; +} + +static void hievent_add_payload(struct hiview_hievent *obj, + struct hievent_payload *payload) +{ + if (!obj->head) { + obj->head = payload; + } else { + struct hievent_payload *p = obj->head; + + while (p->next) + p = p->next; + p->next = payload; + } +} + +struct hiview_hievent *hievent_create(unsigned int eventid) +{ + struct hiview_hievent *event = NULL; + + /* combined event obj struct */ + event = kmalloc(sizeof(*event), GFP_KERNEL); + if (!event) + return NULL; + + memset(event, 0, sizeof(*event)); + event->eventid = eventid; + pr_debug("%s : %u\n", __func__, eventid); + + return (void *)event; +} + +int hievent_put_integer(struct hiview_hievent *event, + const char *key, long value) +{ + int ret; + struct hievent_payload *payload = NULL; + + if ((!event) || (!key)) { + pr_err("Bad input event or key for %s", __func__); + return -EINVAL; + } + + payload = hievent_get_payload(event->head, key); + if (!payload) { + payload = hievent_payload_create(); + if (!payload) + return -ENOMEM; + payload->key = kstrdup(key, GFP_KERNEL); + hievent_add_payload(event, payload); + } + + kfree(payload->value); + + 
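+	/* replace any previous value for this key: the old string was freed
+	 * above; store the integer as decimal text
+	 */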
payload->value = kmalloc(INT_TYPE_MAX_LEN, GFP_KERNEL); + if (!payload->value) + return -ENOMEM; + + (void)memset(payload->value, 0, INT_TYPE_MAX_LEN); + ret = snprintf(payload->value, INT_TYPE_MAX_LEN, "%d", (int)value); + if (ret < 0) + return -ENOMEM; + + return 0; +} + +int hievent_put_string(struct hiview_hievent *event, + const char *key, const char *value) +{ + struct hievent_payload *payload = NULL; + int len; + + if ((!event) || (!key) || (!value)) { + pr_err("Bad key for %s", __func__); + return -EINVAL; + } + + payload = hievent_get_payload(event->head, key); + if (!payload) { + payload = hievent_payload_create(); + if (!payload) + return -ENOMEM; + + payload->key = kstrdup(key, GFP_KERNEL); + hievent_add_payload(event, payload); + } + + kfree(payload->value); + + len = strlen(value); + /* prevent length larger than MAX_STR_LEN */ + if (len > MAX_STR_LEN) + len = MAX_STR_LEN; + + payload->value = kmalloc(len + 1, GFP_KERNEL); + if (!payload->value) + return -ENOMEM; + + (void)memset(payload->value, 0, len + 1); + if (strncpy(payload->value, value, len) > 0) + payload->value[len] = '\0'; + + return 0; +} + +int hievent_set_time(struct hiview_hievent *event, long long seconds) +{ + if ((!event) || (seconds == 0)) { + pr_err("Bad input for %s", __func__); + return -EINVAL; + } + event->time = seconds; + return 0; +} + +static int append_array_item(char **pool, int pool_len, const char *path) +{ + int i; + + if ((!path) || (path[0] == 0)) { + pr_err("Bad path %s", __func__); + return -EINVAL; + } + + if (strlen(path) > MAX_PATH_LEN) { + pr_err("file path over max: %d", MAX_PATH_LEN); + return -EINVAL; + } + + for (i = 0; i < pool_len; i++) { + if (pool[i] != 0) + continue; + + pool[i] = kstrdup(path, GFP_KERNEL); + if (!pool[i]) + return -ENOMEM; + + break; + } + + if (i == MAX_PATH_NUMBER) { + pr_err("Too many paths"); + return -EINVAL; + } + + return 0; +} + +int hievent_add_filepath(struct hiview_hievent *event, const char *path) +{ + if (!event) { + pr_err("Bad path %s", __func__); + return -EINVAL; + } + return append_array_item(event->file_path, MAX_PATH_NUMBER, path); +} + +/* make string ":" to "::", ";" to ";;", and remove newline character + * for example: "abc:def;ghi" transfer to "abc::def;;ghi" + */ +static char *hievent_make_regular(char *value) +{ + int count = 0; + int len = 0; + char *temp = value; + char *regular = NULL; + char *regular_tmp = NULL; + size_t regular_len; + + while (*temp != '\0') { + if (*temp == ':') + count++; + else if (*temp == ';') + count++; + else if ((*temp == '\n') || (*temp == '\r')) + *temp = ' '; + + temp++; + len++; + } + + /* no need to transfer, just return old value */ + if (count == 0) + return value; + + regular_len = len + count * 2 + 1; // 2 char in a byte + regular = kmalloc(regular_len, GFP_KERNEL); + if (!regular) + return NULL; + + (void)memset(regular, 0, regular_len); + regular_tmp = regular; + temp = value; + while (*temp != 0) { + if ((*temp == ':') || (*temp == ';')) + *regular_tmp++ = *temp; + + *regular_tmp++ = *temp; + temp++; + } + *regular_tmp = '\0'; + + return regular; +} + +int logbuff_to_exception(char category, int level, char log_type, + char sn, const char *msg, int msglen) +{ + struct idap_header *hdr = NULL; + size_t buf_len = sizeof(int) + sizeof(struct idap_header) + msglen; + int ret; + int *check_code = NULL; + char *buffer = kmalloc(buf_len, GFP_KERNEL); + + if (!buffer) + return -ENOMEM; + + check_code = (int *)buffer; + *check_code = CHECK_CODE; + + hdr = (struct idap_header *)(buffer + sizeof(int)); 
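+	/* record layout on /dev/bbox: 4-byte check code, struct idap_header,
+	 * then the raw message
+	 */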
+ hdr->level = level; + hdr->category = category; + hdr->log_type = log_type; + hdr->sn = sn; + + memcpy(buffer + sizeof(int) + sizeof(struct idap_header), msg, msglen); + + ret = hievent_write_internal(buffer, buf_len); + + kfree(buffer); + + return ret; +} + +static int hievent_fill_payload(struct hiview_hievent *event, char **pbuf, + char *tmp, int length) +{ + struct hievent_payload *p = event->head; + int len = length; + int tmplen; + unsigned int keycount = 0; + + while (p) { + char *value = NULL; + char *regular_value = NULL; + int need_free = 1; + + if (!p->value) { + p = p->next; + continue; + } + if (keycount == 0) { + tmplen = snprintf(tmp, len - 1, " --extra "); + BUF_POINTER_FORWARD; + } + keycount++; + + /* fill key */ + if (p->key) + tmplen = snprintf(tmp, len - 1, "%s:", p->key); + + BUF_POINTER_FORWARD; + /* fill value */ + tmplen = 0; + + value = p->value; + regular_value = hievent_make_regular(value); + if (!regular_value) { + regular_value = "NULL"; + need_free = 0; + } + tmplen = snprintf(tmp, len - 1, "%s;", regular_value); + if ((value != regular_value) && need_free) + kfree(regular_value); + + BUF_POINTER_FORWARD; + p = p->next; + } + return len; +} + +static int hievent_convert_string(struct hiview_hievent *event, char **pbuf) +{ + int len; + char *tmp = NULL; + int tmplen; + unsigned int i; + + char *buf = kmalloc(EVENT_INFO_BUF_LEN, GFP_KERNEL); + + if (!buf) { + *pbuf = NULL; + return 0; + } + + (void)memset(buf, 0, EVENT_INFO_BUF_LEN); + len = EVENT_INFO_BUF_LEN; + tmp = buf; + + /* fill eventid */ + tmplen = snprintf(tmp, len - 1, "eventid %d", event->eventid); + BUF_POINTER_FORWARD; + + /* fill the path */ + for (i = 0; i < MAX_PATH_NUMBER; i++) { + if (!event->file_path[i]) + break; + + tmplen = snprintf(tmp, len - 1, " -i %s", event->file_path[i]); + BUF_POINTER_FORWARD; + } + + /* fill time */ + if (event->time) { + tmplen = snprintf(tmp, len - 1, " -t %lld", event->time); + BUF_POINTER_FORWARD; + } + + /* fill the payload info */ + len = hievent_fill_payload(event, pbuf, tmp, len); + *pbuf = buf; + return (EVENT_INFO_BUF_LEN - len); +} + +#define IDAP_LOGTYPE_CMD 1 +static int hievent_write_logexception(char *str, const int strlen) +{ + char tempchr; + char *strptr = str; + int left_buf_len = strlen + 1; + int sent_cnt = 0; + + while (left_buf_len > 0) { + if (left_buf_len > EVENT_INFO_PACK_BUF_LEN) { + tempchr = strptr[EVENT_INFO_PACK_BUF_LEN - 1]; + strptr[EVENT_INFO_PACK_BUF_LEN - 1] = '\0'; + logbuff_to_exception(0, 0, IDAP_LOGTYPE_CMD, 1, strptr, + EVENT_INFO_PACK_BUF_LEN); + left_buf_len -= (EVENT_INFO_PACK_BUF_LEN - 1); + strptr += (EVENT_INFO_PACK_BUF_LEN - 1); + strptr[0] = tempchr; + sent_cnt++; + } else { + logbuff_to_exception(0, 0, IDAP_LOGTYPE_CMD, 0, strptr, + left_buf_len); + sent_cnt++; + break; + } + } + + return sent_cnt; +} + +int hievent_report(struct hiview_hievent *obj) +{ + char *str = NULL; + int buf_len; + int sent_packet; + + if (!obj) { + pr_err("Bad event %s", __func__); + return -EINVAL; + } + + buf_len = hievent_convert_string(obj, &str); + if (!str) + return -EINVAL; + + sent_packet = hievent_write_logexception(str, buf_len); + pr_err("report: %s", str); + kfree(str); + + return sent_packet; +} + +void hievent_destroy(struct hiview_hievent *event) +{ + int i; + struct hievent_payload *p = NULL; + + if (!event) + return; + + p = event->head; + while (p) { + struct hievent_payload *del = p; + + p = p->next; + hievent_payload_destroy(del); + } + + event->head = NULL; + for (i = 0; i < MAX_PATH_NUMBER; i++) { + 
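+		/* free each attached file path; unused slots are NULL and
+		 * kfree(NULL) is a no-op
+		 */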
kfree(event->file_path[i]);
+		event->file_path[i] = NULL;
+	}
+
+	kfree(event);
+}
diff --git a/drivers/staging/hievent/hiview_hievent.h b/drivers/staging/hievent/hiview_hievent.h
new file mode 100755
index 000000000..c1c003510
--- /dev/null
+++ b/drivers/staging/hievent/hiview_hievent.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2021 Huawei Technologies Co., Ltd. All rights reserved.
+ */
+
+#ifndef HIVIEW_HIEVENT_H
+#define HIVIEW_HIEVENT_H
+
+#define MAX_PATH_NUMBER 10
+
+/* hievent struct */
+struct hiview_hievent {
+	unsigned int eventid;
+
+	long long time;
+
+	/* payload linked list */
+	struct hievent_payload *head;
+
+	/* file paths that need to be uploaded */
+	char *file_path[MAX_PATH_NUMBER];
+};
+
+struct hiview_hievent *hievent_create(unsigned int eventid);
+int hievent_put_integer(struct hiview_hievent *event,
+			const char *key, long value);
+int hievent_put_string(struct hiview_hievent *event,
+		       const char *key, const char *value);
+int hievent_set_time(struct hiview_hievent *event, long long seconds);
+int hievent_add_filepath(struct hiview_hievent *event, const char *path);
+int hievent_report(struct hiview_hievent *obj);
+void hievent_destroy(struct hiview_hievent *event);
+
+#endif /* HIVIEW_HIEVENT_H */
diff --git a/drivers/staging/hilog/Kconfig b/drivers/staging/hilog/Kconfig
new file mode 100755
index 000000000..243934c4c
--- /dev/null
+++ b/drivers/staging/hilog/Kconfig
@@ -0,0 +1,22 @@
+#
+# Hilog device configuration
+#
+
+config HILOG
+	tristate "Hilog support"
+	default n
+	help
+	  hilog buffer manager.
+
+	  Hilog is a simple log manager for OpenHarmonyOS.
+	  Log strings are written to /dev/hilog, and the hilog driver copies
+	  them into a ring buffer that can be read from user space.
+
+	  If unsure, say N.
+
+config HILOG_BUFFER_SIZE
+	int "hilog buffer size"
+	depends on HILOG
+	default 4096
+	help
+	  Define the default ring buffer size of hilog.
diff --git a/drivers/staging/hilog/Makefile b/drivers/staging/hilog/Makefile
new file mode 100755
index 000000000..e53c86a5d
--- /dev/null
+++ b/drivers/staging/hilog/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the hilog driver.
+#
+
+obj-$(CONFIG_HILOG) += hilog.o
diff --git a/drivers/staging/hilog/hilog.c b/drivers/staging/hilog/hilog.c
new file mode 100755
index 000000000..4e6168ce7
--- /dev/null
+++ b/drivers/staging/hilog/hilog.c
@@ -0,0 +1,402 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifndef HILOGDEV_MAJOR
+#define HILOGDEV_MAJOR 245
+#endif
+
+#ifndef HILOG_NR_DEVS
+#define HILOG_NR_DEVS 2
+#endif
+
+#ifndef MEMDEV_SIZE
+#define MEMDEV_SIZE 4096
+#endif
+
+static int hilog_major = HILOGDEV_MAJOR;
+
+module_param(hilog_major, int, 0444);
+
+struct cdev g_hilog_cdev;
+
+#define HILOG_BUFFER ((size_t)CONFIG_HILOG_BUFFER_SIZE)
+#define HILOG_DRIVER "/dev/hilog"
+
+struct hilog_entry {
+	unsigned int len;
+	unsigned int header_size;
+	unsigned int pid : 16;
+	unsigned int task_id : 16;
+	unsigned int sec;
+	unsigned int nsec;
+	unsigned int reserved;
+	char msg[0];
+};
+
+static ssize_t hilog_write(struct file *file,
+			   const char __user *user_buf,
+			   size_t count, loff_t *ppos);
+static ssize_t hilog_read(struct file *file,
+			  char __user *user_buf, size_t count, loff_t *ppos);
+
+static const struct file_operations hilog_fops = {
+	.read = hilog_read,
+	.write = hilog_write,
+};
+
+struct hilog_char_device {
+	int flag;
+	struct mutex mtx; /* lock to protect read/write buffer */
+	unsigned char *buffer;
+	wait_queue_head_t wq;
+	size_t wr_off;
+	size_t hdr_off;
+	size_t size;
+	size_t count;
+} hilog_dev;
+
+static inline unsigned char *hilog_buffer_head(void)
+{
+	return hilog_dev.buffer + hilog_dev.hdr_off;
+}
+
+static void hilog_buffer_inc(size_t sz)
+{
+	if (hilog_dev.size + sz <= HILOG_BUFFER) {
+		hilog_dev.size += sz;
+		hilog_dev.wr_off += sz;
+		hilog_dev.wr_off %= HILOG_BUFFER;
+		hilog_dev.count++;
+	}
+}
+
+static void hilog_buffer_dec(size_t sz)
+{
+	if (hilog_dev.size >= sz) {
+		hilog_dev.size -= sz;
+		hilog_dev.hdr_off += sz;
+		hilog_dev.hdr_off %= HILOG_BUFFER;
+		hilog_dev.count--;
+	}
+}
+
+/* copy_to_user() returns the number of bytes left uncopied, never a
+ * negative value, so any non-zero result is reported as -EFAULT.
+ */
+static int hilog_read_ring_buff(unsigned char __user *buffer, size_t buf_len)
+{
+	size_t retval;
+	size_t buf_left = HILOG_BUFFER - hilog_dev.hdr_off;
+
+	if (buf_left > buf_len) {
+		retval = copy_to_user(buffer, hilog_buffer_head(), buf_len);
+	} else {
+		/* the record wraps: copy the tail of the ring, then its head */
+		retval = copy_to_user(buffer, hilog_buffer_head(), buf_left);
+		if (retval)
+			return -EFAULT;
+
+		retval = copy_to_user(buffer + buf_left, hilog_dev.buffer,
+				      buf_len - buf_left);
+	}
+	return retval ? -EFAULT : 0;
+}
+
+static int hilog_read_ring_head_buffer(unsigned char *buffer, size_t buf_len)
+{
+	size_t buf_left = HILOG_BUFFER - hilog_dev.hdr_off;
+
+	if (buf_left > buf_len) {
+		memcpy(buffer, hilog_buffer_head(), buf_len);
+	} else {
+		size_t mem_len = (buf_len > buf_left) ? buf_left : buf_len;
+
+		memcpy(buffer, hilog_buffer_head(), mem_len);
+		memcpy(buffer + buf_left, hilog_dev.buffer, buf_len - buf_left);
+	}
+
+	return 0;
+}
+
+static ssize_t hilog_read(struct file *file,
+			  char __user *user_buf, size_t count, loff_t *ppos)
+{
+	ssize_t retval;
+	struct hilog_entry header;
+
+	(void)file;
+	wait_event_interruptible(hilog_dev.wq, (hilog_dev.size > 0));
+
+	(void)mutex_lock(&hilog_dev.mtx);
+
+	retval = hilog_read_ring_head_buffer((unsigned char *)&header,
+					     sizeof(header));
+	if (retval < 0) {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	if (count < header.len + sizeof(header)) {
+		pr_err("buffer too small, buf_len=%d, header.len=%d,%d\n",
+		       (int)count, header.len, header.header_size);
+		retval = -ENOMEM;
+		goto out;
+	}
+
+	hilog_buffer_dec(sizeof(header));
+	retval = copy_to_user((unsigned char *)user_buf,
+			      (unsigned char *)&header,
+			      min(count, sizeof(header)));
+	if (retval) {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	retval = hilog_read_ring_buff((unsigned char *)
+				      (user_buf + sizeof(header)),
+				      header.len);
+	if (retval < 0) {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	hilog_buffer_dec(header.len);
+	retval = header.len + sizeof(header);
+out:
+	if (retval == -ENOMEM) {
+		/* reader buffer too small: reset the ring to resynchronize */
+		hilog_dev.wr_off = 0;
+		hilog_dev.hdr_off = 0;
+		hilog_dev.size = 0;
+		hilog_dev.count = 0;
+	}
+	(void)mutex_unlock(&hilog_dev.mtx);
+
+	return retval;
+}
+
+static int hilog_write_ring_buffer(unsigned char __user *buffer, size_t buf_len)
+{
+	unsigned long retval;
+	size_t buf_left = HILOG_BUFFER - hilog_dev.wr_off;
+
+	if (buf_len > buf_left) {
+		retval = copy_from_user(hilog_dev.buffer + hilog_dev.wr_off,
+					buffer, buf_left);
+		if (retval)
+			return -1;
+		retval = copy_from_user(hilog_dev.buffer, buffer + buf_left,
+					min(HILOG_BUFFER, buf_len - buf_left));
+	} else {
+		retval = copy_from_user(hilog_dev.buffer + hilog_dev.wr_off,
+					buffer, min(buf_left, buf_len));
+	}
+
+	/* copy_from_user() returns the number of bytes left uncopied */
+	if (retval)
+		return -1;
+
+	return 0;
+}
+
+static int hilog_write_ring_head_buffer(unsigned char *buffer, size_t buf_len)
+{
+	size_t buf_left = HILOG_BUFFER - hilog_dev.wr_off;
+
+	if (buf_len > buf_left) {
+		memcpy(hilog_dev.buffer + hilog_dev.wr_off,
+		       buffer, buf_left);
+		memcpy(hilog_dev.buffer, buffer + buf_left,
+		       min(HILOG_BUFFER, buf_len - buf_left));
+	} else {
+		memcpy(hilog_dev.buffer + hilog_dev.wr_off,
+		       buffer, min(buf_left, buf_len));
+	}
+
+	return 0;
+}
+
+static void hilog_head_init(struct hilog_entry *header, size_t len)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 0, 0)
+#define NANOSEC_PER_MICROSEC 1000
+	struct timeval now = { 0 };
+
+	do_gettimeofday(&now);
+
+	header->sec = now.tv_sec;
+	header->nsec = now.tv_usec * NANOSEC_PER_MICROSEC;
+#else
+	struct timespec64 now = { 0 };
+
+	ktime_get_real_ts64(&now);
+
+	header->sec = now.tv_sec;
+	header->nsec = now.tv_nsec;
+#endif
+
+	header->len = len;
+	header->pid = current->pid;
+	header->task_id = current->tgid;
+	header->header_size = sizeof(struct hilog_entry);
+}
+
+static void hilog_cover_old_log(size_t buf_len)
+{
+	int retval;
+	struct hilog_entry header;
+	size_t total_size = buf_len + sizeof(struct hilog_entry);
+	static int drop_log_lines;
+	static bool is_last_time_full;
+	bool is_this_time_full = false;
+
+	while (total_size + hilog_dev.size > HILOG_BUFFER) {
+		retval = hilog_read_ring_head_buffer((unsigned char *)&header,
+						     sizeof(header));
+		if (retval < 0)
+			break;
+
+		drop_log_lines++;
+		is_this_time_full = true;
+		is_last_time_full = true;
+		hilog_buffer_dec(sizeof(header) + header.len);
+	}
+	if
(is_last_time_full && !is_this_time_full) { + /* so we can only print one log if hilog ring buffer is full in a short time */ + if (drop_log_lines > 0) + pr_info("hilog ringbuffer full, drop %d line(s) log\n", + drop_log_lines); + is_last_time_full = false; + drop_log_lines = 0; + } +} + +int hilog_write_internal(const char __user *buffer, size_t buf_len) +{ + struct hilog_entry header; + int retval; + + (void)mutex_lock(&hilog_dev.mtx); + hilog_cover_old_log(buf_len); + hilog_head_init(&header, buf_len); + + retval = hilog_write_ring_head_buffer((unsigned char *)&header, + sizeof(header)); + if (retval) { + retval = -ENODATA; + goto out; + } + hilog_buffer_inc(sizeof(header)); + + retval = hilog_write_ring_buffer((unsigned char *)(buffer), header.len); + if (retval) { + retval = -ENODATA; + goto out; + } + + hilog_buffer_inc(header.len); + + retval = header.len; + +out: + (void)mutex_unlock(&hilog_dev.mtx); + if (retval > 0) + wake_up_interruptible(&hilog_dev.wq); + else if (retval < 0) + pr_err("write fail retval=%d\n", retval); + + return retval; +} + +static ssize_t hilog_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + (void)file; + if (count + sizeof(struct hilog_entry) > HILOG_BUFFER) { + pr_err("input too large\n"); + return -ENOMEM; + } + + return hilog_write_internal(user_buf, count); +} + +static void hilog_device_init(void) +{ + hilog_dev.buffer = kmalloc(HILOG_BUFFER, GFP_KERNEL); + if (!hilog_dev.buffer) + return; + + init_waitqueue_head(&hilog_dev.wq); + mutex_init(&hilog_dev.mtx); + hilog_dev.wr_off = 0; + hilog_dev.hdr_off = 0; + hilog_dev.size = 0; + hilog_dev.count = 0; +} + +static int __init hilogdev_init(void) +{ + int result; + dev_t devno = MKDEV(hilog_major, 0); + + result = register_chrdev_region(devno, 2, "hilog"); + if (result < 0) { + pr_emerg("\t register hilog error %d\n", result); + return result; + } + + cdev_init(&g_hilog_cdev, &hilog_fops); + g_hilog_cdev.owner = THIS_MODULE; + g_hilog_cdev.ops = &hilog_fops; + + cdev_add(&g_hilog_cdev, MKDEV(hilog_major, 0), HILOG_NR_DEVS); + + hilog_device_init(); + return 0; +} + +static void __exit hilog_exit_module(void) +{ + cdev_del(&g_hilog_cdev); + unregister_chrdev_region(MKDEV(hilog_major, 0), HILOG_NR_DEVS); +} + +static int __init hilog_init_module(void) +{ + int state = hilogdev_init(); + + pr_info("\t hilog_init Start%d\n", state); + return 0; +} + +module_init(hilog_init_module); +module_exit(hilog_exit_module); + +MODULE_AUTHOR("OHOS"); +MODULE_DESCRIPTION("User mode hilog device interface"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("hilog"); diff --git a/drivers/staging/hisysevent/Kconfig b/drivers/staging/hisysevent/Kconfig new file mode 100755 index 000000000..a40621cb8 --- /dev/null +++ b/drivers/staging/hisysevent/Kconfig @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +config HISYSEVENT + tristate "Enable hisysevent" + depends on HIEVENT + help + Say Y here to enable hisysevent feature support. 
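With HISYSEVENT enabled, kernel code reports events through the interface exported further down in this patch (hisysevent_create(), hisysevent_put_integer(), hisysevent_put_string(), hisysevent_write(), hisysevent_destroy()). The sketch below shows the intended calling sequence; the dfx/hiview_hisysevent.h include path and the HISYSEVENT_FAULT enumerator are assumptions here, since the header that defines enum hisysevent_type is not part of this hunk:

#include <linux/printk.h>
#include <dfx/hiview_hisysevent.h>	/* assumed header path */

static void hisysevent_demo(void)
{
	/* domain/name: letters and '_' only, at most 16/32 characters */
	struct hiview_hisysevent *event =
		hisysevent_create("DEMO_DOMAIN", "DEMO_EVENT",
				  HISYSEVENT_FAULT /* assumed enumerator */);

	if (!event)
		return;

	/* param keys follow the same character rules, at most 48 chars */
	if (hisysevent_put_integer(event, "error_code", 42) ||
	    hisysevent_put_string(event, "module", "demo"))
		goto out;

	/* serializes the event to JSON and writes it to /dev/bbox */
	if (hisysevent_write(event) < 0)
		pr_warn("hisysevent demo: write failed\n");
out:
	hisysevent_destroy(&event);
}

Note that hisysevent_destroy() takes the address of the pointer so it can clear it after freeing.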
diff --git a/drivers/staging/hisysevent/Makefile b/drivers/staging/hisysevent/Makefile new file mode 100755 index 000000000..f3b6c9296 --- /dev/null +++ b/drivers/staging/hisysevent/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_HISYSEVENT) += hiview_hisysevent.o diff --git a/drivers/staging/hisysevent/hiview_hisysevent.c b/drivers/staging/hisysevent/hiview_hisysevent.c new file mode 100755 index 000000000..755450822 --- /dev/null +++ b/drivers/staging/hisysevent/hiview_hisysevent.c @@ -0,0 +1,519 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Huawei Technologies Co., Ltd. All rights reserved. + */ +#include + +#ifdef CONFIG_HISYSEVENT + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PARAM_INT_MAX_LEN 21 // 21 = 20 (max len) + 1 ('\0') +#define PARAM_STR_MAX_LEN 1536 // 1.5KB + +#define MAX_DOMAIN_LENGTH 16 +#define MAX_EVENT_NAME_LENGTH 32 +#define MAX_PARAM_NAME_LENGTH 48 +#define MAX_PARAM_NUMBER 128 + +#define HISYSEVENT_WRITER_DEV "/dev/bbox" +#define HISYSEVENT_INFO_BUF_LEN (2048 - 6) // 2KB - 6 (read_gap) + +#define MINUTE_TO_SECS 60 +#define SEC_TO_MILLISEC 1000 +#define MILLISEC_TO_NANOSEC (1000 * 1000) +#define TIME_ZONE_LEN 6 + +#define BUF_POINTER_FORWARD \ + do { \ + if (tmp_len >= 0 && tmp_len < len) { \ + tmp += tmp_len; \ + len -= tmp_len; \ + } else { \ + pr_err("string over length"); \ + tmp += len; \ + len = 0; \ + } \ + } while (0) + +static int CHECK_CODE = 0x7BCDABCD; + +struct hisysevent_payload { + /* key of the event param */ + char *key; + + /* value of the event param */ + char *value; + + /* next param */ + struct hisysevent_payload *next; +}; + +/* hisysevent struct */ +struct hiview_hisysevent { + /* event domain */ + char *domain; + + /* event name */ + char *name; + + /* event type */ + int type; + + /* event time */ + long long time; + + /* time zone */ + char *tz; + + /* process id */ + int pid; + + /* thread id */ + int tid; + + /* user id */ + int uid; + + /* payload linked list */ + struct hisysevent_payload *head; + + /* length of payload */ + int payload_cnt; +}; + +static struct hisysevent_payload *hisysevent_payload_create(void) +{ + struct hisysevent_payload *payload = NULL; + + payload = kmalloc(sizeof(*payload), GFP_KERNEL); + if (!payload) + return NULL; + + payload->key = NULL; + payload->value = NULL; + payload->next = NULL; + return payload; +} + +static void hisysevent_payload_destroy(struct hisysevent_payload *p) +{ + if (!p) + return; + + kfree(p->value); + kfree(p->key); + kfree(p); +} + +static void +hisysevent_add_payload(struct hiview_hisysevent *event, struct hisysevent_payload *payload) +{ + if (!event->head) { + event->head = payload; + } else { + struct hisysevent_payload *temp = event->head; + + while (temp->next) + temp = temp->next; + temp->next = payload; + } +} + +static struct hisysevent_payload * +hisysevent_get_payload(struct hiview_hisysevent *event, const char *key) +{ + struct hisysevent_payload *p = event->head; + + if (!key) + return NULL; + + while (p) { + if (p->key && strcmp(p->key, key) == 0) + return p; + p = p->next; + } + + return NULL; +} + +static struct hisysevent_payload * +hisysevent_get_or_create_payload(struct hiview_hisysevent *event, const char *key) +{ + struct hisysevent_payload *payload = hisysevent_get_payload(event, key); + + if (payload) { + kfree(payload->value); + return payload; + } + + payload = hisysevent_payload_create(); + if (!payload) + return NULL; + + 
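+	/* fresh node: duplicate the key now; the caller attaches the value */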
payload->key = kstrdup(key, GFP_ATOMIC); + if (!payload->key) { + hisysevent_payload_destroy(payload); + return NULL; + } + + hisysevent_add_payload(event, payload); + return payload; +} + +static int json_add_number(char *json, size_t len, const char *key, long long num) +{ + return snprintf(json, len, "\"%s\":%lld,", key, num); +} + +static int json_add_string(char *json, size_t len, const char *key, const char *str) +{ + return snprintf(json, len, "\"%s\":%s,", key, str); +} + +static int json_add_string2(char *json, size_t len, const char *key, const char *str) +{ + return snprintf(json, len, "\"%s\":\"%s\",", key, str); +} + +static int hisysevent_convert_base(const struct hiview_hisysevent *event, char **buf, int len) +{ + int tmp_len = 0; + char *tmp = *buf; + + tmp_len = json_add_string2(tmp, len, "domain_", event->domain); + BUF_POINTER_FORWARD; + tmp_len = json_add_string2(tmp, len, "name_", event->name); + BUF_POINTER_FORWARD; + tmp_len = json_add_number(tmp, len, "type_", event->type); + BUF_POINTER_FORWARD; + tmp_len = json_add_number(tmp, len, "time_", event->time); + BUF_POINTER_FORWARD; + tmp_len = json_add_string2(tmp, len, "tz_", event->tz); + BUF_POINTER_FORWARD; + tmp_len = json_add_number(tmp, len, "pid_", event->pid); + BUF_POINTER_FORWARD; + tmp_len = json_add_number(tmp, len, "tid_", event->tid); + BUF_POINTER_FORWARD; + tmp_len = json_add_number(tmp, len, "uid_", event->uid); + BUF_POINTER_FORWARD; + *buf = tmp; + return len; +} + +static int hisysevent_convert_payload(struct hisysevent_payload *payload, char **buf, int len) +{ + int tmp_len = 0; + char *tmp = *buf; + struct hisysevent_payload *tmp_payload = payload; + + while (tmp_payload) { + if (!tmp_payload->key || !tmp_payload->value) { + pr_err("drop invalid payload"); + tmp_payload = tmp_payload->next; + continue; + } + tmp_len = json_add_string(tmp, len, tmp_payload->key, tmp_payload->value); + BUF_POINTER_FORWARD; + tmp_payload = tmp_payload->next; + } + *buf = tmp; + return len; +} + +static int hisysevent_convert_json(const struct hiview_hisysevent *event, char **buf_ptr) +{ + char *tmp; + int tmp_len = 0; + int buf_len = HISYSEVENT_INFO_BUF_LEN; + int len = buf_len; + char *buf = vmalloc(buf_len + 1); + + if (!buf) + return -ENOMEM; + memset(buf, 0, buf_len + 1); + + tmp = buf; + tmp_len = snprintf(tmp, len, "%c", '{'); + BUF_POINTER_FORWARD; + + len = hisysevent_convert_base(event, &tmp, len); + if (!event->head) + goto convert_end; + len = hisysevent_convert_payload(event->head, &tmp, len); + +convert_end: + if (len <= 1) { // remaining len must > 1, for '}' and '\0' + vfree(buf); + return -EINVAL; + } + + tmp_len = snprintf(tmp - 1, len, "%c", '}'); + BUF_POINTER_FORWARD; + *buf_ptr = buf; + return 0; +} + +static void hisysevent_set_time(struct hiview_hisysevent *event) +{ + struct timespec64 ts; + struct timezone tz = sys_tz; + int tz_index = 0; + char time_zone[TIME_ZONE_LEN]; + int tz_hour; + int tz_min; + long long millisecs = 0; + + ktime_get_real_ts64(&ts); + millisecs = ts.tv_sec * SEC_TO_MILLISEC + ts.tv_nsec / MILLISEC_TO_NANOSEC; + event->time = millisecs; + + tz_hour = (-tz.tz_minuteswest) / MINUTE_TO_SECS; + time_zone[tz_index++] = tz_hour >= 0 ? 
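	/* tz_minuteswest counts minutes west of UTC, hence the negation:
	 * zones east of UTC get a '+' sign
	 */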
'+' : '-'; + tz_min = (-tz.tz_minuteswest) % MINUTE_TO_SECS; + sprintf(&time_zone[tz_index], "%02u%02u", abs(tz_hour), abs(tz_min)); + time_zone[TIME_ZONE_LEN - 1] = '\0'; + event->tz = kstrdup(time_zone, GFP_ATOMIC); +} + +static bool is_valid_string(const char *str, unsigned int max_len) +{ + unsigned int len = 0; + unsigned int i; + + if (!str) + return false; + + len = strlen(str); + if (len == 0 || len > max_len) + return false; + + for (i = 0; i < len; i++) { + if (!isalpha(str[i]) && str[i] != '_') + return false; + } + return true; +} + +static bool is_valid_num_of_param(struct hiview_hisysevent *event) +{ + if (!event) + return false; + + return (event->payload_cnt) < MAX_PARAM_NUMBER; +} + +struct hiview_hisysevent * +hisysevent_create(const char *domain, const char *name, enum hisysevent_type type) +{ + struct hiview_hisysevent *event = NULL; + + if (!is_valid_string(domain, MAX_DOMAIN_LENGTH)) { + pr_err("invalid event domain"); + return NULL; + } + if (!is_valid_string(name, MAX_EVENT_NAME_LENGTH)) { + pr_err("invalid event name"); + return NULL; + } + + event = kmalloc(sizeof(*event), GFP_KERNEL); + if (!event) + return NULL; + memset(event, 0, sizeof(*event)); + + event->domain = kstrdup(domain, GFP_ATOMIC); + if (!(event->domain)) + goto create_err; + + event->name = kstrdup(name, GFP_ATOMIC); + if (!(event->name)) + goto create_err; + + event->type = type; + event->pid = current->pid; + event->tid = current->tgid; + event->uid = current_uid().val; + hisysevent_set_time(event); + if (!(event->tz)) + goto create_err; + + event->payload_cnt = 0; + pr_info("create hisysevent succ, domain=%s, name=%s, type=%d", + event->domain, event->name, event->type); + return (void *)event; + +create_err: + hisysevent_destroy(&event); + return NULL; +} +EXPORT_SYMBOL_GPL(hisysevent_create); + +void hisysevent_destroy(struct hiview_hisysevent **event) +{ + struct hisysevent_payload *payload = NULL; + + if (!event || !*event) + return; + + kfree((*event)->domain); + kfree((*event)->name); + kfree((*event)->tz); + payload = (*event)->head; + while (payload) { + struct hisysevent_payload *temp = payload; + + payload = payload->next; + hisysevent_payload_destroy(temp); + } + kfree(*event); + *event = NULL; +} +EXPORT_SYMBOL_GPL(hisysevent_destroy); + +int hisysevent_put_integer(struct hiview_hisysevent *event, const char *key, long long value) +{ + struct hisysevent_payload *payload = NULL; + + if (!event) { + pr_err("invalid event"); + return -EINVAL; + } + if (!is_valid_num_of_param(event)) { + pr_err("invalid num of param"); + return -EINVAL; + } + if (!is_valid_string(key, MAX_PARAM_NAME_LENGTH)) { + pr_err("invalid key"); + return -EINVAL; + } + + payload = hisysevent_get_or_create_payload(event, key); + if (!payload) { + pr_err("failed to get or create payload"); + return -ENOMEM; + } + + payload->value = kmalloc(PARAM_INT_MAX_LEN, GFP_KERNEL); + if (!payload->value) + return -ENOMEM; + + memset(payload->value, 0, PARAM_INT_MAX_LEN); + snprintf(payload->value, PARAM_INT_MAX_LEN, "%lld", value); + event->payload_cnt++; + return 0; +} +EXPORT_SYMBOL_GPL(hisysevent_put_integer); + +int hisysevent_put_string(struct hiview_hisysevent *event, const char *key, const char *value) +{ + struct hisysevent_payload *payload = NULL; + int len = 0; + int tmp_len = 0; + char *tmp_value = NULL; + + if (!event) { + pr_err("invalid event"); + return -EINVAL; + } + if (!is_valid_num_of_param(event)) { + pr_err("invalid num of param"); + return -EINVAL; + } + if (!is_valid_string(key, MAX_PARAM_NAME_LENGTH)) { 
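+		/* keys are limited to letters and '_', capped at MAX_PARAM_NAME_LENGTH */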
+ pr_err("invalid key"); + return -EINVAL; + } + if (!value) { + pr_err("invalid value"); + return -EINVAL; + } + + payload = hisysevent_get_or_create_payload(event, key); + if (!payload) { + pr_err("failed to get or create payload"); + return -ENOMEM; + } + + len = strlen(value); + if (len > PARAM_STR_MAX_LEN) { + pr_warn("string cannot exceed 1536 Byte, len=%d", len); + len = PARAM_STR_MAX_LEN; + } + + tmp_len = len + 3; // 3 for \", \", \0 + payload->value = kmalloc(tmp_len, GFP_KERNEL); + if (!payload->value) + return -ENOMEM; + memset(payload->value, 0, tmp_len); + + tmp_value = payload->value; + snprintf(tmp_value++, tmp_len--, "%c", '\"'); + memcpy(tmp_value, value, len); + snprintf(tmp_value + len, tmp_len - len, "%c", '\"'); + event->payload_cnt++; + return 0; +} +EXPORT_SYMBOL_GPL(hisysevent_put_string); + +int hisysevent_write(struct hiview_hisysevent *event) +{ + struct iov_iter iter; + //mm_segment_t oldfs; + char *data = NULL; + struct file *filp = NULL; + struct iovec vec[3]; + unsigned long vcount = 0; + int ret; + + if (!event) + return -EINVAL; + + ret = hisysevent_convert_json(event, &data); + if (ret != 0 || !data) { + pr_err("failed to convert event to string"); + return -EINVAL; + } + pr_info("write hisysevent data=%s", data); + + filp = filp_open(HISYSEVENT_WRITER_DEV, O_WRONLY, 0); + + if (!filp || IS_ERR(filp)) { + ret = PTR_ERR(filp); + pr_err("failed to access '%s', res=%d", HISYSEVENT_WRITER_DEV, ret); + vfree(data); + return -ENODEV; + } + + vec[vcount].iov_base = &CHECK_CODE; + vec[vcount++].iov_len = sizeof(CHECK_CODE); + vec[vcount].iov_base = data; + vec[vcount++].iov_len = strlen(data) + 1; + + //oldfs = get_fs(); + //set_fs(KERNEL_DS); + iov_iter_init(&iter, WRITE, vec, vcount, iov_length(vec, vcount)); + ret = vfs_iter_write(filp, &iter, &filp->f_pos, 0); + //set_fs(oldfs); + + if (ret < 0) + pr_err("failed to write hisysevent, ret=%d", ret); + + filp_close(filp, NULL); + vfree(data); + return ret; +} +EXPORT_SYMBOL_GPL(hisysevent_write); + +#endif /* CONFIG_HISYSEVENT */ diff --git a/drivers/staging/hungtask/Kconfig b/drivers/staging/hungtask/Kconfig new file mode 100755 index 000000000..4e80dc9fc --- /dev/null +++ b/drivers/staging/hungtask/Kconfig @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0 +config DFX_HUNGTASK + bool "DFX hungtask" + depends on DETECT_HUNG_TASK + default n + help + Base DFX hungtask module + +config DFX_HUNGTASK_USER + bool "DFX hungtask user watchdog module" + depends on DFX_HUNGTASK + default n + help + DFX hungtask user watchdog module \ No newline at end of file diff --git a/drivers/staging/hungtask/Makefile b/drivers/staging/hungtask/Makefile new file mode 100755 index 000000000..12def220e --- /dev/null +++ b/drivers/staging/hungtask/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_DFX_HUNGTASK) += hungtask_base.o +obj-$(CONFIG_DFX_HUNGTASK_USER) += hungtask_user.o diff --git a/drivers/staging/hungtask/hungtask_base.c b/drivers/staging/hungtask/hungtask_base.c new file mode 100755 index 000000000..30408c0ba --- /dev/null +++ b/drivers/staging/hungtask/hungtask_base.c @@ -0,0 +1,1031 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Huawei Technologies Co., Ltd. All rights reserved. 
+ */ + +#define pr_fmt(fmt) "hungtask_base " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_DFX_ZEROHUNG +#include +#endif +#include +#include "hungtask_user.h" + +static struct rb_root list_tasks = RB_ROOT; +static DEFINE_SPINLOCK(list_tasks_lock); +static struct hlist_head whitelist[WHITELIST_LEN]; +static struct whitelist_item whitetmplist[WHITELIST_LEN]; +static bool whitelist_empty = true; +static int remove_cnt; +static struct task_item *remove_list[MAX_REMOVE_LIST_NUM + 1]; +static unsigned long __read_mostly hungtask_timeout_secs = + CONFIG_DEFAULT_HUNG_TASK_TIMEOUT; +static int did_panic; +static unsigned int hungtask_enable = HT_DISABLE; +static unsigned int whitelist_type = WHITE_LIST; +static int whitelist_dump_cnt = DEFAULT_WHITE_DUMP_CNT; +static int whitelist_panic_cnt = DEFAULT_WHITE_PANIC_CNT; +static int appspawn_pid; +static int dump_and_upload; +static int time_since_upload; +static int hung_task_must_panic; +static int report_zrhung_id; +static struct task_hung_upload upload; +static int do_refresh; +static char frozen_buf[FROZEN_BUF_LEN]; +static int frozen_used; +static bool frozed_head; +static unsigned long cur_heartbeat; +static struct work_struct send_work; +static char report_buf_text[REPORT_MSGLENGTH]; + +bool hashlist_find(struct hlist_head *head, int count, pid_t tgid) +{ + struct hashlist_node *hnode = NULL; + + if (count <= 0) + return false; + if (hlist_empty(&head[tgid % count])) + return false; + hlist_for_each_entry(hnode, &head[tgid % count], list) { + if (hnode->pid == tgid) + return true; + } + return false; +} + +void hashlist_clear(struct hlist_head *head, int count) +{ + int i = 0; + struct hlist_node *n = NULL; + struct hashlist_node *hnode = NULL; + + for (i = 0; i < count; i++) { + hlist_for_each_entry_safe(hnode, n, &head[i], list) { + hlist_del(&hnode->list); + kfree(hnode); + hnode = NULL; + } + } + for (i = 0; i < count; i++) + INIT_HLIST_HEAD(&head[i]); +} + +bool hashlist_insert(struct hlist_head *head, int count, pid_t tgid) +{ + struct hashlist_node *hnode = NULL; + + if (hashlist_find(head, count, tgid)) + return false; + hnode = kmalloc(sizeof(struct hashlist_node), GFP_ATOMIC); + if (!hnode) + return false; + INIT_HLIST_NODE(&hnode->list); + hnode->pid = tgid; + hlist_add_head(&hnode->list, &head[tgid % count]); + return true; +} + +static bool rcu_lock_break(struct task_struct *g, struct task_struct *t) +{ + bool can_cont = false; + + get_task_struct(g); + get_task_struct(t); + rcu_read_unlock(); + cond_resched(); + rcu_read_lock(); + can_cont = pid_alive(g) && pid_alive(t); + put_task_struct(t); + put_task_struct(g); + return can_cont; +} + +static bool rcu_break(int *max_count, int *batch_count, + struct task_struct *g, + struct task_struct *t) +{ + if (!(*max_count)--) + return true; + if (!--(*batch_count)) { + *batch_count = HUNG_TASK_BATCHING; + if (!rcu_lock_break(g, t)) + return true; + } + return false; +} + +static pid_t get_pid_by_name(const char *name) +{ + int max_count = PID_MAX_LIMIT; + int batch_count = HUNG_TASK_BATCHING; + struct task_struct *g = NULL; + struct task_struct *t = NULL; + int pid = 0; + + rcu_read_lock(); + do_each_thread(g, t) { + if (rcu_break(&max_count, &batch_count, g, t)) + goto unlock; + if (!strncmp(t->comm, name, TASK_COMM_LEN)) { + pid = t->tgid; + goto unlock; + } + } while_each_thread(g, t); + +unlock: + rcu_read_unlock(); + return pid; +} + +static unsigned int get_task_type(pid_t pid, pid_t tgid, struct task_struct 
*parent) +{ + unsigned int flag = TASK_TYPE_IGNORE; + /* check tgid of it's parent as PPID */ + if (parent) { + pid_t ppid = parent->tgid; + + if (ppid == PID_KTHREAD) + flag |= TASK_TYPE_KERNEL; + else if (ppid == appspawn_pid) + flag |= TASK_TYPE_APP; + else if (ppid == PID_INIT) + flag |= TASK_TYPE_NATIVE; + } + if (!whitelist_empty && hashlist_find(whitelist, WHITELIST_LEN, tgid)) + flag |= TASK_TYPE_WHITE | TASK_TYPE_JANK; + + return flag; +} + +static void refresh_appspawn_pids(void) +{ + int max_count = PID_MAX_LIMIT; + int batch_count = HUNG_TASK_BATCHING; + struct task_struct *g = NULL; + struct task_struct *t = NULL; + + rcu_read_lock(); + do_each_thread(g, t) { + if (rcu_break(&max_count, &batch_count, g, t)) + goto unlock; + if (!strncmp(t->comm, "appspawn", TASK_COMM_LEN)) + appspawn_pid = t->tgid; + } while_each_thread(g, t); +unlock: + rcu_read_unlock(); +} + +static void refresh_task_type(pid_t pid, int task_type) +{ + struct task_item *item = NULL; + struct rb_node *p = NULL; + + spin_lock(&list_tasks_lock); + for (p = rb_first(&list_tasks); p; p = rb_next(p)) { + item = rb_entry(p, struct task_item, node); + if (item->tgid == pid) + item->task_type = task_type; + } + spin_unlock(&list_tasks_lock); +} + +static void refresh_whitelist_pids(void) +{ + int i; + + hashlist_clear(whitelist, WHITELIST_LEN); + for (i = 0; i < WHITELIST_LEN; i++) { + if (!strlen(whitetmplist[i].name)) + continue; + whitetmplist[i].pid = + get_pid_by_name(whitetmplist[i].name); + if (!whitetmplist[i].pid) + continue; + refresh_task_type(whitetmplist[i].pid, + TASK_TYPE_WHITE | TASK_TYPE_JANK); + if (hashlist_insert(whitelist, WHITELIST_LEN, + whitetmplist[i].pid)) + pr_info("whitelist[%d]-%s-%d\n", i, + whitetmplist[i].name, whitetmplist[i].pid); + else + pr_info("can't find %s\n", whitetmplist[i].name); + } + refresh_appspawn_pids(); +} + +static struct task_item *find_task(pid_t pid, struct rb_root *root) +{ + struct rb_node **p = &root->rb_node; + struct task_item *cur = NULL; + struct rb_node *parent = NULL; + + while (*p) { + parent = *p; + cur = rb_entry(parent, struct task_item, node); + if (!cur) + return NULL; + if (pid < cur->pid) + p = &(*p)->rb_left; + else if (pid > cur->pid) + p = &(*p)->rb_right; + else + return cur; + } + return NULL; +} + +static bool insert_task(struct task_item *item, struct rb_root *root) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct task_item *cur = NULL; + + while (*p) { + parent = *p; + + cur = rb_entry(parent, struct task_item, node); + if (!cur) + return false; + if (item->pid < cur->pid) { + p = &(*p)->rb_left; + } else if (item->pid > cur->pid) { + p = &(*p)->rb_right; + } else { + pr_info("insert pid=%d,tgid=%d,name=%s,type=%d fail\n", + item->pid, item->tgid, + item->name, item->task_type); + return false; + } + } + rb_link_node(&item->node, parent, p); + rb_insert_color(&item->node, root); + return true; +} + +void show_block_task(struct task_item *taskitem, struct task_struct *p) +{ + unsigned long last_arrival; + unsigned long last_queued; + +#ifdef CONFIG_SCHED_INFO + last_arrival = p->sched_info.last_arrival; + last_queued = p->sched_info.last_queued; +#else + last_arrival = 0; + last_queued = 0; +#endif /* CONFIG_SCHED_INFO */ + if (unlikely(p->flags & PF_FROZEN)) { + if (taskitem) + pr_err("name=%s,PID=%d,tgid=%d,tgname=%s," + "FROZEN for %ds,type=%d,la%lu/lq%lu\n", + p->comm, p->pid, p->tgid, + p->group_leader->comm, + taskitem->d_state_time * HEARTBEAT_TIME, + taskitem->task_type, + last_arrival, 
last_queued); + else + pr_err("name=%s,PID=%d,tgid=%d,tgname=%s," + "just FROZE,la%lu/lq%lu\n", + p->comm, p->pid, p->tgid, + p->group_leader->comm, + last_arrival, last_queued); + } else { + if (taskitem) + pr_err("name=%s,PID=%d,tgid=%d,prio=%d,cpu=%d,tgname=%s," + "type=%d,blocked for %ds,la%lu/lq%lu\n", + taskitem->name, taskitem->pid, p->tgid, p->prio, + task_cpu(p), p->group_leader->comm, taskitem->task_type, + taskitem->d_state_time * HEARTBEAT_TIME, + last_arrival, last_queued); + else + pr_err("name=%s,PID=%d,tgid=%d,prio=%d,cpu=%d," + "tgname=%s,la%lu/lq%lu\n", + p->comm, p->pid, p->tgid, p->prio, task_cpu(p), + p->group_leader->comm, + last_arrival, last_queued); + + sched_show_task(p); + } +} + +void htbase_show_state_filter(unsigned long state_filter) +{ + struct task_struct *g = NULL; + struct task_struct *p = NULL; + struct task_item *taskitem = NULL; + +#if BITS_PER_LONG == 32 + pr_info(" task PC stack pid father\n"); +#else + pr_info(" task PC stack pid father\n"); +#endif + rcu_read_lock(); + for_each_process_thread(g, p) { + /* + * reset the NMI-timeout, listing all files on a slow + * console might take a lot of time: + */ + touch_nmi_watchdog(); + if ((p->__state == TASK_RUNNING) || (p->__state & state_filter)) { + spin_lock(&list_tasks_lock); + taskitem = find_task(p->pid, &list_tasks); + spin_unlock(&list_tasks_lock); + show_block_task(taskitem, p); + } + } + touch_all_softlockup_watchdogs(); + rcu_read_unlock(); + /* Show locks if hungtask happen */ + if ((state_filter == TASK_UNINTERRUPTIBLE) || !state_filter) + debug_show_all_locks(); +} + +void hungtask_show_state_filter(unsigned long state_filter) +{ + pr_err("BinderChain_SysRq start\n"); + htbase_show_state_filter(state_filter); + pr_err("BinderChain_SysRq end\n"); +} + +void do_dump_task(struct task_struct *task) +{ + sched_show_task(task); + debug_show_held_locks(task); +} + +void do_show_task(struct task_struct *task, unsigned int flag, int d_state_time) +{ + pr_err("%s, flag=%d\n", __func__, flag); + rcu_read_lock(); + if (!pid_alive(task)) { + rcu_read_unlock(); + return; + } + if (flag & (FLAG_DUMP_WHITE | FLAG_DUMP_APP)) { + int cnt = 0; + + trace_sched_process_hang(task); + cnt = d_state_time; + pr_err("INFO: task %s:%d tgid:%d blocked for %ds in %s\n", + task->comm, task->pid, task->tgid, + (HEARTBEAT_TIME * cnt), + (flag & FLAG_DUMP_WHITE) ? 
"whitelist" : "applist"); + pr_err(" %s %s %.*s\n", + print_tainted(), init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); + do_dump_task(task); + touch_nmi_watchdog(); + if (flag & FLAG_DUMP_WHITE && (!dump_and_upload)) { + dump_and_upload++; + upload.pid = task->pid; + upload.tgid = task->tgid; + upload.duration = d_state_time; + memset(upload.name, 0, sizeof(upload.name)); + strncpy(upload.name, task->comm, sizeof(upload.name)); + upload.flag = flag; + if (task->flags & PF_FROZEN) + upload.flag = (upload.flag | FLAG_PF_FROZEN); + } + } + rcu_read_unlock(); +} + +static void do_panic(void) +{ + if (sysctl_hung_task_panic) { + trigger_all_cpu_backtrace(); + panic("hungtask: blocked tasks"); + } +} + +static void create_taskitem(struct task_item *taskitem, + struct task_struct *task) +{ + taskitem->pid = task->pid; + taskitem->tgid = task->tgid; + memset(taskitem->name, 0, sizeof(taskitem->name)); + strncpy(taskitem->name, task->comm, sizeof(taskitem->name)); + taskitem->switch_count = task->nvcsw + task->nivcsw; + taskitem->dump_wa = 0; /* whitelist or applist task dump times */ + taskitem->panic_wa = 0; /* whitelist or applist task panic times */ + taskitem->d_state_time = -1; + taskitem->isdone_wa = true; /* if task in white or app dealed */ +} + +static bool refresh_task(struct task_item *taskitem, struct task_struct *task) +{ + bool is_called = false; + + if (taskitem->switch_count != (task->nvcsw + task->nivcsw)) { + taskitem->switch_count = task->nvcsw + task->nivcsw; + is_called = true; + return is_called; + } + if (taskitem->task_type & TASK_TYPE_WHITE) { + taskitem->isdone_wa = false; + taskitem->dump_wa++; + taskitem->panic_wa++; + } + taskitem->d_state_time++; + if (task->flags & PF_FROZEN) + taskitem->task_type |= TASK_TYPE_FROZEN; + return is_called; +} + +static void remove_list_tasks(struct task_item *item) +{ + rb_erase(&item->node, &list_tasks); + kfree(item); +} + +static void shrink_process_item(struct task_item *item, bool *is_finish) +{ + if (remove_cnt >= MAX_REMOVE_LIST_NUM) { + int i; + + remove_list[remove_cnt++] = item; + for (i = 0; i < remove_cnt; i++) + remove_list_tasks(remove_list[i]); + remove_cnt = 0; + *is_finish = false; + } else { + remove_list[remove_cnt++] = item; + } +} + +static void shrink_list_tasks(void) +{ + int i; + bool is_finish = false; + struct rb_node *n = NULL; + struct task_item *item = NULL; + + spin_lock(&list_tasks_lock); + while (!is_finish) { + is_finish = true; + for (n = rb_first(&list_tasks); n != NULL; n = rb_next(n)) { + item = rb_entry(n, struct task_item, node); + if (!item) + continue; + if (item->isdone_wa) { + shrink_process_item(item, &is_finish); + if (!is_finish) + break; + } + } + } + for (i = 0; i < remove_cnt; i++) + remove_list_tasks(remove_list[i]); + remove_cnt = 0; + spin_unlock(&list_tasks_lock); +} + +static void check_parameters(void) +{ + if ((whitelist_dump_cnt < 0) || + (whitelist_dump_cnt > DEFAULT_WHITE_DUMP_CNT)) + whitelist_dump_cnt = DEFAULT_WHITE_DUMP_CNT; + if ((whitelist_panic_cnt <= 0) || + (whitelist_panic_cnt > DEFAULT_WHITE_PANIC_CNT)) + whitelist_panic_cnt = DEFAULT_WHITE_PANIC_CNT; +} + +static void send_work_handler(struct work_struct *data) +{ +#ifdef CONFIG_DFX_ZEROHUNG + zrhung_send_event(HUNGTASK_DOMAIN, HUNGTASK_NAME, + report_buf_text); +#endif +} + +static void htbase_report_zrhung_event(const char *report_buf_tag) +{ + htbase_show_state_filter(TASK_UNINTERRUPTIBLE); + pr_err("%s end\n", report_buf_tag); + schedule_work(&send_work); + 
+ report_zrhung_id++; +} + +static void htbase_report_zrhung(unsigned int event) +{ + bool report_load = false; + char report_buf_tag[REPORT_MSGLENGTH] = {0}; + char report_name[TASK_COMM_LEN + 1] = {0}; + int report_pid = 0; + int report_hungtime = 0; + int report_tasktype = 0; + + if (!event) + return; + if (event & HUNGTASK_EVENT_WHITELIST) { + snprintf(report_buf_tag, sizeof(report_buf_tag), + "hungtask_whitelist_%d", report_zrhung_id); + strncpy(report_name, upload.name, TASK_COMM_LEN); + report_pid = upload.pid; + report_tasktype = TASK_TYPE_WHITE; + report_hungtime = whitelist_dump_cnt * HEARTBEAT_TIME; + report_load = true; + } else { + pr_err("No such event to report to zerohung!"); + } + pr_err("%s start\n", report_buf_tag); + if (event & HUNGTASK_EVENT_WHITELIST) + pr_err("report HUNGTASK_EVENT_WHITELIST to zrhung\n"); + if (upload.flag & FLAG_PF_FROZEN) + snprintf(report_buf_text, sizeof(report_buf_text), + "Task %s(%s) pid %d type %d blocked %ds.", + report_name, "FROZEN", report_pid, report_tasktype, report_hungtime); + else + snprintf(report_buf_text, sizeof(report_buf_text), + "Task %s pid %d type %d blocked %ds.", + report_name, report_pid, report_tasktype, report_hungtime); + if (report_load) + htbase_report_zrhung_event(report_buf_tag); +} + +static int print_frozen_list_item(int pid) +{ + int tmp; + + if (!frozed_head) { + tmp = snprintf(frozen_buf, FROZEN_BUF_LEN, "%s", "FROZEN Pid:"); + if (tmp < 0) + return -1; + frozen_used += min(tmp, FROZEN_BUF_LEN - 1); + frozed_head = true; + } + tmp = snprintf(frozen_buf + frozen_used, FROZEN_BUF_LEN - frozen_used, "%d,", + pid); + if (tmp < 0) + return -1; + frozen_used += min(tmp, FROZEN_BUF_LEN - frozen_used - 1); + return frozen_used; +} + +int dump_task_wa(struct task_item *item, int dump_cnt, + struct task_struct *task, unsigned int flag) +{ + int ret = 0; + + if ((item->d_state_time > TWO_MINUTES) && + (item->d_state_time % TWO_MINUTES != 0)) + return ret; + if ((item->d_state_time > HUNG_TEN_MINUTES) && + (item->d_state_time % HUNG_TEN_MINUTES != 0)) + return ret; + if ((item->d_state_time > HUNG_ONE_HOUR) && + (item->d_state_time % HUNG_ONE_HOUR != 0)) + return ret; + if (dump_cnt && (item->dump_wa > dump_cnt)) { + item->dump_wa = 1; + if (!dump_and_upload && task->flags & PF_FROZEN) { + int tmp = print_frozen_list_item(item->pid); + if (tmp < 0) + return ret; + if (tmp >= FROZEN_BUF_LEN - 1) { + pr_err("%s", frozen_buf); + memset(frozen_buf, 0, sizeof(frozen_buf)); + frozen_used = 0; + frozed_head = false; + print_frozen_list_item(item->pid); + } + } else if (!dump_and_upload) { + pr_err("Ready to dump a task %s\n", item->name); + do_show_task(task, flag, item->d_state_time); + ret++; + } + } + return ret; +} + +static void update_panic_task(struct task_item *item) +{ + if (upload.pid != 0) + return; + + upload.pid = item->pid; + upload.tgid = item->tgid; + memset(upload.name, 0, sizeof(upload.name)); + strncpy(upload.name, item->name, sizeof(upload.name)); +} + +static void deal_task(struct task_item *item, struct task_struct *task, bool is_called) +{ + int any_dumped_num = 0; + + if (is_called) { + item->dump_wa = 1; + item->panic_wa = 1; + item->d_state_time = 0; + return; + } + if (item->task_type & TASK_TYPE_WHITE) + any_dumped_num = dump_task_wa(item, whitelist_dump_cnt, task, + FLAG_DUMP_WHITE); + if (!is_called && (item->task_type & TASK_TYPE_WHITE)) { + if (whitelist_panic_cnt && item->panic_wa > whitelist_panic_cnt) { + pr_err("Task %s is causing panic\n", item->name); + update_panic_task(item);
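+ /* the panic itself is raised later, in htbase_post_process() */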
+ item->panic_wa = 0; + hung_task_must_panic++; + } else { + item->isdone_wa = false; + } + } + if (item->isdone_wa) + remove_list_tasks(item); +} + +static bool check_conditions(struct task_struct *task, unsigned int task_type) +{ + bool no_check = true; + + if (task->flags & PF_FROZEN) + return no_check; + if (task_type & TASK_TYPE_WHITE && + (whitelist_dump_cnt || whitelist_panic_cnt)) + no_check = false; + return no_check; +} + +static void htbase_check_one_task(struct task_struct *t) +{ + unsigned int task_type = TASK_TYPE_IGNORE; + unsigned long switch_count = t->nvcsw + t->nivcsw; + struct task_item *taskitem = NULL; + bool is_called = false; + + if (unlikely(!switch_count)) { + pr_info("skip task whose switch_count is zero\n"); + return; + } + + taskitem = find_task(t->pid, &list_tasks); + if (taskitem) { + if (check_conditions(t, taskitem->task_type)) + return; + is_called = refresh_task(taskitem, t); + } else { + task_type = get_task_type(t->pid, t->tgid, t->real_parent); + if (check_conditions(t, task_type)) + return; + taskitem = kzalloc(sizeof(*taskitem), GFP_ATOMIC); + if (!taskitem) { + pr_err("kzalloc failed\n"); + return; + } + taskitem->task_type = task_type; + create_taskitem(taskitem, t); + is_called = refresh_task(taskitem, t); + insert_task(taskitem, &list_tasks); + } + deal_task(taskitem, t, is_called); +} + +static void htbase_pre_process(void) +{ + htbase_set_timeout_secs(sysctl_hung_task_timeout_secs); + cur_heartbeat++; + if ((cur_heartbeat % REFRESH_INTERVAL) == 0) + do_refresh = 1; + else + do_refresh = 0; + if (do_refresh || (cur_heartbeat < TIME_REFRESH_PIDS)) { + refresh_whitelist_pids(); + check_parameters(); + } +} + +static void htbase_post_process(void) +{ + struct rb_node *n = NULL; + unsigned int hungevent = 0; + + if (frozen_used) { + pr_err("%s", frozen_buf); + memset(frozen_buf, 0, sizeof(frozen_buf)); + frozen_used = 0; + frozed_head = false; + } + if (dump_and_upload == HUNG_TASK_UPLOAD_ONCE) { + hungevent |= HUNGTASK_EVENT_WHITELIST; + dump_and_upload++; + } + if (dump_and_upload > 0) { + time_since_upload++; + if (time_since_upload > (whitelist_panic_cnt - whitelist_dump_cnt)) { + dump_and_upload = 0; + time_since_upload = 0; + } + } + if (hung_task_must_panic) { + htbase_show_state_filter(TASK_UNINTERRUPTIBLE); + hung_task_must_panic = 0; + pr_err("Task %s:%d blocked for %ds is causing panic\n", + upload.name, upload.pid, + whitelist_panic_cnt * HEARTBEAT_TIME); + do_panic(); + } + htuser_post_process_userlist(); + shrink_list_tasks(); + for (n = rb_first(&list_tasks); n != NULL; n = rb_next(n)) { + struct task_item *item = rb_entry(n, struct task_item, node); + item->isdone_wa = true; + } + + if (hungevent) + htbase_report_zrhung(hungevent); +} + +void htbase_check_tasks(unsigned long timeout) +{ + int max_count = PID_MAX_LIMIT; + int batch_count = HUNG_TASK_BATCHING; + struct task_struct *g = NULL; + struct task_struct *t = NULL; + + if (!hungtask_enable) + return; + if (test_taint(TAINT_DIE) || did_panic) { + pr_err("panic already in progress\n"); + return; + } + + htbase_pre_process(); + rcu_read_lock(); + for_each_process_thread(g, t) { + if (!max_count--) + goto unlock; + if (!--batch_count) { + batch_count = HUNG_TASK_BATCHING; + if (!rcu_lock_break(g, t)) + goto unlock; + } + if ((t->__state == TASK_UNINTERRUPTIBLE) || + (t->__state == TASK_KILLABLE)) + htbase_check_one_task(t); + } +unlock: + rcu_read_unlock(); + htbase_post_process(); +} + +static ssize_t htbase_enable_show(struct kobject *kobj, + struct kobj_attribute
*attr, + char *buf) +{ + if (hungtask_enable) + return snprintf(buf, ENABLE_SHOW_LEN, "on\n"); + else + return snprintf(buf, ENABLE_SHOW_LEN, "off\n"); +} + +static ssize_t htbase_enable_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + char tmp[6]; /* big enough for "on"/"off" plus a trailing newline */ + size_t len; + char *p = NULL; + + if (!buf) + return -EINVAL; + if ((count < 2) || (count > (sizeof(tmp) - 1))) { + pr_err("string too long or too short\n"); + return -EINVAL; + } + + p = memchr(buf, '\n', count); + len = p ? (size_t)(p - buf) : count; + memset(tmp, 0, sizeof(tmp)); + strncpy(tmp, buf, len); + if (!strcmp(tmp, "on")) { + hungtask_enable = HT_ENABLE; + pr_info("set hungtask_enable to enable\n"); + } else if (!strcmp(tmp, "off")) { + hungtask_enable = HT_DISABLE; + pr_info("set hungtask_enable to disable\n"); + } else { + pr_err("only accept on or off\n"); + } + return (ssize_t) count; +} + +static ssize_t htbase_monitorlist_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + int i; + char *start = buf; + char all_buf[WHITELIST_STORE_LEN - 20]; /* exclude extra header len 20 */ + unsigned long len = 0; + + memset(all_buf, 0, sizeof(all_buf)); + for (i = 0; i < WHITELIST_LEN; i++) { + if (whitetmplist[i].pid > 0) { + len += snprintf(all_buf + len, sizeof(all_buf) - len, + "%s-%d,", whitetmplist[i].name, whitetmplist[i].pid); + if (len >= sizeof(all_buf)) { + len = sizeof(all_buf) - 1; + break; + } + } + } + if (len > 0) + all_buf[len] = 0; + if (whitelist_type == WHITE_LIST) + buf += snprintf(buf, WHITELIST_STORE_LEN, "whitelist:[%s]\n", all_buf); + else if (whitelist_type == BLACK_LIST) + buf += snprintf(buf, WHITELIST_STORE_LEN, "blacklist:[%s]\n", all_buf); + else + buf += snprintf(buf, WHITELIST_STORE_LEN, "\n"); + return buf - start; +} + +static void htbase_monitorlist_update(char **cur) +{ + int index = 0; + char *token = NULL; + + hashlist_clear(whitelist, WHITELIST_LEN); + memset(whitetmplist, 0, sizeof(whitetmplist)); + /* generate the new whitelist */ + for (;;) { + token = strsep(cur, ","); + if (token && strlen(token)) { + strncpy(whitetmplist[index].name, token, TASK_COMM_LEN); + if (strlen(whitetmplist[index].name) > 0) + whitelist_empty = false; + index++; + if (index >= WHITELIST_LEN) + break; + } + if (!(*cur)) + break; + } +} + +/* + * monitorlist_store - Called when 'write/echo' method is + * used on entry '/sys/kernel/hungtask/monitorlist'. + */ +static ssize_t htbase_monitorlist_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + size_t len; + char *p = NULL; + char all_buf[WHITELIST_STORE_LEN]; + char *cur = all_buf; + + if ((n < 2) || (n > (sizeof(all_buf) - 1))) { + pr_err("whitelist input string illegal\n"); + return -EINVAL; + } + if (!buf) + return -EINVAL; + /* + * input format: + * write /sys/kernel/hungtask/monitorlist "whitelist, + * system_server,surfaceflinger" + */ + p = memchr(buf, '\n', n); + len = p ? (size_t)(p - buf) : n; /* exclude the '\n' */ + + memset(all_buf, 0, sizeof(all_buf));
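+ /* clamp the copy to the local buffer, then parse "listtype,name1,name2,..." */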
+ len = len > WHITELIST_STORE_LEN ? WHITELIST_STORE_LEN : len; + strncpy(all_buf, buf, len); + p = strsep(&cur, ","); + if (!cur) { + pr_err("string is not correct\n"); + return -EINVAL; + } + if (!strncmp(p, "whitelist", n)) { + whitelist_type = WHITE_LIST; + } else { + if (!strncmp(p, "blacklist", n)) + pr_err("blacklist is not supported\n"); + else + pr_err("wrong list type is set\n"); + return -EINVAL; + } + if (!strlen(cur)) { + pr_err("at least one process needs to be set\n"); + return -EINVAL; + } + pr_err("whitelist is %s\n", cur); + + htbase_monitorlist_update(&cur); + /* check again in case user input "whitelist,,,,,," */ + if (whitelist_empty) { + pr_err("at least one process needs to be set\n"); + return -EINVAL; + } + return (ssize_t) n; +} + +/* used for sysctl at "/proc/sys/kernel/hung_task_timeout_secs" */ +void htbase_set_timeout_secs(unsigned long new_hungtask_timeout_secs) +{ + if ((new_hungtask_timeout_secs > CONFIG_DEFAULT_HUNG_TASK_TIMEOUT) || + (new_hungtask_timeout_secs % HEARTBEAT_TIME)) + return; + hungtask_timeout_secs = new_hungtask_timeout_secs; + /* + * if the user changes the panic timeout value, we sync it to the dump + * value by default; the user can set it differently + */ + whitelist_panic_cnt = (int)(hungtask_timeout_secs / HEARTBEAT_TIME); + if (whitelist_panic_cnt > THIRTY_SECONDS) + whitelist_dump_cnt = whitelist_panic_cnt / HT_DUMP_IN_PANIC_LOOSE; + else + whitelist_dump_cnt = whitelist_panic_cnt / HT_DUMP_IN_PANIC_STRICT; +} + +void htbase_set_panic(int new_did_panic) +{ + did_panic = new_did_panic; +} + +static struct kobj_attribute timeout_attribute = { + .attr = { + .name = "enable", + .mode = 0640, + }, + .show = htbase_enable_show, + .store = htbase_enable_store, +}; + +static struct kobj_attribute monitorlist_attr = { + .attr = { + .name = "monitorlist", + .mode = 0640, + }, + .show = htbase_monitorlist_show, + .store = htbase_monitorlist_store, +}; + +#ifdef CONFIG_DFX_HUNGTASK_USER +static struct kobj_attribute userlist_attr = { + .attr = { + .name = "userlist", + .mode = 0640, + }, + .show = htuser_list_show, + .store = htuser_list_store, +}; +#endif + +static struct attribute *attrs[] = { + &timeout_attribute.attr, + &monitorlist_attr.attr, +#ifdef CONFIG_DFX_HUNGTASK_USER + &userlist_attr.attr, +#endif + NULL +}; + +static struct attribute_group hungtask_attr_group = { + .attrs = attrs, +}; + +static struct kobject *hungtask_kobj; +int htbase_create_sysfs(void) +{ + int i; + int ret; + + /* sleep 1000ms at a time until /sys/kernel is ready */ + while (!kernel_kobj) + msleep(1000); + + /* Create kobject named "hungtask" located at /sys/kernel/hungtask */ + hungtask_kobj = kobject_create_and_add("hungtask", kernel_kobj); + if (!hungtask_kobj) + return -ENOMEM; + ret = sysfs_create_group(hungtask_kobj, &hungtask_attr_group); + if (ret) + kobject_put(hungtask_kobj); + + for (i = 0; i < WHITELIST_LEN; i++) + INIT_HLIST_HEAD(&whitelist[i]); + memset(whitetmplist, 0, sizeof(whitetmplist)); + + INIT_WORK(&send_work, send_work_handler); + + return ret; +} diff --git a/drivers/staging/hungtask/hungtask_user.c b/drivers/staging/hungtask/hungtask_user.c new file mode 100755 index 000000000..39b0b1bd5 --- /dev/null +++ b/drivers/staging/hungtask/hungtask_user.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Huawei Technologies Co., Ltd. All rights reserved.
+ */ + +#define pr_fmt(fmt) "hungtask_user " fmt + +#include +#include +#include +#include +#include + +#include + +#define CMD_MIN_LEN 3 +#define CMD_MAX_LEN 20 +#define USERLIST_NUM 10 +#define MAX_USER_TIMEOUT 120 +#define MAX_SHOW_LEN 512 + +struct user_item { + pid_t pid; + int cur_cnt; + int panic_cnt; +}; + +static struct user_item userlist[USERLIST_NUM]; +static int userlist_count; +static DEFINE_SPINLOCK(userlist_lock); +static bool is_registered; +static bool need_panic; +static bool need_dump; +static int block_time; +static int block_pid; + +static void htuser_show_task(int pid) +{ + struct task_struct *p = NULL; + + p = pid_task(find_vpid(pid), PIDTYPE_PID); + if (p == NULL) { + pr_err("cannot find pid %d\n", pid); + return; + } + + if (p->flags & PF_FROZEN) { + pr_info("process %d is frozen\n", pid); + return; + } + if (p->__state == TASK_UNINTERRUPTIBLE) { + pr_err("UserList_KernelStack start\n"); + sched_show_task(p); + pr_err("UserList_KernelStack end\n"); + } +} + +static void htuser_list_insert(int pid, int count) +{ + spin_lock(&userlist_lock); + if (userlist_count >= USERLIST_NUM) { + pr_err("list is full\n"); + spin_unlock(&userlist_lock); + return; + } + userlist[userlist_count].pid = pid; + userlist[userlist_count].cur_cnt = 0; + userlist[userlist_count].panic_cnt = count; + userlist_count++; + spin_unlock(&userlist_lock); +} + +static int htuser_list_remove(int pid) +{ + int i; + + spin_lock(&userlist_lock); + for (i = 0; i < userlist_count; i++) { + if (userlist[i].pid == pid) { + if (i == userlist_count - 1) { + memset(&userlist[i], 0, sizeof(userlist[i])); + } else { + int len = sizeof(userlist[0]) * (userlist_count - i - 1); + memmove(&userlist[i], &userlist[i + 1], len); + } + userlist_count--; + spin_unlock(&userlist_lock); + return 0; + } + } + spin_unlock(&userlist_lock); + return -ENOENT; +} + +static void htuser_list_update(void) +{ + int i; + + need_panic = false; + need_dump = false; + spin_lock(&userlist_lock); + for (i = 0; i < userlist_count; i++) { + userlist[i].cur_cnt++; + if ((userlist[i].cur_cnt >= userlist[i].panic_cnt) || + (userlist[i].cur_cnt == userlist[i].panic_cnt / 2)) { + htuser_show_task(userlist[i].pid); + pr_err("process %d not scheduled for %ds\n", + userlist[i].pid, + userlist[i].cur_cnt * HEARTBEAT_TIME); + } + if (userlist[i].cur_cnt == userlist[i].panic_cnt) { + need_dump = true; + need_panic = true; + block_time = userlist[i].cur_cnt * HEARTBEAT_TIME; + block_pid = userlist[i].pid; + } + } + spin_unlock(&userlist_lock); +} + +static void htuser_list_kick(int pid) +{ + int i; + + spin_lock(&userlist_lock); + for (i = 0; i < userlist_count; i++) { + if (userlist[i].pid == pid) { + userlist[i].cur_cnt = 0; + spin_unlock(&userlist_lock); + return; + } + } + spin_unlock(&userlist_lock); +} + +void htuser_post_process_userlist(void) +{ + htuser_list_update(); + if (need_dump) { + pr_err("print all cpu stack and D state stack\n"); + hungtask_show_state_filter(TASK_UNINTERRUPTIBLE); + } + if (need_panic) + panic("UserList Process %d blocked for %ds causing panic", block_pid, block_time); +} + +static int htuser_process_notifier(struct notifier_block *self, + unsigned long cmd, void *v) +{ + struct task_struct *task = v; + + if (task == NULL) + return NOTIFY_OK; + + if ((task->tgid == task->pid) && (!htuser_list_remove(task->tgid))) + pr_err("removed process %d from userlist since it died\n", task->tgid); + + return NOTIFY_OK; +} + +static struct notifier_block htuser_process_notify = { + .notifier_call = htuser_process_notifier, +}; +
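+/* sysfs "userlist" show: list each watched pid with its current and panic counts in seconds */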
+ssize_t htuser_list_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + int i; + char tmp[MAX_SHOW_LEN] = {0}; + int len = 0; + + len += snprintf(tmp + len, MAX_SHOW_LEN - len, + " Pid Current(sec) Expired(sec)\n"); + + spin_lock(&userlist_lock); + for (i = 0; i < userlist_count; i++) { + len += snprintf(tmp + len, MAX_SHOW_LEN - len, + "%5d %5d %5d\n", userlist[i].pid, + userlist[i].cur_cnt * HEARTBEAT_TIME, + userlist[i].panic_cnt * HEARTBEAT_TIME); + if (len >= MAX_SHOW_LEN) { + len = MAX_SHOW_LEN - 1; + break; + } + } + spin_unlock(&userlist_lock); + pr_info("%s\n", tmp); + strncpy(buf, tmp, len); + + return len; +} + +static int htuser_list_store_on(char *tmp, size_t len, int pid) +{ + unsigned long sec = 0; + + if (kstrtoul(tmp + 3, 10, &sec)) { + pr_err("invalid timeout value\n"); + return -EINVAL; + } + if ((sec > MAX_USER_TIMEOUT) || !sec) { + pr_err("invalid timeout value, should be in 0-%d\n", MAX_USER_TIMEOUT); + return -EINVAL; + } + if (sec % HEARTBEAT_TIME) { + pr_err("invalid timeout value, should be divided by %d\n", HEARTBEAT_TIME); + return -EINVAL; + } + pr_info("process %d set to enable, timeout=%ld\n", pid, sec); + htuser_list_insert(pid, sec / HEARTBEAT_TIME); + if (!is_registered) { + profile_event_register(PROFILE_TASK_EXIT, + &htuser_process_notify); + is_registered = true; + } + + return 0; +} + +ssize_t htuser_list_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + char tmp[CMD_MAX_LEN]; /* on/off/kick */ + size_t len; + char *p = NULL; + int pid = current->tgid; + int uid = current->cred->euid.val; + + if (uid >= 10000) { + pr_err("non-system process %d(uid=%d) can not be added to hungtask userlist\n", + pid, uid); + return -EPERM; + } + if ((count < CMD_MIN_LEN) || (count > (sizeof(tmp) - 1))) { + pr_err("string too long or too short\n"); + return -EINVAL; + } + if (!buf) + return -EINVAL; + + memset(tmp, 0, sizeof(tmp)); + p = memchr(buf, '\n', count); + len = p ? (size_t)(p - buf) : count; + strncpy(tmp, buf, len); + + if (strncmp(tmp, "on", strlen("on")) == 0) { + if (htuser_list_store_on(tmp, len, pid)) + return -EINVAL; + } else if (strcmp(tmp, "off") == 0) { + pr_info("process %d set to disable\n", pid); + if (!htuser_list_remove(pid)) + pr_err("removed due to process %d calling off\n", pid); + } else if (strcmp(tmp, "kick") == 0) { + pr_info("process %d is kicked\n", pid); + htuser_list_kick(pid); + } else { + pr_err("only accept on off or kick\n"); + } + return (ssize_t)count; +} + diff --git a/drivers/staging/hungtask/hungtask_user.h b/drivers/staging/hungtask/hungtask_user.h new file mode 100755 index 000000000..17ea7212b --- /dev/null +++ b/drivers/staging/hungtask/hungtask_user.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Huawei Technologies Co., Ltd. All rights reserved.
+ */ + +#ifndef DFX_HUNGTASK_USER_H +#define DFX_HUNGTASK_USER_H + +#include + +#ifdef CONFIG_DFX_HUNGTASK_USER +void htuser_post_process_userlist(void); +ssize_t htuser_list_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count); +ssize_t htuser_list_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf); +#else +static inline void htuser_post_process_userlist(void) +{ +} + +static inline ssize_t htuser_list_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + return 0; +} +static inline ssize_t htuser_list_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return 0; +} + +#endif + +#endif /* DFX_HUNGTASK_USER_H */ diff --git a/drivers/staging/zerohung/Kconfig b/drivers/staging/zerohung/Kconfig new file mode 100755 index 000000000..913d28efb --- /dev/null +++ b/drivers/staging/zerohung/Kconfig @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +config DFX_ZEROHUNG + bool "zerohung driver" + default n + depends on HISYSEVENT + help + This feature supports catching hung logs. diff --git a/drivers/staging/zerohung/Makefile b/drivers/staging/zerohung/Makefile new file mode 100755 index 000000000..3727a0e91 --- /dev/null +++ b/drivers/staging/zerohung/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_DFX_ZEROHUNG) += zrhung_event.o watchpoint/ diff --git a/drivers/staging/zerohung/watchpoint/Makefile b/drivers/staging/zerohung/watchpoint/Makefile new file mode 100755 index 000000000..1cb8d7f99 --- /dev/null +++ b/drivers/staging/zerohung/watchpoint/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_DFX_ZEROHUNG) += hung_wp_screen.o diff --git a/drivers/staging/zerohung/watchpoint/hung_wp_screen.c b/drivers/staging/zerohung/watchpoint/hung_wp_screen.c new file mode 100755 index 000000000..3b5f2d6da --- /dev/null +++ b/drivers/staging/zerohung/watchpoint/hung_wp_screen.c @@ -0,0 +1,299 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Huawei Technologies Co., Ltd. All rights reserved.
+ */ + +#define pr_fmt(fmt) "zrhung " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define TIME_CONVERT_UNIT 1000 +#define DEFAULT_TIMEOUT 10 + +#define LPRESSEVENT_TIME 5 +#define POWERKEYEVENT_MAX_COUNT 10 +#define POWERKEYEVENT_DEFAULT_COUNT 3 +#define POWERKEYEVENT_DEFAULT_TIMEWINDOW 5 +#define POWERKEYEVENT_DEFAULT_LIMIT_MS 300 +#define POWERKEYEVENT_DEFAULT_REPORT_MIN 2 +#define POWERKEYEVENT_TIME_LEN (POWERKEYEVENT_MAX_COUNT + 2) + +struct hung_wp_screen_data { + struct timer_list timer; + struct timer_list long_press_timer; + struct workqueue_struct *workq; + struct work_struct send_work; + spinlock_t lock; + int fb_blank; + int check_id; + int tag_id; +}; + +static bool init_done; +static struct hung_wp_screen_data g_hung_data; +static unsigned int lastreport_time; +static unsigned int lastprkyevt_time; +static unsigned int powerkeyevent_time[POWERKEYEVENT_TIME_LEN] = {0}; +static unsigned int newevt; +static unsigned int headevt; +static int *check_off_point; +struct work_struct powerkeyevent_sendwork; +struct work_struct lpressevent_sendwork; +static struct notifier_block hung_wp_screen_setblank_ncb; + +static void zrhung_lpressevent_send_work(struct work_struct *work) +{ + pr_info("LONGPRESS_EVENT send to zerohung\n"); + zrhung_send_event(WP_SCREEN_DOMAIN, WP_SCREEN_LPRESS_NAME, "none"); +} + +static void zrhung_wp_lpress_send(struct timer_list *t) +{ + int *check_off = check_off_point; + + del_timer(&g_hung_data.long_press_timer); + *check_off = 0; + queue_work(g_hung_data.workq, &lpressevent_sendwork); +} + +static void zrhung_powerkeyevent_send_work(struct work_struct *work) +{ + pr_info("POWERKEY_EVENT send to zerohung\n"); + zrhung_send_event(WP_SCREEN_DOMAIN, WP_SCREEN_PWK_NAME, "none"); +} + +static void zrhung_powerkeyevent_report(unsigned int dur, unsigned int end) +{ + unsigned int send_interval; + + send_interval = end > lastreport_time ? + ((end - lastreport_time) / TIME_CONVERT_UNIT) : POWERKEYEVENT_DEFAULT_REPORT_MIN; + if (unlikely(lastreport_time == 0)) { + lastreport_time = end; + } else if (send_interval < POWERKEYEVENT_DEFAULT_REPORT_MIN) { + pr_info("powerkeyevent too fast to report: %d\n", end); + return; + } + lastreport_time = end; + queue_work(g_hung_data.workq, &powerkeyevent_sendwork); +} + +static unsigned int refresh_prkyevt_index(unsigned int event) +{ + unsigned int evt = event; + + if (evt < POWERKEYEVENT_MAX_COUNT) + evt++; + else + evt = 0; + return evt; +} + +static void zrhung_new_powerkeyevent(unsigned int tmescs) +{ + unsigned int prkyevt_interval; + unsigned int evt_index; + int diff; + + powerkeyevent_time[newevt] = tmescs; + evt_index = (newevt >= headevt) ? 
+ (newevt - headevt) : (newevt + POWERKEYEVENT_MAX_COUNT + 1 - headevt); + if (evt_index < (POWERKEYEVENT_DEFAULT_COUNT - 1)) { + pr_info("powerkeyevent: fewer than %d events recorded\n", POWERKEYEVENT_DEFAULT_COUNT); + } else { + diff = powerkeyevent_time[newevt] - powerkeyevent_time[headevt]; + if (diff < 0) { + pr_info("powerkeyevent: recorded times out of order\n"); + return; + } + + prkyevt_interval = (unsigned int)(diff / TIME_CONVERT_UNIT); + if (prkyevt_interval <= POWERKEYEVENT_DEFAULT_TIMEWINDOW) + zrhung_powerkeyevent_report(prkyevt_interval, tmescs); + headevt = refresh_prkyevt_index(headevt); + } + newevt = refresh_prkyevt_index(newevt); +} + +static void zrhung_powerkeyevent_handler(void) +{ + unsigned int curtime; + unsigned long curjiff; + + pr_info("powerkeyevent check start\n"); + curjiff = jiffies; + curtime = jiffies_to_msecs(curjiff); + if (unlikely(lastprkyevt_time > curtime)) { + pr_info("powerkeyevent check: time wrapped around\n"); + lastprkyevt_time = curtime; + return; + } else if ((curtime - lastprkyevt_time) < POWERKEYEVENT_DEFAULT_LIMIT_MS) { + pr_info("powerkeyevent: powerkey pressed too fast, time:%d\n", curtime); + return; + } + lastprkyevt_time = curtime; + zrhung_new_powerkeyevent(curtime); +} + +static int hung_wp_screen_setblank(struct notifier_block *self, unsigned long event, void *data) +{ + unsigned long flags; + struct fb_event *evdata = data; + int blank; + + if (!init_done) + return 0; + + if (event != FB_EVENT_BLANK) + return 0; + + blank = *(int *)evdata->data; + spin_lock_irqsave(&(g_hung_data.lock), flags); + g_hung_data.fb_blank = blank; + if (((g_hung_data.check_id == ZRHUNG_WP_SCREENON) && (blank == 0)) || + ((g_hung_data.check_id == ZRHUNG_WP_SCREENOFF) && (blank != 0))) { + pr_info("check_id=%d, blank=%d\n", g_hung_data.check_id, g_hung_data.fb_blank); + del_timer(&g_hung_data.timer); + g_hung_data.check_id = ZRHUNG_WP_NONE; + } + spin_unlock_irqrestore(&(g_hung_data.lock), flags); + + return 0; +} + +static void hung_wp_screen_send_work(struct work_struct *work) +{ + unsigned long flags = 0; + + show_state_filter(TASK_UNINTERRUPTIBLE); + + if (g_hung_data.check_id == ZRHUNG_WP_SCREENON) + zrhung_send_event(WP_SCREEN_DOMAIN, WP_SCREEN_ON_NAME, "none"); + else + zrhung_send_event(WP_SCREEN_DOMAIN, WP_SCREEN_OFF_NAME, "none"); + pr_info("send event: %d\n", g_hung_data.check_id); + spin_lock_irqsave(&(g_hung_data.lock), flags); + g_hung_data.check_id = ZRHUNG_WP_NONE; + spin_unlock_irqrestore(&(g_hung_data.lock), flags); +} + +static void hung_wp_screen_send(struct timer_list *t) +{ + del_timer(&g_hung_data.timer); + pr_info("hung_wp_screen_%d end\n", g_hung_data.tag_id); + queue_work(g_hung_data.workq, &g_hung_data.send_work); +} + +static void hung_wp_screen_start(int check_id) +{ + if (g_hung_data.check_id != ZRHUNG_WP_NONE) { + pr_info("a check is already running, check_id: %d\n", g_hung_data.check_id); + return; + } + + g_hung_data.check_id = check_id; + if (timer_pending(&g_hung_data.timer)) + del_timer(&g_hung_data.timer); + + g_hung_data.timer.expires = jiffies + msecs_to_jiffies(DEFAULT_TIMEOUT * TIME_CONVERT_UNIT); + add_timer(&g_hung_data.timer); + pr_info("going to check ID=%d timeout=%d\n", check_id, DEFAULT_TIMEOUT); +} + +void hung_wp_screen_powerkey_ncb(int event) +{ + static int check_off; + unsigned long flags = 0; + + if (!init_done) + return; + + spin_lock_irqsave(&(g_hung_data.lock), flags); + if (event == WP_SCREEN_PWK_PRESS) { + pr_info("hung_wp_screen_%d start!
fb_blank=%d", + ++g_hung_data.tag_id, g_hung_data.fb_blank); + check_off = 0; + if (g_hung_data.fb_blank != 0) { + hung_wp_screen_start(ZRHUNG_WP_SCREENON); + } else { + check_off = 1; + pr_info("start longpress test timer\n"); + check_off_point = &check_off; + g_hung_data.long_press_timer.expires = jiffies + + msecs_to_jiffies(LPRESSEVENT_TIME * TIME_CONVERT_UNIT); + if (!timer_pending(&g_hung_data.long_press_timer)) + add_timer(&g_hung_data.long_press_timer); + } + zrhung_powerkeyevent_handler(); + } else if (check_off) { + check_off = 0; + del_timer(&g_hung_data.long_press_timer); + if (event == WP_SCREEN_PWK_RELEASE && g_hung_data.fb_blank == 0) + hung_wp_screen_start(ZRHUNG_WP_SCREENOFF); + } + spin_unlock_irqrestore(&(g_hung_data.lock), flags); +} + +static int __init hung_wp_screen_init(void) +{ + init_done = false; + pr_info("%s start\n", __func__); + g_hung_data.fb_blank = 0; + g_hung_data.tag_id = 0; + g_hung_data.check_id = ZRHUNG_WP_NONE; + spin_lock_init(&(g_hung_data.lock)); + + timer_setup(&g_hung_data.timer, hung_wp_screen_send, 0); + timer_setup(&g_hung_data.long_press_timer, zrhung_wp_lpress_send, 0); + + g_hung_data.workq = create_workqueue("hung_wp_screen_workq"); + if (g_hung_data.workq == NULL) { + pr_err("create workq failed\n"); + return -EFAULT; + } + INIT_WORK(&g_hung_data.send_work, hung_wp_screen_send_work); + INIT_WORK(&powerkeyevent_sendwork, zrhung_powerkeyevent_send_work); + INIT_WORK(&lpressevent_sendwork, zrhung_lpressevent_send_work); + + hung_wp_screen_setblank_ncb.notifier_call = hung_wp_screen_setblank; + fb_register_client(&hung_wp_screen_setblank_ncb); + + init_done = true; + pr_info("%s done\n", __func__); + return 0; +} + +static void __exit hung_wp_screen_exit(void) +{ + fb_unregister_client(&hung_wp_screen_setblank_ncb); + + cancel_work_sync(&lpressevent_sendwork); + cancel_work_sync(&powerkeyevent_sendwork); + cancel_work_sync(&g_hung_data.send_work); + + destroy_workqueue(g_hung_data.workq); + + del_timer_sync(&g_hung_data.timer); + del_timer_sync(&g_hung_data.long_press_timer); +} + +module_init(hung_wp_screen_init); +module_exit(hung_wp_screen_exit); + +MODULE_AUTHOR("OHOS"); +MODULE_DESCRIPTION("Reporting the frozen screen alarm event"); +MODULE_LICENSE("GPL"); diff --git a/drivers/staging/zerohung/zrhung_event.c b/drivers/staging/zerohung/zrhung_event.c new file mode 100755 index 000000000..04b94a77e --- /dev/null +++ b/drivers/staging/zerohung/zrhung_event.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Huawei Technologies Co., Ltd. All rights reserved. + */ + +#define pr_fmt(fmt) "zrhung " fmt + +#include +#include + +#include +#include + +int zrhung_send_event(const char *domain, const char *event_name, const char *msg_buf) +{ + struct hiview_hisysevent *event = NULL; + int ret = 0; + + event = hisysevent_create(domain, event_name, FAULT); + if (!event) { + pr_err("failed to create event"); + return -EINVAL; + } + ret = hisysevent_put_string(event, "MSG", msg_buf); + if (ret != 0) { + pr_err("failed to put sting to event, ret=%d", ret); + goto hisysevent_end; + } + ret = hisysevent_write(event); + +hisysevent_end: + hisysevent_destroy(&event); + return ret; +} diff --git a/fs/epfs/Kconfig b/fs/epfs/Kconfig new file mode 100755 index 000000000..059c3a0cc --- /dev/null +++ b/fs/epfs/Kconfig @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +config EPFS + tristate "Enhanced Proxy File System support" + depends on TMPFS + help + Enhanced Proxy File System support. If unsure, say N. 
+ +config EPFS_DEBUG + tristate "Debug message of Enhanced Proxy File System" + depends on EPFS + help + Enhanced Proxy File System debug support. diff --git a/fs/epfs/Makefile b/fs/epfs/Makefile new file mode 100755 index 000000000..b7375e6f9 --- /dev/null +++ b/fs/epfs/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_EPFS) += epfs.o +epfs-y := main.o super.o dentry.o inode.o file.o dir.o diff --git a/fs/epfs/dentry.c b/fs/epfs/dentry.c new file mode 100755 index 000000000..62299eccd --- /dev/null +++ b/fs/epfs/dentry.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/epfs/dentry.c + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. + * Author: weilongping@huawei.com + * Create: 2022-06-10 + */ +#include "internal.h" + +static int epfs_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + return 1; +} + +static void epfs_d_release(struct dentry *dentry) +{ +} + +const struct dentry_operations epfs_dops = { + .d_revalidate = epfs_d_revalidate, + .d_release = epfs_d_release, +}; diff --git a/fs/epfs/dir.c b/fs/epfs/dir.c new file mode 100755 index 000000000..875057a86 --- /dev/null +++ b/fs/epfs/dir.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/epfs/dir.c + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. + * Author: weilongping@huawei.com + * Create: 2022-06-10 + */ +#include + +#include "internal.h" + +static int epfs_iterate(struct file *file, struct dir_context *ctx) +{ + return 0; +} + +const struct file_operations epfs_dir_fops = { .iterate = epfs_iterate }; diff --git a/fs/epfs/epfs.h b/fs/epfs/epfs.h new file mode 100755 index 000000000..19e66e145 --- /dev/null +++ b/fs/epfs/epfs.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/epfs/epfs.h + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. + * Author: weilongping@huawei.com + * Create: 2022-06-10 + */ +#ifndef __FS_EPFS_H__ +#define __FS_EPFS_H__ + +#include +#include +#include + +#define EPFS_MAX_RANGES 127 + +struct __attribute__((__packed__)) epfs_range { + __u64 num; + __u64 reserved; + struct { + __u64 begin; + __u64 end; + } range[0]; +}; + +#define EPFS_IOCTL_MAGIC 0x71 +#define IOC_SET_ORIGIN_FD _IOW(EPFS_IOCTL_MAGIC, 1, __s32) +#define IOC_SET_EPFS_RANGE _IOW(EPFS_IOCTL_MAGIC, 2, struct epfs_range) +#define EPFS_IOCTL_MAXNR 3 + +#define EPFS_TAG "Epfs" + +#define epfs_err(fmt, ...) \ + pr_err("%s:%s:%d: " fmt, EPFS_TAG, __func__, __LINE__, ##__VA_ARGS__) +#define epfs_info(fmt, ...) \ + pr_info("%s:%s:%d: " fmt, EPFS_TAG, __func__, __LINE__, ##__VA_ARGS__) +#define epfs_warn(fmt, ...) \ + pr_warn("%s:%s:%d: " fmt, EPFS_TAG, __func__, __LINE__, ##__VA_ARGS__) +#define epfs_debug(fmt, ...) \ + pr_debug("%s:%s:%d: " fmt, EPFS_TAG, __func__, __LINE__, ##__VA_ARGS__) + +#endif // __FS_EPFS_H__ diff --git a/fs/epfs/file.c b/fs/epfs/file.c new file mode 100755 index 000000000..ac8181933 --- /dev/null +++ b/fs/epfs/file.c @@ -0,0 +1,296 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/epfs/file.c + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd.
+ * Author: weilongping@huawei.com + * Create: 2022-06-10 + */ +#include +#include +#include +#include +#include + +#include "internal.h" + +long epfs_set_origin_fd(struct file *file, unsigned long arg) +{ + int fd = -1; + struct file *origin_file; + struct inode *inode = file->f_inode; + struct epfs_inode_info *info = epfs_inode_to_private(inode); + int ret = 0; + + if (copy_from_user(&fd, (int *)arg, sizeof(fd))) + return -EFAULT; + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) + epfs_debug("original fd: %d", fd); + origin_file = fget(fd); + if (!origin_file) { + epfs_err("Original file does not exist!"); + return -EBADF; + } + + mutex_lock(&info->lock); + if (info->origin_file) { + // origin_file has already been set. + ret = -EEXIST; + fput(origin_file); + } else if (file_inode(origin_file) == inode) { + epfs_err("Could not set itself as origin_file!"); + fput(origin_file); + ret = -EINVAL; + } else { + info->origin_file = origin_file; + fsstack_copy_attr_all(inode, file_inode(origin_file)); + fsstack_copy_inode_size(inode, file_inode(origin_file)); + } + mutex_unlock(&info->lock); + return ret; +} + +static int check_range(struct epfs_range *range) +{ + __u64 index; + + if (range->range[0].begin >= range->range[0].end) { + epfs_err("Invalid range: [%llu, %llu)", range->range[0].begin, + range->range[0].end); + return -EINVAL; + } + + for (index = 1; index < range->num; index++) { + if ((range->range[index].begin >= range->range[index].end) || + (range->range[index].begin < range->range[index - 1].end)) { + epfs_err("Invalid range: [%llu, %llu), [%llu, %llu)", + range->range[index - 1].begin, + range->range[index - 1].end, + range->range[index].begin, + range->range[index].end); + return -EINVAL; + } + } + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) { + epfs_debug("epfs_range recv %llu ranges:", range->num); + for (index = 0; index < range->num; index++) { + epfs_debug("range:[%llu %llu)", + range->range[index].begin, + range->range[index].end); + } + epfs_debug("\n"); + } + return 0; +} + +long epfs_set_range(struct file *file, unsigned long arg) +{ + struct inode *inode = file->f_inode; + struct inode *origin_inode; + struct epfs_inode_info *info = epfs_inode_to_private(inode); + int ret = 0; + struct epfs_range *range; + struct epfs_range header; + + mutex_lock(&info->lock); + if (!info->origin_file) { + epfs_err("origin file does not exist!"); + ret = -EBADF; + goto out_set_range; + } + origin_inode = info->origin_file->f_inode; + if (!in_group_p(origin_inode->i_gid)) { + epfs_err("Only group member can set range: %u", + i_gid_read(origin_inode)); + ret = -EACCES; + goto out_set_range; + } + + if (copy_from_user(&header, (struct epfs_range *)arg, + sizeof(header))) { + ret = -EFAULT; + epfs_err("get header failed!"); + goto out_set_range; + } + + if (header.num > EPFS_MAX_RANGES || header.num == 0) { + ret = -EINVAL; + epfs_err("illegal num: %llu", header.num); + goto out_set_range; + } + + range = kzalloc(sizeof(header) + sizeof(header.range[0]) * header.num, + GFP_KERNEL); + if (!range) { + ret = -ENOMEM; + goto out_set_range; + } + + if (copy_from_user(range, (struct epfs_range *)arg, + sizeof(header) + sizeof(header.range[0]) * header.num)) { + ret = -EFAULT; + epfs_err("Failed to get range!
num: %llu", header.num); + kfree(range); + goto out_set_range; + } + + ret = check_range(range); + if (ret) { + kfree(range); + goto out_set_range; + } + + info->range = range; +out_set_range: + mutex_unlock(&info->lock); + return ret; +} + +static long __epfs_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + long rc = -ENOTTY; + + if (unlikely(_IOC_TYPE(cmd) != EPFS_IOCTL_MAGIC)) { + epfs_err("Failed to check epfs magic: %u", _IOC_TYPE(cmd)); + return -ENOTTY; + } + if (unlikely(_IOC_NR(cmd) >= EPFS_IOCTL_MAXNR)) { + epfs_err("Failed to check ioctl number: %u", _IOC_NR(cmd)); + return -ENOTTY; + } + if (unlikely(!access_ok((void __user *)arg, _IOC_SIZE(cmd)))) { + epfs_err("Failed to check user address space range!"); + return -EFAULT; + } + + switch (cmd) { + case IOC_SET_ORIGIN_FD: + return epfs_set_origin_fd(file, arg); + case IOC_SET_EPFS_RANGE: + return epfs_set_range(file, arg); + default: + epfs_info("Exit epfs unsupported ioctl, ret: %ld", rc); + return rc; + } +} + +static long epfs_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return __epfs_ioctl(file, cmd, arg); +} + +static long epfs_unlocked_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return __epfs_ioctl(file, cmd, arg); +} + +static ssize_t epfs_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct inode *inode = file_inode(file); + struct epfs_inode_info *info = epfs_inode_to_private(inode); + struct file *origin_file; + struct epfs_range *range; + ssize_t ret = 0; + loff_t pos = *ppos; + loff_t file_size; + int current_range_index = 0; + + mutex_lock(&info->lock); + range = info->range; + if (!range) { + ret = -EINVAL; + epfs_err("Invalid inode range!"); + goto out_read; + } + + origin_file = info->origin_file; + + if (!origin_file) { + ret = -ENOENT; + epfs_err("origin file not exist!"); + goto out_read; + } + + // Reduce count when it will read over file size. + file_size = i_size_read(file_inode(origin_file)); + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) + if (count > (file_size - pos)) + epfs_debug( + "count will be truncated to %llu, as file_size=%llu, pos=%llu", + file_size - pos, file_size, pos); + count = count <= (file_size - pos) ? count : (file_size - pos); + + // Skip ranges before pos. + while ((range->range[current_range_index].end <= pos) && + (current_range_index < range->num)) + current_range_index++; + + while (count > 0) { + __u64 current_begin, current_end; + + if (current_range_index >= range->num) { + // read directly when epfs range gone; + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) + epfs_debug( + "read from %llu with len %lu at the end.", + pos, count); + ret = vfs_read(origin_file, buf, count, &pos); + break; + } + current_begin = range->range[current_range_index].begin; + current_end = range->range[current_range_index].end; + if (current_begin <= pos) { + // Clear user memory + unsigned long clear_len = current_end - pos; + + clear_len = clear_len < count ? clear_len : count; + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) + epfs_debug( + "clear user memory from %llu with len %lu", + pos, clear_len); + if (clear_user(buf, clear_len)) { + ret = EFAULT; + break; + } + buf += clear_len; + pos += clear_len; + count -= clear_len; + current_range_index++; + } else { + // Read from pos to (next)current_begin + unsigned long read_len = current_begin - pos; + + read_len = read_len < count ? 
read_len : count; + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) + epfs_debug( + "read from %llu with len %lu", + pos, read_len); + ret = vfs_read(origin_file, buf, read_len, &pos); + if (ret < 0 || ret < read_len) { + // Could not read enough bytes; + break; + } + buf += ret; + count -= ret; + } + } + + if (ret >= 0) { + ret = pos - *ppos; + *ppos = pos; + } +out_read: + mutex_unlock(&info->lock); + return ret; +} + +const struct file_operations epfs_file_fops = { + .unlocked_ioctl = epfs_unlocked_ioctl, + .compat_ioctl = epfs_compat_ioctl, + .read = epfs_read, + .llseek = generic_file_llseek, +}; diff --git a/fs/epfs/inode.c b/fs/epfs/inode.c new file mode 100755 index 000000000..492186ebc --- /dev/null +++ b/fs/epfs/inode.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/epfs/inode.c + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. + * Author: weilongping@huawei.com + * Create: 2022-06-10 + */ +#include +#include +#include + +#include "internal.h" + +#define USER_DATA_RW 1008 +#define USER_DATA_RW_UID KUIDT_INIT(USER_DATA_RW) +#define USER_DATA_RW_GID KGIDT_INIT(USER_DATA_RW) + +struct dentry *epfs_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + return ERR_PTR(-ENOENT); +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 12, 0) +static int epfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, umode_t mode) +#else +static int epfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) +#endif +{ + struct inode *inode = epfs_iget(dir->i_sb, false); + + if (!inode) + return -ENOSPC; + d_tmpfile(dentry, inode); + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) + epfs_debug("epfs: tmpfile %p", inode); + return 0; +} + +const struct inode_operations epfs_dir_iops = { + .tmpfile = epfs_tmpfile, + .lookup = epfs_lookup, +}; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 12, 0) +static int epfs_getattr(struct user_namespace *mnt_userns, + const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) +#else +static int epfs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) +#endif +{ + struct dentry *dentry = path->dentry; + struct inode *inode = d_inode(dentry); + struct epfs_inode_info *info = epfs_inode_to_private(inode); + struct file *origin_file; + struct kstat origin_stat; + int ret; + + mutex_lock(&info->lock); + origin_file = info->origin_file; + if (!origin_file) { + ret = -ENOENT; + goto out_getattr; + } + ret = vfs_getattr(&(origin_file->f_path), &origin_stat, request_mask, + flags); + if (ret) + goto out_getattr; + fsstack_copy_attr_all(inode, file_inode(origin_file)); + fsstack_copy_inode_size(inode, file_inode(origin_file)); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 12, 0) + generic_fillattr(mnt_userns, d_inode(dentry), stat); +#else + generic_fillattr(d_inode(dentry), stat); +#endif + stat->blocks = origin_stat.blocks; + +out_getattr: + mutex_unlock(&info->lock); + return ret; +} + +const struct inode_operations epfs_file_iops = { + .getattr = epfs_getattr, +}; + +struct inode *epfs_iget(struct super_block *sb, bool is_dir) +{ + struct inode *inode = new_inode(sb); + + if (!inode) { + epfs_err("Failed to allocate new inode"); + return NULL; + } + if (is_dir) { + inode->i_op = &epfs_dir_iops; + inode->i_fop = &epfs_dir_fops; + inode->i_mode = S_IFDIR | 0770; + } else { + inode->i_op = &epfs_file_iops; + inode->i_fop = &epfs_file_fops; + inode->i_mode = S_IFREG; + } + inode->i_uid = USER_DATA_RW_UID; + inode->i_gid = USER_DATA_RW_GID; + 
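+ /* epfs inodes are owned by uid/gid USER_DATA_RW (1008) so processes in the shared user-data group can use them */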
return inode; +} diff --git a/fs/epfs/internal.h b/fs/epfs/internal.h new file mode 100755 index 000000000..9895ffbc0 --- /dev/null +++ b/fs/epfs/internal.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/epfs/internal.h + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. + * Author: weilongping@huawei.com + * Create: 2022-06-10 + */ +#ifndef __FS_EPFS_INTERNAL_H__ +#define __FS_EPFS_INTERNAL_H__ + +#include +#include +#include + +#include "epfs.h" + +#define EPFS_SUPER_MAGIC 0x20220607 + +struct epfs_inode_info { + struct inode vfs_inode; + struct file *origin_file; + struct epfs_range *range; + struct mutex lock; +}; + +static inline struct epfs_inode_info *epfs_inode_to_private(struct inode *inode) +{ + return container_of(inode, struct epfs_inode_info, vfs_inode); +} + +struct inode *epfs_iget(struct super_block *sb, bool is_dir); +extern const struct dentry_operations epfs_dops; +extern const struct file_operations epfs_dir_fops; +extern const struct file_operations epfs_file_fops; +extern struct file_system_type epfs_fs_type; +extern struct kmem_cache *epfs_inode_cachep; + +#endif // __FS_EPFS_INTERNAL_H__ diff --git a/fs/epfs/main.c b/fs/epfs/main.c new file mode 100755 index 000000000..c91e94f8f --- /dev/null +++ b/fs/epfs/main.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/epfs/main.c + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. + * Author: weilongping@huawei.com + * Create: 2022-06-10 + */ +#include +#include +#include + +#include "internal.h" + +struct kmem_cache *epfs_inode_cachep; + +static int __init epfs_init(void) +{ + int ret; + + epfs_inode_cachep = + kmem_cache_create("epfs_inode_cache", + sizeof(struct epfs_inode_info), 0, 0, + NULL); + if (!epfs_inode_cachep) + return -ENOMEM; + ret = register_filesystem(&epfs_fs_type); + if (ret) + kmem_cache_destroy(epfs_inode_cachep); + return ret; +} + +static void __exit epfs_exit(void) +{ + unregister_filesystem(&epfs_fs_type); + kmem_cache_destroy(epfs_inode_cachep); +} + +module_init(epfs_init); +module_exit(epfs_exit); +MODULE_DESCRIPTION("Enhanced Proxy File System for OpenHarmony"); +MODULE_AUTHOR("LongPing Wei weilongping@huawei.com"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_FS("epfs"); diff --git a/fs/epfs/super.c b/fs/epfs/super.c new file mode 100755 index 000000000..82c84f680 --- /dev/null +++ b/fs/epfs/super.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/epfs/super.c + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd.
+ * Author: weilongping@huawei.com + * Create: 2022-06-10 + */ +#include +#include +#include +#include +#include + +#include "internal.h" + +static struct inode *epfs_alloc_inode(struct super_block *sb) +{ + struct epfs_inode_info *info = + kmem_cache_zalloc(epfs_inode_cachep, GFP_KERNEL); + if (!info) + return NULL; + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) + epfs_debug("inode info: %p", info); + inode_init_once(&info->vfs_inode); + mutex_init(&info->lock); + return &info->vfs_inode; +} + +// Free epfs_inode_info (via sops->free_inode on >= 5.2, via the RCU callback below on older kernels) +static void epfs_free_inode(struct inode *inode) +{ + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) + epfs_debug("free_inode: %p", inode); + kmem_cache_free(epfs_inode_cachep, + epfs_inode_to_private(inode)); +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0) +static void i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + + epfs_free_inode(inode); +} +#endif + +// Destroy epfs_range +static void epfs_destroy_inode(struct inode *inode) +{ + struct epfs_inode_info *info = epfs_inode_to_private(inode); + + mutex_lock(&info->lock); + kfree(info->range); + info->range = NULL; + mutex_unlock(&info->lock); +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0) + call_rcu(&inode->i_rcu, i_callback); +#endif +} + +// Clear vfs_inode +static void epfs_evict_inode(struct inode *inode) +{ + struct epfs_inode_info *info = epfs_inode_to_private(inode); + + clear_inode(inode); + mutex_lock(&info->lock); + if (info->origin_file) { + fput(info->origin_file); + info->origin_file = NULL; + } + mutex_unlock(&info->lock); +} + +static int epfs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + buf->f_type = EPFS_SUPER_MAGIC; + return 0; +} + +struct super_operations epfs_sops = { + .alloc_inode = epfs_alloc_inode, + .destroy_inode = epfs_destroy_inode, +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0) + .free_inode = epfs_free_inode, +#endif + .evict_inode = epfs_evict_inode, + .statfs = epfs_statfs, +}; + +static int epfs_fill_super(struct super_block *s, void *data, int silent) +{ + struct inode *inode; + + s->s_op = &epfs_sops; + s->s_d_op = &epfs_dops; + s->s_magic = EPFS_SUPER_MAGIC; + inode = epfs_iget(s, true /* dir */); + if (!inode) { + epfs_err("Failed to get root inode!"); + return -ENOMEM; + } + + s->s_root = d_make_root(inode); + if (!s->s_root) { + epfs_err("Failed to make root inode"); + return -ENOMEM; + } + + return 0; +} + +struct dentry *epfs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) +{ + return mount_nodev(fs_type, flags, raw_data, epfs_fill_super); +} + +void epfs_kill_sb(struct super_block *sb) +{ + kill_anon_super(sb); +} + +struct file_system_type epfs_fs_type = { + .owner = THIS_MODULE, + .name = "epfs", + .mount = epfs_mount, + .kill_sb = epfs_kill_sb, +}; diff --git a/fs/hmdfs/Kconfig b/fs/hmdfs/Kconfig new file mode 100755 index 000000000..1bb5c2347 --- /dev/null +++ b/fs/hmdfs/Kconfig @@ -0,0 +1,40 @@ +config HMDFS_FS + tristate "HMDFS filesystem support" + help + HMDFS is an overlay file system. Relying on the underlying file system, + and provided the devices are networked, it enables file exchange across + devices. Device view and merge view are provided. In the device view, + the shared directories of the corresponding devices are provided under + different device directories; in the merge view, a collection of shared + files of all devices is provided.
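[Editor's note: to make the two views concrete, an illustrative layout. The device_view/merge_view names follow the naming used throughout this patch (hmdfs_device_view.h, hmdfs_merge_view.h); the mount point itself is an assumption:]

    /mnt/hmdfs/                   <- example mount point
        device_view/
            local/                <- this device's shared files
            <remote-device-id>/   <- files shared by a connected peer
        merge_view/               <- union of every device's shared files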
+ +config HMDFS_FS_PERMISSION + bool "HMDFS application permission management" + depends on HMDFS_FS + help + HMDFS provides cross-device file and directory sharing. Only the same + application can access the files and directories under the corresponding + package directory. It provides management and control of access + permissions. + + If unsure, say N. + +config HMDFS_FS_ENCRYPTION + bool "HMDFS message encryption" + depends on HMDFS_FS && TLS + help + HMDFS provides cross-device file and directory sharing by sending and + receiving network messages. To ensure data security, TLS encryption is + provided. + + If you want to improve performance, say N. + +config HMDFS_FS_DEBUG + bool "HMDFS debug log" + depends on HMDFS_FS + help + HMDFS prints a lot of logs, but many of them are debugging information + that is unnecessary during normal operation and only helpful when + diagnosing a problem. + + If unsure, say N. diff --git a/fs/hmdfs/Makefile b/fs/hmdfs/Makefile new file mode 100755 index 000000000..20896e716 --- /dev/null +++ b/fs/hmdfs/Makefile @@ -0,0 +1,15 @@ +obj-$(CONFIG_HMDFS_FS) += hmdfs.o +ccflags-y += -I$(src) + +hmdfs-y := main.o super.o inode.o dentry.o inode_root.o file_merge.o +hmdfs-y += hmdfs_client.o hmdfs_server.o inode_local.o inode_remote.o +hmdfs-y += inode_merge.o hmdfs_dentryfile.o file_root.o file_remote.o +hmdfs-y += file_local.o client_writeback.o server_writeback.o stash.o +hmdfs-y += hmdfs_share.o + +hmdfs-y += comm/device_node.o comm/message_verify.o comm/node_cb.o +hmdfs-y += comm/connection.o comm/socket_adapter.o comm/transport.o + +hmdfs-$(CONFIG_HMDFS_FS_ENCRYPTION) += comm/crypto.o +hmdfs-$(CONFIG_HMDFS_FS_PERMISSION) += authority/authentication.o +hmdfs-$(CONFIG_HMDFS_FS_PERMISSION) += authority/config.o diff --git a/fs/hmdfs/authority/authentication.c b/fs/hmdfs/authority/authentication.c new file mode 100755 index 000000000..07705d20e --- /dev/null +++ b/fs/hmdfs/authority/authentication.c @@ -0,0 +1,459 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/comm/authority/authentication.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include "authentication.h" +#include +#include + +#include "hmdfs.h" + +struct fs_struct *hmdfs_override_fsstruct(struct fs_struct *saved_fs) +{ +#if (defined CONFIG_HMDFS_FS_PERMISSION) && (defined CONFIG_SDCARD_FS) + struct fs_struct *copied_fs = copy_fs_struct(saved_fs); + + if (!copied_fs) + return NULL; + copied_fs->umask = 0; + task_lock(current); + current->fs = copied_fs; + task_unlock(current); + return copied_fs; +#else + return saved_fs; +#endif +} + +void hmdfs_revert_fsstruct(struct fs_struct *saved_fs, + struct fs_struct *copied_fs) +{ +#if (defined CONFIG_HMDFS_FS_PERMISSION) && (defined CONFIG_SDCARD_FS) + task_lock(current); + current->fs = saved_fs; + task_unlock(current); + free_fs_struct(copied_fs); +#endif +} + +const struct cred *hmdfs_override_fsids(bool is_recv_thread) +{ + struct cred *cred = NULL; + const struct cred *old_cred = NULL; + + cred = prepare_creds(); + if (!cred) + return NULL; + + cred->fsuid = is_recv_thread ? SYSTEM_UID : USER_DATA_RW_UID; + cred->fsgid = is_recv_thread ?
SYSTEM_GID : USER_DATA_RW_GID; + + old_cred = override_creds(cred); + + return old_cred; +} + +const struct cred *hmdfs_override_dir_fsids(struct inode *dir, + struct dentry *dentry, __u16 *_perm) +{ + struct hmdfs_inode_info *hii = hmdfs_i(dir); + struct cred *cred = NULL; + const struct cred *old_cred = NULL; + __u16 level = hmdfs_perm_get_next_level(hii->perm); + __u16 perm = 0; + + cred = prepare_creds(); + if (!cred) + return NULL; + + switch (level) { + case HMDFS_PERM_MNT: + /* system : media_rw */ + cred->fsuid = USER_DATA_RW_UID; + cred->fsgid = USER_DATA_RW_GID; + perm = (hii->perm & HMDFS_DIR_TYPE_MASK) | level; + break; + case HMDFS_PERM_DFS: + /* + * data : system : media_rw + * system: system : media_rw, need authority + * services: dfs_share : dfs_share + * other : media_rw : media_rw + **/ + if (!strcmp(dentry->d_name.name, DFS_SHARE_NAME)) { + perm = HMDFS_DIR_SERVICES | level; + cred->fsuid = DFS_SHARE_UID; + cred->fsgid = DFS_SHARE_GID; + break; + } + if (!strcmp(dentry->d_name.name, PKG_ROOT_NAME)) { + perm = HMDFS_DIR_DATA | level; + } else { + perm = HMDFS_DIR_PUBLIC | level; + } + cred->fsuid = USER_DATA_RW_UID; + cred->fsgid = USER_DATA_RW_GID; + break; + case HMDFS_PERM_PKG: + if (is_service_dir(hii->perm)) { + cred->fsuid = DFS_SHARE_UID; + cred->fsgid = DFS_SHARE_GID; + perm = AUTH_SERVICES | HMDFS_DIR_PKG | level; + break; + } + if (is_data_dir(hii->perm)) { + /* + * Mkdir for app pkg. + * Get the appid by passing pkgname to configfs. + * Set ROOT + media_rw for remote install, + * local uninstall. + * Set appid + media_rw for local install. + */ + int bid = get_bundle_uid(hmdfs_sb(dentry->d_sb), + dentry->d_name.name); + + if (bid != 0) { + cred->fsuid = KUIDT_INIT(bid); + cred->fsgid = KGIDT_INIT(bid); + } else { + cred->fsuid = ROOT_UID; + cred->fsgid = ROOT_GID; + } + perm = AUTH_PKG | HMDFS_DIR_PKG | level; + } else { + cred->fsuid = dir->i_uid; + cred->fsgid = dir->i_gid; + perm = (hii->perm & AUTH_MASK) | HMDFS_DIR_DEFAULT | level; + } + break; + case HMDFS_PERM_OTHER: + cred->fsuid = dir->i_uid; + cred->fsgid = dir->i_gid; + if (is_pkg_auth(hii->perm)) + perm = AUTH_PKG | HMDFS_DIR_PKG_SUB | level; + else + perm = (hii->perm & AUTH_MASK) | HMDFS_DIR_DEFAULT | level; + break; + default: + /* ! 
it should not get to here */ + hmdfs_err("hmdfs perm incorrect got default case, level:%u", level); + break; + } + + *_perm = perm; + old_cred = override_creds(cred); + + return old_cred; +} + +int hmdfs_override_dir_id_fs(struct cache_fs_override *or, + struct inode *dir, + struct dentry *dentry, + __u16 *perm) +{ + or->saved_cred = hmdfs_override_dir_fsids(dir, dentry, perm); + if (!or->saved_cred) + return -ENOMEM; + + or->saved_fs = current->fs; + or->copied_fs = hmdfs_override_fsstruct(or->saved_fs); + if (!or->copied_fs) { + hmdfs_revert_fsids(or->saved_cred); + return -ENOMEM; + } + + return 0; +} + +void hmdfs_revert_dir_id_fs(struct cache_fs_override *or) +{ + hmdfs_revert_fsstruct(or->saved_fs, or->copied_fs); + hmdfs_revert_fsids(or->saved_cred); +} + +const struct cred *hmdfs_override_file_fsids(struct inode *dir, __u16 *_perm) +{ + struct hmdfs_inode_info *hii = hmdfs_i(dir); + struct cred *cred = NULL; + const struct cred *old_cred = NULL; + __u16 level = hmdfs_perm_get_next_level(hii->perm); + uint16_t perm; + + perm = HMDFS_FILE_DEFAULT | level; + + cred = prepare_creds(); + if (!cred) + return NULL; + + cred->fsuid = dir->i_uid; + cred->fsgid = dir->i_gid; + if (is_pkg_auth(hii->perm)) + perm = AUTH_PKG | HMDFS_FILE_PKG_SUB | level; + else + perm = (hii->perm & AUTH_MASK) | HMDFS_FILE_DEFAULT | level; + + *_perm = perm; + old_cred = override_creds(cred); + + return old_cred; +} + +void hmdfs_revert_fsids(const struct cred *old_cred) +{ + const struct cred *cur_cred; + + cur_cred = current->cred; + revert_creds(old_cred); + put_cred(cur_cred); +} + +int hmdfs_persist_perm(struct dentry *dentry, __u16 *perm) +{ + int err; + struct inode *minode = d_inode(dentry); + + if (!minode) + return -EINVAL; + + inode_lock(minode); + err = __vfs_setxattr(&init_user_ns, dentry, minode, HMDFS_PERM_XATTR, perm, + sizeof(*perm), XATTR_CREATE); + if (!err) + fsnotify_xattr(dentry); + else if (err && err != -EEXIST) + hmdfs_err("failed to setxattr, err=%d", err); + inode_unlock(minode); + return err; +} + +__u16 hmdfs_read_perm(struct inode *inode) +{ + __u16 ret = 0; + int size = 0; + struct dentry *dentry = d_find_alias(inode); + + if (!dentry) + return ret; + + size = __vfs_getxattr(dentry, inode, HMDFS_PERM_XATTR, &ret, + sizeof(ret)); + /* + * some file may not set setxattr with perm + * eg. 
files created in sdcard dir by other user + **/ + if (size < 0 || size != sizeof(ret)) + ret = HMDFS_ALL_MASK; + + dput(dentry); + return ret; +} + +static __u16 __inherit_perm_dir(struct inode *parent, struct inode *inode) +{ + __u16 perm = 0; + struct hmdfs_inode_info *info = hmdfs_i(parent); + __u16 level = hmdfs_perm_get_next_level(info->perm); + struct dentry *dentry = d_find_alias(inode); + + if (!dentry) + return perm; + + switch (level) { + case HMDFS_PERM_MNT: + /* system : media_rw */ + perm = (info->perm & HMDFS_DIR_TYPE_MASK) | level; + break; + case HMDFS_PERM_DFS: + /* + * data : system : media_rw + * system: system : media_rw, need authority + * services: dfs_share : dfs_share + * other : media_rw : media_rw + **/ + if (!strcmp(dentry->d_name.name, DFS_SHARE_NAME)) { + // "services" + perm = HMDFS_DIR_SERVICES | level; + } else if (!strcmp(dentry->d_name.name, PKG_ROOT_NAME)) { + // "data" + perm = HMDFS_DIR_DATA | level; + } else if (!strcmp(dentry->d_name.name, SYSTEM_NAME)) { + // "system" + perm = AUTH_SYSTEM | HMDFS_DIR_SYSTEM | level; + } else { + perm = HMDFS_DIR_PUBLIC | level; + } + break; + case HMDFS_PERM_PKG: + if (is_service_dir(info->perm)) { + perm = AUTH_SERVICES | HMDFS_DIR_PKG | level; + break; + } + if (is_data_dir(info->perm)) { + /* + * Mkdir for app pkg. + * Get the appid by passing pkgname to configfs. + * Set ROOT + media_rw for remote install, + * local uninstall. + * Set appid + media_rw for local install. + */ + perm = AUTH_PKG | HMDFS_DIR_PKG | level; + } else { + perm = (info->perm & AUTH_MASK) | HMDFS_DIR_DEFAULT | level; + } + break; + case HMDFS_PERM_OTHER: + if (is_pkg_auth(info->perm)) + perm = AUTH_PKG | HMDFS_DIR_PKG_SUB | level; + else + perm = (info->perm & AUTH_MASK) | HMDFS_DIR_DEFAULT | level; + break; + default: + /* ! 
it should not get to here */ + hmdfs_err("hmdfs perm incorrect got default case, level:%u", level); + break; + } + dput(dentry); + return perm; +} + +static __u16 __inherit_perm_file(struct inode *parent) +{ + struct hmdfs_inode_info *hii = hmdfs_i(parent); + __u16 level = hmdfs_perm_get_next_level(hii->perm); + uint16_t perm; + + perm = HMDFS_FILE_DEFAULT | level; + + if (is_pkg_auth(hii->perm)) + perm = AUTH_PKG | HMDFS_FILE_PKG_SUB | level; + else + perm = (hii->perm & AUTH_MASK) | HMDFS_FILE_DEFAULT | level; + + return perm; +} + +__u16 hmdfs_perm_inherit(struct inode *parent_inode, struct inode *child) +{ + __u16 perm; + + if (S_ISDIR(child->i_mode)) + perm = __inherit_perm_dir(parent_inode, child); + else + perm = __inherit_perm_file(parent_inode); + return perm; +} + +void check_and_fixup_ownership(struct inode *parent_inode, struct inode *child) +{ + struct hmdfs_inode_info *info = hmdfs_i(child); + struct hmdfs_inode_info *dir = hmdfs_i(parent_inode); + + if (info->perm == HMDFS_ALL_MASK) + info->perm = hmdfs_perm_inherit(parent_inode, child); + if (is_service_dir(dir->perm)) + child->i_mode = child->i_mode | S_IRWXG; +} + +void check_and_fixup_ownership_remote(struct inode *dir, + struct dentry *dentry) +{ + struct hmdfs_inode_info *hii = hmdfs_i(dir); + struct inode *dinode = d_inode(dentry); + struct hmdfs_inode_info *dinfo = hmdfs_i(dinode); + __u16 level = hmdfs_perm_get_next_level(hii->perm); + __u16 perm = 0; + + hmdfs_debug("level:0x%X", level); + switch (level) { + case HMDFS_PERM_MNT: + /* system : media_rw */ + dinode->i_uid = USER_DATA_RW_UID; + dinode->i_gid = USER_DATA_RW_GID; + perm = (hii->perm & HMDFS_DIR_TYPE_MASK) | level; + break; + case HMDFS_PERM_DFS: + /* + * data : system : media_rw + * system: system : media_rw, need authority + * other : media_rw : media_rw + **/ + if (!strcmp(dentry->d_name.name, DFS_SHARE_NAME)) { + perm = HMDFS_DIR_SERVICES | level; + dinode->i_uid = DFS_SHARE_UID; + dinode->i_gid = DFS_SHARE_GID; + dinode->i_mode = dinode->i_mode | S_IRWXG; + break; + } + if (!strcmp(dentry->d_name.name, PKG_ROOT_NAME)) { + perm = HMDFS_DIR_DATA | level; + } else { + perm = HMDFS_DIR_PUBLIC | level; + } + dinode->i_uid = USER_DATA_RW_UID; + dinode->i_gid = USER_DATA_RW_GID; + break; + case HMDFS_PERM_PKG: + if (is_service_dir(hii->perm)) { + dinode->i_uid = DFS_SHARE_UID; + dinode->i_gid = DFS_SHARE_GID; + dinode->i_mode = dinode->i_mode | S_IRWXG; + perm = AUTH_SERVICES | HMDFS_DIR_PKG | level; + break; + } + if (is_data_dir(hii->perm)) { + /* + * Mkdir for app pkg. + * Get the appid by passing pkgname to configfs. + * Set ROOT + media_rw for remote install, + * local uninstall. + * Set appid + media_rw for local install. + */ + int bid = get_bundle_uid(hmdfs_sb(dentry->d_sb), + dentry->d_name.name); + if (bid != 0) { + dinode->i_uid = KUIDT_INIT(bid); + dinode->i_gid = KGIDT_INIT(bid); + } else { + dinode->i_uid = ROOT_UID; + dinode->i_gid = ROOT_GID; + } + perm = AUTH_PKG | HMDFS_DIR_PKG | level; + } else { + dinode->i_uid = dir->i_uid; + dinode->i_gid = dir->i_gid; + perm = (hii->perm & AUTH_MASK) | HMDFS_DIR_DEFAULT | level; + } + break; + case HMDFS_PERM_OTHER: + dinode->i_uid = dir->i_uid; + dinode->i_gid = dir->i_gid; + if (is_service_auth(hii->perm)) { + dinode->i_mode = dir->i_mode | S_IRWXG; + perm = AUTH_PKG | HMDFS_DIR_PKG_SUB | level; + break; + } + if (is_pkg_auth(hii->perm)) + perm = AUTH_PKG | HMDFS_DIR_PKG_SUB | level; + else + perm = (hii->perm & AUTH_MASK) | HMDFS_DIR_DEFAULT | level; + break; + default: + /* ! 
it should not get to here */ + hmdfs_err("hmdfs perm incorrect got default case, level:%u", level); + break; + } + + dinfo->perm = perm; +} + +void hmdfs_root_inode_perm_init(struct inode *root_inode) +{ + struct hmdfs_inode_info *hii = hmdfs_i(root_inode); + + hii->perm = HMDFS_DIR_ROOT | HMDFS_PERM_MNT; + set_inode_uid(root_inode, USER_DATA_RW_UID); + set_inode_gid(root_inode, USER_DATA_RW_GID); +} diff --git a/fs/hmdfs/authority/authentication.h b/fs/hmdfs/authority/authentication.h new file mode 100755 index 000000000..a36636a78 --- /dev/null +++ b/fs/hmdfs/authority/authentication.h @@ -0,0 +1,350 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/authority/authentication.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef AUTHENTICATION_H +#define AUTHENTICATION_H + +#include +#include +#include +#include +#include +#include +#include "hmdfs.h" + +struct cache_fs_override { + struct fs_struct *saved_fs; + struct fs_struct *copied_fs; + const struct cred *saved_cred; +}; + +#ifdef CONFIG_HMDFS_FS_PERMISSION + +#define OID_ROOT 0 +#define OID_SYSTEM 1000 +#define OID_USER_DATA_RW 1008 +#define OID_DFS_SHARE 3822 + +/* copied from sdcardfs/multiuser.h */ +#define BASE_USER_RANGE 200000 /* offset for uid ranges for each user */ + +#define HMDFS_PERM_XATTR "user.hmdfs.perm" + +#define ROOT_UID KUIDT_INIT(OID_ROOT) +#define SYSTEM_UID KUIDT_INIT(OID_SYSTEM) +#define USER_DATA_RW_UID KUIDT_INIT(OID_USER_DATA_RW) +#define DFS_SHARE_UID KUIDT_INIT(OID_DFS_SHARE) + +#define ROOT_GID KGIDT_INIT(OID_ROOT) +#define SYSTEM_GID KGIDT_INIT(OID_SYSTEM) +#define USER_DATA_RW_GID KGIDT_INIT(OID_USER_DATA_RW) +#define DFS_SHARE_GID KGIDT_INIT(OID_DFS_SHARE) + +#define PKG_ROOT_NAME "data" +#define DFS_SHARE_NAME "services" +#define SYSTEM_NAME "system" + +/* + * | perm fix | permmnt | permdfs | permpkg | perm other + * /mnt/mdfs/ accoundID / device view / local / DATA / packageName /... + * / system /... + * / documents /... + * / devid /....... 
+ * / merge view / + * / sdcard / + **/ +#define HMDFS_PERM_MASK 0x000F + +#define HMDFS_PERM_FIX 0 +#define HMDFS_PERM_MNT 1 +#define HMDFS_PERM_DFS 2 +#define HMDFS_PERM_PKG 3 +#define HMDFS_PERM_OTHER 4 + +static inline bool is_perm_fix(__u16 perm) +{ + return (perm & HMDFS_PERM_MASK) == HMDFS_PERM_FIX; +} + +static inline bool is_perm_mnt(__u16 perm) +{ + return (perm & HMDFS_PERM_MASK) == HMDFS_PERM_MNT; +} + +static inline bool is_perm_dfs(__u16 perm) +{ + return (perm & HMDFS_PERM_MASK) == HMDFS_PERM_DFS; +} + +static inline bool is_perm_pkg(__u16 perm) +{ + return (perm & HMDFS_PERM_MASK) == HMDFS_PERM_PKG; +} + +static inline bool is_perm_other(__u16 perm) +{ + return (perm & HMDFS_PERM_MASK) == HMDFS_PERM_OTHER; +} + +static inline void hmdfs_check_cred(const struct cred *cred) +{ + if (cred->fsuid.val != OID_SYSTEM || cred->fsgid.val != OID_SYSTEM) + hmdfs_warning("uid is %u, gid is %u", cred->fsuid.val, + cred->fsgid.val); +} + +/* dir and file type mask for hmdfs */ +#define HMDFS_DIR_TYPE_MASK 0x00F0 + +/* LEVEL 0 perm fix - permmnt , only root dir */ +#define HMDFS_DIR_ROOT 0x0010 + +/* LEVEL 1 perm dfs */ +#define HMDFS_DIR_PUBLIC 0x0020 +#define HMDFS_DIR_DATA 0x0030 +#define HMDFS_DIR_SYSTEM 0x0040 + +/* LEVEL 2 HMDFS_PERM_PKG */ +#define HMDFS_DIR_PKG 0x0050 + +/* LEVEL 2~n HMDFS_PERM_OTHER */ +#define PUBLIC_FILE 0x0060 +#define PUBLIC_SUB_DIR 0x0070 +#define SYSTEM_SUB_DIR 0x0080 +#define SYSTEM_SUB_FILE 0x0090 + +#define HMDFS_DIR_PKG_SUB 0x00A0 +#define HMDFS_FILE_PKG_SUB 0x00B0 + +/* access right is derived + * PUBLIC_SUB_DIR SYSTEM_SUB_DIR HMDFS_DIR_PKG_SUB + * PUBLIC_FILE SYSTEM_SUB_FILE HMDFS_FILE_PKG_SUB + */ +#define HMDFS_DIR_DEFAULT 0x00C0 +#define HMDFS_FILE_DEFAULT 0x00D0 +#define HMDFS_DIR_SERVICES 0x00E0 +#define HMDFS_TYPE_DEFAULT 0x0000 + +static inline bool is_data_dir(__u16 perm) +{ + return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_DIR_DATA; +} + +static inline bool is_service_dir(__u16 perm) +{ + return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_DIR_SERVICES; +} + +static inline bool is_pkg_dir(__u16 perm) +{ + return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_DIR_PKG; +} + +static inline bool is_pkg_sub_dir(__u16 perm) +{ + return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_DIR_PKG_SUB; +} + +static inline bool is_pkg_sub_file(__u16 perm) +{ + return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_FILE_PKG_SUB; +} + +static inline bool is_default_dir(__u16 perm) +{ + return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_DIR_DEFAULT; +} + +static inline bool is_default_file(__u16 perm) +{ + return (perm & HMDFS_DIR_TYPE_MASK) == HMDFS_FILE_DEFAULT; +} + +#define AUTH_MASK 0x0F00 +#define AUTH_PKG 0x0100 +#define AUTH_SYSTEM 0x0200 +#define AUTH_SERVICES 0x0400 + +static inline bool is_pkg_auth(__u16 perm) +{ + return (perm & AUTH_MASK) == AUTH_PKG; +} + +static inline bool is_system_auth(__u16 perm) +{ + return (perm & AUTH_MASK) == AUTH_SYSTEM; +} + +static inline bool is_service_auth(__u16 perm) +{ + return (perm & AUTH_MASK) == AUTH_SERVICES; +} +#define HMDFS_MOUNT_POINT_MASK 0xF000 +#define HMDFS_MNT_COMMON 0x0000 // sdcard +#define HMDFS_MNT_SDCARD 0x1000 // sdcard +#define HMDFS_MNT_ACNTID 0x2000 // accound id + +#define HMDFS_ALL_MASK (HMDFS_MOUNT_POINT_MASK | AUTH_MASK | HMDFS_DIR_TYPE_MASK | HMDFS_PERM_MASK) + +static inline void set_inode_gid(struct inode *inode, kgid_t gid) +{ + inode->i_gid = gid; +} + +static inline kuid_t get_inode_uid(struct inode *inode) +{ + kuid_t uid = inode->i_uid; + return uid; +} + +static inline void set_inode_uid(struct inode *inode, kuid_t uid) 
+{ + inode->i_uid = uid; +} + +static inline kuid_t hmdfs_override_inode_uid(struct inode *inode) +{ + kuid_t uid = get_inode_uid(inode); + + set_inode_uid(inode, current_fsuid()); + return uid; +} + +static inline void hmdfs_revert_inode_uid(struct inode *inode, kuid_t uid) +{ + set_inode_uid(inode, uid); +} + +static inline const struct cred *hmdfs_override_creds(const struct cred *new) +{ + if (!new) + return NULL; + + return override_creds(new); +} + +static inline void hmdfs_revert_creds(const struct cred *old) +{ + if (old) + revert_creds(old); +} + +static inline __u16 hmdfs_perm_get_next_level(__u16 perm) +{ + __u16 level = (perm & HMDFS_PERM_MASK) + 1; + + if (level <= HMDFS_PERM_OTHER) + return level; + else + return HMDFS_PERM_OTHER; +} + +struct fs_struct *hmdfs_override_fsstruct(struct fs_struct *saved_fs); +void hmdfs_revert_fsstruct(struct fs_struct *saved_fs, + struct fs_struct *copied_fs); +const struct cred *hmdfs_override_fsids(bool is_recv_thread); +const struct cred *hmdfs_override_dir_fsids(struct inode *dir, + struct dentry *dentry, __u16 *perm); +const struct cred *hmdfs_override_file_fsids(struct inode *dir, __u16 *perm); +void hmdfs_revert_fsids(const struct cred *old_cred); +int hmdfs_persist_perm(struct dentry *dentry, __u16 *perm); +__u16 hmdfs_read_perm(struct inode *inode); +void hmdfs_root_inode_perm_init(struct inode *root_inode); +void check_and_fixup_ownership(struct inode *parent_inode, struct inode *child); +int hmdfs_override_dir_id_fs(struct cache_fs_override *or, + struct inode *dir, + struct dentry *dentry, + __u16 *perm); +void hmdfs_revert_dir_id_fs(struct cache_fs_override *or); +void check_and_fixup_ownership_remote(struct inode *dir, + struct dentry *dentry); +extern int get_bid(const char *bname); +extern int __init hmdfs_init_configfs(void); +extern void hmdfs_exit_configfs(void); + +static inline int get_bundle_uid(struct hmdfs_sb_info *sbi, const char *bname) +{ + return sbi->user_id * BASE_USER_RANGE + get_bid(bname); +} + +#else + +static inline +void hmdfs_root_inode_perm_init(struct inode *root_inode) +{ +} + +static inline +void hmdfs_revert_fsids(const struct cred *old_cred) +{ +} + +static inline +int hmdfs_override_dir_id_fs(struct cache_fs_override *or, + struct inode *dir, + struct dentry *dentry, + __u16 *perm) +{ + return 0; +} + +static inline +void hmdfs_revert_dir_id_fs(struct cache_fs_override *or) +{ +} + +static inline +void check_and_fixup_ownership(struct inode *parent_inode, struct inode *child) +{ +} + +static inline +const struct cred *hmdfs_override_fsids(bool is_recv_thread) +{ + return ERR_PTR(-ENOTTY); +} + +static inline +const struct cred *hmdfs_override_creds(const struct cred *new) +{ + return ERR_PTR(-ENOTTY); +} + +static inline +void hmdfs_revert_creds(const struct cred *old) +{ + +} + +static inline +void check_and_fixup_ownership_remote(struct inode *dir, + struct dentry *dentry) +{ +} + +static inline +kuid_t hmdfs_override_inode_uid(struct inode *inode) +{ + return KUIDT_INIT((uid_t)0); +} + +static inline +void hmdfs_revert_inode_uid(struct inode *inode, kuid_t uid) +{ +} + +static inline +void hmdfs_check_cred(const struct cred *cred) +{ +} + +static inline int __init hmdfs_init_configfs(void) { return 0; } +static inline void hmdfs_exit_configfs(void) {} + +#endif /* CONFIG_HMDFS_FS_PERMISSION */ + +#endif diff --git a/fs/hmdfs/authority/config.c b/fs/hmdfs/authority/config.c new file mode 100755 index 000000000..1610ca902 --- /dev/null +++ b/fs/hmdfs/authority/config.c @@ -0,0 +1,377 @@ +/* 
SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/authority/config.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include +#include +#include +#include +#include +#include "hmdfs.h" + +#define UID_ATTR_TYPE 0 +#define GID_ATTR_TYPE 1 + +static struct kmem_cache *hmdfs_bid_entry_cachep; + +struct hmdfs_bid_entry { + struct hlist_node node; + struct qstr str; + int id; +}; + +struct hmdfs_config_bitem { + struct config_item item; + struct qstr str; +}; + +static unsigned int make_hash(const char *name, unsigned int len) +{ + unsigned long hash; + + hash = init_name_hash(0); + while (len--) + hash = partial_name_hash(tolower(*name++), hash); + + return end_name_hash(hash); +} + +static struct qstr make_qstr(const char *name) +{ + struct qstr str; + str.name = name; + str.len = strlen(name); + str.hash = make_hash(str.name, str.len); + + return str; +} + +static struct hmdfs_bid_entry *alloc_bid_entry(const char *name, int id) +{ + struct hmdfs_bid_entry *bid_entry; + char *bid_entry_name; + + bid_entry = kmem_cache_alloc(hmdfs_bid_entry_cachep, GFP_KERNEL); + if (!bid_entry) { + bid_entry = ERR_PTR(-ENOMEM); + goto out; + } + + bid_entry_name = kstrdup(name, GFP_KERNEL); + if (!bid_entry_name) { + kmem_cache_free(hmdfs_bid_entry_cachep, bid_entry); + bid_entry = ERR_PTR(-ENOMEM); + goto out; + } + + INIT_HLIST_NODE(&bid_entry->node); + bid_entry->str = make_qstr(bid_entry_name); + bid_entry->id = id; +out: + return bid_entry; +} + +static void free_bid_entry(struct hmdfs_bid_entry *bid_entry) +{ + if (bid_entry == NULL) + return; + + kfree(bid_entry->str.name); + kmem_cache_free(hmdfs_bid_entry_cachep, bid_entry); +} + +static struct hmdfs_config_bitem *alloc_bitem(const char *name) +{ + struct hmdfs_config_bitem *bitem; + char *bitem_name; + + bitem = kzalloc(sizeof(*bitem), GFP_KERNEL); + if (!bitem) { + bitem = ERR_PTR(-ENOMEM); + goto out; + } + + bitem_name = kstrdup(name, GFP_KERNEL); + if (!bitem_name) { + kfree(bitem); + bitem = ERR_PTR(-ENOMEM); + goto out; + } + + bitem->str = make_qstr(bitem_name); +out: + return bitem; +} + +static void free_bitem(struct hmdfs_config_bitem *bitem) +{ + if (bitem == NULL) + return; + + kfree(bitem->str.name); + kfree(bitem); +} + +#define HMDFS_BUNDLE_ATTRIBUTE(_attr_) \ + \ +static DEFINE_HASHTABLE(hmdfs_##_attr_##_hash_table, 4); \ + \ +static DEFINE_MUTEX(hmdfs_##_attr_##_hash_mutex); \ + \ +static int query_##_attr_##_hash_entry(struct qstr *str) \ +{ \ + int id = 0; \ + struct hmdfs_bid_entry *bid_entry; \ + struct hlist_node *hash_node; \ + \ + mutex_lock(&hmdfs_##_attr_##_hash_mutex); \ + hash_for_each_possible_safe(hmdfs_##_attr_##_hash_table, \ + bid_entry, hash_node, node, str->hash) { \ + if (qstr_case_eq(str, &bid_entry->str)) { \ + id = bid_entry->id; \ + break; \ + } \ + } \ + mutex_unlock(&hmdfs_##_attr_##_hash_mutex); \ + \ + return id; \ +} \ + \ +static int insert_##_attr_##_hash_entry(struct qstr *str, int id) \ +{ \ + int err = 0; \ + struct hmdfs_bid_entry *bid_entry; \ + struct hlist_node *hash_node; \ + \ + hmdfs_info("insert name = %s", str->name); \ + \ + mutex_lock(&hmdfs_##_attr_##_hash_mutex); \ + hash_for_each_possible_safe(hmdfs_##_attr_##_hash_table, \ + bid_entry, hash_node, node, str->hash) { \ + if (qstr_case_eq(str, &bid_entry->str)) { \ + bid_entry->id = id; \ + mutex_unlock(&hmdfs_##_attr_##_hash_mutex); \ + goto out; \ + } \ + } \ + mutex_unlock(&hmdfs_##_attr_##_hash_mutex); \ + \ + bid_entry = alloc_bid_entry(str->name, id); \ + if (IS_ERR(bid_entry)) { \ + err = 
PTR_ERR(bid_entry); \
+		goto out; \
+	} \
+	\
+	hash_add_rcu(hmdfs_##_attr_##_hash_table, &bid_entry->node, \
+		bid_entry->str.hash); \
+out: \
+	return err; \
+} \
+ \
+static void remove_##_attr_##_hash_entry(struct qstr *str) \
+{ \
+	struct hmdfs_bid_entry *bid_entry; \
+	struct hlist_node *hash_node; \
+ \
+	hmdfs_info("remove name = %s", str->name); \
+ \
+	mutex_lock(&hmdfs_##_attr_##_hash_mutex); \
+	hash_for_each_possible_safe(hmdfs_##_attr_##_hash_table, \
+		bid_entry, hash_node, node, str->hash) { \
+		if (qstr_case_eq(str, &bid_entry->str)) { \
+			hash_del_rcu(&bid_entry->node); \
+			free_bid_entry(bid_entry); \
+			break; \
+		} \
+	} \
+	mutex_unlock(&hmdfs_##_attr_##_hash_mutex); \
+} \
+ \
+static void clear_##_attr_##_hash_entry(void) \
+{ \
+	int index; \
+	struct hmdfs_bid_entry *bid_entry; \
+	struct hlist_node *hash_node; \
+ \
+	hmdfs_info("clear bid entry"); \
+ \
+	mutex_lock(&hmdfs_##_attr_##_hash_mutex); \
+	hash_for_each_safe(hmdfs_##_attr_##_hash_table, index, \
+		hash_node, bid_entry, node) { \
+		hash_del_rcu(&bid_entry->node); \
+		kfree(bid_entry->str.name); \
+		kmem_cache_free(hmdfs_bid_entry_cachep, bid_entry); \
+	} \
+	mutex_unlock(&hmdfs_##_attr_##_hash_mutex); \
+} \
+ \
+static int hmdfs_##_attr_##_get(const char *bname) \
+{ \
+	struct qstr str; \
+ \
+	str = make_qstr(bname); \
+	return query_##_attr_##_hash_entry(&str); \
+} \
+ \
+static ssize_t hmdfs_##_attr_##_show(struct config_item *item, \
+	char *page) \
+{ \
+	int id; \
+	struct hmdfs_config_bitem *bitem; \
+ \
+	hmdfs_info("show bundle id"); \
+ \
+	bitem = container_of(item, struct hmdfs_config_bitem, item); \
+	id = query_##_attr_##_hash_entry(&bitem->str); \
+ \
+	return scnprintf(page, PAGE_SIZE, "%d\n", id); \
+} \
+ \
+static ssize_t hmdfs_##_attr_##_store(struct config_item *item, \
+	const char *page, size_t count) \
+{ \
+	int id; \
+	int err; \
+	ssize_t size; \
+	struct hmdfs_config_bitem *bitem; \
+ \
+	hmdfs_info("store bundle id"); \
+ \
+	bitem = container_of(item, struct hmdfs_config_bitem, item); \
+ \
+	if (kstrtoint(page, 10, &id)) { \
+		size = -EINVAL; \
+		goto out; \
+	} \
+ \
+	err = insert_##_attr_##_hash_entry(&bitem->str, id); \
+	if (err) { \
+		size = err; \
+		goto out; \
+	} \
+ \
+	size = count; \
+out: \
+	return size; \
+} \
+ \
+static struct configfs_attribute hmdfs_##_attr_##_attr = { \
+	.ca_name = __stringify(_attr_), \
+	.ca_mode = S_IRUGO | S_IWUGO, \
+	.ca_owner = THIS_MODULE, \
+	.show = hmdfs_##_attr_##_show, \
+	.store = hmdfs_##_attr_##_store, \
+};
+
+HMDFS_BUNDLE_ATTRIBUTE(appid)
+
+static struct configfs_attribute *hmdfs_battrs[] = {
+	&hmdfs_appid_attr,
+	NULL,
+};
+
+static void hmdfs_config_bitem_release(struct config_item *item)
+{
+	struct hmdfs_config_bitem *bitem;
+
+	hmdfs_info("release bundle item");
+
+	bitem = container_of(item, struct hmdfs_config_bitem, item);
+	remove_appid_hash_entry(&bitem->str);
+	free_bitem(bitem);
+}
+
+static struct configfs_item_operations hmdfs_config_bitem_ops = {
+	.release = hmdfs_config_bitem_release,
+};
+
+static struct config_item_type hmdfs_config_bitem_type = {
+	.ct_item_ops = &hmdfs_config_bitem_ops,
+	.ct_attrs = hmdfs_battrs,
+	.ct_owner = THIS_MODULE,
+};
+
+static struct config_item *hmdfs_make_bitem(struct config_group *group,
+					    const char *name)
+{
+	struct config_item *item;
+	struct hmdfs_config_bitem *bitem;
+
+	hmdfs_info("make bundle item = %s", name);
+
+	bitem = alloc_bitem(name);
+	if (IS_ERR(bitem)) {
+		item = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	
config_item_init_type_name(&bitem->item, name, + &hmdfs_config_bitem_type); + item = &bitem->item; +out: + return item; +} + +static struct configfs_group_operations hmdfs_group_ops = { + .make_item = hmdfs_make_bitem, +}; + +static struct config_item_type hmdfs_group_type = { + .ct_group_ops = &hmdfs_group_ops, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem hmdfs_subsystem = { + .su_group = { + .cg_item = { + .ci_namebuf = "hmdfs", + .ci_type = &hmdfs_group_type, + }, + }, +}; + +int get_bid(const char *bname) +{ + return hmdfs_appid_get(bname); +} + +int __init hmdfs_init_configfs(void) +{ + int err; + struct configfs_subsystem *subsys; + + hmdfs_info("init configfs"); + + hmdfs_bid_entry_cachep = kmem_cache_create("hmdfs_bid_entry_cachep", + sizeof(struct hmdfs_bid_entry), 0, 0, NULL); + if (!hmdfs_bid_entry_cachep) { + hmdfs_err("failed to create bid entry cachep"); + err = -ENOMEM; + goto out; + } + + subsys = &hmdfs_subsystem; + config_group_init(&subsys->su_group); + mutex_init(&subsys->su_mutex); + + err = configfs_register_subsystem(subsys); + if (err) + hmdfs_err("failed to register subsystem"); + +out: + return err; +} + +void hmdfs_exit_configfs(void) +{ + hmdfs_info("hmdfs exit configfs"); + + configfs_unregister_subsystem(&hmdfs_subsystem); + clear_appid_hash_entry(); + + kmem_cache_destroy(hmdfs_bid_entry_cachep); +} \ No newline at end of file diff --git a/fs/hmdfs/client_writeback.c b/fs/hmdfs/client_writeback.c new file mode 100755 index 000000000..d62c286af --- /dev/null +++ b/fs/hmdfs/client_writeback.c @@ -0,0 +1,519 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/client_writeback.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hmdfs.h" +#include "hmdfs_trace.h" + +/* 200ms */ +#define HMDFS_MAX_PAUSE max((HZ / 5), 1) +#define HMDFS_BANDWIDTH_INTERVAL max((HZ / 5), 1) +/* Dirty type */ +#define HMDFS_DIRTY_FS 0 +#define HMDFS_DIRTY_FILE 1 +/* Exceed flags */ +#define HMDFS_FS_EXCEED (1 << HMDFS_DIRTY_FS) +#define HMDFS_FILE_EXCEED (1 << HMDFS_DIRTY_FILE) +/* Ratelimit calculate shift */ +#define HMDFS_LIMIT_SHIFT 10 + +void hmdfs_writeback_inodes_sb_handler(struct work_struct *work) +{ + struct hmdfs_writeback *hwb = container_of( + work, struct hmdfs_writeback, dirty_sb_writeback_work.work); + + try_to_writeback_inodes_sb(hwb->sbi->sb, WB_REASON_FS_FREE_SPACE); +} + +void hmdfs_writeback_inode_handler(struct work_struct *work) +{ + struct hmdfs_inode_info *info = NULL; + struct inode *inode = NULL; + struct hmdfs_writeback *hwb = container_of( + work, struct hmdfs_writeback, dirty_inode_writeback_work.work); + + spin_lock(&hwb->inode_list_lock); + while (likely(!list_empty(&hwb->inode_list_head))) { + info = list_first_entry(&hwb->inode_list_head, + struct hmdfs_inode_info, wb_list); + list_del_init(&info->wb_list); + spin_unlock(&hwb->inode_list_lock); + + inode = &info->vfs_inode; + write_inode_now(inode, 0); + iput(inode); + spin_lock(&hwb->inode_list_lock); + } + spin_unlock(&hwb->inode_list_lock); +} + +static void hmdfs_writeback_inodes_sb_delayed(struct super_block *sb, + unsigned int delay) +{ + struct hmdfs_sb_info *sbi = sb->s_fs_info; + unsigned long timeout; + + timeout = msecs_to_jiffies(delay); + if (!timeout || !work_busy(&sbi->h_wb->dirty_sb_writeback_work.work)) + mod_delayed_work(sbi->h_wb->dirty_sb_writeback_wq, + &sbi->h_wb->dirty_sb_writeback_work, timeout); +} + +static inline void 
hmdfs_writeback_inodes_sb(struct super_block *sb) +{ + hmdfs_writeback_inodes_sb_delayed(sb, 0); +} + +static void hmdfs_writeback_inode(struct super_block *sb, struct inode *inode) +{ + struct hmdfs_sb_info *sbi = sb->s_fs_info; + struct hmdfs_writeback *hwb = sbi->h_wb; + struct hmdfs_inode_info *info = hmdfs_i(inode); + + spin_lock(&hwb->inode_list_lock); + if (list_empty(&info->wb_list)) { + ihold(inode); + list_add_tail(&info->wb_list, &hwb->inode_list_head); + queue_delayed_work(hwb->dirty_inode_writeback_wq, + &hwb->dirty_inode_writeback_work, 0); + } + spin_unlock(&hwb->inode_list_lock); +} + +static unsigned long hmdfs_idirty_pages(struct inode *inode, int tag) +{ + struct pagevec pvec; + unsigned long nr_dirty_pages = 0; + pgoff_t index = 0; + +#if KERNEL_VERSION(4, 15, 0) <= LINUX_VERSION_CODE + pagevec_init(&pvec); +#else + pagevec_init(&pvec, 0); +#endif + while (pagevec_lookup_tag(&pvec, inode->i_mapping, &index, tag)) { + nr_dirty_pages += pagevec_count(&pvec); + pagevec_release(&pvec); + cond_resched(); + } + return nr_dirty_pages; +} + +static inline unsigned long hmdfs_ratio_thresh(unsigned long ratio, + unsigned long thresh) +{ + unsigned long ret = (ratio * thresh) >> HMDFS_LIMIT_SHIFT; + + return (ret == 0) ? 1 : ret; +} + +static inline unsigned long hmdfs_thresh_ratio(unsigned long base, + unsigned long thresh) +{ + unsigned long ratio = (base << HMDFS_LIMIT_SHIFT) / thresh; + + return (ratio == 0) ? 1 : ratio; +} + +void hmdfs_calculate_dirty_thresh(struct hmdfs_writeback *hwb) +{ + hwb->dirty_fs_thresh = DIV_ROUND_UP(hwb->dirty_fs_bytes, PAGE_SIZE); + hwb->dirty_file_thresh = DIV_ROUND_UP(hwb->dirty_file_bytes, PAGE_SIZE); + hwb->dirty_fs_bg_thresh = + DIV_ROUND_UP(hwb->dirty_fs_bg_bytes, PAGE_SIZE); + hwb->dirty_file_bg_thresh = + DIV_ROUND_UP(hwb->dirty_file_bg_bytes, PAGE_SIZE); + + hwb->fs_bg_ratio = hmdfs_thresh_ratio(hwb->dirty_fs_bg_thresh, + hwb->dirty_fs_thresh); + hwb->file_bg_ratio = hmdfs_thresh_ratio(hwb->dirty_file_bg_thresh, + hwb->dirty_file_thresh); + hwb->fs_file_ratio = hmdfs_thresh_ratio(hwb->dirty_file_thresh, + hwb->dirty_fs_thresh); +} + +static void hmdfs_init_dirty_limit(struct hmdfs_dirty_throttle_control *hdtc) +{ + struct hmdfs_writeback *hwb = hdtc->hwb; + + hdtc->fs_thresh = hdtc->hwb->dirty_fs_thresh; + hdtc->file_thresh = hdtc->hwb->dirty_file_thresh; + hdtc->fs_bg_thresh = hdtc->hwb->dirty_fs_bg_thresh; + hdtc->file_bg_thresh = hdtc->hwb->dirty_file_bg_thresh; + + if (!hwb->dirty_auto_threshold) + return; + + /* + * Init thresh according the previous bandwidth adjusted thresh, + * thresh should be no more than setting thresh. + */ + if (hwb->bw_fs_thresh < hdtc->fs_thresh) { + hdtc->fs_thresh = hwb->bw_fs_thresh; + hdtc->fs_bg_thresh = hmdfs_ratio_thresh(hwb->fs_bg_ratio, + hdtc->fs_thresh); + } + if (hwb->bw_file_thresh < hdtc->file_thresh) { + hdtc->file_thresh = hwb->bw_file_thresh; + hdtc->file_bg_thresh = hmdfs_ratio_thresh(hwb->file_bg_ratio, + hdtc->file_thresh); + } + /* + * The thresh should be updated in the first time of dirty pages + * exceed the freerun ceiling. 
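+	 * (The stamp is backdated by one HMDFS_BANDWIDTH_INTERVAL below, so
+	 * the first exceed check in hmdfs_balance_dirty_pages() already sees
+	 * an expired stamp and refreshes the thresh immediately.)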
+ */
+	hdtc->thresh_time_stamp = jiffies - HMDFS_BANDWIDTH_INTERVAL - 1;
+}
+
+static void hmdfs_update_dirty_limit(struct hmdfs_dirty_throttle_control *hdtc)
+{
+	struct hmdfs_writeback *hwb = hdtc->hwb;
+	struct bdi_writeback *wb = hwb->wb;
+	unsigned int time_limit = hwb->writeback_timelimit;
+	unsigned long bw = wb->avg_write_bandwidth;
+	unsigned long thresh;
+
+	if (!hwb->dirty_auto_threshold)
+		return;
+
+	spin_lock(&hwb->write_bandwidth_lock);
+	if (bw > hwb->max_write_bandwidth)
+		hwb->max_write_bandwidth = bw;
+
+	if (bw < hwb->min_write_bandwidth)
+		hwb->min_write_bandwidth = bw;
+	hwb->avg_write_bandwidth = bw;
+	spin_unlock(&hwb->write_bandwidth_lock);
+
+	/*
+	 * If the bandwidth is lower than the lower limit, the link has
+	 * probably gone offline, and it is meaningless to set such a
+	 * low thresh.
+	 */
+	bw = max(bw, hwb->bw_thresh_lowerlimit);
+	thresh = bw * time_limit / roundup_pow_of_two(HZ);
+	if (thresh >= hwb->dirty_fs_thresh) {
+		hdtc->fs_thresh = hwb->dirty_fs_thresh;
+		hdtc->file_thresh = hwb->dirty_file_thresh;
+		hdtc->fs_bg_thresh = hwb->dirty_fs_bg_thresh;
+		hdtc->file_bg_thresh = hwb->dirty_file_bg_thresh;
+	} else {
+		/* Adjust thresh according to current bandwidth */
+		hdtc->fs_thresh = thresh;
+		hdtc->fs_bg_thresh = hmdfs_ratio_thresh(hwb->fs_bg_ratio,
+							hdtc->fs_thresh);
+		hdtc->file_thresh = hmdfs_ratio_thresh(hwb->fs_file_ratio,
+						       hdtc->fs_thresh);
+		hdtc->file_bg_thresh = hmdfs_ratio_thresh(hwb->file_bg_ratio,
+							  hdtc->file_thresh);
+	}
+	/* Save bandwidth adjusted thresh */
+	hwb->bw_fs_thresh = hdtc->fs_thresh;
+	hwb->bw_file_thresh = hdtc->file_thresh;
+	/* Update time stamp */
+	hdtc->thresh_time_stamp = jiffies;
+}
+
+void hmdfs_update_ratelimit(struct hmdfs_writeback *hwb)
+{
+	struct hmdfs_dirty_throttle_control hdtc = {.hwb = hwb};
+
+	hmdfs_init_dirty_limit(&hdtc);
+
+	/* hdtc.file_bg_thresh should be the lowest thresh */
+	hwb->ratelimit_pages = hdtc.file_bg_thresh /
+			       (num_online_cpus() * HMDFS_RATELIMIT_PAGES_GAP);
+	if (hwb->ratelimit_pages < HMDFS_MIN_RATELIMIT_PAGES)
+		hwb->ratelimit_pages = HMDFS_MIN_RATELIMIT_PAGES;
+}
+
+/* This is a copy of wb_max_pause() */
+static unsigned long hmdfs_wb_pause(struct bdi_writeback *wb,
+				    unsigned long wb_dirty)
+{
+	unsigned long bw = wb->avg_write_bandwidth;
+	unsigned long t;
+
+	/*
+	 * Limit pause time for small memory systems. If sleeping for too
+	 * long, a small pool of dirty/writeback pages may go empty and the
+	 * disk may go idle.
+	 *
+	 * 8 serves as the safety ratio.
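+	 * The formula below works out to roughly wb_dirty / (bw / 8)
+	 * jiffies: the time needed to write back an eighth of the dirty
+	 * pages at the average bandwidth, clamped to HMDFS_MAX_PAUSE below.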
+ */ + t = wb_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8)); + t++; + + return min_t(unsigned long, t, HMDFS_MAX_PAUSE); +} + +static unsigned long +hmdfs_dirty_freerun_ceiling(struct hmdfs_dirty_throttle_control *hdtc, + unsigned int type) +{ + if (type == HMDFS_DIRTY_FS) + return (hdtc->fs_thresh + hdtc->fs_bg_thresh) / 2; + else /* HMDFS_DIRTY_FILE_TYPE */ + return (hdtc->file_thresh + hdtc->file_bg_thresh) / 2; +} + +/* This is a copy of dirty_poll_interval() */ +static inline unsigned long hmdfs_dirty_intv(unsigned long dirty, + unsigned long thresh) +{ + if (thresh > dirty) + return 1UL << (ilog2(thresh - dirty) >> 1); + return 1; +} + +static void hmdfs_balance_dirty_pages(struct address_space *mapping) +{ + struct inode *inode = mapping->host; + struct super_block *sb = inode->i_sb; + struct hmdfs_sb_info *sbi = sb->s_fs_info; + struct hmdfs_writeback *hwb = sbi->h_wb; + struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; + struct hmdfs_dirty_throttle_control hdtc = {.hwb = hwb}; + unsigned int dirty_exceeded = 0; + unsigned long start_time = jiffies; + unsigned long pause = 0; + + /* Add delay work to trigger timeout writeback */ + if (hwb->dirty_writeback_interval != 0) + hmdfs_writeback_inodes_sb_delayed( + sb, hwb->dirty_writeback_interval * 10); + + hmdfs_init_dirty_limit(&hdtc); + + while (1) { + unsigned long exceed = 0; + unsigned long diff; + + /* Per-filesystem overbalance writeback */ + hdtc.fs_nr_dirty = wb_stat_sum(wb, WB_RECLAIMABLE); + hdtc.fs_nr_reclaimable = + hdtc.fs_nr_dirty + wb_stat_sum(wb, WB_WRITEBACK); + if (hdtc.fs_nr_reclaimable < hdtc.file_bg_thresh) { + diff = hmdfs_dirty_intv(hdtc.fs_nr_reclaimable, + hdtc.file_thresh); + goto free_running; + } + + /* Per-file overbalance writeback */ + hdtc.file_nr_dirty = + hmdfs_idirty_pages(inode, PAGECACHE_TAG_DIRTY); + hdtc.file_nr_reclaimable = + hmdfs_idirty_pages(inode, PAGECACHE_TAG_WRITEBACK) + + hdtc.file_nr_dirty; + if ((hdtc.fs_nr_reclaimable < + hmdfs_dirty_freerun_ceiling(&hdtc, HMDFS_DIRTY_FS)) && + (hdtc.file_nr_reclaimable < + hmdfs_dirty_freerun_ceiling(&hdtc, HMDFS_DIRTY_FILE))) { + unsigned long fs_intv, file_intv; + + fs_intv = hmdfs_dirty_intv(hdtc.fs_nr_reclaimable, + hdtc.fs_thresh); + file_intv = hmdfs_dirty_intv(hdtc.file_nr_reclaimable, + hdtc.file_thresh); + diff = min(fs_intv, file_intv); +free_running: + current->nr_dirtied_pause = diff; + current->nr_dirtied = 0; + break; + } + + if (hdtc.fs_nr_reclaimable >= + hmdfs_dirty_freerun_ceiling(&hdtc, HMDFS_DIRTY_FS)) { + if (unlikely(!writeback_in_progress(wb))) + hmdfs_writeback_inodes_sb(sb); + } else { + hmdfs_writeback_inode(sb, inode); + } + + /* + * If dirty_auto_threshold is enabled, recalculate writeback + * thresh according to current bandwidth. Update bandwidth + * could be better if possible, but wb_update_bandwidth() is + * not exported, so we cannot update bandwidth here, so the + * bandwidth' update will be delayed if writing a lot to a + * single file. + */ + if (hwb->dirty_auto_threshold && + time_is_before_jiffies(hdtc.thresh_time_stamp + + HMDFS_BANDWIDTH_INTERVAL)) + hmdfs_update_dirty_limit(&hdtc); + + if (unlikely(hdtc.fs_nr_reclaimable >= hdtc.fs_thresh)) + exceed |= HMDFS_FS_EXCEED; + if (unlikely(hdtc.file_nr_reclaimable >= hdtc.file_thresh)) + exceed |= HMDFS_FILE_EXCEED; + + if (!exceed) { + trace_hmdfs_balance_dirty_pages(sbi, wb, &hdtc, + 0UL, start_time); + current->nr_dirtied = 0; + break; + } + /* + * Per-file or per-fs reclaimable pages exceed throttle limit, + * sleep pause time and check again. 
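+		 * Apart from dropping back below the thresh, only a fatal
+		 * signal breaks out of this loop.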
+ */ + dirty_exceeded |= exceed; + if (dirty_exceeded && !hwb->dirty_exceeded) + hwb->dirty_exceeded = true; + + /* Pause */ + pause = hmdfs_wb_pause(wb, hdtc.fs_nr_reclaimable); + + trace_hmdfs_balance_dirty_pages(sbi, wb, &hdtc, pause, + start_time); + + __set_current_state(TASK_KILLABLE); + io_schedule_timeout(pause); + + if (fatal_signal_pending(current)) + break; + } + + if (!dirty_exceeded && hwb->dirty_exceeded) + hwb->dirty_exceeded = false; + + if (hdtc.fs_nr_reclaimable >= hdtc.fs_bg_thresh) { + if (unlikely(!writeback_in_progress(wb))) + hmdfs_writeback_inodes_sb(sb); + } else if (hdtc.file_nr_reclaimable >= hdtc.file_bg_thresh) { + hmdfs_writeback_inode(sb, inode); + } +} + +void hmdfs_balance_dirty_pages_ratelimited(struct address_space *mapping) +{ + struct hmdfs_sb_info *sbi = mapping->host->i_sb->s_fs_info; + struct hmdfs_writeback *hwb = sbi->h_wb; + int *bdp_ratelimits = NULL; + int ratelimit; + + if (!hwb->dirty_writeback_control) + return; + + /* Add delay work to trigger timeout writeback */ + if (hwb->dirty_writeback_interval != 0) + hmdfs_writeback_inodes_sb_delayed( + mapping->host->i_sb, + hwb->dirty_writeback_interval * 10); + + ratelimit = current->nr_dirtied_pause; + if (hwb->dirty_exceeded) + ratelimit = min(ratelimit, HMDFS_DIRTY_EXCEED_RATELIMIT); + + /* + * This prevents one CPU to accumulate too many dirtied pages + * without calling into hmdfs_balance_dirty_pages(), which can + * happen when there are 1000+ tasks, all of them start dirtying + * pages at exactly the same time, hence all honoured too large + * initial task->nr_dirtied_pause. + */ + preempt_disable(); + bdp_ratelimits = this_cpu_ptr(hwb->bdp_ratelimits); + + trace_hmdfs_balance_dirty_pages_ratelimited(sbi, hwb, *bdp_ratelimits); + + if (unlikely(current->nr_dirtied >= ratelimit)) { + *bdp_ratelimits = 0; + } else if (unlikely(*bdp_ratelimits >= hwb->ratelimit_pages)) { + *bdp_ratelimits = 0; + ratelimit = 0; + } + preempt_enable(); + + if (unlikely(current->nr_dirtied >= ratelimit)) + hmdfs_balance_dirty_pages(mapping); +} + +void hmdfs_destroy_writeback(struct hmdfs_sb_info *sbi) +{ + if (!sbi->h_wb) + return; + + flush_delayed_work(&sbi->h_wb->dirty_sb_writeback_work); + flush_delayed_work(&sbi->h_wb->dirty_inode_writeback_work); + destroy_workqueue(sbi->h_wb->dirty_sb_writeback_wq); + destroy_workqueue(sbi->h_wb->dirty_inode_writeback_wq); + free_percpu(sbi->h_wb->bdp_ratelimits); + kfree(sbi->h_wb); + sbi->h_wb = NULL; +} + +int hmdfs_init_writeback(struct hmdfs_sb_info *sbi) +{ + struct hmdfs_writeback *hwb; + char name[HMDFS_WQ_NAME_LEN]; + int ret = -ENOMEM; + + hwb = kzalloc(sizeof(struct hmdfs_writeback), GFP_KERNEL); + if (!hwb) + return ret; + + hwb->sbi = sbi; + hwb->wb = &sbi->sb->s_bdi->wb; + hwb->dirty_writeback_control = true; + hwb->dirty_writeback_interval = HM_DEFAULT_WRITEBACK_INTERVAL; + hwb->dirty_file_bg_bytes = HMDFS_FILE_BG_WB_BYTES; + hwb->dirty_fs_bg_bytes = HMDFS_FS_BG_WB_BYTES; + hwb->dirty_file_bytes = HMDFS_FILE_WB_BYTES; + hwb->dirty_fs_bytes = HMDFS_FS_WB_BYTES; + hmdfs_calculate_dirty_thresh(hwb); + hwb->bw_file_thresh = hwb->dirty_file_thresh; + hwb->bw_fs_thresh = hwb->dirty_fs_thresh; + spin_lock_init(&hwb->inode_list_lock); + INIT_LIST_HEAD(&hwb->inode_list_head); + hwb->dirty_exceeded = false; + hwb->ratelimit_pages = HMDFS_DEF_RATELIMIT_PAGES; + hwb->dirty_auto_threshold = true; + hwb->writeback_timelimit = HMDFS_DEF_WB_TIMELIMIT; + hwb->bw_thresh_lowerlimit = HMDFS_BW_THRESH_DEF_LIMIT; + spin_lock_init(&hwb->write_bandwidth_lock); + 
hwb->avg_write_bandwidth = 0; + hwb->max_write_bandwidth = 0; + hwb->min_write_bandwidth = ULONG_MAX; + hwb->bdp_ratelimits = alloc_percpu(int); + if (!hwb->bdp_ratelimits) + goto free_hwb; + + snprintf(name, sizeof(name), "dfs_ino_wb%u", sbi->seq); + hwb->dirty_inode_writeback_wq = create_singlethread_workqueue(name); + if (!hwb->dirty_inode_writeback_wq) { + hmdfs_err("Failed to create inode writeback workqueue!"); + goto free_bdp; + } + snprintf(name, sizeof(name), "dfs_sb_wb%u", sbi->seq); + hwb->dirty_sb_writeback_wq = create_singlethread_workqueue(name); + if (!hwb->dirty_sb_writeback_wq) { + hmdfs_err("Failed to create filesystem writeback workqueue!"); + goto free_i_wq; + } + INIT_DELAYED_WORK(&hwb->dirty_sb_writeback_work, + hmdfs_writeback_inodes_sb_handler); + INIT_DELAYED_WORK(&hwb->dirty_inode_writeback_work, + hmdfs_writeback_inode_handler); + sbi->h_wb = hwb; + return 0; +free_i_wq: + destroy_workqueue(hwb->dirty_inode_writeback_wq); +free_bdp: + free_percpu(hwb->bdp_ratelimits); +free_hwb: + kfree(hwb); + return ret; +} diff --git a/fs/hmdfs/client_writeback.h b/fs/hmdfs/client_writeback.h new file mode 100755 index 000000000..689a5e733 --- /dev/null +++ b/fs/hmdfs/client_writeback.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/client_writeback.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef CLIENT_WRITEBACK_H +#define CLIENT_WRITEBACK_H + +#include "hmdfs.h" + +/* + * HM_DEFAULT_WRITEBACK_INTERVAL - centiseconds + * HMDFS_FILE_BG_WB_BYTES - background per-file threshold 10M + * HMDFS_FS_BG_WB_BYTES - background per-fs threshold 50M + * HMDFS_FILE_WB_BYTES - per-file throttle threshold + * HMDFS_FS_WB_BYTES - per-fs throttle threshold + */ +#define HM_DEFAULT_WRITEBACK_INTERVAL 500 +#define HMDFS_FILE_BG_WB_BYTES (10 * 1024 * 1024) +#define HMDFS_FS_BG_WB_BYTES (50 * 1024 * 1024) +#define HMDFS_FILE_WB_BYTES (HMDFS_FILE_BG_WB_BYTES << 1) +#define HMDFS_FS_WB_BYTES (HMDFS_FS_BG_WB_BYTES << 1) + +/* writeback time limit (default 5s) */ +#define HMDFS_DEF_WB_TIMELIMIT (5 * HZ) +#define HMDFS_MAX_WB_TIMELIMIT (30 * HZ) + +/* bandwidth adjusted lower limit (default 1MB/s) */ +#define HMDFS_BW_THRESH_MIN_LIMIT (1 << (20 - PAGE_SHIFT)) +#define HMDFS_BW_THRESH_MAX_LIMIT (100 << (20 - PAGE_SHIFT)) +#define HMDFS_BW_THRESH_DEF_LIMIT HMDFS_BW_THRESH_MIN_LIMIT + +#define HMDFS_DIRTY_EXCEED_RATELIMIT (32 >> (PAGE_SHIFT - 10)) +#define HMDFS_RATELIMIT_PAGES_GAP 16 +#define HMDFS_DEF_RATELIMIT_PAGES 32 +#define HMDFS_MIN_RATELIMIT_PAGES 1 + +struct hmdfs_dirty_throttle_control { + struct hmdfs_writeback *hwb; + /* last time threshes are updated */ + unsigned long thresh_time_stamp; + + unsigned long file_bg_thresh; + unsigned long fs_bg_thresh; + unsigned long file_thresh; + unsigned long fs_thresh; + + unsigned long file_nr_dirty; + unsigned long fs_nr_dirty; + unsigned long file_nr_reclaimable; + unsigned long fs_nr_reclaimable; +}; + +struct hmdfs_writeback { + struct hmdfs_sb_info *sbi; + struct bdi_writeback *wb; + /* enable hmdfs dirty writeback control */ + bool dirty_writeback_control; + + /* writeback per-file inode list */ + struct list_head inode_list_head; + spinlock_t inode_list_lock; + + /* centiseconds */ + unsigned int dirty_writeback_interval; + /* per-file background threshold */ + unsigned long dirty_file_bg_bytes; + unsigned long dirty_file_bg_thresh; + /* per-fs background threshold */ + unsigned long dirty_fs_bg_bytes; + unsigned long dirty_fs_bg_thresh; + /* per-file throttle threshold */ + unsigned long 
dirty_file_bytes;
+	unsigned long dirty_file_thresh;
+	/* per-fs throttle threshold */
+	unsigned long dirty_fs_bytes;
+	unsigned long dirty_fs_thresh;
+	/* ratio between background thresh and throttle thresh */
+	unsigned long fs_bg_ratio;
+	unsigned long file_bg_ratio;
+	/* ratio between file and fs throttle thresh */
+	unsigned long fs_file_ratio;
+
+	/*
+	 * Enable auto-thresh. If enabled, the background and throttle
+	 * thresh are no longer fixed values stored in dirty_*_bytes;
+	 * they are determined by the bandwidth of the network and the
+	 * writeback timelimit.
+	 */
+	bool dirty_auto_threshold;
+	unsigned int writeback_timelimit;
+	/* bandwidth adjusted filesystem throttle thresh */
+	unsigned long bw_fs_thresh;
+	/* bandwidth adjusted per-file throttle thresh */
+	unsigned long bw_file_thresh;
+	/* bandwidth adjusted thresh lower limit */
+	unsigned long bw_thresh_lowerlimit;
+
+	/* reclaimable pages exceed throttle thresh */
+	bool dirty_exceeded;
+	/* percpu dirty pages ratelimit */
+	long ratelimit_pages;
+	/* count percpu dirty pages */
+	int __percpu *bdp_ratelimits;
+
+	/* per-fs writeback work */
+	struct workqueue_struct *dirty_sb_writeback_wq;
+	struct delayed_work dirty_sb_writeback_work;
+	/* per-file writeback work */
+	struct workqueue_struct *dirty_inode_writeback_wq;
+	struct delayed_work dirty_inode_writeback_work;
+
+	/* per-fs writeback bandwidth */
+	spinlock_t write_bandwidth_lock;
+	unsigned long max_write_bandwidth;
+	unsigned long min_write_bandwidth;
+	unsigned long avg_write_bandwidth;
+};
+
+void hmdfs_writeback_inodes_sb_handler(struct work_struct *work);
+
+void hmdfs_writeback_inode_handler(struct work_struct *work);
+
+void hmdfs_calculate_dirty_thresh(struct hmdfs_writeback *hwb);
+
+void hmdfs_update_ratelimit(struct hmdfs_writeback *hwb);
+
+void hmdfs_balance_dirty_pages_ratelimited(struct address_space *mapping);
+
+void hmdfs_destroy_writeback(struct hmdfs_sb_info *sbi);
+
+int hmdfs_init_writeback(struct hmdfs_sb_info *sbi);
+
+#endif
diff --git a/fs/hmdfs/comm/connection.c b/fs/hmdfs/comm/connection.c
new file mode 100755
index 000000000..7613da514
--- /dev/null
+++ b/fs/hmdfs/comm/connection.c
@@ -0,0 +1,1312 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/comm/connection.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include "connection.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "device_node.h"
+#include "hmdfs.h"
+#include "message_verify.h"
+#include "node_cb.h"
+#include "protocol.h"
+#include "socket_adapter.h"
+
+#ifdef CONFIG_HMDFS_FS_ENCRYPTION
+#include "crypto.h"
+#endif
+
+#define HMDFS_WAIT_REQUEST_END_MIN 20
+#define HMDFS_WAIT_REQUEST_END_MAX 30
+
+#define HMDFS_WAIT_CONN_RELEASE (3 * HZ)
+
+#define HMDFS_RETRY_WB_WQ_MAX_ACTIVE 16
+
+static void hs_fill_crypto_data(struct connection *conn_impl, __u8 ops,
+				void *data, __u32 len)
+{
+	struct crypto_body *body = NULL;
+
+	if (len < sizeof(struct crypto_body)) {
+		hmdfs_info("crypto body len %u is err", len);
+		return;
+	}
+	body = (struct crypto_body *)data;
+
+	/* this is only a test; fill in the right algorithm later. */
+	body->crypto |= HMDFS_HS_CRYPTO_KTLS_AES128;
+	body->crypto = cpu_to_le32(body->crypto);
+
+	hmdfs_info("fill crypto. crypto=0x%08x", body->crypto);
+}
+
+static int hs_parse_crypto_data(struct connection *conn_impl, __u8 ops,
+				void *data, __u32 len)
+{
+	struct crypto_body *hs_crypto = NULL;
+	uint32_t crypto;
+
+	if (len < sizeof(struct crypto_body)) {
+		hmdfs_info("handshake msg len error, len=%u", len);
+		return -1;
+	}
+	hs_crypto = (struct crypto_body *)data;
+	crypto = le32_to_cpu(hs_crypto->crypto);
+	conn_impl->crypto = crypto;
+	hmdfs_info("ops=%u, len=%u, crypto=0x%08x", ops, len, crypto);
+	return 0;
+}
+
+static void hs_fill_case_sense_data(struct connection *conn_impl, __u8 ops,
+				    void *data, __u32 len)
+{
+	struct case_sense_body *body = (struct case_sense_body *)data;
+
+	if (len < sizeof(struct case_sense_body)) {
+		hmdfs_err("case sensitive len %u is err", len);
+		return;
+	}
+	body->case_sensitive = conn_impl->node->sbi->s_case_sensitive;
+}
+
+static int hs_parse_case_sense_data(struct connection *conn_impl, __u8 ops,
+				    void *data, __u32 len)
+{
+	struct case_sense_body *body = (struct case_sense_body *)data;
+	__u8 sensitive = conn_impl->node->sbi->s_case_sensitive ? 1 : 0;
+
+	if (len < sizeof(struct case_sense_body)) {
+		hmdfs_info("case sensitive len %u is err", len);
+		return -1;
+	}
+	if (body->case_sensitive != sensitive) {
+		hmdfs_err("case sensitive inconsistent, server: %u,client: %u, ops: %u",
+			  body->case_sensitive, sensitive, ops);
+		return -1;
+	}
+	return 0;
+}
+
+static void hs_fill_feature_data(struct connection *conn_impl, __u8 ops,
+				 void *data, __u32 len)
+{
+	struct feature_body *body = (struct feature_body *)data;
+
+	if (len < sizeof(struct feature_body)) {
+		hmdfs_err("feature len %u is err", len);
+		return;
+	}
+	body->features = cpu_to_le64(conn_impl->node->sbi->s_features);
+	body->reserved = cpu_to_le64(0);
+}
+
+static int hs_parse_feature_data(struct connection *conn_impl, __u8 ops,
+				 void *data, __u32 len)
+{
+	struct feature_body *body = (struct feature_body *)data;
+
+	if (len < sizeof(struct feature_body)) {
+		hmdfs_err("feature len %u is err", len);
+		return -1;
+	}
+
+	conn_impl->node->features = le64_to_cpu(body->features);
+	return 0;
+}
+
+/* should ensure len is smaller than 0xffff. */
+static const struct conn_hs_extend_reg s_hs_extend_reg[HS_EXTEND_CODE_COUNT] = {
+	[HS_EXTEND_CODE_CRYPTO] = {
+		.len = sizeof(struct crypto_body),
+		.resv = 0,
+		.filler = hs_fill_crypto_data,
+		.parser = hs_parse_crypto_data
+	},
+	[HS_EXTEND_CODE_CASE_SENSE] = {
+		.len = sizeof(struct case_sense_body),
+		.resv = 0,
+		.filler = hs_fill_case_sense_data,
+		.parser = hs_parse_case_sense_data,
+	},
+	[HS_EXTEND_CODE_FEATURE_SUPPORT] = {
+		.len = sizeof(struct feature_body),
+		.resv = 0,
+		.filler = hs_fill_feature_data,
+		.parser = hs_parse_feature_data,
+	},
+};
+
+static __u32 hs_get_extend_data_len(void)
+{
+	__u32 len;
+	int i;
+
+	len = sizeof(struct conn_hs_extend_head);
+
+	for (i = 0; i < HS_EXTEND_CODE_COUNT; i++) {
+		len += sizeof(struct extend_field_head);
+		len += s_hs_extend_reg[i].len;
+	}
+
+	hmdfs_info("extend data total len is %u", len);
+	return len;
+}
+
+static void hs_fill_extend_data(struct connection *conn_impl, __u8 ops,
+				void *extend_data, __u32 len)
+{
+	struct conn_hs_extend_head *extend_head = NULL;
+	struct extend_field_head *field = NULL;
+	uint8_t *body = NULL;
+	__u32 offset;
+	__u16 i;
+
+	if (sizeof(struct conn_hs_extend_head) > len) {
+		hmdfs_info("len error. 
len=%u", len); + return; + } + extend_head = (struct conn_hs_extend_head *)extend_data; + extend_head->field_cn = 0; + offset = sizeof(struct conn_hs_extend_head); + + for (i = 0; i < HS_EXTEND_CODE_COUNT; i++) { + if (sizeof(struct extend_field_head) > (len - offset)) + break; + field = (struct extend_field_head *)((uint8_t *)extend_data + + offset); + offset += sizeof(struct extend_field_head); + + if (s_hs_extend_reg[i].len > (len - offset)) + break; + body = (uint8_t *)extend_data + offset; + offset += s_hs_extend_reg[i].len; + + field->code = cpu_to_le16(i); + field->len = cpu_to_le16(s_hs_extend_reg[i].len); + + if (s_hs_extend_reg[i].filler) + s_hs_extend_reg[i].filler(conn_impl, ops, + body, s_hs_extend_reg[i].len); + + extend_head->field_cn += 1; + } + + extend_head->field_cn = cpu_to_le32(extend_head->field_cn); +} + +static int hs_parse_extend_data(struct connection *conn_impl, __u8 ops, + void *extend_data, __u32 extend_len) +{ + struct conn_hs_extend_head *extend_head = NULL; + struct extend_field_head *field = NULL; + uint8_t *body = NULL; + __u32 offset; + __u32 field_cnt; + __u16 code; + __u16 len; + int i; + int ret; + + if (sizeof(struct conn_hs_extend_head) > extend_len) { + hmdfs_err("ops=%u,extend_len=%u", ops, extend_len); + return -1; + } + extend_head = (struct conn_hs_extend_head *)extend_data; + field_cnt = le32_to_cpu(extend_head->field_cn); + hmdfs_info("extend_len=%u,field_cnt=%u", extend_len, field_cnt); + + offset = sizeof(struct conn_hs_extend_head); + + for (i = 0; i < field_cnt; i++) { + if (sizeof(struct extend_field_head) > (extend_len - offset)) { + hmdfs_err("cnt err, op=%u, extend_len=%u, cnt=%u, i=%u", + ops, extend_len, field_cnt, i); + return -1; + } + field = (struct extend_field_head *)((uint8_t *)extend_data + + offset); + offset += sizeof(struct extend_field_head); + code = le16_to_cpu(field->code); + len = le16_to_cpu(field->len); + if (len > (extend_len - offset)) { + hmdfs_err("len err, op=%u, extend_len=%u, cnt=%u, i=%u", + ops, extend_len, field_cnt, i); + hmdfs_err("len err, code=%u, len=%u, offset=%u", code, + len, offset); + return -1; + } + + body = (uint8_t *)extend_data + offset; + offset += len; + if ((code < HS_EXTEND_CODE_COUNT) && + (s_hs_extend_reg[code].parser)) { + ret = s_hs_extend_reg[code].parser(conn_impl, ops, + body, len); + if (ret) + return ret; + } + } + return 0; +} + +static int hs_proc_msg_data(struct connection *conn_impl, __u8 ops, void *data, + __u32 data_len) +{ + struct connection_handshake_req *hs_req = NULL; + uint8_t *extend_data = NULL; + __u32 extend_len; + __u32 req_len; + int ret; + + if (!data) { + hmdfs_err("err, msg data is null"); + return -1; + } + + if (data_len < sizeof(struct connection_handshake_req)) { + hmdfs_err("ack msg data len error. data_len=%u, device_id=%llu", + data_len, conn_impl->node->device_id); + return -1; + } + + hs_req = (struct connection_handshake_req *)data; + req_len = le32_to_cpu(hs_req->len); + if (req_len > (data_len - sizeof(struct connection_handshake_req))) { + hmdfs_info( + "ack msg hs_req len(%u) error. 
data_len=%u, device_id=%llu", + req_len, data_len, conn_impl->node->device_id); + return -1; + } + extend_len = + data_len - sizeof(struct connection_handshake_req) - req_len; + extend_data = (uint8_t *)data + + sizeof(struct connection_handshake_req) + req_len; + ret = hs_parse_extend_data(conn_impl, ops, extend_data, extend_len); + if (!ret) + hmdfs_info( + "hs msg rcv, ops=%u, data_len=%u, device_id=%llu, req_len=%u", + ops, data_len, conn_impl->node->device_id, hs_req->len); + return ret; +} +#ifdef CONFIG_HMDFS_FS_ENCRYPTION +static int connection_handshake_init_tls(struct connection *conn_impl, __u8 ops) +{ + // init ktls config, use key1/key2 as init write-key of each direction + __u8 key1[HMDFS_KEY_SIZE]; + __u8 key2[HMDFS_KEY_SIZE]; + int ret; + + if ((ops != CONNECT_MESG_HANDSHAKE_RESPONSE) && + (ops != CONNECT_MESG_HANDSHAKE_ACK)) { + hmdfs_err("ops %u is err", ops); + return -EINVAL; + } + + update_key(conn_impl->master_key, key1, HKDF_TYPE_KEY_INITIATOR); + update_key(conn_impl->master_key, key2, HKDF_TYPE_KEY_ACCEPTER); + + if (ops == CONNECT_MESG_HANDSHAKE_ACK) { + memcpy(conn_impl->send_key, key1, HMDFS_KEY_SIZE); + memcpy(conn_impl->recv_key, key2, HMDFS_KEY_SIZE); + } else { + memcpy(conn_impl->send_key, key2, HMDFS_KEY_SIZE); + memcpy(conn_impl->recv_key, key1, HMDFS_KEY_SIZE); + } + + memset(key1, 0, HMDFS_KEY_SIZE); + memset(key2, 0, HMDFS_KEY_SIZE); + + hmdfs_info("hs: ops=%u start set crypto tls", ops); + ret = tls_crypto_info_init(conn_impl); + if (ret) + hmdfs_err("setting tls fail. ops is %u", ops); + + return ret; +} +#endif + +static int do_send_handshake(struct connection *conn_impl, __u8 ops, + __le16 request_id) +{ + int err; + struct connection_msg_head *hs_head = NULL; + struct connection_handshake_req *hs_data = NULL; + uint8_t *hs_extend_data = NULL; + struct hmdfs_send_data msg; + __u32 send_len; + __u32 len; + __u32 extend_len; + char buf[HMDFS_CID_SIZE] = { 0 }; + + len = scnprintf(buf, HMDFS_CID_SIZE, "%llu", 0ULL); + send_len = sizeof(struct connection_msg_head) + + sizeof(struct connection_handshake_req) + len; + + if (((ops == CONNECT_MESG_HANDSHAKE_RESPONSE) || + (ops == CONNECT_MESG_HANDSHAKE_ACK)) && + (conn_impl->node->version >= DFS_2_0)) { + extend_len = hs_get_extend_data_len(); + send_len += extend_len; + } + + hs_head = kzalloc(send_len, GFP_KERNEL); + if (!hs_head) + return -ENOMEM; + + hs_data = (struct connection_handshake_req + *)((uint8_t *)hs_head + + sizeof(struct connection_msg_head)); + + hs_data->len = cpu_to_le32(len); + memcpy(hs_data->dev_id, buf, len); + + if (((ops == CONNECT_MESG_HANDSHAKE_RESPONSE) || + ops == CONNECT_MESG_HANDSHAKE_ACK) && + (conn_impl->node->version >= DFS_2_0)) { + hs_extend_data = (uint8_t *)hs_data + + sizeof(struct connection_handshake_req) + len; + hs_fill_extend_data(conn_impl, ops, hs_extend_data, extend_len); + } + + hs_head->magic = HMDFS_MSG_MAGIC; + hs_head->version = DFS_2_0; + hs_head->flags |= 0x1; + hmdfs_info("Send handshake message: ops = %d, fd = %d", ops, + ((struct tcp_handle *)(conn_impl->connect_handle))->fd); + hs_head->operations = ops; + hs_head->request_id = request_id; + hs_head->datasize = cpu_to_le32(send_len); + hs_head->source = 0; + hs_head->msg_id = 0; + + msg.head = hs_head; + msg.head_len = sizeof(struct connection_msg_head); + msg.data = hs_data; + msg.len = send_len - msg.head_len; + msg.sdesc = NULL; + msg.sdesc_len = 0; + err = conn_impl->send_message(conn_impl, &msg); + kfree(hs_head); + return err; +} + +static int hmdfs_node_waiting_evt_sum(const struct 
hmdfs_peer *node) +{ + int sum = 0; + int i; + + for (i = 0; i < RAW_NODE_EVT_NR; i++) + sum += node->waiting_evt[i]; + + return sum; +} + +static int hmdfs_update_node_waiting_evt(struct hmdfs_peer *node, int evt, + unsigned int *seq) +{ + int last; + int sum; + unsigned int next; + + sum = hmdfs_node_waiting_evt_sum(node); + if (sum % RAW_NODE_EVT_NR) + last = !node->pending_evt; + else + last = node->pending_evt; + + /* duplicated event */ + if (evt == last) { + node->dup_evt[evt]++; + return 0; + } + + node->waiting_evt[evt]++; + hmdfs_debug("add node->waiting_evt[%d]=%d", evt, + node->waiting_evt[evt]); + + /* offline wait + online wait + offline wait = offline wait + * online wait + offline wait + online wait != online wait + * As the first online related resource (e.g. fd) must be invalidated + */ + if (node->waiting_evt[RAW_NODE_EVT_OFF] >= 2 && + node->waiting_evt[RAW_NODE_EVT_ON] >= 1) { + node->waiting_evt[RAW_NODE_EVT_OFF] -= 1; + node->waiting_evt[RAW_NODE_EVT_ON] -= 1; + node->seq_wr_idx -= 2; + node->merged_evt += 2; + } + + next = hmdfs_node_inc_evt_seq(node); + node->seq_tbl[(node->seq_wr_idx++) % RAW_NODE_EVT_MAX_NR] = next; + *seq = next; + + return 1; +} + +static void hmdfs_run_evt_cb_verbosely(struct hmdfs_peer *node, int raw_evt, + bool sync, unsigned int seq) +{ + int evt = (raw_evt == RAW_NODE_EVT_OFF) ? NODE_EVT_OFFLINE : + NODE_EVT_ONLINE; + int cur_evt_idx = sync ? 1 : 0; + + node->cur_evt[cur_evt_idx] = raw_evt; + node->cur_evt_seq[cur_evt_idx] = seq; + hmdfs_node_call_evt_cb(node, evt, sync, seq); + node->cur_evt[cur_evt_idx] = RAW_NODE_EVT_NR; +} + +static void hmdfs_node_evt_work(struct work_struct *work) +{ + struct hmdfs_peer *node = + container_of(work, struct hmdfs_peer, evt_dwork.work); + unsigned int seq; + + /* + * N-th sync cb completes before N-th async cb, + * so use seq_lock as a barrier in read & write path + * to ensure we can read the required seq. + */ + mutex_lock(&node->seq_lock); + seq = node->seq_tbl[(node->seq_rd_idx++) % RAW_NODE_EVT_MAX_NR]; + hmdfs_run_evt_cb_verbosely(node, node->pending_evt, false, seq); + mutex_unlock(&node->seq_lock); + + mutex_lock(&node->evt_lock); + if (hmdfs_node_waiting_evt_sum(node)) { + node->pending_evt = !node->pending_evt; + node->pending_evt_seq = + node->seq_tbl[node->seq_rd_idx % RAW_NODE_EVT_MAX_NR]; + node->waiting_evt[node->pending_evt]--; + /* sync cb has been done */ + schedule_delayed_work(&node->evt_dwork, + node->sbi->async_cb_delay * HZ); + } else { + node->last_evt = node->pending_evt; + node->pending_evt = RAW_NODE_EVT_NR; + } + mutex_unlock(&node->evt_lock); +} + +/* + * The running orders of cb are: + * + * (1) sync callbacks are invoked according to the queue order of raw events: + * ensured by seq_lock. + * (2) async callbacks are invoked according to the queue order of raw events: + * ensured by evt_lock & evt_dwork + * (3) async callback is invoked after sync callback of the same raw event: + * ensured by seq_lock. + * (4) async callback of N-th raw event and sync callback of (N+x)-th raw + * event can run concurrently. 
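+ * These guarantees are realized by hmdfs_node_evt_work() above and
+ * hmdfs_queue_raw_node_evt() below.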
+ */
+static void hmdfs_queue_raw_node_evt(struct hmdfs_peer *node, int evt)
+{
+	unsigned int seq = 0;
+
+	mutex_lock(&node->evt_lock);
+	if (node->pending_evt == RAW_NODE_EVT_NR) {
+		if (evt == node->last_evt) {
+			node->dup_evt[evt]++;
+			mutex_unlock(&node->evt_lock);
+			return;
+		}
+		node->pending_evt = evt;
+		seq = hmdfs_node_inc_evt_seq(node);
+		node->seq_tbl[(node->seq_wr_idx++) % RAW_NODE_EVT_MAX_NR] = seq;
+		node->pending_evt_seq = seq;
+		mutex_lock(&node->seq_lock);
+		mutex_unlock(&node->evt_lock);
+		/* call sync cb, then async cb */
+		hmdfs_run_evt_cb_verbosely(node, evt, true, seq);
+		mutex_unlock(&node->seq_lock);
+		schedule_delayed_work(&node->evt_dwork,
+				      node->sbi->async_cb_delay * HZ);
+	} else if (hmdfs_update_node_waiting_evt(node, evt, &seq) > 0) {
+		/*
+		 * Take seq_lock firstly to ensure N-th sync cb
+		 * is called before N-th async cb.
+		 */
+		mutex_lock(&node->seq_lock);
+		mutex_unlock(&node->evt_lock);
+		hmdfs_run_evt_cb_verbosely(node, evt, true, seq);
+		mutex_unlock(&node->seq_lock);
+	} else {
+		mutex_unlock(&node->evt_lock);
+	}
+}
+
+void connection_send_handshake(struct connection *conn_impl, __u8 ops,
+			       __le16 request_id)
+{
+	struct tcp_handle *tcp = NULL;
+	int err = do_send_handshake(conn_impl, ops, request_id);
+
+	if (likely(err >= 0))
+		return;
+
+	tcp = conn_impl->connect_handle;
+	hmdfs_err("Failed to send handshake: err = %d, fd = %d", err, tcp->fd);
+	hmdfs_reget_connection(conn_impl);
+}
+
+void connection_handshake_notify(struct hmdfs_peer *node, int notify_type)
+{
+	struct notify_param param;
+
+	param.notify = notify_type;
+	param.fd = INVALID_SOCKET_FD;
+	memcpy(param.remote_cid, node->cid, HMDFS_CID_SIZE);
+	notify(node, &param);
+}
+
+void peer_online(struct hmdfs_peer *peer)
+{
+	// To evaluate if someone else has made the peer online
+	u8 prev_stat = xchg(&peer->status, NODE_STAT_ONLINE);
+	unsigned long jif_tmp = jiffies;
+
+	if (prev_stat == NODE_STAT_ONLINE)
+		return;
+	WRITE_ONCE(peer->conn_time, jif_tmp);
+	WRITE_ONCE(peer->sbi->connections.recent_ol, jif_tmp);
+	hmdfs_queue_raw_node_evt(peer, RAW_NODE_EVT_ON);
+}
+
+void connection_to_working(struct hmdfs_peer *node)
+{
+	struct connection *conn_impl = NULL;
+	struct tcp_handle *tcp = NULL;
+
+	if (!node)
+		return;
+	mutex_lock(&node->conn_impl_list_lock);
+	list_for_each_entry(conn_impl, &node->conn_impl_list, list) {
+		if (conn_impl->type == CONNECT_TYPE_TCP &&
+		    conn_impl->status == CONNECT_STAT_WAIT_RESPONSE) {
+			tcp = conn_impl->connect_handle;
+			hmdfs_info("fd %d to working", tcp->fd);
+			conn_impl->status = CONNECT_STAT_WORKING;
+		}
+	}
+	mutex_unlock(&node->conn_impl_list_lock);
+	peer_online(node);
+}
+
+static int connection_check_version(__u8 version)
+{
+	__u8 min_ver = USERSPACE_MAX_VER;
+
+	if (version <= min_ver || version >= MAX_VERSION) {
+		hmdfs_info("invalid version %u", version);
+		return -1;
+	}
+	return 0;
+}
+
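+/*
+ * Overview of the three-way handshake implemented by the handler below
+ * (a summary of the code, for peers at DFS_2_0 or later; older peers
+ * stop after the response and never set up kTLS):
+ *
+ *   initiator                            acceptor
+ *   HANDSHAKE_REQUEST   --------------->
+ *                                        send HANDSHAKE_RESPONSE,
+ *                                        status = CONNECT_STAT_WAIT_ACK
+ *                       <---------------
+ *   parse extend data, send
+ *   HANDSHAKE_ACK, init kTLS,
+ *   status = CONNECT_STAT_WORKING
+ *                       --------------->
+ *                                        parse extend data, init kTLS,
+ *                                        status = CONNECT_STAT_WORKING
+ */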
version %u", version); + return -1; + } + return 0; +} + +void connection_handshake_recv_handler(struct connection *conn_impl, void *buf, + void *data, __u32 data_len) +{ + __u8 version; + __u8 ops; + __u8 status; + int fd = ((struct tcp_handle *)(conn_impl->connect_handle))->fd; + struct connection_msg_head *head = (struct connection_msg_head *)buf; + int ret; + + version = head->version; + conn_impl->node->version = version; + if (connection_check_version(version) != 0) + goto out; + conn_impl->node->conn_operations = hmdfs_get_peer_operation(version); + ops = head->operations; + status = conn_impl->status; + switch (ops) { + case CONNECT_MESG_HANDSHAKE_REQUEST: + hmdfs_info( + "Recved handshake request: device_id = %llu, version = %d, head->len = %d, tcp->fd = %d", + conn_impl->node->device_id, version, head->datasize, fd); + connection_send_handshake(conn_impl, + CONNECT_MESG_HANDSHAKE_RESPONSE, + head->msg_id); + if (conn_impl->node->version >= DFS_2_0) { + conn_impl->status = CONNECT_STAT_WAIT_ACK; + conn_impl->node->status = NODE_STAT_SHAKING; + } else { + conn_impl->status = CONNECT_STAT_WORKING; + } + break; + case CONNECT_MESG_HANDSHAKE_RESPONSE: + hmdfs_info( + "Recved handshake response: device_id = %llu, cmd->status = %hhu, tcp->fd = %d", + conn_impl->node->device_id, status, fd); + if (status == CONNECT_STAT_WAIT_REQUEST) { + // must be 10.1 device, no need to set ktls + connection_to_working(conn_impl->node); + goto out; + } + + if (conn_impl->node->version >= DFS_2_0) { + ret = hs_proc_msg_data(conn_impl, ops, data, data_len); + if (ret) + goto nego_err; + connection_send_handshake(conn_impl, + CONNECT_MESG_HANDSHAKE_ACK, + head->msg_id); + hmdfs_info("respon rcv handle,conn_impl->crypto=0x%0x", + conn_impl->crypto); +#ifdef CONFIG_HMDFS_FS_ENCRYPTION + ret = connection_handshake_init_tls(conn_impl, ops); + if (ret) { + hmdfs_err("init_tls_key fail, ops %u", ops); + goto out; + } +#endif + } + + conn_impl->status = CONNECT_STAT_WORKING; + peer_online(conn_impl->node); + break; + case CONNECT_MESG_HANDSHAKE_ACK: + if (conn_impl->node->version >= DFS_2_0) { + ret = hs_proc_msg_data(conn_impl, ops, data, data_len); + if (ret) + goto nego_err; + hmdfs_info("ack rcv handle, conn_impl->crypto=0x%0x", + conn_impl->crypto); +#ifdef CONFIG_HMDFS_FS_ENCRYPTION + ret = connection_handshake_init_tls(conn_impl, ops); + if (ret) { + hmdfs_err("init_tls_key fail, ops %u", ops); + goto out; + } +#endif + conn_impl->status = CONNECT_STAT_WORKING; + peer_online(conn_impl->node); + break; + } + fallthrough; + default: + break; + } +out: + kfree(data); + return; +nego_err: + conn_impl->status = CONNECT_STAT_NEGO_FAIL; + connection_handshake_notify(conn_impl->node, + NOTIFY_OFFLINE); + hmdfs_err("protocol negotiation failed, remote device_id = %llu, tcp->fd = %d", + conn_impl->node->device_id, fd); + goto out; +} + +#ifdef CONFIG_HMDFS_FS_ENCRYPTION +static void update_tls_crypto_key(struct connection *conn, + struct hmdfs_head_cmd *head, void *data, + __u32 data_len) +{ + // rekey message handler + struct connection_rekey_request *rekey_req = NULL; + int ret = 0; + + if (hmdfs_message_verify(conn->node, head, data) < 0) { + hmdfs_err("Rekey msg %d has been abandoned", head->msg_id); + goto out_err; + } + + hmdfs_info("recv REKEY request"); + set_crypto_info(conn, SET_CRYPTO_RECV); + // update send key if requested + rekey_req = data; + if (le32_to_cpu(rekey_req->update_request) == UPDATE_REQUESTED) { + ret = tcp_send_rekey_request(conn); + if (ret == 0) + set_crypto_info(conn, 
SET_CRYPTO_SEND); + } +out_err: + kfree(data); +} + +static bool cmd_update_tls_crypto_key(struct connection *conn, + struct hmdfs_head_cmd *head) +{ + __u8 version = conn->node->version; + struct tcp_handle *tcp = conn->connect_handle; + + if (version < DFS_2_0 || conn->type != CONNECT_TYPE_TCP || !tcp) + return false; + return head->operations.command == F_CONNECT_REKEY; +} +#endif + +void connection_working_recv_handler(struct connection *conn_impl, void *buf, + void *data, __u32 data_len) +{ +#ifdef CONFIG_HMDFS_FS_ENCRYPTION + if (cmd_update_tls_crypto_key(conn_impl, buf)) { + update_tls_crypto_key(conn_impl, buf, data, data_len); + return; + } +#endif + conn_impl->node->conn_operations->recvmsg(conn_impl->node, buf, data); +} + +static void connection_release(struct kref *ref) +{ + struct tcp_handle *tcp = NULL; + struct connection *conn = container_of(ref, struct connection, ref_cnt); + + hmdfs_info("connection release"); + memset(conn->master_key, 0, HMDFS_KEY_SIZE); + memset(conn->send_key, 0, HMDFS_KEY_SIZE); + memset(conn->recv_key, 0, HMDFS_KEY_SIZE); + if (conn->close) + conn->close(conn); + tcp = conn->connect_handle; + crypto_free_aead(conn->tfm); + // need to check and test: fput(tcp->sock->file); + if (tcp && tcp->sock) { + hmdfs_info("connection release: fd = %d, refcount %ld", tcp->fd, + file_count(tcp->sock->file)); + sockfd_put(tcp->sock); + } + if (tcp && tcp->recv_cache) + kmem_cache_destroy(tcp->recv_cache); + + if (!list_empty(&conn->list)) { + mutex_lock(&conn->node->conn_impl_list_lock); + list_del(&conn->list); + mutex_unlock(&conn->node->conn_impl_list_lock); + /* + * wakup hmdfs_disconnect_node to check + * conn_deleting_list if empty. + */ + wake_up_interruptible(&conn->node->deleting_list_wq); + } + + kfree(tcp); + kfree(conn); +} + +static void hmdfs_peer_release(struct kref *ref) +{ + struct hmdfs_peer *peer = container_of(ref, struct hmdfs_peer, ref_cnt); + struct mutex *lock = &peer->sbi->connections.node_lock; + + if (!list_empty(&peer->list)) + hmdfs_info("releasing a on-sbi peer: device_id %llu ", + peer->device_id); + else + hmdfs_info("releasing a redundant peer: device_id %llu ", + peer->device_id); + + cancel_delayed_work_sync(&peer->evt_dwork); + list_del(&peer->list); + idr_destroy(&peer->msg_idr); + idr_destroy(&peer->file_id_idr); + flush_workqueue(peer->req_handle_wq); + flush_workqueue(peer->async_wq); + flush_workqueue(peer->retry_wb_wq); + destroy_workqueue(peer->dentry_wq); + destroy_workqueue(peer->req_handle_wq); + destroy_workqueue(peer->async_wq); + destroy_workqueue(peer->retry_wb_wq); + destroy_workqueue(peer->reget_conn_wq); + kfree(peer); + mutex_unlock(lock); +} + +void connection_put(struct connection *conn) +{ + struct mutex *lock = &conn->ref_lock; + + kref_put_mutex(&conn->ref_cnt, connection_release, lock); +} + +void peer_put(struct hmdfs_peer *peer) +{ + struct mutex *lock = &peer->sbi->connections.node_lock; + + kref_put_mutex(&peer->ref_cnt, hmdfs_peer_release, lock); +} + +static void hmdfs_dump_deleting_list(struct hmdfs_peer *node) +{ + struct connection *con = NULL; + struct tcp_handle *tcp = NULL; + int count = 0; + + mutex_lock(&node->conn_impl_list_lock); + list_for_each_entry(con, &node->conn_deleting_list, list) { + tcp = con->connect_handle; + hmdfs_info("deleting list %d:device_id %llu tcp_fd %d refcnt %d", + count, node->device_id, tcp ? 
tcp->fd : -1, + kref_read(&con->ref_cnt)); + count++; + } + mutex_unlock(&node->conn_impl_list_lock); +} + +static bool hmdfs_conn_deleting_list_empty(struct hmdfs_peer *node) +{ + bool empty = false; + + mutex_lock(&node->conn_impl_list_lock); + empty = list_empty(&node->conn_deleting_list); + mutex_unlock(&node->conn_impl_list_lock); + + return empty; +} + +void hmdfs_disconnect_node(struct hmdfs_peer *node) +{ + LIST_HEAD(local_conns); + struct connection *conn_impl = NULL; + struct connection *next = NULL; + struct tcp_handle *tcp = NULL; + + if (unlikely(!node)) + return; + + hmdfs_node_inc_evt_seq(node); + /* Refer to comments in hmdfs_is_node_offlined() */ + smp_mb__after_atomic(); + node->status = NODE_STAT_OFFLINE; + hmdfs_info("Try to disconnect peer: device_id %llu", node->device_id); + + mutex_lock(&node->conn_impl_list_lock); + if (!list_empty(&node->conn_impl_list)) + list_replace_init(&node->conn_impl_list, &local_conns); + mutex_unlock(&node->conn_impl_list_lock); + + list_for_each_entry_safe(conn_impl, next, &local_conns, list) { + tcp = conn_impl->connect_handle; + if (tcp && tcp->sock) { + kernel_sock_shutdown(tcp->sock, SHUT_RDWR); + hmdfs_info("shudown sock: fd = %d, refcount %ld", + tcp->fd, file_count(tcp->sock->file)); + } + if (tcp) + tcp->fd = INVALID_SOCKET_FD; + + tcp_close_socket(tcp); + list_del_init(&conn_impl->list); + + connection_put(conn_impl); + } + + if (wait_event_interruptible_timeout(node->deleting_list_wq, + hmdfs_conn_deleting_list_empty(node), + HMDFS_WAIT_CONN_RELEASE) <= 0) + hmdfs_dump_deleting_list(node); + + /* wait all request process end */ + spin_lock(&node->idr_lock); + while (node->msg_idr_process) { + spin_unlock(&node->idr_lock); + usleep_range(HMDFS_WAIT_REQUEST_END_MIN, + HMDFS_WAIT_REQUEST_END_MAX); + spin_lock(&node->idr_lock); + } + spin_unlock(&node->idr_lock); + + hmdfs_queue_raw_node_evt(node, RAW_NODE_EVT_OFF); +} + +static void hmdfs_run_simple_evt_cb(struct hmdfs_peer *node, int evt) +{ + unsigned int seq = hmdfs_node_inc_evt_seq(node); + + mutex_lock(&node->seq_lock); + hmdfs_node_call_evt_cb(node, evt, true, seq); + mutex_unlock(&node->seq_lock); +} + +static void hmdfs_del_peer(struct hmdfs_peer *node) +{ + /* + * No need for offline evt cb, because all files must + * have been flushed and closed, else the filesystem + * will be un-mountable. 
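+	 * (hmdfs_connections_stop() disconnects the peer before deleting
+	 * it, and hmdfs_disconnect_node() already queues the offline
+	 * event.)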
+ */ + cancel_delayed_work_sync(&node->evt_dwork); + + hmdfs_run_simple_evt_cb(node, NODE_EVT_DEL); + + hmdfs_release_peer_sysfs(node); + + flush_workqueue(node->reget_conn_wq); + peer_put(node); +} + +void hmdfs_connections_stop(struct hmdfs_sb_info *sbi) +{ + struct hmdfs_peer *node = NULL; + struct hmdfs_peer *con_tmp = NULL; + + mutex_lock(&sbi->connections.node_lock); + list_for_each_entry_safe(node, con_tmp, &sbi->connections.node_list, + list) { + mutex_unlock(&sbi->connections.node_lock); + hmdfs_disconnect_node(node); + hmdfs_del_peer(node); + mutex_lock(&sbi->connections.node_lock); + } + mutex_unlock(&sbi->connections.node_lock); +} + +struct connection *get_conn_impl(struct hmdfs_peer *node, int connect_type) +{ + struct connection *conn_impl = NULL; + + if (!node) + return NULL; + mutex_lock(&node->conn_impl_list_lock); + list_for_each_entry(conn_impl, &node->conn_impl_list, list) { + if (conn_impl->type == connect_type && + conn_impl->status == CONNECT_STAT_WORKING) { + connection_get(conn_impl); + mutex_unlock(&node->conn_impl_list_lock); + return conn_impl; + } + } + mutex_unlock(&node->conn_impl_list_lock); + hmdfs_err_ratelimited("device %llu not find connection, type %d", + node->device_id, connect_type); + return NULL; +} + +void set_conn_sock_quickack(struct hmdfs_peer *node) +{ + struct connection *conn_impl = NULL; + struct tcp_handle *tcp = NULL; + int option = 1; + + if (!node) + return; + mutex_lock(&node->conn_impl_list_lock); + list_for_each_entry(conn_impl, &node->conn_impl_list, list) { + if (conn_impl->type == CONNECT_TYPE_TCP && + conn_impl->status == CONNECT_STAT_WORKING && + conn_impl->connect_handle) { + tcp = (struct tcp_handle *)(conn_impl->connect_handle); + tcp_sock_set_quickack(tcp->sock->sk, option); + } + } + mutex_unlock(&node->conn_impl_list_lock); +} + +struct hmdfs_peer *hmdfs_lookup_from_devid(struct hmdfs_sb_info *sbi, + uint64_t device_id) +{ + struct hmdfs_peer *con = NULL; + struct hmdfs_peer *lookup = NULL; + + if (!sbi) + return NULL; + mutex_lock(&sbi->connections.node_lock); + list_for_each_entry(con, &sbi->connections.node_list, list) { + if (con->status != NODE_STAT_ONLINE || + con->device_id != device_id) + continue; + lookup = con; + peer_get(lookup); + break; + } + mutex_unlock(&sbi->connections.node_lock); + return lookup; +} + +struct hmdfs_peer *hmdfs_lookup_from_cid(struct hmdfs_sb_info *sbi, + uint8_t *cid) +{ + struct hmdfs_peer *con = NULL; + struct hmdfs_peer *lookup = NULL; + + if (!sbi) + return NULL; + mutex_lock(&sbi->connections.node_lock); + list_for_each_entry(con, &sbi->connections.node_list, list) { + if (strncmp(con->cid, cid, HMDFS_CID_SIZE) != 0) + continue; + lookup = con; + peer_get(lookup); + break; + } + mutex_unlock(&sbi->connections.node_lock); + return lookup; +} + +static struct hmdfs_peer *lookup_peer_by_cid_unsafe(struct hmdfs_sb_info *sbi, + uint8_t *cid) +{ + struct hmdfs_peer *node = NULL; + + list_for_each_entry(node, &sbi->connections.node_list, list) + if (!strncmp(node->cid, cid, HMDFS_CID_SIZE)) { + peer_get(node); + return node; + } + return NULL; +} + +static struct hmdfs_peer *add_peer_unsafe(struct hmdfs_sb_info *sbi, + struct hmdfs_peer *peer2add) +{ + struct hmdfs_peer *peer; + int err; + + peer = lookup_peer_by_cid_unsafe(sbi, peer2add->cid); + if (peer) + return peer; + + err = hmdfs_register_peer_sysfs(sbi, peer2add); + if (err) { + hmdfs_err("register peer %llu sysfs err %d", + peer2add->device_id, err); + return ERR_PTR(err); + } + list_add_tail(&peer2add->list, 
&sbi->connections.node_list); + peer_get(peer2add); + hmdfs_run_simple_evt_cb(peer2add, NODE_EVT_ADD); + return peer2add; +} + +static struct hmdfs_peer *alloc_peer(struct hmdfs_sb_info *sbi, uint8_t *cid, + const struct connection_operations *conn_operations, uint32_t devsl) +{ + struct hmdfs_peer *node = kzalloc(sizeof(*node), GFP_KERNEL); + + if (!node) + return NULL; + + node->device_id = (u32)atomic_inc_return(&sbi->connections.conn_seq); + + node->async_wq = alloc_workqueue("dfs_async%u_%llu", WQ_MEM_RECLAIM, 0, + sbi->seq, node->device_id); + if (!node->async_wq) { + hmdfs_err("Failed to alloc async wq"); + goto out_err; + } + node->req_handle_wq = alloc_workqueue("dfs_req%u_%llu", + WQ_UNBOUND | WQ_MEM_RECLAIM, + sbi->async_req_max_active, + sbi->seq, node->device_id); + if (!node->req_handle_wq) { + hmdfs_err("Failed to alloc req wq"); + goto out_err; + } + node->dentry_wq = alloc_workqueue("dfs_dentry%u_%llu", + WQ_UNBOUND | WQ_MEM_RECLAIM, + 0, sbi->seq, node->device_id); + if (!node->dentry_wq) { + hmdfs_err("Failed to alloc dentry wq"); + goto out_err; + } + node->retry_wb_wq = alloc_workqueue("dfs_rwb%u_%llu", + WQ_UNBOUND | WQ_MEM_RECLAIM, + HMDFS_RETRY_WB_WQ_MAX_ACTIVE, + sbi->seq, node->device_id); + if (!node->retry_wb_wq) { + hmdfs_err("Failed to alloc retry writeback wq"); + goto out_err; + } + node->reget_conn_wq = alloc_workqueue("dfs_regetcon%u_%llu", + WQ_UNBOUND, 0, + sbi->seq, node->device_id); + if (!node->reget_conn_wq) { + hmdfs_err("Failed to alloc reget conn wq"); + goto out_err; + } + INIT_LIST_HEAD(&node->conn_impl_list); + mutex_init(&node->conn_impl_list_lock); + INIT_LIST_HEAD(&node->conn_deleting_list); + init_waitqueue_head(&node->deleting_list_wq); + idr_init(&node->msg_idr); + spin_lock_init(&node->idr_lock); + idr_init(&node->file_id_idr); + spin_lock_init(&node->file_id_lock); + INIT_LIST_HEAD(&node->list); + kref_init(&node->ref_cnt); + node->owner = sbi->seq; + node->conn_operations = conn_operations; + node->sbi = sbi; + node->status = NODE_STAT_SHAKING; + node->conn_time = jiffies; + memcpy(node->cid, cid, HMDFS_CID_SIZE); + atomic64_set(&node->sb_dirty_count, 0); + node->fid_cookie = 0; + atomic_set(&node->evt_seq, 0); + mutex_init(&node->seq_lock); + mutex_init(&node->offline_cb_lock); + mutex_init(&node->evt_lock); + node->pending_evt = RAW_NODE_EVT_NR; + node->last_evt = RAW_NODE_EVT_NR; + node->cur_evt[0] = RAW_NODE_EVT_NR; + node->cur_evt[1] = RAW_NODE_EVT_NR; + node->seq_wr_idx = (unsigned char)UINT_MAX; + node->seq_rd_idx = node->seq_wr_idx; + INIT_DELAYED_WORK(&node->evt_dwork, hmdfs_node_evt_work); + node->msg_idr_process = 0; + node->offline_start = false; + spin_lock_init(&node->wr_opened_inode_lock); + INIT_LIST_HEAD(&node->wr_opened_inode_list); + spin_lock_init(&node->stashed_inode_lock); + node->stashed_inode_nr = 0; + atomic_set(&node->rebuild_inode_status_nr, 0); + init_waitqueue_head(&node->rebuild_inode_status_wq); + INIT_LIST_HEAD(&node->stashed_inode_list); + node->need_rebuild_stash_list = false; + node->devsl = devsl; + + return node; + +out_err: + if (node->async_wq) { + destroy_workqueue(node->async_wq); + node->async_wq = NULL; + } + if (node->req_handle_wq) { + destroy_workqueue(node->req_handle_wq); + node->req_handle_wq = NULL; + } + if (node->dentry_wq) { + destroy_workqueue(node->dentry_wq); + node->dentry_wq = NULL; + } + if (node->retry_wb_wq) { + destroy_workqueue(node->retry_wb_wq); + node->retry_wb_wq = NULL; + } + if (node->reget_conn_wq) { + destroy_workqueue(node->reget_conn_wq); + node->reget_conn_wq = 
NULL; + } + kfree(node); + return NULL; +} + +struct hmdfs_peer *hmdfs_get_peer(struct hmdfs_sb_info *sbi, uint8_t *cid, + uint32_t devsl) +{ + struct hmdfs_peer *peer = NULL, *on_sbi_peer = NULL; + const struct connection_operations *conn_opr_ptr = NULL; + + mutex_lock(&sbi->connections.node_lock); + peer = lookup_peer_by_cid_unsafe(sbi, cid); + mutex_unlock(&sbi->connections.node_lock); + if (peer) { + hmdfs_info("Got a existing peer: device_id = %llu", + peer->device_id); + goto out; + } + + conn_opr_ptr = hmdfs_get_peer_operation(DFS_2_0); + if (unlikely(!conn_opr_ptr)) { + hmdfs_info("Fatal! Cannot get peer operation"); + goto out; + } + peer = alloc_peer(sbi, cid, conn_opr_ptr, devsl); + if (unlikely(!peer)) { + hmdfs_info("Failed to alloc a peer"); + goto out; + } + + mutex_lock(&sbi->connections.node_lock); + on_sbi_peer = add_peer_unsafe(sbi, peer); + mutex_unlock(&sbi->connections.node_lock); + if (IS_ERR(on_sbi_peer)) { + peer_put(peer); + peer = NULL; + goto out; + } else if (unlikely(on_sbi_peer != peer)) { + hmdfs_info("Got a existing peer: device_id = %llu", + on_sbi_peer->device_id); + peer_put(peer); + peer = on_sbi_peer; + } else { + hmdfs_info("Got a newly allocated peer: device_id = %llu", + peer->device_id); + } + +out: + return peer; +} + +static void head_release(struct kref *kref) +{ + struct hmdfs_msg_idr_head *head; + struct hmdfs_peer *con; + + head = (struct hmdfs_msg_idr_head *)container_of(kref, + struct hmdfs_msg_idr_head, ref); + con = head->peer; + idr_remove(&con->msg_idr, head->msg_id); + spin_unlock(&con->idr_lock); + + kfree(head); +} + +void head_put(struct hmdfs_msg_idr_head *head) +{ + kref_put_lock(&head->ref, head_release, &head->peer->idr_lock); +} + +struct hmdfs_msg_idr_head *hmdfs_find_msg_head(struct hmdfs_peer *peer, int id) +{ + struct hmdfs_msg_idr_head *head = NULL; + + spin_lock(&peer->idr_lock); + head = idr_find(&peer->msg_idr, id); + if (head) + kref_get(&head->ref); + spin_unlock(&peer->idr_lock); + + return head; +} + +int hmdfs_alloc_msg_idr(struct hmdfs_peer *peer, enum MSG_IDR_TYPE type, + void *ptr) +{ + int ret = -EAGAIN; + struct hmdfs_msg_idr_head *head = ptr; + int end = peer->version < DFS_2_0 ? (USHRT_MAX + 1) : 0; + + idr_preload(GFP_KERNEL); + spin_lock(&peer->idr_lock); + if (!peer->offline_start) + ret = idr_alloc_cyclic(&peer->msg_idr, ptr, + 1, end, GFP_NOWAIT); + if (ret >= 0) { + kref_init(&head->ref); + head->msg_id = ret; + head->type = type; + head->peer = peer; + peer->msg_idr_process++; + ret = 0; + } + spin_unlock(&peer->idr_lock); + idr_preload_end(); + + return ret; +} diff --git a/fs/hmdfs/comm/connection.h b/fs/hmdfs/comm/connection.h new file mode 100755 index 000000000..6137c5498 --- /dev/null +++ b/fs/hmdfs/comm/connection.h @@ -0,0 +1,358 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/connection.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#ifndef HMDFS_CONNECTION_H +#define HMDFS_CONNECTION_H + +#ifdef CONFIG_HMDFS_FS_ENCRYPTION +#include +#endif + +#include +#include +#include "protocol.h" +#include "node_cb.h" + +#define HMDFS_KEY_SIZE 32 +#define HMDFS_IV_SIZE 12 +#define HMDFS_TAG_SIZE 16 +#define HMDFS_CID_SIZE 64 + +enum { + CONNECT_MESG_HANDSHAKE_REQUEST = 1, + CONNECT_MESG_HANDSHAKE_RESPONSE = 2, + CONNECT_MESG_HANDSHAKE_ACK = 3, +}; + +enum { + CONNECT_STAT_WAIT_REQUEST = 0, + CONNECT_STAT_WAIT_RESPONSE, + CONNECT_STAT_WORKING, + CONNECT_STAT_STOP, + CONNECT_STAT_WAIT_ACK, + CONNECT_STAT_NEGO_FAIL, + CONNECT_STAT_COUNT +}; + +enum { + CONNECT_TYPE_TCP = 0, + CONNECT_TYPE_UNSUPPORT, +}; + +struct connection_stat { + int64_t send_bytes; + int64_t recv_bytes; + int send_message_count; + int recv_message_count; + unsigned long rekey_time; +}; + +struct connection { + struct list_head list; + struct kref ref_cnt; + struct mutex ref_lock; + struct hmdfs_peer *node; + int type; + int status; + void *connect_handle; + struct crypto_aead *tfm; + u8 master_key[HMDFS_KEY_SIZE]; + u8 send_key[HMDFS_KEY_SIZE]; + u8 recv_key[HMDFS_KEY_SIZE]; + struct connection_stat stat; + struct work_struct reget_work; +#ifdef CONFIG_HMDFS_FS_ENCRYPTION + struct tls12_crypto_info_aes_gcm_128 send_crypto_info; + struct tls12_crypto_info_aes_gcm_128 recv_crypto_info; +#endif + void (*close)(struct connection *connect); + int (*send_message)(struct connection *connect, + struct hmdfs_send_data *msg); + uint32_t crypto; +}; + +enum { + NODE_STAT_SHAKING = 0, + NODE_STAT_ONLINE, + NODE_STAT_OFFLINE, +}; + +struct hmdfs_async_work { + struct hmdfs_msg_idr_head head; + struct page *page; + struct delayed_work d_work; + unsigned long start; +}; + +enum { + RAW_NODE_EVT_OFF = 0, + RAW_NODE_EVT_ON, + RAW_NODE_EVT_NR, +}; + +#define RAW_NODE_EVT_MAX_NR 4 + +struct hmdfs_stash_statistics { + unsigned int cur_ok; + unsigned int cur_nothing; + unsigned int cur_fail; + unsigned int total_ok; + unsigned int total_nothing; + unsigned int total_fail; + unsigned long long ok_pages; + unsigned long long fail_pages; +}; + +struct hmdfs_restore_statistics { + unsigned int cur_ok; + unsigned int cur_fail; + unsigned int cur_keep; + unsigned int total_ok; + unsigned int total_fail; + unsigned int total_keep; + unsigned long long ok_pages; + unsigned long long fail_pages; +}; + +struct hmdfs_rebuild_statistics { + unsigned int cur_ok; + unsigned int cur_fail; + unsigned int cur_invalid; + unsigned int total_ok; + unsigned int total_fail; + unsigned int total_invalid; + unsigned int time; +}; + +struct hmdfs_peer_statistics { + /* stash statistics */ + struct hmdfs_stash_statistics stash; + /* restore statistics */ + struct hmdfs_restore_statistics restore; + /* rebuild statistics */ + struct hmdfs_rebuild_statistics rebuild; +}; + +struct hmdfs_peer { + struct list_head list; + struct kref ref_cnt; + unsigned int owner; + uint64_t device_id; + unsigned long conn_time; + uint8_t version; + int status; + u64 features; + long long old_sb_dirty_count; + atomic64_t sb_dirty_count; + /* + * cookie for opened file id. 
+ * It will be increased if peer has offlined + */ + uint16_t fid_cookie; + struct mutex conn_impl_list_lock; + struct list_head conn_impl_list; + /* + * when async message process context call hmdfs_reget_connection + * add conn node to conn_deleting_list, so call hmdfs_disconnect_node + * can wait all receive thread exit + */ + struct list_head conn_deleting_list; + wait_queue_head_t deleting_list_wq; + struct idr msg_idr; + spinlock_t idr_lock; + struct idr file_id_idr; + spinlock_t file_id_lock; + int recvbuf_maxsize; + struct crypto_aead *tfm; + char cid[HMDFS_CID_SIZE + 1]; + const struct connection_operations *conn_operations; + struct hmdfs_sb_info *sbi; + struct workqueue_struct *async_wq; + struct workqueue_struct *req_handle_wq; + struct workqueue_struct *dentry_wq; + struct workqueue_struct *retry_wb_wq; + struct workqueue_struct *reget_conn_wq; + atomic_t evt_seq; + /* sync cb may be blocking */ + struct mutex seq_lock; + struct mutex offline_cb_lock; + struct mutex evt_lock; + unsigned char pending_evt; + unsigned char last_evt; + unsigned char waiting_evt[RAW_NODE_EVT_NR]; + unsigned char seq_rd_idx; + unsigned char seq_wr_idx; + unsigned int seq_tbl[RAW_NODE_EVT_MAX_NR]; + unsigned int pending_evt_seq; + unsigned char cur_evt[NODE_EVT_TYPE_NR]; + unsigned int cur_evt_seq[NODE_EVT_TYPE_NR]; + unsigned int merged_evt; + unsigned int dup_evt[RAW_NODE_EVT_NR]; + struct delayed_work evt_dwork; + /* protected by idr_lock */ + uint64_t msg_idr_process; + bool offline_start; + spinlock_t wr_opened_inode_lock; + struct list_head wr_opened_inode_list; + /* + * protect @stashed_inode_list and @stashed_inode_nr in stash process + * and fill_inode_remote->hmdfs_remote_init_stash_status process + */ + spinlock_t stashed_inode_lock; + unsigned int stashed_inode_nr; + struct list_head stashed_inode_list; + bool need_rebuild_stash_list; + /* how many inodes are rebuilding statsh status */ + atomic_t rebuild_inode_status_nr; + wait_queue_head_t rebuild_inode_status_wq; + struct hmdfs_peer_statistics stats; + /* sysfs */ + struct kobject kobj; + struct completion kobj_unregister; + uint32_t devsl; +}; + +#define HMDFS_DEVID_LOCAL 0 + +/* Be Compatible to DFS1.0, dont add packed attribute so far */ +struct connection_msg_head { + __u8 magic; + __u8 version; + __u8 operations; + __u8 flags; + __le32 datasize; + __le64 source; + __le16 msg_id; + __le16 request_id; + __le32 reserved1; +} __packed; + +struct connection_handshake_req { + __le32 len; + char dev_id[0]; +} __packed; + +enum { + HS_EXTEND_CODE_CRYPTO = 0, + HS_EXTEND_CODE_CASE_SENSE, + HS_EXTEND_CODE_FEATURE_SUPPORT, + HS_EXTEND_CODE_COUNT +}; + +struct conn_hs_extend_reg { + __u16 len; + __u16 resv; + void (*filler)(struct connection *conn_impl, __u8 ops, + void *data, __u32 len); + int (*parser)(struct connection *conn_impl, __u8 ops, + void *data, __u32 len); +}; + +struct conn_hs_extend_head { + __le32 field_cn; + char data[0]; +}; + +struct extend_field_head { + __le16 code; + __le16 len; +} __packed; + +struct crypto_body { + __le32 crypto; +} __packed; + +struct case_sense_body { + __u8 case_sensitive; +} __packed; + +struct feature_body { + __u64 features; + __u64 reserved; +} __packed; + +#define HMDFS_HS_CRYPTO_KTLS_AES128 0x00000001 +#define HMDFS_HS_CRYPTO_KTLS_AES256 0x00000002 + +static inline bool hmdfs_is_node_online(const struct hmdfs_peer *node) +{ + return READ_ONCE(node->status) == NODE_STAT_ONLINE; +} + +static inline unsigned int hmdfs_node_inc_evt_seq(struct hmdfs_peer *node) +{ + /* Use the atomic as an unsigned 
integer */ + return atomic_inc_return(&node->evt_seq); +} + +static inline unsigned int hmdfs_node_evt_seq(const struct hmdfs_peer *node) +{ + return atomic_read(&node->evt_seq); +} + +struct connection *get_conn_impl(struct hmdfs_peer *node, int connect_type); + +void set_conn_sock_quickack(struct hmdfs_peer *node); + +struct hmdfs_peer *hmdfs_get_peer(struct hmdfs_sb_info *sbi, uint8_t *cid, + uint32_t devsl); + +struct hmdfs_peer *hmdfs_lookup_from_devid(struct hmdfs_sb_info *sbi, + uint64_t device_id); +struct hmdfs_peer *hmdfs_lookup_from_cid(struct hmdfs_sb_info *sbi, + uint8_t *cid); +void connection_send_handshake(struct connection *conn_impl, __u8 operations, + __le16 request_id); +void connection_handshake_recv_handler(struct connection *conn_impl, void *buf, + void *data, __u32 data_len); +void connection_working_recv_handler(struct connection *conn_impl, void *head, + void *data, __u32 data_len); +static inline void connection_get(struct connection *conn) +{ + kref_get(&conn->ref_cnt); +} + +void connection_put(struct connection *conn); +static inline void peer_get(struct hmdfs_peer *peer) +{ + kref_get(&peer->ref_cnt); +} + +void peer_put(struct hmdfs_peer *peer); + +int hmdfs_sendmessage(struct hmdfs_peer *node, struct hmdfs_send_data *msg); +void hmdfs_connections_stop(struct hmdfs_sb_info *sbi); + +void hmdfs_disconnect_node(struct hmdfs_peer *node); + +void connection_to_working(struct hmdfs_peer *node); + +int hmdfs_alloc_msg_idr(struct hmdfs_peer *peer, enum MSG_IDR_TYPE type, + void *ptr); +struct hmdfs_msg_idr_head *hmdfs_find_msg_head(struct hmdfs_peer *peer, int id); + +static inline void hmdfs_start_process_offline(struct hmdfs_peer *peer) +{ + spin_lock(&peer->idr_lock); + peer->offline_start = true; + spin_unlock(&peer->idr_lock); +} + +static inline void hmdfs_stop_process_offline(struct hmdfs_peer *peer) +{ + spin_lock(&peer->idr_lock); + peer->offline_start = false; + spin_unlock(&peer->idr_lock); +} + +static inline void hmdfs_dec_msg_idr_process(struct hmdfs_peer *peer) +{ + spin_lock(&peer->idr_lock); + peer->msg_idr_process--; + spin_unlock(&peer->idr_lock); +} +#endif diff --git a/fs/hmdfs/comm/crypto.c b/fs/hmdfs/comm/crypto.c new file mode 100755 index 000000000..60bb08f16 --- /dev/null +++ b/fs/hmdfs/comm/crypto.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/comm/crypto.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#include "crypto.h" + +#include +#include +#include +#include +#include +#include + +#include "hmdfs.h" + +static void tls_crypto_set_key(struct connection *conn_impl, int tx) +{ + int rc = 0; + struct tcp_handle *tcp = conn_impl->connect_handle; + struct tls_context *ctx = tls_get_ctx(tcp->sock->sk); + struct cipher_context *cctx = NULL; + struct tls_sw_context_tx *sw_ctx_tx = NULL; + struct tls_sw_context_rx *sw_ctx_rx = NULL; + struct crypto_aead **aead = NULL; + struct tls12_crypto_info_aes_gcm_128 *crypto_info = NULL; + + if (tx) { + crypto_info = &conn_impl->send_crypto_info; + cctx = &ctx->tx; + sw_ctx_tx = tls_sw_ctx_tx(ctx); + aead = &sw_ctx_tx->aead_send; + } else { + crypto_info = &conn_impl->recv_crypto_info; + cctx = &ctx->rx; + sw_ctx_rx = tls_sw_ctx_rx(ctx); + aead = &sw_ctx_rx->aead_recv; + } + + memcpy(cctx->iv, crypto_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, crypto_info->iv, + TLS_CIPHER_AES_GCM_128_IV_SIZE); + memcpy(cctx->rec_seq, crypto_info->rec_seq, + TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); + rc = crypto_aead_setkey(*aead, crypto_info->key, + TLS_CIPHER_AES_GCM_128_KEY_SIZE); + if (rc) + hmdfs_err("crypto set key error"); +} + +int tls_crypto_info_init(struct connection *conn_impl) +{ + int ret = 0; + u8 key_meterial[HMDFS_KEY_SIZE]; + struct tcp_handle *tcp = + (struct tcp_handle *)(conn_impl->connect_handle); + if (conn_impl->node->version < DFS_2_0 || !tcp) + return -EINVAL; + // send + update_key(conn_impl->send_key, key_meterial, HKDF_TYPE_IV); + ret = tcp->sock->ops->setsockopt(tcp->sock, SOL_TCP, TCP_ULP, + KERNEL_SOCKPTR("tls"), sizeof("tls")); + if (ret) + hmdfs_err("set tls error %d", ret); + tcp->connect->send_crypto_info.info.version = TLS_1_2_VERSION; + tcp->connect->send_crypto_info.info.cipher_type = + TLS_CIPHER_AES_GCM_128; + + memcpy(tcp->connect->send_crypto_info.key, tcp->connect->send_key, + TLS_CIPHER_AES_GCM_128_KEY_SIZE); + memcpy(tcp->connect->send_crypto_info.iv, + key_meterial + CRYPTO_IV_OFFSET, TLS_CIPHER_AES_GCM_128_IV_SIZE); + memcpy(tcp->connect->send_crypto_info.salt, + key_meterial + CRYPTO_SALT_OFFSET, + TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(tcp->connect->send_crypto_info.rec_seq, + key_meterial + CRYPTO_SEQ_OFFSET, + TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); + + ret = tcp->sock->ops->setsockopt(tcp->sock, SOL_TLS, TLS_TX, + KERNEL_SOCKPTR(&(tcp->connect->send_crypto_info)), + sizeof(tcp->connect->send_crypto_info)); + if (ret) + hmdfs_err("set tls send_crypto_info error %d", ret); + + // recv + update_key(tcp->connect->recv_key, key_meterial, HKDF_TYPE_IV); + tcp->connect->recv_crypto_info.info.version = TLS_1_2_VERSION; + tcp->connect->recv_crypto_info.info.cipher_type = + TLS_CIPHER_AES_GCM_128; + + memcpy(tcp->connect->recv_crypto_info.key, tcp->connect->recv_key, + TLS_CIPHER_AES_GCM_128_KEY_SIZE); + memcpy(tcp->connect->recv_crypto_info.iv, + key_meterial + CRYPTO_IV_OFFSET, TLS_CIPHER_AES_GCM_128_IV_SIZE); + memcpy(tcp->connect->recv_crypto_info.salt, + key_meterial + CRYPTO_SALT_OFFSET, + TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(tcp->connect->recv_crypto_info.rec_seq, + key_meterial + CRYPTO_SEQ_OFFSET, + TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); + memset(key_meterial, 0, HMDFS_KEY_SIZE); + + ret = tcp->sock->ops->setsockopt(tcp->sock, SOL_TLS, TLS_RX, + KERNEL_SOCKPTR(&(tcp->connect->recv_crypto_info)), + sizeof(tcp->connect->recv_crypto_info)); + if (ret) + hmdfs_err("set tls recv_crypto_info error %d", ret); + return ret; +} + +static int tls_set_tx(struct 
tcp_handle *tcp) +{ + int ret = 0; + u8 new_key[HMDFS_KEY_SIZE]; + u8 key_meterial[HMDFS_KEY_SIZE]; + + ret = update_key(tcp->connect->send_key, new_key, HKDF_TYPE_REKEY); + if (ret < 0) + return ret; + memcpy(tcp->connect->send_key, new_key, HMDFS_KEY_SIZE); + ret = update_key(tcp->connect->send_key, key_meterial, HKDF_TYPE_IV); + if (ret < 0) + return ret; + + memcpy(tcp->connect->send_crypto_info.key, tcp->connect->send_key, + TLS_CIPHER_AES_GCM_128_KEY_SIZE); + memcpy(tcp->connect->send_crypto_info.iv, + key_meterial + CRYPTO_IV_OFFSET, TLS_CIPHER_AES_GCM_128_IV_SIZE); + memcpy(tcp->connect->send_crypto_info.salt, + key_meterial + CRYPTO_SALT_OFFSET, + TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(tcp->connect->send_crypto_info.rec_seq, + key_meterial + CRYPTO_SEQ_OFFSET, + TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); + memset(new_key, 0, HMDFS_KEY_SIZE); + memset(key_meterial, 0, HMDFS_KEY_SIZE); + + tls_crypto_set_key(tcp->connect, 1); + return 0; +} + +static int tls_set_rx(struct tcp_handle *tcp) +{ + int ret = 0; + u8 new_key[HMDFS_KEY_SIZE]; + u8 key_meterial[HMDFS_KEY_SIZE]; + + ret = update_key(tcp->connect->recv_key, new_key, HKDF_TYPE_REKEY); + if (ret < 0) + return ret; + memcpy(tcp->connect->recv_key, new_key, HMDFS_KEY_SIZE); + ret = update_key(tcp->connect->recv_key, key_meterial, HKDF_TYPE_IV); + if (ret < 0) + return ret; + + memcpy(tcp->connect->recv_crypto_info.key, tcp->connect->recv_key, + TLS_CIPHER_AES_GCM_128_KEY_SIZE); + memcpy(tcp->connect->recv_crypto_info.iv, + key_meterial + CRYPTO_IV_OFFSET, TLS_CIPHER_AES_GCM_128_IV_SIZE); + memcpy(tcp->connect->recv_crypto_info.salt, + key_meterial + CRYPTO_SALT_OFFSET, + TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(tcp->connect->recv_crypto_info.rec_seq, + key_meterial + CRYPTO_SEQ_OFFSET, + TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); + memset(new_key, 0, HMDFS_KEY_SIZE); + memset(key_meterial, 0, HMDFS_KEY_SIZE); + tls_crypto_set_key(tcp->connect, 0); + return 0; +} + +int set_crypto_info(struct connection *conn_impl, int set_type) +{ + int ret = 0; + __u8 version = conn_impl->node->version; + struct tcp_handle *tcp = + (struct tcp_handle *)(conn_impl->connect_handle); + if (version < DFS_2_0 || !tcp) + return -EINVAL; + + if (set_type == SET_CRYPTO_SEND) { + ret = tls_set_tx(tcp); + if (ret) { + hmdfs_err("tls set tx fail"); + return ret; + } + } + if (set_type == SET_CRYPTO_RECV) { + ret = tls_set_rx(tcp); + if (ret) { + hmdfs_err("tls set rx fail"); + return ret; + } + } + hmdfs_info("KTLS setting success"); + return ret; +} + +static int hmac_sha256(u8 *key, u8 key_len, char *info, u8 info_len, u8 *output) +{ + struct crypto_shash *tfm = NULL; + struct shash_desc *shash = NULL; + int ret = 0; + + if (!key) + return -EINVAL; + + tfm = crypto_alloc_shash("hmac(sha256)", 0, 0); + if (IS_ERR(tfm)) { + hmdfs_err("crypto_alloc_ahash failed: err %ld", PTR_ERR(tfm)); + return PTR_ERR(tfm); + } + + ret = crypto_shash_setkey(tfm, key, key_len); + if (ret) { + hmdfs_err("crypto_ahash_setkey failed: err %d", ret); + goto failed; + } + + shash = kzalloc(sizeof(*shash) + crypto_shash_descsize(tfm), + GFP_KERNEL); + if (!shash) { + ret = -ENOMEM; + goto failed; + } + + shash->tfm = tfm; + + ret = crypto_shash_digest(shash, info, info_len, output); + + kfree(shash); + +failed: + crypto_free_shash(tfm); + return ret; +} + +static const char *const g_key_lable[] = { "ktls key initiator", + "ktls key accepter", + "ktls key update", "ktls iv&salt" }; +static const int g_key_lable_len[] = { 18, 17, 15, 12 }; + +int update_key(__u8 *old_key, __u8 
*new_key, int type)
+{
+	int ret = 0;
+	char lable[MAX_LABLE_SIZE];
+	u8 lable_size;
+
+	lable_size = g_key_lable_len[type] + sizeof(u16) + sizeof(char);
+	*((u16 *)lable) = HMDFS_KEY_SIZE;
+	memcpy(lable + sizeof(u16), g_key_lable[type], g_key_lable_len[type]);
+	*(lable + sizeof(u16) + g_key_lable_len[type]) = 0x01;
+	ret = hmac_sha256(old_key, HMDFS_KEY_SIZE, lable, lable_size, new_key);
+	if (ret < 0)
+		hmdfs_err("hmac sha256 error");
+	return ret;
+}
diff --git a/fs/hmdfs/comm/crypto.h b/fs/hmdfs/comm/crypto.h
new file mode 100755
index 000000000..7549f3897
--- /dev/null
+++ b/fs/hmdfs/comm/crypto.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/comm/crypto.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_FS_ENCRYPTION_H
+#define HMDFS_FS_ENCRYPTION_H
+
+#include "transport.h"
+
+#define MAX_LABLE_SIZE 30
+#define CRYPTO_IV_OFFSET 0
+#define CRYPTO_SALT_OFFSET (CRYPTO_IV_OFFSET + TLS_CIPHER_AES_GCM_128_IV_SIZE)
+#define CRYPTO_SEQ_OFFSET \
+	(CRYPTO_SALT_OFFSET + TLS_CIPHER_AES_GCM_128_SALT_SIZE)
+#define REKEY_LIFETIME (60 * 60 * HZ)
+
+enum HKDF_TYPE {
+	HKDF_TYPE_KEY_INITIATOR = 0,
+	HKDF_TYPE_KEY_ACCEPTER = 1,
+	HKDF_TYPE_REKEY = 2,
+	HKDF_TYPE_IV = 3,
+};
+
+enum SET_CRYPTO_TYPE {
+	SET_CRYPTO_SEND = 0,
+	SET_CRYPTO_RECV = 1,
+};
+
+int tls_crypto_info_init(struct connection *conn_impl);
+int set_crypto_info(struct connection *conn_impl, int set_type);
+int update_key(__u8 *old_key, __u8 *new_key, int type);
+
+#endif
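+
+/*
+ * A minimal userspace sketch of the derivation implemented by
+ * update_key() (an editorial illustration using OpenSSL; derive_key and
+ * OUT_LEN are names invented here, not hmdfs symbols). update_key() is
+ * effectively a single-block HKDF-Expand:
+ * new_key = HMAC-SHA256(old_key, u16(HMDFS_KEY_SIZE) || label || 0x01).
+ *
+ *	#include <openssl/evp.h>
+ *	#include <openssl/hmac.h>
+ *	#include <stdint.h>
+ *	#include <string.h>
+ *
+ *	#define OUT_LEN 32	// HMDFS_KEY_SIZE
+ *
+ *	static void derive_key(const uint8_t old_key[OUT_LEN],
+ *			       const char *label, size_t label_len,
+ *			       uint8_t new_key[OUT_LEN])
+ *	{
+ *		uint8_t info[64];
+ *		unsigned int md_len = OUT_LEN;
+ *		uint16_t out_size = OUT_LEN;	// host order, as in the kernel code
+ *
+ *		memcpy(info, &out_size, sizeof(out_size));
+ *		memcpy(info + sizeof(out_size), label, label_len);
+ *		info[sizeof(out_size) + label_len] = 0x01;	// HKDF block counter
+ *		HMAC(EVP_sha256(), old_key, OUT_LEN, info,
+ *		     sizeof(out_size) + label_len + 1, new_key, &md_len);
+ *	}
+ */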
diff --git a/fs/hmdfs/comm/device_node.c b/fs/hmdfs/comm/device_node.c
new file mode 100755
index 000000000..0f2585de6
--- /dev/null
+++ b/fs/hmdfs/comm/device_node.c
@@ -0,0 +1,1694 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/comm/device_node.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include "device_node.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "client_writeback.h"
+#include "server_writeback.h"
+#include "connection.h"
+#include "hmdfs_client.h"
+#include "socket_adapter.h"
+#include "authority/authentication.h"
+
+DEFINE_MUTEX(hmdfs_sysfs_mutex);
+static struct kset *hmdfs_kset;
+
+struct hmdfs_disconnect_node_work {
+	struct hmdfs_peer *conn;
+	struct work_struct work;
+	atomic_t *cnt;
+	struct wait_queue_head *waitq;
+};
+
+static void ctrl_cmd_update_socket_handler(const char *buf, size_t len,
+					   struct hmdfs_sb_info *sbi)
+{
+	struct update_socket_param cmd;
+	struct hmdfs_peer *node = NULL;
+	struct connection *conn = NULL;
+
+	if (unlikely(!buf || len != sizeof(cmd))) {
+		hmdfs_err("len/buf error");
+		goto out;
+	}
+	memcpy(&cmd, buf, sizeof(cmd));
+
+	node = hmdfs_get_peer(sbi, cmd.cid, cmd.devsl);
+	if (unlikely(!node)) {
+		hmdfs_err("failed to update ctrl node: cannot get peer");
+		goto out;
+	}
+
+	conn = hmdfs_get_conn_tcp(node, cmd.newfd, cmd.masterkey, cmd.status);
+	if (unlikely(!conn)) {
+		hmdfs_err("failed to update ctrl node: cannot get conn");
+	} else if (!sbi->system_cred) {
+		const struct cred *system_cred = get_cred(current_cred());
+
+		if (cmpxchg_relaxed(&sbi->system_cred, NULL, system_cred))
+			put_cred(system_cred);
+		else
+			hmdfs_check_cred(system_cred);
+	}
+
+	if (conn)
+		connection_put(conn);
+out:
+	if (node)
+		peer_put(node);
+}
+
+static void ctrl_cmd_update_devsl_handler(const char *buf, size_t len,
+					  struct hmdfs_sb_info *sbi)
+{
+	struct update_devsl_param cmd;
+	struct hmdfs_peer *node = NULL;
+
+	if (unlikely(!buf || len != sizeof(cmd))) {
+		hmdfs_err("Received an invalid userbuf");
+		return;
+	}
+	memcpy(&cmd, buf, sizeof(cmd));
+
+	node = hmdfs_lookup_from_cid(sbi, cmd.cid);
+	if (unlikely(!node)) {
+		hmdfs_err("failed to update devsl: cannot get peer");
+		return;
+	}
+	hmdfs_info("Found peer: device_id = %llu", node->device_id);
+	node->devsl = cmd.devsl;
+	peer_put(node);
+}
+
+static inline void hmdfs_disconnect_node_marked(struct hmdfs_peer *conn)
+{
+	hmdfs_start_process_offline(conn);
+	hmdfs_disconnect_node(conn);
+	hmdfs_stop_process_offline(conn);
+}
+
+static void ctrl_cmd_off_line_handler(const char *buf, size_t len,
+				      struct hmdfs_sb_info *sbi)
+{
+	struct offline_param cmd;
+	struct hmdfs_peer *node = NULL;
+
+	if (unlikely(!buf || len != sizeof(cmd))) {
+		hmdfs_err("Received an invalid userbuf");
+		return;
+	}
+	memcpy(&cmd, buf, sizeof(cmd));
+	node = hmdfs_lookup_from_cid(sbi, cmd.remote_cid);
+	if (unlikely(!node)) {
+		hmdfs_err("Cannot find node by device");
+		return;
+	}
+	hmdfs_info("Found peer: device_id = %llu", node->device_id);
+	hmdfs_disconnect_node_marked(node);
+	peer_put(node);
+}
+
+static void hmdfs_disconnect_node_work_fn(struct work_struct *base)
+{
+	struct hmdfs_disconnect_node_work *work =
+		container_of(base, struct hmdfs_disconnect_node_work, work);
+
+	hmdfs_disconnect_node_marked(work->conn);
+	if (atomic_dec_and_test(work->cnt))
+		wake_up(work->waitq);
+	kfree(work);
+}
+
+static void ctrl_cmd_off_line_all_handler(const char *buf, size_t len,
+					  struct hmdfs_sb_info *sbi)
+{
+	struct hmdfs_peer *node = NULL;
+	struct hmdfs_disconnect_node_work *work = NULL;
+	atomic_t cnt = ATOMIC_INIT(0);
+	wait_queue_head_t waitq;
+
+	if (unlikely(len != sizeof(struct offline_all_param))) {
+		hmdfs_err("Received an invalid userbuf, len %zu, expect %zu\n",
+			  len, sizeof(struct offline_all_param));
+		return;
+	}
+
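+	/*
+	 * Disconnect every peer concurrently and wait for all workers to
+	 * finish; fall back to a synchronous disconnect if the work item
+	 * cannot be allocated.
+	 */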
+	init_waitqueue_head(&waitq);
+	mutex_lock(&sbi->connections.node_lock);
+	list_for_each_entry(node, &sbi->connections.node_list, list) {
+		mutex_unlock(&sbi->connections.node_lock);
+		work = kmalloc(sizeof(*work), GFP_KERNEL);
+		if (work) {
+			atomic_inc(&cnt);
+			work->conn = node;
+			work->cnt = &cnt;
+			work->waitq = &waitq;
+			INIT_WORK(&work->work, hmdfs_disconnect_node_work_fn);
+			schedule_work(&work->work);
+		} else {
+			hmdfs_disconnect_node_marked(node);
+		}
+		mutex_lock(&sbi->connections.node_lock);
+	}
+	mutex_unlock(&sbi->connections.node_lock);
+
+	wait_event(waitq, !atomic_read(&cnt));
+}
+
+typedef void (*ctrl_cmd_handler)(const char *buf, size_t len,
+				 struct hmdfs_sb_info *sbi);
+
+static const ctrl_cmd_handler cmd_handler[CMD_CNT] = {
+	[CMD_UPDATE_SOCKET] = ctrl_cmd_update_socket_handler,
+	[CMD_UPDATE_DEVSL] = ctrl_cmd_update_devsl_handler,
+	[CMD_OFF_LINE] = ctrl_cmd_off_line_handler,
+	[CMD_OFF_LINE_ALL] = ctrl_cmd_off_line_all_handler,
+};
+
+static ssize_t sbi_cmd_show(struct kobject *kobj, struct sbi_attribute *attr,
+			    char *buf)
+{
+	struct notify_param param;
+	int out_len;
+	struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	memset(&param, 0, sizeof(param));
+	spin_lock(&sbi->notify_fifo_lock);
+	out_len = kfifo_out(&sbi->notify_fifo, &param, sizeof(param));
+	spin_unlock(&sbi->notify_fifo_lock);
+	if (out_len != sizeof(param))
+		param.notify = NOTIFY_NONE;
+	memcpy(buf, &param, sizeof(param));
+	return sizeof(param);
+}
+
+static const char *cmd2str(int cmd)
+{
+	switch (cmd) {
+	case 0:
+		return "CMD_UPDATE_SOCKET";
+	case 1:
+		return "CMD_UPDATE_DEVSL";
+	case 2:
+		return "CMD_OFF_LINE";
+	case 3:
+		return "CMD_OFF_LINE_ALL";
+	default:
+		return "illegal cmd";
+	}
+}
+
+static ssize_t sbi_cmd_store(struct kobject *kobj, struct sbi_attribute *attr,
+			     const char *buf, size_t len)
+{
+	int cmd;
+	struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	if (!sbi) {
+		hmdfs_info("Fatal! Empty sbi. Mount fs first");
+		return len;
+	}
+	if (len < sizeof(int)) {
+		hmdfs_err("Illegal cmd: cmd len = %zu", len);
+		return len;
+	}
+	cmd = *(int *)buf;
+	if (cmd < 0 || cmd >= CMD_CNT) {
+		hmdfs_err("Illegal cmd: cmd = %d", cmd);
+		return len;
+	}
+	mutex_lock(&sbi->cmd_handler_mutex);
+	hmdfs_info("Received cmd: %s", cmd2str(cmd));
+	if (cmd_handler[cmd])
+		cmd_handler[cmd](buf, len, sbi);
+	mutex_unlock(&sbi->cmd_handler_mutex);
+	return len;
+}
+
+static struct sbi_attribute sbi_cmd_attr =
+	__ATTR(cmd, 0664, sbi_cmd_show, sbi_cmd_store);
+
+static ssize_t sbi_status_show(struct kobject *kobj, struct sbi_attribute *attr,
+			       char *buf)
+{
+	ssize_t size = 0;
+	struct hmdfs_sb_info *sbi = NULL;
+	struct hmdfs_peer *peer = NULL;
+	struct connection *conn_impl = NULL;
+	struct tcp_handle *tcp = NULL;
+
+	sbi = to_sbi(kobj);
+	size += sprintf(buf + size, "peers version status\n");
+
+	mutex_lock(&sbi->connections.node_lock);
+	list_for_each_entry(peer, &sbi->connections.node_list, list) {
+		size += sprintf(buf + size, "%llu %d %d\n", peer->device_id,
+				peer->version, peer->status);
+		// connection information
+		size += sprintf(
+			buf + size,
+			"\t socket_fd connection_status tcp_status ... 
refcnt\n"); + mutex_lock(&peer->conn_impl_list_lock); + list_for_each_entry(conn_impl, &peer->conn_impl_list, list) { + tcp = conn_impl->connect_handle; + size += sprintf(buf + size, "\t %d \t%d \t%d \t%p \t%ld\n", + tcp->fd, conn_impl->status, + tcp->sock->state, tcp->sock, file_count(tcp->sock->file)); + } + mutex_unlock(&peer->conn_impl_list_lock); + } + mutex_unlock(&sbi->connections.node_lock); + return size; +} + +static ssize_t sbi_status_store(struct kobject *kobj, + struct sbi_attribute *attr, const char *buf, + size_t len) +{ + return len; +} + +static struct sbi_attribute sbi_status_attr = + __ATTR(status, 0664, sbi_status_show, sbi_status_store); + +static ssize_t sbi_stat_show(struct kobject *kobj, struct sbi_attribute *attr, + char *buf) +{ + ssize_t size = 0; + struct hmdfs_sb_info *sbi = NULL; + struct hmdfs_peer *peer = NULL; + struct connection *conn_impl = NULL; + struct tcp_handle *tcp = NULL; + + sbi = to_sbi(kobj); + mutex_lock(&sbi->connections.node_lock); + list_for_each_entry(peer, &sbi->connections.node_list, list) { + // connection information + mutex_lock(&peer->conn_impl_list_lock); + list_for_each_entry(conn_impl, &peer->conn_impl_list, list) { + tcp = conn_impl->connect_handle; + size += sprintf(buf + size, "socket_fd: %d\n", tcp->fd); + size += sprintf(buf + size, + "\tsend_msg %d \tsend_bytes %llu\n", + conn_impl->stat.send_message_count, + conn_impl->stat.send_bytes); + size += sprintf(buf + size, + "\trecv_msg %d \trecv_bytes %llu\n", + conn_impl->stat.recv_message_count, + conn_impl->stat.recv_bytes); + } + mutex_unlock(&peer->conn_impl_list_lock); + } + mutex_unlock(&sbi->connections.node_lock); + return size; +} + +static ssize_t sbi_stat_store(struct kobject *kobj, struct sbi_attribute *attr, + const char *buf, size_t len) +{ + struct hmdfs_sb_info *sbi = NULL; + struct hmdfs_peer *peer = NULL; + struct connection *conn_impl = NULL; + + sbi = to_sbi(kobj); + mutex_lock(&sbi->connections.node_lock); + list_for_each_entry(peer, &sbi->connections.node_list, list) { + // connection information + mutex_lock(&peer->conn_impl_list_lock); + list_for_each_entry(conn_impl, &peer->conn_impl_list, list) { + conn_impl->stat.send_message_count = 0; + conn_impl->stat.send_bytes = 0; + conn_impl->stat.recv_message_count = 0; + conn_impl->stat.recv_bytes = 0; + } + mutex_unlock(&peer->conn_impl_list_lock); + } + mutex_unlock(&sbi->connections.node_lock); + return len; +} + +static struct sbi_attribute sbi_statistic_attr = + __ATTR(statistic, 0664, sbi_stat_show, sbi_stat_store); + +static ssize_t sbi_dcache_precision_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", to_sbi(kobj)->dcache_precision); +} + +#define PRECISION_MAX 3600000 + +static ssize_t sbi_dcache_precision_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, size_t len) +{ + int ret; + unsigned int precision; + struct hmdfs_sb_info *sbi = to_sbi(kobj); + + ret = kstrtouint(skip_spaces(buf), 0, &precision); + if (!ret) { + if (precision <= PRECISION_MAX) + sbi->dcache_precision = precision; + else + ret = -EINVAL; + } + + return ret ? 
ret : len; +} + +static struct sbi_attribute sbi_dcache_precision_attr = + __ATTR(dcache_precision, 0664, sbi_dcache_precision_show, + sbi_dcache_precision_store); + +static ssize_t sbi_dcache_threshold_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%lu\n", + to_sbi(kobj)->dcache_threshold); +} + +static ssize_t sbi_dcache_threshold_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, size_t len) +{ + int ret; + unsigned long threshold; + struct hmdfs_sb_info *sbi = to_sbi(kobj); + + ret = kstrtoul(skip_spaces(buf), 0, &threshold); + if (!ret) + sbi->dcache_threshold = threshold; + + return ret ? ret : len; +} + +static struct sbi_attribute sbi_dcache_threshold_attr = + __ATTR(dcache_threshold, 0664, sbi_dcache_threshold_show, + sbi_dcache_threshold_store); + +static ssize_t server_statistic_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + int i, ret; + const size_t size = PAGE_SIZE - 1; + ssize_t pos = 0; + struct server_statistic *stat = to_sbi(kobj)->s_server_statis; + + for (i = 0; i < F_SIZE; i++) { + + ret = snprintf(buf + pos, size - pos, + "%llu %u %llu %llu\n", + stat[i].cnt, + jiffies_to_msecs(stat[i].max), + stat[i].snd_cnt, stat[i].snd_fail_cnt); + if (ret > size - pos) + break; + pos += ret; + } + + /* If break, we should add a new line */ + if (i < F_SIZE) { + ret = snprintf(buf + pos, size + 1 - pos, "\n"); + pos += ret; + } + return pos; +} + +static struct sbi_attribute sbi_local_op_attr = __ATTR_RO(server_statistic); + +static ssize_t client_statistic_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + int i, ret; + const size_t size = PAGE_SIZE - 1; + ssize_t pos = 0; + struct client_statistic *stat = to_sbi(kobj)->s_client_statis; + + for (i = 0; i < F_SIZE; i++) { + + ret = snprintf(buf + pos, size - pos, + "%llu %llu %llu %llu %llu %u\n", + stat[i].snd_cnt, + stat[i].snd_fail_cnt, + stat[i].resp_cnt, + stat[i].timeout_cnt, + stat[i].delay_resp_cnt, + jiffies_to_msecs(stat[i].max)); + if (ret > size - pos) + break; + pos += ret; + } + + /* If break, we should add a new line */ + if (i < F_SIZE) { + ret = snprintf(buf + pos, size + 1 - pos, "\n"); + pos += ret; + } + + return pos; +} + +static struct sbi_attribute sbi_delay_resp_attr = __ATTR_RO(client_statistic); + +static inline unsigned long pages_to_kbytes(unsigned long page) +{ + return page << (PAGE_SHIFT - 10); +} + +static ssize_t dirty_writeback_stats_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + struct hmdfs_writeback *hwb = sbi->h_wb; + unsigned long avg; + unsigned long max; + unsigned long min; + + spin_lock(&hwb->write_bandwidth_lock); + avg = hwb->avg_write_bandwidth; + max = hwb->max_write_bandwidth; + min = hwb->min_write_bandwidth; + spin_unlock(&hwb->write_bandwidth_lock); + + if (min == ULONG_MAX) + min = 0; + + return snprintf(buf, PAGE_SIZE, + "%10lu\n" + "%10lu\n" + "%10lu\n", + pages_to_kbytes(avg), + pages_to_kbytes(max), + pages_to_kbytes(min)); +} + +static struct sbi_attribute sbi_dirty_writeback_stats_attr = + __ATTR_RO(dirty_writeback_stats); + +static ssize_t sbi_wb_timeout_ms_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%u\n", sbi->wb_timeout_ms); +} + +static ssize_t sbi_wb_timeout_ms_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, size_t len) 
+{ + struct hmdfs_sb_info *sbi = to_sbi(kobj); + unsigned int val; + int err; + + err = kstrtouint(buf, 10, &val); + if (err) + return err; + + if (!val || val > HMDFS_MAX_WB_TIMEOUT_MS) + return -EINVAL; + + sbi->wb_timeout_ms = val; + + return len; +} + +static struct sbi_attribute sbi_wb_timeout_ms_attr = + __ATTR(wb_timeout_ms, 0664, sbi_wb_timeout_ms_show, + sbi_wb_timeout_ms_store); + +static ssize_t sbi_dirty_writeback_centisecs_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%u\n", + sbi->h_wb->dirty_writeback_interval); +} + +static ssize_t sbi_dirty_writeback_centisecs_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, size_t len) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + int err; + + err = kstrtouint(buf, 10, &sbi->h_wb->dirty_writeback_interval); + if (err) + return err; + return len; +} + +static struct sbi_attribute sbi_dirty_writeback_centisecs_attr = + __ATTR(dirty_writeback_centisecs, 0664, + sbi_dirty_writeback_centisecs_show, + sbi_dirty_writeback_centisecs_store); + +static ssize_t sbi_dirty_file_background_bytes_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%lu\n", + sbi->h_wb->dirty_file_bg_bytes); +} + +static ssize_t sbi_dirty_file_background_bytes_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, + size_t len) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + unsigned long file_background_bytes = 0; + int err; + + err = kstrtoul(buf, 10, &file_background_bytes); + if (err) + return err; + if (file_background_bytes == 0) + return -EINVAL; + + sbi->h_wb->dirty_fs_bytes = + max(sbi->h_wb->dirty_fs_bytes, file_background_bytes); + sbi->h_wb->dirty_fs_bg_bytes = + max(sbi->h_wb->dirty_fs_bg_bytes, file_background_bytes); + sbi->h_wb->dirty_file_bytes = + max(sbi->h_wb->dirty_file_bytes, file_background_bytes); + + sbi->h_wb->dirty_file_bg_bytes = file_background_bytes; + hmdfs_calculate_dirty_thresh(sbi->h_wb); + hmdfs_update_ratelimit(sbi->h_wb); + return len; +} + +static ssize_t sbi_dirty_fs_background_bytes_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->h_wb->dirty_fs_bg_bytes); +} + +static ssize_t sbi_dirty_fs_background_bytes_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, size_t len) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + unsigned long fs_background_bytes = 0; + int err; + + err = kstrtoul(buf, 10, &fs_background_bytes); + if (err) + return err; + if (fs_background_bytes == 0) + return -EINVAL; + + sbi->h_wb->dirty_file_bg_bytes = + min(sbi->h_wb->dirty_file_bg_bytes, fs_background_bytes); + sbi->h_wb->dirty_fs_bytes = + max(sbi->h_wb->dirty_fs_bytes, fs_background_bytes); + + sbi->h_wb->dirty_fs_bg_bytes = fs_background_bytes; + hmdfs_calculate_dirty_thresh(sbi->h_wb); + hmdfs_update_ratelimit(sbi->h_wb); + return len; +} + +static struct sbi_attribute sbi_dirty_file_background_bytes_attr = + __ATTR(dirty_file_background_bytes, 0644, + sbi_dirty_file_background_bytes_show, + sbi_dirty_file_background_bytes_store); +static struct sbi_attribute sbi_dirty_fs_background_bytes_attr = + __ATTR(dirty_fs_background_bytes, 0644, + sbi_dirty_fs_background_bytes_show, + sbi_dirty_fs_background_bytes_store); + 
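+/*
+ * Illustrative usage of the dirty-threshold knobs defined here and just
+ * below (the sysfs path is an assumption; it depends on where hmdfs_kset
+ * and the per-sbi kobject are registered):
+ *
+ *	# start background writeback once dirty file pages exceed 1 MiB
+ *	echo 1048576 > /sys/fs/hmdfs/<mount>/dirty_file_background_bytes
+ *
+ * The four store handlers clamp the values against each other so that
+ * dirty_file_background_bytes stays the smallest of the four thresholds
+ * and dirty_fs_bytes the largest.
+ */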
+static ssize_t sbi_dirty_file_bytes_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->h_wb->dirty_file_bytes); +} + +static ssize_t sbi_dirty_file_bytes_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, size_t len) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + unsigned long file_bytes = 0; + int err; + + err = kstrtoul(buf, 10, &file_bytes); + if (err) + return err; + if (file_bytes == 0) + return -EINVAL; + + sbi->h_wb->dirty_file_bg_bytes = + min(sbi->h_wb->dirty_file_bg_bytes, file_bytes); + sbi->h_wb->dirty_fs_bytes = max(sbi->h_wb->dirty_fs_bytes, file_bytes); + + sbi->h_wb->dirty_file_bytes = file_bytes; + hmdfs_calculate_dirty_thresh(sbi->h_wb); + hmdfs_update_ratelimit(sbi->h_wb); + return len; +} + +static ssize_t sbi_dirty_fs_bytes_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->h_wb->dirty_fs_bytes); +} + +static ssize_t sbi_dirty_fs_bytes_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, size_t len) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + unsigned long fs_bytes = 0; + int err; + + err = kstrtoul(buf, 10, &fs_bytes); + if (err) + return err; + if (fs_bytes == 0) + return -EINVAL; + + sbi->h_wb->dirty_file_bg_bytes = + min(sbi->h_wb->dirty_file_bg_bytes, fs_bytes); + sbi->h_wb->dirty_file_bytes = + min(sbi->h_wb->dirty_file_bytes, fs_bytes); + sbi->h_wb->dirty_fs_bg_bytes = + min(sbi->h_wb->dirty_fs_bg_bytes, fs_bytes); + + sbi->h_wb->dirty_fs_bytes = fs_bytes; + hmdfs_calculate_dirty_thresh(sbi->h_wb); + hmdfs_update_ratelimit(sbi->h_wb); + return len; +} + +static struct sbi_attribute sbi_dirty_file_bytes_attr = + __ATTR(dirty_file_bytes, 0644, sbi_dirty_file_bytes_show, + sbi_dirty_file_bytes_store); +static struct sbi_attribute sbi_dirty_fs_bytes_attr = + __ATTR(dirty_fs_bytes, 0644, sbi_dirty_fs_bytes_show, + sbi_dirty_fs_bytes_store); + +static ssize_t sbi_dirty_writeback_timelimit_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%u\n", + sbi->h_wb->writeback_timelimit / HZ); +} + +static ssize_t sbi_dirty_writeback_timelimit_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, + size_t len) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + unsigned int time_limit = 0; + int err; + + err = kstrtouint(buf, 10, &time_limit); + if (err) + return err; + if (time_limit == 0 || time_limit > (HMDFS_MAX_WB_TIMELIMIT / HZ)) + return -EINVAL; + + sbi->h_wb->writeback_timelimit = time_limit * HZ; + return len; +} + +static struct sbi_attribute sbi_dirty_writeback_timelimit_attr = +__ATTR(dirty_writeback_timelimit, 0644, sbi_dirty_writeback_timelimit_show, + sbi_dirty_writeback_timelimit_store); + +static ssize_t sbi_dirty_thresh_lowerlimit_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%lu\n", + sbi->h_wb->bw_thresh_lowerlimit << PAGE_SHIFT); +} + +static ssize_t sbi_dirty_thresh_lowerlimit_store(struct kobject *kobj, + struct sbi_attribute *attr, + const char *buf, + size_t len) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + unsigned long bw_thresh_lowerbytes = 0; + unsigned long bw_thresh_lowerlimit; + int 
err;
+
+	err = kstrtoul(buf, 10, &bw_thresh_lowerbytes);
+	if (err)
+		return err;
+
+	bw_thresh_lowerlimit = DIV_ROUND_UP(bw_thresh_lowerbytes, PAGE_SIZE);
+	if (bw_thresh_lowerlimit < HMDFS_BW_THRESH_MIN_LIMIT ||
+	    bw_thresh_lowerlimit > HMDFS_BW_THRESH_MAX_LIMIT)
+		return -EINVAL;
+
+	sbi->h_wb->bw_thresh_lowerlimit = bw_thresh_lowerlimit;
+	return len;
+}
+
+static struct sbi_attribute sbi_dirty_thresh_lowerlimit_attr =
+__ATTR(dirty_thresh_lowerlimit, 0644, sbi_dirty_thresh_lowerlimit_show,
+       sbi_dirty_thresh_lowerlimit_store);
+
+static ssize_t sbi_dirty_writeback_autothresh_show(struct kobject *kobj,
+						   struct sbi_attribute *attr,
+						   char *buf)
+{
+	const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+			sbi->h_wb->dirty_auto_threshold);
+}
+
+static ssize_t sbi_dirty_writeback_autothresh_store(struct kobject *kobj,
+						    struct sbi_attribute *attr,
+						    const char *buf,
+						    size_t len)
+{
+	const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+	bool dirty_auto_threshold = false;
+	int err;
+
+	err = kstrtobool(buf, &dirty_auto_threshold);
+	if (err)
+		return err;
+
+	sbi->h_wb->dirty_auto_threshold = dirty_auto_threshold;
+	return len;
+}
+
+static struct sbi_attribute sbi_dirty_writeback_autothresh_attr =
+__ATTR(dirty_writeback_autothresh, 0644, sbi_dirty_writeback_autothresh_show,
+       sbi_dirty_writeback_autothresh_store);
+
+static ssize_t sbi_dirty_writeback_control_show(struct kobject *kobj,
+						struct sbi_attribute *attr,
+						char *buf)
+{
+	const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+			sbi->h_wb->dirty_writeback_control);
+}
+
+static ssize_t sbi_dirty_writeback_control_store(struct kobject *kobj,
+						 struct sbi_attribute *attr,
+						 const char *buf, size_t len)
+{
+	const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+	unsigned int dirty_writeback_control = 0;
+	int err;
+
+	err = kstrtouint(buf, 10, &dirty_writeback_control);
+	if (err)
+		return err;
+
+	sbi->h_wb->dirty_writeback_control = (bool)dirty_writeback_control;
+	return len;
+}
+
+static struct sbi_attribute sbi_dirty_writeback_control_attr =
+	__ATTR(dirty_writeback_control, 0644, sbi_dirty_writeback_control_show,
+	       sbi_dirty_writeback_control_store);
+
+static ssize_t sbi_srv_dirty_thresh_show(struct kobject *kobj,
+					 struct sbi_attribute *attr,
+					 char *buf)
+{
+	const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+			sbi->h_swb->dirty_thresh_pg >> HMDFS_MB_TO_PAGE_SHIFT);
+}
+
+static ssize_t sbi_srv_dirty_thresh_store(struct kobject *kobj,
+					  struct sbi_attribute *attr,
+					  const char *buf,
+					  size_t len)
+{
+	struct hmdfs_server_writeback *hswb = to_sbi(kobj)->h_swb;
+	int dirty_thresh_mb;
+	unsigned long long pages;
+	int err;
+
+	err = kstrtoint(buf, 10, &dirty_thresh_mb);
+	if (err)
+		return err;
+
+	if (dirty_thresh_mb <= 0)
+		return -EINVAL;
+
+	pages = dirty_thresh_mb;
+	pages <<= HMDFS_MB_TO_PAGE_SHIFT;
+	if (pages > INT_MAX) {
+		hmdfs_err("Illegal dirty_thresh_mb %d, its page count exceeds INT_MAX",
+			  dirty_thresh_mb);
+		return -EINVAL;
+	}
+
+	hswb->dirty_thresh_pg = (unsigned int)pages;
+	return len;
+}
+
+static struct sbi_attribute sbi_srv_dirty_thresh_attr =
+__ATTR(srv_dirty_thresh, 0644, sbi_srv_dirty_thresh_show,
+       sbi_srv_dirty_thresh_store);
+
+static ssize_t sbi_srv_dirty_wb_control_show(struct kobject *kobj,
+					     struct sbi_attribute *attr,
+					     char *buf)
+{
+	const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+			sbi->h_swb->dirty_writeback_control);
+}
+
+static
ssize_t sbi_srv_dirty_wb_control_store(struct kobject *kobj,
+					struct sbi_attribute *attr,
+					const char *buf,
+					size_t len)
+{
+	struct hmdfs_server_writeback *hswb = to_sbi(kobj)->h_swb;
+	bool dirty_writeback_control = true;
+	int err;
+
+	err = kstrtobool(buf, &dirty_writeback_control);
+	if (err)
+		return err;
+
+	hswb->dirty_writeback_control = dirty_writeback_control;
+
+	return len;
+}
+
+static struct sbi_attribute sbi_srv_dirty_wb_control_attr =
+__ATTR(srv_dirty_writeback_control, 0644, sbi_srv_dirty_wb_control_show,
+       sbi_srv_dirty_wb_control_store);
+
+static ssize_t sbi_dcache_timeout_show(struct kobject *kobj,
+				       struct sbi_attribute *attr, char *buf)
+{
+	const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", sbi->dcache_timeout);
+}
+
+static ssize_t sbi_dcache_timeout_store(struct kobject *kobj,
+					struct sbi_attribute *attr,
+					const char *buf, size_t len)
+{
+	struct hmdfs_sb_info *sbi = to_sbi(kobj);
+	unsigned int timeout;
+	int err;
+
+	err = kstrtouint(buf, 0, &timeout);
+	if (err)
+		return err;
+
+	/* zero is invalid, and it doesn't mean no cache */
+	if (timeout == 0 || timeout > MAX_DCACHE_TIMEOUT)
+		return -EINVAL;
+
+	sbi->dcache_timeout = timeout;
+
+	return len;
+}
+
+static struct sbi_attribute sbi_dcache_timeout_attr =
+	__ATTR(dcache_timeout, 0644, sbi_dcache_timeout_show,
+	       sbi_dcache_timeout_store);
+
+static ssize_t sbi_write_cache_timeout_sec_show(struct kobject *kobj,
+		struct sbi_attribute *attr, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%u\n",
+			to_sbi(kobj)->write_cache_timeout);
+}
+
+static ssize_t sbi_write_cache_timeout_sec_store(struct kobject *kobj,
+		struct sbi_attribute *attr, const char *buf, size_t len)
+{
+	int ret;
+	unsigned int timeout;
+	struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	ret = kstrtouint(buf, 0, &timeout);
+	if (ret)
+		return ret;
+
+	/* setting write_cache_timeout to 0 disables this functionality */
+	sbi->write_cache_timeout = timeout;
+
+	return len;
+}
+
+static struct sbi_attribute sbi_write_cache_timeout_sec_attr =
+	__ATTR(write_cache_timeout_sec, 0664, sbi_write_cache_timeout_sec_show,
+	       sbi_write_cache_timeout_sec_store);
+
+static ssize_t sbi_node_evt_cb_delay_show(struct kobject *kobj,
+					  struct sbi_attribute *attr,
+					  char *buf)
+{
+	const struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", sbi->async_cb_delay);
+}
+
+static ssize_t sbi_node_evt_cb_delay_store(struct kobject *kobj,
+					   struct sbi_attribute *attr,
+					   const char *buf,
+					   size_t len)
+{
+	struct hmdfs_sb_info *sbi = to_sbi(kobj);
+	unsigned int delay = 0;
+	int err;
+
+	err = kstrtouint(buf, 10, &delay);
+	if (err)
+		return err;
+
+	sbi->async_cb_delay = delay;
+
+	return len;
+}
+
+static struct sbi_attribute sbi_node_evt_cb_delay_attr =
+__ATTR(node_event_delay, 0644, sbi_node_evt_cb_delay_show,
+       sbi_node_evt_cb_delay_store);
+
+static int calc_idr_number(struct idr *idr)
+{
+	void *entry = NULL;
+	int id;
+	int number = 0;
+
+	idr_for_each_entry(idr, entry, id) {
+		number++;
+		if (number % HMDFS_IDR_RESCHED_COUNT == 0)
+			cond_resched();
+	}
+
+	return number;
+}
+
+static ssize_t sbi_show_idr_stats(struct kobject *kobj,
+				  struct sbi_attribute *attr,
+				  char *buf, bool showmsg)
+{
+	ssize_t size = 0;
+	int count;
+	struct hmdfs_sb_info *sbi = NULL;
+	struct hmdfs_peer *peer = NULL;
+	struct idr *idr = NULL;
+
+	sbi = to_sbi(kobj);
+
+	mutex_lock(&sbi->connections.node_lock);
+	list_for_each_entry(peer, &sbi->connections.node_list, list) {
+		idr = showmsg ?
&peer->msg_idr : &peer->file_id_idr; + count = calc_idr_number(idr); + size += snprintf(buf + size, PAGE_SIZE - size, + "device-id\tcount\tnext-id\n\t%llu\t\t%d\t%u\n", + peer->device_id, count, idr_get_cursor(idr)); + if (size >= PAGE_SIZE) { + size = PAGE_SIZE; + break; + } + } + mutex_unlock(&sbi->connections.node_lock); + + return size; +} + +static ssize_t pending_message_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + return sbi_show_idr_stats(kobj, attr, buf, true); +} + +static struct sbi_attribute sbi_pending_message_attr = + __ATTR_RO(pending_message); + +static ssize_t peer_opened_fd_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + return sbi_show_idr_stats(kobj, attr, buf, false); +} + +static struct sbi_attribute sbi_peer_opened_fd_attr = __ATTR_RO(peer_opened_fd); + +static ssize_t sbi_srv_req_max_active_attr_show(struct kobject *kobj, + struct sbi_attribute *attr, + char *buf) +{ + const struct hmdfs_sb_info *sbi = to_sbi(kobj); + + return snprintf(buf, PAGE_SIZE, "%u\n", sbi->async_req_max_active); +} + +static ssize_t sbi_srv_req_max_active_attr_store(struct kobject *kobj, + struct sbi_attribute *attr, const char *buf, size_t len) +{ + int ret; + unsigned int max_active; + struct hmdfs_sb_info *sbi = to_sbi(kobj); + + ret = kstrtouint(buf, 0, &max_active); + if (ret) + return ret; + + sbi->async_req_max_active = max_active; + + return len; +} + +static struct sbi_attribute sbi_srv_req_max_active_attr = +__ATTR(srv_req_handle_max_active, 0644, sbi_srv_req_max_active_attr_show, + sbi_srv_req_max_active_attr_store); + + +static ssize_t cache_file_show(struct hmdfs_sb_info *sbi, + struct list_head *head, char *buf) +{ + struct cache_file_node *cfn = NULL; + ssize_t pos = 0; + + mutex_lock(&sbi->cache_list_lock); + list_for_each_entry(cfn, head, list) { + pos += snprintf(buf + pos, PAGE_SIZE - pos, + "dev_id: %s relative_path: %s\n", + cfn->cid, cfn->relative_path); + if (pos >= PAGE_SIZE) { + pos = PAGE_SIZE; + break; + } + } + mutex_unlock(&sbi->cache_list_lock); + + return pos; +} + +static ssize_t client_cache_file_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + return cache_file_show(to_sbi(kobj), &to_sbi(kobj)->client_cache, buf); +} +static ssize_t server_cache_file_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + return cache_file_show(to_sbi(kobj), &to_sbi(kobj)->server_cache, buf); +} + +static struct sbi_attribute sbi_server_cache_file_attr = + __ATTR_RO(server_cache_file); +static struct sbi_attribute sbi_client_cache_file_attr = + __ATTR_RO(client_cache_file); + +static ssize_t sb_seq_show(struct kobject *kobj, struct sbi_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", to_sbi(kobj)->seq); +} + +static struct sbi_attribute sbi_seq_attr = __ATTR_RO(sb_seq); + +static ssize_t peers_sum_attr_show(struct kobject *kobj, + struct sbi_attribute *attr, char *buf) +{ + struct hmdfs_sb_info *sbi = to_sbi(kobj); + struct hmdfs_peer *node = NULL; + unsigned int stash_ok = 0, stash_fail = 0, restore_ok = 0, + restore_fail = 0, rebuild_ok = 0, rebuild_fail = 0, rebuild_invalid = 0, + rebuild_time = 0; + unsigned long long stash_ok_pages = 0, stash_fail_pages = 0, + restore_ok_pages = 0, restore_fail_pages = 0; + + mutex_lock(&sbi->connections.node_lock); + list_for_each_entry(node, &sbi->connections.node_list, list) { + peer_get(node); + mutex_unlock(&sbi->connections.node_lock); + stash_ok += node->stats.stash.total_ok; + stash_fail += 
node->stats.stash.total_fail;
+		stash_ok_pages += node->stats.stash.ok_pages;
+		stash_fail_pages += node->stats.stash.fail_pages;
+		restore_ok += node->stats.restore.total_ok;
+		restore_fail += node->stats.restore.total_fail;
+		restore_ok_pages += node->stats.restore.ok_pages;
+		restore_fail_pages += node->stats.restore.fail_pages;
+		rebuild_ok += node->stats.rebuild.total_ok;
+		rebuild_fail += node->stats.rebuild.total_fail;
+		rebuild_invalid += node->stats.rebuild.total_invalid;
+		rebuild_time += node->stats.rebuild.time;
+		peer_put(node);
+		mutex_lock(&sbi->connections.node_lock);
+	}
+	mutex_unlock(&sbi->connections.node_lock);
+
+	return snprintf(buf, PAGE_SIZE,
+			"%u %u %llu %llu\n"
+			"%u %u %llu %llu\n"
+			"%u %u %u %u\n",
+			stash_ok, stash_fail, stash_ok_pages, stash_fail_pages,
+			restore_ok, restore_fail, restore_ok_pages,
+			restore_fail_pages, rebuild_ok, rebuild_fail,
+			rebuild_invalid, rebuild_time);
+}
+
+static struct sbi_attribute sbi_peers_attr = __ATTR_RO(peers_sum_attr);
+
+const char * const flag_name[] = {
+	"READPAGES",
+	"READPAGES_OPEN",
+	"ATOMIC_OPEN",
+};
+
+static ssize_t fill_features(char *buf, unsigned long long flag)
+{
+	int i;
+	ssize_t pos = 0;
+	bool sep = false;
+	int flag_name_count = ARRAY_SIZE(flag_name);
+
+	for (i = 0; i < sizeof(flag) * BITS_PER_BYTE; ++i) {
+		if (!(flag & BIT(i)))
+			continue;
+
+		if (sep)
+			pos += snprintf(buf + pos, PAGE_SIZE - pos, "|");
+		sep = true;
+
+		if (pos >= PAGE_SIZE) {
+			pos = PAGE_SIZE;
+			break;
+		}
+
+		if (i < flag_name_count && flag_name[i])
+			pos += snprintf(buf + pos, PAGE_SIZE - pos, "%s",
+					flag_name[i]);
+		else
+			pos += snprintf(buf + pos, PAGE_SIZE - pos, "%d", i);
+
+		if (pos >= PAGE_SIZE) {
+			pos = PAGE_SIZE;
+			break;
+		}
+	}
+	pos += snprintf(buf + pos, PAGE_SIZE - pos, "\n");
+	if (pos >= PAGE_SIZE)
+		pos = PAGE_SIZE;
+
+	return pos;
+}
+
+static ssize_t sbi_features_show(struct kobject *kobj,
+				 struct sbi_attribute *attr, char *buf)
+{
+	struct hmdfs_sb_info *sbi = to_sbi(kobj);
+
+	return fill_features(buf, sbi->s_features);
+}
+
+static struct sbi_attribute sbi_features_attr = __ATTR(features, 0444,
+	sbi_features_show, NULL);
+
+static struct attribute *sbi_attrs[] = {
+	&sbi_cmd_attr.attr,
+	&sbi_status_attr.attr,
+	&sbi_statistic_attr.attr,
+	&sbi_dcache_precision_attr.attr,
+	&sbi_dcache_threshold_attr.attr,
+	&sbi_dcache_timeout_attr.attr,
+	&sbi_write_cache_timeout_sec_attr.attr,
+	&sbi_local_op_attr.attr,
+	&sbi_delay_resp_attr.attr,
+	&sbi_wb_timeout_ms_attr.attr,
+	&sbi_dirty_writeback_centisecs_attr.attr,
+	&sbi_dirty_file_background_bytes_attr.attr,
+	&sbi_dirty_fs_background_bytes_attr.attr,
+	&sbi_dirty_file_bytes_attr.attr,
+	&sbi_dirty_fs_bytes_attr.attr,
+	&sbi_dirty_writeback_autothresh_attr.attr,
+	&sbi_dirty_writeback_timelimit_attr.attr,
+	&sbi_dirty_thresh_lowerlimit_attr.attr,
+	&sbi_dirty_writeback_control_attr.attr,
+	&sbi_dirty_writeback_stats_attr.attr,
+	&sbi_srv_dirty_thresh_attr.attr,
+	&sbi_srv_dirty_wb_control_attr.attr,
+	&sbi_node_evt_cb_delay_attr.attr,
+	&sbi_srv_req_max_active_attr.attr,
+	&sbi_pending_message_attr.attr,
+	&sbi_peer_opened_fd_attr.attr,
+	&sbi_server_cache_file_attr.attr,
+	&sbi_client_cache_file_attr.attr,
+	&sbi_seq_attr.attr,
+	&sbi_peers_attr.attr,
+	&sbi_features_attr.attr,
+	NULL,
+};
+
+static ssize_t sbi_attr_show(struct kobject *kobj, struct attribute *attr,
+			     char *buf)
+{
+	struct sbi_attribute *sbi_attr = to_sbi_attr(attr);
+
+	if (!sbi_attr->show)
+		return -EIO;
+	return sbi_attr->show(kobj, sbi_attr, buf);
+}
+
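+/*
+ * sbi_attr_show() above and sbi_attr_store() below route the generic
+ * sysfs callbacks to the typed handlers in struct sbi_attribute, so a
+ * new tunable only needs a handler pair, an __ATTR() instance and an
+ * entry in sbi_attrs[]. A minimal, hypothetical read-only attribute
+ * would look like:
+ *
+ *   static ssize_t example_show(struct kobject *kobj,
+ *                               struct sbi_attribute *attr, char *buf)
+ *   {
+ *           return snprintf(buf, PAGE_SIZE, "%u\n", to_sbi(kobj)->seq);
+ *   }
+ *   static struct sbi_attribute sbi_example_attr = __ATTR_RO(example);
+ */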
+static ssize_t sbi_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct sbi_attribute *sbi_attr = to_sbi_attr(attr); + + if (!sbi_attr->store) + return -EIO; + return sbi_attr->store(kobj, sbi_attr, buf, len); +} + +static const struct sysfs_ops sbi_sysfs_ops = { + .show = sbi_attr_show, + .store = sbi_attr_store, +}; + +static void sbi_release(struct kobject *kobj) +{ + struct hmdfs_sb_info *sbi = to_sbi(kobj); + + complete(&sbi->s_kobj_unregister); +} + +static struct kobj_type sbi_ktype = { + .sysfs_ops = &sbi_sysfs_ops, + .default_attrs = sbi_attrs, + .release = sbi_release, +}; + +static inline struct sbi_cmd_attribute *to_sbi_cmd_attr(struct attribute *x) +{ + return container_of(x, struct sbi_cmd_attribute, attr); +} + +static inline struct hmdfs_sb_info *cmd_kobj_to_sbi(struct kobject *x) +{ + return container_of(x, struct hmdfs_sb_info, s_cmd_timeout_kobj); +} + +static ssize_t cmd_timeout_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + int cmd = to_sbi_cmd_attr(attr)->command; + struct hmdfs_sb_info *sbi = cmd_kobj_to_sbi(kobj); + + if (cmd < 0 || cmd >= F_SIZE) + return 0; + + return snprintf(buf, PAGE_SIZE, "%u\n", get_cmd_timeout(sbi, cmd)); +} + +static ssize_t cmd_timeout_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + unsigned int value; + int cmd = to_sbi_cmd_attr(attr)->command; + int ret = kstrtouint(skip_spaces(buf), 0, &value); + struct hmdfs_sb_info *sbi = cmd_kobj_to_sbi(kobj); + + if (cmd < 0 || cmd >= F_SIZE) + return -EINVAL; + + if (!ret) + set_cmd_timeout(sbi, cmd, value); + + return ret ? ret : len; +} + +#define HMDFS_CMD_ATTR(_name, _cmd) \ + static struct sbi_cmd_attribute hmdfs_attr_##_name = { \ + .attr = { .name = __stringify(_name), .mode = 0664 }, \ + .command = (_cmd), \ + } + +HMDFS_CMD_ATTR(open, F_OPEN); +HMDFS_CMD_ATTR(release, F_RELEASE); +HMDFS_CMD_ATTR(readpage, F_READPAGE); +HMDFS_CMD_ATTR(writepage, F_WRITEPAGE); +HMDFS_CMD_ATTR(iterate, F_ITERATE); +HMDFS_CMD_ATTR(rmdir, F_RMDIR); +HMDFS_CMD_ATTR(unlink, F_UNLINK); +HMDFS_CMD_ATTR(rename, F_RENAME); +HMDFS_CMD_ATTR(setattr, F_SETATTR); +HMDFS_CMD_ATTR(statfs, F_STATFS); +HMDFS_CMD_ATTR(drop_push, F_DROP_PUSH); +HMDFS_CMD_ATTR(getattr, F_GETATTR); +HMDFS_CMD_ATTR(fsync, F_FSYNC); +HMDFS_CMD_ATTR(syncfs, F_SYNCFS); +HMDFS_CMD_ATTR(getxattr, F_GETXATTR); +HMDFS_CMD_ATTR(setxattr, F_SETXATTR); +HMDFS_CMD_ATTR(listxattr, F_LISTXATTR); + +#define ATTR_LIST(_name) (&hmdfs_attr_##_name.attr) + +static struct attribute *sbi_timeout_attrs[] = { + ATTR_LIST(open), ATTR_LIST(release), + ATTR_LIST(readpage), ATTR_LIST(writepage), + ATTR_LIST(iterate), ATTR_LIST(rmdir), + ATTR_LIST(unlink), ATTR_LIST(rename), + ATTR_LIST(setattr), + ATTR_LIST(statfs), ATTR_LIST(drop_push), + ATTR_LIST(getattr), ATTR_LIST(fsync), + ATTR_LIST(syncfs), ATTR_LIST(getxattr), + ATTR_LIST(setxattr), ATTR_LIST(listxattr), + NULL +}; + +static const struct sysfs_ops sbi_cmd_sysfs_ops = { + .show = cmd_timeout_show, + .store = cmd_timeout_store, +}; + +static void sbi_timeout_release(struct kobject *kobj) +{ + struct hmdfs_sb_info *sbi = container_of(kobj, struct hmdfs_sb_info, + s_cmd_timeout_kobj); + + complete(&sbi->s_timeout_kobj_unregister); +} + +static struct kobj_type sbi_timeout_ktype = { + .sysfs_ops = &sbi_cmd_sysfs_ops, + .default_attrs = sbi_timeout_attrs, + .release = sbi_timeout_release, +}; + +void hmdfs_release_sysfs(struct hmdfs_sb_info *sbi) +{ + kobject_put(&sbi->s_cmd_timeout_kobj); + 
wait_for_completion(&sbi->s_timeout_kobj_unregister);
+	/*
+	 * sbi_release() completes s_kobj_unregister once the reference
+	 * dropped below is the last one, so the wait guarantees the kobject
+	 * is fully released before the caller frees sbi.
+	 */
+	kobject_put(&sbi->kobj);
+	wait_for_completion(&sbi->s_kobj_unregister);
+}
+
+int hmdfs_register_sysfs(const char *name, struct hmdfs_sb_info *sbi)
+{
+	int ret;
+	struct kobject *kobj = NULL;
+
+	mutex_lock(&hmdfs_sysfs_mutex);
+	kobj = kset_find_obj(hmdfs_kset, name);
+	if (kobj) {
+		hmdfs_err("mount failed, already exists");
+		kobject_put(kobj);
+		mutex_unlock(&hmdfs_sysfs_mutex);
+		return -EEXIST;
+	}
+
+	sbi->kobj.kset = hmdfs_kset;
+	init_completion(&sbi->s_kobj_unregister);
+	ret = kobject_init_and_add(&sbi->kobj, &sbi_ktype,
+				   &hmdfs_kset->kobj, "%s", name);
+	if (!ret)
+		sysfs_change_owner(&sbi->kobj, KUIDT_INIT(1000),
+				   KGIDT_INIT(1000));
+	mutex_unlock(&hmdfs_sysfs_mutex);
+
+	if (ret) {
+		kobject_put(&sbi->kobj);
+		wait_for_completion(&sbi->s_kobj_unregister);
+		return ret;
+	}
+
+	init_completion(&sbi->s_timeout_kobj_unregister);
+	ret = kobject_init_and_add(&sbi->s_cmd_timeout_kobj, &sbi_timeout_ktype,
+				   &sbi->kobj, "cmd_timeout");
+	if (ret) {
+		hmdfs_release_sysfs(sbi);
+		return ret;
+	}
+
+	kobject_uevent(&sbi->kobj, KOBJ_ADD);
+	return 0;
+}
+
+void hmdfs_unregister_sysfs(struct hmdfs_sb_info *sbi)
+{
+	kobject_del(&sbi->s_cmd_timeout_kobj);
+	kobject_del(&sbi->kobj);
+}
+
+static inline int to_sysfs_fmt_evt(unsigned int evt)
+{
+	return evt == RAW_NODE_EVT_NR ? -1 : evt;
+}
+
+static ssize_t features_show(struct kobject *kobj, struct peer_attribute *attr,
+			     char *buf)
+{
+	struct hmdfs_peer *peer = to_peer(kobj);
+
+	return fill_features(buf, peer->features);
+}
+
+static ssize_t event_show(struct kobject *kobj, struct peer_attribute *attr,
+			  char *buf)
+{
+	struct hmdfs_peer *peer = to_peer(kobj);
+
+	return snprintf(buf, PAGE_SIZE,
+			"cur_async evt %d seq %u\n"
+			"cur_sync evt %d seq %u\n"
+			"pending evt %d seq %u\n"
+			"merged evt %u\n"
+			"dup_drop evt %u %u\n"
+			"waiting evt %u %u\n"
+			"seq_tbl %u %u %u %u\n"
+			"seq_rd_idx %u\n"
+			"seq_wr_idx %u\n",
+			to_sysfs_fmt_evt(peer->cur_evt[0]),
+			peer->cur_evt_seq[0],
+			to_sysfs_fmt_evt(peer->cur_evt[1]),
+			peer->cur_evt_seq[1],
+			to_sysfs_fmt_evt(peer->pending_evt),
+			peer->pending_evt_seq,
+			peer->merged_evt,
+			peer->dup_evt[RAW_NODE_EVT_OFF],
+			peer->dup_evt[RAW_NODE_EVT_ON],
+			peer->waiting_evt[RAW_NODE_EVT_OFF],
+			peer->waiting_evt[RAW_NODE_EVT_ON],
+			peer->seq_tbl[0], peer->seq_tbl[1], peer->seq_tbl[2],
+			peer->seq_tbl[3],
+			peer->seq_rd_idx % RAW_NODE_EVT_MAX_NR,
+			peer->seq_wr_idx % RAW_NODE_EVT_MAX_NR);
+}
+
+static ssize_t stash_show(struct kobject *kobj, struct peer_attribute *attr,
+			  char *buf)
+{
+	struct hmdfs_peer *peer = to_peer(kobj);
+
+	return snprintf(buf, PAGE_SIZE,
+			"cur_ok %u\n"
+			"cur_nothing %u\n"
+			"cur_fail %u\n"
+			"total_ok %u\n"
+			"total_nothing %u\n"
+			"total_fail %u\n"
+			"ok_pages %llu\n"
+			"fail_pages %llu\n",
+			peer->stats.stash.cur_ok,
+			peer->stats.stash.cur_nothing,
+			peer->stats.stash.cur_fail,
+			peer->stats.stash.total_ok,
+			peer->stats.stash.total_nothing,
+			peer->stats.stash.total_fail,
+			peer->stats.stash.ok_pages,
+			peer->stats.stash.fail_pages);
+}
+
+static ssize_t restore_show(struct kobject *kobj, struct peer_attribute *attr,
+			    char *buf)
+{
+	struct hmdfs_peer *peer = to_peer(kobj);
+
+	return snprintf(buf, PAGE_SIZE,
+			"cur_ok %u\n"
+			"cur_fail %u\n"
+			"cur_keep %u\n"
+			"total_ok %u\n"
+			"total_fail %u\n"
+			"total_keep %u\n"
+			"ok_pages %llu\n"
+			"fail_pages %llu\n",
+			peer->stats.restore.cur_ok,
+			peer->stats.restore.cur_fail,
+			peer->stats.restore.cur_keep,
+			peer->stats.restore.total_ok,
+			peer->stats.restore.total_fail,
+
peer->stats.restore.total_keep, + peer->stats.restore.ok_pages, + peer->stats.restore.fail_pages); +} + +static ssize_t rebuild_show(struct kobject *kobj, struct peer_attribute *attr, + char *buf) +{ + struct hmdfs_peer *peer = to_peer(kobj); + + return snprintf(buf, PAGE_SIZE, + "cur_ok %u\n" + "cur_fail %u\n" + "cur_invalid %u\n" + "total_ok %u\n" + "total_fail %u\n" + "total_invalid %u\n" + "time %u\n", + peer->stats.rebuild.cur_ok, + peer->stats.rebuild.cur_fail, + peer->stats.rebuild.cur_invalid, + peer->stats.rebuild.total_ok, + peer->stats.rebuild.total_fail, + peer->stats.rebuild.total_invalid, + peer->stats.rebuild.time); +} + +static struct peer_attribute peer_features_attr = __ATTR_RO(features); +static struct peer_attribute peer_event_attr = __ATTR_RO(event); +static struct peer_attribute peer_stash_attr = __ATTR_RO(stash); +static struct peer_attribute peer_restore_attr = __ATTR_RO(restore); +static struct peer_attribute peer_rebuild_attr = __ATTR_RO(rebuild); + +static struct attribute *peer_attrs[] = { + &peer_features_attr.attr, + &peer_event_attr.attr, + &peer_stash_attr.attr, + &peer_restore_attr.attr, + &peer_rebuild_attr.attr, + NULL, +}; + +static ssize_t peer_attr_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct peer_attribute *peer_attr = to_peer_attr(attr); + + if (!peer_attr->show) + return -EIO; + return peer_attr->show(kobj, peer_attr, buf); +} + +static ssize_t peer_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct peer_attribute *peer_attr = to_peer_attr(attr); + + if (!peer_attr->store) + return -EIO; + return peer_attr->store(kobj, peer_attr, buf, len); +} + +static const struct sysfs_ops peer_sysfs_ops = { + .show = peer_attr_show, + .store = peer_attr_store, +}; + +static void peer_sysfs_release(struct kobject *kobj) +{ + struct hmdfs_peer *peer = to_peer(kobj); + + complete(&peer->kobj_unregister); +} + +static struct kobj_type peer_ktype = { + .sysfs_ops = &peer_sysfs_ops, + .default_attrs = peer_attrs, + .release = peer_sysfs_release, +}; + +int hmdfs_register_peer_sysfs(struct hmdfs_sb_info *sbi, + struct hmdfs_peer *peer) +{ + int err = 0; + + init_completion(&peer->kobj_unregister); + err = kobject_init_and_add(&peer->kobj, &peer_ktype, &sbi->kobj, + "peer_%llu", peer->device_id); + return err; +} + +void hmdfs_release_peer_sysfs(struct hmdfs_peer *peer) +{ + kobject_del(&peer->kobj); + kobject_put(&peer->kobj); + wait_for_completion(&peer->kobj_unregister); +} + +void notify(struct hmdfs_peer *node, struct notify_param *param) +{ + struct hmdfs_sb_info *sbi = node->sbi; + int in_len; + + if (!param) + return; + spin_lock(&sbi->notify_fifo_lock); + in_len = + kfifo_in(&sbi->notify_fifo, param, sizeof(struct notify_param)); + spin_unlock(&sbi->notify_fifo_lock); + if (in_len != sizeof(struct notify_param)) + return; + sysfs_notify(&sbi->kobj, NULL, "cmd"); +} + +int hmdfs_sysfs_init(void) +{ + hmdfs_kset = kset_create_and_add("hmdfs", NULL, fs_kobj); + if (!hmdfs_kset) + return -ENOMEM; + + return 0; +} + +void hmdfs_sysfs_exit(void) +{ + kset_unregister(hmdfs_kset); + hmdfs_kset = NULL; +} diff --git a/fs/hmdfs/comm/device_node.h b/fs/hmdfs/comm/device_node.h new file mode 100755 index 000000000..fdc64d467 --- /dev/null +++ b/fs/hmdfs/comm/device_node.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/device_node.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
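+ *
+ * Control-command formats, notify parameters and the sysfs attribute
+ * types for the hmdfs superblock and peer kobjects.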
+ */
+
+#ifndef HMDFS_DEVICE_NODE_H
+#define HMDFS_DEVICE_NODE_H
+
+#include "hmdfs.h"
+#include "transport.h"
+
+enum CTRL_NODE_CMD {
+	CMD_UPDATE_SOCKET = 0,
+	CMD_UPDATE_DEVSL,
+	CMD_OFF_LINE,
+	CMD_OFF_LINE_ALL,
+	CMD_CNT,
+};
+
+struct update_socket_param {
+	int32_t cmd;
+	int32_t newfd;
+	uint32_t devsl;
+	uint8_t status;
+	uint8_t masterkey[HMDFS_KEY_SIZE];
+	uint8_t cid[HMDFS_CID_SIZE];
+} __packed;
+
+struct update_devsl_param {
+	int32_t cmd;
+	uint32_t devsl;
+	uint8_t cid[HMDFS_CID_SIZE];
+} __packed;
+
+struct offline_param {
+	int32_t cmd;
+	uint8_t remote_cid[HMDFS_CID_SIZE];
+} __packed;
+
+struct offline_all_param {
+	int32_t cmd;
+} __packed;
+
+enum NOTIFY {
+	NOTIFY_GET_SESSION,
+	NOTIFY_OFFLINE,
+	NOTIFY_NONE,
+	NOTIFY_CNT,
+};
+
+struct notify_param {
+	int32_t notify;
+	int32_t fd;
+	uint8_t remote_cid[HMDFS_CID_SIZE];
+} __packed;
+
+struct sbi_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct kobject *kobj, struct sbi_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct kobject *kobj, struct sbi_attribute *attr,
+			 const char *buf, size_t len);
+};
+
+struct peer_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct kobject *kobj, struct peer_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct kobject *kobj, struct peer_attribute *attr,
+			 const char *buf, size_t len);
+};
+
+struct sbi_cmd_attribute {
+	struct attribute attr;
+	int command;
+};
+
+void notify(struct hmdfs_peer *node, struct notify_param *param);
+int hmdfs_register_sysfs(const char *name, struct hmdfs_sb_info *sbi);
+void hmdfs_unregister_sysfs(struct hmdfs_sb_info *sbi);
+void hmdfs_release_sysfs(struct hmdfs_sb_info *sbi);
+int hmdfs_register_peer_sysfs(struct hmdfs_sb_info *sbi,
+			      struct hmdfs_peer *peer);
+void hmdfs_release_peer_sysfs(struct hmdfs_peer *peer);
+int hmdfs_sysfs_init(void);
+void hmdfs_sysfs_exit(void);
+
+static inline struct sbi_attribute *to_sbi_attr(struct attribute *x)
+{
+	return container_of(x, struct sbi_attribute, attr);
+}
+
+static inline struct hmdfs_sb_info *to_sbi(struct kobject *x)
+{
+	return container_of(x, struct hmdfs_sb_info, kobj);
+}
+
+static inline struct peer_attribute *to_peer_attr(struct attribute *x)
+{
+	return container_of(x, struct peer_attribute, attr);
+}
+
+static inline struct hmdfs_peer *to_peer(struct kobject *x)
+{
+	return container_of(x, struct hmdfs_peer, kobj);
+}
+#endif
diff --git a/fs/hmdfs/comm/message_verify.c b/fs/hmdfs/comm/message_verify.c
new file mode 100755
index 000000000..fd76658ef
--- /dev/null
+++ b/fs/hmdfs/comm/message_verify.c
@@ -0,0 +1,980 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/comm/message_verify.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
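+ *
+ * Length and content sanity checks for all hmdfs on-wire messages.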
+ */
+
+#include "message_verify.h"
+
+#include <linux/errno.h>
+#include <linux/limits.h>
+#include <linux/statfs.h>
+
+#include "connection.h"
+#include "hmdfs.h"
+#include "hmdfs_server.h"
+
+size_t message_length[C_FLAG_SIZE][F_SIZE][HMDFS_MESSAGE_MIN_MAX];
+bool need_response[F_SIZE];
+
+void hmdfs_message_verify_init(void)
+{
+	int flag, cmd;
+
+	for (cmd = 0; cmd < F_SIZE; cmd++)
+		need_response[cmd] = true;
+	need_response[F_RELEASE] = false;
+	need_response[F_CONNECT_REKEY] = false;
+	need_response[F_DROP_PUSH] = false;
+
+	for (flag = 0; flag < C_FLAG_SIZE; flag++) {
+		for (cmd = 0; cmd < F_SIZE; cmd++) {
+			message_length[flag][cmd][HMDFS_MESSAGE_MIN_INDEX] = 1;
+			message_length[flag][cmd][HMDFS_MESSAGE_MAX_INDEX] = 0;
+			message_length[flag][cmd][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+				MESSAGE_LEN_JUDGE_RANGE;
+		}
+	}
+
+	message_length[C_REQUEST][F_OPEN][HMDFS_MESSAGE_MIN_INDEX] =
+		sizeof(struct open_request);
+	message_length[C_REQUEST][F_OPEN][HMDFS_MESSAGE_MAX_INDEX] =
+		sizeof(struct open_request) + PATH_MAX + 1;
+	message_length[C_REQUEST][F_OPEN][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+		MESSAGE_LEN_JUDGE_RANGE;
+	message_length[C_RESPONSE][F_OPEN][HMDFS_MESSAGE_MIN_INDEX] = 0;
+	message_length[C_RESPONSE][F_OPEN][HMDFS_MESSAGE_MAX_INDEX] =
+		sizeof(struct open_response);
+	message_length[C_RESPONSE][F_OPEN][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+		MESSAGE_LEN_JUDGE_BIN;
+
+	message_length[C_REQUEST][F_ATOMIC_OPEN][HMDFS_MESSAGE_MIN_INDEX] =
+		sizeof(struct atomic_open_request);
+	message_length[C_REQUEST][F_ATOMIC_OPEN][HMDFS_MESSAGE_MAX_INDEX] =
+		sizeof(struct atomic_open_request) + PATH_MAX + NAME_MAX + 1;
+	message_length[C_REQUEST][F_ATOMIC_OPEN][HMDFS_MESSAGE_LEN_JUDGE_INDEX]
+		= MESSAGE_LEN_JUDGE_RANGE;
+	message_length[C_RESPONSE][F_ATOMIC_OPEN][HMDFS_MESSAGE_MIN_INDEX] = 0;
+	message_length[C_RESPONSE][F_ATOMIC_OPEN][HMDFS_MESSAGE_MAX_INDEX] =
+		sizeof(struct atomic_open_response);
+	message_length[C_RESPONSE][F_ATOMIC_OPEN][HMDFS_MESSAGE_LEN_JUDGE_INDEX]
+		= MESSAGE_LEN_JUDGE_BIN;
+
+	message_length[C_REQUEST][F_RELEASE][HMDFS_MESSAGE_MIN_INDEX] =
+		sizeof(struct release_request);
+	message_length[C_REQUEST][F_RELEASE][HMDFS_MESSAGE_MAX_INDEX] =
+		sizeof(struct release_request);
+	message_length[C_REQUEST][F_RELEASE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+		MESSAGE_LEN_JUDGE_BIN;
+
+	message_length[C_REQUEST][F_FSYNC][HMDFS_MESSAGE_MIN_INDEX] =
+		sizeof(struct fsync_request);
+	message_length[C_REQUEST][F_FSYNC][HMDFS_MESSAGE_MAX_INDEX] =
+		sizeof(struct fsync_request);
+	message_length[C_REQUEST][F_FSYNC][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+		MESSAGE_LEN_JUDGE_BIN;
+	message_length[C_RESPONSE][F_FSYNC][HMDFS_MESSAGE_MIN_INDEX] = 0;
+	message_length[C_RESPONSE][F_FSYNC][HMDFS_MESSAGE_MAX_INDEX] = 0;
+	message_length[C_RESPONSE][F_FSYNC][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+		MESSAGE_LEN_JUDGE_BIN;
+
+	message_length[C_REQUEST][F_READPAGE][HMDFS_MESSAGE_MIN_INDEX] =
+		sizeof(struct readpage_request);
+	message_length[C_REQUEST][F_READPAGE][HMDFS_MESSAGE_MAX_INDEX] =
+		sizeof(struct readpage_request);
+	message_length[C_REQUEST][F_READPAGE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+		MESSAGE_LEN_JUDGE_BIN;
+	message_length[C_RESPONSE][F_READPAGE][HMDFS_MESSAGE_MIN_INDEX] = 0;
+	message_length[C_RESPONSE][F_READPAGE][HMDFS_MESSAGE_MAX_INDEX] =
+		HMDFS_PAGE_SIZE;
+	message_length[C_RESPONSE][F_READPAGE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] =
+		MESSAGE_LEN_JUDGE_RANGE;
+
+	message_length[C_REQUEST][F_READPAGES][HMDFS_MESSAGE_MIN_INDEX] =
+		sizeof(struct readpages_request);
+	message_length[C_REQUEST][F_READPAGES][HMDFS_MESSAGE_MAX_INDEX] =
+		sizeof(struct
readpages_request); + message_length[C_REQUEST][F_READPAGES][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + message_length[C_RESPONSE][F_READPAGES][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_READPAGES][HMDFS_MESSAGE_MAX_INDEX] = + HMDFS_READPAGES_NR_MAX * HMDFS_PAGE_SIZE; + message_length[C_RESPONSE][F_READPAGES][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + + message_length[C_REQUEST][F_READPAGES_OPEN][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct readpages_open_request); + message_length[C_REQUEST][F_READPAGES_OPEN][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct readpages_open_request) + PATH_MAX + 1; + message_length[C_REQUEST][F_READPAGES_OPEN][ + HMDFS_MESSAGE_LEN_JUDGE_INDEX] = MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_READPAGES_OPEN][HMDFS_MESSAGE_MIN_INDEX] = + 0; + message_length[C_RESPONSE][F_READPAGES_OPEN][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct readpages_open_response) + + HMDFS_READPAGES_NR_MAX * HMDFS_PAGE_SIZE; + message_length[C_RESPONSE][F_READPAGES_OPEN][ + HMDFS_MESSAGE_LEN_JUDGE_INDEX] = MESSAGE_LEN_JUDGE_RANGE; + + message_length[C_REQUEST][F_WRITEPAGE][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct writepage_request) + HMDFS_PAGE_SIZE; + message_length[C_REQUEST][F_WRITEPAGE][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct writepage_request) + HMDFS_PAGE_SIZE; + message_length[C_REQUEST][F_WRITEPAGE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + message_length[C_RESPONSE][F_WRITEPAGE][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_WRITEPAGE][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct writepage_response); + message_length[C_RESPONSE][F_WRITEPAGE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_ITERATE][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct readdir_request); + message_length[C_REQUEST][F_ITERATE][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct readdir_request) + PATH_MAX + 1; + message_length[C_REQUEST][F_ITERATE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_ITERATE][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_ITERATE][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(__le64) + HMDFS_MAX_MESSAGE_LEN; + message_length[C_RESPONSE][F_ITERATE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + + message_length[C_REQUEST][F_MKDIR][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct mkdir_request); + message_length[C_REQUEST][F_MKDIR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct mkdir_request) + PATH_MAX + NAME_MAX + 2; + message_length[C_REQUEST][F_MKDIR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_MKDIR][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct hmdfs_inodeinfo_response); + message_length[C_RESPONSE][F_MKDIR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct hmdfs_inodeinfo_response); + message_length[C_RESPONSE][F_MKDIR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_CREATE][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct create_request); + message_length[C_REQUEST][F_CREATE][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct create_request) + PATH_MAX + NAME_MAX + 2; + message_length[C_REQUEST][F_CREATE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_CREATE][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct hmdfs_inodeinfo_response); + message_length[C_RESPONSE][F_CREATE][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct hmdfs_inodeinfo_response); + 
message_length[C_RESPONSE][F_CREATE][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_RMDIR][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct rmdir_request); + message_length[C_REQUEST][F_RMDIR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct rmdir_request) + PATH_MAX + NAME_MAX + 2; + message_length[C_REQUEST][F_RMDIR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_RMDIR][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_RMDIR][HMDFS_MESSAGE_MAX_INDEX] = 0; + message_length[C_RESPONSE][F_RMDIR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_UNLINK][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct unlink_request); + message_length[C_REQUEST][F_UNLINK][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct unlink_request) + PATH_MAX + NAME_MAX + 2; + message_length[C_REQUEST][F_UNLINK][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_UNLINK][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_UNLINK][HMDFS_MESSAGE_MAX_INDEX] = 0; + message_length[C_RESPONSE][F_UNLINK][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_RENAME][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct rename_request); + message_length[C_REQUEST][F_RENAME][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct rename_request) + 4 + 4 * PATH_MAX; + message_length[C_REQUEST][F_RENAME][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_RENAME][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_RENAME][HMDFS_MESSAGE_MAX_INDEX] = 0; + message_length[C_RESPONSE][F_RENAME][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_SETATTR][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct setattr_request); + message_length[C_REQUEST][F_SETATTR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct setattr_request) + PATH_MAX + 1; + message_length[C_REQUEST][F_SETATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_SETATTR][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_SETATTR][HMDFS_MESSAGE_MAX_INDEX] = 0; + message_length[C_RESPONSE][F_SETATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_GETATTR][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct getattr_request); + message_length[C_REQUEST][F_GETATTR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct getattr_request) + PATH_MAX + 1; + message_length[C_REQUEST][F_GETATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_GETATTR][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_GETATTR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct getattr_response); + message_length[C_RESPONSE][F_GETATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_STATFS][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct statfs_request); + message_length[C_REQUEST][F_STATFS][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct statfs_request) + PATH_MAX + 1; + message_length[C_REQUEST][F_STATFS][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_STATFS][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_STATFS][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct statfs_response); + message_length[C_RESPONSE][F_STATFS][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_SYNCFS][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct 
syncfs_request); + message_length[C_REQUEST][F_SYNCFS][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct syncfs_request); + message_length[C_REQUEST][F_SYNCFS][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + message_length[C_RESPONSE][F_SYNCFS][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_SYNCFS][HMDFS_MESSAGE_MAX_INDEX] = 0; + message_length[C_RESPONSE][F_SYNCFS][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_GETXATTR][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct getxattr_request); + message_length[C_REQUEST][F_GETXATTR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct getxattr_request) + PATH_MAX + XATTR_NAME_MAX + 2; + message_length[C_REQUEST][F_GETXATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_GETXATTR][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_GETXATTR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct getxattr_response) + HMDFS_XATTR_SIZE_MAX; + message_length[C_RESPONSE][F_GETXATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + + message_length[C_REQUEST][F_SETXATTR][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct setxattr_request); + message_length[C_REQUEST][F_SETXATTR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct setxattr_request) + PATH_MAX + XATTR_NAME_MAX + + HMDFS_XATTR_SIZE_MAX + 2; + message_length[C_REQUEST][F_SETXATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_SETXATTR][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_SETXATTR][HMDFS_MESSAGE_MAX_INDEX] = 0; + message_length[C_RESPONSE][F_SETXATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_LISTXATTR][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct listxattr_request); + message_length[C_REQUEST][F_LISTXATTR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct listxattr_request) + PATH_MAX + 1; + message_length[C_REQUEST][F_LISTXATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + message_length[C_RESPONSE][F_LISTXATTR][HMDFS_MESSAGE_MIN_INDEX] = 0; + message_length[C_RESPONSE][F_LISTXATTR][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct listxattr_response) + HMDFS_LISTXATTR_SIZE_MAX; + message_length[C_RESPONSE][F_LISTXATTR][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; + + message_length[C_REQUEST][F_CONNECT_REKEY][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct connection_rekey_request); + message_length[C_REQUEST][F_CONNECT_REKEY][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct connection_rekey_request); + message_length[C_REQUEST][F_CONNECT_REKEY] + [HMDFS_MESSAGE_LEN_JUDGE_INDEX] = MESSAGE_LEN_JUDGE_BIN; + + message_length[C_REQUEST][F_DROP_PUSH][HMDFS_MESSAGE_MIN_INDEX] = + sizeof(struct drop_push_request); + message_length[C_REQUEST][F_DROP_PUSH][HMDFS_MESSAGE_MAX_INDEX] = + sizeof(struct drop_push_request) + PATH_MAX + 1; + message_length[C_REQUEST][F_DROP_PUSH][HMDFS_MESSAGE_LEN_JUDGE_INDEX] = + MESSAGE_LEN_JUDGE_RANGE; +} + +static void find_first_no_slash(const char **name, int *len) +{ + const char *s = *name; + int l = *len; + + while (*s == '/' && l > 0) { + s++; + l--; + } + + *name = s; + *len = l; +} + +static void find_first_slash(const char **name, int *len) +{ + const char *s = *name; + int l = *len; + + while (*s != '/' && l > 0) { + s++; + l--; + } + + *name = s; + *len = l; +} + +static bool path_contain_dotdot(const char *name, int len) +{ + while (true) { + find_first_no_slash(&name, &len); + + if (len == 0) + return false; + + if (len >= 2 && name[0] == '.' 
&& name[1] == '.' &&
+		    (len == 2 || name[2] == '/'))
+			return true;
+
+		find_first_slash(&name, &len);
+	}
+}
+
+static int hmdfs_open_message_verify(int flag, size_t len, void *data)
+{
+	struct open_request *req = NULL;
+	size_t tmp_len = 0;
+	int path_len;
+
+	if (flag != C_REQUEST || !data)
+		return 0;
+
+	req = data;
+	path_len = le32_to_cpu(req->path_len);
+	tmp_len = strnlen(req->buf, PATH_MAX);
+	if (tmp_len == PATH_MAX ||
+	    tmp_len != len - sizeof(struct open_request) - 1 ||
+	    path_len != tmp_len) {
+		hmdfs_err("verify fail");
+		return -EINVAL;
+	}
+
+	/*
+	 * We only allow the server to open files inside hmdfs, so make
+	 * sure the path does not contain "..".
+	 */
+	if (path_contain_dotdot(req->buf, path_len)) {
+		hmdfs_err("verify fail, path contains dotdot");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int hmdfs_atomic_open_verify(int flag, size_t len, void *data)
+{
+	struct atomic_open_request *req = NULL;
+	size_t total_len;
+	size_t path_len;
+	size_t max_path_size;
+	size_t file_len;
+	size_t max_file_size;
+
+	if (flag != C_REQUEST || !data)
+		return 0;
+
+	req = data;
+	total_len = len - sizeof(*req);
+	max_path_size = min_t(size_t, PATH_MAX, total_len);
+	path_len = strnlen(req->buf, max_path_size);
+	/* the file name needs at least 2 bytes */
+	if (path_len == max_path_size || path_len + 3 > total_len) {
+		hmdfs_err("verify fail, len %zu, path_len %zu", len, path_len);
+		return -EINVAL;
+	}
+
+	max_file_size = min_t(size_t, NAME_MAX + 1, total_len - path_len - 1);
+	file_len = strnlen(req->buf + path_len + 1, max_file_size);
+
+	if (file_len == max_file_size ||
+	    total_len != path_len + 1 + file_len + 1 ||
+	    le32_to_cpu(req->path_len) != path_len ||
+	    le32_to_cpu(req->file_len) != file_len) {
+		hmdfs_err("verify fail total len %zu path_len %zu, declared path len %u, file_len %zu, declared file_len %u",
+			  total_len, path_len, le32_to_cpu(req->path_len),
+			  file_len, le32_to_cpu(req->file_len));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int hmdfs_iterate_verify(int flag, size_t len, void *data)
+{
+	int err = 0;
+	struct readdir_request *tmp_request = NULL;
+	char *tmp_char = NULL;
+	size_t tmp_len = 0;
+
+	if (flag == C_REQUEST) {
+		if (data) {
+			tmp_request = data;
+			tmp_char = tmp_request->path;
+			tmp_len = strnlen(tmp_char, PATH_MAX);
+		} else {
+			return err;
+		}
+
+		if (le32_to_cpu(tmp_request->path_len) != tmp_len ||
+		    len - sizeof(struct readdir_request) - 1 != tmp_len) {
+			err = -EINVAL;
+			hmdfs_err("verify fail");
+			return err;
+		}
+	}
+
+	return err;
+}
+
+/*
+ * mkdir/create/rmdir/unlink requests carry "<path>\0<name>\0" after the
+ * fixed header; both declared lengths must match what strnlen() finds.
+ */
+static int hmdfs_mkdir_verify(int flag, size_t len, void *data)
+{
+	int err = 0;
+	struct mkdir_request *tmp_request = NULL;
+	char *tmp_char = NULL;
+	size_t tmp_path_len = 0;
+	size_t tmp_name_len = 0;
+	size_t tmp_char_path_len = 0;
+	size_t tmp_char_name_len = 0;
+
+	if (flag == C_REQUEST) {
+		if (data) {
+			tmp_request = data;
+			tmp_char = tmp_request->path;
+			tmp_path_len = le32_to_cpu(tmp_request->path_len);
+			tmp_name_len = le32_to_cpu(tmp_request->name_len);
+			tmp_char_path_len = strnlen(tmp_char, PATH_MAX);
+			tmp_char_name_len = strnlen(
+				tmp_char + tmp_char_path_len + 1, NAME_MAX);
+		} else {
+			return err;
+		}
+
+		if (tmp_path_len != tmp_char_path_len ||
+		    tmp_name_len != tmp_char_name_len ||
+		    len - sizeof(struct mkdir_request) !=
+			    tmp_path_len + 1 + tmp_name_len + 1) {
+			err = -EINVAL;
+			hmdfs_err("verify fail");
+			return err;
+		}
+	}
+	return err;
+}
+
+static int hmdfs_create_verify(int flag, size_t len, void *data)
+{
+	int err = 0;
+	struct create_request *tmp_request = NULL;
+ char *tmp_char = NULL; + size_t tmp_path_len = 0; + size_t tmp_name_len = 0; + size_t tmp_char_path_len = 0; + size_t tmp_char_name_len = 0; + + if (flag == C_REQUEST) { + if (data) { + tmp_request = data; + tmp_char = tmp_request->path; + tmp_path_len = le32_to_cpu(tmp_request->path_len); + tmp_name_len = le32_to_cpu(tmp_request->name_len); + tmp_char_path_len = strnlen(tmp_char, PATH_MAX); + tmp_char_name_len = strnlen( + tmp_char + tmp_char_path_len + 1, NAME_MAX); + } else { + return err; + } + + if (tmp_path_len != tmp_char_path_len || + tmp_name_len != tmp_char_name_len || + len - sizeof(struct create_request) != + tmp_path_len + 1 + tmp_name_len + 1) { + err = -EINVAL; + hmdfs_err("verify fail"); + return err; + } + } + return err; +} + +static int hmdfs_rmdir_verify(int flag, size_t len, void *data) +{ + int err = 0; + struct rmdir_request *tmp_request = NULL; + char *tmp_char = NULL; + size_t tmp_path_len = 0; + size_t tmp_name_len = 0; + size_t tmp_char_path_len = 0; + size_t tmp_char_name_len = 0; + + if (flag == C_REQUEST) { + if (data) { + tmp_request = data; + tmp_char = tmp_request->path; + tmp_path_len = le32_to_cpu(tmp_request->path_len); + tmp_name_len = le32_to_cpu(tmp_request->name_len); + tmp_char_path_len = strnlen(tmp_char, PATH_MAX); + tmp_char_name_len = strnlen( + tmp_char + tmp_char_path_len + 1, NAME_MAX); + } else { + return err; + } + + if (tmp_path_len != tmp_char_path_len || + tmp_name_len != tmp_char_name_len || + len - sizeof(struct rmdir_request) != + tmp_path_len + 1 + tmp_name_len + 1) { + err = -EINVAL; + hmdfs_err("verify fail"); + return err; + } + } + + return err; +} + +static int hmdfs_unlink_verify(int flag, size_t len, void *data) +{ + int err = 0; + struct unlink_request *tmp_request = NULL; + char *tmp_char = NULL; + size_t tmp_path_len = 0; + size_t tmp_name_len = 0; + size_t tmp_char_path_len = 0; + size_t tmp_char_name_len = 0; + + if (flag == C_REQUEST) { + if (data) { + tmp_request = data; + tmp_char = tmp_request->path; + tmp_path_len = le32_to_cpu(tmp_request->path_len); + tmp_name_len = le32_to_cpu(tmp_request->name_len); + tmp_char_path_len = strnlen(tmp_char, PATH_MAX); + tmp_char_name_len = strnlen( + tmp_char + tmp_char_path_len + 1, NAME_MAX); + } else { + return err; + } + + if (tmp_path_len != tmp_char_path_len || + tmp_name_len != tmp_char_name_len || + len - sizeof(struct unlink_request) != + tmp_path_len + 1 + tmp_name_len + 1) { + err = -EINVAL; + hmdfs_err("verify fail"); + return err; + } + } + + return err; +} + +static int hmdfs_rename_verify(int flag, size_t len, void *data) +{ + int err = 0; + struct rename_request *tmp_request = NULL; + char *tmp_char = NULL; + size_t tmp_old_path_len = 0; + size_t tmp_new_path_len = 0; + size_t tmp_old_name_len = 0; + size_t tmp_new_name_len = 0; + size_t tmp_char_old_path_len = 0; + size_t tmp_char_new_path_len = 0; + size_t tmp_char_old_name_len = 0; + size_t tmp_char_new_name_len = 0; + + if (flag == C_REQUEST) { + if (data) { + tmp_request = data; + tmp_char = tmp_request->path; + + tmp_old_path_len = + le32_to_cpu(tmp_request->old_path_len); + tmp_new_path_len = + le32_to_cpu(tmp_request->new_path_len); + tmp_old_name_len = + le32_to_cpu(tmp_request->old_name_len); + tmp_new_name_len = + le32_to_cpu(tmp_request->new_name_len); + + tmp_char_old_path_len = strnlen(tmp_char, PATH_MAX); + tmp_char_new_path_len = strnlen( + tmp_char + tmp_char_old_path_len + 1, PATH_MAX); + + tmp_char_old_name_len = + strnlen(tmp_char + tmp_char_old_path_len + 1 + + tmp_char_new_path_len + 1, + 
PATH_MAX); + tmp_char_new_name_len = + strnlen(tmp_char + tmp_char_old_path_len + 1 + + tmp_char_new_path_len + 1 + + tmp_char_old_name_len + 1, + PATH_MAX); + } else { + return err; + } + + if (tmp_new_name_len != tmp_char_new_name_len || + tmp_old_name_len != tmp_char_old_name_len || + tmp_new_path_len != tmp_char_new_path_len || + tmp_old_path_len != tmp_char_old_path_len || + len - sizeof(struct rename_request) != + tmp_new_name_len + 1 + tmp_old_name_len + 1 + + tmp_new_path_len + 1 + tmp_old_path_len + + 1) { + err = -EINVAL; + hmdfs_err("verify fail"); + return err; + } + } + + return err; +} + +static int hmdfs_setattr_verify(int flag, size_t len, void *data) +{ + int err = 0; + struct setattr_request *tmp_request = NULL; + char *tmp_char = NULL; + size_t tmp_len = 0; + + if (flag == C_REQUEST) { + if (data) { + tmp_request = data; + tmp_char = tmp_request->buf; + tmp_len = strnlen(tmp_char, PATH_MAX); + } else { + return err; + } + + if (tmp_len != len - sizeof(struct setattr_request) - 1 || + le32_to_cpu(tmp_request->path_len) != tmp_len) { + err = -EINVAL; + hmdfs_err("verify fail"); + return err; + } + } + + return err; +} + +static int hmdfs_getattr_verify(int flag, size_t len, void *data) +{ + struct getattr_request *req = NULL; + size_t tmp_len; + + if (flag != C_REQUEST || !data) + return 0; + + req = data; + tmp_len = strnlen(req->buf, PATH_MAX); + if (tmp_len != len - sizeof(struct getattr_request) - 1 || + le32_to_cpu(req->path_len) != tmp_len) { + hmdfs_err("verify fail"); + return -EINVAL; + } + + return 0; +} + +static int hmdfs_getxattr_verify(int flag, size_t len, void *data) +{ + struct getxattr_request *req = NULL; + struct getxattr_response *resp = NULL; + size_t path_len = 0; + size_t name_len = 0; + size_t size = 0; + + if (!data) + return 0; + + if (flag == C_REQUEST) { + req = data; + path_len = le32_to_cpu(req->path_len); + name_len = le32_to_cpu(req->name_len); + size = le32_to_cpu(req->size); + if (path_len >= PATH_MAX || + path_len != strnlen(req->buf, PATH_MAX) || + name_len != + strnlen(req->buf + path_len + 1, XATTR_NAME_MAX) || + size > HMDFS_XATTR_SIZE_MAX) + return -EINVAL; + } else { + resp = data; + size = le32_to_cpu(resp->size); + if (len != sizeof(struct getxattr_response) && + len < sizeof(struct getxattr_response) + size) + return -EINVAL; + } + + return 0; +} + +static int hmdfs_setxattr_verify(int flag, size_t len, void *data) +{ + struct setxattr_request *req = NULL; + size_t path_len = 0; + size_t name_len = 0; + size_t size = 0; + + /* No need to verify response */ + if (flag != C_REQUEST || !data) + return 0; + + req = data; + path_len = le32_to_cpu(req->path_len); + name_len = le32_to_cpu(req->name_len); + size = le32_to_cpu(req->size); + if (path_len >= PATH_MAX || path_len != strnlen(req->buf, PATH_MAX) || + name_len != strnlen(req->buf + path_len + 1, XATTR_NAME_MAX) || + len != path_len + name_len + size + 2 + + sizeof(struct setxattr_request) || + size > HMDFS_XATTR_SIZE_MAX) + return -EINVAL; + + return 0; +} + +static int hmdfs_listxattr_verify(int flag, size_t len, void *data) +{ + struct listxattr_request *req = NULL; + struct listxattr_response *resp = NULL; + size_t path_len = 0; + size_t size = 0; + + if (!data) + return 0; + + if (flag == C_REQUEST) { + req = data; + path_len = le32_to_cpu(req->path_len); + size = le32_to_cpu(req->size); + if (path_len >= PATH_MAX || + path_len != strnlen(req->buf, PATH_MAX) || + size > HMDFS_LISTXATTR_SIZE_MAX) + return -EINVAL; + } else { + resp = data; + size = le32_to_cpu(resp->size); 
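+		/*
+		 * A well-formed response is either a bare header (no xattr
+		 * payload, e.g. an error reply) or a header followed by at
+		 * least the declared size; anything in between is rejected.
+		 */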
+ if (len != sizeof(struct listxattr_response) && + len < sizeof(struct listxattr_response) + size) + return -EINVAL; + } + + return 0; +} + +static int hmdfs_writepage_verify(int flag, size_t len, void *data) +{ + struct writepage_request *req = NULL; + __u32 count; + + if (flag != C_REQUEST || !data) + return 0; + + req = data; + count = le32_to_cpu(req->count); + if (count == 0 || count > HMDFS_PAGE_SIZE || + len - sizeof(struct writepage_request) != HMDFS_PAGE_SIZE) { + hmdfs_err("verify fail, count is %d", count); + return -EINVAL; + } + + return 0; +} + +static int hmdfs_statfs_verify(int flag, size_t len, void *data) +{ + int err = 0; + struct statfs_request *tmp_request = NULL; + char *tmp_char = NULL; + size_t tmp_len = 0; + + if (flag == C_REQUEST) { + if (data) { + tmp_request = data; + tmp_char = tmp_request->path; + tmp_len = strnlen(tmp_char, PATH_MAX); + } else { + return err; + } + + if (le32_to_cpu(tmp_request->path_len) != tmp_len || + tmp_len != len - sizeof(struct statfs_request) - 1) { + err = -EINVAL; + hmdfs_err("verify fail"); + return err; + } + } + + return err; +} + +static int hmdfs_readpages_verify(int flag, size_t len, void *data) +{ + struct readpages_request *req = NULL; + unsigned int size; + + if (flag != C_REQUEST || !data) + return 0; + + req = data; + size = le32_to_cpu(req->size); + if (size > HMDFS_READPAGES_NR_MAX * HMDFS_PAGE_SIZE) { + hmdfs_err("verify fail, invalid req->size %u", size); + return -EINVAL; + } + + return 0; +} + +static int hmdfs_readpages_open_verify(int flag, size_t len, void *data) +{ + struct readpages_open_request *req = NULL; + unsigned int size; + size_t tmp_len; + + if (flag != C_REQUEST || !data) + return 0; + + req = data; + size = le32_to_cpu(req->size); + tmp_len = strnlen(req->buf, PATH_MAX); + if (tmp_len + 1 != len - sizeof(*req) || + le32_to_cpu(req->path_len) != tmp_len || + size > HMDFS_READPAGES_NR_MAX * HMDFS_PAGE_SIZE) { + hmdfs_err("verify fail, req->size %u", size); + return -EINVAL; + } + + return 0; +} + +typedef int (*hmdfs_message_verify_func)(int, size_t, void *); + +static const hmdfs_message_verify_func message_verify[F_SIZE] = { + [F_OPEN] = hmdfs_open_message_verify, + [F_WRITEPAGE] = hmdfs_writepage_verify, + [F_ITERATE] = hmdfs_iterate_verify, + [F_MKDIR] = hmdfs_mkdir_verify, + [F_CREATE] = hmdfs_create_verify, + [F_RMDIR] = hmdfs_rmdir_verify, + [F_UNLINK] = hmdfs_unlink_verify, + [F_RENAME] = hmdfs_rename_verify, + [F_SETATTR] = hmdfs_setattr_verify, + [F_STATFS] = hmdfs_statfs_verify, + [F_GETATTR] = hmdfs_getattr_verify, + [F_GETXATTR] = hmdfs_getxattr_verify, + [F_SETXATTR] = hmdfs_setxattr_verify, + [F_LISTXATTR] = hmdfs_listxattr_verify, + [F_READPAGES] = hmdfs_readpages_verify, + [F_READPAGES_OPEN] = hmdfs_readpages_open_verify, + [F_ATOMIC_OPEN] = hmdfs_atomic_open_verify, +}; + +static void handle_bad_message(struct hmdfs_peer *con, + struct hmdfs_head_cmd *head, int *err) +{ + /* + * Bad message won't be awared by upper layer, so ETIME is + * always given to upper layer. It is prefer to pass EOPNOTSUPP + * to upper layer when bad message (eg. caused by wrong len) + * received. + */ + if (head->operations.cmd_flag == C_RESPONSE) { + /* + * Change msg ret code. To let upper layer handle + * EOPNOTSUPP, hmdfs_message_verify() should return + * 0, so err code is modified either. + */ + head->ret_code = cpu_to_le32(-EOPNOTSUPP); + *err = 0; + } else { + if (head->operations.command >= F_SIZE) + return; + /* + * Some request messages do not need to be responded. 
+ * Even if a response is returned, the response msg + * is automatically ignored in hmdfs_response_recv(). + * Therefore, it is normal to directly return a response. + */ + if (need_response[head->operations.command]) + hmdfs_send_err_response(con, head, -EOPNOTSUPP); + } +} + +int hmdfs_message_verify(struct hmdfs_peer *con, struct hmdfs_head_cmd *head, + void *data) +{ + int err = 0; + int flag, cmd, len_type; + size_t len, min, max; + + if (!head) + return -EINVAL; + + flag = head->operations.cmd_flag; + if (flag != C_REQUEST && flag != C_RESPONSE) + return -EINVAL; + + cmd = head->operations.command; + if (cmd >= F_SIZE || cmd < F_OPEN || cmd == F_RESERVED_0 || + (cmd >= F_RESERVED_1 && cmd <= F_RESERVED_4) || cmd == F_RESERVED_5) { + err = -EINVAL; + goto handle_bad_msg; + } + + if (head->version != DFS_2_0) { + err = -EINVAL; + } else { + len = le32_to_cpu(head->data_len) - + sizeof(struct hmdfs_head_cmd); + min = message_length[flag][cmd][HMDFS_MESSAGE_MIN_INDEX]; + if (head->operations.command == F_ITERATE && flag == C_RESPONSE) + max = sizeof(struct slice_descriptor) + PAGE_SIZE; + else + max = message_length[flag][cmd][HMDFS_MESSAGE_MAX_INDEX]; + len_type = + message_length[flag][cmd][HMDFS_MESSAGE_LEN_JUDGE_INDEX]; + + if (len_type == MESSAGE_LEN_JUDGE_RANGE) { + if (len < min || len > max) { + hmdfs_err( + "cmd %d -> %d message verify fail, len = %zu", + cmd, flag, len); + err = -EINVAL; + goto handle_bad_msg; + } + } else { + if (len != min && len != max) { + hmdfs_err( + "cmd %d -> %d message verify fail, len = %zu", + cmd, flag, len); + err = -EINVAL; + goto handle_bad_msg; + } + } + + if (message_verify[cmd]) + err = message_verify[cmd](flag, len, data); + + if (err) + goto handle_bad_msg; + + return err; + } + +handle_bad_msg: + handle_bad_message(con, head, &err); + return err; +} diff --git a/fs/hmdfs/comm/message_verify.h b/fs/hmdfs/comm/message_verify.h new file mode 100755 index 000000000..99e696a44 --- /dev/null +++ b/fs/hmdfs/comm/message_verify.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/message_verify.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_MESSAGE_VERIFY_H +#define HMDFS_MESSAGE_VERIFY_H + +#include "protocol.h" + +enum MESSAGE_LEN_JUDGE_TYPE { + MESSAGE_LEN_JUDGE_RANGE = 0, + MESSAGE_LEN_JUDGE_BIN = 1, +}; + +#define HMDFS_MESSAGE_MIN_INDEX 0 +#define HMDFS_MESSAGE_MAX_INDEX 1 +#define HMDFS_MESSAGE_LEN_JUDGE_INDEX 2 +#define HMDFS_MESSAGE_MIN_MAX 3 + +void hmdfs_message_verify_init(void); +int hmdfs_message_verify(struct hmdfs_peer *con, struct hmdfs_head_cmd *head, + void *data); + +#endif diff --git a/fs/hmdfs/comm/node_cb.c b/fs/hmdfs/comm/node_cb.c new file mode 100755 index 000000000..21b84d2ff --- /dev/null +++ b/fs/hmdfs/comm/node_cb.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/comm/node_cb.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
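+ * + * Editorial overview: this file implements a small event-callback list for + * peer node events (add/online/offline/del). A subsystem registers its + * callbacks once at module init; an illustrative, hypothetical subscriber: + * + * static void my_online_cb(struct hmdfs_peer *conn, int evt, + * unsigned int seq) + * { ... } + * + * static struct hmdfs_node_cb_desc my_cb[] = { + * { .evt = NODE_EVT_ONLINE, .sync = true, .fn = my_online_cb }, + * }; + * hmdfs_node_add_evt_cb(my_cb, ARRAY_SIZE(my_cb));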
+ */ + +#include + +#include "node_cb.h" +#include "connection.h" + +static struct list_head cb_head[NODE_EVT_NR][NODE_EVT_TYPE_NR]; + +static const char *evt_str_tbl[NODE_EVT_NR] = { + "add", "online", "offline", "del", +}; + +static inline bool hmdfs_is_valid_node_evt(int evt) +{ + return (evt >= 0 && evt < NODE_EVT_NR); +} + +static const char *hmdfs_evt_str(int evt) +{ + if (!hmdfs_is_valid_node_evt(evt)) + return "unknown"; + return evt_str_tbl[evt]; +} + +void hmdfs_node_evt_cb_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cb_head); i++) { + int j; + + for (j = 0; j < ARRAY_SIZE(cb_head[0]); j++) + INIT_LIST_HEAD(&cb_head[i][j]); + } +} + +void hmdfs_node_add_evt_cb(struct hmdfs_node_cb_desc *desc, int nr) +{ + int i; + + for (i = 0; i < nr; i++) { + int evt = desc[i].evt; + bool sync = desc[i].sync; + + if (!hmdfs_is_valid_node_evt(evt)) + continue; + + list_add_tail(&desc[i].list, &cb_head[evt][sync]); + } +} + +void hmdfs_node_call_evt_cb(struct hmdfs_peer *conn, int evt, bool sync, + unsigned int seq) +{ + struct hmdfs_node_cb_desc *desc = NULL; + + hmdfs_info("node 0x%x:0x%llx call %s %s cb seq %u", + conn->owner, conn->device_id, hmdfs_evt_str(evt), + sync ? "sync" : "async", seq); + + if (!hmdfs_is_valid_node_evt(evt)) + return; + + list_for_each_entry(desc, &cb_head[evt][sync], list) { + if (conn->version < desc->min_version) + continue; + + desc->fn(conn, evt, seq); + } +} diff --git a/fs/hmdfs/comm/node_cb.h b/fs/hmdfs/comm/node_cb.h new file mode 100755 index 000000000..fe53b946f --- /dev/null +++ b/fs/hmdfs/comm/node_cb.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/node_cb.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_NODE_CB_H +#define HMDFS_NODE_CB_H + +#include "hmdfs.h" + +/* async & sync */ +#define NODE_EVT_TYPE_NR 2 + +enum { + NODE_EVT_ADD = 0, + NODE_EVT_ONLINE, + NODE_EVT_OFFLINE, + NODE_EVT_DEL, + NODE_EVT_NR, +}; + +struct hmdfs_peer; + +typedef void (*hmdfs_node_evt_cb)(struct hmdfs_peer *conn, + int evt, unsigned int seq); + +struct hmdfs_node_cb_desc { + int evt; + bool sync; + unsigned char min_version; + hmdfs_node_evt_cb fn; + struct list_head list; +}; + +extern void hmdfs_node_evt_cb_init(void); + +/* Only initialize during module init */ +extern void hmdfs_node_add_evt_cb(struct hmdfs_node_cb_desc *desc, int nr); +extern void hmdfs_node_call_evt_cb(struct hmdfs_peer *node, int evt, bool sync, + unsigned int seq); + +#endif /* HMDFS_NODE_CB_H */ diff --git a/fs/hmdfs/comm/protocol.h b/fs/hmdfs/comm/protocol.h new file mode 100755 index 000000000..a873143f2 --- /dev/null +++ b/fs/hmdfs/comm/protocol.h @@ -0,0 +1,489 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/protocol.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
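+ * + * Wire-format overview (editorial note, derived from the definitions below): + * every message starts with a fixed struct hmdfs_head_cmd followed by a + * command-specific payload. Variable-length requests carry their strings in + * a trailing flexible array, with the lengths recorded in little-endian + * fields; e.g. an open_request for the path "/a/b" would be laid out as + * (illustrative only): + * + * | hmdfs_head_cmd | file_type | flags | path_len = 4 | "/a/b\0" | + * + * The receiver cross-checks path_len, the actual string length and data_len + * in message_verify.c before dispatching.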
+ */ + +#ifndef HMDFS_PROTOCOL_H +#define HMDFS_PROTOCOL_H + +#include +#include +#include +#include + +struct hmdfs_cmd { + __u8 reserved; + __u8 cmd_flag; + __u8 command; + __u8 reserved2; +} __packed; + +#define HMDFS_MSG_MAGIC 0xF7 +#define HMDFS_MAX_MESSAGE_LEN (8 * 1024 * 1024) + +struct hmdfs_head_cmd { + __u8 magic; + __u8 version; + __le16 reserved; + __le32 data_len; + struct hmdfs_cmd operations; + __le32 ret_code; + __le32 msg_id; + __le32 reserved1; +} __packed; + +enum FILE_RECV_STATE { + FILE_RECV_PROCESS = 0, + FILE_RECV_SUCC, + FILE_RECV_ERR_NET, + FILE_RECV_ERR_SPC, +}; + +struct file_recv_info { + void *local_filp; + atomic_t local_fslices; + atomic_t state; +}; + +enum MSG_IDR_TYPE { + MSG_IDR_1_0_NONE = 0, + MSG_IDR_1_0_MESSAGE_SYNC, + MSG_IDR_1_0_PAGE, + MSG_IDR_MESSAGE_SYNC, + MSG_IDR_MESSAGE_ASYNC, + MSG_IDR_PAGE, + MSG_IDR_MAX, +}; + +struct hmdfs_msg_idr_head { + __u32 type; + __u32 msg_id; + struct kref ref; + struct hmdfs_peer *peer; +}; + +struct sendmsg_wait_queue { + struct hmdfs_msg_idr_head head; + wait_queue_head_t response_q; + struct list_head async_msg; + atomic_t valid; + __u32 size; + void *buf; + __u32 ret; + unsigned long start; + struct file_recv_info recv_info; +}; + +struct hmdfs_send_command { + struct hmdfs_cmd operations; + void *data; + size_t len; + void *local_filp; + void *out_buf; + size_t out_len; + __u32 ret_code; +}; + +struct hmdfs_req { + struct hmdfs_cmd operations; + /* + * Normally, the caller ought to set timeout to TIMEOUT_CONFIG, so that + * hmdfs_send_async_request will search s_cmd_timeout for the user- + * configured timeout values. + * + * However, consider the following scenario: + * The caller may want to issue multiple requests sharing the same + * timeout value, but the users may update the value during the gap. + * To ensure that these requests use the timeout value atomically, we + * provide the timeout field as an override.
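+ * + * An illustrative sketch of both styles (hypothetical call site): + * + * struct hmdfs_req req = { ... }; + * req.timeout = TIMEOUT_CONFIG; // look up s_cmd_timeout per command + * req.timeout = TIMEOUT_30S; // or pin one value for a whole batch + * hmdfs_send_async_request(peer, &req);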
+ */ + unsigned int timeout; + void *data; + size_t data_len; + + void *private; // optional + size_t private_len; // optional +}; + +struct hmdfs_resp { + void *out_buf; + size_t out_len; + __u32 ret_code; +}; + +struct hmdfs_msg_parasite { + struct hmdfs_msg_idr_head head; + struct delayed_work d_work; + bool wfired; + struct hmdfs_req req; + struct hmdfs_resp resp; + unsigned long start; +}; + +struct hmdfs_send_data { + // sect1: head + void *head; + size_t head_len; + + // sect2: slice descriptor + void *sdesc; + size_t sdesc_len; + + // sect3: request / response / file slice + void *data; + size_t len; +}; + +struct slice_descriptor { + __le32 num_slices; + __le32 slice_size; + __le32 slice_sn; + __le32 content_size; +} __packed; + +enum DFS_VERSION { + INVALID_VERSION = 0, + DFS_1_0, + + USERSPACE_MAX_VER = 0x3F, + DFS_2_0, + + MAX_VERSION = 0xFF +}; + +enum CONN_OPERATIONS_VERSION { USERDFS_VERSION, PROTOCOL_VERSION }; + +enum CMD_FLAG { C_REQUEST = 0, C_RESPONSE = 1, C_FLAG_SIZE }; + +enum FILE_CMD { + F_OPEN = 0, + F_RELEASE = 1, + F_READPAGE = 2, + F_WRITEPAGE = 3, + F_ITERATE = 4, + F_RESERVED_1 = 5, + F_RESERVED_2 = 6, + F_RESERVED_3 = 7, + F_RESERVED_4 = 8, + F_MKDIR = 9, + F_RMDIR = 10, + F_CREATE = 11, + F_UNLINK = 12, + F_RENAME = 13, + F_SETATTR = 14, + F_RESERVED_5 = 15, + F_STATFS = 16, + F_CONNECT_REKEY = 17, + F_DROP_PUSH = 18, + F_RESERVED_0 = 19, + F_GETATTR = 20, + F_FSYNC = 21, + F_SYNCFS = 22, + F_GETXATTR = 23, + F_SETXATTR = 24, + F_LISTXATTR = 25, + F_READPAGES = 26, + F_READPAGES_OPEN = 27, + F_ATOMIC_OPEN = 28, + F_SIZE, +}; + +struct open_request { + __u8 file_type; + __le32 flags; + __le32 path_len; + char buf[0]; +} __packed; + +struct open_response { + __le32 change_detect_cap; + __le64 file_ver; + __le32 file_id; + __le64 file_size; + __le64 ino; + __le64 ctime; + __le32 ctime_nsec; + __le64 mtime; + __le32 mtime_nsec; + __le64 stable_ctime; + __le32 stable_ctime_nsec; + __le64 ichange_count; +} __packed; + +enum hmdfs_open_flags { + HMDFS_O_TRUNC = O_TRUNC, + HMDFS_O_EXCL = O_EXCL, +}; + +struct atomic_open_request { + __le32 open_flags; + __le16 mode; + __le16 reserved1; + __le32 path_len; + __le32 file_len; + __le64 reserved2[4]; + char buf[0]; +} __packed; + +struct atomic_open_response { + __le32 fno; + __le16 i_mode; + __le16 reserved1; + __le32 i_flags; + __le32 reserved2; + __le64 reserved3[4]; + struct open_response open_resp; +} __packed; + +struct release_request { + __le64 file_ver; + __le32 file_id; +} __packed; + +struct fsync_request { + __le64 file_ver; + __le32 file_id; + __le32 datasync; + __le64 start; + __le64 end; +} __packed; + +struct readpage_request { + __le64 file_ver; + __le32 file_id; + __le32 size; + __le64 index; +} __packed; + +struct readpage_response { + char buf[0]; +} __packed; + +struct readpages_request { + __le64 file_ver; + __le32 file_id; + __le32 size; + __le64 index; + __le64 reserved; +} __packed; + +struct readpages_response { + char buf[0]; +} __packed; + +struct readpages_open_request { + __u8 file_type; + __u8 reserved1[3]; + __le32 flags; + __le32 path_len; + __le32 size; + __le64 index; + __le64 reserved2; + char buf[0]; +} __packed; + +struct readpages_open_response { + struct open_response open_resp; + __le64 reserved[4]; + char buf[0]; +} __packed; + +struct writepage_request { + __le64 file_ver; + __le32 file_id; + __le64 index; + __le32 count; + char buf[0]; +} __packed; + +struct writepage_response { + __le64 ichange_count; + __le64 ctime; + __le32 ctime_nsec; +} __packed; + +struct readdir_request 
{ + __le64 dcache_crtime; + __le64 dcache_crtime_nsec; + __le64 dentry_ctime; + __le64 dentry_ctime_nsec; + __le64 num; + __le32 verify_cache; + __le32 path_len; + char path[0]; +} __packed; + +struct hmdfs_inodeinfo_response { + __le64 i_size; + __le64 i_mtime; + __le32 i_mtime_nsec; + __le32 fno; + __le16 i_mode; + __le64 i_ino; + __le32 i_flags; + __le32 i_reserved; +} __packed; + +struct mkdir_request { + __le32 path_len; + __le32 name_len; + __le16 mode; + char path[0]; +} __packed; + +struct create_request { + __le32 path_len; + __le32 name_len; + __le16 mode; + __u8 want_excl; + char path[0]; +} __packed; + +struct rmdir_request { + __le32 path_len; + __le32 name_len; + char path[0]; +} __packed; + +struct unlink_request { + __le32 path_len; + __le32 name_len; + char path[0]; +} __packed; + +struct rename_request { + __le32 old_path_len; + __le32 new_path_len; + __le32 old_name_len; + __le32 new_name_len; + __le32 flags; + char path[0]; +} __packed; + +struct drop_push_request { + __le32 path_len; + char path[0]; +} __packed; + +struct setattr_request { + __le64 size; + __le32 valid; + __le16 mode; + __le32 uid; + __le32 gid; + __le64 atime; + __le32 atime_nsec; + __le64 mtime; + __le32 mtime_nsec; + __le32 path_len; + char buf[0]; +} __packed; + +struct getattr_request { + __le32 lookup_flags; + __le32 path_len; + char buf[0]; +} __packed; + +struct getattr_response { + __le32 change_detect_cap; + __le32 result_mask; + __le32 flags; + __le64 fsid; + __le16 mode; + __le32 nlink; + __le32 uid; + __le32 gid; + __le32 rdev; + __le64 ino; + __le64 size; + __le64 blocks; + __le32 blksize; + __le64 atime; + __le32 atime_nsec; + __le64 mtime; + __le32 mtime_nsec; + __le64 ctime; + __le32 ctime_nsec; + __le64 crtime; + __le32 crtime_nsec; + __le64 ichange_count; +} __packed; + +struct statfs_request { + __le32 path_len; + char path[0]; +} __packed; + +struct statfs_response { + __le64 f_type; + __le64 f_bsize; + __le64 f_blocks; + __le64 f_bfree; + __le64 f_bavail; + __le64 f_files; + __le64 f_ffree; + __le32 f_fsid_0; + __le32 f_fsid_1; + __le64 f_namelen; + __le64 f_frsize; + __le64 f_flags; + __le64 f_spare_0; + __le64 f_spare_1; + __le64 f_spare_2; + __le64 f_spare_3; +} __packed; + +struct syncfs_request { + __le64 version; + __le32 flags; +} __packed; + +struct getxattr_request { + __le32 path_len; + __le32 name_len; + __le32 size; + char buf[0]; +} __packed; + +struct getxattr_response { + __le32 size; + char value[0]; /* xattr value may non-printable */ +} __packed; + +struct setxattr_request { + __le32 path_len; + __le32 name_len; + __le32 size; + __le32 flags; + __u8 del; /* remove xattr */ + char buf[0]; +} __packed; + +struct listxattr_request { + __le32 path_len; + __le32 size; + char buf[0]; +} __packed; + +struct listxattr_response { + __le32 size; + char list[0]; +} __packed; + +struct connection_rekey_request { + __le32 update_request; +} __packed; + +enum CONNECTION_KEY_UPDATE_REQUEST { + UPDATE_NOT_REQUESTED = 0, + UPDATE_REQUESTED = 1 +}; + +enum MSG_QUEUE_STATUS { + MSG_Q_SEND = 0, + MSG_Q_END_RECV, +}; +#endif diff --git a/fs/hmdfs/comm/socket_adapter.c b/fs/hmdfs/comm/socket_adapter.c new file mode 100755 index 000000000..0404c2a79 --- /dev/null +++ b/fs/hmdfs/comm/socket_adapter.c @@ -0,0 +1,1158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/comm/socket_adapter.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
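+ * + * Editorial overview: received messages are dispatched here. Requests are + * routed through s_recv_callbacks[] to the hmdfs_server_* handlers, while + * responses are matched to their waiters via the msg_id allocated in the + * peer's msg_idr, either waking a synchronous sendmsg_wait_queue or + * re-queueing an asynchronous hmdfs_msg_parasite.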
+ */ + +#include "socket_adapter.h" + +#include +#include +#include +#include +#include +#include + +#include "authority/authentication.h" +#include "comm/device_node.h" +#include "hmdfs_client.h" +#include "hmdfs_server.h" +#include "hmdfs_trace.h" +#include "message_verify.h" + +#define ACQUIRE_WFIRED_INTVAL_USEC_MIN 10 +#define ACQUIRE_WFIRED_INTVAL_USEC_MAX 30 + +typedef void (*request_callback)(struct hmdfs_peer *, struct hmdfs_head_cmd *, + void *); +typedef void (*response_callback)(struct hmdfs_peer *, + struct sendmsg_wait_queue *, void *, size_t); + +static const request_callback s_recv_callbacks[F_SIZE] = { + [F_OPEN] = hmdfs_server_open, + [F_READPAGE] = hmdfs_server_readpage, + [F_RELEASE] = hmdfs_server_release, + [F_WRITEPAGE] = hmdfs_server_writepage, + [F_ITERATE] = hmdfs_server_readdir, + [F_MKDIR] = hmdfs_server_mkdir, + [F_CREATE] = hmdfs_server_create, + [F_RMDIR] = hmdfs_server_rmdir, + [F_UNLINK] = hmdfs_server_unlink, + [F_RENAME] = hmdfs_server_rename, + [F_SETATTR] = hmdfs_server_setattr, + [F_STATFS] = hmdfs_server_statfs, + [F_DROP_PUSH] = hmdfs_server_get_drop_push, + [F_GETATTR] = hmdfs_server_getattr, + [F_FSYNC] = hmdfs_server_fsync, + [F_SYNCFS] = hmdfs_server_syncfs, + [F_GETXATTR] = hmdfs_server_getxattr, + [F_SETXATTR] = hmdfs_server_setxattr, + [F_LISTXATTR] = hmdfs_server_listxattr, + [F_READPAGES] = hmdfs_server_readpages, + [F_READPAGES_OPEN] = hmdfs_server_readpages_open, + [F_ATOMIC_OPEN] = hmdfs_server_atomic_open, +}; + +typedef void (*file_request_callback)(struct hmdfs_peer *, + struct hmdfs_send_command *); + +struct async_req_callbacks { + void (*on_wakeup)(struct hmdfs_peer *peer, const struct hmdfs_req *req, + const struct hmdfs_resp *resp); +}; + +static const struct async_req_callbacks g_async_req_callbacks[F_SIZE] = { + [F_SYNCFS] = { .on_wakeup = hmdfs_recv_syncfs_cb }, + [F_WRITEPAGE] = { .on_wakeup = hmdfs_writepage_cb }, +}; + +static void msg_release(struct kref *kref) +{ + struct sendmsg_wait_queue *msg_wq; + struct hmdfs_peer *con; + + msg_wq = (struct sendmsg_wait_queue *)container_of(kref, + struct hmdfs_msg_idr_head, ref); + con = msg_wq->head.peer; + idr_remove(&con->msg_idr, msg_wq->head.msg_id); + spin_unlock(&con->idr_lock); + + kfree(msg_wq->buf); + if (msg_wq->recv_info.local_filp) + fput(msg_wq->recv_info.local_filp); + kfree(msg_wq); +} + +// Always remember to find before put, and make sure con is available +void msg_put(struct sendmsg_wait_queue *msg_wq) +{ + kref_put_lock(&msg_wq->head.ref, msg_release, + &msg_wq->head.peer->idr_lock); +} + +static void recv_info_init(struct file_recv_info *recv_info) +{ + memset(recv_info, 0, sizeof(struct file_recv_info)); + atomic_set(&recv_info->local_fslices, 0); + atomic_set(&recv_info->state, FILE_RECV_PROCESS); +} + +static int msg_init(struct hmdfs_peer *con, struct sendmsg_wait_queue *msg_wq) +{ + int ret = 0; + struct file_recv_info *recv_info = &msg_wq->recv_info; + + ret = hmdfs_alloc_msg_idr(con, MSG_IDR_MESSAGE_SYNC, msg_wq); + if (unlikely(ret)) + return ret; + + atomic_set(&msg_wq->valid, MSG_Q_SEND); + init_waitqueue_head(&msg_wq->response_q); + recv_info_init(recv_info); + msg_wq->start = jiffies; + return 0; +} + +static inline void statistic_con_sb_dirty(struct hmdfs_peer *con, + const struct hmdfs_cmd *op) +{ + if (op->command == F_WRITEPAGE && op->cmd_flag == C_REQUEST) + atomic64_inc(&con->sb_dirty_count); +} + +int hmdfs_sendmessage(struct hmdfs_peer *node, struct hmdfs_send_data *msg) +{ + int ret = 0; + struct connection *connect = NULL; + struct tcp_handle
*tcp = NULL; + struct hmdfs_head_cmd *head = msg->head; + const struct cred *old_cred; + + if (!node) { + hmdfs_err("node NULL when send cmd %d", + head->operations.command); + ret = -EAGAIN; + goto out_err; + } else if (node->status != NODE_STAT_ONLINE) { + hmdfs_err("device %llu OFFLINE %d when send cmd %d", + node->device_id, node->status, + head->operations.command); + ret = -EAGAIN; + goto out; + } + + old_cred = hmdfs_override_creds(node->sbi->system_cred); + + do { + connect = get_conn_impl(node, CONNECT_TYPE_TCP); + if (!connect) { + hmdfs_info_ratelimited( + "device %llu no connection available when send cmd %d, get new session", + node->device_id, head->operations.command); + if (node->status != NODE_STAT_OFFLINE) { + struct notify_param param; + + memcpy(param.remote_cid, node->cid, + HMDFS_CID_SIZE); + param.notify = NOTIFY_OFFLINE; + param.fd = INVALID_SOCKET_FD; + notify(node, &param); + } + ret = -EAGAIN; + goto revert_cred; + } + + ret = connect->send_message(connect, msg); + if (ret == -ESHUTDOWN) { + hmdfs_info("device %llu send cmd %d message fail, connection stop", + node->device_id, head->operations.command); + connect->status = CONNECT_STAT_STOP; + tcp = connect->connect_handle; + if (node->status != NODE_STAT_OFFLINE) { + connection_get(connect); + if (!queue_work(node->reget_conn_wq, + &connect->reget_work)) + connection_put(connect); + } + connection_put(connect); + /* + * Seeing node->status == OFFLINE here cannot ensure + * that node_seq has been increased before + * hmdfs_sendmessage() returns, so bump it explicitly. + */ + hmdfs_node_inc_evt_seq(node); + } else { + connection_put(connect); + goto revert_cred; + } + } while (node->status != NODE_STAT_OFFLINE); +revert_cred: + hmdfs_revert_creds(old_cred); + + if (!ret) + statistic_con_sb_dirty(node, &head->operations); +out: + if (node->version == DFS_2_0 && + head->operations.cmd_flag == C_REQUEST) + hmdfs_client_snd_statis(node->sbi, + head->operations.command, ret); + else if (node->version == DFS_2_0 && + head->operations.cmd_flag == C_RESPONSE) + hmdfs_server_snd_statis(node->sbi, + head->operations.command, ret); +out_err: + return ret; +} + +int hmdfs_sendmessage_response(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, __u32 data_len, + void *buf, __u32 ret_code) +{ + int ret; + struct hmdfs_send_data msg; + struct hmdfs_head_cmd head; + + head.magic = HMDFS_MSG_MAGIC; + head.version = DFS_2_0; + head.operations = cmd->operations; + head.operations.cmd_flag = C_RESPONSE; + head.data_len = cpu_to_le32(data_len + sizeof(struct hmdfs_head_cmd)); + head.ret_code = cpu_to_le32(ret_code); + head.msg_id = cmd->msg_id; + head.reserved = cmd->reserved; + head.reserved1 = cmd->reserved1; + msg.head = &head; + msg.head_len = sizeof(struct hmdfs_head_cmd); + msg.data = buf; + msg.len = data_len; + msg.sdesc = NULL; + msg.sdesc_len = 0; + + ret = hmdfs_sendmessage(con, &msg); + return ret; +} + +static void mp_release(struct kref *kref) +{ + struct hmdfs_msg_parasite *mp = NULL; + struct hmdfs_peer *peer = NULL; + + mp = (struct hmdfs_msg_parasite *)container_of(kref, + struct hmdfs_msg_idr_head, ref); + peer = mp->head.peer; + idr_remove(&peer->msg_idr, mp->head.msg_id); + spin_unlock(&peer->idr_lock); + + peer_put(peer); + kfree(mp->resp.out_buf); + kfree(mp); +} + +void mp_put(struct hmdfs_msg_parasite *mp) +{ + kref_put_lock(&mp->head.ref, mp_release, &mp->head.peer->idr_lock); +} + +static void async_request_cb_on_wakeup_fn(struct work_struct *w) +{ + struct hmdfs_msg_parasite *mp = + container_of(w, struct hmdfs_msg_parasite, d_work.work); + struct
async_req_callbacks cbs; + const struct cred *old_cred = + hmdfs_override_creds(mp->head.peer->sbi->cred); + + if (mp->resp.ret_code == -ETIME) + hmdfs_client_resp_statis(mp->head.peer->sbi, + mp->req.operations.command, + HMDFS_RESP_TIMEOUT, 0, 0); + + cbs = g_async_req_callbacks[mp->req.operations.command]; + if (cbs.on_wakeup) + (*cbs.on_wakeup)(mp->head.peer, &mp->req, &mp->resp); + mp_put(mp); + hmdfs_revert_creds(old_cred); +} + +static struct hmdfs_msg_parasite *mp_alloc(struct hmdfs_peer *peer, + const struct hmdfs_req *req) +{ + struct hmdfs_msg_parasite *mp = kzalloc(sizeof(*mp), GFP_KERNEL); + int ret; + + if (unlikely(!mp)) + return ERR_PTR(-ENOMEM); + + ret = hmdfs_alloc_msg_idr(peer, MSG_IDR_MESSAGE_ASYNC, mp); + if (unlikely(ret)) { + kfree(mp); + return ERR_PTR(ret); + } + + mp->start = jiffies; + peer_get(mp->head.peer); + mp->resp.ret_code = -ETIME; + INIT_DELAYED_WORK(&mp->d_work, async_request_cb_on_wakeup_fn); + mp->wfired = false; + mp->req = *req; + return mp; +} + +/** + * hmdfs_send_async_request - send out an async request + * @peer: target device node + * @req: request descriptor + necessary contexts + * + * Send out a request synchronously and wait for its response asynchronously + * Return -ESHUTDOWN when the device node is unreachable + * Return -EAGAIN if the network is recovering + * Return -ENOMEM if out of memory + * + * Register g_async_req_callbacks to receive the response + */ +int hmdfs_send_async_request(struct hmdfs_peer *peer, + const struct hmdfs_req *req) +{ + int ret = 0; + struct hmdfs_send_data msg; + struct hmdfs_head_cmd head; + struct hmdfs_msg_parasite *mp = NULL; + size_t msg_len = req->data_len + sizeof(struct hmdfs_head_cmd); + unsigned int timeout; + + if (req->timeout == TIMEOUT_CONFIG) + timeout = get_cmd_timeout(peer->sbi, req->operations.command); + else + timeout = req->timeout; + if (timeout == TIMEOUT_UNINIT || timeout == TIMEOUT_NONE) { + hmdfs_err("send msg %d with uninitialized/invalid timeout", + req->operations.command); + return -EINVAL; + } + + if (!hmdfs_is_node_online(peer)) + return -EAGAIN; + + mp = mp_alloc(peer, req); + if (IS_ERR(mp)) + return PTR_ERR(mp); + head.magic = HMDFS_MSG_MAGIC; + head.version = DFS_2_0; + head.data_len = cpu_to_le32(msg_len); + head.operations = mp->req.operations; + head.msg_id = cpu_to_le32(mp->head.msg_id); + head.reserved = 0; + head.reserved1 = 0; + + msg.head = &head; + msg.head_len = sizeof(head); + msg.data = mp->req.data; + msg.len = mp->req.data_len; + msg.sdesc_len = 0; + msg.sdesc = NULL; + + ret = hmdfs_sendmessage(peer, &msg); + if (unlikely(ret)) { + mp_put(mp); + goto out; + } + + queue_delayed_work(peer->async_wq, &mp->d_work, timeout * HZ); + /* + * The work may not have been queued yet when its response arrives, + * resulting in meaningless waiting.
So we use the membar to tell the + * recv thread if the work has been queued + */ + smp_store_release(&mp->wfired, true); +out: + hmdfs_dec_msg_idr_process(peer); + return ret; +} + +static int hmdfs_record_async_readdir(struct hmdfs_peer *con, + struct sendmsg_wait_queue *msg_wq) +{ + struct hmdfs_sb_info *sbi = con->sbi; + + spin_lock(&sbi->async_readdir_msg_lock); + if (sbi->async_readdir_prohibit) { + spin_unlock(&sbi->async_readdir_msg_lock); + return -EINTR; + } + + list_add(&msg_wq->async_msg, &sbi->async_readdir_msg_list); + spin_unlock(&sbi->async_readdir_msg_lock); + + return 0; +} + +static void hmdfs_untrack_async_readdir(struct hmdfs_peer *con, + struct sendmsg_wait_queue *msg_wq) +{ + struct hmdfs_sb_info *sbi = con->sbi; + + spin_lock(&sbi->async_readdir_msg_lock); + list_del(&msg_wq->async_msg); + spin_unlock(&sbi->async_readdir_msg_lock); +} + +int hmdfs_sendmessage_request(struct hmdfs_peer *con, + struct hmdfs_send_command *sm) +{ + int time_left; + int ret = 0; + struct sendmsg_wait_queue *msg_wq = NULL; + struct hmdfs_send_data msg; + size_t outlen = sm->len + sizeof(struct hmdfs_head_cmd); + unsigned int timeout = + get_cmd_timeout(con->sbi, sm->operations.command); + struct hmdfs_head_cmd *head = NULL; + bool dec = false; + + if (!hmdfs_is_node_online(con)) { + ret = -EAGAIN; + goto free_filp; + } + + if (timeout == TIMEOUT_UNINIT) { + hmdfs_err_ratelimited("send msg %d with uninitialized timeout", + sm->operations.command); + ret = -EINVAL; + goto free_filp; + } + + head = kzalloc(sizeof(struct hmdfs_head_cmd), GFP_KERNEL); + if (!head) { + ret = -ENOMEM; + goto free_filp; + } + + sm->out_buf = NULL; + head->magic = HMDFS_MSG_MAGIC; + head->version = DFS_2_0; + head->operations = sm->operations; + head->data_len = cpu_to_le32(outlen); + head->ret_code = cpu_to_le32(sm->ret_code); + head->reserved = 0; + head->reserved1 = 0; + if (timeout != TIMEOUT_NONE) { + msg_wq = kzalloc(sizeof(*msg_wq), GFP_KERNEL); + if (!msg_wq) { + ret = -ENOMEM; + goto free_filp; + } + ret = msg_init(con, msg_wq); + if (ret) { + kfree(msg_wq); + msg_wq = NULL; + goto free_filp; + } + dec = true; + head->msg_id = cpu_to_le32(msg_wq->head.msg_id); + if (sm->operations.command == F_ITERATE) + msg_wq->recv_info.local_filp = sm->local_filp; + } + msg.head = head; + msg.head_len = sizeof(struct hmdfs_head_cmd); + msg.data = sm->data; + msg.len = sm->len; + msg.sdesc_len = 0; + msg.sdesc = NULL; + ret = hmdfs_sendmessage(con, &msg); + if (ret) { + hmdfs_err_ratelimited("send err sm->device_id, %lld, msg_id %u", + con->device_id, head->msg_id); + goto free; + } + + if (timeout == TIMEOUT_NONE) + goto free; + + hmdfs_dec_msg_idr_process(con); + dec = false; + + if (sm->operations.command == F_ITERATE) { + ret = hmdfs_record_async_readdir(con, msg_wq); + if (ret) { + atomic_set(&msg_wq->recv_info.state, FILE_RECV_ERR_SPC); + goto free; + } + } + + time_left = wait_event_interruptible_timeout( + msg_wq->response_q, + (atomic_read(&msg_wq->valid) == MSG_Q_END_RECV), timeout * HZ); + + if (sm->operations.command == F_ITERATE) + hmdfs_untrack_async_readdir(con, msg_wq); + + if (time_left == -ERESTARTSYS || time_left == 0) { + hmdfs_err("timeout err sm->device_id %lld, msg_id %d cmd %d", + con->device_id, head->msg_id, + head->operations.command); + if (sm->operations.command == F_ITERATE) + atomic_set(&msg_wq->recv_info.state, FILE_RECV_ERR_NET); + ret = -ETIME; + hmdfs_client_resp_statis(con->sbi, sm->operations.command, + HMDFS_RESP_TIMEOUT, 0, 0); + goto free; + } + sm->out_buf = msg_wq->buf; + 
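/* + * Editorial note: the response buffer is handed over to the caller here; + * clearing msg_wq->buf below keeps the final msg_put() -> msg_release() + * from freeing a buffer the caller now owns. + */ +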
msg_wq->buf = NULL; + sm->out_len = msg_wq->size - sizeof(struct hmdfs_head_cmd); + ret = msg_wq->ret; + +free: + if (msg_wq) + msg_put(msg_wq); + if (dec) + hmdfs_dec_msg_idr_process(con); + kfree(head); + return ret; + +free_filp: + if (sm->local_filp) + fput(sm->local_filp); + kfree(head); + return ret; +} + +static int hmdfs_send_slice(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + struct slice_descriptor *sdesc, void *slice_buf) +{ + int ret; + struct hmdfs_send_data msg; + struct hmdfs_head_cmd head; + int content_size = le32_to_cpu(sdesc->content_size); + int msg_len = sizeof(struct hmdfs_head_cmd) + content_size + + sizeof(struct slice_descriptor); + + head.magic = HMDFS_MSG_MAGIC; + head.version = DFS_2_0; + head.operations = cmd->operations; + head.operations.cmd_flag = C_RESPONSE; + head.data_len = cpu_to_le32(msg_len); + head.ret_code = cpu_to_le32(0); + head.msg_id = cmd->msg_id; + head.reserved = cmd->reserved; + head.reserved1 = cmd->reserved1; + + msg.head = &head; + msg.head_len = sizeof(struct hmdfs_head_cmd); + msg.sdesc = sdesc; + msg.sdesc_len = le32_to_cpu(sizeof(struct slice_descriptor)); + msg.data = slice_buf; + msg.len = content_size; + + ret = hmdfs_sendmessage(con, &msg); + + return ret; +} + +int hmdfs_readfile_response(struct hmdfs_peer *con, struct hmdfs_head_cmd *head, + struct file *filp) +{ + int ret; + const unsigned int slice_size = PAGE_SIZE; + char *slice_buf = NULL; + loff_t file_offset = 0, file_size; + ssize_t size; + struct slice_descriptor sdesc; + unsigned int slice_sn = 0; + + if (!filp) + return hmdfs_sendmessage_response(con, head, 0, NULL, 0); + + sdesc.slice_size = cpu_to_le32(slice_size); + file_size = i_size_read(file_inode(filp)); + file_size = round_up(file_size, slice_size); + sdesc.num_slices = cpu_to_le32(file_size / slice_size); + + slice_buf = kmalloc(slice_size, GFP_KERNEL); + if (!slice_buf) { + ret = -ENOMEM; + goto out; + } + + while (1) { + sdesc.slice_sn = cpu_to_le32(slice_sn++); + size = kernel_read(filp, slice_buf, (size_t)slice_size, + &file_offset); + if (IS_ERR_VALUE(size)) { + ret = (int)size; + goto out; + } + sdesc.content_size = cpu_to_le32(size); + ret = hmdfs_send_slice(con, head, &sdesc, slice_buf); + if (ret) { + hmdfs_info("Cannot send file slice %d ", + le32_to_cpu(sdesc.slice_sn)); + break; + } + if (file_offset >= i_size_read(file_inode(filp))) + break; + } + +out: + kfree(slice_buf); + if (ret) + hmdfs_sendmessage_response(con, head, 0, NULL, ret); + return ret; +} + +static void asw_release(struct kref *kref) +{ + struct hmdfs_async_work *asw = NULL; + struct hmdfs_peer *peer = NULL; + + asw = (struct hmdfs_async_work *)container_of(kref, + struct hmdfs_msg_idr_head, ref); + peer = asw->head.peer; + idr_remove(&peer->msg_idr, asw->head.msg_id); + spin_unlock(&peer->idr_lock); + kfree(asw); +} + +void asw_put(struct hmdfs_async_work *asw) +{ + kref_put_lock(&asw->head.ref, asw_release, &asw->head.peer->idr_lock); +} + +void hmdfs_recv_page_work_fn(struct work_struct *ptr) +{ + struct hmdfs_async_work *async_work = + container_of(ptr, struct hmdfs_async_work, d_work.work); + + if (async_work->head.peer->version >= DFS_2_0) + hmdfs_client_resp_statis(async_work->head.peer->sbi, + F_READPAGE, HMDFS_RESP_TIMEOUT, 0, 0); + hmdfs_err_ratelimited("timeout and release page, msg_id:%u", + async_work->head.msg_id); + asw_done(async_work); +} + +int hmdfs_sendpage_request(struct hmdfs_peer *con, + struct hmdfs_send_command *sm) +{ + int ret = 0; + struct hmdfs_send_data msg; + struct hmdfs_async_work *async_work 
= NULL; + size_t outlen = sm->len + sizeof(struct hmdfs_head_cmd); + struct hmdfs_head_cmd head; + unsigned int timeout; + unsigned long start = jiffies; + + WARN_ON(!sm->out_buf); + + timeout = get_cmd_timeout(con->sbi, sm->operations.command); + if (timeout == TIMEOUT_UNINIT) { + hmdfs_err("send msg %d with uninitialized timeout", + sm->operations.command); + ret = -EINVAL; + goto unlock; + } + + if (!hmdfs_is_node_online(con)) { + ret = -EAGAIN; + goto unlock; + } + + memset(&head, 0, sizeof(head)); + head.magic = HMDFS_MSG_MAGIC; + head.version = DFS_2_0; + head.operations = sm->operations; + head.data_len = cpu_to_le32(outlen); + head.ret_code = cpu_to_le32(sm->ret_code); + head.reserved = 0; + head.reserved1 = 0; + + msg.head = &head; + msg.head_len = sizeof(struct hmdfs_head_cmd); + msg.data = sm->data; + msg.len = sm->len; + msg.sdesc_len = 0; + msg.sdesc = NULL; + + async_work = kzalloc(sizeof(*async_work), GFP_KERNEL); + if (!async_work) { + ret = -ENOMEM; + goto unlock; + } + async_work->start = start; + ret = hmdfs_alloc_msg_idr(con, MSG_IDR_PAGE, async_work); + if (ret) { + hmdfs_err("alloc msg_id failed, err %d", ret); + goto unlock; + } + head.msg_id = cpu_to_le32(async_work->head.msg_id); + async_work->page = sm->out_buf; + asw_get(async_work); + INIT_DELAYED_WORK(&async_work->d_work, hmdfs_recv_page_work_fn); + ret = queue_delayed_work(con->async_wq, &async_work->d_work, + timeout * HZ); + if (!ret) { + hmdfs_err("queue_delayed_work failed, msg_id %u", head.msg_id); + goto fail_and_unlock_page; + } + ret = hmdfs_sendmessage(con, &msg); + if (ret) { + hmdfs_err("send err sm->device_id, %lld, msg_id %u", + con->device_id, head.msg_id); + if (!cancel_delayed_work(&async_work->d_work)) { + hmdfs_err("cancel async work err"); + asw_put(async_work); + hmdfs_dec_msg_idr_process(con); + goto out; + } + goto fail_and_unlock_page; + } + + asw_put(async_work); + hmdfs_dec_msg_idr_process(con); + return 0; + +fail_and_unlock_page: + asw_put(async_work); + asw_done(async_work); + hmdfs_dec_msg_idr_process(con); + return ret; +unlock: + kfree(async_work); + unlock_page(sm->out_buf); +out: + return ret; +} + +static void hmdfs_request_handle_sync(struct hmdfs_peer *con, + struct hmdfs_head_cmd *head, void *buf) +{ + unsigned long start = jiffies; + const struct cred *saved_cred = hmdfs_override_fsids(true); + + if (!saved_cred) { + hmdfs_err("prepare cred failed!"); + kfree(buf); + return; + } + + s_recv_callbacks[head->operations.command](con, head, buf); + hmdfs_statistic(con->sbi, head->operations.command, jiffies - start); + + kfree(buf); + + hmdfs_revert_fsids(saved_cred); +} + +static void hmdfs_msg_handle_sync(struct hmdfs_peer *con, + struct hmdfs_head_cmd *head, void *buf) +{ + const struct cred *old_cred = hmdfs_override_creds(con->sbi->cred); + + /* + * Reuse PF_NPROC_EXCEEDED as an indication of hmdfs server context: + * 1. PF_NPROC_EXCEEDED will be set by setreuid()/setuid()/setresuid(); + * we assume kworker will not call these syscalls. + * 2. PF_NPROC_EXCEEDED will be cleared by execv(), and kworker + * will not call it.
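+ * + * Illustrative consequence: while a request handler runs, code may test + * (current->flags & PF_NPROC_EXCEEDED) to detect hmdfs server context.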
+ */ + current->flags |= PF_NPROC_EXCEEDED; + hmdfs_request_handle_sync(con, head, buf); + current->flags &= ~PF_NPROC_EXCEEDED; + + hmdfs_revert_creds(old_cred); +} + + +static void hmdfs_request_work_fn(struct work_struct *ptr) +{ + struct work_handler_desp *desp = + container_of(ptr, struct work_handler_desp, work); + + hmdfs_msg_handle_sync(desp->peer, desp->head, desp->buf); + peer_put(desp->peer); + kfree(desp->head); + kfree(desp); +} + +static int hmdfs_msg_handle_async(struct hmdfs_peer *con, + struct hmdfs_head_cmd *head, void *buf, + struct workqueue_struct *wq, + void (*work_fn)(struct work_struct *ptr)) +{ + struct work_handler_desp *desp = NULL; + struct hmdfs_head_cmd *dup_head = NULL; + int ret; + + desp = kzalloc(sizeof(*desp), GFP_KERNEL); + if (!desp) { + ret = -ENOMEM; + goto exit_desp; + } + + dup_head = kzalloc(sizeof(*dup_head), GFP_KERNEL); + if (!dup_head) { + ret = -ENOMEM; + goto exit_desp; + } + + *dup_head = *head; + desp->peer = con; + desp->head = dup_head; + desp->buf = buf; + INIT_WORK(&desp->work, work_fn); + + peer_get(con); + queue_work(wq, &desp->work); + + ret = 0; + return ret; + +exit_desp: + kfree(desp); + return ret; +} + +static int hmdfs_request_recv(struct hmdfs_peer *con, + struct hmdfs_head_cmd *head, void *buf) +{ + int ret; + + if (head->operations.command >= F_SIZE || + !s_recv_callbacks[head->operations.command]) { + ret = -EINVAL; + hmdfs_err("NULL callback, command %d", + head->operations.command); + goto out; + } + + switch (head->operations.command) { + case F_OPEN: + case F_RELEASE: + case F_ITERATE: + case F_MKDIR: + case F_RMDIR: + case F_CREATE: + case F_UNLINK: + case F_RENAME: + case F_SETATTR: + case F_STATFS: + case F_CONNECT_REKEY: + case F_DROP_PUSH: + case F_GETATTR: + case F_FSYNC: + case F_SYNCFS: + case F_GETXATTR: + case F_SETXATTR: + case F_LISTXATTR: + case F_READPAGES_OPEN: + case F_ATOMIC_OPEN: + ret = hmdfs_msg_handle_async(con, head, buf, con->req_handle_wq, + hmdfs_request_work_fn); + break; + case F_WRITEPAGE: + case F_READPAGE: + case F_READPAGES: + hmdfs_msg_handle_sync(con, head, buf); + ret = 0; + break; + default: + hmdfs_err("Fatal! Unexpected request command %d", + head->operations.command); + ret = -EINVAL; + } + +out: + return ret; +} + +void hmdfs_response_wakeup(struct sendmsg_wait_queue *msg_info, + __u32 ret_code, __u32 data_len, void *buf) +{ + msg_info->ret = ret_code; + msg_info->size = data_len; + msg_info->buf = buf; + atomic_set(&msg_info->valid, MSG_Q_END_RECV); + wake_up_interruptible(&msg_info->response_q); +} + +static int hmdfs_readfile_slice(struct sendmsg_wait_queue *msg_info, + struct work_handler_desp *desp) +{ + struct slice_descriptor *sdesc = desp->buf; + void *slice_buf = sdesc + 1; + struct file_recv_info *recv_info = &msg_info->recv_info; + struct file *filp = recv_info->local_filp; + loff_t offset; + ssize_t written_size; + + if (atomic_read(&recv_info->state) != FILE_RECV_PROCESS) + return -EBUSY; + + offset = le32_to_cpu(sdesc->slice_size) * le32_to_cpu(sdesc->slice_sn); + + written_size = kernel_write(filp, slice_buf, + le32_to_cpu(sdesc->content_size), &offset); + if (IS_ERR_VALUE(written_size)) { + atomic_set(&recv_info->state, FILE_RECV_ERR_SPC); + hmdfs_info("Fatal! 
Cannot store a file slice %d/%d, ret = %d", + le32_to_cpu(sdesc->slice_sn), + le32_to_cpu(sdesc->num_slices), (int)written_size); + return (int)written_size; + } + + if (atomic_inc_return(&recv_info->local_fslices) >= + le32_to_cpu(sdesc->num_slices)) + atomic_set(&recv_info->state, FILE_RECV_SUCC); + return 0; +} + +static void hmdfs_file_response_work_fn(struct work_struct *ptr) +{ + struct work_handler_desp *desp = + container_of(ptr, struct work_handler_desp, work); + struct sendmsg_wait_queue *msg_info = NULL; + int ret; + atomic_t *pstate = NULL; + u8 cmd = desp->head->operations.command; + const struct cred *old_cred = + hmdfs_override_creds(desp->peer->sbi->cred); + + msg_info = (struct sendmsg_wait_queue *)hmdfs_find_msg_head(desp->peer, + le32_to_cpu(desp->head->msg_id)); + if (!msg_info || atomic_read(&msg_info->valid) != MSG_Q_SEND) { + hmdfs_client_resp_statis(desp->peer->sbi, cmd, HMDFS_RESP_DELAY, + 0, 0); + hmdfs_info("cannot find msg(id %d)", + le32_to_cpu(desp->head->msg_id)); + goto free; + } + + ret = le32_to_cpu(desp->head->ret_code); + if (ret || le32_to_cpu(desp->head->data_len) == sizeof(*desp->head)) + goto wakeup; + ret = hmdfs_readfile_slice(msg_info, desp); + pstate = &msg_info->recv_info.state; + if (ret || atomic_read(pstate) != FILE_RECV_PROCESS) + goto wakeup; + goto free; + +wakeup: + hmdfs_response_wakeup(msg_info, ret, sizeof(struct hmdfs_head_cmd), + NULL); + hmdfs_client_resp_statis(desp->peer->sbi, cmd, HMDFS_RESP_NORMAL, + msg_info->start, jiffies); +free: + if (msg_info) + msg_put(msg_info); + peer_put(desp->peer); + hmdfs_revert_creds(old_cred); + + kfree(desp->buf); + kfree(desp->head); + kfree(desp); +} + +static void hmdfs_wait_mp_wfired(struct hmdfs_msg_parasite *mp) +{ + /* We just cancel queued works */ + while (unlikely(!smp_load_acquire(&mp->wfired))) + usleep_range(ACQUIRE_WFIRED_INTVAL_USEC_MIN, + ACQUIRE_WFIRED_INTVAL_USEC_MAX); +} + +int hmdfs_response_handle_sync(struct hmdfs_peer *con, + struct hmdfs_head_cmd *head, void *buf) +{ + struct sendmsg_wait_queue *msg_info = NULL; + struct hmdfs_msg_parasite *mp = NULL; + struct hmdfs_msg_idr_head *msg_head = NULL; + u32 msg_id = le32_to_cpu(head->msg_id); + bool woke = false; + u8 cmd = head->operations.command; + + msg_head = hmdfs_find_msg_head(con, msg_id); + if (!msg_head) + goto out; + + switch (msg_head->type) { + case MSG_IDR_MESSAGE_SYNC: + msg_info = (struct sendmsg_wait_queue *)msg_head; + if (atomic_read(&msg_info->valid) == MSG_Q_SEND) { + hmdfs_response_wakeup(msg_info, + le32_to_cpu(head->ret_code), + le32_to_cpu(head->data_len), buf); + hmdfs_client_resp_statis(con->sbi, cmd, + HMDFS_RESP_NORMAL, + msg_info->start, jiffies); + woke = true; + } + + msg_put(msg_info); + break; + case MSG_IDR_MESSAGE_ASYNC: + mp = (struct hmdfs_msg_parasite *)msg_head; + + hmdfs_wait_mp_wfired(mp); + if (cancel_delayed_work(&mp->d_work)) { + mp->resp.out_buf = buf; + mp->resp.out_len = + le32_to_cpu(head->data_len) - sizeof(*head); + mp->resp.ret_code = le32_to_cpu(head->ret_code); + queue_delayed_work(con->async_wq, &mp->d_work, 0); + hmdfs_client_resp_statis(con->sbi, cmd, + HMDFS_RESP_NORMAL, mp->start, + jiffies); + woke = true; + } + mp_put(mp); + break; + default: + hmdfs_err("receive incorrect msg type %d msg_id %d cmd %d", + msg_head->type, msg_id, cmd); + break; + } + + if (likely(woke)) + return 0; +out: + hmdfs_client_resp_statis(con->sbi, cmd, HMDFS_RESP_DELAY, 0, 0); + hmdfs_info("cannot find msg_id %d cmd %d", msg_id, cmd); + return -EINVAL; +} + +static int 
hmdfs_response_recv(struct hmdfs_peer *con, + struct hmdfs_head_cmd *head, void *buf) +{ + __u16 command = head->operations.command; + int ret; + + if (command >= F_SIZE) { + ret = -EINVAL; + return ret; + } + + switch (head->operations.command) { + case F_OPEN: + case F_RELEASE: + case F_READPAGE: + case F_WRITEPAGE: + case F_MKDIR: + case F_RMDIR: + case F_CREATE: + case F_UNLINK: + case F_RENAME: + case F_SETATTR: + case F_STATFS: + case F_CONNECT_REKEY: + case F_DROP_PUSH: + case F_GETATTR: + case F_FSYNC: + case F_SYNCFS: + case F_GETXATTR: + case F_SETXATTR: + case F_LISTXATTR: + ret = hmdfs_response_handle_sync(con, head, buf); + return ret; + + case F_ITERATE: + ret = hmdfs_msg_handle_async(con, head, buf, con->async_wq, + hmdfs_file_response_work_fn); + return ret; + + default: + hmdfs_err("Fatal! Unexpected response command %d", + head->operations.command); + ret = -EINVAL; + return ret; + } +} + +static void hmdfs_recv_mesg_callback(struct hmdfs_peer *con, void *head, + void *buf) +{ + struct hmdfs_head_cmd *hmdfs_head = (struct hmdfs_head_cmd *)head; + + trace_hmdfs_recv_mesg_callback(hmdfs_head); + + if (hmdfs_message_verify(con, hmdfs_head, buf) < 0) { + hmdfs_info("Message %d has been abandoned", hmdfs_head->msg_id); + goto out_err; + } + + switch (hmdfs_head->operations.cmd_flag) { + case C_REQUEST: + if (hmdfs_request_recv(con, hmdfs_head, buf) < 0) + goto out_err; + break; + + case C_RESPONSE: + if (hmdfs_response_recv(con, hmdfs_head, buf) < 0) + goto out_err; + break; + + default: + hmdfs_err("Fatal! Unexpected msg cmd %d", + hmdfs_head->operations.cmd_flag); + goto out_err; + } + return; + +out_err: + kfree(buf); +} + +static inline void hmdfs_recv_page_callback(struct hmdfs_peer *con, + struct hmdfs_head_cmd *head, + int err, void *data) +{ + if (head->operations.command == F_READPAGE) + hmdfs_client_recv_readpage(head, err, data); +} + +static const struct connection_operations conn_operations[] = { + [PROTOCOL_VERSION] = { + .recvmsg = hmdfs_recv_mesg_callback, + .recvpage = hmdfs_recv_page_callback, + /* remote device operations */ + .remote_file_fops = + &hmdfs_dev_file_fops_remote, + .remote_file_iops = + &hmdfs_dev_file_iops_remote, + .remote_file_aops = + &hmdfs_dev_file_aops_remote, + .remote_unlink = + hmdfs_dev_unlink_from_con, + .remote_readdir = + hmdfs_dev_readdir_from_con, + } +}; + +const struct connection_operations *hmdfs_get_peer_operation(__u8 version) +{ + if (version <= INVALID_VERSION || version >= MAX_VERSION) + return NULL; + + if (version <= USERSPACE_MAX_VER) + return &(conn_operations[USERDFS_VERSION]); + else + return &(conn_operations[PROTOCOL_VERSION]); +} + +void hmdfs_wakeup_parasite(struct hmdfs_msg_parasite *mp) +{ + hmdfs_wait_mp_wfired(mp); + if (!cancel_delayed_work(&mp->d_work)) + hmdfs_err("cancel parasite work err msg_id=%d cmd=%d", + mp->head.msg_id, mp->req.operations.command); + else + async_request_cb_on_wakeup_fn(&mp->d_work.work); +} + +void hmdfs_wakeup_async_work(struct hmdfs_async_work *async_work) +{ + if (!cancel_delayed_work(&async_work->d_work)) + hmdfs_err("cancel async work err msg_id=%d", + async_work->head.msg_id); + else + hmdfs_recv_page_work_fn(&async_work->d_work.work); +} diff --git a/fs/hmdfs/comm/socket_adapter.h b/fs/hmdfs/comm/socket_adapter.h new file mode 100755 index 000000000..ba4c672d7 --- /dev/null +++ b/fs/hmdfs/comm/socket_adapter.h @@ -0,0 +1,193 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/comm/socket_adapter.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#ifndef SOCKET_ADAPTER_H +#define SOCKET_ADAPTER_H + +#include +#include + +#include "connection.h" +#include "hmdfs.h" +#include "protocol.h" + +#define HMDFS_KEY_SIZE 32 +#define HMDFS_IV_SIZE 12 +#define HMDFS_TAG_SIZE 16 +#define HMDFS_CID_SIZE 64 +#define INVALID_SOCKET_FD (-1) + +#define HMDFS_IDR_RESCHED_COUNT 512 + +struct connection_operations { + void (*recvmsg)(struct hmdfs_peer *con, void *head, void *buf); + void (*recvpage)(struct hmdfs_peer *con, struct hmdfs_head_cmd *head, + int err, void *data); + const struct file_operations *remote_file_fops; + const struct inode_operations *remote_file_iops; + const struct address_space_operations *remote_file_aops; + int (*remote_unlink)(struct hmdfs_peer *con, struct dentry *dentry); + int (*remote_readdir)(struct hmdfs_peer *con, struct file *file, + struct dir_context *ctx); + struct hmdfs_lookup_ret *(*remote_lookup)(struct hmdfs_peer *con, + const char *relative_path, + const char *d_name); +}; + +/***************************************************************************** + * connections (TCP, UDP, etc.) adapter for RPC + *****************************************************************************/ + +struct work_handler_desp { + struct work_struct work; + struct hmdfs_peer *peer; + struct hmdfs_head_cmd *head; + void *buf; +}; + +struct work_readfile_request_async { + struct work_struct work; + struct hmdfs_peer *con; + struct hmdfs_send_command sm; +}; + +static inline void hmdfs_init_cmd(struct hmdfs_cmd *op, u8 cmd) +{ + op->reserved = 0; + op->cmd_flag = C_REQUEST; + op->command = cmd; + op->reserved2 = 0; +} + +int hmdfs_send_async_request(struct hmdfs_peer *peer, + const struct hmdfs_req *req); +int hmdfs_sendmessage_request(struct hmdfs_peer *con, + struct hmdfs_send_command *msg); +int hmdfs_sendpage_request(struct hmdfs_peer *con, + struct hmdfs_send_command *msg); + +int hmdfs_sendmessage_response(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, __u32 data_len, + void *buf, __u32 ret_code); +int hmdfs_readfile_response(struct hmdfs_peer *con, struct hmdfs_head_cmd *head, + struct file *filp); +const struct connection_operations *hmdfs_get_peer_operation(__u8 version); + +void hmdfs_recv_page_work_fn(struct work_struct *ptr); + +/***************************************************************************** + * statistics info for RPC + *****************************************************************************/ + +enum hmdfs_resp_type { + HMDFS_RESP_NORMAL, + HMDFS_RESP_DELAY, + HMDFS_RESP_TIMEOUT +}; + +struct server_statistic { + unsigned long long cnt; /* request received */ + unsigned long long max; /* max processing time */ + unsigned long long total; /* total processing time */ + unsigned long long snd_cnt; /* resp send to client */ + unsigned long long snd_fail_cnt; /* send resp to client failed cnt */ +}; + +struct client_statistic { + unsigned long long snd_cnt; /* request send to server */ + unsigned long long resp_cnt; /* response receive from server */ + unsigned long long timeout_cnt; /* no response from server */ + unsigned long long delay_resp_cnt; /* delay response from server */ + unsigned long long max; /* max waiting time */ + unsigned long long total; /* total waiting time */ + unsigned long long snd_fail_cnt; /* request send failed to server */ +}; + + +static inline void hmdfs_statistic(struct hmdfs_sb_info *sbi, u8 cmd, + unsigned long jiff) +{ + if (cmd >= F_SIZE) + return; + + sbi->s_server_statis[cmd].cnt++; + sbi->s_server_statis[cmd].total += jiff; + if (jiff >
sbi->s_server_statis[cmd].max) + sbi->s_server_statis[cmd].max = jiff; +} + +static inline void hmdfs_server_snd_statis(struct hmdfs_sb_info *sbi, + u8 cmd, int ret) +{ + if (cmd >= F_SIZE) + return; + ret ? sbi->s_server_statis[cmd].snd_fail_cnt++ : + sbi->s_server_statis[cmd].snd_cnt++; +} + +static inline void hmdfs_client_snd_statis(struct hmdfs_sb_info *sbi, + u8 cmd, int ret) +{ + if (cmd >= F_SIZE) + return; + ret ? sbi->s_client_statis[cmd].snd_fail_cnt++ : + sbi->s_client_statis[cmd].snd_cnt++; +} + +extern void hmdfs_client_resp_statis(struct hmdfs_sb_info *sbi, u8 cmd, + enum hmdfs_resp_type type, + unsigned long start, unsigned long end); + +/***************************************************************************** + * timeout configuration for RPC + *****************************************************************************/ + +enum HMDFS_TIME_OUT { + TIMEOUT_NONE = 0, + TIMEOUT_COMMON = 4, + TIMEOUT_6S = 6, + TIMEOUT_30S = 30, + TIMEOUT_1M = 60, + TIMEOUT_90S = 90, + TIMEOUT_CONFIG = UINT_MAX - 1, // for hmdfs_req to read from config + TIMEOUT_UNINIT = UINT_MAX, +}; + +static inline int get_cmd_timeout(struct hmdfs_sb_info *sbi, enum FILE_CMD cmd) +{ + return sbi->s_cmd_timeout[cmd]; +} + +static inline void set_cmd_timeout(struct hmdfs_sb_info *sbi, enum FILE_CMD cmd, + unsigned int value) +{ + sbi->s_cmd_timeout[cmd] = value; +} + +void hmdfs_response_wakeup(struct sendmsg_wait_queue *msg_info, + __u32 ret_code, __u32 data_len, void *buf); + +void hmdfs_wakeup_parasite(struct hmdfs_msg_parasite *mp); + +void hmdfs_wakeup_async_work(struct hmdfs_async_work *async_work); + +void msg_put(struct sendmsg_wait_queue *msg_wq); +void head_put(struct hmdfs_msg_idr_head *head); +void mp_put(struct hmdfs_msg_parasite *mp); +void asw_put(struct hmdfs_async_work *asw); +static inline void asw_done(struct hmdfs_async_work *asw) +{ + if (asw->page) + unlock_page(asw->page); + asw_put(asw); +} + +static inline void asw_get(struct hmdfs_async_work *asw) +{ + kref_get(&asw->head.ref); +} +#endif diff --git a/fs/hmdfs/comm/transport.c b/fs/hmdfs/comm/transport.c new file mode 100755 index 000000000..c9eaaa7dc --- /dev/null +++ b/fs/hmdfs/comm/transport.c @@ -0,0 +1,1218 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/comm/transport.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
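+ * + * Editorial overview: the TCP transport first reads a fixed-size + * struct hmdfs_head_cmd, validates its magic and data_len, then reads the + * remaining data_len - sizeof(head) payload bytes, optionally decrypting + * them (tcp_recvbuffer_cipher/tcp_recvbuffer_tls) before handing the + * message to the callback for the current connection state.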
+ */ + +#include "transport.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "device_node.h" +#include "hmdfs_trace.h" +#include "socket_adapter.h" +#include "authority/authentication.h" + +#ifdef CONFIG_HMDFS_FS_ENCRYPTION +#include +#include "crypto.h" +#endif + +typedef void (*connect_recv_handler)(struct connection *, void *, void *, + __u32); + +static connect_recv_handler connect_recv_callback[CONNECT_STAT_COUNT] = { + [CONNECT_STAT_WAIT_REQUEST] = connection_handshake_recv_handler, + [CONNECT_STAT_WAIT_RESPONSE] = connection_handshake_recv_handler, + [CONNECT_STAT_WORKING] = connection_working_recv_handler, + [CONNECT_STAT_STOP] = NULL, + [CONNECT_STAT_WAIT_ACK] = connection_handshake_recv_handler, + [CONNECT_STAT_NEGO_FAIL] = NULL, +}; + +static int recvmsg_nofs(struct socket *sock, struct msghdr *msg, + struct kvec *vec, size_t num, size_t size, int flags) +{ + unsigned int nofs_flags; + int ret; + + /* enable NOFS for memory allocation */ + nofs_flags = memalloc_nofs_save(); + ret = kernel_recvmsg(sock, msg, vec, num, size, flags); + memalloc_nofs_restore(nofs_flags); + + return ret; +} + +static int sendmsg_nofs(struct socket *sock, struct msghdr *msg, + struct kvec *vec, size_t num, size_t size) +{ + unsigned int nofs_flags; + int ret; + + /* enable NOFS for memory allocation */ + nofs_flags = memalloc_nofs_save(); + ret = kernel_sendmsg(sock, msg, vec, num, size); + memalloc_nofs_restore(nofs_flags); + + return ret; +} + +static int tcp_set_recvtimeo(struct socket *sock, int timeout) +{ + long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC); + + tcp_sock_set_nodelay(sock->sk); + tcp_sock_set_user_timeout(sock->sk, jiffies_left); + return 0; +} + +uint32_t hmdfs_tcpi_rtt(struct hmdfs_peer *con) +{ + uint32_t rtt_us = 0; + struct connection *conn_impl = NULL; + struct tcp_handle *tcp = NULL; + + conn_impl = get_conn_impl(con, CONNECT_TYPE_TCP); + if (!conn_impl) + return rtt_us; + tcp = (struct tcp_handle *)(conn_impl->connect_handle); + if (tcp->sock) + rtt_us = tcp_sk(tcp->sock->sk)->srtt_us >> 3; + connection_put(conn_impl); + return rtt_us; +} + +static int tcp_read_head_from_socket(struct socket *sock, void *buf, + unsigned int to_read) +{ + int rc = 0; + struct msghdr hmdfs_msg; + struct kvec iov; + + iov.iov_base = buf; + iov.iov_len = to_read; + memset(&hmdfs_msg, 0, sizeof(hmdfs_msg)); + hmdfs_msg.msg_flags = MSG_WAITALL; + hmdfs_msg.msg_control = NULL; + hmdfs_msg.msg_controllen = 0; + rc = recvmsg_nofs(sock, &hmdfs_msg, &iov, 1, to_read, + hmdfs_msg.msg_flags); + if (rc == -EAGAIN || rc == -ETIMEDOUT || rc == -EINTR || + rc == -EBADMSG) { + usleep_range(1000, 2000); + return -EAGAIN; + } + // error occurred + if (rc != to_read) { + hmdfs_err("tcp recv error %d", rc); + return -ESHUTDOWN; + } + return 0; +} + +static int tcp_read_buffer_from_socket(struct socket *sock, void *buf, + unsigned int to_read) +{ + int read_cnt = 0; + int retry_time = 0; + int rc = 0; + struct msghdr hmdfs_msg; + struct kvec iov; + + do { + iov.iov_base = (char *)buf + read_cnt; + iov.iov_len = to_read - read_cnt; + memset(&hmdfs_msg, 0, sizeof(hmdfs_msg)); + hmdfs_msg.msg_flags = MSG_WAITALL; + hmdfs_msg.msg_control = NULL; + hmdfs_msg.msg_controllen = 0; + rc = recvmsg_nofs(sock, &hmdfs_msg, &iov, 1, + to_read - read_cnt, hmdfs_msg.msg_flags); + if (rc == -EBADMSG) { + usleep_range(1000, 2000); + continue; + } + if (rc == -EAGAIN || rc == -ETIMEDOUT || rc == -EINTR) { + retry_time++; + hmdfs_info("read again %d", rc); + 
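/* + * Editorial note: transient socket errors back off for 1-2 ms and retry; + * retry_time caps the retries at MAX_RECV_RETRY_TIMES, after which a + * short read falls through to the -ESHUTDOWN path below. + */ +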
usleep_range(1000, 2000); + continue; + } + // error occurred + if (rc <= 0) { + hmdfs_err("tcp recv error %d", rc); + return -ESHUTDOWN; + } + read_cnt += rc; + if (read_cnt != to_read) + hmdfs_info("read again %d/%d", read_cnt, to_read); + } while (read_cnt < to_read && retry_time < MAX_RECV_RETRY_TIMES); + if (read_cnt == to_read) + return 0; + return -ESHUTDOWN; +} + +static int hmdfs_drop_readpage_buffer(struct socket *sock, + struct hmdfs_head_cmd *recv) +{ + unsigned int len; + void *buf = NULL; + int err; + + len = le32_to_cpu(recv->data_len) - sizeof(struct hmdfs_head_cmd); + if (len > HMDFS_PAGE_SIZE || !len) { + hmdfs_err("recv invalid readpage length %u", len); + return -EINVAL; + } + + /* Abort the connection if no memory */ + buf = kmalloc(len, GFP_KERNEL); + if (!buf) + return -ESHUTDOWN; + + err = tcp_read_buffer_from_socket(sock, buf, len); + kfree(buf); + + return err; +} + +static int hmdfs_get_readpage_buffer(struct socket *sock, + struct hmdfs_head_cmd *recv, + struct page *page) +{ + char *page_buf = NULL; + unsigned int out_len; + int err; + + out_len = le32_to_cpu(recv->data_len) - sizeof(struct hmdfs_head_cmd); + if (out_len > HMDFS_PAGE_SIZE || !out_len) { + hmdfs_err("recv invalid readpage length %u", out_len); + return -EINVAL; + } + + page_buf = kmap(page); + err = tcp_read_buffer_from_socket(sock, page_buf, out_len); + if (err) + goto out_unmap; + if (out_len != HMDFS_PAGE_SIZE) + memset(page_buf + out_len, 0, HMDFS_PAGE_SIZE - out_len); + +out_unmap: + kunmap(page); + return err; +} + +static int tcp_recvpage_tls(struct connection *connect, + struct hmdfs_head_cmd *recv) +{ + int ret = 0; + struct tcp_handle *tcp = NULL; + struct hmdfs_peer *node = NULL; + struct page *page = NULL; + struct hmdfs_async_work *async_work = NULL; + int rd_err; + + if (!connect) { + hmdfs_err("tcp connect == NULL"); + return -ESHUTDOWN; + } + node = connect->node; + tcp = (struct tcp_handle *)(connect->connect_handle); + + rd_err = le32_to_cpu(recv->ret_code); + if (rd_err) + hmdfs_warning("tcp: readpage from peer %llu ret err %d", + node->device_id, rd_err); + + async_work = (struct hmdfs_async_work *)hmdfs_find_msg_head(node, + le32_to_cpu(recv->msg_id)); + if (!async_work || !cancel_delayed_work(&async_work->d_work)) + goto out; + + page = async_work->page; + if (!page) { + hmdfs_err("page not found"); + goto out; + } + + if (!rd_err) { + ret = hmdfs_get_readpage_buffer(tcp->sock, recv, page); + if (ret) + rd_err = ret; + } + node->conn_operations->recvpage(node, recv, rd_err, async_work); + asw_put(async_work); + return ret; + +out: + /* async_work will be released by recvpage in the normal procedure */ + if (async_work) + asw_put(async_work); + hmdfs_err_ratelimited("timeout, drop page"); + hmdfs_client_resp_statis(node->sbi, F_READPAGE, HMDFS_RESP_DELAY, 0, 0); + if (!rd_err) + ret = hmdfs_drop_readpage_buffer(tcp->sock, recv); + return ret; +} + +static void aeadcipher_cb(struct crypto_async_request *req, int error) +{ + struct aeadcrypt_result *result = req->data; + + if (error == -EINPROGRESS) + return; + result->err = error; + complete(&result->completion); +} + +static int aeadcipher_en_de(struct aead_request *req, + struct aeadcrypt_result result, int flag) +{ + int rc = 0; + + if (flag) + rc = crypto_aead_encrypt(req); + else + rc = crypto_aead_decrypt(req); + switch (rc) { + case 0: + break; + case -EINPROGRESS: + case -EBUSY: + rc = wait_for_completion_interruptible(&result.completion); + if (!rc && !result.err) + reinit_completion(&result.completion); + break; +
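/* + * Editorial note: -EINPROGRESS/-EBUSY mean the request was queued; the + * aeadcipher_cb() callback completes result.completion when the operation + * finishes, which the branch above waits on. + */ +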
+	default:
+		hmdfs_err("returned rc %d result %d", rc, result.err);
+		break;
+	}
+	return rc;
+}
+
+static int set_aeadcipher(struct crypto_aead *tfm, struct aead_request *req,
+			  struct aeadcrypt_result *result)
+{
+	init_completion(&result->completion);
+	aead_request_set_callback(
+		req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
+		aeadcipher_cb, result);
+	return 0;
+}
+
+int aeadcipher_encrypt_buffer(struct connection *con, __u8 *src_buf,
+			      size_t src_len, __u8 *dst_buf, size_t dst_len)
+{
+	int ret = 0;
+	struct scatterlist src, dst;
+	struct aead_request *req = NULL;
+	struct aeadcrypt_result result;
+	__u8 cipher_iv[HMDFS_IV_SIZE];
+
+	if (src_len <= 0)
+		return -EINVAL;
+	if (!virt_addr_valid(src_buf) || !virt_addr_valid(dst_buf)) {
+		WARN_ON(1);
+		hmdfs_err("encrypt address is invalid");
+		return -EPERM;
+	}
+
+	get_random_bytes(cipher_iv, HMDFS_IV_SIZE);
+	memcpy(dst_buf, cipher_iv, HMDFS_IV_SIZE);
+	req = aead_request_alloc(con->tfm, GFP_KERNEL);
+	if (!req) {
+		hmdfs_err("aead_request_alloc() failed");
+		return -ENOMEM;
+	}
+	ret = set_aeadcipher(con->tfm, req, &result);
+	if (ret) {
+		hmdfs_err("set_aeadcipher failed");
+		goto out;
+	}
+
+	sg_init_one(&src, src_buf, src_len);
+	sg_init_one(&dst, dst_buf + HMDFS_IV_SIZE, dst_len - HMDFS_IV_SIZE);
+	aead_request_set_crypt(req, &src, &dst, src_len, cipher_iv);
+	aead_request_set_ad(req, 0);
+	ret = aeadcipher_en_de(req, result, ENCRYPT_FLAG);
+out:
+	aead_request_free(req);
+	return ret;
+}
+
+int aeadcipher_decrypt_buffer(struct connection *con, __u8 *src_buf,
+			      size_t src_len, __u8 *dst_buf, size_t dst_len)
+{
+	int ret = 0;
+	struct scatterlist src, dst;
+	struct aead_request *req = NULL;
+	struct aeadcrypt_result result;
+	__u8 cipher_iv[HMDFS_IV_SIZE];
+
+	if (src_len <= HMDFS_IV_SIZE + HMDFS_TAG_SIZE)
+		return -EINVAL;
+	if (!virt_addr_valid(src_buf) || !virt_addr_valid(dst_buf)) {
+		WARN_ON(1);
+		hmdfs_err("decrypt address is invalid");
+		return -EPERM;
+	}
+
+	memcpy(cipher_iv, src_buf, HMDFS_IV_SIZE);
+	req = aead_request_alloc(con->tfm, GFP_KERNEL);
+	if (!req) {
+		hmdfs_err("aead_request_alloc() failed");
+		return -ENOMEM;
+	}
+	ret = set_aeadcipher(con->tfm, req, &result);
+	if (ret) {
+		hmdfs_err("set_aeadcipher failed");
+		goto out;
+	}
+
+	sg_init_one(&src, src_buf + HMDFS_IV_SIZE, src_len - HMDFS_IV_SIZE);
+	sg_init_one(&dst, dst_buf, dst_len);
+	aead_request_set_crypt(req, &src, &dst, src_len - HMDFS_IV_SIZE,
+			       cipher_iv);
+	aead_request_set_ad(req, 0);
+	ret = aeadcipher_en_de(req, result, DECRYPT_FLAG);
+out:
+	aead_request_free(req);
+	return ret;
+}
+
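+/*
+ * While a connection is still in handshake or speaking the legacy
+ * HMDFS 1.0 protocol, each message is a plaintext struct hmdfs_head_cmd
+ * followed by an AEAD-sealed payload:
+ *
+ *	[ head ][ IV (HMDFS_IV_SIZE) | ciphertext | tag (HMDFS_TAG_SIZE) ]
+ *
+ * For a header advertising data_len bytes in total, the sealed region
+ * read off the socket below is therefore
+ *
+ *	cipherbuffer_len = data_len - sizeof(struct hmdfs_head_cmd)
+ *			   + HMDFS_IV_SIZE + HMDFS_TAG_SIZE;
+ *
+ * and decryption yields cipherbuffer_len - HMDFS_IV_SIZE -
+ * HMDFS_TAG_SIZE bytes of plaintext.
+ */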
+static int tcp_recvbuffer_cipher(struct connection *connect,
+				 struct hmdfs_head_cmd *recv)
+{
+	int ret = 0;
+	struct tcp_handle *tcp = NULL;
+	size_t cipherbuffer_len;
+	__u8 *cipherbuffer = NULL;
+	size_t outlen = 0;
+	__u8 *outdata = NULL;
+	__u32 recv_len = le32_to_cpu(recv->data_len);
+
+	tcp = (struct tcp_handle *)(connect->connect_handle);
+	if (recv_len == sizeof(struct hmdfs_head_cmd))
+		goto out_recv_head;
+	else if (recv_len > sizeof(struct hmdfs_head_cmd) &&
+		 recv_len <= ADAPTER_MESSAGE_LENGTH)
+		cipherbuffer_len = recv_len - sizeof(struct hmdfs_head_cmd) +
+				   HMDFS_IV_SIZE + HMDFS_TAG_SIZE;
+	else
+		return -ENOMSG;
+	cipherbuffer = kzalloc(cipherbuffer_len, GFP_KERNEL);
+	if (!cipherbuffer) {
+		hmdfs_err("failed to alloc cipherbuffer");
+		return -ESHUTDOWN;
+	}
+	outlen = cipherbuffer_len - HMDFS_IV_SIZE - HMDFS_TAG_SIZE;
+	outdata = kzalloc(outlen, GFP_KERNEL);
+	if (!outdata) {
+		hmdfs_err("failed to alloc outdata for decryption");
+		kfree(cipherbuffer);
+		return -ESHUTDOWN;
+	}
+
+	ret = tcp_read_buffer_from_socket(tcp->sock, cipherbuffer,
+					  cipherbuffer_len);
+	if (ret)
+		goto out_recv;
+	ret = aeadcipher_decrypt_buffer(connect, cipherbuffer, cipherbuffer_len,
+					outdata, outlen);
+	if (ret) {
+		hmdfs_err("decrypt_buf fail");
+		goto out_recv;
+	}
+out_recv_head:
+	if (connect_recv_callback[connect->status]) {
+		connect_recv_callback[connect->status](connect, recv, outdata,
+						       outlen);
+	} else {
+		kfree(outdata);
+		hmdfs_err("encrypt callback NULL status %d", connect->status);
+	}
+	kfree(cipherbuffer);
+	return ret;
+out_recv:
+	kfree(cipherbuffer);
+	kfree(outdata);
+	return ret;
+}
+
+static int tcp_recvbuffer_tls(struct connection *connect,
+			      struct hmdfs_head_cmd *recv)
+{
+	int ret = 0;
+	struct tcp_handle *tcp = NULL;
+	size_t outlen;
+	__u8 *outdata = NULL;
+	__u32 recv_len = le32_to_cpu(recv->data_len);
+
+	tcp = (struct tcp_handle *)(connect->connect_handle);
+	outlen = recv_len - sizeof(struct hmdfs_head_cmd);
+	if (outlen == 0)
+		goto out_recv_head;
+
+	/*
+	 * NOTE: Up to half of the allocated memory may be wasted due to
+	 * internal fragmentation, however the number of memory allocations
+	 * can be reduced and we don't have to adjust the existing message
+	 * transporting mechanism
+	 */
+	outdata = kmalloc(outlen, GFP_KERNEL);
+	if (!outdata)
+		return -ESHUTDOWN;
+
+	ret = tcp_read_buffer_from_socket(tcp->sock, outdata, outlen);
+	if (ret) {
+		kfree(outdata);
+		return ret;
+	}
+	tcp->connect->stat.recv_bytes += outlen;
+out_recv_head:
+	if (connect_recv_callback[connect->status]) {
+		connect_recv_callback[connect->status](connect, recv, outdata,
+						       outlen);
+	} else {
+		kfree(outdata);
+		hmdfs_err("callback NULL status %d", connect->status);
+	}
+	return 0;
+}
+
+static int tcp_receive_from_sock(struct tcp_handle *tcp)
+{
+	struct hmdfs_head_cmd *recv = NULL;
+	int ret = 0;
+
+	if (!tcp) {
+		hmdfs_info("tcp recv thread !tcp");
+		return -ESHUTDOWN;
+	}
+
+	if (!tcp->sock) {
+		hmdfs_info("tcp recv thread !sock");
+		return -ESHUTDOWN;
+	}
+
+	recv = kmem_cache_alloc(tcp->recv_cache, GFP_KERNEL);
+	if (!recv) {
+		hmdfs_info("tcp recv thread !cache");
+		return -ESHUTDOWN;
+	}
+
+	ret = tcp_read_head_from_socket(tcp->sock, recv,
+					sizeof(struct hmdfs_head_cmd));
+	if (ret)
+		goto out;
+
+	tcp->connect->stat.recv_bytes += sizeof(struct hmdfs_head_cmd);
+	tcp->connect->stat.recv_message_count++;
+
+	if (recv->magic != HMDFS_MSG_MAGIC) {
+		hmdfs_info_ratelimited("tcp recv fd %d wrong magic. drop message",
+				       tcp->fd);
+		goto out;
+	}
+
+	if ((le32_to_cpu(recv->data_len) >
+	     HMDFS_MAX_MESSAGE_LEN + sizeof(struct hmdfs_head_cmd)) ||
+	    (le32_to_cpu(recv->data_len) < sizeof(struct hmdfs_head_cmd))) {
+		hmdfs_info("tcp recv fd %d length error. 
drop message", + tcp->fd); + goto out; + } + + if (recv->version > USERSPACE_MAX_VER && + tcp->connect->status == CONNECT_STAT_WORKING && + recv->operations.command == F_READPAGE && + recv->operations.cmd_flag == C_RESPONSE) { + ret = tcp_recvpage_tls(tcp->connect, recv); + goto out; + } + + if (tcp->connect->status == CONNECT_STAT_WORKING && + recv->version > USERSPACE_MAX_VER) + ret = tcp_recvbuffer_tls(tcp->connect, recv); + else + ret = tcp_recvbuffer_cipher(tcp->connect, recv); + +out: + kmem_cache_free(tcp->recv_cache, recv); + return ret; +} + +static bool tcp_handle_is_available(struct tcp_handle *tcp) +{ +#ifdef CONFIG_HMDFS_FS_ENCRYPTION + struct tls_context *tls_ctx = NULL; + struct tls_sw_context_rx *ctx = NULL; + +#endif + if (!tcp || !tcp->sock || !tcp->sock->sk) { + hmdfs_err("Invalid tcp connection"); + return false; + } + + if (tcp->sock->sk->sk_state != TCP_ESTABLISHED) { + hmdfs_err("TCP conn %d is broken, current sk_state is %d", + tcp->fd, tcp->sock->sk->sk_state); + return false; + } + + if (tcp->sock->state != SS_CONNECTING && + tcp->sock->state != SS_CONNECTED) { + hmdfs_err("TCP conn %d is broken, current sock state is %d", + tcp->fd, tcp->sock->state); + return false; + } + +#ifdef CONFIG_HMDFS_FS_ENCRYPTION + tls_ctx = tls_get_ctx(tcp->sock->sk); + if (tls_ctx) { + ctx = tls_sw_ctx_rx(tls_ctx); + if (ctx && ctx->strp.stopped) { + hmdfs_err( + "TCP conn %d is broken, the strparser has stopped", + tcp->fd); + return false; + } + } +#endif + return true; +} + +static int tcp_recv_thread(void *arg) +{ + int ret = 0; + struct tcp_handle *tcp = (struct tcp_handle *)arg; + const struct cred *old_cred; + + WARN_ON(!tcp); + WARN_ON(!tcp->sock); + set_freezable(); + + old_cred = hmdfs_override_creds(tcp->connect->node->sbi->system_cred); + + while (!kthread_should_stop()) { + /* + * 1. In case the redundant connection has not been mounted on + * a peer + * 2. Lock is unnecessary since a transient state is acceptable + */ + if (tcp_handle_is_available(tcp) && + list_empty(&tcp->connect->list)) + goto freeze; + if (!mutex_trylock(&tcp->close_mutex)) + continue; + if (tcp_handle_is_available(tcp)) + ret = tcp_receive_from_sock(tcp); + else + ret = -ESHUTDOWN; + /* + * This kthread will exit if ret is -ESHUTDOWN, thus we need to + * set recv_task to NULL to avoid calling kthread_stop() from + * tcp_close_socket(). + */ + if (ret == -ESHUTDOWN) + tcp->recv_task = NULL; + mutex_unlock(&tcp->close_mutex); + if (ret == -ESHUTDOWN) { + hmdfs_node_inc_evt_seq(tcp->connect->node); + tcp->connect->status = CONNECT_STAT_STOP; + if (tcp->connect->node->status != NODE_STAT_OFFLINE) + hmdfs_reget_connection(tcp->connect); + break; + } +freeze: + schedule(); + try_to_freeze(); + } + + hmdfs_info("Exiting. 
Now, sock state = %d", tcp->sock->state); + hmdfs_revert_creds(old_cred); + connection_put(tcp->connect); + return 0; +} + +static int tcp_send_message_sock_cipher(struct tcp_handle *tcp, + struct hmdfs_send_data *msg) +{ + int ret = 0; + __u8 *outdata = NULL; + size_t outlen = 0; + int send_len = 0; + int send_vec_cnt = 0; + struct msghdr tcp_msg; + struct kvec iov[TCP_KVEC_ELE_DOUBLE]; + + memset(&tcp_msg, 0, sizeof(tcp_msg)); + if (!tcp || !tcp->sock) { + hmdfs_err("encrypt tcp socket = NULL"); + return -ESHUTDOWN; + } + iov[0].iov_base = msg->head; + iov[0].iov_len = msg->head_len; + send_vec_cnt = TCP_KVEC_HEAD; + if (msg->len == 0) + goto send; + + outlen = msg->len + HMDFS_IV_SIZE + HMDFS_TAG_SIZE; + outdata = kzalloc(outlen, GFP_KERNEL); + if (!outdata) { + hmdfs_err("tcp send message encrypt fail to alloc outdata"); + return -ENOMEM; + } + ret = aeadcipher_encrypt_buffer(tcp->connect, msg->data, msg->len, + outdata, outlen); + if (ret) { + hmdfs_err("encrypt_buf fail"); + goto out; + } + iov[1].iov_base = outdata; + iov[1].iov_len = outlen; + send_vec_cnt = TCP_KVEC_ELE_DOUBLE; +send: + mutex_lock(&tcp->send_mutex); + send_len = sendmsg_nofs(tcp->sock, &tcp_msg, iov, send_vec_cnt, + msg->head_len + outlen); + mutex_unlock(&tcp->send_mutex); + if (send_len <= 0) { + hmdfs_err("error %d", send_len); + ret = -ESHUTDOWN; + } else if (send_len != msg->head_len + outlen) { + hmdfs_err("send part of message. %d/%zu", send_len, + msg->head_len + outlen); + ret = -EAGAIN; + } else { + ret = 0; + } +out: + kfree(outdata); + return ret; +} + +static int tcp_send_message_sock_tls(struct tcp_handle *tcp, + struct hmdfs_send_data *msg) +{ + int send_len = 0; + int send_vec_cnt = 0; + struct msghdr tcp_msg; + struct kvec iov[TCP_KVEC_ELE_TRIPLE]; + + memset(&tcp_msg, 0, sizeof(tcp_msg)); + if (!tcp || !tcp->sock) { + hmdfs_err("tcp socket = NULL"); + return -ESHUTDOWN; + } + iov[TCP_KVEC_HEAD].iov_base = msg->head; + iov[TCP_KVEC_HEAD].iov_len = msg->head_len; + if (msg->len == 0 && msg->sdesc_len == 0) { + send_vec_cnt = TCP_KVEC_ELE_SINGLE; + } else if (msg->sdesc_len == 0) { + iov[TCP_KVEC_DATA].iov_base = msg->data; + iov[TCP_KVEC_DATA].iov_len = msg->len; + send_vec_cnt = TCP_KVEC_ELE_DOUBLE; + } else { + iov[TCP_KVEC_FILE_PARA].iov_base = msg->sdesc; + iov[TCP_KVEC_FILE_PARA].iov_len = msg->sdesc_len; + iov[TCP_KVEC_FILE_CONTENT].iov_base = msg->data; + iov[TCP_KVEC_FILE_CONTENT].iov_len = msg->len; + send_vec_cnt = TCP_KVEC_ELE_TRIPLE; + } + mutex_lock(&tcp->send_mutex); + send_len = sendmsg_nofs(tcp->sock, &tcp_msg, iov, send_vec_cnt, + msg->head_len + msg->len + msg->sdesc_len); + mutex_unlock(&tcp->send_mutex); + if (send_len == -EBADMSG) { + return -EBADMSG; + } else if (send_len <= 0) { + hmdfs_err("error %d", send_len); + return -ESHUTDOWN; + } else if (send_len != msg->head_len + msg->len + msg->sdesc_len) { + hmdfs_err("send part of message. 
%d/%zu", send_len, + msg->head_len + msg->len); + tcp->connect->stat.send_bytes += send_len; + return -EAGAIN; + } + tcp->connect->stat.send_bytes += send_len; + tcp->connect->stat.send_message_count++; + return 0; +} + +#ifdef CONFIG_HMDFS_FS_ENCRYPTION +int tcp_send_rekey_request(struct connection *connect) +{ + int ret = 0; + struct hmdfs_send_data msg; + struct tcp_handle *tcp = connect->connect_handle; + struct hmdfs_head_cmd *head = NULL; + struct connection_rekey_request *rekey_request_param = NULL; + struct hmdfs_cmd operations; + + hmdfs_init_cmd(&operations, F_CONNECT_REKEY); + head = kzalloc(sizeof(struct hmdfs_head_cmd) + + sizeof(struct connection_rekey_request), + GFP_KERNEL); + if (!head) + return -ENOMEM; + rekey_request_param = + (struct connection_rekey_request + *)((uint8_t *)head + sizeof(struct hmdfs_head_cmd)); + + rekey_request_param->update_request = cpu_to_le32(UPDATE_NOT_REQUESTED); + + head->magic = HMDFS_MSG_MAGIC; + head->version = DFS_2_0; + head->operations = operations; + head->data_len = + cpu_to_le32(sizeof(*head) + sizeof(*rekey_request_param)); + head->reserved = 0; + head->reserved1 = 0; + head->ret_code = 0; + + msg.head = head; + msg.head_len = sizeof(*head); + msg.data = rekey_request_param; + msg.len = sizeof(*rekey_request_param); + msg.sdesc = NULL; + msg.sdesc_len = 0; + ret = tcp_send_message_sock_tls(tcp, &msg); + if (ret != 0) + hmdfs_err("return error %d", ret); + kfree(head); + return ret; +} +#endif + +static int tcp_send_message(struct connection *connect, + struct hmdfs_send_data *msg) +{ + int ret = 0; +#ifdef CONFIG_HMDFS_FS_ENCRYPTION + unsigned long nowtime = jiffies; +#endif + struct tcp_handle *tcp = NULL; + + if (!connect) { + hmdfs_err("tcp connection = NULL "); + return -ESHUTDOWN; + } + if (!msg) { + hmdfs_err("msg = NULL"); + return -EINVAL; + } + if (msg->len > HMDFS_MAX_MESSAGE_LEN) { + hmdfs_err("message->len error: %zu", msg->len); + return -EINVAL; + } + tcp = (struct tcp_handle *)(connect->connect_handle); + if (connect->status == CONNECT_STAT_STOP) + return -EAGAIN; + + trace_hmdfs_tcp_send_message(msg->head); + + if (connect->status == CONNECT_STAT_WORKING && + connect->node->version > USERSPACE_MAX_VER) + ret = tcp_send_message_sock_tls(tcp, msg); + else + // Handshake status or version HMDFS1.0 + ret = tcp_send_message_sock_cipher(tcp, msg); + + if (ret != 0) { + hmdfs_err("return error %d", ret); + return ret; + } +#ifdef CONFIG_HMDFS_FS_ENCRYPTION + if (nowtime - connect->stat.rekey_time >= REKEY_LIFETIME && + connect->status == CONNECT_STAT_WORKING && + connect->node->version >= DFS_2_0) { + hmdfs_info("send rekey message to devid %llu", + connect->node->device_id); + ret = tcp_send_rekey_request(connect); + if (ret == 0) + set_crypto_info(connect, SET_CRYPTO_SEND); + connect->stat.rekey_time = nowtime; + } +#endif + return ret; +} + +void tcp_close_socket(struct tcp_handle *tcp) +{ + if (!tcp) + return; + mutex_lock(&tcp->close_mutex); + if (tcp->recv_task) { + kthread_stop(tcp->recv_task); + tcp->recv_task = NULL; + } + mutex_unlock(&tcp->close_mutex); +} + +static int set_tfm(__u8 *master_key, struct crypto_aead *tfm) +{ + int ret = 0; + int iv_len; + __u8 *sec_key = NULL; + + sec_key = master_key; + crypto_aead_clear_flags(tfm, ~0); + ret = crypto_aead_setkey(tfm, sec_key, HMDFS_KEY_SIZE); + if (ret) { + hmdfs_err("failed to set the key"); + goto out; + } + ret = crypto_aead_setauthsize(tfm, HMDFS_TAG_SIZE); + if (ret) { + hmdfs_err("authsize length is error"); + goto out; + } + + iv_len = 
+static int set_tfm(__u8 *master_key, struct crypto_aead *tfm)
+{
+	int ret = 0;
+	int iv_len;
+	__u8 *sec_key = NULL;
+
+	sec_key = master_key;
+	crypto_aead_clear_flags(tfm, ~0);
+	ret = crypto_aead_setkey(tfm, sec_key, HMDFS_KEY_SIZE);
+	if (ret) {
+		hmdfs_err("failed to set the key");
+		goto out;
+	}
+	ret = crypto_aead_setauthsize(tfm, HMDFS_TAG_SIZE);
+	if (ret) {
+		hmdfs_err("failed to set authsize");
+		goto out;
+	}
+
+	iv_len = crypto_aead_ivsize(tfm);
+	if (iv_len != HMDFS_IV_SIZE) {
+		hmdfs_err("unexpected IV size %d, expected %d", iv_len,
+			  HMDFS_IV_SIZE);
+		ret = -ENODATA;
+	}
+out:
+	return ret;
+}
+
+static int tcp_update_socket(struct tcp_handle *tcp, int fd,
+			     uint8_t *master_key, struct socket *socket)
+{
+	int err = 0;
+	struct hmdfs_peer *node = NULL;
+
+	if (!master_key || fd == 0)
+		return -EAGAIN;
+
+	tcp->sock = socket;
+	tcp->fd = fd;
+	if (!tcp_handle_is_available(tcp)) {
+		err = -EPIPE;
+		goto put_sock;
+	}
+
+	hmdfs_info("socket fd %d, state %d, refcount %ld",
+		   fd, socket->state, file_count(socket->file));
+
+	tcp->recv_cache = kmem_cache_create("hmdfs_socket",
+					    tcp->recvbuf_maxsize,
+					    0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!tcp->recv_cache) {
+		err = -ENOMEM;
+		goto put_sock;
+	}
+
+	err = tcp_set_recvtimeo(socket, TCP_RECV_TIMEOUT);
+	if (err) {
+		hmdfs_err("tcp set timeout error");
+		goto free_mem_cache;
+	}
+
+	/* send key and recv key, default MASTER KEY */
+	memcpy(tcp->connect->master_key, master_key, HMDFS_KEY_SIZE);
+	memcpy(tcp->connect->send_key, master_key, HMDFS_KEY_SIZE);
+	memcpy(tcp->connect->recv_key, master_key, HMDFS_KEY_SIZE);
+	tcp->connect->tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
+	if (IS_ERR(tcp->connect->tfm)) {
+		err = PTR_ERR(tcp->connect->tfm);
+		tcp->connect->tfm = NULL;
+		hmdfs_err("failed to load transform for gcm(aes):%d", err);
+		goto free_mem_cache;
+	}
+
+	err = set_tfm(master_key, tcp->connect->tfm);
+	if (err) {
+		hmdfs_err("failed to set up tfm");
+		goto free_crypto;
+	}
+
+	connection_get(tcp->connect);
+
+	node = tcp->connect->node;
+	tcp->recv_task = kthread_create(tcp_recv_thread, (void *)tcp,
+					"dfs_rcv%u_%llu_%d",
+					node->owner, node->device_id, fd);
+	if (IS_ERR(tcp->recv_task)) {
+		err = PTR_ERR(tcp->recv_task);
+		hmdfs_err("failed to create tcp->recv_task: %d", err);
+		goto put_conn;
+	}
+
+	return 0;
+
+put_conn:
+	tcp->recv_task = NULL;
+	connection_put(tcp->connect);
+free_crypto:
+	crypto_free_aead(tcp->connect->tfm);
+	tcp->connect->tfm = NULL;
+free_mem_cache:
+	kmem_cache_destroy(tcp->recv_cache);
+	tcp->recv_cache = NULL;
+put_sock:
+	tcp->sock = NULL;
+	tcp->fd = 0;
+
+	return err;
+}
+
+static struct tcp_handle *tcp_alloc_handle(struct connection *connect,
+					   int socket_fd, uint8_t *master_key,
+					   struct socket *socket)
+{
+	int ret = 0;
+	struct tcp_handle *tcp = kzalloc(sizeof(*tcp), GFP_KERNEL);
+
+	if (!tcp)
+		return NULL;
+	tcp->connect = connect;
+	tcp->connect->connect_handle = (void *)tcp;
+	tcp->recvbuf_maxsize = MAX_RECV_SIZE;
+	tcp->recv_task = NULL;
+	tcp->recv_cache = NULL;
+	tcp->sock = NULL;
+	mutex_init(&tcp->close_mutex);
+	mutex_init(&tcp->send_mutex);
+	ret = tcp_update_socket(tcp, socket_fd, master_key, socket);
+	if (ret) {
+		kfree(tcp);
+		return NULL;
+	}
+	return tcp;
+}
+
+void hmdfs_get_connection(struct hmdfs_peer *peer)
+{
+	struct notify_param param;
+
+	if (!peer)
+		return;
+	param.notify = NOTIFY_GET_SESSION;
+	param.fd = INVALID_SOCKET_FD;
+	memcpy(param.remote_cid, peer->cid, HMDFS_CID_SIZE);
+	notify(peer, &param);
+}
+
+static void connection_notify_to_close(struct connection *conn)
+{
+	struct notify_param param;
+	struct hmdfs_peer *peer = NULL;
+	struct tcp_handle *tcp = NULL;
+
+	tcp = conn->connect_handle;
+	peer = conn->node;
+
+	// libdistbus/src/TcpSession.cpp will close the socket
+	param.notify = NOTIFY_GET_SESSION;
+	param.fd = tcp->fd;
+	memcpy(param.remote_cid, peer->cid, HMDFS_CID_SIZE);
+	notify(peer, &param);
+}
+
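+/*
+ * Connection tear-down: the victim is moved from conn_impl_list to
+ * conn_deleting_list (only the task that takes it off the list may drop
+ * the list reference), the socket is shut down, the receive kthread is
+ * stopped unless we are running on it, and userspace is finally asked
+ * to close the fd via connection_notify_to_close().
+ */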
+void hmdfs_reget_connection(struct connection *conn)
+{
+	struct tcp_handle *tcp = NULL;
+	struct connection *conn_impl = NULL;
+	struct connection *next = NULL;
+	struct task_struct *recv_task = NULL;
+	bool should_put = false;
+	bool stop_thread = true;
+
+	if (!conn)
+		return;
+
+	// One may put a connection if and only if he took it out of the list
+	mutex_lock(&conn->node->conn_impl_list_lock);
+	list_for_each_entry_safe(conn_impl, next, &conn->node->conn_impl_list,
+				 list) {
+		if (conn_impl == conn) {
+			should_put = true;
+			list_move(&conn->list, &conn->node->conn_deleting_list);
+			break;
+		}
+	}
+	if (!should_put) {
+		mutex_unlock(&conn->node->conn_impl_list_lock);
+		return;
+	}
+
+	tcp = conn->connect_handle;
+	if (tcp) {
+		recv_task = tcp->recv_task;
+		/*
+		 * Avoid the receive thread stopping itself. Ensure the
+		 * receive thread is stopped before the offline event is
+		 * processed
+		 */
+		if (!recv_task || recv_task->pid == current->pid)
+			stop_thread = false;
+	}
+	mutex_unlock(&conn->node->conn_impl_list_lock);
+
+	if (tcp) {
+		if (tcp->sock) {
+			hmdfs_info("shutdown sock: fd = %d, sockref = %ld, connref = %u stop_thread = %d",
+				   tcp->fd, file_count(tcp->sock->file),
+				   kref_read(&conn->ref_cnt), stop_thread);
+			kernel_sock_shutdown(tcp->sock, SHUT_RDWR);
+		}
+
+		if (stop_thread)
+			tcp_close_socket(tcp);
+
+		if (tcp->fd != INVALID_SOCKET_FD)
+			connection_notify_to_close(conn);
+	}
+	connection_put(conn);
+}
+
+static struct connection *
+lookup_conn_by_socketfd_unsafe(struct hmdfs_peer *node, struct socket *socket)
+{
+	struct connection *tcp_conn = NULL;
+	struct tcp_handle *tcp = NULL;
+
+	list_for_each_entry(tcp_conn, &node->conn_impl_list, list) {
+		if (tcp_conn->connect_handle) {
+			tcp = (struct tcp_handle *)(tcp_conn->connect_handle);
+			if (tcp->sock == socket) {
+				connection_get(tcp_conn);
+				return tcp_conn;
+			}
+		}
+	}
+	return NULL;
+}
+
+static void hmdfs_reget_connection_work_fn(struct work_struct *work)
+{
+	struct connection *conn =
+		container_of(work, struct connection, reget_work);
+
+	hmdfs_reget_connection(conn);
+	connection_put(conn);
+}
+
+struct connection *alloc_conn_tcp(struct hmdfs_peer *node, int socket_fd,
+				  uint8_t *master_key, uint8_t status,
+				  struct socket *socket)
+{
+	struct connection *tcp_conn = NULL;
+	unsigned long nowtime = jiffies;
+
+	tcp_conn = kzalloc(sizeof(*tcp_conn), GFP_KERNEL);
+	if (!tcp_conn)
+		goto out_err;
+
+	kref_init(&tcp_conn->ref_cnt);
+	mutex_init(&tcp_conn->ref_lock);
+	INIT_LIST_HEAD(&tcp_conn->list);
+	tcp_conn->node = node;
+	tcp_conn->close = tcp_stop_connect;
+	tcp_conn->send_message = tcp_send_message;
+	tcp_conn->type = CONNECT_TYPE_TCP;
+	tcp_conn->status = status;
+	tcp_conn->stat.rekey_time = nowtime;
+	tcp_conn->connect_handle =
+		(void *)tcp_alloc_handle(tcp_conn, socket_fd, master_key, socket);
+	INIT_WORK(&tcp_conn->reget_work, hmdfs_reget_connection_work_fn);
+	if (!tcp_conn->connect_handle) {
+		hmdfs_err("Failed to alloc tcp_handle for struct conn");
+		goto out_err;
+	}
+	return tcp_conn;
+
+out_err:
+	kfree(tcp_conn);
+	return NULL;
+}
+
+static struct connection *add_conn_tcp_unsafe(struct hmdfs_peer *node,
+					      struct socket *socket,
+					      struct connection *conn2add)
+{
+	struct connection *conn;
+
+	conn = lookup_conn_by_socketfd_unsafe(node, socket);
+	if (conn) {
+		hmdfs_info("socket already in list");
+		return conn;
+	}
+
+	/* Prefer to use socket opened by local device */
+	if (conn2add->status == CONNECT_STAT_WAIT_REQUEST)
+		list_add(&conn2add->list, &node->conn_impl_list);
+	else
+		list_add_tail(&conn2add->list, &node->conn_impl_list);
+	connection_get(conn2add);
+	return conn2add;
+}
+
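+/*
+ * Sketch of the expected calling pattern, with the fd coming from a
+ * userspace-negotiated session:
+ *
+ *	conn = hmdfs_get_conn_tcp(peer, fd, master_key,
+ *				  CONNECT_STAT_WAIT_RESPONSE);
+ *
+ * Either an existing connection bound to that socket is returned with
+ * an extra reference, or a freshly allocated one is inserted, its
+ * receive kthread is woken, and (for WAIT_RESPONSE) a handshake
+ * request is sent.
+ */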
+struct connection *hmdfs_get_conn_tcp(struct hmdfs_peer *node, int fd,
+				      uint8_t *master_key, uint8_t status)
+{
+	struct connection *tcp_conn = NULL, *on_peer_conn = NULL;
+	struct tcp_handle *tcp = NULL;
+	struct socket *socket = NULL;
+	int err = 0;
+
+	socket = sockfd_lookup(fd, &err);
+	if (!socket) {
+		hmdfs_err("lookup socket fail, socket_fd %d, err %d", fd, err);
+		return NULL;
+	}
+	mutex_lock(&node->conn_impl_list_lock);
+	tcp_conn = lookup_conn_by_socketfd_unsafe(node, socket);
+	mutex_unlock(&node->conn_impl_list_lock);
+	if (tcp_conn) {
+		hmdfs_info("Got an existing tcp conn: socket_fd = %d", fd);
+		sockfd_put(socket);
+		goto out;
+	}
+
+	tcp_conn = alloc_conn_tcp(node, fd, master_key, status, socket);
+	if (!tcp_conn) {
+		hmdfs_info("Failed to alloc a tcp conn, socket_fd %d", fd);
+		sockfd_put(socket);
+		goto out;
+	}
+
+	mutex_lock(&node->conn_impl_list_lock);
+	on_peer_conn = add_conn_tcp_unsafe(node, socket, tcp_conn);
+	mutex_unlock(&node->conn_impl_list_lock);
+	tcp = tcp_conn->connect_handle;
+	if (on_peer_conn == tcp_conn) {
+		hmdfs_info("Got a newly allocated tcp conn: socket_fd = %d", fd);
+		wake_up_process(tcp->recv_task);
+		if (status == CONNECT_STAT_WAIT_RESPONSE)
+			connection_send_handshake(
+				on_peer_conn, CONNECT_MESG_HANDSHAKE_REQUEST,
+				0);
+	} else {
+		hmdfs_info("Got an existing tcp conn: socket_fd = %d", fd);
+		tcp->fd = INVALID_SOCKET_FD;
+		tcp_close_socket(tcp);
+		connection_put(tcp_conn);
+
+		tcp_conn = on_peer_conn;
+	}
+
+out:
+	return tcp_conn;
+}
+
+void tcp_stop_connect(struct connection *connect)
+{
+	hmdfs_info("now nothing to do");
+}
diff --git a/fs/hmdfs/comm/transport.h b/fs/hmdfs/comm/transport.h
new file mode 100755
index 000000000..bce882cb6
--- /dev/null
+++ b/fs/hmdfs/comm/transport.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/comm/transport.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_TRANSPORT_H
+#define HMDFS_TRANSPORT_H
+
+#include "connection.h"
+
+#define ENCRYPT_FLAG 1
+#define DECRYPT_FLAG 0
+
+struct aeadcrypt_result {
+	struct completion completion;
+	int err;
+};
+
+#define ADAPTER_MESSAGE_LENGTH (1024 * 1024 + 1024) // 1M + 1K
+#define MAX_RECV_SIZE sizeof(struct hmdfs_head_cmd)
+
+#define TCP_KVEC_HEAD 0
+#define TCP_KVEC_DATA 1
+
+enum TCP_KVEC_FILE_ELE_INDEX {
+	TCP_KVEC_FILE_PARA = 1,
+	TCP_KVEC_FILE_CONTENT = 2,
+};
+
+enum TCP_KVEC_TYPE {
+	TCP_KVEC_ELE_SINGLE = 1,
+	TCP_KVEC_ELE_DOUBLE = 2,
+	TCP_KVEC_ELE_TRIPLE = 3,
+};
+
+#define TCP_RECV_TIMEOUT 2
+#define MAX_RECV_RETRY_TIMES 2
+
+#ifndef SO_RCVTIMEO
+#define SO_RCVTIMEO SO_RCVTIMEO_OLD
+#endif
+
+struct tcp_handle {
+	struct connection *connect;
+	int recvbuf_maxsize;
+	struct mutex close_mutex;
+	/*
+	 * To achieve atomicity.
+	 *
+	 * The sock lock held at the tcp layer may be temporarily released at
+	 * `sk_wait_event()` when waiting for sock buffer. From this point on,
+	 * threads serialized at the initial call to `lock_sock()` contained
+	 * in `tcp_sendmsg()` can proceed, resulting in intermixed messages.
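+	 * Holding send_mutex across the whole sendmsg_nofs() call therefore
+	 * keeps each hmdfs message (header kvec plus payload kvecs)
+	 * contiguous on the TCP stream.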
+ */ + struct mutex send_mutex; + struct socket *sock; + int fd; + struct kmem_cache *recv_cache; + struct task_struct *recv_task; +}; + +void hmdfs_get_connection(struct hmdfs_peer *peer); +void hmdfs_reget_connection(struct connection *conn); +struct connection *hmdfs_get_conn_tcp(struct hmdfs_peer *node, int socket_fd, + uint8_t *master_key, uint8_t status); +void tcp_stop_connect(struct connection *connect); +uint32_t hmdfs_tcpi_rtt(struct hmdfs_peer *node); +void tcp_close_socket(struct tcp_handle *tcp); + +#ifdef CONFIG_HMDFS_FS_ENCRYPTION +int tcp_send_rekey_request(struct connection *connect); +#endif + +#endif diff --git a/fs/hmdfs/dentry.c b/fs/hmdfs/dentry.c new file mode 100755 index 000000000..fb7c4749b --- /dev/null +++ b/fs/hmdfs/dentry.c @@ -0,0 +1,333 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/dentry.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include +#include + +#include "comm/connection.h" +#include "hmdfs_dentryfile.h" +#include "hmdfs_device_view.h" +#include "hmdfs_merge_view.h" + +extern struct kmem_cache *hmdfs_dentry_cachep; + +void hmdfs_set_time(struct dentry *dentry, unsigned long time) +{ + struct hmdfs_dentry_info *d_info = dentry->d_fsdata; + + if (d_info) + d_info->time = time; +} + +unsigned long hmdfs_get_time(struct dentry *dentry) +{ + struct hmdfs_dentry_info *d_info = dentry->d_fsdata; + + if (d_info) + return (unsigned long)d_info->time; + return 0; +} + +static int hmdfs_d_remote_revalidate(struct hmdfs_peer *conn, + struct dentry *target, + struct dentry *parent) +{ + unsigned int timeout = hmdfs_sb(target->d_sb)->dcache_timeout; + unsigned long dentry_time = hmdfs_get_time(target); + struct clearcache_item *item; + + item = hmdfs_find_cache_item(conn->device_id, parent); + if (!item) + return 0; + kref_put(&item->ref, release_cache_item); + + if (cache_item_revalidate(READ_ONCE(conn->conn_time), + dentry_time, timeout)) + return 1; + + return 0; +} + +static inline void lock_for_dname_cmp(struct dentry *dentry, + struct dentry *lower_dentry) +{ + if (dentry < lower_dentry) { + spin_lock(&dentry->d_lock); + spin_lock_nested(&lower_dentry->d_lock, DENTRY_D_LOCK_NESTED); + } else { + spin_lock(&lower_dentry->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + } +} + +static inline void unlock_for_dname_cmp(struct dentry *dentry, + struct dentry *lower_dentry) +{ + spin_unlock(&dentry->d_lock); + spin_unlock(&lower_dentry->d_lock); +} + +static int hmdfs_dev_d_revalidate(struct dentry *direntry, unsigned int flags) +{ + struct inode *dinode = NULL; + struct hmdfs_inode_info *info = NULL; + + spin_lock(&direntry->d_lock); + if (IS_ROOT(direntry)) { + spin_unlock(&direntry->d_lock); + return 1; + } + spin_unlock(&direntry->d_lock); + + dinode = d_inode(direntry); + if (!dinode) + return 0; + + info = hmdfs_i(dinode); + if (info->inode_type == HMDFS_LAYER_SECOND_LOCAL || + info->inode_type == HMDFS_LAYER_FIRST_DEVICE) { + return 1; + } + if (info->conn && info->conn->status == NODE_STAT_ONLINE) + return 1; + + return 0; +} + +static int hmdfs_d_revalidate(struct dentry *direntry, unsigned int flags) +{ + struct inode *dinode = NULL; + struct hmdfs_inode_info *info = NULL; + struct path lower_path, parent_lower_path; + struct dentry *parent_dentry = NULL; + struct dentry *parent_lower_dentry = NULL; + struct dentry *lower_cur_parent_dentry = NULL; + struct dentry *lower_dentry = NULL; + int ret; + + if (flags & LOOKUP_RCU) + return -ECHILD; + + if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET | 
LOOKUP_REVAL)) + return 0; + + dinode = d_inode(direntry); + if (!dinode) + return 0; + + /* remote dentry timeout */ + info = hmdfs_i(dinode); + parent_dentry = dget_parent(direntry); + if (info->conn) { + ret = hmdfs_d_remote_revalidate(info->conn, direntry, + parent_dentry); + dput(parent_dentry); + return ret; + } + + hmdfs_get_lower_path(direntry, &lower_path); + lower_dentry = lower_path.dentry; + lower_cur_parent_dentry = dget_parent(lower_dentry); + hmdfs_get_lower_path(parent_dentry, &parent_lower_path); + parent_lower_dentry = parent_lower_path.dentry; + if ((lower_dentry->d_flags & DCACHE_OP_REVALIDATE)) { + ret = lower_dentry->d_op->d_revalidate(lower_dentry, flags); + if (ret == 0) + goto out; + } + + spin_lock(&lower_dentry->d_lock); + if (d_unhashed(lower_dentry)) { + spin_unlock(&lower_dentry->d_lock); + ret = 0; + goto out; + } + spin_unlock(&lower_dentry->d_lock); + + if (parent_lower_dentry != lower_cur_parent_dentry) { + ret = 0; + goto out; + } + + ret = 1; + lock_for_dname_cmp(direntry, lower_dentry); + if (!qstr_case_eq(&direntry->d_name, &lower_dentry->d_name)) + ret = 0; + unlock_for_dname_cmp(direntry, lower_dentry); + +out: + hmdfs_put_lower_path(&parent_lower_path); + dput(lower_cur_parent_dentry); + hmdfs_put_lower_path(&lower_path); + dput(parent_dentry); + return ret; +} + +static void hmdfs_dev_d_release(struct dentry *dentry) +{ + if (!dentry || !dentry->d_fsdata) + return; + + switch (hmdfs_d(dentry)->dentry_type) { + case HMDFS_LAYER_SECOND_LOCAL: + hmdfs_clear_cache_dents(dentry, false); + hmdfs_drop_remote_cache_dents(dentry); + path_put(&(hmdfs_d(dentry)->lower_path)); + break; + case HMDFS_LAYER_ZERO: + hmdfs_put_reset_lower_path(dentry); + break; + case HMDFS_LAYER_FIRST_DEVICE: + break; + case HMDFS_LAYER_SECOND_REMOTE: + hmdfs_clear_cache_dents(dentry, false); + break; + default: + hmdfs_err("Unexpected dentry type %d", + hmdfs_d(dentry)->dentry_type); + return; + } + + kmem_cache_free(hmdfs_dentry_cachep, dentry->d_fsdata); + dentry->d_fsdata = NULL; +} + +static void hmdfs_d_release(struct dentry *dentry) +{ + if (!dentry || !dentry->d_fsdata) + return; + + hmdfs_clear_cache_dents(dentry, false); + hmdfs_drop_remote_cache_dents(dentry); + hmdfs_put_reset_lower_path(dentry); + kmem_cache_free(hmdfs_dentry_cachep, dentry->d_fsdata); + dentry->d_fsdata = NULL; +} + +static int hmdfs_cmp_ci(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name) +{ + struct hmdfs_sb_info *sbi = hmdfs_sb(dentry->d_sb); + + if (name->len != len) + return 1; + + if (!sbi->s_case_sensitive) { + if (str_n_case_eq(name->name, str, len)) + return 0; + } else { + if (!strncmp(name->name, str, len)) + return 0; + } + return 1; +} + +static int hmdfs_hash_ci(const struct dentry *dentry, struct qstr *qstr) +{ + const unsigned char *name = qstr->name; + unsigned int len = qstr->len; + unsigned long hash; + struct hmdfs_sb_info *sbi = hmdfs_sb(dentry->d_sb); + + if (sbi->s_case_sensitive) + return 0; + + hash = init_name_hash(dentry); + while (len--) + hash = partial_name_hash(tolower(*name++), hash); + qstr->hash = end_name_hash(hash); + return 0; +} + +void clear_comrades_locked(struct list_head *comrade_list) +{ + struct hmdfs_dentry_comrade *cc, *nc; + + WARN_ON(!comrade_list); + list_for_each_entry_safe(cc, nc, comrade_list, list) { + dput(cc->lo_d); + kfree(cc); + } + INIT_LIST_HEAD(comrade_list); +} + +void clear_comrades(struct dentry *dentry) +{ + struct hmdfs_dentry_info_merge *cdi = hmdfs_dm(dentry); + + wait_event(cdi->wait_queue, 
!has_merge_lookup_work(cdi));
+	mutex_lock(&cdi->comrade_list_lock);
+	clear_comrades_locked(&cdi->comrade_list);
+	mutex_unlock(&cdi->comrade_list_lock);
+}
+
+/**
+ * d_revalidate_merge - revalidate a merge dentry
+ *
+ * Always return 0 to invalidate a dentry for fault-tolerance.
+ * The cost is acceptable for an overlay filesystem.
+ */
+static int d_revalidate_merge(struct dentry *direntry, unsigned int flags)
+{
+	struct hmdfs_dentry_info_merge *dim = hmdfs_dm(direntry);
+	struct hmdfs_dentry_comrade *comrade = NULL;
+	struct dentry *parent_dentry = NULL;
+	struct dentry *lower_cur_parent_dentry = NULL;
+	int ret = 1;
+
+	if (flags & LOOKUP_RCU) {
+		return -ECHILD;
+	}
+
+	if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET | LOOKUP_REVAL)) {
+		return 0;
+	}
+
+	parent_dentry = dget_parent(direntry);
+	list_for_each_entry(comrade, &(dim->comrade_list), list) {
+		lower_cur_parent_dentry = dget_parent(comrade->lo_d);
+		if ((comrade->lo_d->d_flags & DCACHE_OP_REVALIDATE)) {
+			ret = comrade->lo_d->d_op->d_revalidate(
+				comrade->lo_d, flags);
+			if (ret == 0) {
+				dput(lower_cur_parent_dentry);
+				goto out;
+			}
+		}
+		dput(lower_cur_parent_dentry);
+	}
+out:
+	dput(parent_dentry);
+	return ret;
+}
+
+static void d_release_merge(struct dentry *dentry)
+{
+	if (!dentry || !dentry->d_fsdata)
+		return;
+
+	clear_comrades(dentry);
+	kmem_cache_free(hmdfs_dentry_merge_cachep, dentry->d_fsdata);
+	dentry->d_fsdata = NULL;
+}
+
+const struct dentry_operations hmdfs_dops_merge = {
+	.d_revalidate = d_revalidate_merge,
+	.d_release = d_release_merge,
+};
+
+const struct dentry_operations hmdfs_dev_dops = {
+	.d_revalidate = hmdfs_dev_d_revalidate,
+	.d_release = hmdfs_dev_d_release,
+};
+
+const struct dentry_operations hmdfs_dops = {
+	.d_revalidate = hmdfs_d_revalidate,
+	.d_release = hmdfs_d_release,
+	.d_compare = hmdfs_cmp_ci,
+	.d_hash = hmdfs_hash_ci,
+};
diff --git a/fs/hmdfs/file_local.c b/fs/hmdfs/file_local.c
new file mode 100755
index 000000000..20a997236
--- /dev/null
+++ b/fs/hmdfs/file_local.c
@@ -0,0 +1,386 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/file_local.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "hmdfs_client.h" +#include "hmdfs_dentryfile.h" +#include "hmdfs_device_view.h" +#include "hmdfs_merge_view.h" +#include "hmdfs_share.h" +#include "hmdfs_trace.h" + +int hmdfs_file_open_local(struct inode *inode, struct file *file) +{ + int err = 0; + struct file *lower_file = NULL; + struct path lower_path; + struct super_block *sb = inode->i_sb; + const struct cred *cred = hmdfs_sb(sb)->cred; + struct hmdfs_file_info *gfi = kzalloc(sizeof(*gfi), GFP_KERNEL); + + if (!gfi) { + err = -ENOMEM; + goto out_err; + } + + hmdfs_get_lower_path(file->f_path.dentry, &lower_path); + lower_file = dentry_open(&lower_path, file->f_flags, cred); + hmdfs_put_lower_path(&lower_path); + if (IS_ERR(lower_file)) { + err = PTR_ERR(lower_file); + kfree(gfi); + } else { + gfi->lower_file = lower_file; + file->private_data = gfi; + } +out_err: + return err; +} + +int hmdfs_file_release_local(struct inode *inode, struct file *file) +{ + struct hmdfs_file_info *gfi = hmdfs_f(file); + + file->private_data = NULL; + fput(gfi->lower_file); + kfree(gfi); + return 0; +} + +static void hmdfs_file_accessed(struct file *file) +{ + struct file *lower_file = hmdfs_f(file)->lower_file; + struct inode *inode = file_inode(file); + struct inode *lower_inode = file_inode(lower_file); + + if (file->f_flags & O_NOATIME) + return; + + inode->i_atime = lower_inode->i_atime; +} + +ssize_t hmdfs_do_read_iter(struct file *file, struct iov_iter *iter, + loff_t *ppos) +{ + ssize_t ret; + struct file *lower_file = hmdfs_f(file)->lower_file; + + if (!iov_iter_count(iter)) + return 0; + + ret = vfs_iter_read(lower_file, iter, ppos, 0); + hmdfs_file_accessed(file); + + return ret; +} + +static ssize_t hmdfs_local_read_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + return hmdfs_do_read_iter(iocb->ki_filp, iter, &iocb->ki_pos); +} + +static void hmdfs_file_modified(struct file *file) +{ + struct inode *inode = file_inode(file); + struct dentry *dentry = file_dentry(file); + struct file *lower_file = hmdfs_f(file)->lower_file; + struct inode *lower_inode = file_inode(lower_file); + + inode->i_atime = lower_inode->i_atime; + inode->i_ctime = lower_inode->i_ctime; + inode->i_mtime = lower_inode->i_mtime; + i_size_write(inode, i_size_read(lower_inode)); + + if (!hmdfs_i_merge(hmdfs_i(inode))) + update_inode_to_dentry(dentry, inode); +} + +ssize_t hmdfs_do_write_iter(struct file *file, struct iov_iter *iter, + loff_t *ppos) +{ + ssize_t ret; + struct file *lower_file = hmdfs_f(file)->lower_file; + struct inode *inode = file_inode(file); + + if (!iov_iter_count(iter)) + return 0; + + inode_lock(inode); + + ret = file_remove_privs(file); + if (ret) + goto out_unlock; + + file_start_write(lower_file); + ret = vfs_iter_write(lower_file, iter, ppos, 0); + file_end_write(lower_file); + + hmdfs_file_modified(file); + +out_unlock: + inode_unlock(inode); + return ret; +} + +ssize_t hmdfs_local_write_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + return hmdfs_do_write_iter(iocb->ki_filp, iter, &iocb->ki_pos); +} + +int hmdfs_fsync_local(struct file *file, loff_t start, loff_t end, int datasync) +{ + int err; + struct file *lower_file = hmdfs_f(file)->lower_file; + + err = __generic_file_fsync(file, start, end, datasync); + if (err) + goto out; + + err = vfs_fsync_range(lower_file, start, end, datasync); +out: + return err; +} + +loff_t hmdfs_file_llseek_local(struct file *file, loff_t offset, int whence) +{ + loff_t ret; + struct file *lower_file; + + 
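+	/*
+	 * Seek on the lower file and mirror the resulting offset back, so
+	 * the upper file's f_pos stays in sync with the lower one.
+	 */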
lower_file = hmdfs_f(file)->lower_file; + lower_file->f_pos = file->f_pos; + ret = vfs_llseek(lower_file, offset, whence); + file->f_pos = lower_file->f_pos; + + return ret; +} + +int hmdfs_file_mmap_local(struct file *file, struct vm_area_struct *vma) +{ + struct hmdfs_file_info *private_data = file->private_data; + struct file *realfile = NULL; + int ret; + + if (!private_data) + return -EINVAL; + + realfile = private_data->lower_file; + if (!realfile) + return -EINVAL; + + if (!realfile->f_op->mmap) + return -ENODEV; + + if (WARN_ON(file != vma->vm_file)) + return -EIO; + + vma->vm_file = get_file(realfile); + ret = call_mmap(vma->vm_file, vma); + if (ret) + fput(realfile); + else + fput(file); + + file_accessed(file); + + return ret; +} + +const struct file_operations hmdfs_file_fops_local = { + .owner = THIS_MODULE, + .llseek = hmdfs_file_llseek_local, + .read_iter = hmdfs_local_read_iter, + .write_iter = hmdfs_local_write_iter, + .mmap = hmdfs_file_mmap_local, + .open = hmdfs_file_open_local, + .release = hmdfs_file_release_local, + .fsync = hmdfs_fsync_local, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, +}; + +static int hmdfs_iterate_local(struct file *file, struct dir_context *ctx) +{ + int err = 0; + loff_t start_pos = ctx->pos; + struct file *lower_file = hmdfs_f(file)->lower_file; + + if (ctx->pos == -1) + return 0; + + lower_file->f_pos = file->f_pos; + err = iterate_dir(lower_file, ctx); + file->f_pos = lower_file->f_pos; + + if (err < 0) + ctx->pos = -1; + + trace_hmdfs_iterate_local(file->f_path.dentry, start_pos, ctx->pos, + err); + return err; +} + +int hmdfs_dir_open_local(struct inode *inode, struct file *file) +{ + int err = 0; + struct file *lower_file = NULL; + struct dentry *dentry = file->f_path.dentry; + struct path lower_path; + struct super_block *sb = inode->i_sb; + const struct cred *cred = hmdfs_sb(sb)->cred; + struct hmdfs_file_info *gfi = kzalloc(sizeof(*gfi), GFP_KERNEL); + + if (!gfi) + return -ENOMEM; + + if (IS_ERR_OR_NULL(cred)) { + err = -EPERM; + goto out_err; + } + hmdfs_get_lower_path(dentry, &lower_path); + lower_file = dentry_open(&lower_path, file->f_flags, cred); + hmdfs_put_lower_path(&lower_path); + if (IS_ERR(lower_file)) { + err = PTR_ERR(lower_file); + goto out_err; + } else { + gfi->lower_file = lower_file; + file->private_data = gfi; + } + return err; + +out_err: + kfree(gfi); + return err; +} + +static int hmdfs_dir_release_local(struct inode *inode, struct file *file) +{ + struct hmdfs_file_info *gfi = hmdfs_f(file); + + file->private_data = NULL; + fput(gfi->lower_file); + kfree(gfi); + return 0; +} + +const struct file_operations hmdfs_dir_ops_local = { + .owner = THIS_MODULE, + .iterate = hmdfs_iterate_local, + .open = hmdfs_dir_open_local, + .release = hmdfs_dir_release_local, + .fsync = hmdfs_fsync_local, +}; + +static int __hmdfs_ioc_set_share_path(struct file *file, + struct hmdfs_share_control *sc) +{ + struct super_block *sb = file->f_inode->i_sb; + struct hmdfs_sb_info *sbi = hmdfs_sb(sb); + struct hmdfs_share_table *st = &sbi->share_table; + struct hmdfs_share_item *item; + struct dentry *dentry; + const char *dir_path, *full_path; + struct qstr relative_path; + struct fd src; + int err = 0; + + src = fdget(sc->src_fd); + if (!src.file) + return -EBADF; + + /* only reg file can be shared */ + if (!S_ISREG(src.file->f_inode->i_mode)) { + err = -EPERM; + goto err_out; + } + + /* share file is not allowed to be shared */ + if (hmdfs_is_share_file(src.file)) { + err = -EPERM; + goto 
err_out; + } + + dentry = src.file->f_path.dentry; + if (dentry->d_name.len > NAME_MAX) { + err = -ENAMETOOLONG; + goto err_out; + } + + dir_path = hmdfs_get_dentry_relative_path(file->f_path.dentry); + if (unlikely(!dir_path)) { + err = -ENOMEM; + goto err_out; + } + + full_path = hmdfs_connect_path(dir_path, dentry->d_name.name); + if (unlikely(!full_path)) { + err = -ENOMEM; + goto free_dir; + } + relative_path.name = full_path; + relative_path.len = strlen(full_path); + + spin_lock(&sbi->share_table.item_list_lock); + item = hmdfs_lookup_share_item(st, &relative_path); + if (!item) { + err = insert_share_item(st, &relative_path, src.file, sc->cid); + goto unlock; + } + + if (item->opened) + err = -EEXIST; + else + update_share_item(item, src.file, sc->cid); + +unlock: + spin_unlock(&sbi->share_table.item_list_lock); + kfree(full_path); +free_dir: + kfree(dir_path); +err_out: + fdput(src); + return err; +} + +static int hmdfs_ioc_set_share_path(struct file *file, unsigned long arg) +{ + struct hmdfs_share_control sc; + + if (copy_from_user(&sc, (struct hmdfs_share_control __user *)arg, + sizeof(sc))) + return -EFAULT; + + return __hmdfs_ioc_set_share_path(file, &sc); +} + +static long hmdfs_dir_ioctl_local(struct file *file, unsigned int cmd, + unsigned long arg) +{ + switch (cmd) { + case HMDFS_IOC_SET_SHARE_PATH: + return hmdfs_ioc_set_share_path(file, arg); + default: + return -ENOTTY; + } +} + +const struct file_operations hmdfs_dir_ops_share = { + .owner = THIS_MODULE, + .iterate = hmdfs_iterate_local, + .open = hmdfs_dir_open_local, + .release = hmdfs_dir_release_local, + .fsync = hmdfs_fsync_local, + .unlocked_ioctl = hmdfs_dir_ioctl_local, + .compat_ioctl = hmdfs_dir_ioctl_local, +}; diff --git a/fs/hmdfs/file_merge.c b/fs/hmdfs/file_merge.c new file mode 100755 index 000000000..e7b75b6e3 --- /dev/null +++ b/fs/hmdfs/file_merge.c @@ -0,0 +1,585 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/file_merge.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */
+
+#include "hmdfs_merge_view.h"
+
+#include <linux/file.h>
+
+#include "hmdfs.h"
+#include "hmdfs_trace.h"
+
+struct hmdfs_iterate_callback_merge {
+	struct dir_context ctx;
+	struct dir_context *caller;
+	/*
+	 * Record the return value of 'caller->actor':
+	 *
+	 * -EINVAL, buffer is exhausted
+	 * -EINTR, current task is pending
+	 * -EFAULT, something is wrong
+	 * 0, success and can do more
+	 */
+	int result;
+	struct rb_root *root;
+	uint64_t dev_id;
+};
+
+struct hmdfs_cache_entry {
+	struct rb_node rb_node;
+	int name_len;
+	char *name;
+	int file_type;
+};
+
+struct hmdfs_cache_entry *allocate_entry(const char *name, int namelen,
+					 int d_type)
+{
+	struct hmdfs_cache_entry *data;
+
+	data = kmalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return ERR_PTR(-ENOMEM);
+
+	data->name = kstrndup(name, namelen, GFP_KERNEL);
+	if (!data->name) {
+		kfree(data);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	data->name_len = namelen;
+	data->file_type = d_type;
+
+	return data;
+}
+
+int insert_filename(struct rb_root *root, struct hmdfs_cache_entry **new_entry)
+{
+	struct rb_node *parent = NULL;
+	struct rb_node **new_node = &(root->rb_node);
+	int cmp_res = 0;
+	struct hmdfs_cache_entry *data = *new_entry;
+
+	while (*new_node) {
+		struct hmdfs_cache_entry *entry = container_of(
+			*new_node, struct hmdfs_cache_entry, rb_node);
+		parent = *new_node;
+
+		if (data->name_len < entry->name_len)
+			cmp_res = -1;
+		else if (data->name_len > entry->name_len)
+			cmp_res = 1;
+		else
+			cmp_res = strncmp(data->name, entry->name,
+					  data->name_len);
+
+		if (!cmp_res) {
+			kfree(data->name);
+			kfree(data);
+			*new_entry = entry;
+			return entry->file_type;
+		}
+
+		if (cmp_res < 0)
+			new_node = &((*new_node)->rb_left);
+		else if (cmp_res > 0)
+			new_node = &((*new_node)->rb_right);
+	}
+
+	rb_link_node(&data->rb_node, parent, new_node);
+	rb_insert_color(&data->rb_node, root);
+
+	return 0;
+}
+
+static void recursive_delete(struct rb_node *node)
+{
+	struct hmdfs_cache_entry *entry = NULL;
+
+	if (!node)
+		return;
+
+	recursive_delete(node->rb_left);
+	recursive_delete(node->rb_right);
+
+	entry = container_of(node, struct hmdfs_cache_entry, rb_node);
+	kfree(entry->name);
+	kfree(entry);
+}
+
+static void destroy_tree(struct rb_root *root)
+{
+	if (!root)
+		return;
+	recursive_delete(root->rb_node);
+	root->rb_node = NULL;
+}
+
+static void delete_filename(struct rb_root *root,
+			    struct hmdfs_cache_entry *data)
+{
+	struct rb_node **node = &(root->rb_node);
+	struct hmdfs_cache_entry *entry = NULL;
+	int cmp_res = 0;
+
+	while (*node) {
+		entry = container_of(*node, struct hmdfs_cache_entry, rb_node);
+		if (data->name_len < entry->name_len)
+			cmp_res = -1;
+		else if (data->name_len > entry->name_len)
+			cmp_res = 1;
+		else
+			cmp_res = strncmp(data->name, entry->name,
+					  data->name_len);
+
+		if (!cmp_res)
+			goto found;
+
+		if (cmp_res < 0)
+			node = &((*node)->rb_left);
+		else if (cmp_res > 0)
+			node = &((*node)->rb_right);
+	}
+	return;
+
+found:
+	rb_erase(*node, root);
+	kfree(entry->name);
+	kfree(entry);
+}
+
+static void rename_conflicting_file(char *dentry_name, int *len,
+				    unsigned int dev_id)
+{
+	int i = *len - 1;
+	int dot_pos = -1;
+	char *buffer;
+
+	buffer = kzalloc(DENTRY_NAME_MAX_LEN, GFP_KERNEL);
+	if (!buffer)
+		return;
+
+	while (i >= 0) {
+		if (dentry_name[i] == '/')
+			break;
+		if (dentry_name[i] == '.') {
+			// TODO: sync this change to CT01
+			dot_pos = i;
+			break;
+		}
+		i--;
+	}
+
+	if (dot_pos == -1) {
+		snprintf(dentry_name + *len, DENTRY_NAME_MAX_LEN - *len,
+			 CONFLICTING_FILE_SUFFIX, dev_id);
+		goto done;
+	}
+
+	for (i = 
0; i < *len - dot_pos; i++) + buffer[i] = dentry_name[i + dot_pos]; + + buffer[i] = '\0'; + snprintf(dentry_name + dot_pos, DENTRY_NAME_MAX_LEN - dot_pos, + CONFLICTING_FILE_SUFFIX, dev_id); + strcat(dentry_name, buffer); + +done: + *len = strlen(dentry_name); + kfree(buffer); +} + +static void rename_conflicting_directory(char *dentry_name, int *len) +{ + snprintf(dentry_name + *len, DENTRY_NAME_MAX_LEN - *len, + CONFLICTING_DIR_SUFFIX); + *len += strlen(CONFLICTING_DIR_SUFFIX); +} + +static int hmdfs_actor_merge(struct dir_context *ctx, const char *name, + int namelen, loff_t offset, u64 ino, + unsigned int d_type) +{ + int ret = 0; + int insert_res = 0; + int max_devid_len = 2; + char *dentry_name = NULL; + int dentry_len = namelen; + struct hmdfs_cache_entry *cache_entry = NULL; + struct hmdfs_iterate_callback_merge *iterate_callback_merge = NULL; + struct dir_context *org_ctx = NULL; + + if (hmdfs_file_type(name) != HMDFS_TYPE_COMMON) + return 0; + + if (namelen > NAME_MAX) + return -EINVAL; + dentry_name = kzalloc(NAME_MAX + 1, GFP_KERNEL); + if (!dentry_name) + return -ENOMEM; + + strncpy(dentry_name, name, dentry_len); + + cache_entry = allocate_entry(dentry_name, dentry_len, d_type); + if (IS_ERR(cache_entry)) { + ret = PTR_ERR(cache_entry); + goto done; + } + + iterate_callback_merge = + container_of(ctx, struct hmdfs_iterate_callback_merge, ctx); + insert_res = + insert_filename(iterate_callback_merge->root, &cache_entry); + if (d_type == DT_DIR && insert_res == DT_DIR) { + goto done; + } else if (d_type == DT_DIR && insert_res == DT_REG) { + if (strlen(CONFLICTING_DIR_SUFFIX) > NAME_MAX - dentry_len) { + ret = -ENAMETOOLONG; + goto delete; + } + rename_conflicting_directory(dentry_name, &dentry_len); + cache_entry->file_type = DT_DIR; + } else if (d_type == DT_REG && insert_res > 0) { + if (strlen(CONFLICTING_FILE_SUFFIX) + max_devid_len > + NAME_MAX - dentry_len) { + ret = -ENAMETOOLONG; + goto delete; + } + rename_conflicting_file(dentry_name, &dentry_len, + iterate_callback_merge->dev_id); + } + + org_ctx = iterate_callback_merge->caller; + ret = org_ctx->actor(org_ctx, dentry_name, dentry_len, org_ctx->pos, + ino, d_type); + /* + * Record original return value, so that the caller can be aware of + * different situations. + */ + iterate_callback_merge->result = ret; + ret = ret == 0 ? 0 : 1; + if (ret && d_type == DT_DIR && insert_res == DT_REG && + cache_entry->file_type == DT_DIR) + cache_entry->file_type = DT_REG; + +delete: + if (ret && !insert_res) + delete_filename(iterate_callback_merge->root, cache_entry); +done: + kfree(dentry_name); + return ret; +} + +struct hmdfs_file_info * +get_next_hmdfs_file_info(struct hmdfs_file_info *fi_head, int device_id) +{ + struct hmdfs_file_info *fi_iter = NULL; + struct hmdfs_file_info *fi_result = NULL; + + mutex_lock(&fi_head->comrade_list_lock); + list_for_each_entry_safe(fi_iter, fi_result, &(fi_head->comrade_list), + comrade_list) { + if (fi_iter->device_id == device_id) + break; + } + mutex_unlock(&fi_head->comrade_list_lock); + + return fi_result != fi_head ? 
fi_result : NULL; +} + +struct hmdfs_file_info *get_hmdfs_file_info(struct hmdfs_file_info *fi_head, + int device_id) +{ + struct hmdfs_file_info *fi_iter = NULL; + + mutex_lock(&fi_head->comrade_list_lock); + list_for_each_entry(fi_iter, &(fi_head->comrade_list), comrade_list) { + if (fi_iter->device_id == device_id) { + mutex_unlock(&fi_head->comrade_list_lock); + return fi_iter; + } + } + mutex_unlock(&fi_head->comrade_list_lock); + + return NULL; +} + +int hmdfs_iterate_merge(struct file *file, struct dir_context *ctx) +{ + int err = 0; + struct hmdfs_file_info *fi_head = hmdfs_f(file); + struct hmdfs_file_info *fi_iter = NULL; + struct file *lower_file_iter = NULL; + loff_t start_pos = ctx->pos; + unsigned long device_id = (unsigned long)((ctx->pos) << 1 >> + (POS_BIT_NUM - DEV_ID_BIT_NUM)); + struct hmdfs_iterate_callback_merge ctx_merge = { + .ctx.actor = hmdfs_actor_merge, + .caller = ctx, + .root = &fi_head->root, + .dev_id = device_id + }; + + /* pos = -1 indicates that all devices have been traversed + * or an error has occurred. + */ + if (ctx->pos == -1) + return 0; + + fi_iter = get_hmdfs_file_info(fi_head, device_id); + if (!fi_iter) { + fi_iter = get_next_hmdfs_file_info(fi_head, device_id); + // dev_id is changed, parameter is set 0 to get next file info + if (fi_iter) + ctx_merge.ctx.pos = + hmdfs_set_pos(fi_iter->device_id, 0, 0); + } + while (fi_iter) { + ctx_merge.dev_id = fi_iter->device_id; + device_id = ctx_merge.dev_id; + lower_file_iter = fi_iter->lower_file; + lower_file_iter->f_pos = file->f_pos; + err = iterate_dir(lower_file_iter, &ctx_merge.ctx); + file->f_pos = lower_file_iter->f_pos; + ctx->pos = file->f_pos; + + if (err) + goto done; + /* + * ctx->actor return nonzero means buffer is exhausted or + * something is wrong, thus we should not continue. + */ + if (ctx_merge.result) + goto done; + fi_iter = get_next_hmdfs_file_info(fi_head, device_id); + if (fi_iter) { + file->f_pos = hmdfs_set_pos(fi_iter->device_id, 0, 0); + ctx->pos = file->f_pos; + } + } +done: + trace_hmdfs_iterate_merge(file->f_path.dentry, start_pos, ctx->pos, + err); + return err; +} + +int do_dir_open_merge(struct file *file, const struct cred *cred, + struct hmdfs_file_info *fi_head) +{ + int ret = -EINVAL; + struct hmdfs_dentry_info_merge *dim = hmdfs_dm(file->f_path.dentry); + struct hmdfs_dentry_comrade *comrade = NULL; + struct hmdfs_file_info *fi = NULL; + struct path lo_p = { .mnt = file->f_path.mnt }; + struct file *lower_file = NULL; + + if (IS_ERR_OR_NULL(cred)) + return ret; + + wait_event(dim->wait_queue, !has_merge_lookup_work(dim)); + + mutex_lock(&dim->comrade_list_lock); + list_for_each_entry(comrade, &(dim->comrade_list), list) { + fi = kzalloc(sizeof(*fi), GFP_KERNEL); + if (!fi) { + ret = ret ? 
-ENOMEM : 0; + continue; // allow some dir to fail to open + } + lo_p.dentry = comrade->lo_d; + // make sure that dentry will not be dentry_kill before open + dget(lo_p.dentry); + if (unlikely(d_is_negative(lo_p.dentry))) { + hmdfs_info("dentry is negative, try again"); + kfree(fi); + dput(lo_p.dentry); + continue; // skip this device + } + lower_file = dentry_open(&lo_p, file->f_flags, cred); + dput(lo_p.dentry); + if (IS_ERR(lower_file)) { + kfree(fi); + continue; + } + ret = 0; + fi->device_id = comrade->dev_id; + fi->lower_file = lower_file; + mutex_lock(&fi_head->comrade_list_lock); + list_add_tail(&fi->comrade_list, &fi_head->comrade_list); + mutex_unlock(&fi_head->comrade_list_lock); + } + mutex_unlock(&dim->comrade_list_lock); + return ret; +} + +int hmdfs_dir_open_merge(struct inode *inode, struct file *file) +{ + int ret = 0; + struct hmdfs_file_info *fi = NULL; + + fi = kzalloc(sizeof(*fi), GFP_KERNEL); + if (!fi) + return -ENOMEM; + + file->private_data = fi; + fi->root = RB_ROOT; + mutex_init(&fi->comrade_list_lock); + INIT_LIST_HEAD(&fi->comrade_list); + + ret = do_dir_open_merge(file, hmdfs_sb(inode->i_sb)->cred, fi); + if (ret) + kfree(fi); + + return ret; +} + +int hmdfs_dir_release_merge(struct inode *inode, struct file *file) +{ + struct hmdfs_file_info *fi_head = hmdfs_f(file); + struct hmdfs_file_info *fi_iter = NULL; + struct hmdfs_file_info *fi_temp = NULL; + + mutex_lock(&fi_head->comrade_list_lock); + list_for_each_entry_safe(fi_iter, fi_temp, &(fi_head->comrade_list), + comrade_list) { + list_del_init(&(fi_iter->comrade_list)); + fput(fi_iter->lower_file); + kfree(fi_iter); + } + mutex_unlock(&fi_head->comrade_list_lock); + destroy_tree(&fi_head->root); + file->private_data = NULL; + kfree(fi_head); + + return 0; +} + +long hmdfs_dir_unlocked_ioctl_merge(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct hmdfs_file_info *fi_head = hmdfs_f(file); + struct hmdfs_file_info *fi_iter = NULL; + struct hmdfs_file_info *fi_temp = NULL; + struct file *lower_file = NULL; + int error = -ENOTTY; + + mutex_lock(&fi_head->comrade_list_lock); + list_for_each_entry_safe(fi_iter, fi_temp, &(fi_head->comrade_list), + comrade_list) { + if (fi_iter->device_id == 0) { + lower_file = fi_iter->lower_file; + error = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); + break; + } + } + mutex_unlock(&fi_head->comrade_list_lock); + return error; +} + +long hmdfs_dir_compat_ioctl_merge(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct hmdfs_file_info *fi_head = hmdfs_f(file); + struct hmdfs_file_info *fi_iter = NULL; + struct hmdfs_file_info *fi_temp = NULL; + struct file *lower_file = NULL; + int error = -ENOTTY; + + mutex_lock(&fi_head->comrade_list_lock); + list_for_each_entry_safe(fi_iter, fi_temp, &(fi_head->comrade_list), + comrade_list) { + if (fi_iter->device_id == 0) { + lower_file = fi_iter->lower_file; + error = lower_file->f_op->compat_ioctl(lower_file, cmd, arg); + break; + } + } + mutex_unlock(&fi_head->comrade_list_lock); + return error; +} + +const struct file_operations hmdfs_dir_fops_merge = { + .owner = THIS_MODULE, + .iterate = hmdfs_iterate_merge, + .open = hmdfs_dir_open_merge, + .release = hmdfs_dir_release_merge, + .unlocked_ioctl = hmdfs_dir_unlocked_ioctl_merge, + .compat_ioctl = hmdfs_dir_compat_ioctl_merge, +}; + +static ssize_t hmdfs_merge_read_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + return hmdfs_do_read_iter(iocb->ki_filp, iter, &iocb->ki_pos); +} + +ssize_t hmdfs_merge_write_iter(struct kiocb *iocb, 
struct iov_iter *iter) +{ + return hmdfs_do_write_iter(iocb->ki_filp, iter, &iocb->ki_pos); +} + +int hmdfs_file_open_merge(struct inode *inode, struct file *file) +{ + int err = 0; + struct file *lower_file = NULL; + struct path lo_p = { .mnt = file->f_path.mnt }; + struct super_block *sb = inode->i_sb; + const struct cred *cred = hmdfs_sb(sb)->cred; + struct hmdfs_file_info *gfi = NULL; + struct dentry *parent = NULL; + + lo_p.dentry = hmdfs_get_fst_lo_d(file->f_path.dentry); + if (!lo_p.dentry) { + err = -EINVAL; + goto out_err; + } + + gfi = kzalloc(sizeof(*gfi), GFP_KERNEL); + if (!gfi) { + err = -ENOMEM; + goto out_err; + } + + parent = dget_parent(file->f_path.dentry); + lower_file = dentry_open(&lo_p, file->f_flags, cred); + if (IS_ERR(lower_file)) { + err = PTR_ERR(lower_file); + kfree(gfi); + } else { + gfi->lower_file = lower_file; + file->private_data = gfi; + } + dput(parent); +out_err: + dput(lo_p.dentry); + return err; +} + +int hmdfs_file_flush_merge(struct file *file, fl_owner_t id) +{ + struct hmdfs_file_info *gfi = hmdfs_f(file); + struct file *lower_file = gfi->lower_file; + + if (lower_file->f_op->flush) + return lower_file->f_op->flush(lower_file, id); + + return 0; +} + +/* Parameters are transmitted transparently to the device_view level, + * so these file operations are the same as the device_view local operations. + */ +const struct file_operations hmdfs_file_fops_merge = { + .owner = THIS_MODULE, + .llseek = hmdfs_file_llseek_local, + .read_iter = hmdfs_merge_read_iter, + .write_iter = hmdfs_merge_write_iter, + .mmap = hmdfs_file_mmap_local, + .open = hmdfs_file_open_merge, + .flush = hmdfs_file_flush_merge, + .release = hmdfs_file_release_local, + .fsync = hmdfs_fsync_local, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, +}; diff --git a/fs/hmdfs/file_remote.c b/fs/hmdfs/file_remote.c new file mode 100755 index 000000000..f9a77ddf4 --- /dev/null +++ b/fs/hmdfs/file_remote.c @@ -0,0 +1,1056 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/file_remote.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "file_remote.h" + +#include "comm/socket_adapter.h" +#include "hmdfs.h" +#include "hmdfs_client.h" +#include "hmdfs_dentryfile.h" +#include "hmdfs_trace.h" + +static inline bool hmdfs_remote_write_cache_expired( + struct hmdfs_inode_info *info) +{ + return time_after(jiffies, info->writecache_expire); +} + +enum expire_reason { + ALL_GOOD = 0, + INO_DISMATCH = 1, + SIZE_OR_CTIME_DISMATCH = 2, + TIMER_EXPIRE = 3, + TIMER_WORKING = 4, + STABLE_CTIME_DISMATCH = 5, + KEEP_CACHE = 6, +}; + +/* + * hmdfs_open_final_remote - Do the final steps of opening a remote file: + * update the local inode cache and decide whether or not to truncate the + * inode pages. + * + * @info: hmdfs inode info + * @open_ret: values returned from the remote when opening a remote file + * @keep_cache: keep local cache & i_size + */ +static int hmdfs_open_final_remote(struct hmdfs_inode_info *info, + struct hmdfs_open_ret *open_ret, + struct file *file, bool keep_cache) +{ + struct inode *inode = &info->vfs_inode; + bool truncate = false; + enum expire_reason reason = ALL_GOOD; + int ret = 0; + + /* + * if the remote inode number changed and we looked up stale data, we'll + * return -ESTALE, and reopen the file with the metadata from a remote getattr.
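As an illustrative sketch (not part of the patch): the staleness rule described here boils down to comparing the locally cached inode number with the one the remote open returns, and failing with -ESTALE so the VFS can redo the lookup.

    #include <errno.h>
    #include <stdint.h>

    /* hypothetical helper mirroring the INO_DISMATCH case above */
    static int check_remote_ino(uint64_t cached_ino, uint64_t opened_ino)
    {
            /* the remote file was replaced; the local inode cache is stale */
            if (cached_ino != opened_ino)
                    return -ESTALE;
            return 0;
    }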
+ */ + if (info->remote_ino != open_ret->ino) { + hmdfs_debug( + "got stale local inode, ino in local %llu, ino from open %llu", + info->remote_ino, open_ret->ino); + hmdfs_send_close(info->conn, &open_ret->fid); + reason = INO_DISMATCH; + ret = -ESTALE; + goto out; + } + + if (keep_cache) { + reason = KEEP_CACHE; + trace_hmdfs_open_final_remote(info, open_ret, file, reason); + goto set_fid_out; + } + + /* + * if the remote size does not match the local inode, or the remote ctime + * does not match the one recorded the last time this file was opened. + */ + if (inode->i_size != open_ret->file_size || + hmdfs_time_compare(&info->remote_ctime, &open_ret->remote_ctime)) { + truncate = true; + reason = SIZE_OR_CTIME_DISMATCH; + goto out; + } + + /* + * If 'writecache_expire' is set, check whether it has expired, and skip + * the checking of stable_ctime. + */ + if (info->writecache_expire) { + truncate = hmdfs_remote_write_cache_expired(info); + if (truncate) + reason = TIMER_EXPIRE; + else + reason = TIMER_WORKING; + goto out; + } + + /* the first open, or the remote ctime is ahead of the remote time */ + if (info->stable_ctime.tv_sec == 0 && info->stable_ctime.tv_nsec == 0) { + truncate = true; + reason = STABLE_CTIME_DISMATCH; + goto out; + } + + /* + * - if last stable_ctime == stable_ctime, we do nothing. + * a. if ctime < stable_ctime, data is ensured to be uptodate, + * b. if ctime == stable_ctime, stale data might be accessed. This is + * acceptable since the pagecache will be dropped later. + * c. ctime > stable_ctime is impossible. + * - if last stable_ctime < stable_ctime, we clear the cache. + * d. ctime != last stable_ctime is impossible + * e. if ctime == last stable_ctime, the stale data from case b might be + * read again, thus we need to drop the cache. + * - if last stable_ctime > stable_ctime, we clear the cache. + * stable_ctime must be zero in this case; this is possible because + * the system time might have been changed.
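The case analysis above reduces to a single rule: the pagecache may be kept only when the recorded stable_ctime exactly equals the one returned by this open; any difference, in either direction, drops it. A standalone restatement with assumed types:

    #include <stdbool.h>
    #include <stdint.h>

    struct ts { int64_t tv_sec; long tv_nsec; };

    static int ts_cmp(const struct ts *a, const struct ts *b)
    {
            if (a->tv_sec != b->tv_sec)
                    return a->tv_sec < b->tv_sec ? -1 : 1;
            if (a->tv_nsec != b->tv_nsec)
                    return a->tv_nsec < b->tv_nsec ? -1 : 1;
            return 0;
    }

    /* any mismatch, in either direction, invalidates the cache */
    static bool must_drop_cache(const struct ts *last, const struct ts *now)
    {
            return ts_cmp(last, now) != 0;
    }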
+ */ + if (hmdfs_time_compare(&info->stable_ctime, &open_ret->stable_ctime)) { + truncate = true; + reason = STABLE_CTIME_DISMATCH; + goto out; + } + +out: + trace_hmdfs_open_final_remote(info, open_ret, file, reason); + if (ret) + return ret; + + if (reason == SIZE_OR_CTIME_DISMATCH) { + inode->i_ctime = open_ret->remote_ctime; + info->remote_ctime = open_ret->remote_ctime; + } + + if (truncate) { + info->writecache_expire = 0; + truncate_inode_pages(inode->i_mapping, 0); + } + + atomic64_set(&info->write_counter, 0); + info->stable_ctime = open_ret->stable_ctime; + i_size_write(inode, open_ret->file_size); + info->getattr_isize = HMDFS_STALE_REMOTE_ISIZE; +set_fid_out: + spin_lock(&info->fid_lock); + info->fid = open_ret->fid; + spin_unlock(&info->fid_lock); + return 0; +} + +int hmdfs_do_open_remote(struct file *file, bool keep_cache) +{ + struct hmdfs_inode_info *info = hmdfs_i(file_inode(file)); + struct hmdfs_peer *conn = info->conn; + struct hmdfs_open_ret open_ret; + __u8 file_type = hmdfs_d(file->f_path.dentry)->file_type; + char *send_buf; + int err = 0; + + send_buf = hmdfs_get_dentry_relative_path(file->f_path.dentry); + if (!send_buf) { + err = -ENOMEM; + goto out_free; + } + err = hmdfs_send_open(conn, send_buf, file_type, &open_ret); + if (err) { + hmdfs_err("hmdfs_send_open return failed with %d", err); + goto out_free; + } + + err = hmdfs_open_final_remote(info, &open_ret, file, keep_cache); + +out_free: + kfree(send_buf); + return err; +} + +static inline bool hmdfs_remote_need_reopen(struct hmdfs_inode_info *info) +{ + return test_bit(HMDFS_FID_NEED_OPEN, &info->fid_flags); +} + +static inline bool hmdfs_remote_is_opening_file(struct hmdfs_inode_info *info) +{ + return test_bit(HMDFS_FID_OPENING, &info->fid_flags); +} + +static int hmdfs_remote_wait_opening_file(struct hmdfs_inode_info *info) +{ + int err; + + if (!hmdfs_remote_is_opening_file(info)) + return 0; + + err = ___wait_event(info->fid_wq, !hmdfs_remote_is_opening_file(info), + TASK_INTERRUPTIBLE, 0, 0, + spin_unlock(&info->fid_lock); + schedule(); + spin_lock(&info->fid_lock)); + if (err) + err = -EINTR; + + return err; +} + +static int hmdfs_remote_file_reopen(struct hmdfs_inode_info *info, + struct file *filp) +{ + int err = 0; + struct hmdfs_peer *conn = info->conn; + struct inode *inode = NULL; + struct hmdfs_fid fid; + + if (conn->status == NODE_STAT_OFFLINE) + return -EAGAIN; + + spin_lock(&info->fid_lock); + err = hmdfs_remote_wait_opening_file(info); + if (err || !hmdfs_remote_need_reopen(info)) { + spin_unlock(&info->fid_lock); + goto out; + } + + set_bit(HMDFS_FID_OPENING, &info->fid_flags); + fid = info->fid; + spin_unlock(&info->fid_lock); + + inode = &info->vfs_inode; + inode_lock(inode); + /* + * Most closing cases are meaningless, except for one: + * read process A read process B + * err = -EBADF err = -EBADF (caused by re-online) + * set_need_reopen + * do reopen + * fid = new fid_1 [server holds fid_1] + * set need_reopen + * do reopen + * send close (fid_1) // In case of leak + * fid = new fid_2 + */ + if (fid.id != HMDFS_INODE_INVALID_FILE_ID) + hmdfs_send_close(conn, &fid); + err = hmdfs_do_open_remote(filp, true); + inode_unlock(inode); + + spin_lock(&info->fid_lock); + /* + * This may lose the bit set by the offline handler, but the server + * will tell us whether or not the newly-opened file id was + * generated before it went offline; if it was opened before offline, + * the operation on the file id will return -EBADF and the + * HMDFS_FID_NEED_OPEN bit will be set again.
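The reopen path above serializes concurrent reopeners with an OPENING bit, a spinlock and a wait queue. A userspace sketch of the same pattern using pthreads (names are illustrative; the kernel code uses fid_lock and fid_wq instead):

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t fid_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t fid_wq = PTHREAD_COND_INITIALIZER;
    static bool opening;

    static void serialized_reopen(void (*do_reopen)(void))
    {
            pthread_mutex_lock(&fid_lock);
            while (opening)              /* another thread is reopening */
                    pthread_cond_wait(&fid_wq, &fid_lock);
            opening = true;
            pthread_mutex_unlock(&fid_lock);

            do_reopen();                 /* open a new fid, close the stale one */

            pthread_mutex_lock(&fid_lock);
            opening = false;
            pthread_mutex_unlock(&fid_lock);
            pthread_cond_broadcast(&fid_wq);
    }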
+ */ + if (!err) + clear_bit(HMDFS_FID_NEED_OPEN, &info->fid_flags); + clear_bit(HMDFS_FID_OPENING, &info->fid_flags); + spin_unlock(&info->fid_lock); + + wake_up_interruptible_all(&info->fid_wq); +out: + return err; +} + +static int hmdfs_remote_check_and_reopen(struct hmdfs_inode_info *info, + struct file *filp) +{ + if (!hmdfs_remote_need_reopen(info)) + return 0; + + return hmdfs_remote_file_reopen(info, filp); +} + +void hmdfs_do_close_remote(struct kref *kref) +{ + struct hmdfs_inode_info *info = + container_of(kref, struct hmdfs_inode_info, ref); + struct hmdfs_fid fid; + + hmdfs_remote_fetch_fid(info, &fid); + /* This function can return asynchronously */ + hmdfs_send_close(info->conn, &fid); +} + +static inline bool hmdfs_remote_need_track_file(const struct hmdfs_sb_info *sbi, + fmode_t mode) +{ + return (hmdfs_is_stash_enabled(sbi) && (mode & FMODE_WRITE)); +} + +static void +hmdfs_remote_del_wr_opened_inode_nolock(struct hmdfs_inode_info *info) +{ + WARN_ON(list_empty(&info->wr_opened_node)); + if (atomic_dec_and_test(&info->wr_opened_cnt)) + list_del_init(&info->wr_opened_node); +} + +void hmdfs_remote_del_wr_opened_inode(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + spin_lock(&conn->wr_opened_inode_lock); + hmdfs_remote_del_wr_opened_inode_nolock(info); + spin_unlock(&conn->wr_opened_inode_lock); +} + +void hmdfs_remote_add_wr_opened_inode_nolock(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + if (list_empty(&info->wr_opened_node)) { + atomic_set(&info->wr_opened_cnt, 1); + list_add_tail(&info->wr_opened_node, + &conn->wr_opened_inode_list); + } else { + atomic_inc(&info->wr_opened_cnt); + } +} + +static void hmdfs_remote_add_wr_opened_inode(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + spin_lock(&conn->wr_opened_inode_lock); + hmdfs_remote_add_wr_opened_inode_nolock(conn, info); + spin_unlock(&conn->wr_opened_inode_lock); +} + +int hmdfs_file_open_remote(struct inode *inode, struct file *file) +{ + struct hmdfs_inode_info *info = hmdfs_i(inode); + struct kref *ref = &(info->ref); + int err = 0; + + inode_lock(inode); + if (kref_read(ref) == 0) { + err = hmdfs_do_open_remote(file, false); + if (err == 0) + kref_init(ref); + } else { + kref_get(ref); + } + inode_unlock(inode); + + if (!err && hmdfs_remote_need_track_file(hmdfs_sb(inode->i_sb), + file->f_mode)) + hmdfs_remote_add_wr_opened_inode(info->conn, info); + + return err; +} + +static void hmdfs_set_writecache_expire(struct hmdfs_inode_info *info, + unsigned int seconds) +{ + unsigned long new_expire = jiffies + (unsigned long)seconds * HZ; + + /* + * When file has been written before closing, set pagecache expire + * if it has not been set yet. This is necessary because ctime might + * stay the same after overwrite. + */ + if (info->writecache_expire && + time_after(new_expire, info->writecache_expire)) + return; + + info->writecache_expire = new_expire; +} + +static void hmdfs_remote_keep_writecache(struct inode *inode, struct file *file) +{ + struct hmdfs_inode_info *info = NULL; + struct kref *ref = NULL; + struct hmdfs_getattr_ret *getattr_ret = NULL; + unsigned int write_cache_timeout = + hmdfs_sb(inode->i_sb)->write_cache_timeout; + int err; + + if (!write_cache_timeout) + return; + + info = hmdfs_i(inode); + ref = &(info->ref); + /* + * don't do anything if file is still opening or file hasn't been + * written. 
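hmdfs_file_open_remote() and hmdfs_file_release_remote() above implement "the first opener sends the remote open, the last closer sends the remote close" around a kref. Reduced to its shape (a sketch with a plain counter; the kernel code serializes the zero-count case with inode_lock, which this assumes the caller provides):

    /* assumes the caller holds a lock across these calls */
    struct remote_file { int refs; };

    static int file_open(struct remote_file *f, int (*remote_open)(void))
    {
            if (f->refs == 0) {
                    int err = remote_open();  /* only the first opener pays the RPC */
                    if (err)
                            return err;
            }
            f->refs++;
            return 0;
    }

    static void file_release(struct remote_file *f, void (*remote_close)(void))
    {
            if (--f->refs == 0)
                    remote_close();           /* only the last closer pays the RPC */
    }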
+ */ + if (kref_read(ref) > 0 || !atomic64_read(&info->write_counter)) + return; + + /* + * If the remote getattr fails and we don't update ctime, + * the pagecache will be truncated the next time the file is opened. + */ + err = hmdfs_remote_getattr(info->conn, file_dentry(file), 0, + &getattr_ret); + if (err) { + hmdfs_err("remote getattr failed with err %d", err); + return; + } + + if (!(getattr_ret->stat.result_mask & STATX_CTIME)) { + hmdfs_err("get remote ctime failed with mask 0x%x", + getattr_ret->stat.result_mask); + kfree(getattr_ret); + return; + } + /* + * update ctime from the remote, in case the pagecache would otherwise + * be truncated on the next open. + */ + inode->i_ctime = getattr_ret->stat.ctime; + info->remote_ctime = getattr_ret->stat.ctime; + hmdfs_set_writecache_expire(info, write_cache_timeout); + kfree(getattr_ret); +} + +int hmdfs_file_release_remote(struct inode *inode, struct file *file) +{ + struct hmdfs_inode_info *info = hmdfs_i(inode); + + if (hmdfs_remote_need_track_file(hmdfs_sb(inode->i_sb), file->f_mode)) + hmdfs_remote_del_wr_opened_inode(info->conn, info); + + inode_lock(inode); + kref_put(&info->ref, hmdfs_do_close_remote); + hmdfs_remote_keep_writecache(inode, file); + inode_unlock(inode); + + return 0; +} + +static int hmdfs_file_flush(struct file *file, fl_owner_t id) +{ + int err = 0; + struct inode *inode = file_inode(file); + + if (!(file->f_mode & FMODE_WRITE)) + return 0; + + /* + * Continue regardless of whether the file reopen fails or not, + * because there may be no dirty page. + */ + hmdfs_remote_check_and_reopen(hmdfs_i(inode), file); + + /* + * Waiting for wsem here would impact performance greatly, so we + * overlap the time to issue as many wbs as we can, expecting async + * wbs to be eliminated afterwards. + */ + filemap_fdatawrite(inode->i_mapping); + down_write(&hmdfs_i(inode)->wpage_sem); + err = filemap_write_and_wait(inode->i_mapping); + up_write(&hmdfs_i(inode)->wpage_sem); + return err; +} + +static ssize_t hmdfs_file_read_iter_remote(struct kiocb *iocb, + struct iov_iter *iter) +{ + struct file *filp = iocb->ki_filp; + struct hmdfs_inode_info *info = hmdfs_i(file_inode(filp)); + struct file_ra_state *ra = NULL; + unsigned int rtt; + int err; + bool tried = false; + +retry: + err = hmdfs_remote_check_and_reopen(info, filp); + if (err) + return err; + + ra = &filp->f_ra; + /* rtt is measured in units of 10 ms */ + rtt = hmdfs_tcpi_rtt(info->conn) / 10000; + switch (rtt) { + case 0: + break; + case 1: + ra->ra_pages = 256; + break; + case 2: + ra->ra_pages = 512; + break; + default: + ra->ra_pages = 1024; + break; + } + + err = generic_file_read_iter(iocb, iter); + if (err < 0 && !tried && hmdfs_remote_need_reopen(info)) { + /* Read from a stale fid; try the read again once.
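The read path above widens readahead as the measured TCP round-trip time grows, so high-latency links keep more pages in flight. The mapping restated in isolation (rtt in units of 10 ms; 256/512/1024 pages are 1/2/4 MiB with 4 KiB pages):

    /* sketch of the switch above; 0 means keep the current default */
    static unsigned int ra_pages_for_rtt(unsigned int rtt_10ms)
    {
            switch (rtt_10ms) {
            case 0:
                    return 0;
            case 1:
                    return 256;
            case 2:
                    return 512;
            default:
                    return 1024;
            }
    }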
*/ + tried = true; + goto retry; + } + + return err; +} + +static inline bool hmdfs_is_file_unwritable(const struct hmdfs_inode_info *info, + bool check_stash) +{ + return (check_stash && hmdfs_inode_is_stashing(info)) || + !hmdfs_is_node_online(info->conn); +} + +static ssize_t __hmdfs_file_write_iter_remote(struct kiocb *iocb, + struct iov_iter *iter, + bool check_stash) +{ + struct file *filp = iocb->ki_filp; + struct inode *inode = file_inode(filp); + struct hmdfs_inode_info *info = hmdfs_i(inode); + ssize_t ret; + + if (hmdfs_is_file_unwritable(info, check_stash)) + return -EAGAIN; + + ret = hmdfs_remote_check_and_reopen(info, filp); + if (ret) + return ret; + + inode_lock(inode); + if (hmdfs_is_file_unwritable(info, check_stash)) { + ret = -EAGAIN; + goto out; + } + ret = generic_write_checks(iocb, iter); + if (ret > 0) + ret = __generic_file_write_iter(iocb, iter); +out: + inode_unlock(inode); + + if (ret > 0) + ret = generic_write_sync(iocb, ret); + return ret; +} + +ssize_t hmdfs_file_write_iter_remote_nocheck(struct kiocb *iocb, + struct iov_iter *iter) +{ + return __hmdfs_file_write_iter_remote(iocb, iter, false); +} + +static ssize_t hmdfs_file_write_iter_remote(struct kiocb *iocb, + struct iov_iter *iter) +{ + return __hmdfs_file_write_iter_remote(iocb, iter, true); +} + +/* hmdfs does not support mmap writes to remote files */ +static vm_fault_t hmdfs_page_mkwrite(struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + +static const struct vm_operations_struct hmdfs_file_vm_ops = { + .fault = filemap_fault, + .map_pages = filemap_map_pages, + .page_mkwrite = hmdfs_page_mkwrite, +}; + +static int hmdfs_file_mmap_remote(struct file *file, struct vm_area_struct *vma) +{ + vma->vm_ops = &hmdfs_file_vm_ops; + file_accessed(file); + + return 0; +} + +static int hmdfs_file_fsync_remote(struct file *file, loff_t start, loff_t end, + int datasync) +{ + struct hmdfs_inode_info *info = hmdfs_i(file_inode(file)); + struct hmdfs_peer *conn = info->conn; + struct hmdfs_fid fid; + int err; + + trace_hmdfs_fsync_enter_remote(conn->sbi, conn->device_id, + info->remote_ino, datasync); + /* + * Continue regardless of whether the file reopen fails or not, + * because there may be no dirty page.
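Because hmdfs_page_mkwrite() above unconditionally returns VM_FAULT_SIGBUS, a writable shared mapping of a remote file can be created, but the first store into it is answered with SIGBUS; only read faults are served. A user-side illustration (hypothetical mount path):

    #include <fcntl.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int demo(void)
    {
            int fd = open("/mnt/hmdfs/device_view/remote/file", O_RDWR);
            char *p;

            if (fd < 0)
                    return -1;
            p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
            if (p == MAP_FAILED) {
                    close(fd);
                    return -1;
            }
            /* reading p[0] works; "p[0] = 'x';" would raise SIGBUS here */
            munmap(p, 4096);
            close(fd);
            return 0;
    }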
+ */ + hmdfs_remote_check_and_reopen(info, file); + + filemap_fdatawrite(file->f_mapping); + down_write(&info->wpage_sem); + err = file_write_and_wait_range(file, start, end); + up_write(&info->wpage_sem); + if (err) { + hmdfs_err("local fsync fail with %d", err); + goto out; + } + + hmdfs_remote_fetch_fid(info, &fid); + err = hmdfs_send_fsync(conn, &fid, start, end, datasync); + if (err) + hmdfs_err("send fsync fail with %d", err); + +out: + trace_hmdfs_fsync_exit_remote(conn->sbi, conn->device_id, + info->remote_ino, + get_cmd_timeout(conn->sbi, F_FSYNC), err); + + /* Compatible with the POSIX retcode */ + if (err == -ETIME) + err = -EIO; + + return err; +} + +const struct file_operations hmdfs_dev_file_fops_remote = { + .owner = THIS_MODULE, + .llseek = generic_file_llseek, + .read_iter = hmdfs_file_read_iter_remote, + .write_iter = hmdfs_file_write_iter_remote, + .mmap = hmdfs_file_mmap_remote, + .open = hmdfs_file_open_remote, + .release = hmdfs_file_release_remote, + .flush = hmdfs_file_flush, + .fsync = hmdfs_file_fsync_remote, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, +}; + +static void hmdfs_fill_page_zero(struct page *page) +{ + void *addr = NULL; + + addr = kmap(page); + memset(addr, 0, PAGE_SIZE); + kunmap(page); + SetPageUptodate(page); + unlock_page(page); +} + +static int hmdfs_readpage_remote(struct file *file, struct page *page) +{ + struct inode *inode = file_inode(file); + struct hmdfs_inode_info *info = hmdfs_i(inode); + loff_t isize = i_size_read(inode); + pgoff_t end_index = (isize - 1) >> PAGE_SHIFT; + struct hmdfs_fid fid; + + if (!isize || page->index > end_index) { + hmdfs_fill_page_zero(page); + return 0; + } + + hmdfs_remote_fetch_fid(info, &fid); + return hmdfs_client_readpage(info->conn, &fid, page); +} + +uint32_t hmdfs_get_writecount(struct page *page) +{ + uint32_t count = 0; + loff_t pos = (loff_t)page->index << HMDFS_PAGE_OFFSET; + struct inode *inode = page->mapping->host; + loff_t size = i_size_read(inode); + /* + * If the page offset is beyond i_size, which is possible when a + * writepage races with truncate, we don't need to do the remote + * writepage since the page will be truncated after it is + * unlocked. + */ + if (pos >= size) + count = 0; + /* + * If the page about to be written extends beyond i_size, we can't + * write past i_size because the remote file size would be wrong. + */ + else if (size < pos + HMDFS_PAGE_SIZE) + count = size - pos; + /* It's safe to write the whole page */ + else + count = HMDFS_PAGE_SIZE; + + return count; +} + +static bool allow_cur_thread_wpage(struct hmdfs_inode_info *info, + bool *rsem_held, bool sync_all) +{ + WARN_ON(!rsem_held); + + if (sync_all) { + *rsem_held = false; + return true; + } + *rsem_held = down_read_trylock(&info->wpage_sem); + return *rsem_held; +} + +/** + * hmdfs_writepage_remote - writeback a dirty page to the remote + * + * INFO: + * When asked to WB_SYNC_ALL, this function should leave with both the page and + * the radix tree node clean to achieve close-to-open consistency. Moreover, + * it shall never return -EIO, to help filemap iterate over all dirty pages. + * + * INFO: + * When asked to WB_SYNC_NONE, this function should be merciful if faults (OOM + * or a bad pipe) happened, to enable subsequent r/w & wb.
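hmdfs_get_writecount() above clamps how many bytes of a dirty page are sent so a remote write never grows the file past the local i_size. The same arithmetic in isolation (a sketch assuming 4 KiB pages):

    #include <stdint.h>

    #define PAGE_SZ 4096u

    static uint32_t writecount(uint64_t page_index, uint64_t isize)
    {
            uint64_t pos = page_index * PAGE_SZ;

            if (pos >= isize)
                    return 0;                       /* racing truncate: skip */
            if (isize < pos + PAGE_SZ)
                    return (uint32_t)(isize - pos); /* partial tail page */
            return PAGE_SZ;                         /* full page is safe */
    }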
+ */ +static int hmdfs_writepage_remote(struct page *page, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct hmdfs_inode_info *info = hmdfs_i(inode); + struct hmdfs_sb_info *sbi = hmdfs_sb(inode->i_sb); + int ret = 0; + bool rsem_held = false; + bool sync = wbc->sync_mode == WB_SYNC_ALL; + struct hmdfs_writepage_context *param = NULL; + + if (!allow_cur_thread_wpage(info, &rsem_held, sync)) + goto out_unlock; + + set_page_writeback(page); + + param = kzalloc(sizeof(*param), GFP_NOFS); + if (!param) { + ret = -ENOMEM; + goto out_endwb; + } + + if (sync && hmdfs_usr_sig_pending(current)) { + ClearPageUptodate(page); + goto out_free; + } + param->count = hmdfs_get_writecount(page); + if (!param->count) + goto out_free; + param->rsem_held = rsem_held; + hmdfs_remote_fetch_fid(info, &param->fid); + param->sync_all = sync; + param->caller = current; + get_task_struct(current); + param->page = page; + param->timeout = jiffies + msecs_to_jiffies(sbi->wb_timeout_ms); + INIT_DELAYED_WORK(&param->retry_dwork, hmdfs_remote_writepage_retry); + ret = hmdfs_remote_do_writepage(info->conn, param); + if (likely(!ret)) + return 0; + + put_task_struct(current); +out_free: + kfree(param); +out_endwb: + end_page_writeback(page); + if (rsem_held) + up_read(&info->wpage_sem); +out_unlock: + if (sync || !hmdfs_need_redirty_page(info, ret)) { + SetPageError(page); + mapping_set_error(page->mapping, ret); + } else { + redirty_page_for_writepage(wbc, page); + } + unlock_page(page); + return ret; +} + +static void hmdfs_account_dirty_pages(struct address_space *mapping) +{ + struct hmdfs_sb_info *sbi = mapping->host->i_sb->s_fs_info; + + if (!sbi->h_wb->dirty_writeback_control) + return; + + this_cpu_inc(*sbi->h_wb->bdp_ratelimits); +} + +static int hmdfs_write_begin_remote(struct file *file, + struct address_space *mapping, loff_t pos, + unsigned int len, unsigned int flags, + struct page **pagep, void **fsdata) +{ + pgoff_t index = ((unsigned long long)pos) >> PAGE_SHIFT; + struct inode *inode = file_inode(file); + struct page *page = NULL; + int ret = 0; + +start: + page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); + if (!page) + return -ENOMEM; + *pagep = page; + wait_on_page_writeback(page); + + // If this page will be covered completely. + if (len == HMDFS_PAGE_SIZE || PageUptodate(page)) + return 0; + + /* + * If the data existing in this page will be completely covered, + * we just need to zero the rest of this page. + */ + if (!((unsigned long long)pos & (HMDFS_PAGE_SIZE - 1)) && + (pos + len) >= i_size_read(inode)) { + zero_user_segment(page, len, HMDFS_PAGE_SIZE); + return 0; + } + /* + * We need to read the page before writing data to it.
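hmdfs_write_begin_remote() above skips the remote read in exactly two cases; every other partial write must fetch the old contents first. The decision as a predicate (a sketch assuming 4 KiB pages):

    #include <stdbool.h>
    #include <stdint.h>

    #define PAGE_SZ 4096ull

    /* true when the old page content must be read before the write */
    static bool need_read_before_write(uint64_t pos, unsigned int len,
                                       bool uptodate, uint64_t isize)
    {
            if (len == PAGE_SZ || uptodate)
                    return false;   /* whole page written, or already cached */
            if ((pos % PAGE_SZ) == 0 && pos + len >= isize)
                    return false;   /* the tail beyond len is simply zeroed */
            return true;
    }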
+ */ + ret = hmdfs_readpage_remote(file, page); + if (!ret) { + if (PageLocked(page)) { + ret = __lock_page_killable(page); + if (!ret) + unlock_page(page); + } + + if (!ret && PageUptodate(page)) { + put_page(page); + goto start; + } + if (!ret) + ret = -EIO; + } + put_page(page); + return ret; +} + +static int hmdfs_write_end_remote(struct file *file, + struct address_space *mapping, loff_t pos, + unsigned int len, unsigned int copied, + struct page *page, void *fsdata) +{ + struct inode *inode = page->mapping->host; + + if (!PageUptodate(page)) { + if (unlikely(copied != len)) + copied = 0; + else + SetPageUptodate(page); + } + if (!copied) + goto unlock_out; + + if (!PageDirty(page)) { + hmdfs_account_dirty_pages(mapping); + set_page_dirty(page); + } + + if (pos + copied > i_size_read(inode)) { + i_size_write(inode, pos + copied); + hmdfs_i(inode)->getattr_isize = HMDFS_STALE_REMOTE_ISIZE; + } +unlock_out: + unlock_page(page); + put_page(page); + + /* hmdfs private writeback control */ + hmdfs_balance_dirty_pages_ratelimited(mapping); + return copied; +} + +const struct address_space_operations hmdfs_dev_file_aops_remote = { + .readpage = hmdfs_readpage_remote, + .write_begin = hmdfs_write_begin_remote, + .write_end = hmdfs_write_end_remote, + .writepage = hmdfs_writepage_remote, + .set_page_dirty = __set_page_dirty_nobuffers, +}; + +loff_t hmdfs_set_pos(unsigned long dev_id, unsigned long group_id, + unsigned long offset) +{ + loff_t pos; + + pos = ((loff_t)dev_id << (POS_BIT_NUM - 1 - DEV_ID_BIT_NUM)) + + ((loff_t)group_id << OFFSET_BIT_NUM) + offset; + if (dev_id) + pos |= ((loff_t)1 << (POS_BIT_NUM - 1)); + return pos; +} + +static int analysis_dentry_file_from_con(struct hmdfs_sb_info *sbi, + struct file *file, + struct file *handler, + struct dir_context *ctx) +{ + struct hmdfs_dentry_group *dentry_group = NULL; + loff_t pos = ctx->pos; + unsigned long dev_id = (unsigned long)((pos << 1) >> (POS_BIT_NUM - DEV_ID_BIT_NUM)); + unsigned long group_id = (unsigned long)((pos << (1 + DEV_ID_BIT_NUM)) >> + (POS_BIT_NUM - GROUP_ID_BIT_NUM)); + loff_t offset = pos & OFFSET_BIT_MASK; + int group_num = 0; + char *dentry_name = NULL; + int iterate_result = 0; + int i, j; + + dentry_group = kzalloc(sizeof(*dentry_group), GFP_KERNEL); + + if (!dentry_group) + return -ENOMEM; + + if (IS_ERR_OR_NULL(handler)) { + kfree(dentry_group); + return -ENOENT; + } + + group_num = get_dentry_group_cnt(file_inode(handler)); + dentry_name = kzalloc(DENTRY_NAME_MAX_LEN, GFP_KERNEL); + if (!dentry_name) { + kfree(dentry_group); + return -ENOMEM; + } + + for (i = group_id; i < group_num; i++) { + int ret = hmdfs_metainfo_read(sbi, handler, dentry_group, + sizeof(struct hmdfs_dentry_group), + i); + if (ret != sizeof(struct hmdfs_dentry_group)) { + hmdfs_err("read dentry group failed ret:%d", ret); + goto done; + } + + for (j = offset; j < DENTRY_PER_GROUP; j++) { + int len; + int file_type = DT_UNKNOWN; + bool is_continue; + + len = le16_to_cpu(dentry_group->nsl[j].namelen); + if (!test_bit_le(j, dentry_group->bitmap) || len == 0) + continue; + + memset(dentry_name, 0, DENTRY_NAME_MAX_LEN); + // TODO: Support more file_type + if (S_ISDIR(le16_to_cpu(dentry_group->nsl[j].i_mode))) + file_type = DT_DIR; + else if (S_ISREG(le16_to_cpu( + dentry_group->nsl[j].i_mode))) + file_type = DT_REG; + + strncat(dentry_name, dentry_group->filename[j], len); + pos = hmdfs_set_pos(dev_id, i, j); + is_continue = + dir_emit(ctx, dentry_name, len, + pos + INUNUMBER_START, file_type); + if (!is_continue) { + ctx->pos = pos; + 
iterate_result = 1; + goto done; + } + } + offset = 0; + } + +done: + kfree(dentry_name); + kfree(dentry_group); + return iterate_result; +} + +int hmdfs_dev_readdir_from_con(struct hmdfs_peer *con, struct file *file, + struct dir_context *ctx) +{ + int iterate_result = 0; + + iterate_result = analysis_dentry_file_from_con( + con->sbi, file, file->private_data, ctx); + return iterate_result; +} + +static int hmdfs_iterate_remote(struct file *file, struct dir_context *ctx) +{ + int err = 0; + loff_t start_pos = ctx->pos; + struct hmdfs_peer *con = NULL; + struct hmdfs_dentry_info *di = hmdfs_d(file->f_path.dentry); + bool is_local = !((ctx->pos) >> (POS_BIT_NUM - 1)); + uint64_t dev_id = di->device_id; + + if (ctx->pos == -1) + return 0; + if (is_local) + ctx->pos = hmdfs_set_pos(dev_id, 0, 0); + + con = hmdfs_lookup_from_devid(file->f_inode->i_sb->s_fs_info, dev_id); + if (con) { + err = con->conn_operations->remote_readdir(con, file, ctx); + peer_put(con); + if (err) + goto done; + } + +done: + if (err <= 0) + ctx->pos = -1; + + trace_hmdfs_iterate_remote(file->f_path.dentry, start_pos, ctx->pos, + err); + return err; +} + +int hmdfs_dir_open_remote(struct inode *inode, struct file *file) +{ + struct hmdfs_inode_info *info = hmdfs_i(inode); + struct clearcache_item *cache_item = NULL; + + if (info->conn && info->conn->version <= USERSPACE_MAX_VER) { + return 0; + } else if (info->conn) { + if (!hmdfs_cache_revalidate(READ_ONCE(info->conn->conn_time), + info->conn->device_id, + file->f_path.dentry)) + get_remote_dentry_file_sync(file->f_path.dentry, + info->conn); + cache_item = hmdfs_find_cache_item(info->conn->device_id, + file->f_path.dentry); + if (cache_item) { + file->private_data = cache_item->filp; + get_file(file->private_data); + kref_put(&cache_item->ref, release_cache_item); + return 0; + } + return -ENOENT; + } + return -ENOENT; +} + +static int hmdfs_dir_release_remote(struct inode *inode, struct file *file) +{ + if (file->private_data) + fput(file->private_data); + file->private_data = NULL; + return 0; +} + +const struct file_operations hmdfs_dev_dir_ops_remote = { + .owner = THIS_MODULE, + .iterate = hmdfs_iterate_remote, + .open = hmdfs_dir_open_remote, + .release = hmdfs_dir_release_remote, + .fsync = __generic_file_fsync, +}; diff --git a/fs/hmdfs/file_remote.h b/fs/hmdfs/file_remote.h new file mode 100755 index 000000000..026bd0c94 --- /dev/null +++ b/fs/hmdfs/file_remote.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/file_remote.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_FILE_REMOTE_H +#define HMDFS_FILE_REMOTE_H + +#include +#include + +#include "hmdfs.h" +#include "comm/connection.h" + +void hmdfs_remote_del_wr_opened_inode(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info); + +void hmdfs_remote_add_wr_opened_inode_nolock(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info); + +ssize_t hmdfs_file_write_iter_remote_nocheck(struct kiocb *iocb, + struct iov_iter *iter); + +#endif diff --git a/fs/hmdfs/file_root.c b/fs/hmdfs/file_root.c new file mode 100755 index 000000000..d82ff4d0b --- /dev/null +++ b/fs/hmdfs/file_root.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/file_root.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */ + +#include +#include +#include + +#include "authority/authentication.h" +#include "comm/socket_adapter.h" +#include "comm/transport.h" +#include "hmdfs.h" +#include "hmdfs_dentryfile.h" +#include "hmdfs_device_view.h" + +#define DEVICE_VIEW_CTX_POS 2 +#define MERGE_VIEW_CTX_POS 3 +#define ROOT_DIR_INO_START 20000000 + +// used by the hmdfs_device_iterate functions +#define DEVICE_VIEW_INO_START 20000002 +#define LOCAL_DEVICE_CTX_POS 2 + +struct hmdfs_peer *get_next_con(struct hmdfs_sb_info *sbi, + unsigned long current_dev_id) +{ + struct hmdfs_peer *con = NULL; + struct hmdfs_peer *next_con = NULL; + struct list_head *head, *node; + + mutex_lock(&sbi->connections.node_lock); + head = &sbi->connections.node_list; + if (current_dev_id == 0) { + node = head->next; + if (node == head) + goto done; + next_con = container_of(node, struct hmdfs_peer, list); + if (next_con->status == NODE_STAT_ONLINE) + goto done; + current_dev_id = next_con->device_id; + next_con = NULL; + } + + list_for_each_entry(con, &sbi->connections.node_list, list) { + if ((con->device_id & 0xFFFF) == (current_dev_id & 0xFFFF)) { + node = con->list.next; + if (node == head) + goto done; + next_con = container_of(node, struct hmdfs_peer, list); + if (next_con->status == NODE_STAT_ONLINE) + goto done; + current_dev_id = next_con->device_id; + next_con = NULL; + } + } +done: + if (next_con) + peer_get(next_con); + mutex_unlock(&sbi->connections.node_lock); + return next_con; +} + +int hmdfs_device_iterate(struct file *file, struct dir_context *ctx) +{ + int err = 0; + uint64_t ino_start = DEVICE_VIEW_INO_START; + struct hmdfs_peer *next_con = NULL; + unsigned long dev_id = 0; + struct hmdfs_peer *con = NULL; + char *remote_device_name = NULL; + + if (ctx->pos != 0) + goto out; + dir_emit_dots(file, ctx); + + if (ctx->pos == LOCAL_DEVICE_CTX_POS) { + err = dir_emit(ctx, DEVICE_VIEW_LOCAL, + sizeof(DEVICE_VIEW_LOCAL) - 1, ino_start++, + DT_DIR); + if (!err) + goto out; + (ctx->pos)++; + } + next_con = get_next_con(file->f_inode->i_sb->s_fs_info, 0); + if (!next_con) + goto out; + + dev_id = next_con->device_id; + peer_put(next_con); + con = hmdfs_lookup_from_devid(file->f_inode->i_sb->s_fs_info, dev_id); + remote_device_name = kmalloc(HMDFS_CID_SIZE + 1, GFP_KERNEL); + if (!remote_device_name) { + err = -ENOMEM; + if (con) + peer_put(con); + goto out; + } + while (con) { + snprintf(remote_device_name, HMDFS_CID_SIZE + 1, "%s", + con->cid); + if (!dir_emit(ctx, remote_device_name, + strlen(remote_device_name), ino_start++, DT_DIR)) { + peer_put(con); + goto done; + } + + (ctx->pos)++; + dev_id = con->device_id; + /* drop the ref only after the last use of con */ + peer_put(con); + con = get_next_con(file->f_inode->i_sb->s_fs_info, dev_id); + } +done: + kfree(remote_device_name); +out: + if (err <= 0) + ctx->pos = -1; + + return err; +} + +int hmdfs_root_iterate(struct file *file, struct dir_context *ctx) +{ + uint64_t ino_start = ROOT_DIR_INO_START; + struct hmdfs_sb_info *sbi = file_inode(file)->i_sb->s_fs_info; + + if (!dir_emit_dots(file, ctx)) + return 0; + if (ctx->pos == DEVICE_VIEW_CTX_POS) { + if (!dir_emit(ctx, DEVICE_VIEW_ROOT, + sizeof(DEVICE_VIEW_ROOT) - 1, ino_start, DT_DIR)) + return 0; + ino_start++; + ctx->pos = MERGE_VIEW_CTX_POS; + } + if (sbi->s_merge_switch && ctx->pos == MERGE_VIEW_CTX_POS) { + if (!dir_emit(ctx, MERGE_VIEW_ROOT, sizeof(MERGE_VIEW_ROOT) - 1, + ino_start, DT_DIR)) + return 0; + (ctx->pos)++; + } + return 0; +} + +const struct file_operations hmdfs_root_fops = { + .owner = THIS_MODULE, + .iterate = hmdfs_root_iterate, +}; + +const struct file_operations 
hmdfs_device_fops = { + .owner = THIS_MODULE, + .iterate = hmdfs_device_iterate, +}; diff --git a/fs/hmdfs/hmdfs.h b/fs/hmdfs/hmdfs.h new file mode 100755 index 000000000..7d3fcf62c --- /dev/null +++ b/fs/hmdfs/hmdfs.h @@ -0,0 +1,345 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/hmdfs.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_H +#define HMDFS_H + +#include +#include +#include +#include +#include +#include + +#include "comm/protocol.h" + +#if KERNEL_VERSION(4, 15, 0) < LINUX_VERSION_CODE +#define hmdfs_time_t timespec64 +#define hmdfs_time_compare timespec64_compare +#define hmdfs_time_add timespec64_add +#else +#define hmdfs_time_t timespec +#define hmdfs_time_compare timespec_compare +#define hmdfs_time_add timespec_add +#endif + +#define HMDFS_PAGE_SIZE 4096 +#define HMDFS_PAGE_OFFSET 12 + +/* max xattr value size, not including '\0' */ +#define HMDFS_XATTR_SIZE_MAX 4096 +/* max listxattr response size, including '\0' */ +#define HMDFS_LISTXATTR_SIZE_MAX 4096 + +// 20 digits + '\0', converted from a u64 integer +#define HMDFS_ACCOUNT_HASH_MAX_LEN 21 +#define CTRL_PATH_MAX_LEN 21 + +#define HMDFS_SUPER_MAGIC 0x20200302 + +#define DEFAULT_WRITE_CACHE_TIMEOUT 30 +#define DEFAULT_SRV_REQ_MAX_ACTIVE 16 + +#define HMDFS_INODE_INVALID_FILE_ID (1U << 31) +#define HMDFS_FID_VER_BOOT_COOKIE_SHIFT 15 + +/* According to task_struct instead of workqueue_struct */ +#define HMDFS_WQ_NAME_LEN 16 + +#define HMDFS_DEF_WB_TIMEOUT_MS 60000 +#define HMDFS_MAX_WB_TIMEOUT_MS 900000 + +#define HMDFS_READPAGES_NR_MAX 32 + +#define HMDFS_CID_SIZE 64 + +enum { + HMDFS_FEATURE_READPAGES = 1ULL << 0, + HMDFS_FEATURE_READPAGES_OPEN = 1ULL << 1, + HMDFS_ATOMIC_OPEN = 1ULL << 2, +}; + +struct client_statistic; +struct server_statistic; +struct hmdfs_writeback; +struct hmdfs_server_writeback; +struct hmdfs_syncfs_info { + wait_queue_head_t wq; + atomic_t wait_count; + int remote_ret; + unsigned long long version; + + /* Protect version in concurrent operations */ + spinlock_t v_lock; + /* + * Serialize the hmdfs_sync_fs() process: + * |<- pending_list ->| executing |<- wait_list ->| + * syncfs_1 syncfs_2 (syncfs_3) syncfs_4 syncfs_5 + * + * Abandon the syncfs processes in pending_list after syncfs_3 finishes; + * Pick the last syncfs process in wait_list after syncfs_3 finishes; + */ + bool is_executing; + /* syncfs processes arriving after the currently executing syncfs */ + struct list_head wait_list; + /* syncfs processes arriving before the currently executing syncfs */ + struct list_head pending_list; + spinlock_t list_lock; +}; + +struct hmdfs_share_table { + struct list_head item_list_head; + spinlock_t item_list_lock; + struct workqueue_struct *share_item_timeout_wq; + int item_cnt; + int max_cnt; +}; + +struct hmdfs_sb_info { + /* list for all registered superblocks */ + struct list_head list; + struct mutex umount_mutex; + + struct kobject kobj; + struct completion s_kobj_unregister; + struct super_block *sb; + struct super_block *lower_sb; + /* from mount, which is root */ + const struct cred *cred; + /* from update cmd, expected to be system */ + const struct cred *system_cred; + struct { + struct mutex node_lock; + struct list_head node_list; + atomic_t conn_seq; + unsigned long recent_ol; + } connections; + char *local_dst; + char *real_dst; + char *local_src; + char *cache_dir; + /* seq number for the hmdfs super block */ + unsigned int seq; + + /* + * This value indicates how long (in seconds) the pagecache stays valid + * in the client if the metadata (except iversion) is equal to the server's.
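The s_features word defined above carries per-superblock capability bits such as READPAGES support. Testing one is a plain bit operation; a minimal sketch reusing the same bit values:

    #include <stdbool.h>
    #include <stdint.h>

    #define HMDFS_FEATURE_READPAGES      (1ULL << 0)
    #define HMDFS_FEATURE_READPAGES_OPEN (1ULL << 1)
    #define HMDFS_ATOMIC_OPEN            (1ULL << 2)

    static bool has_feature(uint64_t s_features, uint64_t bit)
    {
            return (s_features & bit) != 0;
    }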
This + * functionality is disabled if this value is 0. + */ + unsigned int write_cache_timeout; + unsigned int dcache_timeout; + unsigned int dcache_precision; + unsigned long dcache_threshold; + struct list_head client_cache; + struct list_head server_cache; + struct list_head to_delete; + struct mutex cache_list_lock; + + /* local operation time statistic */ + struct server_statistic *s_server_statis; + + /* client statistic */ + struct client_statistic *s_client_statis; + + /* TIMEOUT of each command */ + struct kobject s_cmd_timeout_kobj; + struct completion s_timeout_kobj_unregister; + unsigned int s_cmd_timeout[F_SIZE]; + + /* For case sensitive */ + bool s_case_sensitive; + + /* For features supporting */ + u64 s_features; + + /* For merge & device view */ + unsigned int s_merge_switch; + /* For writeback */ + struct hmdfs_writeback *h_wb; + /* For server writeback */ + struct hmdfs_server_writeback *h_swb; + + /* syncfs info */ + struct hmdfs_syncfs_info hsi; + + /* To bridge the userspace utils */ + struct kfifo notify_fifo; + spinlock_t notify_fifo_lock; + struct mutex cmd_handler_mutex; + + /* For reboot detect */ + uint64_t boot_cookie; + /* offline process */ + unsigned int async_cb_delay; + /* For server handle requests */ + unsigned int async_req_max_active; + /* stash dirty pages during offline */ + bool s_offline_stash; + + /* Timeout (ms) to retry writing remote pages */ + unsigned int wb_timeout_ms; + + struct path stash_work_dir; + /* dentry cache */ + bool s_dentry_cache; + + /* share table */ + struct hmdfs_share_table share_table; + + /* msgs that are waiting for remote */ + struct list_head async_readdir_msg_list; + /* protect async_readdir_msg_list */ + spinlock_t async_readdir_msg_lock; + /* async readdir work that are queued but not finished */ + struct list_head async_readdir_work_list; + /* protect async_readdir_work_list */ + spinlock_t async_readdir_work_lock; + /* wait for async_readdir_work_list to be empty in umount */ + wait_queue_head_t async_readdir_wq; + /* don't allow async readdir */ + bool async_readdir_prohibit; + + /* multi user */ + unsigned int user_id; +}; + +static inline struct hmdfs_sb_info *hmdfs_sb(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline bool hmdfs_is_stash_enabled(const struct hmdfs_sb_info *sbi) +{ + return sbi->s_offline_stash; +} + +struct setattr_info { + loff_t size; + unsigned int valid; + umode_t mode; + kuid_t uid; + kgid_t gid; + long long atime; + long atime_nsec; + long long mtime; + long mtime_nsec; + long long ctime; + long ctime_nsec; +}; + +struct hmdfs_file_info { + union { + struct { + struct rb_root root; + struct mutex comrade_list_lock; + }; + struct { + struct file *lower_file; + int device_id; + }; + }; + struct list_head comrade_list; +}; + +static inline struct hmdfs_file_info *hmdfs_f(struct file *file) +{ + return file->private_data; +} + +// Almost all the source files want this, so... 
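struct hmdfs_file_info above overlays two layouts in an anonymous union: merge-view directories use the rb-tree root and comrade list, regular files the lower_file/device_id pair. There is no discriminant field; the file_operations bound at open time imply which arm is live. A reduced model:

    #include <stdio.h>

    /* sketch of the union in struct hmdfs_file_info */
    struct file_info {
            union {
                    struct { void *tree_root; } dir;      /* merge-view directory */
                    struct { void *lower; int dev; } reg; /* regular file */
            } u;
            /* no tag: the attached file_operations decide which arm is used */
    };

    int main(void)
    {
            struct file_info fi = { .u.reg = { .lower = NULL, .dev = 3 } };

            printf("opened on device %d\n", fi.u.reg.dev);
            return 0;
    }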
+#include "inode.h" + +/* locking helpers */ +static inline struct dentry *lock_parent(struct dentry *dentry) +{ + struct dentry *dir = dget_parent(dentry); + + inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); + return dir; +} + +static inline void unlock_dir(struct dentry *dir) +{ + inode_unlock(d_inode(dir)); + dput(dir); +} + +extern uint64_t path_hash(const char *path, int len, bool case_sense); +extern int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, + const char *name, unsigned int flags, + struct path *path); +extern ssize_t hmdfs_remote_listxattr(struct dentry *dentry, char *buffer, + size_t size); + +int check_filename(const char *name, int len); + +int hmdfs_permission(struct user_namespace *mnt_userns, struct inode *inode, int mask); + +int hmdfs_parse_options(struct hmdfs_sb_info *sbi, const char *data); + +/* Refer to comments in hmdfs_request_work_fn() */ +#define HMDFS_SERVER_CTX_FLAGS (PF_KTHREAD | PF_WQ_WORKER | PF_NPROC_EXCEEDED) + +static inline bool is_current_hmdfs_server_ctx(void) +{ + return ((current->flags & HMDFS_SERVER_CTX_FLAGS) == + HMDFS_SERVER_CTX_FLAGS); +} + +extern uint64_t hmdfs_gen_boot_cookie(void); + +static inline bool str_n_case_eq(const char *s1, const char *s2, size_t len) +{ + return !strncasecmp(s1, s2, len); +} + +static inline bool qstr_case_eq(const struct qstr *q1, const struct qstr *q2) +{ + return q1->len == q2->len && str_n_case_eq(q1->name, q2->name, q2->len); +} + +static inline bool qstr_eq(const struct qstr *q1, const struct qstr *q2) +{ + return q1->len == q2->len && !strncmp(q1->name, q2->name, q2->len); +} + +/***************************************************************************** + * log print helpers + *****************************************************************************/ +__printf(4, 5) void __hmdfs_log(const char *level, const bool ratelimited, + const char *function, const char *fmt, ...); +#define hmdfs_err(fmt, ...) \ + __hmdfs_log(KERN_ERR, false, __func__, fmt, ##__VA_ARGS__) +#define hmdfs_warning(fmt, ...) \ + __hmdfs_log(KERN_WARNING, false, __func__, fmt, ##__VA_ARGS__) +#define hmdfs_info(fmt, ...) \ + __hmdfs_log(KERN_INFO, false, __func__, fmt, ##__VA_ARGS__) +#define hmdfs_err_ratelimited(fmt, ...) \ + __hmdfs_log(KERN_ERR, true, __func__, fmt, ##__VA_ARGS__) +#define hmdfs_warning_ratelimited(fmt, ...) \ + __hmdfs_log(KERN_WARNING, true, __func__, fmt, ##__VA_ARGS__) +#define hmdfs_info_ratelimited(fmt, ...) \ + __hmdfs_log(KERN_INFO, true, __func__, fmt, ##__VA_ARGS__) +#ifdef CONFIG_HMDFS_FS_DEBUG +#define hmdfs_debug(fmt, ...) \ + __hmdfs_log(KERN_DEBUG, false, __func__, fmt, ##__VA_ARGS__) +#define hmdfs_debug_ratelimited(fmt, ...) \ + __hmdfs_log(KERN_DEBUG, true, __func__, fmt, ##__VA_ARGS__) +#else +#define hmdfs_debug(fmt, ...) ((void)0) +#define hmdfs_debug_ratelimited(fmt, ...) 
((void)0) +#endif + +/***************************************************************************** + * inode/file operations declaration + *****************************************************************************/ +extern const struct inode_operations hmdfs_device_ops; +extern const struct inode_operations hmdfs_root_ops; +extern const struct file_operations hmdfs_root_fops; +extern const struct file_operations hmdfs_device_fops; + +#endif // HMDFS_H diff --git a/fs/hmdfs/hmdfs_client.c b/fs/hmdfs/hmdfs_client.c new file mode 100755 index 000000000..31c1a6d38 --- /dev/null +++ b/fs/hmdfs/hmdfs_client.c @@ -0,0 +1,1097 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/hmdfs_client.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include "hmdfs_client.h" +#include "hmdfs_server.h" + +#include +#include +#include + +#include "comm/socket_adapter.h" +#include "hmdfs_dentryfile.h" +#include "hmdfs_trace.h" +#include "comm/node_cb.h" +#include "stash.h" +#include "authority/authentication.h" + +#define HMDFS_SYNC_WPAGE_RETRY_MS 2000 + +static inline void free_sm_outbuf(struct hmdfs_send_command *sm) +{ + if (sm->out_buf && sm->out_len != 0) + kfree(sm->out_buf); + sm->out_len = 0; + sm->out_buf = NULL; +} + +int hmdfs_send_open(struct hmdfs_peer *con, const char *send_buf, + __u8 file_type, struct hmdfs_open_ret *open_ret) +{ + int ret; + int path_len = strlen(send_buf); + size_t send_len = sizeof(struct open_request) + path_len + 1; + struct open_request *open_req = kzalloc(send_len, GFP_KERNEL); + struct open_response *resp; + struct hmdfs_send_command sm = { + .data = open_req, + .len = send_len, + }; + hmdfs_init_cmd(&sm.operations, F_OPEN); + + if (!open_req) { + ret = -ENOMEM; + goto out; + } + open_req->file_type = file_type; + open_req->path_len = cpu_to_le32(path_len); + strcpy(open_req->buf, send_buf); + ret = hmdfs_sendmessage_request(con, &sm); + kfree(open_req); + + if (!ret && (sm.out_len == 0 || !sm.out_buf)) + ret = -ENOENT; + if (ret) + goto out; + resp = sm.out_buf; + + open_ret->ino = le64_to_cpu(resp->ino); + open_ret->fid.ver = le64_to_cpu(resp->file_ver); + open_ret->fid.id = le32_to_cpu(resp->file_id); + open_ret->file_size = le64_to_cpu(resp->file_size); + open_ret->remote_ctime.tv_sec = le64_to_cpu(resp->ctime); + open_ret->remote_ctime.tv_nsec = le32_to_cpu(resp->ctime_nsec); + open_ret->stable_ctime.tv_sec = le64_to_cpu(resp->stable_ctime); + open_ret->stable_ctime.tv_nsec = le32_to_cpu(resp->stable_ctime_nsec); + +out: + free_sm_outbuf(&sm); + return ret; +} + +void hmdfs_send_close(struct hmdfs_peer *con, const struct hmdfs_fid *fid) +{ + size_t send_len = sizeof(struct release_request); + struct release_request *release_req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = release_req, + .len = send_len, + }; + hmdfs_init_cmd(&sm.operations, F_RELEASE); + + if (!release_req) + return; + + release_req->file_ver = cpu_to_le64(fid->ver); + release_req->file_id = cpu_to_le32(fid->id); + + hmdfs_sendmessage_request(con, &sm); + kfree(release_req); +} + +int hmdfs_send_fsync(struct hmdfs_peer *con, const struct hmdfs_fid *fid, + __s64 start, __s64 end, __s32 datasync) +{ + int ret; + struct fsync_request *fsync_req = + kzalloc(sizeof(struct fsync_request), GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = fsync_req, + .len = sizeof(struct fsync_request), + }; + + hmdfs_init_cmd(&sm.operations, F_FSYNC); + if (!fsync_req) + return -ENOMEM; + + fsync_req->file_ver = cpu_to_le64(fid->ver); + fsync_req->file_id 
= cpu_to_le32(fid->id); + fsync_req->datasync = cpu_to_le32(datasync); + fsync_req->start = cpu_to_le64(start); + fsync_req->end = cpu_to_le64(end); + + ret = hmdfs_sendmessage_request(con, &sm); + + free_sm_outbuf(&sm); + kfree(fsync_req); + return ret; +} + +int hmdfs_client_readpage(struct hmdfs_peer *con, const struct hmdfs_fid *fid, + struct page *page) +{ + int ret; + size_t send_len = sizeof(struct readpage_request); + struct readpage_request *read_data = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = read_data, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_READPAGE); + if (!read_data) { + unlock_page(page); + return -ENOMEM; + } + + sm.out_buf = page; + read_data->file_ver = cpu_to_le64(fid->ver); + read_data->file_id = cpu_to_le32(fid->id); + read_data->size = cpu_to_le32(HMDFS_PAGE_SIZE); + read_data->index = cpu_to_le64(page->index); + ret = hmdfs_sendpage_request(con, &sm); + kfree(read_data); + return ret; +} + +bool hmdfs_usr_sig_pending(struct task_struct *p) +{ + sigset_t *sig = &p->pending.signal; + + if (likely(!signal_pending(p))) + return false; + return sigismember(sig, SIGINT) || sigismember(sig, SIGTERM) || + sigismember(sig, SIGKILL); +} + +void hmdfs_client_writepage_done(struct hmdfs_inode_info *info, + struct hmdfs_writepage_context *ctx) +{ + struct page *page = ctx->page; + bool unlock = ctx->rsem_held; + + SetPageUptodate(page); + end_page_writeback(page); + if (unlock) + up_read(&info->wpage_sem); + unlock_page(page); +} + +static void hmdfs_client_writepage_err(struct hmdfs_peer *peer, + struct hmdfs_inode_info *info, + struct hmdfs_writepage_context *ctx, + int err) +{ + struct page *page = ctx->page; + bool unlock = ctx->rsem_held; + + if (err == -ENOMEM || err == -EAGAIN || err == -ESHUTDOWN || + err == -ETIME) + SetPageUptodate(page); + else + hmdfs_info("Page %ld of file %u writeback err %d devid %llu", + page->index, ctx->fid.id, err, peer->device_id); + + /* + * Current and subsequent writebacks have been canceled by the + * user, leaving these pages' states in chaos. Read pages in + * the future to update these pages. + */ + if (ctx->sync_all && hmdfs_usr_sig_pending(ctx->caller)) + ClearPageUptodate(page); + + if (ctx->sync_all || !time_is_after_eq_jiffies(ctx->timeout) || + !(err == -ETIME || hmdfs_need_redirty_page(info, err))) { + SetPageError(page); + mapping_set_error(page->mapping, -EIO); + } else { + __set_page_dirty_nobuffers(page); + account_page_redirty(page); + } + + end_page_writeback(page); + if (unlock) + up_read(&info->wpage_sem); + unlock_page(page); +} + +static inline bool +hmdfs_no_timedout_sync_write(struct hmdfs_writepage_context *ctx) +{ + return ctx->sync_all && time_is_after_eq_jiffies(ctx->timeout); +} + +static inline bool +hmdfs_client_rewrite_for_timeout(struct hmdfs_writepage_context *ctx, int err) +{ + return (err == -ETIME && hmdfs_no_timedout_sync_write(ctx) && + !hmdfs_usr_sig_pending(ctx->caller)); +} + +static inline bool +hmdfs_client_rewrite_for_offline(struct hmdfs_sb_info *sbi, + struct hmdfs_writepage_context *ctx, int err) +{ + struct hmdfs_inode_info *info = hmdfs_i(ctx->page->mapping->host); + unsigned int status = READ_ONCE(info->stash_status); + + /* + * No retry if offline occurs during inode restoration. 
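hmdfs_client_writepage_err() above picks between failing the page outright and redirtying it for a later pass. The rule extracted as a predicate (a sketch; can_redirty stands in for hmdfs_need_redirty_page()):

    #include <stdbool.h>

    static bool should_fail_page(bool sync_all, bool timed_out,
                                 bool err_is_etime, bool can_redirty)
    {
            /*
             * WB_SYNC_ALL writers and writes past their deadline must see
             * the error; otherwise -ETIME or any redirty-able error just
             * puts the page back on the dirty list for another attempt.
             */
            return sync_all || timed_out || !(err_is_etime || can_redirty);
    }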
+ * + * Do retry if the local file cache is ready, even if it is not + * a WB_SYNC_ALL write; otherwise a non-WB_SYNC_ALL writeback will + * return -EIO, mapping_set_error(mapping, -EIO) will be + * called and it will make the concurrent calling of + * filemap_write_and_wait() in hmdfs_flush_stash_file_data() + * return -EIO. + */ + return (hmdfs_is_stash_enabled(sbi) && + status != HMDFS_REMOTE_INODE_RESTORING && + (hmdfs_no_timedout_sync_write(ctx) || + status == HMDFS_REMOTE_INODE_STASHING) && + hmdfs_is_offline_or_timeout_err(err)); +} + +static inline bool +hmdfs_client_redo_writepage(struct hmdfs_sb_info *sbi, + struct hmdfs_writepage_context *ctx, int err) +{ + return hmdfs_client_rewrite_for_timeout(ctx, err) || + hmdfs_client_rewrite_for_offline(sbi, ctx, err); +} + +static bool hmdfs_remote_write_to_remote(struct hmdfs_inode_info *info) +{ + unsigned int status = READ_ONCE(info->stash_status); + bool stashing; + + if (status != HMDFS_REMOTE_INODE_STASHING) + return true; + + /* Ensure it's OK to use info->cache afterwards */ + spin_lock(&info->stash_lock); + stashing = (info->stash_status == HMDFS_REMOTE_INODE_STASHING); + spin_unlock(&info->stash_lock); + + return !stashing; +} + +int hmdfs_remote_do_writepage(struct hmdfs_peer *con, + struct hmdfs_writepage_context *ctx) +{ + struct hmdfs_inode_info *info = hmdfs_i(ctx->page->mapping->host); + bool to_remote = false; + int err = 0; + + to_remote = hmdfs_remote_write_to_remote(info); + if (to_remote) + err = hmdfs_client_writepage(info->conn, ctx); + else + err = hmdfs_stash_writepage(info->conn, ctx); + if (!err) + return 0; + + if (!(to_remote && + hmdfs_client_rewrite_for_offline(con->sbi, ctx, err))) + return err; + + queue_delayed_work(con->retry_wb_wq, &ctx->retry_dwork, + msecs_to_jiffies(HMDFS_SYNC_WPAGE_RETRY_MS)); + + return 0; +} + +void hmdfs_remote_writepage_retry(struct work_struct *work) +{ + struct hmdfs_writepage_context *ctx = + container_of(work, struct hmdfs_writepage_context, + retry_dwork.work); + struct hmdfs_inode_info *info = hmdfs_i(ctx->page->mapping->host); + struct hmdfs_peer *peer = info->conn; + const struct cred *old_cred = NULL; + int err; + + old_cred = hmdfs_override_creds(peer->sbi->cred); + err = hmdfs_remote_do_writepage(peer, ctx); + hmdfs_revert_creds(old_cred); + if (err) { + hmdfs_client_writepage_err(peer, info, ctx, err); + put_task_struct(ctx->caller); + kfree(ctx); + } +} + +void hmdfs_writepage_cb(struct hmdfs_peer *peer, const struct hmdfs_req *req, + const struct hmdfs_resp *resp) +{ + struct hmdfs_writepage_context *ctx = req->private; + struct hmdfs_inode_info *info = hmdfs_i(ctx->page->mapping->host); + int ret = resp->ret_code; + unsigned long page_index = ctx->page->index; + + trace_hmdfs_writepage_cb_enter(peer, info->remote_ino, page_index, ret); + + if (!ret) { + hmdfs_client_writepage_done(info, ctx); + atomic64_inc(&info->write_counter); + goto cleanup_all; + } + + if (hmdfs_client_redo_writepage(peer->sbi, ctx, ret)) { + ret = hmdfs_remote_do_writepage(peer, ctx); + if (!ret) + goto cleanup_req; + WARN_ON(ret == -ETIME); + } + + hmdfs_client_writepage_err(peer, info, ctx, ret); + +cleanup_all: + put_task_struct(ctx->caller); + kfree(ctx); +cleanup_req: + kfree(req->data); + + trace_hmdfs_writepage_cb_exit(peer, info->remote_ino, page_index, ret); +} + +int hmdfs_client_writepage(struct hmdfs_peer *con, + struct hmdfs_writepage_context *param) +{ + int ret = 0; + size_t send_len = sizeof(struct writepage_request) + HMDFS_PAGE_SIZE; + struct writepage_request *write_data = kzalloc(send_len, 
GFP_NOFS); + struct hmdfs_req req; + char *data = NULL; + + if (unlikely(!write_data)) + return -ENOMEM; + + WARN_ON(!PageLocked(param->page)); // VFS + WARN_ON(PageDirty(param->page)); // VFS + WARN_ON(!PageWriteback(param->page)); // hmdfs + + write_data->file_ver = cpu_to_le64(param->fid.ver); + write_data->file_id = cpu_to_le32(param->fid.id); + write_data->index = cpu_to_le64(param->page->index); + write_data->count = cpu_to_le32(param->count); + data = kmap(param->page); + memcpy((char *)write_data->buf, data, HMDFS_PAGE_SIZE); + kunmap(param->page); + req.data = write_data; + req.data_len = send_len; + + req.private = param; + req.private_len = sizeof(*param); + + req.timeout = TIMEOUT_CONFIG; + hmdfs_init_cmd(&req.operations, F_WRITEPAGE); + ret = hmdfs_send_async_request(con, &req); + if (unlikely(ret)) + kfree(write_data); + return ret; +} + +void hmdfs_client_recv_readpage(struct hmdfs_head_cmd *head, int err, + struct hmdfs_async_work *async_work) +{ + struct page *page = async_work->page; + int ret = le32_to_cpu(head->ret_code); + struct hmdfs_inode_info *info = hmdfs_i(page->mapping->host); + unsigned long page_index = page->index; + + if (!err) + SetPageUptodate(page); + else if (err == -EBADF) + /* There may be a stale fd caused by the fid version; need to reopen */ + set_bit(HMDFS_FID_NEED_OPEN, &info->fid_flags); + + hmdfs_client_resp_statis(async_work->head.peer->sbi, F_READPAGE, + HMDFS_RESP_NORMAL, async_work->start, jiffies); + + trace_hmdfs_client_recv_readpage(async_work->head.peer, + info->remote_ino, page_index, ret); + + asw_done(async_work); +} + +/* read the cached dentry file at path and write it into filp */ +int hmdfs_client_start_readdir(struct hmdfs_peer *con, struct file *filp, + const char *path, int path_len, + struct hmdfs_dcache_header *header) +{ + int ret; + size_t send_len = sizeof(struct readdir_request) + path_len + 1; + struct readdir_request *req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = req, + .len = send_len, + .local_filp = filp, + }; + + hmdfs_init_cmd(&sm.operations, F_ITERATE); + if (!req) + return -ENOMEM; + + /* add a ref or it will be released at msg put */ + get_file(sm.local_filp); + req->path_len = cpu_to_le32(path_len); + strncpy(req->path, path, path_len); + + /* + * If we already have a cache file, verify it.
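The readdir request here piggybacks the local cache file's creation time, the directory ctime it was built from, and the entry count, so the server can answer "cache still valid" instead of resending the whole dentry file. The validity test amounts to an exact tag match (a sketch with assumed field names mirroring hmdfs_dcache_header):

    #include <stdbool.h>
    #include <stdint.h>

    struct dcache_tag {
            uint64_t dcache_crtime, dcache_crtime_nsec;
            uint64_t dentry_ctime, dentry_ctime_nsec;
            uint64_t num;    /* number of cached entries */
    };

    static bool dcache_tag_equal(const struct dcache_tag *a,
                                 const struct dcache_tag *b)
    {
            return a->dcache_crtime == b->dcache_crtime &&
                   a->dcache_crtime_nsec == b->dcache_crtime_nsec &&
                   a->dentry_ctime == b->dentry_ctime &&
                   a->dentry_ctime_nsec == b->dentry_ctime_nsec &&
                   a->num == b->num;
    }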
If it is + * uptodate, then we don't have to transfer a new one + */ + if (header) { + req->dcache_crtime = header->dcache_crtime; + req->dcache_crtime_nsec = header->dcache_crtime_nsec; + req->dentry_ctime = header->dentry_ctime; + req->dentry_ctime_nsec = header->dentry_ctime_nsec; + req->num = header->num; + req->verify_cache = cpu_to_le32(1); + } + + ret = hmdfs_sendmessage_request(con, &sm); + kfree(req); + return ret; +} + +int hmdfs_client_start_mkdir(struct hmdfs_peer *con, + const char *path, const char *name, + umode_t mode, struct hmdfs_lookup_ret *mkdir_ret) +{ + int ret = 0; + int path_len = strlen(path); + int name_len = strlen(name); + size_t send_len = sizeof(struct mkdir_request) + path_len + 1 + + name_len + 1; + struct mkdir_request *mkdir_req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_inodeinfo_response *resp = NULL; + struct hmdfs_send_command sm = { + .data = mkdir_req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_MKDIR); + if (!mkdir_req) + return -ENOMEM; + + mkdir_req->path_len = cpu_to_le32(path_len); + mkdir_req->name_len = cpu_to_le32(name_len); + mkdir_req->mode = cpu_to_le16(mode); + strncpy(mkdir_req->path, path, path_len); + strncpy(mkdir_req->path + path_len + 1, name, name_len); + + ret = hmdfs_sendmessage_request(con, &sm); + if (ret == -ENOENT || ret == -ETIME || ret == -EOPNOTSUPP) + goto out; + if (!sm.out_buf) { + ret = -ENOENT; + goto out; + } + resp = sm.out_buf; + mkdir_ret->i_mode = le16_to_cpu(resp->i_mode); + mkdir_ret->i_size = le64_to_cpu(resp->i_size); + mkdir_ret->i_mtime = le64_to_cpu(resp->i_mtime); + mkdir_ret->i_mtime_nsec = le32_to_cpu(resp->i_mtime_nsec); + mkdir_ret->i_ino = le64_to_cpu(resp->i_ino); + +out: + free_sm_outbuf(&sm); + kfree(mkdir_req); + return ret; +} + +int hmdfs_client_start_create(struct hmdfs_peer *con, + const char *path, const char *name, + umode_t mode, bool want_excl, + struct hmdfs_lookup_ret *create_ret) +{ + int ret = 0; + int path_len = strlen(path); + int name_len = strlen(name); + size_t send_len = sizeof(struct create_request) + path_len + 1 + + name_len + 1; + struct create_request *create_req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_inodeinfo_response *resp = NULL; + struct hmdfs_send_command sm = { + .data = create_req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_CREATE); + if (!create_req) + return -ENOMEM; + + create_req->path_len = cpu_to_le32(path_len); + create_req->name_len = cpu_to_le32(name_len); + create_req->mode = cpu_to_le16(mode); + create_req->want_excl = want_excl; + strncpy(create_req->path, path, path_len); + strncpy(create_req->path + path_len + 1, name, name_len); + + ret = hmdfs_sendmessage_request(con, &sm); + if (ret == -ENOENT || ret == -ETIME || ret == -EOPNOTSUPP) + goto out; + if (!sm.out_buf) { + ret = -ENOENT; + goto out; + } + resp = sm.out_buf; + create_ret->i_mode = le16_to_cpu(resp->i_mode); + create_ret->i_size = le64_to_cpu(resp->i_size); + create_ret->i_mtime = le64_to_cpu(resp->i_mtime); + create_ret->i_mtime_nsec = le32_to_cpu(resp->i_mtime_nsec); + create_ret->i_ino = le64_to_cpu(resp->i_ino); + +out: + free_sm_outbuf(&sm); + kfree(create_req); + return ret; +} + +int hmdfs_client_start_rmdir(struct hmdfs_peer *con, const char *path, + const char *name) +{ + int ret; + int path_len = strlen(path); + int name_len = strlen(name); + size_t send_len = sizeof(struct rmdir_request) + path_len + 1 + + name_len + 1; + struct rmdir_request *rmdir_req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = 
rmdir_req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_RMDIR); + if (!rmdir_req) + return -ENOMEM; + + rmdir_req->path_len = cpu_to_le32(path_len); + rmdir_req->name_len = cpu_to_le32(name_len); + strncpy(rmdir_req->path, path, path_len); + strncpy(rmdir_req->path + path_len + 1, name, name_len); + + ret = hmdfs_sendmessage_request(con, &sm); + free_sm_outbuf(&sm); + kfree(rmdir_req); + return ret; +} + +int hmdfs_client_start_unlink(struct hmdfs_peer *con, const char *path, + const char *name) +{ + int ret; + int path_len = strlen(path); + int name_len = strlen(name); + size_t send_len = sizeof(struct unlink_request) + path_len + 1 + + name_len + 1; + struct unlink_request *unlink_req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = unlink_req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_UNLINK); + if (!unlink_req) + return -ENOMEM; + + unlink_req->path_len = cpu_to_le32(path_len); + unlink_req->name_len = cpu_to_le32(name_len); + strncpy(unlink_req->path, path, path_len); + strncpy(unlink_req->path + path_len + 1, name, name_len); + + ret = hmdfs_sendmessage_request(con, &sm); + kfree(unlink_req); + free_sm_outbuf(&sm); + return ret; +} + +int hmdfs_client_start_rename(struct hmdfs_peer *con, const char *old_path, + const char *old_name, const char *new_path, + const char *new_name, unsigned int flags) +{ + int ret; + int old_path_len = strlen(old_path); + int new_path_len = strlen(new_path); + int old_name_len = strlen(old_name); + int new_name_len = strlen(new_name); + + size_t send_len = sizeof(struct rename_request) + old_path_len + 1 + + new_path_len + 1 + old_name_len + 1 + new_name_len + + 1; + struct rename_request *rename_req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = rename_req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_RENAME); + if (!rename_req) + return -ENOMEM; + + rename_req->old_path_len = cpu_to_le32(old_path_len); + rename_req->new_path_len = cpu_to_le32(new_path_len); + rename_req->old_name_len = cpu_to_le32(old_name_len); + rename_req->new_name_len = cpu_to_le32(new_name_len); + rename_req->flags = cpu_to_le32(flags); + + strncpy(rename_req->path, old_path, old_path_len); + strncpy(rename_req->path + old_path_len + 1, new_path, new_path_len); + + strncpy(rename_req->path + old_path_len + 1 + new_path_len + 1, + old_name, old_name_len); + strncpy(rename_req->path + old_path_len + 1 + new_path_len + 1 + + old_name_len + 1, + new_name, new_name_len); + + ret = hmdfs_sendmessage_request(con, &sm); + free_sm_outbuf(&sm); + kfree(rename_req); + return ret; +} + +int hmdfs_send_setattr(struct hmdfs_peer *con, const char *send_buf, + struct setattr_info *attr_info) +{ + int ret; + int path_len = strlen(send_buf); + size_t send_len = path_len + 1 + sizeof(struct setattr_request); + struct setattr_request *setattr_req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = setattr_req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_SETATTR); + if (!setattr_req) + return -ENOMEM; + + strcpy(setattr_req->buf, send_buf); + setattr_req->path_len = cpu_to_le32(path_len); + setattr_req->valid = cpu_to_le32(attr_info->valid); + setattr_req->size = cpu_to_le64(attr_info->size); + setattr_req->mtime = cpu_to_le64(attr_info->mtime); + setattr_req->mtime_nsec = cpu_to_le32(attr_info->mtime_nsec); + ret = hmdfs_sendmessage_request(con, &sm); + kfree(setattr_req); + return ret; +} + +static void hmdfs_update_getattr_ret(struct 
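+/*
+ * The unmarshalling helpers that follow mirror the senders above: every
+ * integer crosses the wire as fixed little-endian, converted with
+ * cpu_to_leXX()/leXX_to_cpu(), so peers of different endianness agree on the
+ * byte order. A tiny userspace round-trip sketch of the same idea using the
+ * glibc equivalents:
+ *
+ *	#include <assert.h>
+ *	#include <endian.h>
+ *	#include <stdint.h>
+ *
+ *	int main(void)
+ *	{
+ *		uint64_t host = 0x1122334455667788ULL;
+ *		uint64_t wire = htole64(host);	// cpu_to_le64() analogue
+ *
+ *		// 'wire' always serializes as 88 77 66 55 44 33 22 11
+ *		assert(le64toh(wire) == host);	// le64_to_cpu() analogue
+ *		return 0;
+ *	}
+ */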
getattr_response *resp, + struct hmdfs_getattr_ret *result) +{ + struct kstat *stat = &result->stat; + + stat->result_mask = le32_to_cpu(resp->result_mask); + if (stat->result_mask == 0) + return; + + stat->ino = le64_to_cpu(resp->ino); + stat->mode = le16_to_cpu(resp->mode); + stat->nlink = le32_to_cpu(resp->nlink); + stat->uid.val = le32_to_cpu(resp->uid); + stat->gid.val = le32_to_cpu(resp->gid); + stat->size = le64_to_cpu(resp->size); + stat->blocks = le64_to_cpu(resp->blocks); + stat->blksize = le32_to_cpu(resp->blksize); + stat->atime.tv_sec = le64_to_cpu(resp->atime); + stat->atime.tv_nsec = le32_to_cpu(resp->atime_nsec); + stat->mtime.tv_sec = le64_to_cpu(resp->mtime); + stat->mtime.tv_nsec = le32_to_cpu(resp->mtime_nsec); + stat->ctime.tv_sec = le64_to_cpu(resp->ctime); + stat->ctime.tv_nsec = le32_to_cpu(resp->ctime_nsec); + stat->btime.tv_sec = le64_to_cpu(resp->crtime); + stat->btime.tv_nsec = le32_to_cpu(resp->crtime_nsec); + result->fsid = le64_to_cpu(resp->fsid); + /* currently not used */ + result->i_flags = 0; +} + +int hmdfs_send_getattr(struct hmdfs_peer *con, const char *send_buf, + unsigned int lookup_flags, + struct hmdfs_getattr_ret *result) +{ + int path_len = strlen(send_buf); + size_t send_len = path_len + 1 + sizeof(struct getattr_request); + int ret = 0; + struct getattr_request *req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_GETATTR); + if (!req) + return -ENOMEM; + + req->path_len = cpu_to_le32(path_len); + req->lookup_flags = cpu_to_le32(lookup_flags); + strncpy(req->buf, send_buf, path_len); + ret = hmdfs_sendmessage_request(con, &sm); + if (!ret && (sm.out_len == 0 || !sm.out_buf)) + ret = -ENOENT; + if (ret) + goto out; + + hmdfs_update_getattr_ret(sm.out_buf, result); + +out: + kfree(req); + free_sm_outbuf(&sm); + return ret; +} + +static void hmdfs_update_statfs_ret(struct statfs_response *resp, + struct kstatfs *buf) +{ + buf->f_type = le64_to_cpu(resp->f_type); + buf->f_bsize = le64_to_cpu(resp->f_bsize); + buf->f_blocks = le64_to_cpu(resp->f_blocks); + buf->f_bfree = le64_to_cpu(resp->f_bfree); + buf->f_bavail = le64_to_cpu(resp->f_bavail); + buf->f_files = le64_to_cpu(resp->f_files); + buf->f_ffree = le64_to_cpu(resp->f_ffree); + buf->f_fsid.val[0] = le32_to_cpu(resp->f_fsid_0); + buf->f_fsid.val[1] = le32_to_cpu(resp->f_fsid_1); + buf->f_namelen = le64_to_cpu(resp->f_namelen); + buf->f_frsize = le64_to_cpu(resp->f_frsize); + buf->f_flags = le64_to_cpu(resp->f_flags); + buf->f_spare[0] = le64_to_cpu(resp->f_spare_0); + buf->f_spare[1] = le64_to_cpu(resp->f_spare_1); + buf->f_spare[2] = le64_to_cpu(resp->f_spare_2); + buf->f_spare[3] = le64_to_cpu(resp->f_spare_3); +} + +int hmdfs_send_statfs(struct hmdfs_peer *con, const char *path, + struct kstatfs *buf) +{ + int ret; + int path_len = strlen(path); + size_t send_len = sizeof(struct statfs_request) + path_len + 1; + struct statfs_request *req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_STATFS); + if (!req) + return -ENOMEM; + + req->path_len = cpu_to_le32(path_len); + strncpy(req->path, path, path_len); + + ret = hmdfs_sendmessage_request(con, &sm); + + if (ret == -ETIME) + ret = -EIO; + if (!ret && (sm.out_len == 0 || !sm.out_buf)) + ret = -ENOENT; + if (ret) + goto out; + + hmdfs_update_statfs_ret(sm.out_buf, buf); +out: + kfree(req); + free_sm_outbuf(&sm); + return ret; +} + +int 
hmdfs_send_syncfs(struct hmdfs_peer *con, int syncfs_timeout) +{ + int ret; + struct hmdfs_req req; + struct hmdfs_sb_info *sbi = con->sbi; + struct syncfs_request *syncfs_req = + kzalloc(sizeof(struct syncfs_request), GFP_KERNEL); + + if (!syncfs_req) { + hmdfs_err("cannot allocate syncfs_request"); + return -ENOMEM; + } + + hmdfs_init_cmd(&req.operations, F_SYNCFS); + req.timeout = syncfs_timeout; + + syncfs_req->version = cpu_to_le64(sbi->hsi.version); + req.data = syncfs_req; + req.data_len = sizeof(*syncfs_req); + + ret = hmdfs_send_async_request(con, &req); + if (ret) { + kfree(syncfs_req); + hmdfs_err("send syncfs request failed, err=%d", ret); + } + + return ret; +} + +static void hmdfs_update_getxattr_ret(struct getxattr_response *resp, + void *value, size_t o_size, int *ret) +{ + ssize_t size = le32_to_cpu(resp->size); + + if (o_size && o_size < size) { + *ret = -ERANGE; + return; + } + + if (o_size) + memcpy(value, resp->value, size); + + *ret = size; +} + +int hmdfs_send_getxattr(struct hmdfs_peer *con, const char *send_buf, + const char *name, void *value, size_t size) +{ + size_t path_len = strlen(send_buf); + size_t name_len = strlen(name); + size_t send_len = path_len + name_len + + sizeof(struct getxattr_request) + 2; + int ret = 0; + struct getxattr_request *req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_GETXATTR); + if (!req) + return -ENOMEM; + + req->path_len = cpu_to_le32(path_len); + req->name_len = cpu_to_le32(name_len); + req->size = cpu_to_le32(size); + strncpy(req->buf, send_buf, path_len); + strncpy(req->buf + path_len + 1, name, name_len); + ret = hmdfs_sendmessage_request(con, &sm); + if (!ret && (sm.out_len == 0 || !sm.out_buf)) + ret = -ENOENT; + if (ret) + goto out; + + hmdfs_update_getxattr_ret(sm.out_buf, value, size, &ret); + +out: + kfree(req); + free_sm_outbuf(&sm); + return ret; +} + +int hmdfs_send_setxattr(struct hmdfs_peer *con, const char *send_buf, + const char *name, const void *value, + size_t size, int flags) +{ + size_t path_len = strlen(send_buf); + size_t name_len = strlen(name); + size_t send_len = path_len + name_len + size + 2 + + sizeof(struct setxattr_request); + int ret = 0; + struct setxattr_request *req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_SETXATTR); + if (!req) + return -ENOMEM; + + req->path_len = cpu_to_le32(path_len); + req->name_len = cpu_to_le32(name_len); + req->size = cpu_to_le32(size); + req->flags = cpu_to_le32(flags); + strncpy(req->buf, send_buf, path_len); + strncpy(req->buf + path_len + 1, name, name_len); + /* a NULL value means the xattr is to be deleted, so don't copy from it */ + if (!value) + req->del = true; + else + memcpy(req->buf + path_len + name_len + 2, value, size); + ret = hmdfs_sendmessage_request(con, &sm); + kfree(req); + return ret; +} + +static void hmdfs_update_listxattr_ret(struct listxattr_response *resp, + char *list, size_t o_size, ssize_t *ret) +{ + ssize_t size = le32_to_cpu(resp->size); + + if (o_size && o_size < size) { + *ret = -ERANGE; + return; + } + + /* the names are '\0'-separated, so use memcpy rather than strcpy */ + if (o_size) + memcpy(list, resp->list, size); + + *ret = size; +} + +ssize_t hmdfs_send_listxattr(struct hmdfs_peer *con, const char *send_buf, + char *list, size_t size) +{ + size_t path_len = strlen(send_buf); + size_t send_len = path_len + 1 + sizeof(struct listxattr_request); + ssize_t ret = 0; + struct listxattr_request *req = kzalloc(send_len, GFP_KERNEL); + struct
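+/*
+ * hmdfs_update_getxattr_ret() above implements the usual xattr contract: a
+ * zero o_size only probes for the value length, and a too-small buffer yields
+ * -ERANGE. Callers therefore use the classic two-call pattern; a hedged,
+ * self-contained userspace sketch against a stub with the same contract
+ * (demo_* names are illustrative):
+ *
+ *	#include <stdlib.h>
+ *	#include <string.h>
+ *	#include <sys/types.h>
+ *
+ *	static ssize_t demo_getxattr(void *value, size_t size)
+ *	{
+ *		static const char v[] = "demo-value";
+ *
+ *		if (size == 0)
+ *			return sizeof(v);	// probe: report length only
+ *		if (size < sizeof(v))
+ *			return -1;		// -ERANGE equivalent
+ *		memcpy(value, v, sizeof(v));
+ *		return sizeof(v);
+ *	}
+ *
+ *	static char *demo_get(void)
+ *	{
+ *		ssize_t len = demo_getxattr(NULL, 0);	// 1st call: probe size
+ *		char *buf = len > 0 ? malloc(len) : NULL;
+ *
+ *		if (buf && demo_getxattr(buf, len) != len) {	// 2nd call: fill
+ *			free(buf);
+ *			buf = NULL;
+ *		}
+ *		return buf;
+ *	}
+ */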
hmdfs_send_command sm = { + .data = req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_LISTXATTR); + if (!req) + return -ENOMEM; + + req->path_len = cpu_to_le32(path_len); + req->size = cpu_to_le32(size); + strncpy(req->buf, send_buf, path_len); + ret = hmdfs_sendmessage_request(con, &sm); + if (!ret && (sm.out_len == 0 || !sm.out_buf)) + ret = -ENOENT; + if (ret) + goto out; + + hmdfs_update_listxattr_ret(sm.out_buf, list, size, &ret); + +out: + kfree(req); + free_sm_outbuf(&sm); + return ret; +} + +void hmdfs_recv_syncfs_cb(struct hmdfs_peer *peer, const struct hmdfs_req *req, + const struct hmdfs_resp *resp) +{ + struct hmdfs_sb_info *sbi = peer->sbi; + struct syncfs_request *syncfs_req = (struct syncfs_request *)req->data; + + WARN_ON(!syncfs_req); + spin_lock(&sbi->hsi.v_lock); + if (le64_to_cpu(syncfs_req->version) != sbi->hsi.version) { + hmdfs_info( + "Recv stale syncfs resp[ver: %llu] from device %llu, current ver %llu", + le64_to_cpu(syncfs_req->version), peer->device_id, + sbi->hsi.version); + spin_unlock(&sbi->hsi.v_lock); + goto out; + } + + if (!sbi->hsi.remote_ret) + sbi->hsi.remote_ret = resp->ret_code; + + if (resp->ret_code) { + hmdfs_err("Recv syncfs error code %d from device %llu", + resp->ret_code, peer->device_id); + } else { + /* + * Set @sb_dirty_count to zero if no one else produced + * dirty data on the remote server during the remote sync. + */ + atomic64_cmpxchg(&peer->sb_dirty_count, + peer->old_sb_dirty_count, 0); + } + + atomic_dec(&sbi->hsi.wait_count); + spin_unlock(&sbi->hsi.v_lock); + wake_up_interruptible(&sbi->hsi.wq); + +out: + kfree(syncfs_req); +} + +void hmdfs_send_drop_push(struct hmdfs_peer *con, const char *path) +{ + int path_len = strlen(path); + size_t send_len = sizeof(struct drop_push_request) + path_len + 1; + struct drop_push_request *dp_req = kzalloc(send_len, GFP_KERNEL); + struct hmdfs_send_command sm = { + .data = dp_req, + .len = send_len, + }; + + hmdfs_init_cmd(&sm.operations, F_DROP_PUSH); + if (!dp_req) + return; + + dp_req->path_len = cpu_to_le32(path_len); + strncpy(dp_req->path, path, path_len); + + hmdfs_sendmessage_request(con, &sm); + kfree(dp_req); +} + +static void *hmdfs_get_msg_next(struct hmdfs_peer *peer, int *id) +{ + struct hmdfs_msg_idr_head *head = NULL; + + spin_lock(&peer->idr_lock); + head = idr_get_next(&peer->msg_idr, id); + if (head && head->type < MSG_IDR_MAX && head->type >= 0) + kref_get(&head->ref); + + spin_unlock(&peer->idr_lock); + + return head; +} + +void hmdfs_client_offline_notify(struct hmdfs_peer *conn, int evt, + unsigned int seq) +{ + int id; + int count = 0; + struct hmdfs_msg_idr_head *head = NULL; + + for (id = 0; (head = hmdfs_get_msg_next(conn, &id)) != NULL; ++id) { + switch (head->type) { + case MSG_IDR_1_0_NONE: + head_put(head); + head_put(head); + break; + case MSG_IDR_MESSAGE_SYNC: + case MSG_IDR_1_0_MESSAGE_SYNC: + hmdfs_response_wakeup((struct sendmsg_wait_queue *)head, + -ETIME, 0, NULL); + hmdfs_debug("wakeup id=%d", head->msg_id); + msg_put((struct sendmsg_wait_queue *)head); + break; + case MSG_IDR_MESSAGE_ASYNC: + hmdfs_wakeup_parasite( + (struct hmdfs_msg_parasite *)head); + hmdfs_debug("wakeup parasite id=%d", head->msg_id); + mp_put((struct hmdfs_msg_parasite *)head); + break; + case MSG_IDR_PAGE: + case MSG_IDR_1_0_PAGE: + hmdfs_wakeup_async_work( + (struct hmdfs_async_work *)head); + hmdfs_debug("wakeup async work id=%d", head->msg_id); + asw_put((struct hmdfs_async_work *)head); + break; + default: + hmdfs_err("Bad type=%d id=%d", head->type, + head->msg_id); +
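+/*
+ * hmdfs_recv_syncfs_cb() above drops responses whose embedded version does
+ * not match the current sbi->hsi.version: each new syncfs round bumps the
+ * version, so a straggling reply from an earlier round is recognised and
+ * ignored instead of waking the current waiters. A compact sketch of that
+ * guard, with hypothetical names:
+ *
+ *	#include <stdbool.h>
+ *	#include <stdint.h>
+ *
+ *	struct demo_sync_state { uint64_t version; };	// one per superblock
+ *
+ *	// Each request snapshots the version at send time; a response is
+ *	// only applied if no newer round has started in the meantime.
+ *	static bool demo_resp_is_current(const struct demo_sync_state *st,
+ *					 uint64_t req_version)
+ *	{
+ *		return req_version == st->version;	// else: stale, ignore
+ *	}
+ */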
break; + } + + count++; + /* If there are too many idr entries to process, reschedule + * every 512 messages to avoid a soft lockup + */ + if (count % HMDFS_IDR_RESCHED_COUNT == 0) + cond_resched(); + } +} + +static struct hmdfs_node_cb_desc client_cb[] = { + { + .evt = NODE_EVT_OFFLINE, + .sync = true, + .min_version = DFS_1_0, + .fn = hmdfs_client_offline_notify, + }, +}; + +void __init hmdfs_client_add_node_evt_cb(void) +{ + hmdfs_node_add_evt_cb(client_cb, ARRAY_SIZE(client_cb)); +} diff --git a/fs/hmdfs/hmdfs_client.h b/fs/hmdfs/hmdfs_client.h new file mode 100755 index 000000000..ab2867dca --- /dev/null +++ b/fs/hmdfs/hmdfs_client.h @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/hmdfs_client.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_CLIENT_H +#define HMDFS_CLIENT_H + +#include "comm/transport.h" +#include "hmdfs_dentryfile.h" +#include "hmdfs_device_view.h" + +struct hmdfs_open_ret { + struct hmdfs_fid fid; + __u64 file_size; + __u64 ino; + struct hmdfs_time_t remote_ctime; + struct hmdfs_time_t stable_ctime; +}; + +struct hmdfs_writepage_context { + struct hmdfs_fid fid; + uint32_t count; + bool sync_all; + bool rsem_held; + unsigned long timeout; + struct task_struct *caller; + struct page *page; + struct delayed_work retry_dwork; +}; + +int hmdfs_client_start_readdir(struct hmdfs_peer *con, struct file *filp, + const char *path, int path_len, + struct hmdfs_dcache_header *header); +int hmdfs_client_start_mkdir(struct hmdfs_peer *con, + const char *path, const char *name, + umode_t mode, struct hmdfs_lookup_ret *mkdir_ret); +int hmdfs_client_start_create(struct hmdfs_peer *con, + const char *path, const char *name, + umode_t mode, bool want_excl, + struct hmdfs_lookup_ret *create_ret); +int hmdfs_client_start_rmdir(struct hmdfs_peer *con, const char *path, + const char *name); +int hmdfs_client_start_unlink(struct hmdfs_peer *con, const char *path, + const char *name); +int hmdfs_client_start_rename(struct hmdfs_peer *con, const char *old_path, + const char *old_name, const char *new_path, + const char *new_name, unsigned int flags); + +static inline bool hmdfs_is_offline_err(int err) +{ + /* + * writepage() will get -EBADF if the peer comes online + * again during an offline stash, and such -EBADF also + * needs a redo. + */ + return (err == -EAGAIN || err == -ESHUTDOWN || err == -EBADF); +} + +static inline bool hmdfs_is_offline_or_timeout_err(int err) +{ + return hmdfs_is_offline_err(err) || err == -ETIME; +} + +static inline bool hmdfs_need_redirty_page(const struct hmdfs_inode_info *info, + int err) +{ + /* + * 1. stash is enabled + * 2. offline-related error + * 3.
no restore + */ + return hmdfs_is_stash_enabled(info->conn->sbi) && + hmdfs_is_offline_err(err) && + READ_ONCE(info->stash_status) != HMDFS_REMOTE_INODE_RESTORING; +} + +bool hmdfs_usr_sig_pending(struct task_struct *p); +void hmdfs_writepage_cb(struct hmdfs_peer *peer, const struct hmdfs_req *req, + const struct hmdfs_resp *resp); +int hmdfs_client_writepage(struct hmdfs_peer *con, + struct hmdfs_writepage_context *param); +int hmdfs_remote_do_writepage(struct hmdfs_peer *con, + struct hmdfs_writepage_context *ctx); +void hmdfs_remote_writepage_retry(struct work_struct *work); + +void hmdfs_client_writepage_done(struct hmdfs_inode_info *info, + struct hmdfs_writepage_context *ctx); + +int hmdfs_send_open(struct hmdfs_peer *con, const char *send_buf, + __u8 file_type, struct hmdfs_open_ret *open_ret); +void hmdfs_send_close(struct hmdfs_peer *con, const struct hmdfs_fid *fid); +int hmdfs_send_fsync(struct hmdfs_peer *con, const struct hmdfs_fid *fid, + __s64 start, __s64 end, __s32 datasync); +int hmdfs_client_readpage(struct hmdfs_peer *con, const struct hmdfs_fid *fid, + struct page *page); + +int hmdfs_send_setattr(struct hmdfs_peer *con, const char *send_buf, + struct setattr_info *attr_info); +int hmdfs_send_getattr(struct hmdfs_peer *con, const char *send_buf, + unsigned int lookup_flags, + struct hmdfs_getattr_ret *getattr_result); +int hmdfs_send_statfs(struct hmdfs_peer *con, const char *path, + struct kstatfs *buf); +void hmdfs_client_recv_readpage(struct hmdfs_head_cmd *head, int err, + struct hmdfs_async_work *async_work); +int hmdfs_send_syncfs(struct hmdfs_peer *con, int syncfs_timeout); +int hmdfs_send_getxattr(struct hmdfs_peer *con, const char *send_buf, + const char *name, void *value, size_t size); +int hmdfs_send_setxattr(struct hmdfs_peer *con, const char *send_buf, + const char *name, const void *val, + size_t size, int flags); +ssize_t hmdfs_send_listxattr(struct hmdfs_peer *con, const char *send_buf, + char *list, size_t size); +void hmdfs_recv_syncfs_cb(struct hmdfs_peer *peer, const struct hmdfs_req *req, + const struct hmdfs_resp *resp); + +void __init hmdfs_client_add_node_evt_cb(void); +#endif diff --git a/fs/hmdfs/hmdfs_dentryfile.c b/fs/hmdfs/hmdfs_dentryfile.c new file mode 100755 index 000000000..7b14eaf7e --- /dev/null +++ b/fs/hmdfs/hmdfs_dentryfile.c @@ -0,0 +1,2756 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/hmdfs_dentryfile.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include "hmdfs_dentryfile.h" + +#include <linux/ctype.h> +#include <linux/file.h> +#include <linux/mount.h> +#include <linux/namei.h> +#include <linux/prefetch.h> +#include <linux/slab.h> +#include <linux/xattr.h> + +#include "authority/authentication.h" +#include "comm/transport.h" +#include "hmdfs_client.h" +#include "hmdfs_device_view.h" +#include "hmdfs_merge_view.h" + +/* Hashing code copied from f2fs */ +#define HMDFS_HASH_COL_BIT ((0x1ULL) << 63) +#define DELTA 0x9E3779B9 + +static bool is_dot_dotdot(const unsigned char *name, __u32 len) +{ + if (len == 1 && name[0] == '.') + return true; + + if (len == 2 && name[0] == '.'
&& name[1] == '.') + return true; + + return false; +} + +static void str2hashbuf(const unsigned char *msg, size_t len, unsigned int *buf, + int num, bool case_sense) +{ + unsigned int pad, val; + int i; + unsigned char c; + + pad = (__u32)len | ((__u32)len << 8); + pad |= pad << 16; + + val = pad; + if (len > (size_t)num * 4) + len = (size_t)num * 4; + for (i = 0; i < len; i++) { + if ((i % 4) == 0) + val = pad; + c = msg[i]; + if (!case_sense) + c = tolower(c); + val = c + (val << 8); + if ((i % 4) == 3) { + *buf++ = val; + val = pad; + num--; + } + } + if (--num >= 0) + *buf++ = val; + while (--num >= 0) + *buf++ = pad; +} + +static void tea_transform(unsigned int buf[4], unsigned int const in[]) +{ + __u32 sum = 0; + __u32 b0 = buf[0], b1 = buf[1]; + __u32 a = in[0], b = in[1], c = in[2], d = in[3]; + int n = 16; + + do { + sum += DELTA; + b0 += ((b1 << 4) + a) ^ (b1 + sum) ^ ((b1 >> 5) + b); + b1 += ((b0 << 4) + c) ^ (b0 + sum) ^ ((b0 >> 5) + d); + } while (--n); + + buf[0] += b0; + buf[1] += b1; +} + +static __u32 hmdfs_dentry_hash(const struct qstr *qstr, bool case_sense) +{ + __u32 hash; + __u32 hmdfs_hash; + const unsigned char *p = qstr->name; + __u32 len = qstr->len; + __u32 in[8], buf[4]; + + if (is_dot_dotdot(p, len)) + return 0; + + /* Initialize the default seed for the hash checksum functions */ + buf[0] = 0x67452301; + buf[1] = 0xefcdab89; + buf[2] = 0x98badcfe; + buf[3] = 0x10325476; + + while (1) { + str2hashbuf(p, len, in, 4, case_sense); + tea_transform(buf, in); + p += 16; + if (len <= 16) + break; + len -= 16; + } + hash = buf[0]; + hmdfs_hash = hash & ~HMDFS_HASH_COL_BIT; + return hmdfs_hash; +} + +static atomic_t curr_ino = ATOMIC_INIT(INUNUMBER_START); +int get_inonumber(void) +{ + return atomic_inc_return(&curr_ino); +} + +static int hmdfs_get_root_dentry_type(struct dentry *dentry, int *is_root) +{ + struct hmdfs_dentry_info *d_info = hmdfs_d(dentry); + + *is_root = 1; + switch (d_info->dentry_type) { + case HMDFS_LAYER_OTHER_LOCAL: + *is_root = 0; + fallthrough; + case HMDFS_LAYER_SECOND_LOCAL: + return HMDFS_LAYER_SECOND_LOCAL; + case HMDFS_LAYER_OTHER_REMOTE: + *is_root = 0; + fallthrough; + case HMDFS_LAYER_SECOND_REMOTE: + return HMDFS_LAYER_SECOND_REMOTE; + default: + hmdfs_info("Unexpected dentry type %d", d_info->dentry_type); + return -EINVAL; + } +} + +static int prepend(char **buffer, int *buflen, const char *str, int namelen) +{ + *buflen -= namelen; + if (*buflen < 0) + return -ENAMETOOLONG; + *buffer -= namelen; + memcpy(*buffer, str, namelen); + return 0; +} + +static int prepend_name(char **buffer, int *buflen, const struct qstr *name) +{ + const char *dname = name->name; + u32 dlen = name->len; + char *p = NULL; + + *buflen -= dlen + 1; + if (*buflen < 0) + return -ENAMETOOLONG; + p = *buffer -= dlen + 1; + *p++ = '/'; + while (dlen--) { + char c = *dname++; + + if (!c) + break; + *p++ = c; + } + return 0; +} + +static char *hmdfs_dentry_path_raw(struct dentry *d, char *buf, int buflen) +{ + struct dentry *dentry = NULL; + char *end = NULL; + char *retval = NULL; + unsigned int len; + unsigned int seq = 0; + int root_flag = 0; + int error = 0; + struct hmdfs_dentry_info *di = hmdfs_d(d); + int hmdfs_root_dentry_type = 0; + + di->time = jiffies; + hmdfs_root_dentry_type = hmdfs_get_root_dentry_type(d, &root_flag); + if (hmdfs_root_dentry_type < 0) + return NULL; + if (root_flag) { + strcpy(buf, "/"); + return buf; + } + rcu_read_lock(); +restart: + dentry = d; + di = hmdfs_d(dentry); + di->time = jiffies; + end = buf + buflen; + len = buflen; + 
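+/*
+ * hmdfs_dentry_path_raw() below builds the path right to left: it starts at
+ * the end of the caller's buffer, and prepend()/prepend_name() move the write
+ * cursor backwards for each component, so nothing ever has to be shifted.
+ * A freestanding sketch of the same trick (demo_* names are illustrative):
+ *
+ *	#include <stdio.h>
+ *	#include <string.h>
+ *
+ *	// Prepend one "/name" component, moving *end backwards.
+ *	static int demo_prepend(char **end, int *left, const char *name)
+ *	{
+ *		int len = strlen(name) + 1;
+ *
+ *		if ((*left -= len) < 0)
+ *			return -1;		// -ENAMETOOLONG equivalent
+ *		*end -= len;
+ *		(*end)[0] = '/';
+ *		memcpy(*end + 1, name, len - 1);
+ *		return 0;
+ *	}
+ *
+ *	int main(void)
+ *	{
+ *		char buf[64], *end = buf + sizeof(buf) - 1;
+ *		int left = sizeof(buf) - 1;
+ *
+ *		*end = '\0';
+ *		// Walk leaf -> root, exactly like the d_parent loop below.
+ *		demo_prepend(&end, &left, "c");
+ *		demo_prepend(&end, &left, "b");
+ *		demo_prepend(&end, &left, "a");
+ *		printf("%s\n", end);		// prints "/a/b/c"
+ *		return 0;
+ *	}
+ */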
prepend(&end, &len, "\0", 1); + retval = end - 1; + *retval = '/'; + read_seqbegin_or_lock(&rename_lock, &seq); + while (di->dentry_type != hmdfs_root_dentry_type) { + struct dentry *parent = dentry->d_parent; + + prefetch(parent); + error = prepend_name(&end, &len, &dentry->d_name); + if (error) + break; + retval = end; + dentry = parent; + di = hmdfs_d(dentry); + di->time = jiffies; + } + if (!(seq & 1)) + rcu_read_unlock(); + if (need_seqretry(&rename_lock, seq)) { + seq = 1; + goto restart; + } + done_seqretry(&rename_lock, seq); + if (error) + goto Elong; + return retval; +Elong: + return ERR_PTR(-ENAMETOOLONG); +} + +char *hmdfs_get_dentry_relative_path(struct dentry *dentry) +{ + char *final_buf = NULL; + char *buf = NULL; + char *p = NULL; + + buf = kzalloc(PATH_MAX, GFP_KERNEL); + if (!buf) + return NULL; + + final_buf = kzalloc(PATH_MAX, GFP_KERNEL); + if (!final_buf) { + kfree(buf); + return NULL; + } + + /* a NULL dentry means the root dir */ + if (!dentry) { + strcpy(final_buf, "/"); + kfree(buf); + return final_buf; + } + p = hmdfs_dentry_path_raw(dentry, buf, PATH_MAX); + if (IS_ERR_OR_NULL(p)) { + kfree(buf); + kfree(final_buf); + return NULL; + } + + if (strlen(p) >= PATH_MAX) { + kfree(buf); + kfree(final_buf); + return NULL; + } + strcpy(final_buf, p); + kfree(buf); + return final_buf; +} + +static char *hmdfs_merge_dentry_path_raw(struct dentry *d, char *buf, int buflen) +{ + struct dentry *dentry = NULL; + char *end = NULL; + char *retval = NULL; + unsigned int len; + unsigned int seq = 0; + int error = 0; + struct hmdfs_dentry_info_merge *mdi = hmdfs_dm(d); + + rcu_read_lock(); +restart: + dentry = d; + end = buf + buflen; + len = buflen; + prepend(&end, &len, "\0", 1); + retval = end - 1; + *retval = '/'; + read_seqbegin_or_lock(&rename_lock, &seq); + while (mdi->dentry_type != HMDFS_LAYER_FIRST_MERGE) { + struct dentry *parent = dentry->d_parent; + + prefetch(parent); + error = prepend_name(&end, &len, &dentry->d_name); + if (error) + break; + retval = end; + dentry = parent; + mdi = hmdfs_dm(dentry); + } + if (!(seq & 1)) + rcu_read_unlock(); + if (need_seqretry(&rename_lock, seq)) { + seq = 1; + goto restart; + } + done_seqretry(&rename_lock, seq); + if (error) + goto Elong; + return retval; +Elong: + return ERR_PTR(-ENAMETOOLONG); +} + +char *hmdfs_merge_get_dentry_relative_path(struct dentry *dentry) +{ + char *final_buf = NULL; + char *buf = NULL; + char *p = NULL; + + buf = kzalloc(PATH_MAX, GFP_KERNEL); + if (!buf) + return NULL; + + final_buf = kzalloc(PATH_MAX, GFP_KERNEL); + if (!final_buf) { + kfree(buf); + return NULL; + } + + /* a NULL dentry means the root dir */ + if (!dentry) { + strcpy(final_buf, "/"); + kfree(buf); + return final_buf; + } + p = hmdfs_merge_dentry_path_raw(dentry, buf, PATH_MAX); + if (IS_ERR_OR_NULL(p)) { + kfree(buf); + kfree(final_buf); + return NULL; + } + + if (strlen(p) >= PATH_MAX) { + kfree(buf); + kfree(final_buf); + return NULL; + } + strcpy(final_buf, p); + kfree(buf); + return final_buf; +} + +char *hmdfs_get_dentry_absolute_path(const char *rootdir, + const char *relative_path) +{ + char *buf = NULL; + + if (!rootdir || !relative_path) + return NULL; + if (strlen(rootdir) + strlen(relative_path) >= PATH_MAX) + return NULL; + + buf = kzalloc(PATH_MAX, GFP_KERNEL); + if (!buf) + return NULL; + + strcpy(buf, rootdir); + strcat(buf, relative_path); + return buf; +} + +char *hmdfs_connect_path(const char *path, const char *name) +{ + char *buf = NULL; + + if (!path || !name) + return NULL; + + if (strlen(path) + strlen(name) + 1 >=
PATH_MAX) + return NULL; + + buf = kzalloc(PATH_MAX, GFP_KERNEL); + if (!buf) + return NULL; + + strcpy(buf, path); + strcat(buf, "/"); + strcat(buf, name); + return buf; +} + +int hmdfs_metainfo_read(struct hmdfs_sb_info *sbi, struct file *filp, + void *buffer, int size, int bidx) +{ + loff_t pos = get_dentry_group_pos(bidx); + + return cache_file_read(sbi, filp, buffer, (size_t)size, &pos); +} + +int hmdfs_metainfo_write(struct hmdfs_sb_info *sbi, struct file *filp, + const void *buffer, int size, int bidx) +{ + loff_t pos = get_dentry_group_pos(bidx); + + return cache_file_write(sbi, filp, buffer, (size_t)size, &pos); +} + +/* + * Bucket sequence numbers start at 0 within each level, for example: + * level0 bucket0(0) + * level1 bucket0(1) bucket1(2) + * level2 bucket0(3) bucket1(4) bucket2(5) bucket3(6) + * Returns the overall bucket number. + */ +static __u32 get_bucketaddr(int level, int buckoffset) +{ + int all_level_bucketaddr = 0; + __u32 curlevelmaxbucks; + + if (level >= MAX_BUCKET_LEVEL) { + hmdfs_err("level = %d overflow", level); + return all_level_bucketaddr; + } + curlevelmaxbucks = (1 << level); + if (buckoffset >= curlevelmaxbucks) { + hmdfs_err("buckoffset %d overflow, level %d has %d buckets max", + buckoffset, level, curlevelmaxbucks); + return all_level_bucketaddr; + } + all_level_bucketaddr = curlevelmaxbucks + buckoffset - 1; + + return all_level_bucketaddr; +} + +static __u32 get_bucket_by_level(int level) +{ + int buckets = 0; + + if (level >= MAX_BUCKET_LEVEL) { + hmdfs_err("level = %d overflow", level); + return buckets; + } + + buckets = (1 << level); + return buckets; +} + +static __u32 get_overall_bucket(int level) +{ + int buckets = 0; + + if (level >= MAX_BUCKET_LEVEL) { + hmdfs_err("level = %d overflow", level); + return buckets; + } + buckets = (1 << (level + 1)) - 1; + return buckets; +} + +static inline loff_t get_dcache_file_size(int level) +{ + loff_t buckets = get_overall_bucket(level); + + return buckets * DENTRYGROUP_SIZE * BUCKET_BLOCKS + DENTRYGROUP_HEADER; +} + +static char *get_relative_path(struct hmdfs_sb_info *sbi, char *from) +{ + char *relative; + + if (strncmp(from, sbi->local_src, strlen(sbi->local_src))) { + hmdfs_warning("orig path does not start with local_src"); + return NULL; + } + relative = from + strlen(sbi->local_src); + if (*relative == '/') + relative++; + return relative; +} + +struct file *hmdfs_get_or_create_dents(struct hmdfs_sb_info *sbi, char *name) +{ + struct path root_path, path; + struct file *filp = NULL; + char *relative; + int err; + + err = kern_path(sbi->local_src, 0, &root_path); + if (err) { + hmdfs_err("kern_path failed err = %d", err); + return NULL; + } + relative = get_relative_path(sbi, name); + if (!relative) { + hmdfs_err("get relative path failed"); + goto err_root_path; + } + err = vfs_path_lookup(root_path.dentry, root_path.mnt, relative, 0, + &path); + if (err) { + hmdfs_err("lookup failed err = %d", err); + goto err_root_path; + } + + filp = hmdfs_server_cache_revalidate(sbi, relative, &path); + if (IS_ERR_OR_NULL(filp)) { + filp = hmdfs_server_rebuild_dents(sbi, &path, NULL, relative); + if (IS_ERR_OR_NULL(filp)) + goto err_lookup_path; + } + +err_lookup_path: + path_put(&path); +err_root_path: + path_put(&root_path); + return filp; +} + +/* read all dentries in the target path directory */ +int read_dentry(struct hmdfs_sb_info *sbi, char *file_name, + struct dir_context *ctx) +{ + unsigned long pos = (unsigned long)(ctx->pos); + unsigned long group_id = (pos << (1 + DEV_ID_BIT_NUM)) >> + (POS_BIT_NUM - GROUP_ID_BIT_NUM); +
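+/*
+ * The ctx->pos decoding here packs several fields into one readdir cursor:
+ * a flag bit, the device id, the dentry-group id and the in-group offset.
+ * A hedged sketch with illustrative widths (the real POS_BIT_NUM,
+ * DEV_ID_BIT_NUM, GROUP_ID_BIT_NUM and OFFSET_BIT_MASK values live in
+ * hmdfs_dentryfile.h; the widths below merely sum to 64 the same way):
+ *
+ *	#include <stdint.h>
+ *
+ *	#define D_POS_BITS   64	// assumed: total cursor width
+ *	#define D_DEV_BITS   15	// assumed field widths ...
+ *	#define D_GROUP_BITS 39
+ *	#define D_OFF_BITS    9	// 1 flag + 15 + 39 + 9 == 64
+ *
+ *	static uint64_t demo_set_pos(uint64_t dev, uint64_t grp, uint64_t off)
+ *	{
+ *		return (dev << (D_GROUP_BITS + D_OFF_BITS)) |
+ *		       (grp << D_OFF_BITS) | off;
+ *	}
+ *
+ *	static uint64_t demo_get_group(uint64_t pos)
+ *	{
+ *		// drop the flag+dev bits, then keep the top D_GROUP_BITS
+ *		return (pos << (1 + D_DEV_BITS)) >> (D_POS_BITS - D_GROUP_BITS);
+ *	}
+ *
+ *	static uint64_t demo_get_off(uint64_t pos)
+ *	{
+ *		return pos & ((1ULL << D_OFF_BITS) - 1);
+ *	}
+ */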
unsigned long offset = pos & OFFSET_BIT_MASK; + struct hmdfs_dentry_group *dentry_group = NULL; + struct file *handler = NULL; + int group_num = 0; + int iterate_result = 0; + int i, j; + const struct cred *saved_cred; + + if (!file_name) + return -EINVAL; + + saved_cred = hmdfs_override_fsids(false); + if (!saved_cred) { + hmdfs_err("prepare cred failed!"); + return -ENOMEM; + } + + dentry_group = kzalloc(sizeof(*dentry_group), GFP_KERNEL); + if (!dentry_group) { + hmdfs_revert_fsids(saved_cred); + return -ENOMEM; + } + + handler = hmdfs_get_or_create_dents(sbi, file_name); + if (IS_ERR_OR_NULL(handler)) { + hmdfs_revert_fsids(saved_cred); + kfree(dentry_group); + return -ENOENT; + } + + group_num = get_dentry_group_cnt(file_inode(handler)); + + for (i = group_id; i < group_num; i++) { + hmdfs_metainfo_read(sbi, handler, dentry_group, + sizeof(struct hmdfs_dentry_group), i); + for (j = offset; j < DENTRY_PER_GROUP; j++) { + int len; + int file_type = 0; + bool is_continue; + + len = le16_to_cpu(dentry_group->nsl[j].namelen); + if (!test_bit_le(j, dentry_group->bitmap) || len == 0) + continue; + + if (S_ISDIR(le16_to_cpu(dentry_group->nsl[j].i_mode))) + file_type = DT_DIR; + else if (S_ISREG(le16_to_cpu( + dentry_group->nsl[j].i_mode))) + file_type = DT_REG; + else + continue; + + pos = hmdfs_set_pos(0, i, j); + is_continue = dir_emit( + ctx, dentry_group->filename[j], len, + le64_to_cpu(dentry_group->nsl[j].i_ino), + file_type); + if (!is_continue) { + ctx->pos = pos; + iterate_result = 1; + goto done; + } + } + offset = 0; + } + +done: + hmdfs_revert_fsids(saved_cred); + kfree(dentry_group); + fput(handler); + return iterate_result; +} + +unsigned int get_max_depth(struct file *filp) +{ + size_t isize; + + isize = get_dentry_group_cnt(file_inode(filp)) / BUCKET_BLOCKS; + + return get_count_order(isize + 1); +} + +struct hmdfs_dentry_group *find_dentry_page(struct hmdfs_sb_info *sbi, + pgoff_t index, struct file *filp) +{ + int size; + struct hmdfs_dentry_group *dentry_blk = NULL; + loff_t pos = get_dentry_group_pos(index); + int err; + + dentry_blk = kmalloc(sizeof(*dentry_blk), GFP_KERNEL); + if (!dentry_blk) + return NULL; + + err = hmdfs_wlock_file(filp, pos, DENTRYGROUP_SIZE); + if (err) { + hmdfs_err("lock file pos %lld failed", pos); + kfree(dentry_blk); + return NULL; + } + + size = cache_file_read(sbi, filp, dentry_blk, (size_t)DENTRYGROUP_SIZE, + &pos); + if (size != DENTRYGROUP_SIZE) { + kfree(dentry_blk); + dentry_blk = NULL; + } + + return dentry_blk; +} + +static ssize_t write_dentry_page(struct file *filp, const void *buffer, + int buffersize, loff_t position) +{ + ssize_t size; + + size = kernel_write(filp, buffer, (size_t)buffersize, &position); + if (size != buffersize) + hmdfs_err("write failed, ret = %zd", size); + + return size; +} + +static struct hmdfs_dentry *find_in_block(struct hmdfs_dentry_group *dentry_blk, + __u32 namehash, + const struct qstr *qstr, + struct hmdfs_dentry **insense_de, + bool case_sense) +{ + struct hmdfs_dentry *de; + unsigned long bit_pos = 0; + int max_len = 0; + + while (bit_pos < DENTRY_PER_GROUP) { + if (!test_bit_le(bit_pos, dentry_blk->bitmap)) { + bit_pos++; + max_len++; + continue; + } + de = &dentry_blk->nsl[bit_pos]; + if (unlikely(!de->namelen)) { + bit_pos++; + continue; + } + + if (le32_to_cpu(de->hash) == namehash && + le16_to_cpu(de->namelen) == qstr->len && + !memcmp(qstr->name, dentry_blk->filename[bit_pos], + le16_to_cpu(de->namelen))) + goto found; + if (!(*insense_de) && !case_sense && + le32_to_cpu(de->hash) == namehash && + le16_to_cpu(de->namelen) == qstr->len && + str_n_case_eq(qstr->name,
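+/*
+ * Bucket layout recap for the level walk that follows: level L holds 2^L
+ * buckets, get_bucketaddr(L, i) = 2^L + i - 1 numbers them consecutively
+ * across levels, and each bucket spans BUCKET_BLOCKS dentry groups. So for a
+ * name hash h at level 2, the first group of its bucket sits at
+ * (4 + h % 4 - 1) * BUCKET_BLOCKS. A worked sketch (the block count is
+ * illustrative, see the real BUCKET_BLOCKS macro):
+ *
+ *	#include <stdint.h>
+ *	#include <stdio.h>
+ *
+ *	#define DEMO_BUCKET_BLOCKS 2
+ *
+ *	static uint32_t demo_bucketaddr(int level, uint32_t bucket)
+ *	{
+ *		return (1U << level) + bucket - 1;	// 0; 1,2; 3,4,5,6; ...
+ *	}
+ *
+ *	int main(void)
+ *	{
+ *		uint32_t hash = 0x2a;	// some name hash (42)
+ *
+ *		for (int level = 0; level < 3; level++) {
+ *			uint32_t nbucket = 1U << level;
+ *			uint32_t bidx = demo_bucketaddr(level, hash % nbucket) * DEMO_BUCKET_BLOCKS;
+ *
+ *			printf("level %d: first group %u\n", level, bidx);
+ *		}
+ *		return 0;	// prints 0, 2 and 10 for levels 0..2
+ *	}
+ */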
dentry_blk->filename[bit_pos], + le16_to_cpu(de->namelen))) + *insense_de = de; + max_len = 0; + bit_pos += get_dentry_slots(le16_to_cpu(de->namelen)); + } + de = NULL; +found: + return de; +} + +static struct hmdfs_dentry *hmdfs_in_level(struct dentry *child_dentry, + unsigned int level, + struct hmdfs_dcache_lookup_ctx *ctx) +{ + unsigned int nbucket; + unsigned int bidx, end_block; + struct hmdfs_dentry *de = NULL; + struct hmdfs_dentry *tmp_insense_de = NULL; + struct hmdfs_dentry_group *dentry_blk; + + nbucket = get_bucket_by_level(level); + if (!nbucket) + return de; + + bidx = get_bucketaddr(level, ctx->hash % nbucket) * BUCKET_BLOCKS; + end_block = bidx + BUCKET_BLOCKS; + + for (; bidx < end_block; bidx++) { + dentry_blk = find_dentry_page(ctx->sbi, bidx, ctx->filp); + if (!dentry_blk) + break; + + de = find_in_block(dentry_blk, ctx->hash, ctx->name, + &tmp_insense_de, ctx->sbi->s_case_sensitive); + if (!de && !(ctx->insense_de) && tmp_insense_de) { + ctx->insense_de = tmp_insense_de; + ctx->insense_page = dentry_blk; + ctx->insense_bidx = bidx; + } else if (!de) { + hmdfs_unlock_file(ctx->filp, get_dentry_group_pos(bidx), + DENTRYGROUP_SIZE); + kfree(dentry_blk); + } else { + ctx->page = dentry_blk; + break; + } + } + ctx->bidx = bidx; + return de; +} + +struct hmdfs_dentry *hmdfs_find_dentry(struct dentry *child_dentry, + struct hmdfs_dcache_lookup_ctx *ctx) +{ + struct hmdfs_dentry *de = NULL; + unsigned int max_depth; + unsigned int level; + + if (!ctx->filp) + return NULL; + + ctx->hash = hmdfs_dentry_hash(ctx->name, ctx->sbi->s_case_sensitive); + + max_depth = get_max_depth(ctx->filp); + for (level = 0; level < max_depth; level++) { + de = hmdfs_in_level(child_dentry, level, ctx); + if (de) { + if (ctx->insense_page) { + hmdfs_unlock_file(ctx->filp, + get_dentry_group_pos(ctx->insense_bidx), + DENTRYGROUP_SIZE); + kfree(ctx->insense_page); + ctx->insense_page = NULL; + } + return de; + } + } + if (ctx->insense_de) { + ctx->bidx = ctx->insense_bidx; + ctx->page = ctx->insense_page; + ctx->insense_bidx = 0; + ctx->insense_page = NULL; + } + return ctx->insense_de; +} + +void update_dentry(struct hmdfs_dentry_group *d, struct dentry *child_dentry, + struct inode *inode, __u32 name_hash, unsigned int bit_pos) +{ + struct hmdfs_dentry *de; + const struct qstr name = child_dentry->d_name; + int slots = get_dentry_slots(name.len); + int i; + unsigned long ino; + __u32 igen; + + ino = inode->i_ino; + igen = inode->i_generation; + + de = &d->nsl[bit_pos]; + de->hash = cpu_to_le32(name_hash); + de->namelen = cpu_to_le16(name.len); + memcpy(d->filename[bit_pos], name.name, name.len); + de->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); + de->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); + de->i_size = cpu_to_le64(inode->i_size); + de->i_ino = cpu_to_le64(generate_u64_ino(ino, igen)); + de->i_flag = 0; + de->i_mode = cpu_to_le16(inode->i_mode); + + for (i = 0; i < slots; i++) { + __set_bit_le(bit_pos + i, d->bitmap); + /* avoid wrong garbage data for readdir */ + if (i) + (de + i)->namelen = 0; + } +} + +int room_for_filename(const void *bitmap, int slots, int max_slots) +{ + int bit_start = 0; + int zero_start, zero_end; +next: + zero_start = find_next_zero_bit_le(bitmap, max_slots, bit_start); + if (zero_start >= max_slots) + return max_slots; + + zero_end = find_next_bit_le(bitmap, max_slots, zero_start); + if (zero_end - zero_start >= slots) + return zero_start; + + bit_start = zero_end + 1; + + if (zero_end + 1 >= max_slots) + return max_slots; + goto next; +} + +void 
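+/*
+ * room_for_filename() above scans the group bitmap for a run of free slots
+ * long enough for the name (long names occupy several slots). A self-contained
+ * sketch of the same first-fit search over a plain bit array:
+ *
+ *	#include <stdbool.h>
+ *
+ *	static bool demo_bit(const unsigned char *map, int i)
+ *	{
+ *		return map[i / 8] & (1 << (i % 8));
+ *	}
+ *
+ *	// Return the first index of a run of 'slots' zero bits,
+ *	// or 'max' if no such run exists (mirrors the kernel helper).
+ *	static int demo_room(const unsigned char *map, int slots, int max)
+ *	{
+ *		int run = 0;
+ *
+ *		for (int i = 0; i < max; i++) {
+ *			run = demo_bit(map, i) ? 0 : run + 1;
+ *			if (run == slots)
+ *				return i - slots + 1;
+ *		}
+ *		return max;
+ *	}
+ */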
create_in_cache_file(uint64_t dev_id, struct dentry *dentry) +{ + struct clearcache_item *item = NULL; + + item = hmdfs_find_cache_item(dev_id, dentry->d_parent); + if (item) { + if (d_inode(dentry)) + create_dentry(dentry, d_inode(dentry), item->filp, + hmdfs_sb(dentry->d_sb)); + else + hmdfs_err("inode is null!"); + kref_put(&item->ref, release_cache_item); + } else { + hmdfs_info("find cache item failed, device_id:%llu", dev_id); + } +} + +int create_dentry(struct dentry *child_dentry, struct inode *inode, + struct file *file, struct hmdfs_sb_info *sbi) +{ + unsigned int bit_pos, level; + unsigned long bidx, end_block; + const struct qstr qstr = child_dentry->d_name; + __u32 namehash; + loff_t pos; + ssize_t size; + int ret = 0; + struct hmdfs_dentry_group *dentry_blk = NULL; + + level = 0; + + namehash = hmdfs_dentry_hash(&qstr, sbi->s_case_sensitive); + + dentry_blk = kmalloc(sizeof(*dentry_blk), GFP_KERNEL); + if (!dentry_blk) { + ret = -ENOMEM; + goto out_err; + } +find: + if (level == MAX_BUCKET_LEVEL) { + ret = -ENOSPC; + goto out; + } + bidx = BUCKET_BLOCKS * + get_bucketaddr(level, namehash % get_bucket_by_level(level)); + end_block = bidx + BUCKET_BLOCKS; + if (end_block > get_dentry_group_cnt(file_inode(file))) { + if (cache_file_truncate(sbi, &(file->f_path), + get_dcache_file_size(level))) { + ret = -ENOSPC; + goto out; + } + } + + for (; bidx < end_block; bidx++) { + int size; + + pos = get_dentry_group_pos(bidx); + ret = hmdfs_wlock_file(file, pos, DENTRYGROUP_SIZE); + if (ret) + goto out; + + size = cache_file_read(sbi, file, dentry_blk, + (size_t)DENTRYGROUP_SIZE, &pos); + if (size != DENTRYGROUP_SIZE) { + ret = -ENOSPC; + hmdfs_unlock_file(file, pos, DENTRYGROUP_SIZE); + goto out; + } + + bit_pos = room_for_filename(&dentry_blk->bitmap, + get_dentry_slots(qstr.len), + DENTRY_PER_GROUP); + if (bit_pos < DENTRY_PER_GROUP) + goto add; + hmdfs_unlock_file(file, pos, DENTRYGROUP_SIZE); + } + ++level; + goto find; +add: + pos = get_dentry_group_pos(bidx); + update_dentry(dentry_blk, child_dentry, inode, namehash, bit_pos); + size = cache_file_write(sbi, file, dentry_blk, + sizeof(struct hmdfs_dentry_group), &pos); + if (size != sizeof(struct hmdfs_dentry_group)) + hmdfs_err("cache file write failed!, ret = %zd", size); + hmdfs_unlock_file(file, pos, DENTRYGROUP_SIZE); +out: + kfree(dentry_blk); +out_err: + return ret; +} + +void hmdfs_init_dcache_lookup_ctx(struct hmdfs_dcache_lookup_ctx *ctx, + struct hmdfs_sb_info *sbi, + const struct qstr *qstr, struct file *filp) +{ + ctx->sbi = sbi; + ctx->name = qstr; + ctx->filp = filp; + ctx->bidx = 0; + ctx->page = NULL; + ctx->insense_de = NULL; + ctx->insense_bidx = 0; + ctx->insense_page = NULL; +} + +int update_inode_to_dentry(struct dentry *child_dentry, struct inode *inode) +{ + struct hmdfs_sb_info *sbi = d_inode(child_dentry)->i_sb->s_fs_info; + struct hmdfs_dentry *de = NULL; + loff_t ipos; + struct dentry *parent_dentry; + struct cache_file_node *cfn = NULL; + char *relative_path = NULL; + struct hmdfs_dcache_lookup_ctx ctx; + + parent_dentry = child_dentry->d_parent; + if (hmdfs_d(parent_dentry)->dentry_type == HMDFS_LAYER_FIRST_DEVICE) + return 0; + + relative_path = hmdfs_get_dentry_relative_path(parent_dentry); + if (!relative_path) + return -ENOMEM; + + cfn = find_cfn(sbi, HMDFS_SERVER_CID, relative_path, true); + if (!cfn) + goto out; + + hmdfs_init_dcache_lookup_ctx(&ctx, sbi, &child_dentry->d_name, + cfn->filp); + de = hmdfs_find_dentry(child_dentry, &ctx); + if (!de) + goto out_cfn; + + de->i_mtime = 
cpu_to_le64(inode->i_mtime.tv_sec); + de->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); + de->i_size = cpu_to_le64(inode->i_size); + de->i_ino = cpu_to_le64( + generate_u64_ino(inode->i_ino, inode->i_generation)); + de->i_flag = 0; + + ipos = get_dentry_group_pos(ctx.bidx); + write_dentry_page(cfn->filp, ctx.page, + sizeof(struct hmdfs_dentry_group), ipos); + hmdfs_unlock_file(cfn->filp, ipos, DENTRYGROUP_SIZE); + kfree(ctx.page); +out_cfn: + release_cfn(cfn); +out: + kfree(relative_path); + return 0; +} + +void hmdfs_delete_dentry(struct dentry *d, struct file *filp) +{ + struct hmdfs_dentry *de = NULL; + unsigned int bit_pos; + int slots, i; + loff_t ipos; + ssize_t size; + struct hmdfs_dcache_lookup_ctx ctx; + + hmdfs_init_dcache_lookup_ctx(&ctx, hmdfs_sb(d->d_sb), &d->d_name, filp); + + de = hmdfs_find_dentry(d, &ctx); + if (IS_ERR_OR_NULL(de)) { + hmdfs_info("find dentry failed!, err=%ld", PTR_ERR(de)); + return; + } + slots = get_dentry_slots(le16_to_cpu(de->namelen)); + + bit_pos = de - ctx.page->nsl; + for (i = 0; i < slots; i++) + __clear_bit_le(bit_pos + i, &ctx.page->bitmap); + + ipos = get_dentry_group_pos(ctx.bidx); + size = cache_file_write(hmdfs_sb(d->d_sb), filp, ctx.page, + sizeof(struct hmdfs_dentry_group), &ipos); + if (size != sizeof(struct hmdfs_dentry_group)) + hmdfs_err("cache file write failed!, ret = %zd", size); + hmdfs_unlock_file(filp, ipos, DENTRYGROUP_SIZE); + kfree(ctx.page); +} + +static int hmdfs_get_cache_path(struct hmdfs_sb_info *sbi, struct path *dir) +{ + struct hmdfs_dentry_info *di = hmdfs_d(sbi->sb->s_root); + int err; + + if (!sbi->s_dentry_cache) { + *dir = di->lower_path; + return 0; + } + + err = kern_path(sbi->cache_dir, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, dir); + if (err) + hmdfs_err("open failed, errno = %d", err); + + return err; +} + +static void hmdfs_put_cache_path(struct hmdfs_sb_info *sbi, struct path *dir) +{ + if (!sbi->s_dentry_cache) + return; + path_put(dir); +} + +struct file *create_local_dentry_file_cache(struct hmdfs_sb_info *sbi) +{ + struct file *filp = NULL; + const struct cred *old_cred = hmdfs_override_creds(sbi->system_cred); + struct path cache_dir; + int err; + + err = hmdfs_get_cache_path(sbi, &cache_dir); + if (err) { + filp = ERR_PTR(err); + goto out; + } + + filp = file_open_root(&cache_dir, ".", + O_RDWR | O_LARGEFILE | O_TMPFILE, + DENTRY_FILE_PERM); + if (IS_ERR(filp)) + hmdfs_err("dentryfile open failed and exit err=%ld", + PTR_ERR(filp)); + + hmdfs_put_cache_path(sbi, &cache_dir); +out: + hmdfs_revert_creds(old_cred); + return filp; +} + +static int hmdfs_linkat(struct path *old_path, const char *newname) +{ + struct dentry *new_dentry = NULL; + struct path new_path; + int error; + + new_dentry = kern_path_create(AT_FDCWD, newname, &new_path, 0); + if (IS_ERR(new_dentry)) { + hmdfs_err("create kernel path failed, error: %ld", + PTR_ERR(new_dentry)); + return PTR_ERR(new_dentry); + } + + error = -EXDEV; + if (old_path->mnt != new_path.mnt) + goto out_dput; + + error = vfs_link(old_path->dentry, &init_user_ns, new_path.dentry->d_inode, new_dentry, + NULL); + +out_dput: + done_path_create(&new_path, new_dentry); + return error; +} + +static int cache_file_mkdir(const char *name, umode_t mode) +{ + struct dentry *dentry; + struct path path; + int err; + + dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + err = vfs_mkdir(&init_user_ns, d_inode(path.dentry), dentry, mode); + if (err && err != -EEXIST) + hmdfs_err("vfs_mkdir failed, err = %d", 
err); + + done_path_create(&path, dentry); + return err; +} + +static int cache_file_create_path(const char *fullpath) +{ + char *path; + char *s; + int err = 0; + + path = kstrdup(fullpath, GFP_KERNEL); + if (!path) + return -ENOMEM; + + s = path + 1; + while (true) { + s = strchr(s, '/'); + if (!s) + break; + s[0] = '\0'; + err = cache_file_mkdir(path, 0755); + if (err && err != -EEXIST) + break; + s[0] = '/'; + s++; + } + kfree(path); + return err; +} + +static void hmdfs_cache_path_create(char *s, const char *dir, bool server) +{ + if (server) + snprintf(s, PATH_MAX, "%s/dentry_cache/server/", dir); + else + snprintf(s, PATH_MAX, "%s/dentry_cache/client/", dir); +} + +static void hmdfs_cache_file_create(char *s, uint64_t hash, const char *id, + bool server) +{ + int offset = strlen(s); + + if (server) + snprintf(s + offset, PATH_MAX - offset, "%016llx", hash); + else + snprintf(s + offset, PATH_MAX - offset, "%s_%016llx", id, hash); +} + +int cache_file_name_generate(char *fullname, struct hmdfs_peer *con, + const char *relative_path, bool server) +{ + struct hmdfs_sb_info *sbi = con->sbi; + uint64_t hash; + char cid[HMDFS_CFN_CID_SIZE]; + int err; + + hmdfs_cache_path_create(fullname, sbi->cache_dir, server); + + err = cache_file_create_path(fullname); + if (err && err != -EEXIST) { + hmdfs_err("making dir failed %d", err); + return err; + } + + strncpy(cid, con->cid, HMDFS_CFN_CID_SIZE - 1); + cid[HMDFS_CFN_CID_SIZE - 1] = '\0'; + + hash = path_hash(relative_path, strlen(relative_path), + sbi->s_case_sensitive); + hmdfs_cache_file_create(fullname, hash, cid, server); + + return 0; +} + +static void free_cfn(struct cache_file_node *cfn) +{ + if (!IS_ERR_OR_NULL(cfn->filp)) + filp_close(cfn->filp, NULL); + + kfree(cfn->relative_path); + kfree(cfn); +} + +static bool dentry_file_match(struct cache_file_node *cfn, const char *id, + const char *path) +{ + int ret; + + if (cfn->sbi->s_case_sensitive) + ret = strcmp(cfn->relative_path, path); + else + ret = strcasecmp(cfn->relative_path, path); + + return (!ret && !strncmp((cfn)->cid, id, HMDFS_CFN_CID_SIZE - 1)); +} + +struct cache_file_node *__find_cfn(struct hmdfs_sb_info *sbi, const char *cid, + const char *path, bool server) +{ + struct cache_file_node *cfn = NULL; + struct list_head *head = get_list_head(sbi, server); + + list_for_each_entry(cfn, head, list) { + if (dentry_file_match(cfn, cid, path)) { + refcount_inc(&cfn->ref); + return cfn; + } + } + return NULL; +} + +struct cache_file_node *create_cfn(struct hmdfs_sb_info *sbi, const char *path, + const char *cid, bool server) +{ + struct cache_file_node *cfn = kzalloc(sizeof(*cfn), GFP_KERNEL); + + if (!cfn) + return NULL; + + cfn->relative_path = kstrdup(path, GFP_KERNEL); + if (!cfn->relative_path) + goto out; + + refcount_set(&cfn->ref, 1); + strncpy(cfn->cid, cid, HMDFS_CFN_CID_SIZE - 1); + cfn->cid[HMDFS_CFN_CID_SIZE - 1] = '\0'; + cfn->sbi = sbi; + cfn->server = server; + return cfn; +out: + free_cfn(cfn); + return NULL; +} + +static struct file *insert_cfn(struct hmdfs_sb_info *sbi, const char *filename, + const char *path, const char *cid, bool server) +{ + const struct cred *old_cred = NULL; + struct cache_file_node *cfn = NULL; + struct cache_file_node *exist = NULL; + struct list_head *head = NULL; + struct file *filp = NULL; + + cfn = create_cfn(sbi, path, cid, server); + if (!cfn) + return ERR_PTR(-ENOMEM); + + old_cred = hmdfs_override_creds(sbi->system_cred); + filp = filp_open(filename, O_RDWR | O_LARGEFILE, 0); + hmdfs_revert_creds(old_cred); + if (IS_ERR(filp)) 
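+/*
+ * Cache file naming used by cache_file_name_generate() above: client files
+ * are "<cid>_<%016llx path hash>" and server files are just the hash, placed
+ * under <cache_dir>/dentry_cache/{client,server}/. The name is parsed back by
+ * get_cid_and_hash() when caches are reloaded. A small userspace sketch of
+ * the generate/parse pair (demo_* names are illustrative):
+ *
+ *	#include <inttypes.h>
+ *	#include <stdio.h>
+ *	#include <string.h>
+ *
+ *	static void demo_name(char *out, size_t n, const char *cid,
+ *			      uint64_t hash)
+ *	{
+ *		snprintf(out, n, "%s_%016" PRIx64, cid, hash);
+ *	}
+ *
+ *	static int demo_parse(const char *name, char *cid, size_t cid_sz,
+ *			      uint64_t *hash)
+ *	{
+ *		const char *p = strchr(name, '_');
+ *
+ *		if (!p || (size_t)(p - name) >= cid_sz)
+ *			return -1;
+ *		memcpy(cid, name, p - name);
+ *		cid[p - name] = '\0';
+ *		return sscanf(p + 1, "%" PRIx64, hash) == 1 ? 0 : -1;
+ *	}
+ */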
{ + hmdfs_err("open file failed, err=%ld", PTR_ERR(filp)); + goto out; + } + + head = get_list_head(sbi, server); + + mutex_lock(&sbi->cache_list_lock); + exist = __find_cfn(sbi, cid, path, server); + if (!exist) { + cfn->filp = filp; + list_add_tail(&cfn->list, head); + } else { + mutex_unlock(&sbi->cache_list_lock); + release_cfn(exist); + filp_close(filp, NULL); + filp = ERR_PTR(-EEXIST); + goto out; + } + mutex_unlock(&sbi->cache_list_lock); + return filp; +out: + free_cfn(cfn); + return filp; +} + +int hmdfs_rename_dentry(struct dentry *old_dentry, struct dentry *new_dentry, + struct file *old_filp, struct file *new_filp) +{ + int ret; + struct hmdfs_sb_info *sbi = hmdfs_sb(new_dentry->d_sb); + + /* + * Try to delete first, because a stale dentry might exist after + * an overwrite. + */ + hmdfs_delete_dentry(new_dentry, new_filp); + + ret = create_dentry(new_dentry, d_inode(old_dentry), new_filp, sbi); + if (ret) { + hmdfs_err("create dentry failed!, err=%d", ret); + return ret; + } + + hmdfs_delete_dentry(old_dentry, old_filp); + return 0; +} + +/** + * cache_file_persistent - link the tmpfile to the cache dir + * @con: the connection peer + * @filp: the file handler of the tmpfile + * @relative_path: the relative path to which the tmpfile belongs + * @server: server or client + * + * Return value: the new file handler of the persistent file if the + * persist operation succeeds. Otherwise the original handler of the + * tmpfile passed in is returned, so that the caller does not have to + * check the returned handler. + * + */ +struct file *cache_file_persistent(struct hmdfs_peer *con, struct file *filp, + const char *relative_path, bool server) +{ + struct cache_file_node *cfn = NULL; + char *fullname = NULL; + char *cid = server ? HMDFS_SERVER_CID : (char *)con->cid; + struct file *newf = NULL; + int i = 0; + int len; + int err; + + if (!con->sbi->s_dentry_cache) + return filp; + + cfn = find_cfn(con->sbi, cid, relative_path, server); + if (cfn) { + release_cfn(cfn); + return filp; + } + fullname = kzalloc(PATH_MAX, GFP_KERNEL); + if (!fullname) + return filp; + + err = cache_file_name_generate(fullname, con, relative_path, server); + if (err) + goto out; + + err = __vfs_setxattr(&init_user_ns, file_dentry(filp), file_inode(filp), + DENTRY_FILE_XATTR_NAME, relative_path, + strlen(relative_path), 0); + if (err) { + hmdfs_err("setxattr for file failed, err=%d", err); + goto out; + } + + len = strlen(fullname); + + do { + err = hmdfs_linkat(&filp->f_path, fullname); + if (!err) + break; + + snprintf(fullname + len, PATH_MAX - len, "_%d", i); + } while (i++ < DENTRY_FILE_NAME_RETRY); + + if (err) { + hmdfs_err("link for file failed, err=%d", err); + goto out; + } + + newf = insert_cfn(con->sbi, fullname, relative_path, cid, server); + if (!IS_ERR(newf)) + filp = newf; +out: + kfree(fullname); + return filp; +} + +void __destroy_cfn(struct list_head *head) +{ + struct cache_file_node *cfn = NULL; + struct cache_file_node *n = NULL; + + list_for_each_entry_safe(cfn, n, head, list) { + list_del_init(&cfn->list); + release_cfn(cfn); + } +} + +void hmdfs_cfn_destroy(struct hmdfs_sb_info *sbi) +{ + mutex_lock(&sbi->cache_list_lock); + __destroy_cfn(&sbi->client_cache); + __destroy_cfn(&sbi->server_cache); + mutex_unlock(&sbi->cache_list_lock); +} + +struct cache_file_node *find_cfn(struct hmdfs_sb_info *sbi, const char *cid, + const char *path, bool server) +{ + struct cache_file_node *cfn = NULL; + + mutex_lock(&sbi->cache_list_lock); + cfn = __find_cfn(sbi, cid, path, server); +
mutex_unlock(&sbi->cache_list_lock); + return cfn; +} + +void release_cfn(struct cache_file_node *cfn) +{ + if (refcount_dec_and_test(&cfn->ref)) + free_cfn(cfn); +} + +void remove_cfn(struct cache_file_node *cfn) +{ + struct hmdfs_sb_info *sbi = cfn->sbi; + bool deleted; + + mutex_lock(&sbi->cache_list_lock); + deleted = list_empty(&cfn->list); + if (!deleted) + list_del_init(&cfn->list); + mutex_unlock(&sbi->cache_list_lock); + if (!deleted) { + delete_dentry_file(cfn->filp); + release_cfn(cfn); + } +} + +int hmdfs_do_lock_file(struct file *filp, unsigned char fl_type, loff_t start, + loff_t len) +{ + struct file_lock fl; + int err; + + locks_init_lock(&fl); + + fl.fl_type = fl_type; + fl.fl_flags = FL_POSIX | FL_CLOSE | FL_SLEEP; + fl.fl_start = start; + fl.fl_end = start + len - 1; + fl.fl_owner = filp; + fl.fl_pid = current->tgid; + fl.fl_file = filp; + fl.fl_ops = NULL; + fl.fl_lmops = NULL; + + err = locks_lock_file_wait(filp, &fl); + if (err) + hmdfs_err("lock file wait failed: %d", err); + + return err; +} + +int hmdfs_wlock_file(struct file *filp, loff_t start, loff_t len) +{ + return hmdfs_do_lock_file(filp, F_WRLCK, start, len); +} + +int hmdfs_rlock_file(struct file *filp, loff_t start, loff_t len) +{ + return hmdfs_do_lock_file(filp, F_RDLCK, start, len); +} + +int hmdfs_unlock_file(struct file *filp, loff_t start, loff_t len) +{ + return hmdfs_do_lock_file(filp, F_UNLCK, start, len); +} + +long cache_file_truncate(struct hmdfs_sb_info *sbi, const struct path *path, + loff_t length) +{ + const struct cred *old_cred = hmdfs_override_creds(sbi->system_cred); + long ret = vfs_truncate(path, length); + + hmdfs_revert_creds(old_cred); + + return ret; +} + +ssize_t cache_file_read(struct hmdfs_sb_info *sbi, struct file *filp, void *buf, + size_t count, loff_t *pos) +{ + const struct cred *old_cred = hmdfs_override_creds(sbi->system_cred); + ssize_t ret = kernel_read(filp, buf, count, pos); + + hmdfs_revert_creds(old_cred); + + return ret; +} + +ssize_t cache_file_write(struct hmdfs_sb_info *sbi, struct file *filp, + const void *buf, size_t count, loff_t *pos) +{ + const struct cred *old_cred = hmdfs_override_creds(sbi->system_cred); + ssize_t ret = kernel_write(filp, buf, count, pos); + + hmdfs_revert_creds(old_cred); + + return ret; +} + + +int read_header(struct hmdfs_sb_info *sbi, struct file *filp, + struct hmdfs_dcache_header *header) +{ + ssize_t bytes; + loff_t pos = 0; + + bytes = cache_file_read(sbi, filp, header, sizeof(*header), &pos); + if (bytes != sizeof(*header)) { + hmdfs_err("read file failed, err:%zd", bytes); + return -EIO; + } + + return 0; +} + +static unsigned long long cache_get_dentry_count(struct hmdfs_sb_info *sbi, + struct file *filp) +{ + struct hmdfs_dcache_header header; + int overallpage; + + overallpage = get_dentry_group_cnt(file_inode(filp)); + if (overallpage == 0) + return 0; + + if (read_header(sbi, filp, &header)) + return 0; + + return le64_to_cpu(header.num); +} + +static int cache_check_case_sensitive(struct hmdfs_sb_info *sbi, + struct file *filp) +{ + struct hmdfs_dcache_header header; + + if (read_header(sbi, filp, &header)) + return 0; + + if (sbi->s_case_sensitive != (bool)header.case_sensitive) { + hmdfs_info("Case sensitive inconsistent, current fs is: %d, cache is %d, will drop cache", + sbi->s_case_sensitive, header.case_sensitive); + return 0; + } + return 1; +} + +int write_header(struct file *filp, struct hmdfs_dcache_header *header) +{ + loff_t pos = 0; + ssize_t size; + + size = kernel_write(filp, header, sizeof(*header), 
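+/*
+ * hmdfs_do_lock_file() above takes a blocking POSIX byte-range lock over
+ * exactly one dentry group, so concurrent lookups and updates serialize per
+ * group rather than per file. The userspace analogue is fcntl(F_SETLKW) over
+ * the same range:
+ *
+ *	#include <fcntl.h>
+ *	#include <unistd.h>
+ *
+ *	static int demo_lock(int fd, short type, off_t start, off_t len)
+ *	{
+ *		struct flock fl = {
+ *			.l_type = type,		// F_WRLCK, F_RDLCK or F_UNLCK
+ *			.l_whence = SEEK_SET,
+ *			.l_start = start,	// group offset in the file
+ *			.l_len = len,		// one DENTRYGROUP_SIZE
+ *		};
+ *
+ *		return fcntl(fd, F_SETLKW, &fl);	// blocking, like FL_SLEEP
+ *	}
+ */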
&pos); + if (size != sizeof(*header)) { + hmdfs_err("update dcache header failed %zd", size); + return -EIO; + } + + return 0; +} + +void add_to_delete_list(struct hmdfs_sb_info *sbi, struct cache_file_node *cfn) +{ + mutex_lock(&sbi->cache_list_lock); + list_add_tail(&cfn->list, &sbi->to_delete); + mutex_unlock(&sbi->cache_list_lock); +} + +void load_cfn(struct hmdfs_sb_info *sbi, const char *fullname, const char *path, + const char *cid, bool server) +{ + struct cache_file_node *cfn = NULL; + struct cache_file_node *cfn1 = NULL; + struct list_head *head = NULL; + + cfn = create_cfn(sbi, path, cid, server); + if (!cfn) + return; + + cfn->filp = filp_open(fullname, O_RDWR | O_LARGEFILE, 0); + if (IS_ERR(cfn->filp)) { + hmdfs_err("open fail %ld", PTR_ERR(cfn->filp)); + goto out; + } + + if (cache_get_dentry_count(sbi, cfn->filp) < sbi->dcache_threshold) { + add_to_delete_list(sbi, cfn); + return; + } + + if (!cache_check_case_sensitive(sbi, cfn->filp)) { + add_to_delete_list(sbi, cfn); + return; + } + + head = get_list_head(sbi, server); + + mutex_lock(&sbi->cache_list_lock); + cfn1 = __find_cfn(sbi, cid, path, server); + if (!cfn1) { + list_add_tail(&cfn->list, head); + } else { + release_cfn(cfn1); + mutex_unlock(&sbi->cache_list_lock); + add_to_delete_list(sbi, cfn); + return; + } + mutex_unlock(&sbi->cache_list_lock); + + return; +out: + free_cfn(cfn); +} + +static int get_cid_and_hash(const char *name, uint64_t *hash, char *cid) +{ + int len; + char *p = strstr(name, "_"); + + if (!p) + return -EINVAL; + + len = p - name; + if (len >= HMDFS_CFN_CID_SIZE) + return -EINVAL; + + memcpy(cid, name, len); + cid[len] = '\0'; + + if (sscanf(++p, "%llx", hash) != 1) + return -EINVAL; + return 0; +} + +static void store_one(const char *name, struct cache_file_callback *cb) +{ + struct file *file = NULL; + char *fullname = NULL; + char *kvalue = NULL; + char cid[HMDFS_CFN_CID_SIZE]; + uint64_t hash; + ssize_t error; + + if (strlen(name) + strlen(cb->dirname) >= PATH_MAX) + return; + + fullname = kzalloc(PATH_MAX, GFP_KERNEL); + if (!fullname) + return; + + snprintf(fullname, PATH_MAX, "%s%s", cb->dirname, name); + + file = filp_open(fullname, O_RDWR | O_LARGEFILE, 0); + if (IS_ERR(file)) { + hmdfs_err("open fail %ld", PTR_ERR(file)); + goto out; + } + + kvalue = kzalloc(PATH_MAX, GFP_KERNEL); + if (!kvalue) + goto out_file; + + error = __vfs_getxattr(file_dentry(file), file_inode(file), + DENTRY_FILE_XATTR_NAME, kvalue, PATH_MAX); + if (error <= 0 || error >= PATH_MAX) { + hmdfs_err("getxattr return: %zd", error); + goto out_kvalue; + } + kvalue[error] = '\0'; + cid[0] = '\0'; + + if (!cb->server) { + if (get_cid_and_hash(name, &hash, cid)) { + hmdfs_err("get cid and hash fail"); + goto out_kvalue; + } + } + + load_cfn(cb->sbi, fullname, kvalue, cid, cb->server); + +out_kvalue: + kfree(kvalue); +out_file: + filp_close(file, NULL); +out: + kfree(fullname); +} + +static int cache_file_iterate(struct dir_context *ctx, const char *name, + int name_len, loff_t offset, u64 ino, + unsigned int d_type) +{ + struct cache_file_item *cfi = NULL; + struct cache_file_callback *cb = + container_of(ctx, struct cache_file_callback, ctx); + + if (name_len > NAME_MAX) { + hmdfs_err("name_len:%d NAME_MAX:%u", name_len, NAME_MAX); + return 0; + } + + if (d_type != DT_REG) + return 0; + + cfi = kmalloc(sizeof(*cfi), GFP_KERNEL); + if (!cfi) + return -ENOMEM; + + cfi->name = kstrndup(name, name_len, GFP_KERNEL); + if (!cfi->name) { + kfree(cfi); + return -ENOMEM; + } + + list_add_tail(&cfi->list, &cb->list); + + 
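+/*
+ * store_one() above recovers the relative path of a cache file from the
+ * DENTRY_FILE_XATTR_NAME xattr that cache_file_persistent() wrote when it
+ * linked the tmpfile into place; the file name alone only yields the cid and
+ * the path hash. A userspace sketch of the same tag-and-recover pattern
+ * (the xattr name here is illustrative; requires an xattr-capable fs):
+ *
+ *	#include <stdio.h>
+ *	#include <string.h>
+ *	#include <sys/xattr.h>
+ *
+ *	#define DEMO_XATTR "user.demo_relative_path"
+ *
+ *	static int demo_tag(const char *file, const char *rel_path)
+ *	{
+ *		return setxattr(file, DEMO_XATTR, rel_path,
+ *				strlen(rel_path), 0);
+ *	}
+ *
+ *	static ssize_t demo_recover(const char *file, char *buf, size_t n)
+ *	{
+ *		ssize_t len = getxattr(file, DEMO_XATTR, buf, n - 1);
+ *
+ *		if (len >= 0)
+ *			buf[len] = '\0';
+ *		return len;
+ *	}
+ */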
+	return 0;
+}
+
+void hmdfs_do_load(struct hmdfs_sb_info *sbi, const char *fullname, bool server)
+{
+	struct file *file = NULL;
+	struct path dirpath;
+	int err;
+	struct cache_file_item *cfi = NULL;
+	struct cache_file_item *n = NULL;
+	struct cache_file_callback cb = {
+		.ctx.actor = cache_file_iterate,
+		.ctx.pos = 0,
+		.dirname = fullname,
+		.sbi = sbi,
+		.server = server,
+	};
+	INIT_LIST_HEAD(&cb.list);
+
+	err = kern_path(fullname, LOOKUP_DIRECTORY, &dirpath);
+	if (err) {
+		hmdfs_info("No file path");
+		return;
+	}
+
+	file = dentry_open(&dirpath, O_RDONLY, current_cred());
+	if (IS_ERR_OR_NULL(file)) {
+		hmdfs_err("dentry_open failed, error: %ld", PTR_ERR(file));
+		path_put(&dirpath);
+		return;
+	}
+
+	err = iterate_dir(file, &cb.ctx);
+	if (err)
+		hmdfs_err("iterate_dir failed, err: %d", err);
+
+	list_for_each_entry_safe(cfi, n, &cb.list, list) {
+		store_one(cfi->name, &cb);
+		list_del_init(&cfi->list);
+		kfree(cfi->name);
+		kfree(cfi);
+	}
+
+	fput(file);
+	path_put(&dirpath);
+}
+
+/*
+ * This function is only used to delete dentryfile.dat
+ */
+int delete_dentry_file(struct file *filp)
+{
+	int err = 0;
+	struct dentry *dentry = file_dentry(filp);
+	struct dentry *parent = lock_parent(dentry);
+
+	if (dentry->d_parent == parent) {
+		dget(dentry);
+		err = vfs_unlink(&init_user_ns, d_inode(parent), dentry, NULL);
+		dput(dentry);
+	}
+	unlock_dir(parent);
+
+	return err;
+}
+
+void hmdfs_delete_useless_cfn(struct hmdfs_sb_info *sbi)
+{
+	struct cache_file_node *cfn = NULL;
+	struct cache_file_node *n = NULL;
+
+	mutex_lock(&sbi->cache_list_lock);
+
+	list_for_each_entry_safe(cfn, n, &sbi->to_delete, list) {
+		delete_dentry_file(cfn->filp);
+		list_del_init(&cfn->list);
+		release_cfn(cfn);
+	}
+	mutex_unlock(&sbi->cache_list_lock);
+}
+
+void hmdfs_cfn_load(struct hmdfs_sb_info *sbi)
+{
+	char *fullname = NULL;
+
+	if (!sbi->s_dentry_cache)
+		return;
+
+	fullname = kzalloc(PATH_MAX, GFP_KERNEL);
+	if (!fullname)
+		return;
+
+	snprintf(fullname, PATH_MAX, "%s/dentry_cache/client/",
+		 sbi->cache_dir);
+	hmdfs_do_load(sbi, fullname, false);
+
+	snprintf(fullname, PATH_MAX, "%s/dentry_cache/server/",
+		 sbi->cache_dir);
+	hmdfs_do_load(sbi, fullname, true);
+	kfree(fullname);
+
+	hmdfs_delete_useless_cfn(sbi);
+}
+
+static void __cache_file_destroy_by_path(struct list_head *head,
+					 const char *path)
+{
+	struct cache_file_node *cfn = NULL;
+	struct cache_file_node *n = NULL;
+
+	list_for_each_entry_safe(cfn, n, head, list) {
+		if (strcmp(path, cfn->relative_path) != 0)
+			continue;
+		list_del_init(&cfn->list);
+		delete_dentry_file(cfn->filp);
+		release_cfn(cfn);
+	}
+}
+
+static void cache_file_destroy_by_path(struct hmdfs_sb_info *sbi,
+				       const char *path)
+{
+	mutex_lock(&sbi->cache_list_lock);
+
+	__cache_file_destroy_by_path(&sbi->server_cache, path);
+	__cache_file_destroy_by_path(&sbi->client_cache, path);
+
+	mutex_unlock(&sbi->cache_list_lock);
+}
+
+static void cache_file_find_and_delete(struct hmdfs_peer *con,
+				       const char *relative_path)
+{
+	struct cache_file_node *cfn;
+
+	cfn = find_cfn(con->sbi, con->cid, relative_path, false);
+	if (!cfn)
+		return;
+
+	remove_cfn(cfn);
+	release_cfn(cfn);
+}
+
+void cache_file_delete_by_dentry(struct hmdfs_peer *con, struct dentry *dentry)
+{
+	char *relative_path = NULL;
+
+	relative_path = hmdfs_get_dentry_relative_path(dentry);
+	if (unlikely(!relative_path)) {
+		hmdfs_err("get relative path failed %d", -ENOMEM);
+		return;
+	}
+	cache_file_find_and_delete(con, relative_path);
+	kfree(relative_path);
+}
+
+struct file
*hmdfs_get_new_dentry_file(struct hmdfs_peer *con, + const char *relative_path, + struct hmdfs_dcache_header *header) +{ + struct hmdfs_sb_info *sbi = con->sbi; + int len = strlen(relative_path); + struct file *filp = NULL; + int err; + + filp = create_local_dentry_file_cache(sbi); + if (IS_ERR(filp)) + return filp; + + err = hmdfs_client_start_readdir(con, filp, relative_path, len, header); + if (err) { + if (err != -ENOENT) + hmdfs_err("readdir failed dev: %llu err: %d", + con->device_id, err); + fput(filp); + filp = ERR_PTR(err); + } + + return filp; +} + +void add_cfn_to_item(struct dentry *dentry, struct hmdfs_peer *con, + struct cache_file_node *cfn) +{ + struct file *file = cfn->filp; + int err; + + err = hmdfs_add_cache_list(con->device_id, dentry, file); + if (unlikely(err)) { + hmdfs_err("add cache list failed devid:%llu err:%d", + con->device_id, err); + return; + } +} + +int hmdfs_add_file_to_cache(struct dentry *dentry, struct hmdfs_peer *con, + struct file *file, const char *relative_path) +{ + struct hmdfs_sb_info *sbi = con->sbi; + struct file *newf = file; + + if (cache_get_dentry_count(sbi, file) >= sbi->dcache_threshold) + newf = cache_file_persistent(con, file, relative_path, false); + else + cache_file_find_and_delete(con, relative_path); + + return hmdfs_add_cache_list(con->device_id, dentry, newf); +} + +static struct file *read_header_and_revalidate(struct hmdfs_peer *con, + struct file *filp, + const char *relative_path) +{ + struct hmdfs_dcache_header header; + struct hmdfs_dcache_header *p = NULL; + + if (read_header(con->sbi, filp, &header) == 0) + p = &header; + + return hmdfs_get_new_dentry_file(con, relative_path, p); +} + +void remote_file_revalidate_cfn(struct dentry *dentry, struct hmdfs_peer *con, + struct cache_file_node *cfn, + const char *relative_path) +{ + struct file *file = NULL; + int err; + + file = read_header_and_revalidate(con, cfn->filp, relative_path); + if (IS_ERR(file)) + return; + + /* + * If the request returned ok but file length is 0, we assume + * that the server verified the client cache file is uptodate. + */ + if (i_size_read(file->f_inode) == 0) { + hmdfs_info("The cfn cache for dev:%llu is uptodate", + con->device_id); + fput(file); + add_cfn_to_item(dentry, con, cfn); + return; + } + + /* OK, cfn is not uptodate, let's remove it and add the new file */ + remove_cfn(cfn); + + err = hmdfs_add_file_to_cache(dentry, con, file, relative_path); + if (unlikely(err)) + hmdfs_err("add cache list failed devid:%llu err:%d", + con->device_id, err); + fput(file); +} + +void remote_file_revalidate_item(struct dentry *dentry, struct hmdfs_peer *con, + struct clearcache_item *item, + const char *relative_path) +{ + struct file *file = NULL; + int err; + + file = read_header_and_revalidate(con, item->filp, relative_path); + if (IS_ERR(file)) + return; + + /* + * If the request returned ok but file length is 0, we assume + * that the server verified the client cache file is uptodate. 
+ */ + if (i_size_read(file->f_inode) == 0) { + hmdfs_info("The item cache for dev:%llu is uptodate", + con->device_id); + item->time = jiffies; + fput(file); + return; + } + + /* We need to replace the old item */ + remove_cache_item(item); + cache_file_find_and_delete(con, relative_path); + + err = hmdfs_add_file_to_cache(dentry, con, file, relative_path); + if (unlikely(err)) + hmdfs_err("add cache list failed devid:%llu err:%d", + con->device_id, err); + fput(file); +} + +bool get_remote_dentry_file(struct dentry *dentry, struct hmdfs_peer *con) +{ + struct hmdfs_dentry_info *d_info = hmdfs_d(dentry); + struct cache_file_node *cfn = NULL; + struct hmdfs_sb_info *sbi = con->sbi; + char *relative_path = NULL; + int err = 0; + struct file *filp = NULL; + struct clearcache_item *item; + + if (hmdfs_cache_revalidate(READ_ONCE(con->conn_time), con->device_id, + dentry)) + return false; + + relative_path = hmdfs_get_dentry_relative_path(dentry); + if (unlikely(!relative_path)) { + hmdfs_err("get relative path failed %d", -ENOMEM); + return false; + } + mutex_lock(&d_info->cache_pull_lock); + if (hmdfs_cache_revalidate(READ_ONCE(con->conn_time), con->device_id, + dentry)) + goto out_unlock; + + item = hmdfs_find_cache_item(con->device_id, dentry); + if (item) { + remote_file_revalidate_item(dentry, con, item, relative_path); + kref_put(&item->ref, release_cache_item); + goto out_unlock; + } + + cfn = find_cfn(sbi, con->cid, relative_path, false); + if (cfn) { + remote_file_revalidate_cfn(dentry, con, cfn, relative_path); + release_cfn(cfn); + goto out_unlock; + } + + filp = hmdfs_get_new_dentry_file(con, relative_path, NULL); + if (IS_ERR(filp)) { + err = PTR_ERR(filp); + goto out_unlock; + } + + err = hmdfs_add_file_to_cache(dentry, con, filp, relative_path); + if (unlikely(err)) + hmdfs_err("add cache list failed devid:%lu err:%d", + (unsigned long)con->device_id, err); + fput(filp); + +out_unlock: + mutex_unlock(&d_info->cache_pull_lock); + if (err && err != -ENOENT) + hmdfs_err("readdir failed dev:%lu err:%d", + (unsigned long)con->device_id, err); + kfree(relative_path); + return true; +} + +int hmdfs_file_type(const char *name) +{ + if (!name) + return -EINVAL; + + if (!strcmp(name, CURRENT_DIR) || !strcmp(name, PARENT_DIR)) + return HMDFS_TYPE_DOT; + + return HMDFS_TYPE_COMMON; +} + +struct clearcache_item *hmdfs_find_cache_item(uint64_t dev_id, + struct dentry *dentry) +{ + struct clearcache_item *item = NULL; + struct hmdfs_dentry_info *d_info = hmdfs_d(dentry); + + if (!d_info) + return NULL; + + spin_lock(&d_info->cache_list_lock); + list_for_each_entry(item, &(d_info->cache_list_head), list) { + if (dev_id == item->dev_id) { + kref_get(&item->ref); + spin_unlock(&d_info->cache_list_lock); + return item; + } + } + spin_unlock(&d_info->cache_list_lock); + return NULL; +} + +bool hmdfs_cache_revalidate(unsigned long conn_time, uint64_t dev_id, + struct dentry *dentry) +{ + bool ret = false; + struct clearcache_item *item = NULL; + struct hmdfs_dentry_info *d_info = hmdfs_d(dentry); + unsigned int timeout; + + if (!d_info) + return ret; + + timeout = hmdfs_sb(dentry->d_sb)->dcache_timeout; + spin_lock(&d_info->cache_list_lock); + list_for_each_entry(item, &(d_info->cache_list_head), list) { + if (dev_id == item->dev_id) { + ret = cache_item_revalidate(conn_time, item->time, + timeout); + break; + } + } + spin_unlock(&d_info->cache_list_lock); + return ret; +} + +void remove_cache_item(struct clearcache_item *item) +{ + bool deleted; + + spin_lock(&item->d_info->cache_list_lock); + 
deleted = list_empty(&item->list);
+	if (!deleted)
+		list_del_init(&item->list);
+	spin_unlock(&item->d_info->cache_list_lock);
+	if (!deleted)
+		kref_put(&item->ref, release_cache_item);
+}
+
+void release_cache_item(struct kref *ref)
+{
+	struct clearcache_item *item =
+		container_of(ref, struct clearcache_item, ref);
+
+	if (item->filp)
+		fput(item->filp);
+	kfree(item);
+}
+
+void hmdfs_remove_cache_filp(struct hmdfs_peer *con, struct dentry *dentry)
+{
+	struct clearcache_item *item = NULL;
+	struct clearcache_item *item_temp = NULL;
+	struct hmdfs_dentry_info *d_info = hmdfs_d(dentry);
+
+	if (!d_info)
+		return;
+
+	spin_lock(&d_info->cache_list_lock);
+	list_for_each_entry_safe(item, item_temp, &(d_info->cache_list_head),
+				 list) {
+		if (con->device_id == item->dev_id) {
+			list_del_init(&item->list);
+			spin_unlock(&d_info->cache_list_lock);
+			cache_file_delete_by_dentry(con, dentry);
+			kref_put(&item->ref, release_cache_item);
+			return;
+		}
+	}
+	spin_unlock(&d_info->cache_list_lock);
+}
+
+int hmdfs_add_cache_list(uint64_t dev_id, struct dentry *dentry,
+			 struct file *filp)
+{
+	struct clearcache_item *item = NULL;
+	struct hmdfs_dentry_info *d_info = hmdfs_d(dentry);
+
+	if (!d_info)
+		return -ENOMEM;
+
+	item = kzalloc(sizeof(*item), GFP_KERNEL);
+	if (!item)
+		return -ENOMEM;
+
+	item->dev_id = dev_id;
+	item->filp = get_file(filp);
+	item->time = jiffies;
+	item->d_info = d_info;
+	kref_init(&item->ref);
+	spin_lock(&d_info->cache_list_lock);
+	list_add_tail(&(item->list), &(d_info->cache_list_head));
+	spin_unlock(&d_info->cache_list_lock);
+	return 0;
+}
+
+void hmdfs_add_remote_cache_list(struct hmdfs_peer *con, const char *dir_path)
+{
+	int err = 0;
+	struct remotecache_item *item = NULL;
+	struct remotecache_item *item_temp = NULL;
+	struct path path, root_path;
+	struct hmdfs_dentry_info *d_info = NULL;
+
+	err = kern_path(con->sbi->local_dst, 0, &root_path);
+	if (err) {
+		hmdfs_err("kern_path failed err = %d", err);
+		return;
+	}
+
+	err = vfs_path_lookup(root_path.dentry, root_path.mnt, dir_path, 0,
+			      &path);
+	if (err)
+		goto out_put_root;
+
+	d_info = hmdfs_d(path.dentry);
+	if (!d_info) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* find duplicate con */
+	mutex_lock(&d_info->remote_cache_list_lock);
+	list_for_each_entry_safe(item, item_temp,
+				 &(d_info->remote_cache_list_head), list) {
+		if (item->con->device_id == con->device_id) {
+			mutex_unlock(&d_info->remote_cache_list_lock);
+			goto out;
+		}
+	}
+
+	item = kzalloc(sizeof(*item), GFP_KERNEL);
+	if (!item) {
+		err = -ENOMEM;
+		mutex_unlock(&d_info->remote_cache_list_lock);
+		goto out;
+	}
+
+	item->con = con;
+	item->drop_flag = 0;
+	list_add(&(item->list), &(d_info->remote_cache_list_head));
+	mutex_unlock(&d_info->remote_cache_list_lock);
+
+out:
+	path_put(&path);
+out_put_root:
+	path_put(&root_path);
+}
+
+int hmdfs_drop_remote_cache_dents(struct dentry *dentry)
+{
+	struct path lower_path;
+	struct inode *lower_inode = NULL;
+	struct remotecache_item *item = NULL;
+	struct remotecache_item *item_temp = NULL;
+	struct hmdfs_dentry_info *d_info = NULL;
+	char *relative_path = NULL;
+
+	if (!dentry) {
+		hmdfs_err("dentry null and return");
+		return 0;
+	}
+
+	d_info = hmdfs_d(dentry);
+	if (!d_info) {
+		hmdfs_err("d_info null and return");
+		return 0;
+	}
+	hmdfs_get_lower_path(dentry, &lower_path);
+	if (IS_ERR_OR_NULL(lower_path.dentry)) {
+		hmdfs_put_lower_path(&lower_path);
+		return 0;
+	}
+	lower_inode = d_inode(lower_path.dentry);
+	hmdfs_put_lower_path(&lower_path);
+
if (IS_ERR_OR_NULL(lower_inode))
+		return 0;
+	/* only for directory */
+	if (!S_ISDIR(lower_inode->i_mode))
+		return 0;
+
+	relative_path = hmdfs_get_dentry_relative_path(dentry);
+	if (!relative_path) {
+		hmdfs_err("get dentry relative path failed");
+		return 0;
+	}
+	mutex_lock(&d_info->remote_cache_list_lock);
+	list_for_each_entry_safe(item, item_temp,
+				 &(d_info->remote_cache_list_head), list) {
+		if (item->drop_flag) {
+			item->drop_flag = 0;
+			continue;
+		}
+		mutex_unlock(&d_info->remote_cache_list_lock);
+		hmdfs_send_drop_push(item->con, relative_path);
+		mutex_lock(&d_info->remote_cache_list_lock);
+		list_del(&item->list);
+		kfree(item);
+	}
+	mutex_unlock(&d_info->remote_cache_list_lock);
+
+	kfree(relative_path);
+	return 0;
+}
+
+/* Clear the dentry cache files of the target directory */
+int hmdfs_clear_cache_dents(struct dentry *dentry, bool remove_cache)
+{
+	struct clearcache_item *item = NULL;
+	struct clearcache_item *item_temp = NULL;
+	struct hmdfs_dentry_info *d_info = hmdfs_d(dentry);
+	char *path = NULL;
+
+	if (!d_info)
+		return 0;
+
+	spin_lock(&d_info->cache_list_lock);
+	list_for_each_entry_safe(item, item_temp, &(d_info->cache_list_head),
+				 list) {
+		list_del_init(&item->list);
+		kref_put(&item->ref, release_cache_item);
+	}
+	spin_unlock(&d_info->cache_list_lock);
+
+	if (!remove_cache)
+		return 0;
+
+	/*
+	 * we also need to confirm that there are no dentryfile_dev*
+	 * files under this dentry
+	 */
+	path = hmdfs_get_dentry_relative_path(dentry);
+
+	if (unlikely(!path)) {
+		hmdfs_err("get relative path failed");
+		return 0;
+	}
+
+	cache_file_destroy_by_path(hmdfs_sb(dentry->d_sb), path);
+
+	kfree(path);
+	return 0;
+}
+
+void hmdfs_mark_drop_flag(uint64_t device_id, struct dentry *dentry)
+{
+	struct remotecache_item *item = NULL;
+	struct hmdfs_dentry_info *d_info = NULL;
+
+	d_info = hmdfs_d(dentry);
+	if (!d_info) {
+		hmdfs_err("d_info null and return");
+		return;
+	}
+
+	mutex_lock(&d_info->remote_cache_list_lock);
+	list_for_each_entry(item, &(d_info->remote_cache_list_head), list) {
+		if (item->con->device_id == device_id) {
+			item->drop_flag = 1;
+			break;
+		}
+	}
+	mutex_unlock(&d_info->remote_cache_list_lock);
+}
+
+void hmdfs_clear_drop_flag(struct dentry *dentry)
+{
+	struct remotecache_item *item = NULL;
+	struct hmdfs_dentry_info *d_info = NULL;
+
+	if (!dentry) {
+		hmdfs_err("dentry null and return");
+		return;
+	}
+
+	d_info = hmdfs_d(dentry);
+	if (!d_info) {
+		hmdfs_err("d_info null and return");
+		return;
+	}
+
+	mutex_lock(&d_info->remote_cache_list_lock);
+	list_for_each_entry(item, &(d_info->remote_cache_list_head), list) {
+		if (item->drop_flag)
+			item->drop_flag = 0;
+	}
+	mutex_unlock(&d_info->remote_cache_list_lock);
+}
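+
+/*
+ * Editor's note (not part of the original patch): drop_flag is a one-shot
+ * suppression marker. A peer that just modified a directory already knows
+ * its dcache is stale, so the operation path marks it first:
+ *
+ *	hmdfs_mark_drop_flag(con->device_id, parent_dentry);
+ *	... modify the directory ...
+ *	hmdfs_drop_remote_cache_dents(parent_dentry);
+ *
+ * hmdfs_drop_remote_cache_dents() above then clears the flag and skips
+ * sending a drop-push to that one peer, while still notifying all others.
+ */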
+
+#define DUSTBIN_SUFFIX ".hwbk"
+static void hmdfs_rename_bak(struct dentry *dentry)
+{
+	struct path lower_path;
+	struct dentry *lower_parent = NULL;
+	struct dentry *lower_dentry = NULL;
+	struct dentry *new_dentry = NULL;
+	char *name = NULL;
+	int len = 0;
+	int err = 0;
+	struct renamedata rd;
+
+	hmdfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	len = strlen(lower_dentry->d_name.name) + strlen(DUSTBIN_SUFFIX) + 2;
+	if (len >= NAME_MAX) {
+		err = -ENAMETOOLONG;
+		goto put_lower_path;
+	}
+
+	name = kmalloc(len, GFP_KERNEL);
+	if (!name) {
+		err = -ENOMEM;
+		goto put_lower_path;
+	}
+
+	snprintf(name, len, ".%s%s", lower_dentry->d_name.name, DUSTBIN_SUFFIX);
+	err = mnt_want_write(lower_path.mnt);
+	if (err) {
+		hmdfs_info("get write access failed, err %d", err);
+		goto free_name;
+	}
+
+	lower_parent = lock_parent(lower_dentry);
+	new_dentry = lookup_one_len(name, lower_parent, strlen(name));
+	if (IS_ERR(new_dentry)) {
+		err = PTR_ERR(new_dentry);
+		hmdfs_info("lookup new dentry failed, err %d", err);
+		goto unlock_parent;
+	}
+
+	rd.old_mnt_userns = &init_user_ns;
+	rd.old_dir = d_inode(lower_parent);
+	rd.old_dentry = lower_dentry;
+	rd.new_mnt_userns = &init_user_ns;
+	rd.new_dir = d_inode(lower_parent);
+	rd.new_dentry = new_dentry;
+
+	err = vfs_rename(&rd);
+
+	dput(new_dentry);
+unlock_parent:
+	unlock_dir(lower_parent);
+	mnt_drop_write(lower_path.mnt);
+free_name:
+	kfree(name);
+put_lower_path:
+	hmdfs_put_lower_path(&lower_path);
+
+	if (err)
+		hmdfs_err("failed to rename file, err %d", err);
+}
+
+int hmdfs_root_unlink(uint64_t device_id, struct path *root_path,
+		      const char *unlink_dir, const char *unlink_name)
+{
+	int err = 0;
+	struct path path;
+	struct dentry *child_dentry = NULL;
+	struct inode *dir = NULL;
+	struct inode *child_inode = NULL;
+	kuid_t tmp_uid;
+
+	err = vfs_path_lookup(root_path->dentry, root_path->mnt,
+			      unlink_dir, LOOKUP_DIRECTORY, &path);
+	if (err) {
+		hmdfs_err("path lookup failed err = %d", err);
+		return err;
+	}
+	dir = d_inode(path.dentry);
+	inode_lock_nested(dir, I_MUTEX_PARENT);
+
+	child_dentry = lookup_one_len(unlink_name, path.dentry,
+				      strlen(unlink_name));
+	if (IS_ERR(child_dentry)) {
+		err = PTR_ERR(child_dentry);
+		hmdfs_err("lookup_one_len failed, err = %d", err);
+		goto unlock_out;
+	}
+	if (d_is_negative(child_dentry)) {
+		err = -ENOENT;
+		dput(child_dentry);
+		goto unlock_out;
+	}
+	child_inode = d_inode(child_dentry);
+	if (!child_inode)
+		goto unlock_out;
+
+	tmp_uid = hmdfs_override_inode_uid(dir);
+
+	hmdfs_mark_drop_flag(device_id, path.dentry);
+	ihold(child_inode);
+	err = vfs_unlink(&init_user_ns, dir, child_dentry, NULL);
+	/*
+	 * -EOWNERDEAD means we want to put the file in a special dir instead
+	 * of deleting it, specifically the dustbin on the phone, so that the
+	 * user can recover deleted images and videos.
+	 */
+	if (err == -EOWNERDEAD) {
+		hmdfs_rename_bak(child_dentry);
+		err = 0;
+	}
+	if (err)
+		hmdfs_err("unlink path failed err = %d", err);
+	hmdfs_revert_inode_uid(dir, tmp_uid);
+	dput(child_dentry);
+
+unlock_out:
+	inode_unlock(dir);
+	if (child_inode)
+		iput(child_inode);
+	path_put(&path);
+	return err;
+}
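+
+/*
+ * Editor's worked example (not part of the original patch): with
+ * DUSTBIN_SUFFIX ".hwbk", hmdfs_rename_bak() above turns a lower file
+ * "IMG_001.jpg" into the hidden backup ".IMG_001.jpg.hwbk":
+ *
+ *	len = strlen("IMG_001.jpg") + strlen(".hwbk") + 2;	// 11 + 5 + 2 = 18
+ *	snprintf(name, len, ".%s%s", "IMG_001.jpg", ".hwbk");
+ *
+ * The extra 2 bytes cover the leading '.' and the trailing NUL.
+ * hmdfs_root_unlink() relies on this when it maps -EOWNERDEAD to a rename
+ * instead of a delete.
+ */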
+
+struct dentry *hmdfs_root_mkdir(uint64_t device_id, const char *local_dst_path,
+				const char *mkdir_dir, const char *mkdir_name,
+				umode_t mode)
+{
+	int err;
+	struct path path;
+	struct dentry *child_dentry = NULL;
+	struct dentry *ret = NULL;
+	char *mkdir_path = NULL;
+	char *mkdir_abs_path = NULL;
+
+	mkdir_path = hmdfs_connect_path(mkdir_dir, mkdir_name);
+	if (!mkdir_path)
+		return ERR_PTR(-EACCES);
+
+	mkdir_abs_path =
+		hmdfs_get_dentry_absolute_path(local_dst_path, mkdir_path);
+	if (!mkdir_abs_path) {
+		ret = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	child_dentry = kern_path_create(AT_FDCWD, mkdir_abs_path,
+					&path, LOOKUP_DIRECTORY);
+	if (IS_ERR(child_dentry)) {
+		ret = child_dentry;
+		goto out;
+	}
+
+	hmdfs_mark_drop_flag(device_id, child_dentry->d_parent);
+	err = vfs_mkdir(&init_user_ns, d_inode(path.dentry), child_dentry, mode);
+	if (err) {
+		hmdfs_err("mkdir failed! err=%d", err);
+		ret = ERR_PTR(err);
+		goto out_put;
+	}
+	ret = dget(child_dentry);
+out_put:
+	done_path_create(&path, child_dentry);
+out:
+	kfree(mkdir_path);
+	kfree(mkdir_abs_path);
+	return ret;
+}
+
+struct dentry *hmdfs_root_create(uint64_t device_id, const char *local_dst_path,
+				 const char *create_dir,
+				 const char *create_name,
+				 umode_t mode, bool want_excl)
+{
+	int err;
+	struct path path;
+	struct dentry *child_dentry = NULL;
+	struct dentry *ret = NULL;
+	char *create_path = NULL;
+	char *create_abs_path = NULL;
+
+	create_path = hmdfs_connect_path(create_dir, create_name);
+	if (!create_path)
+		return ERR_PTR(-EACCES);
+
+	create_abs_path =
+		hmdfs_get_dentry_absolute_path(local_dst_path, create_path);
+	if (!create_abs_path) {
+		ret = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	child_dentry = kern_path_create(AT_FDCWD, create_abs_path, &path, 0);
+
+	if (IS_ERR(child_dentry)) {
+		ret = child_dentry;
+		goto out;
+	}
+	hmdfs_mark_drop_flag(device_id, child_dentry->d_parent);
+	err = vfs_create(&init_user_ns, d_inode(path.dentry), child_dentry, mode, want_excl);
+	if (err) {
+		hmdfs_err("path create failed! err=%d", err);
+		ret = ERR_PTR(err);
+		goto out_put;
+	}
+	ret = dget(child_dentry);
+out_put:
+	done_path_create(&path, child_dentry);
+out:
+	kfree(create_path);
+	kfree(create_abs_path);
+	return ret;
+}
+
+int hmdfs_root_rmdir(uint64_t device_id, struct path *root_path,
+		     const char *rmdir_dir, const char *rmdir_name)
+{
+	int err = 0;
+	struct path path;
+	struct dentry *child_dentry = NULL;
+	struct inode *dir = NULL;
+
+	err = vfs_path_lookup(root_path->dentry, root_path->mnt,
+			      rmdir_dir, LOOKUP_DIRECTORY, &path);
+	if (err) {
+		hmdfs_err("path lookup failed err = %d", err);
+		return err;
+	}
+	dir = d_inode(path.dentry);
+	inode_lock_nested(dir, I_MUTEX_PARENT);
+
+	child_dentry = lookup_one_len(rmdir_name, path.dentry,
+				      strlen(rmdir_name));
+	if (IS_ERR(child_dentry)) {
+		err = PTR_ERR(child_dentry);
+		hmdfs_err("lookup_one_len failed, err = %d", err);
+		goto unlock_out;
+	}
+	if (d_is_negative(child_dentry)) {
+		err = -ENOENT;
+		dput(child_dentry);
+		goto unlock_out;
+	}
+
+	hmdfs_mark_drop_flag(device_id, path.dentry);
+	err = vfs_rmdir(&init_user_ns, dir, child_dentry);
+	if (err)
+		hmdfs_err("rmdir failed err = %d", err);
+	dput(child_dentry);
+
+unlock_out:
+	inode_unlock(dir);
+	path_put(&path);
+	return err;
+}
+
+int hmdfs_root_rename(struct hmdfs_sb_info *sbi, uint64_t device_id,
+		      const char *oldpath, const char *oldname,
+		      const char *newpath, const char *newname,
+		      unsigned int flags)
+{
+	int err = 0;
+	struct path path_dst;
+	struct path path_old;
+	struct path path_new;
+	struct dentry *trap = NULL;
+	struct dentry *old_dentry = NULL;
+	struct dentry *new_dentry = NULL;
+	struct renamedata rd;
+
+	err = kern_path(sbi->local_dst, 0, &path_dst);
+	if (err) {
+		hmdfs_err("kern_path for local dst failed %d", err);
+		return err;
+	}
+
+	err = vfs_path_lookup(path_dst.dentry, path_dst.mnt, oldpath, 0,
+			      &path_old);
+	if (err) {
+		hmdfs_info("lookup oldpath from local_dst failed, err %d", err);
+		goto put_path_dst;
+	}
+
+	err = vfs_path_lookup(path_dst.dentry, path_dst.mnt, newpath, 0,
+			      &path_new);
+	if (err) {
+		hmdfs_info("lookup newpath from local_dst failed, err %d", err);
+		goto put_path_old;
+	}
+
+	err = mnt_want_write(path_dst.mnt);
+	if (err) {
+		hmdfs_info("get write access failed for local_dst, err %d",
+			   err);
+		goto put_path_new;
+	}
+
+	trap = lock_rename(path_new.dentry, path_old.dentry);
+
+	old_dentry = lookup_one_len(oldname,
path_old.dentry, strlen(oldname));
+	if (IS_ERR(old_dentry)) {
+		err = PTR_ERR(old_dentry);
+		hmdfs_info("lookup old dentry failed, err %d", err);
+		goto unlock;
+	}
+
+	/* the source should not be an ancestor of the target */
+	if (old_dentry == trap) {
+		err = -EINVAL;
+		goto put_old_dentry;
+	}
+
+	new_dentry = lookup_one_len(newname, path_new.dentry, strlen(newname));
+	if (IS_ERR(new_dentry)) {
+		err = PTR_ERR(new_dentry);
+		hmdfs_info("lookup new dentry failed, err %d", err);
+		goto put_old_dentry;
+	}
+
+	/*
+	 * Exchange rename is not supported, so the target should not be an
+	 * ancestor of the source.
+	 */
+	if (trap == new_dentry) {
+		err = -ENOTEMPTY;
+		goto put_new_dentry;
+	}
+
+	if (d_is_positive(new_dentry) && (flags & RENAME_NOREPLACE)) {
+		err = -EEXIST;
+		goto put_new_dentry;
+	}
+
+	hmdfs_mark_drop_flag(device_id, path_old.dentry);
+	if (path_old.dentry != path_new.dentry)
+		hmdfs_mark_drop_flag(device_id, path_new.dentry);
+
+	rd.old_mnt_userns = &init_user_ns;
+	rd.old_dir = d_inode(path_old.dentry);
+	rd.old_dentry = old_dentry;
+	rd.new_mnt_userns = &init_user_ns;
+	rd.new_dir = d_inode(path_new.dentry);
+	rd.new_dentry = new_dentry;
+
+	err = vfs_rename(&rd);
+
+put_new_dentry:
+	dput(new_dentry);
+put_old_dentry:
+	dput(old_dentry);
+unlock:
+	unlock_rename(path_new.dentry, path_old.dentry);
+	mnt_drop_write(path_dst.mnt);
+put_path_new:
+	path_put(&path_new);
+put_path_old:
+	path_put(&path_old);
+put_path_dst:
+	path_put(&path_dst);
+
+	return err;
+}
+
+int hmdfs_get_path_in_sb(struct super_block *sb, const char *name,
+			 unsigned int flags, struct path *path)
+{
+	int err;
+
+	err = kern_path(name, flags, path);
+	if (err) {
+		hmdfs_err("can't get %s %d\n", name, err);
+		return err;
+	}
+
+	/* ensure the path belongs to this sb */
+	if (path->dentry->d_sb != sb) {
+		err = -EINVAL;
+		hmdfs_err("Wrong sb: %s on %s", name,
+			  path->dentry->d_sb->s_type->name);
+		path_put(path);
+	}
+
+	return err;
+}
diff --git a/fs/hmdfs/hmdfs_dentryfile.h b/fs/hmdfs/hmdfs_dentryfile.h
new file mode 100755
index 000000000..df1463007
--- /dev/null
+++ b/fs/hmdfs/hmdfs_dentryfile.h
@@ -0,0 +1,342 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/hmdfs_dentryfile.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_DENTRYFILE_H
+#define HMDFS_DENTRYFILE_H
+
+#include "hmdfs.h"
+#include
+
+/* used to escape from the hmdfs file system; hmdfs hides the following names */
+#define CURRENT_DIR "."
+#define PARENT_DIR ".."
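+
+/*
+ * Editor's note (not part of the original patch): hmdfs_file_type()
+ * (declared below) classifies these two names as HMDFS_TYPE_DOT so that
+ * iteration code can skip them, e.g.:
+ *
+ *	if (hmdfs_file_type(name) == HMDFS_TYPE_DOT)
+ *		continue;	// hide "." and ".." from the hmdfs view
+ */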
+
+/* local dentry cache data */
+#define DENTRY_FILE_XATTR_NAME "user.hmdfs_cache"
+
+#define DENTRY_FILE_NAME_RETRY 10
+
+#define MAX_BUCKET_LEVEL 63
+#define BUCKET_BLOCKS 2
+#define MAX_DIR_BUCKETS (1 << ((MAX_BUCKET_LEVEL / 2) - 1))
+
+#define CONFLICTING_FILE_CONST_SUFFIX "_conflict_dev"
+#define CONFLICTING_FILE_SUFFIX "_conflict_dev%u"
+#define CONFLICTING_DIR_SUFFIX "_remote_directory"
+
+#define POS_BIT_NUM 64
+#define DEV_ID_BIT_NUM 16
+#define GROUP_ID_BIT_NUM 39
+#define OFFSET_BIT_NUM 8
+#define OFFSET_BIT_MASK 0xFF
+
+#define DEFAULT_DCACHE_TIMEOUT 30
+#define DEFAULT_DCACHE_PRECISION 10
+#define DEFAULT_DCACHE_THRESHOLD 1000
+#define HMDFS_STALE_REMOTE_ISIZE ULLONG_MAX
+
+/* Seconds per week */
+#define MAX_DCACHE_TIMEOUT 604800
+
+struct hmdfs_iterate_callback {
+	struct dir_context ctx;
+	struct dir_context *caller;
+	int result;
+	struct rb_root *root;
+};
+
+/*
+ * 4096 = version(1) + bitmap(10) + reserved(5)
+ *        + nsl(80 * 43) + filename(80 * 8)
+ */
+#define DENTRYGROUP_SIZE 4096
+#define DENTRY_NAME_LEN 8
+#define DENTRY_RESERVED_LENGTH 3
+#define DENTRY_PER_GROUP 80
+#define DENTRY_BITMAP_LENGTH 10
+#define DENTRY_GROUP_RESERVED 5
+#define DENTRYGROUP_HEADER 4096
+
+struct hmdfs_dentry {
+	__le32 hash;
+	__le16 i_mode;
+	__le16 namelen;
+	__le64 i_size;
+	/* modification time */
+	__le64 i_mtime;
+	/* modification time in nano scale */
+	__le32 i_mtime_nsec;
+	/* combination of inode number and generation */
+	__le64 i_ino;
+	__le32 i_flag;
+	/* reserved bytes for long-term extension; the whole dentry is 43 bytes */
+	__u8 reserved[DENTRY_RESERVED_LENGTH];
+} __packed;
+
+/* 4K / 51 bytes = 80 dentries per dentry group */
+struct hmdfs_dentry_group {
+	__u8 dentry_version; /* dentry version starts from 1 */
+	__u8 bitmap[DENTRY_BITMAP_LENGTH];
+	struct hmdfs_dentry nsl[DENTRY_PER_GROUP];
+	__u8 filename[DENTRY_PER_GROUP][DENTRY_NAME_LEN];
+	__u8 reserved[DENTRY_GROUP_RESERVED];
+} __packed;
+
+/*
+ * The content of the first 4K block in dentryfile.dat.
+ * Used to check whether the dcache can be used directly or
+ * needs to be rebuilt.
+ *
+ * Since ctime has a precision of 10ms or less, a dcache rebuilt
+ * at the same time as the dentry inode's ctime may be inconsistent.
+ * e.g.: create 1.jpg 2.jpg 3.jpg
+ *       the dcache rebuild may only contain 1.jpg 2.jpg
+ * So we need these times to verify the dcache.
+ */
+struct hmdfs_dcache_header {
+	/* The time of the dcache rebuild */
+	__le64 dcache_crtime;
+	__le64 dcache_crtime_nsec;
+
+	/* The directory inode ctime when the dcache was rebuilt */
+	__le64 dentry_ctime;
+	__le64 dentry_ctime_nsec;
+
+	/* The dentry count */
+	__le64 num;
+
+	/* Case sensitivity */
+	__u8 case_sensitive;
+} __packed;
+
+static inline loff_t get_dentry_group_pos(unsigned int bidx)
+{
+	return ((loff_t)bidx) * DENTRYGROUP_SIZE + DENTRYGROUP_HEADER;
+}
+
+static inline unsigned int get_dentry_group_cnt(struct inode *inode)
+{
+	loff_t size = i_size_read(inode);
+
+	return size >= DENTRYGROUP_HEADER ?
+ (size - DENTRYGROUP_HEADER) / DENTRYGROUP_SIZE : + 0; +} + +#define DENTRY_NAME_MAX_LEN (DENTRY_PER_GROUP * DENTRY_NAME_LEN) +#define BITS_PER_BYTE 8 +#define HMDFS_SLOT_LEN_BITS 3 +#define get_dentry_slots(x) (((x) + BITS_PER_BYTE - 1) >> HMDFS_SLOT_LEN_BITS) + +#define INUNUMBER_START 10000000 + +#ifdef CONFIG_HMDFS_FS_PERMISSION +#define DENTRY_FILE_PERM 0660 +#else +#define DENTRY_FILE_PERM 0666 +#endif + +struct hmdfs_dcache_lookup_ctx { + struct hmdfs_sb_info *sbi; + const struct qstr *name; + struct file *filp; + __u32 hash; + + /* for case sensitive */ + unsigned int bidx; + struct hmdfs_dentry_group *page; + + /* for case insensitive */ + struct hmdfs_dentry *insense_de; + unsigned int insense_bidx; + struct hmdfs_dentry_group *insense_page; +}; + +extern void hmdfs_init_dcache_lookup_ctx(struct hmdfs_dcache_lookup_ctx *ctx, + struct hmdfs_sb_info *sbi, + const struct qstr *qstr, + struct file *filp); + +int create_dentry(struct dentry *child_dentry, struct inode *inode, + struct file *file, struct hmdfs_sb_info *sbi); +int read_dentry(struct hmdfs_sb_info *sbi, char *file_name, + struct dir_context *ctx); +struct hmdfs_dentry *hmdfs_find_dentry(struct dentry *child_dentry, + struct hmdfs_dcache_lookup_ctx *ctx); +void hmdfs_delete_dentry(struct dentry *d, struct file *filp); +int hmdfs_rename_dentry(struct dentry *old_dentry, struct dentry *new_dentry, + struct file *old_filp, struct file *new_filp); +int get_inonumber(void); +struct file *create_local_dentry_file_cache(struct hmdfs_sb_info *sbi); +int update_inode_to_dentry(struct dentry *child_dentry, struct inode *inode); +struct file *cache_file_persistent(struct hmdfs_peer *con, struct file *filp, + const char *relative_path, bool server); + +#define HMDFS_TYPE_COMMON 0 +#define HMDFS_TYPE_DOT 1 +#define HMDFS_TYPE_DENTRY 2 +#define HMDFS_TYPE_DENTRY_CACHE 3 +int hmdfs_file_type(const char *name); + +loff_t hmdfs_set_pos(unsigned long dev_id, unsigned long group_id, + unsigned long offset); + +struct getdents_callback_real { + struct dir_context ctx; + struct path *parent_path; + loff_t num; + struct file *file; + struct hmdfs_sb_info *sbi; + const char *dir; +}; + +struct file *hmdfs_server_rebuild_dents(struct hmdfs_sb_info *sbi, + struct path *path, loff_t *num, + const char *dir); + +#define DCACHE_LIFETIME 30 + +struct clearcache_item { + uint64_t dev_id; + struct file *filp; + unsigned long time; + struct list_head list; + struct kref ref; + struct hmdfs_dentry_info *d_info; +}; + +void hmdfs_add_remote_cache_list(struct hmdfs_peer *con, const char *dir_path); + +struct remotecache_item { + struct hmdfs_peer *con; + struct list_head list; + __u8 drop_flag; +}; + +#define HMDFS_CFN_CID_SIZE 65 +#define HMDFS_SERVER_CID "" + +struct cache_file_node { + struct list_head list; + struct hmdfs_sb_info *sbi; + char *relative_path; + u8 cid[HMDFS_CFN_CID_SIZE]; + refcount_t ref; + bool server; + struct file *filp; +}; + +struct cache_file_item { + struct list_head list; + const char *name; +}; + +struct cache_file_callback { + struct dir_context ctx; + const char *dirname; + struct hmdfs_sb_info *sbi; + bool server; + struct list_head list; +}; + +int hmdfs_drop_remote_cache_dents(struct dentry *dentry); +void hmdfs_send_drop_push(struct hmdfs_peer *con, const char *path); +void hmdfs_mark_drop_flag(uint64_t device_id, struct dentry *dentry); +void hmdfs_clear_drop_flag(struct dentry *dentry); +void delete_in_cache_file(uint64_t dev_id, struct dentry *dentry); +void create_in_cache_file(uint64_t dev_id, struct dentry 
*dentry); +struct clearcache_item *hmdfs_find_cache_item(uint64_t dev_id, + struct dentry *dentry); +bool hmdfs_cache_revalidate(unsigned long conn_time, uint64_t dev_id, + struct dentry *dentry); +void hmdfs_remove_cache_filp(struct hmdfs_peer *con, struct dentry *dentry); +int hmdfs_add_cache_list(uint64_t dev_id, struct dentry *dentry, + struct file *filp); +int hmdfs_clear_cache_dents(struct dentry *dentry, bool remove_cache); + +int hmdfs_root_unlink(uint64_t device_id, struct path *root_path, + const char *unlink_dir, const char *unlink_name); +struct dentry *hmdfs_root_mkdir(uint64_t device_id, const char *local_dst_path, + const char *mkdir_dir, const char *mkdir_name, + umode_t mode); +struct dentry *hmdfs_root_create(uint64_t device_id, const char *local_dst_path, + const char *create_dir, + const char *create_name, + umode_t mode, bool want_excl); +int hmdfs_root_rmdir(uint64_t device_id, struct path *root_path, + const char *rmdir_dir, const char *rmdir_name); +int hmdfs_root_rename(struct hmdfs_sb_info *sbi, uint64_t device_id, + const char *oldpath, const char *oldname, + const char *newpath, const char *newname, + unsigned int flags); + +int hmdfs_get_path_in_sb(struct super_block *sb, const char *name, + unsigned int flags, struct path *path); + +int hmdfs_wlock_file(struct file *filp, loff_t start, loff_t len); +int hmdfs_rlock_file(struct file *filp, loff_t start, loff_t len); +int hmdfs_unlock_file(struct file *filp, loff_t start, loff_t len); +long cache_file_truncate(struct hmdfs_sb_info *sbi, const struct path *path, + loff_t length); +ssize_t cache_file_read(struct hmdfs_sb_info *sbi, struct file *filp, void *buf, + size_t count, loff_t *pos); +ssize_t cache_file_write(struct hmdfs_sb_info *sbi, struct file *filp, + const void *buf, size_t count, loff_t *pos); +int hmdfs_metainfo_read(struct hmdfs_sb_info *sbi, struct file *filp, + void *buffer, int buffersize, int bidx); + +bool get_remote_dentry_file(struct dentry *dentry, struct hmdfs_peer *con); +void get_remote_dentry_file_sync(struct dentry *dentry, struct hmdfs_peer *con); + +void release_cache_item(struct kref *ref); +void remove_cache_item(struct clearcache_item *item); + +void hmdfs_cfn_load(struct hmdfs_sb_info *sbi); +void hmdfs_cfn_destroy(struct hmdfs_sb_info *sbi); +struct cache_file_node *find_cfn(struct hmdfs_sb_info *sbi, const char *cid, + const char *path, bool server); +void release_cfn(struct cache_file_node *cfn); +void destroy_cfn(struct hmdfs_sb_info *sbi); +void remove_cfn(struct cache_file_node *cfn); +int delete_dentry_file(struct file *filp); +struct file *hmdfs_server_cache_revalidate(struct hmdfs_sb_info *sbi, + const char *recvpath, + struct path *path); +int write_header(struct file *filp, struct hmdfs_dcache_header *header); + +static inline struct list_head *get_list_head(struct hmdfs_sb_info *sbi, + bool server) +{ + return ((server) ? &(sbi)->server_cache : &(sbi)->client_cache); +} + +/* + * generate_u64_ino - generate a new 64 bit inode number + * + * @ino: origin 32 bit inode number + * @generation: origin 32 bit inode generation + * + * We need both remote inode number and generation to ensure the uniqueness of + * the local inode, thus we store inode->i_ino in lower 32 bits, and + * inode->i_generation in higher 32 bits. 
+ */
+static inline uint64_t generate_u64_ino(unsigned long ino,
+					unsigned int generation)
+{
+	return (uint64_t)ino | ((uint64_t)generation << 32);
+}
+
+static inline bool cache_item_revalidate(unsigned long conn_time,
+					 unsigned long item_time,
+					 unsigned int timeout)
+{
+	return time_before_eq(jiffies, item_time + timeout * HZ) &&
+	       time_before_eq(conn_time, item_time);
+}
+
+#endif
diff --git a/fs/hmdfs/hmdfs_device_view.h b/fs/hmdfs/hmdfs_device_view.h
new file mode 100755
index 000000000..76be42a74
--- /dev/null
+++ b/fs/hmdfs/hmdfs_device_view.h
@@ -0,0 +1,252 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/hmdfs_device_view.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_DEVICE_VIEW_H
+#define HMDFS_DEVICE_VIEW_H
+
+#include "hmdfs.h"
+
+/*****************************************************************************
+ * macro definition
+ *****************************************************************************/
+
+#define DEVICE_VIEW_ROOT "device_view"
+#define MERGE_VIEW_ROOT "merge_view"
+#define UPDATE_LOCAL_DST "/device_view/local/"
+
+#define DEVICE_VIEW_LOCAL "local"
+
+/*
+ * in order to distinguish from the vfs, we define our own bitmask; this must
+ * be converted to the vfs bitmask when calling vfs APIs
+ */
+#define HMDFS_LOOKUP_REVAL 0x1
+
+enum HMDFS_FILE_TYPE {
+	HM_REG = 0,
+	HM_SYMLINK = 1,
+	HM_SHARE = 2,
+
+	HM_MAX_FILE_TYPE = 0xFF
+};
+
+struct bydev_inode_info {
+	struct inode *lower_inode;
+	uint64_t ino;
+};
+
+struct hmdfs_dentry_info {
+	struct path lower_path;
+	unsigned long time;
+	struct list_head cache_list_head;
+	spinlock_t cache_list_lock;
+	struct list_head remote_cache_list_head;
+	struct mutex remote_cache_list_lock;
+	__u8 file_type;
+	__u8 dentry_type;
+	uint64_t device_id;
+	spinlock_t lock;
+	struct mutex cache_pull_lock;
+	int async_readdir_in_progress;
+};
+
+struct hmdfs_lookup_ret {
+	uint64_t i_size;
+	uint64_t i_mtime;
+	uint32_t i_mtime_nsec;
+	uint16_t i_mode;
+	uint64_t i_ino;
+};
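+
+/*
+ * Editor's sketch (not part of the original patch): roughly how a remote
+ * lookup result is folded into a local inode. The real consumer is
+ * fill_inode_remote(), declared further below and defined in
+ * inode_remote.c (not shown here); this only illustrates the field
+ * mapping:
+ *
+ *	static void apply_lookup_ret(struct inode *inode,
+ *				     const struct hmdfs_lookup_ret *res)
+ *	{
+ *		inode->i_mode = res->i_mode;
+ *		inode->i_mtime.tv_sec = res->i_mtime;
+ *		inode->i_mtime.tv_nsec = res->i_mtime_nsec;
+ *		i_size_write(inode, res->i_size);
+ *	}
+ */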
+
+struct hmdfs_getattr_ret {
+	/*
+	 * if stat->result_mask is 0, it means this remote getattr failed
+	 * during lookup; see hmdfs_server_getattr for details.
+	 */
+	struct kstat stat;
+	uint32_t i_flags;
+	uint64_t fsid;
+};
+
+extern int hmdfs_remote_getattr(struct hmdfs_peer *conn, struct dentry *dentry,
+				unsigned int lookup_flags,
+				struct hmdfs_getattr_ret **getattr_result);
+
+/*****************************************************************************
+ * local/remote inode/file operations
+ *****************************************************************************/
+
+extern const struct dentry_operations hmdfs_dops;
+extern const struct dentry_operations hmdfs_dev_dops;
+
+/* local device operation */
+extern const struct inode_operations hmdfs_file_iops_local;
+extern const struct file_operations hmdfs_file_fops_local;
+extern const struct inode_operations hmdfs_dir_inode_ops_local;
+extern const struct file_operations hmdfs_dir_ops_local;
+extern const struct file_operations hmdfs_dir_ops_share;
+extern const struct inode_operations hmdfs_symlink_iops_local;
+extern const struct inode_operations hmdfs_dir_inode_ops_share;
+
+/* remote device operation */
+extern const struct inode_operations hmdfs_dev_file_iops_remote;
+extern const struct file_operations hmdfs_dev_file_fops_remote;
+extern const struct address_space_operations hmdfs_dev_file_aops_remote;
+extern const struct inode_operations hmdfs_dev_dir_inode_ops_remote;
+extern const struct file_operations hmdfs_dev_dir_ops_remote;
+extern int hmdfs_dev_unlink_from_con(struct hmdfs_peer *conn,
+				     struct dentry *dentry);
+extern int hmdfs_dev_readdir_from_con(struct hmdfs_peer *con, struct file *file,
+				      struct dir_context *ctx);
+int hmdfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
+int hmdfs_rmdir(struct inode *dir, struct dentry *dentry);
+int hmdfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+		 bool want_excl);
+int hmdfs_unlink(struct inode *dir, struct dentry *dentry);
+int hmdfs_remote_unlink(struct hmdfs_peer *conn, struct dentry *dentry);
+int hmdfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+		 struct inode *new_dir, struct dentry *new_dentry,
+		 unsigned int flags);
+loff_t hmdfs_file_llseek_local(struct file *file, loff_t offset, int whence);
+
+ssize_t hmdfs_do_read_iter(struct file *file, struct iov_iter *iter,
+			   loff_t *ppos);
+ssize_t hmdfs_do_write_iter(struct file *file, struct iov_iter *iter,
+			    loff_t *ppos);
+
+int hmdfs_file_release_local(struct inode *inode, struct file *file);
+int hmdfs_file_mmap_local(struct file *file, struct vm_area_struct *vma);
+struct dentry *hmdfs_lookup(struct inode *parent_inode,
+			    struct dentry *child_dentry, unsigned int flags);
+struct dentry *hmdfs_lookup_local(struct inode *parent_inode,
+				  struct dentry *child_dentry,
+				  unsigned int flags);
+struct dentry *hmdfs_lookup_remote(struct inode *parent_inode,
+				   struct dentry *child_dentry,
+				   unsigned int flags);
+int hmdfs_symlink_local(struct inode *dir, struct dentry *dentry,
+			const char *symname);
+int hmdfs_fsync_local(struct file *file, loff_t start, loff_t end,
+		      int datasync);
+int hmdfs_symlink(struct inode *dir, struct dentry *dentry,
+		  const char *symname);
+int hmdfs_fsync(struct file *file, loff_t start, loff_t end, int datasync);
+
+/*****************************************************************************
+ * common functions declaration
+ *****************************************************************************/
+
+static inline struct hmdfs_dentry_info *hmdfs_d(struct dentry *dentry)
+{
+	return dentry->d_fsdata;
+}
+
+static inline bool hm_isreg(uint8_t file_type)
+{
+	return (file_type == HM_REG);
+}
+
+static inline bool hm_islnk(uint8_t file_type)
+{
+	return (file_type == HM_SYMLINK);
+}
+
+static inline bool hm_isshare(uint8_t file_type)
+{
+	return (file_type == HM_SHARE);
+}
+
+struct inode *fill_inode_remote(struct super_block *sb, struct hmdfs_peer *con,
+				struct hmdfs_lookup_ret *lookup_result,
+				struct inode *dir);
+struct hmdfs_lookup_ret *get_remote_inode_info(struct hmdfs_peer *con,
+					       struct dentry *dentry,
+					       unsigned int flags);
+void hmdfs_set_time(struct dentry *dentry, unsigned long time);
+struct inode *fill_inode_local(struct super_block *sb,
+			       struct inode *lower_inode, const char *name);
+struct inode *fill_root_inode(struct super_block *sb,
+			      struct inode *lower_inode);
+struct inode *fill_device_inode(struct super_block *sb,
+				struct inode *lower_inode);
+struct hmdfs_lookup_ret *hmdfs_lookup_by_con(struct hmdfs_peer *con,
+					     struct dentry *dentry,
+					     struct qstr *qstr,
+					     unsigned int flags,
+					     const char *relative_path);
+char *hmdfs_connect_path(const char *path, const char *name);
+
+char *hmdfs_get_dentry_relative_path(struct dentry *dentry);
+char *hmdfs_merge_get_dentry_relative_path(struct dentry *dentry);
+char *hmdfs_get_dentry_absolute_path(const char *rootdir,
+				     const char *relative_path);
+int hmdfs_convert_lookup_flags(unsigned int hmdfs_flags,
+			       unsigned int *vfs_flags);
+static inline void hmdfs_get_lower_path(struct dentry *dent, struct path *pname)
+{
+	spin_lock(&hmdfs_d(dent)->lock);
+	pname->dentry = hmdfs_d(dent)->lower_path.dentry;
+	pname->mnt = hmdfs_d(dent)->lower_path.mnt;
+	path_get(pname);
+	spin_unlock(&hmdfs_d(dent)->lock);
+}
+
+static inline void hmdfs_put_lower_path(struct path *pname)
+{
+	path_put(pname);
+}
+
+static inline void hmdfs_put_reset_lower_path(struct dentry *dent)
+{
+	struct path pname;
+
+	spin_lock(&hmdfs_d(dent)->lock);
+	if (hmdfs_d(dent)->lower_path.dentry) {
+		pname.dentry = hmdfs_d(dent)->lower_path.dentry;
+		pname.mnt = hmdfs_d(dent)->lower_path.mnt;
+		hmdfs_d(dent)->lower_path.dentry = NULL;
+		hmdfs_d(dent)->lower_path.mnt = NULL;
+		spin_unlock(&hmdfs_d(dent)->lock);
+		path_put(&pname);
+	} else {
+		spin_unlock(&hmdfs_d(dent)->lock);
+	}
+}
+
+static inline void hmdfs_set_lower_path(struct dentry *dent, struct path *pname)
+{
+	spin_lock(&hmdfs_d(dent)->lock);
+	hmdfs_d(dent)->lower_path.dentry = pname->dentry;
+	hmdfs_d(dent)->lower_path.mnt = pname->mnt;
+	spin_unlock(&hmdfs_d(dent)->lock);
+}
+
+/* Only reg files for HMDFS_LAYER_OTHER_* support xattr */
+static inline bool hmdfs_support_xattr(struct dentry *dentry)
+{
+	struct inode *inode = d_inode(dentry);
+	struct hmdfs_inode_info *info = hmdfs_i(inode);
+	struct hmdfs_dentry_info *gdi = hmdfs_d(dentry);
+
+	if (info->inode_type != HMDFS_LAYER_OTHER_LOCAL &&
+	    info->inode_type != HMDFS_LAYER_OTHER_REMOTE &&
+	    info->inode_type != HMDFS_LAYER_OTHER_MERGE)
+		return false;
+
+	if (!S_ISREG(inode->i_mode))
+		return false;
+
+	if (hm_islnk(gdi->file_type))
+		return false;
+
+	return true;
+}
+
+int init_hmdfs_dentry_info(struct hmdfs_sb_info *sbi, struct dentry *dentry,
+			   int dentry_type);
+
+#endif
diff --git a/fs/hmdfs/hmdfs_merge_view.h b/fs/hmdfs/hmdfs_merge_view.h
new file mode 100755
index 000000000..f7d21cc5b
--- /dev/null
+++ b/fs/hmdfs/hmdfs_merge_view.h
@@ -0,0 +1,200 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/hmdfs_merge_view.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef HMDFS_MERGE_VIEW_H
+#define HMDFS_MERGE_VIEW_H
+
+#include "hmdfs.h"
+
+#include "comm/connection.h"
+#include
+#include
+#include
+#include
+
+/*****************************************************************************
+ * Dentries for the merge view and their comrades.
+ * A dentry's lower dentry is named COMRADE.
+ *****************************************************************************/
+
+struct merge_lookup_work {
+	char *name;
+	int devid;
+	unsigned int flags;
+	struct hmdfs_sb_info *sbi;
+	wait_queue_head_t *wait_queue;
+	struct work_struct work;
+};
+
+struct hmdfs_dentry_info_merge {
+	unsigned long ctime;
+	int type;
+	int work_count;
+	struct mutex work_lock;
+	wait_queue_head_t wait_queue;
+	__u8 dentry_type;
+	struct mutex comrade_list_lock;
+	struct list_head comrade_list;
+};
+
+struct hmdfs_dentry_comrade {
+	uint64_t dev_id;
+	struct dentry *lo_d;
+	struct list_head list;
+};
+
+enum FILE_CMD_MERGE {
+	F_MKDIR_MERGE = 0,
+	F_CREATE_MERGE = 1,
+};
+
+struct hmdfs_recursive_para {
+	bool is_last;
+	int opcode;
+	umode_t mode;
+	bool want_excl;
+	const char *name;
+};
+
+static inline struct hmdfs_dentry_info_merge *hmdfs_dm(struct dentry *dentry)
+{
+	return dentry->d_fsdata;
+}
+
+static inline umode_t hmdfs_cm(struct hmdfs_dentry_comrade *comrade)
+{
+	return d_inode(comrade->lo_d)->i_mode;
+}
+
+static inline bool comrade_is_local(struct hmdfs_dentry_comrade *comrade)
+{
+	return comrade->dev_id == HMDFS_DEVID_LOCAL;
+}
+
+struct dentry *hmdfs_lookup_merge(struct inode *parent_inode,
+				  struct dentry *child_dentry,
+				  unsigned int flags);
+
+struct hmdfs_dentry_comrade *alloc_comrade(struct dentry *lo_d, int dev_id);
+
+void link_comrade(struct list_head *onstack_comrades_head,
+		  struct hmdfs_dentry_comrade *comrade);
+
+static inline void destroy_comrade(struct hmdfs_dentry_comrade *comrade)
+{
+	dput(comrade->lo_d);
+	kfree(comrade);
+}
+
+void clear_comrades(struct dentry *dentry);
+
+static inline void link_comrade_unlocked(struct dentry *dentry,
+					 struct hmdfs_dentry_comrade *comrade)
+{
+	mutex_lock(&hmdfs_dm(dentry)->comrade_list_lock);
+	link_comrade(&hmdfs_dm(dentry)->comrade_list, comrade);
+	mutex_unlock(&hmdfs_dm(dentry)->comrade_list_lock);
+}
+
+void clear_comrades_locked(struct list_head *comrade_list);
+
+static inline bool is_comrade_list_empty(struct hmdfs_dentry_info_merge *mdi)
+{
+	bool ret;
+
+	mutex_lock(&mdi->comrade_list_lock);
+	ret = list_empty(&mdi->comrade_list);
+	mutex_unlock(&mdi->comrade_list_lock);
+
+	return ret;
+}
+
+static inline bool has_merge_lookup_work(struct hmdfs_dentry_info_merge *mdi)
+{
+	bool ret;
+
+	mutex_lock(&mdi->work_lock);
+	ret = (mdi->work_count != 0);
+	mutex_unlock(&mdi->work_lock);
+
+	return ret;
+}
+
+static inline bool is_merge_lookup_end(struct hmdfs_dentry_info_merge *mdi)
+{
+	bool ret;
+
+	mutex_lock(&mdi->work_lock);
+	ret = mdi->work_count == 0 || !is_comrade_list_empty(mdi);
+	mutex_unlock(&mdi->work_lock);
+
+	return ret;
+}
+
+#define for_each_comrade_locked(_dentry, _comrade) \
+	list_for_each_entry(_comrade, &(hmdfs_dm(_dentry)->comrade_list), list)
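+
+/*
+ * Editor's illustration (not part of the original patch): the intended
+ * locking pattern around a dentry's comrade list, using the helper above:
+ *
+ *	struct hmdfs_dentry_comrade *c = NULL;
+ *
+ *	mutex_lock(&hmdfs_dm(dentry)->comrade_list_lock);
+ *	for_each_comrade_locked(dentry, c)
+ *		pr_info("comrade on dev %llu\n", c->dev_id);
+ *	mutex_unlock(&hmdfs_dm(dentry)->comrade_list_lock);
+ */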
+
+#define hmdfs_trace_merge(_trace_func, _parent_inode, _child_dentry, err)     \
+	{                                                                      \
+		struct hmdfs_dentry_comrade *comrade;                          \
+		struct hmdfs_dentry_info_merge *dm = hmdfs_dm(_child_dentry); \
+		_trace_func(_parent_inode, _child_dentry, err);                \
+		if (likely(dm)) {                                              \
+			mutex_lock(&dm->comrade_list_lock);                    \
+			for_each_comrade_locked(_child_dentry, comrade)        \
+				trace_hmdfs_show_comrade(_child_dentry,        \
+							 comrade->lo_d,        \
+							 comrade->dev_id);     \
+			mutex_unlock(&dm->comrade_list_lock);                  \
+		}                                                              \
+	}
+
+#define hmdfs_trace_rename_merge(olddir, olddentry, newdir, newdentry, err)   \
+	{                                                                      \
+		struct hmdfs_dentry_comrade *comrade;                          \
+		trace_hmdfs_rename_merge(olddir, olddentry, newdir, newdentry, \
+					 err);                                 \
+		mutex_lock(&hmdfs_dm(olddentry)->comrade_list_lock);           \
+		for_each_comrade_locked(olddentry, comrade)                    \
+			trace_hmdfs_show_comrade(olddentry, comrade->lo_d,     \
+						 comrade->dev_id);             \
+		mutex_unlock(&hmdfs_dm(olddentry)->comrade_list_lock);         \
+		mutex_lock(&hmdfs_dm(newdentry)->comrade_list_lock);           \
+		for_each_comrade_locked(newdentry, comrade)                    \
+			trace_hmdfs_show_comrade(newdentry, comrade->lo_d,     \
+						 comrade->dev_id);             \
+		mutex_unlock(&hmdfs_dm(newdentry)->comrade_list_lock);         \
+	}
+
+/*****************************************************************************
+ * Helper functions abstracting out comrade
+ *****************************************************************************/
+
+static inline bool hmdfs_i_merge(struct hmdfs_inode_info *hii)
+{
+	__u8 t = hii->inode_type;
+	return t == HMDFS_LAYER_FIRST_MERGE || t == HMDFS_LAYER_OTHER_MERGE;
+}
+
+struct dentry *hmdfs_get_lo_d(struct dentry *dentry, int dev_id);
+struct dentry *hmdfs_get_fst_lo_d(struct dentry *dentry);
+
+/*****************************************************************************
+ * Inode operations for the merge view
+ *****************************************************************************/
+
+extern const struct inode_operations hmdfs_file_iops_merge;
+extern const struct file_operations hmdfs_file_fops_merge;
+extern const struct inode_operations hmdfs_dir_iops_merge;
+extern const struct file_operations hmdfs_dir_fops_merge;
+extern const struct dentry_operations hmdfs_dops_merge;
+
+/*****************************************************************************
+ * dentry cache for the merge view
+ *****************************************************************************/
+extern struct kmem_cache *hmdfs_dentry_merge_cachep;
+
+#endif // HMDFS_MERGE_VIEW_H
diff --git a/fs/hmdfs/hmdfs_server.c b/fs/hmdfs/hmdfs_server.c
new file mode 100755
index 000000000..49f56676c
--- /dev/null
+++ b/fs/hmdfs/hmdfs_server.c
@@ -0,0 +1,1943 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/hmdfs_server.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include "hmdfs_server.h"
+
+#include
+#include
+#include
+#include
+#include
+
+#include "authority/authentication.h"
+#include "hmdfs.h"
+#include "hmdfs_dentryfile.h"
+#include "hmdfs_share.h"
+#include "hmdfs_trace.h"
+#include "server_writeback.h"
+#include "comm/node_cb.h"
+
+#define HMDFS_MAX_HIDDEN_DIR 1
+
+struct hmdfs_open_info {
+	struct file *file;
+	struct inode *inode;
+	bool stat_valid;
+	struct kstat stat;
+	uint64_t real_ino;
+	int file_id;
+};
+
+static int insert_file_into_conn(struct hmdfs_peer *conn, struct file *file)
+{
+	struct idr *idr = &(conn->file_id_idr);
+	int ret;
+
+	idr_preload(GFP_KERNEL);
+	spin_lock(&(conn->file_id_lock));
+	ret = idr_alloc_cyclic(idr, file, 0, 0, GFP_NOWAIT);
+	spin_unlock(&(conn->file_id_lock));
+	idr_preload_end();
+	return ret;
+}
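+
+/*
+ * Editor's illustration (not part of the original patch): the intended
+ * lifecycle of a server-side file id, assuming a connected peer "conn"
+ * and an opened struct file "filp":
+ *
+ *	int id = insert_file_into_conn(conn, filp);	// allocate an id
+ *
+ *	struct file *f = get_file_from_conn(conn, id);	// takes an extra ref
+ *	if (f) {
+ *		// ... use f ...
+ *		fput(f);				// drop the extra ref
+ *	}
+ *	remove_file_from_conn(conn, id);		// free the id
+ */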
+
+/*
+ * get_file_from_conn - get a file from the conn by file_id. Note that an
+ * additional reference is acquired for the returned file; the caller should
+ * put it once the file is no longer used.
+ */
+static struct file *get_file_from_conn(struct hmdfs_peer *conn, __u32 file_id)
+{
+	struct file *file;
+	struct idr *idr = &(conn->file_id_idr);
+
+	rcu_read_lock();
+	file = idr_find(idr, file_id);
+	if (file && !get_file_rcu(file))
+		file = NULL;
+	rcu_read_unlock();
+	return file;
+}
+
+void remove_file_from_conn(struct hmdfs_peer *conn, __u32 file_id)
+{
+	spinlock_t *lock = &(conn->file_id_lock);
+	struct idr *idr = &(conn->file_id_idr);
+
+	spin_lock(lock);
+	idr_remove(idr, file_id);
+	spin_unlock(lock);
+}
+
+struct file *hmdfs_open_path(struct hmdfs_sb_info *sbi, const char *path)
+{
+	struct path root_path;
+	struct file *file;
+	int err;
+	const char *root_name = sbi->local_dst;
+
+	err = kern_path(root_name, 0, &root_path);
+	if (err) {
+		hmdfs_info("kern_path failed: %d", err);
+		return ERR_PTR(err);
+	}
+	file = file_open_root(&root_path, path,
+			      O_RDWR | O_LARGEFILE, 0644);
+	path_put(&root_path);
+	if (IS_ERR(file)) {
+		hmdfs_err(
+			"GRAPERR sb->s_readonly_remount %d sb_flag %lu",
+			sbi->sb->s_readonly_remount, sbi->sb->s_flags);
+		hmdfs_info("file_open_root failed: %ld", PTR_ERR(file));
+	} else {
+		hmdfs_info("get file with magic %lu",
+			   file->f_inode->i_sb->s_magic);
+	}
+	return file;
+}
+
+inline void hmdfs_close_path(struct file *file)
+{
+	fput(file);
+}
+
+/* After going offline, the server closes all files opened by the client */
+void hmdfs_server_offline_notify(struct hmdfs_peer *conn, int evt,
+				 unsigned int seq)
+{
+	int id;
+	int count = 0;
+	unsigned int next;
+	struct file *filp = NULL;
+	struct idr *idr = &conn->file_id_idr;
+
+	/* wait for all async work to complete */
+	flush_workqueue(conn->req_handle_wq);
+	flush_workqueue(conn->async_wq);
+
+	/* If there are still open requests being processed,
+	 * we may need to close their files when the peer goes offline
+	 */
+	idr_for_each_entry(idr, filp, id) {
+		hmdfs_debug("[%d]Server close: id=%d", count, id);
+		hmdfs_close_path(filp);
+		count++;
+		if (count % HMDFS_IDR_RESCHED_COUNT == 0)
+			cond_resched();
+	}
+
+	hmdfs_clear_share_item_offline(conn);
+
+	/* Reinitialize idr */
+	next = idr_get_cursor(idr);
+	idr_destroy(idr);
+
+	idr_init(idr);
+	idr_set_cursor(idr, next);
+
+	/* Make old file ids stale */
+	conn->fid_cookie++;
+}
+
+static struct hmdfs_node_cb_desc server_cb[] = {
+	{
+		.evt = NODE_EVT_OFFLINE,
+		.sync = true,
+		.min_version = DFS_2_0,
+		.fn = hmdfs_server_offline_notify
+	},
+};
+
+void __init hmdfs_server_add_node_evt_cb(void)
+{
+	hmdfs_node_add_evt_cb(server_cb, ARRAY_SIZE(server_cb));
+}
+
+static const char *datasl_str[] = {
+	"s0", "s1", "s2", "s3", "s4"
+};
+
+static int parse_data_sec_level(const char *sl_value, size_t sl_value_len)
+{
+	int i;
+
+	for (i = 0; i < sizeof(datasl_str) / sizeof(datasl_str[0]); i++) {
+		if (!strncmp(sl_value, datasl_str[i], strlen(datasl_str[i])))
+			return i + DATA_SEC_LEVEL0;
+	}
+
+	return DATA_SEC_LEVEL3;
+}
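+
+/*
+ * Editor's note (not part of the original patch): parse_data_sec_level()
+ * above maps the xattr strings "s0".."s4" onto the security levels by
+ * prefix match, e.g. (assuming the DATA_SEC_LEVELn constants are
+ * consecutive):
+ *
+ *	parse_data_sec_level("s2", 2) == DATA_SEC_LEVEL2
+ *
+ * Anything unrecognized falls back to DATA_SEC_LEVEL3. check_sec_level()
+ * below then grants access only when the peer's devsl is at least the
+ * file's level, treating a missing xattr as level 3.
+ */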
+
+static int check_sec_level(struct hmdfs_peer *node, const char *file_name)
+{
+	int err;
+	int ret = 0;
+	struct path root_path;
+	struct path file_path;
+	char *value = NULL;
+	size_t value_len = DATA_SEC_LEVEL_LENGTH;
+
+	if (node->devsl <= 0) {
+		ret = -EACCES;
+		goto out_free;
+	}
+
+	value = kzalloc(value_len, GFP_KERNEL);
+	if (!value) {
+		ret = -ENOMEM;
+		goto out_free;
+	}
+
+	err = kern_path(node->sbi->local_dst, LOOKUP_DIRECTORY, &root_path);
+	if (err) {
+		hmdfs_err("get root path error");
+		ret = err;
+		goto out_free;
+	}
+
+	err = vfs_path_lookup(root_path.dentry, root_path.mnt, file_name, 0,
+			      &file_path);
+	if (err) {
+		hmdfs_err("get file path error");
+		ret = err;
+		goto out_err;
+	}
+
+	err = vfs_getxattr(&init_user_ns, file_path.dentry, DATA_SEC_LEVEL_LABEL, value,
+			   value_len);
+	if (err <= 0 && node->devsl >= DATA_SEC_LEVEL3)
+		goto out;
+	if (err > 0 && node->devsl >= parse_data_sec_level(value, err))
+		goto out;
+
+	ret = -EACCES;
+out:
+	path_put(&file_path);
+out_err:
+	path_put(&root_path);
+out_free:
+	kfree(value);
+	return ret;
+}
+
+static struct file *hmdfs_open_file(struct hmdfs_peer *con,
+				    const char *filename, uint8_t file_type,
+				    int *file_id)
+{
+	struct file *file = NULL;
+	int err = 0;
+	int id;
+
+	if (!filename) {
+		hmdfs_err("filename is NULL");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (check_sec_level(con, filename)) {
+		hmdfs_err("devsl permission denied");
+		return ERR_PTR(-EACCES);
+	}
+
+	if (hm_isshare(file_type)) {
+		err = hmdfs_check_share_access_permission(con->sbi,
+							  filename, con->cid);
+		if (err)
+			return ERR_PTR(err);
+	}
+	file = hmdfs_open_path(con->sbi, filename);
+
+	if (IS_ERR(file)) {
+		reset_item_opened_status(con->sbi, filename);
+		return file;
+	}
+
+	id = insert_file_into_conn(con, file);
+	if (id < 0) {
+		hmdfs_err("file_id alloc failed! err=%d", id);
+		reset_item_opened_status(con->sbi, filename);
+		hmdfs_close_path(file);
+		return ERR_PTR(id);
+	}
+	*file_id = id;
+
+	return file;
+}
+
+static struct hmdfs_time_t msec_to_timespec(unsigned int msec)
+{
+	struct hmdfs_time_t timespec = {
+		.tv_sec = msec / MSEC_PER_SEC,
+		.tv_nsec = (msec % MSEC_PER_SEC) * NSEC_PER_MSEC,
+	};
+
+	return timespec;
+}
+
+static struct hmdfs_time_t hmdfs_current_kernel_time(void)
+{
+	struct hmdfs_time_t time;
+
+#if KERNEL_VERSION(4, 18, 0) < LINUX_VERSION_CODE
+	ktime_get_coarse_real_ts64(&time);
+#else
+	time = current_kernel_time();
+#endif
+	return time;
+}
+
+/*
+ * Generate the fid version in the following format:
+ *
+ * |     boot cookie     | con cookie  |
+ * |---------------------|-------------|
+ *           49                15        (bits)
+ */
+static uint64_t hmdfs_server_pack_fid_ver(struct hmdfs_peer *con,
+					  struct hmdfs_head_cmd *cmd)
+{
+	uint64_t boot_cookie = con->sbi->boot_cookie;
+	uint16_t con_cookie = con->fid_cookie;
+
+	return (boot_cookie |
+		(con_cookie & ((1 << HMDFS_FID_VER_BOOT_COOKIE_SHIFT) - 1)));
+}
+
+static struct file *get_file_by_fid_and_ver(struct hmdfs_peer *con,
+					    struct hmdfs_head_cmd *cmd,
+					    __u32 file_id, __u64 file_ver)
+{
+	struct file *file = NULL;
+	__u64 cur_file_ver = hmdfs_server_pack_fid_ver(con, cmd);
+
+	if (file_ver != cur_file_ver) {
+		hmdfs_warning("Stale file version %llu for fid %u",
+			      file_ver, file_id);
+		return ERR_PTR(-EBADF);
+	}
+
+	file = get_file_from_conn(con, file_id);
+	if (!file)
+		return ERR_PTR(-EBADF);
+
+	return file;
+}
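+
+/*
+ * Editor's note (not part of the original patch): with the 49/15 bit
+ * layout shown above, the low bits of the fid version carry the
+ * per-connection cookie. hmdfs_server_offline_notify() increments
+ * fid_cookie when a peer goes offline, so a client that reuses a stale
+ * file_ver no longer matches the value recomputed by
+ * hmdfs_server_pack_fid_ver(), and get_file_by_fid_and_ver() above
+ * returns -EBADF.
+ */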
We introduce a + * new value stable_ctime to handle the problem. + * - if open rpc time < ctime, stable_ctime = 0; + * - if ctime <= open rpc time < ctime + dcache_precision, stable_ctime + * = ctime + * - else, stable_ctime = ctime + dcache_precision; + */ + precision = hmdfs_time_add(ctime, precision); + if (hmdfs_time_compare(¤t_time, &ctime) < 0) { + resp->stable_ctime = cpu_to_le64(0); + resp->stable_ctime_nsec = cpu_to_le32(0); + } else if (hmdfs_time_compare(¤t_time, &ctime) >= 0 && + hmdfs_time_compare(¤t_time, &precision) < 0) { + resp->stable_ctime = resp->ctime; + resp->stable_ctime_nsec = resp->ctime_nsec; + } else { + resp->stable_ctime = cpu_to_le64(precision.tv_sec); + resp->stable_ctime_nsec = cpu_to_le32(precision.tv_nsec); + } +} + +static int hmdfs_get_open_info(struct hmdfs_peer *con, uint8_t file_type, + const char *filename, + struct hmdfs_open_info *info) +{ + int ret = 0; + + info->inode = file_inode(info->file); + info->stat_valid = false; + if (con->sbi->sb == info->inode->i_sb) { + /* if open a regular file */ + info->inode = hmdfs_i(info->inode)->lower_inode; + } else if (con->sbi->lower_sb != info->inode->i_sb) { + /* It's possible that inode is not from lower, for example: + * 1. touch /f2fs/file + * 2. ln -s /sdcard_fs/file /f2fs/link + * 3. cat /hmdfs/link -> generate dentry cache in sdcard_fs + * 4. echo hi >> /hmdfs/file -> append write not through + * sdcard_fs + * 5. cat /hmdfs/link -> got inode in sdcard, which size is + * still 0 + * + * If src file isn't in lower, use getattr to get + * information. + */ + ret = vfs_getattr(&info->file->f_path, &info->stat, STATX_BASIC_STATS | STATX_BTIME, + 0); + if (ret) { + hmdfs_err("call vfs_getattr failed, err %d", ret); + return ret; + } + info->stat_valid = true; + } + + info->real_ino = generate_u64_ino(info->inode->i_ino, + info->inode->i_generation); + + return 0; +} + +void hmdfs_server_open(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + struct open_request *recv = data; + int sizeread = sizeof(struct open_response); + struct open_response *resp = NULL; + struct hmdfs_open_info *info = NULL; + int ret = 0; + + trace_hmdfs_server_open_enter(con, recv); + + resp = kzalloc(sizeread, GFP_KERNEL); + info = kmalloc(sizeof(*info), GFP_KERNEL); + if (!resp || !info) { + ret = -ENOMEM; + goto err_free; + } + + info->file = hmdfs_open_file(con, recv->buf, recv->file_type, + &info->file_id); + if (IS_ERR(info->file)) { + ret = PTR_ERR(info->file); + goto err_free; + } + + ret = hmdfs_get_open_info(con, recv->file_type, recv->buf, info); + if (ret) + goto err_close; + + hmdfs_update_open_response(con, cmd, info, resp); + + trace_hmdfs_server_open_exit(con, resp, info->file, 0); + ret = hmdfs_sendmessage_response(con, cmd, sizeread, resp, 0); + if (ret) { + hmdfs_err("sending msg response failed, file_id %d, err %d", + info->file_id, ret); + remove_file_from_conn(con, info->file_id); + hmdfs_close_path(info->file); + } + kfree(resp); + kfree(info); + return; + +err_close: + remove_file_from_conn(con, info->file_id); + hmdfs_close_path(info->file); +err_free: + kfree(resp); + kfree(info); + trace_hmdfs_server_open_exit(con, NULL, NULL, ret); + hmdfs_send_err_response(con, cmd, ret); +} + +static int hmdfs_check_and_create(struct path *path_parent, + struct dentry *dentry, uint64_t device_id, + umode_t mode, bool is_excl) +{ + int err = 0; + + /* if inode doesn't exist, create it */ + if (d_is_negative(dentry)) { + hmdfs_mark_drop_flag(device_id, path_parent->dentry); + err = 
vfs_create(&init_user_ns, d_inode(path_parent->dentry), dentry, mode, + is_excl); + if (err) + hmdfs_err("create failed, err %d", err); + } else { + if (is_excl) + err = -EEXIST; + else if (S_ISLNK(d_inode(dentry)->i_mode)) + err = -EINVAL; + else if (S_ISDIR(d_inode(dentry)->i_mode)) + err = -EISDIR; + } + + return err; +} +static int hmdfs_lookup_create(struct hmdfs_peer *con, + struct atomic_open_request *recv, + struct path *child_path, bool *truncate) +{ + int err = 0; + struct path path_root; + struct path path_parent; + uint32_t open_flags = le32_to_cpu(recv->open_flags); + char *path = recv->buf; + char *filename = recv->buf + le32_to_cpu(recv->path_len) + 1; + struct dentry *dentry = NULL; + + err = kern_path(con->sbi->local_dst, LOOKUP_DIRECTORY, &path_root); + if (err) { + hmdfs_err("no path for %s, err %d", con->sbi->local_dst, err); + return err; + } + + err = vfs_path_lookup(path_root.dentry, path_root.mnt, path, + LOOKUP_DIRECTORY, &path_parent); + if (err) { + hmdfs_info("no dir in %s, err %d", con->sbi->local_dst, err); + goto put_path_root; + } + + inode_lock(d_inode(path_parent.dentry)); + dentry = lookup_one_len(filename, path_parent.dentry, strlen(filename)); + if (IS_ERR(dentry)) { + err = PTR_ERR(dentry); + inode_unlock(d_inode(path_parent.dentry)); + goto put_path_parent; + } + /* only truncate if inode already exists */ + *truncate = ((open_flags & HMDFS_O_TRUNC) && d_is_positive(dentry)); + err = hmdfs_check_and_create(&path_parent, dentry, con->device_id, + le16_to_cpu(recv->mode), + open_flags & HMDFS_O_EXCL); + inode_unlock(d_inode(path_parent.dentry)); + if (err) { + dput(dentry); + } else { + child_path->dentry = dentry; + child_path->mnt = mntget(path_parent.mnt); + } + +put_path_parent: + path_put(&path_parent); +put_path_root: + path_put(&path_root); + return err; +} + +static int hmdfs_dentry_open(struct hmdfs_peer *con, + const struct path *path, + struct hmdfs_open_info *info) +{ + int err = 0; + + info->file = dentry_open(path, O_RDWR | O_LARGEFILE, current_cred()); + if (IS_ERR(info->file)) { + err = PTR_ERR(info->file); + hmdfs_err("open file failed, err %d", err); + return err; + } + + info->file_id = insert_file_into_conn(con, info->file); + if (info->file_id < 0) { + err = info->file_id; + hmdfs_err("file_id alloc failed! 
err %d", err); + hmdfs_close_path(info->file); + return err; + } + + return 0; +} + +static int hmdfs_server_do_atomic_open(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, + struct atomic_open_request *recv, + struct hmdfs_open_info *info, + struct atomic_open_response *resp) +{ + struct path child_path; + bool truncate = false; + int err = 0; + + err = hmdfs_lookup_create(con, recv, &child_path, &truncate); + if (err) + return err; + + err = hmdfs_dentry_open(con, &child_path, info); + if (err) + goto put_child; + + err = hmdfs_get_open_info(con, HM_REG, NULL, info); + if (err) + goto fail_close; + + if (truncate) { + err = vfs_truncate(&child_path, 0); + if (err) { + hmdfs_err("truncate failed, err %d", err); + goto fail_close; + } + } + hmdfs_update_open_response(con, cmd, info, &resp->open_resp); + resp->i_mode = cpu_to_le16(file_inode(info->file)->i_mode); + +fail_close: + if (err) { + remove_file_from_conn(con, info->file_id); + hmdfs_close_path(info->file); + } +put_child: + path_put(&child_path); + return err; +} + +void hmdfs_server_atomic_open(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, void *data) +{ + int err; + struct atomic_open_request *recv = data; + struct atomic_open_response *resp = NULL; + struct hmdfs_open_info *info = NULL; + + info = kmalloc(sizeof(*info), GFP_KERNEL); + resp = kzalloc(sizeof(*resp), GFP_KERNEL); + if (!resp || !info) { + err = -ENOMEM; + goto out; + } + + err = hmdfs_server_do_atomic_open(con, cmd, recv, info, resp); + +out: + if (err) { + hmdfs_send_err_response(con, cmd, err); + } else { + err = hmdfs_sendmessage_response(con, cmd, sizeof(*resp), resp, + 0); + if (err) { + hmdfs_err("sending msg response failed, file_id %d, err %d", + info->file_id, err); + remove_file_from_conn(con, info->file_id); + hmdfs_close_path(info->file); + } + } + kfree(info); + kfree(resp); +} + +void hmdfs_server_release(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + struct release_request *release_recv = data; + struct file *file = NULL; + __u32 file_id; + __u64 file_ver; + int ret = 0; + + file_id = le32_to_cpu(release_recv->file_id); + file_ver = le64_to_cpu(release_recv->file_ver); + file = get_file_by_fid_and_ver(con, cmd, file_id, file_ver); + if (IS_ERR(file)) { + hmdfs_err("cannot find %u", file_id); + ret = PTR_ERR(file); + goto out; + } + + if (hmdfs_is_share_file(file)) + hmdfs_close_share_item(con->sbi, file, con->cid); + + /* put the reference acquired by get_file_by_fid_and_ver() */ + hmdfs_close_path(file); + hmdfs_info("close %u", file_id); + remove_file_from_conn(con, file_id); + + hmdfs_close_path(file); + +out: + trace_hmdfs_server_release(con, file_id, file_ver, ret); + set_conn_sock_quickack(con); +} + +void hmdfs_server_fsync(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + struct fsync_request *fsync_recv = data; + __s32 datasync = le32_to_cpu(fsync_recv->datasync); + __s64 start = le64_to_cpu(fsync_recv->start); + __s64 end = le64_to_cpu(fsync_recv->end); + struct file *file = NULL; + __u32 file_id; + __u64 file_ver; + int ret = 0; + + file_id = le32_to_cpu(fsync_recv->file_id); + file_ver = le64_to_cpu(fsync_recv->file_ver); + file = get_file_by_fid_and_ver(con, cmd, file_id, file_ver); + if (IS_ERR(file)) { + hmdfs_err("cannot find %u", file_id); + ret = PTR_ERR(file); + goto out; + } + + ret = vfs_fsync_range(file, start, end, datasync); + if (ret) + hmdfs_err("fsync fail, ret %d", ret); + + hmdfs_close_path(file); +out: + hmdfs_send_err_response(con, cmd, ret); +} + +void 
hmdfs_server_readpage(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + struct readpage_request *readpage_recv = data; + __u64 file_ver; + __u32 file_id; + struct file *file = NULL; + loff_t pos; + struct readpage_response *readpage = NULL; + int ret = 0; + size_t read_len; + + file_id = le32_to_cpu(readpage_recv->file_id); + file_ver = le64_to_cpu(readpage_recv->file_ver); + file = get_file_by_fid_and_ver(con, cmd, file_id, file_ver); + if (IS_ERR(file)) { + hmdfs_info( + "file with id %u does not exist, pgindex %llu, devid %llu", + file_id, le64_to_cpu(readpage_recv->index), + con->device_id); + ret = PTR_ERR(file); + goto fail; + } + + read_len = (size_t)le32_to_cpu(readpage_recv->size); + if (read_len == 0) + goto fail_put_file; + + readpage = kmalloc(read_len, GFP_KERNEL); + if (!readpage) { + ret = -ENOMEM; + goto fail_put_file; + } + + pos = (loff_t)le64_to_cpu(readpage_recv->index) << HMDFS_PAGE_OFFSET; + ret = kernel_read(file, readpage->buf, read_len, &pos); + if (ret < 0) { + hmdfs_send_err_response(con, cmd, -EIO); + } else { + if (ret != read_len) + memset(readpage->buf + ret, 0, read_len - ret); + hmdfs_sendmessage_response(con, cmd, read_len, readpage, 0); + } + + hmdfs_close_path(file); + kfree(readpage); + return; + +fail_put_file: + hmdfs_close_path(file); +fail: + hmdfs_send_err_response(con, cmd, ret); +} + +static struct readpages_response *alloc_readpages_resp(unsigned int len) +{ + struct readpages_response *resp = NULL; + + if (len > HMDFS_PAGE_SIZE) + resp = vmalloc(len); + else + resp = kmalloc(len, GFP_KERNEL); + + return resp; +} + +static void free_readpages_resp(struct readpages_response *resp, + unsigned int len) +{ + if (len > HMDFS_PAGE_SIZE) + vfree(resp); + else + kfree(resp); +} + +void hmdfs_server_readpages(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + struct readpages_request *req = data; + __u64 file_ver; + __u32 file_id; + struct file *file = NULL; + loff_t pos; + struct readpages_response *resp = NULL; + ssize_t ret = 0; + size_t read_len; + + file_id = le32_to_cpu(req->file_id); + file_ver = le64_to_cpu(req->file_ver); + file = get_file_by_fid_and_ver(con, cmd, file_id, file_ver); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto fail; + } + + read_len = (size_t)le32_to_cpu(req->size); + if (read_len == 0) + goto fail_put_file; + + resp = alloc_readpages_resp(read_len); + if (!resp) { + ret = -ENOMEM; + goto fail_put_file; + } + + pos = (loff_t)le64_to_cpu(req->index) << HMDFS_PAGE_OFFSET; + ret = kernel_read(file, resp->buf, read_len, &pos); + if (ret < 0) { + ret = -EIO; + goto fail_free_resp; + } + + hmdfs_sendmessage_response(con, cmd, ret, resp, 0); + hmdfs_close_path(file); + free_readpages_resp(resp, read_len); + return; + +fail_free_resp: + free_readpages_resp(resp, read_len); +fail_put_file: + hmdfs_close_path(file); +fail: + hmdfs_send_err_response(con, cmd, ret); +} + +static int hmdfs_do_readpages_open(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, + struct readpages_open_request *recv, + struct hmdfs_open_info *info, + struct readpages_open_response *resp) +{ + int ret = 0; + loff_t pos = 0; + + info->file = hmdfs_open_file(con, recv->buf, recv->file_type, + &info->file_id); + if (IS_ERR(info->file)) + return PTR_ERR(info->file); + + ret = hmdfs_get_open_info(con, recv->file_type, recv->buf, info); + if (ret) + goto fail_close; + + pos = (loff_t)le64_to_cpu(recv->index) << HMDFS_PAGE_OFFSET; + ret = kernel_read(info->file, resp->buf, le32_to_cpu(recv->size), &pos); + if (ret < 0) 
+ goto fail_close; + + hmdfs_update_open_response(con, cmd, info, &resp->open_resp); + memset(resp->reserved, 0, sizeof(resp->reserved)); + ret = hmdfs_sendmessage_response(con, cmd, sizeof(*resp) + ret, resp, + 0); + if (ret) { + hmdfs_err("sending msg response failed, file_id %d, err %d", + info->file_id, ret); + ret = 0; + goto fail_close; + } + return 0; + +fail_close: + remove_file_from_conn(con, info->file_id); + hmdfs_close_path(info->file); + return ret; +} + +void hmdfs_server_readpages_open(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, void *data) +{ + struct readpages_open_request *recv = data; + struct readpages_open_response *resp = NULL; + int ret = -EINVAL; + size_t read_len = 0; + size_t resp_len = 0; + struct hmdfs_open_info *info = NULL; + + info = kmalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + ret = -ENOMEM; + goto fail; + } + + read_len = (size_t)le32_to_cpu(recv->size); + if (read_len == 0) { + ret = -EINVAL; + goto fail_free_info; + } + resp_len = read_len + sizeof(*resp); + resp = vmalloc(resp_len); + if (!resp) { + ret = -ENOMEM; + goto fail_free_info; + } + + ret = hmdfs_do_readpages_open(con, cmd, recv, info, resp); + + vfree(resp); +fail_free_info: + kfree(info); +fail: + if (ret) + hmdfs_send_err_response(con, cmd, ret); +} + +static bool need_rebuild_dcache(struct hmdfs_dcache_header *h, + struct hmdfs_time_t time, + unsigned int precision) +{ + struct hmdfs_time_t crtime = { .tv_sec = le64_to_cpu(h->dcache_crtime), + .tv_nsec = le64_to_cpu( + h->dcache_crtime_nsec) }; + struct hmdfs_time_t ctime = { .tv_sec = le64_to_cpu(h->dentry_ctime), + .tv_nsec = le64_to_cpu( + h->dentry_ctime_nsec) }; + struct hmdfs_time_t pre_time = { .tv_sec = precision / MSEC_PER_SEC, + .tv_nsec = precision % MSEC_PER_SEC * + NSEC_PER_MSEC }; + + if (hmdfs_time_compare(&time, &ctime) != 0) + return true; + + pre_time = hmdfs_time_add(time, pre_time); + if (hmdfs_time_compare(&crtime, &pre_time) < 0) + return true; + + return false; +} + +static bool hmdfs_server_cache_validate(struct file *filp, struct inode *inode, + unsigned long precision) +{ + struct hmdfs_dcache_header header; + int overallpage; + ssize_t bytes; + loff_t pos = 0; + + overallpage = get_dentry_group_cnt(file_inode(filp)); + if (overallpage == 0) { + hmdfs_err("cache file size is 0"); + return false; + } + + bytes = kernel_read(filp, &header, sizeof(header), &pos); + if (bytes != sizeof(header)) { + hmdfs_err("read file failed, err:%zd", bytes); + return false; + } + + return !need_rebuild_dcache(&header, inode->i_ctime, precision); +} + +struct file *hmdfs_server_cache_revalidate(struct hmdfs_sb_info *sbi, + const char *recvpath, + struct path *path) +{ + struct cache_file_node *cfn = NULL; + struct file *file; + + cfn = find_cfn(sbi, HMDFS_SERVER_CID, recvpath, true); + if (!cfn) + return NULL; + + if (!hmdfs_server_cache_validate(cfn->filp, path->dentry->d_inode, + sbi->dcache_precision)) { + remove_cfn(cfn); + release_cfn(cfn); + return NULL; + } + file = cfn->filp; + get_file(cfn->filp); + release_cfn(cfn); + + return file; +} + +bool hmdfs_client_cache_validate(struct hmdfs_sb_info *sbi, + struct readdir_request *readdir_recv, + struct path *path) +{ + struct inode *inode = path->dentry->d_inode; + struct hmdfs_dcache_header header; + + /* always rebuild dentryfile for small dir */ + if (le64_to_cpu(readdir_recv->num) < sbi->dcache_threshold) + return false; + + header.dcache_crtime = readdir_recv->dcache_crtime; + header.dcache_crtime_nsec = readdir_recv->dcache_crtime_nsec; + 
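+	/* need_rebuild_dcache() compares these stamps with the inode's current ctime */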
+	header.dentry_ctime = readdir_recv->dentry_ctime;
+	header.dentry_ctime_nsec = readdir_recv->dentry_ctime_nsec;
+
+	return !need_rebuild_dcache(&header, inode->i_ctime,
+				    sbi->dcache_precision);
+}
+
+static char *server_lower_dentry_path_raw(struct hmdfs_peer *peer,
+					  struct dentry *lo_d)
+{
+	struct hmdfs_dentry_info *di = hmdfs_d(peer->sbi->sb->s_root);
+	struct dentry *lo_d_root = di->lower_path.dentry;
+	struct dentry *lo_d_tmp = NULL;
+	char *lo_p_buf = NULL;
+	char *buf_head = NULL;
+	char *buf_tail = NULL;
+	size_t path_len = 0;
+
+	lo_p_buf = kzalloc(PATH_MAX, GFP_KERNEL);
+	if (unlikely(!lo_p_buf))
+		return ERR_PTR(-ENOMEM);
+
+	/* To generate a reversed path str */
+	for (lo_d_tmp = lo_d; lo_d_tmp != lo_d_root && !IS_ROOT(lo_d_tmp);
+	     lo_d_tmp = lo_d_tmp->d_parent) {
+		u32 dlen = lo_d_tmp->d_name.len;
+		int reverse_index = dlen - 1;
+
+		/* Considering the appended slash and '\0' */
+		if (unlikely(path_len + dlen + 1 > PATH_MAX - 1)) {
+			kfree(lo_p_buf);
+			return ERR_PTR(-ENAMETOOLONG);
+		}
+		for (; reverse_index >= 0; --reverse_index)
+			lo_p_buf[path_len++] =
+				lo_d_tmp->d_name.name[reverse_index];
+		lo_p_buf[path_len++] = '/';
+	}
+
+	/* Reverse the reversed path str to get the real path str */
+	for (buf_head = lo_p_buf, buf_tail = lo_p_buf + path_len - 1;
+	     buf_head < buf_tail; ++buf_head, --buf_tail)
+		swap(*buf_head, *buf_tail);
+
+	if (path_len == 0)
+		lo_p_buf[0] = '/';
+	return lo_p_buf;
+}
+
+static int server_lookup(struct hmdfs_peer *peer, const char *req_path,
+			 struct path *path)
+{
+	struct path root_path;
+	int err = 0;
+
+	err = kern_path(peer->sbi->local_dst, 0, &root_path);
+	if (err)
+		goto out_noroot;
+
+	err = vfs_path_lookup(root_path.dentry, root_path.mnt, req_path,
+			      LOOKUP_DIRECTORY, path);
+	path_put(&root_path);
+out_noroot:
+	return err;
+}
+
+/**
+ * server_lookup_lower - look up the lower file system
+ * @peer: target device node
+ * @req_path: abs path (mount point as the root) from the request
+ * @lo_p: the lower path to return
+ *
+ * Return the lower path's name, with characters' cases matched.
+ */
+static char *server_lookup_lower(struct hmdfs_peer *peer, const char *req_path,
+				 struct path *lo_p)
+{
+	char *lo_p_name = ERR_PTR(-ENOENT);
+	struct path up_p;
+	int err = 0;
+
+	err = server_lookup(peer, req_path, &up_p);
+	if (err)
+		goto out;
+
+	hmdfs_get_lower_path(up_p.dentry, lo_p);
+	path_put(&up_p);
+
+	lo_p_name = server_lower_dentry_path_raw(peer, lo_p->dentry);
+	if (IS_ERR(lo_p_name)) {
+		err = PTR_ERR(lo_p_name);
+		path_put(lo_p);
+	}
+out:
+	return err ?
ERR_PTR(err) : lo_p_name; +} + +void hmdfs_server_readdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + struct readdir_request *readdir_recv = data; + struct path lo_p; + struct file *filp = NULL; + int err = 0; + unsigned long long num = 0; + char *lo_p_name = NULL; + + trace_hmdfs_server_readdir(readdir_recv); + + lo_p_name = server_lookup_lower(con, readdir_recv->path, &lo_p); + if (IS_ERR(lo_p_name)) { + err = PTR_ERR(lo_p_name); + hmdfs_info("Failed to get lower path: %d", err); + goto send_err; + } + + if (le32_to_cpu(readdir_recv->verify_cache)) { + if (hmdfs_client_cache_validate(con->sbi, readdir_recv, &lo_p)) + goto out_response; + } + + filp = hmdfs_server_cache_revalidate(con->sbi, lo_p_name, &lo_p); + if (IS_ERR_OR_NULL(filp)) { + filp = hmdfs_server_rebuild_dents(con->sbi, &lo_p, &num, + lo_p_name); + if (IS_ERR_OR_NULL(filp)) { + err = PTR_ERR(filp); + goto err_lookup_path; + } + } + +out_response: + err = hmdfs_readfile_response(con, cmd, filp); + if (!err) + hmdfs_add_remote_cache_list(con, lo_p_name); + if (num >= con->sbi->dcache_threshold) + cache_file_persistent(con, filp, lo_p_name, true); + if (filp) + fput(filp); +err_lookup_path: + path_put(&lo_p); + kfree(lo_p_name); +send_err: + if (err) + hmdfs_send_err_response(con, cmd, err); +} + +void hmdfs_server_mkdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + int err = 0; + struct mkdir_request *mkdir_recv = data; + struct inode *child_inode = NULL; + struct dentry *dent = NULL; + char *mkdir_dir = NULL; + char *mkdir_name = NULL; + struct hmdfs_inodeinfo_response *mkdir_resp = NULL; + int respsize = sizeof(struct hmdfs_inodeinfo_response); + int path_len = le32_to_cpu(mkdir_recv->path_len); + + mkdir_resp = kzalloc(respsize, GFP_KERNEL); + if (!mkdir_resp) { + err = -ENOMEM; + goto mkdir_out; + } + + mkdir_dir = mkdir_recv->path; + mkdir_name = mkdir_recv->path + path_len + 1; + + dent = hmdfs_root_mkdir(con->device_id, con->sbi->local_dst, + mkdir_dir, mkdir_name, + le16_to_cpu(mkdir_recv->mode)); + if (IS_ERR(dent)) { + err = PTR_ERR(dent); + hmdfs_err("hmdfs_root_mkdir failed err = %d", err); + goto mkdir_out; + } + child_inode = d_inode(dent); + mkdir_resp->i_mode = cpu_to_le16(child_inode->i_mode); + mkdir_resp->i_size = cpu_to_le64(child_inode->i_size); + mkdir_resp->i_mtime = cpu_to_le64(child_inode->i_mtime.tv_sec); + mkdir_resp->i_mtime_nsec = cpu_to_le32(child_inode->i_mtime.tv_nsec); + mkdir_resp->i_ino = cpu_to_le64(child_inode->i_ino); + dput(dent); +mkdir_out: + hmdfs_sendmessage_response(con, cmd, respsize, mkdir_resp, err); + kfree(mkdir_resp); +} + +void hmdfs_server_create(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + int err = 0; + struct create_request *create_recv = data; + struct inode *child_inode = NULL; + struct dentry *dent = NULL; + char *create_dir = NULL; + char *create_name = NULL; + struct hmdfs_inodeinfo_response *create_resp = NULL; + int respsize = sizeof(struct hmdfs_inodeinfo_response); + int path_len = le32_to_cpu(create_recv->path_len); + + create_resp = kzalloc(respsize, GFP_KERNEL); + if (!create_resp) { + err = -ENOMEM; + goto create_out; + } + + create_dir = create_recv->path; + create_name = create_recv->path + path_len + 1; + + dent = hmdfs_root_create(con->device_id, con->sbi->local_dst, + create_dir, create_name, + le16_to_cpu(create_recv->mode), + create_recv->want_excl); + if (IS_ERR(dent)) { + err = PTR_ERR(dent); + hmdfs_err("hmdfs_root_create failed err = %d", err); + goto create_out; + } + 
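+	/* mirror the new inode's attributes into the response */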
child_inode = d_inode(dent); + create_resp->i_mode = cpu_to_le16(child_inode->i_mode); + create_resp->i_size = cpu_to_le64(child_inode->i_size); + create_resp->i_mtime = cpu_to_le64(child_inode->i_mtime.tv_sec); + create_resp->i_mtime_nsec = cpu_to_le32(child_inode->i_mtime.tv_nsec); + /* + * keep same as hmdfs_server_open, + * to prevent hmdfs_open_final_remote from judging ino errors. + */ + create_resp->i_ino = cpu_to_le64( + generate_u64_ino(hmdfs_i(child_inode)->lower_inode->i_ino, + child_inode->i_generation)); + dput(dent); +create_out: + hmdfs_sendmessage_response(con, cmd, respsize, create_resp, err); + kfree(create_resp); +} + +void hmdfs_server_rmdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + int err = 0; + struct path root_path; + char *path = NULL; + char *name = NULL; + struct rmdir_request *rmdir_recv = data; + + path = rmdir_recv->path; + name = rmdir_recv->path + le32_to_cpu(rmdir_recv->path_len) + 1; + err = kern_path(con->sbi->local_dst, 0, &root_path); + if (!err) { + err = hmdfs_root_rmdir(con->device_id, &root_path, path, name); + path_put(&root_path); + } + + hmdfs_send_err_response(con, cmd, err); +} + +void hmdfs_server_unlink(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + int err = 0; + struct path root_path; + char *path = NULL; + char *name = NULL; + struct unlink_request *unlink_recv = data; + + path = unlink_recv->path; + name = unlink_recv->path + le32_to_cpu(unlink_recv->path_len) + 1; + err = kern_path(con->sbi->local_dst, 0, &root_path); + if (!err) { + err = hmdfs_root_unlink(con->device_id, &root_path, path, name); + path_put(&root_path); + } + + hmdfs_send_err_response(con, cmd, err); +} + +void hmdfs_server_rename(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + int err = 0; + int old_path_len; + int new_path_len; + int old_name_len; + int new_name_len; + unsigned int flags; + char *path_old = NULL; + char *name_old = NULL; + char *path_new = NULL; + char *name_new = NULL; + struct rename_request *recv = data; + + old_path_len = le32_to_cpu(recv->old_path_len); + new_path_len = le32_to_cpu(recv->new_path_len); + old_name_len = le32_to_cpu(recv->old_name_len); + new_name_len = le32_to_cpu(recv->new_name_len); + flags = le32_to_cpu(recv->flags); + + path_old = recv->path; + path_new = recv->path + old_path_len + 1; + name_old = recv->path + old_path_len + 1 + new_path_len + 1; + name_new = recv->path + old_path_len + 1 + new_path_len + 1 + + old_name_len + 1; + + err = hmdfs_root_rename(con->sbi, con->device_id, path_old, name_old, + path_new, name_new, flags); + + hmdfs_send_err_response(con, cmd, err); +} + +static int hmdfs_filldir_real(struct dir_context *ctx, const char *name, + int name_len, loff_t offset, u64 ino, + unsigned int d_type) +{ + int res = 0; + char namestr[NAME_MAX + 1]; + struct getdents_callback_real *gc = NULL; + struct dentry *child = NULL; + + if (name_len > NAME_MAX) { + hmdfs_err("name_len:%d NAME_MAX:%u", name_len, NAME_MAX); + goto out; + } + + gc = container_of(ctx, struct getdents_callback_real, ctx); + + memcpy(namestr, name, name_len); + namestr[name_len] = '\0'; + + if (hmdfs_file_type(namestr) != HMDFS_TYPE_COMMON) + goto out; + + /* parent lock already hold by iterate_dir */ + child = lookup_one_len(name, gc->parent_path->dentry, name_len); + if (IS_ERR(child)) { + res = PTR_ERR(child); + hmdfs_err("lookup failed because %d", res); + goto out; + } + + if (d_really_is_negative(child)) { + dput(child); + hmdfs_err("lookup failed because negative 
dentry"); + /* just do not fill this entry and continue for next entry */ + goto out; + } + + if (d_type == DT_REG || d_type == DT_DIR) { + create_dentry(child, d_inode(child), gc->file, gc->sbi); + gc->num++; + } + + dput(child); + +out: + /* + * we always return 0 here, so that the caller can continue to next + * dentry even if failed on this dentry somehow. + */ + return 0; +} + +static void hmdfs_server_set_header(struct hmdfs_dcache_header *header, + struct file *file, struct file *dentry_file) +{ + struct inode *inode = NULL; + struct hmdfs_time_t cur_time; + + inode = file_inode(file); + cur_time = current_time(file_inode(dentry_file)); + header->dcache_crtime = cpu_to_le64(cur_time.tv_sec); + header->dcache_crtime_nsec = cpu_to_le64(cur_time.tv_nsec); + header->dentry_ctime = cpu_to_le64(inode->i_ctime.tv_sec); + header->dentry_ctime_nsec = cpu_to_le64(inode->i_ctime.tv_nsec); +} + +// Get the dentries of target directory +struct file *hmdfs_server_rebuild_dents(struct hmdfs_sb_info *sbi, + struct path *path, loff_t *num, + const char *dir) +{ + int err = 0; + struct getdents_callback_real gc = { + .ctx.actor = hmdfs_filldir_real, + .ctx.pos = 0, + .num = 0, + .sbi = sbi, + .dir = dir, + }; + struct file *file = NULL; + struct file *dentry_file = NULL; + struct hmdfs_dcache_header header; + + dentry_file = create_local_dentry_file_cache(sbi); + if (IS_ERR(dentry_file)) { + hmdfs_err("file create failed err=%ld", PTR_ERR(dentry_file)); + return dentry_file; + } + + file = dentry_open(path, O_RDONLY | O_DIRECTORY, current_cred()); + if (IS_ERR(file)) { + err = PTR_ERR(file); + hmdfs_err("dentry_open failed"); + goto out; + } + + hmdfs_server_set_header(&header, file, dentry_file); + + gc.parent_path = path; + gc.file = dentry_file; + + err = iterate_dir(file, &(gc.ctx)); + if (err) { + hmdfs_err("iterate_dir failed"); + goto out; + } + + header.case_sensitive = sbi->s_case_sensitive; + header.num = cpu_to_le64(gc.num); + if (num) + *num = gc.num; + + err = write_header(dentry_file, &header); +out: + if (!IS_ERR_OR_NULL(file)) + fput(file); + + if (err) { + fput(dentry_file); + dentry_file = ERR_PTR(err); + } + + trace_hmdfs_server_rebuild_dents(&header, err); + return dentry_file; +} + +void hmdfs_server_writepage(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + struct writepage_request *writepage_recv = data; + struct hmdfs_server_writeback *hswb = NULL; + __u64 file_ver; + __u32 file_id; + struct file *file = NULL; + loff_t pos; + __u32 count; + ssize_t ret; + int err = 0; + + file_id = le32_to_cpu(writepage_recv->file_id); + file_ver = le64_to_cpu(writepage_recv->file_ver); + file = get_file_by_fid_and_ver(con, cmd, file_id, file_ver); + if (IS_ERR(file)) { + hmdfs_info( + "file with id %u does not exist, pgindex %llu, devid %llu", + file_id, le64_to_cpu(writepage_recv->index), + con->device_id); + err = PTR_ERR(file); + goto out; + } + + pos = (loff_t)le64_to_cpu(writepage_recv->index) << HMDFS_PAGE_OFFSET; + count = le32_to_cpu(writepage_recv->count); + ret = kernel_write(file, writepage_recv->buf, count, &pos); + if (ret != count) + err = -EIO; + + hmdfs_close_path(file); +out: + hmdfs_send_err_response(con, cmd, err); + + hswb = con->sbi->h_swb; + if (!err && hswb->dirty_writeback_control) + hmdfs_server_check_writeback(hswb); +} + +static struct inode *hmdfs_verify_path(struct dentry *dentry, char *recv_buf, + struct super_block *sb) +{ + struct inode *inode = d_inode(dentry); + struct hmdfs_inode_info *info = NULL; + + /* if we found path from wrong fs 
*/ + if (inode->i_sb != sb) { + hmdfs_err("super block do not match"); + return NULL; + } + + info = hmdfs_i(inode); + /* make sure lower inode is not NULL */ + if (info->lower_inode) + return info->lower_inode; + + /* + * we don't expect lower inode to be NULL in server. However, it's + * possible because dentry cache can contain stale data. + */ + hmdfs_info("lower inode is NULL, is remote file: %d", + info->conn != NULL); + return NULL; +} + +static int hmdfs_notify_change(struct vfsmount *mnt, struct dentry *dentry, + struct iattr *attr, + struct inode **delegated_inode) +{ +#ifdef CONFIG_SDCARD_FS + /* sdcard_fs need to call setattr2, notify_change will call setattr */ + return notify_change2(mnt, dentry, attr, delegated_inode); +#else + return notify_change(&init_user_ns, dentry, attr, delegated_inode); +#endif +} + +void hmdfs_server_setattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + int err = 0; + struct dentry *dentry = NULL; + struct inode *inode = NULL; + struct setattr_request *recv = data; + struct path root_path, dst_path; + struct iattr attr; + __u32 valid = le32_to_cpu(recv->valid); + + err = kern_path(con->sbi->local_dst, 0, &root_path); + if (err) { + hmdfs_err("kern_path failed err = %d", err); + goto out; + } + + err = vfs_path_lookup(root_path.dentry, root_path.mnt, recv->buf, 0, + &dst_path); + if (err) + goto out_put_root; + + inode = hmdfs_verify_path(dst_path.dentry, recv->buf, con->sbi->sb); + if (!inode) { + err = -ENOENT; + goto out_put_dst; + } + + if (S_ISLNK(inode->i_mode)) { + err = -EPERM; + goto out_put_dst; + } + + dentry = dst_path.dentry; + memset(&attr, 0, sizeof(attr)); + /* only support size and mtime */ + if (valid & (ATTR_SIZE | ATTR_MTIME)) + attr.ia_valid = + (valid & (ATTR_MTIME | ATTR_MTIME_SET | ATTR_SIZE)); + attr.ia_size = le64_to_cpu(recv->size); + attr.ia_mtime.tv_sec = le64_to_cpu(recv->mtime); + attr.ia_mtime.tv_nsec = le32_to_cpu(recv->mtime_nsec); + + inode_lock(dentry->d_inode); + err = hmdfs_notify_change(dst_path.mnt, dentry, &attr, NULL); + inode_unlock(dentry->d_inode); + +out_put_dst: + path_put(&dst_path); +out_put_root: + path_put(&root_path); +out: + hmdfs_send_err_response(con, cmd, err); +} + +static void update_getattr_response(struct hmdfs_peer *con, struct inode *inode, + struct kstat *ks, + struct getattr_response *resp) +{ + /* if getattr for link, get ino and mode from actual lower inode */ + resp->ino = cpu_to_le64( + generate_u64_ino(inode->i_ino, inode->i_generation)); + resp->mode = cpu_to_le16(inode->i_mode); + + /* get other information from vfs_getattr() */ + resp->result_mask = cpu_to_le32(STATX_BASIC_STATS | STATX_BTIME); + resp->fsid = cpu_to_le64(ks->dev); + resp->nlink = cpu_to_le32(ks->nlink); + resp->uid = cpu_to_le32(ks->uid.val); + resp->gid = cpu_to_le32(ks->gid.val); + resp->size = cpu_to_le64(ks->size); + resp->blocks = cpu_to_le64(ks->blocks); + resp->blksize = cpu_to_le32(ks->blksize); + resp->atime = cpu_to_le64(ks->atime.tv_sec); + resp->atime_nsec = cpu_to_le32(ks->atime.tv_nsec); + resp->mtime = cpu_to_le64(ks->mtime.tv_sec); + resp->mtime_nsec = cpu_to_le32(ks->mtime.tv_nsec); + resp->ctime = cpu_to_le64(ks->ctime.tv_sec); + resp->ctime_nsec = cpu_to_le32(ks->ctime.tv_nsec); + resp->crtime = cpu_to_le64(ks->btime.tv_sec); + resp->crtime_nsec = cpu_to_le32(ks->btime.tv_nsec); +} + +void hmdfs_server_getattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + int err = 0; + struct getattr_request *recv = data; + int size_read = sizeof(struct 
getattr_response); + struct getattr_response *resp = NULL; + struct kstat ks; + struct path root_path, dst_path; + struct inode *inode = NULL; + unsigned int recv_flags = le32_to_cpu(recv->lookup_flags); + unsigned int lookup_flags = 0; + + err = hmdfs_convert_lookup_flags(recv_flags, &lookup_flags); + if (err) + goto err; + + resp = kzalloc(size_read, GFP_KERNEL); + if (!resp) { + err = -ENOMEM; + goto err; + } + err = kern_path(con->sbi->local_dst, 0, &root_path); + if (err) { + hmdfs_err("kern_path failed err = %d", err); + goto err_free_resp; + } + + err = vfs_path_lookup(root_path.dentry, root_path.mnt, recv->buf, + lookup_flags, &dst_path); + if (err) + goto out_put_root; + + inode = hmdfs_verify_path(dst_path.dentry, recv->buf, con->sbi->sb); + if (!inode) { + err = -ENOENT; + goto out_put_dst; + } + + if (S_ISLNK(inode->i_mode)) { + err = -EPERM; + goto out_put_dst; + } + + err = vfs_getattr(&dst_path, &ks, STATX_BASIC_STATS | STATX_BTIME, 0); + if (err) + goto err_put_dst; + update_getattr_response(con, inode, &ks, resp); + +out_put_dst: + path_put(&dst_path); +out_put_root: + /* + * if path lookup failed, we return with result_mask setting to + * zero. So we can be aware of such situation in caller. + */ + if (err) + resp->result_mask = cpu_to_le32(0); + path_put(&root_path); + hmdfs_sendmessage_response(con, cmd, size_read, resp, err); + kfree(resp); + return; + +err_put_dst: + path_put(&dst_path); + path_put(&root_path); +err_free_resp: + kfree(resp); +err: + hmdfs_send_err_response(con, cmd, err); +} + +static void init_statfs_response(struct statfs_response *resp, + struct kstatfs *st) +{ + resp->f_type = cpu_to_le64(HMDFS_SUPER_MAGIC); + resp->f_bsize = cpu_to_le64(st->f_bsize); + resp->f_blocks = cpu_to_le64(st->f_blocks); + resp->f_bfree = cpu_to_le64(st->f_bfree); + resp->f_bavail = cpu_to_le64(st->f_bavail); + resp->f_files = cpu_to_le64(st->f_files); + resp->f_ffree = cpu_to_le64(st->f_ffree); + resp->f_fsid_0 = cpu_to_le32(st->f_fsid.val[0]); + resp->f_fsid_1 = cpu_to_le32(st->f_fsid.val[1]); + resp->f_namelen = cpu_to_le64(st->f_namelen); + resp->f_frsize = cpu_to_le64(st->f_frsize); + resp->f_flags = cpu_to_le64(st->f_flags); + /* f_spare is not used in f2fs or ext4 */ + resp->f_spare_0 = cpu_to_le64(st->f_spare[0]); + resp->f_spare_1 = cpu_to_le64(st->f_spare[1]); + resp->f_spare_2 = cpu_to_le64(st->f_spare[2]); + resp->f_spare_3 = cpu_to_le64(st->f_spare[3]); +} + +void hmdfs_server_statfs(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + struct statfs_request *recv = data; + struct statfs_response *resp = NULL; + struct path root_path, path; + struct kstatfs *st = NULL; + int err = 0; + + st = kzalloc(sizeof(*st), GFP_KERNEL); + if (!st) { + err = -ENOMEM; + goto out; + } + + resp = kmalloc(sizeof(*resp), GFP_KERNEL); + if (!resp) { + err = -ENOMEM; + goto free_st; + } + + err = kern_path(con->sbi->local_src, 0, &root_path); + if (err) { + hmdfs_info("kern_path failed err = %d", err); + goto free_st; + } + + err = vfs_path_lookup(root_path.dentry, root_path.mnt, recv->path, 0, + &path); + if (err) { + hmdfs_info("recv->path found failed err = %d", err); + goto put_root; + } + + err = vfs_statfs(&path, st); + if (err) + hmdfs_info("statfs local dentry failed, err = %d", err); + init_statfs_response(resp, st); + path_put(&path); + +put_root: + path_put(&root_path); +free_st: + kfree(st); +out: + if (err) + hmdfs_send_err_response(con, cmd, err); + else + hmdfs_sendmessage_response(con, cmd, sizeof(*resp), resp, 0); + + kfree(resp); +} + +void 
hmdfs_server_syncfs(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, + void *data) +{ + /* + * Reserved interface. There is a difference compared with traditional + * syncfs process. Remote syncfs process in client: + * 1. Remote writepages by async call + * 2. Remote syncfs calling + * 3. Wait all remote async calls(writepages) return in step 1 + */ + int ret = 0; + + hmdfs_send_err_response(con, cmd, ret); +} + +void hmdfs_server_getxattr(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, void *data) +{ + struct getxattr_request *recv = data; + size_t size = le32_to_cpu(recv->size); + size_t size_read = sizeof(struct getxattr_response) + size; + struct getxattr_response *resp = NULL; + struct path root_path; + struct path path; + char *file_path = recv->buf; + char *name = recv->buf + recv->path_len + 1; + int err = -ENOMEM; + + resp = kzalloc(size_read, GFP_KERNEL); + if (!resp) + goto err; + + err = kern_path(con->sbi->local_dst, LOOKUP_DIRECTORY, &root_path); + if (err) { + hmdfs_info("kern_path failed err = %d", err); + goto err_free_resp; + } + + err = vfs_path_lookup(root_path.dentry, root_path.mnt, + file_path, 0, &path); + if (err) { + hmdfs_info("path found failed err = %d", err); + goto err_put_root; + } + + if (!size) + err = vfs_getxattr(&init_user_ns, path.dentry, name, NULL, size); + else + err = vfs_getxattr(&init_user_ns, path.dentry, name, resp->value, size); + if (err < 0) { + hmdfs_info("getxattr failed err %d", err); + goto err_put_path; + } + + resp->size = cpu_to_le32(err); + hmdfs_sendmessage_response(con, cmd, size_read, resp, 0); + path_put(&path); + path_put(&root_path); + kfree(resp); + return; + +err_put_path: + path_put(&path); +err_put_root: + path_put(&root_path); +err_free_resp: + kfree(resp); +err: + hmdfs_send_err_response(con, cmd, err); +} + +void hmdfs_server_setxattr(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, void *data) +{ + struct setxattr_request *recv = data; + size_t size = le32_to_cpu(recv->size); + int flags = le32_to_cpu(recv->flags); + bool del = recv->del; + struct path root_path; + struct path path; + const char *file_path = NULL; + const char *name = NULL; + const void *value = NULL; + int err; + + err = kern_path(con->sbi->local_dst, LOOKUP_DIRECTORY, &root_path); + if (err) { + hmdfs_info("kern_path failed err = %d", err); + goto err; + } + + file_path = recv->buf; + name = recv->buf + recv->path_len + 1; + value = name + recv->name_len + 1; + err = vfs_path_lookup(root_path.dentry, root_path.mnt, + file_path, 0, &path); + if (err) { + hmdfs_info("path found failed err = %d", err); + goto err_put_root; + } + + if (del) { + WARN_ON(flags != XATTR_REPLACE); + err = vfs_removexattr(&init_user_ns, path.dentry, name); + } else { + err = vfs_setxattr(&init_user_ns, path.dentry, name, value, size, flags); + } + + path_put(&path); +err_put_root: + path_put(&root_path); +err: + hmdfs_send_err_response(con, cmd, err); +} + +void hmdfs_server_listxattr(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, void *data) +{ + struct listxattr_request *recv = data; + size_t size = le32_to_cpu(recv->size); + int size_read = sizeof(struct listxattr_response) + size; + struct listxattr_response *resp = NULL; + const char *file_path = NULL; + struct path root_path; + struct path path; + int err = 0; + + resp = kzalloc(size_read, GFP_KERNEL); + if (!resp) { + err = -ENOMEM; + goto err; + } + + err = kern_path(con->sbi->local_dst, LOOKUP_DIRECTORY, &root_path); + if (err) { + hmdfs_info("kern_path failed err = %d", err); + goto 
err_free_resp; + } + + file_path = recv->buf; + err = vfs_path_lookup(root_path.dentry, root_path.mnt, + file_path, 0, &path); + if (err) { + hmdfs_info("path found failed err = %d", err); + goto err_put_root; + } + + if (!size) + err = vfs_listxattr(path.dentry, NULL, size); + else + err = vfs_listxattr(path.dentry, resp->list, size); + if (err < 0) { + hmdfs_info("listxattr failed err = %d", err); + goto err_put_path; + } + + resp->size = cpu_to_le32(err); + hmdfs_sendmessage_response(con, cmd, size_read, resp, 0); + path_put(&root_path); + path_put(&path); + kfree(resp); + return; + +err_put_path: + path_put(&path); +err_put_root: + path_put(&root_path); +err_free_resp: + kfree(resp); +err: + hmdfs_send_err_response(con, cmd, err); +} + +void hmdfs_server_get_drop_push(struct hmdfs_peer *con, + struct hmdfs_head_cmd *cmd, void *data) +{ + struct drop_push_request *dp_recv = data; + struct path root_path, path; + int err; + char *tmp_path = NULL; + + err = kern_path(con->sbi->real_dst, 0, &root_path); + if (err) { + hmdfs_err("kern_path failed err = %d", err); + goto quickack; + } + tmp_path = kzalloc(PATH_MAX, GFP_KERNEL); + if (!tmp_path) + goto out; + snprintf(tmp_path, PATH_MAX, "/" DEVICE_VIEW_ROOT "/%s%s", + con->cid, dp_recv->path); + + err = vfs_path_lookup(root_path.dentry, root_path.mnt, tmp_path, 0, + &path); + if (err) { + hmdfs_info("path found failed err = %d", err); + goto free; + } + hmdfs_remove_cache_filp(con, path.dentry); + + path_put(&path); +free: + kfree(tmp_path); +out: + path_put(&root_path); +quickack: + set_conn_sock_quickack(con); +} diff --git a/fs/hmdfs/hmdfs_server.h b/fs/hmdfs/hmdfs_server.h new file mode 100755 index 000000000..740d06f8b --- /dev/null +++ b/fs/hmdfs/hmdfs_server.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/hmdfs_server.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */
+
+#ifndef HMDFS_SERVER_H
+#define HMDFS_SERVER_H
+
+#include "hmdfs.h"
+#include "comm/transport.h"
+#include "comm/socket_adapter.h"
+
+#define DATA_SEC_LEVEL0 0
+#define DATA_SEC_LEVEL1 1
+#define DATA_SEC_LEVEL2 2
+#define DATA_SEC_LEVEL3 3
+#define DATA_SEC_LEVEL4 4
+#define DATA_SEC_LEVEL_LABEL "user.security"
+#define DATA_SEC_LEVEL_LENGTH 10
+
+static inline void hmdfs_send_err_response(struct hmdfs_peer *con,
+					   struct hmdfs_head_cmd *cmd, int err)
+{
+	if (hmdfs_sendmessage_response(con, cmd, 0, NULL, (__u32)err))
+		hmdfs_warning("send err failed");
+}
+
+void hmdfs_server_open(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+		       void *data);
+void hmdfs_server_atomic_open(struct hmdfs_peer *con,
+			      struct hmdfs_head_cmd *cmd, void *data);
+void hmdfs_server_fsync(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+			void *data);
+void hmdfs_server_release(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+			  void *data);
+void hmdfs_server_readpage(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+			   void *data);
+void hmdfs_server_readpages(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+			    void *data);
+void hmdfs_server_readpages_open(struct hmdfs_peer *con,
+				 struct hmdfs_head_cmd *cmd, void *data);
+void hmdfs_server_writepage(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+			    void *data);
+
+void hmdfs_server_readdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+			  void *data);
+
+void hmdfs_server_mkdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+			void *data);
+
+void hmdfs_server_create(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+			 void *data);
+
+void hmdfs_server_rmdir(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+			void *data);
+
+void hmdfs_server_unlink(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+			 void *data);
+
+void hmdfs_server_rename(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+			 void *data);
+
+void hmdfs_server_setattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+			  void *data);
+void hmdfs_server_getattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+			  void *data);
+void hmdfs_server_statfs(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+			 void *data);
+void hmdfs_server_syncfs(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+			 void *data);
+void hmdfs_server_getxattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+			   void *data);
+void hmdfs_server_setxattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+			   void *data);
+void hmdfs_server_listxattr(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd,
+			    void *data);
+void hmdfs_server_get_drop_push(struct hmdfs_peer *con,
+				struct hmdfs_head_cmd *cmd, void *data);
+
+void __init hmdfs_server_add_node_evt_cb(void);
+#endif
diff --git a/fs/hmdfs/hmdfs_share.c b/fs/hmdfs/hmdfs_share.c
new file mode 100755
index 000000000..6b9557d02
--- /dev/null
+++ b/fs/hmdfs/hmdfs_share.c
@@ -0,0 +1,348 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/hmdfs_share.c
+ *
+ * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
+ */ + +#include "hmdfs_share.h" + +static inline bool hmdfs_is_dst_path(struct path *src, struct path *dst) +{ + return (src->dentry == dst->dentry) && (src->mnt == dst->mnt); +} + +static inline bool is_dst_device(char *src_cid, char *dst_cid) +{ + return strncmp(src_cid, dst_cid, HMDFS_CID_SIZE) == 0; +} + +bool hmdfs_is_share_file(struct file *file) +{ + struct file *cur_file = file; + struct hmdfs_dentry_info *gdi; + struct hmdfs_file_info *gfi; + + while (cur_file->f_inode->i_sb->s_magic == HMDFS_SUPER_MAGIC) { + gdi = hmdfs_d(cur_file->f_path.dentry); + gfi = hmdfs_f(cur_file); + if (hm_isshare(gdi->file_type)) + return true; + if (gfi->lower_file) + cur_file = gfi->lower_file; + else + break; + } + + return false; +} + +static void remove_and_release_share_item(struct hmdfs_share_item *item) +{ + list_del(&item->list); + item->hst->item_cnt--; + fput(item->file); + kfree(item->relative_path.name); + kfree(item); +} + +static inline bool is_share_item_timeout(struct hmdfs_share_item *item) +{ + return !item->opened && item->timeout; +} + +struct hmdfs_share_item *hmdfs_lookup_share_item(struct hmdfs_share_table *st, + struct qstr *cur_relative_path) +{ + struct hmdfs_share_item *item, *tmp; + + list_for_each_entry_safe(item, tmp, &st->item_list_head, list) { + if (is_share_item_timeout(item)){ + remove_and_release_share_item(item); + } else { + if (qstr_eq(&item->relative_path, cur_relative_path)) + return item; + } + } + + return NULL; +} + +static void share_item_timeout_work(struct work_struct *work) { + struct hmdfs_share_item *item = + container_of(work, struct hmdfs_share_item, d_work.work); + + item->timeout = true; +} + +int insert_share_item(struct hmdfs_share_table *st, struct qstr *relative_path, + struct file *file, char *cid) +{ + struct hmdfs_share_item *new_item = NULL; + char *path_name; + int err = 0; + + if (st->item_cnt >= st->max_cnt) { + int ret = hmdfs_clear_first_item(st); + if (unlikely(ret)) { + err = -EMFILE; + goto err_out; + } + } + + path_name = kzalloc(PATH_MAX, GFP_KERNEL); + if (unlikely(!path_name)) { + err = -EMFILE; + goto err_out; + } + strcpy(path_name, relative_path->name); + + new_item = kmalloc(sizeof(*new_item), GFP_KERNEL); + if (unlikely(!new_item)) { + err = -ENOMEM; + kfree(path_name); + goto err_out; + } + + new_item->file = file; + get_file(file); + new_item->relative_path.name = path_name; + new_item->relative_path.len = relative_path->len; + memcpy(new_item->cid, cid, HMDFS_CID_SIZE); + new_item->opened = false; + new_item->timeout = false; + list_add_tail(&new_item->list, &st->item_list_head); + new_item->hst = st; + + INIT_DELAYED_WORK(&new_item->d_work, share_item_timeout_work); + queue_delayed_work(new_item->hst->share_item_timeout_wq, + &new_item->d_work, HZ * HMDFS_SHARE_ITEM_TIMEOUT_S); + + st->item_cnt++; + +err_out: + return err; +} + +void update_share_item(struct hmdfs_share_item *item, struct file *file, + char *cid) +{ + /* if not the same file, we need to update struct file */ + if (!hmdfs_is_dst_path(&file->f_path, &item->file->f_path)) { + fput(item->file); + get_file(file); + item->file = file; + } + memcpy(item->cid, cid, HMDFS_CID_SIZE); + + if (!cancel_delayed_work_sync(&item->d_work)) + item->timeout = false; + + queue_delayed_work(item->hst->share_item_timeout_wq, &item->d_work, + HZ * HMDFS_SHARE_ITEM_TIMEOUT_S); +} + +bool in_share_dir(struct dentry *child_dentry) +{ + struct dentry *parent_dentry = dget_parent(child_dentry); + bool ret = false; + + if (!strncmp(parent_dentry->d_name.name, SHARE_RESERVED_DIR, + 
+		     strlen(SHARE_RESERVED_DIR)))
+		ret = true;
+
+	dput(parent_dentry);
+	return ret;
+}
+
+inline bool is_share_dir(struct inode *inode, const char *name)
+{
+	return (S_ISDIR(inode->i_mode) &&
+		!strncmp(name, SHARE_RESERVED_DIR, strlen(SHARE_RESERVED_DIR)));
+}
+
+int get_path_from_share_table(struct hmdfs_sb_info *sbi,
+			      struct dentry *cur_dentry,
+			      struct path *src_path)
+{
+	struct hmdfs_share_item *item;
+	const char *path_name;
+	struct qstr relative_path;
+	int err = 0;
+
+	path_name = hmdfs_get_dentry_relative_path(cur_dentry);
+	if (unlikely(!path_name)) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	relative_path.name = path_name;
+	relative_path.len = strlen(path_name);
+
+	spin_lock(&sbi->share_table.item_list_lock);
+	item = hmdfs_lookup_share_item(&sbi->share_table, &relative_path);
+	if (!item) {
+		err = -ENOENT;
+		goto unlock;
+	}
+	path_get(&item->file->f_path);
+	*src_path = item->file->f_path;
+unlock:
+	spin_unlock(&sbi->share_table.item_list_lock);
+	kfree(path_name);
+err_out:
+	return err;
+}
+
+void hmdfs_clear_share_item_offline(struct hmdfs_peer *conn)
+{
+	struct hmdfs_sb_info *sbi = conn->sbi;
+	struct hmdfs_share_item *item, *tmp;
+
+	spin_lock(&sbi->share_table.item_list_lock);
+	list_for_each_entry_safe(item, tmp, &sbi->share_table.item_list_head,
+				 list) {
+		if (is_dst_device(item->cid, conn->cid)) {
+			/* release the item that was not closed properly */
+			if (item->opened)
+				remove_and_release_share_item(item);
+		}
+	}
+	spin_unlock(&sbi->share_table.item_list_lock);
+}
+
+void reset_item_opened_status(struct hmdfs_sb_info *sbi, const char *filename)
+{
+	struct qstr candidate = QSTR_INIT(filename, strlen(filename));
+	struct hmdfs_share_item *item = NULL;
+
+	spin_lock(&sbi->share_table.item_list_lock);
+	item = hmdfs_lookup_share_item(&sbi->share_table, &candidate);
+	if (item) {
+		item->opened = false;
+		queue_delayed_work(item->hst->share_item_timeout_wq,
+				   &item->d_work,
+				   HZ * HMDFS_SHARE_ITEM_TIMEOUT_S);
+	}
+	spin_unlock(&sbi->share_table.item_list_lock);
+}
+
+void hmdfs_close_share_item(struct hmdfs_sb_info *sbi, struct file *file,
+			    char *cid)
+{
+	struct qstr relativepath;
+	const char *path_name;
+	struct hmdfs_share_item *item = NULL;
+
+	path_name = hmdfs_get_dentry_relative_path(file->f_path.dentry);
+	if (unlikely(!path_name)) {
+		hmdfs_err("get dentry relative path error");
+		return;
+	}
+
+	relativepath.name = path_name;
+	relativepath.len = strlen(path_name);
+
+	spin_lock(&sbi->share_table.item_list_lock);
+	item = hmdfs_lookup_share_item(&sbi->share_table, &relativepath);
+	if (unlikely(!item)) {
+		hmdfs_err("cannot get share item %s", relativepath.name);
+		goto unlock;
+	}
+
+	/*
+	 * If the item is shared to all devices, close it directly.
+	 */
+	if (!strcmp(item->cid, SHARE_ALL_DEVICE))
+		goto close;
+
+	if (unlikely(!is_dst_device(item->cid, cid))) {
+		hmdfs_err("item not right, dst cid is: %s", item->cid);
+		goto unlock;
+	}
+
+	/*
+	 * After remote close, we should reset the opened status and restart
+	 * the delayed timeout work.
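+	 * The item is not freed here; once the delayed work fires and marks
+	 * it timed out, a later hmdfs_lookup_share_item() call reclaims it.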
+	 */
+close:
+	item->opened = false;
+	queue_delayed_work(item->hst->share_item_timeout_wq, &item->d_work,
+			   HZ * HMDFS_SHARE_ITEM_TIMEOUT_S);
+
+unlock:
+	spin_unlock(&sbi->share_table.item_list_lock);
+	kfree(path_name);
+}
+
+int hmdfs_check_share_access_permission(struct hmdfs_sb_info *sbi,
+					const char *filename,
+					char *cid)
+{
+	struct qstr candidate = QSTR_INIT(filename, strlen(filename));
+	struct hmdfs_share_item *item = NULL;
+	int ret = -ENOENT;
+
+	spin_lock(&sbi->share_table.item_list_lock);
+	item = hmdfs_lookup_share_item(&sbi->share_table, &candidate);
+	/*
+	 * When the cid matches, set the item status opened and cancel
+	 * its delayed work to ensure that the open process can get
+	 * the correct path.
+	 */
+	if (item && (is_dst_device(item->cid, cid) ||
+		     !strcmp(item->cid, SHARE_ALL_DEVICE))) {
+		item->opened = true;
+		if (!cancel_delayed_work_sync(&item->d_work))
+			item->timeout = false;
+		ret = 0;
+	}
+	spin_unlock(&sbi->share_table.item_list_lock);
+
+	return ret;
+}
+
+int hmdfs_init_share_table(struct hmdfs_sb_info *sbi)
+{
+	spin_lock_init(&sbi->share_table.item_list_lock);
+	INIT_LIST_HEAD(&sbi->share_table.item_list_head);
+	sbi->share_table.item_cnt = 0;
+	sbi->share_table.max_cnt = HMDFS_SHARE_ITEMS_MAX;
+	sbi->share_table.share_item_timeout_wq =
+		create_singlethread_workqueue("share_item_timeout_wq");
+
+	if (!sbi->share_table.share_item_timeout_wq)
+		return -ENOMEM;
+	return 0;
+}
+
+void hmdfs_clear_share_table(struct hmdfs_sb_info *sbi)
+{
+	struct hmdfs_share_table *st = &sbi->share_table;
+	struct hmdfs_share_item *item, *tmp;
+
+	spin_lock(&sbi->share_table.item_list_lock);
+	list_for_each_entry_safe(item, tmp, &sbi->share_table.item_list_head,
+				 list) {
+		flush_delayed_work(&item->d_work);
+		remove_and_release_share_item(item);
+	}
+	spin_unlock(&sbi->share_table.item_list_lock);
+
+	destroy_workqueue(st->share_item_timeout_wq);
+}
+
+int hmdfs_clear_first_item(struct hmdfs_share_table *st)
+{
+	int ret = -EMFILE;
+	struct hmdfs_share_item *item, *tmp;
+
+	list_for_each_entry_safe(item, tmp, &st->item_list_head, list) {
+		if (!item->timeout)
+			cancel_delayed_work_sync(&item->d_work);
+		remove_and_release_share_item(item);
+		ret = 0;
+		break;
+	}
+	return ret;
+}
diff --git a/fs/hmdfs/hmdfs_share.h b/fs/hmdfs/hmdfs_share.h
new file mode 100755
index 000000000..3c055805b
--- /dev/null
+++ b/fs/hmdfs/hmdfs_share.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/hmdfs_share.h
+ *
+ * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
+ */ + +#ifndef HMDFS_SHARE_H +#define HMDFS_SHARE_H + +#include +#include +#include + +#include "hmdfs_device_view.h" +#include "comm/connection.h" + +#define HMDFS_SHARE_ITEM_TIMEOUT_S 120 +#define HMDFS_SHARE_ITEMS_MAX 128 + +#define HMDFS_IOC 0xf2 +#define HMDFS_IOC_SET_SHARE_PATH _IOW(HMDFS_IOC, 1, \ + struct hmdfs_share_control) + +#define SHARE_RESERVED_DIR ".share" +#define SHARE_ALL_DEVICE "0" + +struct hmdfs_share_control { + __u32 src_fd; + char cid[HMDFS_CID_SIZE]; +}; + +struct hmdfs_share_item { + struct file *file; + struct qstr relative_path; + char cid[HMDFS_CID_SIZE]; + bool opened; + bool timeout; + struct list_head list; + struct delayed_work d_work; + struct hmdfs_share_table *hst; +}; + +bool hmdfs_is_share_file(struct file *file); +struct hmdfs_share_item *hmdfs_lookup_share_item(struct hmdfs_share_table *st, + struct qstr *cur_relative_path); +int insert_share_item(struct hmdfs_share_table *st, struct qstr *relative_path, + struct file *file, char *cid); +void update_share_item(struct hmdfs_share_item *item, struct file *file, + char *cid); +bool in_share_dir(struct dentry *child_dentry); +inline bool is_share_dir(struct inode *inode, const char *name); +int get_path_from_share_table(struct hmdfs_sb_info *sbi, + struct dentry *cur_dentry, struct path *src_path); + +void hmdfs_clear_share_item_offline(struct hmdfs_peer *conn); +void reset_item_opened_status(struct hmdfs_sb_info *sbi, const char *filename); +void hmdfs_close_share_item(struct hmdfs_sb_info *sbi, struct file *file, + char *cid); +int hmdfs_check_share_access_permission(struct hmdfs_sb_info *sbi, + const char *filename, char *cid); + +int hmdfs_init_share_table(struct hmdfs_sb_info *sbi); +void hmdfs_clear_share_table(struct hmdfs_sb_info *sbi); +int hmdfs_clear_first_item(struct hmdfs_share_table *st); + +#endif // HMDFS_SHARE_H diff --git a/fs/hmdfs/hmdfs_trace.h b/fs/hmdfs/hmdfs_trace.h new file mode 100755 index 000000000..86478425a --- /dev/null +++ b/fs/hmdfs/hmdfs_trace.h @@ -0,0 +1,891 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/hmdfs_trace.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hmdfs + +#if !defined(__HMDFS_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ) + +#define __HMDFS_TRACE_H__ + +#include +#include "comm/protocol.h" +#include "hmdfs_dentryfile.h" +#include "hmdfs_client.h" +#include "hmdfs_device_view.h" +#include "hmdfs_merge_view.h" +#include "client_writeback.h" + +TRACE_EVENT(hmdfs_permission, + + TP_PROTO(unsigned long ino), + + TP_ARGS(ino), + + TP_STRUCT__entry(__field(unsigned long, ino)), + + TP_fast_assign(__entry->ino = ino;), + + TP_printk("permission check for ino %lu failed", __entry->ino)); + +/* communication */ +TRACE_EVENT(hmdfs_recv_mesg_callback, + + TP_PROTO(struct hmdfs_head_cmd *cmd), + + TP_ARGS(cmd), + + TP_STRUCT__entry( + __field(__u32, msg_id) + __field(__u32, magic) + __field(__u16, command) + __field(__u16, cmd_flag) + __field(__u32, data_len) + __field(__u32, ret_code) + ), + + TP_fast_assign( + __entry->msg_id = le32_to_cpu(cmd->msg_id); + __entry->magic = cmd->magic; + __entry->command = cmd->operations.command; + __entry->cmd_flag = cmd->operations.cmd_flag; + __entry->data_len = cmd->data_len; + __entry->ret_code = cmd->ret_code; + ), + + TP_printk("msg_id:%u magic:%u command:%hu, cmd_flag:%hu, data_len:%u, ret_code:%u", + __entry->msg_id, __entry->magic, __entry->command, + __entry->cmd_flag, __entry->data_len, __entry->ret_code) +); + +TRACE_EVENT(hmdfs_tcp_send_message, + + TP_PROTO(struct hmdfs_head_cmd *cmd), + + TP_ARGS(cmd), + + TP_STRUCT__entry( + __field(__u32, msg_id) + __field(__u32, magic) + __field(__u16, command) + __field(__u16, cmd_flag) + __field(__u32, data_len) + __field(__u32, ret_code) + ), + + TP_fast_assign( + __entry->msg_id = le32_to_cpu(cmd->msg_id); + __entry->magic = cmd->magic; + __entry->command = cmd->operations.command; + __entry->cmd_flag = cmd->operations.cmd_flag; + __entry->data_len = cmd->data_len; + __entry->ret_code = cmd->ret_code; + ), + + TP_printk("msg_id:%u magic:%u command:%hu, cmd_flag:%hu, data_len:%u, ret_code:%u", + __entry->msg_id, __entry->magic, __entry->command, + __entry->cmd_flag, __entry->data_len, __entry->ret_code) +); + +/* file system interface */ +DECLARE_EVENT_CLASS(hmdfs_iterate_op_end, + + TP_PROTO(struct dentry *__d, loff_t start_pos, loff_t end_pos, int err), + + TP_ARGS(__d, start_pos, end_pos, err), + + TP_STRUCT__entry( + __string(name_str, __d->d_name.name) + __field(loff_t, start) + __field(loff_t, end) + __field(int, err) + ), + + TP_fast_assign( + __assign_str(name_str, __d->d_name.name); + __entry->start = start_pos; + __entry->end = end_pos; + __entry->err = err; + ), + + TP_printk("dentry[%s] start_pos:%llx, end_pos:%llx, err:%d", + __get_str(name_str), __entry->start, + __entry->end, __entry->err) +); + +#define define_hmdfs_iterate_op_end_event(event_name) \ + DEFINE_EVENT(hmdfs_iterate_op_end, event_name, \ + TP_PROTO(struct dentry *__d, loff_t start_pos, \ + loff_t end_pos, int err), \ + TP_ARGS(__d, start_pos, end_pos, err)) + +define_hmdfs_iterate_op_end_event(hmdfs_iterate_local); +define_hmdfs_iterate_op_end_event(hmdfs_iterate_remote); +define_hmdfs_iterate_op_end_event(hmdfs_iterate_merge); + + +TRACE_EVENT(hmdfs_lookup, + + TP_PROTO(struct inode *dir, struct dentry *dentry, unsigned int flags), + + TP_ARGS(dir, dentry, flags), + + TP_STRUCT__entry( + __field(ino_t, ino) + __string(name_str, dentry->d_name.name) + __field(unsigned int, flags) + ), + + TP_fast_assign( + __entry->ino = dir->i_ino; + __assign_str(name_str, dentry->d_name.name); + __entry->flags = flags; + ), + + 
TP_printk("parent_ino = %lu, name:%s, flags:%u", + __entry->ino, __get_str(name_str), __entry->flags) +); + +DECLARE_EVENT_CLASS(hmdfs_lookup_op_end, + + TP_PROTO(struct inode *dir, struct dentry *dentry, int err), + + TP_ARGS(dir, dentry, err), + + TP_STRUCT__entry( + __field(ino_t, ino) + __string(name_str, dentry->d_name.name) + __field(int, err) + ), + + TP_fast_assign( + __entry->ino = dir->i_ino; + __assign_str(name_str, dentry->d_name.name); + __entry->err = err; + ), + + TP_printk("parent_ino = %lu, name:%s, err:%d", + __entry->ino, __get_str(name_str), __entry->err) +); + +#define define_hmdfs_lookup_op_end_event(event_name) \ + DEFINE_EVENT(hmdfs_lookup_op_end, event_name, \ + TP_PROTO(struct inode *dir, struct dentry *dentry, \ + int err), \ + TP_ARGS(dir, dentry, err)) + + +define_hmdfs_lookup_op_end_event(hmdfs_root_lookup); +define_hmdfs_lookup_op_end_event(hmdfs_root_lookup_end); + +define_hmdfs_lookup_op_end_event(hmdfs_device_lookup); +define_hmdfs_lookup_op_end_event(hmdfs_device_lookup_end); + +define_hmdfs_lookup_op_end_event(hmdfs_lookup_local); +define_hmdfs_lookup_op_end_event(hmdfs_lookup_local_end); +define_hmdfs_lookup_op_end_event(hmdfs_mkdir_local); +define_hmdfs_lookup_op_end_event(hmdfs_rmdir_local); +define_hmdfs_lookup_op_end_event(hmdfs_create_local); + +define_hmdfs_lookup_op_end_event(hmdfs_lookup_remote); +define_hmdfs_lookup_op_end_event(hmdfs_lookup_remote_end); +define_hmdfs_lookup_op_end_event(hmdfs_mkdir_remote); +define_hmdfs_lookup_op_end_event(hmdfs_rmdir_remote); +define_hmdfs_lookup_op_end_event(hmdfs_create_remote); + +define_hmdfs_lookup_op_end_event(hmdfs_lookup_merge); +define_hmdfs_lookup_op_end_event(hmdfs_lookup_merge_end); +define_hmdfs_lookup_op_end_event(hmdfs_mkdir_merge); +define_hmdfs_lookup_op_end_event(hmdfs_rmdir_merge); +define_hmdfs_lookup_op_end_event(hmdfs_create_merge); + +define_hmdfs_lookup_op_end_event(hmdfs_lookup_share); +define_hmdfs_lookup_op_end_event(hmdfs_lookup_share_end); + +TRACE_EVENT(hmdfs_show_comrade, + + TP_PROTO(struct dentry *d, struct dentry *lo_d, uint64_t devid), + + TP_ARGS(d, lo_d, devid), + + TP_STRUCT__entry( + __string(name, d->d_name.name) + __string(lo_name, lo_d->d_name.name) + __field(uint64_t, devid) + ), + + TP_fast_assign( + __assign_str(name, d->d_name.name) + __assign_str(lo_name, lo_d->d_name.name) + __entry->devid = devid; + ), + + TP_printk("parent_name:%s -> lo_d_name:%s, lo_d_devid:%llu", + __get_str(name), __get_str(lo_name), __entry->devid) +); + +DECLARE_EVENT_CLASS(hmdfs_rename_op_end, + + TP_PROTO(struct inode *olddir, struct dentry *olddentry, + struct inode *newdir, struct dentry *newdentry, + unsigned int flags), + + TP_ARGS(olddir, olddentry, newdir, newdentry, flags), + + TP_STRUCT__entry( + __field(ino_t, oldino) + __string(oldname_str, olddentry->d_name.name) + __field(ino_t, newino) + __string(newname_str, newdentry->d_name.name) + __field(unsigned int, flags) + ), + + TP_fast_assign( + __entry->oldino = olddir->i_ino; + __assign_str(oldname_str, olddentry->d_name.name); + __entry->newino = newdir->i_ino; + __assign_str(newname_str, newdentry->d_name.name); + __entry->flags = flags; + ), + + TP_printk("old_pino = %lu, oldname:%s; new_pino = %lu, newname:%s, flags:%u", + __entry->oldino, __get_str(oldname_str), + __entry->newino, __get_str(newname_str), __entry->flags) +); + +#define define_hmdfs_rename_op_end_event(event_name) \ + DEFINE_EVENT(hmdfs_rename_op_end, event_name, \ + TP_PROTO(struct inode *olddir, struct dentry *olddentry, \ + struct inode *newdir, struct 
dentry *newdentry, \
+			      unsigned int flags), \
+		     TP_ARGS(olddir, olddentry, newdir, newdentry, flags))
+
+define_hmdfs_rename_op_end_event(hmdfs_rename_local);
+define_hmdfs_rename_op_end_event(hmdfs_rename_remote);
+define_hmdfs_rename_op_end_event(hmdfs_rename_merge);
+
+TRACE_EVENT(hmdfs_statfs,
+
+	TP_PROTO(struct dentry *d, uint8_t type),
+
+	TP_ARGS(d, type),
+
+	TP_STRUCT__entry(
+		__string(name, d->d_name.name)
+		__field(uint8_t, type)
+	),
+
+	TP_fast_assign(
+		__assign_str(name, d->d_name.name)
+		__entry->type = type;
+	),
+
+	TP_printk("dentry_name:%s, type:%u",
+		  __get_str(name), __entry->type)
+);
+
+
+
+TRACE_EVENT(hmdfs_balance_dirty_pages_ratelimited,
+
+	TP_PROTO(struct hmdfs_sb_info *sbi,
+		 struct hmdfs_writeback *hwb,
+		 int bdp_ratelimits),
+
+	TP_ARGS(sbi, hwb, bdp_ratelimits),
+
+	TP_STRUCT__entry(
+		__array(char, dst, 128)
+		__field(int, nr_dirtied)
+		__field(int, nr_dirtied_pause)
+		__field(int, dirty_exceeded)
+		__field(long long, bdp_ratelimits)
+		__field(long, ratelimit_pages)
+	),
+
+	TP_fast_assign(
+		strlcpy(__entry->dst, sbi->local_dst, 128);
+
+		__entry->nr_dirtied = current->nr_dirtied;
+		__entry->nr_dirtied_pause = current->nr_dirtied_pause;
+		__entry->dirty_exceeded = hwb->dirty_exceeded;
+		__entry->bdp_ratelimits = bdp_ratelimits;
+		__entry->ratelimit_pages = hwb->ratelimit_pages;
+	),
+
+	TP_printk("hmdfs dst:%s nr_dirtied=%d nr_dirtied_pause=%d dirty_exceeded=%d bdp_ratelimits=%lld ratelimit_pages=%ld",
+		  __entry->dst, __entry->nr_dirtied, __entry->nr_dirtied_pause,
+		  __entry->dirty_exceeded, __entry->bdp_ratelimits,
+		  __entry->ratelimit_pages)
+);
+
+TRACE_EVENT(hmdfs_balance_dirty_pages,
+
+	TP_PROTO(struct hmdfs_sb_info *sbi,
+		 struct bdi_writeback *wb,
+		 struct hmdfs_dirty_throttle_control *hdtc,
+		 unsigned long pause,
+		 unsigned long start_time),
+
+	TP_ARGS(sbi, wb, hdtc, pause, start_time),
+
+	TP_STRUCT__entry(
+		__array(char, dst, 128)
+		__field(unsigned long, write_bw)
+		__field(unsigned long, avg_write_bw)
+		__field(unsigned long, file_bg_thresh)
+		__field(unsigned long, fs_bg_thresh)
+		__field(unsigned long, file_thresh)
+		__field(unsigned long, fs_thresh)
+		__field(unsigned long, file_nr_dirty)
+		__field(unsigned long, fs_nr_dirty)
+		__field(unsigned long, file_nr_rec)
+		__field(unsigned long, fs_nr_rec)
+		__field(unsigned long, pause)
+		__field(unsigned long, paused)
+	),
+
+	TP_fast_assign(
+		strlcpy(__entry->dst, sbi->local_dst, 128);
+
+		__entry->write_bw = wb->write_bandwidth;
+		__entry->avg_write_bw = wb->avg_write_bandwidth;
+		__entry->file_bg_thresh = hdtc->file_bg_thresh;
+		__entry->fs_bg_thresh = hdtc->fs_bg_thresh;
+		__entry->file_thresh = hdtc->file_thresh;
+		__entry->fs_thresh = hdtc->fs_thresh;
+		__entry->file_nr_dirty = hdtc->file_nr_dirty;
+		__entry->fs_nr_dirty = hdtc->fs_nr_dirty;
+		__entry->file_nr_rec = hdtc->file_nr_reclaimable;
+		__entry->fs_nr_rec = hdtc->fs_nr_reclaimable;
+		__entry->pause = pause * 1000 / HZ;
+		__entry->paused = (jiffies - start_time) *
+				  1000 / HZ;
+	),
+
+	TP_printk("hmdfs dst:%s write_bw=%lu, awrite_bw=%lu, bg_thresh=%lu,%lu thresh=%lu,%lu dirty=%lu,%lu reclaimable=%lu,%lu pause=%lu paused=%lu",
+		  __entry->dst, __entry->write_bw, __entry->avg_write_bw,
+		  __entry->file_bg_thresh, __entry->fs_bg_thresh,
+		  __entry->file_thresh, __entry->fs_thresh,
+		  __entry->file_nr_dirty, __entry->fs_nr_dirty,
+		  __entry->file_nr_rec, __entry->fs_nr_rec,
+		  __entry->pause, __entry->paused
+	)
+);
+
+TRACE_EVENT(hmdfs_start_srv_wb,
+
+	TP_PROTO(struct hmdfs_sb_info *sbi, int dirty_pages,
+		 unsigned int
dirty_thresh_pg), + + TP_ARGS(sbi, dirty_pages, dirty_thresh_pg), + + TP_STRUCT__entry( + __array(char, src, 128) + __field(int, dirty_pages) + __field(unsigned int, dirty_thresh_pg) + ), + + TP_fast_assign( + strlcpy(__entry->src, sbi->local_src, 128); + __entry->dirty_pages = dirty_pages; + __entry->dirty_thresh_pg = dirty_thresh_pg; + ), + + TP_printk("hmdfs src: %s, start writeback dirty pages. writeback %d pages dirty_thresh is %d pages", + __entry->src, __entry->dirty_pages, __entry->dirty_thresh_pg) +); + +TRACE_EVENT(hmdfs_fsync_enter_remote, + + TP_PROTO(struct hmdfs_sb_info *sbi, unsigned long long device_id, + unsigned long long remote_ino, int datasync), + + TP_ARGS(sbi, device_id, remote_ino, datasync), + + TP_STRUCT__entry( + __array(char, src, 128) + __field(uint64_t, device_id) + __field(uint64_t, remote_ino) + __field(int, datasync) + ), + + TP_fast_assign( + strlcpy(__entry->src, sbi->local_src, 128); + __entry->device_id = device_id; + __entry->remote_ino = remote_ino; + __entry->datasync = datasync; + ), + + TP_printk("hmdfs: src %s, start remote fsync file(remote dev_id=%llu,ino=%llu), datasync=%d", + __entry->src, __entry->device_id, + __entry->remote_ino, __entry->datasync) +); + +TRACE_EVENT(hmdfs_fsync_exit_remote, + + TP_PROTO(struct hmdfs_sb_info *sbi, unsigned long long device_id, + unsigned long long remote_ino, unsigned int timeout, int err), + + TP_ARGS(sbi, device_id, remote_ino, timeout, err), + + TP_STRUCT__entry( + __array(char, src, 128) + __field(uint64_t, device_id) + __field(uint64_t, remote_ino) + __field(uint32_t, timeout) + __field(int, err) + ), + + TP_fast_assign( + strlcpy(__entry->src, sbi->local_src, 128); + __entry->device_id = device_id; + __entry->remote_ino = remote_ino; + __entry->timeout = timeout; + __entry->err = err; + ), + + TP_printk("hmdfs: src %s, finish remote fsync file(remote dev_id=%llu,ino=%llu), timeout=%u, err=%d", + __entry->src, __entry->device_id, __entry->remote_ino, + __entry->timeout, __entry->err) +); + +TRACE_EVENT(hmdfs_syncfs_enter, + + TP_PROTO(struct hmdfs_sb_info *sbi), + + TP_ARGS(sbi), + + TP_STRUCT__entry( + __array(char, src, 128) + ), + + TP_fast_assign( + strlcpy(__entry->src, sbi->local_src, 128); + ), + + TP_printk("hmdfs: src %s, start syncfs", __entry->src) +); + +TRACE_EVENT(hmdfs_syncfs_exit, + + TP_PROTO(struct hmdfs_sb_info *sbi, int remain_count, + unsigned int timeout, int err), + + TP_ARGS(sbi, remain_count, timeout, err), + + TP_STRUCT__entry( + __array(char, src, 128) + __field(int, remain_count) + __field(uint32_t, timeout) + __field(int, err) + ), + + TP_fast_assign( + strlcpy(__entry->src, sbi->local_src, 128); + __entry->remain_count = remain_count; + __entry->timeout = timeout; + __entry->err = err; + ), + + TP_printk("hmdfs: src %s, finish syncfs(timeout=%u), remain %d remote devices to response, err=%d", + __entry->src, __entry->timeout, + __entry->remain_count, __entry->err) +); + +TRACE_EVENT(hmdfs_server_release, + + TP_PROTO(struct hmdfs_peer *con, uint32_t file_id, + uint64_t file_ver, int err), + + TP_ARGS(con, file_id, file_ver, err), + + TP_STRUCT__entry( + __array(char, src, 128) + __field(uint32_t, file_id) + __field(uint64_t, file_ver) + __field(uint64_t, device_id) + __field(int, err) + ), + + TP_fast_assign( + strlcpy(__entry->src, con->sbi->local_src, 128); + __entry->file_id = file_id; + __entry->file_ver = file_ver; + __entry->device_id = con->device_id; + __entry->err = err; + ), + + TP_printk("hmdfs: src %s, server release file, fid=%u, fid_ver=%llu, remote_dev=%llu, 
err=%d", + __entry->src, __entry->file_id, __entry->file_ver, + __entry->device_id, __entry->err) +); + +TRACE_EVENT(hmdfs_client_recv_readpage, + + TP_PROTO(struct hmdfs_peer *con, unsigned long long remote_ino, + unsigned long page_index, int err), + + TP_ARGS(con, remote_ino, page_index, err), + + TP_STRUCT__entry( + __array(char, src, 128) + __field(uint64_t, remote_ino) + __field(unsigned long, page_index) + __field(uint64_t, device_id) + __field(int, err) + ), + + TP_fast_assign( + strlcpy(__entry->src, con->sbi->local_src, 128); + __entry->remote_ino = remote_ino; + __entry->page_index = page_index; + __entry->device_id = con->device_id; + __entry->err = err; + ), + + TP_printk("hmdfs: src %s, client readpage callback from remote device %llu, remote_ino=%llu, page_idx=%lu, err=%d", + __entry->src, __entry->device_id, + __entry->remote_ino, __entry->page_index, __entry->err) +); + +TRACE_EVENT(hmdfs_writepage_cb_enter, + + TP_PROTO(struct hmdfs_peer *con, unsigned long long remote_ino, + unsigned long page_index, int err), + + TP_ARGS(con, remote_ino, page_index, err), + + TP_STRUCT__entry( + __array(char, src, 128) + __field(uint64_t, remote_ino) + __field(unsigned long, page_index) + __field(uint64_t, device_id) + __field(int, err) + ), + + TP_fast_assign( + strlcpy(__entry->src, con->sbi->local_src, 128); + __entry->remote_ino = remote_ino; + __entry->page_index = page_index; + __entry->device_id = con->device_id; + __entry->err = err; + ), + + TP_printk("hmdfs: src %s, writepage_cb start, return from remote device %llu, remote_ino=%llu, page_idx=%lu, err=%d", + __entry->src, __entry->device_id, + __entry->remote_ino, __entry->page_index, __entry->err) +); + +TRACE_EVENT(hmdfs_writepage_cb_exit, + + TP_PROTO(struct hmdfs_peer *con, unsigned long long remote_ino, + unsigned long page_index, int err), + + TP_ARGS(con, remote_ino, page_index, err), + + TP_STRUCT__entry( + __array(char, src, 128) + __field(uint64_t, remote_ino) + __field(unsigned long, page_index) + __field(uint64_t, device_id) + __field(int, err) + ), + + TP_fast_assign( + strlcpy(__entry->src, con->sbi->local_src, 128); + __entry->remote_ino = remote_ino; + __entry->page_index = page_index; + __entry->device_id = con->device_id; + __entry->err = err; + ), + + TP_printk("hmdfs: src %s, writepage_cb exit, return from remote device %llu, remote_ino=%llu, page_index=%lu, err=%d", + __entry->src, __entry->device_id, + __entry->remote_ino, __entry->page_index, __entry->err) +); + +TRACE_EVENT(hmdfs_server_rebuild_dents, + + TP_PROTO(struct hmdfs_dcache_header *__h, int err), + + TP_ARGS(__h, err), + + TP_STRUCT__entry( + __field(uint64_t, crtime) + __field(uint64_t, crtime_nsec) + __field(uint64_t, ctime) + __field(uint64_t, ctime_nsec) + __field(uint64_t, num) + __field(int, err) + ), + + TP_fast_assign( + __entry->crtime = le64_to_cpu(__h->dcache_crtime); + __entry->crtime_nsec = le64_to_cpu(__h->dcache_crtime_nsec); + __entry->ctime = le64_to_cpu(__h->dentry_ctime); + __entry->ctime_nsec = le64_to_cpu(__h->dentry_ctime_nsec); + __entry->num = le64_to_cpu(__h->num); + __entry->err = err; + ), + + TP_printk("dcache crtime %llu:%llu ctime %llu:%llu has %llu dentry err %d", + __entry->crtime, __entry->crtime_nsec, __entry->ctime, + __entry->ctime_nsec, __entry->num, __entry->err) +); + +TRACE_EVENT(hmdfs_server_readdir, + + TP_PROTO(struct readdir_request *req), + + TP_ARGS(req), + + TP_STRUCT__entry( + __string(path, req->path) + ), + + TP_fast_assign( + __assign_str(path, req->path); + ), + + 
TP_printk("hmdfs_server_readdir %s", __get_str(path)) +); + +TRACE_EVENT(hmdfs_open_final_remote, + + TP_PROTO(struct hmdfs_inode_info *info, + struct hmdfs_open_ret *open_ret, + struct file *file, + int reason), + + TP_ARGS(info, open_ret, file, reason), + + TP_STRUCT__entry( + __array(char, file_path, MAX_FILTER_STR_VAL) + __field(uint32_t, reason) + __field(uint32_t, file_id) + __field(uint64_t, file_ver) + __field(uint64_t, remote_file_size) + __field(uint64_t, remote_ino) + __field(uint64_t, remote_ctime) + __field(uint64_t, remote_ctime_nsec) + __field(uint64_t, remote_stable_ctime) + __field(uint64_t, remote_stable_ctime_nsec) + __field(uint64_t, local_file_size) + __field(uint64_t, local_ino) + __field(uint64_t, local_ctime) + __field(uint64_t, local_ctime_nsec) + __field(uint64_t, local_stable_ctime) + __field(uint64_t, local_stable_ctime_nsec) + ), + + TP_fast_assign( + strlcpy(__entry->file_path, file->f_path.dentry->d_name.name, + MAX_FILTER_STR_VAL); + __entry->reason = reason; + __entry->file_id = open_ret->fid.id; + __entry->file_ver = open_ret->fid.ver; + __entry->remote_file_size = open_ret->file_size; + __entry->remote_ino = open_ret->ino; + __entry->remote_ctime = open_ret->remote_ctime.tv_sec; + __entry->remote_ctime_nsec = open_ret->remote_ctime.tv_nsec; + __entry->remote_stable_ctime = open_ret->stable_ctime.tv_sec; + __entry->remote_stable_ctime_nsec = + open_ret->stable_ctime.tv_nsec; + __entry->local_file_size = info->vfs_inode.i_size; + __entry->local_ino = info->remote_ino; + __entry->local_ctime = info->remote_ctime.tv_sec; + __entry->local_ctime_nsec = info->remote_ctime.tv_nsec; + __entry->local_stable_ctime = info->stable_ctime.tv_sec; + __entry->local_stable_ctime_nsec = info->stable_ctime.tv_nsec; + ), + + TP_printk("file path: %s, file id: %u, file ver: %llu, reason: %d, file size: %llu/%llu, ino: %llu/%llu, ctime: %llu.%llu/%llu.%llu, stable_ctime: %llu.%llu/%llu.%llu from remote/local", + __entry->file_path, __entry->file_id, __entry->file_ver, + __entry->reason, __entry->remote_file_size, + __entry->local_file_size, __entry->remote_ino, + __entry->local_ino, __entry->remote_ctime, + __entry->remote_ctime_nsec, __entry->local_ctime, + __entry->local_ctime_nsec, __entry->remote_stable_ctime, + __entry->remote_stable_ctime_nsec, + __entry->local_stable_ctime, __entry->local_stable_ctime_nsec) +); + +TRACE_EVENT(hmdfs_server_open_enter, + + TP_PROTO(struct hmdfs_peer *con, + struct open_request *recv), + + TP_ARGS(con, recv), + + TP_STRUCT__entry( + __array(char, open_path, MAX_FILTER_STR_VAL) + __array(char, dst_path, MAX_FILTER_STR_VAL) + __field(uint32_t, file_type) + ), + + TP_fast_assign( + strlcpy(__entry->open_path, recv->buf, MAX_FILTER_STR_VAL); + strlcpy(__entry->dst_path, con->sbi->local_dst, + MAX_FILTER_STR_VAL); + __entry->file_type = recv->file_type; + ), + + TP_printk("server open file %s from %s, file_type is %u", + __entry->open_path, __entry->dst_path, + __entry->file_type) +); + +TRACE_EVENT(hmdfs_server_open_exit, + + TP_PROTO(struct hmdfs_peer *con, + struct open_response *resp, + struct file *file, + int ret), + + TP_ARGS(con, resp, file, ret), + + TP_STRUCT__entry( + __array(char, file_path, MAX_FILTER_STR_VAL) + __array(char, src_path, MAX_FILTER_STR_VAL) + __field(uint32_t, file_id) + __field(uint64_t, file_size) + __field(uint64_t, ino) + __field(uint64_t, ctime) + __field(uint64_t, ctime_nsec) + __field(uint64_t, stable_ctime) + __field(uint64_t, stable_ctime_nsec) + __field(int, retval) + ), + + TP_fast_assign( + if (file) + 
strlcpy(__entry->file_path,
+				file->f_path.dentry->d_name.name,
+				MAX_FILTER_STR_VAL);
+		else
+			strlcpy(__entry->file_path, "null", MAX_FILTER_STR_VAL);
+		strlcpy(__entry->src_path, con->sbi->local_src,
+			MAX_FILTER_STR_VAL);
+		__entry->file_id = resp ? resp->file_id : UINT_MAX;
+		__entry->file_size = resp ? resp->file_size : ULLONG_MAX;
+		__entry->ino = resp ? resp->ino : 0;
+		__entry->ctime = resp ? resp->ctime : 0;
+		__entry->ctime_nsec = resp ? resp->ctime_nsec : 0;
+		__entry->stable_ctime = resp ? resp->stable_ctime : 0;
+		__entry->stable_ctime_nsec = resp ? resp->stable_ctime_nsec : 0;
+		__entry->retval = ret;
+	),
+
+	TP_printk("server file %s is opened from %s, open result: %d, file id: %u, file size: %llu, ino: %llu, ctime: %llu.%llu, stable ctime: %llu.%llu",
+		  __entry->file_path, __entry->src_path,
+		  __entry->retval, __entry->file_id,
+		  __entry->file_size, __entry->ino, __entry->ctime,
+		  __entry->ctime_nsec, __entry->stable_ctime,
+		  __entry->stable_ctime_nsec)
+);
+
+TRACE_EVENT(hmdfs_merge_lookup_work_enter,
+
+	TP_PROTO(struct merge_lookup_work *ml_work),
+
+	TP_ARGS(ml_work),
+
+	TP_STRUCT__entry(
+		__field(int, devid)
+		__string(name, ml_work->name)
+		__field(unsigned int, flags)
+	),
+
+	TP_fast_assign(
+		__entry->devid = ml_work->devid;
+		__assign_str(name, ml_work->name);
+		__entry->flags = ml_work->flags;
+	),
+
+	TP_printk("devid = %d, name:%s, flags:%u",
+		  __entry->devid,
+		  __get_str(name),
+		  __entry->flags)
+);
+
+TRACE_EVENT(hmdfs_merge_lookup_work_exit,
+
+	TP_PROTO(struct merge_lookup_work *ml_work, int found),
+
+	TP_ARGS(ml_work, found),
+
+	TP_STRUCT__entry(
+		__field(int, devid)
+		__string(name, ml_work->name)
+		__field(unsigned int, flags)
+		__field(int, found)
+	),
+
+	TP_fast_assign(
+		__entry->devid = ml_work->devid;
+		__assign_str(name, ml_work->name);
+		__entry->flags = ml_work->flags;
+		__entry->found = found;
+	),
+
+	TP_printk("devid = %d, name:%s, flags:%u, found:%d",
+		  __entry->devid,
+		  __get_str(name),
+		  __entry->flags,
+		  __entry->found)
+);
+
+TRACE_EVENT(hmdfs_merge_update_dentry_info_enter,
+
+	TP_PROTO(struct dentry *src_dentry, struct dentry *dst_dentry),
+
+	TP_ARGS(src_dentry, dst_dentry),
+
+	TP_STRUCT__entry(
+		__string(src_name, src_dentry->d_name.name)
+		__string(dst_name, dst_dentry->d_name.name)
+	),
+
+	TP_fast_assign(
+		__assign_str(src_name, src_dentry->d_name.name);
+		__assign_str(dst_name, dst_dentry->d_name.name);
+	),
+
+	TP_printk("src name:%s, dst name:%s",
+		  __get_str(src_name),
+		  __get_str(dst_name))
+);
+
+TRACE_EVENT(hmdfs_merge_update_dentry_info_exit,
+
+	TP_PROTO(struct dentry *src_dentry, struct dentry *dst_dentry),
+
+	TP_ARGS(src_dentry, dst_dentry),
+
+	TP_STRUCT__entry(
+		__string(src_name, src_dentry->d_name.name)
+		__string(dst_name, dst_dentry->d_name.name)
+	),
+
+	TP_fast_assign(
+		__assign_str(src_name, src_dentry->d_name.name);
+		__assign_str(dst_name, dst_dentry->d_name.name);
+	),
+
+	TP_printk("src name:%s, dst name:%s",
+		  __get_str(src_name),
+		  __get_str(dst_name))
+);
+
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE hmdfs_trace
+#include <trace/define_trace.h>
diff --git a/fs/hmdfs/inode.c b/fs/hmdfs/inode.c
new file mode 100755
index 000000000..8cdedf42d
--- /dev/null
+++ b/fs/hmdfs/inode.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/inode.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include "hmdfs_device_view.h"
+#include "inode.h"
+#include "comm/connection.h"
+
+/**
+ * Rules to generate inode numbers:
+ *
+ * "/", "/device_view", "/merge_view", "/device_view/local", "/device_view/cid"
+ * = DOMAIN {3} : dev_id {29} : HMDFS_ROOT {32}
+ *
+ * "/device_view/cid/xxx"
+ * = DOMAIN {3} : dev_id {29} : hash(remote_ino){32}
+ *
+ * "/merge_view/xxx"
+ * = DOMAIN {3} : lower's dev_id {29} : lower's ino_raw {32}
+ */
+
+#define BIT_WIDE_TOTAL 64
+
+#define BIT_WIDE_DOMAIN 3
+#define BIT_WIDE_DEVID 29
+#define BIT_WIDE_INO_RAW 32
+
+enum DOMAIN {
+	DOMAIN_ROOT,
+	DOMAIN_DEVICE_LOCAL,
+	DOMAIN_DEVICE_REMOTE,
+	DOMAIN_MERGE_VIEW,
+	DOMAIN_INVALID,
+};
+
+union hmdfs_ino {
+	const uint64_t ino_output;
+	struct {
+		uint64_t ino_raw : BIT_WIDE_INO_RAW;
+		uint64_t dev_id : BIT_WIDE_DEVID;
+		uint8_t domain : BIT_WIDE_DOMAIN;
+	};
+};
+
+static uint8_t read_ino_domain(uint64_t ino)
+{
+	union hmdfs_ino _ino = {
+		.ino_output = ino,
+	};
+
+	return _ino.domain;
+}
+
+struct iget_args {
+	/* The lower inode of a local/merge/root(part) inode */
+	struct inode *lo_i;
+	/* The peer of a remote inode */
+	struct hmdfs_peer *peer;
+	/* The ino of a remote inode */
+	uint64_t remote_ino;
+
+	/* Returned inode's ino */
+	union hmdfs_ino ino;
+};
+
+/**
+ * iget_test - whether or not the inode with matched hashval is the one we are
+ * looking for
+ *
+ * @inode: the local inode we found in the inode cache with matched hashval
+ * @data: struct iget_args
+ */
+static int iget_test(struct inode *inode, void *data)
+{
+	struct hmdfs_inode_info *hii = hmdfs_i(inode);
+	struct iget_args *ia = data;
+	int res = 0;
+
+	WARN_ON(ia->ino.domain < DOMAIN_ROOT ||
+		ia->ino.domain >= DOMAIN_INVALID);
+
+	if (read_ino_domain(inode->i_ino) == DOMAIN_ROOT)
+		return 0;
+
+	switch (ia->ino.domain) {
+	case DOMAIN_MERGE_VIEW:
+		res = (ia->lo_i == hii->lower_inode);
+		break;
+	case DOMAIN_DEVICE_LOCAL:
+		res = (ia->lo_i == hii->lower_inode);
+		break;
+	case DOMAIN_DEVICE_REMOTE:
+		res = (ia->peer == hii->conn &&
+		       ia->remote_ino == hii->remote_ino);
+		break;
+	}
+
+	return res;
+}
+
+/**
+ * iget_set - initialize an inode with iget_args
+ *
+ * @inode: the inode to be initialized
+ * @data: struct iget_args
+ */
+static int iget_set(struct inode *inode, void *data)
+{
+	struct hmdfs_inode_info *hii = hmdfs_i(inode);
+	struct iget_args *ia = (struct iget_args *)data;
+
+	inode->i_ino = ia->ino.ino_output;
+	inode_inc_iversion(inode);
+
+	hii->conn = ia->peer;
+	hii->remote_ino = ia->remote_ino;
+	hii->lower_inode = ia->lo_i;
+
+	return 0;
+}
+
+static uint64_t make_ino_raw_dev_local(uint64_t lo_ino)
+{
+	if (!(lo_ino >> BIT_WIDE_INO_RAW))
+		return lo_ino;
+
+	return lo_ino * GOLDEN_RATIO_64 >> BIT_WIDE_INO_RAW;
+}
+
+static uint64_t make_ino_raw_dev_remote(uint64_t remote_ino)
+{
+	return hash_long(remote_ino, BIT_WIDE_INO_RAW);
+}
+
+/**
+ * hmdfs_iget5_locked_merge - obtain an inode for the merge-view
+ *
+ * @sb: superblock of the current instance
+ * @fst_lo_d: the lower dentry of its first comrade
+ *
+ * Simply replace the lower's domain for a new ino.
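+ *
+ * Worked example (hypothetical values, assuming the usual little-endian
+ * bitfield layout of union hmdfs_ino): a lower dentry on dev_id 2 whose
+ * lower inode has ino 0x1000 packs into
+ * ino_output = DOMAIN_MERGE_VIEW << 61 | 2 << 32 | 0x1000
+ *            = 0x6000000200001000.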
+ */
+struct inode *hmdfs_iget5_locked_merge(struct super_block *sb,
+				       struct dentry *fst_lo_d)
+{
+	struct iget_args ia = {
+		.lo_i = d_inode(fst_lo_d),
+		.peer = NULL,
+		.remote_ino = 0,
+		.ino.ino_output = 0,
+	};
+
+	if (unlikely(!d_inode(fst_lo_d))) {
+		hmdfs_err("Received an invalid lower inode");
+		return NULL;
+	}
+
+	ia.ino.ino_raw = d_inode(fst_lo_d)->i_ino;
+	ia.ino.dev_id = hmdfs_d(fst_lo_d)->device_id;
+	ia.ino.domain = DOMAIN_MERGE_VIEW;
+	return iget5_locked(sb, ia.ino.ino_output, iget_test, iget_set, &ia);
+}
+
+/**
+ * hmdfs_iget5_locked_local - obtain an inode for the local-dev-view
+ *
+ * @sb: superblock of the current instance
+ * @lo_i: the lower inode from the local filesystem
+ *
+ * Hash the local inode's ino to generate our ino. We continue to compare the
+ * address of the lower_inode for uniqueness when collisions occur.
+ */
+struct inode *hmdfs_iget5_locked_local(struct super_block *sb,
+				       struct inode *lo_i)
+{
+	struct iget_args ia = {
+		.lo_i = lo_i,
+		.peer = NULL,
+		.remote_ino = 0,
+		.ino.ino_output = 0,
+	};
+
+	if (unlikely(!lo_i)) {
+		hmdfs_err("Received an invalid lower inode");
+		return NULL;
+	}
+	ia.ino.ino_raw = make_ino_raw_dev_local(lo_i->i_ino);
+	ia.ino.dev_id = 0;
+	ia.ino.domain = DOMAIN_DEVICE_LOCAL;
+	return iget5_locked(sb, ia.ino.ino_output, iget_test, iget_set, &ia);
+}
+
+/**
+ * hmdfs_iget5_locked_remote - obtain an inode for the remote-dev-view
+ *
+ * @sb: superblock of the current instance
+ * @peer: corresponding device node
+ * @remote_ino: remote inode's ino
+ *
+ * Hash the remote ino into the ino's low 32 bits.
+ *
+ * Note that the current implementation assumes that each remote inode has a
+ * unique ino. Thus the combination of the peer's unique dev_id and the
+ * remote_ino is enough to determine a unique remote inode.
+ */
+struct inode *hmdfs_iget5_locked_remote(struct super_block *sb,
+					struct hmdfs_peer *peer,
+					uint64_t remote_ino)
+{
+	struct iget_args ia = {
+		.lo_i = NULL,
+		.peer = peer,
+		.remote_ino = remote_ino,
+		.ino.ino_output = 0,
+	};
+
+	if (unlikely(!peer)) {
+		hmdfs_err("Received an invalid peer");
+		return NULL;
+	}
+
+	ia.ino.ino_raw = make_ino_raw_dev_remote(remote_ino);
+	ia.ino.dev_id = peer->device_id;
+	ia.ino.domain = DOMAIN_DEVICE_REMOTE;
+	return iget5_locked(sb, ia.ino.ino_output, iget_test, iget_set, &ia);
+}
+
+struct inode *hmdfs_iget_locked_root(struct super_block *sb, uint64_t root_ino,
+				     struct inode *lo_i,
+				     struct hmdfs_peer *peer)
+{
+	struct iget_args ia = {
+		.lo_i = lo_i,
+		.peer = peer,
+		.remote_ino = 0,
+		.ino.ino_raw = root_ino,
+		.ino.dev_id = peer ? peer->device_id : 0,
+		.ino.domain = DOMAIN_ROOT,
+	};
+
+	if (unlikely(root_ino < 0 || root_ino >= HMDFS_ROOT_INVALID)) {
+		hmdfs_err("Root %llu is invalid", root_ino);
+		return NULL;
+	}
+	if (unlikely(root_ino == HMDFS_ROOT_DEV_REMOTE && !peer)) {
+		hmdfs_err("Root %llu received an invalid peer", root_ino);
+		return NULL;
+	}
+
+	return iget5_locked(sb, ia.ino.ino_output, iget_test, iget_set, &ia);
+}
diff --git a/fs/hmdfs/inode.h b/fs/hmdfs/inode.h
new file mode 100755
index 000000000..47f189f3c
--- /dev/null
+++ b/fs/hmdfs/inode.h
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/inode.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef INODE_H
+#define INODE_H
+
+#include "hmdfs.h"
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0)
+#include <linux/iversion.h>
+#endif
+
+enum {
+	HMDFS_REMOTE_INODE_NONE = 0,
+	HMDFS_REMOTE_INODE_STASHING,
+	HMDFS_REMOTE_INODE_RESTORING,
+};
+
+/*****************************************************************************
+ * fid
+ *****************************************************************************/
+
+/* Bits for fid_flags */
+enum {
+	HMDFS_FID_NEED_OPEN = 0,
+	HMDFS_FID_OPENING,
+};
+
+struct hmdfs_fid {
+	__u64 ver;
+	__u32 id;
+};
+
+/*
+ * The cache file is laid out in the following format:
+ * ________________________________________________________________
+ * |meta file info| remote file(s) path |       file content      |
+ * |     head     |        path         |          data           |
+ *                ↑                     ↑
+ *            path_offs             data_offs
+ */
+struct hmdfs_cache_info {
+	/* Path start offset in file (HMDFS_STASH_BLK_SIZE aligned) */
+	__u32 path_offs;
+	__u32 path_len;
+	__u32 path_cnt;
+	char *path_buf;
+	/* Remote (hardlinked) file path(s), separated by '\0' */
+	char *path;
+	/* Data start offset in file (HMDFS_STASH_BLK_SIZE aligned) */
+	__u32 data_offs;
+	/* # of pages that need to be written to the remote file during offline */
+	atomic64_t to_write_pgs;
+	/* # of pages written to the remote file during offline */
+	atomic64_t written_pgs;
+	/* Stash file handler */
+	struct file *cache_file;
+};
+
+/*****************************************************************************
+ * inode info and its inline helpers
+ *****************************************************************************/
+
+struct hmdfs_inode_info {
+	struct inode *lower_inode; // for local/merge inode
+	struct hmdfs_peer *conn;   // for remote inode
+	struct kref ref;
+	spinlock_t fid_lock;
+	struct hmdfs_fid fid;
+	unsigned long fid_flags;
+	wait_queue_head_t fid_wq;
+	__u8 inode_type; // deprecated: use ino system instead
+
+	/* writeback list */
+	struct list_head wb_list;
+
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+	__u16 perm;
+#endif
+	/*
+	 * Looking up a remote file generates a local inode; in that situation
+	 * this stores the combination of the remote inode number and
+	 * generation, so the uniqueness of the local inode can be determined.
+	 */
+	__u64 remote_ino;
+	/*
+	 * if this value is not ULLONG_MAX, it means that the remote getattr
+	 * syscall should return this value as the inode size.
+	 */
+	__u64 getattr_isize;
+	/*
+	 * this value stores the remote ctime, captured when the remote file
+	 * is opened
+	 */
+	struct hmdfs_time_t remote_ctime;
+	/*
+	 * this value stores the last time, aligned to dcache_precision, that
+	 * the remote file was modified. It should be noted that this value
+	 * won't be effective if writecache_expire is set.
+	 */
+	struct hmdfs_time_t stable_ctime;
+	/*
+	 * If this value is nonzero, the pagecache should be truncated if the
+	 * time the file is opened is beyond the value. Furthermore, the
+	 * functionality of stable_ctime won't be effective.
+	 */
+	unsigned long writecache_expire;
+	/*
+	 * This value records how many times the file has been written while
+	 * it is open. 'writecache_expire' will be set on close if this value
+	 * is nonzero.
+	 */
+	atomic64_t write_counter;
+	/*
+	 * will be linked to hmdfs_peer::wr_opened_inode_list
+	 * if the remote inode is writable-opened. And using
+	 * wr_opened_cnt to track possibly multiple writeable-open.
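+ * (Presumably this list is what the offline-stash path walks when a
+ * peer disconnects, so the dirty state of writable-opened inodes can
+ * be saved; cf. struct hmdfs_cache_info above.)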
+ */
+	struct list_head wr_opened_node;
+	atomic_t wr_opened_cnt;
+	spinlock_t stash_lock;
+	unsigned int stash_status;
+	struct hmdfs_cache_info *cache;
+	/* link to hmdfs_peer::stashed_inode_list when stashing completes */
+	struct list_head stash_node;
+	/*
+	 * The flush/fsync thread will hold the write lock while threads
+	 * calling writepage will hold the read lock. We use an rwsem to
+	 * eliminate the cases where flush/fsync operations complete while
+	 * re-dirtied pages remain dirty.
+	 *
+	 * Here is the explanation in detail:
+	 *
+	 * During `writepage()`, the state of a re-dirtied page will switch
+	 * to the following states in sequence:
+	 * s1: page dirty + tree dirty
+	 * s2: page dirty + tree dirty
+	 * s3: page clean + tree dirty
+	 * s4: page clean + tree clean + write back
+	 * s5: page dirty + tree dirty + write back
+	 * s6: page dirty + tree dirty
+	 *
+	 * A page in state s4 will thus be ignored by the concurrent
+	 * `do_writepages()` contained by `close()`, `fsync()`, making its
+	 * state inconsistent.
+	 *
+	 * To avoid such a situation, we use per-file rwsems to prevent
+	 * concurrent in-flight `writepage` during `close()` or `fsync()`.
+	 *
+	 * Minimal overhead is brought in since rwsems allow concurrent
+	 * `writepage`, while it is natural for `close()` or `fsync()` to
+	 * wait for in-flight `writepage()`s to complete.
+	 *
+	 * NOTE that in the worst case, a process may wait for the wsem for
+	 * TIMEOUT even if a signal is pending. But we have to wait there to
+	 * iterate over all pages and make sure that no dirty page remains.
+	 */
+	struct rw_semaphore wpage_sem;
+
+	// The real inode shared with vfs. ALWAYS PUT IT AT THE BOTTOM.
+	struct inode vfs_inode;
+};
+
+struct hmdfs_readdir_work {
+	struct list_head head;
+	struct dentry *dentry;
+	struct hmdfs_peer *con;
+	struct delayed_work dwork;
+};
+
+static inline struct hmdfs_inode_info *hmdfs_i(struct inode *inode)
+{
+	return container_of(inode, struct hmdfs_inode_info, vfs_inode);
+}
+
+static inline bool hmdfs_inode_is_stashing(const struct hmdfs_inode_info *info)
+{
+	const struct hmdfs_sb_info *sbi = hmdfs_sb(info->vfs_inode.i_sb);
+
+	/* Refer to comments in hmdfs_stash_remote_inode() */
+	return (hmdfs_is_stash_enabled(sbi) &&
+		smp_load_acquire(&info->stash_status)); // protect
+}
+
+static inline void hmdfs_remote_fetch_fid(struct hmdfs_inode_info *info,
+					  struct hmdfs_fid *fid)
+{
+	spin_lock(&info->fid_lock);
+	*fid = info->fid;
+	spin_unlock(&info->fid_lock);
+}
+
+/*****************************************************************************
+ * ino allocator
+ *****************************************************************************/
+
+enum HMDFS_ROOT {
+	HMDFS_ROOT_ANCESTOR = 1, // /
+	HMDFS_ROOT_DEV,          // /device_view
+	HMDFS_ROOT_DEV_LOCAL,    // /device_view/local
+	HMDFS_ROOT_DEV_REMOTE,   // /device_view/remote
+	HMDFS_ROOT_MERGE,        // /merge_view
+
+	HMDFS_ROOT_INVALID,
+};
+
+// delete layer, directory layer, not overlay layer
+enum HMDFS_LAYER_TYPE {
+	HMDFS_LAYER_ZERO = 0,      // /
+	HMDFS_LAYER_FIRST_DEVICE,  // /device_view
+	HMDFS_LAYER_SECOND_LOCAL,  // /device_view/local
+	HMDFS_LAYER_SECOND_REMOTE, // /device_view/remote
+	HMDFS_LAYER_OTHER_LOCAL,   // /device_view/local/xx
+	HMDFS_LAYER_OTHER_REMOTE,  // /device_view/remote/xx
+
+	HMDFS_LAYER_FIRST_MERGE, // /merge_view
+	HMDFS_LAYER_OTHER_MERGE, // /merge_view/xxx
+	HMDFS_LAYER_INVALID,
+};
+
+struct inode *hmdfs_iget_locked_root(struct super_block *sb, uint64_t root_ino,
+				     struct inode *lo_i,
+				     struct hmdfs_peer *peer);
+struct inode *hmdfs_iget5_locked_merge(struct super_block *sb,
+				       struct dentry *fst_lo_d);
+
+struct inode *hmdfs_iget5_locked_local(struct super_block *sb,
+				       struct inode *lo_i);
+struct hmdfs_peer;
+struct inode *hmdfs_iget5_locked_remote(struct super_block *sb,
+					struct hmdfs_peer *peer,
+					uint64_t remote_ino);
+
+#endif // INODE_H
diff --git a/fs/hmdfs/inode_local.c b/fs/hmdfs/inode_local.c
new file mode 100755
index 000000000..cb56b342e
--- /dev/null
+++ b/fs/hmdfs/inode_local.c
@@ -0,0 +1,883 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/inode_local.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include <linux/cred.h>
+#include <linux/file.h>
+#include <linux/fs_stack.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/slab.h>
+
+#include "authority/authentication.h"
+#include "comm/socket_adapter.h"
+#include "comm/transport.h"
+#include "hmdfs_client.h"
+#include "hmdfs_dentryfile.h"
+#include "hmdfs_device_view.h"
+#include "hmdfs_share.h"
+#include "hmdfs_trace.h"
+
+extern struct kmem_cache *hmdfs_dentry_cachep;
+
+struct hmdfs_name_data {
+	struct dir_context ctx;
+	const struct qstr *to_find;
+	char *name;
+	bool found;
+};
+
+int init_hmdfs_dentry_info(struct hmdfs_sb_info *sbi, struct dentry *dentry,
+			   int dentry_type)
+{
+	struct hmdfs_dentry_info *info =
+		kmem_cache_zalloc(hmdfs_dentry_cachep, GFP_ATOMIC);
+
+	if (!info)
+		return -ENOMEM;
+	dentry->d_fsdata = info;
+	INIT_LIST_HEAD(&info->cache_list_head);
+	INIT_LIST_HEAD(&info->remote_cache_list_head);
+	spin_lock_init(&info->cache_list_lock);
+	mutex_init(&info->remote_cache_list_lock);
+	mutex_init(&info->cache_pull_lock);
+	spin_lock_init(&info->lock);
+	info->dentry_type = dentry_type;
+	info->device_id = 0;
+	if (dentry_type == HMDFS_LAYER_ZERO ||
+	    dentry_type == HMDFS_LAYER_FIRST_DEVICE ||
+	    dentry_type == HMDFS_LAYER_SECOND_LOCAL ||
+	    dentry_type == HMDFS_LAYER_SECOND_REMOTE)
+		d_set_d_op(dentry, &hmdfs_dev_dops);
+	else
+		d_set_d_op(dentry, &hmdfs_dops);
+	return 0;
+}
+
+static inline void set_sharefile_flag(struct hmdfs_dentry_info *gdi)
+{
+	gdi->file_type = HM_SHARE;
+}
+
+static void check_and_fixup_share_ops(struct inode *inode,
+				      const char *name)
+{
+	if (is_share_dir(inode, name)) {
+		inode->i_op = &hmdfs_dir_inode_ops_share;
+		inode->i_fop = &hmdfs_dir_ops_share;
+	}
+}
+
+struct inode *fill_inode_local(struct super_block *sb,
+			       struct inode *lower_inode, const char *name)
+{
+	int ret = 0;
+	struct inode *inode;
+	struct hmdfs_inode_info *info;
+
+	if (!igrab(lower_inode))
+		return ERR_PTR(-ESTALE);
+
+	inode = hmdfs_iget5_locked_local(sb, lower_inode);
+	if (!inode) {
+		hmdfs_err("iget5_locked get inode NULL");
+		iput(lower_inode);
+		return ERR_PTR(-ENOMEM);
+	}
+	if (!(inode->i_state & I_NEW)) {
+		iput(lower_inode);
+		return inode;
+	}
+
+	info = hmdfs_i(inode);
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+	info->perm = hmdfs_read_perm(lower_inode);
+#endif
+	if (S_ISDIR(lower_inode->i_mode))
+		inode->i_mode = (lower_inode->i_mode & S_IFMT) | S_IRWXU |
+				S_IRWXG | S_IXOTH;
+	else if (S_ISREG(lower_inode->i_mode))
+		inode->i_mode = (lower_inode->i_mode & S_IFMT) | S_IRUSR |
+				S_IWUSR | S_IRGRP | S_IWGRP;
+
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+	inode->i_uid = lower_inode->i_uid;
+	inode->i_gid = lower_inode->i_gid;
+#else
+	inode->i_uid = KUIDT_INIT((uid_t)1000);
+	inode->i_gid = KGIDT_INIT((gid_t)1000);
+#endif
+	inode->i_atime = lower_inode->i_atime;
+	inode->i_ctime = lower_inode->i_ctime;
+	inode->i_mtime = lower_inode->i_mtime;
+	inode->i_generation = lower_inode->i_generation;
+
+	info->inode_type = HMDFS_LAYER_OTHER_LOCAL;
+	if (S_ISDIR(lower_inode->i_mode)) {
+		inode->i_op = &hmdfs_dir_inode_ops_local;
+ inode->i_fop = &hmdfs_dir_ops_local; + inode->i_mode |= S_IXUGO; + } else if (S_ISREG(lower_inode->i_mode)) { + inode->i_op = &hmdfs_file_iops_local; + inode->i_fop = &hmdfs_file_fops_local; + } else { + ret = -EIO; + goto bad_inode; + } + + fsstack_copy_inode_size(inode, lower_inode); + check_and_fixup_share_ops(inode, name); + unlock_new_inode(inode); + return inode; +bad_inode: + iget_failed(inode); + return ERR_PTR(ret); +} + +/* hmdfs_convert_lookup_flags - covert hmdfs lookup flags to vfs lookup flags + * + * @hmdfs_flags: hmdfs lookup flags + * @vfs_flags: pointer to converted flags + * + * return 0 on success, or err code on failure. + */ +int hmdfs_convert_lookup_flags(unsigned int hmdfs_flags, + unsigned int *vfs_flags) +{ + *vfs_flags = 0; + + /* currently only support HMDFS_LOOKUP_REVAL */ + if (hmdfs_flags & ~HMDFS_LOOKUP_REVAL) + return -EINVAL; + + if (hmdfs_flags & HMDFS_LOOKUP_REVAL) + *vfs_flags |= LOOKUP_REVAL; + + return 0; +} + +static int hmdfs_name_match(struct dir_context *ctx, const char *name, + int namelen, loff_t offset, u64 ino, + unsigned int d_type) +{ + struct hmdfs_name_data *buf = + container_of(ctx, struct hmdfs_name_data, ctx); + struct qstr candidate = QSTR_INIT(name, namelen); + + if (qstr_case_eq(buf->to_find, &candidate)) { + memcpy(buf->name, name, namelen); + buf->name[namelen] = 0; + buf->found = true; + return 1; + } + return 0; +} + +static int __lookup_nosensitive(struct path *lower_parent_path, + struct dentry *child_dentry, unsigned int flags, + struct path *lower_path) +{ + struct file *file; + const struct cred *cred = current_cred(); + const struct qstr *name = &child_dentry->d_name; + int err; + struct hmdfs_name_data buffer = { + .ctx.actor = hmdfs_name_match, + .to_find = name, + .name = __getname(), + .found = false, + }; + + if (!buffer.name) { + err = -ENOMEM; + goto out; + } + file = dentry_open(lower_parent_path, O_RDONLY, cred); + if (IS_ERR(file)) { + err = PTR_ERR(file); + goto put_name; + } + err = iterate_dir(file, &buffer.ctx); + fput(file); + if (err) + goto put_name; + if (buffer.found) + err = vfs_path_lookup(lower_parent_path->dentry, + lower_parent_path->mnt, buffer.name, + flags, lower_path); + else + err = -ENOENT; +put_name: + __putname(buffer.name); +out: + return err; +} + +struct dentry *hmdfs_lookup_local(struct inode *parent_inode, + struct dentry *child_dentry, + unsigned int flags) +{ + const char *d_name = child_dentry->d_name.name; + int err = 0; + struct path lower_path, lower_parent_path; + struct dentry *lower_dentry = NULL, *parent_dentry = NULL, *ret = NULL; + struct hmdfs_dentry_info *gdi = NULL; + struct inode *child_inode = NULL; + struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb); + + trace_hmdfs_lookup_local(parent_inode, child_dentry, flags); + if (child_dentry->d_name.len > NAME_MAX) { + ret = ERR_PTR(-ENAMETOOLONG); + goto out; + } + + /* local device */ + parent_dentry = dget_parent(child_dentry); + hmdfs_get_lower_path(parent_dentry, &lower_parent_path); + err = init_hmdfs_dentry_info(sbi, child_dentry, + HMDFS_LAYER_OTHER_LOCAL); + if (err) { + ret = ERR_PTR(err); + goto out_err; + } + + gdi = hmdfs_d(child_dentry); + + flags &= ~LOOKUP_FOLLOW; + err = vfs_path_lookup(lower_parent_path.dentry, lower_parent_path.mnt, + (child_dentry->d_name.name), 0, &lower_path); + if (err == -ENOENT && !sbi->s_case_sensitive) + err = __lookup_nosensitive(&lower_parent_path, child_dentry, 0, + &lower_path); + if (err && err != -ENOENT) { + ret = ERR_PTR(err); + goto out_err; + } else if (!err) { + 
hmdfs_set_lower_path(child_dentry, &lower_path); + child_inode = fill_inode_local(parent_inode->i_sb, + d_inode(lower_path.dentry), + child_dentry->d_name.name); + + if (IS_ERR(child_inode)) { + err = PTR_ERR(child_inode); + ret = ERR_PTR(err); + hmdfs_put_reset_lower_path(child_dentry); + goto out_err; + } + ret = d_splice_alias(child_inode, child_dentry); + if (IS_ERR(ret)) { + err = PTR_ERR(ret); + hmdfs_put_reset_lower_path(child_dentry); + goto out_err; + } + + check_and_fixup_ownership(parent_inode, child_inode); + goto out_err; + } + /* + * return 0 here, so that vfs can continue the process of making this + * negative dentry to a positive one while creating a new file. + */ + err = 0; + ret = 0; + + lower_dentry = lookup_one_len_unlocked(d_name, lower_parent_path.dentry, + child_dentry->d_name.len); + if (IS_ERR(lower_dentry)) { + err = PTR_ERR(lower_dentry); + ret = lower_dentry; + goto out_err; + } + lower_path.dentry = lower_dentry; + lower_path.mnt = mntget(lower_parent_path.mnt); + hmdfs_set_lower_path(child_dentry, &lower_path); + +out_err: + if (!err) + hmdfs_set_time(child_dentry, jiffies); + hmdfs_put_lower_path(&lower_parent_path); + dput(parent_dentry); +out: + trace_hmdfs_lookup_local_end(parent_inode, child_dentry, err); + return ret; +} + +int hmdfs_mkdir_local_dentry(struct inode *dir, struct dentry *dentry, + umode_t mode) +{ + struct inode *lower_dir = hmdfs_i(dir)->lower_inode; + struct dentry *lower_dir_dentry = NULL; + struct super_block *sb = dir->i_sb; + struct path lower_path; + struct dentry *lower_dentry = NULL; + int error = 0; + struct inode *lower_inode = NULL; + struct inode *child_inode = NULL; + bool local_res = false; + struct cache_fs_override or; + __u16 child_perm; + kuid_t tmp_uid; + + error = hmdfs_override_dir_id_fs(&or, dir, dentry, &child_perm); + if (error) + goto cleanup; + + hmdfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_dir_dentry = lock_parent(lower_dentry); + + tmp_uid = hmdfs_override_inode_uid(lower_dir); + mode = (mode & S_IFMT) | 00771; + + error = vfs_mkdir(&init_user_ns, lower_dir, lower_dentry, mode); + hmdfs_revert_inode_uid(lower_dir, tmp_uid); + if (error) { + hmdfs_err("vfs_mkdir() error:%d", error); + goto out; + } + local_res = true; + lower_inode = d_inode(lower_dentry); +#ifdef CONFIG_HMDFS_FS_PERMISSION + error = hmdfs_persist_perm(lower_dentry, &child_perm); +#endif + child_inode = fill_inode_local(sb, lower_inode, dentry->d_name.name); + if (IS_ERR(child_inode)) { + error = PTR_ERR(child_inode); + goto out; + } + d_add(dentry, child_inode); + set_nlink(dir, hmdfs_i(dir)->lower_inode->i_nlink); +out: + unlock_dir(lower_dir_dentry); + if (local_res) + hmdfs_drop_remote_cache_dents(dentry->d_parent); + + if (error) { + hmdfs_clear_drop_flag(dentry->d_parent); + d_drop(dentry); + } + hmdfs_put_lower_path(&lower_path); + hmdfs_revert_dir_id_fs(&or); +cleanup: + return error; +} + +int hmdfs_mkdir_local(struct user_namespace *mnt_userns, struct inode *dir, struct dentry *dentry, umode_t mode) +{ + int err = 0; + + if (check_filename(dentry->d_name.name, dentry->d_name.len)) { + err = -EINVAL; + return err; + } + + if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) { + err = -EACCES; + return err; + } + err = hmdfs_mkdir_local_dentry(dir, dentry, mode); + trace_hmdfs_mkdir_local(dir, dentry, err); + return err; +} + +int hmdfs_create_local_dentry(struct inode *dir, struct dentry *dentry, + umode_t mode, bool want_excl) +{ + struct inode *lower_dir = NULL; + struct dentry 
*lower_dir_dentry = NULL; + struct super_block *sb = dir->i_sb; + struct path lower_path; + struct dentry *lower_dentry = NULL; + int error = 0; + struct inode *lower_inode = NULL; + struct inode *child_inode = NULL; + kuid_t tmp_uid; +#ifdef CONFIG_HMDFS_FS_PERMISSION + const struct cred *saved_cred = NULL; + struct fs_struct *saved_fs = NULL, *copied_fs = NULL; + __u16 child_perm; +#endif + +#ifdef CONFIG_HMDFS_FS_PERMISSION + saved_cred = hmdfs_override_file_fsids(dir, &child_perm); + if (!saved_cred) { + error = -ENOMEM; + goto path_err; + } + + saved_fs = current->fs; + copied_fs = hmdfs_override_fsstruct(saved_fs); + if (!copied_fs) { + error = -ENOMEM; + goto revert_fsids; + } +#endif + hmdfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + mode = (mode & S_IFMT) | 00660; + lower_dir_dentry = lock_parent(lower_dentry); + lower_dir = d_inode(lower_dir_dentry); + tmp_uid = hmdfs_override_inode_uid(lower_dir); + error = vfs_create(&init_user_ns, lower_dir, lower_dentry, mode, want_excl); + hmdfs_revert_inode_uid(lower_dir, tmp_uid); + unlock_dir(lower_dir_dentry); + if (error) + goto out; + + lower_inode = d_inode(lower_dentry); +#ifdef CONFIG_HMDFS_FS_PERMISSION + error = hmdfs_persist_perm(lower_dentry, &child_perm); +#endif + child_inode = fill_inode_local(sb, lower_inode, dentry->d_name.name); + if (IS_ERR(child_inode)) { + error = PTR_ERR(child_inode); + goto out_created; + } + d_add(dentry, child_inode); + +out_created: + hmdfs_drop_remote_cache_dents(dentry->d_parent); +out: + if (error) { + hmdfs_clear_drop_flag(dentry->d_parent); + d_drop(dentry); + } + hmdfs_put_lower_path(&lower_path); + +#ifdef CONFIG_HMDFS_FS_PERMISSION + hmdfs_revert_fsstruct(saved_fs, copied_fs); +revert_fsids: + hmdfs_revert_fsids(saved_cred); +#endif +#ifdef CONFIG_HMDFS_FS_PERMISSION +path_err: +#endif + return error; +} + +int hmdfs_create_local(struct user_namespace *mnt_userns, struct inode *dir, struct dentry *child_dentry, + umode_t mode, bool want_excl) +{ + int err = 0; + + if (check_filename(child_dentry->d_name.name, + child_dentry->d_name.len)) { + err = -EINVAL; + return err; + } + + if (hmdfs_file_type(child_dentry->d_name.name) != HMDFS_TYPE_COMMON) { + err = -EACCES; + return err; + } + + err = hmdfs_create_local_dentry(dir, child_dentry, mode, want_excl); + trace_hmdfs_create_local(dir, child_dentry, err); + return err; +} + +int hmdfs_rmdir_local_dentry(struct inode *dir, struct dentry *dentry) +{ + struct inode *lower_dir = NULL; + struct dentry *lower_dir_dentry = NULL; + kuid_t tmp_uid; + struct path lower_path; + struct dentry *lower_dentry = NULL; + int error = 0; + + hmdfs_clear_cache_dents(dentry, true); + hmdfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_dir_dentry = lock_parent(lower_dentry); + lower_dir = d_inode(lower_dir_dentry); + tmp_uid = hmdfs_override_inode_uid(lower_dir); + + error = vfs_rmdir(&init_user_ns, lower_dir, lower_dentry); + hmdfs_revert_inode_uid(lower_dir, tmp_uid); + unlock_dir(lower_dir_dentry); + hmdfs_put_lower_path(&lower_path); + if (error) + goto path_err; + hmdfs_drop_remote_cache_dents(dentry->d_parent); +path_err: + if (error) + hmdfs_clear_drop_flag(dentry->d_parent); + return error; +} + +int hmdfs_rmdir_local(struct inode *dir, struct dentry *dentry) +{ + int err = 0; + + if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) { + err = -EACCES; + goto out; + } + + err = hmdfs_rmdir_local_dentry(dir, dentry); + if (err != 0) { + hmdfs_err("rm dir failed:%d", err); + goto 
out; + } + + /* drop dentry even remote failed + * it maybe cause that one remote devices disconnect + * when doing remote rmdir + */ + d_drop(dentry); +out: + /* return connect device's errcode */ + trace_hmdfs_rmdir_local(dir, dentry, err); + return err; +} + +int hmdfs_unlink_local_dentry(struct inode *dir, struct dentry *dentry) +{ + struct inode *lower_dir = hmdfs_i(dir)->lower_inode; + struct dentry *lower_dir_dentry = NULL; + struct path lower_path; + struct dentry *lower_dentry = NULL; + int error; + kuid_t tmp_uid; + + hmdfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + dget(lower_dentry); + lower_dir_dentry = lock_parent(lower_dentry); + tmp_uid = hmdfs_override_inode_uid(lower_dir); + error = vfs_unlink(&init_user_ns, lower_dir, lower_dentry, NULL); + hmdfs_revert_inode_uid(lower_dir, tmp_uid); + set_nlink(d_inode(dentry), + hmdfs_i(d_inode(dentry))->lower_inode->i_nlink); + unlock_dir(lower_dir_dentry); + dput(lower_dentry); + if (error) + goto path_err; + + hmdfs_drop_remote_cache_dents(dentry->d_parent); + d_drop(dentry); + hmdfs_put_lower_path(&lower_path); + +path_err: + if (error) + hmdfs_clear_drop_flag(dentry->d_parent); + return error; +} + +int hmdfs_unlink_local(struct inode *dir, struct dentry *dentry) +{ + if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) + return -EACCES; + + return hmdfs_unlink_local_dentry(dir, dentry); +} + +int hmdfs_rename_local_dentry(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + struct path lower_old_path; + struct path lower_new_path; + struct dentry *lower_old_dentry = NULL; + struct dentry *lower_new_dentry = NULL; + struct dentry *lower_old_dir_dentry = NULL; + struct dentry *lower_new_dir_dentry = NULL; + struct dentry *trap = NULL; + struct renamedata rd; + int rc = 0; + kuid_t old_dir_uid, new_dir_uid; + + if (flags) + return -EINVAL; + + hmdfs_get_lower_path(old_dentry, &lower_old_path); + lower_old_dentry = lower_old_path.dentry; + if (!lower_old_dentry) { + hmdfs_err("lower_old_dentry as NULL"); + rc = -EACCES; + goto out_put_old_path; + } + + hmdfs_get_lower_path(new_dentry, &lower_new_path); + lower_new_dentry = lower_new_path.dentry; + if (!lower_new_dentry) { + hmdfs_err("lower_new_dentry as NULL"); + rc = -EACCES; + goto out_put_new_path; + } + + lower_old_dir_dentry = dget_parent(lower_old_dentry); + lower_new_dir_dentry = dget_parent(lower_new_dentry); + trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); + old_dir_uid = hmdfs_override_inode_uid(d_inode(lower_old_dir_dentry)); + new_dir_uid = hmdfs_override_inode_uid(d_inode(lower_new_dir_dentry)); + + /* source should not be ancestor of target */ + if (trap == lower_old_dentry) { + rc = -EINVAL; + goto out_lock; + } + /* target should not be ancestor of source */ + if (trap == lower_new_dentry) { + rc = -ENOTEMPTY; + goto out_lock; + } + + rd.old_mnt_userns = &init_user_ns; + rd.old_dir = d_inode(lower_old_dir_dentry); + rd.old_dentry = lower_old_dentry; + rd.new_mnt_userns = &init_user_ns; + rd.new_dir = d_inode(lower_new_dir_dentry); + rd.new_dentry = lower_new_dentry; + + rc = vfs_rename(&rd); +out_lock: + dget(old_dentry); + + hmdfs_revert_inode_uid(d_inode(lower_old_dir_dentry), old_dir_uid); + hmdfs_revert_inode_uid(d_inode(lower_new_dir_dentry), new_dir_uid); + + unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); + if (rc == 0) { + hmdfs_drop_remote_cache_dents(old_dentry->d_parent); + if (old_dentry->d_parent != 
new_dentry->d_parent) + hmdfs_drop_remote_cache_dents(new_dentry->d_parent); + } else { + hmdfs_clear_drop_flag(old_dentry->d_parent); + if (old_dentry->d_parent != new_dentry->d_parent) + hmdfs_clear_drop_flag(old_dentry->d_parent); + d_drop(new_dentry); + } + + dput(old_dentry); + dput(lower_old_dir_dentry); + dput(lower_new_dir_dentry); + +out_put_new_path: + hmdfs_put_lower_path(&lower_new_path); +out_put_old_path: + hmdfs_put_lower_path(&lower_old_path); + return rc; +} + +int hmdfs_rename_local(struct user_namespace *mnt_userns, struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + int err = 0; + int ret = 0; + + trace_hmdfs_rename_local(old_dir, old_dentry, new_dir, new_dentry, + flags); + if (hmdfs_file_type(old_dentry->d_name.name) != HMDFS_TYPE_COMMON || + hmdfs_file_type(new_dentry->d_name.name) != HMDFS_TYPE_COMMON) { + err = -EACCES; + goto rename_out; + } + + if (S_ISREG(old_dentry->d_inode->i_mode)) { + err = hmdfs_rename_local_dentry(old_dir, old_dentry, new_dir, + new_dentry, flags); + } else if (S_ISDIR(old_dentry->d_inode->i_mode)) { + ret = hmdfs_rename_local_dentry(old_dir, old_dentry, new_dir, + new_dentry, flags); + if (ret != 0) { + err = ret; + goto rename_out; + } + } + + if (!err) + d_invalidate(old_dentry); + +rename_out: + return err; +} + +static int hmdfs_setattr_local(struct user_namespace *mnt_userns, struct dentry *dentry, struct iattr *ia) +{ + struct inode *inode = d_inode(dentry); + struct inode *lower_inode = hmdfs_i(inode)->lower_inode; + struct path lower_path; + struct dentry *lower_dentry = NULL; + struct iattr lower_ia; + unsigned int ia_valid = ia->ia_valid; + int err = 0; + kuid_t tmp_uid; + + hmdfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + memcpy(&lower_ia, ia, sizeof(lower_ia)); + if (ia_valid & ATTR_FILE) + lower_ia.ia_file = hmdfs_f(ia->ia_file)->lower_file; + lower_ia.ia_valid &= ~(ATTR_UID | ATTR_GID | ATTR_MODE); + if (ia_valid & ATTR_SIZE) { + err = inode_newsize_ok(inode, ia->ia_size); + if (err) + goto out; + truncate_setsize(inode, ia->ia_size); + } + inode_lock(lower_inode); + tmp_uid = hmdfs_override_inode_uid(lower_inode); + + err = notify_change(&init_user_ns, lower_dentry, &lower_ia, NULL); + i_size_write(inode, i_size_read(lower_inode)); + inode->i_atime = lower_inode->i_atime; + inode->i_mtime = lower_inode->i_mtime; + inode->i_ctime = lower_inode->i_ctime; + err = update_inode_to_dentry(dentry, inode); + hmdfs_revert_inode_uid(lower_inode, tmp_uid); + + inode_unlock(lower_inode); +out: + hmdfs_put_lower_path(&lower_path); + return err; +} + +static int hmdfs_getattr_local(struct user_namespace *mnt_userns, const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) +{ + struct path lower_path; + int ret; + + hmdfs_get_lower_path(path->dentry, &lower_path); + ret = vfs_getattr(&lower_path, stat, request_mask, flags); + stat->ino = d_inode(path->dentry)->i_ino; + stat->uid = d_inode(path->dentry)->i_uid; + stat->gid = d_inode(path->dentry)->i_gid; + hmdfs_put_lower_path(&lower_path); + + return ret; +} + +int hmdfs_permission(struct user_namespace *mnt_userns, struct inode *inode, int mask) +{ +#ifdef CONFIG_HMDFS_FS_PERMISSION + unsigned int mode = inode->i_mode; + kuid_t cur_uid = current_fsuid(); + + if (uid_eq(cur_uid, ROOT_UID) || uid_eq(cur_uid, SYSTEM_UID)) + return 0; + + if (uid_eq(cur_uid, inode->i_uid)) { + mode >>= 6; + } else if (in_group_p(inode->i_gid)) { + mode >>= 3; + } + + if 
((mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) + return 0; + + trace_hmdfs_permission(inode->i_ino); + return -EACCES; +#else + + return 0; +#endif +} + +static ssize_t hmdfs_local_listxattr(struct dentry *dentry, char *list, + size_t size) +{ + struct path lower_path; + ssize_t res = 0; + size_t r_size = size; + + if (!hmdfs_support_xattr(dentry)) + return -EOPNOTSUPP; + + if (size > HMDFS_LISTXATTR_SIZE_MAX) + r_size = HMDFS_LISTXATTR_SIZE_MAX; + + hmdfs_get_lower_path(dentry, &lower_path); + res = vfs_listxattr(lower_path.dentry, list, r_size); + hmdfs_put_lower_path(&lower_path); + + if (res == -ERANGE && r_size != size) { + hmdfs_info("no support listxattr size over than %d", + HMDFS_LISTXATTR_SIZE_MAX); + res = -E2BIG; + } + + return res; +} +struct dentry *hmdfs_lookup_share(struct inode *parent_inode, + struct dentry *child_dentry, unsigned int flags) +{ + const struct qstr *d_name = &child_dentry->d_name; + int err = 0; + struct dentry *ret = NULL; + struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb); + struct path src_path; + struct inode *child_inode = NULL; + + trace_hmdfs_lookup_share(parent_inode, child_dentry, flags); + if (d_name->len > NAME_MAX) { + ret = ERR_PTR(-ENAMETOOLONG); + goto err_out; + } + + err = init_hmdfs_dentry_info(sbi, child_dentry, HMDFS_LAYER_OTHER_LOCAL); + if (err) { + ret = ERR_PTR(err); + goto err_out; + } + + err = get_path_from_share_table(sbi, child_dentry, &src_path); + if (err) { + ret = ERR_PTR(err); + goto err_out; + } + + hmdfs_set_lower_path(child_dentry, &src_path); + child_inode = fill_inode_local(parent_inode->i_sb, + d_inode(src_path.dentry), d_name->name); + + set_sharefile_flag(hmdfs_d(child_dentry)); + + if (IS_ERR(child_inode)) { + err = PTR_ERR(child_inode); + ret = ERR_PTR(err); + hmdfs_put_reset_lower_path(child_dentry); + goto err_out; + } + ret = d_splice_alias(child_inode, child_dentry); + if (IS_ERR(ret)) { + err = PTR_ERR(ret); + hmdfs_put_reset_lower_path(child_dentry); + goto err_out; + } + + check_and_fixup_ownership(parent_inode, child_inode); + +err_out: + trace_hmdfs_lookup_share_end(parent_inode, child_dentry, err); + return ret; +} + +const struct inode_operations hmdfs_dir_inode_ops_local = { + .lookup = hmdfs_lookup_local, + .mkdir = hmdfs_mkdir_local, + .create = hmdfs_create_local, + .rmdir = hmdfs_rmdir_local, + .unlink = hmdfs_unlink_local, + .rename = hmdfs_rename_local, + .permission = hmdfs_permission, + .setattr = hmdfs_setattr_local, + .getattr = hmdfs_getattr_local, +}; + +const struct inode_operations hmdfs_dir_inode_ops_share = { + .lookup = hmdfs_lookup_share, + .permission = hmdfs_permission, +}; + +const struct inode_operations hmdfs_file_iops_local = { + .setattr = hmdfs_setattr_local, + .getattr = hmdfs_getattr_local, + .permission = hmdfs_permission, + .listxattr = hmdfs_local_listxattr, +}; diff --git a/fs/hmdfs/inode_merge.c b/fs/hmdfs/inode_merge.c new file mode 100755 index 000000000..ea0459479 --- /dev/null +++ b/fs/hmdfs/inode_merge.c @@ -0,0 +1,1401 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/inode_merge.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
+ */ + +#include "hmdfs_merge_view.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "authority/authentication.h" +#include "hmdfs_trace.h" + +struct kmem_cache *hmdfs_dentry_merge_cachep; + +struct dentry *hmdfs_get_fst_lo_d(struct dentry *dentry) +{ + struct hmdfs_dentry_info_merge *dim = hmdfs_dm(dentry); + struct hmdfs_dentry_comrade *comrade = NULL; + struct dentry *d = NULL; + + mutex_lock(&dim->comrade_list_lock); + comrade = list_first_entry_or_null(&dim->comrade_list, + struct hmdfs_dentry_comrade, list); + if (comrade) + d = dget(comrade->lo_d); + mutex_unlock(&dim->comrade_list_lock); + return d; +} + +struct dentry *hmdfs_get_lo_d(struct dentry *dentry, int dev_id) +{ + struct hmdfs_dentry_info_merge *dim = hmdfs_dm(dentry); + struct hmdfs_dentry_comrade *comrade = NULL; + struct dentry *d = NULL; + + mutex_lock(&dim->comrade_list_lock); + list_for_each_entry(comrade, &dim->comrade_list, list) { + if (comrade->dev_id == dev_id) { + d = dget(comrade->lo_d); + break; + } + } + mutex_unlock(&dim->comrade_list_lock); + return d; +} + +static void update_inode_attr(struct inode *inode, struct dentry *child_dentry) +{ + struct inode *li = NULL; + struct hmdfs_dentry_info_merge *cdi = hmdfs_dm(child_dentry); + struct hmdfs_dentry_comrade *comrade = NULL; + struct hmdfs_dentry_comrade *fst_comrade = NULL; + + mutex_lock(&cdi->comrade_list_lock); + fst_comrade = list_first_entry(&cdi->comrade_list, + struct hmdfs_dentry_comrade, list); + list_for_each_entry(comrade, &cdi->comrade_list, list) { + li = d_inode(comrade->lo_d); + if (!li) + continue; + + if (comrade == fst_comrade) { + inode->i_atime = li->i_atime; + inode->i_ctime = li->i_ctime; + inode->i_mtime = li->i_mtime; + inode->i_size = li->i_size; + continue; + } + + if (hmdfs_time_compare(&inode->i_mtime, &li->i_mtime) < 0) + inode->i_mtime = li->i_mtime; + } + mutex_unlock(&cdi->comrade_list_lock); +} + +static int get_num_comrades(struct dentry *dentry) +{ + struct list_head *pos; + struct hmdfs_dentry_info_merge *dim = hmdfs_dm(dentry); + int count = 0; + + mutex_lock(&dim->comrade_list_lock); + list_for_each(pos, &dim->comrade_list) + count++; + mutex_unlock(&dim->comrade_list_lock); + return count; +} + +static struct inode *fill_inode_merge(struct super_block *sb, + struct inode *parent_inode, + struct dentry *child_dentry, + struct dentry *lo_d_dentry) +{ + int ret = 0; + struct dentry *fst_lo_d = NULL; + struct hmdfs_inode_info *info = NULL; + struct inode *inode = NULL; + umode_t mode; + + if (lo_d_dentry) { + fst_lo_d = lo_d_dentry; + dget(fst_lo_d); + } else { + fst_lo_d = hmdfs_get_fst_lo_d(child_dentry); + } + if (!fst_lo_d) { + inode = ERR_PTR(-EINVAL); + goto out; + } + if (hmdfs_i(parent_inode)->inode_type == HMDFS_LAYER_ZERO) + inode = hmdfs_iget_locked_root(sb, HMDFS_ROOT_MERGE, NULL, + NULL); + else + inode = hmdfs_iget5_locked_merge(sb, fst_lo_d); + if (!inode) { + hmdfs_err("iget5_locked get inode NULL"); + inode = ERR_PTR(-ENOMEM); + goto out; + } + if (!(inode->i_state & I_NEW)) + goto out; + info = hmdfs_i(inode); + if (hmdfs_i(parent_inode)->inode_type == HMDFS_LAYER_ZERO) + info->inode_type = HMDFS_LAYER_FIRST_MERGE; + else + info->inode_type = HMDFS_LAYER_OTHER_MERGE; + + inode->i_uid = KUIDT_INIT((uid_t)1000); + inode->i_gid = KGIDT_INIT((gid_t)1000); + + update_inode_attr(inode, child_dentry); + mode = d_inode(fst_lo_d)->i_mode; + + if (S_ISREG(mode)) { + inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; + inode->i_op = 
&hmdfs_file_iops_merge;
+		inode->i_fop = &hmdfs_file_fops_merge;
+		set_nlink(inode, 1);
+	} else if (S_ISDIR(mode)) {
+		inode->i_mode = S_IFDIR | S_IRWXU | S_IRWXG | S_IXOTH;
+		inode->i_op = &hmdfs_dir_iops_merge;
+		inode->i_fop = &hmdfs_dir_fops_merge;
+		set_nlink(inode, get_num_comrades(child_dentry) + 2);
+	} else {
+		ret = -EIO;
+		goto bad_inode;
+	}
+
+	unlock_new_inode(inode);
+out:
+	dput(fst_lo_d);
+	return inode;
+bad_inode:
+	iget_failed(inode);
+	return ERR_PTR(ret);
+}
+
+struct hmdfs_dentry_comrade *alloc_comrade(struct dentry *lo_d, int dev_id)
+{
+	struct hmdfs_dentry_comrade *comrade = NULL;
+
+	// a file has only one comrade; consider folding {comrade, list + list lock}
+	comrade = kzalloc(sizeof(*comrade), GFP_KERNEL);
+	if (unlikely(!comrade))
+		return ERR_PTR(-ENOMEM);
+
+	comrade->lo_d = lo_d;
+	comrade->dev_id = dev_id;
+	dget(lo_d);
+	return comrade;
+}
+
+void link_comrade(struct list_head *onstack_comrades_head,
+		  struct hmdfs_dentry_comrade *comrade)
+{
+	struct hmdfs_dentry_comrade *c = NULL;
+
+	list_for_each_entry(c, onstack_comrades_head, list) {
+		if (likely(c->dev_id != comrade->dev_id))
+			continue;
+		hmdfs_err("Redundant comrade of device %llu", c->dev_id);
+		dput(comrade->lo_d);
+		kfree(comrade);
+		WARN_ON(1);
+		return;
+	}
+
+	if (comrade_is_local(comrade))
+		list_add(&comrade->list, onstack_comrades_head);
+	else
+		list_add_tail(&comrade->list, onstack_comrades_head);
+}
+
+/**
+ * assign_comrades_unlocked - assign a child dentry with comrades
+ *
+ * We tend to set up a local list of all the comrades we found and place the
+ * list onto the dentry_info to achieve atomicity.
+ */
+static void assign_comrades_unlocked(struct dentry *child_dentry,
+				     struct list_head *onstack_comrades_head)
+{
+	struct hmdfs_dentry_info_merge *cdi = hmdfs_dm(child_dentry);
+
+	mutex_lock(&cdi->comrade_list_lock);
+	WARN_ON(!list_empty(&cdi->comrade_list));
+	list_splice_init(onstack_comrades_head, &cdi->comrade_list);
+	mutex_unlock(&cdi->comrade_list_lock);
+}
+
+static struct hmdfs_dentry_comrade *lookup_comrade(struct path lower_path,
+						   const char *d_name,
+						   int dev_id,
+						   unsigned int flags)
+{
+	struct path path;
+	struct hmdfs_dentry_comrade *comrade = NULL;
+	int err;
+
+	err = vfs_path_lookup(lower_path.dentry, lower_path.mnt, d_name, flags,
+			      &path);
+	if (err)
+		return ERR_PTR(err);
+
+	comrade = alloc_comrade(path.dentry, dev_id);
+	path_put(&path);
+	return comrade;
+}
+
+/**
+ * conf_name_trans_nop - do nothing but copy
+ *
+ * WARNING: always check before translation
+ */
+static char *conf_name_trans_nop(struct dentry *d)
+{
+	return kstrndup(d->d_name.name, d->d_name.len, GFP_KERNEL);
+}
+
+/**
+ * conf_name_trans_dir - conflicted name translation for directory
+ *
+ * WARNING: always check before translation
+ */
+static char *conf_name_trans_dir(struct dentry *d)
+{
+	int len = d->d_name.len - strlen(CONFLICTING_DIR_SUFFIX);
+
+	return kstrndup(d->d_name.name, len, GFP_KERNEL);
+}
+
+/**
+ * conf_name_trans_reg - conflicted name translation for regular file
+ *
+ * WARNING: always check before translation
+ */
+static char *conf_name_trans_reg(struct dentry *d, int *dev_id)
+{
+	int dot_pos, start_cpy_pos, num_len, i;
+	int len = d->d_name.len;
+	char *name = kstrndup(d->d_name.name, d->d_name.len, GFP_KERNEL);
+
+	if (unlikely(!name))
+		return NULL;
+
+	// find the last dot if possible
+	for (dot_pos = len - 1; dot_pos >= 0; dot_pos--) {
+		if (name[dot_pos] == '.')
+			break;
+	}
+	if (dot_pos == -1)
+		dot_pos = len;
+
+	// retrieve the conf sn (i.e.
dev_id) + num_len = 0; + for (i = dot_pos - 1; i >= 0; i--) { + if (name[i] >= '0' && name[i] <= '9') + num_len++; + else + break; + } + + *dev_id = 0; + for (i = 0; i < num_len; i++) + *dev_id = *dev_id * 10 + name[dot_pos - num_len + i] - '0'; + + // move the file suffix( '\0' included) right after the file name + start_cpy_pos = + dot_pos - num_len - strlen(CONFLICTING_FILE_CONST_SUFFIX); + memmove(name + start_cpy_pos, name + dot_pos, len - dot_pos + 1); + return name; +} + +int check_filename(const char *name, int len) +{ + int cmp_res = 0; + + if (len >= strlen(CONFLICTING_DIR_SUFFIX)) { + cmp_res = strncmp(name + len - strlen(CONFLICTING_DIR_SUFFIX), + CONFLICTING_DIR_SUFFIX, + strlen(CONFLICTING_DIR_SUFFIX)); + if (cmp_res == 0) + return DT_DIR; + } + + if (len >= strlen(CONFLICTING_FILE_CONST_SUFFIX)) { + int dot_pos, start_cmp_pos, num_len, i; + + for (dot_pos = len - 1; dot_pos >= 0; dot_pos--) { + if (name[dot_pos] == '.') + break; + } + if (dot_pos == -1) + dot_pos = len; + + num_len = 0; + for (i = dot_pos - 1; i >= 0; i--) { + if (name[i] >= '0' && name[i] <= '9') + num_len++; + else + break; + } + + start_cmp_pos = dot_pos - num_len - + strlen(CONFLICTING_FILE_CONST_SUFFIX); + cmp_res = strncmp(name + start_cmp_pos, + CONFLICTING_FILE_CONST_SUFFIX, + strlen(CONFLICTING_FILE_CONST_SUFFIX)); + if (cmp_res == 0) + return DT_REG; + } + + return 0; +} + +static struct hmdfs_dentry_comrade *merge_lookup_comrade( + struct hmdfs_sb_info *sbi, const char *name, int devid, + unsigned int flags) +{ + int err; + struct path root, path; + struct hmdfs_dentry_comrade *comrade = NULL; + const struct cred *old_cred = hmdfs_override_creds(sbi->cred); + + err = kern_path(sbi->real_dst, LOOKUP_DIRECTORY, &root); + if (err) { + comrade = ERR_PTR(err); + goto out; + } + + err = vfs_path_lookup(root.dentry, root.mnt, name, flags, &path); + if (err) { + comrade = ERR_PTR(err); + goto root_put; + } + + comrade = alloc_comrade(path.dentry, devid); + + path_put(&path); +root_put: + path_put(&root); +out: + hmdfs_revert_creds(old_cred); + return comrade; +} + +static bool is_valid_comrade(struct hmdfs_dentry_info_merge *mdi, umode_t mode) +{ + if (mdi->type == DT_UNKNOWN) { + mdi->type = S_ISDIR(mode) ? 
DT_DIR : DT_REG; + return true; + } + + if (mdi->type == DT_DIR && S_ISDIR(mode)) { + return true; + } + + if (mdi->type == DT_REG && list_empty(&mdi->comrade_list) && + !S_ISDIR(mode)) { + return true; + } + + return false; +} + +static void merge_lookup_work_func(struct work_struct *work) +{ + struct merge_lookup_work *ml_work; + struct hmdfs_dentry_comrade *comrade; + struct hmdfs_dentry_info_merge *mdi; + int found = false; + + ml_work = container_of(work, struct merge_lookup_work, work); + mdi = container_of(ml_work->wait_queue, struct hmdfs_dentry_info_merge, + wait_queue); + + trace_hmdfs_merge_lookup_work_enter(ml_work); + + comrade = merge_lookup_comrade(ml_work->sbi, ml_work->name, + ml_work->devid, ml_work->flags); + if (IS_ERR(comrade)) { + mutex_lock(&mdi->work_lock); + goto out; + } + + mutex_lock(&mdi->work_lock); + mutex_lock(&mdi->comrade_list_lock); + if (!is_valid_comrade(mdi, hmdfs_cm(comrade))) { + destroy_comrade(comrade); + } else { + found = true; + link_comrade(&mdi->comrade_list, comrade); + } + mutex_unlock(&mdi->comrade_list_lock); + +out: + if (--mdi->work_count == 0 || found) + wake_up_all(ml_work->wait_queue); + mutex_unlock(&mdi->work_lock); + + trace_hmdfs_merge_lookup_work_exit(ml_work, found); + kfree(ml_work->name); + kfree(ml_work); +} + +static int merge_lookup_async(struct hmdfs_dentry_info_merge *mdi, + struct hmdfs_sb_info *sbi, int devid, const char *name, + unsigned int flags) +{ + int err = -ENOMEM; + struct merge_lookup_work *ml_work; + + ml_work = kmalloc(sizeof(*ml_work), GFP_KERNEL); + if (!ml_work) + goto out; + + ml_work->name = kstrdup(name, GFP_KERNEL); + if (!ml_work->name) { + kfree(ml_work); + goto out; + } + + ml_work->devid = devid; + ml_work->flags = flags; + ml_work->sbi = sbi; + ml_work->wait_queue = &mdi->wait_queue; + INIT_WORK(&ml_work->work, merge_lookup_work_func); + + schedule_work(&ml_work->work); + ++mdi->work_count; + err = 0; +out: + return err; +} + +static char *hmdfs_get_real_dname(struct dentry *dentry, int *devid, int *type) +{ + char *rname; + + *type = check_filename(dentry->d_name.name, dentry->d_name.len); + if (*type == DT_REG) + rname = conf_name_trans_reg(dentry, devid); + else if (*type == DT_DIR) + rname = conf_name_trans_dir(dentry); + else + rname = conf_name_trans_nop(dentry); + + return rname; +} + +static int lookup_merge_normal(struct dentry *dentry, unsigned int flags) +{ + int ret = -ENOMEM; + int err = 0; + int devid = -1; + struct dentry *pdentry = dget_parent(dentry); + struct hmdfs_dentry_info_merge *mdi = hmdfs_dm(dentry); + struct hmdfs_sb_info *sbi = hmdfs_sb(dentry->d_sb); + struct hmdfs_peer *peer; + char *rname, *ppath, *cpath; + + rname = hmdfs_get_real_dname(dentry, &devid, &mdi->type); + if (unlikely(!rname)) { + goto out; + } + + ppath = hmdfs_merge_get_dentry_relative_path(pdentry); + if (unlikely(!ppath)) { + hmdfs_err("failed to get parent relative path"); + goto out_rname; + } + + cpath = kzalloc(PATH_MAX, GFP_KERNEL); + if (unlikely(!cpath)) { + hmdfs_err("failed to get child device_view path"); + goto out_ppath; + } + + mutex_lock(&mdi->work_lock); + mutex_lock(&sbi->connections.node_lock); + if (mdi->type != DT_REG || devid == 0) { + snprintf(cpath, PATH_MAX, "device_view/local%s/%s", ppath, + rname); + err = merge_lookup_async(mdi, sbi, 0, cpath, flags); + if (err) + hmdfs_err("failed to create local lookup work"); + } + + list_for_each_entry(peer, &sbi->connections.node_list, list) { + if (mdi->type == DT_REG && peer->device_id != devid) + continue; + snprintf(cpath, PATH_MAX, 
"device_view/%s%s/%s", peer->cid, + ppath, rname); + err = merge_lookup_async(mdi, sbi, peer->device_id, cpath, + flags); + if (err) + hmdfs_err("failed to create remote lookup work"); + } + mutex_unlock(&sbi->connections.node_lock); + mutex_unlock(&mdi->work_lock); + + wait_event(mdi->wait_queue, is_merge_lookup_end(mdi)); + + ret = -ENOENT; + if (!is_comrade_list_empty(mdi)) + ret = 0; + + kfree(cpath); +out_ppath: + kfree(ppath); +out_rname: + kfree(rname); +out: + dput(pdentry); + return ret; +} + +/** + * do_lookup_merge_root - lookup the root of the merge view(root/merge_view) + * + * It's common for a network filesystem to incur various of faults, so we + * intent to show mercy for faults here, except faults reported by the local. + */ +static int do_lookup_merge_root(struct path path_dev, + struct dentry *child_dentry, unsigned int flags) +{ + struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb); + struct hmdfs_dentry_comrade *comrade; + const int buf_len = + max((int)HMDFS_CID_SIZE + 1, (int)sizeof(DEVICE_VIEW_LOCAL)); + char *buf = kzalloc(buf_len, GFP_KERNEL); + struct hmdfs_peer *peer; + LIST_HEAD(head); + int ret; + + if (!buf) + return -ENOMEM; + + // lookup real_dst/device_view/local + memcpy(buf, DEVICE_VIEW_LOCAL, sizeof(DEVICE_VIEW_LOCAL)); + comrade = lookup_comrade(path_dev, buf, HMDFS_DEVID_LOCAL, flags); + if (IS_ERR(comrade)) { + ret = PTR_ERR(comrade); + goto out; + } + link_comrade(&head, comrade); + + // lookup real_dst/device_view/cidxx + mutex_lock(&sbi->connections.node_lock); + list_for_each_entry(peer, &sbi->connections.node_list, list) { + mutex_unlock(&sbi->connections.node_lock); + memcpy(buf, peer->cid, HMDFS_CID_SIZE); + comrade = lookup_comrade(path_dev, buf, peer->device_id, flags); + if (IS_ERR(comrade)) + continue; + + link_comrade(&head, comrade); + mutex_lock(&sbi->connections.node_lock); + } + mutex_unlock(&sbi->connections.node_lock); + + assign_comrades_unlocked(child_dentry, &head); + ret = 0; + +out: + kfree(buf); + return ret; +} + +// mkdir -p +static void lock_root_inode_shared(struct inode *root, bool *locked, bool *down) +{ + struct rw_semaphore *sem = &root->i_rwsem; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 3, 0) +#define RWSEM_READER_OWNED (1UL << 0) +#define RWSEM_RD_NONSPINNABLE (1UL << 1) +#define RWSEM_WR_NONSPINNABLE (1UL << 2) +#define RWSEM_NONSPINNABLE (RWSEM_RD_NONSPINNABLE | RWSEM_WR_NONSPINNABLE) +#define RWSEM_OWNER_FLAGS_MASK (RWSEM_READER_OWNED | RWSEM_NONSPINNABLE) + struct task_struct *sem_owner = + (struct task_struct *)(atomic_long_read(&sem->owner) & + ~RWSEM_OWNER_FLAGS_MASK); +#else + struct task_struct *sem_owner = sem->owner; +#endif + + *locked = false; + *down = false; + + if (sem_owner != current) + return; + + // It's us that takes the wsem + if (!inode_trylock_shared(root)) { + downgrade_write(sem); + *down = true; + } + *locked = true; +} + +static void restore_root_inode_sem(struct inode *root, bool locked, bool down) +{ + if (!locked) + return; + + inode_unlock_shared(root); + if (down) + inode_lock(root); +} + +static int lookup_merge_root(struct inode *root_inode, + struct dentry *child_dentry, unsigned int flags) +{ + struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb); + struct path path_dev; + int ret = -ENOENT; + int buf_len; + char *buf = NULL; + bool locked, down; + + // consider additional one slash and one '\0' + buf_len = strlen(sbi->real_dst) + 1 + sizeof(DEVICE_VIEW_ROOT); + if (buf_len > PATH_MAX) + return -ENAMETOOLONG; + + buf = kmalloc(buf_len, GFP_KERNEL); + if 
(unlikely(!buf)) + return -ENOMEM; + + sprintf(buf, "%s/%s", sbi->real_dst, DEVICE_VIEW_ROOT); + lock_root_inode_shared(root_inode, &locked, &down); + ret = hmdfs_get_path_in_sb(child_dentry->d_sb, buf, LOOKUP_DIRECTORY, + &path_dev); + if (ret) + goto free_buf; + + ret = do_lookup_merge_root(path_dev, child_dentry, flags); + path_put(&path_dev); + +free_buf: + kfree(buf); + restore_root_inode_sem(root_inode, locked, down); + return ret; +} + +int init_hmdfs_dentry_info_merge(struct hmdfs_sb_info *sbi, + struct dentry *dentry) +{ + struct hmdfs_dentry_info_merge *mdi = NULL; + + mdi = kmem_cache_zalloc(hmdfs_dentry_merge_cachep, GFP_NOFS); + if (!mdi) + return -ENOMEM; + + mdi->ctime = jiffies; + mdi->type = DT_UNKNOWN; + mdi->work_count = 0; + mutex_init(&mdi->work_lock); + init_waitqueue_head(&mdi->wait_queue); + INIT_LIST_HEAD(&mdi->comrade_list); + mutex_init(&mdi->comrade_list_lock); + + d_set_d_op(dentry, &hmdfs_dops_merge); + dentry->d_fsdata = mdi; + return 0; +} + +static void update_dm(struct dentry *dst, struct dentry *src) +{ + struct hmdfs_dentry_info_merge *dmi_dst = hmdfs_dm(dst); + struct hmdfs_dentry_info_merge *dmi_src = hmdfs_dm(src); + + trace_hmdfs_merge_update_dentry_info_enter(src, dst); + + spin_lock(&dst->d_lock); + spin_lock(&src->d_lock); + dst->d_fsdata = dmi_src; + src->d_fsdata = dmi_dst; + spin_unlock(&src->d_lock); + spin_unlock(&dst->d_lock); + + trace_hmdfs_merge_update_dentry_info_exit(src, dst); +} + +// do this in a map-reduce manner +struct dentry *hmdfs_lookup_merge(struct inode *parent_inode, + struct dentry *child_dentry, + unsigned int flags) +{ + bool create = flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET); + struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb); + struct hmdfs_inode_info *pii = hmdfs_i(parent_inode); + struct inode *child_inode = NULL; + struct dentry *ret_dentry = NULL; + int err = 0; + + /* + * Internal flags like LOOKUP_CREATE should not pass to device view. + * LOOKUP_REVAL is needed because dentry cache in hmdfs might be stale + * after rename in lower fs. LOOKUP_DIRECTORY is not needed because + * merge_view can do the judgement that whether result is directory or + * not. + */ + flags = flags & LOOKUP_REVAL; + + child_dentry->d_fsdata = NULL; + + if (child_dentry->d_name.len > NAME_MAX) { + err = -ENAMETOOLONG; + goto out; + } + + err = init_hmdfs_dentry_info_merge(sbi, child_dentry); + if (unlikely(err)) + goto out; + + if (pii->inode_type == HMDFS_LAYER_ZERO) { + hmdfs_dm(child_dentry)->dentry_type = HMDFS_LAYER_FIRST_MERGE; + err = lookup_merge_root(parent_inode, child_dentry, flags); + } else { + hmdfs_dm(child_dentry)->dentry_type = HMDFS_LAYER_OTHER_MERGE; + err = lookup_merge_normal(child_dentry, flags); + } + + if (!err) { + struct hmdfs_inode_info *info = NULL; + + child_inode = fill_inode_merge(parent_inode->i_sb, parent_inode, + child_dentry, NULL); + ret_dentry = d_splice_alias(child_inode, child_dentry); + if (IS_ERR(ret_dentry)) { + clear_comrades(child_dentry); + err = PTR_ERR(ret_dentry); + goto out; + } + if (ret_dentry) { + update_dm(ret_dentry, child_dentry); + child_dentry = ret_dentry; + } + info = hmdfs_i(child_inode); + if (info->inode_type == HMDFS_LAYER_FIRST_MERGE) + hmdfs_root_inode_perm_init(child_inode); + else + check_and_fixup_ownership_remote(parent_inode, + child_dentry); + + goto out; + } + + if ((err == -ENOENT) && create) + err = 0; + +out: + hmdfs_trace_merge(trace_hmdfs_lookup_merge_end, parent_inode, + child_dentry, err); + return err ? 
ERR_PTR(err) : ret_dentry; +} + +static int hmdfs_getattr_merge(struct user_namespace *mnt_userns, const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) +{ + int ret; + struct path lower_path = { + .dentry = hmdfs_get_fst_lo_d(path->dentry), + .mnt = path->mnt, + }; + + if (unlikely(!lower_path.dentry)) { + hmdfs_err("Fatal! No comrades"); + ret = -EINVAL; + goto out; + } + + ret = vfs_getattr(&lower_path, stat, request_mask, flags); +out: + dput(lower_path.dentry); + return ret; +} + +static int hmdfs_setattr_merge(struct user_namespace *mnt_userns, struct dentry *dentry, struct iattr *ia) +{ + struct inode *inode = d_inode(dentry); + struct dentry *lower_dentry = hmdfs_get_fst_lo_d(dentry); + struct inode *lower_inode = NULL; + struct iattr lower_ia; + unsigned int ia_valid = ia->ia_valid; + int err = 0; + kuid_t tmp_uid; + + if (!lower_dentry) { + WARN_ON(1); + err = -EINVAL; + goto out; + } + + lower_inode = d_inode(lower_dentry); + memcpy(&lower_ia, ia, sizeof(lower_ia)); + if (ia_valid & ATTR_FILE) + lower_ia.ia_file = hmdfs_f(ia->ia_file)->lower_file; + lower_ia.ia_valid &= ~(ATTR_UID | ATTR_GID | ATTR_MODE); + + inode_lock(lower_inode); + tmp_uid = hmdfs_override_inode_uid(lower_inode); + + err = notify_change(&init_user_ns, lower_dentry, &lower_ia, NULL); + i_size_write(inode, i_size_read(lower_inode)); + inode->i_atime = lower_inode->i_atime; + inode->i_mtime = lower_inode->i_mtime; + inode->i_ctime = lower_inode->i_ctime; + hmdfs_revert_inode_uid(lower_inode, tmp_uid); + + inode_unlock(lower_inode); + +out: + dput(lower_dentry); + return err; +} + +const struct inode_operations hmdfs_file_iops_merge = { + .getattr = hmdfs_getattr_merge, + .setattr = hmdfs_setattr_merge, + .permission = hmdfs_permission, +}; + +int do_mkdir_merge(struct inode *parent_inode, struct dentry *child_dentry, + umode_t mode, struct inode *lo_i_parent, + struct dentry *lo_d_child) +{ + int ret = 0; + struct super_block *sb = parent_inode->i_sb; + struct inode *child_inode = NULL; + + ret = vfs_mkdir(&init_user_ns, lo_i_parent, lo_d_child, mode); + if (ret) + goto out; + + child_inode = + fill_inode_merge(sb, parent_inode, child_dentry, lo_d_child); + if (IS_ERR(child_inode)) { + ret = PTR_ERR(child_inode); + goto out; + } + child_inode->i_uid = parent_inode->i_uid; + child_inode->i_gid = parent_inode->i_gid; + + d_add(child_dentry, child_inode); + /* nlink should be increased with the joining of children */ + set_nlink(parent_inode, 2); +out: + return ret; +} + +int do_create_merge(struct inode *parent_inode, struct dentry *child_dentry, + umode_t mode, bool want_excl, struct inode *lo_i_parent, + struct dentry *lo_d_child) +{ + int ret = 0; + struct super_block *sb = parent_inode->i_sb; + struct inode *child_inode = NULL; + + ret = vfs_create(&init_user_ns, lo_i_parent, lo_d_child, mode, want_excl); + if (ret) + goto out; + + child_inode = + fill_inode_merge(sb, parent_inode, child_dentry, lo_d_child); + if (IS_ERR(child_inode)) { + ret = PTR_ERR(child_inode); + goto out; + } + child_inode->i_uid = parent_inode->i_uid; + child_inode->i_gid = parent_inode->i_gid; + + d_add(child_dentry, child_inode); + /* nlink should be increased with the joining of children */ + set_nlink(parent_inode, 2); +out: + return ret; +} + +int hmdfs_do_ops_merge(struct inode *i_parent, struct dentry *d_child, + struct dentry *lo_d_child, struct path path, + struct hmdfs_recursive_para *rec_op_para) +{ + int ret = 0; + + if (rec_op_para->is_last) { + switch (rec_op_para->opcode) { + case 
F_MKDIR_MERGE: + ret = do_mkdir_merge(i_parent, d_child, + rec_op_para->mode, + d_inode(path.dentry), lo_d_child); + break; + case F_CREATE_MERGE: + ret = do_create_merge(i_parent, d_child, + rec_op_para->mode, + rec_op_para->want_excl, + d_inode(path.dentry), lo_d_child); + break; + default: + ret = -EINVAL; + break; + } + } else { + ret = vfs_mkdir(&init_user_ns, d_inode(path.dentry), lo_d_child, + rec_op_para->mode); + } + if (ret) + hmdfs_err("vfs_ops failed, ops %d, err = %d", + rec_op_para->opcode, ret); + return ret; +} + +int hmdfs_create_lower_dentry(struct inode *i_parent, struct dentry *d_child, + struct dentry *lo_d_parent, bool is_dir, + struct hmdfs_recursive_para *rec_op_para) +{ + struct hmdfs_sb_info *sbi = i_parent->i_sb->s_fs_info; + struct hmdfs_dentry_comrade *new_comrade = NULL; + struct dentry *lo_d_child = NULL; + char *path_buf = kmalloc(PATH_MAX, GFP_KERNEL); + char *absolute_path_buf = kmalloc(PATH_MAX, GFP_KERNEL); + char *path_name = NULL; + struct path path = { .mnt = NULL, .dentry = NULL }; + int ret = 0; + + if (unlikely(!path_buf || !absolute_path_buf)) { + ret = -ENOMEM; + goto out; + } + + path_name = dentry_path_raw(lo_d_parent, path_buf, PATH_MAX); + if (IS_ERR(path_name)) { + ret = PTR_ERR(path_name); + goto out; + } + if ((strlen(sbi->real_dst) + strlen(path_name) + + strlen(d_child->d_name.name) + 2) > PATH_MAX) { + ret = -ENAMETOOLONG; + goto out; + } + + sprintf(absolute_path_buf, "%s%s/%s", sbi->real_dst, path_name, + d_child->d_name.name); + + if (is_dir) + lo_d_child = kern_path_create(AT_FDCWD, absolute_path_buf, + &path, LOOKUP_DIRECTORY); + else + lo_d_child = kern_path_create(AT_FDCWD, absolute_path_buf, + &path, 0); + if (IS_ERR(lo_d_child)) { + ret = PTR_ERR(lo_d_child); + goto out; + } + // to ensure link_comrade after vfs_mkdir succeed + ret = hmdfs_do_ops_merge(i_parent, d_child, lo_d_child, path, + rec_op_para); + if (ret) + goto out_put; + new_comrade = alloc_comrade(lo_d_child, HMDFS_DEVID_LOCAL); + if (IS_ERR(new_comrade)) { + ret = PTR_ERR(new_comrade); + goto out_put; + } else { + link_comrade_unlocked(d_child, new_comrade); + } + +out_put: + done_path_create(&path, lo_d_child); +out: + kfree(absolute_path_buf); + kfree(path_buf); + return ret; +} + +static int create_lo_d_parent_recur(struct dentry *d_parent, + struct dentry *d_child, umode_t mode, + struct hmdfs_recursive_para *rec_op_para) +{ + struct dentry *lo_d_parent, *d_pparent; + struct hmdfs_dentry_info_merge *pmdi = NULL; + int ret = 0; + + pmdi = hmdfs_dm(d_parent); + wait_event(pmdi->wait_queue, !has_merge_lookup_work(pmdi)); + lo_d_parent = hmdfs_get_lo_d(d_parent, HMDFS_DEVID_LOCAL); + if (!lo_d_parent) { + d_pparent = dget_parent(d_parent); + ret = create_lo_d_parent_recur(d_pparent, d_parent, + d_inode(d_parent)->i_mode, + rec_op_para); + dput(d_pparent); + if (ret) + goto out; + lo_d_parent = hmdfs_get_lo_d(d_parent, HMDFS_DEVID_LOCAL); + if (!lo_d_parent) { + ret = -ENOENT; + goto out; + } + } + rec_op_para->is_last = false; + rec_op_para->mode = mode; + ret = hmdfs_create_lower_dentry(d_inode(d_parent), d_child, lo_d_parent, + true, rec_op_para); +out: + dput(lo_d_parent); + return ret; +} + +int create_lo_d_child(struct inode *i_parent, struct dentry *d_child, + bool is_dir, struct hmdfs_recursive_para *rec_op_para) +{ + struct dentry *d_pparent, *lo_d_parent, *lo_d_child; + struct dentry *d_parent = dget_parent(d_child); + struct hmdfs_dentry_info_merge *pmdi = hmdfs_dm(d_parent); + int ret = 0; + mode_t d_child_mode = rec_op_para->mode; + + 
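+	/* wait for async lookup work to finish so the comrade list is stable */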
wait_event(pmdi->wait_queue, !has_merge_lookup_work(pmdi)); + + lo_d_parent = hmdfs_get_lo_d(d_parent, HMDFS_DEVID_LOCAL); + if (!lo_d_parent) { + d_pparent = dget_parent(d_parent); + ret = create_lo_d_parent_recur(d_pparent, d_parent, + d_inode(d_parent)->i_mode, + rec_op_para); + dput(d_pparent); + if (unlikely(ret)) { + lo_d_child = ERR_PTR(ret); + goto out; + } + lo_d_parent = hmdfs_get_lo_d(d_parent, HMDFS_DEVID_LOCAL); + if (!lo_d_parent) { + lo_d_child = ERR_PTR(-ENOENT); + goto out; + } + } + rec_op_para->is_last = true; + rec_op_para->mode = d_child_mode; + ret = hmdfs_create_lower_dentry(i_parent, d_child, lo_d_parent, is_dir, + rec_op_para); + +out: + dput(d_parent); + dput(lo_d_parent); + return ret; +} + +void hmdfs_init_recursive_para(struct hmdfs_recursive_para *rec_op_para, + int opcode, mode_t mode, bool want_excl, + const char *name) +{ + rec_op_para->is_last = true; + rec_op_para->opcode = opcode; + rec_op_para->mode = mode; + rec_op_para->want_excl = want_excl; + rec_op_para->name = name; +} + +int hmdfs_mkdir_merge(struct user_namespace *mnt_userns, struct inode *dir, struct dentry *dentry, umode_t mode) +{ + int ret = 0; + struct hmdfs_recursive_para *rec_op_para = NULL; + + // confict_name & file_type is checked by hmdfs_mkdir_local + if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) { + ret = -EACCES; + goto out; + } + rec_op_para = kmalloc(sizeof(*rec_op_para), GFP_KERNEL); + if (!rec_op_para) { + ret = -ENOMEM; + goto out; + } + + hmdfs_init_recursive_para(rec_op_para, F_MKDIR_MERGE, mode, false, + NULL); + ret = create_lo_d_child(dir, dentry, true, rec_op_para); +out: + hmdfs_trace_merge(trace_hmdfs_mkdir_merge, dir, dentry, ret); + if (ret) + d_drop(dentry); + kfree(rec_op_para); + return ret; +} + +int hmdfs_create_merge(struct user_namespace *mnt_userns, struct inode *dir, struct dentry *dentry, umode_t mode, + bool want_excl) +{ + struct hmdfs_recursive_para *rec_op_para = NULL; + int ret = 0; + + rec_op_para = kmalloc(sizeof(*rec_op_para), GFP_KERNEL); + if (!rec_op_para) { + ret = -ENOMEM; + goto out; + } + hmdfs_init_recursive_para(rec_op_para, F_CREATE_MERGE, mode, want_excl, + NULL); + // confict_name & file_type is checked by hmdfs_create_local + ret = create_lo_d_child(dir, dentry, false, rec_op_para); +out: + hmdfs_trace_merge(trace_hmdfs_create_merge, dir, dentry, ret); + if (ret) + d_drop(dentry); + kfree(rec_op_para); + return ret; +} + +int do_rmdir_merge(struct inode *dir, struct dentry *dentry) +{ + int ret = 0; + struct hmdfs_dentry_info_merge *dim = hmdfs_dm(dentry); + struct hmdfs_dentry_comrade *comrade = NULL; + struct dentry *lo_d = NULL; + struct dentry *lo_d_dir = NULL; + struct inode *lo_i_dir = NULL; + + wait_event(dim->wait_queue, !has_merge_lookup_work(dim)); + + mutex_lock(&dim->comrade_list_lock); + list_for_each_entry(comrade, &(dim->comrade_list), list) { + lo_d = comrade->lo_d; + lo_d_dir = lock_parent(lo_d); + lo_i_dir = d_inode(lo_d_dir); + ret = vfs_rmdir(&init_user_ns, lo_i_dir, lo_d); + unlock_dir(lo_d_dir); + if (ret) + break; + } + mutex_unlock(&dim->comrade_list_lock); + hmdfs_trace_merge(trace_hmdfs_rmdir_merge, dir, dentry, ret); + return ret; +} + +int hmdfs_rmdir_merge(struct inode *dir, struct dentry *dentry) +{ + int ret = 0; + + if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) { + ret = -EACCES; + goto out; + } + + ret = do_rmdir_merge(dir, dentry); + if (ret) { + hmdfs_err("rm dir failed:%d", ret); + goto out; + } + + d_drop(dentry); +out: + hmdfs_trace_merge(trace_hmdfs_rmdir_merge, 
dir, dentry, ret); + return ret; +} + +int do_unlink_merge(struct inode *dir, struct dentry *dentry) +{ + int ret = 0; + struct hmdfs_dentry_info_merge *dim = hmdfs_dm(dentry); + struct hmdfs_dentry_comrade *comrade = NULL; + struct dentry *lo_d = NULL; + struct dentry *lo_d_dir = NULL; + struct inode *lo_i_dir = NULL; + + wait_event(dim->wait_queue, !has_merge_lookup_work(dim)); + + mutex_lock(&dim->comrade_list_lock); + list_for_each_entry(comrade, &(dim->comrade_list), list) { + lo_d = comrade->lo_d; + lo_d_dir = lock_parent(lo_d); + lo_i_dir = d_inode(lo_d_dir); + ret = vfs_unlink(&init_user_ns, lo_i_dir, lo_d, NULL); // lo_d GET + unlock_dir(lo_d_dir); + if (ret) + break; + } + mutex_unlock(&dim->comrade_list_lock); + + return ret; +} + +int hmdfs_unlink_merge(struct inode *dir, struct dentry *dentry) +{ + int ret = 0; + + if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) { + ret = -EACCES; + goto out; + } + + ret = do_unlink_merge(dir, dentry); + if (ret) { + hmdfs_err("unlink failed:%d", ret); + goto out; + } + + d_drop(dentry); +out: + return ret; +} + +int do_rename_merge(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + int ret = 0; + struct hmdfs_sb_info *sbi = (old_dir->i_sb)->s_fs_info; + struct hmdfs_dentry_info_merge *dim = hmdfs_dm(old_dentry); + struct hmdfs_dentry_comrade *comrade = NULL, *new_comrade = NULL; + struct path lo_p_new = { .mnt = NULL, .dentry = NULL }; + struct inode *lo_i_old_dir = NULL, *lo_i_new_dir = NULL; + struct dentry *lo_d_old_dir = NULL, *lo_d_old = NULL, + *lo_d_new_dir = NULL, *lo_d_new = NULL; + struct dentry *d_new_dir = NULL; + char *path_buf = kmalloc(PATH_MAX, GFP_KERNEL); + char *abs_path_buf = kmalloc(PATH_MAX, GFP_KERNEL); + char *path_name = NULL; + struct hmdfs_dentry_info_merge *pmdi = NULL; + struct renamedata rd; + + if (flags & ~RENAME_NOREPLACE) { + ret = -EINVAL; + goto out; + } + + if (unlikely(!path_buf || !abs_path_buf)) { + ret = -ENOMEM; + goto out; + } + + wait_event(dim->wait_queue, !has_merge_lookup_work(dim)); + + list_for_each_entry(comrade, &dim->comrade_list, list) { + lo_d_old = comrade->lo_d; + d_new_dir = d_find_alias(new_dir); + pmdi = hmdfs_dm(d_new_dir); + wait_event(pmdi->wait_queue, !has_merge_lookup_work(pmdi)); + lo_d_new_dir = hmdfs_get_lo_d(d_new_dir, comrade->dev_id); + dput(d_new_dir); + + if (!lo_d_new_dir) + continue; + path_name = dentry_path_raw(lo_d_new_dir, path_buf, PATH_MAX); + dput(lo_d_new_dir); + if (IS_ERR(path_name)) { + ret = PTR_ERR(path_name); + continue; + } + + if (strlen(sbi->real_dst) + strlen(path_name) + + strlen(new_dentry->d_name.name) + 2 > PATH_MAX) { + ret = -ENAMETOOLONG; + goto out; + } + + snprintf(abs_path_buf, PATH_MAX, "%s%s/%s", sbi->real_dst, + path_name, new_dentry->d_name.name); + if (S_ISDIR(d_inode(old_dentry)->i_mode)) + lo_d_new = kern_path_create(AT_FDCWD, abs_path_buf, + &lo_p_new, + LOOKUP_DIRECTORY); + else + lo_d_new = kern_path_create(AT_FDCWD, abs_path_buf, + &lo_p_new, 0); + if (IS_ERR(lo_d_new)) + continue; + + lo_d_new_dir = dget_parent(lo_d_new); + lo_i_new_dir = d_inode(lo_d_new_dir); + lo_d_old_dir = dget_parent(lo_d_old); + lo_i_old_dir = d_inode(lo_d_old_dir); + + rd.old_mnt_userns = &init_user_ns; + rd.old_dir = lo_i_old_dir; + rd.old_dentry = lo_d_old; + rd.new_mnt_userns = &init_user_ns; + rd.new_dir = lo_i_new_dir; + rd.new_dentry = lo_d_new; + + ret = vfs_rename(&rd); + new_comrade = alloc_comrade(lo_p_new.dentry, comrade->dev_id); + if 
(IS_ERR(new_comrade)) { + ret = PTR_ERR(new_comrade); + goto no_comrade; + } + + link_comrade_unlocked(new_dentry, new_comrade); +no_comrade: + done_path_create(&lo_p_new, lo_d_new); + dput(lo_d_old_dir); + dput(lo_d_new_dir); + } +out: + kfree(abs_path_buf); + kfree(path_buf); + return ret; +} + +int hmdfs_rename_merge(struct user_namespace *mnt_userns, struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + char *old_dir_buf = NULL; + char *new_dir_buf = NULL; + char *old_dir_path = NULL; + char *new_dir_path = NULL; + struct dentry *old_dir_dentry = NULL; + struct dentry *new_dir_dentry = NULL; + int ret = 0; + + if (hmdfs_file_type(old_dentry->d_name.name) != HMDFS_TYPE_COMMON || + hmdfs_file_type(new_dentry->d_name.name) != HMDFS_TYPE_COMMON) { + ret = -EACCES; + goto rename_out; + } + old_dir_buf = kmalloc(PATH_MAX, GFP_KERNEL); + new_dir_buf = kmalloc(PATH_MAX, GFP_KERNEL); + if (!old_dir_buf || !new_dir_buf) { + ret = -ENOMEM; + goto rename_out; + } + + new_dir_dentry = d_find_alias(new_dir); + if (!new_dir_dentry) { + ret = -EINVAL; + goto rename_out; + } + + old_dir_dentry = d_find_alias(old_dir); + if (!old_dir_dentry) { + ret = -EINVAL; + dput(new_dir_dentry); + goto rename_out; + } + + old_dir_path = dentry_path_raw(old_dir_dentry, old_dir_buf, PATH_MAX); + new_dir_path = dentry_path_raw(new_dir_dentry, new_dir_buf, PATH_MAX); + dput(new_dir_dentry); + dput(old_dir_dentry); + if (strcmp(old_dir_path, new_dir_path)) { + ret = -EPERM; + goto rename_out; + } + + trace_hmdfs_rename_merge(old_dir, old_dentry, new_dir, new_dentry, + flags); + ret = do_rename_merge(old_dir, old_dentry, new_dir, new_dentry, flags); + + if (ret != 0) + d_drop(new_dentry); + + if (S_ISREG(old_dentry->d_inode->i_mode) && !ret) + d_invalidate(old_dentry); + +rename_out: + hmdfs_trace_rename_merge(old_dir, old_dentry, new_dir, new_dentry, ret); + kfree(old_dir_buf); + kfree(new_dir_buf); + return ret; +} + +const struct inode_operations hmdfs_dir_iops_merge = { + .lookup = hmdfs_lookup_merge, + .mkdir = hmdfs_mkdir_merge, + .create = hmdfs_create_merge, + .rmdir = hmdfs_rmdir_merge, + .unlink = hmdfs_unlink_merge, + .rename = hmdfs_rename_merge, + .permission = hmdfs_permission, +}; diff --git a/fs/hmdfs/inode_remote.c b/fs/hmdfs/inode_remote.c new file mode 100755 index 000000000..055892de2 --- /dev/null +++ b/fs/hmdfs/inode_remote.c @@ -0,0 +1,1001 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/inode_remote.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. 
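+ *
+ * Inode operations for the remote device view: lookups are answered from
+ * the cached dentry file when it is still valid and fall back to a remote
+ * getattr otherwise.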
+ */
+
+#include
+#include
+#include
+#include
+
+#include "comm/socket_adapter.h"
+#include "hmdfs.h"
+#include "hmdfs_client.h"
+#include "hmdfs_dentryfile.h"
+#include "hmdfs_share.h"
+#include "hmdfs_trace.h"
+#include "authority/authentication.h"
+#include "stash.h"
+
+struct hmdfs_lookup_ret *lookup_remote_dentry(struct dentry *child_dentry,
+					      const struct qstr *qstr,
+					      uint64_t dev_id)
+{
+	struct hmdfs_lookup_ret *lookup_ret;
+	struct hmdfs_dentry *dentry = NULL;
+	struct clearcache_item *cache_item = NULL;
+	struct hmdfs_dcache_lookup_ctx ctx;
+	struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb);
+
+	cache_item = hmdfs_find_cache_item(dev_id, child_dentry->d_parent);
+	if (!cache_item)
+		return NULL;
+
+	lookup_ret = kmalloc(sizeof(*lookup_ret), GFP_KERNEL);
+	if (!lookup_ret)
+		goto out;
+
+	hmdfs_init_dcache_lookup_ctx(&ctx, sbi, qstr, cache_item->filp);
+	dentry = hmdfs_find_dentry(child_dentry, &ctx);
+	if (!dentry) {
+		kfree(lookup_ret);
+		lookup_ret = NULL;
+		goto out;
+	}
+
+	lookup_ret->i_mode = le16_to_cpu(dentry->i_mode);
+	lookup_ret->i_size = le64_to_cpu(dentry->i_size);
+	lookup_ret->i_mtime = le64_to_cpu(dentry->i_mtime);
+	lookup_ret->i_mtime_nsec = le32_to_cpu(dentry->i_mtime_nsec);
+	lookup_ret->i_ino = le64_to_cpu(dentry->i_ino);
+
+	hmdfs_unlock_file(ctx.filp, get_dentry_group_pos(ctx.bidx),
+			  DENTRYGROUP_SIZE);
+	kfree(ctx.page);
+out:
+	kref_put(&cache_item->ref, release_cache_item);
+	return lookup_ret;
+}
+
+/* get_remote_inode_info - fill hmdfs_lookup_ret by info from remote getattr
+ *
+ * @dentry: local dentry
+ * @hmdfs_peer: which remote device
+ * @flags: lookup flags
+ *
+ * return allocated and initialized hmdfs_lookup_ret on success, and NULL on
+ * failure.
+ */
+struct hmdfs_lookup_ret *get_remote_inode_info(struct hmdfs_peer *con,
+					       struct dentry *dentry,
+					       unsigned int flags)
+{
+	int err = 0;
+	struct hmdfs_lookup_ret *lookup_ret = NULL;
+	struct hmdfs_getattr_ret *getattr_ret = NULL;
+	unsigned int expected_flags = 0;
+
+	lookup_ret = kmalloc(sizeof(*lookup_ret), GFP_KERNEL);
+	if (!lookup_ret)
+		return NULL;
+
+	err = hmdfs_remote_getattr(con, dentry, flags, &getattr_ret);
+	if (err) {
+		hmdfs_debug("inode info get failed with err %d", err);
+		kfree(lookup_ret);
+		return NULL;
+	}
+	/* make sure we got everything we need */
+	expected_flags = STATX_INO | STATX_SIZE | STATX_MODE | STATX_MTIME;
+	if ((getattr_ret->stat.result_mask & expected_flags) !=
+	    expected_flags) {
+		hmdfs_debug("remote getattr failed with flag %x",
+			    getattr_ret->stat.result_mask);
+		kfree(lookup_ret);
+		kfree(getattr_ret);
+		return NULL;
+	}
+
+	lookup_ret->i_mode = getattr_ret->stat.mode;
+	lookup_ret->i_size = getattr_ret->stat.size;
+	lookup_ret->i_mtime = getattr_ret->stat.mtime.tv_sec;
+	lookup_ret->i_mtime_nsec = getattr_ret->stat.mtime.tv_nsec;
+	lookup_ret->i_ino = getattr_ret->stat.ino;
+	kfree(getattr_ret);
+	return lookup_ret;
+}
+
+static void hmdfs_remote_readdir_work(struct work_struct *work)
+{
+	struct hmdfs_readdir_work *rw =
+		container_of(to_delayed_work(work), struct hmdfs_readdir_work,
+			     dwork);
+	struct dentry *dentry = rw->dentry;
+	struct hmdfs_peer *con = rw->con;
+	const struct cred *old_cred = hmdfs_override_creds(con->sbi->cred);
+	bool empty = false;
+
+	get_remote_dentry_file(dentry, con);
+	hmdfs_d(dentry)->async_readdir_in_progress = 0;
+	hmdfs_revert_creds(old_cred);
+
+	spin_lock(&con->sbi->async_readdir_work_lock);
+	list_del(&rw->head);
+	empty = list_empty(&con->sbi->async_readdir_work_list);
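+	/* if the queue just drained, waiters are woken once the lock is dropped */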
+	spin_unlock(&con->sbi->async_readdir_work_lock);
+
+	dput(dentry);
+	peer_put(con);
+	kfree(rw);
+
+	if (empty)
+		wake_up_interruptible(&con->sbi->async_readdir_wq);
+}
+
+static void get_remote_dentry_file_in_wq(struct dentry *dentry,
+					 struct hmdfs_peer *con)
+{
+	struct hmdfs_readdir_work *rw = NULL;
+
+	/* do nothing if async readdir is already in progress */
+	if (cmpxchg_relaxed(&hmdfs_d(dentry)->async_readdir_in_progress, 0,
+			    1))
+		return;
+
+	rw = kmalloc(sizeof(*rw), GFP_KERNEL);
+	if (!rw) {
+		hmdfs_d(dentry)->async_readdir_in_progress = 0;
+		return;
+	}
+
+	dget(dentry);
+	peer_get(con);
+	rw->dentry = dentry;
+	rw->con = con;
+	spin_lock(&con->sbi->async_readdir_work_lock);
+	INIT_DELAYED_WORK(&rw->dwork, hmdfs_remote_readdir_work);
+	list_add(&rw->head, &con->sbi->async_readdir_work_list);
+	spin_unlock(&con->sbi->async_readdir_work_lock);
+	queue_delayed_work(con->dentry_wq, &rw->dwork, 0);
+}
+
+void get_remote_dentry_file_sync(struct dentry *dentry, struct hmdfs_peer *con)
+{
+	get_remote_dentry_file_in_wq(dentry, con);
+	flush_workqueue(con->dentry_wq);
+}
+
+struct hmdfs_lookup_ret *hmdfs_lookup_by_con(struct hmdfs_peer *con,
+					     struct dentry *dentry,
+					     struct qstr *qstr,
+					     unsigned int flags,
+					     const char *relative_path)
+{
+	struct hmdfs_lookup_ret *result = NULL;
+
+	if (con->version > USERSPACE_MAX_VER) {
+		/*
+		 * LOOKUP_REVAL means we found stale info from the dentry file,
+		 * thus we need to use remote getattr.
+		 */
+		if (flags & LOOKUP_REVAL) {
+			/*
+			 * HMDFS_LOOKUP_REVAL means we need to skip dentry cache
+			 * in lookup, because dentry cache in server might have
+			 * stale data.
+			 */
+			result = get_remote_inode_info(con, dentry,
+						       HMDFS_LOOKUP_REVAL);
+			get_remote_dentry_file_in_wq(dentry->d_parent, con);
+			return result;
+		}
+
+		/* If cache file is still valid */
+		if (hmdfs_cache_revalidate(READ_ONCE(con->conn_time),
+					   con->device_id, dentry->d_parent)) {
+			result = lookup_remote_dentry(dentry, qstr,
+						      con->device_id);
+			/*
+			 * If lookup from the cache file failed, use getattr to
+			 * see if the remote has created the file.
+			 */
+			if (!(flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) &&
+			    !result)
+				result = get_remote_inode_info(con, dentry, 0);
+			/* If the cache file expired, use getattr directly,
+			 * except for create and rename operations
+			 */
+		} else {
+			result = get_remote_inode_info(con, dentry, 0);
+			get_remote_dentry_file_in_wq(dentry->d_parent, con);
+		}
+	} else {
+		if (!relative_path)
+			return NULL;
+
+		result = con->conn_operations->remote_lookup(
+			con, relative_path, dentry->d_name.name);
+	}
+
+	return result;
+}
+
+/*
+ * hmdfs_update_inode_size - update inode size when finding an already
+ * existing inode.
+ *
+ * First of all, if the file is opened for writing, we don't update inode size
+ * here, because inode size is about to be changed after writing.
+ *
+ * If the file is not opened, simply update getattr_isize (not the actual inode
+ * size, just a value shown to the user). This is safe because inode size will
+ * be up-to-date after open.
+ *
+ * If the file is opened for read:
+ * a. getattr_isize == HMDFS_STALE_REMOTE_ISIZE
+ * 1) i_size == new_size, nothing needs to be done.
+ * 2) i_size > new_size, we keep the i_size and set getattr_isize to new_size,
+ *    stale data might be read in this case, which is fine because the file
+ *    was opened before the remote truncated it.
+ * 3) i_size < new_size, we drop the last page of the file if i_size is not
+ *    aligned to PAGE_SIZE, clear getattr_isize, and update i_size to
+ *    new_size.
+ * b. getattr_isize != HMDFS_STALE_REMOTE_ISIZE, getattr_isize will only be set
+ *    after 2).
+ * 4) getattr_isize > i_size, this situation is impossible.
+ * 5) i_size >= new_size, this case is the same as 2).
+ * 6) i_size < new_size, this case is the same as 3).
+ */
+static void hmdfs_update_inode_size(struct inode *inode, uint64_t new_size)
+{
+	struct hmdfs_inode_info *info = hmdfs_i(inode);
+	int writecount;
+	uint64_t size;
+
+	inode_lock(inode);
+	size = info->getattr_isize;
+	if (size == HMDFS_STALE_REMOTE_ISIZE)
+		size = i_size_read(inode);
+	if (size == new_size) {
+		inode_unlock(inode);
+		return;
+	}
+
+	writecount = atomic_read(&inode->i_writecount);
+	/* check if writing is in progress */
+	if (writecount > 0) {
+		info->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
+		inode_unlock(inode);
+		return;
+	}
+
+	/* check if there is no one who opens the file */
+	if (kref_read(&info->ref) == 0)
+		goto update_info;
+
+	/* check if there is someone who opens the file for read */
+	if (writecount == 0) {
+		uint64_t aligned_size;
+
+		/* use inode size here instead of getattr_isize */
+		size = i_size_read(inode);
+		if (new_size <= size)
+			goto update_info;
+		/*
+		 * if the old inode size is not aligned to HMDFS_PAGE_SIZE, we
+		 * need to drop the last page of the inode, otherwise zero will
+		 * be returned while reading the new range in the page after
+		 * changing inode size.
+		 */
+		aligned_size = round_down(size, HMDFS_PAGE_SIZE);
+		if (aligned_size != size)
+			truncate_inode_pages(inode->i_mapping, aligned_size);
+		i_size_write(inode, new_size);
+		info->getattr_isize = HMDFS_STALE_REMOTE_ISIZE;
+		inode_unlock(inode);
+		return;
+	}
+
+update_info:
+	info->getattr_isize = new_size;
+	inode_unlock(inode);
+}
+
+static void hmdfs_update_inode(struct inode *inode,
+			       struct hmdfs_lookup_ret *lookup_result)
+{
+	struct hmdfs_time_t remote_mtime = {
+		.tv_sec = lookup_result->i_mtime,
+		.tv_nsec = lookup_result->i_mtime_nsec,
+	};
+
+	/*
+	 * We only update mtime if the file is not opened for writing. If we
+	 * updated it just before writing starts, the user might see the mtime
+	 * go up and down when system time on server and client do not match.
+	 * However, mtime on the client will eventually match the server after
+	 * a timeout without writing.
+	 */
+	if (!inode_is_open_for_write(inode))
+		inode->i_mtime = remote_mtime;
+
+	/*
+	 * We don't care about the i_size of a dir, and locking the inode of a
+	 * dir might cause deadlock.
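+	 * Only the size of regular files is refreshed below.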
+ */ + if (S_ISREG(inode->i_mode)) + hmdfs_update_inode_size(inode, lookup_result->i_size); +} + +static void hmdfs_fill_inode_remote(struct inode *inode, struct inode *dir, + umode_t mode) +{ +#ifdef CONFIG_HMDFS_FS_PERMISSION + inode->i_uid = dir->i_uid; + inode->i_gid = dir->i_gid; +#endif +} + +struct inode *fill_inode_remote(struct super_block *sb, struct hmdfs_peer *con, + struct hmdfs_lookup_ret *res, struct inode *dir) +{ + int ret = 0; + struct inode *inode = NULL; + struct hmdfs_inode_info *info; + umode_t mode = res->i_mode; + + inode = hmdfs_iget5_locked_remote(sb, con, res->i_ino); + if (!inode) + return ERR_PTR(-ENOMEM); + + info = hmdfs_i(inode); + info->inode_type = HMDFS_LAYER_OTHER_REMOTE; + if (con->version > USERSPACE_MAX_VER) { + /* the inode was found in cache */ + if (!(inode->i_state & I_NEW)) { + hmdfs_fill_inode_remote(inode, dir, mode); + hmdfs_update_inode(inode, res); + return inode; + } + + hmdfs_remote_init_stash_status(con, inode, mode); + } + + inode->i_ctime.tv_sec = 0; + inode->i_ctime.tv_nsec = 0; + inode->i_mtime.tv_sec = res->i_mtime; + inode->i_mtime.tv_nsec = res->i_mtime_nsec; + + inode->i_uid = KUIDT_INIT((uid_t)1000); + inode->i_gid = KGIDT_INIT((gid_t)1000); + + if (S_ISDIR(mode)) + inode->i_mode = S_IFDIR | S_IRWXU | S_IRWXG | S_IXOTH; + else if (S_ISREG(mode)) + inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; + else { + ret = -EIO; + goto bad_inode; + } + + if (S_ISREG(mode)) { + inode->i_op = con->conn_operations->remote_file_iops; + inode->i_fop = con->conn_operations->remote_file_fops; + inode->i_size = res->i_size; + set_nlink(inode, 1); + } else if (S_ISDIR(mode)) { + inode->i_op = &hmdfs_dev_dir_inode_ops_remote; + inode->i_fop = &hmdfs_dev_dir_ops_remote; + set_nlink(inode, 2); + } else { + ret = -EIO; + goto bad_inode; + } + + inode->i_mapping->a_ops = con->conn_operations->remote_file_aops; + + hmdfs_fill_inode_remote(inode, dir, mode); + unlock_new_inode(inode); + return inode; +bad_inode: + iget_failed(inode); + return ERR_PTR(ret); +} + +static struct dentry *hmdfs_lookup_remote_dentry(struct inode *parent_inode, + struct dentry *child_dentry, + int flags) +{ + struct dentry *ret = NULL; + struct inode *inode = NULL; + struct super_block *sb = parent_inode->i_sb; + struct hmdfs_sb_info *sbi = sb->s_fs_info; + struct hmdfs_lookup_ret *lookup_result = NULL; + struct hmdfs_peer *con = NULL; + char *file_name = NULL; + int file_name_len = child_dentry->d_name.len; + struct qstr qstr; + struct hmdfs_dentry_info *gdi = hmdfs_d(child_dentry); + uint64_t device_id = 0; + char *relative_path = NULL; + + file_name = kzalloc(NAME_MAX + 1, GFP_KERNEL); + if (!file_name) + return ERR_PTR(-ENOMEM); + strncpy(file_name, child_dentry->d_name.name, file_name_len); + + qstr.name = file_name; + qstr.len = strlen(file_name); + + device_id = gdi->device_id; + con = hmdfs_lookup_from_devid(sbi, device_id); + if (!con) { + ret = ERR_PTR(-ESHUTDOWN); + goto done; + } + + relative_path = hmdfs_get_dentry_relative_path(child_dentry->d_parent); + if (unlikely(!relative_path)) { + ret = ERR_PTR(-ENOMEM); + hmdfs_err("get relative path failed %d", -ENOMEM); + goto done; + } + + lookup_result = hmdfs_lookup_by_con(con, child_dentry, &qstr, flags, + relative_path); + if (lookup_result != NULL) { + if (in_share_dir(child_dentry)) + gdi->file_type = HM_SHARE; + inode = fill_inode_remote(sb, con, lookup_result, parent_inode); + ret = d_splice_alias(inode, child_dentry); + if (!IS_ERR_OR_NULL(ret)) + child_dentry = ret; + if (!IS_ERR(ret)) + 
check_and_fixup_ownership_remote(parent_inode, + child_dentry); + } else { + ret = ERR_PTR(-ENOENT); + } + +done: + if (con) + peer_put(con); + kfree(relative_path); + kfree(lookup_result); + kfree(file_name); + return ret; +} + +struct dentry *hmdfs_lookup_remote(struct inode *parent_inode, + struct dentry *child_dentry, + unsigned int flags) +{ + int err = 0; + struct dentry *ret = NULL; + struct hmdfs_dentry_info *gdi = NULL; + struct hmdfs_sb_info *sbi = hmdfs_sb(child_dentry->d_sb); + + trace_hmdfs_lookup_remote(parent_inode, child_dentry, flags); + if (child_dentry->d_name.len > NAME_MAX) { + err = -ENAMETOOLONG; + ret = ERR_PTR(-ENAMETOOLONG); + goto out; + } + + err = init_hmdfs_dentry_info(sbi, child_dentry, + HMDFS_LAYER_OTHER_REMOTE); + if (err) { + ret = ERR_PTR(err); + goto out; + } + gdi = hmdfs_d(child_dentry); + gdi->device_id = hmdfs_d(child_dentry->d_parent)->device_id; + + if (is_current_hmdfs_server_ctx()) + goto out; + + ret = hmdfs_lookup_remote_dentry(parent_inode, child_dentry, flags); + /* + * don't return error if inode do not exist, so that vfs can continue + * to create it. + */ + if (IS_ERR_OR_NULL(ret)) { + err = PTR_ERR(ret); + if (err == -ENOENT) + ret = NULL; + } else { + child_dentry = ret; + } + +out: + if (!err) + hmdfs_set_time(child_dentry, jiffies); + trace_hmdfs_lookup_remote_end(parent_inode, child_dentry, err); + return ret; +} + +/* delete dentry in cache file */ +void delete_in_cache_file(uint64_t dev_id, struct dentry *dentry) +{ + struct clearcache_item *item = NULL; + + item = hmdfs_find_cache_item(dev_id, dentry->d_parent); + if (item) { + hmdfs_delete_dentry(dentry, item->filp); + kref_put(&item->ref, release_cache_item); + } else { + hmdfs_info("find cache item failed, con:%llu", dev_id); + } +} + +int hmdfs_mkdir_remote_dentry(struct hmdfs_peer *conn, struct dentry *dentry, + umode_t mode) +{ + int err = 0; + char *dir_path = NULL; + struct dentry *parent_dentry = dentry->d_parent; + struct inode *parent_inode = d_inode(parent_dentry); + struct super_block *sb = parent_inode->i_sb; + const unsigned char *d_name = dentry->d_name.name; + struct hmdfs_lookup_ret *mkdir_ret = NULL; + struct inode *inode = NULL; + + mkdir_ret = kmalloc(sizeof(*mkdir_ret), GFP_KERNEL); + if (!mkdir_ret) { + err = -ENOMEM; + return err; + } + dir_path = hmdfs_get_dentry_relative_path(parent_dentry); + if (!dir_path) { + err = -EACCES; + goto mkdir_out; + } + err = hmdfs_client_start_mkdir(conn, dir_path, d_name, mode, mkdir_ret); + if (err) { + hmdfs_err("hmdfs_client_start_mkdir failed err = %d", err); + goto mkdir_out; + } + if (mkdir_ret) { + inode = fill_inode_remote(sb, conn, mkdir_ret, parent_inode); + if (!IS_ERR(inode)) + d_add(dentry, inode); + else + err = PTR_ERR(inode); + check_and_fixup_ownership_remote(parent_inode, dentry); + } else { + err = -ENOENT; + } + +mkdir_out: + kfree(dir_path); + kfree(mkdir_ret); + return err; +} + +int hmdfs_mkdir_remote(struct user_namespace *mnt_userns, struct inode *dir, struct dentry *dentry, umode_t mode) +{ + int err = 0; + struct hmdfs_inode_info *info = hmdfs_i(dir); + struct hmdfs_peer *con = info->conn; + + if (!con) { + hmdfs_warning("qpb_debug: con is null!"); + goto out; + } + if (con->version <= USERSPACE_MAX_VER) { + err = -EPERM; + goto out; + } + err = hmdfs_mkdir_remote_dentry(con, dentry, mode); + if (!err) + create_in_cache_file(con->device_id, dentry); + else + hmdfs_err("remote mkdir failed err = %d", err); + +out: + trace_hmdfs_mkdir_remote(dir, dentry, err); + return err; +} + +int 
hmdfs_create_remote_dentry(struct hmdfs_peer *conn, struct dentry *dentry, + umode_t mode, bool want_excl) +{ + int err = 0; + char *dir_path = NULL; + struct dentry *parent_dentry = dentry->d_parent; + struct inode *parent_inode = d_inode(parent_dentry); + struct super_block *sb = parent_inode->i_sb; + const unsigned char *d_name = dentry->d_name.name; + struct hmdfs_lookup_ret *create_ret = NULL; + struct inode *inode = NULL; + + create_ret = kmalloc(sizeof(*create_ret), GFP_KERNEL); + if (!create_ret) { + err = -ENOMEM; + return err; + } + dir_path = hmdfs_get_dentry_relative_path(parent_dentry); + if (!dir_path) { + err = -EACCES; + goto create_out; + } + err = hmdfs_client_start_create(conn, dir_path, d_name, mode, + want_excl, create_ret); + if (err) { + hmdfs_err("hmdfs_client_start_create failed err = %d", err); + goto create_out; + } + if (create_ret) { + inode = fill_inode_remote(sb, conn, create_ret, parent_inode); + if (!IS_ERR(inode)) + d_add(dentry, inode); + else + err = PTR_ERR(inode); + check_and_fixup_ownership_remote(parent_inode, dentry); + } else { + err = -ENOENT; + hmdfs_err("get remote inode info failed err = %d", err); + } + +create_out: + kfree(dir_path); + kfree(create_ret); + return err; +} + +int hmdfs_create_remote(struct user_namespace *mnt_userns, struct inode *dir, struct dentry *dentry, umode_t mode, + bool want_excl) +{ + int err = 0; + struct hmdfs_inode_info *info = hmdfs_i(dir); + struct hmdfs_peer *con = info->conn; + + if (!con) { + hmdfs_warning("qpb_debug: con is null!"); + goto out; + } + if (con->version <= USERSPACE_MAX_VER) { + err = -EPERM; + goto out; + } + err = hmdfs_create_remote_dentry(con, dentry, mode, want_excl); + if (!err) + create_in_cache_file(con->device_id, dentry); + else + hmdfs_err("remote create failed err = %d", err); + +out: + trace_hmdfs_create_remote(dir, dentry, err); + return err; +} + +int hmdfs_rmdir_remote_dentry(struct hmdfs_peer *conn, struct dentry *dentry) +{ + int error = 0; + char *dir_path = NULL; + const char *dentry_name = dentry->d_name.name; + + dir_path = hmdfs_get_dentry_relative_path(dentry->d_parent); + if (!dir_path) { + error = -EACCES; + goto rmdir_out; + } + + error = hmdfs_client_start_rmdir(conn, dir_path, dentry_name); + if (!error) + delete_in_cache_file(conn->device_id, dentry); + +rmdir_out: + kfree(dir_path); + return error; +} + +int hmdfs_rmdir_remote(struct inode *dir, struct dentry *dentry) +{ + int err = 0; + struct hmdfs_inode_info *info = hmdfs_i(dentry->d_inode); + struct hmdfs_peer *con = info->conn; + + if (!con) + goto out; + + if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) { + err = -EACCES; + goto out; + } + if (con->version <= USERSPACE_MAX_VER) { + err = -EPERM; + goto out; + } + err = hmdfs_rmdir_remote_dentry(con, dentry); + /* drop dentry even remote failed + * it maybe cause that one remote devices disconnect + * when doing remote rmdir + */ + d_drop(dentry); +out: + /* return connect device's errcode */ + trace_hmdfs_rmdir_remote(dir, dentry, err); + return err; +} + +int hmdfs_dev_unlink_from_con(struct hmdfs_peer *conn, struct dentry *dentry) +{ + int error = 0; + char *dir_path = NULL; + const char *dentry_name = dentry->d_name.name; + + dir_path = hmdfs_get_dentry_relative_path(dentry->d_parent); + if (!dir_path) { + error = -EACCES; + goto unlink_out; + } + error = hmdfs_client_start_unlink(conn, dir_path, dentry_name); + if (!error) { + delete_in_cache_file(conn->device_id, dentry); + drop_nlink(d_inode(dentry)); + d_drop(dentry); + } +unlink_out: + 
kfree(dir_path); + return error; +} + +int hmdfs_unlink_remote(struct inode *dir, struct dentry *dentry) +{ + struct hmdfs_inode_info *info = hmdfs_i(dentry->d_inode); + struct hmdfs_peer *conn = info->conn; + + if (hmdfs_file_type(dentry->d_name.name) != HMDFS_TYPE_COMMON) + return -EACCES; + + if (!conn) + return 0; + + if (conn->status != NODE_STAT_ONLINE) + return 0; + + return conn->conn_operations->remote_unlink(conn, dentry); +} + +/* rename dentry in cache file */ +static void rename_in_cache_file(uint64_t dev_id, struct dentry *old_dentry, + struct dentry *new_dentry) +{ + struct clearcache_item *old_item = NULL; + struct clearcache_item *new_item = NULL; + + old_item = hmdfs_find_cache_item(dev_id, old_dentry->d_parent); + new_item = hmdfs_find_cache_item(dev_id, new_dentry->d_parent); + if (old_item != NULL && new_item != NULL) { + hmdfs_rename_dentry(old_dentry, new_dentry, old_item->filp, + new_item->filp); + } else if (old_item != NULL) { + hmdfs_err("new cache item find failed!"); + } else if (new_item != NULL) { + hmdfs_err("old cache item find failed!"); + } else { + hmdfs_err("both cache item find failed!"); + } + + if (old_item) + kref_put(&old_item->ref, release_cache_item); + if (new_item) + kref_put(&new_item->ref, release_cache_item); +} + +int hmdfs_rename_remote(struct user_namespace *mnt_userns, struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + int err = 0; + int ret = 0; + const char *old_dentry_d_name = old_dentry->d_name.name; + char *relative_old_dir_path = 0; + const char *new_dentry_d_name = new_dentry->d_name.name; + char *relative_new_dir_path = 0; + struct hmdfs_inode_info *info = hmdfs_i(old_dentry->d_inode); + struct hmdfs_peer *con = info->conn; + + trace_hmdfs_rename_remote(old_dir, old_dentry, new_dir, new_dentry, + flags); + + if (flags & ~RENAME_NOREPLACE) + return -EINVAL; + + if (hmdfs_file_type(old_dentry->d_name.name) != HMDFS_TYPE_COMMON || + hmdfs_file_type(new_dentry->d_name.name) != HMDFS_TYPE_COMMON) { + return -EACCES; + } + + relative_old_dir_path = + hmdfs_get_dentry_relative_path(old_dentry->d_parent); + relative_new_dir_path = + hmdfs_get_dentry_relative_path(new_dentry->d_parent); + if (!relative_old_dir_path || !relative_new_dir_path) { + err = -EACCES; + goto rename_out; + } + if (S_ISREG(old_dentry->d_inode->i_mode)) { + if (con->version > USERSPACE_MAX_VER) { + hmdfs_debug("send MSG to remote devID %llu", + con->device_id); + err = hmdfs_client_start_rename( + con, relative_old_dir_path, old_dentry_d_name, + relative_new_dir_path, new_dentry_d_name, + flags); + if (!err) + rename_in_cache_file(con->device_id, old_dentry, + new_dentry); + } + } else if (S_ISDIR(old_dentry->d_inode->i_mode)) { + if ((con->status == NODE_STAT_ONLINE) && + (con->version > USERSPACE_MAX_VER)) { + ret = hmdfs_client_start_rename( + con, relative_old_dir_path, old_dentry_d_name, + relative_new_dir_path, new_dentry_d_name, + flags); + if (!ret) + rename_in_cache_file(con->device_id, old_dentry, + new_dentry); + else + err = ret; + } + } + if (!err) + d_invalidate(old_dentry); +rename_out: + kfree(relative_old_dir_path); + kfree(relative_new_dir_path); + return err; +} + +static int hmdfs_dir_setattr_remote(struct user_namespace *mnt_userns, struct dentry *dentry, struct iattr *ia) +{ + // Do not support dir setattr + return 0; +} + +const struct inode_operations hmdfs_dev_dir_inode_ops_remote = { + .lookup = hmdfs_lookup_remote, + .mkdir = hmdfs_mkdir_remote, + .create = 
hmdfs_create_remote, + .rmdir = hmdfs_rmdir_remote, + .unlink = hmdfs_unlink_remote, + .rename = hmdfs_rename_remote, + .setattr = hmdfs_dir_setattr_remote, + .permission = hmdfs_permission, +}; + +static int hmdfs_setattr_remote(struct user_namespace *mnt_userns, struct dentry *dentry, struct iattr *ia) +{ + struct hmdfs_inode_info *info = hmdfs_i(d_inode(dentry)); + struct hmdfs_peer *conn = info->conn; + struct inode *inode = d_inode(dentry); + char *send_buf = NULL; + int err = 0; + + if (hmdfs_inode_is_stashing(info)) + return -EAGAIN; + + send_buf = hmdfs_get_dentry_relative_path(dentry); + if (!send_buf) { + err = -ENOMEM; + goto out_free; + } + if (ia->ia_valid & ATTR_SIZE) { + err = inode_newsize_ok(inode, ia->ia_size); + if (err) + goto out_free; + truncate_setsize(inode, ia->ia_size); + info->getattr_isize = HMDFS_STALE_REMOTE_ISIZE; + } + if (ia->ia_valid & ATTR_MTIME) + inode->i_mtime = ia->ia_mtime; + + if ((ia->ia_valid & ATTR_SIZE) || (ia->ia_valid & ATTR_MTIME)) { + struct setattr_info send_setattr_info = { + .size = cpu_to_le64(ia->ia_size), + .valid = cpu_to_le32(ia->ia_valid), + .mtime = cpu_to_le64(ia->ia_mtime.tv_sec), + .mtime_nsec = cpu_to_le32(ia->ia_mtime.tv_nsec), + }; + err = hmdfs_send_setattr(conn, send_buf, &send_setattr_info); + } +out_free: + kfree(send_buf); + return err; +} + +int hmdfs_remote_getattr(struct hmdfs_peer *conn, struct dentry *dentry, + unsigned int lookup_flags, + struct hmdfs_getattr_ret **result) +{ + char *send_buf = NULL; + struct hmdfs_getattr_ret *attr = NULL; + int err = 0; + + if (dentry->d_sb != conn->sbi->sb || !result) + return -EINVAL; + + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) + return -ENOMEM; + + send_buf = hmdfs_get_dentry_relative_path(dentry); + if (!send_buf) { + kfree(attr); + return -ENOMEM; + } + + err = hmdfs_send_getattr(conn, send_buf, lookup_flags, attr); + kfree(send_buf); + + if (err) { + kfree(attr); + return err; + } + + *result = attr; + return 0; +} + +static int hmdfs_get_cached_attr_remote(struct user_namespace *mnt_userns, const struct path *path, + struct kstat *stat, u32 request_mask, + unsigned int flags) +{ + struct inode *inode = d_inode(path->dentry); + struct hmdfs_inode_info *info = hmdfs_i(inode); + uint64_t size = info->getattr_isize; + + stat->ino = inode->i_ino; + stat->mtime = inode->i_mtime; + stat->mode = inode->i_mode; + stat->uid.val = inode->i_uid.val; + stat->gid.val = inode->i_gid.val; + if (size == HMDFS_STALE_REMOTE_ISIZE) + size = i_size_read(inode); + + stat->size = size; + return 0; +} + +ssize_t hmdfs_remote_listxattr(struct dentry *dentry, char *list, size_t size) +{ + struct inode *inode = d_inode(dentry); + struct hmdfs_inode_info *info = hmdfs_i(inode); + struct hmdfs_peer *conn = info->conn; + char *send_buf = NULL; + ssize_t res = 0; + size_t r_size = size; + + if (!hmdfs_support_xattr(dentry)) + return -EOPNOTSUPP; + + if (size > HMDFS_LISTXATTR_SIZE_MAX) + r_size = HMDFS_LISTXATTR_SIZE_MAX; + + send_buf = hmdfs_get_dentry_relative_path(dentry); + if (!send_buf) + return -ENOMEM; + + res = hmdfs_send_listxattr(conn, send_buf, list, r_size); + kfree(send_buf); + + if (res == -ERANGE && r_size != size) { + hmdfs_info("no support listxattr size over than %d", + HMDFS_LISTXATTR_SIZE_MAX); + res = -E2BIG; + } + + return res; +} + +const struct inode_operations hmdfs_dev_file_iops_remote = { + .setattr = hmdfs_setattr_remote, + .permission = hmdfs_permission, + .getattr = hmdfs_get_cached_attr_remote, + .listxattr = hmdfs_remote_listxattr, +}; diff --git 
a/fs/hmdfs/inode_root.c b/fs/hmdfs/inode_root.c
new file mode 100755
index 000000000..b3b2b5c2b
--- /dev/null
+++ b/fs/hmdfs/inode_root.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/inode_root.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include <linux/fs_stack.h>
+#include <linux/namei.h>
+#include <linux/slab.h>
+
+#include "authority/authentication.h"
+#include "comm/socket_adapter.h"
+#include "comm/transport.h"
+#include "hmdfs_dentryfile.h"
+#include "hmdfs_device_view.h"
+#include "hmdfs_merge_view.h"
+#include "hmdfs_trace.h"
+
+static struct inode *fill_device_local_inode(struct super_block *sb,
+					     struct inode *lower_inode)
+{
+	struct inode *inode = NULL;
+	struct hmdfs_inode_info *info = NULL;
+
+	if (!igrab(lower_inode))
+		return ERR_PTR(-ESTALE);
+
+	inode = hmdfs_iget_locked_root(sb, HMDFS_ROOT_DEV_LOCAL, lower_inode,
+				       NULL);
+	if (!inode) {
+		hmdfs_err("iget5_locked get inode NULL");
+		iput(lower_inode);
+		return ERR_PTR(-ENOMEM);
+	}
+	if (!(inode->i_state & I_NEW)) {
+		iput(lower_inode);
+		return inode;
+	}
+
+	info = hmdfs_i(inode);
+	info->inode_type = HMDFS_LAYER_SECOND_LOCAL;
+
+	inode->i_mode =
+		(lower_inode->i_mode & S_IFMT) | S_IRWXU | S_IRWXG | S_IXOTH;
+
+	inode->i_uid = KUIDT_INIT((uid_t)1000);
+	inode->i_gid = KGIDT_INIT((gid_t)1000);
+
+	inode->i_atime = lower_inode->i_atime;
+	inode->i_ctime = lower_inode->i_ctime;
+	inode->i_mtime = lower_inode->i_mtime;
+
+	inode->i_op = &hmdfs_dir_inode_ops_local;
+	inode->i_fop = &hmdfs_dir_ops_local;
+
+	fsstack_copy_inode_size(inode, lower_inode);
+	unlock_new_inode(inode);
+	return inode;
+}
+
+static struct inode *fill_device_inode_remote(struct super_block *sb,
+					      uint64_t dev_id)
+{
+	struct inode *inode = NULL;
+	struct hmdfs_inode_info *info = NULL;
+	struct hmdfs_peer *con = NULL;
+
+	con = hmdfs_lookup_from_devid(sb->s_fs_info, dev_id);
+	if (!con)
+		return ERR_PTR(-ENOENT);
+
+	inode = hmdfs_iget_locked_root(sb, HMDFS_ROOT_DEV_REMOTE, NULL, con);
+	if (!inode) {
+		hmdfs_err("get inode NULL");
+		inode = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+	if (!(inode->i_state & I_NEW))
+		goto out;
+
+	info = hmdfs_i(inode);
+	info->inode_type = HMDFS_LAYER_SECOND_REMOTE;
+
+	inode->i_mode = S_IFDIR | S_IRWXU | S_IRWXG | S_IXOTH;
+
+	inode->i_uid = KUIDT_INIT((uid_t)1000);
+	inode->i_gid = KGIDT_INIT((gid_t)1000);
+	inode->i_op = &hmdfs_dev_dir_inode_ops_remote;
+	inode->i_fop = &hmdfs_dev_dir_ops_remote;
+
+	unlock_new_inode(inode);
+
+out:
+	peer_put(con);
+	return inode;
+}
+
+struct dentry *hmdfs_device_lookup(struct inode *parent_inode,
+				   struct dentry *child_dentry,
+				   unsigned int flags)
+{
+	const char *d_name = child_dentry->d_name.name;
+	struct inode *root_inode = NULL;
+	struct super_block *sb = parent_inode->i_sb;
+	struct hmdfs_sb_info *sbi = sb->s_fs_info;
+	struct dentry *ret_dentry = NULL;
+	int err = 0;
+	struct hmdfs_peer *con = NULL;
+	struct hmdfs_dentry_info *di = NULL;
+	uint8_t *cid = NULL;
+	struct path *root_lower_path = NULL;
+
+	trace_hmdfs_device_lookup(parent_inode, child_dentry, flags);
+	if (!strncmp(d_name, DEVICE_VIEW_LOCAL,
+		     sizeof(DEVICE_VIEW_LOCAL) - 1)) {
+		err = init_hmdfs_dentry_info(sbi, child_dentry,
+					     HMDFS_LAYER_SECOND_LOCAL);
+		if (err) {
+			ret_dentry = ERR_PTR(err);
+			goto out;
+		}
+		di = hmdfs_d(sb->s_root);
+		root_lower_path = &(di->lower_path);
+		hmdfs_set_lower_path(child_dentry, root_lower_path);
+		path_get(root_lower_path);
+		root_inode = fill_device_local_inode(
+			sb, d_inode(root_lower_path->dentry));
+		if (IS_ERR(root_inode)) {
+			err = PTR_ERR(root_inode);
+			ret_dentry = 
ERR_PTR(err); + hmdfs_put_reset_lower_path(child_dentry); + goto out; + } + ret_dentry = d_splice_alias(root_inode, child_dentry); + if (IS_ERR(ret_dentry)) { + err = PTR_ERR(ret_dentry); + ret_dentry = ERR_PTR(err); + hmdfs_put_reset_lower_path(child_dentry); + goto out; + } + } else { + err = init_hmdfs_dentry_info(sbi, child_dentry, + HMDFS_LAYER_SECOND_REMOTE); + di = hmdfs_d(child_dentry); + if (err) { + ret_dentry = ERR_PTR(err); + goto out; + } + cid = kzalloc(HMDFS_CID_SIZE + 1, GFP_KERNEL); + if (!cid) { + err = -ENOMEM; + ret_dentry = ERR_PTR(err); + goto out; + } + strncpy(cid, d_name, HMDFS_CID_SIZE); + cid[HMDFS_CID_SIZE] = '\0'; + con = hmdfs_lookup_from_cid(sbi, cid); + if (!con) { + kfree(cid); + err = -ENOENT; + ret_dentry = ERR_PTR(err); + goto out; + } + di->device_id = con->device_id; + root_inode = fill_device_inode_remote(sb, di->device_id); + if (IS_ERR(root_inode)) { + kfree(cid); + err = PTR_ERR(root_inode); + ret_dentry = ERR_PTR(err); + goto out; + } + ret_dentry = d_splice_alias(root_inode, child_dentry); + kfree(cid); + } + if (root_inode) + hmdfs_root_inode_perm_init(root_inode); + if (!err) + hmdfs_set_time(child_dentry, jiffies); +out: + if (con) + peer_put(con); + trace_hmdfs_device_lookup_end(parent_inode, child_dentry, err); + return ret_dentry; +} + +struct dentry *hmdfs_root_lookup(struct inode *parent_inode, + struct dentry *child_dentry, + unsigned int flags) +{ + const char *d_name = child_dentry->d_name.name; + struct inode *root_inode = NULL; + struct super_block *sb = parent_inode->i_sb; + struct hmdfs_sb_info *sbi = sb->s_fs_info; + struct dentry *ret = ERR_PTR(-ENOENT); + struct path root_path; + + trace_hmdfs_root_lookup(parent_inode, child_dentry, flags); + if (sbi->s_merge_switch && !strcmp(d_name, MERGE_VIEW_ROOT)) { + ret = hmdfs_lookup_merge(parent_inode, child_dentry, flags); + if (ret && !IS_ERR(ret)) + child_dentry = ret; + root_inode = d_inode(child_dentry); + } else if (!strcmp(d_name, DEVICE_VIEW_ROOT)) { + ret = ERR_PTR(init_hmdfs_dentry_info( + sbi, child_dentry, HMDFS_LAYER_FIRST_DEVICE)); + if (IS_ERR(ret)) + goto out; + ret = ERR_PTR(kern_path(sbi->local_src, 0, &root_path)); + if (IS_ERR(ret)) + goto out; + root_inode = fill_device_inode(sb, d_inode(root_path.dentry)); + ret = d_splice_alias(root_inode, child_dentry); + path_put(&root_path); + } + if (!IS_ERR(ret) && root_inode) + hmdfs_root_inode_perm_init(root_inode); + +out: + trace_hmdfs_root_lookup_end(parent_inode, child_dentry, + PTR_ERR_OR_ZERO(ret)); + return ret; +} + +const struct inode_operations hmdfs_device_ops = { + .lookup = hmdfs_device_lookup, +}; + +const struct inode_operations hmdfs_root_ops = { + .lookup = hmdfs_root_lookup, +}; + +struct inode *fill_device_inode(struct super_block *sb, + struct inode *lower_inode) +{ + struct inode *inode = NULL; + struct hmdfs_inode_info *info = NULL; + + inode = hmdfs_iget_locked_root(sb, HMDFS_ROOT_DEV, NULL, NULL); + if (!inode) { + hmdfs_err("iget5_locked get inode NULL"); + return ERR_PTR(-ENOMEM); + } + if (!(inode->i_state & I_NEW)) + return inode; + + info = hmdfs_i(inode); + info->inode_type = HMDFS_LAYER_FIRST_DEVICE; + + inode->i_atime = lower_inode->i_atime; + inode->i_ctime = lower_inode->i_ctime; + inode->i_mtime = lower_inode->i_mtime; + + inode->i_mode = (lower_inode->i_mode & S_IFMT) | S_IRUSR | S_IXUSR | + S_IRGRP | S_IXGRP | S_IXOTH; + inode->i_uid = KUIDT_INIT((uid_t)1000); + inode->i_gid = KGIDT_INIT((gid_t)1000); + inode->i_op = &hmdfs_device_ops; + inode->i_fop = &hmdfs_device_fops; + + 
fsstack_copy_inode_size(inode, lower_inode);
+	unlock_new_inode(inode);
+	return inode;
+}
+
+struct inode *fill_root_inode(struct super_block *sb, struct inode *lower_inode)
+{
+	struct inode *inode = NULL;
+	struct hmdfs_inode_info *info = NULL;
+
+	if (!igrab(lower_inode))
+		return ERR_PTR(-ESTALE);
+
+	inode = hmdfs_iget_locked_root(sb, HMDFS_ROOT_ANCESTOR, lower_inode,
+				       NULL);
+	if (!inode) {
+		hmdfs_err("iget5_locked get inode NULL");
+		iput(lower_inode);
+		return ERR_PTR(-ENOMEM);
+	}
+	if (!(inode->i_state & I_NEW)) {
+		iput(lower_inode);
+		return inode;
+	}
+
+	info = hmdfs_i(inode);
+	info->inode_type = HMDFS_LAYER_ZERO;
+	inode->i_mode = (lower_inode->i_mode & S_IFMT) | S_IRUSR | S_IXUSR |
+			S_IRGRP | S_IXGRP | S_IXOTH;
+
+#ifdef CONFIG_HMDFS_FS_PERMISSION
+	inode->i_uid = lower_inode->i_uid;
+	inode->i_gid = lower_inode->i_gid;
+#else
+	inode->i_uid = KUIDT_INIT((uid_t)1000);
+	inode->i_gid = KGIDT_INIT((gid_t)1000);
+#endif
+	inode->i_atime = lower_inode->i_atime;
+	inode->i_ctime = lower_inode->i_ctime;
+	inode->i_mtime = lower_inode->i_mtime;
+
+	inode->i_op = &hmdfs_root_ops;
+	inode->i_fop = &hmdfs_root_fops;
+	fsstack_copy_inode_size(inode, lower_inode);
+	unlock_new_inode(inode);
+	return inode;
+}
diff --git a/fs/hmdfs/main.c b/fs/hmdfs/main.c
new file mode 100755
index 000000000..55799994e
--- /dev/null
+++ b/fs/hmdfs/main.c
@@ -0,0 +1,1101 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/main.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+
+#include "hmdfs.h"
+
+#include <linux/ctype.h>
+#include <linux/idr.h>
+#include <linux/module.h>
+#include <linux/statfs.h>
+#include <linux/xattr.h>
+#if KERNEL_VERSION(5, 9, 0) < LINUX_VERSION_CODE
+#include <linux/prandom.h>
+#else
+#include <linux/random.h>
+#endif
+
+#include "authority/authentication.h"
+#include "hmdfs_server.h"
+#include "comm/device_node.h"
+#include "comm/message_verify.h"
+#include "comm/protocol.h"
+#include "comm/socket_adapter.h"
+#include "hmdfs_merge_view.h"
+#include "server_writeback.h"
+#include "hmdfs_share.h"
+
+#include "comm/node_cb.h"
+#include "stash.h"
+
+#define CREATE_TRACE_POINTS
+#include "hmdfs_trace.h"
+
+#define HMDFS_BOOT_COOKIE_RAND_SHIFT 33
+
+#define HMDFS_SB_SEQ_FROM 1
+
+struct hmdfs_mount_priv {
+	const char *dev_name;
+	const char *raw_data;
+};
+
+struct syncfs_item {
+	struct list_head list;
+	struct completion done;
+	bool need_abort;
+};
+
+static DEFINE_IDA(hmdfs_sb_seq);
+
+static inline int hmdfs_alloc_sb_seq(void)
+{
+	return ida_simple_get(&hmdfs_sb_seq, HMDFS_SB_SEQ_FROM, 0, GFP_KERNEL);
+}
+
+static inline void hmdfs_free_sb_seq(unsigned int seq)
+{
+	if (!seq)
+		return;
+	ida_simple_remove(&hmdfs_sb_seq, seq);
+}
+
+static int hmdfs_xattr_local_get(struct dentry *dentry, const char *name,
+				 void *value, size_t size)
+{
+	struct path lower_path;
+	ssize_t res = 0;
+
+	hmdfs_get_lower_path(dentry, &lower_path);
+	res = vfs_getxattr(&init_user_ns, lower_path.dentry, name, value, size);
+	hmdfs_put_lower_path(&lower_path);
+	return res;
+}
+
+static int hmdfs_xattr_remote_get(struct dentry *dentry, const char *name,
+				  void *value, size_t size)
+{
+	struct inode *inode = d_inode(dentry);
+	struct hmdfs_inode_info *info = hmdfs_i(inode);
+	struct hmdfs_peer *conn = info->conn;
+	char *send_buf = NULL;
+	ssize_t res = 0;
+
+	send_buf = hmdfs_get_dentry_relative_path(dentry);
+	if (!send_buf)
+		return -ENOMEM;
+
+	res = hmdfs_send_getxattr(conn, send_buf, name, value, size);
+	kfree(send_buf);
+	return res;
+}
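+
+/*
+ * hmdfs_xattr_get() below caps the value size at HMDFS_XATTR_SIZE_MAX,
+ * so user space sees the cap through the standard two-step xattr
+ * protocol. A sketch (hypothetical mount point and attribute name,
+ * error handling omitted):
+ *
+ *   ssize_t len = getxattr("/mnt/hmdfs/f", "user.tag", NULL, 0);
+ *   char *buf = malloc(len);
+ *   if (getxattr("/mnt/hmdfs/f", "user.tag", buf, len) < 0)
+ *           perror("getxattr");   // oversized values fail with E2BIG
+ */
+static int hmdfs_xattr_get(const struct xattr_handler *handler,
+			   struct dentry *dentry, struct inode *inode,
+			   const char *name, void *value, 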
size_t size) +{ + int res = 0; + struct hmdfs_inode_info *info = hmdfs_i(inode); + size_t r_size = size; + + if (!hmdfs_support_xattr(dentry)) + return -EOPNOTSUPP; + + if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) + return -EOPNOTSUPP; + + if (size > HMDFS_XATTR_SIZE_MAX) + r_size = HMDFS_XATTR_SIZE_MAX; + + if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL) + res = hmdfs_xattr_local_get(dentry, name, value, r_size); + else + res = hmdfs_xattr_remote_get(dentry, name, value, r_size); + + if (res == -ERANGE && r_size != size) { + hmdfs_info("no support xattr value size over than: %d", + HMDFS_XATTR_SIZE_MAX); + res = -E2BIG; + } + + return res; +} + +static int hmdfs_xattr_local_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + struct path lower_path; + int res = 0; + + hmdfs_get_lower_path(dentry, &lower_path); + if (value) { + res = vfs_setxattr(&init_user_ns, lower_path.dentry, name, value, size, flags); + } else { + WARN_ON(flags != XATTR_REPLACE); + res = vfs_removexattr(&init_user_ns, lower_path.dentry, name); + } + + hmdfs_put_lower_path(&lower_path); + return res; +} + +static int hmdfs_xattr_remote_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + struct inode *inode = d_inode(dentry); + struct hmdfs_inode_info *info = hmdfs_i(inode); + struct hmdfs_peer *conn = info->conn; + char *send_buf = NULL; + int res = 0; + + send_buf = hmdfs_get_dentry_relative_path(dentry); + if (!send_buf) + return -ENOMEM; + + res = hmdfs_send_setxattr(conn, send_buf, name, value, size, flags); + kfree(send_buf); + return res; +} + +static int hmdfs_xattr_merge_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + int err = 0; + struct dentry *lower_dentry = hmdfs_get_lo_d(dentry, HMDFS_DEVID_LOCAL); + + if (!lower_dentry) { + err = -EOPNOTSUPP; + goto out; + } + err = hmdfs_xattr_local_set(lower_dentry, name, value, size, flags); +out: + dput(lower_dentry); + return err; +} + +static int hmdfs_xattr_set(const struct xattr_handler *handler, struct user_namespace *mnt_userns, + struct dentry *dentry, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) +{ + struct hmdfs_inode_info *info = hmdfs_i(inode); + + if (!hmdfs_support_xattr(dentry)) + return -EOPNOTSUPP; + + if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) + return -EOPNOTSUPP; + + if (size > HMDFS_XATTR_SIZE_MAX) { + hmdfs_info("no support too long xattr value: %zu", size); + return -E2BIG; + } + + if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL) + return hmdfs_xattr_local_set(dentry, name, value, size, flags); + else if (info->inode_type == HMDFS_LAYER_OTHER_MERGE) + return hmdfs_xattr_merge_set(dentry, name, value, size, flags); + + return hmdfs_xattr_remote_set(dentry, name, value, size, flags); +} + +const struct xattr_handler hmdfs_xattr_handler = { + .prefix = "", /* catch all */ + .get = hmdfs_xattr_get, + .set = hmdfs_xattr_set, +}; + +static const struct xattr_handler *hmdfs_xattr_handlers[] = { + &hmdfs_xattr_handler, +}; + +#define HMDFS_NODE_EVT_CB_DELAY 2 + +struct kmem_cache *hmdfs_inode_cachep; +struct kmem_cache *hmdfs_dentry_cachep; + +static void i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + + kmem_cache_free(hmdfs_inode_cachep, + container_of(inode, struct hmdfs_inode_info, + vfs_inode)); +} + +static void hmdfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, 
i_callback); +} + +static void hmdfs_evict_inode(struct inode *inode) +{ + struct hmdfs_inode_info *info = hmdfs_i(inode); + + truncate_inode_pages(&inode->i_data, 0); + clear_inode(inode); + if (info->inode_type == HMDFS_LAYER_FIRST_DEVICE || + info->inode_type == HMDFS_LAYER_SECOND_REMOTE) + return; + if (info->inode_type == HMDFS_LAYER_ZERO || + info->inode_type == HMDFS_LAYER_OTHER_LOCAL || + info->inode_type == HMDFS_LAYER_SECOND_LOCAL) { + iput(info->lower_inode); + info->lower_inode = NULL; + } +} + +void hmdfs_put_super(struct super_block *sb) +{ + struct hmdfs_sb_info *sbi = hmdfs_sb(sb); + struct super_block *lower_sb = sbi->lower_sb; + + hmdfs_info("local_dst is %s, local_src is %s", sbi->local_dst, + sbi->local_src); + + hmdfs_cfn_destroy(sbi); + hmdfs_unregister_sysfs(sbi); + hmdfs_connections_stop(sbi); + hmdfs_clear_share_table(sbi); + hmdfs_destroy_server_writeback(sbi); + hmdfs_exit_stash(sbi); + atomic_dec(&lower_sb->s_active); + put_cred(sbi->cred); + if (sbi->system_cred) + put_cred(sbi->system_cred); + hmdfs_destroy_writeback(sbi); + kfree(sbi->local_src); + kfree(sbi->local_dst); + kfree(sbi->real_dst); + kfree(sbi->cache_dir); + kfifo_free(&sbi->notify_fifo); + sb->s_fs_info = NULL; + sbi->lower_sb = NULL; + hmdfs_release_sysfs(sbi); + /* After all access are completed */ + hmdfs_free_sb_seq(sbi->seq); + kfree(sbi->s_server_statis); + kfree(sbi->s_client_statis); + kfree(sbi); +} + +static struct inode *hmdfs_alloc_inode(struct super_block *sb) +{ + struct hmdfs_inode_info *gi = + kmem_cache_alloc(hmdfs_inode_cachep, GFP_KERNEL); + if (!gi) + return NULL; + memset(gi, 0, offsetof(struct hmdfs_inode_info, vfs_inode)); + INIT_LIST_HEAD(&gi->wb_list); + init_rwsem(&gi->wpage_sem); + gi->getattr_isize = HMDFS_STALE_REMOTE_ISIZE; + atomic64_set(&gi->write_counter, 0); + gi->fid.id = HMDFS_INODE_INVALID_FILE_ID; + spin_lock_init(&gi->fid_lock); + INIT_LIST_HEAD(&gi->wr_opened_node); + atomic_set(&gi->wr_opened_cnt, 0); + init_waitqueue_head(&gi->fid_wq); + INIT_LIST_HEAD(&gi->stash_node); + spin_lock_init(&gi->stash_lock); + return &gi->vfs_inode; +} + +static int hmdfs_remote_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + int error = 0; + int ret = 0; + char *dir_path = NULL; + char *name_path = NULL; + struct hmdfs_peer *con = NULL; + struct hmdfs_sb_info *sbi = hmdfs_sb(dentry->d_inode->i_sb); + + dir_path = hmdfs_get_dentry_relative_path(dentry->d_parent); + if (!dir_path) { + error = -EACCES; + goto rmdir_out; + } + + name_path = hmdfs_connect_path(dir_path, dentry->d_name.name); + if (!name_path) { + error = -EACCES; + goto rmdir_out; + } + mutex_lock(&sbi->connections.node_lock); + list_for_each_entry(con, &sbi->connections.node_list, list) { + if (con->status == NODE_STAT_ONLINE && + con->version > USERSPACE_MAX_VER) { + peer_get(con); + mutex_unlock(&sbi->connections.node_lock); + hmdfs_debug("send MSG to remote devID %llu", + con->device_id); + ret = hmdfs_send_statfs(con, name_path, buf); + if (ret != 0) + error = ret; + peer_put(con); + mutex_lock(&sbi->connections.node_lock); + } + } + mutex_unlock(&sbi->connections.node_lock); + +rmdir_out: + kfree(dir_path); + kfree(name_path); + return error; +} + +static int hmdfs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + int err = 0; + struct path lower_path; + struct hmdfs_inode_info *info = hmdfs_i(dentry->d_inode); + struct super_block *sb = d_inode(dentry)->i_sb; + struct hmdfs_sb_info *sbi = sb->s_fs_info; + + trace_hmdfs_statfs(dentry, info->inode_type); + // merge_view & merge_view/xxx & 
device_view are assigned the src inode info
+	if (hmdfs_i_merge(info) ||
+	    (info->inode_type == HMDFS_LAYER_SECOND_REMOTE)) {
+		err = kern_path(sbi->local_src, 0, &lower_path);
+		if (err)
+			goto out;
+		err = vfs_statfs(&lower_path, buf);
+		path_put(&lower_path);
+	} else if (!IS_ERR_OR_NULL(info->lower_inode)) {
+		hmdfs_get_lower_path(dentry, &lower_path);
+		err = vfs_statfs(&lower_path, buf);
+		hmdfs_put_lower_path(&lower_path);
+	} else {
+		err = hmdfs_remote_statfs(dentry, buf);
+	}
+
+	buf->f_type = HMDFS_SUPER_MAGIC;
+out:
+	return err;
+}
+
+static int hmdfs_show_options(struct seq_file *m, struct dentry *root)
+{
+	struct hmdfs_sb_info *sbi = hmdfs_sb(root->d_sb);
+
+	if (sbi->s_case_sensitive)
+		seq_puts(m, ",sensitive");
+	else
+		seq_puts(m, ",insensitive");
+
+	if (sbi->s_merge_switch)
+		seq_puts(m, ",merge_enable");
+	else
+		seq_puts(m, ",merge_disable");
+
+	seq_printf(m, ",ra_pages=%lu", root->d_sb->s_bdi->ra_pages);
+	seq_printf(m, ",user_id=%u", sbi->user_id);
+
+	if (sbi->cache_dir)
+		seq_printf(m, ",cache_dir=%s", sbi->cache_dir);
+	if (sbi->real_dst)
+		seq_printf(m, ",real_dst=%s", sbi->real_dst);
+
+	seq_printf(m, ",%soffline_stash", sbi->s_offline_stash ? "" : "no_");
+	seq_printf(m, ",%sdentry_cache", sbi->s_dentry_cache ? "" : "no_");
+
+	return 0;
+}
+
+static int hmdfs_sync_fs(struct super_block *sb, int wait)
+{
+	int time_left;
+	int err = 0;
+	struct hmdfs_peer *con = NULL;
+	struct hmdfs_sb_info *sbi = hmdfs_sb(sb);
+	int syncfs_timeout = get_cmd_timeout(sbi, F_SYNCFS);
+	struct syncfs_item item, *entry = NULL, *tmp = NULL;
+
+	if (!wait)
+		return 0;
+
+	trace_hmdfs_syncfs_enter(sbi);
+
+	/*
+	 * If another syncfs is already running, queue up and sleep. Only
+	 * the newest waiter is run afterwards: one later syncfs covers
+	 * everything the earlier waiters dirtied, so the rest are
+	 * completed without running (see the wait_list/pending_list
+	 * handling at the end of this function).
+	 */
+	spin_lock(&sbi->hsi.list_lock);
+	if (!sbi->hsi.is_executing) {
+		sbi->hsi.is_executing = true;
+		item.need_abort = false;
+		spin_unlock(&sbi->hsi.list_lock);
+	} else {
+		init_completion(&item.done);
+		list_add_tail(&item.list, &sbi->hsi.wait_list);
+		spin_unlock(&sbi->hsi.list_lock);
+		wait_for_completion(&item.done);
+	}
+
+	if (item.need_abort)
+		goto out;
+
+	/*
+	 * Syncfs cannot run concurrently in hmdfs_sync_fs(): we have to
+	 * wait until every remote syncfs call returns or times out, and
+	 * during that wait @sbi->hsi.wait_count and @sbi->hsi.remote_ret
+	 * must be protected from concurrent updates.
+	 */
+
+	spin_lock(&sbi->hsi.v_lock);
+	sbi->hsi.version++;
+	/*
+	 * Attention: @sbi->hsi.remote_ret and @sbi->hsi.wait_count are
+	 * assigned inside the spinlock protection area to avoid the
+	 * following scenario caused by out-of-order execution:
+	 *
+	 *   syncfs                            syncfs_cb
+	 *   sbi->hsi.remote_ret = 0;
+	 *   atomic_set(&sbi->hsi.wait_count, 0);
+	 *                                     lock
+	 *                                     version == old_version
+	 *                                     sbi->hsi.remote_ret = resp->ret_code
+	 *                                     atomic_dec(&sbi->hsi.wait_count);
+	 *                                     unlock
+	 *   lock
+	 *   version = old_version + 1
+	 *   unlock
+	 *
+	 * If the two assignments were made before taking the lock, they
+	 * could compete with syncfs_cb(); doing both of them under the
+	 * spinlock fixes this.
+	 */
+	sbi->hsi.remote_ret = 0;
+	atomic_set(&sbi->hsi.wait_count, 0);
+	spin_unlock(&sbi->hsi.v_lock);
+
+	mutex_lock(&sbi->connections.node_lock);
+	list_for_each_entry(con, &sbi->connections.node_list, list) {
+		/*
+		 * Dirty data does not need to be synchronized to remote
+		 * devices that go offline normally. It's okay to drop
+		 * them.
+		 */
+		if (con->status != NODE_STAT_ONLINE)
+			continue;
+
+		peer_get(con);
+		mutex_unlock(&sbi->connections.node_lock);
+
+		/*
+		 * There exists a gap between sync_inodes_sb() and sync_fs()
+		 * which may race with remote writes, leaving an inaccurate
+		 * count in @sb_dirty_count: dirty data produced during the
+		 * gap would not be synced by the next syncfs operation.
+		 * To avoid this, we have to invoke sync_inodes_sb() again
+		 * after sampling @con->sb_dirty_count.
+		 */
+		con->old_sb_dirty_count = atomic64_read(&con->sb_dirty_count);
+		sync_inodes_sb(sb);
+
+		if (!con->old_sb_dirty_count) {
+			peer_put(con);
+			mutex_lock(&sbi->connections.node_lock);
+			continue;
+		}
+
+		err = hmdfs_send_syncfs(con, syncfs_timeout);
+		if (err) {
+			hmdfs_warning("send syncfs failed with %d on node %llu",
+				      err, con->device_id);
+			sbi->hsi.remote_ret = err;
+			peer_put(con);
+			mutex_lock(&sbi->connections.node_lock);
+			continue;
+		}
+
+		atomic_inc(&sbi->hsi.wait_count);
+
+		peer_put(con);
+		mutex_lock(&sbi->connections.node_lock);
+	}
+	mutex_unlock(&sbi->connections.node_lock);
+
+	/*
+	 * Background async work makes sure @sbi->hsi.wait_count eventually
+	 * drops to zero whether syncfs succeeds or fails.
+	 */
+	time_left = wait_event_interruptible(
+		sbi->hsi.wq, atomic_read(&sbi->hsi.wait_count) == 0);
+	if (time_left < 0) {
+		hmdfs_warning("syncfs is interrupted by external signal");
+		err = -EINTR;
+	}
+
+	if (!err && sbi->hsi.remote_ret)
+		err = sbi->hsi.remote_ret;
+
+	/* Abandon syncfs processes in pending_list */
+	list_for_each_entry_safe(entry, tmp, &sbi->hsi.pending_list, list) {
+		entry->need_abort = true;
+		complete(&entry->done);
+	}
+	INIT_LIST_HEAD(&sbi->hsi.pending_list);
+
+	/* Pick the last syncfs process in wait_list */
+	spin_lock(&sbi->hsi.list_lock);
+	if (list_empty(&sbi->hsi.wait_list)) {
+		sbi->hsi.is_executing = false;
+	} else {
+		entry = list_last_entry(&sbi->hsi.wait_list, struct syncfs_item,
+					list);
+		list_del_init(&entry->list);
+		list_splice_init(&sbi->hsi.wait_list, &sbi->hsi.pending_list);
+		entry->need_abort = false;
+		complete(&entry->done);
+	}
+	spin_unlock(&sbi->hsi.list_lock);
+
+out:
+	trace_hmdfs_syncfs_exit(sbi, atomic_read(&sbi->hsi.wait_count),
+				get_cmd_timeout(sbi, F_SYNCFS), err);
+
+	/* TODO: Return syncfs err back to syscall */
+
+	return err;
+}
+
+struct super_operations hmdfs_sops = {
+	.alloc_inode = hmdfs_alloc_inode,
+	.destroy_inode = hmdfs_destroy_inode,
+	.evict_inode = hmdfs_evict_inode,
+	.put_super = hmdfs_put_super,
+	.statfs = hmdfs_statfs,
+	.show_options = hmdfs_show_options,
+	.sync_fs = hmdfs_sync_fs,
+};
+
+static void init_once(void *obj)
+{
+	struct hmdfs_inode_info *i = obj;
+
+	inode_init_once(&i->vfs_inode);
+}
+
+static int __init hmdfs_init_caches(void)
+{
+	int err = -ENOMEM;
+
+	hmdfs_inode_cachep =
+		kmem_cache_create("hmdfs_inode_cache",
+				  sizeof(struct hmdfs_inode_info), 0,
+				  SLAB_RECLAIM_ACCOUNT, init_once);
+	if (unlikely(!hmdfs_inode_cachep))
+		goto out;
+	hmdfs_dentry_cachep =
+		kmem_cache_create("hmdfs_dentry_cache",
+				  sizeof(struct hmdfs_dentry_info), 0,
+				  SLAB_RECLAIM_ACCOUNT, NULL);
+	if (unlikely(!hmdfs_dentry_cachep))
+		goto out_des_ino;
+	hmdfs_dentry_merge_cachep =
+		kmem_cache_create("hmdfs_dentry_merge_cache",
+				  sizeof(struct hmdfs_dentry_info_merge), 0,
+				  SLAB_RECLAIM_ACCOUNT, NULL);
+	if (unlikely(!hmdfs_dentry_merge_cachep))
+		goto out_des_dc;
+	return 0;
+
+out_des_dc:
+	kmem_cache_destroy(hmdfs_dentry_cachep);
+out_des_ino:
+	kmem_cache_destroy(hmdfs_inode_cachep);
+out:
+	return err;
+}
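+
+/*
+ * Only the inode cache above gets a constructor. A kmem_cache
+ * constructor runs when the backing slab memory is first set up, not
+ * on every kmem_cache_alloc(), which is exactly what inode_init_once()
+ * expects. A minimal sketch of the same pattern (hypothetical names):
+ *
+ *   static void my_ctor(void *obj)
+ *   {
+ *           inode_init_once(&((struct my_info *)obj)->vfs_inode);
+ *   }
+ *
+ *   cache = kmem_cache_create("my_cache", sizeof(struct my_info), 0,
+ *                             SLAB_RECLAIM_ACCOUNT, my_ctor);
+ *
+ * The dentry caches embed no such long-lived state, so they pass NULL.
+ */
+
+static void 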
hmdfs_destroy_caches(void) +{ + rcu_barrier(); + kmem_cache_destroy(hmdfs_inode_cachep); + hmdfs_inode_cachep = NULL; + kmem_cache_destroy(hmdfs_dentry_cachep); + hmdfs_dentry_cachep = NULL; + kmem_cache_destroy(hmdfs_dentry_merge_cachep); + hmdfs_dentry_merge_cachep = NULL; +} + +uint64_t path_hash(const char *path, int len, bool case_sense) +{ + uint64_t res = 0; + const char *kp = path; + char c; + /* Mocklisp hash function. */ + while (*kp) { + c = *kp; + if (!case_sense) + c = tolower(c); + res = (res << 5) - res + (uint64_t)(c); + kp++; + } + return res; +} + +static char *get_full_path(struct path *path) +{ + char *buf, *tmp; + char *ret = NULL; + + buf = kmalloc(PATH_MAX, GFP_KERNEL); + if (!buf) + goto out; + + tmp = d_path(path, buf, PATH_MAX); + if (IS_ERR(tmp)) + goto out; + + ret = kstrdup(tmp, GFP_KERNEL); +out: + kfree(buf); + return ret; +} + +static void hmdfs_init_cmd_timeout(struct hmdfs_sb_info *sbi) +{ + memset(sbi->s_cmd_timeout, 0xff, sizeof(sbi->s_cmd_timeout)); + + set_cmd_timeout(sbi, F_OPEN, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_RELEASE, TIMEOUT_NONE); + set_cmd_timeout(sbi, F_READPAGE, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_WRITEPAGE, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_ITERATE, TIMEOUT_30S); + set_cmd_timeout(sbi, F_CREATE, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_MKDIR, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_RMDIR, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_UNLINK, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_RENAME, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_SETATTR, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_STATFS, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_CONNECT_REKEY, TIMEOUT_NONE); + set_cmd_timeout(sbi, F_DROP_PUSH, TIMEOUT_NONE); + set_cmd_timeout(sbi, F_GETATTR, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_FSYNC, TIMEOUT_90S); + set_cmd_timeout(sbi, F_SYNCFS, TIMEOUT_30S); + set_cmd_timeout(sbi, F_GETXATTR, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_SETXATTR, TIMEOUT_COMMON); + set_cmd_timeout(sbi, F_LISTXATTR, TIMEOUT_COMMON); +} + +static int hmdfs_init_sbi(struct hmdfs_sb_info *sbi) +{ + int ret; + + ret = kfifo_alloc(&sbi->notify_fifo, PAGE_SIZE, GFP_KERNEL); + if (ret) + goto out; + + /* + * We have to use dynamic memory since struct server/client_statistic + * are DECLARED in hmdfs.h but DEFINED in socket_adapter.h. 
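+	 * Both arrays are sized by F_SIZE so that there is one statistics
+	 * slot per protocol command (see hmdfs_client_resp_statis() below,
+	 * which indexes them by cmd).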
+ */ + sbi->s_server_statis = + kzalloc(sizeof(*sbi->s_server_statis) * F_SIZE, GFP_KERNEL); + sbi->s_client_statis = + kzalloc(sizeof(*sbi->s_client_statis) * F_SIZE, GFP_KERNEL); + if (!sbi->s_server_statis || !sbi->s_client_statis) { + ret = -ENOMEM; + goto out; + } + + ret = hmdfs_alloc_sb_seq(); + if (ret < 0) { + hmdfs_err("no sb seq available err %d", ret); + goto out; + } + sbi->seq = ret; + ret = 0; + + spin_lock_init(&sbi->notify_fifo_lock); + mutex_init(&sbi->cmd_handler_mutex); + sbi->s_case_sensitive = false; + sbi->s_features = HMDFS_FEATURE_READPAGES | + HMDFS_FEATURE_READPAGES_OPEN | + HMDFS_ATOMIC_OPEN; + sbi->s_merge_switch = false; + sbi->dcache_threshold = DEFAULT_DCACHE_THRESHOLD; + sbi->dcache_precision = DEFAULT_DCACHE_PRECISION; + sbi->dcache_timeout = DEFAULT_DCACHE_TIMEOUT; + sbi->write_cache_timeout = DEFAULT_WRITE_CACHE_TIMEOUT; + hmdfs_init_cmd_timeout(sbi); + sbi->async_cb_delay = HMDFS_NODE_EVT_CB_DELAY; + sbi->async_req_max_active = DEFAULT_SRV_REQ_MAX_ACTIVE; + sbi->s_offline_stash = true; + sbi->s_dentry_cache = true; + sbi->wb_timeout_ms = HMDFS_DEF_WB_TIMEOUT_MS; + /* Initialize before hmdfs_register_sysfs() */ + atomic_set(&sbi->connections.conn_seq, 0); + mutex_init(&sbi->connections.node_lock); + INIT_LIST_HEAD(&sbi->connections.node_list); + + ret = hmdfs_init_share_table(sbi); + if (ret) + goto out; + init_waitqueue_head(&sbi->async_readdir_wq); + INIT_LIST_HEAD(&sbi->async_readdir_msg_list); + INIT_LIST_HEAD(&sbi->async_readdir_work_list); + spin_lock_init(&sbi->async_readdir_msg_lock); + spin_lock_init(&sbi->async_readdir_work_lock); + + return 0; + +out: + return ret; +} + +void hmdfs_client_resp_statis(struct hmdfs_sb_info *sbi, u8 cmd, + enum hmdfs_resp_type type, unsigned long start, + unsigned long end) +{ + unsigned long duration; + + switch (type) { + case HMDFS_RESP_DELAY: + sbi->s_client_statis[cmd].delay_resp_cnt++; + break; + case HMDFS_RESP_TIMEOUT: + sbi->s_client_statis[cmd].timeout_cnt++; + break; + case HMDFS_RESP_NORMAL: + duration = end - start; + sbi->s_client_statis[cmd].total += duration; + sbi->s_client_statis[cmd].resp_cnt++; + if (sbi->s_client_statis[cmd].max < duration) + sbi->s_client_statis[cmd].max = duration; + break; + default: + hmdfs_err("Wrong cmd %d with resp type %d", cmd, type); + } +} + +static int hmdfs_update_dst(struct hmdfs_sb_info *sbi) +{ + int err = 0; + const char *path_local = UPDATE_LOCAL_DST; + int len = 0; + + sbi->real_dst = kstrdup(sbi->local_dst, GFP_KERNEL); + if (!sbi->real_dst) { + err = -ENOMEM; + goto out_err; + } + kfree(sbi->local_dst); + sbi->local_dst = NULL; + + len = strlen(sbi->real_dst) + strlen(path_local) + 1; + if (len > PATH_MAX) { + err = -EINVAL; + goto out_err; + } + sbi->local_dst = kmalloc(len, GFP_KERNEL); + if (!sbi->local_dst) { + err = -ENOMEM; + goto out_err; + } + snprintf(sbi->local_dst, strlen(sbi->real_dst) + strlen(path_local) + 1, + "%s%s", sbi->real_dst, path_local); +out_err: + return err; +} + +/* + * Generate boot cookie like following format: + * + * | random | boot time(ms) | 0x00 | + * |--------|-----------------|-------| + * 16 33 15 (bits) + * + * This will make sure boot cookie is unique in a period + * 2^33 / 1000 / 3600 / 24 = 99.4(days). 
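+ *
+ * For example, with a boot time of 100000 ms (0x186a0) and random
+ * bytes 0xabcd, the value before the final shift is
+ * (0xabcdULL << 33) | 0x186a0; hmdfs_gen_boot_cookie() then shifts it
+ * left so the 15 low bits stay zero, as in the layout above.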
+ */ +uint64_t hmdfs_gen_boot_cookie(void) +{ + uint64_t now; + uint16_t rand; + + now = ktime_to_ms(ktime_get()); + prandom_bytes(&rand, sizeof(rand)); + + now &= (1ULL << HMDFS_BOOT_COOKIE_RAND_SHIFT) - 1; + now |= ((uint64_t)rand << HMDFS_BOOT_COOKIE_RAND_SHIFT); + + return now << HMDFS_FID_VER_BOOT_COOKIE_SHIFT; +} + +static int hmdfs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct hmdfs_mount_priv *priv = (struct hmdfs_mount_priv *)data; + const char *dev_name = priv->dev_name; + const char *raw_data = priv->raw_data; + struct hmdfs_sb_info *sbi; + int err = 0; + struct inode *root_inode; + struct path lower_path; + struct super_block *lower_sb; + struct dentry *root_dentry; + char ctrl_path[CTRL_PATH_MAX_LEN]; + uint64_t ctrl_hash; + + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) { + err = -ENOMEM; + goto out_err; + } + err = hmdfs_init_sbi(sbi); + if (err) + goto out_freesbi; + sbi->sb = sb; + err = hmdfs_parse_options(sbi, raw_data); + if (err) + goto out_freesbi; + + sb->s_fs_info = sbi; + sb->s_magic = HMDFS_SUPER_MAGIC; + sb->s_xattr = hmdfs_xattr_handlers; + sb->s_op = &hmdfs_sops; + + sbi->boot_cookie = hmdfs_gen_boot_cookie(); + + err = hmdfs_init_writeback(sbi); + if (err) + goto out_freesbi; + err = hmdfs_init_server_writeback(sbi); + if (err) + goto out_freesbi; + + err = hmdfs_init_stash(sbi); + if (err) + goto out_freesbi; + + // add ctrl sysfs node + ctrl_hash = path_hash(sbi->local_dst, strlen(sbi->local_dst), true); + scnprintf(ctrl_path, CTRL_PATH_MAX_LEN, "%llu", ctrl_hash); + hmdfs_debug("hash %llu", ctrl_hash); + err = hmdfs_register_sysfs(ctrl_path, sbi); + if (err) + goto out_freesbi; + + err = hmdfs_update_dst(sbi); + if (err) + goto out_unreg_sysfs; + + err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, + &lower_path); + if (err) { + hmdfs_err("open dev failed, errno = %d", err); + goto out_unreg_sysfs; + } + + lower_sb = lower_path.dentry->d_sb; + atomic_inc(&lower_sb->s_active); + sbi->lower_sb = lower_sb; + sbi->local_src = get_full_path(&lower_path); + if (!sbi->local_src) { + hmdfs_err("get local_src failed!"); + goto out_sput; + } + + sb->s_time_gran = lower_sb->s_time_gran; + sb->s_maxbytes = lower_sb->s_maxbytes; + sb->s_stack_depth = lower_sb->s_stack_depth + 1; + if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { + hmdfs_err("maximum fs stacking depth exceeded"); + err = -EINVAL; + goto out_sput; + } + root_inode = fill_root_inode(sb, d_inode(lower_path.dentry)); + if (IS_ERR(root_inode)) { + err = PTR_ERR(root_inode); + goto out_sput; + } + hmdfs_root_inode_perm_init(root_inode); + sb->s_root = root_dentry = d_make_root(root_inode); + if (!root_dentry) { + err = -ENOMEM; + goto out_sput; + } + + err = init_hmdfs_dentry_info(sbi, root_dentry, HMDFS_LAYER_ZERO); + if (err) + goto out_freeroot; + hmdfs_set_lower_path(root_dentry, &lower_path); + sbi->cred = get_cred(current_cred()); + INIT_LIST_HEAD(&sbi->client_cache); + INIT_LIST_HEAD(&sbi->server_cache); + INIT_LIST_HEAD(&sbi->to_delete); + mutex_init(&sbi->cache_list_lock); + hmdfs_cfn_load(sbi); + + /* Initialize syncfs info */ + spin_lock_init(&sbi->hsi.v_lock); + init_waitqueue_head(&sbi->hsi.wq); + sbi->hsi.version = 0; + sbi->hsi.is_executing = false; + INIT_LIST_HEAD(&sbi->hsi.wait_list); + INIT_LIST_HEAD(&sbi->hsi.pending_list); + spin_lock_init(&sbi->hsi.list_lock); + + return err; +out_freeroot: + dput(sb->s_root); + sb->s_root = NULL; +out_sput: + atomic_dec(&lower_sb->s_active); + path_put(&lower_path); +out_unreg_sysfs: + 
hmdfs_unregister_sysfs(sbi); + hmdfs_release_sysfs(sbi); +out_freesbi: + if (sbi) { + sb->s_fs_info = NULL; + hmdfs_exit_stash(sbi); + hmdfs_destroy_writeback(sbi); + hmdfs_destroy_server_writeback(sbi); + kfifo_free(&sbi->notify_fifo); + hmdfs_free_sb_seq(sbi->seq); + kfree(sbi->local_src); + kfree(sbi->local_dst); + kfree(sbi->real_dst); + kfree(sbi->cache_dir); + kfree(sbi->s_server_statis); + kfree(sbi->s_client_statis); + kfree(sbi); + } +out_err: + return err; +} + +static struct dentry *hmdfs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) +{ + struct hmdfs_mount_priv priv = { + .dev_name = dev_name, + .raw_data = raw_data, + }; + + /* hmdfs needs a valid dev_name to get the lower_sb's metadata */ + if (!dev_name || !*dev_name) + return ERR_PTR(-EINVAL); + return mount_nodev(fs_type, flags, &priv, hmdfs_fill_super); +} + + +static void hmdfs_cancel_async_readdir(struct hmdfs_sb_info *sbi) +{ + struct sendmsg_wait_queue *msg_wq = NULL; + struct hmdfs_readdir_work *rw = NULL; + struct hmdfs_readdir_work *tmp = NULL; + struct list_head del_work; + + /* cancel work that are not running */ + + INIT_LIST_HEAD(&del_work); + spin_lock(&sbi->async_readdir_work_lock); + list_for_each_entry_safe(rw, tmp, &sbi->async_readdir_work_list, head) { + if (cancel_delayed_work(&rw->dwork)) + list_move(&rw->head, &del_work); + } + spin_unlock(&sbi->async_readdir_work_lock); + + list_for_each_entry_safe(rw, tmp, &del_work, head) { + dput(rw->dentry); + peer_put(rw->con); + kfree(rw); + } + + /* wake up async readdir that are waiting for remote */ + spin_lock(&sbi->async_readdir_msg_lock); + sbi->async_readdir_prohibit = true; + list_for_each_entry(msg_wq, &sbi->async_readdir_msg_list, async_msg) + hmdfs_response_wakeup(msg_wq, -EINTR, 0, NULL); + spin_unlock(&sbi->async_readdir_msg_lock); + + /* wait for all async readdir to finish */ + if (!list_empty(&sbi->async_readdir_work_list)) + wait_event_interruptible_timeout(sbi->async_readdir_wq, + (list_empty(&sbi->async_readdir_work_list)), HZ); + + WARN_ON(!(list_empty(&sbi->async_readdir_work_list))); +} + +static void hmdfs_kill_super(struct super_block *sb) +{ + struct hmdfs_sb_info *sbi = hmdfs_sb(sb); + + /* + * async readdir is holding ref for dentry, not for vfsmount. Thus + * shrink_dcache_for_umount() will warn about dentry still in use + * if async readdir is not done. 
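+	 * (kill_anon_super() ends up calling shrink_dcache_for_umount(),
+	 * which expects every dentry to be unreferenced by the time it
+	 * runs.)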
+	 */
+	if (sbi)
+		hmdfs_cancel_async_readdir(sbi);
+	kill_anon_super(sb);
+}
+
+static struct file_system_type hmdfs_fs_type = {
+	.owner = THIS_MODULE,
+	.name = "hmdfs",
+	.mount = hmdfs_mount,
+	.kill_sb = hmdfs_kill_super,
+};
+
+static int __init hmdfs_init(void)
+{
+	int err = 0;
+
+	err = hmdfs_init_caches();
+	if (err)
+		goto out_err;
+
+	hmdfs_node_evt_cb_init();
+
+	hmdfs_stash_add_node_evt_cb();
+	hmdfs_client_add_node_evt_cb();
+	hmdfs_server_add_node_evt_cb();
+
+	err = register_filesystem(&hmdfs_fs_type);
+	if (err) {
+		hmdfs_err("hmdfs register failed!");
+		goto out_err;
+	}
+
+	err = hmdfs_init_configfs();
+	if (err)
+		goto out_err;
+
+	err = hmdfs_sysfs_init();
+	if (err)
+		goto out_err;
+
+	hmdfs_message_verify_init();
+	return 0;
+out_err:
+	hmdfs_sysfs_exit();
+	hmdfs_exit_configfs();
+	unregister_filesystem(&hmdfs_fs_type);
+	hmdfs_destroy_caches();
+	hmdfs_err("hmdfs init failed!");
+	return err;
+}
+
+static void __exit hmdfs_exit(void)
+{
+	hmdfs_sysfs_exit();
+	hmdfs_exit_configfs();
+	unregister_filesystem(&hmdfs_fs_type);
+	ida_destroy(&hmdfs_sb_seq);
+	hmdfs_destroy_caches();
+	hmdfs_info("hmdfs exited!");
+}
+
+module_init(hmdfs_init);
+module_exit(hmdfs_exit);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(hmdfs_recv_mesg_callback);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("LongPing.WEI, Jingjing.Mao");
+MODULE_DESCRIPTION("Harmony distributed file system");
diff --git a/fs/hmdfs/server_writeback.c b/fs/hmdfs/server_writeback.c
new file mode 100755
index 000000000..b3a18ff67
--- /dev/null
+++ b/fs/hmdfs/server_writeback.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/server_writeback.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#include <linux/backing-dev.h>
+#include <linux/slab.h>
+#include <linux/writeback.h>
+
+#include "hmdfs.h"
+#include "hmdfs_trace.h"
+#include "server_writeback.h"
+
+#define HMDFS_SRV_WB_DEF_DIRTY_THRESH 50UL
+
+static void hmdfs_srv_wb_handler(struct work_struct *work)
+{
+	struct hmdfs_server_writeback *hswb = container_of(work,
+					struct hmdfs_server_writeback,
+					dirty_sb_writeback_work);
+	struct super_block *lower_sb = hswb->sbi->lower_sb;
+	int dirty_pages;
+
+	if (writeback_in_progress(&lower_sb->s_bdi->wb) ||
+	    !down_read_trylock(&lower_sb->s_umount))
+		return;
+
+	dirty_pages = hswb->dirty_nr_pages_to_wb;
+	writeback_inodes_sb_nr(lower_sb, dirty_pages, WB_REASON_FS_FREE_SPACE);
+	up_read(&lower_sb->s_umount);
+
+	trace_hmdfs_start_srv_wb(hswb->sbi, dirty_pages, hswb->dirty_thresh_pg);
+}
+
+void hmdfs_server_check_writeback(struct hmdfs_server_writeback *hswb)
+{
+	unsigned long old_time, now;
+	int dirty_nr_pages;
+
+	old_time = hswb->last_reset_time;
+	now = jiffies;
+	dirty_nr_pages = atomic_inc_return(&hswb->dirty_nr_pages);
+	if (time_after(now, old_time + HZ) &&
+	    cmpxchg(&hswb->last_reset_time, old_time, now) == old_time) {
+		/*
+		 * We calculate the speed of page dirtying to handle the
+		 * following situations (with 4 KiB pages, the default
+		 * threshold of 50 MiB/s equals 12800 pages/s):
+		 *
+		 * 1. Dense writing, average page-dirtying speed
+		 *    exceeds @hswb->dirty_thresh_pg:
+		 *        0-1s      100MB
+		 * 2. Sporadic writing, average page-dirtying speed
+		 *    falls below @hswb->dirty_thresh_pg:
+		 *        0-0.1s    40MB
+		 *        3.1-3.2s  20MB
+		 */
+		unsigned int writepage_speed;
+
+		writepage_speed = dirty_nr_pages / ((now - old_time) / HZ);
+		if (writepage_speed >= hswb->dirty_thresh_pg) {
+			/*
+			 * Write back @hswb->dirty_nr_pages_to_wb pages in
+			 * the server-writeback work. If the work is delayed
+			 * for more than 1s, @hswb->dirty_nr_pages_to_wb may
+			 * be assigned a new value (e.g. 60MB) and the old
+			 * value (e.g. 80MB) overwritten, so the 80MB batch
+			 * is never written back. We can tolerate this: if
+			 * the previous work has not completed, writeback
+			 * pressure is already too high and queuing more
+			 * work is pointless.
+			 */
+			hswb->dirty_nr_pages_to_wb = dirty_nr_pages;
+			/*
+			 * There are 3 conditions to trigger queuing work:
+			 *
+			 * A. Server successfully handles writepage for client
+			 * B. Every 1 second interval
+			 * C. Page-dirtying speed exceeds @dirty_thresh_pg
+			 */
+			queue_work(hswb->dirty_writeback_wq,
+				   &hswb->dirty_sb_writeback_work);
+		}
+
+		/*
+		 * There is no need to count the dirty pages from the
+		 * remote client very accurately. Allow the count to be
+		 * slightly off when another process increments it in the
+		 * gap between the increment and the zeroing.
+		 */
+		atomic_set(&hswb->dirty_nr_pages, 0);
+	}
+}
+
+void hmdfs_destroy_server_writeback(struct hmdfs_sb_info *sbi)
+{
+	if (!sbi->h_swb)
+		return;
+
+	flush_work(&sbi->h_swb->dirty_sb_writeback_work);
+	destroy_workqueue(sbi->h_swb->dirty_writeback_wq);
+	kfree(sbi->h_swb);
+	sbi->h_swb = NULL;
+}
+
+int hmdfs_init_server_writeback(struct hmdfs_sb_info *sbi)
+{
+	struct hmdfs_server_writeback *hswb;
+	char name[HMDFS_WQ_NAME_LEN];
+
+	hswb = kzalloc(sizeof(struct hmdfs_server_writeback), GFP_KERNEL);
+	if (!hswb)
+		return -ENOMEM;
+
+	hswb->sbi = sbi;
+	hswb->dirty_writeback_control = true;
+	hswb->dirty_thresh_pg = HMDFS_SRV_WB_DEF_DIRTY_THRESH <<
+				HMDFS_MB_TO_PAGE_SHIFT;
+	atomic_set(&hswb->dirty_nr_pages, 0);
+	hswb->last_reset_time = jiffies;
+
+	snprintf(name, sizeof(name), "dfs_srv_wb%u", sbi->seq);
+	hswb->dirty_writeback_wq = create_singlethread_workqueue(name);
+	if (!hswb->dirty_writeback_wq) {
+		hmdfs_err("Failed to create server writeback workqueue!");
+		kfree(hswb);
+		return -ENOMEM;
+	}
+	INIT_WORK(&hswb->dirty_sb_writeback_work, hmdfs_srv_wb_handler);
+	sbi->h_swb = hswb;
+
+	return 0;
+}
+
diff --git a/fs/hmdfs/server_writeback.h b/fs/hmdfs/server_writeback.h
new file mode 100755
index 000000000..eb645e639
--- /dev/null
+++ b/fs/hmdfs/server_writeback.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs/hmdfs/server_writeback.h
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
+ */
+
+#ifndef SERVER_WRITEBACK_H
+#define SERVER_WRITEBACK_H
+
+#include "hmdfs.h"
+
+#define HMDFS_MB_TO_PAGE_SHIFT (20 - HMDFS_PAGE_OFFSET)
+
+struct hmdfs_server_writeback {
+	struct hmdfs_sb_info *sbi;
+	/* Enable hmdfs server dirty writeback control */
+	bool dirty_writeback_control;
+
+	/* Current # of dirty pages from remote client in recent 1s */
+	atomic_t dirty_nr_pages;
+	/* Current # of dirty pages to writeback */
+	int dirty_nr_pages_to_wb;
+	/* Dirty thresh(Dirty data pages in 1s) to trigger wb */
+	unsigned int dirty_thresh_pg;
+	/* Last reset timestamp(in jiffies) for @dirty_nr_pages */
+	unsigned long last_reset_time;
+
+	struct workqueue_struct *dirty_writeback_wq;
+	/* Per-fs pages from client writeback work */
+	struct work_struct dirty_sb_writeback_work;
+};
+
+void hmdfs_server_check_writeback(struct hmdfs_server_writeback *hswb);
+
+void hmdfs_destroy_server_writeback(struct hmdfs_sb_info *sbi);
+
+int hmdfs_init_server_writeback(struct hmdfs_sb_info *sbi);
+
+#endif
diff --git a/fs/hmdfs/stash.c b/fs/hmdfs/stash.c
new file mode 100755
index 000000000..7f458a49d
--- /dev/null
+++ b/fs/hmdfs/stash.c
@@ -0,0 +1,2247 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/hmdfs/stash.c
+ *
+ * Copyright (c) 2020-2021 Huawei Device Co., Ltd.
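+ *
+ * A stash file created by this code is laid out in units of
+ * HMDFS_STASH_BLK_SIZE blocks, roughly:
+ *
+ *   block 0          struct hmdfs_cache_file_head (CRC32-protected)
+ *   block path_offs  the stashed file's hmdfs-relative path
+ *   block data_offs  the stashed page data
+ *
+ * path_offs and data_offs are block numbers; see
+ * hmdfs_new_file_cache() below for how they are computed.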
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "stash.h" +#include "comm/node_cb.h" +#include "comm/protocol.h" +#include "comm/connection.h" +#include "file_remote.h" +#include "hmdfs_dentryfile.h" +#include "authority/authentication.h" + +/* Head magic used to identify a stash file */ +#define HMDFS_STASH_FILE_HEAD_MAGIC 0xF7AB06C3 +/* Head and path in stash file are aligned with HMDFS_STASH_BLK_SIZE */ +#define HMDFS_STASH_BLK_SIZE 4096 +#define HMDFS_STASH_BLK_SHIFT 12 +#define HMDFS_STASH_PAGE_TO_SECTOR_SHIFT 3 +#define HMDFS_STASH_DIR_NAME "stash" +#define HMDFS_STASH_FMT_DIR_NAME "v1" +#define HMDFS_STASH_WORK_DIR_NAME \ + (HMDFS_STASH_DIR_NAME "/" HMDFS_STASH_FMT_DIR_NAME) + +#define HMDFS_STASH_FILE_NAME_LEN 20 + +#define HMDFS_STASH_FLUSH_CNT 2 + +#define HMDFS_STASH_PATH_LEN (HMDFS_CID_SIZE + HMDFS_STASH_FILE_NAME_LEN + 1) + +struct hmdfs_cache_file_head { + __le32 magic; + __le32 crc_offset; + __le64 ino; + __le64 size; + __le64 blocks; + __le64 last_write_pos; + __le64 ctime; + __le32 ctime_nsec; + __le32 change_detect_cap; + __le64 ichange_count; + __le32 path_offs; + __le32 path_len; + __le32 path_cnt; + __le32 data_offs; + /* Attention: expand new fields in here to compatible with old ver */ + __le32 crc32; +} __packed; + +struct hmdfs_stash_work { + struct hmdfs_peer *conn; + struct list_head *list; + struct work_struct work; + struct completion done; +}; + +struct hmdfs_inode_tbl { + unsigned int cnt; + unsigned int max; + uint64_t inodes[0]; +}; + +struct hmdfs_stash_dir_context { + struct dir_context dctx; + char name[NAME_MAX + 1]; + struct hmdfs_inode_tbl *tbl; +}; + +struct hmdfs_restore_stats { + unsigned int succeed; + unsigned int fail; + unsigned int keep; + unsigned long long ok_pages; + unsigned long long fail_pages; +}; + +struct hmdfs_stash_stats { + unsigned int succeed; + unsigned int donothing; + unsigned int fail; + unsigned long long ok_pages; + unsigned long long fail_pages; +}; + +struct hmdfs_file_restore_ctx { + struct hmdfs_peer *conn; + struct path src_dir_path; + struct path dst_root_path; + char *dst; + char *page; + struct file *src_filp; + uint64_t inum; + uint64_t pages; + unsigned int seq; + unsigned int data_offs; + /* output */ + bool keep; +}; + +struct hmdfs_copy_args { + struct file *src; + struct file *dst; + void *buf; + size_t buf_len; + unsigned int seq; + unsigned int data_offs; + uint64_t inum; +}; + +struct hmdfs_copy_ctx { + struct hmdfs_copy_args args; + loff_t src_pos; + loff_t dst_pos; + /* output */ + size_t copied; + bool eof; +}; + +struct hmdfs_rebuild_stats { + unsigned int succeed; + unsigned int total; + unsigned int fail; + unsigned int invalid; +}; + +struct hmdfs_check_work { + struct hmdfs_peer *conn; + struct work_struct work; + struct completion done; +}; + +typedef int (*stash_operation_func)(struct hmdfs_peer *, + unsigned int, + struct path *, + const struct hmdfs_inode_tbl *, + void *); + +static struct dentry *hmdfs_do_vfs_mkdir(struct dentry *parent, + const char *name, int namelen, + umode_t mode) +{ + struct inode *dir = d_inode(parent); + struct dentry *child = NULL; + int err; + + inode_lock_nested(dir, I_MUTEX_PARENT); + + child = lookup_one_len(name, parent, namelen); + if (IS_ERR(child)) + goto out; + + if (d_is_positive(child)) { + if (d_can_lookup(child)) + goto out; + + dput(child); + child = ERR_PTR(-EINVAL); + goto out; + } + + err = vfs_mkdir(&init_user_ns, dir, child, mode); + if (err) { + 
dput(child); + child = ERR_PTR(err); + goto out; + } + +out: + inode_unlock(dir); + return child; +} + +struct dentry *hmdfs_stash_new_work_dir(struct dentry *parent) +{ + struct dentry *base = NULL; + struct dentry *work = NULL; + + base = hmdfs_do_vfs_mkdir(parent, HMDFS_STASH_DIR_NAME, + strlen(HMDFS_STASH_DIR_NAME), 0700); + if (IS_ERR(base)) + return base; + + work = hmdfs_do_vfs_mkdir(base, HMDFS_STASH_FMT_DIR_NAME, + strlen(HMDFS_STASH_FMT_DIR_NAME), 0700); + dput(base); + + return work; +} + +static struct file *hmdfs_new_stash_file(struct path *d_path, const char *cid) +{ + struct dentry *parent = NULL; + struct dentry *child = NULL; + struct file *filp = NULL; + struct path stash; + int err; + + parent = hmdfs_do_vfs_mkdir(d_path->dentry, cid, strlen(cid), 0700); + if (IS_ERR(parent)) { + err = PTR_ERR(parent); + hmdfs_err("mkdir error %d", err); + goto mkdir_err; + } + + child = vfs_tmpfile(&init_user_ns, parent, S_IFREG | 0600, 0); + if (IS_ERR(child)) { + err = PTR_ERR(child); + hmdfs_err("new stash file error %d", err); + goto tmpfile_err; + } + + stash.mnt = d_path->mnt; + stash.dentry = child; + filp = dentry_open(&stash, O_LARGEFILE | O_WRONLY, current_cred()); + if (IS_ERR(filp)) { + err = PTR_ERR(filp); + hmdfs_err("open stash file error %d", err); + goto open_err; + } + + dput(child); + dput(parent); + + return filp; + +open_err: + dput(child); +tmpfile_err: + dput(parent); +mkdir_err: + return ERR_PTR(err); +} + +static inline bool hmdfs_is_dir(struct dentry *child) +{ + return d_is_positive(child) && d_can_lookup(child); +} + +static inline bool hmdfs_is_reg(struct dentry *child) +{ + return d_is_positive(child) && d_is_reg(child); +} + +static void hmdfs_set_stash_file_head(const struct hmdfs_cache_info *cache, + uint64_t ino, + struct hmdfs_cache_file_head *head) +{ + long long blocks; + unsigned int crc_offset; + + memset(head, 0, sizeof(*head)); + head->magic = cpu_to_le32(HMDFS_STASH_FILE_HEAD_MAGIC); + head->ino = cpu_to_le64(ino); + head->size = cpu_to_le64(i_size_read(file_inode(cache->cache_file))); + blocks = atomic64_read(&cache->written_pgs) << + HMDFS_STASH_PAGE_TO_SECTOR_SHIFT; + head->blocks = cpu_to_le64(blocks); + head->path_offs = cpu_to_le32(cache->path_offs); + head->path_len = cpu_to_le32(cache->path_len); + head->path_cnt = cpu_to_le32(cache->path_cnt); + head->data_offs = cpu_to_le32(cache->data_offs); + crc_offset = offsetof(struct hmdfs_cache_file_head, crc32); + head->crc_offset = cpu_to_le32(crc_offset); + head->crc32 = cpu_to_le32(crc32(0, head, crc_offset)); +} + +static int hmdfs_flush_stash_file_metadata(struct hmdfs_inode_info *info) +{ + struct hmdfs_cache_info *cache = NULL; + struct hmdfs_peer *conn = info->conn; + struct hmdfs_cache_file_head cache_head; + size_t written; + loff_t pos; + unsigned int head_size; + + /* No metadata if no cache file info */ + cache = info->cache; + if (!cache) + return -EINVAL; + + if (strlen(cache->path) == 0) { + long long to_write_pgs = atomic64_read(&cache->to_write_pgs); + + /* Nothing to stash. No need to flush meta data. 
*/ + if (to_write_pgs == 0) + return 0; + + hmdfs_err("peer 0x%x:0x%llx inode 0x%llx lost %lld pages due to no path", + conn->owner, conn->device_id, + info->remote_ino, to_write_pgs); + return -EINVAL; + } + + hmdfs_set_stash_file_head(cache, info->remote_ino, &cache_head); + + /* Write head */ + pos = 0; + head_size = sizeof(cache_head); + written = kernel_write(cache->cache_file, &cache_head, head_size, &pos); + if (written != head_size) { + hmdfs_err("stash peer 0x%x:0x%llx ino 0x%llx write head len %u err %zd", + conn->owner, conn->device_id, info->remote_ino, + head_size, written); + return -EIO; + } + /* Write path */ + pos = (loff_t)cache->path_offs << HMDFS_STASH_BLK_SHIFT; + written = kernel_write(cache->cache_file, cache->path, cache->path_len, + &pos); + if (written != cache->path_len) { + hmdfs_err("stash peer 0x%x:0x%llx ino 0x%llx write path len %u err %zd", + conn->owner, conn->device_id, info->remote_ino, + cache->path_len, written); + return -EIO; + } + + return 0; +} + +/* Mainly from inode_wait_for_writeback() */ +static void hmdfs_wait_remote_writeback_once(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + struct inode *inode = &info->vfs_inode; + DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); + wait_queue_head_t *wq_head = NULL; + bool in_sync = false; + + spin_lock(&inode->i_lock); + in_sync = inode->i_state & I_SYNC; + spin_unlock(&inode->i_lock); + + if (!in_sync) + return; + + hmdfs_info("peer 0x%x:0x%llx ino 0x%llx wait for wb once", + conn->owner, conn->device_id, info->remote_ino); + + wq_head = bit_waitqueue(&inode->i_state, __I_SYNC); + __wait_on_bit(wq_head, &wq, bit_wait, TASK_UNINTERRUPTIBLE); +} + +static void hmdfs_reset_remote_write_err(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + struct address_space *mapping = info->vfs_inode.i_mapping; + int flags_err; + errseq_t old; + int wb_err; + + flags_err = filemap_check_errors(mapping); + + old = errseq_sample(&mapping->wb_err); + wb_err = errseq_check_and_advance(&mapping->wb_err, &old); + if (flags_err || wb_err) + hmdfs_warning("peer 0x%x:0x%llx inode 0x%llx wb error %d %d before stash", + conn->owner, conn->device_id, info->remote_ino, + flags_err, wb_err); +} + +static bool hmdfs_is_mapping_clean(struct address_space *mapping) +{ + bool clean = false; + + /* b93b016313b3b ("page cache: use xa_lock") introduces i_pages */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + xa_lock_irq(&mapping->i_pages); +#else + spin_lock_irq(&mapping->tree_lock); +#endif + clean = !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && + !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + xa_unlock_irq(&mapping->i_pages); +#else + spin_unlock_irq(&mapping->tree_lock); +#endif + return clean; +} + +static int hmdfs_flush_stash_file_data(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + struct inode *inode = &info->vfs_inode; + struct address_space *mapping = inode->i_mapping; + bool all_clean = true; + int err = 0; + int i; + + /* Wait for the completion of write syscall */ + inode_lock(inode); + inode_unlock(inode); + + all_clean = hmdfs_is_mapping_clean(mapping); + if (all_clean) { + hmdfs_reset_remote_write_err(conn, info); + return 0; + } + + /* + * No-sync_all writeback during offline may have not seen + * the setting of stash_status as HMDFS_REMOTE_INODE_STASHING + * and will call mapping_set_error() after we just reset + * the previous error. 
Wait for that writeback
+	 * once; any writeback that follows will take the local (stash)
+	 * path.
+	 */
+	hmdfs_wait_remote_writeback_once(conn, info);
+
+	/* Need to clear the previous error? */
+	hmdfs_reset_remote_write_err(conn, info);
+
+	/*
+	 * 1. dirty page: do write back
+	 * 2. writeback page: wait for its completion
+	 * 3. writeback -> redirty page: do filemap_write_and_wait()
+	 *    twice; the second pass must not allow another
+	 *    writeback -> redirty transition
+	 */
+	for (i = 0; i < HMDFS_STASH_FLUSH_CNT; i++) {
+		err = filemap_write_and_wait(mapping);
+		if (err) {
+			hmdfs_err("peer 0x%x:0x%llx inode 0x%llx #%d stash flush error %d",
+				  conn->owner, conn->device_id,
+				  info->remote_ino, i, err);
+			return err;
+		}
+	}
+
+	if (!hmdfs_is_mapping_clean(mapping))
+		hmdfs_err("peer 0x%x:0x%llx inode 0x%llx is still dirty dt %d wb %d",
+			  conn->owner, conn->device_id, info->remote_ino,
+			  !!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY),
+			  !!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK));
+
+	return 0;
+}
+
+static int hmdfs_flush_stash_file(struct hmdfs_inode_info *info)
+{
+	int err;
+
+	err = hmdfs_flush_stash_file_data(info->conn, info);
+	if (!err)
+		err = hmdfs_flush_stash_file_metadata(info);
+
+	return err;
+}
+
+static int hmdfs_enable_stash_file(struct hmdfs_inode_info *info,
+				   struct dentry *stash)
+{
+	char name[HMDFS_STASH_FILE_NAME_LEN];
+	struct dentry *parent = NULL;
+	struct inode *dir = NULL;
+	struct dentry *child = NULL;
+	int err = 0;
+	bool retried = false;
+
+	snprintf(name, sizeof(name), "0x%llx", info->remote_ino);
+
+	parent = lock_parent(stash);
+	dir = d_inode(parent);
+
+lookup_again:
+	child = lookup_one_len(name, parent, strlen(name));
+	if (IS_ERR(child)) {
+		err = PTR_ERR(child);
+		child = NULL;
+		hmdfs_err("lookup %s err %d", name, err);
+		goto out;
+	}
+
+	if (d_is_positive(child)) {
+		hmdfs_warning("%s exists (mode 0%o)",
+			      name, d_inode(child)->i_mode);
+
+		err = vfs_unlink(&init_user_ns, dir, child, NULL);
+		if (err) {
+			hmdfs_err("unlink %s err %d", name, err);
+			goto out;
+		}
+		if (retried) {
+			err = -EEXIST;
+			goto out;
+		}
+
+		retried = true;
+		dput(child);
+		goto lookup_again;
+	}
+
+	err = vfs_link(stash, &init_user_ns, dir, child, NULL);
+	if (err) {
+		hmdfs_err("link stash file to %s err %d", name, err);
+		goto out;
+	}
+
+out:
+	unlock_dir(parent);
+	if (child)
+		dput(child);
+
+	return err;
+}
+
+/* Return 1 if stash is done, 0 if nothing is stashed */
+static int hmdfs_close_stash_file(struct hmdfs_peer *conn,
+				  struct hmdfs_inode_info *info)
+{
+	struct file *cache_file = info->cache->cache_file;
+	struct dentry *c_dentry = file_dentry(cache_file);
+	struct inode *c_inode = d_inode(c_dentry);
+	long long to_write_pgs = atomic64_read(&info->cache->to_write_pgs);
+	int err;
+
+	hmdfs_info("peer 0x%x:0x%llx inode 0x%llx stashed bytes %lld pages %lld",
+		   conn->owner, conn->device_id, info->remote_ino,
+		   i_size_read(c_inode), to_write_pgs);
+
+	if (to_write_pgs == 0)
+		return 0;
+
+	err = vfs_fsync(cache_file, 0);
+	if (!err)
+		err = hmdfs_enable_stash_file(info, c_dentry);
+	else
+		hmdfs_err("fsync stash file err %d", err);
+
+	return err < 0 ? 
err : 1; +} + +static void hmdfs_del_file_cache(struct hmdfs_cache_info *cache) +{ + if (!cache) + return; + + fput(cache->cache_file); + kfree(cache->path_buf); + kfree(cache); +} + +static struct hmdfs_cache_info * +hmdfs_new_file_cache(struct hmdfs_peer *conn, struct hmdfs_inode_info *info) +{ + struct hmdfs_cache_info *cache = NULL; + struct dentry *stash_dentry = NULL; + int err; + + cache = kzalloc(sizeof(*cache), GFP_KERNEL); + if (!cache) + return ERR_PTR(-ENOMEM); + + atomic64_set(&cache->to_write_pgs, 0); + atomic64_set(&cache->written_pgs, 0); + cache->path_buf = kmalloc(PATH_MAX, GFP_KERNEL); + if (!cache->path_buf) { + err = -ENOMEM; + goto free_cache; + } + + /* Need to handle "hardlink" ? */ + stash_dentry = d_find_any_alias(&info->vfs_inode); + if (stash_dentry) { + /* Needs full path in hmdfs, will be a device-view path */ + cache->path = dentry_path_raw(stash_dentry, cache->path_buf, + PATH_MAX); + dput(stash_dentry); + if (IS_ERR(cache->path)) { + err = PTR_ERR(cache->path); + hmdfs_err("peer 0x%x:0x%llx inode 0x%llx gen path err %d", + conn->owner, conn->device_id, + info->remote_ino, err); + goto free_path; + } + } else { + /* Write-opened file was closed before finding dentry */ + hmdfs_info("peer 0x%x:0x%llx inode 0x%llx no dentry found", + conn->owner, conn->device_id, info->remote_ino); + cache->path_buf[0] = '\0'; + cache->path = cache->path_buf; + } + + cache->path_cnt = 1; + cache->path_len = strlen(cache->path) + 1; + cache->path_offs = DIV_ROUND_UP(sizeof(struct hmdfs_cache_file_head), + HMDFS_STASH_BLK_SIZE); + cache->data_offs = cache->path_offs + DIV_ROUND_UP(cache->path_len, + HMDFS_STASH_BLK_SIZE); + cache->cache_file = hmdfs_new_stash_file(&conn->sbi->stash_work_dir, + conn->cid); + if (IS_ERR(cache->cache_file)) { + err = PTR_ERR(cache->cache_file); + goto free_path; + } + + return cache; + +free_path: + kfree(cache->path_buf); +free_cache: + kfree(cache); + return ERR_PTR(err); +} + +static void hmdfs_init_stash_file_cache(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + struct hmdfs_cache_info *cache = NULL; + + cache = hmdfs_new_file_cache(conn, info); + if (IS_ERR(cache)) + /* + * Continue even creating stash info failed. + * We need to ensure there is no dirty pages + * after stash completes + */ + cache = NULL; + + /* Make write() returns */ + spin_lock(&info->stash_lock); + info->cache = cache; + info->stash_status = HMDFS_REMOTE_INODE_STASHING; + spin_unlock(&info->stash_lock); +} + +static void hmdfs_update_stash_stats(struct hmdfs_stash_stats *stats, + const struct hmdfs_cache_info *cache, + int err) +{ + unsigned long long ok_pages, fail_pages; + + if (cache) { + ok_pages = err > 0 ? atomic64_read(&cache->written_pgs) : 0; + fail_pages = atomic64_read(&cache->to_write_pgs) - ok_pages; + stats->ok_pages += ok_pages; + stats->fail_pages += fail_pages; + } + + if (err > 0) + stats->succeed++; + else if (!err) + stats->donothing++; + else + stats->fail++; +} + +/* Return 1 if stash is done, 0 if nothing is stashed */ +static int hmdfs_stash_remote_inode(struct hmdfs_inode_info *info, + struct hmdfs_stash_stats *stats) +{ + struct hmdfs_cache_info *cache = info->cache; + struct hmdfs_peer *conn = info->conn; + unsigned int status; + int err = 0; + + hmdfs_info("stash peer 0x%x:0x%llx ino 0x%llx", + conn->owner, conn->device_id, info->remote_ino); + + err = hmdfs_flush_stash_file(info); + if (!err) + err = hmdfs_close_stash_file(conn, info); + + if (err <= 0) + set_bit(HMDFS_FID_NEED_OPEN, &info->fid_flags); + status = err > 0 ? 
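/* stashed: keep the inode in RESTORING so hmdfs_restore_file() can pick it up on reconnect */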
HMDFS_REMOTE_INODE_RESTORING : + HMDFS_REMOTE_INODE_NONE; + spin_lock(&info->stash_lock); + info->cache = NULL; + /* + * Use smp_store_release() to ensure order between HMDFS_FID_NEED_OPEN + * and HMDFS_REMOTE_INODE_NONE. + */ + smp_store_release(&info->stash_status, status); + spin_unlock(&info->stash_lock); + + hmdfs_update_stash_stats(stats, cache, err); + hmdfs_del_file_cache(cache); + + return err; +} + +static void hmdfs_init_cache_for_stash_files(struct hmdfs_peer *conn, + struct list_head *list) +{ + const struct cred *old_cred = NULL; + struct hmdfs_inode_info *info = NULL; + + /* For file creation under stash_work_dir */ + old_cred = hmdfs_override_creds(conn->sbi->cred); + list_for_each_entry(info, list, stash_node) + hmdfs_init_stash_file_cache(conn, info); + hmdfs_revert_creds(old_cred); +} + +static void hmdfs_init_stash_cache_work_fn(struct work_struct *base) +{ + struct hmdfs_stash_work *work = + container_of(base, struct hmdfs_stash_work, work); + + hmdfs_init_cache_for_stash_files(work->conn, work->list); + complete(&work->done); +} + +static void hmdfs_init_cache_for_stash_files_by_work(struct hmdfs_peer *conn, + struct list_head *list) +{ + struct hmdfs_stash_work work = { + .conn = conn, + .list = list, + .done = COMPLETION_INITIALIZER_ONSTACK(work.done), + }; + + INIT_WORK_ONSTACK(&work.work, hmdfs_init_stash_cache_work_fn); + schedule_work(&work.work); + wait_for_completion(&work.done); +} + +static void hmdfs_stash_fetch_ready_files(struct hmdfs_peer *conn, + bool check, struct list_head *list) +{ + struct hmdfs_inode_info *info = NULL; + + spin_lock(&conn->wr_opened_inode_lock); + list_for_each_entry(info, &conn->wr_opened_inode_list, wr_opened_node) { + int status; + + /* Paired with *_release() in hmdfs_reset_stashed_inode() */ + status = smp_load_acquire(&info->stash_status); + if (status == HMDFS_REMOTE_INODE_NONE) { + list_add_tail(&info->stash_node, list); + /* + * Prevent close() removing the inode from + * writeable-opened inode list + */ + hmdfs_remote_add_wr_opened_inode_nolock(conn, info); + /* Prevent the inode from eviction */ + ihold(&info->vfs_inode); + } else if (check && status == HMDFS_REMOTE_INODE_STASHING) { + hmdfs_warning("peer 0x%x:0x%llx inode 0x%llx unexpected stash status %d", + conn->owner, conn->device_id, + info->remote_ino, status); + } + } + spin_unlock(&conn->wr_opened_inode_lock); +} + +static void hmdfs_stash_offline_prepare(struct hmdfs_peer *conn, int evt, + unsigned int seq) +{ + LIST_HEAD(preparing); + + if (!hmdfs_is_stash_enabled(conn->sbi)) + return; + + mutex_lock(&conn->offline_cb_lock); + + hmdfs_stash_fetch_ready_files(conn, true, &preparing); + + if (list_empty(&preparing)) + goto out; + + hmdfs_init_cache_for_stash_files_by_work(conn, &preparing); +out: + mutex_unlock(&conn->offline_cb_lock); +} + +static void hmdfs_track_inode_locked(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info) +{ + spin_lock(&conn->stashed_inode_lock); + list_add_tail(&info->stash_node, &conn->stashed_inode_list); + conn->stashed_inode_nr++; + spin_unlock(&conn->stashed_inode_lock); +} + +static void +hmdfs_update_peer_stash_stats(struct hmdfs_stash_statistics *stash_stats, + const struct hmdfs_stash_stats *stats) +{ + stash_stats->cur_ok = stats->succeed; + stash_stats->cur_nothing = stats->donothing; + stash_stats->cur_fail = stats->fail; + stash_stats->total_ok += stats->succeed; + stash_stats->total_nothing += stats->donothing; + stash_stats->total_fail += stats->fail; + stash_stats->ok_pages += stats->ok_pages; + 
stash_stats->fail_pages += stats->fail_pages;
+}
+
+static void hmdfs_stash_remote_inodes(struct hmdfs_peer *conn,
+ struct list_head *list)
+{
+ const struct cred *old_cred = NULL;
+ struct hmdfs_inode_info *info = NULL;
+ struct hmdfs_inode_info *next = NULL;
+ struct hmdfs_stash_stats stats;
+
+ /* For file creation, write and relink under stash_work_dir */
+ old_cred = hmdfs_override_creds(conn->sbi->cred);
+
+ memset(&stats, 0, sizeof(stats));
+ list_for_each_entry_safe(info, next, list, stash_node) {
+ int err;
+
+ list_del_init(&info->stash_node);
+
+ err = hmdfs_stash_remote_inode(info, &stats);
+ if (err > 0)
+ hmdfs_track_inode_locked(conn, info);
+
+ hmdfs_remote_del_wr_opened_inode(conn, info);
+ if (err <= 0)
+ iput(&info->vfs_inode);
+ }
+ hmdfs_revert_creds(old_cred);
+
+ hmdfs_update_peer_stash_stats(&conn->stats.stash, &stats);
+ hmdfs_info("peer 0x%x:0x%llx total stashed %u cur ok %u none %u fail %u",
+ conn->owner, conn->device_id, conn->stashed_inode_nr,
+ stats.succeed, stats.donothing, stats.fail);
+}
+
+static void hmdfs_stash_offline_do_stash(struct hmdfs_peer *conn, int evt,
+ unsigned int seq)
+{
+ struct hmdfs_inode_info *info = NULL;
+ LIST_HEAD(preparing);
+ LIST_HEAD(stashing);
+
+ if (!hmdfs_is_stash_enabled(conn->sbi))
+ return;
+
+ /* release seq_lock to avoid blocking non-offline sync callbacks */
+ mutex_unlock(&conn->seq_lock);
+ /* acquire offline_cb_lock to serialize with the offline sync callback */
+ mutex_lock(&conn->offline_cb_lock);
+
+ hmdfs_stash_fetch_ready_files(conn, false, &preparing);
+ if (!list_empty(&preparing))
+ hmdfs_init_cache_for_stash_files(conn, &preparing);
+
+ spin_lock(&conn->wr_opened_inode_lock);
+ list_for_each_entry(info, &conn->wr_opened_inode_list, wr_opened_node) {
+ int status = READ_ONCE(info->stash_status);
+
+ if (status == HMDFS_REMOTE_INODE_STASHING)
+ list_add_tail(&info->stash_node, &stashing);
+ }
+ spin_unlock(&conn->wr_opened_inode_lock);
+
+ if (list_empty(&stashing))
+ goto unlock;
+
+ hmdfs_stash_remote_inodes(conn, &stashing);
+
+unlock:
+ mutex_unlock(&conn->offline_cb_lock);
+ mutex_lock(&conn->seq_lock);
+}
+
+static struct hmdfs_inode_info *
+hmdfs_lookup_stash_inode(struct hmdfs_peer *conn, uint64_t inum)
+{
+ struct hmdfs_inode_info *info = NULL;
+
+ list_for_each_entry(info, &conn->stashed_inode_list, stash_node) {
+ if (info->remote_ino == inum)
+ return info;
+ }
+
+ return NULL;
+}
+
+static void hmdfs_untrack_stashed_inode(struct hmdfs_peer *conn,
+ struct hmdfs_inode_info *info)
+{
+ list_del_init(&info->stash_node);
+ iput(&info->vfs_inode);
+
+ conn->stashed_inode_nr--;
+}
+
+static void hmdfs_reset_stashed_inode(struct hmdfs_peer *conn,
+ struct hmdfs_inode_info *info)
+{
+ struct inode *ino = &info->vfs_inode;
+
+ /*
+ * Hold a reference so stash_status can still be updated
+ * after the iput() in hmdfs_untrack_stashed_inode()
+ */
+ ihold(ino);
+ hmdfs_untrack_stashed_inode(conn, info);
+ /*
+ * Ensure the order of stash_node and stash_status:
+ * only update stash_status to NONE after removal of
+ * stash_node is completed.
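+ * Pairs with the smp_load_acquire() of stash_status in
+ * hmdfs_stash_fetch_ready_files().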
+ */ + smp_store_release(&info->stash_status, + HMDFS_REMOTE_INODE_NONE); + iput(ino); +} + +static void hmdfs_drop_stashed_inodes(struct hmdfs_peer *conn) +{ + struct hmdfs_inode_info *info = NULL; + struct hmdfs_inode_info *next = NULL; + + if (list_empty(&conn->stashed_inode_list)) + return; + + hmdfs_warning("peer 0x%x:0x%llx drop unrestorable file %u", + conn->owner, conn->device_id, conn->stashed_inode_nr); + + list_for_each_entry_safe(info, next, + &conn->stashed_inode_list, stash_node) { + hmdfs_warning("peer 0x%x:0x%llx inode 0x%llx unrestorable status %u", + conn->owner, conn->device_id, info->remote_ino, + READ_ONCE(info->stash_status)); + + hmdfs_reset_stashed_inode(conn, info); + } +} + +static struct file *hmdfs_open_stash_dir(struct path *d_path, const char *cid) +{ + int err = 0; + struct dentry *parent = d_path->dentry; + struct inode *dir = d_inode(parent); + struct dentry *child = NULL; + struct path peer_path; + struct file *filp = NULL; + + inode_lock_nested(dir, I_MUTEX_PARENT); + child = lookup_one_len(cid, parent, strlen(cid)); + if (!IS_ERR(child)) { + if (!hmdfs_is_dir(child)) { + if (d_is_positive(child)) { + hmdfs_err("invalid stash dir mode 0%o", d_inode(child)->i_mode); + err = -EINVAL; + } else { + err = -ENOENT; + } + dput(child); + } + } else { + err = PTR_ERR(child); + hmdfs_err("lookup stash dir err %d", err); + } + inode_unlock(dir); + + if (err) + return ERR_PTR(err); + + peer_path.mnt = d_path->mnt; + peer_path.dentry = child; + filp = dentry_open(&peer_path, O_RDONLY | O_DIRECTORY, current_cred()); + if (IS_ERR(filp)) + hmdfs_err("open err %d", (int)PTR_ERR(filp)); + + dput(child); + + return filp; +} + +static int hmdfs_new_inode_tbl(struct hmdfs_inode_tbl **tbl) +{ + struct hmdfs_inode_tbl *new = NULL; + + new = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!new) + return -ENOMEM; + + new->cnt = 0; + new->max = (PAGE_SIZE - offsetof(struct hmdfs_inode_tbl, inodes)) / + sizeof(new->inodes[0]); + *tbl = new; + + return 0; +} + +static int hmdfs_parse_stash_file_name(struct dir_context *dctx, + const char *name, + int namelen, + unsigned int d_type, + uint64_t *stash_inum) +{ + struct hmdfs_stash_dir_context *ctx = NULL; + int err; + + if (d_type != DT_UNKNOWN && d_type != DT_REG) + return 0; + if (namelen > NAME_MAX) + return 0; + + ctx = container_of(dctx, struct hmdfs_stash_dir_context, dctx); + memcpy(ctx->name, name, namelen); + ctx->name[namelen] = '\0'; + err = kstrtoull(ctx->name, 16, stash_inum); + if (err) { + hmdfs_err("unexpected stash file err %d", err); + return 0; + } + return 1; +} + +static int hmdfs_has_stash_file(struct dir_context *dctx, const char *name, + int namelen, loff_t offset, + u64 inum, unsigned int d_type) +{ + struct hmdfs_stash_dir_context *ctx = NULL; + uint64_t stash_inum; + int err; + + ctx = container_of(dctx, struct hmdfs_stash_dir_context, dctx); + err = hmdfs_parse_stash_file_name(dctx, name, namelen, + d_type, &stash_inum); + if (!err) + return 0; + + ctx->tbl->cnt++; + return 1; +} + +static int hmdfs_fill_stash_file(struct dir_context *dctx, const char *name, + int namelen, loff_t offset, + u64 inum, unsigned int d_type) +{ + struct hmdfs_stash_dir_context *ctx = NULL; + uint64_t stash_inum; + int err; + + ctx = container_of(dctx, struct hmdfs_stash_dir_context, dctx); + err = hmdfs_parse_stash_file_name(dctx, name, namelen, + d_type, &stash_inum); + if (!err) + return 0; + if (ctx->tbl->cnt >= ctx->tbl->max) + return 1; + + ctx->tbl->inodes[ctx->tbl->cnt++] = stash_inum; + + return 0; +} + +static int 
hmdfs_del_stash_file(struct dentry *parent, struct dentry *child) +{ + struct inode *dir = d_inode(parent); + int err = 0; + + /* Prevent d_delete() from calling dentry_unlink_inode() */ + dget(child); + + inode_lock_nested(dir, I_MUTEX_PARENT); + err = vfs_unlink(&init_user_ns, dir, child, NULL); + if (err) + hmdfs_err("remove stash file err %d", err); + inode_unlock(dir); + + dput(child); + + return err; +} + +static inline bool hmdfs_is_node_offlined(const struct hmdfs_peer *conn, + unsigned int seq) +{ + /* + * open()/fsync() may fail due to "status = NODE_STAT_OFFLINE" + * in hmdfs_disconnect_node(). + * Pair with smp_mb() in hmdfs_disconnect_node() to ensure + * getting the newest event sequence. + */ + smp_mb__before_atomic(); + return hmdfs_node_evt_seq(conn) != seq; +} + +static int hmdfs_verify_restore_file_head(struct hmdfs_file_restore_ctx *ctx, + const struct hmdfs_cache_file_head *head) +{ + struct inode *inode = file_inode(ctx->src_filp); + struct hmdfs_peer *conn = ctx->conn; + unsigned int crc, read_crc, crc_offset; + loff_t path_offs, data_offs, isize; + int err = 0; + + if (le32_to_cpu(head->magic) != HMDFS_STASH_FILE_HEAD_MAGIC) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid magic: got 0x%x, exp 0x%x", + conn->owner, conn->device_id, ctx->inum, + le32_to_cpu(head->magic), + HMDFS_STASH_FILE_HEAD_MAGIC); + goto out; + } + + crc_offset = le32_to_cpu(head->crc_offset); + read_crc = le32_to_cpu(*((__le32 *)((char *)head + crc_offset))); + crc = crc32(0, head, crc_offset); + if (read_crc != crc) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid crc: got 0x%x, exp 0x%x", + conn->owner, conn->device_id, ctx->inum, + read_crc, crc); + goto out; + } + + if (le64_to_cpu(head->ino) != ctx->inum) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid ino: got %llu, exp %llu", + conn->owner, conn->device_id, ctx->inum, + le64_to_cpu(head->ino), ctx->inum); + goto out; + } + + path_offs = (loff_t)le32_to_cpu(head->path_offs) << + HMDFS_STASH_BLK_SHIFT; + if (path_offs <= 0 || path_offs >= i_size_read(inode)) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid path_offs %d, stash file size %llu", + conn->owner, conn->device_id, ctx->inum, + le32_to_cpu(head->path_offs), i_size_read(inode)); + goto out; + } + + data_offs = (loff_t)le32_to_cpu(head->data_offs) << + HMDFS_STASH_BLK_SHIFT; + if (path_offs >= data_offs) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid data_offs %d, path_offs %d", + conn->owner, conn->device_id, ctx->inum, + le32_to_cpu(head->data_offs), + le32_to_cpu(head->path_offs)); + goto out; + } + if (data_offs <= 0 || data_offs >= i_size_read(inode)) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid data_offs %d, stash file size %llu", + conn->owner, conn->device_id, ctx->inum, + le32_to_cpu(head->data_offs), i_size_read(inode)); + goto out; + } + + isize = le64_to_cpu(head->size); + if (isize != i_size_read(inode)) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid isize: got %llu, exp %llu", + conn->owner, conn->device_id, ctx->inum, + le64_to_cpu(head->size), i_size_read(inode)); + goto out; + } + + if (le32_to_cpu(head->path_cnt) < 1) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid path_cnt %d", + conn->owner, conn->device_id, ctx->inum, + le32_to_cpu(head->path_cnt)); + goto out; + } + +out: + return err; +} + +static int hmdfs_get_restore_file_metadata(struct hmdfs_file_restore_ctx *ctx) +{ + struct 
hmdfs_cache_file_head head; + struct hmdfs_peer *conn = ctx->conn; + unsigned int head_size, read_size, head_crc_offset; + loff_t pos; + ssize_t rd; + int err = 0; + + head_size = sizeof(struct hmdfs_cache_file_head); + memset(&head, 0, head_size); + /* Read part head */ + pos = 0; + read_size = offsetof(struct hmdfs_cache_file_head, crc_offset) + + sizeof(head.crc_offset); + rd = kernel_read(ctx->src_filp, &head, read_size, &pos); + if (rd != read_size) { + err = rd < 0 ? rd : -ENODATA; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx read part head err %d", + conn->owner, conn->device_id, ctx->inum, err); + goto out; + } + head_crc_offset = le32_to_cpu(head.crc_offset); + if (head_crc_offset + sizeof(head.crc32) < head_crc_offset || + head_crc_offset + sizeof(head.crc32) > head_size) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx got bad head: Too long crc_offset %u which exceeds head size %u", + conn->owner, conn->device_id, ctx->inum, + head_crc_offset, head_size); + goto out; + } + + /* Read full head */ + pos = 0; + read_size = le32_to_cpu(head.crc_offset) + sizeof(head.crc32); + rd = kernel_read(ctx->src_filp, &head, read_size, &pos); + if (rd != read_size) { + err = rd < 0 ? rd : -ENODATA; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx read full head err %d", + conn->owner, conn->device_id, ctx->inum, err); + goto out; + } + + err = hmdfs_verify_restore_file_head(ctx, &head); + if (err) + goto out; + + ctx->pages = le64_to_cpu(head.blocks) >> + HMDFS_STASH_PAGE_TO_SECTOR_SHIFT; + ctx->data_offs = le32_to_cpu(head.data_offs); + /* Read path */ + read_size = min_t(unsigned int, le32_to_cpu(head.path_len), PATH_MAX); + pos = (loff_t)le32_to_cpu(head.path_offs) << HMDFS_STASH_BLK_SHIFT; + rd = kernel_read(ctx->src_filp, ctx->dst, read_size, &pos); + if (rd != read_size) { + err = rd < 0 ? rd : -ENODATA; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx read path err %d", + conn->owner, conn->device_id, ctx->inum, err); + goto out; + } + if (strnlen(ctx->dst, read_size) >= read_size) { + err = -EUCLEAN; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx read path not end with \\0", + conn->owner, conn->device_id, ctx->inum); + goto out; + } + /* TODO: Pick a valid path from all paths */ + +out: + return err; +} + +static int hmdfs_open_restore_dst_file(struct hmdfs_file_restore_ctx *ctx, + unsigned int rw_flag, struct file **filp) +{ + struct hmdfs_peer *conn = ctx->conn; + struct file *dst = NULL; + int err = 0; + + err = hmdfs_get_restore_file_metadata(ctx); + if (err) + goto out; + + /* Error comes from connection or server ? */ + dst = file_open_root(&ctx->dst_root_path, + ctx->dst, O_LARGEFILE | rw_flag, 0); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + hmdfs_err("open remote file ino 0x%llx err %d", ctx->inum, err); + if (hmdfs_is_node_offlined(conn, ctx->seq)) + err = -ESHUTDOWN; + goto out; + } + + *filp = dst; +out: + return err; +} + +static bool hmdfs_need_abort_restore(struct hmdfs_file_restore_ctx *ctx, + struct hmdfs_inode_info *pinned, + struct file *opened_file) +{ + struct hmdfs_inode_info *opened = hmdfs_i(file_inode(opened_file)); + + if (opened->inode_type != HMDFS_LAYER_OTHER_REMOTE) + goto abort; + + if (opened == pinned) + return false; + +abort: + hmdfs_warning("peer 0x%x:0x%llx inode 0x%llx invalid remote file", + ctx->conn->owner, ctx->conn->device_id, ctx->inum); + hmdfs_warning("got: peer 0x%x:0x%llx inode 0x%llx type %d status %d", + opened->conn ? opened->conn->owner : 0, + opened->conn ? 
opened->conn->device_id : 0, + opened->remote_ino, opened->inode_type, + opened->stash_status); + hmdfs_warning("pinned: peer 0x%x:0x%llx inode 0x%llx type %d status %d", + pinned->conn->owner, pinned->conn->device_id, + pinned->remote_ino, pinned->inode_type, + pinned->stash_status); + return true; +} + +static void hmdfs_init_copy_args(const struct hmdfs_file_restore_ctx *ctx, + struct file *dst, struct hmdfs_copy_args *args) +{ + args->src = ctx->src_filp; + args->dst = dst; + args->buf = ctx->page; + args->buf_len = PAGE_SIZE; + args->seq = ctx->seq; + args->data_offs = ctx->data_offs; + args->inum = ctx->inum; +} + +static ssize_t hmdfs_write_dst(struct hmdfs_peer *conn, struct file *filp, + void *buf, size_t len, loff_t pos) +{ + mm_segment_t old_fs; + struct kiocb kiocb; + struct iovec iov; + struct iov_iter iter; + ssize_t wr; + int err = 0; + + file_start_write(filp); + + old_fs = force_uaccess_begin(); + + init_sync_kiocb(&kiocb, filp); + kiocb.ki_pos = pos; + + iov.iov_base = buf; + iov.iov_len = len; + iov_iter_init(&iter, WRITE, &iov, 1, len); + + wr = hmdfs_file_write_iter_remote_nocheck(&kiocb, &iter); + + force_uaccess_end(old_fs); + + file_end_write(filp); + + if (wr != len) { + struct hmdfs_inode_info *info = hmdfs_i(file_inode(filp)); + + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx short write ret %zd exp %zu", + conn->owner, conn->device_id, info->remote_ino, + wr, len); + err = wr < 0 ? (int)wr : -EFAULT; + } + + return err; +} + +static int hmdfs_rd_src_wr_dst(struct hmdfs_peer *conn, + struct hmdfs_copy_ctx *ctx) +{ + const struct hmdfs_copy_args *args = NULL; + int err = 0; + loff_t rd_pos; + ssize_t rd; + + ctx->eof = false; + ctx->copied = 0; + + args = &ctx->args; + rd_pos = ctx->src_pos; + rd = kernel_read(args->src, args->buf, args->buf_len, &rd_pos); + if (rd < 0) { + err = (int)rd; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx short read err %d", + conn->owner, conn->device_id, args->inum, err); + goto out; + } else if (rd == 0) { + ctx->eof = true; + goto out; + } + + err = hmdfs_write_dst(conn, args->dst, args->buf, rd, ctx->dst_pos); + if (!err) + ctx->copied = rd; + else if (hmdfs_is_node_offlined(conn, args->seq)) + err = -ESHUTDOWN; +out: + return err; +} + +static int hmdfs_copy_src_to_dst(struct hmdfs_peer *conn, + const struct hmdfs_copy_args *args) +{ + int err = 0; + struct file *src = NULL; + struct hmdfs_copy_ctx ctx; + loff_t seek_pos, data_init_pos; + loff_t src_size; + + ctx.args = *args; + + src = ctx.args.src; + data_init_pos = (loff_t)ctx.args.data_offs << HMDFS_STASH_BLK_SHIFT; + seek_pos = data_init_pos; + src_size = i_size_read(file_inode(src)); + while (true) { + loff_t data_pos; + + data_pos = vfs_llseek(src, seek_pos, SEEK_DATA); + if (data_pos > seek_pos) { + seek_pos = data_pos; + continue; + } else if (data_pos < 0) { + if (data_pos == -ENXIO) { + loff_t src_blks = file_inode(src)->i_blocks; + + hmdfs_info("peer 0x%x:0x%llx ino 0x%llx end at 0x%llx (sz 0x%llx blk 0x%llx)", + conn->owner, conn->device_id, + args->inum, seek_pos, + src_size, src_blks); + } else { + err = (int)data_pos; + hmdfs_err("peer 0x%x:0x%llx ino 0x%llx seek pos 0x%llx err %d", + conn->owner, conn->device_id, + args->inum, seek_pos, err); + } + break; + } + + hmdfs_debug("peer 0x%x:0x%llx ino 0x%llx seek to 0x%llx", + conn->owner, conn->device_id, args->inum, data_pos); + + ctx.src_pos = data_pos; + ctx.dst_pos = data_pos - data_init_pos; + err = hmdfs_rd_src_wr_dst(conn, &ctx); + if (err || ctx.eof) + break; + + seek_pos += ctx.copied; + if (seek_pos >= src_size) + 
break;
+ }
+
+ return err;
+}
+
+static int hmdfs_restore_src_to_dst(struct hmdfs_file_restore_ctx *ctx,
+ struct file *dst)
+{
+ struct file *src = ctx->src_filp;
+ struct hmdfs_copy_args args;
+ int err;
+
+ hmdfs_init_copy_args(ctx, dst, &args);
+ err = hmdfs_copy_src_to_dst(ctx->conn, &args);
+ if (err)
+ goto out;
+
+ err = vfs_fsync(dst, 0);
+ if (err) {
+ hmdfs_err("fsync remote file ino 0x%llx err %d", ctx->inum, err);
+ if (hmdfs_is_node_offlined(ctx->conn, ctx->seq))
+ err = -ESHUTDOWN;
+ }
+
+out:
+ if (err)
+ truncate_inode_pages(file_inode(dst)->i_mapping, 0);
+
+ /* Drop the stash file's now-unneeded page cache */
+ invalidate_mapping_pages(file_inode(src)->i_mapping, 0, -1);
+
+ return err;
+}
+
+static int hmdfs_restore_file(struct hmdfs_file_restore_ctx *ctx)
+{
+ struct hmdfs_peer *conn = ctx->conn;
+ uint64_t inum = ctx->inum;
+ struct hmdfs_inode_info *pinned_info = NULL;
+ struct file *dst_filp = NULL;
+ int err = 0;
+ bool keep = false;
+
+ hmdfs_info("peer 0x%x:0x%llx ino 0x%llx do restore",
+ conn->owner, conn->device_id, inum);
+
+ pinned_info = hmdfs_lookup_stash_inode(conn, inum);
+ if (pinned_info) {
+ unsigned int status = READ_ONCE(pinned_info->stash_status);
+
+ if (status != HMDFS_REMOTE_INODE_RESTORING) {
+ hmdfs_err("peer 0x%x:0x%llx ino 0x%llx invalid status %u",
+ conn->owner, conn->device_id, inum, status);
+ err = -EINVAL;
+ goto clean;
+ }
+ } else {
+ hmdfs_warning("peer 0x%x:0x%llx ino 0x%llx is not pinned",
+ conn->owner, conn->device_id, inum);
+ err = -EINVAL;
+ goto clean;
+ }
+
+ set_bit(HMDFS_FID_NEED_OPEN, &pinned_info->fid_flags);
+ err = hmdfs_open_restore_dst_file(ctx, O_RDWR, &dst_filp);
+ if (err) {
+ if (err == -ESHUTDOWN)
+ keep = true;
+ goto clean;
+ }
+
+ if (hmdfs_need_abort_restore(ctx, pinned_info, dst_filp))
+ goto abort;
+
+ err = hmdfs_restore_src_to_dst(ctx, dst_filp);
+ if (err == -ESHUTDOWN)
+ keep = true;
+abort:
+ fput(dst_filp);
+clean:
+ if (pinned_info && !keep)
+ hmdfs_reset_stashed_inode(conn, pinned_info);
+ ctx->keep = keep;
+
+ hmdfs_info("peer 0x%x:0x%llx ino 0x%llx restore err %d keep %d",
+ conn->owner, conn->device_id, inum, err, ctx->keep);
+
+ return err;
+}
+
+static int hmdfs_init_file_restore_ctx(struct hmdfs_peer *conn,
+ unsigned int seq, struct path *src_dir,
+ struct hmdfs_file_restore_ctx *ctx)
+{
+ struct hmdfs_sb_info *sbi = conn->sbi;
+ struct path dst_root;
+ char *dst = NULL;
+ char *page = NULL;
+ int err = 0;
+
+ err = hmdfs_get_path_in_sb(sbi->sb, sbi->real_dst, LOOKUP_DIRECTORY,
+ &dst_root);
+ if (err)
+ return err;
+
+ dst = kmalloc(PATH_MAX, GFP_KERNEL);
+ if (!dst) {
+ err = -ENOMEM;
+ goto put_path;
+ }
+
+ page = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!page) {
+ err = -ENOMEM;
+ goto free_dst;
+ }
+
+ ctx->conn = conn;
+ ctx->src_dir_path = *src_dir;
+ ctx->dst_root_path = dst_root;
+ ctx->dst = dst;
+ ctx->page = page;
+ ctx->seq = seq;
+
+ return 0;
+free_dst:
+ kfree(dst);
+put_path:
+ path_put(&dst_root);
+ return err;
+}
+
+static void hmdfs_exit_file_restore_ctx(struct hmdfs_file_restore_ctx *ctx)
+{
+ path_put(&ctx->dst_root_path);
+ kfree(ctx->dst);
+ kfree(ctx->page);
+}
+
+static struct file *hmdfs_open_stash_file(struct path *p_path, char *name)
+{
+ struct dentry *parent = NULL;
+ struct inode *dir = NULL;
+ struct dentry *child = NULL;
+ struct file *filp = NULL;
+ struct path c_path;
+ int err = 0;
+
+ parent = p_path->dentry;
+ dir = d_inode(parent);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
+ child = lookup_one_len(name, parent, strlen(name));
+ if (!IS_ERR(child) &&
!hmdfs_is_reg(child)) { + if (d_is_positive(child)) { + hmdfs_err("invalid stash file (mode 0%o)", + d_inode(child)->i_mode); + err = -EINVAL; + } else { + hmdfs_err("missing stash file"); + err = -ENOENT; + } + dput(child); + } else if (IS_ERR(child)) { + err = PTR_ERR(child); + hmdfs_err("lookup stash file err %d", err); + } + inode_unlock(dir); + + if (err) + return ERR_PTR(err); + + c_path.mnt = p_path->mnt; + c_path.dentry = child; + filp = dentry_open(&c_path, O_RDONLY | O_LARGEFILE, current_cred()); + if (IS_ERR(filp)) + hmdfs_err("open stash file err %d", (int)PTR_ERR(filp)); + + dput(child); + + return filp; +} + +static void hmdfs_update_restore_stats(struct hmdfs_restore_stats *stats, + bool keep, uint64_t pages, int err) +{ + if (!err) { + stats->succeed++; + stats->ok_pages += pages; + } else if (keep) { + stats->keep++; + } else { + stats->fail++; + stats->fail_pages += pages; + } +} + +static int hmdfs_restore_files(struct hmdfs_peer *conn, + unsigned int seq, struct path *dir, + const struct hmdfs_inode_tbl *tbl, + void *priv) +{ + unsigned int i; + struct hmdfs_file_restore_ctx ctx; + int err = 0; + struct hmdfs_restore_stats *stats = priv; + + err = hmdfs_init_file_restore_ctx(conn, seq, dir, &ctx); + if (err) + return err; + + for (i = 0; i < tbl->cnt; i++) { + char name[HMDFS_STASH_FILE_NAME_LEN]; + struct file *filp = NULL; + + snprintf(name, sizeof(name), "0x%llx", tbl->inodes[i]); + filp = hmdfs_open_stash_file(dir, name); + /* Continue to restore if any error */ + if (IS_ERR(filp)) { + stats->fail++; + continue; + } + + ctx.inum = tbl->inodes[i]; + ctx.src_filp = filp; + ctx.keep = false; + ctx.pages = 0; + err = hmdfs_restore_file(&ctx); + hmdfs_update_restore_stats(stats, ctx.keep, ctx.pages, err); + + if (!ctx.keep) + hmdfs_del_stash_file(dir->dentry, + file_dentry(ctx.src_filp)); + fput(ctx.src_filp); + + /* Continue to restore */ + if (err == -ESHUTDOWN) + break; + err = 0; + } + + hmdfs_exit_file_restore_ctx(&ctx); + + return err; +} + +static bool hmdfs_is_valid_stash_status(struct hmdfs_inode_info *inode_info, + uint64_t ino) +{ + return (inode_info->inode_type == HMDFS_LAYER_OTHER_REMOTE && + inode_info->stash_status == HMDFS_REMOTE_INODE_RESTORING && + inode_info->remote_ino == ino); +} + +static int hmdfs_rebuild_stash_list(struct hmdfs_peer *conn, + unsigned int seq, + struct path *dir, + const struct hmdfs_inode_tbl *tbl, + void *priv) +{ + struct hmdfs_file_restore_ctx ctx; + unsigned int i; + int err; + struct hmdfs_rebuild_stats *stats = priv; + + err = hmdfs_init_file_restore_ctx(conn, seq, dir, &ctx); + if (err) + return err; + + stats->total += tbl->cnt; + + for (i = 0; i < tbl->cnt; i++) { + char name[HMDFS_STASH_FILE_NAME_LEN]; + struct file *src_filp = NULL; + struct file *dst_filp = NULL; + struct hmdfs_inode_info *inode_info = NULL; + bool is_valid = true; + + snprintf(name, sizeof(name), "0x%llx", tbl->inodes[i]); + src_filp = hmdfs_open_stash_file(dir, name); + if (IS_ERR(src_filp)) { + stats->fail++; + continue; + } + ctx.inum = tbl->inodes[i]; + ctx.src_filp = src_filp; + + /* No need to track the open which only needs meta info */ + err = hmdfs_open_restore_dst_file(&ctx, O_RDONLY, &dst_filp); + if (err) { + fput(src_filp); + if (err == -ESHUTDOWN) + break; + stats->fail++; + err = 0; + continue; + } + + inode_info = hmdfs_i(file_inode(dst_filp)); + is_valid = hmdfs_is_valid_stash_status(inode_info, + ctx.inum); + if (is_valid) { + stats->succeed++; + } else { + hmdfs_err("peer 0x%x:0x%llx inode 0x%llx invalid state: type: %d, status: 
%u, inode: %llu", + conn->owner, conn->device_id, ctx.inum, + inode_info->inode_type, + READ_ONCE(inode_info->stash_status), + inode_info->remote_ino); + stats->invalid++; + } + + fput(ctx.src_filp); + fput(dst_filp); + } + + hmdfs_exit_file_restore_ctx(&ctx); + return err; +} + +static int hmdfs_iter_stash_file(struct hmdfs_peer *conn, + unsigned int seq, + struct file *filp, + stash_operation_func op, + void *priv) +{ + int err = 0; + struct hmdfs_stash_dir_context ctx = { + .dctx.actor = hmdfs_fill_stash_file, + }; + struct hmdfs_inode_tbl *tbl = NULL; + struct path dir; + + err = hmdfs_new_inode_tbl(&tbl); + if (err) + goto out; + + dir.mnt = filp->f_path.mnt; + dir.dentry = file_dentry(filp); + + ctx.tbl = tbl; + ctx.dctx.pos = 0; + do { + tbl->cnt = 0; + err = iterate_dir(filp, &ctx.dctx); + if (err || !tbl->cnt) { + if (err) + hmdfs_err("iterate stash dir err %d", err); + break; + } + err = op(conn, seq, &dir, tbl, priv); + } while (!err); + +out: + kfree(tbl); + return err; +} + +static void hmdfs_rebuild_check_work_fn(struct work_struct *base) +{ + struct hmdfs_check_work *work = + container_of(base, struct hmdfs_check_work, work); + struct hmdfs_peer *conn = work->conn; + struct hmdfs_sb_info *sbi = conn->sbi; + struct file *filp = NULL; + const struct cred *old_cred = NULL; + struct hmdfs_stash_dir_context ctx = { + .dctx.actor = hmdfs_has_stash_file, + }; + struct hmdfs_inode_tbl tbl; + int err; + + old_cred = hmdfs_override_creds(sbi->cred); + filp = hmdfs_open_stash_dir(&sbi->stash_work_dir, conn->cid); + if (IS_ERR(filp)) + goto out; + + memset(&tbl, 0, sizeof(tbl)); + ctx.tbl = &tbl; + err = iterate_dir(filp, &ctx.dctx); + if (!err && ctx.tbl->cnt > 0) + conn->need_rebuild_stash_list = true; + + fput(filp); +out: + hmdfs_revert_creds(old_cred); + hmdfs_info("peer 0x%x:0x%llx %sneed to rebuild stash list", + conn->owner, conn->device_id, + conn->need_rebuild_stash_list ? 
"" : "don't "); + complete(&work->done); +} + +static void hmdfs_stash_add_do_check(struct hmdfs_peer *conn, int evt, + unsigned int seq) +{ + struct hmdfs_sb_info *sbi = conn->sbi; + struct hmdfs_check_work work = { + .conn = conn, + .done = COMPLETION_INITIALIZER_ONSTACK(work.done), + }; + + if (!hmdfs_is_stash_enabled(sbi)) + return; + + INIT_WORK_ONSTACK(&work.work, hmdfs_rebuild_check_work_fn); + schedule_work(&work.work); + wait_for_completion(&work.done); +} + +static void +hmdfs_update_peer_rebuild_stats(struct hmdfs_rebuild_statistics *rebuild_stats, + const struct hmdfs_rebuild_stats *stats) +{ + rebuild_stats->cur_ok = stats->succeed; + rebuild_stats->cur_fail = stats->fail; + rebuild_stats->cur_invalid = stats->invalid; + rebuild_stats->total_ok += stats->succeed; + rebuild_stats->total_fail += stats->fail; + rebuild_stats->total_invalid += stats->invalid; +} + +/* rebuild stash inode list */ +static void hmdfs_stash_online_prepare(struct hmdfs_peer *conn, int evt, + unsigned int seq) +{ + struct hmdfs_sb_info *sbi = conn->sbi; + struct file *filp = NULL; + const struct cred *old_cred = NULL; + int err; + struct hmdfs_rebuild_stats stats; + + if (!hmdfs_is_stash_enabled(sbi) || + !conn->need_rebuild_stash_list) + return; + + /* release seq_lock to prevent blocking no-online sync cb */ + mutex_unlock(&conn->seq_lock); + old_cred = hmdfs_override_creds(sbi->cred); + filp = hmdfs_open_stash_dir(&sbi->stash_work_dir, conn->cid); + if (IS_ERR(filp)) + goto out; + + memset(&stats, 0, sizeof(stats)); + err = hmdfs_iter_stash_file(conn, seq, filp, + hmdfs_rebuild_stash_list, &stats); + if (err == -ESHUTDOWN) { + hmdfs_info("peer 0x%x:0x%llx offline again during rebuild", + conn->owner, conn->device_id); + } else { + WRITE_ONCE(conn->need_rebuild_stash_list, false); + if (err) + hmdfs_warning("partial rebuild fail err %d", err); + } + + hmdfs_update_peer_rebuild_stats(&conn->stats.rebuild, &stats); + hmdfs_info("peer 0x%x:0x%llx rebuild stashed-file total %u succeed %u fail %u invalid %u", + conn->owner, conn->device_id, stats.total, stats.succeed, + stats.fail, stats.invalid); + fput(filp); +out: + conn->stats.rebuild.time++; + hmdfs_revert_creds(old_cred); + if (!READ_ONCE(conn->need_rebuild_stash_list)) { + /* + * Use smp_mb__before_atomic() to ensure order between + * writing @conn->need_rebuild_stash_list and + * reading conn->rebuild_inode_status_nr. + */ + smp_mb__before_atomic(); + /* + * Wait until all inodes finish rebuilding stash status before + * accessing @conn->stashed_inode_list in restoring. 
+ */ + wait_event(conn->rebuild_inode_status_wq, + !atomic_read(&conn->rebuild_inode_status_nr)); + } + mutex_lock(&conn->seq_lock); +} + +static void +hmdfs_update_peer_restore_stats(struct hmdfs_restore_statistics *restore_stats, + const struct hmdfs_restore_stats *stats) +{ + restore_stats->cur_ok = stats->succeed; + restore_stats->cur_fail = stats->fail; + restore_stats->cur_keep = stats->keep; + restore_stats->total_ok += stats->succeed; + restore_stats->total_fail += stats->fail; + restore_stats->total_keep += stats->keep; + restore_stats->ok_pages += stats->ok_pages; + restore_stats->fail_pages += stats->fail_pages; +} + +static void hmdfs_stash_online_do_restore(struct hmdfs_peer *conn, int evt, + unsigned int seq) +{ + struct hmdfs_sb_info *sbi = conn->sbi; + struct file *filp = NULL; + const struct cred *old_cred = NULL; + struct hmdfs_restore_stats stats; + int err = 0; + + if (!hmdfs_is_stash_enabled(sbi) || conn->need_rebuild_stash_list) { + if (conn->need_rebuild_stash_list) + hmdfs_info("peer 0x%x:0x%llx skip restoring due to rebuild-need", + conn->owner, conn->device_id); + return; + } + + /* release seq_lock to prevent blocking no-online sync cb */ + mutex_unlock(&conn->seq_lock); + /* For dir iteration, file read and unlink */ + old_cred = hmdfs_override_creds(conn->sbi->cred); + + memset(&stats, 0, sizeof(stats)); + filp = hmdfs_open_stash_dir(&sbi->stash_work_dir, conn->cid); + if (IS_ERR(filp)) { + err = PTR_ERR(filp); + goto out; + } + + err = hmdfs_iter_stash_file(conn, seq, filp, + hmdfs_restore_files, &stats); + + fput(filp); +out: + hmdfs_revert_creds(old_cred); + + /* offline again ? */ + if (err != -ESHUTDOWN) + hmdfs_drop_stashed_inodes(conn); + + hmdfs_update_peer_restore_stats(&conn->stats.restore, &stats); + hmdfs_info("peer 0x%x:0x%llx restore stashed-file ok %u fail %u keep %u", + conn->owner, conn->device_id, + stats.succeed, stats.fail, stats.keep); + + mutex_lock(&conn->seq_lock); +} + +static void hmdfs_stash_del_do_cleanup(struct hmdfs_peer *conn, int evt, + unsigned int seq) +{ + struct hmdfs_inode_info *info = NULL; + struct hmdfs_inode_info *next = NULL; + unsigned int preparing; + + if (!hmdfs_is_stash_enabled(conn->sbi)) + return; + + /* Async cb is cancelled */ + preparing = 0; + list_for_each_entry_safe(info, next, &conn->wr_opened_inode_list, + wr_opened_node) { + int status = READ_ONCE(info->stash_status); + + if (status == HMDFS_REMOTE_INODE_STASHING) { + struct hmdfs_cache_info *cache = NULL; + + spin_lock(&info->stash_lock); + cache = info->cache; + info->cache = NULL; + info->stash_status = HMDFS_REMOTE_INODE_NONE; + spin_unlock(&info->stash_lock); + + hmdfs_remote_del_wr_opened_inode(conn, info); + hmdfs_del_file_cache(cache); + /* put inode after all access are completed */ + iput(&info->vfs_inode); + preparing++; + } + } + hmdfs_info("release %u preparing inodes", preparing); + + hmdfs_info("release %u pinned inodes", conn->stashed_inode_nr); + if (list_empty(&conn->stashed_inode_list)) + return; + + list_for_each_entry_safe(info, next, + &conn->stashed_inode_list, stash_node) + hmdfs_untrack_stashed_inode(conn, info); +} + +void hmdfs_exit_stash(struct hmdfs_sb_info *sbi) +{ + if (!sbi->s_offline_stash) + return; + + if (sbi->stash_work_dir.dentry) { + path_put(&sbi->stash_work_dir); + sbi->stash_work_dir.dentry = NULL; + } +} + +int hmdfs_init_stash(struct hmdfs_sb_info *sbi) +{ + int err = 0; + struct path parent; + struct dentry *child = NULL; + + if (!sbi->s_offline_stash) + return 0; + + err = kern_path(sbi->cache_dir, 
LOOKUP_FOLLOW | LOOKUP_DIRECTORY, + &parent); + if (err) { + hmdfs_err("invalid cache dir err %d", err); + goto out; + } + + child = hmdfs_stash_new_work_dir(parent.dentry); + if (!IS_ERR(child)) { + sbi->stash_work_dir.mnt = mntget(parent.mnt); + sbi->stash_work_dir.dentry = child; + } else { + err = PTR_ERR(child); + hmdfs_err("create stash work dir err %d", err); + } + + path_put(&parent); +out: + return err; +} + +static int hmdfs_stash_write_local_file(struct hmdfs_peer *conn, + struct hmdfs_inode_info *info, + struct hmdfs_writepage_context *ctx, + struct hmdfs_cache_info *cache) +{ + struct page *page = ctx->page; + const struct cred *old_cred = NULL; + void *buf = NULL; + loff_t pos; + unsigned int flags; + ssize_t written; + int err = 0; + + buf = kmap(page); + pos = (loff_t)page->index << PAGE_SHIFT; + /* enable NOFS for memory allocation */ + flags = memalloc_nofs_save(); + old_cred = hmdfs_override_creds(conn->sbi->cred); + pos += cache->data_offs << HMDFS_STASH_BLK_SHIFT; + written = kernel_write(cache->cache_file, buf, ctx->count, &pos); + hmdfs_revert_creds(old_cred); + memalloc_nofs_restore(flags); + kunmap(page); + + if (written != ctx->count) { + hmdfs_err("stash peer 0x%x:0x%llx ino 0x%llx page 0x%lx data_offs 0x%x len %u err %zd", + conn->owner, conn->device_id, info->remote_ino, + page->index, cache->data_offs, ctx->count, written); + err = -EIO; + } + + return err; +} + +int hmdfs_stash_writepage(struct hmdfs_peer *conn, + struct hmdfs_writepage_context *ctx) +{ + struct inode *inode = ctx->page->mapping->host; + struct hmdfs_inode_info *info = hmdfs_i(inode); + struct hmdfs_cache_info *cache = NULL; + int err; + + /* e.g. fail to create stash file */ + cache = info->cache; + if (!cache) + return -EIO; + + err = hmdfs_stash_write_local_file(conn, info, ctx, cache); + if (!err) { + hmdfs_client_writepage_done(info, ctx); + atomic64_inc(&cache->written_pgs); + put_task_struct(ctx->caller); + kfree(ctx); + } + atomic64_inc(&cache->to_write_pgs); + + return err; +} + +static void hmdfs_stash_rebuild_status(struct hmdfs_peer *conn, + struct inode *inode) +{ + char *path_str = NULL; + struct hmdfs_inode_info *info = NULL; + const struct cred *old_cred = NULL; + struct path path; + struct path *stash_path = NULL; + int err = 0; + + path_str = kmalloc(HMDFS_STASH_PATH_LEN, GFP_KERNEL); + if (!path_str) { + err = -ENOMEM; + return; + } + + info = hmdfs_i(inode); + err = snprintf(path_str, HMDFS_STASH_PATH_LEN, "%s/0x%llx", + conn->cid, info->remote_ino); + if (err >= HMDFS_STASH_PATH_LEN) { + kfree(path_str); + hmdfs_err("peer 0x%x:0x%llx inode 0x%llx too long name len", + conn->owner, conn->device_id, info->remote_ino); + return; + } + old_cred = hmdfs_override_creds(conn->sbi->cred); + stash_path = &conn->sbi->stash_work_dir; + err = vfs_path_lookup(stash_path->dentry, stash_path->mnt, + path_str, 0, &path); + hmdfs_revert_creds(old_cred); + if (!err) { + if (hmdfs_is_reg(path.dentry)) { + WRITE_ONCE(info->stash_status, + HMDFS_REMOTE_INODE_RESTORING); + ihold(&info->vfs_inode); + hmdfs_track_inode_locked(conn, info); + } else { + hmdfs_info("peer 0x%x:0x%llx inode 0x%llx unexpected stashed file mode 0%o", + conn->owner, conn->device_id, + info->remote_ino, + d_inode(path.dentry)->i_mode); + } + + path_put(&path); + } else if (err && err != -ENOENT) { + hmdfs_err("peer 0x%x:0x%llx inode 0x%llx find %s err %d", + conn->owner, conn->device_id, info->remote_ino, + path_str, err); + } + + kfree(path_str); +} + +static inline bool +hmdfs_need_rebuild_inode_stash_status(struct 
hmdfs_peer *conn, umode_t mode) +{ + return hmdfs_is_stash_enabled(conn->sbi) && + READ_ONCE(conn->need_rebuild_stash_list) && + S_ISREG(mode); +} + +void hmdfs_remote_init_stash_status(struct hmdfs_peer *conn, + struct inode *inode, umode_t mode) +{ + if (!hmdfs_need_rebuild_inode_stash_status(conn, mode)) + return; + + atomic_inc(&conn->rebuild_inode_status_nr); + /* + * Use smp_mb__after_atomic() to ensure order between writing + * @conn->rebuild_inode_status_nr and reading + * @conn->need_rebuild_stash_list. + */ + smp_mb__after_atomic(); + if (READ_ONCE(conn->need_rebuild_stash_list)) + hmdfs_stash_rebuild_status(conn, inode); + if (atomic_dec_and_test(&conn->rebuild_inode_status_nr)) + wake_up(&conn->rebuild_inode_status_wq); +} + +static struct hmdfs_node_cb_desc stash_cb[] = { + { + .evt = NODE_EVT_OFFLINE, + .sync = true, + .min_version = DFS_2_0, + .fn = hmdfs_stash_offline_prepare, + }, + { + .evt = NODE_EVT_OFFLINE, + .sync = false, + .min_version = DFS_2_0, + .fn = hmdfs_stash_offline_do_stash, + }, + /* Don't known peer version yet, so min_version is 0 */ + { + .evt = NODE_EVT_ADD, + .sync = true, + .fn = hmdfs_stash_add_do_check, + }, + { + .evt = NODE_EVT_ONLINE, + .sync = false, + .min_version = DFS_2_0, + .fn = hmdfs_stash_online_prepare, + }, + { + .evt = NODE_EVT_ONLINE, + .sync = false, + .min_version = DFS_2_0, + .fn = hmdfs_stash_online_do_restore, + }, + { + .evt = NODE_EVT_DEL, + .sync = true, + .min_version = DFS_2_0, + .fn = hmdfs_stash_del_do_cleanup, + }, +}; + +void __init hmdfs_stash_add_node_evt_cb(void) +{ + hmdfs_node_add_evt_cb(stash_cb, ARRAY_SIZE(stash_cb)); +} + diff --git a/fs/hmdfs/stash.h b/fs/hmdfs/stash.h new file mode 100755 index 000000000..f38e737f9 --- /dev/null +++ b/fs/hmdfs/stash.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/hmdfs/stash.h + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#ifndef HMDFS_STASH_H +#define HMDFS_STASH_H + +#include "hmdfs.h" +#include "hmdfs_client.h" + +extern void hmdfs_stash_add_node_evt_cb(void); + +extern void hmdfs_exit_stash(struct hmdfs_sb_info *sbi); +extern int hmdfs_init_stash(struct hmdfs_sb_info *sbi); + +extern int hmdfs_stash_writepage(struct hmdfs_peer *conn, + struct hmdfs_writepage_context *ctx); + +extern void hmdfs_remote_init_stash_status(struct hmdfs_peer *conn, + struct inode *inode, umode_t mode); + +#endif diff --git a/fs/hmdfs/super.c b/fs/hmdfs/super.c new file mode 100755 index 000000000..18f222c6d --- /dev/null +++ b/fs/hmdfs/super.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/hmdfs/super.c + * + * Copyright (c) 2020-2021 Huawei Device Co., Ltd. + */ + +#include +#include +#include +#include + +#include "hmdfs.h" + +enum { + OPT_RA_PAGES, + OPT_LOCAL_DST, + OPT_CACHE_DIR, + OPT_S_CASE, + OPT_VIEW_TYPE, + OPT_NO_OFFLINE_STASH, + OPT_NO_DENTRY_CACHE, + OPT_USER_ID, + OPT_ERR, +}; + +static match_table_t hmdfs_tokens = { + { OPT_RA_PAGES, "ra_pages=%s" }, + { OPT_LOCAL_DST, "local_dst=%s" }, + { OPT_CACHE_DIR, "cache_dir=%s" }, + { OPT_S_CASE, "sensitive" }, + { OPT_VIEW_TYPE, "merge" }, + { OPT_NO_OFFLINE_STASH, "no_offline_stash" }, + { OPT_NO_DENTRY_CACHE, "no_dentry_cache" }, + { OPT_USER_ID, "user_id=%s"}, + { OPT_ERR, NULL }, +}; + +#define DEAULT_RA_PAGES 128 + +void __hmdfs_log(const char *level, const bool ratelimited, + const char *function, const char *fmt, ...) 
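+/* Logs via printk()/%pV; the ratelimited variant uses printk_ratelimited() to avoid log flooding. */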
+{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + if (ratelimited) + printk_ratelimited("%s hmdfs: %s() %pV\n", level, + function, &vaf); + else + printk("%s hmdfs: %s() %pV\n", level, function, &vaf); + va_end(args); +} + +static int hmdfs_match_strdup(const substring_t *s, char **dst) +{ + char *dup = NULL; + + dup = match_strdup(s); + if (!dup) + return -ENOMEM; + + if (*dst) + kfree(*dst); + *dst = dup; + + return 0; +} + +int hmdfs_parse_options(struct hmdfs_sb_info *sbi, const char *data) +{ + char *p = NULL; + char *name = NULL; + char *options = NULL; + char *options_src = NULL; + substring_t args[MAX_OPT_ARGS]; + unsigned long value = DEAULT_RA_PAGES; + unsigned int user_id = 0; + struct super_block *sb = sbi->sb; + int err = 0; + size_t size = 0; + + size = strlen(data); + if (size >= HMDFS_PAGE_SIZE) { + return -EINVAL; + } + + options = kstrdup(data, GFP_KERNEL); + if (data && !options) { + err = -ENOMEM; + goto out; + } + options_src = options; + err = super_setup_bdi(sb); + if (err) + goto out; + + while ((p = strsep(&options_src, ",")) != NULL) { + int token; + + if (!*p) + continue; + args[0].to = args[0].from = NULL; + token = match_token(p, hmdfs_tokens, args); + + switch (token) { + case OPT_RA_PAGES: + name = match_strdup(&args[0]); + if (name) { + err = kstrtoul(name, 10, &value); + kfree(name); + name = NULL; + if (err) + goto out; + } + break; + case OPT_LOCAL_DST: + err = hmdfs_match_strdup(&args[0], &sbi->local_dst); + if (err) + goto out; + break; + case OPT_CACHE_DIR: + err = hmdfs_match_strdup(&args[0], &sbi->cache_dir); + if (err) + goto out; + break; + case OPT_S_CASE: + sbi->s_case_sensitive = true; + break; + case OPT_VIEW_TYPE: + sbi->s_merge_switch = true; + break; + case OPT_NO_OFFLINE_STASH: + sbi->s_offline_stash = false; + break; + case OPT_NO_DENTRY_CACHE: + sbi->s_dentry_cache = false; + break; + case OPT_USER_ID: + name = match_strdup(&args[0]); + if (name) { + err = kstrtouint(name, 10, &user_id); + kfree(name); + name = NULL; + if (err) + goto out; + sbi->user_id = user_id; + } + break; + default: + err = -EINVAL; + goto out; + } + } +out: + kfree(options); + sb->s_bdi->ra_pages = value; + if (sbi->local_dst == NULL) + err = -EINVAL; + + if (sbi->s_offline_stash && !sbi->cache_dir) { + hmdfs_warning("no cache_dir for offline stash"); + sbi->s_offline_stash = false; + } + + if (sbi->s_dentry_cache && !sbi->cache_dir) { + hmdfs_warning("no cache_dir for dentry cache"); + sbi->s_dentry_cache = false; + } + + return err; +} diff --git a/fs/proc/base.c b/fs/proc/base.c index 300d53ee7..46242e250 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3196,6 +3196,14 @@ static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns, } #endif /* CONFIG_STACKLEAK_METRICS */ +#ifdef CONFIG_ACCESS_TOKENID +static int proc_token_operations(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) +{ + seq_printf(m, "%#llx %#llx\n", task->token, task->ftoken); + return 0; +} +#endif /* CONFIG_ACCESS_TOKENID */ /* * Thread groups */ @@ -3312,6 +3320,12 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_SECCOMP_CACHE_DEBUG ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache), #endif +#ifdef CONFIG_ACCESS_TOKENID + ONE("tokenid", S_IRUSR, proc_token_operations), +#endif +#ifdef CONFIG_SECURITY_XPM + REG("xpm_region", S_IRUGO, proc_xpm_region_operations), +#endif }; static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) @@ 
-3646,6 +3660,12 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_SECCOMP_CACHE_DEBUG ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache), #endif +#ifdef CONFIG_ACCESS_TOKENID + ONE("tokenid", S_IRUSR, proc_token_operations), +#endif +#ifdef CONFIG_SECURITY_XPM + REG("xpm_region", S_IRUGO, proc_xpm_region_operations), +#endif }; static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) diff --git a/include/dfx/hiview_hisysevent.h b/include/dfx/hiview_hisysevent.h new file mode 100755 index 000000000..c47d419a2 --- /dev/null +++ b/include/dfx/hiview_hisysevent.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Huawei Technologies Co., Ltd. All rights reserved. + */ + +#ifndef HIVIEW_HISYSEVENT_H +#define HIVIEW_HISYSEVENT_H + +enum hisysevent_type { + /* fault event */ + FAULT = 1, + + /* statistic event */ + STATISTIC = 2, + + /* security event */ + SECURITY = 3, + + /* behavior event */ + BEHAVIOR = 4 +}; + +struct hiview_hisysevent; + +#ifdef CONFIG_HISYSEVENT + +struct hiview_hisysevent * +hisysevent_create(const char *domain, const char *name, enum hisysevent_type type); +void hisysevent_destroy(struct hiview_hisysevent **event); +int hisysevent_put_integer(struct hiview_hisysevent *event, const char *key, long long value); +int hisysevent_put_string(struct hiview_hisysevent *event, const char *key, const char *value); +int hisysevent_write(struct hiview_hisysevent *event); + +#else + +#include +#include + +static inline struct hiview_hisysevent * +hisysevent_create(const char *domain, const char *name, enum hisysevent_type type) +{ + return NULL; +} + +static inline void hisysevent_destroy(struct hiview_hisysevent **event) +{} + +static inline int +hisysevent_put_integer(struct hiview_hisysevent *event, const char *key, long long value) +{ + return -EOPNOTSUPP; +} + +static inline int +hisysevent_put_string(struct hiview_hisysevent *event, const char *key, const char *value) +{ + return -EOPNOTSUPP; +} + +static inline int hisysevent_write(struct hiview_hisysevent *event) +{ + return -EOPNOTSUPP; +} + +#endif /* CONFIG_HISYSEVENT */ + +#endif /* HIVIEW_HISYSEVENT_H */ diff --git a/include/dfx/hung_wp_screen.h b/include/dfx/hung_wp_screen.h new file mode 100755 index 000000000..39bad044c --- /dev/null +++ b/include/dfx/hung_wp_screen.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Huawei Technologies Co., Ltd. All rights reserved. + */ + +#ifndef HUNG_WP_SCREEN_H +#define HUNG_WP_SCREEN_H + +#define WP_SCREEN_PWK_RELEASE 0 +#define WP_SCREEN_PWK_PRESS 1 + +#define ZRHUNG_WP_NONE 0 +#define ZRHUNG_WP_SCREENON 1 +#define ZRHUNG_WP_SCREENOFF 2 + +#define WP_SCREEN_DOMAIN "KERNEL_VENDOR" +#define WP_SCREEN_PWK_NAME "POWER_KEY" +#define WP_SCREEN_LPRESS_NAME "LONG_PRESS" +#define WP_SCREEN_ON_NAME "SCREEN_ON" +#define WP_SCREEN_OFF_NAME "SCREEN_OFF" + +void hung_wp_screen_powerkey_ncb(int event); + +#endif /* HUNG_WP_SCREEN_H */ diff --git a/include/dfx/hungtask_base.h b/include/dfx/hungtask_base.h new file mode 100755 index 000000000..b3cf189a0 --- /dev/null +++ b/include/dfx/hungtask_base.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Huawei Technologies Co., Ltd. All rights reserved. 
+ */ + +#ifndef DFX_HUNGTASK_BASE_H +#define DFX_HUNGTASK_BASE_H + +#include +#include +#include + +#define ENABLE_SHOW_LEN 8 +#define WHITELIST_STORE_LEN 400 +#define WHITELIST_LEN 61 +#define WHITE_LIST 1 +#define BLACK_LIST 2 +#define HT_ENABLE 1 +#define HT_DISABLE 0 +#define HEARTBEAT_TIME 3 +#define MAX_LOOP_NUM (CONFIG_DEFAULT_HUNG_TASK_TIMEOUT / HEARTBEAT_TIME) +#define ONE_MINUTE (60 / HEARTBEAT_TIME) +#define ONE_AND_HALF_MINUTE (90 / HEARTBEAT_TIME) +#define TWO_MINUTES (120 / HEARTBEAT_TIME) +#define THREE_MINUTES (180 / HEARTBEAT_TIME) +#define TWENTY_SECONDS (21 / HEARTBEAT_TIME) +#define THIRTY_SECONDS (30 / HEARTBEAT_TIME) +#define HUNG_ONE_HOUR (3600 / HEARTBEAT_TIME) +#define HUNG_TEN_MINUTES (600 / HEARTBEAT_TIME) +#define HUNGTASK_REPORT_TIMECOST TWENTY_SECONDS +#define HT_DUMP_IN_PANIC_LOOSE 5 +#define HT_DUMP_IN_PANIC_STRICT 2 +#define REFRESH_INTERVAL THREE_MINUTES +#define FLAG_DUMP_WHITE (1 << 0) +#define FLAG_DUMP_APP (1 << 1) +#define FLAG_DUMP_NOSCHEDULE (1 << 2) +#define FLAG_DUMP_JANK (1 << 3) +#define FLAG_PANIC (1 << 4) +#define FLAG_PF_FROZEN (1 << 6) +#define TASK_TYPE_IGNORE 0 +#define TASK_TYPE_WHITE (1 << 0) +#define TASK_TYPE_APP (1 << 1) +#define TASK_TYPE_JANK (1 << 2) +#define TASK_TYPE_KERNEL (1 << 3) +#define TASK_TYPE_NATIVE (1 << 4) +#define TASK_TYPE_FROZEN (1 << 6) +#define PID_INIT 1 +#define PID_KTHREAD 2 +#define DEFAULT_WHITE_DUMP_CNT MAX_LOOP_NUM +#define DEFAULT_WHITE_PANIC_CNT MAX_LOOP_NUM +#define HUNG_TASK_UPLOAD_ONCE 1 +#define FROZEN_BUF_LEN 1024 +#define MAX_REMOVE_LIST_NUM 200 +#define HUNGTASK_DOMAIN "KERNEL_VENDOR" +#define HUNGTASK_NAME "HUNGTASK" +#define INIT_FREEZE_NAME "INIT_FREEZE" +#define HUNG_TASK_BATCHING 1024 +#define TIME_REFRESH_PIDS 20 +#define PID_ERROR (-1) +#define HUNGTASK_EVENT_WHITELIST 1 +#define REPORT_MSGLENGTH 200 + +struct task_item { + struct rb_node node; + pid_t pid; + pid_t tgid; + char name[TASK_COMM_LEN + 1]; + unsigned long switch_count; + unsigned int task_type; + int dump_wa; + int panic_wa; + int dump_jank; + int d_state_time; + bool isdone_wa; +}; + +struct hashlist_node { + pid_t pid; + struct hlist_node list; +}; + +struct whitelist_item { + pid_t pid; + char name[TASK_COMM_LEN + 1]; +}; + +struct task_hung_upload { + char name[TASK_COMM_LEN + 1]; + pid_t pid; + pid_t tgid; + unsigned int flag; + int duration; +}; + +extern unsigned long sysctl_hung_task_timeout_secs; +extern unsigned int sysctl_hung_task_panic; + +void do_dump_task(struct task_struct *task); +int dump_task_wa(struct task_item *item, int dump_cnt, + struct task_struct *task, unsigned int flag); +void do_show_task(struct task_struct *task, unsigned int flag, int d_state_time); +void hungtask_show_state_filter(unsigned long state_filter); +int htbase_create_sysfs(void); +void htbase_set_panic(int new_did_panic); +void htbase_set_timeout_secs(unsigned long new_hungtask_timeout_secs); +void htbase_check_tasks(unsigned long timeout); +bool hashlist_find(struct hlist_head *head, int count, pid_t tgid); +void hashlist_clear(struct hlist_head *head, int count); +bool hashlist_insert(struct hlist_head *head, int count, pid_t tgid); + +#endif /* DFX_HUNGTASK_BASE_H */ diff --git a/include/dfx/zrhung.h b/include/dfx/zrhung.h new file mode 100755 index 000000000..a63462a28 --- /dev/null +++ b/include/dfx/zrhung.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Huawei Technologies Co., Ltd. All rights reserved. 
+ */ + +#ifndef ZRHUNG_H +#define ZRHUNG_H + +int zrhung_send_event(const char *domain, const char *event_name, const char *msg_buf); + +#endif /* ZRHUNG_H */ diff --git a/include/linux/blackbox.h b/include/linux/blackbox.h new file mode 100755 index 000000000..ed470e4a6 --- /dev/null +++ b/include/linux/blackbox.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2021 Huawei Technologies Co., Ltd. All rights reserved. + */ + +#ifndef BLACKBOX_H +#define BLACKBOX_H + +#include +#include + +#define PATH_MAX_LEN 256 +#define EVENT_MAX_LEN 32 +#define CATEGORY_MAX_LEN 32 +#define MODULE_MAX_LEN 32 +#define TIMESTAMP_MAX_LEN 24 +#define ERROR_DESC_MAX_LEN 512 +#define LOG_FLAG "VALIDLOG" + +/* module type */ +#define MODULE_SYSTEM "SYSTEM" + +/* fault category type */ +#define CATEGORY_SYSTEM_REBOOT "SYSREBOOT" +#define CATEGORY_SYSTEM_POWEROFF "POWEROFF" +#define CATEGORY_SYSTEM_PANIC "PANIC" +#define CATEGORY_SYSTEM_OOPS "OOPS" +#define CATEGORY_SYSTEM_CUSTOM "CUSTOM" +#define CATEGORY_SYSTEM_WATCHDOG "HWWATCHDOG" +#define CATEGORY_SYSTEM_HUNGTASK "HUNGTASK" +#define CATEGORY_SUBSYSTEM_CUSTOM "CUSTOM" + +/* fault event type */ +#define EVENT_SYSREBOOT "SYSREBOOT" +#define EVENT_LONGPRESS "LONGPRESS" +#define EVENT_COMBINATIONKEY "COMBINATIONKEY" +#define EVENT_SUBSYSREBOOT "SUBSYSREBOOT" +#define EVENT_POWEROFF "POWEROFF" +#define EVENT_PANIC "PANIC" +#define EVENT_OOPS "OOPS" +#define EVENT_SYS_WATCHDOG "SYSWATCHDOG" +#define EVENT_HUNGTASK "HUNGTASK" +#define EVENT_BOOTFAIL "BOOTFAIL" + +#define FILE_NAME(x) (strrchr(x, '/') ? (strrchr(x, '/') + 1) : x) +#define BBOX_DECORATOR_HILOG(level, fmt, args...) \ + pr_err("bbox:[%s][%s:%d] " fmt, level, FILE_NAME(__FILE__), __LINE__, ##args) + +#define bbox_print_fatal(fmt, args...) BBOX_DECORATOR_HILOG("fatal", fmt, ##args) +#define bbox_print_err(fmt, args...) BBOX_DECORATOR_HILOG("err", fmt, ##args) +#define bbox_print_warn(fmt, args...) BBOX_DECORATOR_HILOG("warn", fmt, ##args) +#define bbox_print_info(fmt, args...) BBOX_DECORATOR_HILOG("info", fmt, ##args) +#define bbox_print_debug(fmt, args...) BBOX_DECORATOR_HILOG("debug", fmt, ##args) + +struct error_info { + char event[EVENT_MAX_LEN]; + char category[CATEGORY_MAX_LEN]; + char module[MODULE_MAX_LEN]; + char error_time[TIMESTAMP_MAX_LEN]; + char error_desc[ERROR_DESC_MAX_LEN]; +}; + +struct fault_log_info { + char flag[8]; /* 8 is the length of the flag */ + size_t len; /* length of the kernel fault log */ + struct error_info info; +}; + +struct module_ops { + char module[MODULE_MAX_LEN]; + void (*dump)(const char *log_dir, struct error_info *info); + void (*reset)(struct error_info *info); + int (*get_last_log_info)(struct error_info *info); + int (*save_last_log)(const char *log_dir, struct error_info *info); +}; + +void get_timestamp(char *buf, size_t buf_size); +int bbox_register_module_ops(struct module_ops *ops); +int bbox_notify_error(const char event[EVENT_MAX_LEN], + const char module[MODULE_MAX_LEN], + const char error_desc[ERROR_DESC_MAX_LEN], + int need_sys_reset); + +#endif /* BLACKBOX_H */ diff --git a/include/linux/blackbox_common.h b/include/linux/blackbox_common.h new file mode 100755 index 000000000..ae8e0d229 --- /dev/null +++ b/include/linux/blackbox_common.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2021 Huawei Technologies Co., Ltd. All rights reserved. 
diff --git a/include/linux/blackbox_common.h b/include/linux/blackbox_common.h
new file mode 100755
index 000000000..ae8e0d229
--- /dev/null
+++ b/include/linux/blackbox_common.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2021 Huawei Technologies Co., Ltd. All rights reserved.
+ */
+
+#ifndef BLACKBOX_COMMON_H
+#define BLACKBOX_COMMON_H
+
+#include <linux/fs.h>
+
+/* bbox/BBOX - blackbox */
+#define YEAR_BASE 1900
+#define SECONDS_PER_MINUTE 60
+#define AID_ROOT 0
+#define AID_SYSTEM 1000
+#define BBOX_DIR_LIMIT 0775
+#define BBOX_FILE_LIMIT 0664
+#define PATH_MAX_LEN 256
+
+/*
+ * format:
+ * [topCategoryName],module[moduleName],category[categoryName],\
+ * event[eventName],time[seconds from 1970-01-01 00:00:00 UTC-tick],\
+ * sysreboot[true|false],errdesc[errorDescription],logpath[logpath]\n
+ */
+#define HISTORY_LOG_FORMAT "[%s],module[%s],category[%s],event[%s],"\
+	"time[%s],sysreboot[%s],errdesc[%s],logpath[%s]\n"
+#define TIMESTAMP_FORMAT "%04d%02d%02d%02d%02d%02d-%08llu"
+
+void sys_reset(void);
+void change_own(char *path, int uid, int gid);
+int full_write_file(const char *pfile_path, char *buf,
+		    size_t buf_size, bool read_file);
+int file_exists(const char *name);
+int create_log_dir(const char *path);
+unsigned long long get_ticks(void);
+struct file *file_open(const char *filename, int open_mode, int mode);
+void file_close(struct file *filp);
+ssize_t file_read(struct file *file, loff_t offset, unsigned char *data,
+		  size_t size);
+int file_delete(struct file *filp);
+char *getfullpath(struct file *filp);
+
+#endif /* BLACKBOX_COMMON_H */
diff --git a/include/linux/blackbox_storage.h b/include/linux/blackbox_storage.h
new file mode 100755
index 000000000..52d67523d
--- /dev/null
+++ b/include/linux/blackbox_storage.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2021 Huawei Technologies Co., Ltd. All rights reserved.
+ */
+
+#ifndef BLACKBOX_STORAGE_H
+#define BLACKBOX_STORAGE_H
+
+#include <linux/kmsg_dump.h>
+
+struct reboot_crashlog_storage {
+	int (*storage_log)(void *out, unsigned int outlen);
+	int (*get_log)(void *in, unsigned int inlen);
+	void (*blackbox_dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason);
+	const char *material;
+};
+
+extern char *storage_material;
+extern const struct reboot_crashlog_storage *storage_lastword;
+extern const struct reboot_crashlog_storage storage_lastwords[];
+
+#endif /* BLACKBOX_STORAGE_H */
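[Illustration only, not part of the patch.] The storage interface above abstracts where the "last words" crash log lives. As a rough sketch only — the precise storage_log/get_log contract is set by blackbox_storage.c, so the copy directions here are an assumption — a RAM-backed backend might look like:

#include <linux/errno.h>
#include <linux/string.h>
#include <linux/blackbox_storage.h>

static char demo_lastword[4096];	/* hypothetical reserved buffer */

/* Assumed: persist outlen bytes of the pending crash log. */
static int demo_storage_log(void *out, unsigned int outlen)
{
	if (outlen > sizeof(demo_lastword))
		return -EINVAL;
	memcpy(demo_lastword, out, outlen);
	return 0;
}

/* Assumed: read back up to inlen bytes after reboot. */
static int demo_get_log(void *in, unsigned int inlen)
{
	if (inlen > sizeof(demo_lastword))
		inlen = sizeof(demo_lastword);
	memcpy(in, demo_lastword, inlen);
	return 0;
}

static const struct reboot_crashlog_storage demo_backend = {
	.storage_log = demo_storage_log,
	.get_log = demo_get_log,
	.material = "demo_ram",
};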
diff --git a/include/linux/hck/lite_hck_ced.h b/include/linux/hck/lite_hck_ced.h
new file mode 100755
index 000000000..9d1ffb7cc
--- /dev/null
+++ b/include/linux/hck/lite_hck_ced.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2023 Huawei Device Co., Ltd.
+ */
+
+#ifndef _LITE_HCK_CED_H
+#define _LITE_HCK_CED_H
+
+#include <linux/sched.h>
+#include <linux/nsproxy.h>
+#include <linux/hck/lite_vendor_hooks.h>
+
+#ifndef CONFIG_HCK
+#undef CALL_HCK_LITE_HOOK
+#define CALL_HCK_LITE_HOOK(name, args...)
+#undef REGISTER_HCK_LITE_HOOK
+#define REGISTER_HCK_LITE_HOOK(name, probe)
+#undef REGISTER_HCK_LITE_DATA_HOOK
+#define REGISTER_HCK_LITE_DATA_HOOK(name, probe, data)
+#else
+DECLARE_HCK_LITE_HOOK(ced_setattr_insert_lhck,
+	TP_PROTO(struct task_struct *task),
+	TP_ARGS(task));
+
+DECLARE_HCK_LITE_HOOK(ced_switch_task_namespaces_lhck,
+	TP_PROTO(const struct nsproxy *new),
+	TP_ARGS(new));
+
+DECLARE_HCK_LITE_HOOK(ced_detection_lhck,
+	TP_PROTO(struct task_struct *task),
+	TP_ARGS(task));
+
+DECLARE_HCK_LITE_HOOK(ced_exit_lhck,
+	TP_PROTO(struct task_struct *task),
+	TP_ARGS(task));
+
+DECLARE_HCK_LITE_HOOK(ced_kernel_clone_lhck,
+	TP_PROTO(struct task_struct *task),
+	TP_ARGS(task));
+
+DECLARE_HCK_LITE_HOOK(ced_commit_creds_lhck,
+	TP_PROTO(const struct cred *new),
+	TP_ARGS(new));
+
+DECLARE_HCK_LITE_HOOK(ced_switch_task_namespaces_permission_lhck,
+	TP_PROTO(const struct nsproxy *new, int *ret),
+	TP_ARGS(new, ret));
+#endif /* CONFIG_HCK */
+
+#endif /* _LITE_HCK_CED_H */
diff --git a/include/linux/hck/lite_hck_code_sign.h b/include/linux/hck/lite_hck_code_sign.h
new file mode 100755
index 000000000..d479babbf
--- /dev/null
+++ b/include/linux/hck/lite_hck_code_sign.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2023 Huawei Device Co., Ltd.
+ */
+
+#ifndef LITE_HCK_CODE_SIGN_H
+#define LITE_HCK_CODE_SIGN_H
+
+#include <linux/hck/lite_vendor_hooks.h>
+
+#ifndef CONFIG_HCK
+
+#define CALL_HCK_LITE_HOOK(name, args...)
+#define REGISTER_HCK_LITE_HOOK(name, probe)
+#define REGISTER_HCK_LITE_DATA_HOOK(name, probe, data)
+
+#else
+
+DECLARE_HCK_LITE_HOOK(code_sign_verify_certchain_lhck,
+	TP_PROTO(const void *raw_pkcs7, size_t pkcs7_len, struct cs_info *cs_info,
+		int *ret),
+	TP_ARGS(raw_pkcs7, pkcs7_len, cs_info, ret));
+
+DECLARE_HCK_LITE_HOOK(code_sign_check_descriptor_lhck,
+	TP_PROTO(const struct inode *inode, const void *desc, int *ret),
+	TP_ARGS(inode, desc, ret));
+
+DECLARE_HCK_LITE_HOOK(code_sign_before_measurement_lhck,
+	TP_PROTO(void *desc, int *ret),
+	TP_ARGS(desc, ret));
+
+DECLARE_HCK_LITE_HOOK(code_sign_after_measurement_lhck,
+	TP_PROTO(void *desc, int version),
+	TP_ARGS(desc, version));
+
+#endif /* CONFIG_HCK */
+
+#endif /* LITE_HCK_CODE_SIGN_H */
diff --git a/include/linux/hck/lite_hck_hideaddr.h b/include/linux/hck/lite_hck_hideaddr.h
new file mode 100755
index 000000000..e7dbf9695
--- /dev/null
+++ b/include/linux/hck/lite_hck_hideaddr.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2023 Huawei Device Co., Ltd.
+ */
+
+#ifndef _LITE_HCK_HIDEADDR_H
+#define _LITE_HCK_HIDEADDR_H
+
+#include <linux/seq_file.h>
+#include <linux/mm_types.h>
+#include <linux/hck/lite_vendor_hooks.h>
+
+#ifndef CONFIG_HCK
+#define CALL_HCK_LITE_HOOK(name, args...)
+#define REGISTER_HCK_LITE_HOOK(name, probe)
+#define REGISTER_HCK_LITE_DATA_HOOK(name, probe, data)
+#else
+
+DECLARE_HCK_LITE_HOOK(hideaddr_header_prefix_lhck,
+	TP_PROTO(unsigned long *start, unsigned long *end, vm_flags_t *flags,
+		struct seq_file *m, struct vm_area_struct *vma),
+	TP_ARGS(start, end, flags, m, vma));
+
+#endif /* CONFIG_HCK */
+#endif /* _LITE_HCK_HIDEADDR_H */
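[Illustration only, not part of the patch.] A vendor module would attach to one of the CED hooks above roughly as follows (module and probe names are hypothetical; REGISTER_HCK_LITE_HOOK expands to a statement, so it must run in function context, and — per the -EBUSY logic in lite_vendor_hooks.h — each hook accepts at most one probe):

#include <linux/module.h>
#include <linux/printk.h>
#include <linux/sched.h>
#include <linux/hck/lite_hck_ced.h>

/* Hypothetical probe: log every task exit the hook reports. */
static void demo_ced_exit(struct task_struct *task)
{
	pr_info("ced demo: task %d (%s) exiting\n", task->pid, task->comm);
}

static int __init demo_ced_init(void)
{
	REGISTER_HCK_LITE_HOOK(ced_exit_lhck, demo_ced_exit);
	return 0;
}
module_init(demo_ced_init);
MODULE_LICENSE("GPL");

The core kernel side would fire the hook with `CALL_HCK_LITE_HOOK(ced_exit_lhck, current);` at the matching point in do_exit().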
diff --git a/include/linux/hck/lite_hck_inet.h b/include/linux/hck/lite_hck_inet.h
new file mode 100755
index 000000000..77eff93a8
--- /dev/null
+++ b/include/linux/hck/lite_hck_inet.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2023 Huawei Device Co., Ltd.
+ */
+
+#ifndef LITE_HCK_INET_H
+#define LITE_HCK_INET_H
+
+#include <linux/netdevice.h>
+#include <linux/hck/lite_vendor_hooks.h>
+
+#ifndef CONFIG_HCK
+#undef CALL_HCK_LITE_HOOK
+#define CALL_HCK_LITE_HOOK(name, args...)
+#undef REGISTER_HCK_LITE_HOOK
+#define REGISTER_HCK_LITE_HOOK(name, probe)
+#undef REGISTER_HCK_LITE_DATA_HOOK
+#define REGISTER_HCK_LITE_DATA_HOOK(name, probe, data)
+#else
+
+DECLARE_HCK_LITE_HOOK(nip_ninet_ehashfn_lhck,
+	TP_PROTO(const struct sock *sk, u32 *ret),
+	TP_ARGS(sk, ret));
+
+DECLARE_HCK_LITE_HOOK(nip_ninet_gifconf_lhck,
+	TP_PROTO(struct net_device *dev, char __user *buf, int len, int size, int *ret),
+	TP_ARGS(dev, buf, len, size, ret));
+
+#endif /* CONFIG_HCK */
+
+#endif /* LITE_HCK_INET_H */
diff --git a/include/linux/hck/lite_hck_jit_memory.h b/include/linux/hck/lite_hck_jit_memory.h
new file mode 100755
index 000000000..dbce24a43
--- /dev/null
+++ b/include/linux/hck/lite_hck_jit_memory.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2023 Huawei Device Co., Ltd.
+ */
+
+#ifndef LITE_HCK_JIT_MEMORY_H
+#define LITE_HCK_JIT_MEMORY_H
+
+#include <linux/sched.h>
+#include <linux/hck/lite_vendor_hooks.h>
+
+#ifndef CONFIG_HCK
+#undef CALL_HCK_LITE_HOOK
+#define CALL_HCK_LITE_HOOK(name, args...)
+#undef REGISTER_HCK_LITE_HOOK
+#define REGISTER_HCK_LITE_HOOK(name, probe)
+#undef REGISTER_HCK_LITE_DATA_HOOK
+#define REGISTER_HCK_LITE_DATA_HOOK(name, probe, data)
+
+#else
+
+DECLARE_HCK_LITE_HOOK(find_jit_memory_lhck,
+	TP_PROTO(struct task_struct *task, unsigned long start, unsigned long size, int *err),
+	TP_ARGS(task, start, size, err));
+
+DECLARE_HCK_LITE_HOOK(check_jit_memory_lhck,
+	TP_PROTO(struct task_struct *task, unsigned long cookie, unsigned long prot,
+		unsigned long flag, unsigned long size, unsigned long *err),
+	TP_ARGS(task, cookie, prot, flag, size, err));
+
+DECLARE_HCK_LITE_HOOK(delete_jit_memory_lhck,
+	TP_PROTO(struct task_struct *task, unsigned long start, unsigned long size, int *err),
+	TP_ARGS(task, start, size, err));
+
+DECLARE_HCK_LITE_HOOK(exit_jit_memory_lhck,
+	TP_PROTO(struct task_struct *task),
+	TP_ARGS(task));
+
+#endif /* CONFIG_HCK */
+
+#endif /* LITE_HCK_JIT_MEMORY_H */
diff --git a/include/linux/hck/lite_hck_sample.h b/include/linux/hck/lite_hck_sample.h
new file mode 100755
index 000000000..f29dec41a
--- /dev/null
+++ b/include/linux/hck/lite_hck_sample.h
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* lite_hck_sample.h
+ *
+ * OpenHarmony Common Kernel Vendor Hook Sample
+ *
+ */
+
+#ifndef LITE_HCK_SAMPLE_H
+#define LITE_HCK_SAMPLE_H
+
+#include <linux/hck/lite_vendor_hooks.h>
+
+
+struct sample_hck_data {
+	int stat;
+	char *name;
+};
+
+/*
+ * The following hooks are not exported as tracepoints; they provide a
+ * mechanism for vendor modules to hook into and extend functionality.
+ */
+#ifndef CONFIG_HCK
+
+#define CALL_HCK_LITE_HOOK(name, args...)
+#define REGISTER_HCK_LITE_HOOK(name, probe)
+#define REGISTER_HCK_LITE_DATA_HOOK(name, probe, data)
+
+#else
+
+DECLARE_HCK_LITE_HOOK(get_boot_config_lhck, TP_PROTO(int *s), TP_ARGS(s));
+DECLARE_HCK_LITE_HOOK(set_boot_stat_lhck, TP_PROTO(int m), TP_ARGS(m));
+
+#endif
+
+#endif /* LITE_HCK_SAMPLE_H */
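[Illustration only, not part of the patch.] Registering against the sample hooks above with the data variant, whose probe must match the `void (*)(void *lvh_data, proto)` shape generated by the lite-hook macros (module and names hypothetical):

#include <linux/module.h>
#include <linux/printk.h>
#include <linux/hck/lite_hck_sample.h>

static struct sample_hck_data demo_data = {
	.stat = 0,
	.name = "demo",
};

/* Data probes receive the registered cookie as their first argument. */
static void demo_set_boot_stat(void *data, int m)
{
	struct sample_hck_data *d = data;

	d->stat = m;
	pr_info("boot stat for %s set to %d\n", d->name, d->stat);
}

static int __init demo_sample_init(void)
{
	REGISTER_HCK_LITE_DATA_HOOK(set_boot_stat_lhck, demo_set_boot_stat, &demo_data);
	return 0;
}
module_init(demo_sample_init);
MODULE_LICENSE("GPL");

The producer side would then fire `CALL_HCK_LITE_HOOK(set_boot_stat_lhck, 1);` at the appropriate point during boot.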
diff --git a/include/linux/hck/lite_hck_xpm.h b/include/linux/hck/lite_hck_xpm.h
new file mode 100755
index 000000000..0ec0063d3
--- /dev/null
+++ b/include/linux/hck/lite_hck_xpm.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2023 Huawei Device Co., Ltd.
+ */
+
+#ifndef _LITE_HCK_XPM_H
+#define _LITE_HCK_XPM_H
+
+#include <linux/fs.h>
+#include <linux/mm_types.h>
+#include <linux/hck/lite_vendor_hooks.h>
+
+#ifndef CONFIG_HCK
+#undef CALL_HCK_LITE_HOOK
+#define CALL_HCK_LITE_HOOK(name, args...)
+#undef REGISTER_HCK_LITE_HOOK
+#define REGISTER_HCK_LITE_HOOK(name, probe)
+#undef REGISTER_HCK_LITE_DATA_HOOK
+#define REGISTER_HCK_LITE_DATA_HOOK(name, probe, data)
+#else
+DECLARE_HCK_LITE_HOOK(xpm_delete_cache_node_lhck,
+	TP_PROTO(struct inode *file_node),
+	TP_ARGS(file_node));
+
+DECLARE_HCK_LITE_HOOK(xpm_region_outer_lhck,
+	TP_PROTO(unsigned long addr_start, unsigned long addr_end,
+		unsigned long flags, bool *ret),
+	TP_ARGS(addr_start, addr_end, flags, ret));
+
+DECLARE_HCK_LITE_HOOK(xpm_get_unmapped_area_lhck,
+	TP_PROTO(unsigned long addr, unsigned long len, unsigned long map_flags,
+		unsigned long unmapped_flags, unsigned long *ret),
+	TP_ARGS(addr, len, map_flags, unmapped_flags, ret));
+
+DECLARE_HCK_LITE_HOOK(xpm_integrity_equal_lhck,
+	TP_PROTO(struct page *page, struct page *kpage, bool *ret),
+	TP_ARGS(page, kpage, ret));
+
+DECLARE_HCK_LITE_HOOK(xpm_integrity_check_lhck,
+	TP_PROTO(struct vm_area_struct *vma, unsigned int vflags,
+		unsigned long addr, struct page *page, vm_fault_t *ret),
+	TP_ARGS(vma, vflags, addr, page, ret));
+
+DECLARE_HCK_LITE_HOOK(xpm_integrity_validate_lhck,
+	TP_PROTO(struct vm_area_struct *vma, unsigned int vflags,
+		unsigned long addr, struct page *page, vm_fault_t *ret),
+	TP_ARGS(vma, vflags, addr, page, ret));
+
+DECLARE_HCK_LITE_HOOK(xpm_integrity_update_lhck,
+	TP_PROTO(struct vm_area_struct *vma, unsigned int vflags,
+		struct page *page),
+	TP_ARGS(vma, vflags, page));
+#endif /* CONFIG_HCK */
+
+#endif /* _LITE_HCK_XPM_H */
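[Illustration only, not part of the patch.] The xpm hooks above follow the out-parameter convention used throughout these HCK headers: the caller seeds a result variable and the probe may overwrite it. A hypothetical call site in a fault path might look like:

#include <linux/mm.h>
#include <linux/hck/lite_hck_xpm.h>

static vm_fault_t demo_xpm_gate(struct vm_area_struct *vma, unsigned int vflags,
				unsigned long addr, struct page *page)
{
	vm_fault_t ret = 0;

	/* With CONFIG_HCK disabled this macro expands to nothing,
	 * so ret stays 0 and the fault proceeds normally. */
	CALL_HCK_LITE_HOOK(xpm_integrity_check_lhck, vma, vflags, addr, page, &ret);
	return ret;
}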
diff --git a/include/linux/hck/lite_vendor_hooks.h b/include/linux/hck/lite_vendor_hooks.h
new file mode 100755
index 000000000..4b0f30f6c
--- /dev/null
+++ b/include/linux/hck/lite_vendor_hooks.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2022-2022. All rights reserved.
+ * OpenHarmony Common Kernel Vendor Hook Support
+ * Based on include/trace/hooks/lite_vendor_hooks.h
+ *
+ */
+
+#ifndef LITE_VENDOR_HOOK_H
+#define LITE_VENDOR_HOOK_H
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/bug.h>
+#include <linux/tracepoint.h>
+
+struct __lvh_func {
+	void *func;
+	void *data;
+	bool has_data;
+};
+
+struct lite_vendor_hook {
+	struct mutex mutex;
+	struct __lvh_func *funcs;
+};
+#endif // LITE_VENDOR_HOOK_H
+
+#ifdef CREATE_LITE_VENDOR_HOOK
+
+#define DEFINE_HCK_LITE_HOOK(name, proto, args) \
+	struct lite_vendor_hook __lvh_##name __used \
+	__section("__vendor_hooks") = { \
+		.mutex = __MUTEX_INITIALIZER(__lvh_##name.mutex), \
+		.funcs = NULL }; \
+	EXPORT_SYMBOL(__lvh_##name); \
+	void lvh_probe_##name(proto) { return; } \
+	void lvh_probe_data_##name(void *lvh_data, proto) { return; }
+
+#undef DECLARE_HCK_LITE_HOOK
+#define DECLARE_HCK_LITE_HOOK(name, proto, args) \
+	DEFINE_HCK_LITE_HOOK(name, PARAMS(proto), PARAMS(args))
+
+#else // #ifndef CREATE_LITE_VENDOR_HOOK
+
+#define REGISTER_HCK_LITE_HOOK(name, probe) \
+	extern typeof(lvh_probe_##name) (probe); \
+	do { \
+		if (register_lvh_##name(probe)) \
+			WARN_ONCE(1, "LVH register failed!\n"); \
+	} while (0)
+
+#define REGISTER_HCK_LITE_DATA_HOOK(name, probe, data) \
+	extern typeof(lvh_probe_data_##name) (probe); \
+	do { \
+		if (register_lvh_data_##name(probe, data)) \
+			WARN_ONCE(1, "LVH register failed!\n"); \
+	} while (0)
+
+#define CALL_HCK_LITE_HOOK(name, args...) \
+	call_lvh_##name(args)
+
+#define __DECLARE_HCK_LITE_HOOK(name, proto, args) \
+	extern struct lite_vendor_hook __lvh_##name; \
+	extern void lvh_probe_##name(proto); \
+	extern void lvh_probe_data_##name(void *lvh_data, proto); \
+	static inline void \
+	call_lvh_##name(proto) \
+	{ \
+		struct __lvh_func *funcs = (&__lvh_##name)->funcs; \
+		if (funcs && funcs->func) { \
+			if (funcs->has_data) \
+				((void(*)(void *, proto))funcs->func)(funcs->data, args); \
+			else \
+				((void(*)(proto))funcs->func)(args); \
+		} \
+	} \
+	static inline int \
+	__register_lvh_##name(void *probe, void *data, bool has_data) \
+	{ \
+		int err = 0; \
+		struct __lvh_func *funcs; \
+		struct module *mod; \
+		mutex_lock(&__lvh_##name.mutex); \
+		funcs = (&__lvh_##name)->funcs; \
+		if (funcs) { \
+			if (funcs->func != probe || funcs->data != data) \
+				err = -EBUSY; \
+			goto out; \
+		} \
+		\
+		funcs = kmalloc(sizeof(*funcs), GFP_KERNEL); \
+		if (!funcs) { \
+			err = -ENOMEM; \
+			goto out; \
+		} \
+		\
+		funcs->func = probe; \
+		funcs->data = data; \
+		funcs->has_data = has_data; \
+		mod = __module_address((unsigned long)probe); \
+		if (mod) \
+			(void)try_module_get(mod); \
+		(&__lvh_##name)->funcs = funcs; \
+out: \
+		mutex_unlock(&__lvh_##name.mutex); \
+		return err; \
+	} \
+	static inline int \
+	register_lvh_##name(void (*probe)(proto)) \
+	{ \
+		return __register_lvh_##name((void *)probe, NULL, false); \
+	} \
+	static inline int \
+	register_lvh_data_##name(void (*probe)(void *lvh_data, proto), void *data) \
+	{ \
+		return __register_lvh_##name((void *)probe, data, true); \
+	}
+
+#undef DECLARE_HCK_LITE_HOOK
+#define DECLARE_HCK_LITE_HOOK(name, proto, args) \
+	__DECLARE_HCK_LITE_HOOK(name, PARAMS(proto), PARAMS(args))
+
+#endif // CREATE_LITE_VENDOR_HOOK
diff --git a/include/linux/sched.h b/include/linux/sched.h
index dcba347cb..8dbd1e021 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1488,6 +1488,10 @@ struct task_struct {
 	 */
 	struct callback_head		l1d_flush_kill;
 #endif
+#ifdef CONFIG_ACCESS_TOKENID
+	u64				token;
+	u64				ftoken;
+#endif
 
 	/*
 	 * New fields for task_struct should be added above here, so that
diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h
index b0542cd11..ada59ae1d 100644
--- a/include/linux/uidgid.h
+++ b/include/linux/uidgid.h
@@ -55,6 +55,9 @@ static inline gid_t __kgid_val(kgid_t gid)
 #define GLOBAL_ROOT_UID KUIDT_INIT(0)
 #define GLOBAL_ROOT_GID KGIDT_INIT(0)
 
+#ifdef CONFIG_ACCESS_TOKENID
+#define NWEBSPAWN_UID KUIDT_INIT(3081)
+#endif
 #define INVALID_UID KUIDT_INIT(-1)
 #define INVALID_GID KGIDT_INIT(-1)
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 908ba3c93..00370f996 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -895,6 +895,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 
 	err = arch_dup_task_struct(tsk, orig);
 
+#ifdef CONFIG_ACCESS_TOKENID
+	tsk->token = orig->token;
+	tsk->ftoken = 0;
+#endif
 	/*
 	 * arch_dup_task_struct() clobbers the stack-related fields. Make
 	 * sure they're properly initialized before using any stack-related
-- 
Gitee
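[Illustration only, appended after the patch.] Taken together, the lite vendor hook machinery works in four steps; a sketch with all `foo` names hypothetical:

#include <linux/printk.h>
#include <linux/hck/lite_vendor_hooks.h>

/* 1) Declare the hook in a header, e.g. include/linux/hck/lite_hck_foo.h: */
DECLARE_HCK_LITE_HOOK(foo_ready_lhck,
	TP_PROTO(int status),
	TP_ARGS(status));

/* 2) Instantiate it exactly once: drivers/hck/vendor_hooks.c defines
 *    CREATE_LITE_VENDOR_HOOK before including the header, which turns the
 *    same DECLARE_HCK_LITE_HOOK line into the __lvh_foo_ready_lhck object. */

/* 3) A vendor module registers its probe (at most one probe per hook,
 *    since __register_lvh_*() returns -EBUSY for a second registrant): */
static void demo_foo_ready(int status)
{
	pr_info("foo ready: %d\n", status);
}

static int __init demo_init(void)
{
	REGISTER_HCK_LITE_HOOK(foo_ready_lhck, demo_foo_ready);
	return 0;
}

/* 4) Core code fires the hook; with CONFIG_HCK=n this compiles away: */
static void demo_fire(void)
{
	CALL_HCK_LITE_HOOK(foo_ready_lhck, 1);
}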